xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64InstrInfo.td (revision ae8d58814089308028046ac80aeeb9cbb784bd0a)
1//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// AArch64 Instruction definitions.
10//
11//===----------------------------------------------------------------------===//
12
13//===----------------------------------------------------------------------===//
14// ARM Instruction Predicate Definitions.
15//
// Architecture-version predicates. Each record pairs a code-generation
// predicate (a C++ expression evaluated on the Subtarget) with an assembler
// predicate (a feature expression plus the name passed as its second argument).
//
// NOTE(review): HasV8_0a is the only record in this group that uses plain
// AssemblerPredicate instead of AssemblerPredicateWithAll - presumably
// intentional; confirm before changing.
def HasV8_0a
    : Predicate<"Subtarget->hasV8_0aOps()">,
      AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">;
def HasV8_1a
    : Predicate<"Subtarget->hasV8_1aOps()">,
      AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a
    : Predicate<"Subtarget->hasV8_2aOps()">,
      AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a
    : Predicate<"Subtarget->hasV8_3aOps()">,
      AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a
    : Predicate<"Subtarget->hasV8_4aOps()">,
      AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a
    : Predicate<"Subtarget->hasV8_5aOps()">,
      AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a
    : Predicate<"Subtarget->hasV8_6aOps()">,
      AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a
    : Predicate<"Subtarget->hasV8_7aOps()">,
      AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">;
def HasV8_8a
    : Predicate<"Subtarget->hasV8_8aOps()">,
      AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">;
def HasV8_9a
    : Predicate<"Subtarget->hasV8_9aOps()">,
      AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">;
def HasV9_0a
    : Predicate<"Subtarget->hasV9_0aOps()">,
      AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a
    : Predicate<"Subtarget->hasV9_1aOps()">,
      AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a
    : Predicate<"Subtarget->hasV9_2aOps()">,
      AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a
    : Predicate<"Subtarget->hasV9_3aOps()">,
      AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">;
def HasV9_4a
    : Predicate<"Subtarget->hasV9_4aOps()">,
      AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
def HasV8_0r
    : Predicate<"Subtarget->hasV8_0rOps()">,
      AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;
48
// Single-feature predicates: each one tests exactly one subtarget feature,
// both at code-generation time and in the assembler.
def HasEL2VMSA
    : Predicate<"Subtarget->hasEL2VMSA()">,
      AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;

def HasEL3
    : Predicate<"Subtarget->hasEL3()">,
      AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;

def HasVH
    : Predicate<"Subtarget->hasVH()">,
      AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;

def HasLOR
    : Predicate<"Subtarget->hasLOR()">,
      AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;

def HasPAuth
    : Predicate<"Subtarget->hasPAuth()">,
      AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;

def HasPAuthLR
    : Predicate<"Subtarget->hasPAuthLR()">,
      AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;

def HasJS
    : Predicate<"Subtarget->hasJS()">,
      AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;

def HasCCIDX
    : Predicate<"Subtarget->hasCCIDX()">,
      AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;

def HasComplxNum
    : Predicate<"Subtarget->hasComplxNum()">,
      AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;

def HasNV
    : Predicate<"Subtarget->hasNV()">,
      AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;

def HasMPAM
    : Predicate<"Subtarget->hasMPAM()">,
      AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;

def HasDIT
    : Predicate<"Subtarget->hasDIT()">,
      AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;

def HasTRACEV8_4
    : Predicate<"Subtarget->hasTRACEV8_4()">,
      AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;

def HasAM
    : Predicate<"Subtarget->hasAM()">,
      AssemblerPredicateWithAll<(all_of FeatureAM), "am">;

def HasSEL2
    : Predicate<"Subtarget->hasSEL2()">,
      AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;

def HasTLB_RMI
    : Predicate<"Subtarget->hasTLB_RMI()">,
      AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;

def HasFlagM
    : Predicate<"Subtarget->hasFlagM()">,
      AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;

def HasRCPC_IMMO
    : Predicate<"Subtarget->hasRCPC_IMMO()">,
      AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;
102
// Floating-point, NEON, crypto, SVE and SME feature predicates.
// The HasNo* records carry only a code-generation predicate (the negated
// subtarget query) and have no assembler predicate attached.
def HasFPARMv8
    : Predicate<"Subtarget->hasFPARMv8()">,
      AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON
    : Predicate<"Subtarget->hasNEON()">,
      AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasSM4
    : Predicate<"Subtarget->hasSM4()">,
      AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
def HasSHA3
    : Predicate<"Subtarget->hasSHA3()">,
      AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">;
def HasSHA2
    : Predicate<"Subtarget->hasSHA2()">,
      AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">;
def HasAES
    : Predicate<"Subtarget->hasAES()">,
      AssemblerPredicateWithAll<(all_of FeatureAES), "aes">;
def HasDotProd
    : Predicate<"Subtarget->hasDotProd()">,
      AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">;
def HasCRC
    : Predicate<"Subtarget->hasCRC()">,
      AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">;
def HasCSSC
    : Predicate<"Subtarget->hasCSSC()">,
      AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">;
def HasNoCSSC : Predicate<"!Subtarget->hasCSSC()">;
def HasLSE
    : Predicate<"Subtarget->hasLSE()">,
      AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">;
def HasNoLSE : Predicate<"!Subtarget->hasLSE()">;
def HasRAS
    : Predicate<"Subtarget->hasRAS()">,
      AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
def HasRDM
    : Predicate<"Subtarget->hasRDM()">,
      AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">;
def HasFullFP16
    : Predicate<"Subtarget->hasFullFP16()">,
      AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">;
def HasFP16FML
    : Predicate<"Subtarget->hasFP16FML()">,
      AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE
    : Predicate<"Subtarget->hasSPE()">,
      AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">;
def HasFuseAES
    : Predicate<"Subtarget->hasFuseAES()">,
      AssemblerPredicateWithAll<(all_of FeatureFuseAES), "fuse-aes">;
def HasSVE
    : Predicate<"Subtarget->hasSVE()">,
      AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
def HasSVE2
    : Predicate<"Subtarget->hasSVE2()">,
      AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1
    : Predicate<"Subtarget->hasSVE2p1()">,
      AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">;
def HasSVE2AES
    : Predicate<"Subtarget->hasSVE2AES()">,
      AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4
    : Predicate<"Subtarget->hasSVE2SM4()">,
      AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3
    : Predicate<"Subtarget->hasSVE2SHA3()">,
      AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVE2BitPerm
    : Predicate<"Subtarget->hasSVE2BitPerm()">,
      AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
def HasB16B16
    : Predicate<"Subtarget->hasB16B16()">,
      AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">;
def HasSME
    : Predicate<"Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
def HasSMEF64F64
    : Predicate<"Subtarget->hasSMEF64F64()">,
      AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
def HasSMEF16F16
    : Predicate<"Subtarget->hasSMEF16F16()">,
      AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
def HasSMEFA64
    : Predicate<"Subtarget->hasSMEFA64()">,
      AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
def HasSMEI16I64
    : Predicate<"Subtarget->hasSMEI16I64()">,
      AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
def HasSME2
    : Predicate<"Subtarget->hasSME2()">,
      AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2p1
    : Predicate<"Subtarget->hasSME2p1()">,
      AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
def HasFPMR
    : Predicate<"Subtarget->hasFPMR()">,
      AssemblerPredicateWithAll<(all_of FeatureFPMR), "fpmr">;
def HasFP8
    : Predicate<"Subtarget->hasFP8()">,
      AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
def HasFAMINMAX
    : Predicate<"Subtarget->hasFAMINMAX()">,
      AssemblerPredicateWithAll<(all_of FeatureFAMINMAX), "faminmax">;
def HasFP8FMA
    : Predicate<"Subtarget->hasFP8FMA()">,
      AssemblerPredicateWithAll<(all_of FeatureFP8FMA), "fp8fma">;
// Satisfied by the streaming feature alone, or by SVE2 together with the
// corresponding non-streaming feature.
def HasSSVE_FP8FMA
    : Predicate<"Subtarget->hasSSVE_FP8FMA() || "
                "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
      AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8FMA,
                                        (all_of FeatureSVE2, FeatureFP8FMA)),
                                "ssve-fp8fma or (sve2 and fp8fma)">;
def HasFP8DOT2
    : Predicate<"Subtarget->hasFP8DOT2()">,
      AssemblerPredicateWithAll<(all_of FeatureFP8DOT2), "fp8dot2">;
def HasSSVE_FP8DOT2
    : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
                "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
      AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT2,
                                        (all_of FeatureSVE2, FeatureFP8DOT2)),
                                "ssve-fp8dot2 or (sve2 and fp8dot2)">;
def HasFP8DOT4
    : Predicate<"Subtarget->hasFP8DOT4()">,
      AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">;
def HasSSVE_FP8DOT4
    : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
                "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">,
      AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4,
                                        (all_of FeatureSVE2, FeatureFP8DOT4)),
                                "ssve-fp8dot4 or (sve2 and fp8dot4)">;
def HasLUT
    : Predicate<"Subtarget->hasLUT()">,
      AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
def HasSME_LUTv2
    : Predicate<"Subtarget->hasSME_LUTv2()">,
      AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
def HasSMEF8F16
    : Predicate<"Subtarget->hasSMEF8F16()">,
      AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
def HasSMEF8F32
    : Predicate<"Subtarget->hasSMEF8F32()">,
      AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
203
// Some SVE(2) instructions are also legal in Streaming SVE execution mode, so
// they are enabled when either of the listed features has been specified.
def HasSVEorSME : Predicate<"Subtarget->hasSVEorSME()">,
                  AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
                                            "sve or sme">;
def HasSVE2orSME : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME()">,
                   AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
                                             "sve2 or sme">;
def HasSVE2orSME2 : Predicate<"Subtarget->hasSVE2() || Subtarget->hasSME2()">,
                    AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
                                              "sve2 or sme2">;
def HasSVE2p1_or_HasSME
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME()">,
      AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1),
                                "sme or sve2p1">;
def HasSVE2p1_or_HasSME2
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1),
                                "sme2 or sve2p1">;
def HasSVE2p1_or_HasSME2p1
    : Predicate<"Subtarget->hasSVE2p1() || Subtarget->hasSME2p1()">,
      AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1),
                                "sme2p1 or sve2p1">;
// Likewise, some NEON instructions are legal in Streaming SVE execution mode,
// so they are enabled when either NEON or SME has been specified.
def HasNEONorSME : Predicate<"Subtarget->hasNEON() || Subtarget->hasSME()">,
                   AssemblerPredicateWithAll<(any_of FeatureNEON, FeatureSME),
                                             "neon or sme">;
// Further single-feature predicates. Note that a few of them query a feature
// record whose name differs from the predicate name (for example HasAltNZCV
// tests FeatureAltFPCmp and HasBTI tests FeatureBranchTargetId).
def HasRCPC
    : Predicate<"Subtarget->hasRCPC()">,
      AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV
    : Predicate<"Subtarget->hasAlternativeNZCV()">,
      AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264
    : Predicate<"Subtarget->hasFRInt3264()">,
      AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB
    : Predicate<"Subtarget->hasSB()">,
      AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
def HasPredRes
    : Predicate<"Subtarget->hasPredRes()">,
      AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP
    : Predicate<"Subtarget->hasCCDP()">,
      AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI
    : Predicate<"Subtarget->hasBTI()">,
      AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE
    : Predicate<"Subtarget->hasMTE()">,
      AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME
    : Predicate<"Subtarget->hasTME()">,
      AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE
    : Predicate<"Subtarget->hasETE()">,
      AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE
    : Predicate<"Subtarget->hasTRBE()">,
      AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16
    : Predicate<"Subtarget->hasBF16()">,
      AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasMatMulInt8
    : Predicate<"Subtarget->hasMatMulInt8()">,
      AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32
    : Predicate<"Subtarget->hasMatMulFP32()">,
      AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64
    : Predicate<"Subtarget->hasMatMulFP64()">,
      AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasXS
    : Predicate<"Subtarget->hasXS()">,
      AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT
    : Predicate<"Subtarget->hasWFxT()">,
      AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64
    : Predicate<"Subtarget->hasLS64()">,
      AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE
    : Predicate<"Subtarget->hasBRBE()">,
      AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF
    : Predicate<"Subtarget->hasSPE_EEF()">,
      AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC
    : Predicate<"Subtarget->hasHBC()">,
      AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS
    : Predicate<"Subtarget->hasMOPS()">,
      AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasCLRBHB
    : Predicate<"Subtarget->hasCLRBHB()">,
      AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
def HasSPECRES2
    : Predicate<"Subtarget->hasSPECRES2()">,
      AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
def HasITE
    : Predicate<"Subtarget->hasITE()">,
      AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
def HasTHE
    : Predicate<"Subtarget->hasTHE()">,
      AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
def HasRCPC3
    : Predicate<"Subtarget->hasRCPC3()">,
      AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def HasLSE128
    : Predicate<"Subtarget->hasLSE128()">,
      AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128
    : Predicate<"Subtarget->hasD128()">,
      AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
def HasCHK
    : Predicate<"Subtarget->hasCHK()">,
      AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
def HasGCS
    : Predicate<"Subtarget->hasGCS()">,
      AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
def HasCPA
    : Predicate<"Subtarget->hasCPA()">,
      AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">;
// Code-generation-only predicates: endianness, target OS and tuning queries.
// None of these (except UseNegativeImmediates) has an assembler predicate.
def IsLE      : Predicate<"Subtarget->isLittleEndian()">;
def IsBE      : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos :
  Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32 :
  Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

// The code-generation condition is the constant "false"; only the assembler
// predicate (FeatureNoNegativeImmediates not set) carries a real condition.
def UseNegativeImmediates :
  Predicate<"false">,
  AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                     "NegativeImmediates">;

def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;

// True when the subtarget does NOT report noSVEFPLD1R().
def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;

def IsNeonAvailable : Predicate<"Subtarget->isNeonAvailable()">;
314
// Selection-DAG node for ISD::LOCAL_RECOVER: one result and one operand,
// both of the same integer type.
def AArch64LocalRecover :
  SDNode<"ISD::LOCAL_RECOVER",
         SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisInt<1>]>>;
318
319
320//===----------------------------------------------------------------------===//
321// AArch64-specific DAG Nodes.
322//
323
// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
// Two results, two operands: RES1, LHS and RHS share one integer type and
// the FLAGS result is i32.
def SDTBinaryArithWithFlagsOut :
  SDTypeProfile<2, 2, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
                       SDTCisInt<0>, SDTCisVT<1, i32>]>;
329
// SDTBinaryArithWithFlagsIn - RES1 = op LHS, RHS, FLAGS
// One result, three operands: RES1, LHS and RHS share one integer type and
// the incoming FLAGS operand is i32. There is no FLAGS result in this profile
// (contrast with SDTBinaryArithWithFlagsInOut).
def SDTBinaryArithWithFlagsIn :
  SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                       SDTCisInt<0>, SDTCisVT<3, i32>]>;
336
// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
// Two results, three operands: RES1, LHS and RHS share one integer type;
// both the FLAGS result and the FLAGS operand are i32.
def SDTBinaryArithWithFlagsInOut :
  SDTypeProfile<2, 3, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>,
                       SDTCisInt<0>,
                       SDTCisVT<1, i32>, SDTCisVT<4, i32>]>;
344
// Branch type profiles; none of them produces a result.
// Brcond: OtherVT operand followed by two i32 operands.
def SDT_AArch64Brcond :
  SDTypeProfile<0, 3, [SDTCisVT<0, OtherVT>,
                       SDTCisVT<1, i32>,
                       SDTCisVT<2, i32>]>;
// cbz: integer operand followed by an OtherVT operand.
def SDT_AArch64cbz :
  SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
// tbz: two integer operands followed by an OtherVT operand.
def SDT_AArch64tbz :
  SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisVT<2, OtherVT>]>;
351
352
// Conditional select: the result and the first two operands share a type;
// operand 3 is an integer and operand 4 is i32.
def SDT_AArch64CSel :
  SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                       SDTCisInt<3>, SDTCisVT<4, i32>]>;
// Conditional compare (integer): i32 result, two same-typed integer values,
// two further integer operands and a trailing i32 operand.
def SDT_AArch64CCMP :
  SDTypeProfile<1, 5, [SDTCisVT<0, i32>,
                       SDTCisInt<1>, SDTCisSameAs<1, 2>,
                       SDTCisInt<3>, SDTCisInt<4>,
                       SDTCisVT<5, i32>]>;
// Conditional compare (floating point): as SDT_AArch64CCMP, except that the
// two compared values are floating point.
def SDT_AArch64FCCMP :
  SDTypeProfile<1, 5, [SDTCisVT<0, i32>,
                       SDTCisFP<1>, SDTCisSameAs<1, 2>,
                       SDTCisInt<3>, SDTCisInt<4>,
                       SDTCisVT<5, i32>]>;
// Floating-point compare: no result, two floating-point operands of one type.
def SDT_AArch64FCmp :
  SDTypeProfile<0, 2, [SDTCisFP<0>, SDTCisSameAs<0, 1>]>;
// Vector-oriented type profiles. All of them produce exactly one result.
def SDT_AArch64Dup :
  SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane :
  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr :
  SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip :
  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit :
  SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift :
  SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm :
  SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                       SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec :
  SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec :
  SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                       SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift :
  SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
// Dot product: result and operand 1 share a vector type; operands 2 and 3
// share a (possibly different) vector type.
def SDT_AArch64Dot :
  SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                       SDTCisVec<2>, SDTCisSameAs<2,3>]>;

def SDT_AArch64vshiftinsert :
  SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
                       SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>;

def SDT_AArch64unvec :
  SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
// No type constraints at all on the result or the operand.
def SDT_AArch64fcmpz :
  SDTypeProfile<1, 1, []>;
// Only constraint: the two operands have the same type.
def SDT_AArch64fcmp :
  SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec :
  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec :
  SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                       SDTCisSameAs<0,2>, SDTCisSameAs<0,3>]>;
// Tail-call return: no result; operand 0 is pointer-typed.
def SDT_AArch64TCRET :
  SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
// Prefetch: no result; an i32 operand followed by a pointer operand.
def SDT_AArch64PREFETCH :
  SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

// Floating-point result whose type is the same as the operand's type.
def SDT_AArch64ITOF :
  SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

// No result; the first two operands are pointer-typed.
def SDT_AArch64TLSDescCall :
  SDTypeProfile<0, -2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;

def SDT_AArch64uaddlp :
  SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

// Paired loads: two results of the same type plus a pointer operand.
def SDT_AArch64ldp :
  SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldiapp :
  SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldnp :
  SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
// Paired stores: no result; two same-typed values plus a pointer operand.
def SDT_AArch64stp :
  SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stilp :
  SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp :
  SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
420
// Type profile for the node that generates the general dynamic sequences, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1
//
// Results:  none (the TPIDR_EL0 offset is put directly in X0).
// Operands: one, the variable, which is pointer-typed.
def SDT_AArch64TLSDescCallSeq :
  SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
432
// One i64 result built from four operands that all have type i32.
def SDT_AArch64WrapperLarge :
  SDTypeProfile<1, 4, [SDTCisVT<0, i64>,
                       SDTCisVT<1, i32>,
                       SDTCisSameAs<1, 2>,
                       SDTCisSameAs<1, 3>,
                       SDTCisSameAs<1, 4>]>;
437
// Table lookup: vector result with the same type as operand 1; operand 2 is
// an integer type.
def SDT_AArch64TBL :
  SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>]>;
441
// Non-extending masked load fragment: matches an unindexed masked load that
// performs no extension and is not marked non-temporal.
def nonext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  auto *MLoad = cast<MaskedLoadSDNode>(N);
  if (MLoad->getExtensionType() != ISD::NON_EXTLOAD)
    return false;
  return MLoad->isUnindexed() && !MLoad->isNonTemporal();
}]>;
// Any-extending or zero-extending masked load fragment: matches an unindexed
// masked load whose extension type is EXTLOAD or ZEXTLOAD.
def azext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  auto *MLoad = cast<MaskedLoadSDNode>(N);
  ISD::LoadExtType ExtKind = MLoad->getExtensionType();
  bool IsAnyOrZeroExt = ExtKind == ISD::EXTLOAD || ExtKind == ISD::ZEXTLOAD;
  return IsAnyOrZeroExt && MLoad->isUnindexed();
}]>;
// Refinements of azext_masked_load keyed on the scalar type of the memory VT.
def azext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  EVT MemEltVT = cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType();
  return MemEltVT == MVT::i8;
}]>;
def azext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  EVT MemEltVT = cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType();
  return MemEltVT == MVT::i16;
}]>;
def azext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  EVT MemEltVT = cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType();
  return MemEltVT == MVT::i32;
}]>;
// Sign-extending masked load fragment: matches an unindexed masked load
// whose extension type is SEXTLOAD.
def sext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  auto *MLoad = cast<MaskedLoadSDNode>(N);
  if (MLoad->getExtensionType() != ISD::SEXTLOAD)
    return false;
  return MLoad->isUnindexed();
}]>;
// Refinements of sext_masked_load keyed on the scalar type of the memory VT.
def sext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  EVT MemEltVT = cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType();
  return MemEltVT == MVT::i8;
}]>;
def sext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  EVT MemEltVT = cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType();
  return MemEltVT == MVT::i16;
}]>;
def sext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  EVT MemEltVT = cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType();
  return MemEltVT == MVT::i32;
}]>;
495
// Non-temporal masked load fragment: matches an unindexed, non-extending
// masked load that is marked non-temporal.
def non_temporal_load :
   PatFrag<(ops node:$ptr, node:$pred, node:$def),
           (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
   auto *MLoad = cast<MaskedLoadSDNode>(N);
   if (MLoad->getExtensionType() != ISD::NON_EXTLOAD)
     return false;
   return MLoad->isUnindexed() && MLoad->isNonTemporal();
}]>;
503
// Non-truncating masked store fragment: matches an unindexed masked store
// that neither truncates nor is marked non-temporal.
def nontrunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  auto *MStore = cast<MaskedStoreSDNode>(N);
  if (MStore->isTruncatingStore())
    return false;
  return MStore->isUnindexed() && !MStore->isNonTemporal();
}]>;
// Truncating masked store fragment: matches an unindexed masked store that
// truncates its value.
def trunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  auto *MStore = cast<MaskedStoreSDNode>(N);
  if (!MStore->isTruncatingStore())
    return false;
  return MStore->isUnindexed();
}]>;
// Refinement of trunc_masked_store that only accepts stores whose memory VT
// has an i8 scalar type.
def trunc_masked_store_i8 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  const auto *MaskedSt = cast<MaskedStoreSDNode>(N);
  EVT MemEltVT = MaskedSt->getMemoryVT().getScalarType();
  return MemEltVT == MVT::i8;
}]>;
// Refinement of trunc_masked_store that only accepts stores whose memory VT
// has an i16 scalar type.
def trunc_masked_store_i16 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  const auto *MaskedSt = cast<MaskedStoreSDNode>(N);
  EVT MemEltVT = MaskedSt->getMemoryVT().getScalarType();
  return MemEltVT == MVT::i16;
}]>;
// Refinement of trunc_masked_store that only accepts stores whose memory VT
// has an i32 scalar type.
def trunc_masked_store_i32 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  const auto *MaskedSt = cast<MaskedStoreSDNode>(N);
  EVT MemEltVT = MaskedSt->getMemoryVT().getScalarType();
  return MemEltVT == MVT::i32;
}]>;
534
// Masked store fragment that matches only non-truncating, unindexed stores
// flagged as non-temporal.
def non_temporal_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  const auto *MaskedSt = cast<MaskedStoreSDNode>(N);
  if (MaskedSt->isTruncatingStore())
    return false;
  if (!MaskedSt->isUnindexed())
    return false;
  return MaskedSt->isNonTemporal();
}]>;
542
// Expands a masked gather/scatter fragment into four variants keyed on how the
// index operand is interpreted. In every variant the index counts as signed
// when the node reports a signed index or when the index vector's element
// type is i64.
multiclass masked_gather_scatter<PatFrags GatherScatterOp> {
  // Signed, scaled index: offsets = (signed)Index << sizeof(elt)
  def NAME#_signed_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    const auto *GS = cast<MaskedGatherScatterSDNode>(N);
    EVT IdxEltVT = GS->getIndex().getValueType().getVectorElementType();
    if (!GS->isIndexSigned() && IdxEltVT != MVT::i64)
      return false;
    return GS->isIndexScaled();
  }]>;
  // Signed, unscaled index: offsets = (signed)Index
  def NAME#_signed_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    const auto *GS = cast<MaskedGatherScatterSDNode>(N);
    EVT IdxEltVT = GS->getIndex().getValueType().getVectorElementType();
    if (!GS->isIndexSigned() && IdxEltVT != MVT::i64)
      return false;
    return !GS->isIndexScaled();
  }]>;
  // Unsigned, scaled index: offsets = (unsigned)Index << sizeof(elt)
  def NAME#_unsigned_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    const auto *GS = cast<MaskedGatherScatterSDNode>(N);
    EVT IdxEltVT = GS->getIndex().getValueType().getVectorElementType();
    if (GS->isIndexSigned() || IdxEltVT == MVT::i64)
      return false;
    return GS->isIndexScaled();
  }]>;
  // Unsigned, unscaled index: offsets = (unsigned)Index
  def NAME#_unsigned_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx),[{
    const auto *GS = cast<MaskedGatherScatterSDNode>(N);
    EVT IdxEltVT = GS->getIndex().getValueType().getVectorElementType();
    if (GS->isIndexSigned() || IdxEltVT == MVT::i64)
      return false;
    return !GS->isIndexScaled();
  }]>;
}
581
// Each defm below expands masked_gather_scatter, producing the
// <name>_signed_scaled, <name>_signed_unscaled, <name>_unsigned_scaled and
// <name>_unsigned_unscaled fragments for the gather fragment passed as the
// template argument.
582defm nonext_masked_gather    : masked_gather_scatter<nonext_masked_gather>;
583defm azext_masked_gather_i8  : masked_gather_scatter<azext_masked_gather_i8>;
584defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>;
585defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>;
586defm sext_masked_gather_i8   : masked_gather_scatter<sext_masked_gather_i8>;
587defm sext_masked_gather_i16  : masked_gather_scatter<sext_masked_gather_i16>;
588defm sext_masked_gather_i32  : masked_gather_scatter<sext_masked_gather_i32>;
589
// The same four index variants, instantiated for the scatter fragments.
590defm nontrunc_masked_scatter  : masked_gather_scatter<nontrunc_masked_scatter>;
591defm trunc_masked_scatter_i8  : masked_gather_scatter<trunc_masked_scatter_i8>;
592defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>;
593defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>;
594
// top16Zero - matches an i32 value whose upper 16 bits are known to be zero.
def top16Zero: PatLeaf<(i32 GPR32:$src), [{
  SDValue Src(N, 0);
  if (Src->getValueType(0) != MVT::i32)
    return false;
  const APInt UpperHalf = APInt::getHighBitsSet(32, 16);
  return CurDAG->MaskedValueIsZero(Src, UpperHalf);
}]>;
600
// top32Zero - matches an i64 value whose upper 32 bits are known to be zero.
def top32Zero: PatLeaf<(i64 GPR64:$src), [{
  SDValue Src(N, 0);
  if (Src->getValueType(0) != MVT::i64)
    return false;
  const APInt UpperHalf = APInt::getHighBitsSet(64, 32);
  return CurDAG->MaskedValueIsZero(Src, UpperHalf);
}]>;
606
// topbitsallzero32 - matches an i32 value in which every bit except the
// lowest one is known to be zero.
def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{
  SDValue Src(N, 0);
  if (Src->getValueType(0) != MVT::i32)
    return false;
  const APInt AllButLowBit = APInt::getHighBitsSet(32, 31);
  return CurDAG->MaskedValueIsZero(Src, AllButLowBit);
}]>;
// topbitsallzero64 - matches an i64 value in which every bit except the
// lowest one is known to be zero.
def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
  SDValue Src(N, 0);
  if (Src->getValueType(0) != MVT::i64)
    return false;
  const APInt AllButLowBit = APInt::getHighBitsSet(64, 63);
  return CurDAG->MaskedValueIsZero(Src, AllButLowBit);
}]>;
616
617// Node definitions.
// Address-forming nodes. ADRP, ADR and LOADgot use the unary integer type
// profile and ADDlow the binary integer profile; none of them carries extra
// node properties.
618def AArch64adrp          : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
619def AArch64adr           : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
620def AArch64addlow        : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
621def AArch64LOADgot       : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
// Call-sequence markers. These wrap the generic ISD::CALLSEQ_START/END
// opcodes and constrain both operands to i32. The end marker additionally
// accepts optional incoming glue.
622def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
623                                SDCallSeqStart<[ SDTCisVT<0, i32>,
624                                                 SDTCisVT<1, i32> ]>,
625                                [SDNPHasChain, SDNPOutGlue]>;
626def AArch64callseq_end   : SDNode<"ISD::CALLSEQ_END",
627                                SDCallSeqEnd<[ SDTCisVT<0, i32>,
628                                               SDTCisVT<1, i32> ]>,
629                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
// Call nodes. All four variants share the same shape: no results, a variadic
// operand list whose operand 0 is pointer-typed, a chain, optional incoming
// glue and outgoing glue. They differ only in the AArch64ISD opcode.
630def AArch64call          : SDNode<"AArch64ISD::CALL",
631                                SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
632                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
633                                 SDNPVariadic]>;
634
635def AArch64call_bti      : SDNode<"AArch64ISD::CALL_BTI",
636                                SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
637                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
638                                 SDNPVariadic]>;
639
640def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER",
641                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
642                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
643                              SDNPVariadic]>;
644
645def AArch64call_arm64ec_to_x64 : SDNode<"AArch64ISD::CALL_ARM64EC_TO_X64",
646                                      SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
647                                      [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
648                                       SDNPVariadic]>;
649
// Branch nodes. Each one is chained (SDNPHasChain) and has no other node
// properties. CBZ/CBNZ share SDT_AArch64cbz and TBZ/TBNZ share SDT_AArch64tbz.
650def AArch64brcond        : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
651                                [SDNPHasChain]>;
652def AArch64cbz           : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
653                                [SDNPHasChain]>;
654def AArch64cbnz           : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
655                                [SDNPHasChain]>;
656def AArch64tbz           : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
657                                [SDNPHasChain]>;
658def AArch64tbnz           : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
659                                [SDNPHasChain]>;
660
661
// Conditional-select family; all four share the SDT_AArch64CSel type profile.
662def AArch64csel          : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
663def AArch64csinv         : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
664def AArch64csneg         : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
665def AArch64csinc         : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
// Return node: chained, variadic, with optional incoming glue.
666def AArch64retglue       : SDNode<"AArch64ISD::RET_GLUE", SDTNone,
667                                [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
// Arithmetic nodes that consume and/or produce flags, as encoded by their
// SDTBinaryArithWithFlags{In,Out,InOut} profiles. Only ADDS and ANDS are
// marked commutative.
668def AArch64adc       : SDNode<"AArch64ISD::ADC",  SDTBinaryArithWithFlagsIn >;
669def AArch64sbc       : SDNode<"AArch64ISD::SBC",  SDTBinaryArithWithFlagsIn>;
670def AArch64add_flag  : SDNode<"AArch64ISD::ADDS",  SDTBinaryArithWithFlagsOut,
671                            [SDNPCommutative]>;
672def AArch64sub_flag  : SDNode<"AArch64ISD::SUBS",  SDTBinaryArithWithFlagsOut>;
673def AArch64and_flag  : SDNode<"AArch64ISD::ANDS",  SDTBinaryArithWithFlagsOut,
674                            [SDNPCommutative]>;
675def AArch64adc_flag  : SDNode<"AArch64ISD::ADCS",  SDTBinaryArithWithFlagsInOut>;
676def AArch64sbc_flag  : SDNode<"AArch64ISD::SBCS",  SDTBinaryArithWithFlagsInOut>;
677
// Conditional-compare nodes. The integer forms share SDT_AArch64CCMP; the
// floating-point form has its own profile.
678def AArch64ccmp      : SDNode<"AArch64ISD::CCMP",  SDT_AArch64CCMP>;
679def AArch64ccmn      : SDNode<"AArch64ISD::CCMN",  SDT_AArch64CCMP>;
680def AArch64fccmp     : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;
681
682def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;
683
// Floating-point compares. The strict variants use the same type profile as
// FCMP but are chained. AArch64any_fcmp matches either the strict FCMP or the
// plain FCMP node (STRICT_FCMPE is not included).
684def AArch64fcmp         : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
685def AArch64strict_fcmp  : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
686                                 [SDNPHasChain]>;
687def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
688                                 [SDNPHasChain]>;
689def AArch64any_fcmp     : PatFrags<(ops node:$lhs, node:$rhs),
690                                   [(AArch64strict_fcmp node:$lhs, node:$rhs),
691                                    (AArch64fcmp node:$lhs, node:$rhs)]>;
692
// Duplicate nodes: DUP has its own profile, the DUPLANE<N> variants share
// SDT_AArch64DupLane.
693def AArch64dup       : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
694def AArch64duplane8  : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
695def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
696def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
697def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
698def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>;
699
700def AArch64insr      : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;
701
// ZIP/UZP/TRN nodes; all six share SDT_AArch64Zip.
702def AArch64zip1      : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
703def AArch64zip2      : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
704def AArch64uzp1      : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
705def AArch64uzp2      : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
706def AArch64trn1      : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
707def AArch64trn2      : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;
708
// Vector immediate-move nodes. MOVIedit, MOVI and FMOV share
// SDT_AArch64MOVIedit; the shift/msl forms share SDT_AArch64MOVIshift.
709def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
710def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
711def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
712def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
713def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
714def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
715def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;
716
// REV16/32/64 use the unary vector profile; EXT has its own profile.
717def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
718def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
719def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
720def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;
721
// Vector shift nodes share SDT_AArch64vshift; the shift-and-insert forms
// (VSLI/VSRI) use SDT_AArch64vshiftinsert.
722def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;
723def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
724def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
725def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
726def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
727def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
728def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
729def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
730def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
731def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;
732
// Three-operand vector nodes (SDT_AArch64trivec).
733def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>;
734def AArch64bsp: SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;
735
// Two-operand integer vector compares (SDT_AArch64binvec).
736def AArch64cmeq: SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
737def AArch64cmge: SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
738def AArch64cmgt: SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
739def AArch64cmhi: SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
740def AArch64cmhs: SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;
741
// Two-operand floating-point vector compares (SDT_AArch64fcmp).
742def AArch64fcmeq: SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
743def AArch64fcmge: SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
744def AArch64fcmgt: SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;
745
// Single-operand integer vector compares (the "z" suffixed opcodes), using
// the unary SDT_AArch64unvec profile.
746def AArch64cmeqz: SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
747def AArch64cmgez: SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
748def AArch64cmgtz: SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
749def AArch64cmlez: SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
750def AArch64cmltz: SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
// cmtst is not a node of its own: it is matched as NOT(CMEQz(LHS & RHS)).
751def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
752                        (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
753
// Single-operand floating-point vector compares (SDT_AArch64fcmpz).
754def AArch64fcmeqz: SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
755def AArch64fcmgez: SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
756def AArch64fcmgtz: SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
757def AArch64fcmlez: SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
758def AArch64fcmltz: SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;
759
// Vector-with-immediate nodes (SDT_AArch64vecimm).
760def AArch64bici: SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
761def AArch64orri: SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;
762
// Tail-call return: chained, variadic, with optional incoming glue.
763def AArch64tcret: SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
764                  [SDNPHasChain,  SDNPOptInGlue, SDNPVariadic]>;
765
// Prefetch is chained and flagged as having side effects.
766def AArch64Prefetch        : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
767                               [SDNPHasChain, SDNPSideEffect]>;
768
769def AArch64sitof: SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
770def AArch64uitof: SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;
771
// TLS descriptor call sequence: chained and variadic. Incoming glue is
// mandatory here (SDNPInGlue, not SDNPOptInGlue), and glue is also produced.
772def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
773                                    SDT_AArch64TLSDescCallSeq,
774                                    [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
775                                     SDNPVariadic]>;
776
777
778def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
779                                 SDT_AArch64WrapperLarge>;
780
781def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;
782
// Type profile for the long multiplies: one integer result, two integer
// operands that must have the same type as each other.
783def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
784                                    SDTCisSameAs<1, 2>]>;
// All three long multiplies are marked commutative.
785def AArch64pmull    : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull,
786                             [SDNPCommutative]>;
787def AArch64smull    : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull,
788                             [SDNPCommutative]>;
789def AArch64umull    : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull,
790                             [SDNPCommutative]>;
791
// Estimate nodes use the unary FP profile; the matching "step" nodes
// (FRECPS/FRSQRTS) use the binary FP profile.
792def AArch64frecpe   : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
793def AArch64frecps   : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
794def AArch64frsqrte  : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
795def AArch64frsqrts  : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;
796
797def AArch64sdot     : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
798def AArch64udot     : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;
799
// Across-vector reductions. The add/min/max forms use the unary vector
// profile; the long add forms (UADDLV/SADDLV) use SDT_AArch64uaddlp.
800def AArch64saddv    : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
801def AArch64uaddv    : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
802def AArch64sminv    : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
803def AArch64uminv    : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
804def AArch64smaxv    : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
805def AArch64umaxv    : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
806def AArch64uaddlv   : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>;
807def AArch64saddlv   : SDNode<"AArch64ISD::SADDLV", SDT_AArch64uaddlp>;
808
// The PatFrags in this group each list two alternatives: a DAG node form
// (generic or AArch64ISD) and the corresponding int_aarch64_neon_* intrinsic.
// A pattern written against the PatFrags matches either alternative.
809def AArch64uabd     : PatFrags<(ops node:$lhs, node:$rhs),
810                               [(abdu node:$lhs, node:$rhs),
811                                (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
812def AArch64sabd     : PatFrags<(ops node:$lhs, node:$rhs),
813                               [(abds node:$lhs, node:$rhs),
814                                (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;
815
// Raw pairwise-add nodes (the *_n names), followed by the PatFrags that also
// accept the equivalent intrinsics.
816def AArch64addp_n   : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
817def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
818def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>;
819def AArch64addp     : PatFrags<(ops node:$Rn, node:$Rm),
820                               [(AArch64addp_n node:$Rn, node:$Rm),
821                                (int_aarch64_neon_addp node:$Rn, node:$Rm)]>;
822def AArch64uaddlp   : PatFrags<(ops node:$src),
823                               [(AArch64uaddlp_n node:$src),
824                                (int_aarch64_neon_uaddlp node:$src)]>;
825def AArch64saddlp   : PatFrags<(ops node:$src),
826                               [(AArch64saddlp_n node:$src),
827                                (int_aarch64_neon_saddlp node:$src)]>;
// NOTE(review): the node alternative here is AArch64addp_n (AArch64ISD::ADDP),
// the same node AArch64addp uses - presumably ADDP is also emitted for
// floating-point vector types; confirm against the lowering code.
828def AArch64faddp     : PatFrags<(ops node:$Rn, node:$Rm),
829                                [(AArch64addp_n node:$Rn, node:$Rm),
830                                 (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
// Complex pattern rooted at AArch64vlshr and selected by the C++ hook
// SelectRoundingVLShr; it yields two operands.
831def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;
// rshrn: either a truncate of the rounding shift above, or the intrinsic.
832def AArch64rshrn : PatFrags<(ops node:$LHS, node:$RHS),
833                            [(trunc (AArch64roundingvlshr node:$LHS, node:$RHS)),
834                             (int_aarch64_neon_rshrn node:$LHS, node:$RHS)]>;
// facge/facgt: an FCMGE/FCMGT of the fabs of both operands, or the intrinsic.
835def AArch64facge     : PatFrags<(ops node:$Rn, node:$Rm),
836                                [(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)),
837                                 (int_aarch64_neon_facge node:$Rn, node:$Rm)]>;
838def AArch64facgt     : PatFrags<(ops node:$Rn, node:$Rm),
839                                [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)),
840                                 (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>;
841
// FP reductions: vecreduce_fmax/fmin pair with the fmaxnmv/fminnmv
// intrinsics, while vecreduce_fmaximum/fminimum pair with fmaxv/fminv.
842def AArch64fmaxnmv : PatFrags<(ops node:$Rn),
843                              [(vecreduce_fmax node:$Rn),
844                               (int_aarch64_neon_fmaxnmv node:$Rn)]>;
845def AArch64fminnmv : PatFrags<(ops node:$Rn),
846                              [(vecreduce_fmin node:$Rn),
847                               (int_aarch64_neon_fminnmv node:$Rn)]>;
848def AArch64fmaxv : PatFrags<(ops node:$Rn),
849                            [(vecreduce_fmaximum node:$Rn),
850                             (int_aarch64_neon_fmaxv node:$Rn)]>;
851def AArch64fminv : PatFrags<(ops node:$Rn),
852                            [(vecreduce_fminimum node:$Rn),
853                             (int_aarch64_neon_fminv node:$Rn)]>;
854
// Tag-store nodes: no results, two pointer-typed operands. Each is chained,
// may store, and carries a memory operand.
855def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
856def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
857def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
858def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
859def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
860
// Unpack nodes: integer result and integer operand, with the operand type
// constrained to be smaller than the result type.
861def SDT_AArch64unpk : SDTypeProfile<1, 1, [
862    SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
863]>;
864def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
865def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
866def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
867def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;
868
// Pair load/store nodes. All are chained memory nodes carrying a memory
// operand; the load forms are SDNPMayLoad, the store forms SDNPMayStore.
869def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
870def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
871def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
872def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
873def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
874def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
875
876def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;
877
// Probed alloca: no result, one pointer-typed operand; chained and may store.
878def AArch64probedalloca
879    : SDNode<"AArch64ISD::PROBED_ALLOCA",
880             SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
881             [SDNPHasChain, SDNPMayStore]>;
882
// MRS: i64 result from an i32 operand; chained and produces glue.
883def AArch64mrs : SDNode<"AArch64ISD::MRS",
884                        SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
885                        [SDNPHasChain, SDNPOutGlue]>;
886
// RSHRNB: vector result, vector first operand, integer second operand.
// AArch64rshrnb_pf matches either the node or the SVE intrinsic.
887def SD_AArch64rshrnb : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2>]>;
888def AArch64rshrnb : SDNode<"AArch64ISD::RSHRNB_I", SD_AArch64rshrnb>;
889def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i),
890                            [(AArch64rshrnb node:$rs, node:$i),
891                            (int_aarch64_sve_rshrnb node:$rs, node:$i)]>;
892
// CTTZ_ELTS: integer result computed from a vector operand.
893def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1,
894                             [SDTCisInt<0>, SDTCisVec<1>]>, []>;
895
// Matches an 'add' node, and additionally treats an 'or' node as an 'add'
// when its two operands are known to have no bits in common.
def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
                         [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
   // A real ADD always qualifies; anything else (the 'or' alternative) only
   // qualifies when the operands are provably disjoint.
   return N->getOpcode() == ISD::ADD ||
          CurDAG->haveNoCommonBitsSet(N->getOperand(0), N->getOperand(1));
}]> {
  let GISelPredicateCode = [{
     // FIXME: only G_ADD is handled for now. Build the capability to compute
     // whether the operands of a G_OR have common bits set.
     const bool IsGAdd = MI.getOpcode() == TargetOpcode::G_ADD;
     return IsGAdd;
  }];
}
910
// Matches a 'mul' whose two operands each have more than 32 known sign bits,
// so the multiply can be reduced to one on smaller operands.
def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
  if (CurDAG->ComputeNumSignBits(N->getOperand(0)) <= 32)
    return false;
  return CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
}]>;
916
917//===----------------------------------------------------------------------===//
918
919//===----------------------------------------------------------------------===//
920
921// AArch64 Instruction Predicate Definitions.
922// We could compute these on a per-module basis but doing so requires accessing
923// the Function object through the <Target>Subtarget and objections were raised
924// to that (see post-commit review comments for r301750).
// Every predicate in this block is marked RecomputePerFunction: its condition
// refers to the current MachineFunction (MF), not only to the subtarget.
925let RecomputePerFunction = 1 in {
  // Complementary code-size predicates.
926  def ForCodeSize   : Predicate<"shouldOptForSize(MF)">;
927  def NotForCodeSize   : Predicate<"!shouldOptForSize(MF)">;
928  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
929  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;
930
  // Complementary predicates on the function's branchTargetEnforcement()
  // setting, read from AArch64FunctionInfo.
931  def UseBTI : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
932  def NotUseBTI : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() }]>;
933
  // Complementary predicates on the subtarget's hardenSlsBlr() setting.
934  def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
935  def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
936  // Toggles patterns which aren't beneficial in GlobalISel when we aren't
937  // optimizing. This allows us to selectively use patterns without impacting
938  // SelectionDAG's behaviour.
939  // FIXME: One day there will probably be a nicer way to check for this, but
940  // today is not that day.
  // True when the function is not optnone, or ISel has already failed
  // (FailedISel property set), or the function lacks the Legalized property.
941  def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
942}
943
944include "AArch64InstrFormats.td"
945include "SVEInstrFormats.td"
946include "SMEInstrFormats.td"
947
948//===----------------------------------------------------------------------===//
949
950//===----------------------------------------------------------------------===//
951// Miscellaneous instructions.
952//===----------------------------------------------------------------------===//
953
// Stack-manipulation pseudos. Everything in this block is codegen-only and
// flagged as having side effects.
954let hasSideEffects = 1, isCodeGenOnly = 1 in {
// Call-frame adjustment pseudos both read and write SP.
955let Defs = [SP], Uses = [SP] in {
956// We set Sched to empty list because we expect these instructions to simply get
957// removed in most cases.
// Selected from AArch64callseq_start; both amounts are target immediates.
958def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
959                              [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
960                              Sched<[]>;
// Selected from AArch64callseq_end; both amounts are target immediates.
961def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
962                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
963                            Sched<[]>;
964
965}
966
// The probed allocation pseudos read SP and are modelled as writing both SP
// and NZCV.
967let Defs = [SP, NZCV], Uses = [SP] in {
968// Probed stack allocation of a constant size, used in function prologues when
969// stack-clash protection is enabled.
// No selection pattern; $scratch is an output register.
970def PROBED_STACKALLOC : Pseudo<(outs GPR64:$scratch),
971                               (ins i64imm:$stacksize, i64imm:$fixed_offset,
972                                i64imm:$scalable_offset),
973                               []>,
974                               Sched<[]>;
975
976// Probed stack allocation of a variable size, used in function prologues when
977// stack-clash protection is enabled.
// No selection pattern.
978def PROBED_STACKALLOC_VAR : Pseudo<(outs),
979                                   (ins GPR64sp:$target),
980                                   []>,
981                                   Sched<[]>;
982
983// Probed stack allocations of a variable size, used for allocas of unknown size
984// when stack-clash protection is enabled.
// Selected from AArch64probedalloca and flagged usesCustomInserter.
985let usesCustomInserter = 1 in
986def PROBED_STACKALLOC_DYN : Pseudo<(outs),
987                                   (ins GPR64common:$target),
988                                   [(AArch64probedalloca GPR64common:$target)]>,
989                                   Sched<[]>;
990
991} // Defs = [SP, NZCV], Uses = [SP] in
992} // hasSideEffects = 1, isCodeGenOnly = 1
993
// Address-materialization pseudos. All are codegen-only and marked
// rematerializable.
994let isReMaterializable = 1, isCodeGenOnly = 1 in {
995// FIXME: The following pseudo instructions are only needed because remat
996// cannot handle multiple instructions.  When that changes, they can be
997// removed, along with the AArch64Wrapper node.
998
// GOT load of a global address. AddedComplexity = 10 raises this pattern's
// matching priority.
999let AddedComplexity = 10 in
1000def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
1001                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
1002              Sched<[WriteLDAdr]>;
1003
1004// The MOVaddr instruction should match only when the add is not folded
1005// into a load or store address.
// Each MOVaddr* pseudo below matches ADDlow(ADRP(hi), low) for one kind of
// target address operand; the first handles global addresses.
1006def MOVaddr
1007    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1008             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
1009                                            tglobaladdr:$low))]>,
1010      Sched<[WriteAdrAdr]>;
// Jump-table addresses.
1011def MOVaddrJT
1012    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1013             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
1014                                             tjumptable:$low))]>,
1015      Sched<[WriteAdrAdr]>;
// Constant-pool addresses.
1016def MOVaddrCP
1017    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1018             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
1019                                             tconstpool:$low))]>,
1020      Sched<[WriteAdrAdr]>;
// Block addresses.
1021def MOVaddrBA
1022    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1023             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
1024                                             tblockaddress:$low))]>,
1025      Sched<[WriteAdrAdr]>;
// TLS global addresses.
1026def MOVaddrTLS
1027    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1028             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
1029                                            tglobaltlsaddr:$low))]>,
1030      Sched<[WriteAdrAdr]>;
// External symbols.
1031def MOVaddrEXT
1032    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1033             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
1034                                            texternalsym:$low))]>,
1035      Sched<[WriteAdrAdr]>;
1036// Normally AArch64addlow either gets folded into a following ldr/str,
1037// or together with an adrp into MOVaddr above. For cases with TLS, it
1038// might appear without either of them, so allow lowering it into a plain
1039// add.
// Unlike the MOVaddr* pseudos, the base here is a register ($src), not an
// ADRP node, and the register class is GPR64sp.
1040def ADDlowTLS
1041    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
1042             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
1043                                            tglobaltlsaddr:$low))]>,
1044      Sched<[WriteAdr]>;
1045
1046} // isReMaterializable, isCodeGenOnly
1047
// Additional selection patterns that map AArch64LOADgot onto the LOADgot
// pseudo for TLS global, external-symbol and constant-pool operands. The
// pseudo's own pattern covers only tglobaladdr.
1048def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
1049          (LOADgot tglobaltlsaddr:$addr)>;
1050
1051def : Pat<(AArch64LOADgot texternalsym:$addr),
1052          (LOADgot texternalsym:$addr)>;
1053
1054def : Pat<(AArch64LOADgot tconstpool:$addr),
1055          (LOADgot tconstpool:$addr)>;
1056
1057// In general these get lowered into a sequence of three 4-byte instructions.
1058// 32-bit jump table destination is actually only 2 instructions since we can
1059// use the table itself as a PC-relative base. But optimization occurs after
1060// branch relaxation so be pessimistic.
// Size = 12 is that pessimistic three-instruction estimate. Both outputs
// ($dst and $scratch) are constrained as early-clobber, and the pseudos are
// marked not duplicable. None of them has a selection pattern.
1061let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
1062    isNotDuplicable = 1 in {
1063def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
1064                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
1065                      Sched<[]>;
1066def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
1067                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
1068                      Sched<[]>;
1069def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
1070                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
1071                     Sched<[]>;
1072}
1073
1074// Space-consuming pseudo to aid testing of placement and reachability
1075// algorithms. Immediate operand is the number of bytes this "instruction"
1076// occupies; register operands can be used to enforce dependency and constrain
1077// the scheduler.
// Selected from the int_aarch64_space intrinsic. It is modelled as having
// side effects and as possibly loading and storing.
1078let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
1079def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
1080                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
1081            Sched<[]>;
1082
// Codegen-only pseudos with side effects that take one register and produce
// one register of the same class: X is the 64-bit (GPR64) form, W the 32-bit
// (GPR32) form. Neither has a selection pattern here.
1083let hasSideEffects = 1, isCodeGenOnly = 1 in {
1084  def SpeculationSafeValueX
1085      : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
1086  def SpeculationSafeValueW
1087      : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
1088}
1089
1090// SpeculationBarrierEndBB must only be used after an unconditional control
1091// flow, i.e. after a terminator for which isBarrier is True.
// Both pseudos are themselves terminators and barriers, take no operands and
// have no selection pattern. They differ only in their declared Size.
1092let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
1093  // This gets lowered to a pair of 4-byte instructions.
1094  let Size = 8 in
1095  def SpeculationBarrierISBDSBEndBB
1096      : Pseudo<(outs), (ins), []>, Sched<[]>;
1097  // This gets lowered to a 4-byte instruction.
1098  let Size = 4 in
1099  def SpeculationBarrierSBEndBB
1100      : Pseudo<(outs), (ins), []>, Sched<[]>;
1101}
1102
1103//===----------------------------------------------------------------------===//
1104// System instructions.
1105//===----------------------------------------------------------------------===//
1106
// HINT is the generic instruction; the aliases below give mnemonics to
// specific HINT immediates.
1107def HINT : HintI<"hint">;
1108def : InstAlias<"nop",  (HINT 0b000)>;
1109def : InstAlias<"yield",(HINT 0b001)>;
1110def : InstAlias<"wfe",  (HINT 0b010)>;
1111def : InstAlias<"wfi",  (HINT 0b011)>;
1112def : InstAlias<"sev",  (HINT 0b100)>;
1113def : InstAlias<"sevl", (HINT 0b101)>;
1114def : InstAlias<"dgh",  (HINT 0b110)>;
1115def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
1116def : InstAlias<"csdb", (HINT 20)>;
1117// In order to be able to write readable assembly, LLVM should accept assembly
1118// inputs that use Branch Target Identification mnemonics, even with BTI disabled.
1119// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
1120// should not emit these mnemonics unless BTI is enabled.
// Hence each "bti" spelling is declared twice: once unconditionally with a
// trailing 0 argument, and once without it under Requires<[HasBTI]>.
1121def : InstAlias<"bti",  (HINT 32), 0>;
1122def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
1123def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
1124def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
1125
1126// v8.2a Statistical Profiling extension
// "psb" is likewise an alias of HINT, taking a psbhint_op operand; it is only
// available with HasSPE.
1127def : InstAlias<"psb $op",  (HINT psbhint_op:$op)>, Requires<[HasSPE]>;
1128
1129// As far as LLVM is concerned this writes to the system's exclusive monitors.
// It is therefore marked as both a potential load and a potential store. Its
// CRm field is an imm0_15 operand.
1130let mayLoad = 1, mayStore = 1 in
1131def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
1132
1133// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
1134// model patterns with sufficiently fine granularity.
// Both flags are therefore left unset ('?') for every record up to the
// closing brace of this let, which includes the WFxT and BRB definitions.
1135let mayLoad = ?, mayStore = ? in {
// DMB, DSB and ISB are each selected from the matching int_aarch64_* intrinsic
// with a 0..15 immediate that becomes $CRm.
1136def DMB   : CRmSystemI<barrier_op, 0b101, "dmb",
1137                       [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;
1138
1139def DSB   : CRmSystemI<barrier_op, 0b100, "dsb",
1140                       [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;
1141
1142def ISB   : CRmSystemI<barrier_op, 0b110, "isb",
1143                       [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;
1144
// TSB has no pattern; CRm is fixed to 0b0010 and encoding bit 12 is cleared.
1145def TSB   : CRmSystemI<barrier_op, 0b010, "tsb", []> {
1146  let CRm        = 0b0010;
1147  let Inst{12}   = 0;
1148  let Predicates = [HasTRACEV8_4];
1149}
1150
// DSBnXS reuses the "dsb" mnemonic with a barrier_nxs_op operand; the low two
// CRm bits are fixed to 0b11 and encoding bits 9-8 to 0b10.
1151def DSBnXS  : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
1152  let CRm{1-0}   = 0b11;
1153  let Inst{9-8}  = 0b10;
1154  let Predicates = [HasXS];
1155}
1156
// WFET and WFIT take a register input and differ only in the second
// RegInputSystemI argument.
1157let Predicates = [HasWFxT] in {
1158def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
1159def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
1160}
1161
1162// Branch Record Buffer two-word mnemonic instructions
// The mnemonic is always "brb"; `keyword` is the operand string that follows
// it. Encoding bits 31-8 are fixed and `op2` fills bits 7-5.
1163class BRBEI<bits<3> op2, string keyword>
1164    : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
1165  let Inst{31-8} = 0b110101010000100101110010;
1166  let Inst{7-5} = op2;
1167  let Predicates = [HasBRBE];
1168}
1169def BRB_IALL: BRBEI<0b100, "\tiall">;
1170def BRB_INJ:  BRBEI<0b101, "\tinj">;
1171
1172}
1173
1174// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
// by mapping each uppercase token onto its lowercase spelling.
1175def : TokenAlias<"INJ", "inj">;
1176def : TokenAlias<"IALL", "iall">;
1177
1178
1179// ARMv9.4-A Guarded Control Stack
// GCSNoOp: operand-less GCS instruction. Encoding bits 20-8 are fixed and
// `op2` fills bits 7-5; `mnemonic` is the full assembly text. Requires HasGCS.
1180class GCSNoOp<bits<3> op2, string mnemonic>
1181    : SimpleSystemI<0, (ins), mnemonic, "">, Sched<[]> {
1182  let Inst{20-8} = 0b0100001110111;
1183  let Inst{7-5} = op2;
1184  let Predicates = [HasGCS];
1185}
1186def GCSPUSHX : GCSNoOp<0b100, "gcspushx">;
1187def GCSPOPCX : GCSNoOp<0b101, "gcspopcx">;
1188def GCSPOPX  : GCSNoOp<0b110, "gcspopx">;
1189
// GCSRtIn: GCS instruction that reads a single GPR64 operand $Rt and defines
// no register outputs. `op1` fills encoding bits 18-16 and `op2` bits 7-5;
// bits 20-19 and 15-8 are fixed. `pattern` defaults to empty. Requires HasGCS.
1190class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
1191            list<dag> pattern = []>
1192    : RtSystemI<0, (outs), (ins GPR64:$Rt), mnemonic, "\t$Rt", pattern> {
1193  let Inst{20-19} = 0b01;
1194  let Inst{18-16} = op1;
1195  let Inst{15-8} = 0b01110111;
1196  let Inst{7-5} = op2;
1197  let Predicates = [HasGCS];
1198}
1199
1200def GCSSS1   : GCSRtIn<0b011, 0b010, "gcsss1">;
1201def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;
1202
// GCSRtOut: counterpart of GCSRtIn in which $Rt is the single GPR64 output
// and there are no inputs. The fixed and parameterised encoding fields are
// laid out the same way; the first RtSystemI argument is 1 instead of 0.
1203class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
1204            list<dag> pattern = []>
1205    : RtSystemI<1, (outs GPR64:$Rt), (ins), mnemonic, "\t$Rt", pattern> {
1206  let Inst{20-19} = 0b01;
1207  let Inst{18-16} = op1;
1208  let Inst{15-8} = 0b01110111;
1209  let Inst{7-5} = op2;
1210  let Predicates = [HasGCS];
1211}
1212
1213def GCSSS2  : GCSRtOut<0b011, 0b011, "gcsss2">;
1214def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm">;
// Operand-less spelling of gcspopm, mapped to GCSPOPM with XZR as $Rt.
1215def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent
1216
// "gcsb dsync" is an alias of HINT 19. It is declared twice: unconditionally
// with a trailing 0, and with a trailing 1 under Requires<[HasGCS]>. The
// TokenAlias maps the uppercase DSYNC token onto the lowercase one.
1217def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>;
1218def GCSB_DSYNC         : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>;
1219
1220def : TokenAlias<"DSYNC", "dsync">;
1221
// CHKFEAT has no explicit operands: it implicitly reads and writes X16, and
// its own asm string is the raw "hint #40" form. "chkfeat x16" is provided as
// an alias, declared with a trailing 0 unconditionally and with a trailing 1
// under Requires<[HasCHK]>.
1222let Uses = [X16], Defs = [X16], CRm = 0b0101 in {
1223  def CHKFEAT   : SystemNoOperands<0b000, "hint\t#40">;
1224}
1225def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>;
1226def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;
1227
// GCSSt: GCS instruction with two register inputs, $Rt (GPR64) and $Rn
// (GPR64sp), no outputs and no pattern. Rn is encoded in bits 9-5, Rt in bits
// 4-0 and `op` in bits 14-12; the remaining bits are fixed. Requires HasGCS.
1228class GCSSt<string mnemonic, bits<3> op>
1229    : I<(outs), (ins GPR64:$Rt, GPR64sp:$Rn), mnemonic, "\t$Rt, $Rn", "", []>, Sched<[]> {
1230  bits<5> Rt;
1231  bits<5> Rn;
1232  let Inst{31-15} = 0b11011001000111110;
1233  let Inst{14-12} = op;
1234  let Inst{11-10} = 0b11;
1235  let Inst{9-5} = Rn;
1236  let Inst{4-0} = Rt;
1237  let Predicates = [HasGCS];
1238}
1239def GCSSTR  : GCSSt<"gcsstr",  0b000>;
1240def GCSSTTR : GCSSt<"gcssttr", 0b001>;
1241
1242
1243// ARMv8.2-A Dot Product
// SDOT/UDOT are the vector forms and SDOTlane/UDOTlane the indexed forms; the
// signed and unsigned variants differ in the first template argument and in
// the selection node (AArch64sdot vs. AArch64udot).
1244let Predicates = [HasDotProd] in {
1245defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
1246defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
1247defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
1248defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
1249}
1250
1251// ARMv8.6-A BFloat
// Everything in this group requires both HasNEON and HasBF16. The B/T and
// indexed BFMLAL variants are selected from int_aarch64_neon_bfmlalb/bfmlalt.
1252let Predicates = [HasNEON, HasBF16] in {
1253defm BFDOT       : SIMDThreeSameVectorBFDot<1, "bfdot">;
1254defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
1255def BFMMLA       : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
1256def BFMLALB      : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
1257def BFMLALT      : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
1258def BFMLALBIdx   : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
1259def BFMLALTIdx   : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
1260def BFCVTN       : SIMD_BFCVTN;
1261def BFCVTN2      : SIMD_BFCVTN2;
1262
1263// Vector-scalar BFDOT:
1264// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
1265// register (the instruction uses a single 32-bit lane from it), so the pattern
1266// is a bit tricky.
// The matched DAG is a 32-bit lane duplicate of a v4bf16 value that was
// inserted at index 0 of an undef v8bf16. The replacement widens $Rm to the
// 128-bit operand with SUBREG_TO_REG on dsub and passes $idx through.
1267def : Pat<(v2f32 (int_aarch64_neon_bfdot
1268                    (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
1269                    (v4bf16 (bitconvert
1270                      (v2i32 (AArch64duplane32
1271                        (v4i32 (bitconvert
1272                          (v8bf16 (insert_subvector undef,
1273                            (v4bf16 V64:$Rm),
1274                            (i64 0))))),
1275                        VectorIndexS:$idx)))))),
1276          (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
1277                             (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
1278                             VectorIndexS:$idx)>;
1279}
1280
// Unlike the BF16 group guarded by HasNEON, BFCVT is guarded by HasNEONorSME
// (together with HasBF16).
1281let Predicates = [HasNEONorSME, HasBF16] in {
1282def BFCVT : BF16ToSinglePrecision<"bfcvt">;
1283}
1284
1285// ARMv8.6A AArch64 matrix multiplication
// The three *MMLA records and the USDOT forms are each selected from the
// int_aarch64_neon_* intrinsic of the same name.
1286let Predicates = [HasMatMulInt8] in {
1287def  SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
1288def  UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
1289def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
1290defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
1291defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;
1292
1293// sudot lane has a pattern where usdot is expected (there is no sudot).
1294// The second operand is used in the dup operation to repeat the indexed
1295// element.
// The base class is instantiated with null_frag and Pattern is then set
// explicitly: int_aarch64_neon_usdot with the lane-duplicated $Rm as its
// second argument and $Rn as its third.
1296class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
1297                         string rhs_kind, RegisterOperand RegType,
1298                         ValueType AccumType, ValueType InputType>
1299      : BaseSIMDThreeSameVectorIndexS<Q, 0, 0b00, 0b1111, "sudot", dst_kind,
1300                                        lhs_kind, rhs_kind, RegType, AccumType,
1301                                        InputType, null_frag> {
1302  let Pattern = [(set (AccumType RegType:$dst),
1303                      (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd),
1304                                 (InputType (bitconvert (AccumType
1305                                    (AArch64duplane32 (v4i32 V128:$Rm),
1306                                        VectorIndexS:$idx)))),
1307                                 (InputType RegType:$Rn))))];
1308}
1309
// Two instantiations: a 64-bit one (V64, v2i32 accumulator, v8i8 input) and a
// 128-bit one (V128, v4i32 accumulator, v16i8 input).
1310multiclass SIMDSUDOTIndex {
1311  def v8i8  : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
1312  def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
1313}
1314
1315defm SUDOTlane : SIMDSUDOTIndex;
1316
1317}
1318
1319// ARMv8.2-A FP16 Fused Multiply-Add Long
// Four vector forms followed by the four matching indexed ("lane") forms.
// Each lane form reuses the mnemonic and intrinsic of its vector counterpart.
1320let Predicates = [HasNEON, HasFP16FML] in {
1321defm FMLAL      : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
1322defm FMLSL      : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
1323defm FMLAL2     : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
1324defm FMLSL2     : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
1325defm FMLALlane  : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
1326defm FMLSLlane  : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
1327defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
1328defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
1329}
1330
1331// Armv8.2-A Crypto extensions
// Instruction records first, then the selection patterns that map intrinsics
// and generic xor/and/or DAGs onto them. Everything requires HasSHA3.
1332let Predicates = [HasSHA3] in {
1333def SHA512H   : CryptoRRRTied<0b0, 0b00, "sha512h">;
1334def SHA512H2  : CryptoRRRTied<0b0, 0b01, "sha512h2">;
1335def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
1336def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
1337def RAX1      : CryptoRRR_2D<0b0,0b11, "rax1">;
1338def EOR3      : CryptoRRRR_16B<0b00, "eor3">;
1339def BCAX      : CryptoRRRR_16B<0b01, "bcax">;
1340def XAR       : CryptoRRRi6<"xar">;
1341
// Maps a three-operand intrinsic OpNode on 128-bit vectors of type VecTy to
// INST, passing the operands through in the same order.
1342class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
1343  : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
1344        (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;
1345
1346def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
1347          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1348
1349def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
1350def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
1351def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;
1352
// The eor3u intrinsic is mapped to EOR3 for every 128-bit integer vector type.
1353def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
1354def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
1355def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
1356def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;
1357
// Selects EOR3 for a generic two-level xor: (Vn ^ Vm) ^ Va.
1358class EOR3_pattern<ValueType VecTy>
1359  : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
1360        (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
1361
1362def : EOR3_pattern<v16i8>;
1363def : EOR3_pattern<v8i16>;
1364def : EOR3_pattern<v4i32>;
1365def : EOR3_pattern<v2i64>;
1366
// Selects BCAX for the generic DAG Vn ^ (Vm & ~Va).
1367class BCAX_pattern<ValueType VecTy>
1368  : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),
1369        (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
1370
1371def : BCAX_pattern<v16i8>;
1372def : BCAX_pattern<v8i16>;
1373def : BCAX_pattern<v4i32>;
1374def : BCAX_pattern<v2i64>;
1375
// The bcaxu, eor3s and bcaxs intrinsics get the same per-type treatment as
// eor3u; the signed and unsigned intrinsics select the same instruction.
1376def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
1377def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
1378def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
1379def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;
1380
1381def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
1382def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
1383def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
1384def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;
1385
1386def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
1387def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
1388def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
1389def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;
1390
1391def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
1392          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1393
1394def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
1395          (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;
1396
// Also selects RAX1 for the open-coded form: Vn xor-ed with Vm rotated left by
// one, where the rotate is written as (Vm >> 63) | (Vm << 1) on v2i64.
1397def : Pat<(xor  (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))),
1398          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1399
1400} // HasSHA3
1401
// SM3/SM4 instructions and their intrinsic selection patterns. Everything
// requires HasSM4, and every pattern operates on v4i32 values in V128.
1402let Predicates = [HasSM4] in {
1403def SM3TT1A   : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
1404def SM3TT1B   : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
1405def SM3TT2A   : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
1406def SM3TT2B   : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
1407def SM3SS1    : CryptoRRRR_4S<0b10, "sm3ss1">;
1408def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
1409def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
1410def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
1411def SM4E      : CryptoRRTied_4S<0b0, 0b01, "sm4e">;
1412
1413def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))),
1414          (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>;
1415
// Three-operand intrinsic ($Vd, $Vn, $Vm) mapped directly to INST.
1416class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode>
1417  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
1418        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;
1419
// As above, with an additional i64 target-immediate lane index $imm.
1420class SM3TT_pattern<Instruction INST, Intrinsic OpNode>
1421  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )),
1422        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>;
1423
// Two-operand intrinsic ($Vn, $Vm) mapped directly to INST.
1424class SM4_pattern<Instruction INST, Intrinsic OpNode>
1425  : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
1426        (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;
1427
1428def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>;
1429def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>;
1430
1431def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>;
1432def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>;
1433def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>;
1434def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>;
1435
1436def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>;
1437def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>;
1438} // HasSM4
1439
1440let Predicates = [HasRCPC] in {
1441  // v8.3 Release Consistent Processor Consistent support, optional in v8.2.
  // The B, H and W forms load into a GPR32 and the X form into a GPR64; the
  // W and X forms share the "ldapr" mnemonic.
1442  def LDAPRB  : RCPCLoad<0b00, "ldaprb", GPR32>;
1443  def LDAPRH  : RCPCLoad<0b01, "ldaprh", GPR32>;
1444  def LDAPRW  : RCPCLoad<0b10, "ldapr", GPR32>;
1445  def LDAPRX  : RCPCLoad<0b11, "ldapr", GPR64>;
1446}
1447
1448// v8.3a complex add and multiply-accumulate. No predicate here, that is done
1449// inside the multiclass as the FP16 versions need different predicates.
// The vector forms are instantiated with null_frag; selection patterns for
// them are written separately further down in this file. FCMLA is
// instantiated twice: once as a vector form and once as an indexed form.
1450defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
1451                                               "fcmla", null_frag>;
1452defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
1453                                           "fcadd", null_frag>;
1454defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;
1455
// Half-precision FCADD selection: the rot90 intrinsic passes 0 as the final
// instruction operand and the rot270 intrinsic passes 1. These patterns
// additionally require HasFullFP16.
1456let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
1457  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
1458            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>;
1459  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
1460            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
1461  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
1462            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>;
1463  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
1464            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
1465}
1466
// Single- and double-precision FCADD selection, using the same 0 / 1 operand
// for rot90 / rot270. v2f32 is written out; the 128-bit types are generated by
// the foreach, which builds the instruction name as "FCADD" # Ty.
1467let Predicates = [HasComplxNum, HasNEON] in {
1468  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
1469            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
1470  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
1471            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
1472  foreach Ty = [v4f32, v2f64] in {
1473    def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))),
1474              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>;
1475    def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))),
1476              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
1477  }
1478}
1479
// Emits the four vector FCMLA selection patterns for value type `ty` in
// register class `Reg`. The rot0/rot90/rot180/rot270 intrinsics map to the
// instruction named "FCMLA" # ty with a final operand of 0/1/2/3.
1480multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> {
1481  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1482            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
1483  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1484            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
1485  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1486            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
1487  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1488            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
1489}
1490
// Indexed counterpart of FCMLA_PATS. `RHSDup` is the caller-supplied DAG for
// the third intrinsic operand; it must bind $Rm and $idx, because the result
// pattern refers to both. The target instruction is "FCMLA" # ty #
// "_indexed", and the final operand is again 0/1/2/3 for rot0/90/180/270.
1491multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> {
1492  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1493            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
1494  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1495            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
1496  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1497            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
1498  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1499            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
1500}
1501
1502
// Half-precision instantiations (need HasFullFP16). The lane patterns match a
// 32-bit lane duplicate of $Rm bitcast to the f16 vector type.
// NOTE(review): the v4f16 lane pattern binds $idx as VectorIndexD in its
// source DAG while the multiclass result uses VectorIndexS -- confirm the
// mismatch is intentional.
1503let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
1504  defm : FCMLA_PATS<v4f16, V64>;
1505  defm : FCMLA_PATS<v8f16, V128>;
1506
1507  defm : FCMLA_LANE_PATS<v4f16, V64,
1508                         (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
1509  defm : FCMLA_LANE_PATS<v8f16, V128,
1510                         (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
1511}
// Single- and double-precision instantiations. Only v4f32 has a lane pattern
// here; it matches a 64-bit lane duplicate of $Rm bitcast to v4f32.
1512let Predicates = [HasComplxNum, HasNEON] in {
1513  defm : FCMLA_PATS<v2f32, V64>;
1514  defm : FCMLA_PATS<v4f32, V128>;
1515  defm : FCMLA_PATS<v2f64, V128>;
1516
1517  defm : FCMLA_LANE_PATS<v4f32, V128,
1518                         (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
1519}
1520
1521// v8.3a Pointer Authentication
1522// These instructions inhabit part of the hint space and so can be used for
1523// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
1524// important for compatibility with other assemblers (e.g. GAS) when building
1525// software compatible with both CPUs that do or don't implement PA.
// Each record's own asm string is therefore the raw "hint #N" spelling; the
// readable mnemonics are added as aliases elsewhere in this file. The records
// are grouped by the registers they implicitly read and write, and the AUT*
// forms are additionally marked isAuthenticated.

// Implicitly read and write LR.
1526let Uses = [LR], Defs = [LR] in {
1527  def PACIAZ   : SystemNoOperands<0b000, "hint\t#24">;
1528  def PACIBZ   : SystemNoOperands<0b010, "hint\t#26">;
1529  let isAuthenticated = 1 in {
1530    def AUTIAZ   : SystemNoOperands<0b100, "hint\t#28">;
1531    def AUTIBZ   : SystemNoOperands<0b110, "hint\t#30">;
1532  }
1533}
// Implicitly read LR and SP, write LR.
1534let Uses = [LR, SP], Defs = [LR] in {
1535  def PACIASP  : SystemNoOperands<0b001, "hint\t#25">;
1536  def PACIBSP  : SystemNoOperands<0b011, "hint\t#27">;
1537  let isAuthenticated = 1 in {
1538    def AUTIASP  : SystemNoOperands<0b101, "hint\t#29">;
1539    def AUTIBSP  : SystemNoOperands<0b111, "hint\t#31">;
1540  }
1541}
// Implicitly read X16 and X17, write X17; CRm is overridden to 0b0001.
1542let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
1543  def PACIA1716  : SystemNoOperands<0b000, "hint\t#8">;
1544  def PACIB1716  : SystemNoOperands<0b010, "hint\t#10">;
1545  let isAuthenticated = 1 in {
1546    def AUTIA1716  : SystemNoOperands<0b100, "hint\t#12">;
1547    def AUTIB1716  : SystemNoOperands<0b110, "hint\t#14">;
1548  }
1549}
1550
// Implicitly reads and writes LR; CRm is overridden to 0b0000.
1551let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
1552  def XPACLRI   : SystemNoOperands<0b111, "hint\t#7">;
1553}
1554
1555// In order to be able to write readable assembly, LLVM should accept assembly
1556// inputs that use pointer authentication mnemonics, even with PA disabled.
1557// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
1558// should not emit these mnemonics unless PA is enabled.
// These unconditional aliases therefore all pass 0 as the final InstAlias
// argument.
1559def : InstAlias<"paciaz", (PACIAZ), 0>;
1560def : InstAlias<"pacibz", (PACIBZ), 0>;
1561def : InstAlias<"autiaz", (AUTIAZ), 0>;
1562def : InstAlias<"autibz", (AUTIBZ), 0>;
1563def : InstAlias<"paciasp", (PACIASP), 0>;
1564def : InstAlias<"pacibsp", (PACIBSP), 0>;
1565def : InstAlias<"autiasp", (AUTIASP), 0>;
1566def : InstAlias<"autibsp", (AUTIBSP), 0>;
1567def : InstAlias<"pacia1716", (PACIA1716), 0>;
1568def : InstAlias<"pacib1716", (PACIB1716), 0>;
1569def : InstAlias<"autia1716", (AUTIA1716), 0>;
1570def : InstAlias<"autib1716", (AUTIB1716), 0>;
1571def : InstAlias<"xpaclri", (XPACLRI), 0>;
1572
1573// Pseudos
// Operand-less, pattern-less placeholders that implicitly read LR and SP and
// write LR.
1574
1575let Uses = [LR, SP], Defs = [LR] in {
1576// Insertion point of LR signing code.
1577def PAUTH_PROLOGUE : Pseudo<(outs), (ins), []>, Sched<[]>;
1578// Insertion point of LR authentication code.
1579// The RET terminator of the containing machine basic block may be replaced
1580// with a combined RETA(A|B) instruction when rewriting this Pseudo.
1581def PAUTH_EPILOGUE : Pseudo<(outs), (ins), []>, Sched<[]>;
1582}
1583
1584// These pointer authentication instructions require armv8.3a
1585let Predicates = [HasPAuth] in {
1586
1587  // When PA is enabled, a better mnemonic should be emitted.
  // Same alias list as the unconditional one, but with a final argument of 1.
1588  def : InstAlias<"paciaz", (PACIAZ), 1>;
1589  def : InstAlias<"pacibz", (PACIBZ), 1>;
1590  def : InstAlias<"autiaz", (AUTIAZ), 1>;
1591  def : InstAlias<"autibz", (AUTIBZ), 1>;
1592  def : InstAlias<"paciasp", (PACIASP), 1>;
1593  def : InstAlias<"pacibsp", (PACIBSP), 1>;
1594  def : InstAlias<"autiasp", (AUTIASP), 1>;
1595  def : InstAlias<"autibsp", (AUTIBSP), 1>;
1596  def : InstAlias<"pacia1716", (PACIA1716), 1>;
1597  def : InstAlias<"pacib1716", (PACIB1716), 1>;
1598  def : InstAlias<"autia1716", (AUTIA1716), 1>;
1599  def : InstAlias<"autib1716", (AUTIB1716), 1>;
1600  def : InstAlias<"xpaclri", (XPACLRI), 1>;
1601
  // Expands to eight records per instantiation. IA/IB/DA/DB are built from
  // SignAuthOneData using `prefix`; IZA/DZA/IZB/DZB are built from
  // SignAuthZero using `prefix_z`. The record suffix, lower-cased, is appended
  // to `asm` to form the mnemonic, and `op` is forwarded to every record.
1602  multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm,
1603                      SDPatternOperator op> {
1604    def IA   : SignAuthOneData<prefix, 0b00, !strconcat(asm,  "ia"), op>;
1605    def IB   : SignAuthOneData<prefix, 0b01, !strconcat(asm,  "ib"), op>;
1606    def DA   : SignAuthOneData<prefix, 0b10, !strconcat(asm,  "da"), op>;
1607    def DB   : SignAuthOneData<prefix, 0b11, !strconcat(asm,  "db"), op>;
1608    def IZA  : SignAuthZero<prefix_z,  0b00, !strconcat(asm, "iza"), op>;
1609    def DZA  : SignAuthZero<prefix_z,  0b10, !strconcat(asm, "dza"), op>;
1610    def IZB  : SignAuthZero<prefix_z,  0b01, !strconcat(asm, "izb"), op>;
1611    def DZB  : SignAuthZero<prefix_z,  0b11, !strconcat(asm, "dzb"), op>;
1612  }
1613
  // PAC forwards int_ptrauth_sign as the operator; AUT forwards null_frag.
1614  defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
1615  defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;
1616
  // int_ptrauth_strip selects XPACI for key operand 0 or 1 and XPACD for key
  // operand 2 or 3.
1617  def XPACI : ClearAuth<0, "xpaci">;
1618  def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>;
1619  def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>;
1620
1621  def XPACD : ClearAuth<1, "xpacd">;
1622  def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>;
1623  def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>;
1624
1625  def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>;
1626
1627  // Combined Instructions
  // Two-operand forms: BRA* are indirect-branch terminators and barriers;
  // BLRA* are calls that define LR and use SP.
1628  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1  in {
1629    def BRAA    : AuthBranchTwoOperands<0, 0, "braa">;
1630    def BRAB    : AuthBranchTwoOperands<0, 1, "brab">;
1631  }
1632  let isCall = 1, Defs = [LR], Uses = [SP] in {
1633    def BLRAA   : AuthBranchTwoOperands<1, 0, "blraa">;
1634    def BLRAB   : AuthBranchTwoOperands<1, 1, "blrab">;
1635  }
1636
  // One-operand ("Z") forms, carrying the same flags as the forms above.
1637  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1  in {
1638    def BRAAZ   : AuthOneOperand<0b000, 0, "braaz">;
1639    def BRABZ   : AuthOneOperand<0b000, 1, "brabz">;
1640  }
1641  let isCall = 1, Defs = [LR], Uses = [SP] in {
1642    def BLRAAZ  : AuthOneOperand<0b001, 0, "blraaz">;
1643    def BLRABZ  : AuthOneOperand<0b001, 1, "blrabz">;
1644  }
1645
  // Authenticating returns: all are return terminators and barriers.
1646  let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
1647    def RETAA   : AuthReturn<0b010, 0, "retaa">;
1648    def RETAB   : AuthReturn<0b010, 1, "retab">;
1649    def ERETAA  : AuthReturn<0b100, 0, "eretaa">;
1650    def ERETAB  : AuthReturn<0b100, 1, "eretab">;
1651  }
1652
  // Authenticating loads; both use the simm10Scaled offset operand.
1653  defm LDRAA  : AuthLoad<0, "ldraa", simm10Scaled>;
1654  defm LDRAB  : AuthLoad<1, "ldrab", simm10Scaled>;
1655
1656}
1657
1658// v9.5-A pointer authentication extensions
1659
1660// Always accept "pacm" as an alias for "hint #39", but don't emit it when
1661// disassembling if we don't have the pauth-lr feature.
// The record itself prints as "hint #39" and overrides CRm to 0b0100; the
// unconditional alias passes 0 as its final argument.
1662let CRm = 0b0100 in {
1663  def PACM : SystemNoOperands<0b111, "hint\t#39">;
1664}
1665def : InstAlias<"pacm", (PACM), 0>;
1666
// Instructions that exist only with HasPAuthLR. The column-header comments
// inside name the template arguments of the class used on the following lines.
1667let Predicates = [HasPAuthLR] in {
  // Sign/authenticate forms: implicitly read LR and SP and write LR.
  // autiasppc / autibsppc each exist in a PC-relative (i) and a register (r)
  // form under the same mnemonic.
1668  let Defs = [LR], Uses = [LR, SP] in {
1669    //                                opcode2, opcode,   asm
1670    def PACIASPPC : SignAuthFixedRegs<0b00001, 0b101000, "paciasppc">;
1671    def PACIBSPPC : SignAuthFixedRegs<0b00001, 0b101001, "pacibsppc">;
1672    def PACNBIASPPC : SignAuthFixedRegs<0b00001, 0b100000, "pacnbiasppc">;
1673    def PACNBIBSPPC : SignAuthFixedRegs<0b00001, 0b100001, "pacnbibsppc">;
1674    //                             opc,  asm
1675    def AUTIASPPCi : SignAuthPCRel<0b00, "autiasppc">;
1676    def AUTIBSPPCi : SignAuthPCRel<0b01, "autibsppc">;
1677    //                              opcode2, opcode,   asm
1678    def AUTIASPPCr : SignAuthOneReg<0b00001, 0b100100, "autiasppc">;
1679    def AUTIBSPPCr : SignAuthOneReg<0b00001, 0b100101, "autibsppc">;
1680    //                                  opcode2, opcode,   asm
1681    def PACIA171615 : SignAuthFixedRegs<0b00001, 0b100010, "pacia171615">;
1682    def PACIB171615 : SignAuthFixedRegs<0b00001, 0b100011, "pacib171615">;
1683    def AUTIA171615 : SignAuthFixedRegs<0b00001, 0b101110, "autia171615">;
1684    def AUTIB171615 : SignAuthFixedRegs<0b00001, 0b101111, "autib171615">;
1685  }
1686
  // Authenticating returns: return terminators and barriers that implicitly
  // read LR and SP, again in PC-relative (i) and register (r) forms.
1687  let Uses = [LR, SP], isReturn = 1, isTerminator = 1, isBarrier = 1 in {
1688    //                                   opc,   op2,     asm
1689    def RETAASPPCi : SignAuthReturnPCRel<0b000, 0b11111, "retaasppc">;
1690    def RETABSPPCi : SignAuthReturnPCRel<0b001, 0b11111, "retabsppc">;
1691    //                                 op3,      asm
1692    def RETAASPPCr : SignAuthReturnReg<0b000010, "retaasppc">;
1693    def RETABSPPCr : SignAuthReturnReg<0b000011, "retabsppc">;
1694  }
  // With the feature present, "pacm" is declared with a final argument of 1.
1695  def : InstAlias<"pacm", (PACM), 1>;
1696}
1697
1698
1699// v8.3a floating point conversion for JavaScript
// Converts an FPR64 source to a GPR32 result and is selected from the
// int_aarch64_fjcvtzs intrinsic. It is declared as writing NZCV, and encoding
// bit 31 is forced to 0.
1700let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in
1701def FJCVTZS  : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
1702                                      "fjcvtzs",
1703                                      [(set GPR32:$Rd,
1704                                         (int_aarch64_fjcvtzs FPR64:$Rn))]> {
1705  let Inst{31} = 0;
1706} // HasJS, HasFPARMv8
1707
1708// v8.4 Flag manipulation instructions
// Every record in this group implicitly reads and writes NZCV.
1709let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in {
// CFINV takes no operands; encoding bits 20-5 are fixed.
1710def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
1711  let Inst{20-5} = 0b0000001000000000;
1712}
// SETF8 / SETF16 take one GPR32 input; RMIF takes a GPR64 plus a 6-bit
// immediate and a 0..15 mask.
1713def SETF8  : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
1714def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
1715def RMIF   : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
1716                        "{\t$Rn, $imm, $mask}">;
1717} // HasFlagM
1718
1719// v8.5 flag manipulation instructions
// XAFLAG and AXFLAG take no operands and implicitly read and write NZCV.
// Their encodings differ only in bits 7-5 (0b001 vs. 0b010). Bits 11-8 are
// encoded as zero and flagged in the Unpredictable mask.
1720let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {
1721
1722def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> {
1723  let Inst{18-16} = 0b000;
1724  let Inst{11-8} = 0b0000;
1725  let Unpredictable{11-8} = 0b1111;
1726  let Inst{7-5} = 0b001;
1727}
1728
1729def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> {
1730  let Inst{18-16} = 0b000;
1731  let Inst{11-8} = 0b0000;
1732  let Unpredictable{11-8} = 0b1111;
1733  let Inst{7-5} = 0b010;
1734}
1735} // HasAltNZCV
1736
1737
1738// Armv8.5-A speculation barrier
// Operand-less instruction with fixed encoding bits 20-5, marked as having
// side effects and guarded by HasSB. Bits 11-8 are flagged in the
// Unpredictable mask.
1739def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> {
1740  let Inst{20-5} = 0b0001100110000111;
1741  let Unpredictable{11-8} = 0b1111;
1742  let Predicates = [HasSB];
1743  let hasSideEffects = 1;
1744}
1745
// Operand-less spellings of CLREX and ISB (both with immediate 0xf), plus
// named aliases for specific DSB immediates: ssbb = 0, pssbb = 4, and
// dfb = 0b1100 (the last only with HasV8_0r).
1746def : InstAlias<"clrex", (CLREX 0xf)>;
1747def : InstAlias<"isb", (ISB 0xf)>;
1748def : InstAlias<"ssbb", (DSB 0)>;
1749def : InstAlias<"pssbb", (DSB 4)>;
1750def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>;
1751
// System-register move instructions. The two MSRpstate records differ in the
// immediate range of their base class (0..1 vs. 0..15). The AArch64mrs node is
// selected to MRS, forwarding the immediate $id unchanged.
1752def MRS    : MRSI;
1753def MSR    : MSRI;
1754def MSRpstateImm1 : MSRpstateImm0_1;
1755def MSRpstateImm4 : MSRpstateImm0_15;
1756
1757def : Pat<(AArch64mrs imm:$id),
1758          (MRS imm:$id)>;
1759
1760// The thread pointer (on Linux, at least, where this has been implemented) is
1761// TPIDR_EL0.
// Input-less pseudo that defines a GPR64 and is selected from the
// AArch64threadpointer node.
1762def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
1763                       [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
1764
1765// This gets lowered into a 24-byte instruction sequence
// Takes a GPR64 pointer and an i32 type immediate, has no outputs or pattern,
// and is declared as clobbering X9, X16, X17 and NZCV.
1766let Defs = [ X9, X16, X17, NZCV ], Size = 24 in {
1767def KCFI_CHECK : Pseudo<
1768  (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>;
1769}
1770
// HWASan memory-access check pseudos. Both take a GPR64noip pointer and an i32
// access-info immediate, and both are declared as clobbering X16, X17, LR and
// NZCV. The first is selected from int_hwasan_check_memaccess and implicitly
// reads X9; the short-granules variant implicitly reads X20 instead. The
// physical register also appears as the first intrinsic argument in each
// pattern.
1771let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
1772def HWASAN_CHECK_MEMACCESS : Pseudo<
1773  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
1774  [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
1775  Sched<[]>;
1776}
1777
1778let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
1779def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
1780  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
1781  [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
1782  Sched<[]>;
1783}
1784
1785// The virtual cycle counter register is CNTVCT_EL0.
// readcyclecounter is selected to an MRS whose system-register immediate is
// 0xdf02.
1786def : Pat<(readcyclecounter), (MRS 0xdf02)>;
1787
1788// FPCR register
// MRS_FPCR / MSR_FPCR are pseudos selected from the int_aarch64_get_fpcr /
// int_aarch64_set_fpcr intrinsics. They expand (PseudoInstExpansion) to MRS /
// MSR with system-register operand 0xda20, and additionally carry an implicit
// use (read) or def (write) of FPCR.
1789let Uses = [FPCR] in
1790def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins),
1791                      [(set GPR64:$dst, (int_aarch64_get_fpcr))]>,
1792               PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>,
1793               Sched<[WriteSys]>;
1794let Defs = [FPCR] in
1795def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val),
1796                      [(int_aarch64_set_fpcr i64:$val)]>,
1797               PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>,
1798               Sched<[WriteSys]>;
1799
1800// Generic system instructions
1801def SYSxt  : SystemXtI<0, "sys">;
1802def SYSLxt : SystemLXtI<1, "sysl">;
1803
// Four-operand "sys" spelling: when the register operand is omitted the alias
// supplies XZR for it.
1804def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
1805                (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
1806                 sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
1807
1808
// Transactional-memory system instructions. Each one is selected from the
// int_aarch64_* intrinsic of the same name, and all of them require HasTME.
1809let Predicates = [HasTME] in {
1810
1811def TSTART : TMSystemI<0b0000, "tstart",
1812                      [(set GPR64:$Rt, (int_aarch64_tstart))]>;
1813
1814def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;
1815
// TCANCEL takes an immediate matched by timm64_0_65535.
1816def TCANCEL : TMSystemException<0b011, "tcancel",
1817                                [(int_aarch64_tcancel timm64_0_65535:$imm)]>;
1818
// TTEST uses the same TMSystemI class as TSTART but explicitly clears
// mayLoad and mayStore.
1819def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> {
1820  let mayLoad = 0;
1821  let mayStore = 0;
1822}
1823} // HasTME
1824
1825//===----------------------------------------------------------------------===//
1826// Move immediate instructions.
1827//===----------------------------------------------------------------------===//
1828
// MOVK is built from the InsertImmediate multiclass; MOVN and MOVZ are built
// from MoveImmediate. The first argument is the per-instruction opcode field.
1829defm MOVK : InsertImmediate<0b11, "movk">;
1830defm MOVN : MoveImmediate<0b00, "movn">;
1831
// MOVZ encodings are additionally passed through the "fixMOVZ" post-encoder
// hook, which is implemented outside this file section.
1832let PostEncoderMethod = "fixMOVZ" in
1833defm MOVZ : MoveImmediate<0b10, "movz">;
1834
1835// First group of aliases covers an implicit "lsl #0".
// Each alias fills the shift operand with 0. The movk forms also pass 0 as
// the trailing InstAlias argument while movn/movz leave it at its default
// (presumably the printing priority -- confirm against the InstAlias class).
1836def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>;
1837def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>;
1838def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
1839def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
1840def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
1841def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
1842
1843// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
// Each alias pairs a movw_symbol_gN operand class with the shift 16*N, so the
// symbol operand class determines the shift amount. The 64-bit (X) forms
// cover g0..g3; the 32-bit (W) forms further down only cover g0 and g1.
1844def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
1845def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
1846def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
1847def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;
1848
1849def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
1850def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
1851def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
1852def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;
1853
// As in the immediate group above, only the movk aliases pass a trailing 0.
1854def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>;
1855def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>;
1856def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>;
1857def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>;
1858
// 32-bit destination registers.
1859def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
1860def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;
1861
1862def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
1863def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;
1864
1865def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>;
1866def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>;
1867
1868// Final group of aliases covers true "mov $Rd, $imm" cases.
// movw_mov_alias generates, per instantiation:
//   * an AsmOperandClass whose predicate/render methods are the C++ templates
//     is<basename>MovAlias<width, shift> and
//     add<basename>MovAliasOperands<shift>,
//   * an i32 Operand parsed through that class, and
//   * a "mov $Rd, $imm" alias to INST with the shift operand fixed to `shift`.
// Parameters:
//   basename - spliced into the operand-class and method names
//              (instantiated below with "MOVZ" and "MOVN").
//   INST     - the instruction the alias expands to.
//   GPR      - register class of $Rd.
//   width    - register width, used in the class name and predicate template.
//   shift    - fixed shift amount placed in the instruction.
1869multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR,
1870                          int width, int shift> {
1871  def _asmoperand : AsmOperandClass {
1872    let Name = basename # width # "_lsl" # shift # "MovAlias";
1873    let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
1874                               # shift # ">";
1875    let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
1876  }
1877
  // The !cast looks up the sibling _asmoperand record by its generated name.
1878  def _movimm : Operand<i32> {
1879    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
1880  }
1881
1882  def : InstAlias<"mov $Rd, $imm",
1883                  (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
1884}
1885
// Instantiate the "mov $Rd, $imm" alias for every (instruction, shift) pair:
// shifts 0 and 16 for the 32-bit forms, 0/16/32/48 for the 64-bit forms,
// first for MOVZ and then for MOVN.
1886defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
1887defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;
1888
1889defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
1890defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
1891defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
1892defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;
1893
1894defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
1895defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;
1896
1897defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
1898defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
1899defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
1900defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;
1901
// Whole-immediate move pseudos: each matches any `imm` of its width and is
// flagged rematerializable, codegen-only, a move-immediate and as cheap as a
// move.
1902let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
1903    isAsCheapAsAMove = 1 in {
1904// FIXME: The following pseudo instructions are only needed because remat
1905// cannot handle multiple instructions.  When that changes, we can select
1906// directly to the real instructions and get rid of these pseudos.
1907
1908def MOVi32imm
1909    : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
1910             [(set GPR32:$dst, imm:$src)]>,
1911      Sched<[WriteImm]>;
1912def MOVi64imm
1913    : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
1914             [(set GPR64:$dst, imm:$src)]>,
1915      Sched<[WriteImm]>;
1916} // isReMaterializable, isCodeGenOnly, isMoveImm, isAsCheapAsAMove
1917
1918// Prefer MOVi32imm even when the value being moved is 64 bits wide: the
1919// eventual expansion then has fewer bits to get right. The price is some
1920// type marshalling, handled by the helpers that follow.
// i64imm_32bit accepts an i64 immediate whose upper 32 bits are all zero.
1921def i64imm_32bit : ImmLeaf<i64, [{
1922  return (static_cast<uint64_t>(Imm) >> 32) == 0;
1923}]>;
1924
// s64imm_32bit accepts an i64 immediate whose signed value lies within the
// int32_t range (both bounds inclusive).
1925def s64imm_32bit : ImmLeaf<i64, [{
1926  const int64_t Value = static_cast<int64_t>(Imm);
1927  const bool BelowMin = Value < std::numeric_limits<int32_t>::min();
1928  return !BelowMin && !(Value > std::numeric_limits<int32_t>::max());
1929}]>;
1930
// trunc_imm rebuilds the matched constant as an i32 target constant, taking
// its value from getZExtValue().
1931def trunc_imm : SDNodeXForm<imm, [{
  const uint64_t Bits = N->getZExtValue();
1932  return CurDAG->getTargetConstant(Bits, SDLoc(N), MVT::i32);
1933}]>;
1934
// GlobalISel counterpart of trunc_imm: rendering is delegated to the
// "renderTruncImm" custom renderer.
1935def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">,
1936  GISDNodeXFormEquiv<trunc_imm>;
1937
// Materialize an i64 constant accepted by i64imm_32bit (upper 32 bits zero)
// with a 32-bit MOVi32imm of the truncated value, placed into the sub_32 half
// of the 64-bit result through SUBREG_TO_REG.
1938let Predicates = [OptimizedGISelOrOtherSelector] in {
1939// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
1940// copies.
1941def : Pat<(i64 i64imm_32bit:$src),
1942          (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
1943}
1944
1945// FP constants are materialized through MOVi32imm/MOVi64imm (MachO large
// code model). These transforms reinterpret the FP constant's bit pattern as
// an integer target constant of the matching width.
1946def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
1947  const auto RawBits = N->getValueAPF().bitcastToAPInt();
1948  return CurDAG->getTargetConstant(RawBits.getZExtValue(), SDLoc(N), MVT::i32);
1949}]>;
1950
1951def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
1952  const auto RawBits = N->getValueAPF().bitcastToAPInt();
1953  return CurDAG->getTargetConstant(RawBits.getZExtValue(), SDLoc(N), MVT::i64);
1954}]>;
1955
1956
// Select f32/f64 constants as an integer immediate move of their bit pattern
// (bitcast_fpimm_to_i32 / _i64), then copy the result into the FPR32/FPR64
// register class.
1957def : Pat<(f32 fpimm:$in),
1958  (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
1959def : Pat<(f64 fpimm:$in),
1960  (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;
1961
1962
1963// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
1964// sequences.
// Every pattern has the same shape: AArch64WrapperLarge carries four operands
// ($g3..$g0); the result starts with MOVZXi of $g0 at shift 0 and then
// inserts $g1, $g2, $g3 with MOVKXi at shifts 16, 32 and 48. Only the operand
// kind differs (global address, block address, constant pool, jump table).
1965def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
1966                             tglobaladdr:$g1, tglobaladdr:$g0),
1967          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
1968                                  tglobaladdr:$g1, 16),
1969                          tglobaladdr:$g2, 32),
1970                  tglobaladdr:$g3, 48)>;
1971
1972def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
1973                             tblockaddress:$g1, tblockaddress:$g0),
1974          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
1975                                  tblockaddress:$g1, 16),
1976                          tblockaddress:$g2, 32),
1977                  tblockaddress:$g3, 48)>;
1978
1979def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
1980                             tconstpool:$g1, tconstpool:$g0),
1981          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
1982                                  tconstpool:$g1, 16),
1983                          tconstpool:$g2, 32),
1984                  tconstpool:$g3, 48)>;
1985
1986def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
1987                             tjumptable:$g1, tjumptable:$g0),
1988          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
1989                                  tjumptable:$g1, 16),
1990                          tjumptable:$g2, 32),
1991                  tjumptable:$g3, 48)>;
1992
1993
1994//===----------------------------------------------------------------------===//
1995// Arithmetic instructions.
1996//===----------------------------------------------------------------------===//
1997
1998// Add/subtract with carry.
// Each defm supplies the plain and flag-setting mnemonics together with the
// plain and flag-producing selection nodes.
1999defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
2000defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;
2001
// "ngc"/"ngcs" are SBC/SBCS with the zero register as the first source.
2002def : InstAlias<"ngc $dst, $src",  (SBCWr  GPR32:$dst, WZR, GPR32:$src)>;
2003def : InstAlias<"ngc $dst, $src",  (SBCXr  GPR64:$dst, XZR, GPR64:$src)>;
2004def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
2005def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
2006
2007// Add/subtract
// ADD is instantiated with the `add` node. SUB is instantiated without a
// node; the explicit `sub` patterns later in this file select the SUBS forms
// instead.
2008defm ADD : AddSub<0, "add", "sub", add>;
2009defm SUB : AddSub<1, "sub", "add">;
2010
// "mov" to or from the stack pointer is ADD-immediate with a zero immediate
// and zero shift. There is one alias per operand position that is restricted
// to the SP-only register class.
2011def : InstAlias<"mov $dst, $src",
2012                (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
2013def : InstAlias<"mov $dst, $src",
2014                (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
2015def : InstAlias<"mov $dst, $src",
2016                (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
2017def : InstAlias<"mov $dst, $src",
2018                (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;
2019
// Flag-setting forms, with the compare mnemonics ("cmn"/"cmp") passed as
// additional names.
2020defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
2021defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
2022
// copyFromSP matches an i64 value that is a CopyFromReg whose source
// register (operand 1) is the stack pointer.
2023def copyFromSP: PatLeaf<(i64 GPR64:$src), [{
2024  if (N->getOpcode() != ISD::CopyFromReg)
    return false;
  const auto *SrcReg = cast<RegisterSDNode>(N->getOperand(1));
2025  return SrcReg->getReg() == AArch64::SP;
2026}]>;
2027
2028// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
// One pattern per operand form and width: shifted immediate (ri), plain
// register (rr) and shifted register (rs).
2029def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
2030          (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
2031def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
2032          (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
2033def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
2034          (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
2035def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
2036          (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
2037def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
2038          (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
2039def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
2040          (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
// The extended-register forms get AddedComplexity = 1. The last pattern is
// the exception to the SUBS rule: when the left operand is a copy from SP
// (copyFromSP) it selects the non-flag-setting SUBXrx64.
2041let AddedComplexity = 1 in {
2042def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3),
2043          (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
2044def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
2045          (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
2046def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)),
2047          (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>;
2048}
2049
2050// Because of the immediate format for add/sub-imm instructions, the
2051// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
2052//  These patterns capture that transformation.
// Note that the add forms select the flag-setting SUBS{W,X}ri, while the
// mirrored sub forms (sub x, -imm) select the plain ADD{W,X}ri.
2053let AddedComplexity = 1 in {
2054def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2055          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2056def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2057          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2058def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2059          (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2060def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2061          (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2062}
2063
2064// Flag-setting counterpart of the negated-immediate transformation: because
2065// of the immediate format for add/sub-imm instructions, AArch64add_flag with
2066// a negated immediate selects SUBS{W,X}ri and AArch64sub_flag selects ADDS{W,X}ri.
2067let AddedComplexity = 1 in {
2068def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2069          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2070def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2071          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2072def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2073          (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2074def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2075          (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2076}
2077
// "neg"/"negs" are SUB/SUBS (shifted register) with the zero register as the
// first source. The unshifted spelling (shift operand 0) is given trailing
// value 3 and the explicit-shift spelling 2 (presumably alias priorities,
// making the unshifted form preferred -- confirm against InstAlias).
2078def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
2079def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
2080def : InstAlias<"neg $dst, $src$shift",
2081                (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
2082def : InstAlias<"neg $dst, $src$shift",
2083                (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
2084
2085def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
2086def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
2087def : InstAlias<"negs $dst, $src$shift",
2088                (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
2089def : InstAlias<"negs $dst, $src$shift",
2090                (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
2091
2092
2093// Unsigned/Signed divide
// The defms bind the generic udiv/sdiv nodes; the explicit patterns below map
// the int_aarch64_udiv / int_aarch64_sdiv intrinsics onto the same
// instructions for both register widths.
2094defm UDIV : Div<0, "udiv", udiv>;
2095defm SDIV : Div<1, "sdiv", sdiv>;
2096
2097def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>;
2098def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>;
2099def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>;
2100def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>;
2101
2102// Variable shift
// The records are named <OP>V but carry the short mnemonics asr/lsl/lsr/ror
// and are bound to the sra/shl/srl/rotr nodes.
2103defm ASRV : Shift<0b10, "asr", sra>;
2104defm LSLV : Shift<0b00, "lsl", shl>;
2105defm LSRV : Shift<0b01, "lsr", srl>;
2106defm RORV : Shift<0b11, "ror", rotr>;
2107
// The explicit "...v" spellings are provided as aliases of the same
// instructions.
2108def : ShiftAlias<"asrv", ASRVWr, GPR32>;
2109def : ShiftAlias<"asrv", ASRVXr, GPR64>;
2110def : ShiftAlias<"lslv", LSLVWr, GPR32>;
2111def : ShiftAlias<"lslv", LSLVXr, GPR64>;
2112def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
2113def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
2114def : ShiftAlias<"rorv", RORVWr, GPR32>;
2115def : ShiftAlias<"rorv", RORVXr, GPR64>;
2116
2117// Multiply-add
// Everything in this scope gets AddedComplexity = 5 (presumably so these
// patterns are preferred over lower-complexity alternatives for the same
// nodes).
2118let AddedComplexity = 5 in {
2119defm MADD : MulAccum<0, "madd">;
2120defm MSUB : MulAccum<1, "msub">;
2121
// A plain multiply is MADD with the zero register as the third operand.
2122def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
2123          (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
2124def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
2125          (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
2126
// A negated product is MSUB with the zero register as the third operand. The
// negation is matched either on the whole product or on the first factor.
2127def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
2128          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
2129def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
2130          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
2131def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
2132          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
2133def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
2134          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
2135} // AddedComplexity = 5
2136
// Widening (32x32 -> 64) multiply-accumulate instructions and the patterns
// that select them. The S* forms are bound to sext, the U* forms to zext;
// the *ADDL forms accumulate with add, the *SUBL forms with sub.
2137let AddedComplexity = 5 in {
2138def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
2139def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
2140def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
2141def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;
2142
// Plain widening multiplies: XZR is passed as the accumulator. An operand
// that is already 64-bit but sign-extended in-register (sext_inreg ..., i32)
// or masked with 0xFFFFFFFF is narrowed with EXTRACT_SUBREG sub_32.
2143def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))),
2144          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2145def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))),
2146          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2147def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
2148          (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2149def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))),
2150          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2151def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))),
2152          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2153def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
2154          (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2155
// Negated widening multiplies use the *SUBL forms with an XZR accumulator.
2156def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
2157          (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2158def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
2159          (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2160
// Multiplication by a constant that fits in 32 bits (s64imm_32bit for the
// signed forms, i64imm_32bit for the unsigned forms): the constant is
// truncated with trunc_imm and materialized through MOVi32imm.
2161def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
2162          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2163def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
2164          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2165def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
2166          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2167                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2168
// Negated product with a 32-bit constant.
2169def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
2170          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2171def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
2172          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2173def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
2174          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2175                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2176
// Product with a 32-bit constant added to an accumulator $Ra.
2177def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
2178          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2179def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
2180          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2181def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
2182                    GPR64:$Ra)),
2183          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2184                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2185
// Product with a 32-bit constant subtracted from an accumulator $Ra.
2186def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
2187          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2188def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
2189          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2190def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
2191                                    (s64imm_32bit:$C)))),
2192          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2193                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2194
// smullwithsignbits is defined outside this section (presumably a multiply
// whose 64-bit operands are known to be sign-extended from 32 bits --
// confirm at its definition). Matching operands are narrowed with
// EXTRACT_SUBREG sub_32 and fed to the signed widening forms: plain, add,
// negate and subtract.
2195def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)),
2196          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2197def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))),
2198          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2199
2200def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)),
2201          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2202def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)),
2203          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2204
2205def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
2206          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2207def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
2208          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2209
2210def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
2211          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2212def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
2213          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2214
// top32Zero is defined outside this section (presumably a 64-bit value whose
// upper 32 bits are known zero -- confirm at its definition). These are the
// unsigned counterparts of the group above, in the same four shapes.
2215def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)),
2216          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2217def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))),
2218          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2219
2220def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)),
2221          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2222def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)),
2223          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2224
2225def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))),
2226          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2227def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
2228          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2229
2230def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))),
2231          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2232def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
2233          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2234} // AddedComplexity = 5
2235
// Assembly aliases for the multiply-accumulate instructions: mul/mneg for
// MADD/MSUB and smull/smnegl/umull/umnegl for the widening forms. The operand
// shape of each alias comes from the *Alias classes (defined elsewhere).
2236def : MulAccumWAlias<"mul", MADDWrrr>;
2237def : MulAccumXAlias<"mul", MADDXrrr>;
2238def : MulAccumWAlias<"mneg", MSUBWrrr>;
2239def : MulAccumXAlias<"mneg", MSUBXrrr>;
2240def : WideMulAccumAlias<"smull", SMADDLrrr>;
2241def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
2242def : WideMulAccumAlias<"umull", UMADDLrrr>;
2243def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;
2244
2245// Multiply-high
// Signed and unsigned forms, bound to the mulhs / mulhu nodes.
2246def SMULHrr : MulHi<0b010, "smulh", mulhs>;
2247def UMULHrr : MulHi<0b110, "umulh", mulhu>;
2248
2249// CRC32
// One record per data size (B/H/W/X), each selected from the matching
// int_aarch64_crc32* intrinsic. Only the X forms pass 1 as the first argument
// and use GPR64; the third argument is 0 for the crc32 group and 1 for the
// crc32c group.
2250def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
2251def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
2252def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
2253def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;
2254
2255def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
2256def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
2257def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
2258def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
2259
2260// v8.1 atomic CAS
// Each family comes in four variants distinguished by two flag arguments and
// a mnemonic suffix: (0,0,""), (1,0,"a"), (0,1,"l"), (1,1,"al"). The suffixes
// presumably denote acquire / release ordering -- confirm against the
// multiclass definitions.
2261defm CAS   : CompareAndSwap<0, 0, "">;
2262defm CASA  : CompareAndSwap<1, 0, "a">;
2263defm CASL  : CompareAndSwap<0, 1, "l">;
2264defm CASAL : CompareAndSwap<1, 1, "al">;
2265
2266// v8.1 atomic CASP
2267defm CASP   : CompareAndSwapPair<0, 0, "">;
2268defm CASPA  : CompareAndSwapPair<1, 0, "a">;
2269defm CASPL  : CompareAndSwapPair<0, 1, "l">;
2270defm CASPAL : CompareAndSwapPair<1, 1, "al">;
2271
2272// v8.1 atomic SWP
2273defm SWP   : Swap<0, 0, "">;
2274defm SWPA  : Swap<1, 0, "a">;
2275defm SWPL  : Swap<0, 1, "l">;
2276defm SWPAL : Swap<1, 1, "al">;
2277
2278// v8.1 atomic LD<OP>(register). Performs load and then ST<OP>(register)
// Arguments per defm: a 3-bit operation code (000 add .. 111 umin), the
// operation name, two flag arguments and the mnemonic suffix. Each operation
// is instantiated in the same four flag/suffix variants: "", "a", "l", "al".
2279defm LDADD   : LDOPregister<0b000, "add", 0, 0, "">;
2280defm LDADDA  : LDOPregister<0b000, "add", 1, 0, "a">;
2281defm LDADDL  : LDOPregister<0b000, "add", 0, 1, "l">;
2282defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">;
2283
2284defm LDCLR   : LDOPregister<0b001, "clr", 0, 0, "">;
2285defm LDCLRA  : LDOPregister<0b001, "clr", 1, 0, "a">;
2286defm LDCLRL  : LDOPregister<0b001, "clr", 0, 1, "l">;
2287defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">;
2288
2289defm LDEOR   : LDOPregister<0b010, "eor", 0, 0, "">;
2290defm LDEORA  : LDOPregister<0b010, "eor", 1, 0, "a">;
2291defm LDEORL  : LDOPregister<0b010, "eor", 0, 1, "l">;
2292defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">;
2293
2294defm LDSET   : LDOPregister<0b011, "set", 0, 0, "">;
2295defm LDSETA  : LDOPregister<0b011, "set", 1, 0, "a">;
2296defm LDSETL  : LDOPregister<0b011, "set", 0, 1, "l">;
2297defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">;
2298
2299defm LDSMAX   : LDOPregister<0b100, "smax", 0, 0, "">;
2300defm LDSMAXA  : LDOPregister<0b100, "smax", 1, 0, "a">;
2301defm LDSMAXL  : LDOPregister<0b100, "smax", 0, 1, "l">;
2302defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;
2303
2304defm LDSMIN   : LDOPregister<0b101, "smin", 0, 0, "">;
2305defm LDSMINA  : LDOPregister<0b101, "smin", 1, 0, "a">;
2306defm LDSMINL  : LDOPregister<0b101, "smin", 0, 1, "l">;
2307defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;
2308
2309defm LDUMAX   : LDOPregister<0b110, "umax", 0, 0, "">;
2310defm LDUMAXA  : LDOPregister<0b110, "umax", 1, 0, "a">;
2311defm LDUMAXL  : LDOPregister<0b110, "umax", 0, 1, "l">;
2312defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;
2313
2314defm LDUMIN   : LDOPregister<0b111, "umin", 0, 0, "">;
2315defm LDUMINA  : LDOPregister<0b111, "umin", 1, 0, "a">;
2316defm LDUMINL  : LDOPregister<0b111, "umin", 0, 1, "l">;
2317defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;
2318
2319// v8.1 atomic ST<OP>(register) as aliases to "LD<OP>(register) when Rt=xZR"
// The second argument is the base name of the LD<OP> records defined above;
// STOPregister (defined elsewhere) derives the concrete instruction names
// from it.
2320defm : STOPregister<"stadd","LDADD">; // STADDx
2321defm : STOPregister<"stclr","LDCLR">; // STCLRx
2322defm : STOPregister<"steor","LDEOR">; // STEORx
2323defm : STOPregister<"stset","LDSET">; // STSETx
2324defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx
2325defm : STOPregister<"stsmin","LDSMIN">;// STSMINx
2326defm : STOPregister<"stumax","LDUMAX">;// STUMAXx
2327defm : STOPregister<"stumin","LDUMIN">;// STUMINx
2328
2329// v8.5 Memory Tagging Extension
// Everything down to the closing "Predicates = [HasMTE]" brace requires
// HasMTE.
2330let Predicates = [HasMTE] in {
2331
// IRG is selected from int_aarch64_irg and uses GPR64sp for its destination
// and first source.
2332def IRG   : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg",
2333                                 int_aarch64_irg, GPR64sp, GPR64>, Sched<[]>;
2334
// GMI is selected from int_aarch64_gmi and is marked not duplicable.
2335def GMI   : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi",
2336                                 int_aarch64_gmi, GPR64sp>, Sched<[]> {
2337  let isNotDuplicable = 1;
2338}
// ADDG/SUBG are instantiated with null_frag; ADDG is selected by the explicit
// int_aarch64_addg pattern further down.
2339def ADDG  : AddSubG<0, "addg", null_frag>;
2340def SUBG  : AddSubG<1, "subg", null_frag>;
2341
// Two-operand "irg": the omitted register operand is XZR.
2342def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;
2343
// SUBP is selected from int_aarch64_subp. SUBPS has no pattern (null_frag)
// and additionally defines NZCV.
2344def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>;
2345def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
2346  let Defs = [NZCV];
2347}
2348
// "cmpp" is SUBPS with XZR as the destination.
2349def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;
2350
2351def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;
2352
// Intrinsic selection for ADDG and LDG; the address operand is matched with
// the am_indexed* complex patterns.
2353def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4),
2354          (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>;
2355def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn,  simm9s16:$offset)),
2356          (LDG GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;
2357
// "ldg" without an offset uses offset 0.
2358def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;
2359
// Bulk tag load/store forms. The first argument is 1 for the load and 0 for
// the stores; STZGM additionally forces encoding bit 23 to 0.
2360def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]",
2361                   (outs GPR64:$Rt), (ins GPR64sp:$Rn)>;
2362def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]",
2363                   (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>;
2364def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]",
2365                   (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> {
2366  let Inst{23} = 0;
2367}
2368
// Tag store families, distinguished by a 2-bit opcode.
2369defm STG   : MemTagStore<0b00, "stg">;
2370defm STZG  : MemTagStore<0b01, "stzg">;
2371defm ST2G  : MemTagStore<0b10, "st2g">;
2372defm STZ2G : MemTagStore<0b11, "stz2g">;
2373
// Select the AArch64st*g nodes to the immediate-offset ("i") store forms.
2374def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2375          (STGi $Rn, $Rm, $imm)>;
2376def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2377          (STZGi $Rn, $Rm, $imm)>;
2378def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2379          (ST2Gi $Rn, $Rm, $imm)>;
2380def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2381          (STZ2Gi $Rn, $Rm, $imm)>;
2382
// STGP reuses the generic store-pair classes (offset, pre-index, post-index)
// with the GPR64z register class and a simm7s16 offset.
2383defm STGP     : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
2384def  STGPpre  : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
2385def  STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
2386
// Intrinsic selection for STG and STGP. Note that int_aarch64_stgp takes the
// address first, while STGPi takes the two data registers first.
2387def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
2388          (STGi GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;
2389
2390def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
2391          (STGPi $Rt, $Rt2, $Rn, $imm)>;
2392
// Stack-tagging pseudos without selection patterns of their own; IRGstack is
// produced by the int_aarch64_irg_sp pattern below.
2393def IRGstack
2394    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
2395      Sched<[]>;
2396def TAGPstack
2397    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
2398      Sched<[]>;
2399
2400// Explicit SP in the first operand prevents ShrinkWrap optimization
2401// from leaving this instruction out of the stack frame. When IRGstack
2402// is transformed into IRG, this operand is replaced with the actual
2403// register / expression for the tagged base pointer of the current function.
2404def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
2405
2406// Large STG to be expanded into a loop. $sz is the size, $Rn is start address.
2407// $Rn_wback is one past the end of the range. $Rm is the loop counter.
// In the _wback forms $Rn is tied to $Rn_wback, and both outputs are
// early-clobber.
2408let isCodeGenOnly=1, mayStore=1, Defs=[NZCV] in {
2409def STGloop_wback
2410    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
2411             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
2412      Sched<[WriteAdr, WriteST]>;
2413
2414def STZGloop_wback
2415    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
2416             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
2417      Sched<[WriteAdr, WriteST]>;
2418
2419// Variants of the above where $Rn2 is an independent register not tied to the input register $Rn.
2420// Their purpose is to use a FrameIndex operand as $Rn (which of course cannot be written back).
2421def STGloop
2422    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
2423             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
2424      Sched<[WriteAdr, WriteST]>;
2425
2426def STZGloop
2427    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
2428             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
2429      Sched<[WriteAdr, WriteST]>;
2430}
2431
2432} // Predicates = [HasMTE]
2433
2434//===----------------------------------------------------------------------===//
2435// Logical instructions.
2436//===----------------------------------------------------------------------===//
2437
2438// (immediate)
// Logical operations taking an immediate operand. ANDS is instantiated from
// the LogicalImmS multiclass with the AArch64and_flag node; AND/EOR/ORR use
// LogicalImm with the generic and/xor/or nodes. The first argument is the
// 2-bit opcode field.
// NOTE(review): the trailing mnemonic ("bics", "bic", "eon", "orn") is
// presumably the alias accepted when the immediate is given inverted --
// confirm in the multiclass definition.
2439defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
2440defm AND  : LogicalImm<0b00, "and", and, "bic">;
2441defm EOR  : LogicalImm<0b10, "eor", xor, "eon">;
2442defm ORR  : LogicalImm<0b01, "orr", or, "orn">;
2443
2444// FIXME: these aliases *are* canonical sometimes (when movz can't be
2445// used). Actually, it seems to be working right now, but putting logical_immXX
2446// here is a bit dodgy on the AsmParser side too.
// "mov Rd, #imm" spelled as ORR of the zero register with a logical
// immediate. The trailing 0 is the InstAlias Emit argument.
2447def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
2448                                          logical_imm32:$imm), 0>;
2449def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
2450                                          logical_imm64:$imm), 0>;
2451
2452
2453// (register)
// Register forms of the logical operations. The flag-setting variants
// (ANDS, BICS) come from LogicalRegS, the rest from LogicalReg. BIC, BICS and
// ORN match the base operation applied to an inverted right-hand operand;
// EON matches the inversion of a whole xor.
// NOTE(review): the second argument (0/1) is presumably the encoding's
// "invert second operand" bit, and BIC's trailing 3 an extra multiclass
// parameter -- confirm both in the multiclass definition.
2454defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
2455defm BICS : LogicalRegS<0b11, 1, "bics",
2456                        BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
2457defm AND  : LogicalReg<0b00, 0, "and", and>;
2458defm BIC  : LogicalReg<0b00, 1, "bic",
2459                       BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>;
2460defm EON  : LogicalReg<0b10, 1, "eon",
2461                       BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
2462defm EOR  : LogicalReg<0b10, 0, "eor", xor>;
2463defm ORN  : LogicalReg<0b01, 1, "orn",
2464                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
2465defm ORR  : LogicalReg<0b01, 0, "orr", or>;
2466
// Assembly aliases built on the shifted-register logical instructions.
// The trailing integer on each alias is the InstAlias Emit argument.
//
// mov Rd, Rm  ==  ORR Rd, zero-register, Rm with a shift operand of 0.
2467def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
2468def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;
2469
// mvn Rd, Rm  ==  ORN Rd, zero-register, Rm with a shift operand of 0.
2470def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
2471def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;
2472
// mvn with an explicit shift on the source register.
2473def : InstAlias<"mvn $Wd, $Wm$sh",
2474                (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
2475def : InstAlias<"mvn $Xd, $Xm$sh",
2476                (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;
2477
// tst Rn, #imm  ==  ANDS with the zero register as destination.
2478def : InstAlias<"tst $src1, $src2",
2479                (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
2480def : InstAlias<"tst $src1, $src2",
2481                (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;
2482
// tst Rn, Rm  ==  ANDS (register) with the zero register as destination and
// a shift operand of 0.
2483def : InstAlias<"tst $src1, $src2",
2484                        (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
2485def : InstAlias<"tst $src1, $src2",
2486                        (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;
2487
// tst with an explicit shift on the second register.
2488def : InstAlias<"tst $src1, $src2$sh",
2489               (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
2490def : InstAlias<"tst $src1, $src2$sh",
2491               (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;
2492
2493
// A bare bitwise 'not' is selected as ORN of the zero register with the
// operand.
2494def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
2495def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
2496
2497
2498//===----------------------------------------------------------------------===//
2499// One operand data processing instructions.
2500//===----------------------------------------------------------------------===//
2501
// CLS has no selection node attached here; CLZ selects ctlz and RBIT selects
// bitreverse.
2502defm CLS    : OneOperandData<0b000101, "cls">;
2503defm CLZ    : OneOperandData<0b000100, "clz", ctlz>;
2504defm RBIT   : OneOperandData<0b000000, "rbit", bitreverse>;
2505
// The 32-bit REV16 matches a byte swap followed by a rotate right by 16.
// The 64-bit form is given null_frag, so it is only selected through
// explicit Pat records.
2506def  REV16Wr : OneWRegData<0b000001, "rev16",
2507                                     UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
2508def  REV16Xr : OneXRegData<0b000001, "rev16", null_frag>;
2509
// cttz(x) is selected as clz(rbit(x)).
2510def : Pat<(cttz GPR32:$Rn),
2511          (CLZWr (RBITWr GPR32:$Rn))>;
2512def : Pat<(cttz GPR64:$Rn),
2513          (CLZXr (RBITXr GPR64:$Rn))>;
// Select CLS for the expanded form
//   ctlz((((x >>s (bits-1)) ^ x) << 1) | 1).
2514def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
2515                (i32 1))),
2516          (CLSWr GPR32:$Rn)>;
2517def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
2518                (i64 1))),
2519          (CLSXr GPR64:$Rn)>;
// Intrinsic forms. The 64-bit intrinsic's result is taken from the sub_32
// sub-register of the 64-bit CLS result.
2520def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
2521def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;
2522
2523// Unlike the other one operand instructions, the instructions with the "rev"
2524// mnemonic do *not* just differ in the size bit, but actually use different
2525// opcode bits for the different sizes.
// REV selects a full-width byte swap at both sizes (note the different opcode
// values for W and X). REV32 on a 64-bit register matches a byte swap
// followed by a rotate right by 32.
2526def REVWr   : OneWRegData<0b000010, "rev", bswap>;
2527def REVXr   : OneXRegData<0b000011, "rev", bswap>;
2528def REV32Xr : OneXRegData<0b000010, "rev32",
2529                                    UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
2530
// "rev64" is accepted as another spelling of the 64-bit REV. The trailing 0
// is the InstAlias Emit argument.
2531def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
2532
2533// The bswap commutes with the rotr so we want a pattern for both possible
2534// orders.
2535def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
2536def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
2537
2538// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
2539def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
2540def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
2541
// Explicit pattern for the 64-bit REV16 (which was defined with null_frag):
// ((x >> 8) & 0x00ff...) | ((x << 8) & 0xff00...), i.e. the two bytes of every
// 16-bit lane exchanged.
2542def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
2543              (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
2544          (REV16Xr GPR64:$Rn)>;
2545
2546//===----------------------------------------------------------------------===//
2547// Bitfield immediate extraction instruction.
2548//===----------------------------------------------------------------------===//
// EXTR is declared free of side effects.
2549let hasSideEffects = 0 in
2550defm EXTR : ExtractImm<"extr">;
// "ror Rd, Rs, #shift" is EXTR with the same register supplied for both
// source operands.
2551def : InstAlias<"ror $dst, $src, $shift",
2552            (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
2553def : InstAlias<"ror $dst, $src, $shift",
2554            (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;
2555
// Rotate-right by an in-range immediate is selected the same way: EXTR with
// $Rn as both sources.
2556def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
2557          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
2558def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
2559          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
2560
2561//===----------------------------------------------------------------------===//
2562// Other bitfield immediate instructions.
2563//===----------------------------------------------------------------------===//
// The three bitfield-move instruction families, all declared free of side
// effects. BFM uses a separate multiclass (BitfieldImmWith2RegArgs) from
// SBFM/UBFM (BitfieldImm). The first argument is the 2-bit opcode field.
2564let hasSideEffects = 0 in {
2565defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
2566defm SBFM : BitfieldImm<0b00, "sbfm">;
2567defm UBFM : BitfieldImm<0b10, "ubfm">;
2568}
2569
// Immediate transforms for 32-bit shifts. Each one reads the matched constant
// as an unsigned value and returns a new i64 target constant.
//
// (32 - shift_amt) mod 32
2570def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
2571  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
2572  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2573}]>;
2574
// 31 - shift_amt
2575def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
2576  uint64_t enc = 31 - N->getZExtValue();
2577  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2578}]>;
2579
2580// min(7, 31 - shift_amt)
2581def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
2582  uint64_t enc = 31 - N->getZExtValue();
2583  enc = enc > 7 ? 7 : enc;
2584  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2585}]>;
2586
2587// min(15, 31 - shift_amt)
2588def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
2589  uint64_t enc = 31 - N->getZExtValue();
2590  enc = enc > 15 ? 15 : enc;
2591  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2592}]>;
2593
// Immediate transforms for 64-bit shifts. Each one reads the matched constant
// as an unsigned value and returns a new i64 target constant.
//
// (64 - shift_amt) mod 64
2594def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
2595  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
2596  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2597}]>;
2598
// 63 - shift_amt
2599def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
2600  uint64_t enc = 63 - N->getZExtValue();
2601  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2602}]>;
2603
2604// min(7, 63 - shift_amt)
2605def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
2606  uint64_t enc = 63 - N->getZExtValue();
2607  enc = enc > 7 ? 7 : enc;
2608  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2609}]>;
2610
2611// min(15, 63 - shift_amt)
2612def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
2613  uint64_t enc = 63 - N->getZExtValue();
2614  enc = enc > 15 ? 15 : enc;
2615  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2616}]>;
2617
2618// min(31, 63 - shift_amt)
2619def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
2620  uint64_t enc = 63 - N->getZExtValue();
2621  enc = enc > 31 ? 31 : enc;
2622  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2623}]>;
2624
// Left shift by an in-range immediate is selected as UBFM whose two immediate
// operands are ((width - shift) mod width) and (width - 1 - shift), computed
// by the *shift_a / *shift_b transforms.
2625def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
2626          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
2627                              (i64 (i32shift_b imm0_31:$imm)))>;
2628def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
2629          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
2630                              (i64 (i64shift_b imm0_63:$imm)))>;
2631
// Arithmetic right shift by an immediate is SBFM with the shift amount as the
// first immediate and (width - 1) as the second. These patterns are given an
// added complexity of 10.
2632let AddedComplexity = 10 in {
2633def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
2634          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
2635def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
2636          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
2637}
2638
// Assembly aliases expressed through SBFM:
//   asr  -> immediates (shift, width - 1)
//   sxtb -> immediates (0, 7)
//   sxth -> immediates (0, 15)
//   sxtw -> immediates (0, 31), 64-bit destination only
2639def : InstAlias<"asr $dst, $src, $shift",
2640                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
2641def : InstAlias<"asr $dst, $src, $shift",
2642                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
2643def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
2644def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
2645def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
2646def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
2647def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
2648
// Logical right shift by an immediate is UBFM with the shift amount as the
// first immediate and (width - 1) as the second.
2649def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
2650          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
2651def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
2652          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
2653
// Assembly aliases expressed through UBFM, mirroring the SBFM group above:
// lsr, uxtb, uxth and uxtw.
2654def : InstAlias<"lsr $dst, $src, $shift",
2655                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
2656def : InstAlias<"lsr $dst, $src, $shift",
2657                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
2658def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
2659def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
2660def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
2661def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
2662def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
2663
2664//===----------------------------------------------------------------------===//
2665// Conditional comparison instructions.
2666//===----------------------------------------------------------------------===//
// CCMN and CCMP share one multiclass and are selected from the AArch64ccmn /
// AArch64ccmp nodes respectively.
// NOTE(review): the leading 0/1 is presumably the opcode bit that
// distinguishes the two -- confirm in the CondComparison definition.
2667defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
2668defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
2669
2670//===----------------------------------------------------------------------===//
2671// Conditional select instructions.
2672//===----------------------------------------------------------------------===//
// Plain conditional select.
2673defm CSEL  : CondSelect<0, 0b00, "csel">;
2674
// 'inc' is a helper fragment for (x + 1). CSINC/CSINV/CSNEG are conditional
// selects parameterised with the operation 'inc', 'not' and 'ineg'.
2675def inc : PatFrag<(ops node:$in), (add node:$in, 1)>;
2676defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
2677defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
2678defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
2679
// Direct selection of the target-specific csinv / csneg / csinc nodes. The
// operands are passed through unchanged and NZCV is an input of the matched
// node.
2680def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2681          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2682def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2683          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2684def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2685          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2686def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2687          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2688def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2689          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2690def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2691          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2692
// csel with constant operands, folded onto the zero register.
//
// A false value of 1 becomes CSINC with the zero register as second source.
// A true value of 1 is handled by swapping the operands and inverting the
// condition code with inv_cond_XFORM.
2693def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
2694          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
2695def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
2696          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
2697def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
2698          (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
2699def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
2700          (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
2701def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
2702          (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2703def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
2704          (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
// The same three shapes with -1 in place of 1, using CSINV.
2705def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
2706          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
2707def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
2708          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
2709def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
2710          (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
2711def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
2712          (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
2713def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
2714          (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2715def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
2716          (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2717
// val + csel(0, 1, cc) folds to a single CSINC with $val as both sources.
// The 64-bit form matches the zero-extended 32-bit csel.
2718def : Pat<(add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
2719          (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>;
2720def : Pat<(add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
2721          (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>;
2722
// val | csel(0, 1, cc) -> CSINC val, zero-register.
// NOTE(review): topbitsallzero32/64 are predicates defined elsewhere;
// presumably they restrict $val to 0 or 1, which is what makes this rewrite
// and the 'and' one below valid -- confirm at their definition.
2723def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
2724          (CSINCWr GPR32:$val, WZR, imm:$cc)>;
2725def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
2726          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
2727def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
2728          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
2729
// val & csel(0, 1, cc) -> CSEL zero-register, val.
2730def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
2731          (CSELWr WZR, GPR32:$val, imm:$cc)>;
2732def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
2733          (CSELXr XZR, GPR64:$val, imm:$cc)>;
2734def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
2735          (CSELXr XZR, GPR64:$val, imm:$cc)>;
2736
2737// The inverse of the condition code from the alias instruction is what is used
2738// in the aliased instruction. The parser already inverts the condition code
2739// for these aliases.
// cset: CSINC with the zero register as both sources.
2740def : InstAlias<"cset $dst, $cc",
2741                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
2742def : InstAlias<"cset $dst, $cc",
2743                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
2744
// csetm: CSINV with the zero register as both sources.
2745def : InstAlias<"csetm $dst, $cc",
2746                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
2747def : InstAlias<"csetm $dst, $cc",
2748                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
2749
// cinc / cinv / cneg: CSINC / CSINV / CSNEG with $src as both sources.
2750def : InstAlias<"cinc $dst, $src, $cc",
2751                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
2752def : InstAlias<"cinc $dst, $src, $cc",
2753                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
2754
2755def : InstAlias<"cinv $dst, $src, $cc",
2756                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
2757def : InstAlias<"cinv $dst, $src, $cc",
2758                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
2759
2760def : InstAlias<"cneg $dst, $src, $cc",
2761                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
2762def : InstAlias<"cneg $dst, $src, $cc",
2763                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
2764
2765//===----------------------------------------------------------------------===//
2766// PC-relative instructions.
2767//===----------------------------------------------------------------------===//
// Both instructions are rematerializable. Only ADR is additionally declared
// free of side effects, loads and stores; ADRP sits outside that inner let.
2768let isReMaterializable = 1 in {
2769let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
2770def ADR  : ADRI<0, "adr", adrlabel,
2771                [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
2772} // hasSideEffects = 0, mayStore = 0, mayLoad = 0
2773
2774def ADRP : ADRI<1, "adrp", adrplabel,
2775                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
2776} // isReMaterializable = 1
2777
2778// Address (ADR) or page address (ADRP) of the remaining symbol kinds:
// constant pool entries, block addresses and external symbols for both
// instructions, plus jump tables for ADR only. Global addresses are handled by
// the patterns attached to the instruction definitions above.
2779def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
2780def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
2781def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
2782def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
2783def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
2784def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
2785def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
2786
2787//===----------------------------------------------------------------------===//
2788// Unconditional branch (register) instructions.
2789//===----------------------------------------------------------------------===//
2790
// Return-class instructions: each is flagged as a return, a terminator and a
// barrier. RET carries no selection pattern here.
2791let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
2792def RET  : BranchReg<0b0010, "ret", []>;
2793def DRPS : SpecialReturn<0b0101, "drps">;
2794def ERET : SpecialReturn<0b0100, "eret">;
2795} // isReturn = 1, isTerminator = 1, isBarrier = 1
2796
2797// Default to the LR register.
2798def : InstAlias<"ret", (RET LR)>;
2799
// Indirect calls. Everything in this block is a call that defines LR and
// uses SP.
2800let isCall = 1, Defs = [LR], Uses = [SP] in {
  // The real instruction; selection is done by the Pat records below.
2801  def BLR : BranchReg<0b0001, "blr", []>;
  // Pseudo whose target register is restricted to the GPR64noip class and
  // which is expanded to a plain BLR.
2802  def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
2803                Sched<[WriteBrReg]>,
2804                PseudoInstExpansion<(BLR GPR64:$Rn)>;
  // Variable-operand call pseudos, selected by the call_rvmarker and call_bti
  // patterns below.
2805  def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
2806                     Sched<[WriteBrReg]>;
2807  def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
2808                Sched<[WriteBrReg]>;
  // Call through X16: additionally lists X16 as a use, and is expanded to
  // "BLR X16".
2809  let Uses = [X16, SP] in
2810  def BLR_X16 : Pseudo<(outs), (ins), [(AArch64call_arm64ec_to_x64 X16)]>,
2811                Sched<[WriteBrReg]>,
2812                PseudoInstExpansion<(BLR X16)>;
2813} // isCall
2814
// Plain indirect call: BLR when the NoSLSBLRMitigation predicate holds,
// otherwise (SLSBLRMitigation) the register-restricted BLRNoIP pseudo.
2815def : Pat<(AArch64call GPR64:$Rn),
2816          (BLR GPR64:$Rn)>,
2817      Requires<[NoSLSBLRMitigation]>;
2818def : Pat<(AArch64call GPR64noip:$Rn),
2819          (BLRNoIP GPR64noip:$Rn)>,
2820      Requires<[SLSBLRMitigation]>;
2821
// Only a NoSLSBLRMitigation pattern is given here for the rvmarker call.
2822def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
2823          (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
2824      Requires<[NoSLSBLRMitigation]>;
2825
// The BTI call uses the same pseudo under both predicates; only the register
// class of the target differs.
2826def : Pat<(AArch64call_bti GPR64:$Rn),
2827          (BLR_BTI GPR64:$Rn)>,
2828      Requires<[NoSLSBLRMitigation]>;
2829def : Pat<(AArch64call_bti GPR64noip:$Rn),
2830          (BLR_BTI GPR64noip:$Rn)>,
2831      Requires<[SLSBLRMitigation]>;
2832
// Indirect branch through a register; selects the generic 'brind' node.
2833let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
2834def BR  : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
2835} // isBranch, isTerminator, isBarrier, isIndirectBranch
2836
2837// Create a separate pseudo-instruction for codegen to use so that we don't
2838// flag lr as used in every function. It'll be restored before the RET by the
2839// epilogue if it's legitimately used.
// It takes no operands, is selected from AArch64retglue, and carries the same
// terminator / barrier / return flags as RET.
2840def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>,
2841                   Sched<[WriteBrReg]> {
2842  let isTerminator = 1;
2843  let isBarrier = 1;
2844  let isReturn = 1;
2845}
2846
2847// This is a directive-like pseudo-instruction. The purpose is to insert an
2848// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
2849// (which in the usual case is a BLR).
// It is marked as having side effects, has no selection pattern, and prints
// as ".tlsdesccall $sym".
2850let hasSideEffects = 1 in
2851def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
2852  let AsmString = ".tlsdesccall $sym";
2853}
2854
2855// Pseudo instruction to tell the streamer to emit a 'B' character into the
2856// augmentation string.
// Takes no operands and has no selection pattern.
2857def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}
2858
2859// Pseudo instruction to tell the streamer to emit a 'G' character into the
2860// augmentation string.
// Takes no operands and has no selection pattern.
2861def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}
2862
2863// FIXME: maybe the scratch register used shouldn't be fixed to X1?
2864// FIXME: can "hasSideEffects" be dropped?
2865// This gets lowered to an instruction sequence which takes 16 bytes
// Codegen-only call pseudo with a fixed size of 16 bytes. It is marked as
// defining NZCV, LR, X0 and X1 and as having side effects. The attached
// pattern covers a TLS global address operand.
2866let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16,
2867    isCodeGenOnly = 1 in
2868def TLSDESC_CALLSEQ
2869    : Pseudo<(outs), (ins i64imm:$sym),
2870             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
2871      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
// The same node with an external-symbol operand selects the same pseudo.
2872def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
2873          (TLSDESC_CALLSEQ texternalsym:$sym)>;
2874
2875//===----------------------------------------------------------------------===//
2876// Conditional branch (immediate) instruction.
2877//===----------------------------------------------------------------------===//
// Conditional branch "b.<cond>".
2878def Bcc : BranchCond<0, "b">;
2879
2880// Armv8.8-A variant form which hints to the branch predictor that
2881// this branch is very likely to go the same way nearly all the time
2882// (even though it is not known at compile time _which_ way that is).
// Shares the BranchCond class with Bcc (first argument 1 instead of 0) and is
// gated on the HasHBC predicate.
2883def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;
2884
2885//===----------------------------------------------------------------------===//
2886// Compare-and-branch instructions.
2887//===----------------------------------------------------------------------===//
// Selected from the AArch64cbz / AArch64cbnz nodes.
2888defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
2889defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;
2890
2891//===----------------------------------------------------------------------===//
2892// Test-bit-and-branch instructions.
2893//===----------------------------------------------------------------------===//
// Selected from the AArch64tbz / AArch64tbnz nodes.
2894defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
2895defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;
2896
2897//===----------------------------------------------------------------------===//
2898// Unconditional branch (immediate) instructions.
2899//===----------------------------------------------------------------------===//
// Unconditional direct branch to a basic block; selects the generic 'br' node.
2900let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
2901def B  : BranchImm<0, "b", [(br bb:$addr)]>;
2902} // isBranch, isTerminator, isBarrier
2903
// Direct call: defines LR and uses SP. The attached pattern handles a call to
// a global address.
2904let isCall = 1, Defs = [LR], Uses = [SP] in {
2905def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
2906} // isCall
// A call to an external symbol selects the same instruction.
2907def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;
2908
2909//===----------------------------------------------------------------------===//
2910// Exception generation instructions.
2911//===----------------------------------------------------------------------===//
// BRK is the only instruction in this group flagged as a trap, and the only
// one with a selection pattern (the int_aarch64_break intrinsic with a 16-bit
// immediate).
2912let isTrap = 1 in {
2913def BRK   : ExceptionGeneration<0b001, 0b00, "brk",
2914                                [(int_aarch64_break timm32_0_65535:$imm)]>;
2915}
// The remaining exception-generating instructions have no selection pattern
// here. DCPS3 and SMC are gated on the HasEL3 predicate.
2916def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
2917def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
2918def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
2919def HLT   : ExceptionGeneration<0b010, 0b00, "hlt">;
2920def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
2921def SMC   : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
2922def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;
2923
2924// DCPSn defaults to an immediate operand of zero if unspecified.
2925def : InstAlias<"dcps1", (DCPS1 0)>;
2926def : InstAlias<"dcps2", (DCPS2 0)>;
2927def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;
2928
// UDF has its own instruction class.
2929def UDF : UDFType<0, "udf">;
2930
2931//===----------------------------------------------------------------------===//
2932// Load instructions.
2933//===----------------------------------------------------------------------===//
2934
2935// Pair (indexed, offset)
// In every pair group below, the immediate operand class is scaled to the
// element size (simm7s4 / simm7s8 / simm7s16) and the FP/SIMD register
// variants are gated on HasFPARMv8. LDPSW is defined with a 64-bit register
// class and the 4-byte-scaled immediate.
2936defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
2937defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
2938let Predicates = [HasFPARMv8] in {
2939defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
2940defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
2941defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;
2942}
2943
2944defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;
2945
2946// Pair (pre-indexed)
2947def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
2948def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
2949let Predicates = [HasFPARMv8] in {
2950def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
2951def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
2952def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
2953}
2954
2955def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;
2956
2957// Pair (post-indexed)
2958def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
2959def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
2960let Predicates = [HasFPARMv8] in {
2961def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
2962def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
2963def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
2964}
2965
2966def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;
2967
2968
2969// Pair (no allocate)
2970defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
2971defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
2972let Predicates = [HasFPARMv8] in {
2973defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
2974defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
2975defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;
2976}
2977
// Selection of the target-specific pair-load nodes: AArch64ldp with a
// 64-bit-scaled address selects LDPXi, and AArch64ldnp with a 128-bit-scaled
// address selects LDNPQi.
2978def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
2979          (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;
2980
2981def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
2982          (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>;
2983//---
2984// (register offset)
2985//---
2986
2987// Integer
// Register-offset loads. The sub-word forms load into a 32-bit register with
// zero extension (zextloadi8 / zextloadi16); the W and X forms are plain
// loads.
2988defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
2989defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
2990defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
2991defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;
2992
2993// Floating-point
// Gated on HasFPARMv8. Note the 128-bit form has a different size/opc
// argument pair (0b00 / 0b11) from the others.
2994let Predicates = [HasFPARMv8] in {
2995defm LDRB : Load8RO<0b00,   1, 0b01, FPR8Op,   "ldr", i8, load>;
2996defm LDRH : Load16RO<0b01,  1, 0b01, FPR16Op,  "ldr", f16, load>;
2997defm LDRS : Load32RO<0b10,  1, 0b01, FPR32Op,  "ldr", f32, load>;
2998defm LDRD : Load64RO<0b11,  1, 0b01, FPR64Op,  "ldr", f64, load>;
2999defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;
3000}
3001
3002// Load sign-extended half-word
3003defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
3004defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;
3005
3006// Load sign-extended byte
3007defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
3008defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;
3009
3010// Load sign-extended word
3011defm LDRSW  : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;
3012
3013// Pre-fetch.
3014defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;
3015
3016// For regular load, we do not have any alignment requirement.
3017// Thus, it is safe to directly map the vector loads with interesting
3018// addressing modes.
3019// FIXME: We could do the same for bitconvert to floating point vectors.
//
// Emits two patterns that select scalar_to_vector of a register-offset load
// as the load instruction inserted into sub-register 'sub' of an IMPLICIT_DEF
// vector of type VecTy.
//   ro      - addressing-mode bundle supplying the Wpat/Wext and Xpat/Xext
//             address patterns and extend operands
//   loadop  - load fragment to match
//   ScalTy  - type of the loaded scalar
//   VecTy   - resulting vector type
//   LOADW   - instruction used with a 32-bit (GPR32) offset register
//   LOADX   - instruction used with a 64-bit (GPR64) offset register
//   sub     - sub-register index that receives the loaded value
3020multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
3021                              ValueType ScalTy, ValueType VecTy,
3022                              Instruction LOADW, Instruction LOADX,
3023                              SubRegIndex sub> {
3024  def : Pat<(VecTy (scalar_to_vector (ScalTy
3025              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
3026            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
3027                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
3028                           sub)>;
3029
3030  def : Pat<(VecTy (scalar_to_vector (ScalTy
3031              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
3032            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
3033                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
3034                           sub)>;
3035}
3036
// Instantiations of ScalToVecROLoadPat for each element width, all with an
// added complexity of 10. The 8- and 16-bit integer cases match any-extending
// loads to i32 and use the FP/SIMD byte/half-word loads (LDRB*/LDRH*) with the
// bsub/hsub sub-registers.
3037let AddedComplexity = 10 in {
3038defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
3039defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;
3040
3041defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
3042defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;
3043
// NOTE(review): the f16 vector cases are instantiated with an i32 scalar type
// and a plain 'load' -- confirm that this is intended.
3044defm : ScalToVecROLoadPat<ro16, load,       i32, v4f16, LDRHroW, LDRHroX, hsub>;
3045defm : ScalToVecROLoadPat<ro16, load,       i32, v8f16, LDRHroW, LDRHroX, hsub>;
3046
3047defm : ScalToVecROLoadPat<ro32, load,       i32, v2i32, LDRSroW, LDRSroX, ssub>;
3048defm : ScalToVecROLoadPat<ro32, load,       i32, v4i32, LDRSroW, LDRSroX, ssub>;
3049
3050defm : ScalToVecROLoadPat<ro32, load,       f32, v2f32, LDRSroW, LDRSroX, ssub>;
3051defm : ScalToVecROLoadPat<ro32, load,       f32, v4f32, LDRSroW, LDRSroX, ssub>;
3052
3053defm : ScalToVecROLoadPat<ro64, load,       i64, v2i64, LDRDroW, LDRDroX, dsub>;
3054
3055defm : ScalToVecROLoadPat<ro64, load,       f64, v2f64, LDRDroW, LDRDroX, dsub>;
3056
3057
// v1i64 is written out by hand: the 64-bit load result is used directly as
// the vector, with no INSERT_SUBREG.
3058def : Pat <(v1i64 (scalar_to_vector (i64
3059                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
3060                                           ro_Wextend64:$extend))))),
3061           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;
3062
3063def : Pat <(v1i64 (scalar_to_vector (i64
3064                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
3065                                           ro_Xextend64:$extend))))),
3066           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
3067}
3068
3069// Match all 64-bit-wide loads whose type is compatible with FPR64
//
// Emits two patterns that select a plain register-offset load of type VecTy
// directly as LOADW (32-bit offset register) or LOADX (64-bit offset
// register). 'ro' supplies the address patterns and extend operands.
3070multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
3071                        Instruction LOADW, Instruction LOADX> {
3072
3073  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
3074            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;
3075
3076  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
3077            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
3078}
3079
// Instantiations of VecROLoadPat, all with an added complexity of 10.
// Multi-element vector types are restricted to little-endian (IsLE); the
// single-element v1i64 / v1f64 types are not restricted.
3080let AddedComplexity = 10 in {
3081let Predicates = [IsLE] in {
3082  // We must do vector loads with LD1 in big-endian.
3083  defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
3084  defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
3085  defm : VecROLoadPat<ro64, v8i8,  LDRDroW, LDRDroX>;
3086  defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
3087  defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
3088  defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
3089}
3090
3091defm : VecROLoadPat<ro64, v1i64,  LDRDroW, LDRDroX>;
3092defm : VecROLoadPat<ro64, v1f64,  LDRDroW, LDRDroX>;
3093
3094// Match all 128-bit-wide loads whose type is compatible with FPR128
3095let Predicates = [IsLE] in {
3096  // We must do vector loads with LD1 in big-endian.
3097  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
3098  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
3099  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
3100  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
3101  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
3102  defm : VecROLoadPat<ro128, v8f16,  LDRQroW, LDRQroX>;
3103  defm : VecROLoadPat<ro128, v8bf16,  LDRQroW, LDRQroX>;
3104  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
3105}
3106} // AddedComplexity = 10
3107
// Extending load -> i64 (register offset).
//
// An i64-typed `loadop` through the register-offset addressing mode `ro` is
// selected to INSTW (W-register index) or INSTX (X-register index); the
// instruction result is then placed in the sub_32 subregister of the 64-bit
// value via SUBREG_TO_REG (i64 0).
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  // W-register index.
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG
                (i64 0),
                (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                sub_32)>;

  // X-register index.
  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG
                (i64 0),
                (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                sub_32)>;
}
3121
let AddedComplexity = 10 in {
  // Byte accesses: zextloadi8, plus zextloadi1 / extloadi8 / extloadi1, which
  // are all implemented as a zero-extending byte load.
  foreach LoadOp = [zextloadi8, zextloadi1, extloadi8, extloadi1] in
    defm : ExtLoadTo64ROPat<ro8, LoadOp, LDRBBroW, LDRBBroX>;

  // Half-word accesses: zextload, and extload treated as zextload.
  foreach LoadOp = [zextloadi16, extloadi16] in
    defm : ExtLoadTo64ROPat<ro16, LoadOp, LDRHHroW, LDRHHroX>;

  // Word accesses: zextload, and extload treated as zextload.
  foreach LoadOp = [zextloadi32, extloadi32] in
    defm : ExtLoadTo64ROPat<ro32, LoadOp, LDRWroW, LDRWroX>;
}
3138
3139
// extload / zextloadi1 -> i32
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  // An i32-typed `loadop` through the register-offset addressing mode `ro`
  // maps directly onto the load instruction; no subregister fix-up is needed.

  // W-register index.
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn,
                                  GPR32:$Rm,
                                  ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  // X-register index.
  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn,
                                  GPR64:$Rm,
                                  ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}
3150
let AddedComplexity = 10 in {
  // Byte accesses: extloadi8 -> zextloadi8, and zextloadi1 -> zextloadi8.
  foreach LoadOp = [extloadi8, zextloadi1] in
    defm : ExtLoadTo32ROPat<ro8, LoadOp, LDRBBroW, LDRBBroX>;

  // extload -> zextload for the wider accesses.
  defm : ExtLoadTo32ROPat<ro16, extloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo32ROPat<ro32, extloadi32, LDRWroW,  LDRWroX>;
}
3160
//---
// (unsigned immediate)
//---

// 64-bit general-purpose register load.
defm LDRX
    : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
             [(set GPR64z:$Rt,
                   (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;

// 32-bit general-purpose register load.
defm LDRW
    : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
             [(set GPR32z:$Rt,
                   (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
// FP/SIMD register loads (unsigned immediate); all require HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  // 8-bit register.
  defm LDRB
      : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
               [(set FPR8Op:$Rt,
                     (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
  // 16-bit register (f16).
  defm LDRH
      : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
               [(set (f16 FPR16Op:$Rt),
                     (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
  // 32-bit register (f32).
  defm LDRS
      : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
               [(set (f32 FPR32Op:$Rt),
                     (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
  // 64-bit register (f64).
  defm LDRD
      : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
               [(set (f64 FPR64Op:$Rt),
                     (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
  // 128-bit register (f128).
  defm LDRQ
      : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
               [(set (f128 FPR128Op:$Rt),
                     (load (am_indexed128 GPR64sp:$Rn,
                                          uimm12s16:$offset)))]>;
}
3187
// bf16 load pattern: a scaled 16-bit bf16 load is selected to LDRHui.
def : Pat<(bf16 (load (am_indexed16 GPR64sp:$Rn,
                                    uimm12s2:$offset))),
          (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
3191
// For regular load, we do not have any alignment requirement.
// Thus, it is safe to directly map the vector loads with interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.

// Byte element: load with LDRBui and insert into the bsub subregister.
foreach VT = [v8i8, v16i8] in
  def : Pat<(VT (scalar_to_vector
                    (i32 (extloadi8 (am_indexed8 GPR64sp:$Rn,
                                                 uimm12s1:$offset))))),
            (INSERT_SUBREG (VT (IMPLICIT_DEF)),
                           (LDRBui GPR64sp:$Rn, uimm12s1:$offset),
                           bsub)>;

// Half-word element: load with LDRHui and insert into hsub.
foreach VT = [v4i16, v8i16] in
  def : Pat<(VT (scalar_to_vector
                    (i32 (extloadi16 (am_indexed16 GPR64sp:$Rn,
                                                   uimm12s2:$offset))))),
            (INSERT_SUBREG (VT (IMPLICIT_DEF)),
                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset),
                           hsub)>;

// Word element: load with LDRSui and insert into ssub.
foreach VT = [v2i32, v4i32] in
  def : Pat<(VT (scalar_to_vector
                    (i32 (load (am_indexed32 GPR64sp:$Rn,
                                             uimm12s4:$offset))))),
            (INSERT_SUBREG (VT (IMPLICIT_DEF)),
                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset),
                           ssub)>;

// Double-word element into v1i64: the LDRDui result is used directly.
def : Pat<(v1i64 (scalar_to_vector
                     (i64 (load (am_indexed64 GPR64sp:$Rn,
                                              uimm12s8:$offset))))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;

// Double-word element into v2i64: load with LDRDui and insert into dsub.
def : Pat<(v2i64 (scalar_to_vector
                     (i64 (load (am_indexed64 GPR64sp:$Rn,
                                              uimm12s8:$offset))))),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                         (LDRDui GPR64sp:$Rn, uimm12s8:$offset),
                         dsub)>;
3227
// Match all load 64 bits width whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  foreach VT = [v2f32, v8i8, v4i16, v2i32, v4f16, v4bf16] in
    def : Pat<(VT (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
              (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}

// Single-element 64-bit vectors are not restricted to little-endian.
foreach VT = [v1f64, v1i64] in
  def : Pat<(VT (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
3248
// Match all load 128 bits width whose type is compatible with FPR128
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  foreach VT = [v4f32, v2f64, v16i8, v8i16,
                v4i32, v2i64, v8f16, v8bf16] in
    def : Pat<(VT (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
              (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}

// The scalar f128 load is not restricted to little-endian.
def : Pat<(f128 (load (am_indexed128 GPR64sp:$Rn,
                                     uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
3271
// Zero-extending half-word load into a 32-bit GPR.
defm LDRHH
    : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
             [(set GPR32:$Rt,
                   (zextloadi16 (am_indexed16 GPR64sp:$Rn,
                                              uimm12s2:$offset)))]>;

// Zero-extending byte load into a 32-bit GPR.
defm LDRBB
    : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
             [(set GPR32:$Rt,
                   (zextloadi8 (am_indexed8 GPR64sp:$Rn,
                                            uimm12s1:$offset)))]>;
// zextload -> i64: do the narrow zero-extending load, then place the result in
// sub_32 of the 64-bit value with SUBREG_TO_REG (i64 0).
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDRBBui GPR64sp:$Rn, uimm12s1:$offset),
                         sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDRHHui GPR64sp:$Rn, uimm12s2:$offset),
                         sub_32)>;

// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn,
                                        uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDRBBui GPR64sp:$Rn, uimm12s1:$offset),
                         sub_32)>;

// extload -> zextload (i32 results)
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn,
                                         uimm12s2:$offset))),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn,
                                       uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn,
                                       uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;

// extload -> zextload (i64 results)
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDRWui GPR64sp:$Rn, uimm12s4:$offset),
                         sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDRHHui GPR64sp:$Rn, uimm12s2:$offset),
                         sub_32)>;
def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDRBBui GPR64sp:$Rn, uimm12s1:$offset),
                         sub_32)>;
def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDRBBui GPR64sp:$Rn, uimm12s1:$offset),
                         sub_32)>;
3307
// load sign-extended half-word (into a 32-bit or a 64-bit GPR)
defm LDRSHW
    : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
             [(set GPR32:$Rt,
                   (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                              uimm12s2:$offset)))]>;
defm LDRSHX
    : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
             [(set GPR64:$Rt,
                   (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                              uimm12s2:$offset)))]>;

// load sign-extended byte (into a 32-bit or a 64-bit GPR)
defm LDRSBW
    : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
             [(set GPR32:$Rt,
                   (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                            uimm12s1:$offset)))]>;
defm LDRSBX
    : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
             [(set GPR64:$Rt,
                   (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                            uimm12s1:$offset)))]>;

// load sign-extended word (into a 64-bit GPR)
defm LDRSW
    : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
             [(set GPR64:$Rt,
                   (sextloadi32 (am_indexed32 GPR64sp:$Rn,
                                              uimm12s4:$offset)))]>;
3333
// load zero-extended word: a 32-bit LDRWui whose result is placed in sub_32 of
// the i64 value via SUBREG_TO_REG (i64 0).
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDRWui GPR64sp:$Rn, uimm12s4:$offset),
                         sub_32)>;
3337
// Pre-fetch (unsigned immediate offset).
def PRFMui
    : PrefetchUI<0b11, 0, 0b10, "prfm",
                 [(AArch64Prefetch timm:$Rt,
                                   (am_indexed64 GPR64sp:$Rn,
                                                 uimm12s8:$offset))]>;

// Assembly form without an explicit offset; encodes offset 0.
def : InstAlias<"prfm $Rt, [$Rn]",
                (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
3345
//---
// (literal)

// Pattern leaf accepting a label usable by the literal-load patterns below.
// It matches only:
//   * a global address whose pointer alignment is at least 4 and whose offset
//     is a multiple of 4, or
//   * a constant-pool entry whose alignment is at least 4 and whose offset is
//     a multiple of 4.
// Every other node is rejected.
def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
  if (auto *GlobalNode = dyn_cast<GlobalAddressSDNode>(N)) {
    Align PtrAlign =
        GlobalNode->getGlobal()->getPointerAlignment(MF->getDataLayout());
    if (!(PtrAlign >= 4))
      return false;
    return GlobalNode->getOffset() % 4 == 0;
  }

  if (auto *PoolNode = dyn_cast<ConstantPoolSDNode>(N)) {
    if (!(PoolNode->getAlign() >= 4))
      return false;
    return PoolNode->getOffset() % 4 == 0;
  }

  return false;
}]>;
3359
// Literal loads into general-purpose registers. The label must satisfy the
// `alignedglobal` predicate.
def LDRWl
    : LoadLiteral<0b00, 0, GPR32z, "ldr",
                  [(set GPR32z:$Rt,
                        (load (AArch64adr alignedglobal:$label)))]>;
def LDRXl
    : LoadLiteral<0b01, 0, GPR64z, "ldr",
                  [(set GPR64z:$Rt,
                        (load (AArch64adr alignedglobal:$label)))]>;

// Literal loads into FP/SIMD registers; these require HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  def LDRSl
      : LoadLiteral<0b00, 1, FPR32Op, "ldr",
                    [(set (f32 FPR32Op:$Rt),
                          (load (AArch64adr alignedglobal:$label)))]>;
  def LDRDl
      : LoadLiteral<0b01, 1, FPR64Op, "ldr",
                    [(set (f64 FPR64Op:$Rt),
                          (load (AArch64adr alignedglobal:$label)))]>;
  def LDRQl
      : LoadLiteral<0b10, 1, FPR128Op, "ldr",
                    [(set (f128 FPR128Op:$Rt),
                          (load (AArch64adr alignedglobal:$label)))]>;
}
3372
// load sign-extended word (literal)
def LDRSWl
    : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
                  [(set GPR64z:$Rt,
                        (sextloadi32 (AArch64adr alignedglobal:$label)))]>;

// Zero-extending word literal load to i64: LDRWl, with the result placed in
// sub_32 via SUBREG_TO_REG (i64 0).
let AddedComplexity = 20 in
def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
          (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
3381
// prefetch (literal). No selection pattern is attached; the pattern below is
// kept commented out:
//                   [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
3385
//---
// (unscaled immediate)

// General-purpose register loads.
defm LDURX
    : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
                   [(set GPR64z:$Rt,
                         (load (am_unscaled64 GPR64sp:$Rn,
                                              simm9:$offset)))]>;
defm LDURW
    : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
                   [(set GPR32z:$Rt,
                         (load (am_unscaled32 GPR64sp:$Rn,
                                              simm9:$offset)))]>;

// FP/SIMD register loads; these require HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  defm LDURB
      : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
                     [(set FPR8Op:$Rt,
                           (load (am_unscaled8 GPR64sp:$Rn,
                                               simm9:$offset)))]>;
  defm LDURH
      : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
                     [(set (f16 FPR16Op:$Rt),
                           (load (am_unscaled16 GPR64sp:$Rn,
                                                simm9:$offset)))]>;
  defm LDURS
      : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
                     [(set (f32 FPR32Op:$Rt),
                           (load (am_unscaled32 GPR64sp:$Rn,
                                                simm9:$offset)))]>;
  defm LDURD
      : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
                     [(set (f64 FPR64Op:$Rt),
                           (load (am_unscaled64 GPR64sp:$Rn,
                                                simm9:$offset)))]>;
  defm LDURQ
      : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
                     [(set (f128 FPR128Op:$Rt),
                           (load (am_unscaled128 GPR64sp:$Rn,
                                                 simm9:$offset)))]>;
}
3411
// Zero-extending unscaled half-word load into a 32-bit GPR.
defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
             [(set GPR32:$Rt,
                    (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// Zero-extending unscaled byte load into a 32-bit GPR.
// The address must be matched with the 8-bit unscaled addressing mode
// (am_unscaled8), as every other byte-sized unscaled pattern in this file
// does (extloadi8 / zextloadi8 / sextloadi8). The 16-bit selector
// (am_unscaled16) was used here previously, which is the wrong access size
// for a byte load.
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
             [(set GPR32:$Rt,
                    (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
3420
// bf16 load pattern: an unscaled 16-bit bf16 load is selected to LDURHi.
def : Pat<(bf16 (load (am_unscaled16 GPR64sp:$Rn,
                                     simm9:$offset))),
          (LDURHi GPR64sp:$Rn, simm9:$offset)>;
3424
// Match all load 64 bits width whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // Multi-element vectors are restricted to little-endian, like the scaled
  // (LDRDui) patterns for the same types.
  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  // v4bf16 was missing here although the scaled and register-offset
  // variants both handle it.
  def : Pat<(v4bf16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
// Single-element 64-bit vectors are not restricted to little-endian.
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
3442
// Match all load 128 bits width whose type is compatible with FPR128
let Predicates = [IsLE] in {
  // Vector loads are restricted to little-endian, like the scaled (LDRQui)
  // patterns for the same types.
  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  // v8bf16 was missing here although the scaled and register-offset
  // variants both handle it.
  def : Pat<(v8bf16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}
3460
// anyext -> zext: any-extending unscaled loads use the zero-extending
// instructions. i64 results take the 32-bit load result as sub_32 via
// SUBREG_TO_REG (i64 0).
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn,
                                          simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn,
                                        simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn,
                                        simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURWi GPR64sp:$Rn, simm9:$offset),
                         sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURHHi GPR64sp:$Rn, simm9:$offset),
                         sub_32)>;
def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURBBi GPR64sp:$Rn, simm9:$offset),
                         sub_32)>;
def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURBBi GPR64sp:$Rn, simm9:$offset),
                         sub_32)>;

// unscaled zext
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn,
                                           simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn,
                                         simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn,
                                         simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURWi GPR64sp:$Rn, simm9:$offset),
                         sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURHHi GPR64sp:$Rn, simm9:$offset),
                         sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURBBi GPR64sp:$Rn, simm9:$offset),
                         sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURBBi GPR64sp:$Rn, simm9:$offset),
                         sub_32)>;
3491
3492
3493//---
3494// LDR mnemonics fall back to LDUR for negative or unaligned offsets.
3495
3496// Define new assembler match classes as we want to only match these when
// they don't otherwise match the scaled addressing mode for LDR/STR. Don't
3498// associate a DiagnosticType either, as we want the diagnostic for the
3499// canonical form (the scaled operand) to take precedence.
// Assembler operand class for the LDR/STR -> LDUR/STUR fallback offset.
// `Width` is pasted into both the class name ("SImm9OffsetFB<Width>") and the
// template argument of the predicate method ("isSImm9OffsetFB<Width>").
// No DiagnosticType is set.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let RenderMethod    = "addImmOperands";
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let Name            = "SImm9OffsetFB" # Width;
}
3505
// One match class per access width (8/16/32/64/128).
def SImm9OffsetFB8Operand   : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand  : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand  : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand  : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

// The i64 immediate operands that parse through those match classes.
let ParserMatchClass = SImm9OffsetFB8Operand in
def simm9_offset_fb8 : Operand<i64>;

let ParserMatchClass = SImm9OffsetFB16Operand in
def simm9_offset_fb16 : Operand<i64>;

let ParserMatchClass = SImm9OffsetFB32Operand in
def simm9_offset_fb32 : Operand<i64>;

let ParserMatchClass = SImm9OffsetFB64Operand in
def simm9_offset_fb64 : Operand<i64>;

let ParserMatchClass = SImm9OffsetFB128Operand in
def simm9_offset_fb128 : Operand<i64>;
3527
// "ldr" spelled with an offset accepted by the fallback operand is assembled
// as the corresponding LDUR instruction. The trailing 0 is the InstAlias emit
// argument.

// General-purpose registers.
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn,
                        simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn,
                        simm9_offset_fb32:$offset), 0>;

// FP/SIMD registers.
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn,
                        simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn,
                        simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn,
                        simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn,
                        simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURQi FPR128Op:$Rt, GPR64sp:$Rn,
                        simm9_offset_fb128:$offset), 0>;
3542
// zextload -> i64 (unscaled byte and half-word).
// NOTE(review): the same two patterns already appear in the "unscaled zext"
// group earlier in this file; these look redundant - confirm before removing.
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURBBi GPR64sp:$Rn, simm9:$offset),
                         sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (SUBREG_TO_REG (i64 0),
                         (LDURHHi GPR64sp:$Rn, simm9:$offset),
                         sub_32)>;
3548
// load sign-extended half-word (into a 32-bit or a 64-bit GPR)
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
                   [(set GPR32:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn,
                                                     simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
                   [(set GPR64:$Rt,
                         (sextloadi16 (am_unscaled16 GPR64sp:$Rn,
                                                     simm9:$offset)))]>;

// load sign-extended byte (into a 32-bit or a 64-bit GPR)
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
                   [(set GPR32:$Rt,
                         (sextloadi8 (am_unscaled8 GPR64sp:$Rn,
                                                   simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
                   [(set GPR64:$Rt,
                         (sextloadi8 (am_unscaled8 GPR64sp:$Rn,
                                                   simm9:$offset)))]>;

// load sign-extended word (into a 64-bit GPR)
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
                   [(set GPR64:$Rt,
                         (sextloadi32 (am_unscaled32 GPR64sp:$Rn,
                                                     simm9:$offset)))]>;
3574
// zero and sign extending aliases from generic LDR* mnemonics to LDUR*.

// Zero-extending byte / half-word.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
                (LDURBBi GPR32:$Rt, GPR64sp:$Rn,
                         simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
                (LDURHHi GPR32:$Rt, GPR64sp:$Rn,
                         simm9_offset_fb16:$offset), 0>;

// Sign-extending byte (32-bit and 64-bit destination).
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn,
                          simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn,
                          simm9_offset_fb8:$offset), 0>;

// Sign-extending half-word (32-bit and 64-bit destination).
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn,
                          simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn,
                          simm9_offset_fb16:$offset), 0>;

// Sign-extending word.
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
                (LDURSWi GPR64:$Rt, GPR64sp:$Rn,
                         simm9_offset_fb32:$offset), 0>;
3590
// A LDR will implicitly zero the rest of the vector, so vector_insert(zeros,
// load, 0) can use a single load.
//
// For each of the full vector type (VT), the half-vector type (HVT) and the
// SVE type (SVT), two patterns are emitted: one for the scaled addressing
// mode (Addr / AddrImm -> LoadInst) and one for the unscaled addressing mode
// (UnscaledAddr / simm9 -> UnscaledLoadInst). In both, the loaded scalar
// becomes the SubReg subregister of the result via SUBREG_TO_REG (i64 0).
multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType SVT,
                                  ValueType ScalarVT, Instruction LoadInst, Instruction UnscaledLoadInst,
                                  ComplexPattern Addr, ComplexPattern UnscaledAddr, Operand AddrImm,
                                  SubRegIndex SubReg> {
  foreach VecTy = [VT, HVT, SVT] in {
    // Scaled
    def : Pat<(vector_insert
                  (VecTy immAllZerosV),
                  (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))),
                  (i64 0)),
              (SUBREG_TO_REG (i64 0),
                             (LoadInst GPR64sp:$Rn, AddrImm:$offset),
                             SubReg)>;
    // Unscaled
    def : Pat<(vector_insert
                  (VecTy immAllZerosV),
                  (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn,
                                                  simm9:$offset))),
                  (i64 0)),
              (SUBREG_TO_REG (i64 0),
                             (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset),
                             SubReg)>;
  }
}
3624
// Integer elements. i8 and i16 elements are loaded with an any-extending load
// to i32; i32 and i64 elements use a plain load.
defm : LoadInsertZeroPatterns<extloadi8, v16i8, v8i8, nxv16i8, i32,
                              LDRBui, LDURBi,
                              am_indexed8, am_unscaled8, uimm12s1, bsub>;
defm : LoadInsertZeroPatterns<extloadi16, v8i16, v4i16, nxv8i16, i32,
                              LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load, v4i32, v2i32, nxv4i32, i32,
                              LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load, v2i64, v1i64, nxv2i64, i64,
                              LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;

// Floating-point elements (f16, bf16, f32, f64); all use a plain load.
defm : LoadInsertZeroPatterns<load, v8f16, v4f16, nxv8f16, f16,
                              LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load, v8bf16, v4bf16, nxv8bf16, bf16,
                              LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load, v4f32, v2f32, nxv4f32, f32,
                              LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load, v2f64, v1f64, nxv2f64, f64,
                              LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;
3641
// Pre-fetch (unscaled immediate offset).
defm PRFUM
    : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
                       [(AArch64Prefetch timm:$Rt,
                                         (am_unscaled64 GPR64sp:$Rn,
                                                        simm9:$offset))]>;
3646
//---
// (unscaled immediate, unprivileged)

// full-width loads
defm LDTRX   : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW   : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;

// half-word and byte loads ("ldtrh" / "ldtrb")
defm LDTRH   : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB   : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// load sign-extended word
defm LDTRSW  : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;
//---
// (immediate pre-indexed)

// general-purpose register loads
def LDRWpre   : LoadPreIdx<0b10, 0, 0b01, GPR32z,   "ldr">;
def LDRXpre   : LoadPreIdx<0b11, 0, 0b01, GPR64z,   "ldr">;

// FP/SIMD register loads; these require HasFPARMv8
let Predicates = [HasFPARMv8] in {
  def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op,   "ldr">;
  def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op,  "ldr">;
  def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op,  "ldr">;
  def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op,  "ldr">;
  def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
}

// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z,   "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z,   "ldrsh">;

// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z,   "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z,   "ldrsb">;

// load zero-extended byte / half-word
def LDRBBpre  : LoadPreIdx<0b00, 0, 0b01, GPR32z,   "ldrb">;
def LDRHHpre  : LoadPreIdx<0b01, 0, 0b01, GPR32z,   "ldrh">;

// load sign-extended word
def LDRSWpre  : LoadPreIdx<0b10, 0, 0b10, GPR64z,   "ldrsw">;

//---
// (immediate post-indexed)

// general-purpose register loads
def LDRWpost   : LoadPostIdx<0b10, 0, 0b01, GPR32z,   "ldr">;
def LDRXpost   : LoadPostIdx<0b11, 0, 0b01, GPR64z,   "ldr">;

// FP/SIMD register loads; these require HasFPARMv8
let Predicates = [HasFPARMv8] in {
  def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op,   "ldr">;
  def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op,  "ldr">;
  def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op,  "ldr">;
  def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op,  "ldr">;
  def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
}

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z,   "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z,   "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z,   "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z,   "ldrsb">;

// load zero-extended byte / half-word
def LDRBBpost  : LoadPostIdx<0b00, 0, 0b01, GPR32z,   "ldrb">;
def LDRHHpost  : LoadPostIdx<0b01, 0, 0b01, GPR32z,   "ldrh">;

// load sign-extended word
def LDRSWpost  : LoadPostIdx<0b10, 0, 0b10, GPR64z,   "ldrsw">;
3719
3720//===----------------------------------------------------------------------===//
3721// Store instructions.
3722//===----------------------------------------------------------------------===//
3723
// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
// General-purpose register pairs; the offset operand is scaled by the
// register size (simm7s4 for W, simm7s8 for X).
defm STPW   : StorePairOffset<0b00, 0, GPR32z,   simm7s4,  "stp">;
defm STPX   : StorePairOffset<0b10, 0, GPR64z,   simm7s8,  "stp">;
// FP/SIMD register pairs; gated on HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  defm STPS : StorePairOffset<0b00, 1, FPR32Op,  simm7s4,  "stp">;
  defm STPD : StorePairOffset<0b01, 1, FPR64Op,  simm7s8,  "stp">;
  defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;
}
3733
// Pair (pre-indexed)
// General-purpose register pairs.
def STPWpre   : StorePairPreIdx<0b00, 0, GPR32z,   simm7s4,  "stp">;
def STPXpre   : StorePairPreIdx<0b10, 0, GPR64z,   simm7s8,  "stp">;
// FP/SIMD register pairs; gated on HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op,  simm7s4,  "stp">;
  def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op,  simm7s8,  "stp">;
  def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
}
3742
// Pair (post-indexed)
// General-purpose register pairs.
def STPWpost   : StorePairPostIdx<0b00, 0, GPR32z,   simm7s4,  "stp">;
def STPXpost   : StorePairPostIdx<0b10, 0, GPR64z,   simm7s8,  "stp">;
// FP/SIMD register pairs; gated on HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op,  simm7s4,  "stp">;
  def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op,  simm7s8,  "stp">;
  def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
}
3751
// Pair (no allocate)
// General-purpose register pairs.
defm STNPW   : StorePairNoAlloc<0b00, 0, GPR32z,   simm7s4,  "stnp">;
defm STNPX   : StorePairNoAlloc<0b10, 0, GPR64z,   simm7s8,  "stnp">;
// FP/SIMD register pairs; gated on HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op,  simm7s4,  "stnp">;
  defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op,  simm7s8,  "stnp">;
  defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
}
3760
// Select the AArch64stp node on a pair of 64-bit GPRs to STPXi.
def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2,
                      (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;

// Select the AArch64stnp node on a pair of 128-bit FPRs to STNPQi.
def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2,
                       (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;
3766
3767
//---
// (Register offset)

// Integer: byte and half-word forms are truncating stores of an i32 value,
// word and double-word forms are plain stores.
defm STRBB  : Store8RO  <0b00, 0, 0b00, GPR32,    "strb", i32, truncstorei8>;
defm STRHH  : Store16RO <0b01, 0, 0b00, GPR32,    "strh", i32, truncstorei16>;
defm STRW   : Store32RO <0b10, 0, 0b00, GPR32,    "str",  i32, store>;
defm STRX   : Store64RO <0b11, 0, 0b00, GPR64,    "str",  i64, store>;


// Floating-point; gated on HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  defm STRB : Store8RO  <0b00, 1, 0b00, FPR8Op,   "str",  i8,  store>;
  defm STRH : Store16RO <0b01, 1, 0b00, FPR16Op,  "str",  f16, store>;
  defm STRS : Store32RO <0b10, 1, 0b00, FPR32Op,  "str",  f32, store>;
  defm STRD : Store64RO <0b11, 1, 0b00, FPR64Op,  "str",  f64, store>;
  defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;
}
3786
// f128 stores through a register-offset address. Both patterns are only
// available when the UseSTRQro predicate holds and carry AddedComplexity = 10.
let Predicates = [UseSTRQro], AddedComplexity = 10 in {
  // 32-bit (W) index register.
  def : Pat<(store (f128 FPR128:$Rt),
                        (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                        ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  // 64-bit (X) index register. $extend is bound as ro_Xextend128 in the source
  // pattern, so the result uses the same X-form operand (it was previously
  // tagged ro_Wextend128, disagreeing with the source pattern and with the
  // X-form results produced by the RO store multiclasses in this file).
  def : Pat<(store (f128 FPR128:$Rt),
                        (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                        ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}
3797
// Truncating store of an i64 value through a register-offset address.
//   ro      - addressing-mode bundle supplying Wpat/Wext and Xpat/Xext.
//   storeop - the truncating store operator to match.
//   STRW    - instruction used when the index register is a W register.
//   STRX    - instruction used when the index register is an X register.
// In both cases the stored value is the sub_32 subregister of $Rt.
multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {

  // W-register index.
  def : Pat<
    (storeop GPR64:$Rt, (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
    (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
          GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  // X-register index.
  def : Pat<
    (storeop GPR64:$Rt, (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
    (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
          GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

// truncstore i64 to 8, 16 and 32 bits.
let AddedComplexity = 10 in {
  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
}
3818
// Plain store of a value of type VecTy held in register class FPR through a
// register-offset address: STRW for a W index register, STRX for an X index.
multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  // W-register index.
  def : Pat<
    (store (VecTy FPR:$Rt), (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
    (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  // X-register index.
  def : Pat<
    (store (VecTy FPR:$Rt), (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
    (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
// Match all store 64 bits width whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  foreach VecVT = [v2i32, v2f32, v4i16, v8i8, v4f16, v4bf16] in
    defm : VecROStorePat<ro64, VecVT, FPR64, STRDroW, STRDroX>;
}

// Single-element 64-bit vectors are not restricted to little-endian.
foreach VecVT = [v1i64, v1f64] in
  defm : VecROStorePat<ro64, VecVT, FPR64, STRDroW, STRDroX>;

// Match all store 128 bits width whose type is compatible with FPR128
let Predicates = [IsLE, UseSTRQro] in {
  // We must use ST1 to store vectors in big-endian.
  foreach VecVT = [v2i64, v2f64, v4i32, v4f32, v8i16, v16i8, v8f16, v8bf16] in
    defm : VecROStorePat<ro128, VecVT, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10
3858
// Match stores from lane 0 to the appropriate subreg's store.
//   storeop   - store operator applied to the extracted element.
//   VecTy/STy - 128-bit vector type and the type of the extracted element.
//   SubRegTy  - type given to the EXTRACT_SUBREG result.
//   SubRegIdx - subregister index holding lane 0.
//   STRW/STRX - store instruction for a W / X index register.
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
                              ValueType VecTy, ValueType STy,
                              ValueType SubRegTy,
                              SubRegIndex SubRegIdx,
                              Instruction STRW, Instruction STRX> {

  // W-register index.
  def : Pat<
    (storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
             (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
    (STRW (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
          GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  // X-register index.
  def : Pat<
    (storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
             (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
    (STRX (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
          GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

// AddedComplexity = 19 for every lane-0 register-offset pattern.
let AddedComplexity = 19 in {
  // 16-bit element (hsub).
  defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, f16, hsub,
                            STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro16, store,         v8f16, f16, f16, hsub,
                            STRHroW, STRHroX>;
  // 32-bit element (ssub).
  defm : VecROStoreLane0Pat<ro32, store,         v4i32, i32, i32, ssub,
                            STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro32, store,         v4f32, f32, i32, ssub,
                            STRSroW, STRSroX>;
  // 64-bit element (dsub).
  defm : VecROStoreLane0Pat<ro64, store,         v2i64, i64, i64, dsub,
                            STRDroW, STRDroX>;
  defm : VecROStoreLane0Pat<ro64, store,         v2f64, f64, i64, dsub,
                            STRDroW, STRDroX>;
}
3885
//---
// (unsigned immediate)

// 64-bit and 32-bit general-purpose stores.
defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
  [(store GPR64z:$Rt, (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
  [(store GPR32z:$Rt, (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;

// FP/SIMD stores; gated on HasFPARMv8. STRQ carries no inline pattern.
let Predicates = [HasFPARMv8] in {
  defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
    [(store FPR8Op:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
  defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
    [(store (f16 FPR16Op:$Rt),
            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
  defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
    [(store (f32 FPR32Op:$Rt),
            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
  defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
    [(store (f64 FPR64Op:$Rt),
            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
  defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;
}

// Truncating half-word and byte stores from a 32-bit GPR.
defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
  [(truncstorei16 GPR32z:$Rt,
                  (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1,  "strb",
  [(truncstorei8 GPR32z:$Rt,
                 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
3918
// bf16 store pattern: a bf16 value in a 16-bit FP register is stored with the
// same STRHui instruction as f16.
def : Pat<
  (store (bf16 FPR16Op:$Rt), (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
  (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;
3923
let AddedComplexity = 10 in {

// Match all store 64 bits width whose type is compatible with FPR64
// Single-element vectors are not guarded by the IsLE predicate.
foreach VecVT = [v1i64, v1f64] in
  def : Pat<(store (VecVT FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  foreach VecVT = [v2f32, v8i8, v4i16, v2i32, v4f16, v4bf16] in
    def : Pat<(store (VecVT FPR64:$Rt),
                     (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
              (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}

// Match all store 128 bits width whose type is compatible with FPR128
// f128 is not guarded by the IsLE predicate.
def : Pat<(store (f128 FPR128:$Rt),
                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  foreach VecVT = [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16] in
    def : Pat<(store (VecVT FPR128:$Rt),
                     (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
              (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}

// truncstore i64: store the sub_32 subregister with the narrower instruction.
def : Pat<
  (truncstorei32 GPR64:$Rt, (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
  (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32),
          GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<
  (truncstorei16 GPR64:$Rt, (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
  (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32),
           GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<
  (truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
  (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32),
           GPR64sp:$Rn, uimm12s1:$offset)>;

} // AddedComplexity = 10
4000
// Match stores from lane 0 to the appropriate subreg's store.
//   UIAddrMode - complex pattern matching the base + immediate address.
//   storeop    - store operator applied to the extracted element.
//   VTy/STy    - 128-bit vector type and the type of the extracted element.
//   SubRegTy   - type given to the EXTRACT_SUBREG result.
//   SubRegIdx  - subregister index holding lane 0.
//   IndexType  - immediate operand type of the offset.
//   STR        - store instruction to emit.
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
                            ValueType VTy, ValueType STy,
                            ValueType SubRegTy,
                            SubRegIndex SubRegIdx, Operand IndexType,
                            Instruction STR> {
  def : Pat<
    (storeop (STy (vector_extract (VTy VecListOne128:$Vt), (i64 0))),
             (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
    (STR (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
         GPR64sp:$Rn, IndexType:$offset)>;
}

// AddedComplexity = 19 for every lane-0 unsigned-immediate pattern.
let AddedComplexity = 19 in {
  // 16-bit element (hsub).
  defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub,
                          uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed16, store,         v8f16, f16, f16, hsub,
                          uimm12s2, STRHui>;
  // 32-bit element (ssub).
  defm : VecStoreLane0Pat<am_indexed32, store,         v4i32, i32, i32, ssub,
                          uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed32, store,         v4f32, f32, i32, ssub,
                          uimm12s4, STRSui>;
  // 64-bit element (dsub).
  defm : VecStoreLane0Pat<am_indexed64, store,         v2i64, i64, i64, dsub,
                          uimm12s8, STRDui>;
  defm : VecStoreLane0Pat<am_indexed64, store,         v2f64, f64, i64, dsub,
                          uimm12s8, STRDui>;
}
4021
//---
// (unscaled immediate)

// 64-bit and 32-bit general-purpose stores.
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
  [(store GPR64z:$Rt, (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
  [(store GPR32z:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;

// FP/SIMD stores; gated on HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
    [(store FPR8Op:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
  defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
    [(store (f16 FPR16Op:$Rt),
            (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
  defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
    [(store (f32 FPR32Op:$Rt),
            (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
  defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
    [(store (f64 FPR64Op:$Rt),
            (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
  defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
    [(store (f128 FPR128Op:$Rt),
            (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
}

// Truncating half-word and byte stores from a 32-bit GPR.
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
  [(truncstorei16 GPR32z:$Rt,
                  (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
  [(truncstorei8 GPR32z:$Rt,
                 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
4053
// bf16 store pattern: a bf16 value in a 16-bit FP register is stored with the
// same STURHi instruction as f16.
def : Pat<
  (store (bf16 FPR16Op:$Rt), (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
  (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4058
// Armv8.4 Weaker Release Consistency enhancements
//         LDAPR & STLR with Immediate Offset instructions
// Everything in this group is gated on HasRCPC_IMMO.
let Predicates = [HasRCPC_IMMO] in {
  // Stores.
  defm STLURB    : BaseStoreUnscaleV84<"stlurb",   0b00, 0b00, GPR32>;
  defm STLURH    : BaseStoreUnscaleV84<"stlurh",   0b01, 0b00, GPR32>;
  defm STLURW    : BaseStoreUnscaleV84<"stlur",    0b10, 0b00, GPR32>;
  defm STLURX    : BaseStoreUnscaleV84<"stlur",    0b11, 0b00, GPR64>;
  // Byte loads.
  defm LDAPURB   : BaseLoadUnscaleV84 <"ldapurb",  0b00, 0b01, GPR32>;
  defm LDAPURSBW : BaseLoadUnscaleV84 <"ldapursb", 0b00, 0b11, GPR32>;
  defm LDAPURSBX : BaseLoadUnscaleV84 <"ldapursb", 0b00, 0b10, GPR64>;
  // Half-word loads.
  defm LDAPURH   : BaseLoadUnscaleV84 <"ldapurh",  0b01, 0b01, GPR32>;
  defm LDAPURSHW : BaseLoadUnscaleV84 <"ldapursh", 0b01, 0b11, GPR32>;
  defm LDAPURSHX : BaseLoadUnscaleV84 <"ldapursh", 0b01, 0b10, GPR64>;
  // Word and double-word loads.
  defm LDAPUR    : BaseLoadUnscaleV84 <"ldapur",   0b10, 0b01, GPR32>;
  defm LDAPURSW  : BaseLoadUnscaleV84 <"ldapursw", 0b10, 0b10, GPR64>;
  defm LDAPURX   : BaseLoadUnscaleV84 <"ldapur",   0b11, 0b01, GPR64>;
}
4076
// Match all store 64 bits width whose type is compatible with FPR64
// Single-element vectors: no IsLE guard, default pattern complexity.
foreach VecVT = [v1f64, v1i64] in
  def : Pat<(store (VecVT FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4082
let AddedComplexity = 10 in {

// 64-bit vectors held in FPR64 are stored with STURDi.
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  foreach VecVT = [v2f32, v8i8, v4i16, v2i32, v4f16, v4bf16] in
    def : Pat<(store (VecVT FPR64:$Rt),
                     (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
              (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

// Match all store 128 bits width whose type is compatible with FPR128
// f128 is not guarded by the IsLE predicate.
def : Pat<(store (f128 FPR128:$Rt),
                 (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
          (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  // Each 128-bit vector type is listed exactly once; the v2f64 pattern used to
  // be defined twice with identical source and result, which only added a
  // redundant anonymous pattern record.
  foreach VecVT = [v4f32, v2f64, v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16] in
    def : Pat<(store (VecVT FPR128:$Rt),
                     (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
              (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

} // AddedComplexity = 10
4143
// unscaled i64 truncating stores: the sub_32 subregister of the 64-bit source
// is stored with the 32-bit-register form of the instruction.
def : Pat<
  (truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
  (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<
  (truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
  (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
def : Pat<
  (truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
  (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
4151
// Match stores from lane 0 to the appropriate subreg's store.
// Thin wrapper over VecStoreLane0Pat that fixes the address pattern to
// am_unscaled128 and the offset operand to simm9.
// NOTE(review): am_unscaled128 is used for every element size here, including
// the 16/32/64-bit cases - confirm this is intentional.
multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
                             ValueType VTy, ValueType STy,
                             ValueType SubRegTy,
                             SubRegIndex SubRegIdx, Instruction STR> {
  defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegTy,
                          SubRegIdx, simm9, STR>;
}

// AddedComplexity = 19 for every lane-0 unscaled pattern.
let AddedComplexity = 19 in {
  // 16-bit element (hsub).
  defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, f16, hsub, STURHi>;
  defm : VecStoreULane0Pat<store,         v8f16, f16, f16, hsub, STURHi>;
  // 32-bit element (ssub).
  defm : VecStoreULane0Pat<store,         v4i32, i32, i32, ssub, STURSi>;
  defm : VecStoreULane0Pat<store,         v4f32, f32, i32, ssub, STURSi>;
  // 64-bit element (dsub).
  defm : VecStoreULane0Pat<store,         v2i64, i64, i64, dsub, STURDi>;
  defm : VecStoreULane0Pat<store,         v2f64, f64, i64, dsub, STURDi>;
}
4168
//---
// STR mnemonics fall back to STUR for negative or unaligned offsets.

// General-purpose registers.
def : InstAlias<"str $Rt, [$Rn, $offset]",
  (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
  (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// FP/SIMD registers, B through Q.
def : InstAlias<"str $Rt, [$Rn, $offset]",
  (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
  (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
  (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
  (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"str $Rt, [$Rn, $offset]",
  (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;

// Byte and half-word truncating forms.
def : InstAlias<"strb $Rt, [$Rn, $offset]",
  (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"strh $Rt, [$Rn, $offset]",
  (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
4190
//---
// (unscaled immediate, unprivileged)
// Word and double-word forms.
defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;

// Half-word and byte forms, sourced from a 32-bit register.
defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
4198
//---
// (immediate pre-indexed)

// General-purpose sources.
def STRWpre   : StorePreIdx<0b10, 0, 0b00, GPR32z,   "str", pre_store, i32>;
def STRXpre   : StorePreIdx<0b11, 0, 0b00, GPR64z,   "str", pre_store, i64>;

// FP/SIMD sources; gated on HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op,   "str", pre_store, i8>;
  def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op,  "str", pre_store, f16>;
  def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op,  "str", pre_store, f32>;
  def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op,  "str", pre_store, f64>;
  def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;
}

// Truncating byte and half-word stores from a 32-bit GPR.
def STRBBpre  : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8,  i32>;
def STRHHpre  : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;
4213
// truncstore i64: pre-indexed truncating stores of a 64-bit source store its
// sub_32 subregister with the 32-bit-register instruction.
def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                   GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                    GPR64sp:$addr, simm9:$off)>;
def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                    GPR64sp:$addr, simm9:$off)>;
4224
// Pre-indexed stores of values held in FP/SIMD registers whose type differs
// from the one named on the STR*pre definition itself.
//
// The bf16, v4bf16 and v8bf16 entries mirror the post-indexed pattern group,
// which already covers those types; without them the pre-indexed group had no
// pattern for bfloat values.
// NOTE(review): assumes no other part of the file already supplies pre_store
// patterns for the bfloat types - confirm before merging.

// Scalar bf16 shares STRHpre with f16.
def : Pat<(pre_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
          (STRHpre FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;

// 64-bit vectors are stored with STRDpre.
foreach VecVT = [v8i8, v4i16, v2i32, v2f32, v1i64, v1f64, v4f16, v4bf16] in
  def : Pat<(pre_store (VecVT FPR64:$Rt), GPR64sp:$addr, simm9:$off),
            (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

// 128-bit vectors are stored with STRQpre.
foreach VecVT = [v16i8, v8i16, v4i32, v4f32, v2i64, v2f64, v8f16, v8bf16] in
  def : Pat<(pre_store (VecVT FPR128:$Rt), GPR64sp:$addr, simm9:$off),
            (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4254
//---
// (immediate post-indexed)

// General-purpose sources.
def STRWpost   : StorePostIdx<0b10, 0, 0b00, GPR32z,   "str", post_store, i32>;
def STRXpost   : StorePostIdx<0b11, 0, 0b00, GPR64z,   "str", post_store, i64>;

// FP/SIMD sources; gated on HasFPARMv8.
let Predicates = [HasFPARMv8] in {
  def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op,   "str", post_store, i8>;
  def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op,  "str", post_store, f16>;
  def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op,  "str", post_store, f32>;
  def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op,  "str", post_store, f64>;
  def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;
}

// Truncating byte and half-word stores from a 32-bit GPR.
def STRBBpost  : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb",
                              post_truncsti8, i32>;
def STRHHpost  : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh",
                              post_truncsti16, i32>;
4269
// truncstore i64: post-indexed truncating stores of a 64-bit source store its
// sub_32 subregister with the 32-bit-register instruction.
def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                    GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                     GPR64sp:$addr, simm9:$off)>;
def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
          (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                     GPR64sp:$addr, simm9:$off)>;
4280
// Post-indexed stores of values held in FP/SIMD registers whose type differs
// from the one named on the STR*post definition itself.

// Scalar bf16 shares STRHpost with f16.
def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
          (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;

// 64-bit vectors are stored with STRDpost.
foreach VecVT = [v8i8, v4i16, v2i32, v2f32, v1i64, v1f64, v4f16, v4bf16] in
  def : Pat<(post_store (VecVT FPR64:$Rt), GPR64sp:$addr, simm9:$off),
            (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;

// 128-bit vectors are stored with STRQpost.
foreach VecVT = [v16i8, v8i16, v4i32, v4f32, v2i64, v2f64, v8f16, v8bf16] in
  def : Pat<(post_store (VecVT FPR128:$Rt), GPR64sp:$addr, simm9:$off),
            (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4317
4318//===----------------------------------------------------------------------===//
4319// Load/store exclusive instructions.
4320//===----------------------------------------------------------------------===//
4321
// Load-acquire (word, doubleword, byte, halfword).
def LDARW : LoadAcquire<0b10, 1, 1, 0, 1, GPR32, "ldar">;
def LDARX : LoadAcquire<0b11, 1, 1, 0, 1, GPR64, "ldar">;
def LDARB : LoadAcquire<0b00, 1, 1, 0, 1, GPR32, "ldarb">;
def LDARH : LoadAcquire<0b01, 1, 1, 0, 1, GPR32, "ldarh">;

// Load-acquire exclusive.
def LDAXRW : LoadExclusive<0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
def LDAXRX : LoadExclusive<0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
def LDAXRB : LoadExclusive<0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
def LDAXRH : LoadExclusive<0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;

// Load exclusive.
def LDXRW : LoadExclusive<0b10, 0, 1, 0, 0, GPR32, "ldxr">;
def LDXRX : LoadExclusive<0b11, 0, 1, 0, 0, GPR64, "ldxr">;
def LDXRB : LoadExclusive<0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
def LDXRH : LoadExclusive<0b01, 0, 1, 0, 0, GPR32, "ldxrh">;

// Store-release.
def STLRW : StoreRelease<0b10, 1, 0, 0, 1, GPR32, "stlr">;
def STLRX : StoreRelease<0b11, 1, 0, 0, 1, GPR64, "stlr">;
def STLRB : StoreRelease<0b00, 1, 0, 0, 1, GPR32, "stlrb">;
def STLRH : StoreRelease<0b01, 1, 0, 0, 1, GPR32, "stlrh">;

// Aliases accepting an explicit "#0" offset on store-release.
//
// LoadAcquire uses a $Rn of type GPR64sp0, whose parser accepts and discards
// an optional zero offset. StoreRelease deliberately uses plain GPR64sp plus
// these aliases instead: the newer STLR forms from the LRCPC3 extension carry
// a non-zero immediate inside the brackets, so GPR64sp0 is no longer
// appropriate for stores. The LRCPC3 LDAR forms are post-indexed, i.e. their
// immediate sits outside the [] brackets and is never seen by the GPR64sp0
// parser, which is why LoadAcquire can keep using it.
def STLRW0 : InstAlias<"stlr\t$Rt, [$Rn, #0]",  (STLRW GPR32:$Rt, GPR64sp:$Rn)>;
def STLRX0 : InstAlias<"stlr\t$Rt, [$Rn, #0]",  (STLRX GPR64:$Rt, GPR64sp:$Rn)>;
def STLRB0 : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB GPR32:$Rt, GPR64sp:$Rn)>;
def STLRH0 : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH GPR32:$Rt, GPR64sp:$Rn)>;

// Store-release exclusive.
def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;

// Store exclusive.
def STXRW : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
def STXRX : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
def STXRB : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
def STXRH : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;

// Load-acquire exclusive pair / load exclusive pair.
def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;

def LDXPW : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
def LDXPX : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;

// Store-release exclusive pair / store exclusive pair.
def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;

def STXPW : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
def STXPX : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
4378
// v8.1a "Limited Order Region" (LOR) extension.
let Predicates = [HasLOR] in {
  // LOR load-acquire instructions.
  def LDLARW : LoadAcquire<0b10, 1, 1, 0, 0, GPR32, "ldlar">;
  def LDLARX : LoadAcquire<0b11, 1, 1, 0, 0, GPR64, "ldlar">;
  def LDLARB : LoadAcquire<0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
  def LDLARH : LoadAcquire<0b01, 1, 1, 0, 0, GPR32, "ldlarh">;

  // LOR store-release instructions.
  def STLLRW : StoreRelease<0b10, 1, 0, 0, 0, GPR32, "stllr">;
  def STLLRX : StoreRelease<0b11, 1, 0, 0, 0, GPR64, "stllr">;
  def STLLRB : StoreRelease<0b00, 1, 0, 0, 0, GPR32, "stllrb">;
  def STLLRH : StoreRelease<0b01, 1, 0, 0, 0, GPR32, "stllrh">;

  // Aliases accepting an explicit "#0" offset on the store-release forms.
  def STLLRW0
      : InstAlias<"stllr\t$Rt,  [$Rn, #0]", (STLLRW GPR32:$Rt, GPR64sp:$Rn)>;
  def STLLRX0
      : InstAlias<"stllr\t$Rt,  [$Rn, #0]", (STLLRX GPR64:$Rt, GPR64sp:$Rn)>;
  def STLLRB0
      : InstAlias<"stllrb\t$Rt, [$Rn, #0]", (STLLRB GPR32:$Rt, GPR64sp:$Rn)>;
  def STLLRH0
      : InstAlias<"stllrh\t$Rt, [$Rn, #0]", (STLLRH GPR32:$Rt, GPR64sp:$Rn)>;
}
4398
4399//===----------------------------------------------------------------------===//
// Floating point to integer conversion instructions (unscaled and scaled).
4401//===----------------------------------------------------------------------===//
4402
// Unscaled conversions whose selection patterns come from the matching
// int_aarch64_neon_fcvt* intrinsic.
defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;

// FCVTZS / FCVTZU implement the generic fp-to-int nodes and exist in both an
// unscaled and a scaled flavour; the two defms of each share one name prefix.
defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
4415
// Select saturating fp-to-int nodes directly to FCVT instructions, since
// AArch64's FCVT instructions already saturate when the input is out of range.
//
//   to_int_sat - saturating conversion node to match.
//   INST       - instruction name prefix; the register-form suffix
//                (U/S, W/X, H/S/D, r/ri) is pasted onto it.
multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
  // Unscaled, f16 source (needs full FP16 support).
  let Predicates = [HasFullFP16] in {
    def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
              (!cast<Instruction>(INST # "UWHr") f16:$Rn)>;
    def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
              (!cast<Instruction>(INST # "UXHr") f16:$Rn)>;
  }

  // Unscaled, f32 and f64 sources.
  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
            (!cast<Instruction>(INST # "UWSr") f32:$Rn)>;
  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
            (!cast<Instruction>(INST # "UXSr") f32:$Rn)>;
  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
            (!cast<Instruction>(INST # "UWDr") f64:$Rn)>;
  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
            (!cast<Instruction>(INST # "UXDr") f64:$Rn)>;

  // Scaled, f16 source: a multiply by a fixedpoint_* operand is folded into
  // the scaled (ri) instruction form.
  let Predicates = [HasFullFP16] in {
    def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
              (!cast<Instruction>(INST # "SWHri") $Rn, $scale)>;
    def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
              (!cast<Instruction>(INST # "SXHri") $Rn, $scale)>;
  }

  // Scaled, f32 and f64 sources.
  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
            (!cast<Instruction>(INST # "SWSri") $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
            (!cast<Instruction>(INST # "SXSri") $Rn, $scale)>;
  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
            (!cast<Instruction>(INST # "SWDri") $Rn, $scale)>;
  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
            (!cast<Instruction>(INST # "SXDri") $Rn, $scale)>;
}

defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
4451
// Select a scalar fp-to-int intrinsic to the FCVT instruction family named by
// INST, in both the unscaled (r) and the scaled (ri) register forms.
//
//   round - intrinsic to match.
//   INST  - instruction name prefix; the register-form suffix is pasted on.
multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
  // Unscaled, f16 source (needs full FP16 support).
  let Predicates = [HasFullFP16] in {
    def : Pat<(i32 (round f16:$Rn)),
              (!cast<Instruction>(INST # "UWHr") $Rn)>;
    def : Pat<(i64 (round f16:$Rn)),
              (!cast<Instruction>(INST # "UXHr") $Rn)>;
  }

  // Unscaled, f32 and f64 sources.
  def : Pat<(i32 (round f32:$Rn)),
            (!cast<Instruction>(INST # "UWSr") $Rn)>;
  def : Pat<(i64 (round f32:$Rn)),
            (!cast<Instruction>(INST # "UXSr") $Rn)>;
  def : Pat<(i32 (round f64:$Rn)),
            (!cast<Instruction>(INST # "UWDr") $Rn)>;
  def : Pat<(i64 (round f64:$Rn)),
            (!cast<Instruction>(INST # "UXDr") $Rn)>;

  // Scaled, f16 source: the multiply by a fixedpoint_* operand is folded into
  // the instruction's scale operand.
  let Predicates = [HasFullFP16] in {
    def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
              (!cast<Instruction>(INST # "SWHri") $Rn, $scale)>;
    def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
              (!cast<Instruction>(INST # "SXHri") $Rn, $scale)>;
  }

  // Scaled, f32 and f64 sources.
  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
            (!cast<Instruction>(INST # "SWSri") $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
            (!cast<Instruction>(INST # "SXSri") $Rn, $scale)>;
  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
            (!cast<Instruction>(INST # "SWDri") $Rn, $scale)>;
  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
            (!cast<Instruction>(INST # "SXDri") $Rn, $scale)>;
}

defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
4480
// Fold a rounding operation followed by an fp-to-int conversion into the
// single FCVT instruction that converts with that rounding mode.
//
//   to_int     - plain conversion node (fp_to_sint / fp_to_uint).
//   to_int_sat - saturating conversion node.
//   round      - rounding node feeding the conversion (fceil, ffloor, ...).
//   INST       - instruction name prefix; the register-form suffix is pasted
//                onto it.
multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
  // f16 sources need full FP16 support. These mirror the f16 patterns of the
  // saturating conversions below, so the plain conversion folds the rounding
  // step for every type the saturating one does.
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int (round f16:$Rn))),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int (round f16:$Rn))),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int (round f32:$Rn))),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int (round f64:$Rn))),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;

  // These instructions saturate like fp_to_[su]int_sat.
  let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
  }
  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
}

// One instantiation per (signedness, rounding node) pair:
//   fceil -> FCVTP*, ffloor -> FCVTM*, ftrunc -> FCVTZ*, fround -> FCVTA*.
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil,  "FCVTPU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
4516
4517
4518
// lround / llround are selected to FCVTAS. The W-register form is used for
// an i32 result and the X-register form for an i64 result.

// f16 sources need full FP16 support.
let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (any_lround f16:$Rn)),  (FCVTASUWHr f16:$Rn)>;
  def : Pat<(i64 (any_lround f16:$Rn)),  (FCVTASUXHr f16:$Rn)>;
  def : Pat<(i64 (any_llround f16:$Rn)), (FCVTASUXHr f16:$Rn)>;
}

// f32 / f64 sources.
def : Pat<(i32 (any_lround f32:$Rn)),  (FCVTASUWSr f32:$Rn)>;
def : Pat<(i32 (any_lround f64:$Rn)),  (FCVTASUWDr f64:$Rn)>;
def : Pat<(i64 (any_lround f32:$Rn)),  (FCVTASUXSr f32:$Rn)>;
def : Pat<(i64 (any_lround f64:$Rn)),  (FCVTASUXDr f64:$Rn)>;
def : Pat<(i64 (any_llround f32:$Rn)), (FCVTASUXSr f32:$Rn)>;
def : Pat<(i64 (any_llround f64:$Rn)), (FCVTASUXDr f64:$Rn)>;
4539
4540//===----------------------------------------------------------------------===//
4541// Scaled integer to floating point conversion instructions.
4542//===----------------------------------------------------------------------===//
4543
defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;

// Fold an integer-to-FP conversion whose result is divided by a fixedpoint_*
// operand into the scaled (ri) form of the conversion instruction.
//
//   to_fp - conversion fragment to match (signed or unsigned).
//   INST  - instruction name prefix ("SCVTF" / "UCVTF"); the register-form
//           suffix is pasted onto it.
multiclass IntegerToFPDivScalePats<SDPatternOperator to_fp, string INST> {
  // 32-bit integer source.
  def : Pat<(f16 (fdiv (f16 (to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
            (!cast<Instruction>(INST # "SWHri") GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
  def : Pat<(f32 (fdiv (f32 (to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
            (!cast<Instruction>(INST # "SWSri") GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
  def : Pat<(f64 (fdiv (f64 (to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
            (!cast<Instruction>(INST # "SWDri") GPR32:$Rn, fixedpoint_f64_i32:$scale)>;

  // 64-bit integer source.
  def : Pat<(f16 (fdiv (f16 (to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
            (!cast<Instruction>(INST # "SXHri") GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
  def : Pat<(f32 (fdiv (f32 (to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
            (!cast<Instruction>(INST # "SXSri") GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
  def : Pat<(f64 (fdiv (f64 (to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
            (!cast<Instruction>(INST # "SXDri") GPR64:$Rn, fixedpoint_f64_i64:$scale)>;
}

defm : IntegerToFPDivScalePats<any_sint_to_fp, "SCVTF">;
defm : IntegerToFPDivScalePats<any_uint_to_fp, "UCVTF">;
4574
4575//===----------------------------------------------------------------------===//
4576// Unscaled integer to floating point conversion instruction.
4577//===----------------------------------------------------------------------===//
4578
defm FMOV : UnscaledConversion<"fmov">;

// Pseudo instructions producing FP +0.0. They exist so that the zero constant
// can be flagged rematerializable and as cheap as a move.
let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1,
    Predicates = [HasFPARMv8] in {
  def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
               Sched<[WriteF]>;
  def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
               Sched<[WriteF]>;
  def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
               Sched<[WriteF]>;
}

// Assembly aliases: "fmov Rd, #0.0" is encoded as a move from the zero
// register.
let Predicates = [HasFullFP16] in
def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;

// A bf16 zero uses the same pseudo as an f16 zero.
def : Pat<(bf16 fpimm0), (FMOVH0)>;

// Other f16 / bf16 immediates: materialize the bit pattern in a 32-bit GPR
// and move it across to the FP register.
let Predicates = [HasFullFP16] in {
  foreach VT = [f16, bf16] in
    def : Pat<(VT fpimm:$in),
              (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 VT:$in)))>;
}
4609
4610//===----------------------------------------------------------------------===//
4611// Floating point conversion instruction.
4612//===----------------------------------------------------------------------===//
4613
// Conversions between floating point precisions.
defm FCVT : FPConversion<"fcvt">;

//===----------------------------------------------------------------------===//
// Floating point single operand instructions.
//===----------------------------------------------------------------------===//

// These three use the NoException multiclass variant.
defm FABS : SingleOperandFPDataNoException<0b0001, "fabs", fabs>;
defm FMOV : SingleOperandFPDataNoException<0b0000, "fmov">;
defm FNEG : SingleOperandFPDataNoException<0b0010, "fneg", fneg>;

// Round-to-integral instructions, one per rounding behaviour.
defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>;
defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>;
defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>;
defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>;
defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>;

defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>;
defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>;

// Square root is scheduled with the divide resource.
let SchedRW = [WriteFDiv] in
defm FSQRT : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>;

// FRINT32* / FRINT64* are only available with the HasFRInt3264 feature.
let Predicates = [HasFRInt3264] in {
  defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>;
  defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>;
  defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>;
  defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
} // HasFRInt3264
4642
// Patterns converting the 1x64 vector intrinsics to the equivalent scalar
// instructions. The FRINT32*/FRINT64* instructions selected here are defined
// under HasFRInt3264, so the patterns carry the same predicate; without it
// instruction selection could emit them for a subtarget lacking the feature.
let Predicates = [HasFRInt3264] in {
  def : Pat<(v1f64 (int_aarch64_neon_frint32z (v1f64 FPR64:$Rn))),
            (FRINT32ZDr FPR64:$Rn)>;
  def : Pat<(v1f64 (int_aarch64_neon_frint64z (v1f64 FPR64:$Rn))),
            (FRINT64ZDr FPR64:$Rn)>;
  def : Pat<(v1f64 (int_aarch64_neon_frint32x (v1f64 FPR64:$Rn))),
            (FRINT32XDr FPR64:$Rn)>;
  def : Pat<(v1f64 (int_aarch64_neon_frint64x (v1f64 FPR64:$Rn))),
            (FRINT64XDr FPR64:$Rn)>;
} // HasFRInt3264
4652
// lrint / llrint are selected as FRINTX followed by FCVTZS.
//
// Using two instructions is valid for the strict variants too: any exception
// is raised by exactly one of them. For example, a non-integral input raises
// inexact in FRINTX, and FCVTZS then sees an integral value and raises
// nothing further.

// f16 sources need full FP16 support.
let Predicates = [HasFullFP16] in {
  def : Pat<(i32 (any_lrint f16:$Rn)),  (FCVTZSUWHr (FRINTXHr f16:$Rn))>;
  def : Pat<(i64 (any_lrint f16:$Rn)),  (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
  def : Pat<(i64 (any_llrint f16:$Rn)), (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
}

// f32 / f64 sources.
def : Pat<(i32 (any_lrint f32:$Rn)),  (FCVTZSUWSr (FRINTXSr f32:$Rn))>;
def : Pat<(i32 (any_lrint f64:$Rn)),  (FCVTZSUWDr (FRINTXDr f64:$Rn))>;
def : Pat<(i64 (any_lrint f32:$Rn)),  (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
def : Pat<(i64 (any_lrint f64:$Rn)),  (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
def : Pat<(i64 (any_llrint f32:$Rn)), (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
def : Pat<(i64 (any_llrint f64:$Rn)), (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
4677
4678//===----------------------------------------------------------------------===//
4679// Floating point two operand instructions.
4680//===----------------------------------------------------------------------===//
4681
// Scalar two-operand FP arithmetic. Divide and the multiplies override the
// scheduling resource; the rest keep whatever the multiclass assigns.
defm FADD : TwoOperandFPData<0b0010, "fadd", any_fadd>;

let SchedRW = [WriteFDiv] in
defm FDIV : TwoOperandFPData<0b0001, "fdiv", any_fdiv>;

defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>;
defm FMAX   : TwoOperandFPData<0b0100, "fmax", any_fmaximum>;
defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>;
defm FMIN   : TwoOperandFPData<0b0101, "fmin", any_fminimum>;

let SchedRW = [WriteFMul] in {
  defm FMUL  : TwoOperandFPData<0b0000, "fmul", any_fmul>;
  defm FNMUL : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>;
}

defm FSUB : TwoOperandFPData<0b0011, "fsub", any_fsub>;
4695
// Select a scalar two-operand FP operation whose second operand is lane 0 of
// a 128-bit vector. The lane is read through the matching sub-register
// (hsub / ssub / dsub) of the vector register instead of an explicit extract.
//
//   inst             - instruction name prefix.
//   inst_*_suffix    - suffix pasted onto inst for the f16 / f32 / f64 form.
//   OpNode           - operation to match.
//   preds            - extra predicates applied to every pattern; the f16
//                      pattern additionally requires HasFullFP16.
multiclass FMULScalarFromIndexedLane0Patterns<string inst,
                                              string inst_f16_suffix,
                                              string inst_f32_suffix,
                                              string inst_f64_suffix,
                                              SDPatternOperator OpNode,
                                              list<Predicate> preds = []> {
  // Half precision.
  let Predicates = !listconcat(preds, [HasFullFP16]) in {
    def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
                           (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
              (!cast<Instruction>(inst # inst_f16_suffix)
                  FPR16:$Rn, (f16 (EXTRACT_SUBREG V128:$Rm, hsub)))>;
  }

  // Single and double precision.
  let Predicates = preds in {
    def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
                           (f32 (vector_extract (v4f32 V128:$Rm), (i64 0))))),
              (!cast<Instruction>(inst # inst_f32_suffix)
                  FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub))>;
    def : Pat<(f64 (OpNode (f64 FPR64:$Rn),
                           (f64 (vector_extract (v2f64 V128:$Rm), (i64 0))))),
              (!cast<Instruction>(inst # inst_f64_suffix)
                  FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub))>;
  }
}

defm : FMULScalarFromIndexedLane0Patterns<"FMUL", "Hrr", "Srr", "Drr",
                                          any_fmul>;
4722
// Reassociated forms of FNMUL: a multiply whose first operand is negated,
// i.e. (-a) * b, is selected to FNMUL a, b.
let Predicates = [HasFullFP16] in
def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)),
          (FNMULHrr FPR16:$a, FPR16:$b)>;
def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)),
          (FNMULSrr FPR32:$a, FPR32:$b)>;
def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)),
          (FNMULDrr FPR64:$a, FPR64:$b)>;

// The v1f64 min/max operations are selected to the scalar double-precision
// instructions.
def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
4740
4741//===----------------------------------------------------------------------===//
4742// Floating point three operand instructions.
4743//===----------------------------------------------------------------------===//
4744
// Fused multiply-add family. The fragment given to each defm fixes where the
// negation sits in the matched fma:
//   FMADD  : fma(L, M, R)
//   FMSUB  : fma(L, -M, R)
//   FNMADD : -fma(L, M, R)
//   FNMSUB : fma(L, M, -R)
defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", any_fma>;
defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
     TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
     TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >;
defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
     TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;

// The fragments above only catch a negated *middle* operand. The patterns
// below catch the forms where the *first* operand of the fma is negated.
//
// N.b. in these scalar instructions the accumulator ($Ra) is the last
// operand, unlike the NEON variant.

// (-Rn) * Rm + Ra  ->  FMSUB
let Predicates = [HasNEON, HasFullFP16] in {
  def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
            (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
}

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

// (-Rn) * Rm + (-Ra)  ->  FNMADD
let Predicates = [HasNEON, HasFullFP16] in {
  def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
            (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
}

def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
4782
4783//===----------------------------------------------------------------------===//
4784// Floating point comparison instructions.
4785//===----------------------------------------------------------------------===//
4786
// FCMPE is only selected for the strict compare node; FCMP handles the rest.
defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
defm FCMP  : FPComparison<0, "fcmp", AArch64any_fcmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional comparison instructions.
//===----------------------------------------------------------------------===//

// FCCMPE is given no selection node here; FCCMP matches AArch64fccmp.
defm FCCMPE : FPCondComparison<1, "fccmpe">;
defm FCCMP  : FPCondComparison<0, "fccmp", AArch64fccmp>;

//===----------------------------------------------------------------------===//
// Floating point conditional select instruction.
//===----------------------------------------------------------------------===//

defm FCSEL : FPCondSelect<"fcsel">;

// A bf16 conditional select reuses the half-precision FCSEL.
let Predicates = [HasFullFP16] in {
  def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm),
                               (i32 imm:$cond), NZCV)),
            (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>;
}

// A conditional select of an f128 value cannot be a single instruction: the
// eventual code has to introduce basic blocks and control flow. It is
// therefore modelled as a pseudo that is expanded by a custom inserter.
let Predicates = [HasFPARMv8] in
def F128CSEL
    : Pseudo<(outs FPR128:$Rd),
             (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
             [(set (f128 FPR128:$Rd),
                   (AArch64csel FPR128:$Rn, FPR128:$Rm,
                                (i32 imm:$cond), NZCV))]> {
  // The select condition is read from the flags register.
  let Uses = [NZCV];
  let usesCustomInserter = 1;
  let hasNoSchedulingInfo = 1;
}
4820
4821//===----------------------------------------------------------------------===//
4822// Instructions used for emitting unwind opcodes on ARM64 Windows.
4823//===----------------------------------------------------------------------===//
// Helper for the SEH unwind-opcode pseudos: no outputs, no selection pattern
// and no scheduling information; only the input operand list varies.
class SEHUnwindPseudo<dag iops> : Pseudo<(outs), iops, []>, Sched<[]>;

let isPseudo = 1 in {
  def SEH_StackAlloc    : SEHUnwindPseudo<(ins i32imm:$size)>;
  def SEH_SaveFPLR      : SEHUnwindPseudo<(ins i32imm:$offs)>;
  def SEH_SaveFPLR_X    : SEHUnwindPseudo<(ins i32imm:$offs)>;
  def SEH_SaveReg       : SEHUnwindPseudo<(ins i32imm:$reg, i32imm:$offs)>;
  def SEH_SaveReg_X     : SEHUnwindPseudo<(ins i32imm:$reg, i32imm:$offs)>;
  def SEH_SaveRegP
      : SEHUnwindPseudo<(ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs)>;
  def SEH_SaveRegP_X
      : SEHUnwindPseudo<(ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs)>;
  def SEH_SaveFReg      : SEHUnwindPseudo<(ins i32imm:$reg, i32imm:$offs)>;
  def SEH_SaveFReg_X    : SEHUnwindPseudo<(ins i32imm:$reg, i32imm:$offs)>;
  def SEH_SaveFRegP
      : SEHUnwindPseudo<(ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs)>;
  def SEH_SaveFRegP_X
      : SEHUnwindPseudo<(ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs)>;
  def SEH_SetFP         : SEHUnwindPseudo<(ins)>;
  def SEH_AddFP         : SEHUnwindPseudo<(ins i32imm:$offs)>;
  def SEH_Nop           : SEHUnwindPseudo<(ins)>;
  def SEH_PrologEnd     : SEHUnwindPseudo<(ins)>;
  def SEH_EpilogStart   : SEHUnwindPseudo<(ins)>;
  def SEH_EpilogEnd     : SEHUnwindPseudo<(ins)>;
  def SEH_PACSignLR     : SEHUnwindPseudo<(ins)>;
  def SEH_SaveAnyRegQP
      : SEHUnwindPseudo<(ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs)>;
  def SEH_SaveAnyRegQPX
      : SEHUnwindPseudo<(ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs)>;
}
4846
//===----------------------------------------------------------------------===//
// Pseudo instructions for Windows EH
//===----------------------------------------------------------------------===//

// Both pseudos are terminating, side-effecting returns out of an EH scope.
// CATCHRET additionally goes through a custom inserter.
let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
    isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
  def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret)]>, Sched<[]>;

  let usesCustomInserter = 1 in
  def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src),
                        [(catchret bb:$dst, bb:$src)]>,
                 Sched<[]>;
}

// Pseudo instructions for homogeneous prolog/epilog. Both take a variable
// operand list.
let isPseudo = 1 in {
  // Operands: the CSRs to save, in order, then {FPOffset}.
  def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;

  // Operands: the CSRs to restore, in order.
  def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
}
4864
4865//===----------------------------------------------------------------------===//
4866// Floating point immediate move.
4867//===----------------------------------------------------------------------===//
4868
// FMOV with an encodable FP immediate is rematerializable and as cheap as a
// register move.
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
defm FMOV : FPMoveImmediate<"fmov">;

// bf16 immediates accepted by fpimmbf16 are selected to the half-precision
// immediate move, with the encoding produced by fpimm16XForm.
let Predicates = [HasFullFP16] in
def : Pat<(bf16 fpimmbf16:$in),
          (FMOVHi (fpimm16XForm bf16:$in))>;
4877
4878//===----------------------------------------------------------------------===//
4879// Advanced SIMD two vector instructions.
4880//===----------------------------------------------------------------------===//
4881
defm UABDL : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl", AArch64uabd>;

// Match UABDL in log2-shuffle patterns: abs(zext(a) - zext(b)). Each element
// width has one pattern taking 64-bit vector operands and one taking the high
// halves of 128-bit vector operands.

// i8 -> i16
def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
                           (zext (v8i8 V64:$opB))))),
          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
                           (zext (extract_high_v16i8 (v16i8 V128:$opB)))))),
          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;

// i16 -> i32
def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
                           (zext (v4i16 V64:$opB))))),
          (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))),
                           (zext (extract_high_v8i16 (v8i16 V128:$opB)))))),
          (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;

// i32 -> i64
def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
                           (zext (v2i32 V64:$opB))))),
          (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))),
                           (zext (extract_high_v4i32 (v4i32 V128:$opB)))))),
          (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
4903
// Two-vector integer instructions plus FABS. The trailing argument of each
// defm is the DAG node or NEON intrinsic handed to the multiclass (presumably
// used as its selection operator; see the multiclass definitions to confirm).
// The CMxx entries here use the AArch64cm*z nodes.
4904defm ABS    : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
4905defm CLS    : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
4906defm CLZ    : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
4907defm CMEQ   : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
4908defm CMGE   : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
4909defm CMGT   : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
4910defm CMLE   : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
4911defm CMLT   : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
4912defm CNT    : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
4913defm FABS   : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>;
4914
// An arithmetic shift right by (element width - 1), i.e. 7/15/31/63, is
// selected as the CMLT*rz instruction of the same vector type.
4915def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
4916          (CMLTv8i8rz V64:$Rn)>;
4917def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
4918          (CMLTv4i16rz V64:$Rn)>;
4919def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
4920          (CMLTv2i32rz V64:$Rn)>;
4921def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))),
4922          (CMLTv16i8rz V128:$Rn)>;
4923def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
4924          (CMLTv8i16rz V128:$Rn)>;
4925def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
4926          (CMLTv4i32rz V128:$Rn)>;
4927def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
4928          (CMLTv2i64rz V128:$Rn)>;
4929
// Two-vector floating-point compares (using the AArch64fcm*z nodes), followed
// by FCVTAS/FCVTAU, which are tied to their NEON intrinsics.
4930defm FCMEQ  : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
4931defm FCMGE  : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
4932defm FCMGT  : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
4933defm FCMLE  : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
4934defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
4935defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
4936defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
// FCVTL is instantiated without an operator argument; its selection patterns
// are spelled out explicitly below.
4937defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
// vcvthf2fp intrinsic: a v4i16 source in V64, or elements starting at index 4
// of a v8i16 source in V128 (selected as the V128-source variant).
4938def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
4939          (FCVTLv4i16 V64:$Rn)>;
4940def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
4941                                                                (i64 4)))),
4942          (FCVTLv8i16 V128:$Rn)>;
// any_fpextend: f32 -> f64 and f16 -> f32, each with a 64-bit source form and
// an extract_high_* form that reads the high half of a 128-bit source.
4943def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))),
4944          (FCVTLv2i32 V64:$Rn)>;
4945def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
4946          (FCVTLv4i32 V128:$Rn)>;
4947def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))),
4948          (FCVTLv4i16 V64:$Rn)>;
4949def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))),
4950          (FCVTLv8i16 V128:$Rn)>;
4951
// FP-to-integer conversions tied to their NEON intrinsics.
4952defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
4953defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
4954defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
4955defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
// FCVTN is instantiated without an operator argument; its patterns follow.
4956defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
// vcvtfp2hf intrinsic and any_fpround. In the concat_vectors forms the
// existing low half $Rd is placed in the dsub subregister of an IMPLICIT_DEF
// and passed as the first operand of the v8i16 / v4i32 variant.
4957def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
4958          (FCVTNv4i16 V128:$Rn)>;
4959def : Pat<(concat_vectors V64:$Rd,
4960                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
4961          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
4962def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))),
4963          (FCVTNv2i32 V128:$Rn)>;
4964def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))),
4965          (FCVTNv4i16 V128:$Rn)>;
4966def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))),
4967          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
4968def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))),
4969          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
4970defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
4971defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
4972defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
4973                                        int_aarch64_neon_fcvtxn>;
// FCVTZS/FCVTZU use the generic any_fp_to_sint / any_fp_to_uint operators.
4974defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
4975defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
4976
4977// AArch64's FCVT instructions saturate when out of range.
// Parameters:
//   to_int_sat - the saturating fp-to-int node to match; its second operand in
//                each pattern is the scalar integer type (i16/i32/i64).
//   INST       - instruction name prefix; the vector-type suffix (v4f16, ...)
//                is appended and the result resolved with !cast<Instruction>.
// The two f16 patterns are only enabled with HasFullFP16.
4978multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
4979  let Predicates = [HasFullFP16] in {
4980  def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
4981            (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
4982  def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
4983            (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
4984  }
4985  def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
4986            (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
4987  def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
4988            (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
4989  def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
4990            (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
4991}
// Signed saturating conversion -> FCVTZS, unsigned -> FCVTZU.
4992defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
4993defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;
4994
// Map the int_aarch64_neon_fcvtzs intrinsic to FCVTZS for every vector type.
// NOTE(review): the v4f16/v8f16 patterns here are not wrapped in a
// HasFullFP16 predicate, unlike the f16 patterns inside
// SIMDTwoVectorFPToIntSatPats - confirm this is intended.
4995def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
4996def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
4997def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
4998def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
4999def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;
5000
// Same mapping for the unsigned intrinsic int_aarch64_neon_fcvtzu -> FCVTZU.
5001def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
5002def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
5003def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
5004def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
5005def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;
5006
// FNEG, FRECPE and the FRINT* rounding family. Operator used by each FRINT*:
//   FRINTA any_fround      FRINTI any_fnearbyint   FRINTM any_ffloor
//   FRINTN any_froundeven  FRINTP any_fceil        FRINTX any_frint
//   FRINTZ any_ftrunc
5007defm FNEG   : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>;
5008defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
5009defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>;
5010defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>;
5011defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>;
5012defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>;
5013defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>;
5014defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>;
5015defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>;
5016
// FRINT32*/FRINT64* vector forms, only available under HasFRInt3264. Each is
// tied to the NEON intrinsic of the same name.
5017let Predicates = [HasFRInt3264] in {
5018  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
5019  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
5020  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
5021  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
5022} // HasFRInt3264
5023
5024defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
5025defm FSQRT  : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>;
// NEG is matched as a subtraction from the all-zeros vector: (0 - x).
5026defm NEG    : SIMDTwoVectorBHSD<1, 0b01011, "neg",
5027                               UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
5028defm NOT    : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
5029// Aliases for MVN -> NOT.
// Both the ".8b" (V64) and ".16b" (V128) arrangements are provided.
5030def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
5031                (NOTv8i8 V64:$Vd, V64:$Vn)>;
5032def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
5033                (NOTv16i8 V128:$Vd, V128:$Vn)>;
5034
// vnot on the wider element types is selected as the byte-wise NOT of the
// same register width (NOTv8i8 for V64, NOTv16i8 for V128).
5035def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
5036def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
5037def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
5038def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
5039def : Pat<(vnot (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
5040def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
5041
// Remaining two-vector instructions (RBIT ... XTN).
5042defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
5043defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
5044defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
5045defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
// SADALP is matched as add(acc, saddlp(x)); UADALP below mirrors it with
// AArch64uaddlp.
5046defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
5047       BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >;
5048defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>;
5049defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>;
// SHLL takes no arguments here; its selection patterns are defined separately
// (see SIMDVectorLShiftLongBySizeBHSPats in this file).
5050defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
5051defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
5052defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
5053defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
5054defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
5055defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
5056defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
5057       BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
5058defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
5059defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
5060defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
5061defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
5062defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
5063defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
// XTN is matched from the generic trunc node.
5064defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
5065
// AArch64rev32/AArch64rev64 on f16, bf16 and f32 vectors are selected as the
// integer REV32/REV64 instruction with the same element count and width.
5066def : Pat<(v4f16  (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
5067def : Pat<(v4f16  (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
5068def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
5069def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
5070def : Pat<(v8f16  (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
5071def : Pat<(v8f16  (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
5072def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
5073def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
5074def : Pat<(v2f32  (AArch64rev64 V64:$Rn)),  (REV64v2i32 V64:$Rn)>;
5075def : Pat<(v4f32  (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
5076
5077// Patterns for vector long shift (by element width). These need to match all
5078// three of zext, sext and anyext so it's easier to pull the patterns out of the
5079// definition.
// Parameter:
//   ext - the extension operator to match (instantiated below with anyext,
//         zext and sext).
// Each pattern matches AArch64vshl of the extended value by the source
// element width (8, 16 or 32) and selects the corresponding SHLL variant; the
// extract_high_* forms read the high half of a 128-bit source.
5080multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
5081  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
5082            (SHLLv8i8 V64:$Rn)>;
5083  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)),
5084            (SHLLv16i8 V128:$Rn)>;
5085  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
5086            (SHLLv4i16 V64:$Rn)>;
5087  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)),
5088            (SHLLv8i16 V128:$Rn)>;
5089  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
5090            (SHLLv2i32 V64:$Rn)>;
5091  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)),
5092            (SHLLv4i32 V128:$Rn)>;
5093}
5094
5095defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
5096defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
5097defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
5098
5099// Constant vector values, used in the S/UQXTN patterns below.
// Per-lane values, as used by the S/UQXTN patterns in this file:
//   VImmFF   = 255    (v8i16 lanes)    VImmFFFF = 65535  (v4i32 lanes)
//   VImm7F   = 127    (v8i16 lanes)    VImm80   = -128   (v8i16 lanes)
//   VImm7FFF = 32767  (v4i32 lanes)    VImm8000 = -32768 (v4i32 lanes)
// Each leaf matches the specific movi/mvni node form that builds the constant.
5100def VImmFF:   PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
5101def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
5102def VImm7F:   PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
5103def VImm80:   PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
5104def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
5105def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
5106
5107// trunc(umin(X, 255)) -> UQXTN v8i8
5108def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
5109          (UQXTNv8i8 V128:$Vn)>;
5110// trunc(umin(X, 65535)) -> UQXTN v4i16
5111def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
5112          (UQXTNv4i16 V128:$Vn)>;
5113// trunc(smin(smax(X, -128), 127)) -> SQXTN v8i8
5114//  (also matched with the min/max nesting reversed)
5115def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
5116                             (v8i16 VImm7F)))),
5117          (SQXTNv8i8 V128:$Vn)>;
5118def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
5119                             (v8i16 VImm80)))),
5120          (SQXTNv8i8 V128:$Vn)>;
5121// trunc(smin(smax(X, -32768), 32767)) -> SQXTN v4i16
5122//  (also matched with the min/max nesting reversed)
5123def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
5124                              (v4i32 VImm7FFF)))),
5125          (SQXTNv4i16 V128:$Vn)>;
5126def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
5127                              (v4i32 VImm8000)))),
5128          (SQXTNv4i16 V128:$Vn)>;
5129
5130// concat_vectors(Vd, trunc(smin(smax(Vn, -128), 127))) ~> SQXTN2(Vd, Vn)
5131// (also matched with the min/max nesting reversed)
// The existing low half Vd is placed in the dsub subregister of an
// IMPLICIT_DEF and passed as the first operand of SQXTNv16i8.
5132def : Pat<(v16i8 (concat_vectors
5133                 (v8i8 V64:$Vd),
5134                 (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
5135                                          (v8i16 VImm7F)))))),
5136          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5137def : Pat<(v16i8 (concat_vectors
5138                 (v8i8 V64:$Vd),
5139                 (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
5140                                          (v8i16 VImm80)))))),
5141          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5142
5143// concat_vectors(Vd, trunc(smin(smax(Vn, -32768), 32767))) ~> SQXTN2(Vd, Vn)
5144// (also matched with the min/max nesting reversed)
5145def : Pat<(v8i16 (concat_vectors
5146                 (v4i16 V64:$Vd),
5147                 (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
5148                                           (v4i32 VImm7FFF)))))),
5149          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5150def : Pat<(v8i16 (concat_vectors
5151                 (v4i16 V64:$Vd),
5152                 (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
5153                                           (v4i32 VImm8000)))))),
5154          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
5155
5156//===----------------------------------------------------------------------===//
5157// Advanced SIMD three vector instructions.
5158//===----------------------------------------------------------------------===//
5159
// Three-same-vector integer add and compare instructions.
5160defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
5161defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>;
5162defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
5163defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
5164defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
5165defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
5166defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
5167defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
// not(x == 0) is selected as CMTST x, x for every integer vector type; the
// instruction is looked up by name as "CMTST" # VT.
5168foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in {
5169def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
5170}
// FABD is tied to its NEON intrinsic; in addition, fabs(fsub(a, b)) is
// selected as FABD. The f32/f64 patterns require HasNEON, the f16 patterns
// additionally require HasFullFP16.
5171defm FABD    : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
5172let Predicates = [HasNEON] in {
5173foreach VT = [ v2f32, v4f32, v2f64 ] in
5174def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
5175}
5176let Predicates = [HasNEON, HasFullFP16] in {
5177foreach VT = [ v4f16, v8f16 ] in
5178def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
5179}
// Three-same-vector floating-point compares, arithmetic and min/max.
// The pairwise forms (F*P) use AArch64faddp or NEON intrinsics; the
// element-wise forms use the generic any_* operators
// (any_fmaxnum/any_fminnum for FMAXNM/FMINNM, any_fmaximum/any_fminimum for
// FMAX/FMIN).
5180defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",AArch64facge>;
5181defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",AArch64facgt>;
5182defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>;
5183defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>;
5184defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
5185defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
5186defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
5187defm FDIV    : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>;
5188defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
5189defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>;
5190defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
5191defm FMAX    : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>;
5192defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
5193defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
5194defm FMINP   : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
5195defm FMIN    : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;
5196
5197// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
5198// instruction expects the addend first, while the fma intrinsic puts it last.
// FMLA matches fma(RHS, MHS, LHS); FMLS matches fma(MHS, -RHS, LHS), i.e. the
// same shape with one multiplicand negated. LHS is the addend in both.
5199defm FMLA     : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
5200            TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
5201defm FMLS     : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
5202            TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
5203
5204defm FMULX    : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
5205defm FMUL     : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>;
5206defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
5207defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
5208defm FSUB     : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>;
5209
5210// MLA and MLS are generated in MachineCombine
// (hence null_frag: no selection pattern is attached to these two here).
5211defm MLA      : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
5212defm MLS      : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
5213
5214defm MUL      : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
5215defm PMUL     : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
// SABA is matched as add(acc, sabd(a, b)); UABA below mirrors it with
// AArch64uabd.
5216defm SABA     : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
5217      TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
5218defm SABD     : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
// Halving adds use the generic averaging nodes: SHADD = avgfloors,
// SRHADD = avgceils, UHADD = avgflooru, URHADD = avgceilu.
5219defm SHADD    : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>;
5220defm SHSUB    : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
5221defm SMAXP    : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
5222defm SMAX     : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
5223defm SMINP    : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
5224defm SMIN     : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
5225defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
5226defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh",int_aarch64_neon_sqdmulh>;
5227defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh",int_aarch64_neon_sqrdmulh>;
5228defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
5229defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
5230defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
5231defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
5232defm SRSHL    : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
5233defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
5234defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
5235defm UABA     : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
5236      TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
5237defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
5238defm UHADD    : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
5239defm UHSUB    : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
5240defm UMAXP    : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
5241defm UMAX     : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
5242defm UMINP    : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
5243defm UMIN     : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
5244defm UQADD    : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
5245defm UQRSHL   : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
5246defm UQSHL    : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
5247defm UQSUB    : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
5248defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
5249defm URSHL    : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
5250defm USHL     : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
5251defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
5252                                                  int_aarch64_neon_sqrdmlah>;
5253defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
5254                                                    int_aarch64_neon_sqrdmlsh>;
5255
5256// Extra saturating-arithmetic patterns, in addition to the intrinsic matches
// above: the generic saddsat/uaddsat/ssubsat/usubsat nodes are mapped onto the
// SQADD/UQADD/SQSUB/UQSUB instructions named by the string argument.
5257defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
5258defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
5259defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
5260defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;
5261
// Bitwise logical instructions. BIC is matched as and(a, not(b)) and ORN as
// or(a, not(b)).
5262defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
5263defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
5264                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
5265defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
5266defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
5267                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
5268defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;
5269
5270// Pseudo bitwise select pattern BSP.
5271// It is expanded into BSL/BIT/BIF after register allocation.
// The fragment matched is (LHS & MHS) | (~LHS & RHS), so LHS acts as the mask.
5272defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
5273                                                      (and (vnot node:$LHS), node:$RHS))>>;
// BSL and BIF are instantiated without an operator argument; only BIT is
// given one (AArch64bit).
5274defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
5275defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit", AArch64bit>;
5276defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;
5277
// The AArch64bsp node is selected as the BSP pseudo for every integer element
// type: all 64-bit types use BSPv8i8 and all 128-bit types use BSPv16i8.
5278def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
5279          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5280def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
5281          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5282def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
5283          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5284def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
5285          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
5286
5287def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
5288          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5289def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
5290          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5291def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
5292          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5293def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
5294          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
5295
// Vector "mov" is an alias of ORR with the source used for both inputs.
// All 128-bit arrangements map to ORRv16i8 and all 64-bit ones to ORRv8i8.
// Only the .16b / .8b forms pass 1 as the trailing InstAlias argument; the
// other arrangements pass 0 (presumably parse-only, so the printer emits the
// byte arrangement - confirm against the InstAlias class definition).
5296def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
5297                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
5298def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
5299                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5300def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
5301                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5302def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
5303                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
5304
5305def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
5306                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
5307def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
5308                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5309def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
5310                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5311def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
5312                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
5313
// "cmls a, b" is an alias of CMHS with the two source operands swapped
// (CMHS dst, src2, src1). All arrangements pass 0 as the trailing InstAlias
// argument.
5314def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
5315                "|cmls.8b\t$dst, $src1, $src2}",
5316                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5317def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
5318                "|cmls.16b\t$dst, $src1, $src2}",
5319                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5320def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
5321                "|cmls.4h\t$dst, $src1, $src2}",
5322                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5323def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
5324                "|cmls.8h\t$dst, $src1, $src2}",
5325                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5326def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
5327                "|cmls.2s\t$dst, $src1, $src2}",
5328                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5329def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
5330                "|cmls.4s\t$dst, $src1, $src2}",
5331                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5332def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
5333                "|cmls.2d\t$dst, $src1, $src2}",
5334                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5335
// "cmlo a, b" is an alias of CMHI with the two source operands swapped
// (CMHI dst, src2, src1). All arrangements pass 0 as the trailing InstAlias
// argument.
5336def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
5337                "|cmlo.8b\t$dst, $src1, $src2}",
5338                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5339def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
5340                "|cmlo.16b\t$dst, $src1, $src2}",
5341                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5342def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
5343                "|cmlo.4h\t$dst, $src1, $src2}",
5344                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5345def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
5346                "|cmlo.8h\t$dst, $src1, $src2}",
5347                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5348def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
5349                "|cmlo.2s\t$dst, $src1, $src2}",
5350                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5351def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
5352                "|cmlo.4s\t$dst, $src1, $src2}",
5353                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5354def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
5355                "|cmlo.2d\t$dst, $src1, $src2}",
5356                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5357
// Three-operand "cmle a, b" is an alias of CMGE with the two source operands
// swapped (CMGE dst, src2, src1). All arrangements pass 0 as the trailing
// InstAlias argument.
5358def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
5359                "|cmle.8b\t$dst, $src1, $src2}",
5360                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5361def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
5362                "|cmle.16b\t$dst, $src1, $src2}",
5363                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5364def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
5365                "|cmle.4h\t$dst, $src1, $src2}",
5366                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5367def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
5368                "|cmle.8h\t$dst, $src1, $src2}",
5369                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5370def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
5371                "|cmle.2s\t$dst, $src1, $src2}",
5372                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5373def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
5374                "|cmle.4s\t$dst, $src1, $src2}",
5375                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5376def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
5377                "|cmle.2d\t$dst, $src1, $src2}",
5378                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5379
// Three-operand "cmlt a, b" is an alias of CMGT with the two source operands
// swapped (CMGT dst, src2, src1). All arrangements pass 0 as the trailing
// InstAlias argument.
5380def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
5381                "|cmlt.8b\t$dst, $src1, $src2}",
5382                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
5383def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
5384                "|cmlt.16b\t$dst, $src1, $src2}",
5385                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
5386def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
5387                "|cmlt.4h\t$dst, $src1, $src2}",
5388                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
5389def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
5390                "|cmlt.8h\t$dst, $src1, $src2}",
5391                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
5392def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
5393                "|cmlt.2s\t$dst, $src1, $src2}",
5394                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
5395def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
5396                "|cmlt.4s\t$dst, $src1, $src2}",
5397                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
5398def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
5399                "|cmlt.2d\t$dst, $src1, $src2}",
5400                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;
5401
// Three-operand "fcmle a, b" is an alias of FCMGE with the two source operands
// swapped. The .4h/.8h arrangements are only available with HasNEON and
// HasFullFP16; the .2s/.4s/.2d arrangements carry no extra predicate here.
5402let Predicates = [HasNEON, HasFullFP16] in {
5403def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
5404                "|fcmle.4h\t$dst, $src1, $src2}",
5405                (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5406def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
5407                "|fcmle.8h\t$dst, $src1, $src2}",
5408                (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5409}
5410def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
5411                "|fcmle.2s\t$dst, $src1, $src2}",
5412                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5413def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
5414                "|fcmle.4s\t$dst, $src1, $src2}",
5415                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5416def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
5417                "|fcmle.2d\t$dst, $src1, $src2}",
5418                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5419
// Three-operand "fcmlt a, b" is an alias of FCMGT with the two source operands
// swapped. The .4h/.8h arrangements are only available with HasNEON and
// HasFullFP16; the .2s/.4s/.2d arrangements carry no extra predicate here.
5420let Predicates = [HasNEON, HasFullFP16] in {
5421def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
5422                "|fcmlt.4h\t$dst, $src1, $src2}",
5423                (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
5424def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
5425                "|fcmlt.8h\t$dst, $src1, $src2}",
5426                (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
5427}
5428def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
5429                "|fcmlt.2s\t$dst, $src1, $src2}",
5430                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
5431def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
5432                "|fcmlt.4s\t$dst, $src1, $src2}",
5433                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
5434def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
5435                "|fcmlt.2d\t$dst, $src1, $src2}",
5436                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5437
// facle aliases: FACGE with the two source operands exchanged.
// The half-precision arrangements are additionally gated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
  def : InstAlias<
      "{facle\t$dst.4h, $src1.4h, $src2.4h" #
      "|facle.4h\t$dst, $src1, $src2}",
      (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
  def : InstAlias<
      "{facle\t$dst.8h, $src1.8h, $src2.8h" #
      "|facle.8h\t$dst, $src1, $src2}",
      (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<
    "{facle\t$dst.2s, $src1.2s, $src2.2s" #
    "|facle.2s\t$dst, $src1, $src2}",
    (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<
    "{facle\t$dst.4s, $src1.4s, $src2.4s" #
    "|facle.4s\t$dst, $src1, $src2}",
    (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<
    "{facle\t$dst.2d, $src1.2d, $src2.2d" #
    "|facle.2d\t$dst, $src1, $src2}",
    (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5455
// faclt aliases: FACGT with the two source operands exchanged.
// The half-precision arrangements are additionally gated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in {
  def : InstAlias<
      "{faclt\t$dst.4h, $src1.4h, $src2.4h" #
      "|faclt.4h\t$dst, $src1, $src2}",
      (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
  def : InstAlias<
      "{faclt\t$dst.8h, $src1.8h, $src2.8h" #
      "|faclt.8h\t$dst, $src1, $src2}",
      (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<
    "{faclt\t$dst.2s, $src1.2s, $src2.2s" #
    "|faclt.2s\t$dst, $src1, $src2}",
    (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<
    "{faclt\t$dst.4s, $src1.4s, $src2.4s" #
    "|faclt.4s\t$dst, $src1, $src2}",
    (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<
    "{faclt\t$dst.2d, $src1.2d, $src2.2d" #
    "|faclt.2d\t$dst, $src1, $src2}",
    (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
5473
5474//===----------------------------------------------------------------------===//
5475// Advanced SIMD three scalar instructions.
5476//===----------------------------------------------------------------------===//
5477
// Scalar add, integer compares and bit test (SIMDThreeScalarD forms).
defm ADD   : SIMDThreeScalarD<0, 0b10000, "add",   add>;
defm CMEQ  : SIMDThreeScalarD<1, 0b10001, "cmeq",  AArch64cmeq>;
defm CMGE  : SIMDThreeScalarD<0, 0b00111, "cmge",  AArch64cmge>;
defm CMGT  : SIMDThreeScalarD<0, 0b00110, "cmgt",  AArch64cmgt>;
defm CMHI  : SIMDThreeScalarD<1, 0b00110, "cmhi",  AArch64cmhi>;
defm CMHS  : SIMDThreeScalarD<1, 0b00111, "cmhs",  AArch64cmhs>;
defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
// Scalar FABD, plus patterns that select it for the v1f64 NEON intrinsic and
// for fabs(fsub a, b) on f16/f32/f64.
defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;

def : Pat<
    (v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
    (FABD64 FPR64:$Rn, FPR64:$Rm)>;

// The f16 form is additionally gated on HasFullFP16.
let Predicates = [HasNEON, HasFullFP16] in
  def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;

let Predicates = [HasNEON] in {
  def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
  def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
}
// Scalar FP compares. facge/facgt are selected from the NEON intrinsics, the
// others from the AArch64fcm* nodes.
defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge", int_aarch64_neon_facge>;
defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", int_aarch64_neon_facgt>;
defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;

// Scalar FP arithmetic helpers; these pass HasNEONorSME as their predicate.
defm FMULX   : SIMDFPThreeScalar<0, 0, 0b011, "fmulx",
                                 int_aarch64_neon_fmulx, HasNEONorSME>;
defm FRECPS  : SIMDFPThreeScalar<0, 0, 0b111, "frecps",
                                 int_aarch64_neon_frecps, HasNEONorSME>;
defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts",
                                 int_aarch64_neon_frsqrts, HasNEONorSME>;

// Signed saturating arithmetic, shifts and subtract.
defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeScalarHS<0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS<1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
defm SRSHL    : SIMDThreeScalarD<0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeScalarD<0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeScalarD<1, 0b10000, "sub", sub>;

// Unsigned counterparts.
defm UQADD    : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL    : SIMDThreeScalarD<1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeScalarD<1, 0b01000, "ushl", int_aarch64_neon_ushl>;
// Scalar SQRDMLAH / SQRDMLSH, available only with HasRDM. The explicit
// patterns select the i32 intrinsics onto the v1i32 instruction forms.
let Predicates = [HasRDM] in {
  defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
  defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;

  def : Pat<
      (i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd),
                                      (i32 FPR32:$Rn),
                                      (i32 FPR32:$Rm))),
      (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<
      (i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd),
                                      (i32 FPR32:$Rn),
                                      (i32 FPR32:$Rm))),
      (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
}
5530
// Instantiate the indexed-lane-0 scalar patterns for FMULX (16/32/64-bit
// variants), gated on HasNEONorSME.
defm : FMULScalarFromIndexedLane0Patterns<
    "FMULX", "16", "32", "64", int_aarch64_neon_fmulx, [HasNEONorSME]>;
5534
// Scalar "less-than" style aliases. Each one maps onto the corresponding
// "greater-than" style instruction with the two source operands exchanged.

// Integer compares on FPR64.
def : InstAlias<
    "cmls $dst, $src1, $src2",
    (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<
    "cmle $dst, $src1, $src2",
    (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<
    "cmlo $dst, $src1, $src2",
    (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<
    "cmlt $dst, $src1, $src2",
    (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;

// Floating-point compares, 32-bit and 64-bit register forms.
def : InstAlias<
    "fcmle $dst, $src1, $src2",
    (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<
    "fcmle $dst, $src1, $src2",
    (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<
    "fcmlt $dst, $src1, $src2",
    (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<
    "fcmlt $dst, $src1, $src2",
    (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<
    "facle $dst, $src1, $src2",
    (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<
    "facle $dst, $src1, $src2",
    (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<
    "faclt $dst, $src1, $src2",
    (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<
    "faclt $dst, $src1, $src2",
    (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
5559
5560//===----------------------------------------------------------------------===//
5561// Advanced SIMD three scalar instructions (mixed operands).
5562//===----------------------------------------------------------------------===//
defm SQDMULL : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
                                      int_aarch64_neon_sqdmulls_scalar>;
defm SQDMLAL : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;

// Fold a saturating add/sub of a scalar sqdmull result into the i32 forms of
// SQDMLAL / SQDMLSL.
def : Pat<
    (i64 (int_aarch64_neon_sqadd
             (i64 FPR64:$Rd),
             (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                    (i32 FPR32:$Rm))))),
    (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<
    (i64 (int_aarch64_neon_sqsub
             (i64 FPR64:$Rd),
             (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                    (i32 FPR32:$Rm))))),
    (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
5576
5577//===----------------------------------------------------------------------===//
5578// Advanced SIMD two scalar instructions.
5579//===----------------------------------------------------------------------===//
5580
// Integer abs and compare-against-zero forms.
defm ABS  : SIMDTwoScalarD<0, 0b01011, "abs", abs, [HasNoCSSC]>;
defm CMEQ : SIMDCmpTwoScalarD<0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE : SIMDCmpTwoScalarD<1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT : SIMDCmpTwoScalarD<0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE : SIMDCmpTwoScalarD<1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT : SIMDCmpTwoScalarD<0, 0b01010, "cmlt", AArch64cmltz>;

// Floating-point compare-against-zero forms.
defm FCMEQ : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;

// Floating-point to integer conversions; no selection operator is passed
// here.
defm FCVTAS : SIMDFPTwoScalar<0, 0, 0b11100, "fcvtas">;
defm FCVTAU : SIMDFPTwoScalar<1, 0, 0b11100, "fcvtau">;
defm FCVTMS : SIMDFPTwoScalar<0, 0, 0b11011, "fcvtms">;
defm FCVTMU : SIMDFPTwoScalar<1, 0, 0b11011, "fcvtmu">;
defm FCVTNS : SIMDFPTwoScalar<0, 0, 0b11010, "fcvtns">;
defm FCVTNU : SIMDFPTwoScalar<1, 0, 0b11010, "fcvtnu">;
defm FCVTPS : SIMDFPTwoScalar<0, 1, 0b11010, "fcvtps">;
defm FCVTPU : SIMDFPTwoScalar<1, 1, 0b11010, "fcvtpu">;
def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar<0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar<1, 1, 0b11011, "fcvtzu">;

// Estimate instructions; these pass HasNEONorSME as their predicate.
defm FRECPE  : SIMDFPTwoScalar<0, 1, 0b11101, "frecpe", HasNEONorSME>;
defm FRECPX  : SIMDFPTwoScalar<0, 1, 0b11111, "frecpx", HasNEONorSME>;
defm FRSQRTE : SIMDFPTwoScalar<1, 1, 0b11101, "frsqrte", HasNEONorSME>;

// NEG is matched as (0 - x).
defm NEG : SIMDTwoScalarD<1, 0b01011, "neg",
                          UnOpFrag<(sub immAllZerosV, node:$LHS)> >;

defm SCVTF  : SIMDFPTwoScalarCVT<0, 0, 0b11101, "scvtf", AArch64sitof>;
defm SQABS  : SIMDTwoScalarBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoScalarBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDTwoScalarMixedBHS<0, 0b10100, "sqxtn",
                                    int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS<1, 0b10010, "sqxtun",
                                    int_aarch64_neon_scalar_sqxtun>;
defm SUQADD : SIMDTwoScalarBHSDTied<0, 0b00011, "suqadd",
                                    int_aarch64_neon_suqadd>;
defm UCVTF  : SIMDFPTwoScalarCVT<1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn",
                                    int_aarch64_neon_scalar_uqxtn>;
defm USQADD : SIMDTwoScalarBHSDTied<1, 0b00011, "usqadd",
                                    int_aarch64_neon_usqadd>;
5619
// A v1i64 arithmetic shift right by 63 is selected as CMLTv1i64rz.
def : Pat<
    (v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
    (CMLTv1i64rz V64:$Rn)>;

// v1f64 -> v1i64 conversion intrinsics select the scalar v1i64 FCVT* forms.
def : Pat<
    (v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
    (FCVTASv1i64 FPR64:$Rn)>;
def : Pat<
    (v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
    (FCVTAUv1i64 FPR64:$Rn)>;
def : Pat<
    (v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
    (FCVTMSv1i64 FPR64:$Rn)>;
def : Pat<
    (v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
    (FCVTMUv1i64 FPR64:$Rn)>;
def : Pat<
    (v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
    (FCVTNSv1i64 FPR64:$Rn)>;
def : Pat<
    (v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
    (FCVTNUv1i64 FPR64:$Rn)>;
def : Pat<
    (v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
    (FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<
    (v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
    (FCVTPUv1i64 FPR64:$Rn)>;
def : Pat<
    (v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
    (FCVTZSv1i64 FPR64:$Rn)>;
def : Pat<
    (v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
    (FCVTZUv1i64 FPR64:$Rn)>;
5643
// frecpe: selection from the NEON intrinsic (scalar f16/f32/f64 and v1f64).
def : Pat<
    (f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
    (FRECPEv1f16 FPR16:$Rn)>;
def : Pat<
    (f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
    (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<
    (f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
    (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<
    (v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
    (FRECPEv1i64 FPR64:$Rn)>;

// frecpe: selection from the AArch64frecpe node (scalar and vector types).
def : Pat<
    (f32 (AArch64frecpe (f32 FPR32:$Rn))),
    (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<
    (v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
    (FRECPEv2f32 V64:$Rn)>;
def : Pat<
    (v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
    (FRECPEv4f32 FPR128:$Rn)>;
def : Pat<
    (f64 (AArch64frecpe (f64 FPR64:$Rn))),
    (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<
    (v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
    (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<
    (v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
    (FRECPEv2f64 FPR128:$Rn)>;
5665
// frecps: selection from the AArch64frecps node (scalar and vector types).
def : Pat<
    (f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
    (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<
    (v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
    (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<
    (v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
    (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<
    (f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
    (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<
    (v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
    (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;
5676
// frecpx: selection from the NEON intrinsic for f16/f32/f64 scalars.
def : Pat<
    (f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
    (FRECPXv1f16 FPR16:$Rn)>;
def : Pat<
    (f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
    (FRECPXv1i32 FPR32:$Rn)>;
def : Pat<
    (f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
    (FRECPXv1i64 FPR64:$Rn)>;
5683
// frsqrte: selection from the NEON intrinsic (scalar f16/f32/f64 and v1f64).
def : Pat<
    (f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
    (FRSQRTEv1f16 FPR16:$Rn)>;
def : Pat<
    (f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
    (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<
    (f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
    (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<
    (v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
    (FRSQRTEv1i64 FPR64:$Rn)>;

// frsqrte: selection from the AArch64frsqrte node (scalar and vector types).
def : Pat<
    (f32 (AArch64frsqrte (f32 FPR32:$Rn))),
    (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<
    (v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
    (FRSQRTEv2f32 V64:$Rn)>;
def : Pat<
    (v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
    (FRSQRTEv4f32 FPR128:$Rn)>;
def : Pat<
    (f64 (AArch64frsqrte (f64 FPR64:$Rn))),
    (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<
    (v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
    (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<
    (v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
    (FRSQRTEv2f64 FPR128:$Rn)>;
5705
// frsqrts: selection from the AArch64frsqrts node (scalar and vector types).
def : Pat<
    (f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
    (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<
    (v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
    (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<
    (v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
    (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<
    (f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
    (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<
    (v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
    (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;
5716
5717// Some float -> int -> float conversion patterns for which we want to keep the
5718// int values in FP registers using the corresponding NEON instructions to
5719// avoid more costly int <-> fp register transfers.
5720let Predicates = [HasNEON] in {
// Signed round trips: fp -> sint -> fp becomes FCVTZS followed by SCVTF.
def : Pat<
    (f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
    (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<
    (f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
    (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
// Unsigned round trips: fp -> uint -> fp becomes FCVTZU followed by UCVTF.
def : Pat<
    (f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
    (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
def : Pat<
    (f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
    (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;
5729
// Half-precision float -> int -> float round trips, kept in FP registers by
// pairing FCVTZ[SU]v1f16 with [SU]CVTFv1i16.
//
// A nested `let Predicates` replaces the enclosing list instead of extending
// it, so HasNEON is repeated here: these patterns select scalar AdvSIMD
// instructions and must not be enabled by HasFullFP16 alone.
let Predicates = [HasNEON, HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}
5736// If an integer is about to be converted to a floating point value,
5737// just load it on the floating point unit.
5738// Here are the patterns for 8 and 16-bits to float.
5739// 8-bits -> float.
// Patterns that fold a register-offset load feeding uint_to_fp into a load
// performed directly into an FP register, followed by UCVTF.
//
//   DstTy  - floating-point result type (also the type of the IMPLICIT_DEF
//            register the loaded value is inserted into).
//   SrcTy  - integer type produced by the load.
//   loadop - load operator to match.
//   UCVTF  - conversion instruction applied to the loaded register.
//   ro     - register-offset addressing mode; supplies the W/X address
//            patterns and their extend operands.
//   LDRW   - load instruction used with a 32-bit (W) offset register.
//   LDRX   - load instruction used with a 64-bit (X) offset register.
//   sub    - sub-register index the loaded value occupies in DstTy.
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
                             SDPatternOperator loadop, Instruction UCVTF,
                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
                             SubRegIndex sub> {
  // 32-bit offset register.
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                                 sub))>;

  // 64-bit offset register. The extend operand is named with ro.Xext on both
  // the matched and the emitted side so that it agrees with ro.Xpat.
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
                                      ro.Xext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                                 sub))>;
}
5758
// 8-bit load -> f32: register-offset, scaled-immediate and unscaled forms.
defm : UIntToFPROLoadPat<f32, i32, zextloadi8, UCVTFv1i32,
                         ro8, LDRBroW, LDRBroX, bsub>;
def : Pat<
    (f32 (uint_to_fp
            (i32 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
    (UCVTFv1i32
        (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                       (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat<
    (f32 (uint_to_fp
            (i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
    (UCVTFv1i32
        (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                       (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;

// 16-bit load -> f32: register-offset, scaled-immediate and unscaled forms.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16, UCVTFv1i32,
                         ro16, LDRHroW, LDRHroX, hsub>;
def : Pat<
    (f32 (uint_to_fp
            (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
    (UCVTFv1i32
        (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                       (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat<
    (f32 (uint_to_fp
            (i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
    (UCVTFv1i32
        (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                       (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
5780// 32-bits are handled in target specific dag combine:
5781// performIntToFpCombine.
5782// 64-bits integer to 32-bits floating point, not possible with
5783// UCVTF on floating point registers (both source and destination
5784// must have the same size).
5785
5786// Here are the patterns for 8, 16, 32, and 64-bits to double.
5787// 8-bits -> double.
// 8-bit load -> f64: register-offset, scaled-immediate and unscaled forms.
defm : UIntToFPROLoadPat<f64, i32, zextloadi8, UCVTFv1i64,
                         ro8, LDRBroW, LDRBroX, bsub>;
def : Pat<
    (f64 (uint_to_fp
            (i32 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
    (UCVTFv1i64
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                       (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat<
    (f64 (uint_to_fp
            (i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
    (UCVTFv1i64
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                       (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;

// 16-bit load -> f64: register-offset, scaled-immediate and unscaled forms.
defm : UIntToFPROLoadPat<f64, i32, zextloadi16, UCVTFv1i64,
                         ro16, LDRHroW, LDRHroX, hsub>;
def : Pat<
    (f64 (uint_to_fp
            (i32 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
    (UCVTFv1i64
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                       (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat<
    (f64 (uint_to_fp
            (i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
    (UCVTFv1i64
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                       (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;

// 32-bit load -> f64: register-offset, scaled-immediate and unscaled forms.
defm : UIntToFPROLoadPat<f64, i32, load, UCVTFv1i64,
                         ro32, LDRSroW, LDRSroX, ssub>;
def : Pat<
    (f64 (uint_to_fp
            (i32 (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
    (UCVTFv1i64
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                       (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
def : Pat<
    (f64 (uint_to_fp
            (i32 (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
    (UCVTFv1i64
        (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                       (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
5820// 64-bits -> double are handled in target specific dag combine:
5821// performIntToFpCombine.
5822} // let Predicates = [HasNEON]
5823
5824//===----------------------------------------------------------------------===//
5825// Advanced SIMD three different-sized vector instructions.
5826//===----------------------------------------------------------------------===//
5827
// Narrowing high-half add/sub, selected from the NEON intrinsics.
defm ADDHN  : SIMDNarrowThreeVectorBHS<0, 0b0100, "addhn", int_aarch64_neon_addhn>;
defm SUBHN  : SIMDNarrowThreeVectorBHS<0, 0b0110, "subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1, 0b0100, "raddhn", int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1, 0b0110, "rsubhn", int_aarch64_neon_rsubhn>;

defm PMULL : SIMDDifferentThreeVectorBD<0, 0b1110, "pmull", AArch64pmull>;

// Signed forms. The BinOpFrag/TriOpFrag arguments spell out the DAG shape
// that each instruction matches.
defm SABAL : SIMDLongThreeVectorTiedBHSabal<0, 0b0101, "sabal", AArch64sabd>;
defm SABDL : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl", AArch64sabd>;
defm SADDL : SIMDLongThreeVectorBHS<0, 0b0000, "saddl",
    BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW : SIMDWideThreeVectorBHS<0, 0b0001, "saddw",
    BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
defm SMLAL : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
    int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
    int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
    int_aarch64_neon_sqdmull>;
defm SSUBL : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
    BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
    BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;

// Unsigned forms; the add/sub fragments use zanyext for the extended inputs.
defm UABAL : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal", AArch64uabd>;
defm UADDL : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
    BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
    BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
defm USUBL : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
    BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW : SIMDWideThreeVectorBHS<1, 0b0011, "usubw",
    BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;
5871
5872// Additional patterns for [SU]ML[AS]L
// Patterns for accumulating only the low 64-bit half of a widening multiply
// into a 64-bit vector.
//
// The accumulator $Ra is placed in the dsub lane of an otherwise undefined
// 128-bit register, the full-width multiply-accumulate instruction is run,
// and the dsub half of its result is extracted.
//
//   opnode    - accumulate operator applied to $Ra and the extracted product.
//   vecopnode - widening multiply node applied to $Rn and $Rm.
//   INST8B / INST4H / INST2S - instruction used for the v8i8, v4i16 and
//                              v2i32 source element types respectively.
multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode,
                                       SDPatternOperator vecopnode,
                                       Instruction INST8B,
                                       Instruction INST4H,
                                       Instruction INST2S> {
  // v8i8 sources, v4i16 accumulator.
  def : Pat<
      (v4i16 (opnode V64:$Ra,
                     (v4i16 (extract_subvector
                                (vecopnode (v8i8 V64:$Rn), (v8i8 V64:$Rm)),
                                (i64 0))))),
      (EXTRACT_SUBREG
          (v8i16 (INST8B (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
                         V64:$Rn, V64:$Rm)),
          dsub)>;

  // v4i16 sources, v2i32 accumulator.
  def : Pat<
      (v2i32 (opnode V64:$Ra,
                     (v2i32 (extract_subvector
                                (vecopnode (v4i16 V64:$Rn), (v4i16 V64:$Rm)),
                                (i64 0))))),
      (EXTRACT_SUBREG
          (v4i32 (INST4H (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
                         V64:$Rn, V64:$Rm)),
          dsub)>;

  // v2i32 sources, v1i64 accumulator.
  def : Pat<
      (v1i64 (opnode V64:$Ra,
                     (v1i64 (extract_subvector
                                (vecopnode (v2i32 V64:$Rn), (v2i32 V64:$Rm)),
                                (i64 0))))),
      (EXTRACT_SUBREG
          (v2i64 (INST2S (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
                         V64:$Rn, V64:$Rm)),
          dsub)>;
}
5900
// add + umull -> UMLAL, add + smull -> SMLAL,
// sub + umull -> UMLSL, sub + smull -> SMLSL.
defm : Neon_mul_acc_widen_patterns<
    add, AArch64umull,
    UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<
    add, AArch64smull,
    SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<
    sub, AArch64umull,
    UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<
    sub, AArch64smull,
    SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
5909
5910
// Patterns for a 64-bit add/sub whose operands are the low halves of extended
// 64-bit vectors. The full-width long ("L") or wide ("W") instruction is run
// and the dsub half of its result is extracted.
//
//   opnode - the add/sub operator to match.
//   ext    - the extension operator applied to the narrow operand(s).
//   Inst   - instruction name prefix; the "L..." / "W..." suffix is appended
//            and the result is looked up with !cast<Instruction>.
multiclass Neon_addl_extract_patterns<SDPatternOperator opnode,
                                      SDPatternOperator ext,
                                      string Inst> {
  // Long forms: both operands are extended.
  def : Pat<
      (v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)),
                     (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
      (EXTRACT_SUBREG
          (v8i16 (!cast<Instruction>(Inst # "Lv8i8_v8i16") V64:$Rn, V64:$Rm)),
          dsub)>;
  def : Pat<
      (v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)),
                     (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
      (EXTRACT_SUBREG
          (v4i32 (!cast<Instruction>(Inst # "Lv4i16_v4i32") V64:$Rn, V64:$Rm)),
          dsub)>;
  def : Pat<
      (v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)),
                     (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
      (EXTRACT_SUBREG
          (v2i64 (!cast<Instruction>(Inst # "Lv2i32_v2i64") V64:$Rn, V64:$Rm)),
          dsub)>;

  // Wide forms: only the second operand is extended; the first is placed in
  // the dsub lane of an otherwise undefined 128-bit register.
  def : Pat<
      (v4i16 (opnode (v4i16 V64:$Rn),
                     (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
      (EXTRACT_SUBREG
          (v8i16 (!cast<Instruction>(Inst # "Wv8i8_v8i16")
                     (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub),
                     V64:$Rm)),
          dsub)>;
  def : Pat<
      (v2i32 (opnode (v2i32 V64:$Rn),
                     (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
      (EXTRACT_SUBREG
          (v4i32 (!cast<Instruction>(Inst # "Wv4i16_v4i32")
                     (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub),
                     V64:$Rm)),
          dsub)>;
  def : Pat<
      (v1i64 (opnode (v1i64 V64:$Rn),
                     (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
      (EXTRACT_SUBREG
          (v2i64 (!cast<Instruction>(Inst # "Wv2i32_v2i64")
                     (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub),
                     V64:$Rm)),
          dsub)>;
}
5932
// Unsigned variants match zanyext; signed variants match sext.
defm : Neon_addl_extract_patterns<add, zanyext, "UADD">;
defm : Neon_addl_extract_patterns<add, sext,    "SADD">;
defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">;
defm : Neon_addl_extract_patterns<sub, sext,    "SSUB">;
5937
5938// CodeGen patterns for addhn and subhn instructions, which can actually be
5939// written in LLVM IR without too much difficulty.
5940
5941// Prioritize ADDHN and SUBHN over UZP2.
let AddedComplexity = 10 in {

// ADDHN: truncate((Rn + Rm) >> half-width) for each element size.
def : Pat<
    (v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
    (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<
    (v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 16))))),
    (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<
    (v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 32))))),
    (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;

// ADDHN writing the upper half: the narrowed result is concatenated onto an
// existing 64-bit vector $Rd.
def : Pat<
    (concat_vectors
        (v8i8 V64:$Rd),
        (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
    (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                      V128:$Rn, V128:$Rm)>;
def : Pat<
    (concat_vectors
        (v4i16 V64:$Rd),
        (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 16))))),
    (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                      V128:$Rn, V128:$Rm)>;
def : Pat<
    (concat_vectors
        (v2i32 V64:$Rd),
        (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 32))))),
    (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                      V128:$Rn, V128:$Rm)>;

// SUBHN: truncate((Rn - Rm) >> half-width) for each element size.
def : Pat<
    (v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
    (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<
    (v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 16))))),
    (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<
    (v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 32))))),
    (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;

// SUBHN writing the upper half: the narrowed result is concatenated onto an
// existing 64-bit vector $Rd.
def : Pat<
    (concat_vectors
        (v8i8 V64:$Rd),
        (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
    (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                      V128:$Rn, V128:$Rm)>;
def : Pat<
    (concat_vectors
        (v4i16 V64:$Rd),
        (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 16))))),
    (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                      V128:$Rn, V128:$Rm)>;
def : Pat<
    (concat_vectors
        (v2i32 V64:$Rd),
        (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 32))))),
    (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                      V128:$Rn, V128:$Rm)>;

} // AddedComplexity = 10
5995
5996//----------------------------------------------------------------------------
5997// AdvSIMD bitwise extract from vector instruction.
5998//----------------------------------------------------------------------------
5999
// Instruction definitions for the "ext" mnemonic; the patterns in this
// section refer to the resulting records as EXTv8i8 and EXTv16i8.
6000defm EXT : SIMDBitwiseExtract<"ext">;
6001
// Immediate transform: produces an i32 target constant equal to the matched
// immediate plus 8.
6002def AdjustExtImm : SDNodeXForm<imm, [{
6003  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
6004}]>;
// Selection patterns for AArch64ext and for extracting the upper half of a
// 128-bit vector.
//   VT64  - 64-bit vector type used for V64 operands and results.
//   VT128 - 128-bit vector type used for V128 operands and results.
//   N     - element index that (extract_subvector ..., (i64 N)) uses to name
//           the upper half of a VT128 value.
6005multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  // Direct mappings: the immediate is forwarded unchanged to the 64-bit or
  // 128-bit EXT instruction.
6006  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
6007            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
6008  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
6009            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
6010  // We use EXT to handle extract_subvector to copy the upper 64-bits of a
6011  // 128-bit vector.
  // The register is used for both EXT inputs with a fixed immediate of 8, and
  // the result is read back through dsub.
6012  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
6013            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
6014  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
6015  // single 128-bit EXT.
6016  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
6017                              (extract_subvector V128:$Rn, (i64 N)),
6018                              (i32 imm:$imm))),
6019            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
6020  // A 64-bit EXT of the high half of a 128-bit register can be done using a
6021  // 128-bit EXT of the whole register with an adjustment to the immediate. The
6022  // top half of the other operand will be unset, but that doesn't matter as it
6023  // will not be used.
  // AdjustExtImm supplies the adjusted immediate; $Rm is widened to 128 bits
  // with SUBREG_TO_REG.
6024  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
6025                              V64:$Rm,
6026                              (i32 imm:$imm))),
6027            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
6028                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
6029                                      (AdjustExtImm imm:$imm)), dsub)>;
6030}
6031
// One ExtPat instantiation per (64-bit type, 128-bit type) pair. The third
// argument equals the element count of the 64-bit type, i.e. the index of the
// first element of the upper half of the 128-bit type.
6032defm : ExtPat<v8i8, v16i8, 8>;
6033defm : ExtPat<v4i16, v8i16, 4>;
6034defm : ExtPat<v4f16, v8f16, 4>;
6035defm : ExtPat<v4bf16, v8bf16, 4>;
6036defm : ExtPat<v2i32, v4i32, 2>;
6037defm : ExtPat<v2f32, v4f32, 2>;
6038defm : ExtPat<v1i64, v2i64, 1>;
6039defm : ExtPat<v1f64, v2f64, 1>;
6040
6041//----------------------------------------------------------------------------
6042// AdvSIMD zip vector
6043//----------------------------------------------------------------------------
6044
// Each defm passes SIMDZipVector a 3-bit value, the assembly mnemonic and the
// SelectionDAG node that the instruction is selected from.
// NOTE(review): the 3-bit value is presumably an opcode field of the
// encoding - confirm against SIMDZipVector's definition.
6045defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
6046defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
6047defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
6048defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
6049defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
6050defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;
6051
// A concat_vectors of two truncated 128-bit vectors is selected as a single
// UZP1 on the untruncated inputs.
6052def : Pat<(v16i8 (concat_vectors (v8i8 (trunc (v8i16 V128:$Vn))),
6053                                 (v8i8 (trunc (v8i16 V128:$Vm))))),
6054          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
6055def : Pat<(v8i16 (concat_vectors (v4i16 (trunc (v4i32 V128:$Vn))),
6056                                 (v4i16 (trunc (v4i32 V128:$Vm))))),
6057          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
6058def : Pat<(v4i32 (concat_vectors (v2i32 (trunc (v2i64 V128:$Vn))),
6059                                 (v2i32 (trunc (v2i64 V128:$Vm))))),
6060          (UZP1v4i32 V128:$Vn, V128:$Vm)>;
6061// These are the same as above, with an optional assertzext node that can be
6062// generated from fptoi lowering.
6063def : Pat<(v16i8 (concat_vectors (v8i8 (assertzext (trunc (v8i16 V128:$Vn)))),
6064                                 (v8i8 (assertzext (trunc (v8i16 V128:$Vm)))))),
6065          (UZP1v16i8 V128:$Vn, V128:$Vm)>;
6066def : Pat<(v8i16 (concat_vectors (v4i16 (assertzext (trunc (v4i32 V128:$Vn)))),
6067                                 (v4i16 (assertzext (trunc (v4i32 V128:$Vm)))))),
6068          (UZP1v8i16 V128:$Vn, V128:$Vm)>;
6069def : Pat<(v4i32 (concat_vectors (v2i32 (assertzext (trunc (v2i64 V128:$Vn)))),
6070                                 (v2i32 (assertzext (trunc (v2i64 V128:$Vm)))))),
6071          (UZP1v4i32 V128:$Vn, V128:$Vm)>;
6072
// A concat_vectors of two vectors that were each logically shifted right by
// half the element width (8, 16 or 32) and then truncated is selected as a
// single UZP2 on the unshifted inputs.
6073def : Pat<(v16i8 (concat_vectors
6074                 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
6075                 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
6076          (UZP2v16i8 V128:$Vn, V128:$Vm)>;
6077def : Pat<(v8i16 (concat_vectors
6078                 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
6079                 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
6080          (UZP2v8i16 V128:$Vn, V128:$Vm)>;
6081def : Pat<(v4i32 (concat_vectors
6082                 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
6083                 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
6084          (UZP2v4i32 V128:$Vn, V128:$Vm)>;
6085
6086//----------------------------------------------------------------------------
6087// AdvSIMD TBL/TBX instructions
6088//----------------------------------------------------------------------------
6089
// Instruction definitions for "tbl" and "tbx". TBX uses the *Tied multiclass;
// its patterns below take an extra $Rd operand.
6090defm TBL : SIMDTableLookup<    0, "tbl">;
6091defm TBX : SIMDTableLookupTied<1, "tbx">;
6092
// Single-table tbl1 intrinsic, 64-bit and 128-bit results.
// NOTE(review): the 128-bit patterns name their two operands $Ri/$Rn in the
// opposite order to the 64-bit patterns. Operand positions are the same, so
// only the naming differs.
6093def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
6094          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
6095def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
6096          (TBLv16i8One V128:$Ri, V128:$Rn)>;
6097
// Single-table tbx1 intrinsic: $Rd is forwarded as the first instruction
// operand.
6098def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
6099                  (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
6100          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
6101def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
6102                   (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
6103          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;
6104
6105//----------------------------------------------------------------------------
6106// AdvSIMD LUT instructions
6107//----------------------------------------------------------------------------
// The "luti2" and "luti4" instruction definitions are guarded by the HasLUT
// predicate.
6108let Predicates = [HasLUT] in {
6109  defm LUT2 : BaseSIMDTableLookupIndexed2<"luti2">;
6110  defm LUT4 : BaseSIMDTableLookupIndexed4<"luti4">;
6111}
6112
6113//----------------------------------------------------------------------------
6114// AdvSIMD scalar DUP instruction
6115//----------------------------------------------------------------------------
6116
// Scalar DUP instruction definitions; note the assembly mnemonic passed to
// the multiclass is "mov".
6117defm DUP : SIMDScalarDUP<"mov">;
6118
6119//----------------------------------------------------------------------------
6120// AdvSIMD scalar pairwise instructions
6121//----------------------------------------------------------------------------
6122
// Scalar pairwise instruction definitions: one integer form (ADDP) and five
// floating-point forms. Each defm passes a single-bit value, a 5-bit value
// and the assembly mnemonic.
// NOTE(review): the two numeric arguments are presumably encoding fields -
// confirm against the multiclass definitions.
6123defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
6124defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
6125defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
6126defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
6127defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
6128defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;
6129
6130// Only the lower half of the result of the inner FADDP is used in the patterns
6131// below, so the second operand does not matter. Re-use the first input
6132// operand, so no additional dependencies need to be introduced.
// vecreduce_fadd lowering. The f16 forms are guarded by HasFullFP16.
6133let Predicates = [HasFullFP16] in {
// v8f16: two vector FADDPs, then the scalar pairwise FADDP on the low 64 bits
// (dsub) of the result.
6134def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
6135            (FADDPv2i16p
6136              (EXTRACT_SUBREG
6137                 (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn),
6138               dsub))>;
// v4f16: one vector FADDP followed by the scalar pairwise FADDP.
6139def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
6140          (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>;
6141}
// v4f32: one vector FADDP, then the scalar pairwise FADDP on the low 64 bits.
6142def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
6143          (FADDPv2i32p
6144            (EXTRACT_SUBREG
6145              (FADDPv4f32 V128:$Rn, V128:$Rn),
6146             dsub))>;
// Two-element vectors need only the scalar pairwise FADDP.
6147def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
6148          (FADDPv2i32p V64:$Rn)>;
6149def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
6150          (FADDPv2i64p V128:$Rn)>;
6151
// v2i64 saddv/uaddv both select ADDPv2i64p; the scalar result is placed in
// the dsub subregister of an otherwise undefined v2i64.
6152def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
6153          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
6154def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
6155          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
// The faddv intrinsic uses the same instruction sequences as the
// vecreduce_fadd patterns for the corresponding types.
6156def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
6157          (FADDPv2i32p V64:$Rn)>;
6158def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
6159          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
6160def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
6161          (FADDPv2i64p V128:$Rn)>;
// Two-element fmaxnmv/fmaxv/fminnmv/fminv reductions map directly onto the
// corresponding scalar pairwise instruction.
6162def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))),
6163          (FMAXNMPv2i32p V64:$Rn)>;
6164def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))),
6165          (FMAXNMPv2i64p V128:$Rn)>;
6166def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))),
6167          (FMAXPv2i32p V64:$Rn)>;
6168def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))),
6169          (FMAXPv2i64p V128:$Rn)>;
6170def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))),
6171          (FMINNMPv2i32p V64:$Rn)>;
6172def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))),
6173          (FMINNMPv2i64p V128:$Rn)>;
6174def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))),
6175          (FMINPv2i32p V64:$Rn)>;
6176def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))),
6177          (FMINPv2i64p V128:$Rn)>;
6178
6179//----------------------------------------------------------------------------
6180// AdvSIMD INS/DUP instructions
6181//----------------------------------------------------------------------------
6182
// DUP with a GPR32/GPR64 source operand. Arguments: a single-bit value, a
// 5-bit template in which `?` marks bits left unset here, the arrangement
// suffix, the result vector type, the result register class and the source
// register class.
// NOTE(review): the single bit looks like a 64/128-bit selector (0 for V64,
// 1 for V128 in every instance) - confirm against SIMDDupFromMain.
6183def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
6184def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
6185def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
6186def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
6187def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
6188def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
6189def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;
6190
// DUP from a vector element, one definition per result arrangement.
6191def DUPv2i64lane : SIMDDup64FromElement;
6192def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
6193def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
6194def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
6195def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
6196def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
6197def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;
6198
6199// DUP from a 64-bit register to a 64-bit register is just a copy
6200def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
6201          (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
6202def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
6203          (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;
6204
// AArch64dup of a floating-point scalar: place the scalar in the matching
// subregister (hsub/ssub/dsub) of an otherwise undefined 128-bit register,
// then duplicate lane 0 with the lane-indexed DUP of the result arrangement.
// The IMPLICIT_DEF is typed v4i32 for the f32 and f64 cases and v8i16 for the
// f16/bf16 cases.
6205def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
6206          (v2f32 (DUPv2i32lane
6207            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
6208            (i64 0)))>;
6209def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
6210          (v4f32 (DUPv4i32lane
6211            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
6212            (i64 0)))>;
6213def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
6214          (v2f64 (DUPv2i64lane
6215            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
6216            (i64 0)))>;
6217def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
6218          (v4f16 (DUPv4i16lane
6219            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
6220            (i64 0)))>;
6221def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
6222          (v4bf16 (DUPv4i16lane
6223            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
6224            (i64 0)))>;
6225def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
6226          (v8f16 (DUPv8i16lane
6227            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
6228            (i64 0)))>;
6229def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
6230          (v8bf16 (DUPv8i16lane
6231            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
6232            (i64 0)))>;
6233
// Floating-point duplane nodes reuse the lane-indexed DUP definitions named
// after the integer arrangement of the same element size (i16/i32/i64). The
// lane index is forwarded unchanged.
6234def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
6235          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
6236def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
6237          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
6238
6239def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
6240          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
6241def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
6242          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;
6243
6244def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
6245          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
6246def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
6247         (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
6248def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
6249          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;
6250
6251// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
6252// instruction even if the types don't match: we just have to remap the lane
6253// carefully. N.b. this trick only applies to truncations.
// Immediate transforms that scale a lane index: each returns an i64 target
// constant equal to the matched immediate multiplied by 2, 4 or 8.
6254def VecIndex_x2 : SDNodeXForm<imm, [{
6255  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
6256}]>;
6257def VecIndex_x4 : SDNodeXForm<imm, [{
6258  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
6259}]>;
6260def VecIndex_x8 : SDNodeXForm<imm, [{
6261  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
6262}]>;
6263
// Patterns for (AArch64dup (vector_extract src, idx)) where the source
// element type differs from the result element type.
//   ResVT    - result vector type of the dup.
//   Src64VT  - source vector type when the source is in a V64 register.
//   Src128VT - source vector type when the source is in a V128 register.
//   ScalVT   - scalar type produced by the vector_extract.
//   DUP      - lane-indexed DUP instruction to select.
//   IdxXFORM - transform applied to the extract index to form the DUP lane.
6264multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
6265                            ValueType Src128VT, ValueType ScalVT,
6266                            Instruction DUP, SDNodeXForm IdxXFORM> {
  // 128-bit source: used directly.
6267  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
6268                                                     imm:$idx)))),
6269            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
6270
  // 64-bit source: widened to a 128-bit register with SUBREG_TO_REG first.
6271  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
6272                                                     imm:$idx)))),
6273            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
6274}
6275
// The index multiplier equals the ratio of source to result element width:
// x2 for 16->8 and 32->16 bits, x4 for 32->8 bits.
// 64-bit results:
6276defm : DUPWithTruncPats<v8i8,   v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
6277defm : DUPWithTruncPats<v8i8,   v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
6278defm : DUPWithTruncPats<v4i16,  v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;
6279
// 128-bit results:
6280defm : DUPWithTruncPats<v16i8,  v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
6281defm : DUPWithTruncPats<v16i8,  v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
6282defm : DUPWithTruncPats<v8i16,  v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;
6283
// Patterns for (AArch64dup (i32 (trunc (extractelt src, idx)))) where src has
// i64 elements (v2i64 in a V128 register or v1i64 in a V64 register).
//   ResVT    - result vector type of the dup.
//   DUP      - lane-indexed DUP instruction to select.
//   IdxXFORM - transform applied to the extract index to form the DUP lane.
6284multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
6285                               SDNodeXForm IdxXFORM> {
6286  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
6287                                                         imm:$idx))))),
6288            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;
6289
  // 64-bit source: widened to a 128-bit register with SUBREG_TO_REG first.
6290  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
6291                                                       imm:$idx))))),
6292            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
6293}
6294
// The index multiplier equals 64 divided by the result element width:
// x8 for 8-bit, x4 for 16-bit and x2 for 32-bit result elements.
// 64-bit results:
6295defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,   VecIndex_x8>;
6296defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane,  VecIndex_x4>;
6297defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane,  VecIndex_x2>;
6298
// 128-bit results:
6299defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
6300defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
6301defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;
6302
6303// SMOV and UMOV definitions, with some extra patterns for convenience
6304defm SMOV : SMov;
6305defm UMOV : UMov;
6306
// A sign-extension (sext_inreg from i8/i16, or sext from i32) of an extracted
// lane is selected as a single SMOV. Each source shape appears once per
// result width (i32 and/or i64); the result type of the output pattern
// selects the to32 or to64 instruction.
6307def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
6308          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
6309def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
6310          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
6311def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
6312          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
6313def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
6314          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
6317def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
6318          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;
6319
// Same as the i64 forms above, for the case where the extracted i32 was
// any-extended to i64 before the sext_inreg.
6320def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
6321            VectorIndexB:$idx)))), i8),
6322          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
6323def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
6324            VectorIndexH:$idx)))), i16),
6325          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
6326
6327// Extracting i8 or i16 elements will have the zero-extend transformed to
6328// an 'and' mask by type legalization since neither i8 nor i16 are legal types
6329// for AArch64. Match these patterns here since UMOV already zeroes out the high
6330// bits of the destination register.
// i32 results: (and (vector_extract ...), 0xff / 0xffff) selects UMOV alone.
6331def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
6332               (i32 0xff)),
6333          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
6334def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
6335               (i32 0xffff)),
6336          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;
6337
// i64 results: the same mask applied after an anyext to i64. The 32-bit UMOV
// result is placed in sub_32 of a 64-bit register with SUBREG_TO_REG.
6338def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
6339            VectorIndexB:$idx)))), (i64 0xff))),
6340          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
6341def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
6342            VectorIndexH:$idx)))), (i64 0xffff))),
6343          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;
6344
6345defm INS : SIMDIns;
6346
// scalar_to_vector from a GPR32 into byte/halfword vectors: copy the GPR into
// an FPR32 and place it in the ssub subregister with SUBREG_TO_REG.
6347def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
6348          (SUBREG_TO_REG (i32 0),
6349                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
6350def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
6351          (SUBREG_TO_REG (i32 0),
6352                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
6353
6354// The top bits will be zero from the FMOVWSr
6355def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
6356          (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;
6357
6358def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
6359          (SUBREG_TO_REG (i32 0),
6360                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
6361def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
6362          (SUBREG_TO_REG (i32 0),
6363                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
6364
// scalar_to_vector from an FPR: insert the scalar into the matching
// subregister (hsub/ssub/dsub) of an otherwise undefined vector register.
6365def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
6366          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6367def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
6368          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6369
6370def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
6371          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6372def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
6373          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
6374
6375def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
6376            (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
6377                                  (i32 FPR32:$Rn), ssub))>;
6378def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
6379            (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
6380                                  (i32 FPR32:$Rn), ssub))>;
6381
6382def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
6383            (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
6384                                  (i64 FPR64:$Rn), dsub))>;
6385
6396def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
6397          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
6398def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
6399          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
6400
6401def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
6402          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;
6403
// Insert an f16 scalar into a 64-bit v4f16: widen the destination to 128 bits
// (upper half undefined), put the scalar in lane 0 of a second undefined
// vector via hsub, copy lane 0 into lane $imm with INSvi16lane, then take the
// low 64 bits of the result. The index is matched as VectorIndexS here.
6404def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
6405            (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
6406          (EXTRACT_SUBREG
6407            (INSvi16lane
6408              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
6409              VectorIndexS:$imm,
6410              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
6411              (i64 0)),
6412            dsub)>;
6413
// Inserting floating-point zero (fpimm0) into a lane is selected as a
// GPR-source INS from the zero register (WZR, or XZR for f64). For 64-bit
// vectors the destination is first widened to 128 bits (upper half undefined)
// and the low 64 bits are extracted afterwards.
6414def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
6415          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
6416def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
6417          (EXTRACT_SUBREG (INSvi16gpr (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, WZR), dsub)>;
6418def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
6419          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
6420def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
6421          (EXTRACT_SUBREG (INSvi32gpr (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, WZR), dsub)>;
// The result names $imm with the same VectorIndexD operand class that the
// source pattern binds it with.
6422def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)),
6423          (INSvi64gpr V128:$Rn, VectorIndexD:$imm, XZR)>;
6424
// vector_insert of an FPR scalar. The scalar is placed in lane 0 of an
// otherwise undefined 128-bit vector (through hsub/ssub/dsub) and copied into
// lane $imm of the destination with the lane-to-lane INS of the matching
// element size. For 64-bit destinations the vector is first widened to
// 128 bits and the low 64 bits (dsub) are extracted afterwards.
6425def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
6426            (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
6427          (INSvi16lane
6428            V128:$Rn, VectorIndexH:$imm,
6429            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
6430            (i64 0))>;
6431
6432def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
6433            (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
6434          (EXTRACT_SUBREG
6435            (INSvi16lane
6436              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
6437              VectorIndexS:$imm,
6438              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
6439              (i64 0)),
6440            dsub)>;
6441
6442def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
6443            (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
6444          (INSvi16lane
6445            V128:$Rn, VectorIndexH:$imm,
6446            (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
6447            (i64 0))>;
6448
6449def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
6450            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
6451          (EXTRACT_SUBREG
6452            (INSvi32lane
6453              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
6454              VectorIndexS:$imm,
6455              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
6456              (i64 0)),
6457            dsub)>;
6458def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
6459            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
6460          (INSvi32lane
6461            V128:$Rn, VectorIndexS:$imm,
6462            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
6463            (i64 0))>;
6464def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
6465            (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
6466          (INSvi64lane
6467            V128:$Rn, VectorIndexD:$imm,
6468            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
6469            (i64 0))>;
6470
// vector_insert of a GPR32 into a 64-bit integer vector: widen the
// destination to 128 bits (upper half undefined), insert with the GPR-source
// INS of the matching element size, then extract the low 64 bits (dsub).
6471def : Pat<(v2i32 (vector_insert (v2i32 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexS:$imm))),
6472          (EXTRACT_SUBREG
6473            (INSvi32gpr (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
6474                        VectorIndexS:$imm, GPR32:$Rm),
6475            dsub)>;
6476def : Pat<(v4i16 (vector_insert (v4i16 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexH:$imm))),
6477          (EXTRACT_SUBREG
6478            (INSvi16gpr (v8i16 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
6479                        VectorIndexH:$imm, GPR32:$Rm),
6480            dsub)>;
6481def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexB:$imm))),
6482          (EXTRACT_SUBREG
6483            (INSvi8gpr (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
6484                       VectorIndexB:$imm, GPR32:$Rm),
6485            dsub)>;
6486
6487// Copy an element at a constant index in one vector into a constant indexed
6488// element of another.
6489// FIXME: refactor to a shared class/def parameterized on vector type, vector
6490// index type and INS extension.
// int_aarch64_neon_vcopy_lane(Vd, idx, Vs, idx2) maps one-to-one onto the
// lane-to-lane INS of the matching element size, with the operands in the
// same order.
6491def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
6492                   (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
6493                   VectorIndexB:$idx2)),
6494          (v16i8 (INSvi8lane
6495                   V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
6496          )>;
6497def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
6498                   (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
6499                   VectorIndexH:$idx2)),
6500          (v8i16 (INSvi16lane
6501                   V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
6502          )>;
6503def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
6504                   (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
6505                   VectorIndexS:$idx2)),
6506          (v4i32 (INSvi32lane
6507                   V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
6508          )>;
6509def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
6510                   (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
6511                   VectorIndexD:$idx2)),
6512          (v2i64 (INSvi64lane
6513                   V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
6514          )>;
6515
// Patterns for vector_insert(dst, vector_extract(src, Immn), Immd), selected
// as one lane-to-lane INS. The four patterns cover every combination of
// 128-bit/64-bit destination and source.
//   VT128  - 128-bit vector type.
//   VT64   - 64-bit vector type with the same element type.
//   VTScal - scalar type produced by the vector_extract.
//   INS    - lane-to-lane INS instruction to select.
// 64-bit operands are widened with SUBREG_TO_REG. For a 64-bit destination
// the low 64 bits (dsub) of the INS result are extracted.
6516multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
6517                                ValueType VTScal, Instruction INS> {
  // 128-bit destination, 128-bit source.
6518  def : Pat<(VT128 (vector_insert V128:$src,
6519                        (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
6520                        imm:$Immd)),
6521            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;
6522
  // 128-bit destination, 64-bit source.
6523  def : Pat<(VT128 (vector_insert V128:$src,
6524                        (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
6525                        imm:$Immd)),
6526            (INS V128:$src, imm:$Immd,
6527                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;
6528
  // 64-bit destination, 128-bit source.
6529  def : Pat<(VT64 (vector_insert V64:$src,
6530                        (VTScal (vector_extract (VT128 V128:$Rn), imm:$Immn)),
6531                        imm:$Immd)),
6532            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
6533                                 imm:$Immd, V128:$Rn, imm:$Immn),
6534                            dsub)>;
6535
  // 64-bit destination, 64-bit source.
6536  def : Pat<(VT64 (vector_insert V64:$src,
6537                        (VTScal (vector_extract (VT64 V64:$Rn), imm:$Immn)),
6538                        imm:$Immd)),
6539            (EXTRACT_SUBREG
6540                (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
6541                     (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
6542                dsub)>;
6543}
6544
// Floating-point element types; f16 and bf16 both use the 16-bit INS.
6545defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
6546defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
6547defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
6548defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;
6549
// Integer element types; 8-, 16- and 32-bit elements are extracted as i32.
6550defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
6551defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
6552defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
6553defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;
6554
6555// Insert from bitcast
6556// vector_insert(bitcast(f32 src), n, lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
// The FPR scalar is placed in the ssub/dsub subregister of an undefined
// register and lane 0 of that register is copied into lane Immd of the
// destination. The v2i32 form widens the destination to 128 bits first and
// extracts dsub afterwards.
6557def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
6558          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>;
6559def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), imm:$Immd)),
6560          (EXTRACT_SUBREG
6561            (INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)),
6562                         imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0),
6563            dsub)>;
6564def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), imm:$Immd)),
6565          (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;
6566
6567// bitcast of an extract
6568// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
// General lane: move the requested lane into lane 0 of an undefined register
// with INS, then read the scalar subregister (ssub for f32, dsub for f64).
// Lane 0: no instruction is emitted, the result is the subregister of $src.
6569def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
6570          (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
6571def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))),
6572          (EXTRACT_SUBREG V128:$src, ssub)>;
6573def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
6574          (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
6575def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
6576          (EXTRACT_SUBREG V128:$src, dsub)>;
6577
6578// Floating point vector extractions are codegen'd as either a sequence of
6579// subregister extractions, or a MOV (aka DUP here) if
6580// the lane number is anything other than zero.
// Lane 0: read the dsub/ssub/hsub subregister of the 128-bit source directly.
6581def : Pat<(f64 (vector_extract (v2f64 V128:$Rn), (i64 0))),
6582          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
6583def : Pat<(f32 (vector_extract (v4f32 V128:$Rn), (i64 0))),
6584          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
6585def : Pat<(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))),
6586          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
6587def : Pat<(bf16 (vector_extract (v8bf16 V128:$Rn), (i64 0))),
6588          (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
6589
6590
// Indexed lane: emit the scalar DUP of the matching element width. f16 and
// bf16 both use DUPi16 with a VectorIndexH index.
6591def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
6592          (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
6593def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
6594          (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
6595def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
6596          (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
6597def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
6598          (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
6599
6600// All concat_vectors operations are canonicalised to act on i64 vectors for
6601// AArch64. In the general case we need an instruction, which might just as
6602// well be INS.
// Both 64-bit halves are first widened into undefined 128-bit registers through
// dsub; INSvi64lane then copies 64-bit lane 0 of the widened $Rn into lane 1 of
// the widened $Rd, so $Rd ends up as the low half and $Rn as the high half.
6603class ConcatPat<ValueType DstTy, ValueType SrcTy>
6604  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
6605        (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
6606                     (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;
6607
6608def : ConcatPat<v2i64, v1i64>;
6609def : ConcatPat<v2f64, v1f64>;
6610def : ConcatPat<v4i32, v2i32>;
6611def : ConcatPat<v4f32, v2f32>;
6612def : ConcatPat<v8i16, v4i16>;
6613def : ConcatPat<v8f16, v4f16>;
6614def : ConcatPat<v8bf16, v4bf16>;
6615def : ConcatPat<v16i8, v8i8>;
6616
6617// If the high lanes are undef, though, we can just ignore them:
// Only an INSERT_SUBREG of $Rn into the dsub half of an undefined 128-bit
// register is produced; no instruction is emitted for the upper half.
6618class ConcatUndefPat<ValueType DstTy, ValueType SrcTy>
6619  : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
6620        (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
6621
// NOTE(review): unlike the ConcatPat instantiations, there are no v8f16/v4f16
// or v8bf16/v4bf16 rows here -- confirm whether that omission is intentional.
6622def : ConcatUndefPat<v2i64, v1i64>;
6623def : ConcatUndefPat<v2f64, v1f64>;
6624def : ConcatUndefPat<v4i32, v2i32>;
6625def : ConcatUndefPat<v4f32, v2f32>;
6626def : ConcatUndefPat<v8i16, v4i16>;
6627def : ConcatUndefPat<v16i8, v8i8>;
6628
6629//----------------------------------------------------------------------------
6630// AdvSIMD across lanes instructions
6631//----------------------------------------------------------------------------
6632
// Instruction definitions for the across-lanes reductions. The integer forms
// (SIMDAcrossLanesBHS / SIMDAcrossLanesHSD) are given only encoding bits and a
// mnemonic here; their selection patterns are written separately further down.
// The floating-point forms (SIMDFPAcrossLanes) additionally receive the
// selection node to match.
6633defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
6634defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
6635defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
6636defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
6637defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
6638defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
6639defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
6640defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>;
6641defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>;
6642defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
6643defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>;
6644
// Folds an add-across-lanes of a pairwise long add into a single long
// add-across-lanes instruction.
//   Opc   - instruction name prefix; the type suffix ("v8i8v", ...) is appended.
//   addlp - the pairwise long add node to match inside the reduction.
// The outer reduction node is AArch64uaddv in every pattern; only the inner
// addlp node and the instruction prefix vary between instantiations.
6645multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
6646  // Patterns for addv(addlp(x)) ==> addlv
6647  def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
6648              (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
6649              (i64 0))), (i64 0))),
6650            (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
6651              (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
6652  def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
6653            (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
6654              (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
6655  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
6656            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;
6657
6658  // Patterns for addp(addlp(x)) ==> addlv
6659  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
6660            (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
6661  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
6662            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
6663}
6664
6665defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
6666defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;
6667
6668// Pattern is used for GlobalISel
// Same fold as SIMDAcrossLaneLongPairIntrinsic, but matching vecreduce_add
// with a scalar result, so the instruction result is used directly with no
// subregister insert/extract wrapping.
//   Opc   - instruction name prefix; the type suffix is appended.
//   addlp - the pairwise long add node to match inside the reduction.
6669multiclass SIMDAcrossLaneLongPairIntrinsicGISel<string Opc, SDPatternOperator addlp> {
6670  // Patterns for addv(addlp(x)) ==> addlv
6671  def : Pat<(i16 (vecreduce_add (v4i16 (addlp (v8i8 V64:$Rn))))),
6672            (!cast<Instruction>(Opc#"v8i8v") V64:$Rn)>;
6673  def : Pat<(i16 (vecreduce_add (v8i16 (addlp (v16i8 V128:$Rn))))),
6674            (!cast<Instruction>(Opc#"v16i8v") V128:$Rn)>;
6675  def : Pat<(i32 (vecreduce_add (v4i32 (addlp (v8i16 V128:$Rn))))),
6676            (!cast<Instruction>(Opc#"v8i16v") V128:$Rn)>;
6677
6678  // Patterns for addp(addlp(x)) ==> addlv
6679  def : Pat<(i32 (vecreduce_add (v2i32 (addlp (v4i16 V64:$Rn))))),
6680            (!cast<Instruction>(Opc#"v4i16v") V64:$Rn)>;
6681  def : Pat<(i64 (vecreduce_add (v2i64 (addlp (v4i32 V128:$Rn))))),
6682            (!cast<Instruction>(Opc#"v4i32v") V128:$Rn)>;
6683}
6684
6685defm : SIMDAcrossLaneLongPairIntrinsicGISel<"UADDLV", AArch64uaddlp>;
6686defm : SIMDAcrossLaneLongPairIntrinsicGISel<"SADDLV", AArch64saddlp>;
6687
6688// Patterns for uaddlv(uaddlp(x)) ==> uaddlv
// The inner uaddlp is dropped and UADDLV is applied to the narrower source.
// The instruction result is placed in a wider register with SUBREG_TO_REG.
// The first two patterns match the int_aarch64_neon_uaddlv intrinsic and read
// a scalar subregister back; the remaining three match the AArch64uaddlv node
// and return the vector register itself.
6689def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
6690          (i64 (EXTRACT_SUBREG
6691            (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub)),
6692            dsub))>;
6693
6694def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
6695          (i32 (EXTRACT_SUBREG
6696            (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
6697            ssub))>;
6698
6699def : Pat<(v2i64 (AArch64uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
6700          (v2i64 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub))>;
6701
6702def : Pat<(v4i32 (AArch64uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
6703          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub))>;
6704
6705def : Pat<(v4i32 (AArch64uaddlv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
6706          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$op), hsub))>;
6707
// Selects a long add-across-lanes node that returns a vector.
//   Opc   - instruction name prefix; the source-type suffix is appended.
//   addlv - the reduction node to match.
// The instruction result is placed in the wider result register with
// SUBREG_TO_REG: hsub for i8 sources, ssub for i16 sources, dsub for v4i32.
6708multiclass SIMDAcrossLaneLongReductionIntrinsic<string Opc, SDPatternOperator addlv> {
6709  def : Pat<(v4i32 (addlv (v8i8 V64:$Rn))),
6710            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v8i8v") V64:$Rn), hsub))>;
6711
6712  def : Pat<(v4i32 (addlv (v4i16 V64:$Rn))),
6713            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v4i16v") V64:$Rn), ssub))>;
6714
6715  def : Pat<(v4i32 (addlv (v16i8 V128:$Rn))),
6716            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v16i8v") V128:$Rn), hsub))>;
6717
6718  def : Pat<(v4i32 (addlv (v8i16 V128:$Rn))),
6719            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v8i16v") V128:$Rn), ssub))>;
6720
6721  def : Pat<(v2i64 (addlv (v4i32 V128:$Rn))),
6722            (v2i64 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v4i32v") V128:$Rn), dsub))>;
6723}
6724
6725defm : SIMDAcrossLaneLongReductionIntrinsic<"UADDLV", AArch64uaddlv>;
6726defm : SIMDAcrossLaneLongReductionIntrinsic<"SADDLV", AArch64saddlv>;
6727
6728// Patterns for across-vector intrinsics, that have a node equivalent, that
6729// returns a vector (with only the low lane defined) instead of a scalar.
6730// In effect, opNode is the same as (scalar_to_vector (IntNode)).
//   baseOpc - instruction name prefix; the type suffix ("v8i8v", ...) is appended.
//   opNode  - the vector-returning reduction node to match.
// No v2i32 (or 64-bit element) pattern is generated by this multiclass.
6731multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
6732                                    SDPatternOperator opNode> {
6733// If a lane instruction caught the vector_extract around opNode, we can
6734// directly match the latter to the instruction.
// The scalar instruction result is inserted into the bsub/hsub/ssub
// subregister of an undefined vector of the result type.
6735def : Pat<(v8i8 (opNode V64:$Rn)),
6736          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
6737           (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
6738def : Pat<(v16i8 (opNode V128:$Rn)),
6739          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6740           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
6741def : Pat<(v4i16 (opNode V64:$Rn)),
6742          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
6743           (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
6744def : Pat<(v8i16 (opNode V128:$Rn)),
6745          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
6746           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
6747def : Pat<(v4i32 (opNode V128:$Rn)),
6748          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
6749           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;
6750
6751
6752// If none did, fallback to the explicit patterns, consuming the vector_extract.
// The 64-bit source forms are matched through an insert_subvector into undef
// at index 0; every form reads the i32 result from ssub.
6753def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
6754            (i64 0)), (i64 0))),
6755          (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
6756            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
6757            bsub), ssub)>;
6758def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
6759          (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6760            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
6761            bsub), ssub)>;
6762def : Pat<(i32 (vector_extract (insert_subvector undef,
6763            (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
6764          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
6765            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
6766            hsub), ssub)>;
6767def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
6768          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
6769            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
6770            hsub), ssub)>;
6771def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
6772          (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
6773            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
6774            ssub), ssub)>;
6775
6776}
6777
// Signed variant: inherits every SIMDAcrossLanesIntrinsic pattern and adds
// patterns that fold a following sext_inreg from i8 or i16.
//   baseOpc - instruction name prefix; the type suffix is appended.
//   opNode  - the vector-returning reduction node to match.
6778multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
6779                                          SDPatternOperator opNode>
6780    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
6781// If there is a sign extension after this intrinsic, consume it, as SMOV
6782// already performs it.
// The reduction result is inserted into bsub/hsub of an undefined v16i8 and
// lane 0 is read with SMOVvi8to32 / SMOVvi16to32.
6783def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
6784            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
6785          (i32 (SMOVvi8to32
6786            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6787              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
6788            (i64 0)))>;
6789def : Pat<(i32 (sext_inreg (i32 (vector_extract
6790            (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
6791          (i32 (SMOVvi8to32
6792            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6793             (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
6794            (i64 0)))>;
6795def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
6796            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
6797          (i32 (SMOVvi16to32
6798           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6799            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
6800           (i64 0)))>;
6801def : Pat<(i32 (sext_inreg (i32 (vector_extract
6802            (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
6803          (i32 (SMOVvi16to32
6804            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6805             (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
6806            (i64 0)))>;
6807}
6808
// Unsigned variant: inherits every SIMDAcrossLanesIntrinsic pattern and adds
// patterns that fold a following 'and' with a mask operand.
//   baseOpc - instruction name prefix; the type suffix is appended.
//   opNode  - the vector-returning reduction node to match.
// NOTE(review): maski8_or_more / maski16_or_more are defined elsewhere;
// presumably they accept masks that keep at least the low 8 / 16 bits -- confirm.
6809multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
6810                                            SDPatternOperator opNode>
6811    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
6812// If there is a masking operation keeping only what has been actually
6813// generated, consume it.
// No masking instruction is emitted: the result is inserted into bsub/hsub of
// an undefined v16i8 and read back as i32 through ssub.
6814def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
6815            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
6816      (i32 (EXTRACT_SUBREG
6817        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6818          (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
6819        ssub))>;
6820def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
6821            maski8_or_more)),
6822        (i32 (EXTRACT_SUBREG
6823          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6824            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
6825          ssub))>;
6826def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
6827            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
6828          (i32 (EXTRACT_SUBREG
6829            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6830              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
6831            ssub))>;
6832def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
6833            maski16_or_more)),
6834        (i32 (EXTRACT_SUBREG
6835          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6836            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
6837          ssub))>;
6838}
6839
6840// For vecreduce_add, used by GlobalISel not SDAG
// Each pattern maps the scalar-returning vecreduce_add straight onto ADDV of
// the matching type. Two forms have no ADDV pattern here and use a pairwise
// add instead: v2i32 uses ADDPv2i32 with $Rn as both operands and reads ssub,
// and v2i64 uses ADDPv2i64p.
6841def : Pat<(i8 (vecreduce_add (v8i8 V64:$Rn))),
6842          (i8 (ADDVv8i8v V64:$Rn))>;
6843def : Pat<(i8 (vecreduce_add (v16i8 V128:$Rn))),
6844          (i8 (ADDVv16i8v V128:$Rn))>;
6845def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))),
6846          (i16 (ADDVv4i16v V64:$Rn))>;
6847def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))),
6848          (i16 (ADDVv8i16v V128:$Rn))>;
6849def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))),
6850          (i32 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub))>;
6851def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))),
6852          (i32 (ADDVv4i32v V128:$Rn))>;
6853def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))),
6854          (i64 (ADDPv2i64p V128:$Rn))>;
6855
// Instantiate the signed/unsigned across-lanes patterns for each reduction
// node. The multiclasses generate no v2i32 pattern, so each instantiation is
// followed by an explicit v2i32 pattern using the pairwise instruction with
// $Rn supplied as both operands.
6856defm : SIMDAcrossLanesSignedIntrinsic<"ADDV",  AArch64saddv>;
6857// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
6858def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
6859          (ADDPv2i32 V64:$Rn, V64:$Rn)>;
6860
6861defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
6862// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
6863def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
6864          (ADDPv2i32 V64:$Rn, V64:$Rn)>;
6865
6866defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
6867def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
6868          (SMAXPv2i32 V64:$Rn, V64:$Rn)>;
6869
6870defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
6871def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
6872          (SMINPv2i32 V64:$Rn, V64:$Rn)>;
6873
6874defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
6875def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
6876          (UMAXPv2i32 V64:$Rn, V64:$Rn)>;
6877
6878defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
6879def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
6880          (UMINPv2i32 V64:$Rn, V64:$Rn)>;
6881
6882// For vecreduce_{opc} used by GlobalISel, not SDAG at the moment
6883// because GlobalISel allows us to specify the return register to be a FPR
//   baseOpc - instruction name prefix; the type suffix is appended.
//   opNode  - the scalar-returning reduction node to match.
// Each pattern maps directly onto the instruction with no subregister
// wrapping. The i8/i16 element patterns name the source as FPR64/FPR128,
// whereas the v4i32 pattern names it as V128.
6884multiclass SIMDAcrossLanesVecReductionIntrinsic<string baseOpc,
6885                                               SDPatternOperator opNode> {
6886def : Pat<(i8 (opNode (v8i8 FPR64:$Rn))),
6887          (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) FPR64:$Rn)>;
6888
6889def : Pat<(i8 (opNode (v16i8 FPR128:$Rn))),
6890          (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) FPR128:$Rn)>;
6891
6892def : Pat<(i16 (opNode (v4i16 FPR64:$Rn))),
6893          (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) FPR64:$Rn)>;
6894
6895def : Pat<(i16 (opNode (v8i16 FPR128:$Rn))),
6896          (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) FPR128:$Rn)>;
6897
6898def : Pat<(i32 (opNode (v4i32 V128:$Rn))),
6899          (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn)>;
6900}
6901
6902// For v2i32 source type, the pairwise instruction can be used instead
// Each min/max reduction is instantiated from the multiclass and followed by
// an explicit v2i32 pattern: the pairwise instruction receives $Rn as both
// operands and the i32 result is read from ssub.
6903defm : SIMDAcrossLanesVecReductionIntrinsic<"UMINV", vecreduce_umin>;
6904def : Pat<(i32 (vecreduce_umin (v2i32 V64:$Rn))),
6905          (i32 (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;
6906
6907defm : SIMDAcrossLanesVecReductionIntrinsic<"UMAXV", vecreduce_umax>;
6908def : Pat<(i32 (vecreduce_umax (v2i32 V64:$Rn))),
6909          (i32 (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;
6910
6911defm : SIMDAcrossLanesVecReductionIntrinsic<"SMINV", vecreduce_smin>;
6912def : Pat<(i32 (vecreduce_smin (v2i32 V64:$Rn))),
6913          (i32 (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;
6914
6915defm : SIMDAcrossLanesVecReductionIntrinsic<"SMAXV", vecreduce_smax>;
6916def : Pat<(i32 (vecreduce_smax (v2i32 V64:$Rn))),
6917          (i32 (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;
6918
// Selects a scalar-returning signed long across-lanes intrinsic.
//   baseOpc - instruction name prefix; the source-type suffix is appended.
//   intOp   - the intrinsic to match.
// i8-element sources: the result is inserted at hsub and lane 0 is read with
// SMOVvi16to32. i16-element sources: the result is inserted at ssub and read
// back through ssub. v4i32: inserted at dsub and read back as i64 through dsub.
6919multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
6920  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
6921        (i32 (SMOVvi16to32
6922          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6923            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
6924          (i64 0)))>;
6925def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
6926        (i32 (SMOVvi16to32
6927          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6928           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
6929          (i64 0)))>;
6930
6931def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
6932          (i32 (EXTRACT_SUBREG
6933           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6934            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
6935           ssub))>;
6936def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
6937        (i32 (EXTRACT_SUBREG
6938          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6939           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
6940          ssub))>;
6941
6942def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
6943        (i64 (EXTRACT_SUBREG
6944          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6945           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
6946          dsub))>;
6947}
6948
// Selects a scalar-returning unsigned long across-lanes intrinsic.
//   baseOpc - instruction name prefix; the source-type suffix is appended.
//   intOp   - the intrinsic to match.
// Unlike the signed multiclass no extending move is emitted: for i8-element
// sources the result is inserted at hsub and the i32 is read through ssub.
// NOTE(review): that read presumably relies on the bits above the 16-bit
// result being zero after the instruction -- confirm.
6949multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
6950                                                Intrinsic intOp> {
6951  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
6952        (i32 (EXTRACT_SUBREG
6953          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6954            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
6955          ssub))>;
6956def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
6957        (i32 (EXTRACT_SUBREG
6958          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6959            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
6960          ssub))>;
6961
6962def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
6963          (i32 (EXTRACT_SUBREG
6964            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6965              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
6966            ssub))>;
6967def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
6968        (i32 (EXTRACT_SUBREG
6969          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6970            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
6971          ssub))>;
6972
6973def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
6974        (i64 (EXTRACT_SUBREG
6975          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6976            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
6977          dsub))>;
6978}
6979
// Bind the saddlv/uaddlv intrinsics to the SADDLV/UADDLV instructions. The
// multiclasses have no v2i32 source pattern, so that case is written out below
// using the pairwise long add, with the i64 result read through dsub.
6980defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
6981defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
6982
6983// The vaddlv_s32 intrinsic gets mapped to SADDLP.
6984def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
6985          (i64 (EXTRACT_SUBREG
6986            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6987              (SADDLPv2i32_v1i64 V64:$Rn), dsub),
6988            dsub))>;
6989// The vaddlv_u32 intrinsic gets mapped to UADDLP.
6990def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
6991          (i64 (EXTRACT_SUBREG
6992            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
6993              (UADDLPv2i32_v1i64 V64:$Rn), dsub),
6994            dsub))>;
6995
6996//------------------------------------------------------------------------------
6997// AdvSIMD modified immediate instructions
6998//------------------------------------------------------------------------------
6999
7000// AdvSIMD BIC
7001defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
7002// AdvSIMD ORR
7003defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;
7004
// Assembler aliases that accept BIC/ORR without an explicit shift operand; the
// shift operand of the underlying instruction is set to 0. Each arrangement is
// accepted in two spellings: suffix on the register ("bic $Vd.4h, $imm") and
// suffix on the mnemonic ("bic.4h $Vd, $imm").
7005def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
7006def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
7007def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
7008def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
7009
7010def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
7011def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
7012def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
7013def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
7014
7015def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
7016def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
7017def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
7018def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
7019
7020def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
7021def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
7022def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
7023def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
7024
7025// AdvSIMD FMOV
// Vector FMOV (immediate) for the .2d, .2s and .4s arrangements, each selected
// from the AArch64fmov node. The instruction operand class is fpimm8 while the
// selection pattern binds $imm8 as imm0_255.
// The .4h and .8h arrangements are additionally guarded by HasFullFP16.
7026def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
7027                                              "fmov", ".2d",
7028                       [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7029def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64,  fpimm8,
7030                                              "fmov", ".2s",
7031                       [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7032def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
7033                                              "fmov", ".4s",
7034                       [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7035let Predicates = [HasNEON, HasFullFP16] in {
7036def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64,  fpimm8,
7037                                              "fmov", ".4h",
7038                       [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7039def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
7040                                              "fmov", ".8h",
7041                       [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7042} // Predicates = [HasNEON, HasFullFP16]
7043
7044// AdvSIMD MOVI
7045
7046// EDIT byte mask: scalar
// Scalar (FPR64) form of MOVI, marked rematerializable and as cheap as a move.
7047let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7048def MOVID      : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
7049                    [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
7050// The movi_edit node has the immediate value already encoded, so we use
7051// a plain imm0_255 here.
7052def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
7053          (MOVID imm0_255:$shift)>;
7054
7055// EDIT byte mask: 2d
7056
7057// The movi_edit node has the immediate value already encoded, so we use
7058// a plain imm0_255 in the pattern
7059let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7060def MOVIv2d_ns   : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
7061                                                simdimmtype10,
7062                                                "movi", ".2d",
7063                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
7064
// All-zeros 128-bit vectors of every integer and FP type: MOVIv2d_ns with
// immediate operand 0.
7065def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7066def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7067def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7068def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7069def : Pat<(v2f64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7070def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7071def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7072def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7073
// All-ones 128-bit integer vectors: MOVIv2d_ns with immediate operand 255.
// No all-ones patterns are provided for the FP vector types.
7074def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
7075def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
7076def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
7077def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;
7078
7079// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
7080// extract is free and this gives better MachineCSE results.
// The 64-bit FP vector types instead use the scalar MOVID for all-zeros.
7081def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
7082def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
7083def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
7084def : Pat<(v8i8  immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
7085def : Pat<(v1f64 immAllZerosV), (MOVID (i32 0))>;
7086def : Pat<(v2f32 immAllZerosV), (MOVID (i32 0))>;
7087def : Pat<(v4f16 immAllZerosV), (MOVID (i32 0))>;
7088def : Pat<(v4bf16 immAllZerosV), (MOVID (i32 0))>;
7089
7090def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
7091def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
7092def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
7093def : Pat<(v8i8  immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
7094
7095// EDIT per word & halfword: 2s, 4h, 4s, & 8h
7096let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7097defm MOVI      : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
7098
7099let Predicates = [HasNEON] in {
7100  // Using the MOVI to materialize fp constants.
  // f32 immediates accepted by fpimm32SIMDModImmType4 are built with MOVIv2i32
  // using shift operand 24; the scalar is then read from ssub.
7101  def : Pat<(f32 fpimm32SIMDModImmType4:$in),
7102            (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in),
7103                                       (i32 24)),
7104                            ssub)>;
7105}
7106
// Aliases accepting MOVI without an explicit shift (shift operand set to 0),
// in both the register-suffix and mnemonic-suffix spellings.
// NOTE(review): the trailing ", 0" argument is presumably the InstAlias emit
// flag, making these parse-only -- confirm against the InstAlias class.
7107def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
7108def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
7109def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
7110def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
7111
7112def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
7113def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
7114def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
7115def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
7116
// Selection of the AArch64movi_shift node: the 8-bit immediate and the shift
// amount are passed through unchanged to the MOVI of the matching type.
7117def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
7118          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
7119def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
7120          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
7121def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
7122          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
7123def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
7124          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;
7125
// MOVI forms with an MSL shifter (.2s/.4s) and the per-byte forms (.8b/.16b),
// all marked rematerializable and as cheap as a move.
// NOTE(review): the '?' in {1,1,0,?} leaves one bit unset at this point;
// presumably the SIMDModifiedImmMoveMSL class fills it in -- confirm.
7126let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
7127// EDIT per word: 2s & 4s with MSL shifter
7128def MOVIv2s_msl  : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
7129                      [(set (v2i32 V64:$Rd),
7130                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
7131def MOVIv4s_msl  : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
7132                      [(set (v4i32 V128:$Rd),
7133                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
7134
7135// Per byte: 8b & 16b
7136def MOVIv8b_ns   : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64,  imm0_255,
7137                                                 "movi", ".8b",
7138                       [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
7139
7140def MOVIv16b_ns  : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
7141                                                 "movi", ".16b",
7142                       [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
7143}
7144
7145// AdvSIMD MVNI
7146
7147// EDIT per word & halfword: 2s, 4h, 4s, & 8h
7148let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7149defm MVNI      : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;
7150
// Aliases accepting MVNI without an explicit shift (shift operand set to 0),
// in both the register-suffix and mnemonic-suffix spellings.
// NOTE(review): the trailing ", 0" argument is presumably the InstAlias emit
// flag, making these parse-only -- confirm against the InstAlias class.
7151def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
7152def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
7153def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
7154def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
7155
7156def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
7157def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
7158def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
7159def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
7160
// Selection of the AArch64mvni_shift node: the 8-bit immediate and the shift
// amount are passed through unchanged to the MVNI of the matching type.
7161def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
7162          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
7163def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
7164          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
7165def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
7166          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
7167def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
7168          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
7169
7170// EDIT per word: 2s & 4s with MSL shifter
7171let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
7172def MVNIv2s_msl   : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
7173                      [(set (v2i32 V64:$Rd),
7174                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
7175def MVNIv4s_msl   : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
7176                      [(set (v4i32 V128:$Rd),
7177                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
7178}
7179
7180//----------------------------------------------------------------------------
7181// AdvSIMD indexed element
7182//----------------------------------------------------------------------------
7183
7184let hasSideEffects = 0 in {
7185  defm FMLA  : SIMDFPIndexedTied<0, 0b0001, "fmla">;
7186  defm FMLS  : SIMDFPIndexedTied<0, 0b0101, "fmls">;
7187}
7188
7189// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
7190// instruction expects the addend first, while the intrinsic expects it last.
7191
7192// On the other hand, there are quite a few valid combinatorial options due to
7193// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
7194defm : SIMDFPIndexedTiedPatterns<"FMLA",
7195           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
7196defm : SIMDFPIndexedTiedPatterns<"FMLA",
7197           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;
7198
7199defm : SIMDFPIndexedTiedPatterns<"FMLS",
7200           TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
7201defm : SIMDFPIndexedTiedPatterns<"FMLS",
7202           TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
7203defm : SIMDFPIndexedTiedPatterns<"FMLS",
7204           TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
7205defm : SIMDFPIndexedTiedPatterns<"FMLS",
7206           TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
7207
// Patterns that select the by-element FMLS instructions when the negation is
// applied to the vector/scalar *before* it is broadcast or extracted, i.e. the
// fneg sits inside the DUP / DUPLANE / vector_extract operand.
//
// OpNode is a three-operand fragment invoked here as
//   (OpNode accumulator($Rd), multiplicand($Rn), broadcast-or-extracted lane).
// In every pattern the fneg is dropped from the selected operand and the
// subtraction is provided by FMLS itself.
multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                           VectorIndexS:$idx))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (v2f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i64 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  // DUP of a scalar: place the scalar in lane 0 and index lane 0.
  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
  // and DUP scalar.
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
                                           VectorIndexS:$idx))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (v4f32 (AArch64duplane32
                                      (v4f32 (insert_subvector undef,
                                                 (v2f32 (fneg V64:$Rm)),
                                                 (i64 0))),
                                      VectorIndexS:$idx)))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                               VectorIndexS:$idx)>;
  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;

  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
  // (DUPLANE from 64-bit would be trivial).
  // The lane index of a .2d vector is a D-lane index, so the same operand
  // class is used on both the source and the result side.
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
                                           VectorIndexD:$idx))),
            (FMLSv2i64_indexed
                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexD:$idx)>;
  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
                (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;

  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (fneg V128:$Rm)),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                V128:$Rm, VectorIndexS:$idx)>;
  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
                         (vector_extract (v4f32 (insert_subvector undef,
                                                    (v2f32 (fneg V64:$Rm)),
                                                    (i64 0))),
                                         VectorIndexS:$idx))),
            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
                (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;

  // 1 variant for 64-bit scalar version: extract from .2d
  // (extract from .1d would be trivial).
  // A v2f64 has only lanes 0 and 1, so the index is a D-lane index.
  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
                         (vector_extract (v2f64 (fneg V128:$Rm)),
                                         VectorIndexD:$idx))),
            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
                V128:$Rm, VectorIndexD:$idx)>;
}
7283
7284defm : FMLSIndexedAfterNegPatterns<
7285           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
7286defm : FMLSIndexedAfterNegPatterns<
7287           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >;
7288
7289defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
7290defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>;
7291
// A vector fmul by a broadcast scalar (AArch64dup) is selected as the
// by-element FMUL using lane 0: the scalar is inserted into the low
// sub-register (ssub / dsub) of an otherwise undefined 128-bit register.
def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv2i32_indexed
              V64:$Rn,
              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
              (i64 0))>;

def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
          (FMULv4i32_indexed
              V128:$Rn,
              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
              (i64 0))>;

def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
          (FMULv2i64_indexed
              V128:$Rn,
              (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
              (i64 0))>;
7304
7305defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
7306defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
7307
7308defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
7309                                     int_aarch64_neon_sqdmulh_laneq>;
7310defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
7311                                      int_aarch64_neon_sqrdmulh_laneq>;
7312
7313// Generated by MachineCombine
7314defm MLA   : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
7315defm MLS   : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;
7316
7317defm MUL   : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
7318defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
7319    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
7320defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
7321    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
7322defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
7323defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
7324                                           int_aarch64_neon_sqadd>;
7325defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
7326                                           int_aarch64_neon_sqsub>;
7327defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
7328                                          int_aarch64_neon_sqrdmlah>;
7329defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
7330                                          int_aarch64_neon_sqrdmlsh>;
7331defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
7332defm UMLAL   : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
7333    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
7334defm UMLSL   : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
7335    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
7336defm UMULL   : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>;
7337
// When the second operand of a scalar sqdmull is a lane extracted from a
// 128-bit vector, use the indexed instruction encoding directly instead of
// materializing the extracted element first.
def : Pat<(int_aarch64_neon_sqdmulls_scalar
              (i32 FPR32:$Rn),
              (vector_extract (v4i32 V128:$Vm), VectorIndexS:$idx)),
          (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
7344
7345//----------------------------------------------------------------------------
7346// AdvSIMD scalar shift instructions
7347//----------------------------------------------------------------------------
7348defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
7349defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
7350defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
7351defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
7352// Codegen patterns for the above. We don't put these directly on the
7353// instructions because TableGen's type inference can't handle the truth.
7354// Having the same base pattern for fp <--> int totally freaks it out.
7355def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
7356          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
7357def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
7358          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
7359def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
7360          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
7361def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
7362          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
7363def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
7364                                            vecshiftR64:$imm)),
7365          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
7366def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
7367                                            vecshiftR64:$imm)),
7368          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
7369def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
7370          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
7371def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
7372          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
7373def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
7374                                            vecshiftR64:$imm)),
7375          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
7376def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
7377          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
7378def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
7379                                            vecshiftR64:$imm)),
7380          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
7381def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
7382          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
7383
// Patterns for FP16 intrinsics - these require register copies to/from, as
// i16s are not supported.
7385
7386def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
7387          (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
7388def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
7389          (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
7390def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
7391          (SCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
7392def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
7393            (and FPR32:$Rn, (i32 65535)),
7394            vecshiftR16:$imm)),
7395          (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
7396def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
7397          (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
7398def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
7399          (UCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
7400def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
7401          (i32 (INSERT_SUBREG
7402            (i32 (IMPLICIT_DEF)),
7403            (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
7404            hsub))>;
7405def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
7406          (i64 (INSERT_SUBREG
7407            (i64 (IMPLICIT_DEF)),
7408            (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
7409            hsub))>;
7410def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
7411          (i32 (INSERT_SUBREG
7412            (i32 (IMPLICIT_DEF)),
7413            (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
7414            hsub))>;
7415def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
7416          (i64 (INSERT_SUBREG
7417            (i64 (IMPLICIT_DEF)),
7418            (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
7419            hsub))>;
7420def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
7421          (i32 (INSERT_SUBREG
7422            (i32 (IMPLICIT_DEF)),
7423            (FACGE16 FPR16:$Rn, FPR16:$Rm),
7424            hsub))>;
7425def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
7426          (i32 (INSERT_SUBREG
7427            (i32 (IMPLICIT_DEF)),
7428            (FACGT16 FPR16:$Rn, FPR16:$Rm),
7429            hsub))>;
7430
7431defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
7432defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
7433defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
7434                                     int_aarch64_neon_sqrshrn>;
7435defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
7436                                     int_aarch64_neon_sqrshrun>;
7437defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
7438defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
7439defm SQSHRN   : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
7440                                     int_aarch64_neon_sqshrn>;
7441defm SQSHRUN  : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
7442                                     int_aarch64_neon_sqshrun>;
7443defm SRI      : SIMDScalarRShiftDTied<   1, 0b01000, "sri">;
7444defm SRSHR    : SIMDScalarRShiftD<   0, 0b00100, "srshr", AArch64srshri>;
7445defm SRSRA    : SIMDScalarRShiftDTied<   0, 0b00110, "srsra",
7446    TriOpFrag<(add node:$LHS,
7447                   (AArch64srshri node:$MHS, node:$RHS))>>;
7448defm SSHR     : SIMDScalarRShiftD<   0, 0b00000, "sshr", AArch64vashr>;
7449defm SSRA     : SIMDScalarRShiftDTied<   0, 0b00010, "ssra",
7450    TriOpFrag<(add_and_or_is_add node:$LHS,
7451                   (AArch64vashr node:$MHS, node:$RHS))>>;
7452defm UQRSHRN  : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
7453                                     int_aarch64_neon_uqrshrn>;
7454defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
7455defm UQSHRN   : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
7456                                     int_aarch64_neon_uqshrn>;
7457defm URSHR    : SIMDScalarRShiftD<   1, 0b00100, "urshr", AArch64urshri>;
7458defm URSRA    : SIMDScalarRShiftDTied<   1, 0b00110, "ursra",
7459    TriOpFrag<(add node:$LHS,
7460                   (AArch64urshri node:$MHS, node:$RHS))>>;
7461defm USHR     : SIMDScalarRShiftD<   1, 0b00000, "ushr", AArch64vlshr>;
7462defm USRA     : SIMDScalarRShiftDTied<   1, 0b00010, "usra",
7463    TriOpFrag<(add_and_or_is_add node:$LHS,
7464                   (AArch64vlshr node:$MHS, node:$RHS))>>;
7465
7466//----------------------------------------------------------------------------
7467// AdvSIMD vector shift instructions
7468//----------------------------------------------------------------------------
7469defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
7470defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
7471defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
7472                                   int_aarch64_neon_vcvtfxs2fp>;
7473defm RSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", AArch64rshrn>;
7474defm SHL     : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
7475
// X << 1 ==> X + X
// Selects a vector shift-left by the constant 1 as an ADD of the source
// register with itself.  The ADD record is looked up by name as "ADD" # ty,
// and regtype is the register class used for $Rn.
class SHLToADDPat<ValueType ty, RegisterClass regtype>
  : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
        (!cast<Instruction>("ADD" # ty) regtype:$Rn, regtype:$Rn)>;
7480
7481def : SHLToADDPat<v16i8, FPR128>;
7482def : SHLToADDPat<v8i16, FPR128>;
7483def : SHLToADDPat<v4i32, FPR128>;
7484def : SHLToADDPat<v2i64, FPR128>;
7485def : SHLToADDPat<v8i8,  FPR64>;
7486def : SHLToADDPat<v4i16, FPR64>;
7487def : SHLToADDPat<v2i32, FPR64>;
7488
7489defm SHRN    : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
7490                          BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
7491defm SLI     : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
7492def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
7493                                      (i32 vecshiftL64:$imm))),
7494          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
7495defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
7496                                         int_aarch64_neon_sqrshrn>;
7497defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
7498                                         int_aarch64_neon_sqrshrun>;
7499defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
7500defm SQSHL  : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
7501defm SQSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
7502                                         int_aarch64_neon_sqshrn>;
7503defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
7504                                         int_aarch64_neon_sqshrun>;
7505defm SRI     : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
7506def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
7507                                      (i32 vecshiftR64:$imm))),
7508          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
7509defm SRSHR   : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
7510defm SRSRA   : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
7511                 TriOpFrag<(add node:$LHS,
7512                                (AArch64srshri node:$MHS, node:$RHS))> >;
7513defm SSHLL   : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
7514                BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;
7515
7516defm SSHR    : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
7517defm SSRA    : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
7518                TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
7519defm UCVTF   : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
7520                        int_aarch64_neon_vcvtfxu2fp>;
7521defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
7522                                         int_aarch64_neon_uqrshrn>;
7523defm UQSHL   : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
7524defm UQSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
7525                                         int_aarch64_neon_uqshrn>;
7526defm URSHR   : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
7527defm URSRA   : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
7528                TriOpFrag<(add node:$LHS,
7529                               (AArch64urshri node:$MHS, node:$RHS))> >;
7530defm USHLL   : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
7531                BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
7532defm USHR    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
7533defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
7534                TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
7535
7536// RADDHN patterns for when RSHRN shifts by half the size of the vector element
7537def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))),
7538          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
7539def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))),
7540          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
7541let AddedComplexity = 5 in
7542def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))),
7543          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
7544def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
7545          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
7546def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))),
7547          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
7548def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
7549          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
7550
7551// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
7552def : Pat<(v16i8 (concat_vectors
7553                 (v8i8 V64:$Vd),
7554                 (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), (AArch64movi_shift (i32 128), (i32 0))), (i32 8)))))),
7555          (RADDHNv8i16_v16i8
7556                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
7557                 (v8i16 (MOVIv2d_ns (i32 0))))>;
7558def : Pat<(v8i16 (concat_vectors
7559                 (v4i16 V64:$Vd),
7560                 (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), (AArch64movi_shift (i32 128), (i32 8))), (i32 16)))))),
7561          (RADDHNv4i32_v8i16
7562                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
7563                 (v4i32 (MOVIv2d_ns (i32 0))))>;
7564let AddedComplexity = 5 in
7565def : Pat<(v4i32 (concat_vectors
7566                 (v2i32 V64:$Vd),
7567                 (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), (AArch64dup (i64 2147483648))), (i32 32)))))),
7568          (RADDHNv2i64_v4i32
7569                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
7570                 (v2i64 (MOVIv2d_ns (i32 0))))>;
7571def : Pat<(v16i8 (concat_vectors
7572                 (v8i8 V64:$Vd),
7573                 (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
7574          (RADDHNv8i16_v16i8
7575                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
7576                 (v8i16 (MOVIv2d_ns (i32 0))))>;
7577def : Pat<(v8i16 (concat_vectors
7578                 (v4i16 V64:$Vd),
7579                 (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))),
7580          (RADDHNv4i32_v8i16
7581                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
7582                 (v4i32 (MOVIv2d_ns (i32 0))))>;
7583def : Pat<(v4i32 (concat_vectors
7584                 (v2i32 V64:$Vd),
7585                 (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))),
7586          (RADDHNv2i64_v4i32
7587                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
7588                 (v2i64 (MOVIv2d_ns (i32 0))))>;
7589
7590// SHRN patterns for when a logical right shift was used instead of arithmetic
7591// (the immediate guarantees no sign bits actually end up in the result so it
7592// doesn't matter).
7593def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
7594          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
7595def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
7596          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
7597def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
7598          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
7599
7600def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
7601                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
7602                                                    vecshiftR16Narrow:$imm)))),
7603          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
7604                           V128:$Rn, vecshiftR16Narrow:$imm)>;
7605def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
7606                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
7607                                                    vecshiftR32Narrow:$imm)))),
7608          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
7609                           V128:$Rn, vecshiftR32Narrow:$imm)>;
// SHRN2 for v2i64 -> v2i32 narrowing with a logical right shift: $Rd supplies
// the low half of the result and the truncated shift fills the high half.
// The immediate is a 64-bit-element narrowing shift amount, so the same
// operand class (vecshiftR64Narrow) is used on both sides of the pattern.
def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
                                                    vecshiftR64Narrow:$imm)))),
          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
                           V128:$Rn, vecshiftR64Narrow:$imm)>;
7615
// Vector sign and zero extensions are implemented with SSHLL and USHLL.
7617// Anyexts are implemented as zexts.
7618def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
7619def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
7620def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
7621def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
7622def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
7623def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
7624def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
7625def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
7626def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
7627// Also match an extend from the upper half of a 128 bit source register.
7628def : Pat<(v8i16 (anyext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
7629          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
7630def : Pat<(v8i16 (zext   (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
7631          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
7632def : Pat<(v8i16 (sext   (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
7633          (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
7634def : Pat<(v4i32 (anyext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
7635          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
7636def : Pat<(v4i32 (zext   (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
7637          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
7638def : Pat<(v4i32 (sext   (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
7639          (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
7640def : Pat<(v2i64 (anyext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
7641          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
7642def : Pat<(v2i64 (zext   (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
7643          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
7644def : Pat<(v2i64 (sext   (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
7645          (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
7646
7647// Vector shift sxtl aliases
7648def : InstAlias<"sxtl.8h $dst, $src1",
7649                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
7650def : InstAlias<"sxtl $dst.8h, $src1.8b",
7651                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
7652def : InstAlias<"sxtl.4s $dst, $src1",
7653                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
7654def : InstAlias<"sxtl $dst.4s, $src1.4h",
7655                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
7656def : InstAlias<"sxtl.2d $dst, $src1",
7657                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
7658def : InstAlias<"sxtl $dst.2d, $src1.2s",
7659                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
7660
7661// Vector shift sxtl2 aliases
7662def : InstAlias<"sxtl2.8h $dst, $src1",
7663                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
7664def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
7665                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
7666def : InstAlias<"sxtl2.4s $dst, $src1",
7667                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
7668def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
7669                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
7670def : InstAlias<"sxtl2.2d $dst, $src1",
7671                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
7672def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
7673                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
7674
7675// Vector shift uxtl aliases
7676def : InstAlias<"uxtl.8h $dst, $src1",
7677                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
7678def : InstAlias<"uxtl $dst.8h, $src1.8b",
7679                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
7680def : InstAlias<"uxtl.4s $dst, $src1",
7681                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
7682def : InstAlias<"uxtl $dst.4s, $src1.4h",
7683                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
7684def : InstAlias<"uxtl.2d $dst, $src1",
7685                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
7686def : InstAlias<"uxtl $dst.2d, $src1.2s",
7687                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
7688
7689// Vector shift uxtl2 aliases
7690def : InstAlias<"uxtl2.8h $dst, $src1",
7691                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
7692def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
7693                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
7694def : InstAlias<"uxtl2.4s $dst, $src1",
7695                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
7696def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
7697                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
7698def : InstAlias<"uxtl2.2d $dst, $src1",
7699                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
7700def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
7701                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
7702
7703// If an integer is about to be converted to a floating point value,
7704// just load it on the floating point unit.
7705// These patterns are more complex because floating point loads do not
7706// support sign extension.
7707// The sign extension has to be explicitly added and is only supported for
7708// one step: byte-to-half, half-to-word, word-to-doubleword.
7709// SCVTF GPR -> FPR is 9 cycles.
7710// SCVTF FPR -> FPR is 4 cycles.
7711// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
7712// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
7713// and still be faster.
7714// However, this is not good for code size.
7715// 8-bits -> float. 2 sizes step-up.
// Pattern for (f32 (sint_to_fp (i32 (sextloadi8 addrmode)))).
//   addrmode - dag matched as the address operand of the sextloadi8.
//   INST     - dag of the load instruction to emit; its result is inserted
//              into the bsub subregister of an undefined f64 value.
// The byte is then widened twice with a zero shift amount
// (SSHLLv8i8_shift, then SSHLLv4i16_shift), the ssub subregister of the
// result is extracted, and SCVTFv1i32 performs the conversion.
7716class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
7717  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
7718        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
7719                            (SSHLLv4i16_shift
7720                              (f64
7721                                (EXTRACT_SUBREG
7722                                  (SSHLLv8i8_shift
7723                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
7724                                        INST,
7725                                        bsub),
7726                                    0),
7727                                  dsub)),
7728                               0),
7729                             ssub)))>,
7730    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
7731
// One instantiation per addressing mode: ro8.Wpat / ro8.Xpat (offset register
// in GPR32 / GPR64), am_indexed8 (uimm12s1 offset) and am_unscaled8 (simm9
// offset), each paired with the LDRB/LDURB form taking the same operands.
7732def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
7733                          (LDRBroW  GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
7734def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
7735                          (LDRBroX  GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
7736def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
7737                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
7738def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
7739                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;
7740
7741// 16-bits -> float. 1 size step-up.
// Pattern for (f32 (sint_to_fp (i32 (sextloadi16 addrmode)))).
//   addrmode - dag matched as the address operand of the sextloadi16.
//   INST     - dag of the load instruction to emit; its result is inserted
//              into the hsub subregister of an undefined f64 value.
// A single SSHLLv4i16_shift with shift amount 0 widens the halfword, the
// ssub subregister is extracted, and SCVTFv1i32 performs the conversion.
7742class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
7743  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
7744        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
7745                            (SSHLLv4i16_shift
7746                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
7747                                  INST,
7748                                  hsub),
7749                                0),
7750                            ssub)))>,
7751    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
7752
// One instantiation per addressing mode: ro16.Wpat / ro16.Xpat, am_indexed16
// (uimm12s2 offset) and am_unscaled16 (simm9 offset), each paired with the
// LDRH/LDURH form taking the same operands.
7753def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
7754                           (LDRHroW   GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
7755def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
7756                           (LDRHroX   GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
7757def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
7758                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
7759def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
7760                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
7761
7762// 32-bits to 32-bits are handled in target specific dag combine:
7763// performIntToFpCombine.
7764// 64-bits integer to 32-bits floating point, not possible with
7765// SCVTF on floating point registers (both source and destination
7766// must have the same size).
7767
7768// Here are the patterns for 8, 16, 32, and 64-bits to double.
7769// 8-bits -> double. 3 size step-up: give up.
7770// 16-bits -> double. 2 size step.
// Pattern for (f64 (sint_to_fp (i32 (sextloadi16 addrmode)))).
//   addrmode - dag matched as the address operand of the sextloadi16.
//   INST     - dag of the load instruction to emit; its result is inserted
//              into the hsub subregister of an undefined f64 value.
// The halfword is widened twice with a zero shift amount
// (SSHLLv4i16_shift, then SSHLLv2i32_shift), the dsub subregister of the
// result is extracted, and SCVTFv1i64 performs the conversion.
// NOTE(review): gated on UseAlternateSExtLoadCVTF32, the same predicate used
// by the f32 patterns, even though the result here is f64.
7771class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
7772  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
7773           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
7774                              (SSHLLv2i32_shift
7775                                 (f64
7776                                  (EXTRACT_SUBREG
7777                                    (SSHLLv4i16_shift
7778                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
7779                                        INST,
7780                                        hsub),
7781                                     0),
7782                                   dsub)),
7783                               0),
7784                             dsub)))>,
7785    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
7786
// One instantiation per addressing mode, using the same LDRH/LDURH forms as
// the 16-bit -> float patterns.
7787def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
7788                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
7789def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
7790                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
7791def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
7792                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
7793def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
7794                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
7795// 32-bits -> double. 1 size step-up.
// Pattern for (f64 (sint_to_fp (i32 (load addrmode)))). Unlike the narrower
// variants this matches a plain `load`, since the loaded value is already
// i32.
//   addrmode - dag matched as the address operand of the load.
//   INST     - dag of the load instruction to emit; its result is inserted
//              into the ssub subregister of an undefined f64 value.
// A single SSHLLv2i32_shift with shift amount 0 widens the word, the dsub
// subregister is extracted, and SCVTFv1i64 performs the conversion.
7796class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
7797  : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
7798           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
7799                              (SSHLLv2i32_shift
7800                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
7801                                  INST,
7802                                  ssub),
7803                               0),
7804                             dsub)))>,
7805    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
7806
// One instantiation per addressing mode: ro32.Wpat / ro32.Xpat, am_indexed32
// (uimm12s4 offset) and am_unscaled32 (simm9 offset), each paired with the
// LDRS/LDURS form taking the same operands.
7807def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
7808                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
7809def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
7810                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
7811def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
7812                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
7813def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
7814                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;
7815
7816// 64-bits -> double are handled in target specific dag combine:
7817// performIntToFpCombine.
7818
7819
7820//----------------------------------------------------------------------------
7821// AdvSIMD Load-Store Structure
7822//----------------------------------------------------------------------------
// Instantiate the LD1-LD4 and ST1-ST4 instruction families from the
// SIMDLd<N>Multiple / SIMDSt<N>Multiple multiclasses (defined elsewhere).
// The string argument is presumably the assembly mnemonic - confirm against
// the multiclass definitions.
7823defm LD1 : SIMDLd1Multiple<"ld1">;
7824defm LD2 : SIMDLd2Multiple<"ld2">;
7825defm LD3 : SIMDLd3Multiple<"ld3">;
7826defm LD4 : SIMDLd4Multiple<"ld4">;
7827
7828defm ST1 : SIMDSt1Multiple<"st1">;
7829defm ST2 : SIMDSt2Multiple<"st2">;
7830defm ST3 : SIMDSt3Multiple<"st3">;
7831defm ST4 : SIMDSt4Multiple<"st4">;
7832
// Selects INST for a load of vector type `ty` whose address is a bare
// GPR64sp register (the pattern has no offset operand).
//   ty   - vector value type produced by the load.
//   INST - instruction to emit; it receives only the base register.
7833class Ld1Pat<ValueType ty, Instruction INST>
7834  : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;
7835
// Each integer vector type is paired with the LD1One variant whose suffix
// matches its arrangement (e.g. v16i8 -> LD1Onev16b, v1i64 -> LD1Onev1d).
7836def : Ld1Pat<v16i8, LD1Onev16b>;
7837def : Ld1Pat<v8i16, LD1Onev8h>;
7838def : Ld1Pat<v4i32, LD1Onev4s>;
7839def : Ld1Pat<v2i64, LD1Onev2d>;
7840def : Ld1Pat<v8i8,  LD1Onev8b>;
7841def : Ld1Pat<v4i16, LD1Onev4h>;
7842def : Ld1Pat<v2i32, LD1Onev2s>;
7843def : Ld1Pat<v1i64, LD1Onev1d>;
7844
// Selects INST for a store of vector type `ty` whose address is a bare
// GPR64sp register (the pattern has no offset operand).
//   ty   - vector value type being stored.
//   INST - instruction to emit; it receives the value and the base register.
7845class St1Pat<ValueType ty, Instruction INST>
7846  : Pat<(store ty:$Vt, GPR64sp:$Rn),
7847        (INST ty:$Vt, GPR64sp:$Rn)>;
7848
// Each integer vector type is paired with the ST1One variant whose suffix
// matches its arrangement, mirroring the Ld1Pat instantiations.
7849def : St1Pat<v16i8, ST1Onev16b>;
7850def : St1Pat<v8i16, ST1Onev8h>;
7851def : St1Pat<v4i32, ST1Onev4s>;
7852def : St1Pat<v2i64, ST1Onev2d>;
7853def : St1Pat<v8i8,  ST1Onev8b>;
7854def : St1Pat<v4i16, ST1Onev4h>;
7855def : St1Pat<v2i32, ST1Onev2s>;
7856def : St1Pat<v1i64, ST1Onev1d>;
7857
7858//---
7859// Single-element
7860//---
7861
// LD1R-LD4R families, all built from the SIMDLdR multiclass.
// The four trailing integers scale with the register count ("One".."Four"):
// 1,2,4,8 for one register up to 4,8,16,32 for four.
// NOTE(review): these are presumably the byte counts for b/h/s/d elements
// used by the post-indexed forms - confirm against SIMDLdR.
7862defm LD1R          : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
7863defm LD2R          : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
7864defm LD3R          : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
7865defm LD4R          : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
// Single-lane LD1-LD4 instructions in their "Tied" form, for B, H, S and D
// elements. mayLoad and hasSideEffects are set explicitly for every record
// in this block. The numeric suffix of the GPR64pi<N> operand class equals
// register count times element size in bytes (e.g. GPR64pi12 for LD3 with
// S elements, GPR64pi32 for LD4 with D elements).
7866let mayLoad = 1, hasSideEffects = 0 in {
7867defm LD1 : SIMDLdSingleBTied<0, 0b000,       "ld1", VecListOneb,   GPR64pi1>;
7868defm LD1 : SIMDLdSingleHTied<0, 0b010, 0,    "ld1", VecListOneh,   GPR64pi2>;
7869defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes,   GPR64pi4>;
7870defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned,   GPR64pi8>;
7871defm LD2 : SIMDLdSingleBTied<1, 0b000,       "ld2", VecListTwob,   GPR64pi2>;
7872defm LD2 : SIMDLdSingleHTied<1, 0b010, 0,    "ld2", VecListTwoh,   GPR64pi4>;
7873defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos,   GPR64pi8>;
7874defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod,   GPR64pi16>;
7875defm LD3 : SIMDLdSingleBTied<0, 0b001,       "ld3", VecListThreeb, GPR64pi3>;
7876defm LD3 : SIMDLdSingleHTied<0, 0b011, 0,    "ld3", VecListThreeh, GPR64pi6>;
7877defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
7878defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
7879defm LD4 : SIMDLdSingleBTied<1, 0b001,       "ld4", VecListFourb,  GPR64pi4>;
7880defm LD4 : SIMDLdSingleHTied<1, 0b011, 0,    "ld4", VecListFourh,  GPR64pi8>;
7881defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours,  GPR64pi16>;
7882defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd,  GPR64pi32>;
7883}
7884
// Fold a scalar integer load that feeds AArch64dup into a single LD1R of the
// matching arrangement. i8 and i16 elements are matched as extloadi8 /
// extloadi16 producing i32; i32 and i64 elements use a plain load.
7885def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
7886          (LD1Rv8b GPR64sp:$Rn)>;
7887def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
7888          (LD1Rv16b GPR64sp:$Rn)>;
7889def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
7890          (LD1Rv4h GPR64sp:$Rn)>;
7891def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
7892          (LD1Rv8h GPR64sp:$Rn)>;
7893def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
7894          (LD1Rv2s GPR64sp:$Rn)>;
7895def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
7896          (LD1Rv4s GPR64sp:$Rn)>;
7897def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
7898          (LD1Rv2d GPR64sp:$Rn)>;
7899def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
7900          (LD1Rv1d GPR64sp:$Rn)>;
7901
// Duplicating lane 0 of a freshly loaded vector is also selected as LD1R
// from the same address. For the 64-bit results (v8i8, v4i16, v2i32) the
// matched source is a 64-bit vector load inserted at index 0 of an undef
// 128-bit vector; for the 128-bit results it is a 128-bit vector load.
7902def : Pat<(v8i8 (AArch64duplane8 (v16i8 (insert_subvector undef, (v8i8 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
7903          (LD1Rv8b GPR64sp:$Rn)>;
7904def : Pat<(v16i8 (AArch64duplane8 (v16i8 (load GPR64sp:$Rn)), (i64 0))),
7905          (LD1Rv16b GPR64sp:$Rn)>;
7906def : Pat<(v4i16 (AArch64duplane16 (v8i16 (insert_subvector undef, (v4i16 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
7907          (LD1Rv4h GPR64sp:$Rn)>;
7908def : Pat<(v8i16 (AArch64duplane16 (v8i16 (load GPR64sp:$Rn)), (i64 0))),
7909          (LD1Rv8h GPR64sp:$Rn)>;
7910def : Pat<(v2i32 (AArch64duplane32 (v4i32 (insert_subvector undef, (v2i32 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
7911          (LD1Rv2s GPR64sp:$Rn)>;
7912def : Pat<(v4i32 (AArch64duplane32 (v4i32 (load GPR64sp:$Rn)), (i64 0))),
7913          (LD1Rv4s GPR64sp:$Rn)>;
7914def : Pat<(v2i64 (AArch64duplane64 (v2i64 (load GPR64sp:$Rn)), (i64 0))),
7915          (LD1Rv2d GPR64sp:$Rn)>;
7916
7917// Grab the floating point version too
// The floating-point element types reuse the LD1R variant of the same
// arrangement as the integer patterns: f32 -> 2s/4s, f64 -> 1d/2d, and both
// f16 and bf16 -> 4h/8h.
7918def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
7919          (LD1Rv2s GPR64sp:$Rn)>;
7920def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
7921          (LD1Rv4s GPR64sp:$Rn)>;
7922def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
7923          (LD1Rv2d GPR64sp:$Rn)>;
7924def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
7925          (LD1Rv1d GPR64sp:$Rn)>;
7926def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
7927          (LD1Rv4h GPR64sp:$Rn)>;
7928def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
7929          (LD1Rv8h GPR64sp:$Rn)>;
7930def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
7931          (LD1Rv4h GPR64sp:$Rn)>;
7932def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
7933          (LD1Rv8h GPR64sp:$Rn)>;
7934
// Selects a single-lane LD1 for inserting a loaded scalar into a 128-bit
// vector at a constant lane.
//   scalar_load - load operator to match (load, extloadi8, extloadi16).
//   VecIndex    - operand class of the lane index.
//   VTy         - 128-bit vector type being inserted into.
//   STy         - scalar type produced by scalar_load.
//   LD1         - lane-load instruction to emit; it receives the original
//                 vector, the lane index and the base register.
7934class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
7935                    ValueType VTy, ValueType STy, Instruction LD1>
7936  : Pat<(vector_insert (VTy VecListOne128:$Rd),
7937           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
7938        (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;
7939
// i8/i16 lanes are matched through extloadi8/extloadi16 with an i32 scalar;
// floating-point lanes reuse the integer LD1 of the same element width
// (f16/bf16 -> LD1i16, f32 -> LD1i32, f64 -> LD1i64).
7940def : Ld1Lane128Pat<extloadi8,  VectorIndexB, v16i8, i32, LD1i8>;
7941def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
7942def : Ld1Lane128Pat<load,       VectorIndexS, v4i32, i32, LD1i32>;
7943def : Ld1Lane128Pat<load,       VectorIndexS, v4f32, f32, LD1i32>;
7944def : Ld1Lane128Pat<load,       VectorIndexD, v2i64, i64, LD1i64>;
7945def : Ld1Lane128Pat<load,       VectorIndexD, v2f64, f64, LD1i64>;
7946def : Ld1Lane128Pat<load,       VectorIndexH, v8f16, f16, LD1i16>;
7947def : Ld1Lane128Pat<load,       VectorIndexH, v8bf16, bf16, LD1i16>;
7949
7949// Generate LD1 for extload if memory type does not match the
7950// destination type, for example:
7951//
7952//   (v4i32 (insert_vector_elt (load anyext from i8) idx))
7953//
7954// In this case, the index must be adjusted to match LD1 type.
7955//
// Parameters are as for Ld1Lane128Pat, plus:
//   IdxOp - SDNodeXForm applied to the matched lane index before it is
//           handed to LD1, so that it addresses the narrower element.
7956class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand
7957                         VecIndex, ValueType VTy, ValueType STy,
7958                         Instruction LD1, SDNodeXForm IdxOp>
7959  : Pat<(vector_insert (VTy VecListOne128:$Rd),
7960                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
7961        (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>;
7963
// 64-bit counterpart of the index-adjusting lane-load pattern: inserts a
// loaded scalar into a 64-bit vector when the memory element is narrower
// than the vector element.
//   scalar_load - extending load operator to match.
//   VecIndex    - operand class of the matched lane index.
//   VTy / STy   - 64-bit vector type and scalar type of the insert.
//   LD1         - lane-load instruction to emit.
//   IdxOp       - SDNodeXForm applied to the lane index before use.
// The 64-bit source is placed in the dsub subregister of a 128-bit register
// via SUBREG_TO_REG, LD1 operates on that register, and dsub is extracted
// again as the result.
7963class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex,
7964                        ValueType VTy, ValueType STy, Instruction LD1,
7965                        SDNodeXForm IdxOp>
7966  : Pat<(vector_insert (VTy VecListOne64:$Rd),
7967                       (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
7968        (EXTRACT_SUBREG
7969            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
7970                (IdxOp VecIndex:$idx), GPR64sp:$Rn),
7971            dsub)>;
7973
// Lane-index rescaling transforms. Each one multiplies the matched immediate
// by the ratio of the wider to the narrower element size and returns it as
// an i64 target constant:
//   VectorIndexStoH - S lane index -> H lane index (x2)
//   VectorIndexStoB - S lane index -> B lane index (x4)
//   VectorIndexHtoB - H lane index -> B lane index (x2)
7973def VectorIndexStoH : SDNodeXForm<imm, [{
7974  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
7975}]>;
7976def VectorIndexStoB : SDNodeXForm<imm, [{
7977  return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
7978}]>;
7979def VectorIndexHtoB : SDNodeXForm<imm, [{
7980  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
7981}]>;
7983
// Extending lane loads into 128-bit vectors: the lane index is matched at
// the vector's element size and rescaled to the size of the memory element
// loaded by LD1i16 / LD1i8.
7983def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>;
7984def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>;
7985def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>;
7986
// The same three combinations for the 64-bit vector types.
7987def : Ld1Lane64IdxOpPat<extloadi16, VectorIndexS, v2i32, i32, LD1i16, VectorIndexStoH>;
7988def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexS, v2i32, i32, LD1i8, VectorIndexStoB>;
7989def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexHtoB>;
7991
7991// Same as above, but the first element is populated using
7992// scalar_to_vector + insert_subvector instead of insert_vector_elt.
// Only enabled under the IsNeonAvailable predicate.
//   ResultTy - 64-bit vector type produced by scalar_to_vector.
//   VecTy    - 128-bit vector type of the undefined register LD1 loads into.
//   ExtLoad  - extending load operator to match (its result is i32).
//   LD1      - lane-load instruction to emit; it always targets lane 0.
// The dsub subregister of the LD1 result is returned as ResultTy.
7993let Predicates = [IsNeonAvailable] in {
7994  class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
7995                          SDPatternOperator ExtLoad, Instruction LD1>
7996    : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
7997            (ResultTy (EXTRACT_SUBREG
7998              (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
7999
8000  def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>;
8001  def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>;
8002  def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>;
8003}
// Selects a single-lane LD1 for inserting a loaded scalar into a 64-bit
// vector at a constant lane.
//   scalar_load - load operator to match (load, extloadi8, extloadi16).
//   VecIndex    - operand class of the lane index.
//   VTy         - 64-bit vector type being inserted into.
//   STy         - scalar type produced by scalar_load.
//   LD1         - lane-load instruction to emit.
// The 64-bit source is placed in the dsub subregister of a 128-bit register
// via SUBREG_TO_REG, LD1 operates on that register, and dsub is extracted
// again as the result.
8004class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
8005                   ValueType VTy, ValueType STy, Instruction LD1>
8006  : Pat<(vector_insert (VTy VecListOne64:$Rd),
8007           (STy (scalar_load GPR64sp:$Rn)), VecIndex:$idx),
8008        (EXTRACT_SUBREG
8009            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
8010                          VecIndex:$idx, GPR64sp:$Rn),
8011            dsub)>;
8012
// Instantiations for the 64-bit vector types with 8-, 16- and 32-bit
// elements; no v1i64 / v1f64 entries are listed here.
8013def : Ld1Lane64Pat<extloadi8,  VectorIndexB, v8i8,  i32, LD1i8>;
8014def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
8015def : Ld1Lane64Pat<load,       VectorIndexS, v2i32, i32, LD1i32>;
8016def : Ld1Lane64Pat<load,       VectorIndexS, v2f32, f32, LD1i32>;
8017def : Ld1Lane64Pat<load,       VectorIndexH, v4f16, f16, LD1i16>;
8018def : Ld1Lane64Pat<load,       VectorIndexH, v4bf16, bf16, LD1i16>;
8020
8021
// Assembly aliases for the single-lane LD1-LD4 instructions, generated by
// the SIMDLdSt<N>SingleAliases multiclasses (shared with the ST<N> forms).
8021defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
8022defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
8023defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
8024defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
8026
8026// Stores
// Single-lane ST1 for B, H, S and D elements. Unlike the ST2-ST4 block
// further down, these are not wrapped in an explicit
// `let mayStore = 1, hasSideEffects = 0`.
// NOTE(review): presumably the flags are inferred from the St1Lane*
// selection patterns that follow - confirm.
8027defm ST1 : SIMDStSingleB<0, 0b000,       "st1", VecListOneb, GPR64pi1>;
8028defm ST1 : SIMDStSingleH<0, 0b010, 0,    "st1", VecListOneh, GPR64pi2>;
8029defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
8030defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
8032
// Selects a single-lane ST1 for storing one element extracted from a
// 128-bit vector at a constant lane. Declared with AddedComplexity = 19.
//   scalar_store - store operator to match (store, truncstorei8/16).
//   VecIndex     - operand class of the lane index.
//   VTy          - 128-bit vector type being extracted from.
//   STy          - scalar type produced by vector_extract.
//   ST1          - lane-store instruction to emit; it receives the vector,
//                  the lane index and the base register.
8032let AddedComplexity = 19 in
8033class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
8034                    ValueType VTy, ValueType STy, Instruction ST1>
8035  : Pat<(scalar_store
8036             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
8037             GPR64sp:$Rn),
8038        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;
8039
// i8/i16 lanes are matched through truncstorei8/truncstorei16 of an i32
// extract; floating-point lanes reuse the integer ST1 of the same width.
8040def : St1Lane128Pat<truncstorei8,  VectorIndexB, v16i8, i32, ST1i8>;
8041def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
8042def : St1Lane128Pat<store,         VectorIndexS, v4i32, i32, ST1i32>;
8043def : St1Lane128Pat<store,         VectorIndexS, v4f32, f32, ST1i32>;
8044def : St1Lane128Pat<store,         VectorIndexD, v2i64, i64, ST1i64>;
8045def : St1Lane128Pat<store,         VectorIndexD, v2f64, f64, ST1i64>;
8046def : St1Lane128Pat<store,         VectorIndexH, v8f16, f16, ST1i16>;
8047def : St1Lane128Pat<store,         VectorIndexH, v8bf16, bf16, ST1i16>;
8049
// 64-bit counterpart of St1Lane128Pat: stores one element extracted from a
// 64-bit vector at a constant lane. Declared with AddedComplexity = 19.
// Parameters have the same meaning as in St1Lane128Pat, with VTy a 64-bit
// vector type. The source is first placed in the dsub subregister of a
// 128-bit register via SUBREG_TO_REG because ST1 takes a 128-bit operand.
8049let AddedComplexity = 19 in
8050class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
8051                   ValueType VTy, ValueType STy, Instruction ST1>
8052  : Pat<(scalar_store
8053             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
8054             GPR64sp:$Rn),
8055        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
8056             VecIndex:$idx, GPR64sp:$Rn)>;
8057
// Instantiations for the 64-bit vector types with 8-, 16- and 32-bit
// elements.
8058def : St1Lane64Pat<truncstorei8,  VectorIndexB, v8i8, i32, ST1i8>;
8059def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
8060def : St1Lane64Pat<store,         VectorIndexS, v2i32, i32, ST1i32>;
8061def : St1Lane64Pat<store,         VectorIndexS, v2f32, f32, ST1i32>;
8062def : St1Lane64Pat<store,         VectorIndexH, v4f16, f16, ST1i16>;
8063def : St1Lane64Pat<store,         VectorIndexH, v4bf16, bf16, ST1i16>;
8065
// Post-indexed single-lane stores from a 64-bit vector. Produces two
// patterns per instantiation:
//   1. the post-increment equals the constant `offset`; the instruction is
//      emitted with XZR as its increment-register operand;
//   2. the post-increment is a GPR64 register $Rm, passed through as is.
//   scalar_store - post-indexed store operator to match.
//   VecIndex     - operand class of the lane index.
//   VTy / STy    - 64-bit vector type and extracted scalar type.
//   ST1          - post-indexed lane-store instruction to emit.
//   offset       - constant increment matched by the first pattern.
// The 64-bit source is widened to a 128-bit register via SUBREG_TO_REG.
8065multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
8066                             ValueType VTy, ValueType STy, Instruction ST1,
8067                             int offset> {
8068  def : Pat<(scalar_store
8069              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
8070              GPR64sp:$Rn, offset),
8071        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
8072             VecIndex:$idx, GPR64sp:$Rn, XZR)>;
8073
8074  def : Pat<(scalar_store
8075              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
8076              GPR64sp:$Rn, GPR64:$Rm),
8077        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
8078             VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
8079}
8080
// The final argument is the element size in bytes (1, 2, 4 or 8).
8081defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
8082defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
8083                        2>;
8084defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
8085defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
8086defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
8087defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
8088defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
8089defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>;
8091
// Post-indexed single-lane stores from a 128-bit vector. Produces two
// patterns per instantiation:
//   1. the post-increment equals the constant `offset`; the instruction is
//      emitted with XZR as its increment-register operand;
//   2. the post-increment is a GPR64 register $Rm, passed through as is.
//   scalar_store - post-indexed store operator to match.
//   VecIndex     - operand class of the lane index.
//   VTy / STy    - 128-bit vector type and extracted scalar type.
//   ST1          - post-indexed lane-store instruction to emit.
//   offset       - constant increment matched by the first pattern.
8091multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
8092                             ValueType VTy, ValueType STy, Instruction ST1,
8093                             int offset> {
8094  def : Pat<(scalar_store
8095              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
8096              GPR64sp:$Rn, offset),
8097        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;
8098
8099  def : Pat<(scalar_store
8100              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
8101              GPR64sp:$Rn, GPR64:$Rm),
8102        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
8103}
8104
// The final argument is the element size in bytes (1, 2, 4 or 8).
8105defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
8106                         1>;
8107defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
8108                         2>;
8109defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
8110defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
8111defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
8112defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
8113defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
8114defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>;
8116
// Single-lane ST2-ST4 for B, H, S and D elements. mayStore and
// hasSideEffects are set explicitly for every record in this block. The
// numeric suffix of the GPR64pi<N> operand class equals register count
// times element size in bytes, as in the single-lane LD2-LD4 definitions.
8116let mayStore = 1, hasSideEffects = 0 in {
8117defm ST2 : SIMDStSingleB<1, 0b000,       "st2", VecListTwob,   GPR64pi2>;
8118defm ST2 : SIMDStSingleH<1, 0b010, 0,    "st2", VecListTwoh,   GPR64pi4>;
8119defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos,   GPR64pi8>;
8120defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod,   GPR64pi16>;
8121defm ST3 : SIMDStSingleB<0, 0b001,       "st3", VecListThreeb, GPR64pi3>;
8122defm ST3 : SIMDStSingleH<0, 0b011, 0,    "st3", VecListThreeh, GPR64pi6>;
8123defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
8124defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
8125defm ST4 : SIMDStSingleB<1, 0b001,       "st4", VecListFourb,  GPR64pi4>;
8126defm ST4 : SIMDStSingleH<1, 0b011, 0,    "st4", VecListFourh,  GPR64pi8>;
8127defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours,  GPR64pi16>;
8128defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd,  GPR64pi32>;
8129}
8131
// Assembly aliases for the single-lane ST1-ST4 instructions, generated by
// the same SIMDLdSt<N>SingleAliases multiclasses used for LD1-LD4.
8131defm ST1 : SIMDLdSt1SingleAliases<"st1">;
8132defm ST2 : SIMDLdSt2SingleAliases<"st2">;
8133defm ST3 : SIMDLdSt3SingleAliases<"st3">;
8134defm ST4 : SIMDLdSt4SingleAliases<"st4">;
8136
8137//----------------------------------------------------------------------------
8138// Crypto extensions
8139//----------------------------------------------------------------------------
8140
// AES instructions, available only under the HasAES predicate. AESE/AESD
// use the AESTiedInst class, AESMC/AESIMC use AESInst. Each record binds its
// mnemonic to the corresponding int_aarch64_crypto_* intrinsic.
8140let Predicates = [HasAES] in {
8141def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
8142def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
8143def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
8144def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
8145}
8147
8147// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
8148// for AES fusion on some CPUs.
// Both pseudos take and produce a V128 value, carry the "$Rn = $Rd"
// constraint so input and output share a register, have an empty pattern
// list, and are scheduled as WriteVq. They are marked as having no side
// effects and as neither loading nor storing.
8149let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
8150def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
8151                        Sched<[WriteVq]>;
8152def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
8153                         Sched<[WriteVq]>;
8154}
8156
8156// Only use constrained versions of AES(I)MC instructions if they are paired with
8157// AESE/AESD.
// Both patterns require HasFuseAES. They match the aesmc intrinsic applied
// directly to an aese result (and aesimc applied to an aesd result) and
// select the tied pseudo fed by the corresponding AESErr / AESDrr.
8158def : Pat<(v16i8 (int_aarch64_crypto_aesmc
8159            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
8160                                            (v16i8 V128:$src2))))),
8161          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
8162                                             (v16i8 V128:$src2)))))>,
8163          Requires<[HasFuseAES]>;
8164
8165def : Pat<(v16i8 (int_aarch64_crypto_aesimc
8166            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
8167                                            (v16i8 V128:$src2))))),
8168          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
8169                                              (v16i8 V128:$src2)))))>,
8170          Requires<[HasFuseAES]>;
8172
// SHA1 and SHA256 instructions. Note that both families are gated on the
// single HasSHA2 predicate. The first group holds the three-operand forms
// (SHATiedInstQSV / VVV / QQV classes), the second group the two-operand
// forms (SHAInstSS / SHATiedInstVV). Each record binds its mnemonic to the
// corresponding int_aarch64_crypto_* intrinsic.
8172let Predicates = [HasSHA2] in {
8173def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
8174def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
8175def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
8176def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
8177def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
8178def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2",int_aarch64_crypto_sha256h2>;
8179def SHA256SU1rrr :SHATiedInstVVV<0b110, "sha256su1",int_aarch64_crypto_sha256su1>;
8180
8181def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
8182def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
8183def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0",int_aarch64_crypto_sha256su0>;
8184}
8186
8187//----------------------------------------------------------------------------
8188// Compiler-pseudos
8189//----------------------------------------------------------------------------
8190// FIXME: Like for X86, these should go in their own separate .td file.
8191
8192// For an anyext, we don't care what the high bits are, so we can perform an
8193// INSERT_SUBREG into an IMPLICIT_DEF.
// anyext i32 -> i64: place the source in the sub_32 subregister of an
// undefined 64-bit register; no instruction is emitted for the high bits.
8194def : Pat<(i64 (anyext GPR32:$src)),
8195          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
8196
8197// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
8198// then assert the extension has happened.
// The "MOV" is spelled as ORRWrs with WZR as the first source and a shift
// amount of 0; SUBREG_TO_REG then wraps the result as an i64.
8199def : Pat<(i64 (zext GPR32:$src)),
8200          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
8201
8202// To sign extend, we use a signed bitfield move instruction (SBFM) on the
8203// containing super-reg.
// In every SBFM below the first immediate is 0 and the second is the index
// of the top bit of the source field (31, 15, 7 or 0).
8204def : Pat<(i64 (sext GPR32:$src)),
8205   (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
8206def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
8207def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
8208def : Pat<(i64 (sext_inreg GPR64:$src, i8)),  (SBFMXri GPR64:$src, 0, 7)>;
8209def : Pat<(i64 (sext_inreg GPR64:$src, i1)),  (SBFMXri GPR64:$src, 0, 0)>;
8210def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
8211def : Pat<(i32 (sext_inreg GPR32:$src, i8)),  (SBFMWri GPR32:$src, 0, 7)>;
8212def : Pat<(i32 (sext_inreg GPR32:$src, i1)),  (SBFMWri GPR32:$src, 0, 0)>;
8213
// A left shift by a constant applied to a sign- or zero-extended value is
// folded into a single bitfield move (SBFM for sign extension, UBFM for zero
// extension). Both immediates are computed from the shift amount by the
// i32shift_* / i64shift_* transforms, which are defined elsewhere in the
// file.
8214def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
8215          (SBFMWri GPR32:$Rn, (i64 (i32shift_a       imm0_31:$imm)),
8216                              (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
8217def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
8218          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
8219                              (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
8220
8221def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
8222          (SBFMWri GPR32:$Rn, (i64 (i32shift_a        imm0_31:$imm)),
8223                              (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
8224def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
8225          (SBFMXri GPR64:$Rn, (i64 (i64shift_a        imm0_63:$imm)),
8226                              (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
8227
// For a full i32 -> i64 extension the 32-bit source is first placed in the
// sub_32 subregister of an undefined 64-bit register.
8228def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
8229          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
8230                   (i64 (i64shift_a        imm0_63:$imm)),
8231                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
8232
// NOTE(review): the zext variant reuses i64shift_sext_i32 for its second
// immediate; presumably that transform depends only on the 32-bit source
// width and not on signedness - confirm against its definition.
8233def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)),
8234          (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
8235                   (i64 (i64shift_a        imm0_63:$imm)),
8236                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
8237
8238// sra patterns have an AddedComplexity of 10, so make sure we have a higher
8239// AddedComplexity for the following patterns since we want to match sext + sra
8240// patterns before we attempt to match a single sra node.
8241let AddedComplexity = 20 in {
8242// We support all sext + sra combinations which preserve at least one bit of the
8243// original value which is to be sign extended. E.g. we support shifts up to
8244// bitwidth-1 bits.
// In each pattern the shift amount becomes the first SBFM immediate and the
// second immediate is the index of the top bit of the extended field
// (7, 15 or 31); the immN_M operand class restricts the shift amount to that
// same range.
8245def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
8246          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
8247def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
8248          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;
8249
8250def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
8251          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
8252def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
8253          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;
8254
8255def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
8256          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
8257                   (i64 imm0_31:$imm), 31)>;
8258} // AddedComplexity = 20
8259
8260// To truncate, we can simply extract from a subregister.
// i64 -> i32 truncation reads the sub_32 subregister of the source; the
// source operand is matched in the GPR64sp register class.
8261def : Pat<(i32 (trunc GPR64sp:$src)),
8262          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;
8263
8264// __builtin_trap() uses the BRK instruction on AArch64.
// trap is emitted as BRK with immediate 1, debugtrap as BRK with immediate
// 0xF000.
8265def : Pat<(trap), (BRK 1)>;
8266def : Pat<(debugtrap), (BRK 0xF000)>;
8267
// ubsantrap lowering. ubsan_trap_imm accepts only kind values that fit in
// 8 unsigned bits; ubsan_trap_xform then ORs ('U' << 8) into the value and
// returns it as an i32 target constant, so the BRK immediate carries 'U' in
// bits 8 and up and the kind in the low byte.
8268def ubsan_trap_xform : SDNodeXForm<timm, [{
8269  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
8270}]>;
8271
8272def ubsan_trap_imm : TImmLeaf<i32, [{
8273  return isUInt<8>(Imm);
8274}], ubsan_trap_xform>;
8275
8276def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;
8277
8278// Multiply high patterns which multiply the lower subvector using smull/umull
8279// and the upper subvector with smull2/umull2. Then shuffle the high
8280// part of both results together.
// Signed multiply-high for v16i8, v8i16 and v4i32. The first SMULL operates
// on the dsub (low 64-bit) subregisters of both operands, the second SMULL
// variant takes the full 128-bit registers, and UZP2 at the original element
// size combines the two widened products into the result.
8281def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
8282          (UZP2v16i8
8283           (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
8284                            (EXTRACT_SUBREG V128:$Rm, dsub)),
8285           (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
8286def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
8287          (UZP2v8i16
8288           (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
8289                             (EXTRACT_SUBREG V128:$Rm, dsub)),
8290           (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
8291def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
8292          (UZP2v4i32
8293           (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
8294                             (EXTRACT_SUBREG V128:$Rm, dsub)),
8295           (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
8296
// Unsigned multiply-high for v16i8, v8i16 and v4i32. Same structure as the
// signed patterns, using the UMULL variants: one on the dsub (low 64-bit)
// subregisters, one on the full 128-bit registers, combined by UZP2 at the
// original element size.
8297def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
8298          (UZP2v16i8
8299           (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
8300                            (EXTRACT_SUBREG V128:$Rm, dsub)),
8301           (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
8302def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
8303          (UZP2v8i16
8304           (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
8305                             (EXTRACT_SUBREG V128:$Rm, dsub)),
8306           (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
8307def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
8308          (UZP2v4i32
8309           (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
8310                             (EXTRACT_SUBREG V128:$Rm, dsub)),
8311           (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
8312
8313// Conversions within AdvSIMD types in the same register size are free.
8314// But because we need a consistent lane ordering, in big endian many
8315// conversions require one or more REV instructions.
8316//
8317// Consider a simple memory load followed by a bitconvert then a store.
8318//   v0 = load v2i32
8319//   v1 = BITCAST v2i32 v0 to v4i16
//        store v4i16 v1
8321//
8322// In big endian mode every memory access has an implicit byte swap. LDR and
8323// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
8324// is, they treat the vector as a sequence of elements to be byte-swapped.
8325// The two pairs of instructions are fundamentally incompatible. We've decided
8326// to use LD1/ST1 only to simplify compiler implementation.
8327//
8328// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
8329// the original code sequence:
8330//   v0 = load v2i32
//   v1 = REV v2i32 v0               (implicit)
8332//   v2 = BITCAST v2i32 v1 to v4i16
8333//   v3 = REV v4i16 v2               (implicit)
8334//        store v4i16 v3
8335//
8336// But this is now broken - the value stored is different to the value loaded
8337// due to lane reordering. To fix this, on every BITCAST we must perform two
8338// other REVs:
8339//   v0 = load v2i32
//   v1 = REV v2i32 v0               (implicit)
//   v2 = REV v2i32 v1
//   v3 = BITCAST v2i32 v2 to v4i16
//   v4 = REV v4i16 v3
//   v5 = REV v4i16 v4               (implicit)
8345//        store v4i16 v5
8346//
8347// This means an extra two instructions, but actually in most cases the two REV
8348// instructions can be combined into one. For example:
8349//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
8350//
8351// There is also no 128-bit REV instruction. This must be synthesized with an
8352// EXT instruction.
8353//
8354// Most bitconverts require some sort of conversion. The only exceptions are:
8355//   a) Identity conversions -  vNfX <-> vNiX
8356//   b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
8357//
8358
// An AArch64NvCast between two types held in the same register class selects
// to nothing: the source register is simply reused with the destination type.
class AArch64NvCastNopPat<ValueType DstVT, ValueType SrcVT, RegisterClass RC>
  : Pat<(DstVT (AArch64NvCast (SrcVT RC:$src))), (DstVT RC:$src)>;

// Natural vector casts (64 bit): every ordered pair of the listed types.
foreach DstVT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in {
  foreach SrcVT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in {
    def : AArch64NvCastNopPat<DstVT, SrcVT, FPR64>;
  }
}

// Natural vector casts (128 bit): every ordered pair of the listed types.
foreach DstVT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in {
  foreach SrcVT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in {
    def : AArch64NvCastNopPat<DstVT, SrcVT, FPR128>;
  }
}
8370
// Bitconverts between a 64-bit GPR and a multi-element 64-bit vector.
//
// Little-endian: a register-class copy in either direction.
let Predicates = [IsLE] in {
  foreach VT = [v8i8, v4i16, v2i32, v4f16, v4bf16, v2f32] in {
    def : Pat<(VT (bitconvert GPR64:$Xn)),
              (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
    def : Pat<(i64 (bitconvert (VT V64:$Vn))),
              (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
  }
}

// Big-endian: the copy is paired with a REV64 at the vector's element width.
// NOTE(review): in the vector -> i64 direction the REV64 is applied to the
// result of COPY_TO_REGCLASS ..., GPR64 rather than to the vector before the
// copy. This is kept exactly as it was; confirm that this operand order is
// intended.
let Predicates = [IsBE] in {
  def : Pat<(v8i8 (bitconvert GPR64:$Xn)),
            (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
  def : Pat<(i64 (bitconvert (v8i8 V64:$Vn))),
            (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;

  foreach VT = [v4i16, v4f16, v4bf16] in {
    def : Pat<(VT (bitconvert GPR64:$Xn)),
              (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
    def : Pat<(i64 (bitconvert (VT V64:$Vn))),
              (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
  }

  foreach VT = [v2i32, v2f32] in {
    def : Pat<(VT (bitconvert GPR64:$Xn)),
              (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
    def : Pat<(i64 (bitconvert (VT V64:$Vn))),
              (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
  }
}

// Single-element 64-bit vectors are handled without an endianness predicate.
// The i64 <- v1f64 pattern used to be listed twice (once under IsLE and once
// unpredicated); only the unpredicated definition is kept, which already
// applies to little-endian as well.
foreach VT = [v1i64, v1f64] in {
  def : Pat<(VT (bitconvert GPR64:$Xn)),
            (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
  def : Pat<(i64 (bitconvert (VT V64:$Vn))),
            (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
  def : Pat<(VT (scalar_to_vector GPR64:$Xn)),
            (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
}
def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;

// Scalar integer <-> floating-point bitconverts: a register-class copy.
def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
          (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
          (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
          (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;

// f16 <-> bf16: both are FPR16 values, so the register is only retyped.
def : Pat<(f16  (bitconvert (bf16 FPR16:$src))), (f16  FPR16:$src)>;
def : Pat<(bf16 (bitconvert (f16  FPR16:$src))), (bf16 FPR16:$src)>;
8444
// Bitconverts producing v1i64 from other 64-bit FPR types.
// Little-endian: the register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [v2i32, v4i16, v8i8, v4f16, v4bf16, v2f32] in
    def : Pat<(v1i64 (bitconvert (SrcVT FPR64:$src))), (v1i64 FPR64:$src)>;
}
// Big-endian: one REV64 at the source element width.
let Predicates = [IsBE] in {
  foreach SrcVT = [v2i32, v2f32] in
    def : Pat<(v1i64 (bitconvert (SrcVT FPR64:$src))),
              (v1i64 (REV64v2i32 FPR64:$src))>;
  foreach SrcVT = [v4i16, v4f16, v4bf16] in
    def : Pat<(v1i64 (bitconvert (SrcVT FPR64:$src))),
              (v1i64 (REV64v4i16 FPR64:$src))>;
  def : Pat<(v1i64 (bitconvert (v8i8 FPR64:$src))),
            (v1i64 (REV64v8i8 FPR64:$src))>;
}
// Single-element sources: same pattern regardless of endianness.
foreach SrcVT = [v1f64, f64] in
  def : Pat<(v1i64 (bitconvert (SrcVT FPR64:$src))), (v1i64 FPR64:$src)>;
8469
// Bitconverts producing v2i32 from other 64-bit FPR types.
// Little-endian: the register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [v1i64, v4i16, v8i8, f64, v1f64, v4f16, v4bf16] in
    def : Pat<(v2i32 (bitconvert (SrcVT FPR64:$src))), (v2i32 FPR64:$src)>;
}
// Big-endian: a single REV chosen per source type.
let Predicates = [IsBE] in {
  foreach SrcVT = [v1i64, f64, v1f64] in
    def : Pat<(v2i32 (bitconvert (SrcVT FPR64:$src))),
              (v2i32 (REV64v2i32 FPR64:$src))>;
  foreach SrcVT = [v4i16, v4f16, v4bf16] in
    def : Pat<(v2i32 (bitconvert (SrcVT FPR64:$src))),
              (v2i32 (REV32v4i16 FPR64:$src))>;
  def : Pat<(v2i32 (bitconvert (v8i8 FPR64:$src))),
            (v2i32 (REV32v8i8 FPR64:$src))>;
}
// Same element count and width: no predicate, no REV.
def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
8496
// Bitconverts producing v4i16 from other 64-bit FPR types.
// Little-endian: the register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [v1i64, v2i32, v8i8, f64, v2f32, v1f64] in
    def : Pat<(v4i16 (bitconvert (SrcVT FPR64:$src))), (v4i16 FPR64:$src)>;
}
// Big-endian: a single REV chosen per source type.
let Predicates = [IsBE] in {
  foreach SrcVT = [v1i64, f64, v1f64] in
    def : Pat<(v4i16 (bitconvert (SrcVT FPR64:$src))),
              (v4i16 (REV64v4i16 FPR64:$src))>;
  foreach SrcVT = [v2i32, v2f32] in
    def : Pat<(v4i16 (bitconvert (SrcVT FPR64:$src))),
              (v4i16 (REV32v4i16 FPR64:$src))>;
  def : Pat<(v4i16 (bitconvert (v8i8 FPR64:$src))),
            (v4i16 (REV16v8i8 FPR64:$src))>;
}
// Same element count and width: no predicate, no REV.
foreach SrcVT = [v4f16, v4bf16] in
  def : Pat<(v4i16 (bitconvert (SrcVT FPR64:$src))), (v4i16 FPR64:$src)>;
8521
// Bitconverts producing v4f16 or v4bf16 from other 64-bit FPR types. Both
// destinations use exactly the same patterns, so they share one loop.
foreach DstVT = [v4f16, v4bf16] in {
  // Little-endian: the register is reused unchanged.
  let Predicates = [IsLE] in {
    foreach SrcVT = [v1i64, v2i32, v8i8, f64, v2f32, v1f64] in
      def : Pat<(DstVT (bitconvert (SrcVT FPR64:$src))), (DstVT FPR64:$src)>;
  }
  // Big-endian: a single REV chosen per source type.
  let Predicates = [IsBE] in {
    foreach SrcVT = [v1i64, f64, v1f64] in
      def : Pat<(DstVT (bitconvert (SrcVT FPR64:$src))),
                (DstVT (REV64v4i16 FPR64:$src))>;
    foreach SrcVT = [v2i32, v2f32] in
      def : Pat<(DstVT (bitconvert (SrcVT FPR64:$src))),
                (DstVT (REV32v4i16 FPR64:$src))>;
    def : Pat<(DstVT (bitconvert (v8i8 FPR64:$src))),
              (DstVT (REV16v8i8 FPR64:$src))>;
  }
  // Same element count and width: no predicate, no REV.
  def : Pat<(DstVT (bitconvert (v4i16 FPR64:$src))), (DstVT FPR64:$src)>;
}
8566
// Bitconverts producing v8i8 from other 64-bit FPR types.
// Little-endian: the register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [v1i64, v2i32, v4i16, f64, v2f32, v1f64, v4f16, v4bf16] in
    def : Pat<(v8i8 (bitconvert (SrcVT FPR64:$src))), (v8i8 FPR64:$src)>;
}
// Big-endian: a byte-granular REV whose container width is the source
// element width.
let Predicates = [IsBE] in {
  foreach SrcVT = [v1i64, f64, v1f64] in
    def : Pat<(v8i8 (bitconvert (SrcVT FPR64:$src))),
              (v8i8 (REV64v8i8 FPR64:$src))>;
  foreach SrcVT = [v2i32, v2f32] in
    def : Pat<(v8i8 (bitconvert (SrcVT FPR64:$src))),
              (v8i8 (REV32v8i8 FPR64:$src))>;
  foreach SrcVT = [v4i16, v4f16, v4bf16] in
    def : Pat<(v8i8 (bitconvert (SrcVT FPR64:$src))),
              (v8i8 (REV16v8i8 FPR64:$src))>;
}
8595
// Bitconverts producing f64 from 64-bit vector types.
// Little-endian: the register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [v2i32, v4i16, v2f32, v8i8, v4f16, v4bf16] in
    def : Pat<(f64 (bitconvert (SrcVT FPR64:$src))), (f64 FPR64:$src)>;
}
// Big-endian: one REV64 at the source element width.
let Predicates = [IsBE] in {
  foreach SrcVT = [v2i32, v2f32] in
    def : Pat<(f64 (bitconvert (SrcVT FPR64:$src))),
              (f64 (REV64v2i32 FPR64:$src))>;
  foreach SrcVT = [v4i16, v4f16, v4bf16] in
    def : Pat<(f64 (bitconvert (SrcVT FPR64:$src))),
              (f64 (REV64v4i16 FPR64:$src))>;
  def : Pat<(f64 (bitconvert (v8i8 FPR64:$src))),
            (f64 (REV64v8i8 FPR64:$src))>;
}
// Single-element sources: same pattern regardless of endianness.
foreach SrcVT = [v1i64, v1f64] in
  def : Pat<(f64 (bitconvert (SrcVT FPR64:$src))), (f64 FPR64:$src)>;
8620
// Bitconverts producing v1f64 from other 64-bit FPR types.
// Little-endian: the register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [v2i32, v4i16, v8i8, v2f32, v4f16, v4bf16] in
    def : Pat<(v1f64 (bitconvert (SrcVT FPR64:$src))), (v1f64 FPR64:$src)>;
}
// Big-endian: one REV64 at the source element width.
let Predicates = [IsBE] in {
  foreach SrcVT = [v2i32, v2f32] in
    def : Pat<(v1f64 (bitconvert (SrcVT FPR64:$src))),
              (v1f64 (REV64v2i32 FPR64:$src))>;
  foreach SrcVT = [v4i16, v4f16, v4bf16] in
    def : Pat<(v1f64 (bitconvert (SrcVT FPR64:$src))),
              (v1f64 (REV64v4i16 FPR64:$src))>;
  def : Pat<(v1f64 (bitconvert (v8i8 FPR64:$src))),
            (v1f64 (REV64v8i8 FPR64:$src))>;
}
// Single-element sources: same pattern regardless of endianness.
foreach SrcVT = [v1i64, f64] in
  def : Pat<(v1f64 (bitconvert (SrcVT FPR64:$src))), (v1f64 FPR64:$src)>;
8645
// Bitconverts producing v2f32 from other 64-bit FPR types.
// Little-endian: the register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [v1i64, v4i16, v8i8, v1f64, f64, v4f16, v4bf16] in
    def : Pat<(v2f32 (bitconvert (SrcVT FPR64:$src))), (v2f32 FPR64:$src)>;
}
// Big-endian: a single REV chosen per source type.
let Predicates = [IsBE] in {
  foreach SrcVT = [v1i64, v1f64, f64] in
    def : Pat<(v2f32 (bitconvert (SrcVT FPR64:$src))),
              (v2f32 (REV64v2i32 FPR64:$src))>;
  foreach SrcVT = [v4i16, v4f16, v4bf16] in
    def : Pat<(v2f32 (bitconvert (SrcVT FPR64:$src))),
              (v2f32 (REV32v4i16 FPR64:$src))>;
  def : Pat<(v2f32 (bitconvert (v8i8 FPR64:$src))),
            (v2f32 (REV32v8i8 FPR64:$src))>;
}
// Same element count and width: no predicate, no REV.
def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
8672
// Bitconverts producing f128 from 128-bit vector types.
// Little-endian: the register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [v2i64, v4i32, v8i16, v2f64, v4f32, v8f16, v8bf16, v16i8] in
    def : Pat<(f128 (bitconvert (SrcVT FPR128:$src))), (f128 FPR128:$src)>;
}
// Big-endian: every source goes through EXTv16i8 of the value with itself at
// byte offset 8; sources with elements narrower than 64 bits are first passed
// through REV64 at their element width.
let Predicates = [IsBE] in {
  foreach SrcVT = [v2i64, v2f64] in
    def : Pat<(f128 (bitconvert (SrcVT FPR128:$src))),
              (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
  foreach SrcVT = [v4i32, v4f32] in
    def : Pat<(f128 (bitconvert (SrcVT FPR128:$src))),
              (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
                              (REV64v4i32 FPR128:$src), (i32 8)))>;
  foreach SrcVT = [v8i16, v8f16, v8bf16] in
    def : Pat<(f128 (bitconvert (SrcVT FPR128:$src))),
              (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
                              (REV64v8i16 FPR128:$src), (i32 8)))>;
  def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
            (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
                            (REV64v16i8 FPR128:$src), (i32 8)))>;
}
8707
// Bitconverts producing v2f64 from other 128-bit FPR types.
// Little-endian: the register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [f128, v4i32, v8i16, v8f16, v8bf16, v16i8, v4f32] in
    def : Pat<(v2f64 (bitconvert (SrcVT FPR128:$src))), (v2f64 FPR128:$src)>;
}
// Big-endian: f128 uses EXTv16i8 at byte offset 8; vector sources use one
// REV64 at the source element width.
let Predicates = [IsBE] in {
  def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))),
            (v2f64 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
  foreach SrcVT = [v4i32, v4f32] in
    def : Pat<(v2f64 (bitconvert (SrcVT FPR128:$src))),
              (v2f64 (REV64v4i32 FPR128:$src))>;
  foreach SrcVT = [v8i16, v8f16, v8bf16] in
    def : Pat<(v2f64 (bitconvert (SrcVT FPR128:$src))),
              (v2f64 (REV64v8i16 FPR128:$src))>;
  def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
            (v2f64 (REV64v16i8 FPR128:$src))>;
}
// Same element count and width: no predicate, no REV.
def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
8735
// Bitconverts producing v4f32 from other 128-bit FPR types.
// Little-endian: the register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [f128, v8i16, v8f16, v8bf16, v16i8, v2i64, v2f64] in
    def : Pat<(v4f32 (bitconvert (SrcVT FPR128:$src))), (v4f32 FPR128:$src)>;
}
// Big-endian: f128 needs REV64 plus EXTv16i8 at byte offset 8; vector sources
// need a single REV.
let Predicates = [IsBE] in {
  def : Pat<(v4f32 (bitconvert (f128 FPR128:$src))),
            (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                             (REV64v4i32 FPR128:$src), (i32 8)))>;
  foreach SrcVT = [v8i16, v8f16, v8bf16] in
    def : Pat<(v4f32 (bitconvert (SrcVT FPR128:$src))),
              (v4f32 (REV32v8i16 FPR128:$src))>;
  def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
            (v4f32 (REV32v16i8 FPR128:$src))>;
  foreach SrcVT = [v2i64, v2f64] in
    def : Pat<(v4f32 (bitconvert (SrcVT FPR128:$src))),
              (v4f32 (REV64v4i32 FPR128:$src))>;
}
// Same element count and width: no predicate, no REV.
def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
8763
// Bitconverts producing v2i64 from other 128-bit FPR types.
// Little-endian: the register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [f128, v4i32, v8i16, v16i8, v4f32, v8f16, v8bf16] in
    def : Pat<(v2i64 (bitconvert (SrcVT FPR128:$src))), (v2i64 FPR128:$src)>;
}
// Big-endian: f128 uses EXTv16i8 at byte offset 8; vector sources use one
// REV64 at the source element width.
let Predicates = [IsBE] in {
  def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))),
            (v2i64 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
  foreach SrcVT = [v4i32, v4f32] in
    def : Pat<(v2i64 (bitconvert (SrcVT FPR128:$src))),
              (v2i64 (REV64v4i32 FPR128:$src))>;
  foreach SrcVT = [v8i16, v8f16, v8bf16] in
    def : Pat<(v2i64 (bitconvert (SrcVT FPR128:$src))),
              (v2i64 (REV64v8i16 FPR128:$src))>;
  def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
            (v2i64 (REV64v16i8 FPR128:$src))>;
}
// Same element count and width: no predicate, no REV.
def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
8791
// Bitconverts producing v4i32 from other 128-bit FPR types.
// Little-endian: the register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [f128, v2i64, v8i16, v16i8, v2f64, v8f16, v8bf16] in
    def : Pat<(v4i32 (bitconvert (SrcVT FPR128:$src))), (v4i32 FPR128:$src)>;
}
// Big-endian: f128 needs REV64 plus EXTv16i8 at byte offset 8; vector sources
// need a single REV.
let Predicates = [IsBE] in {
  def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))),
            (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
                             (REV64v4i32 FPR128:$src),
                             (i32 8)))>;
  foreach SrcVT = [v2i64, v2f64] in
    def : Pat<(v4i32 (bitconvert (SrcVT FPR128:$src))),
              (v4i32 (REV64v4i32 FPR128:$src))>;
  foreach SrcVT = [v8i16, v8f16, v8bf16] in
    def : Pat<(v4i32 (bitconvert (SrcVT FPR128:$src))),
              (v4i32 (REV32v8i16 FPR128:$src))>;
  def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
            (v4i32 (REV32v16i8 FPR128:$src))>;
}
// Same element count and width: no predicate, no REV.
def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
8820
// Bitconverts producing v8i16 from other 128-bit FPR types.
// Little-endian: the register is reused unchanged.
let Predicates = [IsLE] in {
  foreach SrcVT = [f128, v2i64, v4i32, v16i8, v2f64, v4f32] in
    def : Pat<(v8i16 (bitconvert (SrcVT FPR128:$src))), (v8i16 FPR128:$src)>;
}
// Big-endian: f128 needs REV64 plus EXTv16i8 at byte offset 8; vector sources
// need a single REV.
let Predicates = [IsBE] in {
  def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))),
            (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
                             (REV64v8i16 FPR128:$src),
                             (i32 8)))>;
  foreach SrcVT = [v2i64, v2f64] in
    def : Pat<(v8i16 (bitconvert (SrcVT FPR128:$src))),
              (v8i16 (REV64v8i16 FPR128:$src))>;
  foreach SrcVT = [v4i32, v4f32] in
    def : Pat<(v8i16 (bitconvert (SrcVT FPR128:$src))),
              (v8i16 (REV32v8i16 FPR128:$src))>;
  def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
            (v8i16 (REV16v16i8 FPR128:$src))>;
}
// Same element count and width: no predicate, no REV.
foreach SrcVT = [v8f16, v8bf16] in
  def : Pat<(v8i16 (bitconvert (SrcVT FPR128:$src))), (v8i16 FPR128:$src)>;
8847
8848let Predicates = [IsLE] in {
8849def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))), (v8f16 FPR128:$src)>;
8850def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
8851def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
8852def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
8853def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
8854def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
8855
8856def : Pat<(v8bf16 (bitconvert (f128  FPR128:$src))), (v8bf16 FPR128:$src)>;
8857def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
8858def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
8859def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
8860def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
8861def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
8862}
// Big-endian: a bitconvert into v8f16 / v8bf16 is selected to a REV (plus an
// EXT for f128) chosen by the source element width.
let Predicates = [IsBE] in {
foreach DstVT = [v8f16, v8bf16] in {
  // f128 source: REV64 on 16-bit lanes, then EXT by 8 bytes of the result
  // with itself.
  def : Pat<(DstVT (bitconvert (f128 FPR128:$src))),
            (DstVT (EXTv16i8 (REV64v8i16 FPR128:$src),
                             (REV64v8i16 FPR128:$src),
                             (i32 8)))>;

  // 64-bit element sources.
  foreach SrcVT = [v2i64, v2f64] in
  def : Pat<(DstVT (bitconvert (SrcVT FPR128:$src))),
            (DstVT (REV64v8i16 FPR128:$src))>;

  // 32-bit element sources.
  foreach SrcVT = [v4i32, v4f32] in
  def : Pat<(DstVT (bitconvert (SrcVT FPR128:$src))),
            (DstVT (REV32v8i16 FPR128:$src))>;

  // 8-bit element source.
  def : Pat<(DstVT (bitconvert (v16i8 FPR128:$src))),
            (DstVT (REV16v16i8 FPR128:$src))>;
}
}
// v8i16 -> v8f16 / v8bf16: the source register is used unchanged. No
// endianness predicate is attached to these patterns.
foreach DstVT = [v8f16, v8bf16] in
def : Pat<(DstVT (bitconvert (v8i16 FPR128:$src))), (DstVT FPR128:$src)>;
8896
// Little-endian: a bitconvert into v16i8 from any other 128-bit type selects
// to the source register itself.
let Predicates = [IsLE] in {
foreach SrcVT = [f128, v2i64, v4i32, v8i16, v2f64, v4f32, v8f16, v8bf16] in {
def : Pat<(v16i8 (bitconvert (SrcVT FPR128:$src))), (v16i8 FPR128:$src)>;
}
}
// Big-endian: a bitconvert into v16i8 is selected to a byte-wise REV (plus an
// EXT for f128) chosen by the source element width.
let Predicates = [IsBE] in {
  // f128 source: REV64 on bytes, then EXT by 8 bytes of the result with
  // itself.
  def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))),
            (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
                             (REV64v16i8 FPR128:$src),
                             (i32 8)))>;

  // 64-bit element sources.
  foreach SrcVT = [v2i64, v2f64] in
  def : Pat<(v16i8 (bitconvert (SrcVT FPR128:$src))),
            (v16i8 (REV64v16i8 FPR128:$src))>;

  // 32-bit element sources.
  foreach SrcVT = [v4i32, v4f32] in
  def : Pat<(v16i8 (bitconvert (SrcVT FPR128:$src))),
            (v16i8 (REV32v16i8 FPR128:$src))>;

  // 16-bit element sources.
  foreach SrcVT = [v8i16, v8f16, v8bf16] in
  def : Pat<(v16i8 (bitconvert (SrcVT FPR128:$src))),
            (v16i8 (REV16v16i8 FPR128:$src))>;
}
8927
// extract_subvector at index 0 of a 128-bit vector register selects to a read
// of its dsub subregister, for every 64-bit result type listed.
foreach HalfVT = [v4i16, v8i8, v2f32, v4f16, v4bf16, v2i32, v1i64, v1f64] in
def : Pat<(HalfVT (extract_subvector V128:$Rn, (i64 0))),
          (EXTRACT_SUBREG V128:$Rn, dsub)>;
8944
// Extracting the high 64-bit half of a 128-bit vector: duplicate 64-bit lane 1
// and read the dsub subregister of the result.
//
// The extract_subvector index counts elements of the source vector, so the
// high half starts at element NumElts / 2 (8, 4, 2 and 1 respectively). This
// matches the indices used by the AArch64addp and dot-product patterns in
// this file. An index of 1 for the multi-element result types would never
// describe the high half.
def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
8953
// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.

// One such pattern: NarrowVT (in an FPR64) is inserted at index 0 of an undef
// vector; the result is WideVT built from IMPLICIT_DEF with the source placed
// in dsub. IdxTy is the type of the index operand.
class InsertSubvectorUndefPat<ValueType WideVT, ValueType NarrowVT,
                              ValueType IdxTy>
  : Pat<(insert_subvector undef, (NarrowVT FPR64:$src), (IdxTy 0)),
        (INSERT_SUBREG (WideVT (IMPLICIT_DEF)), FPR64:$src, dsub)>;

// Emits the pattern for every 64-bit vector type, for index type Ty.
multiclass InsertSubvectorUndef<ValueType Ty> {
  def : InsertSubvectorUndefPat<v2i64,  v1i64,  Ty>;
  def : InsertSubvectorUndefPat<v2f64,  v1f64,  Ty>;
  def : InsertSubvectorUndefPat<v4i32,  v2i32,  Ty>;
  def : InsertSubvectorUndefPat<v4f32,  v2f32,  Ty>;
  def : InsertSubvectorUndefPat<v8i16,  v4i16,  Ty>;
  def : InsertSubvectorUndefPat<v8f16,  v4f16,  Ty>;
  def : InsertSubvectorUndefPat<v8bf16, v4bf16, Ty>;
  def : InsertSubvectorUndefPat<v16i8,  v8i8,   Ty>;
}

// The index operand may be either i32 or i64.
defm : InsertSubvectorUndef<i32>;
defm : InsertSubvectorUndef<i64>;
8977
// Use the scalar pair-wise add instructions when summing lanes 0 and 1 of a
// v2i64, v2f64, v4f32 or v8f16 vector.

// Integer: both lanes of a v2i64.
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
          (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;

// Floating point: both lanes of a v2f64.
def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
                         (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
          (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;

// vector_extract on 64-bit vectors gets promoted to a 128 bit vector,
// so we match on v4f32 here, not v2f32. This will also catch adding
// the low two lanes of a true v4f32 vector.
def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                    (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;

// Same shape for the low two lanes of a v8f16 vector.
def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
                    (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
          (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
8995
// Prefer using the bottom lanes of addp Rn, Rn compared to
// addp extractlow(Rn), extracthigh(Rn)
//
// PairOp is the 64-bit pairwise-add node, Wide128 the 128-bit instruction,
// and HiIdx the element index at which the high half of FullVT starts.
class PairwiseAddHalvesPat<SDPatternOperator PairOp, Instruction Wide128,
                           ValueType HalfVT, ValueType FullVT, int HiIdx>
  : Pat<(PairOp (HalfVT (extract_subvector (FullVT FPR128:$Rn), (i64 0))),
                (HalfVT (extract_subvector (FullVT FPR128:$Rn), (i64 HiIdx)))),
        (HalfVT (EXTRACT_SUBREG (Wide128 $Rn, $Rn), dsub))>;

// Integer pairwise add.
def : PairwiseAddHalvesPat<AArch64addp, ADDPv4i32, v2i32, v4i32, 2>;
def : PairwiseAddHalvesPat<AArch64addp, ADDPv8i16, v4i16, v8i16, 4>;
def : PairwiseAddHalvesPat<AArch64addp, ADDPv16i8, v8i8,  v16i8, 8>;

// Floating-point pairwise add.
def : PairwiseAddHalvesPat<AArch64faddp, FADDPv4f32, v2f32, v4f32, 2>;
def : PairwiseAddHalvesPat<AArch64faddp, FADDPv8f16, v4f16, v8f16, 4>;
9014
// Scalar 64-bit shifts in FPR64 registers.
// Each NEON shift intrinsic on i64 operands held in FPR64 selects to the
// corresponding v1i64 instruction.
class ScalarShiftI64Pat<SDPatternOperator ShiftOp, Instruction Inst>
  : Pat<(i64 (ShiftOp (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
        (Inst FPR64:$Rn, FPR64:$Rm)>;

def : ScalarShiftI64Pat<int_aarch64_neon_sshl,  SSHLv1i64>;
def : ScalarShiftI64Pat<int_aarch64_neon_ushl,  USHLv1i64>;
def : ScalarShiftI64Pat<int_aarch64_neon_srshl, SRSHLv1i64>;
def : ScalarShiftI64Pat<int_aarch64_neon_urshl, URSHLv1i64>;
9024
// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
let Predicates = [IsLE], AddedComplexity = 15 in {

// 128-bit vector: stored with STNPDi as the dsub subregister followed by
// 64-bit lane 1.
class NTStore128Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR128:$Rt),
                        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
              (DUPi64 FPR128:$Rt, (i64 1)),
              GPR64sp:$Rn, simm7s8:$offset)>;

foreach Ty = [v2i64, v4i32, v8i16, v16i8] in
def : NTStore128Pat<Ty>;

// 64-bit vector: stored with STNPSi as the ssub subregister followed by
// 32-bit lane 1 (read after widening the value with SUBREG_TO_REG).
class NTStore64Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR64:$Rt),
                        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
              (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
              GPR64sp:$Rn, simm7s4:$offset)>;

// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
foreach Ty = [v1f64, v1i64, v2i32, v4i16, v8i8] in
def : NTStore64Pat<Ty>;

// 64-bit GPR: stored with STNPWi as the sub_32 subregister followed by
// bits 63:32 (extracted with UBFMXri 32, 63).
def : Pat<(nontemporalstore GPR64:$Rt,
                            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                  GPR64sp:$Rn, simm7s4:$offset)>;

} // Predicates = [IsLE], AddedComplexity = 15
9063
// Tail call return handling. These are all compiler pseudo-instructions,
// so no encoding information or anything like that.

// Common shape of the tail-call pseudos: no outputs, a destination operand of
// kind DstOp and an i32 immediate $FPDiff, scheduled as WriteBrReg.
class TCReturnPseudo<DAGOperand DstOp>
  : Pseudo<(outs), (ins DstOp:$dst, i32imm:$FPDiff), []>,
    Sched<[WriteBrReg]>;

let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  // Destination given as an i64 immediate operand.
  def TCRETURNdi : TCReturnPseudo<i64imm>;

  // Destination in a tcGPR64 register.
  def TCRETURNri : TCReturnPseudo<tcGPR64>;

  // Indirect tail-call with any register allowed, used by MachineOutliner when
  // this is proven safe.
  // FIXME: If we have to add any more hacks like this, we should instead relax
  // some verifier checks for outlined functions.
  def TCRETURNriALL : TCReturnPseudo<GPR64>;

  // Indirect tail-call limited to only use registers (x16 and x17) which are
  // allowed to tail-call a "BTI c" instruction.
  def TCRETURNriBTI : TCReturnPseudo<rtcGPR64>;
}
9082
// Selection of AArch64tcret onto the tail-call pseudos.

// Direct targets: a global address or an external symbol both select
// TCRETURNdi.
// NOTE(review): the global-address pattern names its output operand with
// texternalsym rather than tglobaladdr; kept exactly as written - confirm
// this is intentional.
def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;

// Register targets: tcGPR64 when NotUseBTI holds, rtcGPR64 when UseBTI holds.
def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
      Requires<[NotUseBTI]>;
def : Pat<(AArch64tcret rtcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNriBTI rtcGPR64:$dst, imm:$FPDiff)>,
      Requires<[UseBTI]>;
9093
// Pseudo producing a GPR64 from an i64 immediate $sym operand; it is what
// AArch64LocalRecover of an mcsym selects to. No scheduling resources.
def MOVMCSym
    : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>,
      Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)),
          (MOVMCSym mcsym:$sym)>;
9096
// Extracting lane zero is a special case where we can just use a plain
// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
// rest of the compiler, especially the register allocator and copy propagation,
// to reason about, so is preferred when it's possible to use it.
let AddedComplexity = 10 in {
  // 64-bit lane of a 128-bit vector -> dsub.
  def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))),
            (EXTRACT_SUBREG V128:$V, dsub)>;
  // 32-bit lane of a 128-bit vector -> ssub.
  def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))),
            (EXTRACT_SUBREG V128:$V, ssub)>;
  // 32-bit lane of a 64-bit vector -> ssub.
  def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))),
            (EXTRACT_SUBREG V64:$V, ssub)>;
}
9106
// dot_v4i8

// Product of two ldop loads taken at the same offset from two base addresses.
class mul_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$BaseA, node:$BaseB, node:$Off),
          (mul (ldop (add node:$BaseA, node:$Off)),
               (ldop (add node:$BaseB, node:$Off)))>;

// Same product with no offset added to either base address.
class mulz_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$BaseA, node:$BaseB),
          (mul (ldop node:$BaseA), (ldop node:$BaseB))>;

// Output fragment: LDRWui at offset 0 from $Addr, copied to FPR32 and placed
// in the ssub subregister of an otherwise undefined v2i32.
def load_v4i8 :
  OutPatFrag<(ops node:$Addr),
             (INSERT_SUBREG
               (v2i32 (IMPLICIT_DEF)),
               (i32 (COPY_TO_REGCLASS (LDRWui node:$Addr, (i64 0)), FPR32)),
               ssub)>;

// The sum of the four element products at offsets 3, 2, 1 and 0 selects to
// DOT with a zero accumulator (DUPv2i32gpr WZR); the i32 result is the sub_32
// subregister of the DOT result. Requires HasDotProd.
class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
  Pat<(i32
        (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
             (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
                  (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
                       (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
      (EXTRACT_SUBREG
        (i64 (DOT (DUPv2i32gpr WZR),
                  (load_v4i8 GPR64sp:$Rn),
                  (load_v4i8 GPR64sp:$Rm))),
        sub_32)>,
  Requires<[HasDotProd]>;
9132
// dot_v8i8

// v4i16 subvector, starting at $Idx, of the operand extended to v8i16.
class ee_v8i8<SDPatternOperator extend> :
  PatFrag<(ops node:$Vec, node:$Idx),
          (v4i16 (extract_subvector (v8i16 (extend node:$Vec)), node:$Idx))>;

// mulop applied to the same v4i16 slice of both extended operands.
class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$LHS, node:$RHS, node:$Idx),
          (mulop (v4i16 (ee_v8i8<extend> node:$LHS, node:$Idx)),
                 (v4i16 (ee_v8i8<extend> node:$RHS, node:$Idx)))>;

// Input shape: element 0 of the AArch64uaddv of the sum of the products of
// the slices at indices 0 and 4.
class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$LHS, node:$RHS),
          (i32
            (extractelt
              (v4i32
                (AArch64uaddv
                  (add (mul_v8i8<mulop, extend> node:$LHS, node:$RHS, (i64 0)),
                       (mul_v8i8<mulop, extend> node:$LHS, node:$RHS, (i64 4))))),
              (i64 0)))>;

// vaddv_[su]32 is special; -> ADDP Vd.2S,Vn.2S,Vm.2S; return Vd.s[0];Vn==Vm
def VADDV_32 : OutPatFrag<(ops node:$Src), (ADDPv2i32 node:$Src, node:$Src)>;

// Output shape: DOT with a zero accumulator (DUPv2i32gpr WZR), reduced with
// VADDV_32; the result is read from sub_32.
class odot_v8i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (EXTRACT_SUBREG
               (VADDV_32
                 (i64 (DOT (DUPv2i32gpr WZR),
                           (v8i8 node:$Vm),
                           (v8i8 node:$Vn)))),
               sub_32)>;

// Ties the input and output shapes together for V64 operands. Requires
// HasDotProd.
class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
               SDPatternOperator extend> :
  Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
      (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
  Requires<[HasDotProd]>;
9168
// dot_v16i8

// Take the v8i8 subvector of $Vec at $Outer, extend it to v8i16, then take
// the v4i16 subvector of that at $Inner.
class ee_v16i8<SDPatternOperator extend> :
  PatFrag<(ops node:$Vec, node:$Outer, node:$Inner),
          (v4i16
            (extract_subvector
              (v8i16 (extend (v8i8 (extract_subvector node:$Vec, node:$Outer)))),
              node:$Inner))>;

// mulop applied to the same slice of both operands, giving a v4i32.
class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$LHS, node:$RHS, node:$Outer, node:$Inner),
          (v4i32
            (mulop (v4i16 (ee_v16i8<extend> node:$LHS, node:$Outer, node:$Inner)),
                   (v4i16 (ee_v16i8<extend> node:$RHS, node:$Outer, node:$Inner))))>;

// Input shape: element 0 of the AArch64uaddv of the sum of the four slice
// products, at (outer, inner) = (0,0), (8,0), (0,4) and (8,4).
class idot_v16i8<SDPatternOperator m, SDPatternOperator x> :
  PatFrag<(ops node:$LHS, node:$RHS),
          (i32
            (extractelt
              (v4i32
                (AArch64uaddv
                  (add
                    (add (mul_v16i8<m, x> node:$LHS, node:$RHS, (i64 0), (i64 0)),
                         (mul_v16i8<m, x> node:$LHS, node:$RHS, (i64 8), (i64 0))),
                    (add (mul_v16i8<m, x> node:$LHS, node:$RHS, (i64 0), (i64 4)),
                         (mul_v16i8<m, x> node:$LHS, node:$RHS, (i64 8), (i64 4)))))),
              (i64 0)))>;

// Output shape: DOT with a zero accumulator (DUPv4i32gpr WZR), reduced with
// ADDVv4i32v.
class odot_v16i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (i32 (ADDVv4i32v
                    (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>;

// Ties the input and output shapes together for V128 operands. Requires
// HasDotProd.
class dot_v16i8<Instruction DOT, SDPatternOperator mulop,
                SDPatternOperator extend> :
  Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn),
      (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>,
  Requires<[HasDotProd]>;
9203
// Signed and unsigned instantiations of the dot-product patterns, all with
// AddedComplexity = 10.
let AddedComplexity = 10 in {
  // Four byte loads: sign- or zero-extending.
  def : dot_v4i8<SDOTv8i8, sextloadi8>;
  def : dot_v4i8<UDOTv8i8, zextloadi8>;

  // 64-bit vector operands.
  def : dot_v8i8<SDOTv8i8, AArch64smull, sext>;
  def : dot_v8i8<UDOTv8i8, AArch64umull, zext>;

  // 128-bit vector operands.
  def : dot_v16i8<SDOTv16i8, AArch64smull, sext>;
  def : dot_v16i8<UDOTv16i8, AArch64umull, zext>;

  // FIXME: add patterns to generate vector by element dot product.
  // FIXME: add SVE dot-product patterns.
}
9215
// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
// so that it can be used as input to inline asm, and vice versa.
def LS64_BUILD
    : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
def LS64_EXTRACT
    : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;

// Build: the eight GPR64 operands become subregisters x8sub_0..x8sub_7 of a
// GPR64x8Class register sequence.
def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
                             GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
          (REG_SEQUENCE GPR64x8Class,
              $x0, x8sub_0,
              $x1, x8sub_1,
              $x2, x8sub_2,
              $x3, x8sub_3,
              $x4, x8sub_4,
              $x5, x8sub_5,
              $x6, x8sub_6,
              $x7, x8sub_7)>;

// Extract: index N reads subregister x8sub_N.
foreach idx = 0-7 in {
  def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 idx))),
            (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#idx))>;
}
9229
// 64-byte load/store instructions and their intrinsic patterns, all gated on
// HasLS64.
let Predicates = [HasLS64] in {
  def LD64B
      : LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn), (outs GPR64x8:$Rt)>;
  def ST64B
      : LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn), (outs)>;
  def ST64BV  : Store64BV<0b011, "st64bv">;
  def ST64BV0 : Store64BV<0b010, "st64bv0">;

  // The store intrinsics take the address followed by eight GPR64 values; the
  // values are packed into a GPR64x8Class register sequence for the
  // instruction.
  class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
    : Pat<(intrinsic GPR64sp:$addr,
                     GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
                     GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
          (instruction
            (REG_SEQUENCE GPR64x8Class,
                $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
                $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7),
            $addr)>;

  def : ST64BPattern<int_aarch64_st64b,   ST64B>;
  def : ST64BPattern<int_aarch64_st64bv,  ST64BV>;
  def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
}
9246
// MOPS instructions (HasMOPS). The "p" forms are marked as defining NZCV.
let Predicates = [HasMOPS], Defs = [NZCV] in {
  defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;
  defm CPYP  : MOPSMemoryMoveInsns<0b00, "cpyp">;
  defm SETP  : MOPSMemorySetInsns<0b00, "setp">;
}

// The "m" and "e" forms are marked as using NZCV.
let Predicates = [HasMOPS], Uses = [NZCV] in {
  defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
  defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;

  defm CPYM  : MOPSMemoryMoveInsns<0b01, "cpym">;
  defm CPYE  : MOPSMemoryMoveInsns<0b10, "cpye">;

  defm SETM  : MOPSMemorySetInsns<0b01, "setm">;
  defm SETE  : MOPSMemorySetInsns<0b10, "sete">;
}

// Tag-setting variants additionally require HasMTE.
let Predicates = [HasMOPS, HasMTE], Defs = [NZCV] in {
  defm SETGP     : MOPSMemorySetTaggingInsns<0b00, "setgp">;
}
let Predicates = [HasMOPS, HasMTE], Uses = [NZCV] in {
  defm SETGM     : MOPSMemorySetTaggingInsns<0b01, "setgm">;
  // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
  defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
}
9276
// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
// The type profile declares two results and three operands and constrains
// value indices 0, 1 and 2 to integer types.
def SDT_AArch64mops
    : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;

def AArch64mops_memset
    : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
def AArch64mops_memset_tagging
    : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
def AArch64mops_memcopy
    : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
def AArch64mops_memmove
    : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;
9284
// MOPS operations always contain three 4-byte instructions
// (hence Size = 12 on every pseudo here). All of them define NZCV and may
// store; each result register is tied to the matching input register.
let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
  // Copy and move also read memory.
  let mayLoad = 1 in {
    def MOPSMemoryCopyPseudo
        : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                 (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                 [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">,
          Sched<[]>;
    def MOPSMemoryMovePseudo
        : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                 (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                 [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">,
          Sched<[]>;
  }
  // Set does not read memory; $Rm is an input only.
  let mayLoad = 0 in {
    def MOPSMemorySetPseudo
        : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                 (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                 [], "$Rd = $Rd_wb,$Rn = $Rn_wb">,
          Sched<[]>;
  }
}

// Tag-setting variant: same operands as the set pseudo, additionally gated on
// HasMTE.
let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12,
    mayLoad = 0, mayStore = 1 in {
  def MOPSMemorySetTaggingPseudo
      : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
               (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
               [], "$Rd = $Rd_wb,$Rn = $Rn_wb">,
        Sched<[]>;
}
9306
//-----------------------------------------------------------------------------
// v8.3 Pointer Authentication late patterns

let Predicates = [HasPAuth] in {
  // Blend with an immediate in 0..65535: MOVKXi with shift amount 48.
  def : Pat<(int_ptrauth_blend GPR64:$Rd, imm64_0_65535:$imm),
            (MOVKXi GPR64:$Rd, (trunc_imm imm64_0_65535:$imm), 48)>;

  // Blend with a register: BFMXri with immr = 16, imms = 15.
  def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
            (BFMXri GPR64:$Rd, GPR64:$Rn, 16, 15)>;
}
9316
9317//-----------------------------------------------------------------------------
9318
// This gets lowered into an instruction sequence of 20 bytes
// (Size = 20). The pseudo is codegen-only, may store, and is marked as
// defining X16 and X17.
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in {
  def StoreSwiftAsyncContext
      : Pseudo<(outs),
               (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
               []>,
        Sched<[]>;
}
9324
// ASSERT_ZEXT_BOOL node; it selects to its GPR32 operand unchanged, so no
// instruction is emitted for it.
def AArch64AssertZExtBool
    : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op), (i32 GPR32:$op)>;
9328
//===----------------------------===//
// 2022 Architecture Extensions:
//===----------------------------===//

// "clrbhb" is an alias of HINT 22. It is declared twice: unconditionally with
// a final InstAlias argument of 0, and under HasCLRBHB with a final argument
// of 1.
// NOTE(review): the final argument presumably controls whether the alias is
// used when printing - confirm against the InstAlias class definition.
def : InstAlias<"clrbhb",  (HINT 22), 0>;
let Predicates = [HasCLRBHB] in
def : InstAlias<"clrbhb",  (HINT 22), 1>;
9337
//===----------------------------------------------------------------------===//
// Translation Hardening Extension (FEAT_THE)
//===----------------------------------------------------------------------===//

// Read-check-write compare-and-swap.
defm RCW     : ReadCheckWriteCompareAndSwap;

// Read-check-write clr / set / swp operations.
defm RCWCLR  : ReadCheckWriteOperation<0b001, "clr">;
defm RCWSET  : ReadCheckWriteOperation<0b011, "set">;
defm RCWSWP  : ReadCheckWriteOperation<0b010, "swp">;
9346
//===----------------------------------------------------------------------===//
// General Data-Processing Instructions (FEAT_V94_DP)
//===----------------------------------------------------------------------===//
// Every instruction in this group requires HasCSSC.

// One-operand operations, tied to the abs / ctpop / cttz nodes.
defm ABS : OneOperandData<0b001000, "abs", abs>,   Requires<[HasCSSC]>;
defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
defm CTZ : OneOperandData<0b000110, "ctz", cttz>,  Requires<[HasCSSC]>;

// Signed / unsigned minimum and maximum.
defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>;
defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>;
defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>;
defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>;
9358
// RPRFM: "rprfm $Rt, $Rm, [$Rn]". The 6-bit $Rt operand is scattered over
// three encoding fields; the instruction is modelled as neither loading nor
// storing, but as having side effects.
def RPRFM
    : I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn),
        "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>,
      Sched<[]> {
  bits<6> Rt;
  bits<5> Rn;
  bits<5> Rm;

  // Encoding, most significant field first.
  let Inst{31-21} = 0b11111000101;
  let Inst{20-16} = Rm;
  let Inst{15}    = Rt{5};
  let Inst{14}    = 0b1;
  let Inst{13-12} = Rt{4-3};
  let Inst{11-10} = 0b10;
  let Inst{9-5}   = Rn;
  let Inst{4-3}   = 0b11;
  let Inst{2-0}   = Rt{2-0};

  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;

  // RPRFM overlaps with PRFM (reg), when the decoder method of PRFM returns
  // Fail, the decoder should attempt to decode RPRFM. This requires setting
  // the decoder namespace to "Fallback".
  let DecoderNamespace = "Fallback";
}
9383
//===----------------------------------------------------------------------===//
// 128-bit Atomics (FEAT_LSE128)
//===----------------------------------------------------------------------===//
// Each operation comes in four forms: plain and with the mnemonic suffixes
// "a", "al" and "l". The second LSE128Base argument distinguishes the forms
// (0b00, 0b10, 0b11 and 0b01 respectively).
let Predicates = [HasLSE128] in {
  // swpp
  def SWPP     : LSE128Base<0b000, 0b00, 0b1, "swpp">;
  def SWPPA    : LSE128Base<0b000, 0b10, 0b1, "swppa">;
  def SWPPAL   : LSE128Base<0b000, 0b11, 0b1, "swppal">;
  def SWPPL    : LSE128Base<0b000, 0b01, 0b1, "swppl">;

  // ldclrp
  def LDCLRP   : LSE128Base<0b001, 0b00, 0b0, "ldclrp">;
  def LDCLRPA  : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">;
  def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">;
  def LDCLRPL  : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">;

  // ldsetp
  def LDSETP   : LSE128Base<0b011, 0b00, 0b0, "ldsetp">;
  def LDSETPA  : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">;
  def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">;
  def LDSETPL  : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">;
}
9401
//===----------------------------------------------------------------------===//
// RCPC Instructions (FEAT_LRCPC3)
//===----------------------------------------------------------------------===//

let Predicates = [HasRCPC3] in {
  // Pair forms. The first three BaseLRCPC3IntegerLoadStorePair arguments are
  // size, opc and opc2; the writeback forms tie $Rn to $wback.
  def STILPWpre
      : BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000,
          (outs GPR64sp:$wback),
          (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn),
          "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
  def STILPXpre
      : BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000,
          (outs GPR64sp:$wback),
          (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn),
          "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
  def STILPW
      : BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001,
          (outs),
          (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn),
          "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def STILPX
      : BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001,
          (outs),
          (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn),
          "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPWpost
      : BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000,
          (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2),
          (ins GPR64sp:$Rn),
          "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">;
  def LDIAPPXpost
      : BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000,
          (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2),
          (ins GPR64sp:$Rn),
          "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">;
  def LDIAPPW
      : BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001,
          (outs GPR32:$Rt, GPR32:$Rt2),
          (ins GPR64sp0:$Rn),
          "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPX
      : BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001,
          (outs GPR64:$Rt, GPR64:$Rt2),
          (ins GPR64sp0:$Rn),
          "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;

  // Only the 64-bit, non-writeback forms are selected from DAG nodes.
  def : Pat<(AArch64ldiapp GPR64sp:$Rn),
            (LDIAPPX GPR64sp:$Rn)>;
  def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn),
            (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  // Aliases for when offset=0
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]",
                  (STILPW GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn)>;
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]",
                  (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  // Single-register writeback forms. The first two
  // BaseLRCPC3IntegerLoadStore arguments are size and opc.
  def STLRWpre
      : BaseLRCPC3IntegerLoadStore<0b10, 0b10,
          (outs GPR64sp:$wback),
          (ins GPR32:$Rt, GPR64sp:$Rn),
          "stlr", "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
  def STLRXpre
      : BaseLRCPC3IntegerLoadStore<0b11, 0b10,
          (outs GPR64sp:$wback),
          (ins GPR64:$Rt, GPR64sp:$Rn),
          "stlr", "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
  def LDAPRWpost
      : BaseLRCPC3IntegerLoadStore<0b10, 0b11,
          (outs GPR64sp:$wback, GPR32:$Rt),
          (ins GPR64sp:$Rn),
          "ldapr", "\t$Rt, [$Rn], #4", "$Rn = $wback">;
  def LDAPRXpost
      : BaseLRCPC3IntegerLoadStore<0b11, 0b11,
          (outs GPR64sp:$wback, GPR64:$Rt),
          (ins GPR64sp:$Rn),
          "ldapr", "\t$Rt, [$Rn], #8", "$Rn = $wback">;
}
9430
// FP/SIMD register forms of the RCPC3 instructions; these need both HasRCPC3
// and HasNEON.
let Predicates = [HasRCPC3, HasNEON] in {
  // Unscaled-offset stores. The first three template arguments are size, opc
  // and the register class.
  defm STLURb
      : LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8,
          (outs), (ins FPR8:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURh
      : LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16,
          (outs), (ins FPR16:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURs
      : LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32,
          (outs), (ins FPR32:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURd
      : LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64,
          (outs), (ins FPR64:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURq
      : LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128,
          (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;

  // Unscaled-offset loads.
  defm LDAPURb
      : LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8,
          (outs FPR8:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURh
      : LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16,
          (outs FPR16:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURs
      : LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32,
          (outs FPR32:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURd
      : LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64,
          (outs FPR64:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURq
      : LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128,
          (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;

  // Single-lane store / load. The first template argument is L. The load
  // ties its $Vt input to the $dst result.
  def STL1
      : LRCPC3NEONLdStSingle<0b0,
          (outs),
          (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn),
          "stl1", "">;
  def LDAP1
      : LRCPC3NEONLdStSingle<0b1,
          (outs VecListOned:$dst),
          (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn),
          "ldap1", "$Vt = $dst">;

  // Aliases for when offset=0
  def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]",
                  (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>;
}
9451
9452//===----------------------------------------------------------------------===//
9453// 128-bit System Instructions (FEAT_SYSINSTR128)
9454//===----------------------------------------------------------------------===//
9455let Predicates = [HasD128] in { // Everything in this scope requires the HasD128 predicate.
9456  def SYSPxt  : SystemPXtI<0, "sysp">;
9457
      // "sysp" form that takes a SyspXzrPairOperand as its last operand and
      // fixes encoding bits 4-0 to 0b11111.
9458  def SYSPxt_XZR
9459    : BaseSystemI<0, (outs),
9460        (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair),
9461        "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">,
9462      Sched<[WriteSys]>
9463  {
9464    // A custom decoder is required here. TableGen treats this instruction as having only
9465    // 4 encoded fields (why?), so the autogenerated decoder (decodeToMCInst) builds an MC
9466    // representation with 4 operands. When printing, however, the MC representation is
9467    // expected to have 5 operands (one extra for the XZR pair), because
9468    // AArch64InstPrinter::printInstruction in AArch64GenAsmWriter.inc is (maybe) derived
9469    // from the asm template and therefore wants to print 5 operands. Adding a `bits<5> xzr_pair`
9470    // field would not help: with no way to constrain it to 0b11111 here, it would overlap with SYSP.
9471    let DecoderMethod = "DecodeSyspXzrInstruction";
9472    bits<3> op1;
9473    bits<4> Cn;
9474    bits<4> Cm;
9475    bits<3> op2;
9476    let Inst{22}    = 0b1; // override BaseSystemI
9477    let Inst{20-19} = 0b01;
9478    let Inst{18-16} = op1;
9479    let Inst{15-12} = Cn;
9480    let Inst{11-8}  = Cm;
9481    let Inst{7-5}   = op2;
9482    let Inst{4-0}   = 0b11111; // constant: $xzr_pair has no encoded field (see decoder note above)
9483  }
9484
      // Alias: allows "sysp" to be written without the trailing pair operand; it
      // maps to SYSPxt_XZR with XZR supplied for that operand.
9485  def : InstAlias<"sysp $op1, $Cn, $Cm, $op2",
9486                  (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
9487}
9488
9489//---
9490// 128-bit System Registers (FEAT_SYSREG128)
9491//---
9492
9493// Instruction encoding:
9494//
9495//          31       22|21|20|19|18 16|15 12|11 8|7 5|4 0
9496// MRRS      1101010101| 1| 1|o0|  op1|   Cn|  Cm|op2| Rt
9497// MSRR      1101010101| 0| 1|o0|  op1|   Cn|  Cm|op2| Rt
9498
9499// Instruction syntax:
9500//
9501// MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>
9502// MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1>
9503//
9504// ...where t is even (X0, X2, etc).
9505
9506let Predicates = [HasD128] in { // Both definitions require the HasD128 predicate.
      // MRRS: the register pair $Rt is the only output; the system register
      // operand is the only input.
9507  def MRRS : RtSystemI128<1,
9508    (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg),
9509    "mrrs", "\t$Rt, $systemreg">
9510  {
9511    bits<16> systemreg;
9512    let Inst{20-5} = systemreg; // the 16-bit system register encoding fills bits 20-5
9513  }
9514
      // MSRR: no outputs; the system register operand and the register pair
      // $Rt are both inputs. Differs from MRRS in the first RtSystemI128
      // argument (0 here, 1 for MRRS) and in the operand order of the asm string.
9515  def MSRR : RtSystemI128<0,
9516    (outs),  (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt),
9517    "msrr", "\t$systemreg, $Rt">
9518  {
9519    bits<16> systemreg;
9520    let Inst{20-5} = systemreg; // same field placement as MRRS
9521  }
9522}
9523
9524//===----------------------------===//
9525// 2023 Architecture Extensions:
9526//===----------------------------===//
9527
9528let Predicates = [HasFP8] in {
      // Definitions gated on the HasFP8 predicate. The four *CVTL records share
      // one multiclass and differ only in its 2-bit first argument.
9529  defm F1CVTL  : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">;
9530  defm F2CVTL  : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">;
9531  defm BF1CVTL : SIMDMixedTwoVectorFP8<0b10, "bf1cvtl">;
9532  defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">;
      // Both FCVTN groups use the mnemonic "fcvtn" but come from different multiclasses.
9533  defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">;
9534  defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">;
      // NOTE(review): null_frag is passed as the last argument - presumably this
      // means no selection pattern is attached; confirm against SIMDThreeSameVectorFP.
9535  defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
9536} // End let Predicates = [HasFP8]
9537
9538let Predicates = [HasFAMINMAX] in {
     // Definitions gated on the HasFAMINMAX predicate. FAMAX and FAMIN differ
     // only in the first template argument (0b0 vs 0b1) and in the mnemonic.
9539 defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
9540 defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
9541} // End let Predicates = [HasFAMINMAX]
9542
9543let Predicates = [HasFP8FMA] in {
     // Definitions gated on the HasFP8FMA predicate.
     // First group: records with the "lane" suffix, built from the *Index multiclasses.
9544 defm FMLALBlane : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
9545 defm FMLALTlane : SIMDThreeSameVectorMLAIndex<0b1, "fmlalt">;
9546 defm FMLALLBBlane : SIMDThreeSameVectorMLALIndex<0b0, 0b00, "fmlallbb">;
9547 defm FMLALLBTlane : SIMDThreeSameVectorMLALIndex<0b0, 0b01, "fmlallbt">;
9548 defm FMLALLTBlane : SIMDThreeSameVectorMLALIndex<0b1, 0b00, "fmlalltb">;
9549 defm FMLALLTTlane : SIMDThreeSameVectorMLALIndex<0b1, 0b01, "fmlalltt">;
9550
     // Second group: the same mnemonics and selector arguments, built from the
     // non-Index multiclasses.
9551 defm FMLALB : SIMDThreeSameVectorMLA<0b0, "fmlalb">;
9552 defm FMLALT : SIMDThreeSameVectorMLA<0b1, "fmlalt">;
9553 defm FMLALLBB : SIMDThreeSameVectorMLAL<0b0, 0b00, "fmlallbb">;
9554 defm FMLALLBT : SIMDThreeSameVectorMLAL<0b0, 0b01, "fmlallbt">;
9555 defm FMLALLTB : SIMDThreeSameVectorMLAL<0b1, 0b00, "fmlalltb">;
9556 defm FMLALLTT : SIMDThreeSameVectorMLAL<0b1, 0b01, "fmlalltt">;
9557} // End let Predicates = [HasFP8FMA]
9558
9559let Predicates = [HasFP8DOT2] in {
     // "fdot" definitions gated on the HasFP8DOT2 predicate: one from the
     // ...DOT2Index multiclass (FDOTlane) and one from the ...DOT2 multiclass (FDOT).
9560 defm FDOTlane : SIMDThreeSameVectorFP8DOT2Index<"fdot">;
9561 defm FDOT : SIMDThreeSameVectorDOT2<"fdot">;
9562} // End let Predicates = [HasFP8DOT2]
9563
9564let Predicates = [HasFP8DOT4] in {
     // "fdot" definitions gated on the HasFP8DOT4 predicate, built from the
     // ...DOT4Index and ...DOT4 multiclasses.
     // NOTE(review): the defm prefixes FDOTlane/FDOT are also used under
     // HasFP8DOT2; presumably the multiclasses append distinct record suffixes
     // so the names do not collide - confirm in the multiclass definitions.
9565 defm FDOTlane : SIMDThreeSameVectorFP8DOT4Index<"fdot">;
9566 defm FDOT : SIMDThreeSameVectorDOT4<"fdot">;
9567} // End let Predicates = [HasFP8DOT4]
9568
9569//===----------------------------------------------------------------------===//
9570// Checked Pointer Arithmetic (FEAT_CPA)
9571//===----------------------------------------------------------------------===//
9572let Predicates = [HasCPA] in { // Everything in this scope requires the HasCPA predicate.
9573  // Scalar add/subtract: the first AddSubCPA argument is 0 for "addpt", 1 for "subpt".
9574  defm ADDPT : AddSubCPA<0, "addpt">;
9575  defm SUBPT : AddSubCPA<1, "subpt">;
9576
9577  // Scalar multiply-add/subtract: the first MulAccumCPA argument is 0 for "maddpt", 1 for "msubpt".
9578  def MADDPT : MulAccumCPA<0, "maddpt">;
9579  def MSUBPT : MulAccumCPA<1, "msubpt">;
9580} // End let Predicates = [HasCPA]
9581
9582include "AArch64InstrAtomics.td"
9583include "AArch64SVEInstrInfo.td"
9584include "AArch64SMEInstrInfo.td"
9585include "AArch64InstrGISel.td"
9586