//=- AArch64InstrInfo.td - Describe the AArch64 Instructions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// AArch64 Instruction definitions.
//
//===----------------------------------------------------------------------===//


//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
//

class AssemblerPredicateWithAll<dag cond, string name="">
    : AssemblerPredicate<(any_of FeatureAll, cond), name>;
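
// FeatureAll is the assembler-only "+all" feature, so wrapping a condition in
// AssemblerPredicateWithAll keeps MC permissive when everything is force
// enabled, while the C++ Predicate stays strict for ISel. A minimal sketch of
// how these predicates are consumed (HypotheticalInst is illustrative, not a
// def from this file):
//
//   let Predicates = [HasLSE] in
//   def HypotheticalInst : ...;  // rejected without +lse (or +all in MC)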

def HasV8_0a         : Predicate<"Subtarget->hasV8_0aOps()">,
                                 AssemblerPredicate<(all_of HasV8_0aOps), "armv8.0a">;
def HasV8_1a         : Predicate<"Subtarget->hasV8_1aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_1aOps), "armv8.1a">;
def HasV8_2a         : Predicate<"Subtarget->hasV8_2aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_2aOps), "armv8.2a">;
def HasV8_3a         : Predicate<"Subtarget->hasV8_3aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_3aOps), "armv8.3a">;
def HasV8_4a         : Predicate<"Subtarget->hasV8_4aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_4aOps), "armv8.4a">;
def HasV8_5a         : Predicate<"Subtarget->hasV8_5aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_5aOps), "armv8.5a">;
def HasV8_6a         : Predicate<"Subtarget->hasV8_6aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_6aOps), "armv8.6a">;
def HasV8_7a         : Predicate<"Subtarget->hasV8_7aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_7aOps), "armv8.7a">;
def HasV8_8a         : Predicate<"Subtarget->hasV8_8aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_8aOps), "armv8.8a">;
def HasV8_9a         : Predicate<"Subtarget->hasV8_9aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_9aOps), "armv8.9a">;
def HasV9_0a         : Predicate<"Subtarget->hasV9_0aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_0aOps), "armv9-a">;
def HasV9_1a         : Predicate<"Subtarget->hasV9_1aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_1aOps), "armv9.1a">;
def HasV9_2a         : Predicate<"Subtarget->hasV9_2aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_2aOps), "armv9.2a">;
def HasV9_3a         : Predicate<"Subtarget->hasV9_3aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_3aOps), "armv9.3a">;
def HasV9_4a         : Predicate<"Subtarget->hasV9_4aOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV9_4aOps), "armv9.4a">;
def HasV8_0r         : Predicate<"Subtarget->hasV8_0rOps()">,
                                 AssemblerPredicateWithAll<(all_of HasV8_0rOps), "armv8-r">;

def HasEL2VMSA       : Predicate<"Subtarget->hasEL2VMSA()">,
                       AssemblerPredicateWithAll<(all_of FeatureEL2VMSA), "el2vmsa">;

def HasEL3           : Predicate<"Subtarget->hasEL3()">,
                       AssemblerPredicateWithAll<(all_of FeatureEL3), "el3">;

def HasVH            : Predicate<"Subtarget->hasVH()">,
                       AssemblerPredicateWithAll<(all_of FeatureVH), "vh">;

def HasLOR           : Predicate<"Subtarget->hasLOR()">,
                       AssemblerPredicateWithAll<(all_of FeatureLOR), "lor">;

def HasPAuth         : Predicate<"Subtarget->hasPAuth()">,
                       AssemblerPredicateWithAll<(all_of FeaturePAuth), "pauth">;

def HasPAuthLR       : Predicate<"Subtarget->hasPAuthLR()">,
                       AssemblerPredicateWithAll<(all_of FeaturePAuthLR), "pauth-lr">;

def HasJS            : Predicate<"Subtarget->hasJS()">,
                       AssemblerPredicateWithAll<(all_of FeatureJS), "jsconv">;

def HasCCIDX         : Predicate<"Subtarget->hasCCIDX()">,
                       AssemblerPredicateWithAll<(all_of FeatureCCIDX), "ccidx">;

def HasComplxNum     : Predicate<"Subtarget->hasComplxNum()">,
                       AssemblerPredicateWithAll<(all_of FeatureComplxNum), "complxnum">;

def HasNV            : Predicate<"Subtarget->hasNV()">,
                       AssemblerPredicateWithAll<(all_of FeatureNV), "nv">;

def HasMPAM          : Predicate<"Subtarget->hasMPAM()">,
                       AssemblerPredicateWithAll<(all_of FeatureMPAM), "mpam">;

def HasDIT           : Predicate<"Subtarget->hasDIT()">,
                       AssemblerPredicateWithAll<(all_of FeatureDIT), "dit">;

def HasTRACEV8_4     : Predicate<"Subtarget->hasTRACEV8_4()">,
                       AssemblerPredicateWithAll<(all_of FeatureTRACEV8_4), "tracev8.4">;

def HasAM            : Predicate<"Subtarget->hasAM()">,
                       AssemblerPredicateWithAll<(all_of FeatureAM), "am">;

def HasSEL2          : Predicate<"Subtarget->hasSEL2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSEL2), "sel2">;

def HasTLB_RMI       : Predicate<"Subtarget->hasTLB_RMI()">,
                       AssemblerPredicateWithAll<(all_of FeatureTLB_RMI), "tlb-rmi">;

def HasFlagM         : Predicate<"Subtarget->hasFlagM()">,
                       AssemblerPredicateWithAll<(all_of FeatureFlagM), "flagm">;

def HasRCPC_IMMO     : Predicate<"Subtarget->hasRCPC_IMMO()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC_IMMO), "rcpc-immo">;

def HasFPARMv8       : Predicate<"Subtarget->hasFPARMv8()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFPARMv8), "fp-armv8">;
def HasNEON          : Predicate<"Subtarget->isNeonAvailable()">,
                                 AssemblerPredicateWithAll<(all_of FeatureNEON), "neon">;
def HasSM4           : Predicate<"Subtarget->hasSM4()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSM4), "sm4">;
def HasSHA3          : Predicate<"Subtarget->hasSHA3()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSHA3), "sha3">;
def HasSHA2          : Predicate<"Subtarget->hasSHA2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSHA2), "sha2">;
def HasAES           : Predicate<"Subtarget->hasAES()">,
                                 AssemblerPredicateWithAll<(all_of FeatureAES), "aes">;
def HasDotProd       : Predicate<"Subtarget->hasDotProd()">,
                                 AssemblerPredicateWithAll<(all_of FeatureDotProd), "dotprod">;
def HasCRC           : Predicate<"Subtarget->hasCRC()">,
                                 AssemblerPredicateWithAll<(all_of FeatureCRC), "crc">;
def HasCSSC          : Predicate<"Subtarget->hasCSSC()">,
                                 AssemblerPredicateWithAll<(all_of FeatureCSSC), "cssc">;
def HasNoCSSC        : Predicate<"!Subtarget->hasCSSC()">;
def HasLSE           : Predicate<"Subtarget->hasLSE()">,
                                 AssemblerPredicateWithAll<(all_of FeatureLSE), "lse">;
def HasNoLSE         : Predicate<"!Subtarget->hasLSE()">;
def HasRAS           : Predicate<"Subtarget->hasRAS()">,
                                 AssemblerPredicateWithAll<(all_of FeatureRAS), "ras">;
def HasRDM           : Predicate<"Subtarget->hasRDM()">,
                                 AssemblerPredicateWithAll<(all_of FeatureRDM), "rdm">;
def HasFullFP16      : Predicate<"Subtarget->hasFullFP16()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFullFP16), "fullfp16">;
def HasNoFullFP16    : Predicate<"!Subtarget->hasFullFP16()">;
def HasFP16FML       : Predicate<"Subtarget->hasFP16FML()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFP16FML), "fp16fml">;
def HasSPE           : Predicate<"Subtarget->hasSPE()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSPE), "spe">;
def HasFuseAES       : Predicate<"Subtarget->hasFuseAES()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFuseAES),
                                 "fuse-aes">;
def HasSVE           : Predicate<"Subtarget->isSVEAvailable()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE), "sve">;
def HasSVE2          : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2), "sve2">;
def HasSVE2p1        : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2p1()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2p1), "sve2p1">;
def HasSVE2AES       : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2AES()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2AES), "sve2-aes">;
def HasSVE2SM4       : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SM4()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2SM4), "sve2-sm4">;
def HasSVE2SHA3      : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2SHA3()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2SHA3), "sve2-sha3">;
def HasSVE2BitPerm   : Predicate<"Subtarget->isSVEAvailable() && Subtarget->hasSVE2BitPerm()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSVE2BitPerm), "sve2-bitperm">;
def HasB16B16        : Predicate<"Subtarget->hasB16B16()">,
                                 AssemblerPredicateWithAll<(all_of FeatureB16B16), "b16b16">;
def HasSMEandIsNonStreamingSafe
                     : Predicate<"Subtarget->hasSME()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
def HasSME           : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME), "sme">;
def HasSMEF64F64     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF64F64()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEF64F64), "sme-f64f64">;
def HasSMEF16F16     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF16F16()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEF16F16), "sme-f16f16">;
def HasSMEFA64       : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEFA64()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEFA64), "sme-fa64">;
def HasSMEI16I64     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEI16I64()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEI16I64), "sme-i16i64">;
def HasSME2andIsNonStreamingSafe
                     : Predicate<"Subtarget->hasSME2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2          : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME2), "sme2">;
def HasSME2p1        : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME2p1()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME2p1), "sme2p1">;
def HasFP8           : Predicate<"Subtarget->hasFP8()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFP8), "fp8">;
def HasFAMINMAX      : Predicate<"Subtarget->hasFAMINMAX()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFAMINMAX), "faminmax">;
def HasFP8FMA        : Predicate<"Subtarget->hasFP8FMA()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFP8FMA), "fp8fma">;
def HasSSVE_FP8FMA   : Predicate<"Subtarget->hasSSVE_FP8FMA() || "
                                 "(Subtarget->hasSVE2() && Subtarget->hasFP8FMA())">,
                                 AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8FMA,
                                                           (all_of FeatureSVE2, FeatureFP8FMA)),
                                                           "ssve-fp8fma or (sve2 and fp8fma)">;
def HasFP8DOT2       : Predicate<"Subtarget->hasFP8DOT2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFP8DOT2), "fp8dot2">;
def HasSSVE_FP8DOT2  : Predicate<"Subtarget->hasSSVE_FP8DOT2() || "
                                 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT2())">,
                                 AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT2,
                                                           (all_of FeatureSVE2, FeatureFP8DOT2)),
                                                           "ssve-fp8dot2 or (sve2 and fp8dot2)">;
def HasFP8DOT4       : Predicate<"Subtarget->hasFP8DOT4()">,
                                 AssemblerPredicateWithAll<(all_of FeatureFP8DOT4), "fp8dot4">;
def HasSSVE_FP8DOT4  : Predicate<"Subtarget->hasSSVE_FP8DOT4() || "
                                 "(Subtarget->hasSVE2() && Subtarget->hasFP8DOT4())">,
                                 AssemblerPredicateWithAll<(any_of FeatureSSVE_FP8DOT4,
                                                           (all_of FeatureSVE2, FeatureFP8DOT4)),
                                                           "ssve-fp8dot4 or (sve2 and fp8dot4)">;
def HasLUT           : Predicate<"Subtarget->hasLUT()">,
                                 AssemblerPredicateWithAll<(all_of FeatureLUT), "lut">;
def HasSME_LUTv2     : Predicate<"Subtarget->isStreaming() && Subtarget->hasSME_LUTv2()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSME_LUTv2), "sme-lutv2">;
def HasSMEF8F16      : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F16()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEF8F16), "sme-f8f16">;
def HasSMEF8F32      : Predicate<"Subtarget->isStreaming() && Subtarget->hasSMEF8F32()">,
                                 AssemblerPredicateWithAll<(all_of FeatureSMEF8F32), "sme-f8f32">;
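
// Convention used above: the plain SME predicates also check
// Subtarget->isStreaming(), since most SME instructions are only legal in
// streaming mode, while the *andIsNonStreamingSafe variants drop that check
// for the subset of instructions that are valid in either mode.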

// A subset of SVE(2) instructions are legal in Streaming SVE execution mode,
// so they should be enabled if either SVE or SME has been specified.
def HasSVEorSME
    : Predicate<"Subtarget->hasSVE() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
                AssemblerPredicateWithAll<(any_of FeatureSVE, FeatureSME),
                "sve or sme">;
def HasSVE2orSME
    : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
                AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME),
                "sve2 or sme">;
def HasSVE2orSME2
    : Predicate<"Subtarget->hasSVE2() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
                AssemblerPredicateWithAll<(any_of FeatureSVE2, FeatureSME2),
                "sve2 or sme2">;
def HasSVE2p1_or_HasSME
    : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME())">,
                 AssemblerPredicateWithAll<(any_of FeatureSME, FeatureSVE2p1), "sme or sve2p1">;
def HasSVE2p1_or_HasSME2
    : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2())">,
                 AssemblerPredicateWithAll<(any_of FeatureSME2, FeatureSVE2p1), "sme2 or sve2p1">;
def HasSVE2p1_or_HasSME2p1
    : Predicate<"Subtarget->hasSVE2p1() || (Subtarget->isStreaming() && Subtarget->hasSME2p1())">,
                 AssemblerPredicateWithAll<(any_of FeatureSME2p1, FeatureSVE2p1), "sme2p1 or sve2p1">;

def HasSMEF16F16orSMEF8F16
    : Predicate<"Subtarget->isStreaming() && (Subtarget->hasSMEF16F16() || Subtarget->hasSMEF8F16())">,
                AssemblerPredicateWithAll<(any_of FeatureSMEF16F16, FeatureSMEF8F16),
                "sme-f16f16 or sme-f8f16">;

// A subset of NEON instructions are legal in Streaming SVE execution mode,
// so they don't need the additional check for 'isNeonAvailable'.
def HasNEONandIsStreamingSafe
    : Predicate<"Subtarget->hasNEON()">,
      AssemblerPredicateWithAll<(any_of FeatureNEON), "neon">;
def HasRCPC          : Predicate<"Subtarget->hasRCPC()">,
                                 AssemblerPredicateWithAll<(all_of FeatureRCPC), "rcpc">;
def HasAltNZCV       : Predicate<"Subtarget->hasAlternativeNZCV()">,
                       AssemblerPredicateWithAll<(all_of FeatureAltFPCmp), "altnzcv">;
def HasFRInt3264     : Predicate<"Subtarget->hasFRInt3264()">,
                       AssemblerPredicateWithAll<(all_of FeatureFRInt3264), "frint3264">;
def HasSB            : Predicate<"Subtarget->hasSB()">,
                       AssemblerPredicateWithAll<(all_of FeatureSB), "sb">;
def HasPredRes       : Predicate<"Subtarget->hasPredRes()">,
                       AssemblerPredicateWithAll<(all_of FeaturePredRes), "predres">;
def HasCCDP          : Predicate<"Subtarget->hasCCDP()">,
                       AssemblerPredicateWithAll<(all_of FeatureCacheDeepPersist), "ccdp">;
def HasBTI           : Predicate<"Subtarget->hasBTI()">,
                       AssemblerPredicateWithAll<(all_of FeatureBranchTargetId), "bti">;
def HasMTE           : Predicate<"Subtarget->hasMTE()">,
                       AssemblerPredicateWithAll<(all_of FeatureMTE), "mte">;
def HasTME           : Predicate<"Subtarget->hasTME()">,
                       AssemblerPredicateWithAll<(all_of FeatureTME), "tme">;
def HasETE           : Predicate<"Subtarget->hasETE()">,
                       AssemblerPredicateWithAll<(all_of FeatureETE), "ete">;
def HasTRBE          : Predicate<"Subtarget->hasTRBE()">,
                       AssemblerPredicateWithAll<(all_of FeatureTRBE), "trbe">;
def HasBF16          : Predicate<"Subtarget->hasBF16()">,
                       AssemblerPredicateWithAll<(all_of FeatureBF16), "bf16">;
def HasNoBF16        : Predicate<"!Subtarget->hasBF16()">;
def HasMatMulInt8    : Predicate<"Subtarget->hasMatMulInt8()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulInt8), "i8mm">;
def HasMatMulFP32    : Predicate<"Subtarget->hasMatMulFP32()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulFP32), "f32mm">;
def HasMatMulFP64    : Predicate<"Subtarget->hasMatMulFP64()">,
                       AssemblerPredicateWithAll<(all_of FeatureMatMulFP64), "f64mm">;
def HasFPAC          : Predicate<"Subtarget->hasFPAC()">,
                       AssemblerPredicateWithAll<(all_of FeatureFPAC), "fpac">;
def HasXS            : Predicate<"Subtarget->hasXS()">,
                       AssemblerPredicateWithAll<(all_of FeatureXS), "xs">;
def HasWFxT          : Predicate<"Subtarget->hasWFxT()">,
                       AssemblerPredicateWithAll<(all_of FeatureWFxT), "wfxt">;
def HasLS64          : Predicate<"Subtarget->hasLS64()">,
                       AssemblerPredicateWithAll<(all_of FeatureLS64), "ls64">;
def HasBRBE          : Predicate<"Subtarget->hasBRBE()">,
                       AssemblerPredicateWithAll<(all_of FeatureBRBE), "brbe">;
def HasSPE_EEF       : Predicate<"Subtarget->hasSPE_EEF()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPE_EEF), "spe-eef">;
def HasHBC           : Predicate<"Subtarget->hasHBC()">,
                       AssemblerPredicateWithAll<(all_of FeatureHBC), "hbc">;
def HasMOPS          : Predicate<"Subtarget->hasMOPS()">,
                       AssemblerPredicateWithAll<(all_of FeatureMOPS), "mops">;
def HasCLRBHB        : Predicate<"Subtarget->hasCLRBHB()">,
                       AssemblerPredicateWithAll<(all_of FeatureCLRBHB), "clrbhb">;
def HasSPECRES2      : Predicate<"Subtarget->hasSPECRES2()">,
                       AssemblerPredicateWithAll<(all_of FeatureSPECRES2), "specres2">;
def HasITE           : Predicate<"Subtarget->hasITE()">,
                       AssemblerPredicateWithAll<(all_of FeatureITE), "ite">;
def HasTHE           : Predicate<"Subtarget->hasTHE()">,
                       AssemblerPredicateWithAll<(all_of FeatureTHE), "the">;
def HasRCPC3         : Predicate<"Subtarget->hasRCPC3()">,
                       AssemblerPredicateWithAll<(all_of FeatureRCPC3), "rcpc3">;
def HasLSE128        : Predicate<"Subtarget->hasLSE128()">,
                       AssemblerPredicateWithAll<(all_of FeatureLSE128), "lse128">;
def HasD128          : Predicate<"Subtarget->hasD128()">,
                       AssemblerPredicateWithAll<(all_of FeatureD128), "d128">;
def HasCHK           : Predicate<"Subtarget->hasCHK()">,
                       AssemblerPredicateWithAll<(all_of FeatureCHK), "chk">;
def HasGCS           : Predicate<"Subtarget->hasGCS()">,
                       AssemblerPredicateWithAll<(all_of FeatureGCS), "gcs">;
def HasCPA           : Predicate<"Subtarget->hasCPA()">,
                       AssemblerPredicateWithAll<(all_of FeatureCPA), "cpa">;
def IsLE             : Predicate<"Subtarget->isLittleEndian()">;
def IsBE             : Predicate<"!Subtarget->isLittleEndian()">;
def IsWindows        : Predicate<"Subtarget->isTargetWindows()">;
def UseExperimentalZeroingPseudos
    : Predicate<"Subtarget->useExperimentalZeroingPseudos()">;
def UseAlternateSExtLoadCVTF32
    : Predicate<"Subtarget->useAlternateSExtLoadCVTF32Pattern()">;

def UseNegativeImmediates
    : Predicate<"false">, AssemblerPredicate<(all_of (not FeatureNoNegativeImmediates)),
                                             "NegativeImmediates">;
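
// UseNegativeImmediates is never true for ISel (Predicate<"false">); it only
// controls assembler aliases. Illustratively, with negative immediates
// enabled the assembler may accept "add w0, w1, #-4" by encoding it as
// "sub w0, w1, #4".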

def UseScalarIncVL : Predicate<"Subtarget->useScalarIncVL()">;

def NoUseScalarIncVL : Predicate<"!Subtarget->useScalarIncVL()">;

def UseSVEFPLD1R : Predicate<"!Subtarget->noSVEFPLD1R()">;

def AArch64LocalRecover : SDNode<"ISD::LOCAL_RECOVER",
                                  SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>,
                                                       SDTCisInt<1>]>>;


//===----------------------------------------------------------------------===//
// AArch64-specific DAG Nodes.
//

// SDTBinaryArithWithFlagsOut - RES1, FLAGS = op LHS, RHS
def SDTBinaryArithWithFlagsOut : SDTypeProfile<2, 2,
                                              [SDTCisSameAs<0, 2>,
                                               SDTCisSameAs<0, 3>,
                                               SDTCisInt<0>, SDTCisVT<1, i32>]>;

// SDTBinaryArithWithFlagsIn - RES1 = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsIn : SDTypeProfile<1, 3,
                                            [SDTCisSameAs<0, 1>,
                                             SDTCisSameAs<0, 2>,
                                             SDTCisInt<0>,
                                             SDTCisVT<3, i32>]>;

// SDTBinaryArithWithFlagsInOut - RES1, FLAGS = op LHS, RHS, FLAGS
def SDTBinaryArithWithFlagsInOut : SDTypeProfile<2, 3,
                                            [SDTCisSameAs<0, 2>,
                                             SDTCisSameAs<0, 3>,
                                             SDTCisInt<0>,
                                             SDTCisVT<1, i32>,
                                             SDTCisVT<4, i32>]>;
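
// Operand/result numbering implied by the constraints above (a restatement,
// not new defs):
//   SDTBinaryArithWithFlagsOut  : (value:#0, nzcv:#1) = op (lhs:#2, rhs:#3)
//   SDTBinaryArithWithFlagsIn   : (value:#0)          = op (lhs:#1, rhs:#2, nzcv:#3)
//   SDTBinaryArithWithFlagsInOut: (value:#0, nzcv:#1) = op (lhs:#2, rhs:#3, nzcv:#4)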

def SDT_AArch64Brcond  : SDTypeProfile<0, 3,
                                     [SDTCisVT<0, OtherVT>, SDTCisVT<1, i32>,
                                      SDTCisVT<2, i32>]>;
def SDT_AArch64cbz : SDTypeProfile<0, 2, [SDTCisInt<0>, SDTCisVT<1, OtherVT>]>;
def SDT_AArch64tbz : SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>,
                                        SDTCisVT<2, OtherVT>]>;


def SDT_AArch64CSel  : SDTypeProfile<1, 4,
                                   [SDTCisSameAs<0, 1>,
                                    SDTCisSameAs<0, 2>,
                                    SDTCisInt<3>,
                                    SDTCisVT<4, i32>]>;
def SDT_AArch64CCMP : SDTypeProfile<1, 5,
                                    [SDTCisVT<0, i32>,
                                     SDTCisInt<1>,
                                     SDTCisSameAs<1, 2>,
                                     SDTCisInt<3>,
                                     SDTCisInt<4>,
                                     SDTCisVT<5, i32>]>;
def SDT_AArch64FCCMP : SDTypeProfile<1, 5,
                                     [SDTCisVT<0, i32>,
                                      SDTCisFP<1>,
                                      SDTCisSameAs<1, 2>,
                                      SDTCisInt<3>,
                                      SDTCisInt<4>,
                                      SDTCisVT<5, i32>]>;
def SDT_AArch64FCmp   : SDTypeProfile<0, 2,
                                   [SDTCisFP<0>,
                                    SDTCisSameAs<0, 1>]>;
def SDT_AArch64Dup   : SDTypeProfile<1, 1, [SDTCisVec<0>]>;
def SDT_AArch64DupLane   : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<2>]>;
def SDT_AArch64Insr  : SDTypeProfile<1, 2, [SDTCisVec<0>]>;
def SDT_AArch64Zip   : SDTypeProfile<1, 2, [SDTCisVec<0>,
                                          SDTCisSameAs<0, 1>,
                                          SDTCisSameAs<0, 2>]>;
def SDT_AArch64MOVIedit : SDTypeProfile<1, 1, [SDTCisInt<1>]>;
def SDT_AArch64MOVIshift : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
def SDT_AArch64vecimm : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisInt<2>, SDTCisInt<3>]>;
def SDT_AArch64UnaryVec: SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                          SDTCisSameAs<0,2>, SDTCisInt<3>]>;
def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>;
def SDT_AArch64Dot: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                         SDTCisVec<2>, SDTCisSameAs<2,3>]>;

def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>,
                                                 SDTCisSameAs<0,1>,
                                                 SDTCisSameAs<0,2>]>;

def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>;
def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>;
def SDT_AArch64fcmp  : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>;
def SDT_AArch64binvec : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisSameAs<0,2>]>;
def SDT_AArch64trivec : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                           SDTCisSameAs<0,2>,
                                           SDTCisSameAs<0,3>]>;
def SDT_AArch64TCRET : SDTypeProfile<0, 2, [SDTCisPtrTy<0>]>;
def SDT_AArch64PREFETCH : SDTypeProfile<0, 2, [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>;

def SDT_AArch64ITOF  : SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisSameAs<0,1>]>;

def SDT_AArch64TLSDescCall : SDTypeProfile<0, -2, [SDTCisPtrTy<0>,
                                                 SDTCisPtrTy<1>]>;

def SDT_AArch64uaddlp : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisVec<1>]>;

def SDT_AArch64ldp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldiapp : SDTypeProfile<2, 1, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64ldnp : SDTypeProfile<2, 1, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stilp : SDTypeProfile<0, 3, [SDTCisVT<0, i64>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;
def SDT_AArch64stnp : SDTypeProfile<0, 3, [SDTCisVT<0, v4i32>, SDTCisSameAs<0, 1>, SDTCisPtrTy<2>]>;

// Generates the general dynamic sequences, i.e.
//  adrp  x0, :tlsdesc:var
//  ldr   x1, [x0, #:tlsdesc_lo12:var]
//  add   x0, x0, #:tlsdesc_lo12:var
//  .tlsdesccall var
//  blr   x1
// (The TPIDR_EL0 offset is put directly in X0, hence there is no "result".)
// The single operand is the variable being accessed.
def SDT_AArch64TLSDescCallSeq : SDTypeProfile<0,1,
                                          [SDTCisPtrTy<0>]>;

def SDT_AArch64WrapperLarge : SDTypeProfile<1, 4,
                                        [SDTCisVT<0, i64>, SDTCisVT<1, i32>,
                                         SDTCisSameAs<1, 2>, SDTCisSameAs<1, 3>,
                                         SDTCisSameAs<1, 4>]>;

def SDT_AArch64TBL : SDTypeProfile<1, 2, [
  SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisInt<2>
]>;

// Non-extending masked load fragment.
def nonext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed() &&
         !cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;
// Any/zero-extending masked load fragments.
def azext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return (cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD ||
          cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD) &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def azext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def azext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def azext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (azext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
// Sign-extending masked load fragments.
def sext_masked_load :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD &&
         cast<MaskedLoadSDNode>(N)->isUnindexed();
}]>;
def sext_masked_load_i8 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def sext_masked_load_i16 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def sext_masked_load_i32 :
  PatFrag<(ops node:$ptr, node:$pred, node:$def),
          (sext_masked_load node:$ptr, node:$pred, node:$def), [{
  return cast<MaskedLoadSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;
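
// In the fragments above, $ptr is the base address, $pred the mask, and $def
// the pass-through value; the offset operand of masked_ld is pinned to undef,
// i.e. the access is unindexed. A schematic of how a fragment is consumed
// (not a real pattern from this file; the instruction name is a placeholder):
//
//   def : Pat<(nxv8i16 (sext_masked_load_i8 GPR64:$ptr, PPR:$pg, (undef))),
//             (<some LD1SB-style instruction> $pg, $ptr)>;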

def non_temporal_load :
   PatFrag<(ops node:$ptr, node:$pred, node:$def),
           (masked_ld node:$ptr, undef, node:$pred, node:$def), [{
   return cast<MaskedLoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD &&
          cast<MaskedLoadSDNode>(N)->isUnindexed() &&
          cast<MaskedLoadSDNode>(N)->isNonTemporal();
}]>;

// Non-truncating masked store fragment.
def nontrunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         !cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;
// Truncating masked store fragments.
def trunc_masked_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed();
}]>;
def trunc_masked_store_i8 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i8;
}]>;
def trunc_masked_store_i16 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i16;
}]>;
def trunc_masked_store_i32 :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (trunc_masked_store node:$val, node:$ptr, node:$pred), [{
  return cast<MaskedStoreSDNode>(N)->getMemoryVT().getScalarType() == MVT::i32;
}]>;

def non_temporal_store :
  PatFrag<(ops node:$val, node:$ptr, node:$pred),
          (masked_st node:$val, node:$ptr, undef, node:$pred), [{
  return !cast<MaskedStoreSDNode>(N)->isTruncatingStore() &&
         cast<MaskedStoreSDNode>(N)->isUnindexed() &&
         cast<MaskedStoreSDNode>(N)->isNonTemporal();
}]>;

multiclass masked_gather_scatter<PatFrags GatherScatterOp> {
  // offsets = (signed)Index << sizeof(elt)
  def NAME#_signed_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx), [{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (signed)Index
  def NAME#_signed_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx), [{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return Signed && !MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index << sizeof(elt)
  def NAME#_unsigned_scaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx), [{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && MGS->isIndexScaled();
  }]>;
  // offsets = (unsigned)Index
  def NAME#_unsigned_unscaled :
    PatFrag<(ops node:$val, node:$pred, node:$ptr, node:$idx),
            (GatherScatterOp node:$val, node:$pred, node:$ptr, node:$idx), [{
    auto MGS = cast<MaskedGatherScatterSDNode>(N);
    bool Signed = MGS->isIndexSigned() ||
        MGS->getIndex().getValueType().getVectorElementType() == MVT::i64;
    return !Signed && !MGS->isIndexScaled();
  }]>;
}
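
// Each defm below therefore produces four fragments; e.g. the first defines
// nonext_masked_gather_signed_scaled, nonext_masked_gather_signed_unscaled,
// nonext_masked_gather_unsigned_scaled and
// nonext_masked_gather_unsigned_unscaled.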

defm nonext_masked_gather    : masked_gather_scatter<nonext_masked_gather>;
defm azext_masked_gather_i8  : masked_gather_scatter<azext_masked_gather_i8>;
defm azext_masked_gather_i16 : masked_gather_scatter<azext_masked_gather_i16>;
defm azext_masked_gather_i32 : masked_gather_scatter<azext_masked_gather_i32>;
defm sext_masked_gather_i8   : masked_gather_scatter<sext_masked_gather_i8>;
defm sext_masked_gather_i16  : masked_gather_scatter<sext_masked_gather_i16>;
defm sext_masked_gather_i32  : masked_gather_scatter<sext_masked_gather_i32>;

defm nontrunc_masked_scatter  : masked_gather_scatter<nontrunc_masked_scatter>;
defm trunc_masked_scatter_i8  : masked_gather_scatter<trunc_masked_scatter_i8>;
defm trunc_masked_scatter_i16 : masked_gather_scatter<trunc_masked_scatter_i16>;
defm trunc_masked_scatter_i32 : masked_gather_scatter<trunc_masked_scatter_i32>;

// top16Zero - answer true if the upper 16 bits of $src are 0, false otherwise
def top16Zero: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 16));
  }]>;

// top32Zero - answer true if the upper 32 bits of $src are 0, false otherwise
def top32Zero: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 32));
  }]>;

// topbitsallzero - Return true if all bits except the lowest bit are known zero
def topbitsallzero32: PatLeaf<(i32 GPR32:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i32 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(32, 31));
  }]>;
def topbitsallzero64: PatLeaf<(i64 GPR64:$src), [{
  return SDValue(N,0)->getValueType(0) == MVT::i64 &&
         CurDAG->MaskedValueIsZero(SDValue(N,0), APInt::getHighBitsSet(64, 63));
  }]>;
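
// For example, an i32 that is the zero-extension of an i1 satisfies
// topbitsallzero32, since only bit 0 can possibly be set.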

// Node definitions.
def AArch64adrp          : SDNode<"AArch64ISD::ADRP", SDTIntUnaryOp, []>;
def AArch64adr           : SDNode<"AArch64ISD::ADR", SDTIntUnaryOp, []>;
def AArch64addlow        : SDNode<"AArch64ISD::ADDlow", SDTIntBinOp, []>;
def AArch64LOADgot       : SDNode<"AArch64ISD::LOADgot", SDTIntUnaryOp>;
def AArch64callseq_start : SDNode<"ISD::CALLSEQ_START",
                                SDCallSeqStart<[ SDTCisVT<0, i32>,
                                                 SDTCisVT<1, i32> ]>,
                                [SDNPHasChain, SDNPOutGlue]>;
def AArch64callseq_end   : SDNode<"ISD::CALLSEQ_END",
                                SDCallSeqEnd<[ SDTCisVT<0, i32>,
                                               SDTCisVT<1, i32> ]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def AArch64call          : SDNode<"AArch64ISD::CALL",
                                SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                 SDNPVariadic]>;

def AArch64call_bti      : SDNode<"AArch64ISD::CALL_BTI",
                                SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                 SDNPVariadic]>;

def AArch64call_rvmarker : SDNode<"AArch64ISD::CALL_RVMARKER",
                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                              SDNPVariadic]>;

def AArch64call_arm64ec_to_x64 : SDNode<"AArch64ISD::CALL_ARM64EC_TO_X64",
                                      SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>,
                                      [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                       SDNPVariadic]>;

def AArch64authcall : SDNode<"AArch64ISD::AUTH_CALL",
                             SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
                                                   SDTCisVT<1, i32>,
                                                   SDTCisVT<2, i64>,
                                                   SDTCisVT<3, i64>]>,
                             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                              SDNPVariadic]>;

def AArch64authtcret : SDNode<"AArch64ISD::AUTH_TC_RETURN",
                             SDTypeProfile<0, 5, [SDTCisPtrTy<0>,
                                                  SDTCisVT<2, i32>,
                                                  SDTCisVT<3, i64>,
                                                  SDTCisVT<4, i64>]>,
                             [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def AArch64authcall_rvmarker : SDNode<"AArch64ISD::AUTH_CALL_RVMARKER",
                                 SDTypeProfile<0, -1, [SDTCisPtrTy<0>,
                                                       SDTCisPtrTy<1>,
                                                       SDTCisVT<2, i32>,
                                                       SDTCisVT<3, i64>,
                                                       SDTCisVT<4, i64>]>,
                                 [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
                                  SDNPVariadic]>;

def AArch64brcond        : SDNode<"AArch64ISD::BRCOND", SDT_AArch64Brcond,
                                [SDNPHasChain]>;
def AArch64cbz           : SDNode<"AArch64ISD::CBZ", SDT_AArch64cbz,
                                [SDNPHasChain]>;
def AArch64cbnz          : SDNode<"AArch64ISD::CBNZ", SDT_AArch64cbz,
                                [SDNPHasChain]>;
def AArch64tbz           : SDNode<"AArch64ISD::TBZ", SDT_AArch64tbz,
                                [SDNPHasChain]>;
def AArch64tbnz          : SDNode<"AArch64ISD::TBNZ", SDT_AArch64tbz,
                                [SDNPHasChain]>;


def AArch64csel          : SDNode<"AArch64ISD::CSEL", SDT_AArch64CSel>;
def AArch64csinv         : SDNode<"AArch64ISD::CSINV", SDT_AArch64CSel>;
def AArch64csneg         : SDNode<"AArch64ISD::CSNEG", SDT_AArch64CSel>;
def AArch64csinc         : SDNode<"AArch64ISD::CSINC", SDT_AArch64CSel>;
def AArch64retglue       : SDNode<"AArch64ISD::RET_GLUE", SDTNone,
                                [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
def AArch64adc       : SDNode<"AArch64ISD::ADC",  SDTBinaryArithWithFlagsIn>;
def AArch64sbc       : SDNode<"AArch64ISD::SBC",  SDTBinaryArithWithFlagsIn>;
def AArch64add_flag  : SDNode<"AArch64ISD::ADDS",  SDTBinaryArithWithFlagsOut,
                            [SDNPCommutative]>;
def AArch64sub_flag  : SDNode<"AArch64ISD::SUBS",  SDTBinaryArithWithFlagsOut>;
def AArch64and_flag  : SDNode<"AArch64ISD::ANDS",  SDTBinaryArithWithFlagsOut,
                            [SDNPCommutative]>;
def AArch64adc_flag  : SDNode<"AArch64ISD::ADCS",  SDTBinaryArithWithFlagsInOut>;
def AArch64sbc_flag  : SDNode<"AArch64ISD::SBCS",  SDTBinaryArithWithFlagsInOut>;

def AArch64ccmp      : SDNode<"AArch64ISD::CCMP",  SDT_AArch64CCMP>;
def AArch64ccmn      : SDNode<"AArch64ISD::CCMN",  SDT_AArch64CCMP>;
def AArch64fccmp     : SDNode<"AArch64ISD::FCCMP", SDT_AArch64FCCMP>;

def AArch64threadpointer : SDNode<"AArch64ISD::THREAD_POINTER", SDTPtrLeaf>;

def AArch64fcmp         : SDNode<"AArch64ISD::FCMP", SDT_AArch64FCmp>;
def AArch64strict_fcmp  : SDNode<"AArch64ISD::STRICT_FCMP", SDT_AArch64FCmp,
                                 [SDNPHasChain]>;
def AArch64strict_fcmpe : SDNode<"AArch64ISD::STRICT_FCMPE", SDT_AArch64FCmp,
                                 [SDNPHasChain]>;
def AArch64any_fcmp     : PatFrags<(ops node:$lhs, node:$rhs),
                                   [(AArch64strict_fcmp node:$lhs, node:$rhs),
                                    (AArch64fcmp node:$lhs, node:$rhs)]>;

def AArch64dup       : SDNode<"AArch64ISD::DUP", SDT_AArch64Dup>;
def AArch64duplane8  : SDNode<"AArch64ISD::DUPLANE8", SDT_AArch64DupLane>;
def AArch64duplane16 : SDNode<"AArch64ISD::DUPLANE16", SDT_AArch64DupLane>;
def AArch64duplane32 : SDNode<"AArch64ISD::DUPLANE32", SDT_AArch64DupLane>;
def AArch64duplane64 : SDNode<"AArch64ISD::DUPLANE64", SDT_AArch64DupLane>;
def AArch64duplane128 : SDNode<"AArch64ISD::DUPLANE128", SDT_AArch64DupLane>;

def AArch64insr      : SDNode<"AArch64ISD::INSR", SDT_AArch64Insr>;

def AArch64zip1      : SDNode<"AArch64ISD::ZIP1", SDT_AArch64Zip>;
def AArch64zip2      : SDNode<"AArch64ISD::ZIP2", SDT_AArch64Zip>;
def AArch64uzp1      : SDNode<"AArch64ISD::UZP1", SDT_AArch64Zip>;
def AArch64uzp2      : SDNode<"AArch64ISD::UZP2", SDT_AArch64Zip>;
def AArch64trn1      : SDNode<"AArch64ISD::TRN1", SDT_AArch64Zip>;
def AArch64trn2      : SDNode<"AArch64ISD::TRN2", SDT_AArch64Zip>;

def AArch64movi_edit : SDNode<"AArch64ISD::MOVIedit", SDT_AArch64MOVIedit>;
def AArch64movi_shift : SDNode<"AArch64ISD::MOVIshift", SDT_AArch64MOVIshift>;
def AArch64movi_msl : SDNode<"AArch64ISD::MOVImsl", SDT_AArch64MOVIshift>;
def AArch64mvni_shift : SDNode<"AArch64ISD::MVNIshift", SDT_AArch64MOVIshift>;
def AArch64mvni_msl : SDNode<"AArch64ISD::MVNImsl", SDT_AArch64MOVIshift>;
def AArch64movi : SDNode<"AArch64ISD::MOVI", SDT_AArch64MOVIedit>;
def AArch64fmov : SDNode<"AArch64ISD::FMOV", SDT_AArch64MOVIedit>;

def AArch64rev16 : SDNode<"AArch64ISD::REV16", SDT_AArch64UnaryVec>;
def AArch64rev32 : SDNode<"AArch64ISD::REV32", SDT_AArch64UnaryVec>;
def AArch64rev64 : SDNode<"AArch64ISD::REV64", SDT_AArch64UnaryVec>;
def AArch64ext : SDNode<"AArch64ISD::EXT", SDT_AArch64ExtVec>;

def AArch64vashr : SDNode<"AArch64ISD::VASHR", SDT_AArch64vshift>;

def AArch64vashr_exact : PatFrag<(ops          node:$lhs, node:$rhs),
                                 (AArch64vashr node:$lhs, node:$rhs), [{
  return N->getFlags().hasExact();
}]>;

def AArch64vlshr : SDNode<"AArch64ISD::VLSHR", SDT_AArch64vshift>;
def AArch64vshl : SDNode<"AArch64ISD::VSHL", SDT_AArch64vshift>;
def AArch64sqshli : SDNode<"AArch64ISD::SQSHL_I", SDT_AArch64vshift>;
def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>;
def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>;
def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>;
def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>;
def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>;
def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>;

def AArch64bsp : SDNode<"AArch64ISD::BSP", SDT_AArch64trivec>;

def AArch64cmeq : SDNode<"AArch64ISD::CMEQ", SDT_AArch64binvec>;
def AArch64cmge : SDNode<"AArch64ISD::CMGE", SDT_AArch64binvec>;
def AArch64cmgt : SDNode<"AArch64ISD::CMGT", SDT_AArch64binvec>;
def AArch64cmhi : SDNode<"AArch64ISD::CMHI", SDT_AArch64binvec>;
def AArch64cmhs : SDNode<"AArch64ISD::CMHS", SDT_AArch64binvec>;

def AArch64fcmeq : SDNode<"AArch64ISD::FCMEQ", SDT_AArch64fcmp>;
def AArch64fcmge : SDNode<"AArch64ISD::FCMGE", SDT_AArch64fcmp>;
def AArch64fcmgt : SDNode<"AArch64ISD::FCMGT", SDT_AArch64fcmp>;

def AArch64cmeqz : SDNode<"AArch64ISD::CMEQz", SDT_AArch64unvec>;
def AArch64cmgez : SDNode<"AArch64ISD::CMGEz", SDT_AArch64unvec>;
def AArch64cmgtz : SDNode<"AArch64ISD::CMGTz", SDT_AArch64unvec>;
def AArch64cmlez : SDNode<"AArch64ISD::CMLEz", SDT_AArch64unvec>;
def AArch64cmltz : SDNode<"AArch64ISD::CMLTz", SDT_AArch64unvec>;
def AArch64cmtst : PatFrag<(ops node:$LHS, node:$RHS),
                        (vnot (AArch64cmeqz (and node:$LHS, node:$RHS)))>;
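
// CMTST sets a lane to all-ones iff (LHS & RHS) != 0 in that lane, which is
// why the fragment above is the lane-wise negation of "(LHS & RHS) == 0".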

def AArch64fcmeqz : SDNode<"AArch64ISD::FCMEQz", SDT_AArch64fcmpz>;
def AArch64fcmgez : SDNode<"AArch64ISD::FCMGEz", SDT_AArch64fcmpz>;
def AArch64fcmgtz : SDNode<"AArch64ISD::FCMGTz", SDT_AArch64fcmpz>;
def AArch64fcmlez : SDNode<"AArch64ISD::FCMLEz", SDT_AArch64fcmpz>;
def AArch64fcmltz : SDNode<"AArch64ISD::FCMLTz", SDT_AArch64fcmpz>;

def AArch64fcvtxn_n : SDNode<"AArch64ISD::FCVTXN", SDTFPRoundOp>;
def AArch64fcvtxnsdr : PatFrags<(ops node:$Rn),
                               [(f32 (int_aarch64_sisd_fcvtxn (f64 node:$Rn))),
                                (f32 (AArch64fcvtxn_n (f64 node:$Rn)))]>;
def AArch64fcvtxnv : PatFrags<(ops node:$Rn),
                                 [(int_aarch64_neon_fcvtxn node:$Rn),
                                  (AArch64fcvtxn_n node:$Rn)]>;

//def Aarch64softf32tobf16v8: SDNode<"AArch64ISD::", SDTFPRoundOp>;

def AArch64bici : SDNode<"AArch64ISD::BICi", SDT_AArch64vecimm>;
def AArch64orri : SDNode<"AArch64ISD::ORRi", SDT_AArch64vecimm>;

def AArch64tcret : SDNode<"AArch64ISD::TC_RETURN", SDT_AArch64TCRET,
                  [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def AArch64Prefetch : SDNode<"AArch64ISD::PREFETCH", SDT_AArch64PREFETCH,
                               [SDNPHasChain, SDNPSideEffect]>;

def AArch64sitof : SDNode<"AArch64ISD::SITOF", SDT_AArch64ITOF>;
def AArch64uitof : SDNode<"AArch64ISD::UITOF", SDT_AArch64ITOF>;

def AArch64tlsdesc_callseq : SDNode<"AArch64ISD::TLSDESC_CALLSEQ",
                                    SDT_AArch64TLSDescCallSeq,
                                    [SDNPInGlue, SDNPOutGlue, SDNPHasChain,
                                     SDNPVariadic]>;


def AArch64WrapperLarge : SDNode<"AArch64ISD::WrapperLarge",
                                 SDT_AArch64WrapperLarge>;

def AArch64NvCast : SDNode<"AArch64ISD::NVCAST", SDTUnaryOp>;

def SDT_AArch64mull : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisInt<1>,
                                    SDTCisSameAs<1, 2>]>;
def AArch64pmull    : SDNode<"AArch64ISD::PMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;
def AArch64smull    : SDNode<"AArch64ISD::SMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;
def AArch64umull    : SDNode<"AArch64ISD::UMULL", SDT_AArch64mull,
                             [SDNPCommutative]>;

def AArch64frecpe   : SDNode<"AArch64ISD::FRECPE", SDTFPUnaryOp>;
def AArch64frecps   : SDNode<"AArch64ISD::FRECPS", SDTFPBinOp>;
def AArch64frsqrte  : SDNode<"AArch64ISD::FRSQRTE", SDTFPUnaryOp>;
def AArch64frsqrts  : SDNode<"AArch64ISD::FRSQRTS", SDTFPBinOp>;

def AArch64sdot     : SDNode<"AArch64ISD::SDOT", SDT_AArch64Dot>;
def AArch64udot     : SDNode<"AArch64ISD::UDOT", SDT_AArch64Dot>;

def AArch64saddv    : SDNode<"AArch64ISD::SADDV", SDT_AArch64UnaryVec>;
def AArch64uaddv    : SDNode<"AArch64ISD::UADDV", SDT_AArch64UnaryVec>;
def AArch64sminv    : SDNode<"AArch64ISD::SMINV", SDT_AArch64UnaryVec>;
def AArch64uminv    : SDNode<"AArch64ISD::UMINV", SDT_AArch64UnaryVec>;
def AArch64smaxv    : SDNode<"AArch64ISD::SMAXV", SDT_AArch64UnaryVec>;
def AArch64umaxv    : SDNode<"AArch64ISD::UMAXV", SDT_AArch64UnaryVec>;
def AArch64uaddlv   : SDNode<"AArch64ISD::UADDLV", SDT_AArch64uaddlp>;
def AArch64saddlv   : SDNode<"AArch64ISD::SADDLV", SDT_AArch64uaddlp>;

def AArch64uabd     : PatFrags<(ops node:$lhs, node:$rhs),
                               [(abdu node:$lhs, node:$rhs),
                                (int_aarch64_neon_uabd node:$lhs, node:$rhs)]>;
def AArch64sabd     : PatFrags<(ops node:$lhs, node:$rhs),
                               [(abds node:$lhs, node:$rhs),
                                (int_aarch64_neon_sabd node:$lhs, node:$rhs)]>;

def AArch64addp_n   : SDNode<"AArch64ISD::ADDP", SDT_AArch64Zip>;
def AArch64uaddlp_n : SDNode<"AArch64ISD::UADDLP", SDT_AArch64uaddlp>;
def AArch64saddlp_n : SDNode<"AArch64ISD::SADDLP", SDT_AArch64uaddlp>;
def AArch64addp     : PatFrags<(ops node:$Rn, node:$Rm),
                               [(AArch64addp_n node:$Rn, node:$Rm),
                                (int_aarch64_neon_addp node:$Rn, node:$Rm)]>;
def AArch64uaddlp   : PatFrags<(ops node:$src),
                               [(AArch64uaddlp_n node:$src),
                                (int_aarch64_neon_uaddlp node:$src)]>;
def AArch64saddlp   : PatFrags<(ops node:$src),
                               [(AArch64saddlp_n node:$src),
                                (int_aarch64_neon_saddlp node:$src)]>;
def AArch64faddp    : PatFrags<(ops node:$Rn, node:$Rm),
                                [(AArch64addp_n node:$Rn, node:$Rm),
                                 (int_aarch64_neon_faddp node:$Rn, node:$Rm)]>;
def AArch64roundingvlshr : ComplexPattern<vAny, 2, "SelectRoundingVLShr", [AArch64vlshr]>;
def AArch64rshrn    : PatFrags<(ops node:$LHS, node:$RHS),
                            [(trunc (AArch64roundingvlshr node:$LHS, node:$RHS)),
                             (int_aarch64_neon_rshrn node:$LHS, node:$RHS)]>;
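
// RSHRN computes (roughly) trunc((x + (1 << (n-1))) >> n); SelectRoundingVLShr
// matches that add-then-shift shape, so the generic trunc-of-shift form above
// also covers the rounding case.
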
def AArch64facge    : PatFrags<(ops node:$Rn, node:$Rm),
                                [(AArch64fcmge (fabs node:$Rn), (fabs node:$Rm)),
                                 (int_aarch64_neon_facge node:$Rn, node:$Rm)]>;
def AArch64facgt    : PatFrags<(ops node:$Rn, node:$Rm),
                                [(AArch64fcmgt (fabs node:$Rn), (fabs node:$Rm)),
                                 (int_aarch64_neon_facgt node:$Rn, node:$Rm)]>;

def AArch64fmaxnmv : PatFrags<(ops node:$Rn),
                              [(vecreduce_fmax node:$Rn),
                               (int_aarch64_neon_fmaxnmv node:$Rn)]>;
def AArch64fminnmv : PatFrags<(ops node:$Rn),
                              [(vecreduce_fmin node:$Rn),
                               (int_aarch64_neon_fminnmv node:$Rn)]>;
def AArch64fmaxv : PatFrags<(ops node:$Rn),
                            [(vecreduce_fmaximum node:$Rn),
                             (int_aarch64_neon_fmaxv node:$Rn)]>;
def AArch64fminv : PatFrags<(ops node:$Rn),
                            [(vecreduce_fminimum node:$Rn),
                             (int_aarch64_neon_fminv node:$Rn)]>;

def SDT_AArch64SETTAG : SDTypeProfile<0, 2, [SDTCisPtrTy<0>, SDTCisPtrTy<1>]>;
def AArch64stg : SDNode<"AArch64ISD::STG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stzg : SDNode<"AArch64ISD::STZG", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64st2g : SDNode<"AArch64ISD::ST2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stz2g : SDNode<"AArch64ISD::STZ2G", SDT_AArch64SETTAG, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def SDT_AArch64unpk : SDTypeProfile<1, 1, [
    SDTCisInt<0>, SDTCisInt<1>, SDTCisOpSmallerThanOp<1, 0>
]>;
def AArch64sunpkhi : SDNode<"AArch64ISD::SUNPKHI", SDT_AArch64unpk>;
def AArch64sunpklo : SDNode<"AArch64ISD::SUNPKLO", SDT_AArch64unpk>;
def AArch64uunpkhi : SDNode<"AArch64ISD::UUNPKHI", SDT_AArch64unpk>;
def AArch64uunpklo : SDNode<"AArch64ISD::UUNPKLO", SDT_AArch64unpk>;

def AArch64ldp : SDNode<"AArch64ISD::LDP", SDT_AArch64ldp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldiapp : SDNode<"AArch64ISD::LDIAPP", SDT_AArch64ldiapp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64ldnp : SDNode<"AArch64ISD::LDNP", SDT_AArch64ldnp, [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
def AArch64stp : SDNode<"AArch64ISD::STP", SDT_AArch64stp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stilp : SDNode<"AArch64ISD::STILP", SDT_AArch64stilp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def AArch64stnp : SDNode<"AArch64ISD::STNP", SDT_AArch64stnp, [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

def AArch64tbl : SDNode<"AArch64ISD::TBL", SDT_AArch64TBL>;

def AArch64probedalloca
    : SDNode<"AArch64ISD::PROBED_ALLOCA",
             SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>,
             [SDNPHasChain, SDNPMayStore]>;

def AArch64mrs : SDNode<"AArch64ISD::MRS",
                        SDTypeProfile<1, 1, [SDTCisVT<0, i64>, SDTCisVT<1, i32>]>,
                        [SDNPHasChain, SDNPOutGlue]>;

def SD_AArch64rshrnb : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisInt<2>]>;
def AArch64rshrnb : SDNode<"AArch64ISD::RSHRNB_I", SD_AArch64rshrnb>;
def AArch64rshrnb_pf : PatFrags<(ops node:$rs, node:$i),
                            [(AArch64rshrnb node:$rs, node:$i),
                             (int_aarch64_sve_rshrnb node:$rs, node:$i)]>;

def AArch64CttzElts : SDNode<"AArch64ISD::CTTZ_ELTS", SDTypeProfile<1, 1,
                             [SDTCisInt<0>, SDTCisVec<1>]>, []>;
952
// Match an 'add' node, and also treat an 'or' node as an 'add' if the or'ed
// operands have no common bits.
955def add_and_or_is_add : PatFrags<(ops node:$lhs, node:$rhs),
956                         [(add node:$lhs, node:$rhs), (or node:$lhs, node:$rhs)],[{
957   if (N->getOpcode() == ISD::ADD)
958     return true;
959   return CurDAG->isADDLike(SDValue(N,0));
960}]> {
961  let GISelPredicateCode = [{
     // Only handle G_ADD for now. FIXME: add the ability to compute whether
     // the operands of a G_OR have common bits set.
964     return MI.getOpcode() == TargetOpcode::G_ADD;
965  }];
966}
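// Example of the 'or'-as-'add' case: if %lo is known to fit in 16 bits, then
// (or (shl %hi, 16), %lo) sets no bit in both operands, so it is equivalent
// to (add (shl %hi, 16), %lo) and 'add'-based patterns can fire on it too.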
967
// Match a 'mul' where both operands have more than 32 sign bits, so the
// multiply can instead be performed on the narrower 32-bit operands.
969def smullwithsignbits : PatFrag<(ops node:$l, node:$r), (mul node:$l, node:$r), [{
970  return CurDAG->ComputeNumSignBits(N->getOperand(0)) > 32 &&
971         CurDAG->ComputeNumSignBits(N->getOperand(1)) > 32;
972}]>;
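// For example, (mul (sext i32 %a), (sext i32 %b)) performed in i64 has more
// than 32 sign bits in each operand, so it can be selected to
// "smull xd, wn, wm" instead of a full 64x64-bit MUL.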
973
//===----------------------------------------------------------------------===//
// AArch64 Instruction Predicate Definitions.
//===----------------------------------------------------------------------===//
// We could compute these on a per-module basis but doing so requires accessing
// the Function object through the <Target>Subtarget, and objections were raised
// to that (see post-commit review comments for r301750).
982let RecomputePerFunction = 1 in {
983  def ForCodeSize   : Predicate<"shouldOptForSize(MF)">;
984  def NotForCodeSize   : Predicate<"!shouldOptForSize(MF)">;
985  // Avoid generating STRQro if it is slow, unless we're optimizing for code size.
986  def UseSTRQro : Predicate<"!Subtarget->isSTRQroSlow() || shouldOptForSize(MF)">;
987
988  // Register restrictions for indirect tail-calls:
989  // - If branch target enforcement is enabled, indirect calls must use x16 or
990  //   x17, because these are the only registers which can target the BTI C
991  //   instruction.
992  // - If PAuthLR is enabled, x16 is used in the epilogue to hold the address
993  //   of the signing instruction. This can't be changed because it is used by a
994  //   HINT instruction which only accepts x16. We can't load anything from the
995  //   stack after this because the authentication instruction checks that SP is
996  //   the same as it was at function entry, so we can't have anything on the
997  //   stack.
998
999  // BTI on, PAuthLR off: x16 or x17
1000  def TailCallX16X17 : Predicate<[{  MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && !MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
1001  // BTI on, PAuthLR on: x17 only
1002  def TailCallX17 : Predicate<[{ MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
1003  // BTI off, PAuthLR on: Any non-callee-saved register except x16
1004  def TailCallNotX16 : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
1005  // BTI off, PAuthLR off: Any non-callee-saved register
1006  def TailCallAny : Predicate<[{ !MF->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement() && !MF->getInfo<AArch64FunctionInfo>()->branchProtectionPAuthLR() }]>;
1007
1008  def SLSBLRMitigation : Predicate<[{ MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
1009  def NoSLSBLRMitigation : Predicate<[{ !MF->getSubtarget<AArch64Subtarget>().hardenSlsBlr() }]>;
1010  // Toggles patterns which aren't beneficial in GlobalISel when we aren't
1011  // optimizing. This allows us to selectively use patterns without impacting
1012  // SelectionDAG's behaviour.
1013  // FIXME: One day there will probably be a nicer way to check for this, but
1014  // today is not that day.
1015  def OptimizedGISelOrOtherSelector : Predicate<"!MF->getFunction().hasOptNone() || MF->getProperties().hasProperty(MachineFunctionProperties::Property::FailedISel) || !MF->getProperties().hasProperty(MachineFunctionProperties::Property::Legalized)">;
1016}
1017
1018include "AArch64InstrFormats.td"
1019include "SVEInstrFormats.td"
1020include "SMEInstrFormats.td"
1021
1022//===----------------------------------------------------------------------===//
1023
1024//===----------------------------------------------------------------------===//
1025// Miscellaneous instructions.
1026//===----------------------------------------------------------------------===//
1027
1028let hasSideEffects = 1, isCodeGenOnly = 1 in {
1029let Defs = [SP], Uses = [SP] in {
// We set Sched to the empty list because we expect these instructions to
// simply be removed in most cases.
1032def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
1033                              [(AArch64callseq_start timm:$amt1, timm:$amt2)]>,
1034                              Sched<[]>;
1035def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2),
1036                            [(AArch64callseq_end timm:$amt1, timm:$amt2)]>,
1037                            Sched<[]>;
1038
1039}
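// These callseq markers are normally folded into the prologue/epilogue SP
// adjustment; when they must survive (e.g. with variable-sized call frames),
// they typically expand to a plain "sub sp, sp, #N" / "add sp, sp, #N" pair
// (a sketch of the usual lowering, not a guarantee).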
1040
1041let Defs = [SP, NZCV], Uses = [SP] in {
1042// Probed stack allocation of a constant size, used in function prologues when
1043// stack-clash protection is enabled.
1044def PROBED_STACKALLOC : Pseudo<(outs GPR64:$scratch),
1045                               (ins i64imm:$stacksize, i64imm:$fixed_offset,
1046                                i64imm:$scalable_offset),
1047                               []>,
1048                               Sched<[]>;
1049
1050// Probed stack allocation of a variable size, used in function prologues when
1051// stack-clash protection is enabled.
1052def PROBED_STACKALLOC_VAR : Pseudo<(outs),
1053                                   (ins GPR64sp:$target),
1054                                   []>,
1055                                   Sched<[]>;
1056
1057// Probed stack allocations of a variable size, used for allocas of unknown size
1058// when stack-clash protection is enabled.
1059let usesCustomInserter = 1 in
1060def PROBED_STACKALLOC_DYN : Pseudo<(outs),
1061                                   (ins GPR64common:$target),
1062                                   [(AArch64probedalloca GPR64common:$target)]>,
1063                                   Sched<[]>;
1064
1065} // Defs = [SP, NZCV], Uses = [SP] in
1066} // hasSideEffects = 1, isCodeGenOnly = 1
1067
1068let isReMaterializable = 1, isCodeGenOnly = 1 in {
1069// FIXME: The following pseudo instructions are only needed because remat
1070// cannot handle multiple instructions.  When that changes, they can be
1071// removed, along with the AArch64Wrapper node.
1072
1073let AddedComplexity = 10 in
1074def LOADgot : Pseudo<(outs GPR64common:$dst), (ins i64imm:$addr),
1075                     [(set GPR64common:$dst, (AArch64LOADgot tglobaladdr:$addr))]>,
1076              Sched<[WriteLDAdr]>;
1077
1078// The MOVaddr instruction should match only when the add is not folded
1079// into a load or store address.
1080def MOVaddr
1081    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1082             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaladdr:$hi),
1083                                            tglobaladdr:$low))]>,
1084      Sched<[WriteAdrAdr]>;
1085def MOVaddrJT
1086    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1087             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tjumptable:$hi),
1088                                             tjumptable:$low))]>,
1089      Sched<[WriteAdrAdr]>;
1090def MOVaddrCP
1091    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1092             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tconstpool:$hi),
1093                                             tconstpool:$low))]>,
1094      Sched<[WriteAdrAdr]>;
1095def MOVaddrBA
1096    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1097             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tblockaddress:$hi),
1098                                             tblockaddress:$low))]>,
1099      Sched<[WriteAdrAdr]>;
1100def MOVaddrTLS
1101    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1102             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp tglobaltlsaddr:$hi),
1103                                            tglobaltlsaddr:$low))]>,
1104      Sched<[WriteAdrAdr]>;
1105def MOVaddrEXT
1106    : Pseudo<(outs GPR64common:$dst), (ins i64imm:$hi, i64imm:$low),
1107             [(set GPR64common:$dst, (AArch64addlow (AArch64adrp texternalsym:$hi),
1108                                            texternalsym:$low))]>,
1109      Sched<[WriteAdrAdr]>;
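// For example, MOVaddr for a global typically expands to the classic
// ADRP+ADD pair:
//   adrp x0, sym
//   add  x0, x0, :lo12:sym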
1110// Normally AArch64addlow either gets folded into a following ldr/str,
1111// or together with an adrp into MOVaddr above. For cases with TLS, it
1112// might appear without either of them, so allow lowering it into a plain
1113// add.
1114def ADDlowTLS
1115    : Pseudo<(outs GPR64sp:$dst), (ins GPR64sp:$src, i64imm:$low),
1116             [(set GPR64sp:$dst, (AArch64addlow GPR64sp:$src,
1117                                            tglobaltlsaddr:$low))]>,
1118      Sched<[WriteAdr]>;
1119
1120} // isReMaterializable, isCodeGenOnly
1121
1122def : Pat<(AArch64LOADgot tglobaltlsaddr:$addr),
1123          (LOADgot tglobaltlsaddr:$addr)>;
1124
1125def : Pat<(AArch64LOADgot texternalsym:$addr),
1126          (LOADgot texternalsym:$addr)>;
1127
1128def : Pat<(AArch64LOADgot tconstpool:$addr),
1129          (LOADgot tconstpool:$addr)>;
1130
// In general these get lowered into a sequence of three 4-byte instructions.
// The 32-bit jump-table destination actually needs only two instructions,
// since we can use the table itself as a PC-relative base. But that
// optimization occurs after branch relaxation, so be pessimistic.
1135let Size = 12, Constraints = "@earlyclobber $dst,@earlyclobber $scratch",
1136    isNotDuplicable = 1 in {
1137def JumpTableDest32 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
1138                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
1139                      Sched<[]>;
1140def JumpTableDest16 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
1141                             (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
1142                      Sched<[]>;
1143def JumpTableDest8 : Pseudo<(outs GPR64:$dst, GPR64sp:$scratch),
1144                            (ins GPR64:$table, GPR64:$entry, i32imm:$jti), []>,
1145                     Sched<[]>;
1146}
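// Roughly (a sketch; the exact sequence is produced during pseudo expansion):
// materialize the jump-table address PC-relatively, load the 8/16/32-bit
// entry selected by $entry, extend it, and add it to the PC-relative base to
// form $dst.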
1147
1148// A hardened but more expensive version of jump-table dispatch.
1149// This combines the target address computation (otherwise done using the
1150// JumpTableDest pseudos above) with the branch itself (otherwise done using
1151// a plain BR) in a single non-attackable sequence.
1152//
1153// We take the final entry index as an operand to allow isel freedom. This does
1154// mean that the index can be attacker-controlled.  To address that, we also do
1155// limited checking of the offset, mainly ensuring it still points within the
1156// jump-table array.  When it doesn't, this branches to the first entry.
1157// We might want to trap instead.
1158//
1159// This is intended for use in conjunction with ptrauth for other code pointers,
1160// to avoid signing jump-table entries and turning them into pointers.
1161//
1162// Entry index is passed in x16.  Clobbers x16/x17/nzcv.
1163let isNotDuplicable = 1 in
1164def BR_JumpTable : Pseudo<(outs), (ins i32imm:$jti), []>, Sched<[]> {
1165  let isBranch = 1;
1166  let isTerminator = 1;
1167  let isIndirectBranch = 1;
1168  let isBarrier = 1;
1169  let isNotDuplicable = 1;
1170  let Defs = [X16,X17,NZCV];
1171  let Uses = [X16];
1172  let Size = 44; // 28 fixed + 16 variable, for table size materialization
1173}
1174
1175// Space-consuming pseudo to aid testing of placement and reachability
1176// algorithms. Immediate operand is the number of bytes this "instruction"
1177// occupies; register operands can be used to enforce dependency and constrain
1178// the scheduler.
1179let hasSideEffects = 1, mayLoad = 1, mayStore = 1 in
1180def SPACE : Pseudo<(outs GPR64:$Rd), (ins i32imm:$size, GPR64:$Rn),
1181                   [(set GPR64:$Rd, (int_aarch64_space imm:$size, GPR64:$Rn))]>,
1182            Sched<[]>;
1183
1184let hasSideEffects = 1, isCodeGenOnly = 1 in {
1185  def SpeculationSafeValueX
1186      : Pseudo<(outs GPR64:$dst), (ins GPR64:$src), []>, Sched<[]>;
1187  def SpeculationSafeValueW
1188      : Pseudo<(outs GPR32:$dst), (ins GPR32:$src), []>, Sched<[]>;
1189}
1190
// SpeculationBarrierEndBB must only be used after unconditional control flow,
// i.e. after a terminator for which isBarrier is true.
1193let hasSideEffects = 1, isCodeGenOnly = 1, isTerminator = 1, isBarrier = 1 in {
1194  // This gets lowered to a pair of 4-byte instructions.
1195  let Size = 8 in
1196  def SpeculationBarrierISBDSBEndBB
1197      : Pseudo<(outs), (ins), []>, Sched<[]>;
1198  // This gets lowered to a 4-byte instruction.
1199  let Size = 4 in
1200  def SpeculationBarrierSBEndBB
1201      : Pseudo<(outs), (ins), []>, Sched<[]>;
1202}
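// As the names suggest, SpeculationBarrierISBDSBEndBB is expected to lower to
// a DSB+ISB pair and SpeculationBarrierSBEndBB to a single SB, matching the
// Size values above.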
1203
1204//===----------------------------------------------------------------------===//
1205// System instructions.
1206//===----------------------------------------------------------------------===//
1207
1208def HINT : HintI<"hint">;
1209def : InstAlias<"nop",  (HINT 0b000)>;
1210def : InstAlias<"yield",(HINT 0b001)>;
1211def : InstAlias<"wfe",  (HINT 0b010)>;
1212def : InstAlias<"wfi",  (HINT 0b011)>;
1213def : InstAlias<"sev",  (HINT 0b100)>;
1214def : InstAlias<"sevl", (HINT 0b101)>;
1215def : InstAlias<"dgh",  (HINT 0b110)>;
1216def : InstAlias<"esb",  (HINT 0b10000)>, Requires<[HasRAS]>;
1217def : InstAlias<"csdb", (HINT 20)>;
// In order to be able to write readable assembly, LLVM should accept assembly
// inputs that use Branch Target Identification mnemonics, even with BTI disabled.
// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
// should not emit these mnemonics unless BTI is enabled.
1222def : InstAlias<"bti",  (HINT 32), 0>;
1223def : InstAlias<"bti $op", (HINT btihint_op:$op), 0>;
1224def : InstAlias<"bti",  (HINT 32)>, Requires<[HasBTI]>;
1225def : InstAlias<"bti $op", (HINT btihint_op:$op)>, Requires<[HasBTI]>;
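// For reference, the BTI forms map onto the hint space as follows:
//   bti    -> hint #32      bti c  -> hint #34
//   bti j  -> hint #36      bti jc -> hint #38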
1226
1227// v8.2a Statistical Profiling extension
1228def : InstAlias<"psb $op",  (HINT psbhint_op:$op)>, Requires<[HasSPE]>;
1229
1230// As far as LLVM is concerned this writes to the system's exclusive monitors.
1231let mayLoad = 1, mayStore = 1 in
1232def CLREX : CRmSystemI<imm0_15, 0b010, "clrex">;
1233
1234// NOTE: ideally, this would have mayStore = 0, mayLoad = 0, but we cannot
1235// model patterns with sufficiently fine granularity.
1236let mayLoad = ?, mayStore = ? in {
1237def DMB   : CRmSystemI<barrier_op, 0b101, "dmb",
1238                       [(int_aarch64_dmb (i32 imm32_0_15:$CRm))]>;
1239
1240def DSB   : CRmSystemI<barrier_op, 0b100, "dsb",
1241                       [(int_aarch64_dsb (i32 imm32_0_15:$CRm))]>;
1242
1243def ISB   : CRmSystemI<barrier_op, 0b110, "isb",
1244                       [(int_aarch64_isb (i32 imm32_0_15:$CRm))]>;
1245
1246def TSB   : CRmSystemI<barrier_op, 0b010, "tsb", []> {
1247  let CRm        = 0b0010;
1248  let Inst{12}   = 0;
1249  let Predicates = [HasTRACEV8_4];
1250}
1251
1252def DSBnXS  : CRmSystemI<barrier_nxs_op, 0b001, "dsb"> {
1253  let CRm{1-0}   = 0b11;
1254  let Inst{9-8}  = 0b10;
1255  let Predicates = [HasXS];
1256}
1257
1258let Predicates = [HasWFxT] in {
1259def WFET : RegInputSystemI<0b0000, 0b000, "wfet">;
1260def WFIT : RegInputSystemI<0b0000, 0b001, "wfit">;
1261}
1262
1263// Branch Record Buffer two-word mnemonic instructions
1264class BRBEI<bits<3> op2, string keyword>
1265    : SimpleSystemI<0, (ins), "brb", keyword>, Sched<[WriteSys]> {
1266  let Inst{31-8} = 0b110101010000100101110010;
1267  let Inst{7-5} = op2;
1268  let Predicates = [HasBRBE];
1269}
1270def BRB_IALL: BRBEI<0b100, "\tiall">;
1271def BRB_INJ:  BRBEI<0b101, "\tinj">;
1272
1273}
1274
1275// Allow uppercase and lowercase keyword arguments for BRB IALL and BRB INJ
1276def : TokenAlias<"INJ", "inj">;
1277def : TokenAlias<"IALL", "iall">;
1278
1279
1280// ARMv9.4-A Guarded Control Stack
1281class GCSNoOp<bits<3> op2, string mnemonic>
1282    : SimpleSystemI<0, (ins), mnemonic, "">, Sched<[]> {
1283  let Inst{20-8} = 0b0100001110111;
1284  let Inst{7-5} = op2;
1285  let Predicates = [HasGCS];
1286}
1287def GCSPUSHX : GCSNoOp<0b100, "gcspushx">;
1288def GCSPOPCX : GCSNoOp<0b101, "gcspopcx">;
1289def GCSPOPX  : GCSNoOp<0b110, "gcspopx">;
1290
1291class GCSRtIn<bits<3> op1, bits<3> op2, string mnemonic,
1292            list<dag> pattern = []>
1293    : RtSystemI<0, (outs), (ins GPR64:$Rt), mnemonic, "\t$Rt", pattern> {
1294  let Inst{20-19} = 0b01;
1295  let Inst{18-16} = op1;
1296  let Inst{15-8} = 0b01110111;
1297  let Inst{7-5} = op2;
1298  let Predicates = [HasGCS];
1299  let hasSideEffects = 1;
1300}
1301
1302let mayStore = 1, mayLoad = 1 in
1303def GCSSS1   : GCSRtIn<0b011, 0b010, "gcsss1">;
1304let mayStore = 1 in
1305def GCSPUSHM : GCSRtIn<0b011, 0b000, "gcspushm">;
1306
1307class GCSRtOut<bits<3> op1, bits<3> op2, string mnemonic,
1308            list<dag> pattern = []>
1309    : RtSystemI<1, (outs GPR64:$Rt), (ins GPR64:$src), mnemonic, "\t$Rt", pattern> {
1310  let Inst{20-19} = 0b01;
1311  let Inst{18-16} = op1;
1312  let Inst{15-8} = 0b01110111;
1313  let Inst{7-5} = op2;
1314  let Predicates = [HasGCS];
1315  let hasSideEffects = 1;
1316  // The input register is unchanged when GCS is disabled, so we need it as
1317  // both an input and output operand.
1318  let Constraints = "$src = $Rt";
1319}
1320
1321let mayStore = 1, mayLoad = 1 in
1322def GCSSS2  : GCSRtOut<0b011, 0b011, "gcsss2">;
// FIXME: mayStore = 1 is only needed to match the intrinsic definition
1324let mayStore = 1, mayLoad = 1 in
1325def GCSPOPM : GCSRtOut<0b011, 0b001, "gcspopm",
1326                       [(set GPR64:$Rt, (int_aarch64_gcspopm GPR64:$src))]>;
1327def GCSPOPM_NoOp : InstAlias<"gcspopm", (GCSPOPM XZR)>, Requires<[HasGCS]>; // Rt defaults to XZR if absent
1328
1329def GCSB_DSYNC_disable : InstAlias<"gcsb\tdsync", (HINT 19), 0>;
1330def GCSB_DSYNC         : InstAlias<"gcsb\tdsync", (HINT 19), 1>, Requires<[HasGCS]>;
1331
1332def : TokenAlias<"DSYNC", "dsync">;
1333
1334let Uses = [X16], Defs = [X16], CRm = 0b0101 in {
1335  def CHKFEAT   : SystemNoOperands<0b000, "hint\t#40",
1336                                   [(set X16, (int_aarch64_chkfeat X16))]>;
1337}
1338def : InstAlias<"chkfeat\tx16", (CHKFEAT), 0>;
1339def : InstAlias<"chkfeat\tx16", (CHKFEAT), 1>, Requires<[HasCHK]>;
1340
1341class GCSSt<string mnemonic, bits<3> op>
1342    : I<(outs), (ins GPR64:$Rt, GPR64sp:$Rn), mnemonic, "\t$Rt, [$Rn]", "", []>, Sched<[]> {
1343  bits<5> Rt;
1344  bits<5> Rn;
1345  let Inst{31-15} = 0b11011001000111110;
1346  let Inst{14-12} = op;
1347  let Inst{11-10} = 0b11;
1348  let Inst{9-5} = Rn;
1349  let Inst{4-0} = Rt;
1350  let Predicates = [HasGCS];
1351}
1352def GCSSTR  : GCSSt<"gcsstr",  0b000>;
1353def GCSSTTR : GCSSt<"gcssttr", 0b001>;
1354
1355// ARMv8.2-A Dot Product
1356let Predicates = [HasDotProd] in {
1357defm SDOT : SIMDThreeSameVectorDot<0, 0, "sdot", AArch64sdot>;
1358defm UDOT : SIMDThreeSameVectorDot<1, 0, "udot", AArch64udot>;
1359defm SDOTlane : SIMDThreeSameVectorDotIndex<0, 0, 0b10, "sdot", AArch64sdot>;
1360defm UDOTlane : SIMDThreeSameVectorDotIndex<1, 0, 0b10, "udot", AArch64udot>;
1361}
1362
1363// ARMv8.6-A BFloat
1364let Predicates = [HasNEON, HasBF16] in {
1365defm BFDOT       : SIMDThreeSameVectorBFDot<1, "bfdot">;
1366defm BF16DOTlane : SIMDThreeSameVectorBF16DotI<0, "bfdot">;
1367def BFMMLA       : SIMDThreeSameVectorBF16MatrixMul<"bfmmla">;
1368def BFMLALB      : SIMDBF16MLAL<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
1369def BFMLALT      : SIMDBF16MLAL<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
1370def BFMLALBIdx   : SIMDBF16MLALIndex<0, "bfmlalb", int_aarch64_neon_bfmlalb>;
1371def BFMLALTIdx   : SIMDBF16MLALIndex<1, "bfmlalt", int_aarch64_neon_bfmlalt>;
1372def BFCVTN       : SIMD_BFCVTN;
1373def BFCVTN2      : SIMD_BFCVTN2;
1374
1375def : Pat<(v4bf16 (any_fpround (v4f32 V128:$Rn))),
1376          (EXTRACT_SUBREG (BFCVTN V128:$Rn), dsub)>;
1377
1378// Vector-scalar BFDOT:
1379// The second source operand of the 64-bit variant of BF16DOTlane is a 128-bit
1380// register (the instruction uses a single 32-bit lane from it), so the pattern
1381// is a bit tricky.
1382def : Pat<(v2f32 (int_aarch64_neon_bfdot
1383                    (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
1384                    (v4bf16 (bitconvert
1385                      (v2i32 (AArch64duplane32
1386                        (v4i32 (bitconvert
1387                          (v8bf16 (insert_subvector undef,
1388                            (v4bf16 V64:$Rm),
1389                            (i64 0))))),
1390                        VectorIndexS:$idx)))))),
1391          (BF16DOTlanev4bf16 (v2f32 V64:$Rd), (v4bf16 V64:$Rn),
1392                             (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
1393                             VectorIndexS:$idx)>;
1394}
1395
1396let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in {
1397def BFCVT : BF16ToSinglePrecision<"bfcvt">;
1398// Round FP32 to BF16.
1399def : Pat<(bf16 (any_fpround (f32 FPR32:$Rn))), (BFCVT $Rn)>;
1400}
1401
1402// ARMv8.6A AArch64 matrix multiplication
1403let Predicates = [HasMatMulInt8] in {
1404def  SMMLA : SIMDThreeSameVectorMatMul<0, 0, "smmla", int_aarch64_neon_smmla>;
1405def  UMMLA : SIMDThreeSameVectorMatMul<0, 1, "ummla", int_aarch64_neon_ummla>;
1406def USMMLA : SIMDThreeSameVectorMatMul<1, 0, "usmmla", int_aarch64_neon_usmmla>;
1407defm USDOT : SIMDThreeSameVectorDot<0, 1, "usdot", int_aarch64_neon_usdot>;
1408defm USDOTlane : SIMDThreeSameVectorDotIndex<0, 1, 0b10, "usdot", int_aarch64_neon_usdot>;
1409
// There is no sudot intrinsic, so the lane form of SUDOT matches a pattern
// where usdot is expected. The second operand is fed through a dup operation
// to repeat the indexed element.
1413class BaseSIMDSUDOTIndex<bit Q, string dst_kind, string lhs_kind,
1414                         string rhs_kind, RegisterOperand RegType,
1415                         ValueType AccumType, ValueType InputType>
1416      : BaseSIMDThreeSameVectorIndexS<Q, 0, 0b00, 0b1111, "sudot", dst_kind,
1417                                        lhs_kind, rhs_kind, RegType, AccumType,
1418                                        InputType, null_frag> {
1419  let Pattern = [(set (AccumType RegType:$dst),
1420                      (AccumType (int_aarch64_neon_usdot (AccumType RegType:$Rd),
1421                                 (InputType (bitconvert (AccumType
1422                                    (AArch64duplane32 (v4i32 V128:$Rm),
1423                                        VectorIndexS:$idx)))),
1424                                 (InputType RegType:$Rn))))];
1425}
1426
1427multiclass SIMDSUDOTIndex {
1428  def v8i8  : BaseSIMDSUDOTIndex<0, ".2s", ".8b", ".4b", V64, v2i32, v8i8>;
1429  def v16i8 : BaseSIMDSUDOTIndex<1, ".4s", ".16b", ".4b", V128, v4i32, v16i8>;
1430}
1431
1432defm SUDOTlane : SIMDSUDOTIndex;
1433
1434}
1435
1436// ARMv8.2-A FP16 Fused Multiply-Add Long
1437let Predicates = [HasNEON, HasFP16FML] in {
1438defm FMLAL      : SIMDThreeSameVectorFML<0, 1, 0b001, "fmlal", int_aarch64_neon_fmlal>;
1439defm FMLSL      : SIMDThreeSameVectorFML<0, 1, 0b101, "fmlsl", int_aarch64_neon_fmlsl>;
1440defm FMLAL2     : SIMDThreeSameVectorFML<1, 0, 0b001, "fmlal2", int_aarch64_neon_fmlal2>;
1441defm FMLSL2     : SIMDThreeSameVectorFML<1, 0, 0b101, "fmlsl2", int_aarch64_neon_fmlsl2>;
1442defm FMLALlane  : SIMDThreeSameVectorFMLIndex<0, 0b0000, "fmlal", int_aarch64_neon_fmlal>;
1443defm FMLSLlane  : SIMDThreeSameVectorFMLIndex<0, 0b0100, "fmlsl", int_aarch64_neon_fmlsl>;
1444defm FMLAL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1000, "fmlal2", int_aarch64_neon_fmlal2>;
1445defm FMLSL2lane : SIMDThreeSameVectorFMLIndex<1, 0b1100, "fmlsl2", int_aarch64_neon_fmlsl2>;
1446}
1447
1448// Armv8.2-A Crypto extensions
1449let Predicates = [HasSHA3] in {
1450def SHA512H   : CryptoRRRTied<0b0, 0b00, "sha512h">;
1451def SHA512H2  : CryptoRRRTied<0b0, 0b01, "sha512h2">;
1452def SHA512SU0 : CryptoRRTied_2D<0b0, 0b00, "sha512su0">;
1453def SHA512SU1 : CryptoRRRTied_2D<0b0, 0b10, "sha512su1">;
1454def RAX1      : CryptoRRR_2D<0b0,0b11, "rax1">;
1455def EOR3      : CryptoRRRR_16B<0b00, "eor3">;
1456def BCAX      : CryptoRRRR_16B<0b01, "bcax">;
1457def XAR       : CryptoRRRi6<"xar">;
1458
1459class SHA3_pattern<Instruction INST, Intrinsic OpNode, ValueType VecTy>
1460  : Pat<(VecTy (OpNode (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))),
1461        (INST (VecTy V128:$Vd), (VecTy V128:$Vn), (VecTy V128:$Vm))>;
1462
1463def : Pat<(v2i64 (int_aarch64_crypto_sha512su0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
1464          (SHA512SU0 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1465
1466def : SHA3_pattern<SHA512H, int_aarch64_crypto_sha512h, v2i64>;
1467def : SHA3_pattern<SHA512H2, int_aarch64_crypto_sha512h2, v2i64>;
1468def : SHA3_pattern<SHA512SU1, int_aarch64_crypto_sha512su1, v2i64>;
1469
1470def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v16i8>;
1471def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
1472def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
1473def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;
1474
1475class EOR3_pattern<ValueType VecTy>
1476  : Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
1477        (EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
1478
1479def : EOR3_pattern<v16i8>;
1480def : EOR3_pattern<v8i16>;
1481def : EOR3_pattern<v4i32>;
1482def : EOR3_pattern<v2i64>;
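// EOR3 computes a three-way XOR in a single instruction, e.g.
//   eor3 v0.16b, v1.16b, v2.16b, v3.16b   ; v0 = v1 ^ v2 ^ v3
// so the patterns above collapse any (a ^ b) ^ c chain on 128-bit vectors.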
1483
1484class BCAX_pattern<ValueType VecTy>
1485  : Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),
1486        (BCAX (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
1487
1488def : BCAX_pattern<v16i8>;
1489def : BCAX_pattern<v8i16>;
1490def : BCAX_pattern<v4i32>;
1491def : BCAX_pattern<v2i64>;
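// BCAX is "bit clear and XOR": Vd = Vn ^ (Vm & ~Va), which is exactly the
// (xor a, (and b, (vnot c))) form matched above.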
1492
1493def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v16i8>;
1494def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v8i16>;
1495def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v4i32>;
1496def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxu, v2i64>;
1497
1498def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v16i8>;
1499def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v8i16>;
1500def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v4i32>;
1501def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3s, v2i64>;
1502
1503def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v16i8>;
1504def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v8i16>;
1505def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v4i32>;
1506def : SHA3_pattern<BCAX, int_aarch64_crypto_bcaxs, v2i64>;
1507
1508def : Pat<(v2i64 (int_aarch64_crypto_rax1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))),
1509          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
1510
1511def : Pat<(v2i64 (int_aarch64_crypto_xar (v2i64 V128:$Vn), (v2i64 V128:$Vm), (i64 timm0_63:$imm))),
1512          (XAR (v2i64 V128:$Vn), (v2i64 V128:$Vm), (timm0_63:$imm))>;
1513
1514def : Pat<(xor  (v2i64 V128:$Vn), (or (AArch64vlshr (v2i64 V128:$Vm), (i32 63)), (AArch64vshl (v2i64 V128:$Vm), (i32 1)))),
1515          (RAX1 (v2i64 V128:$Vn), (v2i64 V128:$Vm))>;
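// The final pattern recognizes a hand-written rotate: (Vm >> 63) | (Vm << 1)
// is ROL(Vm, 1) on 64-bit lanes, and Vn ^ ROL(Vm, 1) is exactly RAX1.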
1516
1517} // HasSHA3
1518
1519let Predicates = [HasSM4] in {
1520def SM3TT1A   : CryptoRRRi2Tied<0b0, 0b00, "sm3tt1a">;
1521def SM3TT1B   : CryptoRRRi2Tied<0b0, 0b01, "sm3tt1b">;
1522def SM3TT2A   : CryptoRRRi2Tied<0b0, 0b10, "sm3tt2a">;
1523def SM3TT2B   : CryptoRRRi2Tied<0b0, 0b11, "sm3tt2b">;
1524def SM3SS1    : CryptoRRRR_4S<0b10, "sm3ss1">;
1525def SM3PARTW1 : CryptoRRRTied_4S<0b1, 0b00, "sm3partw1">;
1526def SM3PARTW2 : CryptoRRRTied_4S<0b1, 0b01, "sm3partw2">;
1527def SM4ENCKEY : CryptoRRR_4S<0b1, 0b10, "sm4ekey">;
1528def SM4E      : CryptoRRTied_4S<0b0, 0b01, "sm4e">;
1529
1530def : Pat<(v4i32 (int_aarch64_crypto_sm3ss1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))),
1531          (SM3SS1 (v4i32 V128:$Vn), (v4i32 V128:$Vm), (v4i32 V128:$Va))>;
1532
1533class SM3PARTW_pattern<Instruction INST, Intrinsic OpNode>
1534  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
1535        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;
1536
1537class SM3TT_pattern<Instruction INST, Intrinsic OpNode>
1538  : Pat<(v4i32 (OpNode (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (i64 VectorIndexS_timm:$imm) )),
1539        (INST (v4i32 V128:$Vd), (v4i32 V128:$Vn), (v4i32 V128:$Vm), (VectorIndexS_timm:$imm))>;
1540
1541class SM4_pattern<Instruction INST, Intrinsic OpNode>
1542  : Pat<(v4i32 (OpNode (v4i32 V128:$Vn), (v4i32 V128:$Vm))),
1543        (INST (v4i32 V128:$Vn), (v4i32 V128:$Vm))>;
1544
1545def : SM3PARTW_pattern<SM3PARTW1, int_aarch64_crypto_sm3partw1>;
1546def : SM3PARTW_pattern<SM3PARTW2, int_aarch64_crypto_sm3partw2>;
1547
1548def : SM3TT_pattern<SM3TT1A, int_aarch64_crypto_sm3tt1a>;
1549def : SM3TT_pattern<SM3TT1B, int_aarch64_crypto_sm3tt1b>;
1550def : SM3TT_pattern<SM3TT2A, int_aarch64_crypto_sm3tt2a>;
1551def : SM3TT_pattern<SM3TT2B, int_aarch64_crypto_sm3tt2b>;
1552
1553def : SM4_pattern<SM4ENCKEY, int_aarch64_crypto_sm4ekey>;
1554def : SM4_pattern<SM4E, int_aarch64_crypto_sm4e>;
1555} // HasSM4
1556
1557let Predicates = [HasRCPC] in {
1558  // v8.3 Release Consistent Processor Consistent support, optional in v8.2.
1559  def LDAPRB  : RCPCLoad<0b00, "ldaprb", GPR32>;
1560  def LDAPRH  : RCPCLoad<0b01, "ldaprh", GPR32>;
1561  def LDAPRW  : RCPCLoad<0b10, "ldapr", GPR32>;
1562  def LDAPRX  : RCPCLoad<0b11, "ldapr", GPR64>;
1563}
1564
// v8.3a complex add and multiply-accumulate. No predicate here; that is done
// inside the multiclass, as the FP16 versions need different predicates.
1567defm FCMLA : SIMDThreeSameVectorTiedComplexHSD<1, 0b110, complexrotateop,
1568                                               "fcmla", null_frag>;
1569defm FCADD : SIMDThreeSameVectorComplexHSD<1, 0b111, complexrotateopodd,
1570                                           "fcadd", null_frag>;
1571defm FCMLA : SIMDIndexedTiedComplexHSD<0, 1, complexrotateop, "fcmla">;
1572
1573let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
1574  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot90 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
1575            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 0))>;
1576  def : Pat<(v4f16 (int_aarch64_neon_vcadd_rot270 (v4f16 V64:$Rn), (v4f16 V64:$Rm))),
1577            (FCADDv4f16 (v4f16 V64:$Rn), (v4f16 V64:$Rm), (i32 1))>;
1578  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot90 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
1579            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 0))>;
1580  def : Pat<(v8f16 (int_aarch64_neon_vcadd_rot270 (v8f16 V128:$Rn), (v8f16 V128:$Rm))),
1581            (FCADDv8f16 (v8f16 V128:$Rn), (v8f16 V128:$Rm), (i32 1))>;
1582}
1583
1584let Predicates = [HasComplxNum, HasNEON] in {
1585  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot90 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
1586            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 0))>;
1587  def : Pat<(v2f32 (int_aarch64_neon_vcadd_rot270 (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
1588            (FCADDv2f32 (v2f32 V64:$Rn), (v2f32 V64:$Rm), (i32 1))>;
1589  foreach Ty = [v4f32, v2f64] in {
1590    def : Pat<(Ty (int_aarch64_neon_vcadd_rot90 (Ty V128:$Rn), (Ty V128:$Rm))),
1591              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 0))>;
1592    def : Pat<(Ty (int_aarch64_neon_vcadd_rot270 (Ty V128:$Rn), (Ty V128:$Rm))),
1593              (!cast<Instruction>("FCADD"#Ty) (Ty V128:$Rn), (Ty V128:$Rm), (i32 1))>;
1594  }
1595}
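// In the FCADD patterns above, the trailing immediate encodes the rotation:
// 0 selects #90 and 1 selects #270 (the only rotations FCADD supports).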
1596
1597multiclass FCMLA_PATS<ValueType ty, DAGOperand Reg> {
1598  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1599            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 0)>;
1600  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1601            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 1)>;
1602  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1603            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 2)>;
1604  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), (ty Reg:$Rm))),
1605            (!cast<Instruction>("FCMLA" # ty) $Rd, $Rn, $Rm, 3)>;
1606}
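// For FCMLA, the trailing immediates 0..3 encode rotations #0, #90, #180 and
// #270 respectively, mirroring the vcmla_rot* intrinsics matched above.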
1607
1608multiclass FCMLA_LANE_PATS<ValueType ty, DAGOperand Reg, dag RHSDup> {
1609  def : Pat<(ty (int_aarch64_neon_vcmla_rot0 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1610            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 0)>;
1611  def : Pat<(ty (int_aarch64_neon_vcmla_rot90 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1612            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 1)>;
1613  def : Pat<(ty (int_aarch64_neon_vcmla_rot180 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1614            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 2)>;
1615  def : Pat<(ty (int_aarch64_neon_vcmla_rot270 (ty Reg:$Rd), (ty Reg:$Rn), RHSDup)),
1616            (!cast<Instruction>("FCMLA" # ty # "_indexed") $Rd, $Rn, $Rm, VectorIndexS:$idx, 3)>;
1617}
1618
1619
1620let Predicates = [HasComplxNum, HasNEON, HasFullFP16] in {
1621  defm : FCMLA_PATS<v4f16, V64>;
1622  defm : FCMLA_PATS<v8f16, V128>;
1623
1624  defm : FCMLA_LANE_PATS<v4f16, V64,
1625                         (v4f16 (bitconvert (v2i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexD:$idx))))>;
1626  defm : FCMLA_LANE_PATS<v8f16, V128,
1627                         (v8f16 (bitconvert (v4i32 (AArch64duplane32 (v4i32 V128:$Rm), VectorIndexS:$idx))))>;
1628}
1629let Predicates = [HasComplxNum, HasNEON] in {
1630  defm : FCMLA_PATS<v2f32, V64>;
1631  defm : FCMLA_PATS<v4f32, V128>;
1632  defm : FCMLA_PATS<v2f64, V128>;
1633
1634  defm : FCMLA_LANE_PATS<v4f32, V128,
1635                         (v4f32 (bitconvert (v2i64 (AArch64duplane64 (v2i64 V128:$Rm), VectorIndexD:$idx))))>;
1636}
1637
1638// v8.3a Pointer Authentication
// These instructions inhabit part of the hint space and so can be used for
// armv8 targets. Keeping the old HINT mnemonic when compiling without PA is
// important for compatibility with other assemblers (e.g. GAS) when building
// software that must run on CPUs both with and without PA implemented.
1643let Uses = [LR], Defs = [LR] in {
1644  def PACIAZ   : SystemNoOperands<0b000, "hint\t#24">;
1645  def PACIBZ   : SystemNoOperands<0b010, "hint\t#26">;
1646  let isAuthenticated = 1 in {
1647    def AUTIAZ   : SystemNoOperands<0b100, "hint\t#28">;
1648    def AUTIBZ   : SystemNoOperands<0b110, "hint\t#30">;
1649  }
1650}
1651let Uses = [LR, SP], Defs = [LR] in {
1652  def PACIASP  : SystemNoOperands<0b001, "hint\t#25">;
1653  def PACIBSP  : SystemNoOperands<0b011, "hint\t#27">;
1654  let isAuthenticated = 1 in {
1655    def AUTIASP  : SystemNoOperands<0b101, "hint\t#29">;
1656    def AUTIBSP  : SystemNoOperands<0b111, "hint\t#31">;
1657  }
1658}
1659let Uses = [X16, X17], Defs = [X17], CRm = 0b0001 in {
1660  def PACIA1716  : SystemNoOperands<0b000, "hint\t#8">;
1661  def PACIB1716  : SystemNoOperands<0b010, "hint\t#10">;
1662  let isAuthenticated = 1 in {
1663    def AUTIA1716  : SystemNoOperands<0b100, "hint\t#12">;
1664    def AUTIB1716  : SystemNoOperands<0b110, "hint\t#14">;
1665  }
1666}
1667
1668let Uses = [LR], Defs = [LR], CRm = 0b0000 in {
1669  def XPACLRI   : SystemNoOperands<0b111, "hint\t#7">;
1670}
1671
1672// In order to be able to write readable assembly, LLVM should accept assembly
1673// inputs that use pointer authentication mnemonics, even with PA disabled.
1674// However, in order to be compatible with other assemblers (e.g. GAS), LLVM
1675// should not emit these mnemonics unless PA is enabled.
1676def : InstAlias<"paciaz", (PACIAZ), 0>;
1677def : InstAlias<"pacibz", (PACIBZ), 0>;
1678def : InstAlias<"autiaz", (AUTIAZ), 0>;
1679def : InstAlias<"autibz", (AUTIBZ), 0>;
1680def : InstAlias<"paciasp", (PACIASP), 0>;
1681def : InstAlias<"pacibsp", (PACIBSP), 0>;
1682def : InstAlias<"autiasp", (AUTIASP), 0>;
1683def : InstAlias<"autibsp", (AUTIBSP), 0>;
1684def : InstAlias<"pacia1716", (PACIA1716), 0>;
1685def : InstAlias<"pacib1716", (PACIB1716), 0>;
1686def : InstAlias<"autia1716", (AUTIA1716), 0>;
1687def : InstAlias<"autib1716", (AUTIB1716), 0>;
1688def : InstAlias<"xpaclri", (XPACLRI), 0>;
1689
1690// Pseudos
1691
1692let Uses = [LR, SP], Defs = [LR] in {
1693// Insertion point of LR signing code.
1694def PAUTH_PROLOGUE : Pseudo<(outs), (ins), []>, Sched<[]>;
1695// Insertion point of LR authentication code.
1696// The RET terminator of the containing machine basic block may be replaced
1697// with a combined RETA(A|B) instruction when rewriting this Pseudo.
1698def PAUTH_EPILOGUE : Pseudo<(outs), (ins), []>, Sched<[]>;
1699}
1700
1701def PAUTH_BLEND : Pseudo<(outs GPR64:$disc),
1702                         (ins GPR64:$addr_disc, i32imm:$int_disc), []>, Sched<[]>;
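// PAUTH_BLEND mixes a small integer discriminator into the top 16 bits of an
// address discriminator; a sketch of the usual expansion (illustrative):
//   mov  $disc, $addr_disc
//   movk $disc, #int_disc, lsl #48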
1703
1704// These pointer authentication instructions require armv8.3a
1705let Predicates = [HasPAuth] in {
1706
1707  // When PA is enabled, a better mnemonic should be emitted.
1708  def : InstAlias<"paciaz", (PACIAZ), 1>;
1709  def : InstAlias<"pacibz", (PACIBZ), 1>;
1710  def : InstAlias<"autiaz", (AUTIAZ), 1>;
1711  def : InstAlias<"autibz", (AUTIBZ), 1>;
1712  def : InstAlias<"paciasp", (PACIASP), 1>;
1713  def : InstAlias<"pacibsp", (PACIBSP), 1>;
1714  def : InstAlias<"autiasp", (AUTIASP), 1>;
1715  def : InstAlias<"autibsp", (AUTIBSP), 1>;
1716  def : InstAlias<"pacia1716", (PACIA1716), 1>;
1717  def : InstAlias<"pacib1716", (PACIB1716), 1>;
1718  def : InstAlias<"autia1716", (AUTIA1716), 1>;
1719  def : InstAlias<"autib1716", (AUTIB1716), 1>;
1720  def : InstAlias<"xpaclri", (XPACLRI), 1>;
1721
1722  multiclass SignAuth<bits<3> prefix, bits<3> prefix_z, string asm,
1723                      SDPatternOperator op> {
1724    def IA   : SignAuthOneData<prefix, 0b00, !strconcat(asm,  "ia"), op>;
1725    def IB   : SignAuthOneData<prefix, 0b01, !strconcat(asm,  "ib"), op>;
1726    def DA   : SignAuthOneData<prefix, 0b10, !strconcat(asm,  "da"), op>;
1727    def DB   : SignAuthOneData<prefix, 0b11, !strconcat(asm,  "db"), op>;
1728    def IZA  : SignAuthZero<prefix_z,  0b00, !strconcat(asm, "iza"), op>;
1729    def DZA  : SignAuthZero<prefix_z,  0b10, !strconcat(asm, "dza"), op>;
1730    def IZB  : SignAuthZero<prefix_z,  0b01, !strconcat(asm, "izb"), op>;
1731    def DZB  : SignAuthZero<prefix_z,  0b11, !strconcat(asm, "dzb"), op>;
1732  }
1733
1734  defm PAC : SignAuth<0b000, 0b010, "pac", int_ptrauth_sign>;
1735  defm AUT : SignAuth<0b001, 0b011, "aut", null_frag>;
1736
1737  def XPACI : ClearAuth<0, "xpaci">;
1738  def : Pat<(int_ptrauth_strip GPR64:$Rd, 0), (XPACI GPR64:$Rd)>;
1739  def : Pat<(int_ptrauth_strip GPR64:$Rd, 1), (XPACI GPR64:$Rd)>;
1740
1741  def XPACD : ClearAuth<1, "xpacd">;
1742  def : Pat<(int_ptrauth_strip GPR64:$Rd, 2), (XPACD GPR64:$Rd)>;
1743  def : Pat<(int_ptrauth_strip GPR64:$Rd, 3), (XPACD GPR64:$Rd)>;
1744
1745  def PACGA : SignAuthTwoOperand<0b1100, "pacga", int_ptrauth_sign_generic>;
1746
1747  // Combined Instructions
1748  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1  in {
1749    def BRAA    : AuthBranchTwoOperands<0, 0, "braa">;
1750    def BRAB    : AuthBranchTwoOperands<0, 1, "brab">;
1751  }
1752  let isCall = 1, Defs = [LR], Uses = [SP] in {
1753    def BLRAA   : AuthBranchTwoOperands<1, 0, "blraa">;
1754    def BLRAB   : AuthBranchTwoOperands<1, 1, "blrab">;
1755  }
1756
1757  let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1  in {
1758    def BRAAZ   : AuthOneOperand<0b000, 0, "braaz">;
1759    def BRABZ   : AuthOneOperand<0b000, 1, "brabz">;
1760  }
1761  let isCall = 1, Defs = [LR], Uses = [SP] in {
1762    def BLRAAZ  : AuthOneOperand<0b001, 0, "blraaz">;
1763    def BLRABZ  : AuthOneOperand<0b001, 1, "blrabz">;
1764  }
1765
1766  // BLRA pseudo, a generalized version of BLRAA/BLRAB/Z.
1767  // This directly manipulates x16/x17 to materialize the discriminator.
1768  // x16/x17 are generally used as the safe registers for sensitive ptrauth
1769  // operations (such as raw address manipulation or discriminator
1770  // materialization here), in part because they're handled in a safer way by
1771  // the kernel, notably on Darwin.
1772  def BLRA : Pseudo<(outs), (ins GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
1773                                 GPR64noip:$AddrDisc),
1774                    [(AArch64authcall GPR64noip:$Rn, timm:$Key, timm:$Disc,
1775                                      GPR64noip:$AddrDisc)]>, Sched<[]> {
1776    let isCodeGenOnly = 1;
1777    let hasSideEffects = 1;
1778    let mayStore = 0;
1779    let mayLoad = 0;
1780    let isCall = 1;
1781    let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
1782    let Defs = [X17,LR];
1783    let Uses = [SP];
1784  }
1785
1786  def BLRA_RVMARKER : Pseudo<
1787        (outs), (ins i64imm:$rvfunc, GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
1788                     GPR64noip:$AddrDisc),
1789        [(AArch64authcall_rvmarker tglobaladdr:$rvfunc,
1790                                   GPR64noip:$Rn, timm:$Key, timm:$Disc,
1791                                   GPR64noip:$AddrDisc)]>, Sched<[]> {
1792    let isCodeGenOnly = 1;
1793    let isCall = 1;
1794    let Defs = [X17,LR];
1795    let Uses = [SP];
1796  }
1797
1798  // BRA pseudo, generalized version of BRAA/BRAB/Z.
1799  // This directly manipulates x16/x17, which are the only registers the OS
1800  // guarantees are safe to use for sensitive operations.
1801  def BRA : Pseudo<(outs), (ins GPR64noip:$Rn, i32imm:$Key, i64imm:$Disc,
1802                                GPR64noip:$AddrDisc), []>, Sched<[]> {
1803    let isCodeGenOnly = 1;
1804    let hasNoSchedulingInfo = 1;
1805    let hasSideEffects = 1;
1806    let mayStore = 0;
1807    let mayLoad = 0;
1808    let isBranch = 1;
1809    let isTerminator = 1;
1810    let isBarrier = 1;
1811    let isIndirectBranch = 1;
1812    let Size = 12; // 4 fixed + 8 variable, to compute discriminator.
1813    let Defs = [X17];
1814  }
1815
1816  let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
1817    def RETAA   : AuthReturn<0b010, 0, "retaa">;
1818    def RETAB   : AuthReturn<0b010, 1, "retab">;
1819    def ERETAA  : AuthReturn<0b100, 0, "eretaa">;
1820    def ERETAB  : AuthReturn<0b100, 1, "eretab">;
1821  }
1822
1823  defm LDRAA  : AuthLoad<0, "ldraa", simm10Scaled>;
1824  defm LDRAB  : AuthLoad<1, "ldrab", simm10Scaled>;
1825
1826  // AUT pseudo.
1827  // This directly manipulates x16/x17, which are the only registers the OS
1828  // guarantees are safe to use for sensitive operations.
1829  def AUT : Pseudo<(outs), (ins i32imm:$Key, i64imm:$Disc, GPR64noip:$AddrDisc),
1830                   []>, Sched<[WriteI, ReadI]> {
1831    let isCodeGenOnly = 1;
1832    let hasSideEffects = 1;
1833    let mayStore = 0;
1834    let mayLoad = 0;
1835    let Size = 32;
1836    let Defs = [X16,X17,NZCV];
1837    let Uses = [X16];
1838  }
1839
1840  // AUT and re-PAC a value, using different keys/data.
1841  // This directly manipulates x16/x17, which are the only registers the OS
1842  // guarantees are safe to use for sensitive operations.
1843  def AUTPAC
1844      : Pseudo<(outs),
1845               (ins i32imm:$AUTKey, i64imm:$AUTDisc, GPR64noip:$AUTAddrDisc,
1846                    i32imm:$PACKey, i64imm:$PACDisc, GPR64noip:$PACAddrDisc),
1847               []>, Sched<[WriteI, ReadI]> {
1848    let isCodeGenOnly = 1;
1849    let hasSideEffects = 1;
1850    let mayStore = 0;
1851    let mayLoad = 0;
1852    let Size = 48;
1853    let Defs = [X16,X17,NZCV];
1854    let Uses = [X16];
1855  }
1856
1857  // Materialize a signed global address, with adrp+add and PAC.
1858  def MOVaddrPAC : Pseudo<(outs),
1859                          (ins i64imm:$Addr, i32imm:$Key,
1860                               GPR64noip:$AddrDisc, i64imm:$Disc), []>,
1861               Sched<[WriteI, ReadI]> {
1862    let isReMaterializable = 1;
1863    let isCodeGenOnly = 1;
    let Size = 40; // 12 fixed + 28 variable, for pointer offset and discriminator
1865    let Defs = [X16,X17];
1866  }
1867
1868  // Materialize a signed global address, using a GOT load and PAC.
1869  def LOADgotPAC : Pseudo<(outs),
1870                          (ins i64imm:$Addr, i32imm:$Key,
1871                               GPR64noip:$AddrDisc, i64imm:$Disc), []>,
1872               Sched<[WriteI, ReadI]> {
1873    let isReMaterializable = 1;
1874    let isCodeGenOnly = 1;
    let Size = 40; // 12 fixed + 28 variable, for pointer offset and discriminator
1876    let Defs = [X16,X17];
1877  }
1878
1879  // Load a signed global address from a special $auth_ptr$ stub slot.
1880  def LOADauthptrstatic : Pseudo<(outs GPR64:$dst),
1881                              (ins i64imm:$Addr, i32imm:$Key,
1882                                   i64imm:$Disc), []>,
1883               Sched<[WriteI, ReadI]> {
1884    let isReMaterializable = 1;
1885    let isCodeGenOnly = 1;
1886    let Size = 8;
1887  }
1888
1889  // Size 16: 4 fixed + 8 variable, to compute discriminator.
1890  let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Size = 16,
1891      Uses = [SP] in {
1892    def AUTH_TCRETURN
1893      : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff, i32imm:$Key,
1894                            i64imm:$Disc, tcGPR64:$AddrDisc),
1895               []>, Sched<[WriteBrReg]>;
1896    def AUTH_TCRETURN_BTI
1897      : Pseudo<(outs), (ins tcGPRx16x17:$dst, i32imm:$FPDiff, i32imm:$Key,
1898                            i64imm:$Disc, tcGPR64:$AddrDisc),
1899               []>, Sched<[WriteBrReg]>;
1900  }
1901
1902  let Predicates = [TailCallAny] in
1903    def : Pat<(AArch64authtcret tcGPR64:$dst, (i32 timm:$FPDiff), (i32 timm:$Key),
1904                                (i64 timm:$Disc), tcGPR64:$AddrDisc),
1905              (AUTH_TCRETURN tcGPR64:$dst, imm:$FPDiff, imm:$Key, imm:$Disc,
1906                             tcGPR64:$AddrDisc)>;
1907
1908  let Predicates = [TailCallX16X17] in
1909    def : Pat<(AArch64authtcret tcGPRx16x17:$dst, (i32 timm:$FPDiff),
1910                                (i32 timm:$Key), (i64 timm:$Disc),
1911                                tcGPR64:$AddrDisc),
1912              (AUTH_TCRETURN_BTI tcGPRx16x17:$dst, imm:$FPDiff, imm:$Key,
1913                                 imm:$Disc, tcGPR64:$AddrDisc)>;
1914}
1915
1916// v9.5-A pointer authentication extensions
1917
// Always accept "pacm" as an alias for "hint #39", but don't print it as
// "pacm" when disassembling unless the pauth-lr feature is present.
1920let CRm = 0b0100 in {
1921  def PACM : SystemNoOperands<0b111, "hint\t#39">;
1922}
1923def : InstAlias<"pacm", (PACM), 0>;
1924
1925let Predicates = [HasPAuthLR] in {
1926  let Defs = [LR], Uses = [LR, SP] in {
1927    //                                opcode2, opcode,   asm
1928    def PACIASPPC : SignAuthFixedRegs<0b00001, 0b101000, "paciasppc">;
1929    def PACIBSPPC : SignAuthFixedRegs<0b00001, 0b101001, "pacibsppc">;
1930    def PACNBIASPPC : SignAuthFixedRegs<0b00001, 0b100000, "pacnbiasppc">;
1931    def PACNBIBSPPC : SignAuthFixedRegs<0b00001, 0b100001, "pacnbibsppc">;
1932    //                             opc,  asm
1933    def AUTIASPPCi : SignAuthPCRel<0b00, "autiasppc">;
1934    def AUTIBSPPCi : SignAuthPCRel<0b01, "autibsppc">;
1935    //                              opcode2, opcode,   asm
1936    def AUTIASPPCr : SignAuthOneReg<0b00001, 0b100100, "autiasppcr">;
1937    def AUTIBSPPCr : SignAuthOneReg<0b00001, 0b100101, "autibsppcr">;
1938    //                                  opcode2, opcode,   asm
1939    def PACIA171615 : SignAuthFixedRegs<0b00001, 0b100010, "pacia171615">;
1940    def PACIB171615 : SignAuthFixedRegs<0b00001, 0b100011, "pacib171615">;
1941    def AUTIA171615 : SignAuthFixedRegs<0b00001, 0b101110, "autia171615">;
1942    def AUTIB171615 : SignAuthFixedRegs<0b00001, 0b101111, "autib171615">;
1943  }
1944
1945  let Uses = [LR, SP], isReturn = 1, isTerminator = 1, isBarrier = 1 in {
1946    //                                   opc,   op2,     asm
1947    def RETAASPPCi : SignAuthReturnPCRel<0b000, 0b11111, "retaasppc">;
1948    def RETABSPPCi : SignAuthReturnPCRel<0b001, 0b11111, "retabsppc">;
1949    //                                 op3,      asm
1950    def RETAASPPCr : SignAuthReturnReg<0b000010, "retaasppcr">;
1951    def RETABSPPCr : SignAuthReturnReg<0b000011, "retabsppcr">;
1952  }
1953  def : InstAlias<"pacm", (PACM), 1>;
1954}
1955
1956
1957// v8.3a floating point conversion for javascript
1958let Predicates = [HasJS, HasFPARMv8], Defs = [NZCV] in
1959def FJCVTZS  : BaseFPToIntegerUnscaled<0b01, 0b11, 0b110, FPR64, GPR32,
1960                                      "fjcvtzs",
1961                                      [(set GPR32:$Rd,
1962                                         (int_aarch64_fjcvtzs FPR64:$Rn))]> {
1963  let Inst{31} = 0;
1964} // HasJS, HasFPARMv8
1965
1966// v8.4 Flag manipulation instructions
1967let Predicates = [HasFlagM], Defs = [NZCV], Uses = [NZCV] in {
1968def CFINV : SimpleSystemI<0, (ins), "cfinv", "">, Sched<[WriteSys]> {
1969  let Inst{20-5} = 0b0000001000000000;
1970}
1971def SETF8  : BaseFlagManipulation<0, 0, (ins GPR32:$Rn), "setf8", "{\t$Rn}">;
1972def SETF16 : BaseFlagManipulation<0, 1, (ins GPR32:$Rn), "setf16", "{\t$Rn}">;
1973def RMIF   : FlagRotate<(ins GPR64:$Rn, uimm6:$imm, imm0_15:$mask), "rmif",
1974                        "{\t$Rn, $imm, $mask}">;
1975} // HasFlagM
1976
1977// v8.5 flag manipulation instructions
1978let Predicates = [HasAltNZCV], Uses = [NZCV], Defs = [NZCV] in {
1979
1980def XAFLAG : PstateWriteSimple<(ins), "xaflag", "">, Sched<[WriteSys]> {
1981  let Inst{18-16} = 0b000;
1982  let Inst{11-8} = 0b0000;
1983  let Unpredictable{11-8} = 0b1111;
1984  let Inst{7-5} = 0b001;
1985}
1986
1987def AXFLAG : PstateWriteSimple<(ins), "axflag", "">, Sched<[WriteSys]> {
1988  let Inst{18-16} = 0b000;
1989  let Inst{11-8} = 0b0000;
1990  let Unpredictable{11-8} = 0b1111;
1991  let Inst{7-5} = 0b010;
1992}
1993} // HasAltNZCV
1994
1995
1996// Armv8.5-A speculation barrier
1997def SB : SimpleSystemI<0, (ins), "sb", "">, Sched<[]> {
1998  let Inst{20-5} = 0b0001100110000111;
1999  let Unpredictable{11-8} = 0b1111;
2000  let Predicates = [HasSB];
2001  let hasSideEffects = 1;
2002}
2003
2004def : InstAlias<"clrex", (CLREX 0xf)>;
2005def : InstAlias<"isb", (ISB 0xf)>;
2006def : InstAlias<"ssbb", (DSB 0)>;
2007def : InstAlias<"pssbb", (DSB 4)>;
2008def : InstAlias<"dfb", (DSB 0b1100)>, Requires<[HasV8_0r]>;
2009
2010def MRS    : MRSI;
2011def MSR    : MSRI;
2012def MSRpstateImm1 : MSRpstateImm0_1;
2013def MSRpstateImm4 : MSRpstateImm0_15;
2014
2015def : Pat<(AArch64mrs imm:$id),
2016          (MRS imm:$id)>;
2017
2018// The thread pointer (on Linux, at least, where this has been implemented) is
2019// TPIDR_EL0.
2020def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
2021                       [(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
2022
// This gets lowered into a 24-byte instruction sequence.
2024let Defs = [ X9, X16, X17, NZCV ], Size = 24 in {
2025def KCFI_CHECK : Pseudo<
2026  (outs), (ins GPR64:$ptr, i32imm:$type), []>, Sched<[]>;
2027}
2028
2029let Uses = [ X9 ], Defs = [ X16, X17, LR, NZCV ] in {
2030def HWASAN_CHECK_MEMACCESS : Pseudo<
2031  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
2032  [(int_hwasan_check_memaccess X9, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
2033  Sched<[]>;
2034}
2035
2036let Uses = [ X20 ], Defs = [ X16, X17, LR, NZCV ] in {
2037def HWASAN_CHECK_MEMACCESS_SHORTGRANULES : Pseudo<
2038  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo),
2039  [(int_hwasan_check_memaccess_shortgranules X20, GPR64noip:$ptr, (i32 timm:$accessinfo))]>,
2040  Sched<[]>;
2041}
2042
2043let Defs = [ X16, X17, LR, NZCV ] in {
2044def HWASAN_CHECK_MEMACCESS_FIXEDSHADOW : Pseudo<
2045  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo, i64imm:$fixed_shadow),
2046  [(int_hwasan_check_memaccess_fixedshadow GPR64noip:$ptr, (i32 timm:$accessinfo), (i64 timm:$fixed_shadow))]>,
2047  Sched<[]>;
2048}
2049
2050let Defs = [ X16, X17, LR, NZCV ] in {
2051def HWASAN_CHECK_MEMACCESS_SHORTGRANULES_FIXEDSHADOW : Pseudo<
2052  (outs), (ins GPR64noip:$ptr, i32imm:$accessinfo, i64imm:$fixed_shadow),
2053  [(int_hwasan_check_memaccess_shortgranules_fixedshadow GPR64noip:$ptr, (i32 timm:$accessinfo), (i64 timm:$fixed_shadow))]>,
2054  Sched<[]>;
2055}
2056
2057// The virtual cycle counter register is CNTVCT_EL0.
2058def : Pat<(readcyclecounter), (MRS 0xdf02)>;
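// 0xdf02 is the packed (op0, op1, CRn, CRm, op2) = (3, 3, c14, c0, 2)
// encoding of CNTVCT_EL0.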
2059
2060// FPCR and FPSR registers.
2061let Uses = [FPCR] in
2062def MRS_FPCR : Pseudo<(outs GPR64:$dst), (ins),
2063                      [(set GPR64:$dst, (int_aarch64_get_fpcr))]>,
2064               PseudoInstExpansion<(MRS GPR64:$dst, 0xda20)>,
2065               Sched<[WriteSys]>;
2066let Defs = [FPCR] in
2067def MSR_FPCR : Pseudo<(outs), (ins GPR64:$val),
2068                      [(int_aarch64_set_fpcr i64:$val)]>,
2069               PseudoInstExpansion<(MSR 0xda20, GPR64:$val)>,
2070               Sched<[WriteSys]>;
2071
2072let Uses = [FPSR] in
2073def MRS_FPSR : Pseudo<(outs GPR64:$dst), (ins),
2074                      [(set GPR64:$dst, (int_aarch64_get_fpsr))]>,
2075               PseudoInstExpansion<(MRS GPR64:$dst, 0xda21)>,
2076               Sched<[WriteSys]>;
2077let Defs = [FPSR] in
2078def MSR_FPSR : Pseudo<(outs), (ins GPR64:$val),
2079                      [(int_aarch64_set_fpsr i64:$val)]>,
2080               PseudoInstExpansion<(MSR 0xda21, GPR64:$val)>,
2081               Sched<[WriteSys]>;
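// The MRS/MSR immediates above are the packed system-register encodings:
// 0xda20 is FPCR (3, 3, c4, c4, 0) and 0xda21 is FPSR (3, 3, c4, c4, 1).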
2082
2083// Generic system instructions
2084def SYSxt  : SystemXtI<0, "sys">;
2085def SYSLxt : SystemLXtI<1, "sysl">;
2086
2087def : InstAlias<"sys $op1, $Cn, $Cm, $op2",
2088                (SYSxt imm0_7:$op1, sys_cr_op:$Cn,
2089                 sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
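// The alias lets the Xt operand be omitted, defaulting to xzr; e.g.
// "sys #0, c7, c5, #0" assembles identically to "sys #0, c7, c5, #0, xzr"
// (the encoding underlying "ic iallu").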
2090
2091
2092let Predicates = [HasTME] in {
2093
2094def TSTART : TMSystemI<0b0000, "tstart",
2095                      [(set GPR64:$Rt, (int_aarch64_tstart))]>;
2096
2097def TCOMMIT : TMSystemINoOperand<0b0000, "tcommit", [(int_aarch64_tcommit)]>;
2098
2099def TCANCEL : TMSystemException<0b011, "tcancel",
2100                                [(int_aarch64_tcancel timm64_0_65535:$imm)]>;
2101
2102def TTEST : TMSystemI<0b0001, "ttest", [(set GPR64:$Rt, (int_aarch64_ttest))]> {
2103  let mayLoad = 0;
2104  let mayStore = 0;
2105}
2106} // HasTME
2107
2108//===----------------------------------------------------------------------===//
2109// Move immediate instructions.
2110//===----------------------------------------------------------------------===//
2111
2112defm MOVK : InsertImmediate<0b11, "movk">;
2113defm MOVN : MoveImmediate<0b00, "movn">;
2114
2115let PostEncoderMethod = "fixMOVZ" in
2116defm MOVZ : MoveImmediate<0b10, "movz">;
2117
2118// First group of aliases covers an implicit "lsl #0".
2119def : InstAlias<"movk $dst, $imm", (MOVKWi GPR32:$dst, timm32_0_65535:$imm, 0), 0>;
2120def : InstAlias<"movk $dst, $imm", (MOVKXi GPR64:$dst, timm32_0_65535:$imm, 0), 0>;
2121def : InstAlias<"movn $dst, $imm", (MOVNWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
2122def : InstAlias<"movn $dst, $imm", (MOVNXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
2123def : InstAlias<"movz $dst, $imm", (MOVZWi GPR32:$dst, timm32_0_65535:$imm, 0)>;
2124def : InstAlias<"movz $dst, $imm", (MOVZXi GPR64:$dst, timm32_0_65535:$imm, 0)>;
2125
2126// Next, we have various ELF relocations with the ":XYZ_g0:sym" syntax.
2127def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
2128def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
2129def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
2130def : InstAlias<"movz $Rd, $sym", (MOVZXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;
2131
2132def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g3:$sym, 48)>;
2133def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g2:$sym, 32)>;
2134def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g1:$sym, 16)>;
2135def : InstAlias<"movn $Rd, $sym", (MOVNXi GPR64:$Rd, movw_symbol_g0:$sym, 0)>;
2136
2137def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g3:$sym, 48), 0>;
2138def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g2:$sym, 32), 0>;
2139def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g1:$sym, 16), 0>;
2140def : InstAlias<"movk $Rd, $sym", (MOVKXi GPR64:$Rd, movw_symbol_g0:$sym, 0), 0>;
2141
2142def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
2143def : InstAlias<"movz $Rd, $sym", (MOVZWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;
2144
2145def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g1:$sym, 16)>;
2146def : InstAlias<"movn $Rd, $sym", (MOVNWi GPR32:$Rd, movw_symbol_g0:$sym, 0)>;
2147
2148def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g1:$sym, 16), 0>;
2149def : InstAlias<"movk $Rd, $sym", (MOVKWi GPR32:$Rd, movw_symbol_g0:$sym, 0), 0>;
2150
2151// Final group of aliases covers true "mov $Rd, $imm" cases.
2152multiclass movw_mov_alias<string basename,Instruction INST, RegisterClass GPR,
2153                          int width, int shift> {
2154  def _asmoperand : AsmOperandClass {
2155    let Name = basename # width # "_lsl" # shift # "MovAlias";
2156    let PredicateMethod = "is" # basename # "MovAlias<" # width # ", "
2157                               # shift # ">";
2158    let RenderMethod = "add" # basename # "MovAliasOperands<" # shift # ">";
2159  }
2160
2161  def _movimm : Operand<i32> {
2162    let ParserMatchClass = !cast<AsmOperandClass>(NAME # "_asmoperand");
2163  }
2164
2165  def : InstAlias<"mov $Rd, $imm",
2166                  (INST GPR:$Rd, !cast<Operand>(NAME # "_movimm"):$imm, shift)>;
2167}
2168
2169defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 0>;
2170defm : movw_mov_alias<"MOVZ", MOVZWi, GPR32, 32, 16>;
2171
2172defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 0>;
2173defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 16>;
2174defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 32>;
2175defm : movw_mov_alias<"MOVZ", MOVZXi, GPR64, 64, 48>;
2176
2177defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 0>;
2178defm : movw_mov_alias<"MOVN", MOVNWi, GPR32, 32, 16>;
2179
2180defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 0>;
2181defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 16>;
2182defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 32>;
2183defm : movw_mov_alias<"MOVN", MOVNXi, GPR64, 64, 48>;
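// For example, "mov w0, #0x10000" is accepted as MOVZWi w0, #1, lsl #16,
// and "mov w0, #-2" as MOVNWi w0, #1 (MOVN computes ~(imm << shift)).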
2184
2185let isReMaterializable = 1, isCodeGenOnly = 1, isMoveImm = 1,
2186    isAsCheapAsAMove = 1 in {
2187// FIXME: The following pseudo instructions are only needed because remat
2188// cannot handle multiple instructions.  When that changes, we can select
2189// directly to the real instructions and get rid of these pseudos.
2190
2191def MOVi32imm
2192    : Pseudo<(outs GPR32:$dst), (ins i32imm:$src),
2193             [(set GPR32:$dst, imm:$src)]>,
2194      Sched<[WriteImm]>;
2195def MOVi64imm
2196    : Pseudo<(outs GPR64:$dst), (ins i64imm:$src),
2197             [(set GPR64:$dst, imm:$src)]>,
2198      Sched<[WriteImm]>;
2199} // isReMaterializable, isCodeGenOnly
2200
2201// If possible, we want to use MOVi32imm even for 64-bit moves. This gives the
2202// eventual expansion code fewer bits to worry about getting right. Marshalling
2203// the types is a little tricky though:
2204def i64imm_32bit : ImmLeaf<i64, [{
2205  return (Imm & 0xffffffffULL) == static_cast<uint64_t>(Imm);
2206}]>;
2207
2208def s64imm_32bit : ImmLeaf<i64, [{
2209  int64_t Imm64 = static_cast<int64_t>(Imm);
2210  return Imm64 >= std::numeric_limits<int32_t>::min() &&
2211         Imm64 <= std::numeric_limits<int32_t>::max();
2212}]>;
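// For example, 0xFFFFFFFF satisfies i64imm_32bit but not s64imm_32bit, while
// -1 (all ones) satisfies s64imm_32bit but not i64imm_32bit.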
2213
2214def trunc_imm : SDNodeXForm<imm, [{
2215  return CurDAG->getTargetConstant(N->getZExtValue(), SDLoc(N), MVT::i32);
2216}]>;
2217
2218def gi_trunc_imm : GICustomOperandRenderer<"renderTruncImm">,
2219  GISDNodeXFormEquiv<trunc_imm>;
2220
2221let Predicates = [OptimizedGISelOrOtherSelector] in {
2222// The SUBREG_TO_REG isn't eliminated at -O0, which can result in pointless
2223// copies.
2224def : Pat<(i64 i64imm_32bit:$src),
2225          (SUBREG_TO_REG (i64 0), (MOVi32imm (trunc_imm imm:$src)), sub_32)>;
2226}
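// For example, the i64 constant 0xDEADBEEF is materialized as a 32-bit
// MOVi32imm whose implicit zero-extension is made explicit by SUBREG_TO_REG.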
2227
2228// Materialize FP constants via MOVi32imm/MOVi64imm (MachO large code model).
def bitcast_fpimm_to_i32 : SDNodeXForm<fpimm, [{
  return CurDAG->getTargetConstant(
      N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i32);
}]>;

def bitcast_fpimm_to_i64 : SDNodeXForm<fpimm, [{
  return CurDAG->getTargetConstant(
      N->getValueAPF().bitcastToAPInt().getZExtValue(), SDLoc(N), MVT::i64);
}]>;
2238
2239
2240def : Pat<(f32 fpimm:$in),
2241  (COPY_TO_REGCLASS (MOVi32imm (bitcast_fpimm_to_i32 f32:$in)), FPR32)>;
2242def : Pat<(f64 fpimm:$in),
2243  (COPY_TO_REGCLASS (MOVi64imm (bitcast_fpimm_to_i64 f64:$in)), FPR64)>;
2244
2245
2246// Deal with the various forms of (ELF) large addressing with MOVZ/MOVK
2247// sequences.
2248def : Pat<(AArch64WrapperLarge tglobaladdr:$g3, tglobaladdr:$g2,
2249                             tglobaladdr:$g1, tglobaladdr:$g0),
2250          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tglobaladdr:$g0, 0),
2251                                  tglobaladdr:$g1, 16),
2252                          tglobaladdr:$g2, 32),
2253                  tglobaladdr:$g3, 48)>;
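// For example, a 64-bit global address in the large code model is built 16
// bits at a time, roughly (relocation specifiers illustrative):
//   movz x0, #:abs_g0_nc:sym
//   movk x0, #:abs_g1_nc:sym
//   movk x0, #:abs_g2_nc:sym
//   movk x0, #:abs_g3:sym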
2254
2255def : Pat<(AArch64WrapperLarge tblockaddress:$g3, tblockaddress:$g2,
2256                             tblockaddress:$g1, tblockaddress:$g0),
2257          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tblockaddress:$g0, 0),
2258                                  tblockaddress:$g1, 16),
2259                          tblockaddress:$g2, 32),
2260                  tblockaddress:$g3, 48)>;
2261
2262def : Pat<(AArch64WrapperLarge tconstpool:$g3, tconstpool:$g2,
2263                             tconstpool:$g1, tconstpool:$g0),
2264          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tconstpool:$g0, 0),
2265                                  tconstpool:$g1, 16),
2266                          tconstpool:$g2, 32),
2267                  tconstpool:$g3, 48)>;
2268
2269def : Pat<(AArch64WrapperLarge tjumptable:$g3, tjumptable:$g2,
2270                             tjumptable:$g1, tjumptable:$g0),
2271          (MOVKXi (MOVKXi (MOVKXi (MOVZXi tjumptable:$g0, 0),
2272                                  tjumptable:$g1, 16),
2273                          tjumptable:$g2, 32),
2274                  tjumptable:$g3, 48)>;
2275
2276
2277//===----------------------------------------------------------------------===//
2278// Arithmetic instructions.
2279//===----------------------------------------------------------------------===//
2280
2281// Add/subtract with carry.
2282defm ADC : AddSubCarry<0, "adc", "adcs", AArch64adc, AArch64adc_flag>;
2283defm SBC : AddSubCarry<1, "sbc", "sbcs", AArch64sbc, AArch64sbc_flag>;
2284
2285def : InstAlias<"ngc $dst, $src",  (SBCWr  GPR32:$dst, WZR, GPR32:$src)>;
2286def : InstAlias<"ngc $dst, $src",  (SBCXr  GPR64:$dst, XZR, GPR64:$src)>;
2287def : InstAlias<"ngcs $dst, $src", (SBCSWr GPR32:$dst, WZR, GPR32:$src)>;
2288def : InstAlias<"ngcs $dst, $src", (SBCSXr GPR64:$dst, XZR, GPR64:$src)>;
2289
2290// Add/subtract
2291defm ADD : AddSub<0, "add", "sub", add>;
2292defm SUB : AddSub<1, "sub", "add">;
2293
2294def : InstAlias<"mov $dst, $src",
2295                (ADDWri GPR32sponly:$dst, GPR32sp:$src, 0, 0)>;
2296def : InstAlias<"mov $dst, $src",
2297                (ADDWri GPR32sp:$dst, GPR32sponly:$src, 0, 0)>;
2298def : InstAlias<"mov $dst, $src",
2299                (ADDXri GPR64sponly:$dst, GPR64sp:$src, 0, 0)>;
2300def : InstAlias<"mov $dst, $src",
2301                (ADDXri GPR64sp:$dst, GPR64sponly:$src, 0, 0)>;
2302
2303defm ADDS : AddSubS<0, "adds", AArch64add_flag, "cmn", "subs", "cmp">;
2304defm SUBS : AddSubS<1, "subs", AArch64sub_flag, "cmp", "adds", "cmn">;
2305
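// Matches an i64 value that is a direct CopyFromReg of the stack pointer.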
2306def copyFromSP: PatLeaf<(i64 GPR64:$src), [{
2307  return N->getOpcode() == ISD::CopyFromReg &&
2308         cast<RegisterSDNode>(N->getOperand(1))->getReg() == AArch64::SP;
2309}]>;
2310
2311// Use SUBS instead of SUB to enable CSE between SUBS and SUB.
2312def : Pat<(sub GPR32sp:$Rn, addsub_shifted_imm32:$imm),
2313          (SUBSWri GPR32sp:$Rn, addsub_shifted_imm32:$imm)>;
2314def : Pat<(sub GPR64sp:$Rn, addsub_shifted_imm64:$imm),
2315          (SUBSXri GPR64sp:$Rn, addsub_shifted_imm64:$imm)>;
2316def : Pat<(sub GPR32:$Rn, GPR32:$Rm),
2317          (SUBSWrr GPR32:$Rn, GPR32:$Rm)>;
2318def : Pat<(sub GPR64:$Rn, GPR64:$Rm),
2319          (SUBSXrr GPR64:$Rn, GPR64:$Rm)>;
2320def : Pat<(sub GPR32:$Rn, arith_shifted_reg32:$Rm),
2321          (SUBSWrs GPR32:$Rn, arith_shifted_reg32:$Rm)>;
2322def : Pat<(sub GPR64:$Rn, arith_shifted_reg64:$Rm),
2323          (SUBSXrs GPR64:$Rn, arith_shifted_reg64:$Rm)>;
2324let AddedComplexity = 1 in {
2325def : Pat<(sub GPR32sp:$R2, arith_extended_reg32_i32:$R3),
2326          (SUBSWrx GPR32sp:$R2, arith_extended_reg32_i32:$R3)>;
2327def : Pat<(sub GPR64sp:$R2, arith_extended_reg32to64_i64:$R3),
2328          (SUBSXrx GPR64sp:$R2, arith_extended_reg32to64_i64:$R3)>;
2329def : Pat<(sub copyFromSP:$R2, (arith_uxtx GPR64:$R3, arith_extendlsl64:$imm)),
2330          (SUBXrx64 GPR64sp:$R2, GPR64:$R3, arith_extendlsl64:$imm)>;
2331}
2332
2333// Because of the immediate format for add/sub-imm instructions, the
2334// expression (add x, -1) must be transformed to (SUB{W,X}ri x, 1).
// These patterns capture that transformation.
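// For example, (add w0, #-1) is selected as SUBSWri w0, #1; SUBS rather than
// SUB so it can CSE with flag-setting subtracts, as noted above.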
2336let AddedComplexity = 1 in {
2337def : Pat<(add GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2338          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2339def : Pat<(add GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2340          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2341def : Pat<(sub GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2342          (ADDWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2343def : Pat<(sub GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2344          (ADDXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2345}
2346
// The same transformation applies to the flag-setting forms: because of the
// immediate format, (add_flag x, -imm) must become (SUBS{W,X}ri x, imm) and
// (sub_flag x, -imm) must become (ADDS{W,X}ri x, imm).
2350let AddedComplexity = 1 in {
2351def : Pat<(AArch64add_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2352          (SUBSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2353def : Pat<(AArch64add_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2354          (SUBSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2355def : Pat<(AArch64sub_flag GPR32:$Rn, neg_addsub_shifted_imm32:$imm),
2356          (ADDSWri GPR32:$Rn, neg_addsub_shifted_imm32:$imm)>;
2357def : Pat<(AArch64sub_flag GPR64:$Rn, neg_addsub_shifted_imm64:$imm),
2358          (ADDSXri GPR64:$Rn, neg_addsub_shifted_imm64:$imm)>;
2359}
2360
2361def : InstAlias<"neg $dst, $src", (SUBWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
2362def : InstAlias<"neg $dst, $src", (SUBXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
2363def : InstAlias<"neg $dst, $src$shift",
2364                (SUBWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
2365def : InstAlias<"neg $dst, $src$shift",
2366                (SUBXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
2367
2368def : InstAlias<"negs $dst, $src", (SUBSWrs GPR32:$dst, WZR, GPR32:$src, 0), 3>;
2369def : InstAlias<"negs $dst, $src", (SUBSXrs GPR64:$dst, XZR, GPR64:$src, 0), 3>;
2370def : InstAlias<"negs $dst, $src$shift",
2371                (SUBSWrs GPR32:$dst, WZR, GPR32:$src, arith_shift32:$shift), 2>;
2372def : InstAlias<"negs $dst, $src$shift",
2373                (SUBSXrs GPR64:$dst, XZR, GPR64:$src, arith_shift64:$shift), 2>;
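// For example, "neg w0, w1" assembles as "sub w0, wzr, w1" and "negs w0, w1"
// as "subs w0, wzr, w1".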
2374
2375
2376// Unsigned/Signed divide
2377defm UDIV : Div<0, "udiv", udiv>;
2378defm SDIV : Div<1, "sdiv", sdiv>;
2379
2380def : Pat<(int_aarch64_udiv GPR32:$Rn, GPR32:$Rm), (UDIVWr GPR32:$Rn, GPR32:$Rm)>;
2381def : Pat<(int_aarch64_udiv GPR64:$Rn, GPR64:$Rm), (UDIVXr GPR64:$Rn, GPR64:$Rm)>;
2382def : Pat<(int_aarch64_sdiv GPR32:$Rn, GPR32:$Rm), (SDIVWr GPR32:$Rn, GPR32:$Rm)>;
2383def : Pat<(int_aarch64_sdiv GPR64:$Rn, GPR64:$Rm), (SDIVXr GPR64:$Rn, GPR64:$Rm)>;
2384
2385// Variable shift
2386defm ASRV : Shift<0b10, "asr", sra>;
2387defm LSLV : Shift<0b00, "lsl", shl>;
2388defm LSRV : Shift<0b01, "lsr", srl>;
2389defm RORV : Shift<0b11, "ror", rotr>;
2390
2391def : ShiftAlias<"asrv", ASRVWr, GPR32>;
2392def : ShiftAlias<"asrv", ASRVXr, GPR64>;
2393def : ShiftAlias<"lslv", LSLVWr, GPR32>;
2394def : ShiftAlias<"lslv", LSLVXr, GPR64>;
2395def : ShiftAlias<"lsrv", LSRVWr, GPR32>;
2396def : ShiftAlias<"lsrv", LSRVXr, GPR64>;
2397def : ShiftAlias<"rorv", RORVWr, GPR32>;
2398def : ShiftAlias<"rorv", RORVXr, GPR64>;
2399
2400// Multiply-add
2401let AddedComplexity = 5 in {
2402defm MADD : MulAccum<0, "madd">;
2403defm MSUB : MulAccum<1, "msub">;
2404
2405def : Pat<(i32 (mul GPR32:$Rn, GPR32:$Rm)),
2406          (MADDWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
2407def : Pat<(i64 (mul GPR64:$Rn, GPR64:$Rm)),
2408          (MADDXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
2409
2410def : Pat<(i32 (ineg (mul GPR32:$Rn, GPR32:$Rm))),
2411          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
2412def : Pat<(i64 (ineg (mul GPR64:$Rn, GPR64:$Rm))),
2413          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
2414def : Pat<(i32 (mul (ineg GPR32:$Rn), GPR32:$Rm)),
2415          (MSUBWrrr GPR32:$Rn, GPR32:$Rm, WZR)>;
2416def : Pat<(i64 (mul (ineg GPR64:$Rn), GPR64:$Rm)),
2417          (MSUBXrrr GPR64:$Rn, GPR64:$Rm, XZR)>;
2418} // AddedComplexity = 5
2419
2420let AddedComplexity = 5 in {
2421def SMADDLrrr : WideMulAccum<0, 0b001, "smaddl", add, sext>;
2422def SMSUBLrrr : WideMulAccum<1, 0b001, "smsubl", sub, sext>;
2423def UMADDLrrr : WideMulAccum<0, 0b101, "umaddl", add, zext>;
2424def UMSUBLrrr : WideMulAccum<1, 0b101, "umsubl", sub, zext>;
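// SMADDL computes Xa + sext(Wn) * sext(Wm); UMADDL uses zero-extension, and
// the *SUBL forms subtract the widened product from Xa instead.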
2425
2426def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext_inreg GPR64:$Rm, i32))),
2427          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2428def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (sext GPR32:$Rm))),
2429          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2430def : Pat<(i64 (mul (sext GPR32:$Rn), (sext GPR32:$Rm))),
2431          (SMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2432def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (and GPR64:$Rm, 0xFFFFFFFF))),
2433          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2434def : Pat<(i64 (mul (and GPR64:$Rn, 0xFFFFFFFF), (zext GPR32:$Rm))),
2435          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2436def : Pat<(i64 (mul (zext GPR32:$Rn), (zext GPR32:$Rm))),
2437          (UMADDLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2438
2439def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (sext GPR32:$Rm)))),
2440          (SMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2441def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (zext GPR32:$Rm)))),
2442          (UMSUBLrrr GPR32:$Rn, GPR32:$Rm, XZR)>;
2443
2444def : Pat<(i64 (mul (sext GPR32:$Rn), (s64imm_32bit:$C))),
2445          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2446def : Pat<(i64 (mul (zext GPR32:$Rn), (i64imm_32bit:$C))),
2447          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2448def : Pat<(i64 (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C))),
2449          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2450                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2451
2452def : Pat<(i64 (ineg (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
2453          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2454def : Pat<(i64 (ineg (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
2455          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2456def : Pat<(i64 (ineg (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)))),
2457          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2458                     (MOVi32imm (trunc_imm imm:$C)), XZR)>;
2459
2460def : Pat<(i64 (add (mul (sext GPR32:$Rn), (s64imm_32bit:$C)), GPR64:$Ra)),
2461          (SMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2462def : Pat<(i64 (add (mul (zext GPR32:$Rn), (i64imm_32bit:$C)), GPR64:$Ra)),
2463          (UMADDLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2464def : Pat<(i64 (add (mul (sext_inreg GPR64:$Rn, i32), (s64imm_32bit:$C)),
2465                    GPR64:$Ra)),
2466          (SMADDLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2467                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2468
2469def : Pat<(i64 (sub GPR64:$Ra, (mul (sext GPR32:$Rn), (s64imm_32bit:$C)))),
2470          (SMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2471def : Pat<(i64 (sub GPR64:$Ra, (mul (zext GPR32:$Rn), (i64imm_32bit:$C)))),
2472          (UMSUBLrrr GPR32:$Rn, (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2473def : Pat<(i64 (sub GPR64:$Ra, (mul (sext_inreg GPR64:$Rn, i32),
2474                                    (s64imm_32bit:$C)))),
2475          (SMSUBLrrr (i32 (EXTRACT_SUBREG GPR64:$Rn, sub_32)),
2476                     (MOVi32imm (trunc_imm imm:$C)), GPR64:$Ra)>;
2477
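// smullwithsignbits (a PatFrag defined elsewhere in this file) only matches a
// 64x64-bit multiply whose operands each have more than 32 sign bits, i.e.
// both values are sign extensions of their low 32 bits, so SMULL suffices.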
2478def : Pat<(i64 (smullwithsignbits GPR64:$Rn, GPR64:$Rm)),
2479          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2480def : Pat<(i64 (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm))),
2481          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2482
2483def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, GPR64:$Rm), GPR64:$Ra)),
2484          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2485def : Pat<(i64 (add (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)), GPR64:$Ra)),
2486          (SMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2487
2488def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
2489          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2490def : Pat<(i64 (ineg (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
2491          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2492
2493def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, GPR64:$Rm))),
2494          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2495def : Pat<(i64 (sub GPR64:$Ra, (smullwithsignbits GPR64:$Rn, (sext GPR32:$Rm)))),
2496          (SMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2497
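// top32Zero matches a GPR64 whose upper 32 bits are known to be zero, so the
// unsigned 32x32->64 widening multiply gives the same result.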
2498def : Pat<(i64 (mul top32Zero:$Rn, top32Zero:$Rm)),
2499          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2500def : Pat<(i64 (mul top32Zero:$Rn, (zext GPR32:$Rm))),
2501          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2502
2503def : Pat<(i64 (add (mul top32Zero:$Rn, top32Zero:$Rm), GPR64:$Ra)),
2504          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2505def : Pat<(i64 (add (mul top32Zero:$Rn, (zext GPR32:$Rm)), GPR64:$Ra)),
2506          (UMADDLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2507
2508def : Pat<(i64 (ineg (mul top32Zero:$Rn, top32Zero:$Rm))),
2509          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), XZR)>;
2510def : Pat<(i64 (ineg (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
2511          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, XZR)>;
2512
2513def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, top32Zero:$Rm))),
2514          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), (EXTRACT_SUBREG $Rm, sub_32), GPR64:$Ra)>;
2515def : Pat<(i64 (sub GPR64:$Ra, (mul top32Zero:$Rn, (zext GPR32:$Rm)))),
2516          (UMSUBLrrr (EXTRACT_SUBREG $Rn, sub_32), $Rm, GPR64:$Ra)>;
2517} // AddedComplexity = 5
2518
2519def : MulAccumWAlias<"mul", MADDWrrr>;
2520def : MulAccumXAlias<"mul", MADDXrrr>;
2521def : MulAccumWAlias<"mneg", MSUBWrrr>;
2522def : MulAccumXAlias<"mneg", MSUBXrrr>;
2523def : WideMulAccumAlias<"smull", SMADDLrrr>;
2524def : WideMulAccumAlias<"smnegl", SMSUBLrrr>;
2525def : WideMulAccumAlias<"umull", UMADDLrrr>;
2526def : WideMulAccumAlias<"umnegl", UMSUBLrrr>;
2527
2528// Multiply-high
2529def SMULHrr : MulHi<0b010, "smulh", mulhs>;
2530def UMULHrr : MulHi<0b110, "umulh", mulhu>;
2531
2532// CRC32
2533def CRC32Brr : BaseCRC32<0, 0b00, 0, GPR32, int_aarch64_crc32b, "crc32b">;
2534def CRC32Hrr : BaseCRC32<0, 0b01, 0, GPR32, int_aarch64_crc32h, "crc32h">;
2535def CRC32Wrr : BaseCRC32<0, 0b10, 0, GPR32, int_aarch64_crc32w, "crc32w">;
2536def CRC32Xrr : BaseCRC32<1, 0b11, 0, GPR64, int_aarch64_crc32x, "crc32x">;
2537
2538def CRC32CBrr : BaseCRC32<0, 0b00, 1, GPR32, int_aarch64_crc32cb, "crc32cb">;
2539def CRC32CHrr : BaseCRC32<0, 0b01, 1, GPR32, int_aarch64_crc32ch, "crc32ch">;
2540def CRC32CWrr : BaseCRC32<0, 0b10, 1, GPR32, int_aarch64_crc32cw, "crc32cw">;
2541def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">;
2542
2543// v8.1 atomic CAS
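// Throughout the v8.1 atomics, suffix "a" = acquire, "l" = release,
// "al" = acquire+release, and no suffix = no ordering.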
2544defm CAS   : CompareAndSwap<0, 0, "">;
2545defm CASA  : CompareAndSwap<1, 0, "a">;
2546defm CASL  : CompareAndSwap<0, 1, "l">;
2547defm CASAL : CompareAndSwap<1, 1, "al">;
2548
2549// v8.1 atomic CASP
2550defm CASP   : CompareAndSwapPair<0, 0, "">;
2551defm CASPA  : CompareAndSwapPair<1, 0, "a">;
2552defm CASPL  : CompareAndSwapPair<0, 1, "l">;
2553defm CASPAL : CompareAndSwapPair<1, 1, "al">;
2554
2555// v8.1 atomic SWP
2556defm SWP   : Swap<0, 0, "">;
2557defm SWPA  : Swap<1, 0, "a">;
2558defm SWPL  : Swap<0, 1, "l">;
2559defm SWPAL : Swap<1, 1, "al">;
2560
// v8.1 atomic LD<OP>(register): performs a load and then the corresponding
// ST<OP>(register) operation on the loaded value.
2562defm LDADD   : LDOPregister<0b000, "add", 0, 0, "">;
2563defm LDADDA  : LDOPregister<0b000, "add", 1, 0, "a">;
2564defm LDADDL  : LDOPregister<0b000, "add", 0, 1, "l">;
2565defm LDADDAL : LDOPregister<0b000, "add", 1, 1, "al">;
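// For example, "ldadd w0, w1, [x2]" atomically loads the value at [x2] into
// w1 and stores (that value + w0) back to [x2].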
2566
2567defm LDCLR   : LDOPregister<0b001, "clr", 0, 0, "">;
2568defm LDCLRA  : LDOPregister<0b001, "clr", 1, 0, "a">;
2569defm LDCLRL  : LDOPregister<0b001, "clr", 0, 1, "l">;
2570defm LDCLRAL : LDOPregister<0b001, "clr", 1, 1, "al">;
2571
2572defm LDEOR   : LDOPregister<0b010, "eor", 0, 0, "">;
2573defm LDEORA  : LDOPregister<0b010, "eor", 1, 0, "a">;
2574defm LDEORL  : LDOPregister<0b010, "eor", 0, 1, "l">;
2575defm LDEORAL : LDOPregister<0b010, "eor", 1, 1, "al">;
2576
2577defm LDSET   : LDOPregister<0b011, "set", 0, 0, "">;
2578defm LDSETA  : LDOPregister<0b011, "set", 1, 0, "a">;
2579defm LDSETL  : LDOPregister<0b011, "set", 0, 1, "l">;
2580defm LDSETAL : LDOPregister<0b011, "set", 1, 1, "al">;
2581
2582defm LDSMAX   : LDOPregister<0b100, "smax", 0, 0, "">;
2583defm LDSMAXA  : LDOPregister<0b100, "smax", 1, 0, "a">;
2584defm LDSMAXL  : LDOPregister<0b100, "smax", 0, 1, "l">;
2585defm LDSMAXAL : LDOPregister<0b100, "smax", 1, 1, "al">;
2586
2587defm LDSMIN   : LDOPregister<0b101, "smin", 0, 0, "">;
2588defm LDSMINA  : LDOPregister<0b101, "smin", 1, 0, "a">;
2589defm LDSMINL  : LDOPregister<0b101, "smin", 0, 1, "l">;
2590defm LDSMINAL : LDOPregister<0b101, "smin", 1, 1, "al">;
2591
2592defm LDUMAX   : LDOPregister<0b110, "umax", 0, 0, "">;
2593defm LDUMAXA  : LDOPregister<0b110, "umax", 1, 0, "a">;
2594defm LDUMAXL  : LDOPregister<0b110, "umax", 0, 1, "l">;
2595defm LDUMAXAL : LDOPregister<0b110, "umax", 1, 1, "al">;
2596
2597defm LDUMIN   : LDOPregister<0b111, "umin", 0, 0, "">;
2598defm LDUMINA  : LDOPregister<0b111, "umin", 1, 0, "a">;
2599defm LDUMINL  : LDOPregister<0b111, "umin", 0, 1, "l">;
2600defm LDUMINAL : LDOPregister<0b111, "umin", 1, 1, "al">;
2601
// v8.1 atomic ST<OP>(register): aliases of "LD<OP>(register)" with Rt = XZR
2603defm : STOPregister<"stadd","LDADD">; // STADDx
2604defm : STOPregister<"stclr","LDCLR">; // STCLRx
2605defm : STOPregister<"steor","LDEOR">; // STEORx
2606defm : STOPregister<"stset","LDSET">; // STSETx
2607defm : STOPregister<"stsmax","LDSMAX">;// STSMAXx
2608defm : STOPregister<"stsmin","LDSMIN">;// STSMINx
2609defm : STOPregister<"stumax","LDUMAX">;// STUMAXx
2610defm : STOPregister<"stumin","LDUMIN">;// STUMINx
2611
2612// v8.5 Memory Tagging Extension
2613let Predicates = [HasMTE] in {
2614
2615def IRG   : BaseTwoOperandRegReg<0b1, 0b0, 0b000100, GPR64sp, "irg",
2616                                 int_aarch64_irg, GPR64sp, GPR64>, Sched<[]>;
2617
2618def GMI   : BaseTwoOperandRegReg<0b1, 0b0, 0b000101, GPR64, "gmi",
2619                                 int_aarch64_gmi, GPR64sp>, Sched<[]> {
2620  let isNotDuplicable = 1;
2621}
2622def ADDG  : AddSubG<0, "addg", null_frag>;
2623def SUBG  : AddSubG<1, "subg", null_frag>;
2624
2625def : InstAlias<"irg $dst, $src", (IRG GPR64sp:$dst, GPR64sp:$src, XZR), 1>;
2626
2627def SUBP : SUBP<0, "subp", int_aarch64_subp>, Sched<[]>;
2628def SUBPS : SUBP<1, "subps", null_frag>, Sched<[]>{
2629  let Defs = [NZCV];
2630}
2631
2632def : InstAlias<"cmpp $lhs, $rhs", (SUBPS XZR, GPR64sp:$lhs, GPR64sp:$rhs), 0>;
2633
2634def LDG : MemTagLoad<"ldg", "\t$Rt, [$Rn, $offset]">;
2635
2636def : Pat<(int_aarch64_addg (am_indexedu6s128 GPR64sp:$Rn, uimm6s16:$imm6), imm0_15:$imm4),
2637          (ADDG GPR64sp:$Rn, imm0_63:$imm6, imm0_15:$imm4)>;
2638def : Pat<(int_aarch64_ldg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn,  simm9s16:$offset)),
2639          (LDG GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;
2640
2641def : InstAlias<"ldg $Rt, [$Rn]", (LDG GPR64:$Rt, GPR64sp:$Rn, 0), 1>;
2642
2643def LDGM : MemTagVector<1, "ldgm", "\t$Rt, [$Rn]",
2644                   (outs GPR64:$Rt), (ins GPR64sp:$Rn)>;
2645def STGM : MemTagVector<0, "stgm", "\t$Rt, [$Rn]",
2646                   (outs), (ins GPR64:$Rt, GPR64sp:$Rn)>;
2647def STZGM : MemTagVector<0, "stzgm", "\t$Rt, [$Rn]",
2648                   (outs), (ins GPR64:$Rt, GPR64sp:$Rn)> {
2649  let Inst{23} = 0;
2650}
2651
2652defm STG   : MemTagStore<0b00, "stg">;
2653defm STZG  : MemTagStore<0b01, "stzg">;
2654defm ST2G  : MemTagStore<0b10, "st2g">;
2655defm STZ2G : MemTagStore<0b11, "stz2g">;
2656
2657def : Pat<(AArch64stg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2658          (STGi $Rn, $Rm, $imm)>;
2659def : Pat<(AArch64stzg GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2660          (STZGi $Rn, $Rm, $imm)>;
2661def : Pat<(AArch64st2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2662          (ST2Gi $Rn, $Rm, $imm)>;
2663def : Pat<(AArch64stz2g GPR64sp:$Rn, (am_indexeds9s128 GPR64sp:$Rm, simm9s16:$imm)),
2664          (STZ2Gi $Rn, $Rm, $imm)>;
2665
2666defm STGP     : StorePairOffset <0b01, 0, GPR64z, simm7s16, "stgp">;
2667def  STGPpre  : StorePairPreIdx <0b01, 0, GPR64z, simm7s16, "stgp">;
2668def  STGPpost : StorePairPostIdx<0b01, 0, GPR64z, simm7s16, "stgp">;
2669
2670def : Pat<(int_aarch64_stg GPR64:$Rt, (am_indexeds9s128 GPR64sp:$Rn, simm9s16:$offset)),
2671          (STGi GPR64:$Rt, GPR64sp:$Rn,  simm9s16:$offset)>;
2672
2673def : Pat<(int_aarch64_stgp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$imm), GPR64:$Rt, GPR64:$Rt2),
2674          (STGPi $Rt, $Rt2, $Rn, $imm)>;
2675
2676def IRGstack
2677    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rsp, GPR64:$Rm), []>,
2678      Sched<[]>;
2679def TAGPstack
2680    : Pseudo<(outs GPR64sp:$Rd), (ins GPR64sp:$Rn, uimm6s16:$imm6, GPR64sp:$Rm, imm0_15:$imm4), []>,
2681      Sched<[]>;
2682
2683// Explicit SP in the first operand prevents ShrinkWrap optimization
2684// from leaving this instruction out of the stack frame. When IRGstack
2685// is transformed into IRG, this operand is replaced with the actual
2686// register / expression for the tagged base pointer of the current function.
2687def : Pat<(int_aarch64_irg_sp i64:$Rm), (IRGstack SP, i64:$Rm)>;
2688
// Large STG to be expanded into a loop. $sz is the size, $Rn is the start
// address. $Rn_wback is one past the end of the range. $Rm is the loop counter.
2691let isCodeGenOnly=1, mayStore=1, Defs=[NZCV] in {
2692def STGloop_wback
2693    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
2694             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
2695      Sched<[WriteAdr, WriteST]>;
2696
2697def STZGloop_wback
2698    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn_wback), (ins i64imm:$sz, GPR64sp:$Rn),
2699             [], "$Rn = $Rn_wback,@earlyclobber $Rn_wback,@earlyclobber $Rm" >,
2700      Sched<[WriteAdr, WriteST]>;
2701
// Variants of the above where $Rn2 is an independent register, not tied to the
// input register $Rn. Their purpose is to allow a FrameIndex operand as $Rn
// (which of course cannot be written back).
2704def STGloop
2705    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
2706             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
2707      Sched<[WriteAdr, WriteST]>;
2708
2709def STZGloop
2710    : Pseudo<(outs GPR64common:$Rm, GPR64sp:$Rn2), (ins i64imm:$sz, GPR64sp:$Rn),
2711             [], "@earlyclobber $Rn2,@earlyclobber $Rm" >,
2712      Sched<[WriteAdr, WriteST]>;
2713}
2714
2715} // Predicates = [HasMTE]
2716
2717//===----------------------------------------------------------------------===//
2718// Logical instructions.
2719//===----------------------------------------------------------------------===//
2720
2721// (immediate)
2722defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">;
2723defm AND  : LogicalImm<0b00, "and", and, "bic">;
2724defm EOR  : LogicalImm<0b10, "eor", xor, "eon">;
2725defm ORR  : LogicalImm<0b01, "orr", or, "orn">;
2726
2727// FIXME: these aliases *are* canonical sometimes (when movz can't be
2728// used). Actually, it seems to be working right now, but putting logical_immXX
2729// here is a bit dodgy on the AsmParser side too.
2730def : InstAlias<"mov $dst, $imm", (ORRWri GPR32sp:$dst, WZR,
2731                                          logical_imm32:$imm), 0>;
2732def : InstAlias<"mov $dst, $imm", (ORRXri GPR64sp:$dst, XZR,
2733                                          logical_imm64:$imm), 0>;
2734
2735
2736// (register)
2737defm ANDS : LogicalRegS<0b11, 0, "ands", AArch64and_flag>;
2738defm BICS : LogicalRegS<0b11, 1, "bics",
2739                        BinOpFrag<(AArch64and_flag node:$LHS, (not node:$RHS))>>;
2740defm AND  : LogicalReg<0b00, 0, "and", and>;
2741defm BIC  : LogicalReg<0b00, 1, "bic",
2742                       BinOpFrag<(and node:$LHS, (not node:$RHS))>, 3>;
2743defm EON  : LogicalReg<0b10, 1, "eon",
2744                       BinOpFrag<(not (xor node:$LHS, node:$RHS))>>;
2745defm EOR  : LogicalReg<0b10, 0, "eor", xor>;
2746defm ORN  : LogicalReg<0b01, 1, "orn",
2747                       BinOpFrag<(or node:$LHS, (not node:$RHS))>>;
2748defm ORR  : LogicalReg<0b01, 0, "orr", or>;
2749
2750def : InstAlias<"mov $dst, $src", (ORRWrs GPR32:$dst, WZR, GPR32:$src, 0), 2>;
2751def : InstAlias<"mov $dst, $src", (ORRXrs GPR64:$dst, XZR, GPR64:$src, 0), 2>;
2752
2753def : InstAlias<"mvn $Wd, $Wm", (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, 0), 3>;
2754def : InstAlias<"mvn $Xd, $Xm", (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, 0), 3>;
2755
2756def : InstAlias<"mvn $Wd, $Wm$sh",
2757                (ORNWrs GPR32:$Wd, WZR, GPR32:$Wm, logical_shift32:$sh), 2>;
2758def : InstAlias<"mvn $Xd, $Xm$sh",
2759                (ORNXrs GPR64:$Xd, XZR, GPR64:$Xm, logical_shift64:$sh), 2>;
2760
2761def : InstAlias<"tst $src1, $src2",
2762                (ANDSWri WZR, GPR32:$src1, logical_imm32:$src2), 2>;
2763def : InstAlias<"tst $src1, $src2",
2764                (ANDSXri XZR, GPR64:$src1, logical_imm64:$src2), 2>;
2765
2766def : InstAlias<"tst $src1, $src2",
2767                        (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, 0), 3>;
2768def : InstAlias<"tst $src1, $src2",
2769                        (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, 0), 3>;
2770
2771def : InstAlias<"tst $src1, $src2$sh",
2772               (ANDSWrs WZR, GPR32:$src1, GPR32:$src2, logical_shift32:$sh), 2>;
2773def : InstAlias<"tst $src1, $src2$sh",
2774               (ANDSXrs XZR, GPR64:$src1, GPR64:$src2, logical_shift64:$sh), 2>;
2775
2776
2777def : Pat<(not GPR32:$Wm), (ORNWrr WZR, GPR32:$Wm)>;
2778def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>;
2779
// Emit (and 0xFFFFFFFF) as an ORRWrr move which may be eliminated.
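// (A write to a W register implicitly zeroes bits [63:32] of the X register,
// so the 32-bit move alone implements the mask.)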
2781let AddedComplexity = 6 in
2782def : Pat<(i64 (and GPR64:$Rn, 0xffffffff)),
2783          (SUBREG_TO_REG (i64 0), (ORRWrr WZR, (EXTRACT_SUBREG GPR64:$Rn, sub_32)), sub_32)>;
2784
2785
2786//===----------------------------------------------------------------------===//
2787// One operand data processing instructions.
2788//===----------------------------------------------------------------------===//
2789
2790defm CLS    : OneOperandData<0b000101, "cls">;
2791defm CLZ    : OneOperandData<0b000100, "clz", ctlz>;
2792defm RBIT   : OneOperandData<0b000000, "rbit", bitreverse>;
2793
2794def  REV16Wr : OneWRegData<0b000001, "rev16",
2795                                     UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>;
2796def  REV16Xr : OneXRegData<0b000001, "rev16", null_frag>;
2797
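// cttz(x) == ctlz(bitreverse(x)), hence RBIT followed by CLZ below.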
2798def : Pat<(cttz GPR32:$Rn),
2799          (CLZWr (RBITWr GPR32:$Rn))>;
2800def : Pat<(cttz GPR64:$Rn),
2801          (CLZXr (RBITXr GPR64:$Rn))>;
2802def : Pat<(ctlz (or (shl (xor (sra GPR32:$Rn, (i64 31)), GPR32:$Rn), (i64 1)),
2803                (i32 1))),
2804          (CLSWr GPR32:$Rn)>;
2805def : Pat<(ctlz (or (shl (xor (sra GPR64:$Rn, (i64 63)), GPR64:$Rn), (i64 1)),
2806                (i64 1))),
2807          (CLSXr GPR64:$Rn)>;
2808def : Pat<(int_aarch64_cls GPR32:$Rn), (CLSWr GPR32:$Rn)>;
2809def : Pat<(int_aarch64_cls64 GPR64:$Rm), (EXTRACT_SUBREG (CLSXr GPR64:$Rm), sub_32)>;
2810
// Unlike the other one-operand instructions, the instructions with the "rev"
// mnemonic do *not* just differ in the size bit, but actually use different
// opcode bits for the different sizes.
2814def REVWr   : OneWRegData<0b000010, "rev", bswap>;
2815def REVXr   : OneXRegData<0b000011, "rev", bswap>;
2816def REV32Xr : OneXRegData<0b000010, "rev32",
2817                                    UnOpFrag<(rotr (bswap node:$LHS), (i64 32))>>;
2818
2819def : InstAlias<"rev64 $Rd, $Rn", (REVXr GPR64:$Rd, GPR64:$Rn), 0>;
2820
2821// The bswap commutes with the rotr so we want a pattern for both possible
2822// orders.
2823def : Pat<(bswap (rotr GPR32:$Rn, (i64 16))), (REV16Wr GPR32:$Rn)>;
2824def : Pat<(bswap (rotr GPR64:$Rn, (i64 32))), (REV32Xr GPR64:$Rn)>;
2825
2826// Match (srl (bswap x), C) -> revC if the upper bswap bits are known zero.
2827def : Pat<(srl (bswap top16Zero:$Rn), (i64 16)), (REV16Wr GPR32:$Rn)>;
2828def : Pat<(srl (bswap top32Zero:$Rn), (i64 32)), (REV32Xr GPR64:$Rn)>;
2829
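// ((x >> 8) & 0x00ff00ff00ff00ff) | ((x << 8) & 0xff00ff00ff00ff00) swaps
// the two bytes within each 16-bit element, which is exactly rev16.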
2830def : Pat<(or (and (srl GPR64:$Rn, (i64 8)), (i64 0x00ff00ff00ff00ff)),
2831              (and (shl GPR64:$Rn, (i64 8)), (i64 0xff00ff00ff00ff00))),
2832          (REV16Xr GPR64:$Rn)>;
2833
2834//===----------------------------------------------------------------------===//
2835// Bitfield immediate extraction instruction.
2836//===----------------------------------------------------------------------===//
2837let hasSideEffects = 0 in
2838defm EXTR : ExtractImm<"extr">;
2839def : InstAlias<"ror $dst, $src, $shift",
2840            (EXTRWrri GPR32:$dst, GPR32:$src, GPR32:$src, imm0_31:$shift)>;
2841def : InstAlias<"ror $dst, $src, $shift",
2842            (EXTRXrri GPR64:$dst, GPR64:$src, GPR64:$src, imm0_63:$shift)>;
2843
2844def : Pat<(rotr GPR32:$Rn, (i64 imm0_31:$imm)),
2845          (EXTRWrri GPR32:$Rn, GPR32:$Rn, imm0_31:$imm)>;
2846def : Pat<(rotr GPR64:$Rn, (i64 imm0_63:$imm)),
2847          (EXTRXrri GPR64:$Rn, GPR64:$Rn, imm0_63:$imm)>;
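// A rotate right is EXTR with both register operands the same: extracting
// from the concatenation Rn:Rn at bit #sh yields rotr(Rn, sh).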
2848
2849//===----------------------------------------------------------------------===//
2850// Other bitfield immediate instructions.
2851//===----------------------------------------------------------------------===//
2852let hasSideEffects = 0 in {
2853defm BFM  : BitfieldImmWith2RegArgs<0b01, "bfm">;
2854defm SBFM : BitfieldImm<0b00, "sbfm">;
2855defm UBFM : BitfieldImm<0b10, "ubfm">;
2856}
2857
2858def i32shift_a : Operand<i64>, SDNodeXForm<imm, [{
2859  uint64_t enc = (32 - N->getZExtValue()) & 0x1f;
2860  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2861}]>;
2862
2863def i32shift_b : Operand<i64>, SDNodeXForm<imm, [{
2864  uint64_t enc = 31 - N->getZExtValue();
2865  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2866}]>;
2867
2868// min(7, 31 - shift_amt)
2869def i32shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
2870  uint64_t enc = 31 - N->getZExtValue();
2871  enc = enc > 7 ? 7 : enc;
2872  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2873}]>;
2874
2875// min(15, 31 - shift_amt)
2876def i32shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
2877  uint64_t enc = 31 - N->getZExtValue();
2878  enc = enc > 15 ? 15 : enc;
2879  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2880}]>;
2881
2882def i64shift_a : Operand<i64>, SDNodeXForm<imm, [{
2883  uint64_t enc = (64 - N->getZExtValue()) & 0x3f;
2884  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2885}]>;
2886
2887def i64shift_b : Operand<i64>, SDNodeXForm<imm, [{
2888  uint64_t enc = 63 - N->getZExtValue();
2889  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2890}]>;
2891
2892// min(7, 63 - shift_amt)
2893def i64shift_sext_i8 : Operand<i64>, SDNodeXForm<imm, [{
2894  uint64_t enc = 63 - N->getZExtValue();
2895  enc = enc > 7 ? 7 : enc;
2896  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2897}]>;
2898
2899// min(15, 63 - shift_amt)
2900def i64shift_sext_i16 : Operand<i64>, SDNodeXForm<imm, [{
2901  uint64_t enc = 63 - N->getZExtValue();
2902  enc = enc > 15 ? 15 : enc;
2903  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2904}]>;
2905
2906// min(31, 63 - shift_amt)
2907def i64shift_sext_i32 : Operand<i64>, SDNodeXForm<imm, [{
2908  uint64_t enc = 63 - N->getZExtValue();
2909  enc = enc > 31 ? 31 : enc;
2910  return CurDAG->getTargetConstant(enc, SDLoc(N), MVT::i64);
2911}]>;
2912
2913def : Pat<(shl GPR32:$Rn, (i64 imm0_31:$imm)),
2914          (UBFMWri GPR32:$Rn, (i64 (i32shift_a imm0_31:$imm)),
2915                              (i64 (i32shift_b imm0_31:$imm)))>;
2916def : Pat<(shl GPR64:$Rn, (i64 imm0_63:$imm)),
2917          (UBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
2918                              (i64 (i64shift_b imm0_63:$imm)))>;
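// For example, (shl w0, 5) becomes UBFM w0, w0, #27, #26: i32shift_a yields
// (32 - 5) & 31 = 27 and i32shift_b yields 31 - 5 = 26, the canonical
// encoding of "lsl w0, w0, #5".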
2919
2920let AddedComplexity = 10 in {
2921def : Pat<(sra GPR32:$Rn, (i64 imm0_31:$imm)),
2922          (SBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
2923def : Pat<(sra GPR64:$Rn, (i64 imm0_63:$imm)),
2924          (SBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
2925}
2926
2927def : InstAlias<"asr $dst, $src, $shift",
2928                (SBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
2929def : InstAlias<"asr $dst, $src, $shift",
2930                (SBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
2931def : InstAlias<"sxtb $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
2932def : InstAlias<"sxtb $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
2933def : InstAlias<"sxth $dst, $src", (SBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
2934def : InstAlias<"sxth $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
2935def : InstAlias<"sxtw $dst, $src", (SBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
2936
2937def : Pat<(srl GPR32:$Rn, (i64 imm0_31:$imm)),
2938          (UBFMWri GPR32:$Rn, imm0_31:$imm, 31)>;
2939def : Pat<(srl GPR64:$Rn, (i64 imm0_63:$imm)),
2940          (UBFMXri GPR64:$Rn, imm0_63:$imm, 63)>;
2941
2942def : InstAlias<"lsr $dst, $src, $shift",
2943                (UBFMWri GPR32:$dst, GPR32:$src, imm0_31:$shift, 31)>;
2944def : InstAlias<"lsr $dst, $src, $shift",
2945                (UBFMXri GPR64:$dst, GPR64:$src, imm0_63:$shift, 63)>;
2946def : InstAlias<"uxtb $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 7)>;
2947def : InstAlias<"uxtb $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 7)>;
2948def : InstAlias<"uxth $dst, $src", (UBFMWri GPR32:$dst, GPR32:$src, 0, 15)>;
2949def : InstAlias<"uxth $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 15)>;
2950def : InstAlias<"uxtw $dst, $src", (UBFMXri GPR64:$dst, GPR64:$src, 0, 31)>;
2951
2952//===----------------------------------------------------------------------===//
2953// Conditional comparison instructions.
2954//===----------------------------------------------------------------------===//
2955defm CCMN : CondComparison<0, "ccmn", AArch64ccmn>;
2956defm CCMP : CondComparison<1, "ccmp", AArch64ccmp>;
2957
2958//===----------------------------------------------------------------------===//
2959// Conditional select instructions.
2960//===----------------------------------------------------------------------===//
2961defm CSEL  : CondSelect<0, 0b00, "csel">;
2962
2963def inc : PatFrag<(ops node:$in), (add_and_or_is_add node:$in, 1)>;
2964defm CSINC : CondSelectOp<0, 0b01, "csinc", inc>;
2965defm CSINV : CondSelectOp<1, 0b00, "csinv", not>;
2966defm CSNEG : CondSelectOp<1, 0b01, "csneg", ineg>;
2967
2968def : Pat<(AArch64csinv GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2969          (CSINVWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2970def : Pat<(AArch64csinv GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2971          (CSINVXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2972def : Pat<(AArch64csneg GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2973          (CSNEGWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2974def : Pat<(AArch64csneg GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2975          (CSNEGXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2976def : Pat<(AArch64csinc GPR32:$tval, GPR32:$fval, (i32 imm:$cc), NZCV),
2977          (CSINCWr GPR32:$tval, GPR32:$fval, (i32 imm:$cc))>;
2978def : Pat<(AArch64csinc GPR64:$tval, GPR64:$fval, (i32 imm:$cc), NZCV),
2979          (CSINCXr GPR64:$tval, GPR64:$fval, (i32 imm:$cc))>;
2980
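// With both sources the zero register, CSINC computes cc ? 0 : 1 and CSINV
// computes cc ? 0 : -1; the patterns below select these cset/csetm idioms.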
2981def : Pat<(AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV),
2982          (CSINCWr WZR, WZR, (i32 imm:$cc))>;
2983def : Pat<(AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV),
2984          (CSINCXr XZR, XZR, (i32 imm:$cc))>;
2985def : Pat<(AArch64csel GPR32:$tval, (i32 1), (i32 imm:$cc), NZCV),
2986          (CSINCWr GPR32:$tval, WZR, (i32 imm:$cc))>;
2987def : Pat<(AArch64csel GPR64:$tval, (i64 1), (i32 imm:$cc), NZCV),
2988          (CSINCXr GPR64:$tval, XZR, (i32 imm:$cc))>;
2989def : Pat<(AArch64csel (i32 1), GPR32:$fval, (i32 imm:$cc), NZCV),
2990          (CSINCWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2991def : Pat<(AArch64csel (i64 1), GPR64:$fval, (i32 imm:$cc), NZCV),
2992          (CSINCXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
2993def : Pat<(AArch64csel (i32 0), (i32 -1), (i32 imm:$cc), NZCV),
2994          (CSINVWr WZR, WZR, (i32 imm:$cc))>;
2995def : Pat<(AArch64csel (i64 0), (i64 -1), (i32 imm:$cc), NZCV),
2996          (CSINVXr XZR, XZR, (i32 imm:$cc))>;
2997def : Pat<(AArch64csel GPR32:$tval, (i32 -1), (i32 imm:$cc), NZCV),
2998          (CSINVWr GPR32:$tval, WZR, (i32 imm:$cc))>;
2999def : Pat<(AArch64csel GPR64:$tval, (i64 -1), (i32 imm:$cc), NZCV),
3000          (CSINVXr GPR64:$tval, XZR, (i32 imm:$cc))>;
3001def : Pat<(AArch64csel (i32 -1), GPR32:$fval, (i32 imm:$cc), NZCV),
3002          (CSINVWr GPR32:$fval, WZR, (i32 (inv_cond_XFORM imm:$cc)))>;
3003def : Pat<(AArch64csel (i64 -1), GPR64:$fval, (i32 imm:$cc), NZCV),
3004          (CSINVXr GPR64:$fval, XZR, (i32 (inv_cond_XFORM imm:$cc)))>;
3005
3006def : Pat<(add_and_or_is_add GPR32:$val, (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
3007          (CSINCWr GPR32:$val, GPR32:$val, (i32 imm:$cc))>;
3008def : Pat<(add_and_or_is_add GPR64:$val, (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
3009          (CSINCXr GPR64:$val, GPR64:$val, (i32 imm:$cc))>;
3010
3011def : Pat<(or (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
3012          (CSINCWr GPR32:$val, WZR, imm:$cc)>;
3013def : Pat<(or (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
3014          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
3015def : Pat<(or (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
3016          (CSINCXr GPR64:$val, XZR, imm:$cc)>;
3017
3018def : Pat<(and (topbitsallzero32:$val), (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV)),
3019          (CSELWr WZR, GPR32:$val, imm:$cc)>;
3020def : Pat<(and (topbitsallzero64:$val), (AArch64csel (i64 0), (i64 1), (i32 imm:$cc), NZCV)),
3021          (CSELXr XZR, GPR64:$val, imm:$cc)>;
3022def : Pat<(and (topbitsallzero64:$val), (zext (AArch64csel (i32 0), (i32 1), (i32 imm:$cc), NZCV))),
3023          (CSELXr XZR, GPR64:$val, imm:$cc)>;
3024
// The inverse of the condition code from the alias instruction is what is used
// in the aliased instruction. The parser already inverts the condition code
// for these aliases.
3028def : InstAlias<"cset $dst, $cc",
3029                (CSINCWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
3030def : InstAlias<"cset $dst, $cc",
3031                (CSINCXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
3032
3033def : InstAlias<"csetm $dst, $cc",
3034                (CSINVWr GPR32:$dst, WZR, WZR, inv_ccode:$cc)>;
3035def : InstAlias<"csetm $dst, $cc",
3036                (CSINVXr GPR64:$dst, XZR, XZR, inv_ccode:$cc)>;
3037
3038def : InstAlias<"cinc $dst, $src, $cc",
3039                (CSINCWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
3040def : InstAlias<"cinc $dst, $src, $cc",
3041                (CSINCXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
3042
3043def : InstAlias<"cinv $dst, $src, $cc",
3044                (CSINVWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
3045def : InstAlias<"cinv $dst, $src, $cc",
3046                (CSINVXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
3047
3048def : InstAlias<"cneg $dst, $src, $cc",
3049                (CSNEGWr GPR32:$dst, GPR32:$src, GPR32:$src, inv_ccode:$cc)>;
3050def : InstAlias<"cneg $dst, $src, $cc",
3051                (CSNEGXr GPR64:$dst, GPR64:$src, GPR64:$src, inv_ccode:$cc)>;
3052
3053//===----------------------------------------------------------------------===//
3054// PC-relative instructions.
3055//===----------------------------------------------------------------------===//
3056let isReMaterializable = 1 in {
3057let hasSideEffects = 0, mayStore = 0, mayLoad = 0 in {
3058def ADR  : ADRI<0, "adr", adrlabel,
3059                [(set GPR64:$Xd, (AArch64adr tglobaladdr:$label))]>;
3060} // hasSideEffects = 0
3061
3062def ADRP : ADRI<1, "adrp", adrplabel,
3063                [(set GPR64:$Xd, (AArch64adrp tglobaladdr:$label))]>;
3064} // isReMaterializable = 1
3065
// Address and page address of a constant pool entry, block address, external
// symbol or jump table.
3067def : Pat<(AArch64adr tconstpool:$cp), (ADR tconstpool:$cp)>;
3068def : Pat<(AArch64adr tblockaddress:$cp), (ADR tblockaddress:$cp)>;
3069def : Pat<(AArch64adr texternalsym:$sym), (ADR texternalsym:$sym)>;
3070def : Pat<(AArch64adr tjumptable:$sym), (ADR tjumptable:$sym)>;
3071def : Pat<(AArch64adrp tconstpool:$cp), (ADRP tconstpool:$cp)>;
3072def : Pat<(AArch64adrp tblockaddress:$cp), (ADRP tblockaddress:$cp)>;
3073def : Pat<(AArch64adrp texternalsym:$sym), (ADRP texternalsym:$sym)>;
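// ADRP produces the 4KiB-aligned page address of the symbol (PC-relative,
// +/-4GiB range); the low 12 bits are supplied separately, e.g. by an ADD or
// a load/store immediate carrying a :lo12: relocation.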
3074
3075//===----------------------------------------------------------------------===//
3076// Unconditional branch (register) instructions.
3077//===----------------------------------------------------------------------===//
3078
3079let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
3080def RET  : BranchReg<0b0010, "ret", []>;
3081def DRPS : SpecialReturn<0b0101, "drps">;
3082def ERET : SpecialReturn<0b0100, "eret">;
3083} // isReturn = 1, isTerminator = 1, isBarrier = 1
3084
3085// Default to the LR register.
3086def : InstAlias<"ret", (RET LR)>;
3087
3088let isCall = 1, Defs = [LR], Uses = [SP] in {
3089  def BLR : BranchReg<0b0001, "blr", []>;
3090  def BLRNoIP : Pseudo<(outs), (ins GPR64noip:$Rn), []>,
3091                Sched<[WriteBrReg]>,
3092                PseudoInstExpansion<(BLR GPR64:$Rn)>;
3093  def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>,
3094                     Sched<[WriteBrReg]>;
3095  def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>,
3096                Sched<[WriteBrReg]>;
3097  let Uses = [X16, SP] in
3098  def BLR_X16 : Pseudo<(outs), (ins), [(AArch64call_arm64ec_to_x64 X16)]>,
3099                Sched<[WriteBrReg]>,
3100                PseudoInstExpansion<(BLR X16)>;
3101} // isCall
3102
3103def : Pat<(AArch64call GPR64:$Rn),
3104          (BLR GPR64:$Rn)>,
3105      Requires<[NoSLSBLRMitigation]>;
3106def : Pat<(AArch64call GPR64noip:$Rn),
3107          (BLRNoIP GPR64noip:$Rn)>,
3108      Requires<[SLSBLRMitigation]>;
3109
3110def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn),
3111          (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>,
3112      Requires<[NoSLSBLRMitigation]>;
3113
3114def : Pat<(AArch64call_bti GPR64:$Rn),
3115          (BLR_BTI GPR64:$Rn)>,
3116      Requires<[NoSLSBLRMitigation]>;
3117def : Pat<(AArch64call_bti GPR64noip:$Rn),
3118          (BLR_BTI GPR64noip:$Rn)>,
3119      Requires<[SLSBLRMitigation]>;
3120
3121let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
3122def BR  : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>;
3123} // isBranch, isTerminator, isBarrier, isIndirectBranch
3124
3125// Create a separate pseudo-instruction for codegen to use so that we don't
3126// flag lr as used in every function. It'll be restored before the RET by the
3127// epilogue if it's legitimately used.
3128def RET_ReallyLR : Pseudo<(outs), (ins), [(AArch64retglue)]>,
3129                   Sched<[WriteBrReg]> {
3130  let isTerminator = 1;
3131  let isBarrier = 1;
3132  let isReturn = 1;
3133}
3134
3135// This is a directive-like pseudo-instruction. The purpose is to insert an
3136// R_AARCH64_TLSDESC_CALL relocation at the offset of the following instruction
3137// (which in the usual case is a BLR).
3138let hasSideEffects = 1 in
3139def TLSDESCCALL : Pseudo<(outs), (ins i64imm:$sym), []>, Sched<[]> {
3140  let AsmString = ".tlsdesccall $sym";
3141}
3142
3143// Pseudo instruction to tell the streamer to emit a 'B' character into the
3144// augmentation string.
3145def EMITBKEY : Pseudo<(outs), (ins), []>, Sched<[]> {}
3146
3147// Pseudo instruction to tell the streamer to emit a 'G' character into the
3148// augmentation string.
3149def EMITMTETAGGED : Pseudo<(outs), (ins), []>, Sched<[]> {}
3150
// FIXME: maybe the scratch register used shouldn't be fixed to X1?
// FIXME: can "hasSideEffects" be dropped?
// This gets lowered to an instruction sequence which takes 16 bytes.
3154let isCall = 1, Defs = [NZCV, LR, X0, X1], hasSideEffects = 1, Size = 16,
3155    isCodeGenOnly = 1 in
3156def TLSDESC_CALLSEQ
3157    : Pseudo<(outs), (ins i64imm:$sym),
3158             [(AArch64tlsdesc_callseq tglobaltlsaddr:$sym)]>,
3159      Sched<[WriteI, WriteLD, WriteI, WriteBrReg]>;
3160def : Pat<(AArch64tlsdesc_callseq texternalsym:$sym),
3161          (TLSDESC_CALLSEQ texternalsym:$sym)>;
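// The emitted sequence is roughly (conventional TLS descriptor form):
//   adrp x0, :tlsdesc:sym
//   ldr  x1, [x0, :tlsdesc_lo12:sym]
//   add  x0, x0, :tlsdesc_lo12:sym
//   .tlsdesccall sym
//   blr  x1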
3162
3163//===----------------------------------------------------------------------===//
3164// Conditional branch (immediate) instruction.
3165//===----------------------------------------------------------------------===//
3166def Bcc : BranchCond<0, "b">;
3167
3168// Armv8.8-A variant form which hints to the branch predictor that
3169// this branch is very likely to go the same way nearly all the time
3170// (even though it is not known at compile time _which_ way that is).
3171def BCcc : BranchCond<1, "bc">, Requires<[HasHBC]>;
3172
3173//===----------------------------------------------------------------------===//
3174// Compare-and-branch instructions.
3175//===----------------------------------------------------------------------===//
3176defm CBZ  : CmpBranch<0, "cbz", AArch64cbz>;
3177defm CBNZ : CmpBranch<1, "cbnz", AArch64cbnz>;
3178
3179//===----------------------------------------------------------------------===//
3180// Test-bit-and-branch instructions.
3181//===----------------------------------------------------------------------===//
3182defm TBZ  : TestBranch<0, "tbz", AArch64tbz>;
3183defm TBNZ : TestBranch<1, "tbnz", AArch64tbnz>;
3184
3185//===----------------------------------------------------------------------===//
3186// Unconditional branch (immediate) instructions.
3187//===----------------------------------------------------------------------===//
3188let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
3189def B  : BranchImm<0, "b", [(br bb:$addr)]>;
3190} // isBranch, isTerminator, isBarrier
3191
3192let isCall = 1, Defs = [LR], Uses = [SP] in {
3193def BL : CallImm<1, "bl", [(AArch64call tglobaladdr:$addr)]>;
3194} // isCall
3195def : Pat<(AArch64call texternalsym:$func), (BL texternalsym:$func)>;
3196
3197//===----------------------------------------------------------------------===//
3198// Exception generation instructions.
3199//===----------------------------------------------------------------------===//
3200let isTrap = 1 in {
3201def BRK   : ExceptionGeneration<0b001, 0b00, "brk",
3202                                [(int_aarch64_break timm32_0_65535:$imm)]>;
3203}
3204def DCPS1 : ExceptionGeneration<0b101, 0b01, "dcps1">;
3205def DCPS2 : ExceptionGeneration<0b101, 0b10, "dcps2">;
3206def DCPS3 : ExceptionGeneration<0b101, 0b11, "dcps3">, Requires<[HasEL3]>;
3207def HLT   : ExceptionGeneration<0b010, 0b00, "hlt",
3208                                [(int_aarch64_hlt timm32_0_65535:$imm)]>;
3209def HVC   : ExceptionGeneration<0b000, 0b10, "hvc">;
3210def SMC   : ExceptionGeneration<0b000, 0b11, "smc">, Requires<[HasEL3]>;
3211def SVC   : ExceptionGeneration<0b000, 0b01, "svc">;
3212
3213// DCPSn defaults to an immediate operand of zero if unspecified.
3214def : InstAlias<"dcps1", (DCPS1 0)>;
3215def : InstAlias<"dcps2", (DCPS2 0)>;
3216def : InstAlias<"dcps3", (DCPS3 0)>, Requires<[HasEL3]>;
3217
3218def UDF : UDFType<0, "udf">;
3219
3220//===----------------------------------------------------------------------===//
3221// Load instructions.
3222//===----------------------------------------------------------------------===//
3223
3224// Pair (indexed, offset)
3225defm LDPW : LoadPairOffset<0b00, 0, GPR32z, simm7s4, "ldp">;
3226defm LDPX : LoadPairOffset<0b10, 0, GPR64z, simm7s8, "ldp">;
3227let Predicates = [HasFPARMv8] in {
3228defm LDPS : LoadPairOffset<0b00, 1, FPR32Op, simm7s4, "ldp">;
3229defm LDPD : LoadPairOffset<0b01, 1, FPR64Op, simm7s8, "ldp">;
3230defm LDPQ : LoadPairOffset<0b10, 1, FPR128Op, simm7s16, "ldp">;
3231}
3232
3233defm LDPSW : LoadPairOffset<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (pre-indexed)
def LDPWpre : LoadPairPreIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpre : LoadPairPreIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
let Predicates = [HasFPARMv8] in {
def LDPSpre : LoadPairPreIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpre : LoadPairPreIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpre : LoadPairPreIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
}

def LDPSWpre : LoadPairPreIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;

// Pair (post-indexed)
def LDPWpost : LoadPairPostIdx<0b00, 0, GPR32z, simm7s4, "ldp">;
def LDPXpost : LoadPairPostIdx<0b10, 0, GPR64z, simm7s8, "ldp">;
let Predicates = [HasFPARMv8] in {
def LDPSpost : LoadPairPostIdx<0b00, 1, FPR32Op, simm7s4, "ldp">;
def LDPDpost : LoadPairPostIdx<0b01, 1, FPR64Op, simm7s8, "ldp">;
def LDPQpost : LoadPairPostIdx<0b10, 1, FPR128Op, simm7s16, "ldp">;
}

def LDPSWpost : LoadPairPostIdx<0b01, 0, GPR64z, simm7s4, "ldpsw">;


// Pair (no allocate)
defm LDNPW : LoadPairNoAlloc<0b00, 0, GPR32z, simm7s4, "ldnp">;
defm LDNPX : LoadPairNoAlloc<0b10, 0, GPR64z, simm7s8, "ldnp">;
let Predicates = [HasFPARMv8] in {
defm LDNPS : LoadPairNoAlloc<0b00, 1, FPR32Op, simm7s4, "ldnp">;
defm LDNPD : LoadPairNoAlloc<0b01, 1, FPR64Op, simm7s8, "ldnp">;
defm LDNPQ : LoadPairNoAlloc<0b10, 1, FPR128Op, simm7s16, "ldnp">;
}

def : Pat<(AArch64ldp (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (LDPXi GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64ldnp (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (LDNPQi GPR64sp:$Rn, simm7s16:$offset)>;
//---
// (register offset)
//---

// Integer
defm LDRBB : Load8RO<0b00,  0, 0b01, GPR32, "ldrb", i32, zextloadi8>;
defm LDRHH : Load16RO<0b01, 0, 0b01, GPR32, "ldrh", i32, zextloadi16>;
defm LDRW  : Load32RO<0b10, 0, 0b01, GPR32, "ldr", i32, load>;
defm LDRX  : Load64RO<0b11, 0, 0b01, GPR64, "ldr", i64, load>;

// Floating-point
let Predicates = [HasFPARMv8] in {
defm LDRB : Load8RO<0b00,   1, 0b01, FPR8Op,   "ldr", i8, load>;
defm LDRH : Load16RO<0b01,  1, 0b01, FPR16Op,  "ldr", f16, load>;
defm LDRS : Load32RO<0b10,  1, 0b01, FPR32Op,  "ldr", f32, load>;
defm LDRD : Load64RO<0b11,  1, 0b01, FPR64Op,  "ldr", f64, load>;
defm LDRQ : Load128RO<0b00, 1, 0b11, FPR128Op, "ldr", f128, load>;
}

// Load sign-extended half-word
defm LDRSHW : Load16RO<0b01, 0, 0b11, GPR32, "ldrsh", i32, sextloadi16>;
defm LDRSHX : Load16RO<0b01, 0, 0b10, GPR64, "ldrsh", i64, sextloadi16>;

// Load sign-extended byte
defm LDRSBW : Load8RO<0b00, 0, 0b11, GPR32, "ldrsb", i32, sextloadi8>;
defm LDRSBX : Load8RO<0b00, 0, 0b10, GPR64, "ldrsb", i64, sextloadi8>;

// Load sign-extended word
defm LDRSW  : Load32RO<0b10, 0, 0b10, GPR64, "ldrsw", i64, sextloadi32>;

// Pre-fetch.
defm PRFM : PrefetchRO<0b11, 0, 0b10, "prfm">;

// For regular loads, we do not have any alignment requirement.
// Thus, it is safe to map the vector loads directly onto the interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
multiclass ScalToVecROLoadPat<ROAddrMode ro, SDPatternOperator loadop,
                              ValueType ScalTy, ValueType VecTy,
                              Instruction LOADW, Instruction LOADX,
                              SubRegIndex sub> {
  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$offset),
                           sub)>;

  def : Pat<(VecTy (scalar_to_vector (ScalTy
              (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset))))),
            (INSERT_SUBREG (VecTy (IMPLICIT_DEF)),
                           (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$offset),
                           sub)>;
}

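// Illustrative note: each ScalToVecROLoadPat instantiation below expands to a
// W-register and an X-register offset pattern. For example, the first defm
// lets a scalar_to_vector of an extending i8 load select as a single
//   ldr b0, [x0, x1]
// with the remaining vector lanes left undefined (IMPLICIT_DEF), which is
// exactly what scalar_to_vector permits.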
let AddedComplexity = 10 in {
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v8i8,  LDRBroW, LDRBroX, bsub>;
defm : ScalToVecROLoadPat<ro8,  extloadi8,  i32, v16i8, LDRBroW, LDRBroX, bsub>;

defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v4i16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, extloadi16, i32, v8i16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro16, load,       i32, v4f16, LDRHroW, LDRHroX, hsub>;
defm : ScalToVecROLoadPat<ro16, load,       i32, v8f16, LDRHroW, LDRHroX, hsub>;

defm : ScalToVecROLoadPat<ro32, load,       i32, v2i32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       i32, v4i32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro32, load,       f32, v2f32, LDRSroW, LDRSroX, ssub>;
defm : ScalToVecROLoadPat<ro32, load,       f32, v4f32, LDRSroW, LDRSroX, ssub>;

defm : ScalToVecROLoadPat<ro64, load,       i64, v2i64, LDRDroW, LDRDroX, dsub>;

defm : ScalToVecROLoadPat<ro64, load,       f64, v2f64, LDRDroW, LDRDroX, dsub>;


def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Windexed64 GPR64sp:$Rn, GPR32:$Rm,
                                           ro_Wextend64:$extend))))),
           (LDRDroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend64:$extend)>;

def : Pat <(v1i64 (scalar_to_vector (i64
                      (load (ro_Xindexed64 GPR64sp:$Rn, GPR64:$Rm,
                                           ro_Xextend64:$extend))))),
           (LDRDroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend64:$extend)>;
}

// Match all loads of 64-bit width whose type is compatible with FPR64
multiclass VecROLoadPat<ROAddrMode ro, ValueType VecTy,
                        Instruction LOADW, Instruction LOADX> {

  def : Pat<(VecTy (load (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (LOADW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(VecTy (load (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (LOADX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
let Predicates = [IsLE] in {
  // We must do vector loads with LD1 in big-endian.
  defm : VecROLoadPat<ro64, v2i32, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v2f32, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v8i8,  LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4i16, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4f16, LDRDroW, LDRDroX>;
  defm : VecROLoadPat<ro64, v4bf16, LDRDroW, LDRDroX>;
}

defm : VecROLoadPat<ro64, v1i64,  LDRDroW, LDRDroX>;
defm : VecROLoadPat<ro64, v1f64,  LDRDroW, LDRDroX>;

// Match all loads of 128-bit width whose type is compatible with FPR128
let Predicates = [IsLE] in {
  // We must do vector loads with LD1 in big-endian.
  defm : VecROLoadPat<ro128, v2i64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v2f64,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4i32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v4f32,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8i16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8f16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v8bf16,  LDRQroW, LDRQroX>;
  defm : VecROLoadPat<ro128, v16i8,  LDRQroW, LDRQroX>;
}
} // AddedComplexity = 10

// zextload -> i64
multiclass ExtLoadTo64ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i64 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                           sub_32)>;

  def : Pat<(i64 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (SUBREG_TO_REG (i64 0),
                           (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                           sub_32)>;
}

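// Illustrative note: these SUBREG_TO_REG patterns rely on the architectural
// guarantee that a write to a W register zeroes bits [63:32] of the
// corresponding X register, so e.g.
//   (i64 (zextloadi8 addr))  ==>  ldrb w0, [x1, x2]
// needs no separate extend instruction.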
let AddedComplexity = 10 in {
  defm : ExtLoadTo64ROPat<ro8,  zextloadi8,  LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, zextloadi16, LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, zextloadi32, LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  zextloadi1,  LDRBBroW, LDRBBroX>;

  // extload -> zextload
  defm : ExtLoadTo64ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo64ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo64ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;

  // extloadi1 -> zextloadi8
  defm : ExtLoadTo64ROPat<ro8,  extloadi1,   LDRBBroW, LDRBBroX>;
}


// extload/zextload -> i32
multiclass ExtLoadTo32ROPat<ROAddrMode ro, SDPatternOperator loadop,
                            Instruction INSTW, Instruction INSTX> {
  def : Pat<(i32 (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend))),
            (INSTW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(i32 (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend))),
            (INSTX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;

}

let AddedComplexity = 10 in {
  // extload -> zextload
  defm : ExtLoadTo32ROPat<ro8,  extloadi8,   LDRBBroW, LDRBBroX>;
  defm : ExtLoadTo32ROPat<ro16, extloadi16,  LDRHHroW, LDRHHroX>;
  defm : ExtLoadTo32ROPat<ro32, extloadi32,  LDRWroW,  LDRWroX>;

  // zextloadi1 -> zextloadi8
  defm : ExtLoadTo32ROPat<ro8, zextloadi1, LDRBBroW, LDRBBroX>;
}

//---
// (unsigned immediate)
//---
defm LDRX : LoadUI<0b11, 0, 0b01, GPR64z, uimm12s8, "ldr",
                   [(set GPR64z:$Rt,
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRW : LoadUI<0b10, 0, 0b01, GPR32z, uimm12s4, "ldr",
                   [(set GPR32z:$Rt,
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
let Predicates = [HasFPARMv8] in {
defm LDRB : LoadUI<0b00, 1, 0b01, FPR8Op, uimm12s1, "ldr",
                   [(set FPR8Op:$Rt,
                         (load (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)))]>;
defm LDRH : LoadUI<0b01, 1, 0b01, FPR16Op, uimm12s2, "ldr",
                   [(set (f16 FPR16Op:$Rt),
                         (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)))]>;
defm LDRS : LoadUI<0b10, 1, 0b01, FPR32Op, uimm12s4, "ldr",
                   [(set (f32 FPR32Op:$Rt),
                         (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)))]>;
defm LDRD : LoadUI<0b11, 1, 0b01, FPR64Op, uimm12s8, "ldr",
                   [(set (f64 FPR64Op:$Rt),
                         (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)))]>;
defm LDRQ : LoadUI<0b00, 1, 0b11, FPR128Op, uimm12s16, "ldr",
                 [(set (f128 FPR128Op:$Rt),
                       (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)))]>;
}

// bf16 load pattern
def : Pat <(bf16 (load (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;

// For regular loads, we do not have any alignment requirement.
// Thus, it is safe to map the vector loads directly onto the interesting
// addressing modes.
// FIXME: We could do the same for bitconvert to floating point vectors.
def : Pat <(v8i8 (scalar_to_vector (i32
               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v16i8 (scalar_to_vector (i32
               (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub)>;
def : Pat <(v4i16 (scalar_to_vector (i32
               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v8i16 (scalar_to_vector (i32
               (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub)>;
def : Pat <(v2i32 (scalar_to_vector (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v4i32 (scalar_to_vector (i32
               (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub)>;
def : Pat <(v1i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat <(v2i64 (scalar_to_vector (i64
               (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))))),
           (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                          (LDRDui GPR64sp:$Rn, uimm12s8:$offset), dsub)>;

// Match all loads of 64-bit width whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v2f32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v8i8 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4i16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v2i32 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4f16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(v4bf16 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
            (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
}
def : Pat<(v1f64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(v1i64 (load (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))),
          (LDRDui GPR64sp:$Rn, uimm12s8:$offset)>;

// Match all loads of 128-bit width whose type is compatible with FPR128
let Predicates = [IsLE] in {
  // We must use LD1 to perform vector loads in big-endian.
  def : Pat<(v4f32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2f64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v16i8 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8i16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v4i32 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v2i64 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8f16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(v8bf16 (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
            (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;
}
def : Pat<(f128  (load (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset))),
          (LDRQui GPR64sp:$Rn, uimm12s16:$offset)>;

defm LDRHH : LoadUI<0b01, 0, 0b01, GPR32, uimm12s2, "ldrh",
                    [(set GPR32:$Rt,
                          (zextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                     uimm12s2:$offset)))]>;
defm LDRBB : LoadUI<0b00, 0, 0b01, GPR32, uimm12s1, "ldrb",
                    [(set GPR32:$Rt,
                          (zextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                   uimm12s1:$offset)))]>;
// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;

// zextloadi1 -> zextloadi8
def : Pat<(i32 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (zextloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// extload -> zextload
def : Pat<(i32 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
          (LDRHHui GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(i32 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i32 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
          (LDRBBui GPR64sp:$Rn, uimm12s1:$offset)>;
def : Pat<(i64 (extloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRHHui GPR64sp:$Rn, uimm12s2:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))),
    (SUBREG_TO_REG (i64 0), (LDRBBui GPR64sp:$Rn, uimm12s1:$offset), sub_32)>;

// load sign-extended half-word
defm LDRSHW : LoadUI<0b01, 0, 0b11, GPR32, uimm12s2, "ldrsh",
                     [(set GPR32:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;
defm LDRSHX : LoadUI<0b01, 0, 0b10, GPR64, uimm12s2, "ldrsh",
                     [(set GPR64:$Rt,
                           (sextloadi16 (am_indexed16 GPR64sp:$Rn,
                                                      uimm12s2:$offset)))]>;

// load sign-extended byte
defm LDRSBW : LoadUI<0b00, 0, 0b11, GPR32, uimm12s1, "ldrsb",
                     [(set GPR32:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;
defm LDRSBX : LoadUI<0b00, 0, 0b10, GPR64, uimm12s1, "ldrsb",
                     [(set GPR64:$Rt,
                           (sextloadi8 (am_indexed8 GPR64sp:$Rn,
                                                    uimm12s1:$offset)))]>;

// load sign-extended word
defm LDRSW  : LoadUI<0b10, 0, 0b10, GPR64, uimm12s4, "ldrsw",
                     [(set GPR64:$Rt,
                           (sextloadi32 (am_indexed32 GPR64sp:$Rn,
                                                      uimm12s4:$offset)))]>;

// load zero-extended word
def : Pat<(i64 (zextloadi32 (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))),
      (SUBREG_TO_REG (i64 0), (LDRWui GPR64sp:$Rn, uimm12s4:$offset), sub_32)>;

// Pre-fetch.
def PRFMui : PrefetchUI<0b11, 0, 0b10, "prfm",
                        [(AArch64Prefetch timm:$Rt,
                                        (am_indexed64 GPR64sp:$Rn,
                                                      uimm12s8:$offset))]>;

def : InstAlias<"prfm $Rt, [$Rn]", (PRFMui prfop:$Rt, GPR64sp:$Rn, 0)>;
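
// Illustrative examples only: the prefetch operand accepts either a named
// hint or a raw immediate, e.g.
//   prfm pldl1keep, [x0, #64]
//   prfm #0, [x0]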

//---
// (literal)

def alignedglobal : PatLeaf<(iPTR iPTR:$label), [{
  if (auto *G = dyn_cast<GlobalAddressSDNode>(N)) {
    const DataLayout &DL = MF->getDataLayout();
    Align Align = G->getGlobal()->getPointerAlignment(DL);
    return Align >= 4 && G->getOffset() % 4 == 0;
  }
  if (auto *C = dyn_cast<ConstantPoolSDNode>(N))
    return C->getAlign() >= 4 && C->getOffset() % 4 == 0;
  return false;
}]>;

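// Illustrative note: LDR (literal) encodes a 19-bit signed word offset from
// the PC (roughly +/-1MiB), which is why alignedglobal above requires 4-byte
// alignment of both the global and its offset, e.g.
//   ldr x0, some_label   // some_label (hypothetical) must be 4-byte aligned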
def LDRWl : LoadLiteral<0b00, 0, GPR32z, "ldr",
  [(set GPR32z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
def LDRXl : LoadLiteral<0b01, 0, GPR64z, "ldr",
  [(set GPR64z:$Rt, (load (AArch64adr alignedglobal:$label)))]>;
let Predicates = [HasFPARMv8] in {
def LDRSl : LoadLiteral<0b00, 1, FPR32Op, "ldr",
  [(set (f32 FPR32Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRDl : LoadLiteral<0b01, 1, FPR64Op, "ldr",
  [(set (f64 FPR64Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
def LDRQl : LoadLiteral<0b10, 1, FPR128Op, "ldr",
  [(set (f128 FPR128Op:$Rt), (load (AArch64adr alignedglobal:$label)))]>;
}

// load sign-extended word
def LDRSWl : LoadLiteral<0b10, 0, GPR64z, "ldrsw",
  [(set GPR64z:$Rt, (sextloadi32 (AArch64adr alignedglobal:$label)))]>;

let AddedComplexity = 20 in {
def : Pat<(i64 (zextloadi32 (AArch64adr alignedglobal:$label))),
        (SUBREG_TO_REG (i64 0), (LDRWl $label), sub_32)>;
}

// prefetch
def PRFMl : PrefetchLiteral<0b11, 0, "prfm", []>;
//                   [(AArch64Prefetch imm:$Rt, tglobaladdr:$label)]>;

//---
// (unscaled immediate)
defm LDURX : LoadUnscaled<0b11, 0, 0b01, GPR64z, "ldur",
                    [(set GPR64z:$Rt,
                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURW : LoadUnscaled<0b10, 0, 0b01, GPR32z, "ldur",
                    [(set GPR32z:$Rt,
                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
let Predicates = [HasFPARMv8] in {
defm LDURB : LoadUnscaled<0b00, 1, 0b01, FPR8Op, "ldur",
                    [(set FPR8Op:$Rt,
                          (load (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURH : LoadUnscaled<0b01, 1, 0b01, FPR16Op, "ldur",
                    [(set (f16 FPR16Op:$Rt),
                          (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURS : LoadUnscaled<0b10, 1, 0b01, FPR32Op, "ldur",
                    [(set (f32 FPR32Op:$Rt),
                          (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURD : LoadUnscaled<0b11, 1, 0b01, FPR64Op, "ldur",
                    [(set (f64 FPR64Op:$Rt),
                          (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURQ : LoadUnscaled<0b00, 1, 0b11, FPR128Op, "ldur",
                    [(set (f128 FPR128Op:$Rt),
                          (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset)))]>;
}

defm LDURHH
    : LoadUnscaled<0b01, 0, 0b01, GPR32, "ldurh",
             [(set GPR32:$Rt,
                    (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURBB
    : LoadUnscaled<0b00, 0, 0b01, GPR32, "ldurb",
             [(set GPR32:$Rt,
                    (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// bf16 load pattern
def : Pat <(bf16 (load (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
           (LDURHi GPR64sp:$Rn, simm9:$offset)>;

// Match all loads of 64-bit width whose type is compatible with FPR64
let Predicates = [IsLE] in {
  def : Pat<(v2f32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i32 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i8 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f16 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
            (LDURDi GPR64sp:$Rn, simm9:$offset)>;
}
def : Pat<(v1f64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(v1i64 (load (am_unscaled64 GPR64sp:$Rn, simm9:$offset))),
          (LDURDi GPR64sp:$Rn, simm9:$offset)>;

// Match all loads of 128-bit width whose type is compatible with FPR128
let Predicates = [IsLE] in {
  def : Pat<(v2f64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v2i64 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4f32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v4i32 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8i16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v16i8 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(v8f16 (load (am_unscaled128 GPR64sp:$Rn, simm9:$offset))),
            (LDURQi GPR64sp:$Rn, simm9:$offset)>;
}

// anyext -> zext
def : Pat<(i32 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (extloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (extloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
// unscaled zext
def : Pat<(i32 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
          (LDURHHi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i32 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
          (LDURBBi GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(i64 (zextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURWi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi1 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
    (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;


//---
// LDR mnemonics fall back to LDUR for negative or unaligned offsets.

// Define new assembler match classes because we only want to match these
// operands when they don't otherwise match the scaled addressing mode for
// LDR/STR. Don't associate a DiagnosticType either, as we want the
// diagnostic for the canonical form (the scaled operand) to take precedence.
class SImm9OffsetOperand<int Width> : AsmOperandClass {
  let Name = "SImm9OffsetFB" # Width;
  let PredicateMethod = "isSImm9OffsetFB<" # Width # ">";
  let RenderMethod = "addImmOperands";
}

def SImm9OffsetFB8Operand : SImm9OffsetOperand<8>;
def SImm9OffsetFB16Operand : SImm9OffsetOperand<16>;
def SImm9OffsetFB32Operand : SImm9OffsetOperand<32>;
def SImm9OffsetFB64Operand : SImm9OffsetOperand<64>;
def SImm9OffsetFB128Operand : SImm9OffsetOperand<128>;

def simm9_offset_fb8 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB8Operand;
}
def simm9_offset_fb16 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB16Operand;
}
def simm9_offset_fb32 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB32Operand;
}
def simm9_offset_fb64 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB64Operand;
}
def simm9_offset_fb128 : Operand<i64> {
  let ParserMatchClass = SImm9OffsetFB128Operand;
}

def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
let Predicates = [HasFPARMv8] in {
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
                (LDURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
def : InstAlias<"ldr $Rt, [$Rn, $offset]",
               (LDURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
}
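
// Illustrative examples only: with the fall-back operands above, an LDR whose
// immediate cannot be encoded in the scaled (unsigned, size-multiple) form
// assembles to the equivalent LDUR, e.g.
//   ldr x0, [x1, #-8]   // negative offset       -> ldur x0, [x1, #-8]
//   ldr w0, [x1, #3]    // not a multiple of 4   -> ldur w0, [x1, #3]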

// zextload -> i64
def : Pat<(i64 (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))),
  (SUBREG_TO_REG (i64 0), (LDURBBi GPR64sp:$Rn, simm9:$offset), sub_32)>;
def : Pat<(i64 (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))),
  (SUBREG_TO_REG (i64 0), (LDURHHi GPR64sp:$Rn, simm9:$offset), sub_32)>;

// load sign-extended half-word
defm LDURSHW
    : LoadUnscaled<0b01, 0, 0b11, GPR32, "ldursh",
               [(set GPR32:$Rt,
                    (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSHX
    : LoadUnscaled<0b01, 0, 0b10, GPR64, "ldursh",
              [(set GPR64:$Rt,
                    (sextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended byte
defm LDURSBW
    : LoadUnscaled<0b00, 0, 0b11, GPR32, "ldursb",
                [(set GPR32:$Rt,
                      (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;
defm LDURSBX
    : LoadUnscaled<0b00, 0, 0b10, GPR64, "ldursb",
                [(set GPR64:$Rt,
                      (sextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset)))]>;

// load sign-extended word
defm LDURSW
    : LoadUnscaled<0b10, 0, 0b10, GPR64, "ldursw",
              [(set GPR64:$Rt,
                    (sextloadi32 (am_unscaled32 GPR64sp:$Rn, simm9:$offset)))]>;

// Zero- and sign-extending aliases from generic LDR* mnemonics to LDUR*.
def : InstAlias<"ldrb $Rt, [$Rn, $offset]",
                (LDURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrh $Rt, [$Rn, $offset]",
                (LDURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsb $Rt, [$Rn, $offset]",
                (LDURSBXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsh $Rt, [$Rn, $offset]",
                (LDURSHXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
def : InstAlias<"ldrsw $Rt, [$Rn, $offset]",
                (LDURSWi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;

// An LDR will implicitly zero the rest of the vector, so vector_insert(zeros,
// load, 0) can use a single load.
multiclass LoadInsertZeroPatterns<SDPatternOperator LoadOp, ValueType VT, ValueType HVT, ValueType SVT,
                                  ValueType ScalarVT, Instruction LoadInst, Instruction UnscaledLoadInst,
                                  ComplexPattern Addr, ComplexPattern UnscaledAddr, Operand AddrImm,
                                  SubRegIndex SubReg> {
  // Scaled
  def : Pat <(vector_insert (VT immAllZerosV),
                (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
            (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (VT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;

  // Half-vector patterns
  def : Pat <(vector_insert (HVT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (HVT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;

  // SVE patterns
  def : Pat <(vector_insert (SVT immAllZerosV),
                 (ScalarVT (LoadOp (Addr GPR64sp:$Rn, AddrImm:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (LoadInst GPR64sp:$Rn, AddrImm:$offset), SubReg)>;
  // Unscaled
  def : Pat <(vector_insert (SVT immAllZerosV),
                 (ScalarVT (LoadOp (UnscaledAddr GPR64sp:$Rn, simm9:$offset))), (i64 0)),
             (SUBREG_TO_REG (i64 0), (UnscaledLoadInst GPR64sp:$Rn, simm9:$offset), SubReg)>;
}

defm : LoadInsertZeroPatterns<extloadi8,  v16i8,  v8i8,   nxv16i8,  i32,  LDRBui, LDURBi,
                              am_indexed8,  am_unscaled8,  uimm12s1, bsub>;
defm : LoadInsertZeroPatterns<extloadi16, v8i16,  v4i16,  nxv8i16,  i32,  LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v4i32,  v2i32,  nxv4i32,  i32,  LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load,       v2i64,  v1i64,  nxv2i64,  i64,  LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;
defm : LoadInsertZeroPatterns<load,       v8f16,  v4f16,  nxv8f16,  f16,  LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v8bf16, v4bf16, nxv8bf16, bf16, LDRHui, LDURHi,
                              am_indexed16, am_unscaled16, uimm12s2, hsub>;
defm : LoadInsertZeroPatterns<load,       v4f32,  v2f32,  nxv4f32,  f32,  LDRSui, LDURSi,
                              am_indexed32, am_unscaled32, uimm12s4, ssub>;
defm : LoadInsertZeroPatterns<load,       v2f64,  v1f64,  nxv2f64,  f64,  LDRDui, LDURDi,
                              am_indexed64, am_unscaled64, uimm12s8, dsub>;
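
// Illustrative note: with these patterns, IR such as
//   insertelement <4 x float> zeroinitializer, float %f, i64 0
// (where %f is itself a load) selects as a single
//   ldr s0, [x0]
// because the scalar FP load already zeroes the remaining vector lanes.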

// Pre-fetch.
defm PRFUM : PrefetchUnscaled<0b11, 0, 0b10, "prfum",
                  [(AArch64Prefetch timm:$Rt,
                                  (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;

//---
// (unscaled immediate, unprivileged)
defm LDTRX : LoadUnprivileged<0b11, 0, 0b01, GPR64, "ldtr">;
defm LDTRW : LoadUnprivileged<0b10, 0, 0b01, GPR32, "ldtr">;

defm LDTRH : LoadUnprivileged<0b01, 0, 0b01, GPR32, "ldtrh">;
defm LDTRB : LoadUnprivileged<0b00, 0, 0b01, GPR32, "ldtrb">;

// load sign-extended half-word
defm LDTRSHW : LoadUnprivileged<0b01, 0, 0b11, GPR32, "ldtrsh">;
defm LDTRSHX : LoadUnprivileged<0b01, 0, 0b10, GPR64, "ldtrsh">;

// load sign-extended byte
defm LDTRSBW : LoadUnprivileged<0b00, 0, 0b11, GPR32, "ldtrsb">;
defm LDTRSBX : LoadUnprivileged<0b00, 0, 0b10, GPR64, "ldtrsb">;

// load sign-extended word
defm LDTRSW  : LoadUnprivileged<0b10, 0, 0b10, GPR64, "ldtrsw">;

//---
// (immediate pre-indexed)
def LDRWpre : LoadPreIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpre : LoadPreIdx<0b11, 0, 0b01, GPR64z, "ldr">;
let Predicates = [HasFPARMv8] in {
def LDRBpre : LoadPreIdx<0b00, 1, 0b01, FPR8Op,  "ldr">;
def LDRHpre : LoadPreIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpre : LoadPreIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpre : LoadPreIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpre : LoadPreIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
}

// load sign-extended half-word
def LDRSHWpre : LoadPreIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpre : LoadPreIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpre : LoadPreIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpre : LoadPreIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpre : LoadPreIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpre : LoadPreIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpre : LoadPreIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//---
// (immediate post-indexed)
def LDRWpost : LoadPostIdx<0b10, 0, 0b01, GPR32z, "ldr">;
def LDRXpost : LoadPostIdx<0b11, 0, 0b01, GPR64z, "ldr">;
let Predicates = [HasFPARMv8] in {
def LDRBpost : LoadPostIdx<0b00, 1, 0b01, FPR8Op,  "ldr">;
def LDRHpost : LoadPostIdx<0b01, 1, 0b01, FPR16Op, "ldr">;
def LDRSpost : LoadPostIdx<0b10, 1, 0b01, FPR32Op, "ldr">;
def LDRDpost : LoadPostIdx<0b11, 1, 0b01, FPR64Op, "ldr">;
def LDRQpost : LoadPostIdx<0b00, 1, 0b11, FPR128Op, "ldr">;
}

// load sign-extended half-word
def LDRSHWpost : LoadPostIdx<0b01, 0, 0b11, GPR32z, "ldrsh">;
def LDRSHXpost : LoadPostIdx<0b01, 0, 0b10, GPR64z, "ldrsh">;

// load sign-extended byte
def LDRSBWpost : LoadPostIdx<0b00, 0, 0b11, GPR32z, "ldrsb">;
def LDRSBXpost : LoadPostIdx<0b00, 0, 0b10, GPR64z, "ldrsb">;

// load zero-extended byte and half-word
def LDRBBpost : LoadPostIdx<0b00, 0, 0b01, GPR32z, "ldrb">;
def LDRHHpost : LoadPostIdx<0b01, 0, 0b01, GPR32z, "ldrh">;

// load sign-extended word
def LDRSWpost : LoadPostIdx<0b10, 0, 0b10, GPR64z, "ldrsw">;

//===----------------------------------------------------------------------===//
// Store instructions.
//===----------------------------------------------------------------------===//

// Pair (indexed, offset)
// FIXME: Use dedicated range-checked addressing mode operand here.
defm STPW : StorePairOffset<0b00, 0, GPR32z, simm7s4, "stp">;
defm STPX : StorePairOffset<0b10, 0, GPR64z, simm7s8, "stp">;
let Predicates = [HasFPARMv8] in {
defm STPS : StorePairOffset<0b00, 1, FPR32Op, simm7s4, "stp">;
defm STPD : StorePairOffset<0b01, 1, FPR64Op, simm7s8, "stp">;
defm STPQ : StorePairOffset<0b10, 1, FPR128Op, simm7s16, "stp">;
}

// Pair (pre-indexed)
def STPWpre : StorePairPreIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpre : StorePairPreIdx<0b10, 0, GPR64z, simm7s8, "stp">;
let Predicates = [HasFPARMv8] in {
def STPSpre : StorePairPreIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpre : StorePairPreIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpre : StorePairPreIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
}

// Pair (post-indexed)
def STPWpost : StorePairPostIdx<0b00, 0, GPR32z, simm7s4, "stp">;
def STPXpost : StorePairPostIdx<0b10, 0, GPR64z, simm7s8, "stp">;
let Predicates = [HasFPARMv8] in {
def STPSpost : StorePairPostIdx<0b00, 1, FPR32Op, simm7s4, "stp">;
def STPDpost : StorePairPostIdx<0b01, 1, FPR64Op, simm7s8, "stp">;
def STPQpost : StorePairPostIdx<0b10, 1, FPR128Op, simm7s16, "stp">;
}

// Pair (no allocate)
defm STNPW : StorePairNoAlloc<0b00, 0, GPR32z, simm7s4, "stnp">;
defm STNPX : StorePairNoAlloc<0b10, 0, GPR64z, simm7s8, "stnp">;
let Predicates = [HasFPARMv8] in {
defm STNPS : StorePairNoAlloc<0b00, 1, FPR32Op, simm7s4, "stnp">;
defm STNPD : StorePairNoAlloc<0b01, 1, FPR64Op, simm7s8, "stnp">;
defm STNPQ : StorePairNoAlloc<0b10, 1, FPR128Op, simm7s16, "stnp">;
}

def : Pat<(AArch64stp GPR64z:$Rt, GPR64z:$Rt2, (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
          (STPXi GPR64z:$Rt, GPR64z:$Rt2, GPR64sp:$Rn, simm7s8:$offset)>;

def : Pat<(AArch64stnp FPR128:$Rt, FPR128:$Rt2, (am_indexed7s128 GPR64sp:$Rn, simm7s16:$offset)),
          (STNPQi FPR128:$Rt, FPR128:$Rt2, GPR64sp:$Rn, simm7s16:$offset)>;


//---
// (Register offset)

// Integer
defm STRBB : Store8RO< 0b00, 0, 0b00, GPR32, "strb", i32, truncstorei8>;
defm STRHH : Store16RO<0b01, 0, 0b00, GPR32, "strh", i32, truncstorei16>;
defm STRW  : Store32RO<0b10, 0, 0b00, GPR32, "str",  i32, store>;
defm STRX  : Store64RO<0b11, 0, 0b00, GPR64, "str",  i64, store>;


// Floating-point
let Predicates = [HasFPARMv8] in {
defm STRB : Store8RO< 0b00,  1, 0b00, FPR8Op,   "str", i8, store>;
defm STRH : Store16RO<0b01,  1, 0b00, FPR16Op,  "str", f16,     store>;
defm STRS : Store32RO<0b10,  1, 0b00, FPR32Op,  "str", f32,     store>;
defm STRD : Store64RO<0b11,  1, 0b00, FPR64Op,  "str", f64,     store>;
defm STRQ : Store128RO<0b00, 1, 0b10, FPR128Op, "str">;
}

let Predicates = [UseSTRQro], AddedComplexity = 10 in {
  def : Pat<(store (f128 FPR128:$Rt),
                        (ro_Windexed128 GPR64sp:$Rn, GPR32:$Rm,
                                        ro_Wextend128:$extend)),
            (STRQroW FPR128:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro_Wextend128:$extend)>;
  def : Pat<(store (f128 FPR128:$Rt),
                        (ro_Xindexed128 GPR64sp:$Rn, GPR64:$Rm,
                                        ro_Xextend128:$extend)),
            (STRQroX FPR128:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro_Xextend128:$extend)>;
}

multiclass TruncStoreFrom64ROPat<ROAddrMode ro, SDPatternOperator storeop,
                                 Instruction STRW, Instruction STRX> {

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop GPR64:$Rt,
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
  // truncstore i64
  defm : TruncStoreFrom64ROPat<ro8,  truncstorei8,  STRBBroW, STRBBroX>;
  defm : TruncStoreFrom64ROPat<ro16, truncstorei16, STRHHroW, STRHHroX>;
  defm : TruncStoreFrom64ROPat<ro32, truncstorei32, STRWroW,  STRWroX>;
}
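
// Illustrative note: a truncating store of an i64 value only needs the W
// sub-register, so e.g. (truncstorei8 GPR64:$val, addr) selects as
//   strb w0, [x1, x2]
// via the EXTRACT_SUBREG in the patterns above.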

multiclass VecROStorePat<ROAddrMode ro, ValueType VecTy, RegisterClass FPR,
                         Instruction STRW, Instruction STRX> {
  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW FPR:$Rt, GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(store (VecTy FPR:$Rt),
                   (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX FPR:$Rt, GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 10 in {
// Match all stores of 64-bit width whose type is compatible with FPR64
let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro64, v2i32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v2f32, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4i16, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v8i8, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4f16, FPR64, STRDroW, STRDroX>;
  defm : VecROStorePat<ro64, v4bf16, FPR64, STRDroW, STRDroX>;
}

defm : VecROStorePat<ro64, v1i64, FPR64, STRDroW, STRDroX>;
defm : VecROStorePat<ro64, v1f64, FPR64, STRDroW, STRDroX>;

// Match all stores of 128-bit width whose type is compatible with FPR128
let Predicates = [IsLE, UseSTRQro] in {
  // We must use ST1 to store vectors in big-endian.
  defm : VecROStorePat<ro128, v2i64, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v2f64, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4i32, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v4f32, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8i16, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v16i8, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8f16, FPR128, STRQroW, STRQroX>;
  defm : VecROStorePat<ro128, v8bf16, FPR128, STRQroW, STRQroX>;
}
} // AddedComplexity = 10

// Match stores of lane 0 to the appropriate subregister's scalar store.
multiclass VecROStoreLane0Pat<ROAddrMode ro, SDPatternOperator storeop,
                              ValueType VecTy, ValueType STy,
                              ValueType SubRegTy,
                              SubRegIndex SubRegIdx,
                              Instruction STRW, Instruction STRX> {

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
                     (ro.Wpat GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)),
            (STRW (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
                  GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend)>;

  def : Pat<(storeop (STy (vector_extract (VecTy VecListOne128:$Vt), (i64 0))),
                     (ro.Xpat GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)),
            (STRX (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
                  GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend)>;
}

let AddedComplexity = 19 in {
  defm : VecROStoreLane0Pat<ro16, truncstorei16, v8i16, i32, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro16,         store, v8f16, f16, f16, hsub, STRHroW, STRHroX>;
  defm : VecROStoreLane0Pat<ro32,         store, v4i32, i32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro32,         store, v4f32, f32, i32, ssub, STRSroW, STRSroX>;
  defm : VecROStoreLane0Pat<ro64,         store, v2i64, i64, i64, dsub, STRDroW, STRDroX>;
  defm : VecROStoreLane0Pat<ro64,         store, v2f64, f64, i64, dsub, STRDroW, STRDroX>;
}
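
// Illustrative note: storing lane 0 of a 128-bit vector does not need an ST1
// lane store; the patterns above select the plain scalar STR of the matching
// sub-register, e.g. for a v4f32 value in q0:
//   str s0, [x0, x1]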

//---
// (unsigned immediate)
defm STRX : StoreUIz<0b11, 0, 0b00, GPR64z, uimm12s8, "str",
                   [(store GPR64z:$Rt,
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRW : StoreUIz<0b10, 0, 0b00, GPR32z, uimm12s4, "str",
                    [(store GPR32z:$Rt,
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
let Predicates = [HasFPARMv8] in {
defm STRB : StoreUI<0b00, 1, 0b00, FPR8Op, uimm12s1, "str",
                    [(store FPR8Op:$Rt,
                            (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))]>;
defm STRH : StoreUI<0b01, 1, 0b00, FPR16Op, uimm12s2, "str",
                    [(store (f16 FPR16Op:$Rt),
                            (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))]>;
defm STRS : StoreUI<0b10, 1, 0b00, FPR32Op, uimm12s4, "str",
                    [(store (f32 FPR32Op:$Rt),
                            (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))]>;
defm STRD : StoreUI<0b11, 1, 0b00, FPR64Op, uimm12s8, "str",
                    [(store (f64 FPR64Op:$Rt),
                            (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset))]>;
defm STRQ : StoreUI<0b00, 1, 0b10, FPR128Op, uimm12s16, "str", []>;
}

defm STRHH : StoreUIz<0b01, 0, 0b00, GPR32z, uimm12s2, "strh",
                     [(truncstorei16 GPR32z:$Rt,
                                     (am_indexed16 GPR64sp:$Rn,
                                                   uimm12s2:$offset))]>;
defm STRBB : StoreUIz<0b00, 0, 0b00, GPR32z, uimm12s1,  "strb",
                     [(truncstorei8 GPR32z:$Rt,
                                    (am_indexed8 GPR64sp:$Rn,
                                                 uimm12s1:$offset))]>;

// bf16 store pattern
def : Pat<(store (bf16 FPR16Op:$Rt),
                 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
          (STRHui FPR16:$Rt, GPR64sp:$Rn, uimm12s2:$offset)>;

let AddedComplexity = 10 in {

// Match all stores of 64-bit width whose type is compatible with FPR64
def : Pat<(store (v1i64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
def : Pat<(store (v1f64 FPR64:$Rt),
                 (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
          (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_indexed64 GPR64sp:$Rn, uimm12s8:$offset)),
            (STRDui FPR64:$Rt, GPR64sp:$Rn, uimm12s8:$offset)>;
}

// Match all stores of 128-bit width whose type is compatible with FPR128
def : Pat<(store (f128  FPR128:$Rt),
                 (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
          (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v4f32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2f64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v16i8 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8i16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v4i32 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v2i64 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8f16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
  def : Pat<(store (v8bf16 FPR128:$Rt),
                   (am_indexed128 GPR64sp:$Rn, uimm12s16:$offset)),
            (STRQui FPR128:$Rt, GPR64sp:$Rn, uimm12s16:$offset)>;
}

// truncstore i64
def : Pat<(truncstorei32 GPR64:$Rt,
                         (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset)),
  (STRWui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s4:$offset)>;
def : Pat<(truncstorei16 GPR64:$Rt,
                         (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)),
  (STRHHui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s2:$offset)>;
def : Pat<(truncstorei8 GPR64:$Rt, (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset)),
  (STRBBui (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, uimm12s1:$offset)>;

} // AddedComplexity = 10

// Match stores of lane 0 to the appropriate subregister's scalar store.
multiclass VecStoreLane0Pat<ComplexPattern UIAddrMode, SDPatternOperator storeop,
                            ValueType VTy, ValueType STy,
                            ValueType SubRegTy,
                            SubRegIndex SubRegIdx, Operand IndexType,
                            Instruction STR> {
  def : Pat<(storeop (STy (vector_extract (VTy VecListOne128:$Vt), (i64 0))),
                     (UIAddrMode GPR64sp:$Rn, IndexType:$offset)),
            (STR (SubRegTy (EXTRACT_SUBREG VecListOne128:$Vt, SubRegIdx)),
                 GPR64sp:$Rn, IndexType:$offset)>;
}

let AddedComplexity = 19 in {
  defm : VecStoreLane0Pat<am_indexed16, truncstorei16, v8i16, i32, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed16,         store, v8f16, f16, f16, hsub, uimm12s2, STRHui>;
  defm : VecStoreLane0Pat<am_indexed32,         store, v4i32, i32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed32,         store, v4f32, f32, i32, ssub, uimm12s4, STRSui>;
  defm : VecStoreLane0Pat<am_indexed64,         store, v2i64, i64, i64, dsub, uimm12s8, STRDui>;
  defm : VecStoreLane0Pat<am_indexed64,         store, v2f64, f64, i64, dsub, uimm12s8, STRDui>;
}

//---
// (unscaled immediate)
defm STURX : StoreUnscaled<0b11, 0, 0b00, GPR64z, "stur",
                         [(store GPR64z:$Rt,
                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURW : StoreUnscaled<0b10, 0, 0b00, GPR32z, "stur",
                         [(store GPR32z:$Rt,
                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
let Predicates = [HasFPARMv8] in {
defm STURB : StoreUnscaled<0b00, 1, 0b00, FPR8Op, "stur",
                         [(store FPR8Op:$Rt,
                                 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;
defm STURH : StoreUnscaled<0b01, 1, 0b00, FPR16Op, "stur",
                         [(store (f16 FPR16Op:$Rt),
                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURS : StoreUnscaled<0b10, 1, 0b00, FPR32Op, "stur",
                         [(store (f32 FPR32Op:$Rt),
                                 (am_unscaled32 GPR64sp:$Rn, simm9:$offset))]>;
defm STURD : StoreUnscaled<0b11, 1, 0b00, FPR64Op, "stur",
                         [(store (f64 FPR64Op:$Rt),
                                 (am_unscaled64 GPR64sp:$Rn, simm9:$offset))]>;
defm STURQ : StoreUnscaled<0b00, 1, 0b10, FPR128Op, "stur",
                         [(store (f128 FPR128Op:$Rt),
                                 (am_unscaled128 GPR64sp:$Rn, simm9:$offset))]>;
}
defm STURHH : StoreUnscaled<0b01, 0, 0b00, GPR32z, "sturh",
                         [(truncstorei16 GPR32z:$Rt,
                                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))]>;
defm STURBB : StoreUnscaled<0b00, 0, 0b00, GPR32z, "sturb",
                         [(truncstorei8 GPR32z:$Rt,
                                  (am_unscaled8 GPR64sp:$Rn, simm9:$offset))]>;

// bf16 store pattern
def : Pat<(store (bf16 FPR16Op:$Rt),
                 (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
          (STURHi FPR16:$Rt, GPR64sp:$Rn, simm9:$offset)>;

// Armv8.4 Weaker Release Consistency enhancements
//         LDAPR & STLR with Immediate Offset instructions
let Predicates = [HasRCPC_IMMO] in {
defm STLURB     : BaseStoreUnscaleV84<"stlurb",  0b00, 0b00, GPR32>;
defm STLURH     : BaseStoreUnscaleV84<"stlurh",  0b01, 0b00, GPR32>;
defm STLURW     : BaseStoreUnscaleV84<"stlur",   0b10, 0b00, GPR32>;
defm STLURX     : BaseStoreUnscaleV84<"stlur",   0b11, 0b00, GPR64>;
defm LDAPURB    : BaseLoadUnscaleV84<"ldapurb",  0b00, 0b01, GPR32>;
defm LDAPURSBW  : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b11, GPR32>;
defm LDAPURSBX  : BaseLoadUnscaleV84<"ldapursb", 0b00, 0b10, GPR64>;
defm LDAPURH    : BaseLoadUnscaleV84<"ldapurh",  0b01, 0b01, GPR32>;
defm LDAPURSHW  : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b11, GPR32>;
defm LDAPURSHX  : BaseLoadUnscaleV84<"ldapursh", 0b01, 0b10, GPR64>;
defm LDAPUR     : BaseLoadUnscaleV84<"ldapur",   0b10, 0b01, GPR32>;
defm LDAPURSW   : BaseLoadUnscaleV84<"ldapursw", 0b10, 0b10, GPR64>;
defm LDAPURX    : BaseLoadUnscaleV84<"ldapur",   0b11, 0b01, GPR64>;
}
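
// Illustrative examples only: the FEAT_LRCPC2 instructions above combine
// acquire/release semantics with a 9-bit unscaled immediate offset, e.g.
//   ldapur w0, [x1, #-4]   // load-acquire (RCpc) with immediate offset
//   stlur  w0, [x1, #-4]   // store-release with immediate offset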

// Match all stores of 64-bit width whose type is compatible with FPR64
def : Pat<(store (v1f64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
def : Pat<(store (v1i64 FPR64:$Rt), (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
          (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;

let AddedComplexity = 10 in {

let Predicates = [IsLE] in {
  // We must use ST1 to store vectors in big-endian.
  def : Pat<(store (v2f32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v8i8 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4i16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v2i32 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4f16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
  def : Pat<(store (v4bf16 FPR64:$Rt),
                   (am_unscaled64 GPR64sp:$Rn, simm9:$offset)),
            (STURDi FPR64:$Rt, GPR64sp:$Rn, simm9:$offset)>;
}

// Match all 128-bit-wide stores whose type is compatible with FPR128
4399def : Pat<(store (f128 FPR128:$Rt), (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4400          (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4401
4402let Predicates = [IsLE] in {
  // These patterns are little-endian only: big-endian vector stores must use ST1.
4404  def : Pat<(store (v4f32 FPR128:$Rt),
4405                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4406            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4407  def : Pat<(store (v2f64 FPR128:$Rt),
4408                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4409            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4410  def : Pat<(store (v16i8 FPR128:$Rt),
4411                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4412            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4413  def : Pat<(store (v8i16 FPR128:$Rt),
4414                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4415            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4416  def : Pat<(store (v4i32 FPR128:$Rt),
4417                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4418            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4419  def : Pat<(store (v2i64 FPR128:$Rt),
4420                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4421            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4425  def : Pat<(store (v8f16 FPR128:$Rt),
4426                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4427            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4428  def : Pat<(store (v8bf16 FPR128:$Rt),
4429                   (am_unscaled128 GPR64sp:$Rn, simm9:$offset)),
4430            (STURQi FPR128:$Rt, GPR64sp:$Rn, simm9:$offset)>;
4431}
4432
4433} // AddedComplexity = 10
4434
4435// unscaled i64 truncating stores
4436def : Pat<(truncstorei32 GPR64:$Rt, (am_unscaled32 GPR64sp:$Rn, simm9:$offset)),
4437  (STURWi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
4438def : Pat<(truncstorei16 GPR64:$Rt, (am_unscaled16 GPR64sp:$Rn, simm9:$offset)),
4439  (STURHHi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
4440def : Pat<(truncstorei8 GPR64:$Rt, (am_unscaled8 GPR64sp:$Rn, simm9:$offset)),
4441  (STURBBi (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$Rn, simm9:$offset)>;
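
// As a hedged illustration (not part of this file; names are made up), a
// narrowing store at a negative offset that exercises the patterns above:
// the i64 source is truncated via the sub_32 extract and stored with STURW.
//
//   #include <cstdint>
//   void store_low32(int64_t v, int32_t *p) {
//     p[-1] = (int32_t)v;   // expected to select "stur wN, [xM, #-4]"
//   }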
4442
// Match stores of lane 0 to the corresponding store of the appropriate subregister.
4444multiclass VecStoreULane0Pat<SDPatternOperator StoreOp,
4445                             ValueType VTy, ValueType STy,
4446                             ValueType SubRegTy,
4447                             SubRegIndex SubRegIdx, Instruction STR> {
4448  defm : VecStoreLane0Pat<am_unscaled128, StoreOp, VTy, STy, SubRegTy, SubRegIdx, simm9, STR>;
4449}
4450
4451let AddedComplexity = 19 in {
4452  defm : VecStoreULane0Pat<truncstorei16, v8i16, i32, f16, hsub, STURHi>;
4453  defm : VecStoreULane0Pat<store,         v8f16, f16, f16, hsub, STURHi>;
4454  defm : VecStoreULane0Pat<store,         v4i32, i32, i32, ssub, STURSi>;
4455  defm : VecStoreULane0Pat<store,         v4f32, f32, i32, ssub, STURSi>;
4456  defm : VecStoreULane0Pat<store,         v2i64, i64, i64, dsub, STURDi>;
4457  defm : VecStoreULane0Pat<store,         v2f64, f64, i64, dsub, STURDi>;
4458}
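
// A rough sketch of the lane-0 idiom covered above (assumes <arm_neon.h>;
// names are illustrative): storing lane 0 selects a plain scalar STUR of the
// subregister instead of an element extract followed by a store.
//
//   #include <arm_neon.h>
//   void store_lane0(float32x4_t v, float *p) {
//     p[-1] = vgetq_lane_f32(v, 0);   // expected to select "stur sN, [xM, #-4]"
//   }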
4459
4460//---
4461// STR mnemonics fall back to STUR for negative or unaligned offsets.
4462def : InstAlias<"str $Rt, [$Rn, $offset]",
4463                (STURXi GPR64:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
4464def : InstAlias<"str $Rt, [$Rn, $offset]",
4465                (STURWi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
4466let Predicates = [HasFPARMv8] in {
4467def : InstAlias<"str $Rt, [$Rn, $offset]",
4468                (STURBi FPR8Op:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
4469def : InstAlias<"str $Rt, [$Rn, $offset]",
4470                (STURHi FPR16Op:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
4471def : InstAlias<"str $Rt, [$Rn, $offset]",
4472                (STURSi FPR32Op:$Rt, GPR64sp:$Rn, simm9_offset_fb32:$offset), 0>;
4473def : InstAlias<"str $Rt, [$Rn, $offset]",
4474                (STURDi FPR64Op:$Rt, GPR64sp:$Rn, simm9_offset_fb64:$offset), 0>;
4475def : InstAlias<"str $Rt, [$Rn, $offset]",
4476                (STURQi FPR128Op:$Rt, GPR64sp:$Rn, simm9_offset_fb128:$offset), 0>;
4477}
4478
4479def : InstAlias<"strb $Rt, [$Rn, $offset]",
4480                (STURBBi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb8:$offset), 0>;
4481def : InstAlias<"strh $Rt, [$Rn, $offset]",
4482                (STURHHi GPR32:$Rt, GPR64sp:$Rn, simm9_offset_fb16:$offset), 0>;
4483
4484//---
4485// (unscaled immediate, unprivileged)
4486defm STTRW : StoreUnprivileged<0b10, 0, 0b00, GPR32, "sttr">;
4487defm STTRX : StoreUnprivileged<0b11, 0, 0b00, GPR64, "sttr">;
4488
4489defm STTRH : StoreUnprivileged<0b01, 0, 0b00, GPR32, "sttrh">;
4490defm STTRB : StoreUnprivileged<0b00, 0, 0b00, GPR32, "sttrb">;
4491
4492//---
4493// (immediate pre-indexed)
4494def STRWpre : StorePreIdx<0b10, 0, 0b00, GPR32z, "str",  pre_store, i32>;
4495def STRXpre : StorePreIdx<0b11, 0, 0b00, GPR64z, "str",  pre_store, i64>;
4496let Predicates = [HasFPARMv8] in {
4497def STRBpre : StorePreIdx<0b00, 1, 0b00, FPR8Op,  "str",  pre_store, i8>;
4498def STRHpre : StorePreIdx<0b01, 1, 0b00, FPR16Op, "str",  pre_store, f16>;
4499def STRSpre : StorePreIdx<0b10, 1, 0b00, FPR32Op, "str",  pre_store, f32>;
4500def STRDpre : StorePreIdx<0b11, 1, 0b00, FPR64Op, "str",  pre_store, f64>;
4501def STRQpre : StorePreIdx<0b00, 1, 0b10, FPR128Op, "str", pre_store, f128>;
4502}
4503
4504def STRBBpre : StorePreIdx<0b00, 0, 0b00, GPR32z, "strb", pre_truncsti8,  i32>;
4505def STRHHpre : StorePreIdx<0b01, 0, 0b00, GPR32z, "strh", pre_truncsti16, i32>;
4506
4507// truncstore i64
4508def : Pat<(pre_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4509  (STRWpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4510           simm9:$off)>;
4511def : Pat<(pre_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4512  (STRHHpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4513            simm9:$off)>;
4514def : Pat<(pre_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4515  (STRBBpre (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4516            simm9:$off)>;
4517
4518def : Pat<(pre_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4519          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4520def : Pat<(pre_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4521          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4522def : Pat<(pre_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4523          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4524def : Pat<(pre_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4525          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4526def : Pat<(pre_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4527          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4528def : Pat<(pre_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4529          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4530def : Pat<(pre_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4531          (STRDpre FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4532
4533def : Pat<(pre_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4534          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4535def : Pat<(pre_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4536          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4537def : Pat<(pre_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4538          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4539def : Pat<(pre_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4540          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4541def : Pat<(pre_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4542          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4543def : Pat<(pre_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4544          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4545def : Pat<(pre_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4546          (STRQpre FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4547
4548//---
4549// (immediate post-indexed)
4550def STRWpost : StorePostIdx<0b10, 0, 0b00, GPR32z,  "str", post_store, i32>;
4551def STRXpost : StorePostIdx<0b11, 0, 0b00, GPR64z,  "str", post_store, i64>;
4552let Predicates = [HasFPARMv8] in {
4553def STRBpost : StorePostIdx<0b00, 1, 0b00, FPR8Op,   "str", post_store, i8>;
4554def STRHpost : StorePostIdx<0b01, 1, 0b00, FPR16Op,  "str", post_store, f16>;
4555def STRSpost : StorePostIdx<0b10, 1, 0b00, FPR32Op,  "str", post_store, f32>;
4556def STRDpost : StorePostIdx<0b11, 1, 0b00, FPR64Op,  "str", post_store, f64>;
4557def STRQpost : StorePostIdx<0b00, 1, 0b10, FPR128Op, "str", post_store, f128>;
4558}
4559
4560def STRBBpost : StorePostIdx<0b00, 0, 0b00, GPR32z, "strb", post_truncsti8, i32>;
4561def STRHHpost : StorePostIdx<0b01, 0, 0b00, GPR32z, "strh", post_truncsti16, i32>;
4562
4563// truncstore i64
4564def : Pat<(post_truncsti32 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4565  (STRWpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4566            simm9:$off)>;
4567def : Pat<(post_truncsti16 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4568  (STRHHpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4569             simm9:$off)>;
4570def : Pat<(post_truncsti8 GPR64:$Rt, GPR64sp:$addr, simm9:$off),
4571  (STRBBpost (EXTRACT_SUBREG GPR64:$Rt, sub_32), GPR64sp:$addr,
4572             simm9:$off)>;
4573
4574def : Pat<(post_store (bf16 FPR16:$Rt), GPR64sp:$addr, simm9:$off),
4575          (STRHpost FPR16:$Rt, GPR64sp:$addr, simm9:$off)>;
4576
4577def : Pat<(post_store (v8i8 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4578          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4579def : Pat<(post_store (v4i16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4580          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4581def : Pat<(post_store (v2i32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4582          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4583def : Pat<(post_store (v2f32 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4584          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4585def : Pat<(post_store (v1i64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4586          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4587def : Pat<(post_store (v1f64 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4588          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4589def : Pat<(post_store (v4f16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4590          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4591def : Pat<(post_store (v4bf16 FPR64:$Rt), GPR64sp:$addr, simm9:$off),
4592          (STRDpost FPR64:$Rt, GPR64sp:$addr, simm9:$off)>;
4593
4594def : Pat<(post_store (v16i8 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4595          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4596def : Pat<(post_store (v8i16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4597          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4598def : Pat<(post_store (v4i32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4599          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4600def : Pat<(post_store (v4f32 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4601          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4602def : Pat<(post_store (v2i64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4603          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4604def : Pat<(post_store (v2f64 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4605          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4606def : Pat<(post_store (v8f16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4607          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4608def : Pat<(post_store (v8bf16 FPR128:$Rt), GPR64sp:$addr, simm9:$off),
4609          (STRQpost FPR128:$Rt, GPR64sp:$addr, simm9:$off)>;
4610
4611//===----------------------------------------------------------------------===//
4612// Load/store exclusive instructions.
4613//===----------------------------------------------------------------------===//
4614
4615def LDARW  : LoadAcquire   <0b10, 1, 1, 0, 1, GPR32, "ldar">;
4616def LDARX  : LoadAcquire   <0b11, 1, 1, 0, 1, GPR64, "ldar">;
4617def LDARB  : LoadAcquire   <0b00, 1, 1, 0, 1, GPR32, "ldarb">;
4618def LDARH  : LoadAcquire   <0b01, 1, 1, 0, 1, GPR32, "ldarh">;
4619
4620def LDAXRW : LoadExclusive <0b10, 0, 1, 0, 1, GPR32, "ldaxr">;
4621def LDAXRX : LoadExclusive <0b11, 0, 1, 0, 1, GPR64, "ldaxr">;
4622def LDAXRB : LoadExclusive <0b00, 0, 1, 0, 1, GPR32, "ldaxrb">;
4623def LDAXRH : LoadExclusive <0b01, 0, 1, 0, 1, GPR32, "ldaxrh">;
4624
4625def LDXRW  : LoadExclusive <0b10, 0, 1, 0, 0, GPR32, "ldxr">;
4626def LDXRX  : LoadExclusive <0b11, 0, 1, 0, 0, GPR64, "ldxr">;
4627def LDXRB  : LoadExclusive <0b00, 0, 1, 0, 0, GPR32, "ldxrb">;
4628def LDXRH  : LoadExclusive <0b01, 0, 1, 0, 0, GPR32, "ldxrh">;
4629
4630def STLRW  : StoreRelease  <0b10, 1, 0, 0, 1, GPR32, "stlr">;
4631def STLRX  : StoreRelease  <0b11, 1, 0, 0, 1, GPR64, "stlr">;
4632def STLRB  : StoreRelease  <0b00, 1, 0, 0, 1, GPR32, "stlrb">;
4633def STLRH  : StoreRelease  <0b01, 1, 0, 0, 1, GPR32, "stlrh">;
4634
/*
Aliases for when offset=0. In contrast to LoadAcquire, whose $Rn is of type
GPR64sp0, we deliberately make $Rn of type GPR64sp and add an alias for the
immediate #0 case. This is because the new STLR versions (from the LRCPC3
extension) can have a non-zero immediate, so GPR64sp0 (which parses and
discards an optional zero) is no longer appropriate. LoadAcquire is unaffected
because the new LRCPC3 LDAR instructions are post-indexed: their immediates
sit outside the [] brackets and are therefore never consumed by the GPR64sp0
parser.
*/
4645def STLRW0  : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRW   GPR32: $Rt, GPR64sp:$Rn)>;
4646def STLRX0  : InstAlias<"stlr\t$Rt, [$Rn, #0]" , (STLRX   GPR64: $Rt, GPR64sp:$Rn)>;
4647def STLRB0  : InstAlias<"stlrb\t$Rt, [$Rn, #0]", (STLRB   GPR32: $Rt, GPR64sp:$Rn)>;
4648def STLRH0  : InstAlias<"stlrh\t$Rt, [$Rn, #0]", (STLRH   GPR32: $Rt, GPR64sp:$Rn)>;
4649
4650def STLXRW : StoreExclusive<0b10, 0, 0, 0, 1, GPR32, "stlxr">;
4651def STLXRX : StoreExclusive<0b11, 0, 0, 0, 1, GPR64, "stlxr">;
4652def STLXRB : StoreExclusive<0b00, 0, 0, 0, 1, GPR32, "stlxrb">;
4653def STLXRH : StoreExclusive<0b01, 0, 0, 0, 1, GPR32, "stlxrh">;
4654
4655def STXRW  : StoreExclusive<0b10, 0, 0, 0, 0, GPR32, "stxr">;
4656def STXRX  : StoreExclusive<0b11, 0, 0, 0, 0, GPR64, "stxr">;
4657def STXRB  : StoreExclusive<0b00, 0, 0, 0, 0, GPR32, "stxrb">;
4658def STXRH  : StoreExclusive<0b01, 0, 0, 0, 0, GPR32, "stxrh">;
4659
4660def LDAXPW : LoadExclusivePair<0b10, 0, 1, 1, 1, GPR32, "ldaxp">;
4661def LDAXPX : LoadExclusivePair<0b11, 0, 1, 1, 1, GPR64, "ldaxp">;
4662
4663def LDXPW  : LoadExclusivePair<0b10, 0, 1, 1, 0, GPR32, "ldxp">;
4664def LDXPX  : LoadExclusivePair<0b11, 0, 1, 1, 0, GPR64, "ldxp">;
4665
4666def STLXPW : StoreExclusivePair<0b10, 0, 0, 1, 1, GPR32, "stlxp">;
4667def STLXPX : StoreExclusivePair<0b11, 0, 0, 1, 1, GPR64, "stlxp">;
4668
4669def STXPW  : StoreExclusivePair<0b10, 0, 0, 1, 0, GPR32, "stxp">;
4670def STXPX  : StoreExclusivePair<0b11, 0, 0, 1, 0, GPR64, "stxp">;
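
// As a rough illustration (a sketch, not part of this file), these map onto
// C++11 atomics: acquire loads select LDAR, release stores select STLR, and
// read-modify-write operations expand to an LDAXR/STXR retry loop when LSE
// atomics are unavailable.
//
//   #include <atomic>
//   int  load_acq (const std::atomic<int> &a) { return a.load(std::memory_order_acquire); }  // ldar
//   void store_rel(std::atomic<int> &a, int v) { a.store(v, std::memory_order_release); }    // stlr
//   int  add_one  (std::atomic<int> &a) { return a.fetch_add(1); }  // ldaxr/stxr loop (pre-LSE)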
4671
4672let Predicates = [HasLOR] in {
  // v8.1a "Limited Ordering Regions" extension load-acquire instructions
4674  def LDLARW  : LoadAcquire   <0b10, 1, 1, 0, 0, GPR32, "ldlar">;
4675  def LDLARX  : LoadAcquire   <0b11, 1, 1, 0, 0, GPR64, "ldlar">;
4676  def LDLARB  : LoadAcquire   <0b00, 1, 1, 0, 0, GPR32, "ldlarb">;
4677  def LDLARH  : LoadAcquire   <0b01, 1, 1, 0, 0, GPR32, "ldlarh">;
4678
  // v8.1a "Limited Ordering Regions" extension store-release instructions
4680  def STLLRW  : StoreRelease   <0b10, 1, 0, 0, 0, GPR32, "stllr">;
4681  def STLLRX  : StoreRelease   <0b11, 1, 0, 0, 0, GPR64, "stllr">;
4682  def STLLRB  : StoreRelease   <0b00, 1, 0, 0, 0, GPR32, "stllrb">;
4683  def STLLRH  : StoreRelease   <0b01, 1, 0, 0, 0, GPR32, "stllrh">;
4684
4685  // Aliases for when offset=0
4686  def STLLRW0 : InstAlias<"stllr\t$Rt,  [$Rn, #0]",  (STLLRW   GPR32: $Rt, GPR64sp:$Rn)>;
4687  def STLLRX0 : InstAlias<"stllr\t$Rt,  [$Rn, #0]",  (STLLRX   GPR64: $Rt, GPR64sp:$Rn)>;
4688  def STLLRB0 : InstAlias<"stllrb\t$Rt, [$Rn, #0]",  (STLLRB   GPR32: $Rt, GPR64sp:$Rn)>;
4689  def STLLRH0 : InstAlias<"stllrh\t$Rt, [$Rn, #0]",  (STLLRH   GPR32: $Rt, GPR64sp:$Rn)>;
4690}
4691
4692//===----------------------------------------------------------------------===//
4693// Scaled floating point to integer conversion instructions.
4694//===----------------------------------------------------------------------===//
4695
4696defm FCVTAS : FPToIntegerUnscaled<0b00, 0b100, "fcvtas", int_aarch64_neon_fcvtas>;
4697defm FCVTAU : FPToIntegerUnscaled<0b00, 0b101, "fcvtau", int_aarch64_neon_fcvtau>;
4698defm FCVTMS : FPToIntegerUnscaled<0b10, 0b000, "fcvtms", int_aarch64_neon_fcvtms>;
4699defm FCVTMU : FPToIntegerUnscaled<0b10, 0b001, "fcvtmu", int_aarch64_neon_fcvtmu>;
4700defm FCVTNS : FPToIntegerUnscaled<0b00, 0b000, "fcvtns", int_aarch64_neon_fcvtns>;
4701defm FCVTNU : FPToIntegerUnscaled<0b00, 0b001, "fcvtnu", int_aarch64_neon_fcvtnu>;
4702defm FCVTPS : FPToIntegerUnscaled<0b01, 0b000, "fcvtps", int_aarch64_neon_fcvtps>;
4703defm FCVTPU : FPToIntegerUnscaled<0b01, 0b001, "fcvtpu", int_aarch64_neon_fcvtpu>;
4704defm FCVTZS : FPToIntegerUnscaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
4705defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
4706defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
4707defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
4708
4709// AArch64's FCVT instructions saturate when out of range.
4710multiclass FPToIntegerSatPats<SDNode to_int_sat, string INST> {
4711  let Predicates = [HasFullFP16] in {
4712  def : Pat<(i32 (to_int_sat f16:$Rn, i32)),
4713            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
4714  def : Pat<(i64 (to_int_sat f16:$Rn, i64)),
4715            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
4716  }
4717  def : Pat<(i32 (to_int_sat f32:$Rn, i32)),
4718            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
4719  def : Pat<(i64 (to_int_sat f32:$Rn, i64)),
4720            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
4721  def : Pat<(i32 (to_int_sat f64:$Rn, i32)),
4722            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
4723  def : Pat<(i64 (to_int_sat f64:$Rn, i64)),
4724            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
4725
4726  let Predicates = [HasFullFP16] in {
4727  def : Pat<(i32 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i32:$scale), i32)),
4728            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
4729  def : Pat<(i64 (to_int_sat (fmul f16:$Rn, fixedpoint_f16_i64:$scale), i64)),
4730            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
4731  }
4732  def : Pat<(i32 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i32:$scale), i32)),
4733            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
4734  def : Pat<(i64 (to_int_sat (fmul f32:$Rn, fixedpoint_f32_i64:$scale), i64)),
4735            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
4736  def : Pat<(i32 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i32:$scale), i32)),
4737            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
4738  def : Pat<(i64 (to_int_sat (fmul f64:$Rn, fixedpoint_f64_i64:$scale), i64)),
4739            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
4740}
4741
4742defm : FPToIntegerSatPats<fp_to_sint_sat, "FCVTZS">;
4743defm : FPToIntegerSatPats<fp_to_uint_sat, "FCVTZU">;
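
// A minimal reference sketch (illustrative only) of the saturating behaviour
// relied on above, for the f32 -> i32 case that selects a single FCVTZS:
//
//   #include <cmath>
//   #include <cstdint>
//   #include <limits>
//   int32_t fcvtzs_ref(float x) {
//     if (std::isnan(x)) return 0;                           // NaN converts to 0
//     if (x <= (float)std::numeric_limits<int32_t>::min())
//       return std::numeric_limits<int32_t>::min();          // saturate low
//     if (x >= 2147483648.0f /* 2^31 */)
//       return std::numeric_limits<int32_t>::max();          // saturate high
//     return (int32_t)x;                                     // truncate toward zero
//   }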
4744
4745multiclass FPToIntegerIntPats<Intrinsic round, string INST> {
4746  let Predicates = [HasFullFP16] in {
4747  def : Pat<(i32 (round f16:$Rn)), (!cast<Instruction>(INST # UWHr) $Rn)>;
4748  def : Pat<(i64 (round f16:$Rn)), (!cast<Instruction>(INST # UXHr) $Rn)>;
4749  }
4750  def : Pat<(i32 (round f32:$Rn)), (!cast<Instruction>(INST # UWSr) $Rn)>;
4751  def : Pat<(i64 (round f32:$Rn)), (!cast<Instruction>(INST # UXSr) $Rn)>;
4752  def : Pat<(i32 (round f64:$Rn)), (!cast<Instruction>(INST # UWDr) $Rn)>;
4753  def : Pat<(i64 (round f64:$Rn)), (!cast<Instruction>(INST # UXDr) $Rn)>;
4754
4755  let Predicates = [HasFullFP16] in {
4756  def : Pat<(i32 (round (fmul f16:$Rn, fixedpoint_f16_i32:$scale))),
4757            (!cast<Instruction>(INST # SWHri) $Rn, $scale)>;
4758  def : Pat<(i64 (round (fmul f16:$Rn, fixedpoint_f16_i64:$scale))),
4759            (!cast<Instruction>(INST # SXHri) $Rn, $scale)>;
4760  }
4761  def : Pat<(i32 (round (fmul f32:$Rn, fixedpoint_f32_i32:$scale))),
4762            (!cast<Instruction>(INST # SWSri) $Rn, $scale)>;
4763  def : Pat<(i64 (round (fmul f32:$Rn, fixedpoint_f32_i64:$scale))),
4764            (!cast<Instruction>(INST # SXSri) $Rn, $scale)>;
4765  def : Pat<(i32 (round (fmul f64:$Rn, fixedpoint_f64_i32:$scale))),
4766            (!cast<Instruction>(INST # SWDri) $Rn, $scale)>;
4767  def : Pat<(i64 (round (fmul f64:$Rn, fixedpoint_f64_i64:$scale))),
4768            (!cast<Instruction>(INST # SXDri) $Rn, $scale)>;
4769}
4770
4771defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzs, "FCVTZS">;
4772defm : FPToIntegerIntPats<int_aarch64_neon_fcvtzu, "FCVTZU">;
4773
4774multiclass FPToIntegerPats<SDNode to_int, SDNode to_int_sat, SDNode round, string INST> {
4775  def : Pat<(i32 (to_int (round f32:$Rn))),
4776            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
4777  def : Pat<(i64 (to_int (round f32:$Rn))),
4778            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
4779  def : Pat<(i32 (to_int (round f64:$Rn))),
4780            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
4781  def : Pat<(i64 (to_int (round f64:$Rn))),
4782            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
4783
4784  // These instructions saturate like fp_to_[su]int_sat.
4785  let Predicates = [HasFullFP16] in {
4786  def : Pat<(i32 (to_int_sat (round f16:$Rn), i32)),
4787            (!cast<Instruction>(INST # UWHr) f16:$Rn)>;
4788  def : Pat<(i64 (to_int_sat (round f16:$Rn), i64)),
4789            (!cast<Instruction>(INST # UXHr) f16:$Rn)>;
4790  }
4791  def : Pat<(i32 (to_int_sat (round f32:$Rn), i32)),
4792            (!cast<Instruction>(INST # UWSr) f32:$Rn)>;
4793  def : Pat<(i64 (to_int_sat (round f32:$Rn), i64)),
4794            (!cast<Instruction>(INST # UXSr) f32:$Rn)>;
4795  def : Pat<(i32 (to_int_sat (round f64:$Rn), i32)),
4796            (!cast<Instruction>(INST # UWDr) f64:$Rn)>;
4797  def : Pat<(i64 (to_int_sat (round f64:$Rn), i64)),
4798            (!cast<Instruction>(INST # UXDr) f64:$Rn)>;
4799}
4800
4801defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fceil,  "FCVTPS">;
4802defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fceil,  "FCVTPU">;
4803defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ffloor, "FCVTMS">;
4804defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ffloor, "FCVTMU">;
4805defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, ftrunc, "FCVTZS">;
4806defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, ftrunc, "FCVTZU">;
4807defm : FPToIntegerPats<fp_to_sint, fp_to_sint_sat, fround, "FCVTAS">;
4808defm : FPToIntegerPats<fp_to_uint, fp_to_uint_sat, fround, "FCVTAU">;
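
// For illustration (a hedged sketch, not from this file), the source-level
// idioms the patterns above fold into a single instruction, assuming the
// rounded result fits in the integer type:
//
//   #include <cmath>
//   #include <cstdint>
//   int32_t to_plus_inf (float x) { return (int32_t)std::ceil(x);  }  // fcvtps
//   int32_t to_minus_inf(float x) { return (int32_t)std::floor(x); }  // fcvtms
//   int32_t to_zero     (float x) { return (int32_t)std::trunc(x); }  // fcvtzs
//   int32_t to_nearest  (float x) { return (int32_t)std::round(x); }  // fcvtas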
4809
4810
4811
4812let Predicates = [HasFullFP16] in {
4813  def : Pat<(i32 (any_lround f16:$Rn)),
4814            (FCVTASUWHr f16:$Rn)>;
4815  def : Pat<(i64 (any_lround f16:$Rn)),
4816            (FCVTASUXHr f16:$Rn)>;
4817  def : Pat<(i64 (any_llround f16:$Rn)),
4818            (FCVTASUXHr f16:$Rn)>;
4819}
4820def : Pat<(i32 (any_lround f32:$Rn)),
4821          (FCVTASUWSr f32:$Rn)>;
4822def : Pat<(i32 (any_lround f64:$Rn)),
4823          (FCVTASUWDr f64:$Rn)>;
4824def : Pat<(i64 (any_lround f32:$Rn)),
4825          (FCVTASUXSr f32:$Rn)>;
4826def : Pat<(i64 (any_lround f64:$Rn)),
4827          (FCVTASUXDr f64:$Rn)>;
4828def : Pat<(i64 (any_llround f32:$Rn)),
4829          (FCVTASUXSr f32:$Rn)>;
4830def : Pat<(i64 (any_llround f64:$Rn)),
4831          (FCVTASUXDr f64:$Rn)>;
4832
4833//===----------------------------------------------------------------------===//
4834// Scaled integer to floating point conversion instructions.
4835//===----------------------------------------------------------------------===//
4836
4837defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
4838defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
4839
4840def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
4841          (SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
4842def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
4843          (SCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
4844def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
4845          (SCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;
4846
4847def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
4848          (SCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
4849def : Pat<(f32 (fdiv (f32 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
4850          (SCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
4851def : Pat<(f64 (fdiv (f64 (any_sint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
4852          (SCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;
4853
4854def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f16_i64:$scale)),
4855          (UCVTFSXHri GPR64:$Rn, fixedpoint_f16_i64:$scale)>;
4856def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f32_i64:$scale)),
4857          (UCVTFSXSri GPR64:$Rn, fixedpoint_f32_i64:$scale)>;
4858def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i64 GPR64:$Rn))), fixedpoint_f64_i64:$scale)),
4859          (UCVTFSXDri GPR64:$Rn, fixedpoint_f64_i64:$scale)>;
4860
4861def : Pat<(f16 (fdiv (f16 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
4862          (UCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
4863def : Pat<(f32 (fdiv (f32 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f32_i32:$scale)),
4864          (UCVTFSWSri GPR32:$Rn, fixedpoint_f32_i32:$scale)>;
4865def : Pat<(f64 (fdiv (f64 (any_uint_to_fp (i32 GPR32:$Rn))), fixedpoint_f64_i32:$scale)),
4866          (UCVTFSWDri GPR32:$Rn, fixedpoint_f64_i32:$scale)>;
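
// A hedged sketch of the fixed-point idiom recognized above: converting an
// integer and dividing by a power-of-two constant selects the scaled form
// (function names are illustrative):
//
//   #include <cstdint>
//   float q8_to_float (int32_t  q) { return (float)q / 256.0f; }  // scvtf sN, wM, #8
//   float uq8_to_float(uint32_t q) { return (float)q / 256.0f; }  // ucvtf sN, wM, #8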
4867
4868//===----------------------------------------------------------------------===//
4869// Unscaled integer to floating point conversion instruction.
4870//===----------------------------------------------------------------------===//
4871
4872defm FMOV : UnscaledConversion<"fmov">;
4873
4874// Add pseudo ops for FMOV 0 so we can mark them as isReMaterializable
4875let isReMaterializable = 1, isCodeGenOnly = 1, isAsCheapAsAMove = 1,
4876    Predicates = [HasFPARMv8] in {
4877def FMOVH0 : Pseudo<(outs FPR16:$Rd), (ins), [(set f16:$Rd, (fpimm0))]>,
4878    Sched<[WriteF]>;
4879def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
4880    Sched<[WriteF]>;
4881def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
4882    Sched<[WriteF]>;
4883}
4884
// Similarly, add assembler aliases that write #0.0 by moving the zero register.
4886def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
4887    Requires<[HasFullFP16]>;
4888let Predicates = [HasFPARMv8] in {
4889def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
4890def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
4891}
4892
4893def : Pat<(bf16 fpimm0),
4894          (FMOVH0)>;
4895
// Patterns for FP16 and BF16 immediates
4897let Predicates = [HasFullFP16] in {
4898  def : Pat<(f16 fpimm:$in),
4899            (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>;
4900
4901  def : Pat<(bf16 fpimm:$in),
4902            (FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 bf16:$in)))>;
4903}
4904
4905//===----------------------------------------------------------------------===//
4906// Floating point conversion instruction.
4907//===----------------------------------------------------------------------===//
4908
4909defm FCVT : FPConversion<"fcvt">;
4910// Helper to get bf16 into fp32.
def cvt_bf16_to_fp32 :
  OutPatFrag<(ops node:$Rn),
             (f32 (COPY_TO_REGCLASS
                    (i32 (UBFMWri
                           (i32 (COPY_TO_REGCLASS
                                   (INSERT_SUBREG (f32 (IMPLICIT_DEF)), node:$Rn, hsub),
                                   GPR32)),
                           (i64 (i32shift_a (i64 16))),
                           (i64 (i32shift_b (i64 16))))),
                    FPR32))>;
4920// Pattern for bf16 -> fp32.
4921def : Pat<(f32 (any_fpextend (bf16 FPR16:$Rn))),
4922          (cvt_bf16_to_fp32 FPR16:$Rn)>;
4923// Pattern for bf16 -> fp64.
4924def : Pat<(f64 (any_fpextend (bf16 FPR16:$Rn))),
4925          (FCVTDSr (f32 (cvt_bf16_to_fp32 FPR16:$Rn)))>;
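
// A rough sketch (not part of this file) of why the widening above is just a
// 16-bit left shift: bf16 keeps the top 16 bits of an IEEE f32, so the
// UBFMWri in cvt_bf16_to_fp32 simply places the payload in the high half.
//
//   #include <cstdint>
//   #include <cstring>
//   float bf16_bits_to_f32(uint16_t bits) {
//     uint32_t w = (uint32_t)bits << 16;  // payload into the high half
//     float f;
//     std::memcpy(&f, &w, sizeof f);      // reinterpret the bits as f32
//     return f;
//   }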
4926
4927//===----------------------------------------------------------------------===//
4928// Floating point single operand instructions.
4929//===----------------------------------------------------------------------===//
4930
4931defm FABS   : SingleOperandFPDataNoException<0b0001, "fabs", fabs>;
4932defm FMOV   : SingleOperandFPDataNoException<0b0000, "fmov">;
4933defm FNEG   : SingleOperandFPDataNoException<0b0010, "fneg", fneg>;
4934defm FRINTA : SingleOperandFPData<0b1100, "frinta", any_fround>;
4935defm FRINTI : SingleOperandFPData<0b1111, "frinti", any_fnearbyint>;
4936defm FRINTM : SingleOperandFPData<0b1010, "frintm", any_ffloor>;
4937defm FRINTN : SingleOperandFPData<0b1000, "frintn", any_froundeven>;
4938defm FRINTP : SingleOperandFPData<0b1001, "frintp", any_fceil>;
4939
4940defm FRINTX : SingleOperandFPData<0b1110, "frintx", any_frint>;
4941defm FRINTZ : SingleOperandFPData<0b1011, "frintz", any_ftrunc>;
4942
4943let SchedRW = [WriteFDiv] in {
4944defm FSQRT  : SingleOperandFPData<0b0011, "fsqrt", any_fsqrt>;
4945}
4946
4947let Predicates = [HasFRInt3264] in {
4948  defm FRINT32Z : FRIntNNT<0b00, "frint32z", int_aarch64_frint32z>;
4949  defm FRINT64Z : FRIntNNT<0b10, "frint64z", int_aarch64_frint64z>;
4950  defm FRINT32X : FRIntNNT<0b01, "frint32x", int_aarch64_frint32x>;
4951  defm FRINT64X : FRIntNNT<0b11, "frint64x", int_aarch64_frint64x>;
4952} // HasFRInt3264
4953
// Patterns to convert the 1x64-vector intrinsics to the equivalent scalar instructions
4955def : Pat<(v1f64 (int_aarch64_neon_frint32z (v1f64 FPR64:$Rn))),
4956          (FRINT32ZDr FPR64:$Rn)>;
4957def : Pat<(v1f64 (int_aarch64_neon_frint64z (v1f64 FPR64:$Rn))),
4958          (FRINT64ZDr FPR64:$Rn)>;
4959def : Pat<(v1f64 (int_aarch64_neon_frint32x (v1f64 FPR64:$Rn))),
4960          (FRINT32XDr FPR64:$Rn)>;
4961def : Pat<(v1f64 (int_aarch64_neon_frint64x (v1f64 FPR64:$Rn))),
4962          (FRINT64XDr FPR64:$Rn)>;
4963
// Emitting strict_lrint as two instructions is valid because any exception
// can be raised by exactly one of them: if the input is not an integer, the
// inexact exception is raised by the FRINTX but not again by the FCVTZS,
// since the output of FRINTX is already integral.
4968let Predicates = [HasFullFP16] in {
4969  def : Pat<(i32 (any_lrint f16:$Rn)),
4970            (FCVTZSUWHr (FRINTXHr f16:$Rn))>;
4971  def : Pat<(i64 (any_lrint f16:$Rn)),
4972            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
4973  def : Pat<(i64 (any_llrint f16:$Rn)),
4974            (FCVTZSUXHr (FRINTXHr f16:$Rn))>;
4975}
4976def : Pat<(i32 (any_lrint f32:$Rn)),
4977          (FCVTZSUWSr (FRINTXSr f32:$Rn))>;
4978def : Pat<(i32 (any_lrint f64:$Rn)),
4979          (FCVTZSUWDr (FRINTXDr f64:$Rn))>;
4980def : Pat<(i64 (any_lrint f32:$Rn)),
4981          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
4982def : Pat<(i64 (any_lrint f64:$Rn)),
4983          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
4984def : Pat<(i64 (any_llrint f32:$Rn)),
4985          (FCVTZSUXSr (FRINTXSr f32:$Rn))>;
4986def : Pat<(i64 (any_llrint f64:$Rn)),
4987          (FCVTZSUXDr (FRINTXDr f64:$Rn))>;
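
// A minimal sketch (illustrative only) of the two-step lowering described
// above:
//
//   #include <cmath>
//   long lrint_as_two_steps(double x) {
//     double r = std::rint(x);  // frintx: may raise the inexact exception
//     return (long)r;           // fcvtzs: r is already integral, so exact
//   }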
4988
4989//===----------------------------------------------------------------------===//
4990// Floating point two operand instructions.
4991//===----------------------------------------------------------------------===//
4992
4993defm FADD   : TwoOperandFPData<0b0010, "fadd", any_fadd>;
4994let SchedRW = [WriteFDiv] in {
4995defm FDIV   : TwoOperandFPData<0b0001, "fdiv", any_fdiv>;
4996}
4997defm FMAXNM : TwoOperandFPData<0b0110, "fmaxnm", any_fmaxnum>;
4998defm FMAX   : TwoOperandFPData<0b0100, "fmax", any_fmaximum>;
4999defm FMINNM : TwoOperandFPData<0b0111, "fminnm", any_fminnum>;
5000defm FMIN   : TwoOperandFPData<0b0101, "fmin", any_fminimum>;
5001let SchedRW = [WriteFMul] in {
5002defm FMUL   : TwoOperandFPData<0b0000, "fmul", any_fmul>;
5003defm FNMUL  : TwoOperandFPDataNeg<0b1000, "fnmul", any_fmul>;
5004}
5005defm FSUB   : TwoOperandFPData<0b0011, "fsub", any_fsub>;
5006
5007multiclass FMULScalarFromIndexedLane0Patterns<string inst,
5008                                              string inst_f16_suffix,
5009                                              string inst_f32_suffix,
5010                                              string inst_f64_suffix,
5011                                              SDPatternOperator OpNode,
5012                                              list<Predicate> preds = []> {
5013  let Predicates = !listconcat(preds, [HasFullFP16]) in {
5014  def : Pat<(f16 (OpNode (f16 FPR16:$Rn),
5015                         (f16 (vector_extract (v8f16 V128:$Rm), (i64 0))))),
5016            (!cast<Instruction>(inst # inst_f16_suffix)
5017              FPR16:$Rn, (f16 (EXTRACT_SUBREG V128:$Rm, hsub)))>;
5018  }
5019  let Predicates = preds in {
5020  def : Pat<(f32 (OpNode (f32 FPR32:$Rn),
5021                         (f32 (vector_extract (v4f32 V128:$Rm), (i64 0))))),
5022            (!cast<Instruction>(inst # inst_f32_suffix)
5023              FPR32:$Rn, (EXTRACT_SUBREG V128:$Rm, ssub))>;
5024  def : Pat<(f64 (OpNode (f64 FPR64:$Rn),
5025                         (f64 (vector_extract (v2f64 V128:$Rm), (i64 0))))),
5026            (!cast<Instruction>(inst # inst_f64_suffix)
5027              FPR64:$Rn, (EXTRACT_SUBREG V128:$Rm, dsub))>;
5028  }
5029}
5030
5031defm : FMULScalarFromIndexedLane0Patterns<"FMUL", "Hrr", "Srr", "Drr",
5032                                          any_fmul>;
5033
5034// Match reassociated forms of FNMUL.
5035def : Pat<(fmul (fneg FPR16:$a), (f16 FPR16:$b)),
5036          (FNMULHrr FPR16:$a, FPR16:$b)>,
5037          Requires<[HasFullFP16]>;
5038def : Pat<(fmul (fneg FPR32:$a), (f32 FPR32:$b)),
5039          (FNMULSrr FPR32:$a, FPR32:$b)>;
5040def : Pat<(fmul (fneg FPR64:$a), (f64 FPR64:$b)),
5041          (FNMULDrr FPR64:$a, FPR64:$b)>;
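
// For illustration (a hedged sketch), the two source forms that both select
// FNMUL through the definition and the reassociated patterns above:
//
//   float f1(float a, float b) { return -(a * b); }  // fnmul (negated product)
//   float f2(float a, float b) { return (-a) * b; }  // fnmul (negated LHS)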
5042
5043def : Pat<(v1f64 (fmaximum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5044          (FMAXDrr FPR64:$Rn, FPR64:$Rm)>;
5045def : Pat<(v1f64 (fminimum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5046          (FMINDrr FPR64:$Rn, FPR64:$Rm)>;
5047def : Pat<(v1f64 (fmaxnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5048          (FMAXNMDrr FPR64:$Rn, FPR64:$Rm)>;
5049def : Pat<(v1f64 (fminnum (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
5050          (FMINNMDrr FPR64:$Rn, FPR64:$Rm)>;
5051
5052//===----------------------------------------------------------------------===//
5053// Floating point three operand instructions.
5054//===----------------------------------------------------------------------===//
5055
5056defm FMADD  : ThreeOperandFPData<0, 0, "fmadd", any_fma>;
5057defm FMSUB  : ThreeOperandFPData<0, 1, "fmsub",
5058     TriOpFrag<(any_fma node:$LHS, (fneg node:$MHS), node:$RHS)> >;
5059defm FNMADD : ThreeOperandFPData<1, 0, "fnmadd",
5060     TriOpFrag<(fneg (any_fma node:$LHS, node:$MHS, node:$RHS))> >;
5061defm FNMSUB : ThreeOperandFPData<1, 1, "fnmsub",
5062     TriOpFrag<(any_fma node:$LHS, node:$MHS, (fneg node:$RHS))> >;
5063
// The following patterns catch the case where the LHS of an FMA is negated.
// The TriOpFrag above catches the case where the middle operand is negated.

// N.b. FMSUB etc. take the accumulator at the *end* of the operand list,
// unlike the NEON variant.
5069
5070// Here we handle first -(a + b*c) for FNMADD:
5071
5072let Predicates = [HasNEON, HasFullFP16] in
5073def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
5074          (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
5075
5076def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
5077          (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
5078
5079def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
5080          (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
5081
5082// Now it's time for "(-a) + (-b)*c"
5083
5084let Predicates = [HasNEON, HasFullFP16] in
5085def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
5086          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
5087
5088def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
5089          (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;
5090
5091def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
5092          (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;
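
// A hedged sketch of the fused forms matched above (written with std::fma so
// the contraction is explicit):
//
//   #include <cmath>
//   float msub (float n, float m, float a) { return std::fma(-n, m, a);  }  // fmsub
//   float nmadd(float n, float m, float a) { return std::fma(-n, m, -a); }  // fnmadd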
5093
5094//===----------------------------------------------------------------------===//
5095// Floating point comparison instructions.
5096//===----------------------------------------------------------------------===//
5097
5098defm FCMPE : FPComparison<1, "fcmpe", AArch64strict_fcmpe>;
5099defm FCMP  : FPComparison<0, "fcmp", AArch64any_fcmp>;
5100
5101//===----------------------------------------------------------------------===//
5102// Floating point conditional comparison instructions.
5103//===----------------------------------------------------------------------===//
5104
5105defm FCCMPE : FPCondComparison<1, "fccmpe">;
5106defm FCCMP  : FPCondComparison<0, "fccmp", AArch64fccmp>;
5107
5108//===----------------------------------------------------------------------===//
5109// Floating point conditional select instruction.
5110//===----------------------------------------------------------------------===//
5111
5112defm FCSEL : FPCondSelect<"fcsel">;
5113
5114let Predicates = [HasFullFP16] in
5115def : Pat<(bf16 (AArch64csel (bf16 FPR16:$Rn), (bf16 FPR16:$Rm), (i32 imm:$cond), NZCV)),
5116          (FCSELHrrr FPR16:$Rn, FPR16:$Rm, imm:$cond)>;
5117
// CSEL of f128 values needs to be handled by a pseudo-instruction, since the
// eventual code will need to introduce basic blocks and control flow.
5121let Predicates = [HasFPARMv8] in
5122def F128CSEL : Pseudo<(outs FPR128:$Rd),
5123                      (ins FPR128:$Rn, FPR128:$Rm, ccode:$cond),
5124                      [(set (f128 FPR128:$Rd),
5125                            (AArch64csel FPR128:$Rn, FPR128:$Rm,
5126                                       (i32 imm:$cond), NZCV))]> {
5127  let Uses = [NZCV];
5128  let usesCustomInserter = 1;
5129  let hasNoSchedulingInfo = 1;
5130}
5131
5132//===----------------------------------------------------------------------===//
5133// Instructions used for emitting unwind opcodes on ARM64 Windows.
5134//===----------------------------------------------------------------------===//
5135let isPseudo = 1 in {
5136  def SEH_StackAlloc : Pseudo<(outs), (ins i32imm:$size), []>, Sched<[]>;
5137  def SEH_SaveFPLR : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
5138  def SEH_SaveFPLR_X : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
5139  def SEH_SaveReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
5140  def SEH_SaveReg_X : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
5141  def SEH_SaveRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
5142  def SEH_SaveRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
5143  def SEH_SaveFReg : Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
5144  def SEH_SaveFReg_X :  Pseudo<(outs), (ins i32imm:$reg, i32imm:$offs), []>, Sched<[]>;
5145  def SEH_SaveFRegP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
5146  def SEH_SaveFRegP_X : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
5147  def SEH_SetFP : Pseudo<(outs), (ins), []>, Sched<[]>;
5148  def SEH_AddFP : Pseudo<(outs), (ins i32imm:$offs), []>, Sched<[]>;
5149  def SEH_Nop : Pseudo<(outs), (ins), []>, Sched<[]>;
5150  def SEH_PrologEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
5151  def SEH_EpilogStart : Pseudo<(outs), (ins), []>, Sched<[]>;
5152  def SEH_EpilogEnd : Pseudo<(outs), (ins), []>, Sched<[]>;
5153  def SEH_PACSignLR : Pseudo<(outs), (ins), []>, Sched<[]>;
5154  def SEH_SaveAnyRegQP : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
5155  def SEH_SaveAnyRegQPX : Pseudo<(outs), (ins i32imm:$reg0, i32imm:$reg1, i32imm:$offs), []>, Sched<[]>;
5156}
5157
//===----------------------------------------------------------------------===//
// Pseudo instructions for Windows EH
//===----------------------------------------------------------------------===//
5160let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
5161    isCodeGenOnly = 1, isReturn = 1, isEHScopeReturn = 1, isPseudo = 1 in {
5162   def CLEANUPRET : Pseudo<(outs), (ins), [(cleanupret bb)]>, Sched<[]>;
5163   let usesCustomInserter = 1 in
5164     def CATCHRET : Pseudo<(outs), (ins am_brcond:$dst, am_brcond:$src), [(catchret bb:$dst, bb:$src)]>,
5165                    Sched<[]>;
5166}
5167
5168// Pseudo instructions for homogeneous prolog/epilog
5169let isPseudo = 1 in {
5170  // Save CSRs in order, {FPOffset}
5171  def HOM_Prolog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
5172  // Restore CSRs in order
5173  def HOM_Epilog : Pseudo<(outs), (ins variable_ops), []>, Sched<[]>;
5174}
5175
5176//===----------------------------------------------------------------------===//
5177// Floating point immediate move.
5178//===----------------------------------------------------------------------===//
5179
5180let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
5181defm FMOV : FPMoveImmediate<"fmov">;
5182}
5183
5184let Predicates = [HasFullFP16] in {
5185  def : Pat<(bf16 fpimmbf16:$in),
5186            (FMOVHi (fpimm16XForm bf16:$in))>;
5187}
5188
5189//===----------------------------------------------------------------------===//
5190// Advanced SIMD two vector instructions.
5191//===----------------------------------------------------------------------===//
5192
5193defm UABDL   : SIMDLongThreeVectorBHSabdl<1, 0b0111, "uabdl",
5194                                          AArch64uabd>;
5195// Match UABDL in log2-shuffle patterns.
5196def : Pat<(abs (v8i16 (sub (zext (v8i8 V64:$opA)),
5197                           (zext (v8i8 V64:$opB))))),
5198          (UABDLv8i8_v8i16 V64:$opA, V64:$opB)>;
5199def : Pat<(abs (v8i16 (sub (zext (extract_high_v16i8 (v16i8 V128:$opA))),
5200                           (zext (extract_high_v16i8 (v16i8 V128:$opB)))))),
5201          (UABDLv16i8_v8i16 V128:$opA, V128:$opB)>;
5202def : Pat<(abs (v4i32 (sub (zext (v4i16 V64:$opA)),
5203                           (zext (v4i16 V64:$opB))))),
5204          (UABDLv4i16_v4i32 V64:$opA, V64:$opB)>;
5205def : Pat<(abs (v4i32 (sub (zext (extract_high_v8i16 (v8i16 V128:$opA))),
5206                           (zext (extract_high_v8i16 (v8i16 V128:$opB)))))),
5207          (UABDLv8i16_v4i32 V128:$opA, V128:$opB)>;
5208def : Pat<(abs (v2i64 (sub (zext (v2i32 V64:$opA)),
5209                           (zext (v2i32 V64:$opB))))),
5210          (UABDLv2i32_v2i64 V64:$opA, V64:$opB)>;
5211def : Pat<(abs (v2i64 (sub (zext (extract_high_v4i32 (v4i32 V128:$opA))),
5212                           (zext (extract_high_v4i32 (v4i32 V128:$opB)))))),
5213          (UABDLv4i32_v2i64 V128:$opA, V128:$opB)>;
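
// A rough sketch (illustrative only) of the widened absolute-difference loop
// these patterns cover once vectorized:
//
//   #include <cstdint>
//   void uabdl8(const uint8_t *a, const uint8_t *b, uint16_t *out) {
//     for (int i = 0; i < 8; ++i)   // expected to use "uabdl vN.8h, vM.8b, vK.8b"
//       out[i] = (uint16_t)(a[i] > b[i] ? a[i] - b[i] : b[i] - a[i]);
//   }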
5214
5215defm ABS    : SIMDTwoVectorBHSD<0, 0b01011, "abs", abs>;
5216defm CLS    : SIMDTwoVectorBHS<0, 0b00100, "cls", int_aarch64_neon_cls>;
5217defm CLZ    : SIMDTwoVectorBHS<1, 0b00100, "clz", ctlz>;
5218defm CMEQ   : SIMDCmpTwoVector<0, 0b01001, "cmeq", AArch64cmeqz>;
5219defm CMGE   : SIMDCmpTwoVector<1, 0b01000, "cmge", AArch64cmgez>;
5220defm CMGT   : SIMDCmpTwoVector<0, 0b01000, "cmgt", AArch64cmgtz>;
5221defm CMLE   : SIMDCmpTwoVector<1, 0b01001, "cmle", AArch64cmlez>;
5222defm CMLT   : SIMDCmpTwoVector<0, 0b01010, "cmlt", AArch64cmltz>;
5223defm CNT    : SIMDTwoVectorB<0, 0b00, 0b00101, "cnt", ctpop>;
5224defm FABS   : SIMDTwoVectorFPNoException<0, 1, 0b01111, "fabs", fabs>;
5225
5226def : Pat<(v8i8 (AArch64vashr (v8i8 V64:$Rn), (i32 7))),
5227          (CMLTv8i8rz V64:$Rn)>;
5228def : Pat<(v4i16 (AArch64vashr (v4i16 V64:$Rn), (i32 15))),
5229          (CMLTv4i16rz V64:$Rn)>;
5230def : Pat<(v2i32 (AArch64vashr (v2i32 V64:$Rn), (i32 31))),
5231          (CMLTv2i32rz V64:$Rn)>;
5232def : Pat<(v16i8 (AArch64vashr (v16i8 V128:$Rn), (i32 7))),
5233          (CMLTv16i8rz V128:$Rn)>;
5234def : Pat<(v8i16 (AArch64vashr (v8i16 V128:$Rn), (i32 15))),
5235          (CMLTv8i16rz V128:$Rn)>;
5236def : Pat<(v4i32 (AArch64vashr (v4i32 V128:$Rn), (i32 31))),
5237          (CMLTv4i32rz V128:$Rn)>;
5238def : Pat<(v2i64 (AArch64vashr (v2i64 V128:$Rn), (i32 63))),
5239          (CMLTv2i64rz V128:$Rn)>;
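
// For illustration (a hedged sketch): the sign-mask idiom matched above,
// where an arithmetic shift right by (element width - 1) becomes a
// compare-less-than-zero:
//
//   #include <cstdint>
//   void sign_mask(const int8_t *in, int8_t *out) {
//     for (int i = 0; i < 16; ++i)  // expected to use "cmlt vN.16b, vM.16b, #0"
//       out[i] = in[i] >> 7;
//   }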
5240
5241defm FCMEQ  : SIMDFPCmpTwoVector<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
5242defm FCMGE  : SIMDFPCmpTwoVector<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
5243defm FCMGT  : SIMDFPCmpTwoVector<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
5244defm FCMLE  : SIMDFPCmpTwoVector<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
5245defm FCMLT  : SIMDFPCmpTwoVector<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
5246defm FCVTAS : SIMDTwoVectorFPToInt<0,0,0b11100, "fcvtas",int_aarch64_neon_fcvtas>;
5247defm FCVTAU : SIMDTwoVectorFPToInt<1,0,0b11100, "fcvtau",int_aarch64_neon_fcvtau>;
5248defm FCVTL  : SIMDFPWidenTwoVector<0, 0, 0b10111, "fcvtl">;
5249def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (v4i16 V64:$Rn))),
5250          (FCVTLv4i16 V64:$Rn)>;
5251def : Pat<(v4f32 (int_aarch64_neon_vcvthf2fp (extract_subvector (v8i16 V128:$Rn),
5252                                                                (i64 4)))),
5253          (FCVTLv8i16 V128:$Rn)>;
5254def : Pat<(v2f64 (any_fpextend (v2f32 V64:$Rn))),
5255          (FCVTLv2i32 V64:$Rn)>;
5256def : Pat<(v2f64 (any_fpextend (v2f32 (extract_high_v4f32 (v4f32 V128:$Rn))))),
5257          (FCVTLv4i32 V128:$Rn)>;
5258def : Pat<(v4f32 (any_fpextend (v4f16 V64:$Rn))),
5259          (FCVTLv4i16 V64:$Rn)>;
5260def : Pat<(v4f32 (any_fpextend (v4f16 (extract_high_v8f16 (v8f16 V128:$Rn))))),
5261          (FCVTLv8i16 V128:$Rn)>;
5262
5263defm FCVTMS : SIMDTwoVectorFPToInt<0,0,0b11011, "fcvtms",int_aarch64_neon_fcvtms>;
5264defm FCVTMU : SIMDTwoVectorFPToInt<1,0,0b11011, "fcvtmu",int_aarch64_neon_fcvtmu>;
5265defm FCVTNS : SIMDTwoVectorFPToInt<0,0,0b11010, "fcvtns",int_aarch64_neon_fcvtns>;
5266defm FCVTNU : SIMDTwoVectorFPToInt<1,0,0b11010, "fcvtnu",int_aarch64_neon_fcvtnu>;
5267defm FCVTN  : SIMDFPNarrowTwoVector<0, 0, 0b10110, "fcvtn">;
5268def : Pat<(v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn))),
5269          (FCVTNv4i16 V128:$Rn)>;
5270def : Pat<(concat_vectors V64:$Rd,
5271                          (v4i16 (int_aarch64_neon_vcvtfp2hf (v4f32 V128:$Rn)))),
5272          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
5273def : Pat<(v2f32 (any_fpround (v2f64 V128:$Rn))),
5274          (FCVTNv2i32 V128:$Rn)>;
5275def : Pat<(v4f16 (any_fpround (v4f32 V128:$Rn))),
5276          (FCVTNv4i16 V128:$Rn)>;
5277def : Pat<(concat_vectors V64:$Rd, (v2f32 (any_fpround (v2f64 V128:$Rn)))),
5278          (FCVTNv4i32 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
5279def : Pat<(concat_vectors V64:$Rd, (v4f16 (any_fpround (v4f32 V128:$Rn)))),
5280          (FCVTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>;
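
// A minimal sketch (illustrative only) of the vector widen/narrow loops the
// FCVTL/FCVTN patterns above correspond to:
//
//   void widen (const float  *in, double *out) {
//     for (int i = 0; i < 2; ++i) out[i] = in[i];         // fcvtl vN.2d, vM.2s
//   }
//   void narrow(const double *in, float  *out) {
//     for (int i = 0; i < 2; ++i) out[i] = (float)in[i];  // fcvtn vN.2s, vM.2d
//   }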
5281defm FCVTPS : SIMDTwoVectorFPToInt<0,1,0b11010, "fcvtps",int_aarch64_neon_fcvtps>;
5282defm FCVTPU : SIMDTwoVectorFPToInt<1,1,0b11010, "fcvtpu",int_aarch64_neon_fcvtpu>;
5283defm FCVTXN : SIMDFPInexactCvtTwoVector<1, 0, 0b10110, "fcvtxn",
5284                                        AArch64fcvtxnv>;
5285defm FCVTZS : SIMDTwoVectorFPToInt<0, 1, 0b11011, "fcvtzs", any_fp_to_sint>;
5286defm FCVTZU : SIMDTwoVectorFPToInt<1, 1, 0b11011, "fcvtzu", any_fp_to_uint>;
5287
5288// AArch64's FCVT instructions saturate when out of range.
5289multiclass SIMDTwoVectorFPToIntSatPats<SDNode to_int_sat, string INST> {
5290  let Predicates = [HasFullFP16] in {
5291  def : Pat<(v4i16 (to_int_sat v4f16:$Rn, i16)),
5292            (!cast<Instruction>(INST # v4f16) v4f16:$Rn)>;
5293  def : Pat<(v8i16 (to_int_sat v8f16:$Rn, i16)),
5294            (!cast<Instruction>(INST # v8f16) v8f16:$Rn)>;
5295  }
5296  def : Pat<(v2i32 (to_int_sat v2f32:$Rn, i32)),
5297            (!cast<Instruction>(INST # v2f32) v2f32:$Rn)>;
5298  def : Pat<(v4i32 (to_int_sat v4f32:$Rn, i32)),
5299            (!cast<Instruction>(INST # v4f32) v4f32:$Rn)>;
5300  def : Pat<(v2i64 (to_int_sat v2f64:$Rn, i64)),
5301            (!cast<Instruction>(INST # v2f64) v2f64:$Rn)>;
5302}
5303defm : SIMDTwoVectorFPToIntSatPats<fp_to_sint_sat, "FCVTZS">;
5304defm : SIMDTwoVectorFPToIntSatPats<fp_to_uint_sat, "FCVTZU">;
5305
5306def : Pat<(v4i16 (int_aarch64_neon_fcvtzs v4f16:$Rn)), (FCVTZSv4f16 $Rn)>;
5307def : Pat<(v8i16 (int_aarch64_neon_fcvtzs v8f16:$Rn)), (FCVTZSv8f16 $Rn)>;
5308def : Pat<(v2i32 (int_aarch64_neon_fcvtzs v2f32:$Rn)), (FCVTZSv2f32 $Rn)>;
5309def : Pat<(v4i32 (int_aarch64_neon_fcvtzs v4f32:$Rn)), (FCVTZSv4f32 $Rn)>;
5310def : Pat<(v2i64 (int_aarch64_neon_fcvtzs v2f64:$Rn)), (FCVTZSv2f64 $Rn)>;
5311
5312def : Pat<(v4i16 (int_aarch64_neon_fcvtzu v4f16:$Rn)), (FCVTZUv4f16 $Rn)>;
5313def : Pat<(v8i16 (int_aarch64_neon_fcvtzu v8f16:$Rn)), (FCVTZUv8f16 $Rn)>;
5314def : Pat<(v2i32 (int_aarch64_neon_fcvtzu v2f32:$Rn)), (FCVTZUv2f32 $Rn)>;
5315def : Pat<(v4i32 (int_aarch64_neon_fcvtzu v4f32:$Rn)), (FCVTZUv4f32 $Rn)>;
5316def : Pat<(v2i64 (int_aarch64_neon_fcvtzu v2f64:$Rn)), (FCVTZUv2f64 $Rn)>;
5317
5318defm FNEG   : SIMDTwoVectorFPNoException<1, 1, 0b01111, "fneg", fneg>;
5319defm FRECPE : SIMDTwoVectorFP<0, 1, 0b11101, "frecpe", int_aarch64_neon_frecpe>;
5320defm FRINTA : SIMDTwoVectorFP<1, 0, 0b11000, "frinta", any_fround>;
5321defm FRINTI : SIMDTwoVectorFP<1, 1, 0b11001, "frinti", any_fnearbyint>;
5322defm FRINTM : SIMDTwoVectorFP<0, 0, 0b11001, "frintm", any_ffloor>;
5323defm FRINTN : SIMDTwoVectorFP<0, 0, 0b11000, "frintn", any_froundeven>;
5324defm FRINTP : SIMDTwoVectorFP<0, 1, 0b11000, "frintp", any_fceil>;
5325defm FRINTX : SIMDTwoVectorFP<1, 0, 0b11001, "frintx", any_frint>;
5326defm FRINTZ : SIMDTwoVectorFP<0, 1, 0b11001, "frintz", any_ftrunc>;
5327
5328let Predicates = [HasFRInt3264] in {
5329  defm FRINT32Z : FRIntNNTVector<0, 0, "frint32z", int_aarch64_neon_frint32z>;
5330  defm FRINT64Z : FRIntNNTVector<0, 1, "frint64z", int_aarch64_neon_frint64z>;
5331  defm FRINT32X : FRIntNNTVector<1, 0, "frint32x", int_aarch64_neon_frint32x>;
5332  defm FRINT64X : FRIntNNTVector<1, 1, "frint64x", int_aarch64_neon_frint64x>;
5333} // HasFRInt3264
5334
5335defm FRSQRTE: SIMDTwoVectorFP<1, 1, 0b11101, "frsqrte", int_aarch64_neon_frsqrte>;
5336defm FSQRT  : SIMDTwoVectorFP<1, 1, 0b11111, "fsqrt", any_fsqrt>;
5337defm NEG    : SIMDTwoVectorBHSD<1, 0b01011, "neg",
5338                               UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
5339defm NOT    : SIMDTwoVectorB<1, 0b00, 0b00101, "not", vnot>;
5340// Aliases for MVN -> NOT.
5341let Predicates = [HasNEON] in {
5342def : InstAlias<"mvn{ $Vd.8b, $Vn.8b|.8b $Vd, $Vn}",
5343                (NOTv8i8 V64:$Vd, V64:$Vn)>;
5344def : InstAlias<"mvn{ $Vd.16b, $Vn.16b|.16b $Vd, $Vn}",
5345                (NOTv16i8 V128:$Vd, V128:$Vn)>;
5346}
5347
5348def : Pat<(vnot (v4i16 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
5349def : Pat<(vnot (v8i16 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
5350def : Pat<(vnot (v2i32 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
5351def : Pat<(vnot (v4i32 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
5352def : Pat<(vnot (v1i64 V64:$Rn)),  (NOTv8i8  V64:$Rn)>;
5353def : Pat<(vnot (v2i64 V128:$Rn)), (NOTv16i8 V128:$Rn)>;
5354
5355defm RBIT   : SIMDTwoVectorB<1, 0b01, 0b00101, "rbit", bitreverse>;
5356defm REV16  : SIMDTwoVectorB<0, 0b00, 0b00001, "rev16", AArch64rev16>;
5357defm REV32  : SIMDTwoVectorBH<1, 0b00000, "rev32", AArch64rev32>;
5358defm REV64  : SIMDTwoVectorBHS<0, 0b00000, "rev64", AArch64rev64>;
5359defm SADALP : SIMDLongTwoVectorTied<0, 0b00110, "sadalp",
5360       BinOpFrag<(add node:$LHS, (AArch64saddlp node:$RHS))> >;
5361defm SADDLP : SIMDLongTwoVector<0, 0b00010, "saddlp", AArch64saddlp>;
5362defm SCVTF  : SIMDTwoVectorIntToFP<0, 0, 0b11101, "scvtf", any_sint_to_fp>;
5363defm SHLL   : SIMDVectorLShiftLongBySizeBHS;
5364defm SQABS  : SIMDTwoVectorBHSD<0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
5365defm SQNEG  : SIMDTwoVectorBHSD<1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
5366defm SQXTN  : SIMDMixedTwoVector<0, 0b10100, "sqxtn", int_aarch64_neon_sqxtn>;
5367defm SQXTUN : SIMDMixedTwoVector<1, 0b10010, "sqxtun", int_aarch64_neon_sqxtun>;
5368defm SUQADD : SIMDTwoVectorBHSDTied<0, 0b00011, "suqadd",int_aarch64_neon_suqadd>;
5369defm UADALP : SIMDLongTwoVectorTied<1, 0b00110, "uadalp",
5370       BinOpFrag<(add node:$LHS, (AArch64uaddlp node:$RHS))> >;
5371defm UADDLP : SIMDLongTwoVector<1, 0b00010, "uaddlp", AArch64uaddlp>;
5372defm UCVTF  : SIMDTwoVectorIntToFP<1, 0, 0b11101, "ucvtf", any_uint_to_fp>;
5373defm UQXTN  : SIMDMixedTwoVector<1, 0b10100, "uqxtn", int_aarch64_neon_uqxtn>;
5374defm URECPE : SIMDTwoVectorS<0, 1, 0b11100, "urecpe", int_aarch64_neon_urecpe>;
5375defm URSQRTE: SIMDTwoVectorS<1, 1, 0b11100, "ursqrte", int_aarch64_neon_ursqrte>;
5376defm USQADD : SIMDTwoVectorBHSDTied<1, 0b00011, "usqadd",int_aarch64_neon_usqadd>;
5377defm XTN    : SIMDMixedTwoVector<0, 0b10010, "xtn", trunc>;
5378
5379def : Pat<(v4f16  (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
5380def : Pat<(v4f16  (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
5381def : Pat<(v4bf16 (AArch64rev32 V64:$Rn)),  (REV32v4i16 V64:$Rn)>;
5382def : Pat<(v4bf16 (AArch64rev64 V64:$Rn)),  (REV64v4i16 V64:$Rn)>;
5383def : Pat<(v8f16  (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
5384def : Pat<(v8f16  (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
5385def : Pat<(v8bf16 (AArch64rev32 V128:$Rn)), (REV32v8i16 V128:$Rn)>;
5386def : Pat<(v8bf16 (AArch64rev64 V128:$Rn)), (REV64v8i16 V128:$Rn)>;
5387def : Pat<(v2f32  (AArch64rev64 V64:$Rn)),  (REV64v2i32 V64:$Rn)>;
5388def : Pat<(v4f32  (AArch64rev64 V128:$Rn)), (REV64v4i32 V128:$Rn)>;
5389
// Patterns for vector long shift (by element width). These need to match all
// three of zext, sext and anyext, so it is easier to pull the patterns out
// into this multiclass than to repeat them in the instruction definition.
5393multiclass SIMDVectorLShiftLongBySizeBHSPats<SDPatternOperator ext> {
5394  def : Pat<(AArch64vshl (v8i16 (ext (v8i8 V64:$Rn))), (i32 8)),
5395            (SHLLv8i8 V64:$Rn)>;
5396  def : Pat<(AArch64vshl (v8i16 (ext (extract_high_v16i8 (v16i8 V128:$Rn)))), (i32 8)),
5397            (SHLLv16i8 V128:$Rn)>;
5398  def : Pat<(AArch64vshl (v4i32 (ext (v4i16 V64:$Rn))), (i32 16)),
5399            (SHLLv4i16 V64:$Rn)>;
5400  def : Pat<(AArch64vshl (v4i32 (ext (extract_high_v8i16 (v8i16 V128:$Rn)))), (i32 16)),
5401            (SHLLv8i16 V128:$Rn)>;
5402  def : Pat<(AArch64vshl (v2i64 (ext (v2i32 V64:$Rn))), (i32 32)),
5403            (SHLLv2i32 V64:$Rn)>;
5404  def : Pat<(AArch64vshl (v2i64 (ext (extract_high_v4i32 (v4i32 V128:$Rn)))), (i32 32)),
5405            (SHLLv4i32 V128:$Rn)>;
5406}
5407
5408defm : SIMDVectorLShiftLongBySizeBHSPats<anyext>;
5409defm : SIMDVectorLShiftLongBySizeBHSPats<zext>;
5410defm : SIMDVectorLShiftLongBySizeBHSPats<sext>;
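
// A hedged sketch of the widening shift-by-element-width idiom the SHLL
// patterns above recognize:
//
//   #include <cstdint>
//   void shll8(const uint8_t *in, uint16_t *out) {
//     for (int i = 0; i < 8; ++i)   // expected to use "shll vN.8h, vM.8b, #8"
//       out[i] = (uint16_t)in[i] << 8;
//   }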

// Constant vector values, used in the S/UQXTN patterns below.
def VImmFF:   PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 85))))>;
def VImmFFFF: PatLeaf<(AArch64NvCast (v2i64 (AArch64movi_edit (i32 51))))>;
def VImm7F:   PatLeaf<(AArch64movi_shift (i32 127), (i32 0))>;
def VImm80:   PatLeaf<(AArch64mvni_shift (i32 127), (i32 0))>;
def VImm7FFF: PatLeaf<(AArch64movi_msl (i32 127), (i32 264))>;
def VImm8000: PatLeaf<(AArch64mvni_msl (i32 127), (i32 264))>;
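// A note on the immediates above (assumed decoding, for the reader's benefit):
// the 64-bit MOVI "edit" form expands each bit of imm8 to a whole byte, so 85
// (0b01010101) yields 0x00FF in every 16-bit lane and 51 (0b00110011) yields
// 0x0000FFFF in every 32-bit lane. The shifted MOVI/MVNI forms build 0x007F
// (127) and 0xFF80 (-128) per 16-bit lane; the MSL forms build 0x00007FFF
// (32767) and 0xFFFF8000 (-32768) per 32-bit lane.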

// trunc(umin(X, 255)) -> UQXTN v8i8
def : Pat<(v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))),
          (UQXTNv8i8 V128:$Vn)>;
// trunc(umin(X, 65535)) -> UQXTN v4i16
def : Pat<(v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))),
          (UQXTNv4i16 V128:$Vn)>;
// trunc(smin(smax(X, -128), 127)) -> SQXTN,
// also with the min/max reversed
def : Pat<(v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
                             (v8i16 VImm7F)))),
          (SQXTNv8i8 V128:$Vn)>;
def : Pat<(v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
                             (v8i16 VImm80)))),
          (SQXTNv8i8 V128:$Vn)>;
// trunc(smin(smax(X, -32768), 32767)) -> SQXTN,
// also with the min/max reversed
def : Pat<(v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
                              (v4i32 VImm7FFF)))),
          (SQXTNv4i16 V128:$Vn)>;
def : Pat<(v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
                              (v4i32 VImm8000)))),
          (SQXTNv4i16 V128:$Vn)>;
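// For example (illustrative IR, value names assumed):
//   %c = call <8 x i16> @llvm.umin.v8i16(<8 x i16> %x,
//                                        <8 x i16> <i16 255, i16 255, ...>)
//   %n = trunc <8 x i16> %c to <8 x i8>
// is an unsigned saturating narrow, so it selects to "uqxtn v0.8b, v1.8h";
// the clamped signed forms select to sqxtn in the same way.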

// concat_vectors(Vd, trunc(umin(X, 255))) -> UQXTN2(Vd, Vn)
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (trunc (umin (v8i16 V128:$Vn), (v8i16 VImmFF)))))),
          (UQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
// concat_vectors(Vd, trunc(umin(X, 65535))) -> UQXTN2(Vd, Vn)
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (umin (v4i32 V128:$Vn), (v4i32 VImmFFFF)))))),
          (UQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;

// concat_vectors(Vd, trunc(smin(smax(Vn, -128), 127))) -> SQXTN2(Vd, Vn),
// also with the min/max reversed
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (trunc (smin (smax (v8i16 V128:$Vn), (v8i16 VImm80)),
                                          (v8i16 VImm7F)))))),
          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
def : Pat<(v16i8 (concat_vectors
                 (v8i8 V64:$Vd),
                 (v8i8 (trunc (smax (smin (v8i16 V128:$Vn), (v8i16 VImm7F)),
                                          (v8i16 VImm80)))))),
          (SQXTNv16i8 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;

// concat_vectors(Vd, trunc(smin(smax(Vn, -32768), 32767))) -> SQXTN2(Vd, Vn),
// also with the min/max reversed
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (smin (smax (v4i32 V128:$Vn), (v4i32 VImm8000)),
                                           (v4i32 VImm7FFF)))))),
          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 V64:$Vd),
                 (v4i16 (trunc (smax (smin (v4i32 V128:$Vn), (v4i32 VImm7FFF)),
                                           (v4i32 VImm8000)))))),
          (SQXTNv8i16 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn)>;

// Select vector bswap nodes into the equivalent REV instructions.
def : Pat<(v4i16 (bswap (v4i16 V64:$Rn))),
          (v4i16 (REV16v8i8 (v4i16 V64:$Rn)))>;
def : Pat<(v8i16 (bswap (v8i16 V128:$Rn))),
          (v8i16 (REV16v16i8 (v8i16 V128:$Rn)))>;
def : Pat<(v2i32 (bswap (v2i32 V64:$Rn))),
          (v2i32 (REV32v8i8 (v2i32 V64:$Rn)))>;
def : Pat<(v4i32 (bswap (v4i32 V128:$Rn))),
          (v4i32 (REV32v16i8 (v4i32 V128:$Rn)))>;
def : Pat<(v2i64 (bswap (v2i64 V128:$Rn))),
          (v2i64 (REV64v16i8 (v2i64 V128:$Rn)))>;
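// REV16/REV32/REV64 reverse the bytes within each 16-, 32- or 64-bit
// container of the byte vector, so a per-lane byte swap picks the REV whose
// container width equals the lane width; e.g. (bswap <4 x i16> %v) becomes
// "rev16 v0.8b, v1.8b" (registers assumed for illustration).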

//===----------------------------------------------------------------------===//
// Advanced SIMD three vector instructions.
//===----------------------------------------------------------------------===//

defm ADD     : SIMDThreeSameVector<0, 0b10000, "add", add>;
defm ADDP    : SIMDThreeSameVector<0, 0b10111, "addp", AArch64addp>;
defm CMEQ    : SIMDThreeSameVector<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE    : SIMDThreeSameVector<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT    : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI    : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS    : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST   : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>;
foreach VT = [ v8i8, v16i8, v4i16, v8i16, v2i32, v4i32, v2i64 ] in {
def : Pat<(vnot (AArch64cmeqz VT:$Rn)), (!cast<Instruction>("CMTST"#VT) VT:$Rn, VT:$Rn)>;
}
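// A lane of (vnot (cmeqz x)) is all-ones exactly when x is non-zero, and
// "cmtst x, x" sets a lane when (x & x) != 0, i.e. when x is non-zero, so the
// two forms are equivalent and the explicit NOT can be folded away.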
defm FABD    : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>;
let Predicates = [HasNEON] in {
foreach VT = [ v2f32, v4f32, v2f64 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
let Predicates = [HasNEON, HasFullFP16] in {
foreach VT = [ v4f16, v8f16 ] in
def : Pat<(fabs (fsub VT:$Rn, VT:$Rm)), (!cast<Instruction>("FABD"#VT) VT:$Rn, VT:$Rm)>;
}
defm FACGE   : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",AArch64facge>;
defm FACGT   : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",AArch64facgt>;
defm FADDP   : SIMDThreeSameVectorFP<1,0,0b010,"faddp", AArch64faddp>;
defm FADD    : SIMDThreeSameVectorFP<0,0,0b010,"fadd", any_fadd>;
defm FCMEQ   : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE   : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT   : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FDIV    : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", any_fdiv>;
defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>;
defm FMAXNM  : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", any_fmaxnum>;
defm FMAXP   : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>;
defm FMAX    : SIMDThreeSameVectorFP<0,0,0b110,"fmax", any_fmaximum>;
defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>;
defm FMINNM  : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", any_fminnum>;
defm FMINP   : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>;
defm FMIN    : SIMDThreeSameVectorFP<0,1,0b110,"fmin", any_fminimum>;

// NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the
// instruction expects the addend first, while the fma intrinsic puts it last.
defm FMLA     : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla",
            TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
defm FMLS     : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls",
            TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
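// Concretely, (fma a, b, acc) becomes "fmla acc, a, b": the addend lives in
// the tied destination register. FMLS matches an fma with one multiplicand
// negated, i.e. acc - a*b (the operand roles shown here are illustrative).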

defm FMULX    : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>;
defm FMUL     : SIMDThreeSameVectorFP<1,0,0b011,"fmul", any_fmul>;
defm FRECPS   : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>;
defm FRSQRTS  : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>;
defm FSUB     : SIMDThreeSameVectorFP<0,1,0b010,"fsub", any_fsub>;

// MLA and MLS are generated by the MachineCombiner, so no pattern is
// attached here.
defm MLA      : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", null_frag>;
defm MLS      : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", null_frag>;
defm MUL      : SIMDThreeSameVectorBHS<0, 0b10011, "mul", mul>;
defm PMUL     : SIMDThreeSameVectorB<1, 0b10011, "pmul", int_aarch64_neon_pmul>;
defm SABA     : SIMDThreeSameVectorBHSTied<0, 0b01111, "saba",
      TriOpFrag<(add node:$LHS, (AArch64sabd node:$MHS, node:$RHS))> >;
defm SABD     : SIMDThreeSameVectorBHS<0,0b01110,"sabd", AArch64sabd>;
defm SHADD    : SIMDThreeSameVectorBHS<0,0b00000,"shadd", avgfloors>;
defm SHSUB    : SIMDThreeSameVectorBHS<0,0b00100,"shsub", int_aarch64_neon_shsub>;
defm SMAXP    : SIMDThreeSameVectorBHS<0,0b10100,"smaxp", int_aarch64_neon_smaxp>;
defm SMAX     : SIMDThreeSameVectorBHS<0,0b01100,"smax", smax>;
defm SMINP    : SIMDThreeSameVectorBHS<0,0b10101,"sminp", int_aarch64_neon_sminp>;
defm SMIN     : SIMDThreeSameVectorBHS<0,0b01101,"smin", smin>;
defm SQADD    : SIMDThreeSameVector<0,0b00001,"sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeSameVectorHS<0,0b10110,"sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeSameVectorHS<1,0b10110,"sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeSameVector<0,0b01011,"sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeSameVector<0,0b01001,"sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeSameVector<0,0b00101,"sqsub", int_aarch64_neon_sqsub>;
defm SRHADD   : SIMDThreeSameVectorBHS<0,0b00010,"srhadd", avgceils>;
defm SRSHL    : SIMDThreeSameVector<0,0b01010,"srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeSameVector<0,0b01000,"sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeSameVector<1,0b10000,"sub", sub>;
defm UABA     : SIMDThreeSameVectorBHSTied<1, 0b01111, "uaba",
      TriOpFrag<(add node:$LHS, (AArch64uabd node:$MHS, node:$RHS))> >;
defm UABD     : SIMDThreeSameVectorBHS<1,0b01110,"uabd", AArch64uabd>;
defm UHADD    : SIMDThreeSameVectorBHS<1,0b00000,"uhadd", avgflooru>;
defm UHSUB    : SIMDThreeSameVectorBHS<1,0b00100,"uhsub", int_aarch64_neon_uhsub>;
defm UMAXP    : SIMDThreeSameVectorBHS<1,0b10100,"umaxp", int_aarch64_neon_umaxp>;
defm UMAX     : SIMDThreeSameVectorBHS<1,0b01100,"umax", umax>;
defm UMINP    : SIMDThreeSameVectorBHS<1,0b10101,"uminp", int_aarch64_neon_uminp>;
defm UMIN     : SIMDThreeSameVectorBHS<1,0b01101,"umin", umin>;
defm UQADD    : SIMDThreeSameVector<1,0b00001,"uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeSameVector<1,0b01011,"uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeSameVector<1,0b01001,"uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeSameVector<1,0b00101,"uqsub", int_aarch64_neon_uqsub>;
defm URHADD   : SIMDThreeSameVectorBHS<1,0b00010,"urhadd", avgceilu>;
defm URSHL    : SIMDThreeSameVector<1,0b01010,"urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeSameVector<1,0b01000,"ushl", int_aarch64_neon_ushl>;
defm SQRDMLAH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10000,"sqrdmlah",
                                                  int_aarch64_neon_sqrdmlah>;
defm SQRDMLSH : SIMDThreeSameVectorSQRDMLxHTiedHS<1,0b10001,"sqrdmlsh",
                                                  int_aarch64_neon_sqrdmlsh>;

// Extra saturating-arithmetic patterns, beyond the intrinsic matches above
defm : SIMDThreeSameVectorExtraPatterns<"SQADD", saddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQADD", uaddsat>;
defm : SIMDThreeSameVectorExtraPatterns<"SQSUB", ssubsat>;
defm : SIMDThreeSameVectorExtraPatterns<"UQSUB", usubsat>;

defm AND : SIMDLogicalThreeVector<0, 0b00, "and", and>;
defm BIC : SIMDLogicalThreeVector<0, 0b01, "bic",
                                  BinOpFrag<(and node:$LHS, (vnot node:$RHS))> >;
defm EOR : SIMDLogicalThreeVector<1, 0b00, "eor", xor>;
defm ORN : SIMDLogicalThreeVector<0, 0b11, "orn",
                                  BinOpFrag<(or node:$LHS, (vnot node:$RHS))> >;
defm ORR : SIMDLogicalThreeVector<0, 0b10, "orr", or>;

// Pseudo bitwise select pattern BSP.
// It is expanded into BSL/BIT/BIF after register allocation.
defm BSP : SIMDLogicalThreeVectorPseudo<TriOpFrag<(or (and node:$LHS, node:$MHS),
                                                      (and (vnot node:$LHS), node:$RHS))>>;
defm BSL : SIMDLogicalThreeVectorTied<1, 0b01, "bsl">;
defm BIT : SIMDLogicalThreeVectorTied<1, 0b10, "bit">;
defm BIF : SIMDLogicalThreeVectorTied<1, 0b11, "bif">;
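// The three tied forms all realize the same bitwise select and differ only in
// operand placement: BSL keeps the mask in the tied destination
// (Vd = (Vd & Vn) | (~Vd & Vm)), while BIT and BIF keep a data operand there
// and take the mask in Vm, inserting Vn's bits where the mask is true (BIT)
// or false (BIF). This is what lets the BSP pseudo be expanded after register
// allocation into whichever form matches the assigned registers.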

def : Pat<(AArch64bsp (v8i8 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v4i16 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v2i32 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;
def : Pat<(AArch64bsp (v1i64 V64:$Rd), V64:$Rn, V64:$Rm),
          (BSPv8i8 V64:$Rd, V64:$Rn, V64:$Rm)>;

def : Pat<(AArch64bsp (v16i8 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v8i16 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v4i32 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;
def : Pat<(AArch64bsp (v2i64 V128:$Rd), V128:$Rn, V128:$Rm),
          (BSPv16i8 V128:$Rd, V128:$Rn, V128:$Rm)>;

// The following SetCC patterns are used for GlobalISel only.
multiclass SelectSetCC<PatFrags InFrag, string INST> {
  def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
            (v8i8 (!cast<Instruction>(INST # v8i8) (v8i8 V64:$Rn), (v8i8 V64:$Rm)))>;
  def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), (v16i8 V128:$Rm))),
            (v16i8 (!cast<Instruction>(INST # v16i8) (v16i8 V128:$Rn), (v16i8 V128:$Rm)))>;
  def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
            (v4i16 (!cast<Instruction>(INST # v4i16) (v4i16 V64:$Rn), (v4i16 V64:$Rm)))>;
  def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), (v8i16 V128:$Rm))),
            (v8i16 (!cast<Instruction>(INST # v8i16) (v8i16 V128:$Rn), (v8i16 V128:$Rm)))>;
  def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
            (v2i32 (!cast<Instruction>(INST # v2i32) (v2i32 V64:$Rn), (v2i32 V64:$Rm)))>;
  def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), (v4i32 V128:$Rm))),
            (v4i32 (!cast<Instruction>(INST # v4i32) (v4i32 V128:$Rn), (v4i32 V128:$Rm)))>;
  def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), (v2i64 V128:$Rm))),
            (v2i64 (!cast<Instruction>(INST # v2i64) (v2i64 V128:$Rn), (v2i64 V128:$Rm)))>;
}

defm : SelectSetCC<seteq, "CMEQ">;
defm : SelectSetCC<setgt, "CMGT">;
defm : SelectSetCC<setge, "CMGE">;
defm : SelectSetCC<setugt, "CMHI">;
defm : SelectSetCC<setuge, "CMHS">;

multiclass SelectSetCCSwapOperands<PatFrags InFrag, string INST> {
  def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), (v8i8 V64:$Rm))),
            (v8i8 (!cast<Instruction>(INST # v8i8) (v8i8 V64:$Rm), (v8i8 V64:$Rn)))>;
  def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), (v16i8 V128:$Rm))),
            (v16i8 (!cast<Instruction>(INST # v16i8) (v16i8 V128:$Rm), (v16i8 V128:$Rn)))>;
  def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), (v4i16 V64:$Rm))),
            (v4i16 (!cast<Instruction>(INST # v4i16) (v4i16 V64:$Rm), (v4i16 V64:$Rn)))>;
  def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), (v8i16 V128:$Rm))),
            (v8i16 (!cast<Instruction>(INST # v8i16) (v8i16 V128:$Rm), (v8i16 V128:$Rn)))>;
  def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), (v2i32 V64:$Rm))),
            (v2i32 (!cast<Instruction>(INST # v2i32) (v2i32 V64:$Rm), (v2i32 V64:$Rn)))>;
  def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), (v4i32 V128:$Rm))),
            (v4i32 (!cast<Instruction>(INST # v4i32) (v4i32 V128:$Rm), (v4i32 V128:$Rn)))>;
  def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), (v2i64 V128:$Rm))),
            (v2i64 (!cast<Instruction>(INST # v2i64) (v2i64 V128:$Rm), (v2i64 V128:$Rn)))>;
}

defm : SelectSetCCSwapOperands<setlt, "CMGT">;
defm : SelectSetCCSwapOperands<setle, "CMGE">;
defm : SelectSetCCSwapOperands<setult, "CMHI">;
defm : SelectSetCCSwapOperands<setule, "CMHS">;
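// There are no register-register CMLT/CMLE (or unsigned CMLO/CMLS) encodings,
// so the swapped-operand mappings above implement e.g. (setlt a, b) as
// "cmgt b, a" and (setule a, b) as "cmhs b, a".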

multiclass SelectSetCCZeroRHS<PatFrags InFrag, string INST> {
  def : Pat<(v8i8 (InFrag (v8i8 V64:$Rn), immAllZerosV)),
            (v8i8 (!cast<Instruction>(INST # v8i8rz) (v8i8 V64:$Rn)))>;
  def : Pat<(v16i8 (InFrag (v16i8 V128:$Rn), immAllZerosV)),
            (v16i8 (!cast<Instruction>(INST # v16i8rz) (v16i8 V128:$Rn)))>;
  def : Pat<(v4i16 (InFrag (v4i16 V64:$Rn), immAllZerosV)),
            (v4i16 (!cast<Instruction>(INST # v4i16rz) (v4i16 V64:$Rn)))>;
  def : Pat<(v8i16 (InFrag (v8i16 V128:$Rn), immAllZerosV)),
            (v8i16 (!cast<Instruction>(INST # v8i16rz) (v8i16 V128:$Rn)))>;
  def : Pat<(v2i32 (InFrag (v2i32 V64:$Rn), immAllZerosV)),
            (v2i32 (!cast<Instruction>(INST # v2i32rz) (v2i32 V64:$Rn)))>;
  def : Pat<(v4i32 (InFrag (v4i32 V128:$Rn), immAllZerosV)),
            (v4i32 (!cast<Instruction>(INST # v4i32rz) (v4i32 V128:$Rn)))>;
  def : Pat<(v2i64 (InFrag (v2i64 V128:$Rn), immAllZerosV)),
            (v2i64 (!cast<Instruction>(INST # v2i64rz) (v2i64 V128:$Rn)))>;
}

defm : SelectSetCCZeroRHS<seteq, "CMEQ">;
defm : SelectSetCCZeroRHS<setgt, "CMGT">;
defm : SelectSetCCZeroRHS<setge, "CMGE">;
defm : SelectSetCCZeroRHS<setlt, "CMLT">;
defm : SelectSetCCZeroRHS<setle, "CMLE">;

multiclass SelectSetCCZeroLHS<PatFrags InFrag, string INST> {
  def : Pat<(v8i8 (InFrag immAllZerosV, (v8i8 V64:$Rn))),
            (v8i8 (!cast<Instruction>(INST # v8i8rz) (v8i8 V64:$Rn)))>;
  def : Pat<(v16i8 (InFrag immAllZerosV, (v16i8 V128:$Rn))),
            (v16i8 (!cast<Instruction>(INST # v16i8rz) (v16i8 V128:$Rn)))>;
  def : Pat<(v4i16 (InFrag immAllZerosV, (v4i16 V64:$Rn))),
            (v4i16 (!cast<Instruction>(INST # v4i16rz) (v4i16 V64:$Rn)))>;
  def : Pat<(v8i16 (InFrag immAllZerosV, (v8i16 V128:$Rn))),
            (v8i16 (!cast<Instruction>(INST # v8i16rz) (v8i16 V128:$Rn)))>;
  def : Pat<(v2i32 (InFrag immAllZerosV, (v2i32 V64:$Rn))),
            (v2i32 (!cast<Instruction>(INST # v2i32rz) (v2i32 V64:$Rn)))>;
  def : Pat<(v4i32 (InFrag immAllZerosV, (v4i32 V128:$Rn))),
            (v4i32 (!cast<Instruction>(INST # v4i32rz) (v4i32 V128:$Rn)))>;
  def : Pat<(v2i64 (InFrag immAllZerosV, (v2i64 V128:$Rn))),
            (v2i64 (!cast<Instruction>(INST # v2i64rz) (v2i64 V128:$Rn)))>;
}

defm : SelectSetCCZeroLHS<seteq, "CMEQ">;
defm : SelectSetCCZeroLHS<setgt, "CMLT">;
defm : SelectSetCCZeroLHS<setge, "CMLE">;
defm : SelectSetCCZeroLHS<setlt, "CMGT">;
defm : SelectSetCCZeroLHS<setle, "CMGE">;

let Predicates = [HasNEON] in {
def : InstAlias<"mov{\t$dst.16b, $src.16b|.16b\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 1>;
def : InstAlias<"mov{\t$dst.8h, $src.8h|.8h\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.4s, $src.4s|.4s\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;
def : InstAlias<"mov{\t$dst.2d, $src.2d|.2d\t$dst, $src}",
                (ORRv16i8 V128:$dst, V128:$src, V128:$src), 0>;

def : InstAlias<"mov{\t$dst.8b, $src.8b|.8b\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 1>;
def : InstAlias<"mov{\t$dst.4h, $src.4h|.4h\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.2s, $src.2s|.2s\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;
def : InstAlias<"mov{\t$dst.1d, $src.1d|.1d\t$dst, $src}",
                (ORRv8i8 V64:$dst, V64:$src, V64:$src), 0>;

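// The comparison aliases below have no encodings of their own: each one is
// accepted by the assembler and rewritten as the opposite comparison with the
// source operands swapped, e.g. "cmls a, b, c" assembles as "cmhs a, c, b".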
def : InstAlias<"{cmls\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmls.8b\t$dst, $src1, $src2}",
                (CMHSv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmls.16b\t$dst, $src1, $src2}",
                (CMHSv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmls.4h\t$dst, $src1, $src2}",
                (CMHSv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmls.8h\t$dst, $src1, $src2}",
                (CMHSv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmls.2s\t$dst, $src1, $src2}",
                (CMHSv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmls\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmls.4s\t$dst, $src1, $src2}",
                (CMHSv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmls\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmls.2d\t$dst, $src1, $src2}",
                (CMHSv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlo\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlo.8b\t$dst, $src1, $src2}",
                (CMHIv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlo.16b\t$dst, $src1, $src2}",
                (CMHIv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlo.4h\t$dst, $src1, $src2}",
                (CMHIv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlo.8h\t$dst, $src1, $src2}",
                (CMHIv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlo.2s\t$dst, $src1, $src2}",
                (CMHIv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlo.4s\t$dst, $src1, $src2}",
                (CMHIv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlo\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlo.2d\t$dst, $src1, $src2}",
                (CMHIv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmle\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmle.8b\t$dst, $src1, $src2}",
                (CMGEv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmle.16b\t$dst, $src1, $src2}",
                (CMGEv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmle.4h\t$dst, $src1, $src2}",
                (CMGEv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmle.8h\t$dst, $src1, $src2}",
                (CMGEv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmle.2s\t$dst, $src1, $src2}",
                (CMGEv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmle.4s\t$dst, $src1, $src2}",
                (CMGEv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmle.2d\t$dst, $src1, $src2}",
                (CMGEv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

def : InstAlias<"{cmlt\t$dst.8b, $src1.8b, $src2.8b" #
                "|cmlt.8b\t$dst, $src1, $src2}",
                (CMGTv8i8 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.16b, $src1.16b, $src2.16b" #
                "|cmlt.16b\t$dst, $src1, $src2}",
                (CMGTv16i8 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|cmlt.4h\t$dst, $src1, $src2}",
                (CMGTv4i16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|cmlt.8h\t$dst, $src1, $src2}",
                (CMGTv8i16 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|cmlt.2s\t$dst, $src1, $src2}",
                (CMGTv2i32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|cmlt.4s\t$dst, $src1, $src2}",
                (CMGTv4i32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|cmlt.2d\t$dst, $src1, $src2}",
                (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmle.4h\t$dst, $src1, $src2}",
                (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmle.8h\t$dst, $src1, $src2}",
                (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmle.2s\t$dst, $src1, $src2}",
                (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmle.4s\t$dst, $src1, $src2}",
                (FCMGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmle.2d\t$dst, $src1, $src2}",
                (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" #
                "|fcmlt.4h\t$dst, $src1, $src2}",
                (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" #
                "|fcmlt.8h\t$dst, $src1, $src2}",
                (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" #
                "|fcmlt.2s\t$dst, $src1, $src2}",
                (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.4s, $src1.4s, $src2.4s" #
                "|fcmlt.4s\t$dst, $src1, $src2}",
                (FCMGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" #
                "|fcmlt.2d\t$dst, $src1, $src2}",
                (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" #
                "|facle.4h\t$dst, $src1, $src2}",
                (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" #
                "|facle.8h\t$dst, $src1, $src2}",
                (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" #
                "|facle.2s\t$dst, $src1, $src2}",
                (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{facle\t$dst.4s, $src1.4s, $src2.4s" #
                "|facle.4s\t$dst, $src1, $src2}",
                (FACGEv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" #
                "|facle.2d\t$dst, $src1, $src2}",
                (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;

let Predicates = [HasNEON, HasFullFP16] in {
def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" #
                "|faclt.4h\t$dst, $src1, $src2}",
                (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" #
                "|faclt.8h\t$dst, $src1, $src2}",
                (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>;
}
def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" #
                "|faclt.2s\t$dst, $src1, $src2}",
                (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>;
def : InstAlias<"{faclt\t$dst.4s, $src1.4s, $src2.4s" #
                "|faclt.4s\t$dst, $src1, $src2}",
                (FACGTv4f32 V128:$dst, V128:$src2, V128:$src1), 0>;
def : InstAlias<"{faclt\t$dst.2d, $src1.2d, $src2.2d" #
                "|faclt.2d\t$dst, $src1, $src2}",
                (FACGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>;
}

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions.
//===----------------------------------------------------------------------===//

defm ADD      : SIMDThreeScalarD<0, 0b10000, "add", add>;
defm CMEQ     : SIMDThreeScalarD<1, 0b10001, "cmeq", AArch64cmeq>;
defm CMGE     : SIMDThreeScalarD<0, 0b00111, "cmge", AArch64cmge>;
defm CMGT     : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>;
defm CMHI     : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>;
defm CMHS     : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>;
defm CMTST    : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>;
defm FABD     : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>;
def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))),
          (FABD64 FPR64:$Rn, FPR64:$Rm)>;
let Predicates = [HasNEON, HasFullFP16] in {
def : Pat<(fabs (fsub f16:$Rn, f16:$Rm)), (FABD16 f16:$Rn, f16:$Rm)>;
}
let Predicates = [HasNEON] in {
def : Pat<(fabs (fsub f32:$Rn, f32:$Rm)), (FABD32 f32:$Rn, f32:$Rm)>;
def : Pat<(fabs (fsub f64:$Rn, f64:$Rm)), (FABD64 f64:$Rn, f64:$Rm)>;
}
defm FACGE    : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge",
                                     int_aarch64_neon_facge>;
defm FACGT    : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt",
                                     int_aarch64_neon_facgt>;
defm FCMEQ    : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>;
defm FCMGE    : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>;
defm FCMGT    : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>;
defm FMULX    : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx, HasNEONandIsStreamingSafe>;
defm FRECPS   : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps, HasNEONandIsStreamingSafe>;
defm FRSQRTS  : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts, HasNEONandIsStreamingSafe>;
defm SQADD    : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>;
defm SQDMULH  : SIMDThreeScalarHS<  0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>;
defm SQRDMULH : SIMDThreeScalarHS<  1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
defm SQRSHL   : SIMDThreeScalarBHSD<0, 0b01011, "sqrshl", int_aarch64_neon_sqrshl>;
defm SQSHL    : SIMDThreeScalarBHSD<0, 0b01001, "sqshl", int_aarch64_neon_sqshl>;
defm SQSUB    : SIMDThreeScalarBHSD<0, 0b00101, "sqsub", int_aarch64_neon_sqsub>;
defm SRSHL    : SIMDThreeScalarD<   0, 0b01010, "srshl", int_aarch64_neon_srshl>;
defm SSHL     : SIMDThreeScalarD<   0, 0b01000, "sshl", int_aarch64_neon_sshl>;
defm SUB      : SIMDThreeScalarD<   1, 0b10000, "sub", sub>;
defm UQADD    : SIMDThreeScalarBHSD<1, 0b00001, "uqadd", int_aarch64_neon_uqadd>;
defm UQRSHL   : SIMDThreeScalarBHSD<1, 0b01011, "uqrshl", int_aarch64_neon_uqrshl>;
defm UQSHL    : SIMDThreeScalarBHSD<1, 0b01001, "uqshl", int_aarch64_neon_uqshl>;
defm UQSUB    : SIMDThreeScalarBHSD<1, 0b00101, "uqsub", int_aarch64_neon_uqsub>;
defm URSHL    : SIMDThreeScalarD<   1, 0b01010, "urshl", int_aarch64_neon_urshl>;
defm USHL     : SIMDThreeScalarD<   1, 0b01000, "ushl", int_aarch64_neon_ushl>;
let Predicates = [HasRDM] in {
  defm SQRDMLAH : SIMDThreeScalarHSTied<1, 0, 0b10000, "sqrdmlah">;
  defm SQRDMLSH : SIMDThreeScalarHSTied<1, 0, 0b10001, "sqrdmlsh">;
  def : Pat<(i32 (int_aarch64_neon_sqrdmlah (i32 FPR32:$Rd), (i32 FPR32:$Rn),
                                            (i32 FPR32:$Rm))),
            (SQRDMLAHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
  def : Pat<(i32 (int_aarch64_neon_sqrdmlsh (i32 FPR32:$Rd), (i32 FPR32:$Rn),
                                            (i32 FPR32:$Rm))),
            (SQRDMLSHv1i32 FPR32:$Rd, FPR32:$Rn, FPR32:$Rm)>;
}

defm : FMULScalarFromIndexedLane0Patterns<"FMULX", "16", "32", "64",
                                          int_aarch64_neon_fmulx,
                                          [HasNEONandIsStreamingSafe]>;

let Predicates = [HasNEON] in {
def : InstAlias<"cmls $dst, $src1, $src2",
                (CMHSv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmle $dst, $src1, $src2",
                (CMGEv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlo $dst, $src1, $src2",
                (CMHIv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"cmlt $dst, $src1, $src2",
                (CMGTv1i64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
}
let Predicates = [HasFPARMv8] in {
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmle $dst, $src1, $src2",
                (FCMGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"fcmlt $dst, $src1, $src2",
                (FCMGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"facle $dst, $src1, $src2",
                (FACGE64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT32 FPR32:$dst, FPR32:$src2, FPR32:$src1), 0>;
def : InstAlias<"faclt $dst, $src1, $src2",
                (FACGT64 FPR64:$dst, FPR64:$src2, FPR64:$src1), 0>;
}

//===----------------------------------------------------------------------===//
// Advanced SIMD three scalar instructions (mixed operands).
//===----------------------------------------------------------------------===//
defm SQDMULL  : SIMDThreeScalarMixedHS<0, 0b11010, "sqdmull",
                                       int_aarch64_neon_sqdmulls_scalar>;
defm SQDMLAL  : SIMDThreeScalarMixedTiedHS<0, 0b10010, "sqdmlal">;
defm SQDMLSL  : SIMDThreeScalarMixedTiedHS<0, 0b10110, "sqdmlsl">;

def : Pat<(i64 (int_aarch64_neon_sqadd (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                          (i32 FPR32:$Rm))))),
          (SQDMLALi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_sqsub (i64 FPR64:$Rd),
                   (i64 (int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
                                                          (i32 FPR32:$Rm))))),
          (SQDMLSLi32 FPR64:$Rd, FPR32:$Rn, FPR32:$Rm)>;

//===----------------------------------------------------------------------===//
// Advanced SIMD two scalar instructions.
//===----------------------------------------------------------------------===//

defm ABS    : SIMDTwoScalarD<    0, 0b01011, "abs", abs, [HasNoCSSC]>;
defm CMEQ   : SIMDCmpTwoScalarD< 0, 0b01001, "cmeq", AArch64cmeqz>;
defm CMGE   : SIMDCmpTwoScalarD< 1, 0b01000, "cmge", AArch64cmgez>;
defm CMGT   : SIMDCmpTwoScalarD< 0, 0b01000, "cmgt", AArch64cmgtz>;
defm CMLE   : SIMDCmpTwoScalarD< 1, 0b01001, "cmle", AArch64cmlez>;
defm CMLT   : SIMDCmpTwoScalarD< 0, 0b01010, "cmlt", AArch64cmltz>;
defm FCMEQ  : SIMDFPCmpTwoScalar<0, 1, 0b01101, "fcmeq", AArch64fcmeqz>;
defm FCMGE  : SIMDFPCmpTwoScalar<1, 1, 0b01100, "fcmge", AArch64fcmgez>;
defm FCMGT  : SIMDFPCmpTwoScalar<0, 1, 0b01100, "fcmgt", AArch64fcmgtz>;
defm FCMLE  : SIMDFPCmpTwoScalar<1, 1, 0b01101, "fcmle", AArch64fcmlez>;
defm FCMLT  : SIMDFPCmpTwoScalar<0, 1, 0b01110, "fcmlt", AArch64fcmltz>;
defm FCVTAS : SIMDFPTwoScalar<   0, 0, 0b11100, "fcvtas">;
defm FCVTAU : SIMDFPTwoScalar<   1, 0, 0b11100, "fcvtau">;
defm FCVTMS : SIMDFPTwoScalar<   0, 0, 0b11011, "fcvtms">;
defm FCVTMU : SIMDFPTwoScalar<   1, 0, 0b11011, "fcvtmu">;
defm FCVTNS : SIMDFPTwoScalar<   0, 0, 0b11010, "fcvtns">;
defm FCVTNU : SIMDFPTwoScalar<   1, 0, 0b11010, "fcvtnu">;
defm FCVTPS : SIMDFPTwoScalar<   0, 1, 0b11010, "fcvtps">;
defm FCVTPU : SIMDFPTwoScalar<   1, 1, 0b11010, "fcvtpu">;
def  FCVTXNv1i64 : SIMDInexactCvtTwoScalar<0b10110, "fcvtxn">;
defm FCVTZS : SIMDFPTwoScalar<   0, 1, 0b11011, "fcvtzs">;
defm FCVTZU : SIMDFPTwoScalar<   1, 1, 0b11011, "fcvtzu">;
defm FRECPE : SIMDFPTwoScalar<   0, 1, 0b11101, "frecpe">;
defm FRECPX : SIMDFPTwoScalar<   0, 1, 0b11111, "frecpx">;
defm FRSQRTE : SIMDFPTwoScalar<  1, 1, 0b11101, "frsqrte">;
defm NEG    : SIMDTwoScalarD<    1, 0b01011, "neg",
                                 UnOpFrag<(sub immAllZerosV, node:$LHS)> >;
defm SCVTF  : SIMDFPTwoScalarCVT<   0, 0, 0b11101, "scvtf", AArch64sitof>;
defm SQABS  : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>;
defm SQNEG  : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>;
defm SQXTN  : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>;
defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>;
defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd",
                                     int_aarch64_neon_suqadd>;
defm UCVTF  : SIMDFPTwoScalarCVT<   1, 0, 0b11101, "ucvtf", AArch64uitof>;
defm UQXTN  : SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>;
defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd",
                                     int_aarch64_neon_usqadd>;

def : Pat<(v1i64 (AArch64vashr (v1i64 V64:$Rn), (i32 63))),
          (CMLTv1i64rz V64:$Rn)>;

// Round FP64 to BF16.
let Predicates = [HasNEONandIsStreamingSafe, HasBF16] in
def : Pat<(bf16 (any_fpround (f64 FPR64:$Rn))),
          (BFCVT (FCVTXNv1i64 $Rn))>;
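// FCVTXN narrows f64 to f32 using round-to-odd, which preserves enough
// information that the subsequent BFCVT rounding to bf16 cannot double-round;
// a plain f64 -> f32 round followed by f32 -> bf16 would not be correct in
// all cases.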

def : Pat<(v1i64 (int_aarch64_neon_fcvtas (v1f64 FPR64:$Rn))),
          (FCVTASv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtau (v1f64 FPR64:$Rn))),
          (FCVTAUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtms (v1f64 FPR64:$Rn))),
          (FCVTMSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtmu (v1f64 FPR64:$Rn))),
          (FCVTMUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtns (v1f64 FPR64:$Rn))),
          (FCVTNSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtnu (v1f64 FPR64:$Rn))),
          (FCVTNUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtps (v1f64 FPR64:$Rn))),
          (FCVTPSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtpu (v1f64 FPR64:$Rn))),
          (FCVTPUv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzs (v1f64 FPR64:$Rn))),
          (FCVTZSv1i64 FPR64:$Rn)>;
def : Pat<(v1i64 (int_aarch64_neon_fcvtzu (v1f64 FPR64:$Rn))),
          (FCVTZUv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frecpe (f16 FPR16:$Rn))),
          (FRECPEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frecpe (f32 FPR32:$Rn))),
          (FRECPEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frecpe (v2f32 V64:$Rn))),
          (FRECPEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frecpe (v4f32 FPR128:$Rn))),
          (FRECPEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frecpe (f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frecpe (v1f64 FPR64:$Rn))),
          (FRECPEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frecpe (v2f64 FPR128:$Rn))),
          (FRECPEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frecps (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRECPS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frecps (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRECPSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frecps (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRECPSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frecps (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRECPS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frecps (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRECPSv2f64 FPR128:$Rn, FPR128:$Rm)>;

def : Pat<(f16 (int_aarch64_neon_frecpx (f16 FPR16:$Rn))),
          (FRECPXv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frecpx (f32 FPR32:$Rn))),
          (FRECPXv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frecpx (f64 FPR64:$Rn))),
          (FRECPXv1i64 FPR64:$Rn)>;

def : Pat<(f16 (int_aarch64_neon_frsqrte (f16 FPR16:$Rn))),
          (FRSQRTEv1f16 FPR16:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(f64 (int_aarch64_neon_frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (int_aarch64_neon_frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;

def : Pat<(f32 (AArch64frsqrte (f32 FPR32:$Rn))),
          (FRSQRTEv1i32 FPR32:$Rn)>;
def : Pat<(v2f32 (AArch64frsqrte (v2f32 V64:$Rn))),
          (FRSQRTEv2f32 V64:$Rn)>;
def : Pat<(v4f32 (AArch64frsqrte (v4f32 FPR128:$Rn))),
          (FRSQRTEv4f32 FPR128:$Rn)>;
def : Pat<(f64 (AArch64frsqrte (f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v1f64 (AArch64frsqrte (v1f64 FPR64:$Rn))),
          (FRSQRTEv1i64 FPR64:$Rn)>;
def : Pat<(v2f64 (AArch64frsqrte (v2f64 FPR128:$Rn))),
          (FRSQRTEv2f64 FPR128:$Rn)>;

def : Pat<(f32 (AArch64frsqrts (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
          (FRSQRTS32 FPR32:$Rn, FPR32:$Rm)>;
def : Pat<(v2f32 (AArch64frsqrts (v2f32 V64:$Rn), (v2f32 V64:$Rm))),
          (FRSQRTSv2f32 V64:$Rn, V64:$Rm)>;
def : Pat<(v4f32 (AArch64frsqrts (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm))),
          (FRSQRTSv4f32 FPR128:$Rn, FPR128:$Rm)>;
def : Pat<(f64 (AArch64frsqrts (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
          (FRSQRTS64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(v2f64 (AArch64frsqrts (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm))),
          (FRSQRTSv2f64 FPR128:$Rn, FPR128:$Rm)>;

// Some float -> int -> float conversion patterns where we want to keep the
// intermediate int value in an FP register, using the corresponding NEON
// instructions to avoid the more costly int <-> fp register transfers.
let Predicates = [HasNEONandIsStreamingSafe] in {
def : Pat<(f64 (any_sint_to_fp (i64 (any_fp_to_sint f64:$Rn)))),
          (SCVTFv1i64 (i64 (FCVTZSv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_sint_to_fp (i32 (any_fp_to_sint f32:$Rn)))),
          (SCVTFv1i32 (i32 (FCVTZSv1i32 f32:$Rn)))>;
def : Pat<(f64 (any_uint_to_fp (i64 (any_fp_to_uint f64:$Rn)))),
          (UCVTFv1i64 (i64 (FCVTZUv1i64 f64:$Rn)))>;
def : Pat<(f32 (any_uint_to_fp (i32 (any_fp_to_uint f32:$Rn)))),
          (UCVTFv1i32 (i32 (FCVTZUv1i32 f32:$Rn)))>;

let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
def : Pat<(f16 (any_sint_to_fp (i32 (any_fp_to_sint f16:$Rn)))),
          (SCVTFv1i16 (f16 (FCVTZSv1f16 f16:$Rn)))>;
def : Pat<(f16 (any_uint_to_fp (i32 (any_fp_to_uint f16:$Rn)))),
          (UCVTFv1i16 (f16 (FCVTZUv1f16 f16:$Rn)))>;
}
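// For example (illustrative), C code like "x = (double)(int64_t)x" can stay
// entirely on the FP/SIMD unit as
//   fcvtzs d0, d0
//   scvtf  d0, d0
// rather than bouncing the integer through a GPR ("fcvtzs x8, d0" followed
// by "scvtf d0, x8").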

// An int -> float conversion of a value in lane 0 of a SIMD vector should use
// the correct cvtf variant to avoid costly fpr <-> gpr register transfers.
def : Pat<(f32 (sint_to_fp (i32 (vector_extract (v4i32 FPR128:$Rn), (i64 0))))),
          (SCVTFv1i32 (i32 (EXTRACT_SUBREG (v4i32 FPR128:$Rn), ssub)))>;

def : Pat<(f32 (uint_to_fp (i32 (vector_extract (v4i32 FPR128:$Rn), (i64 0))))),
          (UCVTFv1i32 (i32 (EXTRACT_SUBREG (v4i32 FPR128:$Rn), ssub)))>;

def : Pat<(f64 (sint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
          (SCVTFv1i64 (i64 (EXTRACT_SUBREG (v2i64 FPR128:$Rn), dsub)))>;

def : Pat<(f64 (uint_to_fp (i64 (vector_extract (v2i64 FPR128:$Rn), (i64 0))))),
          (UCVTFv1i64 (i64 (EXTRACT_SUBREG (v2i64 FPR128:$Rn), dsub)))>;

// fp16: an integer extraction from a vector must be at least 32 bits wide to
// be legal; the actual extraction result is then an in-register
// sign-extension of the lower 16 bits.
let Predicates = [HasNEONandIsStreamingSafe, HasFullFP16] in {
def : Pat<(f16 (sint_to_fp (i32 (sext_inreg (i32 (vector_extract
                (v8i16 FPR128:$Rn), (i64 0))), i16)))),
          (SCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;

// An unsigned 32-bit extracted element is instead truncated to 16 bits
// using an AND mask.
def : Pat<(f16 (uint_to_fp (i32 (and (i32 (vector_extract
                (v8i16 FPR128:$Rn), (i64 0))), (i32 65535))))),
          (UCVTFv1i16 (f16 (EXTRACT_SUBREG (v8i16 FPR128:$Rn), hsub)))>;
}

// If an integer is about to be converted to a floating-point value, just
// load it on the floating-point unit directly.
// Here are the patterns for 8- and 16-bit integers to float.
// 8 bits -> float.
multiclass UIntToFPROLoadPat<ValueType DstTy, ValueType SrcTy,
                             SDPatternOperator loadop, Instruction UCVTF,
                             ROAddrMode ro, Instruction LDRW, Instruction LDRX,
                             SubRegIndex sub> {
  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Wpat GPR64sp:$Rn, GPR32:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRW GPR64sp:$Rn, GPR32:$Rm, ro.Wext:$extend),
                                 sub))>;

  def : Pat<(DstTy (uint_to_fp (SrcTy
                     (loadop (ro.Xpat GPR64sp:$Rn, GPR64:$Rm,
                                      ro.Wext:$extend))))),
           (UCVTF (INSERT_SUBREG (DstTy (IMPLICIT_DEF)),
                                 (LDRX GPR64sp:$Rn, GPR64:$Rm, ro.Xext:$extend),
                                 sub))>;
}
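// For instance (an illustrative sketch), "float f = p[i];" with a uint8_t
// array can become
//   ldr   b0, [x0, x1]
//   ucvtf s0, s0
// loading the byte straight into a SIMD register instead of zero-extending
// it in a GPR and paying an fmov.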

defm : UIntToFPROLoadPat<f32, i32, zextloadi8,
                         UCVTFv1i32, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f32 (uint_to_fp (i32
               (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f32 (uint_to_fp (i32
                     (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16 bits -> float.
defm : UIntToFPROLoadPat<f32, i32, zextloadi16,
                         UCVTFv1i32, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f32 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i32 (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32-bit integers to float are handled in the target-specific DAG combine
// performIntToFpCombine.
// 64-bit integer to 32-bit floating point is not possible with UCVTF on
// floating-point registers, because the source and destination must have the
// same size.

// Here are the patterns for 8-, 16-, 32-, and 64-bit integers to double.
// 8 bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi8,
                         UCVTFv1i64, ro8, LDRBroW, LDRBroX, bsub>;
def : Pat <(f64 (uint_to_fp (i32
                    (zextloadi8 (am_indexed8 GPR64sp:$Rn, uimm12s1:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset), bsub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi8 (am_unscaled8 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURBi GPR64sp:$Rn, simm9:$offset), bsub))>;
// 16 bits -> double.
defm : UIntToFPROLoadPat<f64, i32, zextloadi16,
                         UCVTFv1i64, ro16, LDRHroW, LDRHroX, hsub>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRHui GPR64sp:$Rn, uimm12s2:$offset), hsub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURHi GPR64sp:$Rn, simm9:$offset), hsub))>;
// 32 bits -> double.
defm : UIntToFPROLoadPat<f64, i32, load,
                         UCVTFv1i64, ro32, LDRSroW, LDRSroX, ssub>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_indexed32 GPR64sp:$Rn, uimm12s4:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDRSui GPR64sp:$Rn, uimm12s4:$offset), ssub))>;
def : Pat <(f64 (uint_to_fp (i32
                  (load (am_unscaled32 GPR64sp:$Rn, simm9:$offset))))),
           (UCVTFv1i64 (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
                          (LDURSi GPR64sp:$Rn, simm9:$offset), ssub))>;
// 64 bits -> double is handled in the target-specific DAG combine
// performIntToFpCombine.
} // let Predicates = [HasNEONandIsStreamingSafe]

//===----------------------------------------------------------------------===//
// Advanced SIMD three different-sized vector instructions.
//===----------------------------------------------------------------------===//

defm ADDHN  : SIMDNarrowThreeVectorBHS<0,0b0100,"addhn", int_aarch64_neon_addhn>;
defm SUBHN  : SIMDNarrowThreeVectorBHS<0,0b0110,"subhn", int_aarch64_neon_subhn>;
defm RADDHN : SIMDNarrowThreeVectorBHS<1,0b0100,"raddhn",int_aarch64_neon_raddhn>;
defm RSUBHN : SIMDNarrowThreeVectorBHS<1,0b0110,"rsubhn",int_aarch64_neon_rsubhn>;
defm PMULL  : SIMDDifferentThreeVectorBD<0,0b1110,"pmull", AArch64pmull>;
defm SABAL  : SIMDLongThreeVectorTiedBHSabal<0,0b0101,"sabal",
                                             AArch64sabd>;
defm SABDL   : SIMDLongThreeVectorBHSabdl<0, 0b0111, "sabdl",
                                          AArch64sabd>;
defm SADDL   : SIMDLongThreeVectorBHS<   0, 0b0000, "saddl",
            BinOpFrag<(add (sext node:$LHS), (sext node:$RHS))>>;
defm SADDW   : SIMDWideThreeVectorBHS<   0, 0b0001, "saddw",
                 BinOpFrag<(add node:$LHS, (sext node:$RHS))>>;
defm SMLAL   : SIMDLongThreeVectorTiedBHS<0, 0b1000, "smlal",
    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMLSL   : SIMDLongThreeVectorTiedBHS<0, 0b1010, "smlsl",
    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
defm SMULL   : SIMDLongThreeVectorBHS<0, 0b1100, "smull", AArch64smull>;
defm SQDMLAL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1001, "sqdmlal",
                                               int_aarch64_neon_sqadd>;
defm SQDMLSL : SIMDLongThreeVectorSQDMLXTiedHS<0, 0b1011, "sqdmlsl",
                                               int_aarch64_neon_sqsub>;
defm SQDMULL : SIMDLongThreeVectorHS<0, 0b1101, "sqdmull",
                                     int_aarch64_neon_sqdmull>;
defm SSUBL   : SIMDLongThreeVectorBHS<0, 0b0010, "ssubl",
                 BinOpFrag<(sub (sext node:$LHS), (sext node:$RHS))>>;
defm SSUBW   : SIMDWideThreeVectorBHS<0, 0b0011, "ssubw",
                 BinOpFrag<(sub node:$LHS, (sext node:$RHS))>>;
defm UABAL   : SIMDLongThreeVectorTiedBHSabal<1, 0b0101, "uabal",
                                              AArch64uabd>;
defm UADDL   : SIMDLongThreeVectorBHS<1, 0b0000, "uaddl",
                 BinOpFrag<(add (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm UADDW   : SIMDWideThreeVectorBHS<1, 0b0001, "uaddw",
                 BinOpFrag<(add node:$LHS, (zanyext node:$RHS))>>;
defm UMLAL   : SIMDLongThreeVectorTiedBHS<1, 0b1000, "umlal",
    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMLSL   : SIMDLongThreeVectorTiedBHS<1, 0b1010, "umlsl",
    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
defm UMULL   : SIMDLongThreeVectorBHS<1, 0b1100, "umull", AArch64umull>;
defm USUBL   : SIMDLongThreeVectorBHS<1, 0b0010, "usubl",
                 BinOpFrag<(sub (zanyext node:$LHS), (zanyext node:$RHS))>>;
defm USUBW   : SIMDWideThreeVectorBHS<   1, 0b0011, "usubw",
                 BinOpFrag<(sub node:$LHS, (zanyext node:$RHS))>>;
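// For example, (add (zext <8 x i8> a), (zext <8 x i8> b)) is a single
// "uaddl v0.8h, v1.8b, v2.8b", while the "W" (wide) forms match when only
// the second operand still needs extending.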

// Additional patterns for [SU]ML[AS]L
multiclass Neon_mul_acc_widen_patterns<SDPatternOperator opnode, SDPatternOperator vecopnode,
  Instruction INST8B, Instruction INST4H, Instruction INST2S> {
  def : Pat<(v4i16 (opnode
                    V64:$Ra,
                    (v4i16 (extract_subvector
                            (vecopnode (v8i8 V64:$Rn),(v8i8 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v8i16 (INST8B
                                     (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode
                    V64:$Ra,
                    (v2i32 (extract_subvector
                            (vecopnode (v4i16 V64:$Rn),(v4i16 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v4i32 (INST4H
                                     (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode
                    V64:$Ra,
                    (v1i64 (extract_subvector
                            (vecopnode (v2i32 V64:$Rn),(v2i32 V64:$Rm)),
                            (i64 0))))),
             (EXTRACT_SUBREG (v2i64 (INST2S
                                     (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Ra, dsub),
                                     V64:$Rn, V64:$Rm)), dsub)>;
}

defm : Neon_mul_acc_widen_patterns<add, AArch64umull,
     UMLALv8i8_v8i16, UMLALv4i16_v4i32, UMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<add, AArch64smull,
     SMLALv8i8_v8i16, SMLALv4i16_v4i32, SMLALv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, AArch64umull,
     UMLSLv8i8_v8i16, UMLSLv4i16_v4i32, UMLSLv2i32_v2i64>;
defm : Neon_mul_acc_widen_patterns<sub, AArch64smull,
     SMLSLv8i8_v8i16, SMLSLv4i16_v4i32, SMLSLv2i32_v2i64>;
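// These recover a full-width [SU]ML[AS]L when the IR only accumulates the
// low half of a widening multiply: the 64-bit accumulator is widened with
// INSERT_SUBREG, a 128-bit multiply-accumulate is emitted, and the low half
// is extracted back out afterwards.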


multiclass Neon_addl_extract_patterns<SDPatternOperator opnode, SDPatternOperator ext, string Inst> {
  def : Pat<(v4i16 (opnode (extract_subvector (ext (v8i8 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Lv8i8_v8i16") V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode (extract_subvector (ext (v4i16 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Lv4i16_v4i32") V64:$Rn, V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode (extract_subvector (ext (v2i32 V64:$Rn)), (i64 0)),
                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Lv2i32_v2i64") V64:$Rn, V64:$Rm)), dsub)>;

  def : Pat<(v4i16 (opnode (v4i16 V64:$Rn),
                           (extract_subvector (ext (v8i8 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v8i16 (!cast<Instruction>(Inst#"Wv8i8_v8i16") (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
  def : Pat<(v2i32 (opnode (v2i32 V64:$Rn),
                           (extract_subvector (ext (v4i16 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v4i32 (!cast<Instruction>(Inst#"Wv4i16_v4i32") (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
  def : Pat<(v1i64 (opnode (v1i64 V64:$Rn),
                           (extract_subvector (ext (v2i32 V64:$Rm)), (i64 0)))),
            (EXTRACT_SUBREG (v2i64 (!cast<Instruction>(Inst#"Wv2i32_v2i64") (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), V64:$Rn, dsub), V64:$Rm)), dsub)>;
}

defm : Neon_addl_extract_patterns<add, zanyext, "UADD">;
defm : Neon_addl_extract_patterns<add, sext, "SADD">;
defm : Neon_addl_extract_patterns<sub, zanyext, "USUB">;
defm : Neon_addl_extract_patterns<sub, sext, "SSUB">;
6404
6405// CodeGen patterns for addhn and subhn instructions, which can actually be
6406// written in LLVM IR without too much difficulty.
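// For example (an illustrative sketch, not part of the build): the v8i8
// ADDHN pattern below corresponds to IR of the form
//   %sum = add <8 x i16> %a, %b
//   %hi  = lshr <8 x i16> %sum, <i16 8, i16 8, i16 8, i16 8,
//                                i16 8, i16 8, i16 8, i16 8>
//   %res = trunc <8 x i16> %hi to <8 x i8>
// which selects to "addhn v0.8b, v0.8h, v1.8h".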

// Prioritize ADDHN and SUBHN over UZP2.
let AddedComplexity = 10 in {

// ADDHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm), (i32 8))))),
          (ADDHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                           (i32 16))))),
          (ADDHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                           (i32 32))))),
          (ADDHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                    (i32 8))))),
          (ADDHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                    (i32 16))))),
          (ADDHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (add V128:$Rn, V128:$Rm),
                                                    (i32 32))))),
          (ADDHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

// SUBHN
def : Pat<(v8i8 (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm), (i32 8))))),
          (SUBHNv8i16_v8i8 V128:$Rn, V128:$Rm)>;
def : Pat<(v4i16 (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                           (i32 16))))),
          (SUBHNv4i32_v4i16 V128:$Rn, V128:$Rm)>;
def : Pat<(v2i32 (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                           (i32 32))))),
          (SUBHNv2i64_v2i32 V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v8i8 V64:$Rd),
                          (trunc (v8i16 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                    (i32 8))))),
          (SUBHNv8i16_v16i8 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v4i16 V64:$Rd),
                          (trunc (v4i32 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                    (i32 16))))),
          (SUBHNv4i32_v8i16 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;
def : Pat<(concat_vectors (v2i32 V64:$Rd),
                          (trunc (v2i64 (AArch64vlshr (sub V128:$Rn, V128:$Rm),
                                                    (i32 32))))),
          (SUBHNv2i64_v4i32 (SUBREG_TO_REG (i32 0), V64:$Rd, dsub),
                            V128:$Rn, V128:$Rm)>;

} // AddedComplexity = 10

//----------------------------------------------------------------------------
// AdvSIMD bitwise extract from vector instruction.
//----------------------------------------------------------------------------

defm EXT : SIMDBitwiseExtract<"ext">;

def AdjustExtImm : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 + N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
multiclass ExtPat<ValueType VT64, ValueType VT128, int N> {
  def : Pat<(VT64 (AArch64ext V64:$Rn, V64:$Rm, (i32 imm:$imm))),
            (EXTv8i8 V64:$Rn, V64:$Rm, imm:$imm)>;
  def : Pat<(VT128 (AArch64ext V128:$Rn, V128:$Rm, (i32 imm:$imm))),
            (EXTv16i8 V128:$Rn, V128:$Rm, imm:$imm)>;
  // We use EXT to handle an extract_subvector that copies the upper 64 bits
  // of a 128-bit vector.
  def : Pat<(VT64 (extract_subvector V128:$Rn, (i64 N))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, 8), dsub)>;
  // A 64-bit EXT of two halves of the same 128-bit register can be done as a
  // single 128-bit EXT.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 0)),
                              (extract_subvector V128:$Rn, (i64 N)),
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn, V128:$Rn, imm:$imm), dsub)>;
  // A 64-bit EXT of the high half of a 128-bit register can be done using a
  // 128-bit EXT of the whole register with an adjustment to the immediate. The
  // top half of the other operand will be unset, but that doesn't matter as it
  // will not be used.
  def : Pat<(VT64 (AArch64ext (extract_subvector V128:$Rn, (i64 N)),
                              V64:$Rm,
                              (i32 imm:$imm))),
            (EXTRACT_SUBREG (EXTv16i8 V128:$Rn,
                                      (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
                                      (AdjustExtImm imm:$imm)), dsub)>;
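  // For example (a sketch, for the byte case where N = 8): a 64-bit EXT of
  // the high half of $Rn and $Rm with immediate 3 becomes a 128-bit EXT with
  // immediate 3 + 8 = 11; bytes 11..18 of the concatenation are exactly the
  // requested result, delivered in the low 64 bits.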
}

defm : ExtPat<v8i8, v16i8, 8>;
defm : ExtPat<v4i16, v8i16, 4>;
defm : ExtPat<v4f16, v8f16, 4>;
defm : ExtPat<v4bf16, v8bf16, 4>;
defm : ExtPat<v2i32, v4i32, 2>;
defm : ExtPat<v2f32, v4f32, 2>;
defm : ExtPat<v1i64, v2i64, 1>;
defm : ExtPat<v1f64, v2f64, 1>;

//----------------------------------------------------------------------------
// AdvSIMD zip vector
//----------------------------------------------------------------------------

defm TRN1 : SIMDZipVector<0b010, "trn1", AArch64trn1>;
defm TRN2 : SIMDZipVector<0b110, "trn2", AArch64trn2>;
defm UZP1 : SIMDZipVector<0b001, "uzp1", AArch64uzp1>;
defm UZP2 : SIMDZipVector<0b101, "uzp2", AArch64uzp2>;
defm ZIP1 : SIMDZipVector<0b011, "zip1", AArch64zip1>;
defm ZIP2 : SIMDZipVector<0b111, "zip2", AArch64zip2>;

def trunc_optional_assert_ext : PatFrags<(ops node:$op0),
                                         [(trunc node:$op0),
                                          (assertzext (trunc node:$op0)),
                                          (assertsext (trunc node:$op0))]>;

// concat_vectors(trunc(x), trunc(y)) -> uzp1(x, y)
// concat_vectors(assertzext(trunc(x)), assertzext(trunc(y))) -> uzp1(x, y)
// concat_vectors(assertsext(trunc(x)), assertsext(trunc(y))) -> uzp1(x, y)
class concat_trunc_to_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy>
  : Pat<(ConcatTy (concat_vectors (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))),
                                  (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))),
        (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm)>;
def : concat_trunc_to_uzp1_pat<v8i16, v8i8, v16i8>;
def : concat_trunc_to_uzp1_pat<v4i32, v4i16, v8i16>;
def : concat_trunc_to_uzp1_pat<v2i64, v2i32, v4i32>;
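// For example (a sketch, little-endian lane order): "uzp1 v0.16b, v0.16b,
// v1.16b" gathers the even-numbered bytes of both inputs, i.e. the low byte
// of every halfword, which is exactly the two truncations concatenated.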

// trunc(concat_vectors(trunc(x), trunc(y))) -> xtn(uzp1(x, y))
// trunc(concat_vectors(assertzext(trunc(x)), assertzext(trunc(y)))) -> xtn(uzp1(x, y))
// trunc(concat_vectors(assertsext(trunc(x)), assertsext(trunc(y)))) -> xtn(uzp1(x, y))
class trunc_concat_trunc_to_xtn_uzp1_pat<ValueType SrcTy, ValueType TruncTy, ValueType ConcatTy,
                                         ValueType Ty>
  : Pat<(Ty (trunc_optional_assert_ext
                    (ConcatTy (concat_vectors
                                  (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vn))),
                                  (TruncTy (trunc_optional_assert_ext (SrcTy V128:$Vm))))))),
        (!cast<Instruction>("XTN"#Ty) (!cast<Instruction>("UZP1"#ConcatTy) V128:$Vn, V128:$Vm))>;
def : trunc_concat_trunc_to_xtn_uzp1_pat<v4i32, v4i16, v8i16, v8i8>;
def : trunc_concat_trunc_to_xtn_uzp1_pat<v2i64, v2i32, v4i32, v4i16>;

def : Pat<(v8i8 (trunc (concat_vectors (v4i16 V64:$Vn), (v4i16 V64:$Vm)))),
          (UZP1v8i8 V64:$Vn, V64:$Vm)>;
def : Pat<(v4i16 (trunc (concat_vectors (v2i32 V64:$Vn), (v2i32 V64:$Vm)))),
          (UZP1v4i16 V64:$Vn, V64:$Vm)>;

def : Pat<(v16i8 (concat_vectors
                 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vn), (i32 8)))),
                 (v8i8 (trunc (AArch64vlshr (v8i16 V128:$Vm), (i32 8)))))),
          (UZP2v16i8 V128:$Vn, V128:$Vm)>;
def : Pat<(v8i16 (concat_vectors
                 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vn), (i32 16)))),
                 (v4i16 (trunc (AArch64vlshr (v4i32 V128:$Vm), (i32 16)))))),
          (UZP2v8i16 V128:$Vn, V128:$Vm)>;
def : Pat<(v4i32 (concat_vectors
                 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vn), (i32 32)))),
                 (v2i32 (trunc (AArch64vlshr (v2i64 V128:$Vm), (i32 32)))))),
          (UZP2v4i32 V128:$Vn, V128:$Vm)>;

//----------------------------------------------------------------------------
// AdvSIMD TBL/TBX instructions
//----------------------------------------------------------------------------

defm TBL : SIMDTableLookup<    0, "tbl">;
defm TBX : SIMDTableLookupTied<1, "tbx">;

def : Pat<(v8i8 (int_aarch64_neon_tbl1 (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBLv8i8One VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbl1 (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBLv16i8One V128:$Ri, V128:$Rn)>;

def : Pat<(v8i8 (int_aarch64_neon_tbx1 (v8i8 V64:$Rd),
                  (v16i8 VecListOne128:$Rn), (v8i8 V64:$Ri))),
          (TBXv8i8One V64:$Rd, VecListOne128:$Rn, V64:$Ri)>;
def : Pat<(v16i8 (int_aarch64_neon_tbx1 (v16i8 V128:$Rd),
                   (v16i8 V128:$Ri), (v16i8 V128:$Rn))),
          (TBXv16i8One V128:$Rd, V128:$Ri, V128:$Rn)>;

//----------------------------------------------------------------------------
// AdvSIMD LUT instructions
//----------------------------------------------------------------------------
let Predicates = [HasLUT] in {
  defm LUT2 : BaseSIMDTableLookupIndexed2<"luti2">;
  defm LUT4 : BaseSIMDTableLookupIndexed4<"luti4">;
}

//----------------------------------------------------------------------------
// AdvSIMD scalar DUP instruction
//----------------------------------------------------------------------------

defm DUP : SIMDScalarDUP<"mov">;

//----------------------------------------------------------------------------
// AdvSIMD scalar pairwise instructions
//----------------------------------------------------------------------------

defm ADDP    : SIMDPairwiseScalarD<0, 0b11011, "addp">;
defm FADDP   : SIMDFPPairwiseScalar<0, 0b01101, "faddp">;
defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">;
defm FMAXP   : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">;
defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">;
defm FMINP   : SIMDFPPairwiseScalar<1, 0b01111, "fminp">;

// Only the lower half of the result of the inner FADDP is used in the patterns
// below, so the second operand does not matter. Re-use the first input
// operand, so no additional dependencies need to be introduced.
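// For example (a sketch of the v8f16 case): the innermost FADDP folds eight
// lanes into four pairwise sums, the second FADDP folds those into two, and
// the final scalar FADDP adds the remaining pair; reusing $Rn as the ignored
// second operand keeps the dependency chain short.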
let Predicates = [HasFullFP16] in {
def : Pat<(f16 (vecreduce_fadd (v8f16 V128:$Rn))),
            (FADDPv2i16p
              (EXTRACT_SUBREG
                 (FADDPv8f16 (FADDPv8f16 V128:$Rn, V128:$Rn), V128:$Rn),
               dsub))>;
def : Pat<(f16 (vecreduce_fadd (v4f16 V64:$Rn))),
          (FADDPv2i16p (FADDPv4f16 V64:$Rn, V64:$Rn))>;
}
def : Pat<(f32 (vecreduce_fadd (v4f32 V128:$Rn))),
          (FADDPv2i32p
            (EXTRACT_SUBREG
              (FADDPv4f32 V128:$Rn, V128:$Rn),
             dsub))>;
def : Pat<(f32 (vecreduce_fadd (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f64 (vecreduce_fadd (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;

def : Pat<(v2i64 (AArch64saddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(v2i64 (AArch64uaddv V128:$Rn)),
          (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v2f32 V64:$Rn))),
          (FADDPv2i32p V64:$Rn)>;
def : Pat<(f32 (int_aarch64_neon_faddv (v4f32 V128:$Rn))),
          (FADDPv2i32p (EXTRACT_SUBREG (FADDPv4f32 V128:$Rn, V128:$Rn), dsub))>;
def : Pat<(f64 (int_aarch64_neon_faddv (v2f64 V128:$Rn))),
          (FADDPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fmaxnmv (v2f32 V64:$Rn))),
          (FMAXNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fmaxnmv (v2f64 V128:$Rn))),
          (FMAXNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fmaxv (v2f32 V64:$Rn))),
          (FMAXPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fmaxv (v2f64 V128:$Rn))),
          (FMAXPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fminnmv (v2f32 V64:$Rn))),
          (FMINNMPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fminnmv (v2f64 V128:$Rn))),
          (FMINNMPv2i64p V128:$Rn)>;
def : Pat<(f32 (AArch64fminv (v2f32 V64:$Rn))),
          (FMINPv2i32p V64:$Rn)>;
def : Pat<(f64 (AArch64fminv (v2f64 V128:$Rn))),
          (FMINPv2i64p V128:$Rn)>;

//----------------------------------------------------------------------------
// AdvSIMD INS/DUP instructions
//----------------------------------------------------------------------------

def DUPv8i8gpr  : SIMDDupFromMain<0, {?,?,?,?,1}, ".8b", v8i8, V64, GPR32>;
def DUPv16i8gpr : SIMDDupFromMain<1, {?,?,?,?,1}, ".16b", v16i8, V128, GPR32>;
def DUPv4i16gpr : SIMDDupFromMain<0, {?,?,?,1,0}, ".4h", v4i16, V64, GPR32>;
def DUPv8i16gpr : SIMDDupFromMain<1, {?,?,?,1,0}, ".8h", v8i16, V128, GPR32>;
def DUPv2i32gpr : SIMDDupFromMain<0, {?,?,1,0,0}, ".2s", v2i32, V64, GPR32>;
def DUPv4i32gpr : SIMDDupFromMain<1, {?,?,1,0,0}, ".4s", v4i32, V128, GPR32>;
def DUPv2i64gpr : SIMDDupFromMain<1, {?,1,0,0,0}, ".2d", v2i64, V128, GPR64>;

def DUPv2i64lane : SIMDDup64FromElement;
def DUPv2i32lane : SIMDDup32FromElement<0, ".2s", v2i32, V64>;
def DUPv4i32lane : SIMDDup32FromElement<1, ".4s", v4i32, V128>;
def DUPv4i16lane : SIMDDup16FromElement<0, ".4h", v4i16, V64>;
def DUPv8i16lane : SIMDDup16FromElement<1, ".8h", v8i16, V128>;
def DUPv8i8lane  : SIMDDup8FromElement <0, ".8b", v8i8, V64>;
def DUPv16i8lane : SIMDDup8FromElement <1, ".16b", v16i8, V128>;

// DUP from a 64-bit register to a 64-bit register is just a copy
def : Pat<(v1i64 (AArch64dup (i64 GPR64:$Rn))),
          (COPY_TO_REGCLASS GPR64:$Rn, FPR64)>;
def : Pat<(v1f64 (AArch64dup (f64 FPR64:$Rn))),
          (COPY_TO_REGCLASS FPR64:$Rn, FPR64)>;

def : Pat<(v2f32 (AArch64dup (f32 FPR32:$Rn))),
          (v2f32 (DUPv2i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v4f32 (AArch64dup (f32 FPR32:$Rn))),
          (v4f32 (DUPv4i32lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rn, ssub),
            (i64 0)))>;
def : Pat<(v2f64 (AArch64dup (f64 FPR64:$Rn))),
          (v2f64 (DUPv2i64lane
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rn, dsub),
            (i64 0)))>;
def : Pat<(v4f16 (AArch64dup (f16 FPR16:$Rn))),
          (v4f16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v4bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v4bf16 (DUPv4i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8f16 (AArch64dup (f16 FPR16:$Rn))),
          (v8f16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;
def : Pat<(v8bf16 (AArch64dup (bf16 FPR16:$Rn))),
          (v8bf16 (DUPv8i16lane
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR16:$Rn, hsub),
            (i64 0)))>;

def : Pat<(v4f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8f16 (AArch64duplane16 (v8f16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v4bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv4i16lane V128:$Rn, VectorIndexH:$imm)>;
def : Pat<(v8bf16 (AArch64duplane16 (v8bf16 V128:$Rn), VectorIndexH:$imm)),
          (DUPv8i16lane V128:$Rn, VectorIndexH:$imm)>;

def : Pat<(v2f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv2i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v4f32 (AArch64duplane32 (v4f32 V128:$Rn), VectorIndexS:$imm)),
          (DUPv4i32lane V128:$Rn, VectorIndexS:$imm)>;
def : Pat<(v2f64 (AArch64duplane64 (v2f64 V128:$Rn), VectorIndexD:$imm)),
          (DUPv2i64lane V128:$Rn, VectorIndexD:$imm)>;

// If there's an (AArch64dup (vector_extract ...) ...), we can use a duplane
// instruction even if the types don't match: we just have to remap the lane
// carefully. N.b. this trick only applies to truncations.
def VecIndex_x2 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(2 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x4 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(4 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
def VecIndex_x8 : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(8 * N->getZExtValue(), SDLoc(N), MVT::i64);
}]>;
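// For example (a sketch, little-endian lane order, using the patterns below):
// duplicating the i8 truncation of halfword lane 3 of a v8i16 becomes
// "dup v0.8b, v1.b[6]", since byte lane 2 * 3 = 6 holds the low byte of
// halfword lane 3.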

multiclass DUPWithTruncPats<ValueType ResVT, ValueType Src64VT,
                            ValueType Src128VT, ValueType ScalVT,
                            Instruction DUP, SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src128VT V128:$Rn),
                                                     imm:$idx)))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (ScalVT (vector_extract (Src64VT V64:$Rn),
                                                     imm:$idx)))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTruncPats<v8i8,   v4i16, v8i16, i32, DUPv8i8lane,  VecIndex_x2>;
defm : DUPWithTruncPats<v8i8,   v2i32, v4i32, i32, DUPv8i8lane,  VecIndex_x4>;
defm : DUPWithTruncPats<v4i16,  v2i32, v4i32, i32, DUPv4i16lane, VecIndex_x2>;

defm : DUPWithTruncPats<v16i8,  v4i16, v8i16, i32, DUPv16i8lane, VecIndex_x2>;
defm : DUPWithTruncPats<v16i8,  v2i32, v4i32, i32, DUPv16i8lane, VecIndex_x4>;
defm : DUPWithTruncPats<v8i16,  v2i32, v4i32, i32, DUPv8i16lane, VecIndex_x2>;

multiclass DUPWithTrunci64Pats<ValueType ResVT, Instruction DUP,
                               SDNodeXForm IdxXFORM> {
  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v2i64 V128:$Rn),
                                                         imm:$idx))))),
            (DUP V128:$Rn, (IdxXFORM imm:$idx))>;

  def : Pat<(ResVT (AArch64dup (i32 (trunc (extractelt (v1i64 V64:$Rn),
                                                       imm:$idx))))),
            (DUP (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), (IdxXFORM imm:$idx))>;
}

defm : DUPWithTrunci64Pats<v8i8,  DUPv8i8lane,   VecIndex_x8>;
defm : DUPWithTrunci64Pats<v4i16, DUPv4i16lane,  VecIndex_x4>;
defm : DUPWithTrunci64Pats<v2i32, DUPv2i32lane,  VecIndex_x2>;

defm : DUPWithTrunci64Pats<v16i8, DUPv16i8lane, VecIndex_x8>;
defm : DUPWithTrunci64Pats<v8i16, DUPv8i16lane, VecIndex_x4>;
defm : DUPWithTrunci64Pats<v4i32, DUPv4i32lane, VecIndex_x2>;

// SMOV and UMOV definitions, with some extra patterns for convenience
defm SMOV : SMov;
defm UMOV : UMov;

def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i32 (SMOVvi8to32 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i32 (SMOVvi16to32 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext_inreg (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(sext (i32 (vector_extract (v4i32 V128:$Rn), VectorIndexS:$idx))),
          (i64 (SMOVvi32to64 V128:$Rn, VectorIndexS:$idx))>;

def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
            VectorIndexB:$idx)))), i8),
          (i64 (SMOVvi8to64 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(sext_inreg (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
            VectorIndexH:$idx)))), i16),
          (i64 (SMOVvi16to64 V128:$Rn, VectorIndexH:$idx))>;

// Extracting i8 or i16 elements will have the zero-extend transformed to
// an 'and' mask by type legalization since neither i8 nor i16 is a legal type
// for AArch64. Match these patterns here since UMOV already zeroes out the high
// bits of the destination register.
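// For example (an illustrative sketch): after legalization
//   %e = extractelement <16 x i8> %v, i64 3
//   %z = zext i8 %e to i32
// appears as an extract followed by an 'and' with 0xff, which maps onto
// "umov w0, v0.b[3]" with no separate masking instruction.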
def : Pat<(and (vector_extract (v16i8 V128:$Rn), VectorIndexB:$idx),
               (i32 0xff)),
          (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx))>;
def : Pat<(and (vector_extract (v8i16 V128:$Rn), VectorIndexH:$idx),
               (i32 0xffff)),
          (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx))>;

def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v16i8 V128:$Rn),
            VectorIndexB:$idx)))), (i64 0xff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi8 V128:$Rn, VectorIndexB:$idx)), sub_32)>;
def : Pat<(i64 (and (i64 (anyext (i32 (vector_extract (v8i16 V128:$Rn),
            VectorIndexH:$idx)))), (i64 0xffff))),
          (SUBREG_TO_REG (i64 0), (i32 (UMOVvi16 V128:$Rn, VectorIndexH:$idx)), sub_32)>;

defm INS : SIMDIns;

def : Pat<(v16i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v8i8 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

// The top bits will be zero from the FMOVWSr
def : Pat<(v8i8 (bitconvert (i64 (zext GPR32:$Rn)))),
          (SUBREG_TO_REG (i32 0), (f32 (FMOVWSr GPR32:$Rn)), ssub)>;

def : Pat<(v8i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;
def : Pat<(v4i16 (scalar_to_vector GPR32:$Rn)),
          (SUBREG_TO_REG (i32 0),
                         (f32 (COPY_TO_REGCLASS GPR32:$Rn, FPR32)), ssub)>;

def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))),
          (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v4bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v4bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;
def : Pat<(v8bf16 (scalar_to_vector (bf16 FPR16:$Rn))),
          (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>;

def : Pat<(v2i32 (scalar_to_vector (i32 FPR32:$Rn))),
            (v2i32 (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)),
                                  (i32 FPR32:$Rn), ssub))>;
def : Pat<(v4i32 (scalar_to_vector (i32 FPR32:$Rn))),
            (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
                                  (i32 FPR32:$Rn), ssub))>;

def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))),
            (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)),
                                  (i64 FPR64:$Rn), dsub))>;

def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;
def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))),
          (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>;

def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$Rn))),
          (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rn, dsub)>;

def : Pat<(v4f16 (vector_insert (v4f16 V64:$Rn),
            (f16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(vector_insert (v8f16 V128:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
          (INSvi16gpr V128:$Rn, VectorIndexH:$imm, WZR)>;
def : Pat<(vector_insert (v4f16 V64:$Rn), (f16 fpimm0), (i64 VectorIndexH:$imm)),
          (EXTRACT_SUBREG (INSvi16gpr (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexH:$imm, WZR), dsub)>;
def : Pat<(vector_insert (v4f32 V128:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
          (INSvi32gpr V128:$Rn, VectorIndexS:$imm, WZR)>;
def : Pat<(vector_insert (v2f32 V64:$Rn), (f32 fpimm0), (i64 VectorIndexS:$imm)),
          (EXTRACT_SUBREG (INSvi32gpr (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)), VectorIndexS:$imm, WZR), dsub)>;
def : Pat<(vector_insert v2f64:$Rn, (f64 fpimm0), (i64 VectorIndexD:$imm)),
          (INSvi64gpr V128:$Rn, VectorIndexD:$imm, XZR)>;

def : Pat<(v8f16 (vector_insert (v8f16 V128:$Rn),
            (f16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8f16 (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v4bf16 (vector_insert (v4bf16 V64:$Rn),
            (bf16 FPR16:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi16lane
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
              (i64 0)),
            dsub)>;

def : Pat<(v8bf16 (vector_insert (v8bf16 V128:$Rn),
            (bf16 FPR16:$Rm), (i64 VectorIndexH:$imm))),
          (INSvi16lane
            V128:$Rn, VectorIndexH:$imm,
            (v8bf16 (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR16:$Rm, hsub)),
            (i64 0))>;

def : Pat<(v2f32 (vector_insert (v2f32 V64:$Rn),
            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32lane
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
              VectorIndexS:$imm,
              (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
              (i64 0)),
            dsub)>;
def : Pat<(v4f32 (vector_insert (v4f32 V128:$Rn),
            (f32 FPR32:$Rm), (i64 VectorIndexS:$imm))),
          (INSvi32lane
            V128:$Rn, VectorIndexS:$imm,
            (v4f32 (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rm, ssub)),
            (i64 0))>;
def : Pat<(v2f64 (vector_insert (v2f64 V128:$Rn),
            (f64 FPR64:$Rm), (i64 VectorIndexD:$imm))),
          (INSvi64lane
            V128:$Rn, VectorIndexD:$imm,
            (v2f64 (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$Rm, dsub)),
            (i64 0))>;

def : Pat<(v2i32 (vector_insert (v2i32 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexS:$imm))),
          (EXTRACT_SUBREG
            (INSvi32gpr (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexS:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v4i16 (vector_insert (v4i16 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexH:$imm))),
          (EXTRACT_SUBREG
            (INSvi16gpr (v8i16 (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                        VectorIndexH:$imm, GPR32:$Rm),
            dsub)>;
def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i32 GPR32:$Rm), (i64 VectorIndexB:$imm))),
          (EXTRACT_SUBREG
            (INSvi8gpr (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                       VectorIndexB:$imm, GPR32:$Rm),
            dsub)>;

def : Pat<(v8i8 (vector_insert (v8i8 V64:$Rn), (i8 FPR8:$Rm), (i64 VectorIndexB:$imm))),
          (EXTRACT_SUBREG
            (INSvi8lane (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), V64:$Rn, dsub)),
                       VectorIndexB:$imm, (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rm, bsub)), (i64 0)),
            dsub)>;
def : Pat<(v16i8 (vector_insert (v16i8 V128:$Rn), (i8 FPR8:$Rm), (i64 VectorIndexB:$imm))),
          (INSvi8lane V128:$Rn, VectorIndexB:$imm,
             (v16i8 (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR8:$Rm, bsub)), (i64 0))>;

// Copy an element at a constant index in one vector into a constant indexed
// element of another.
// FIXME refactor to a shared class/def parameterized on vector type, vector
// index type and INS extension
def : Pat<(v16i8 (int_aarch64_neon_vcopy_lane
                   (v16i8 V128:$Vd), VectorIndexB:$idx, (v16i8 V128:$Vs),
                   VectorIndexB:$idx2)),
          (v16i8 (INSvi8lane
                   V128:$Vd, VectorIndexB:$idx, V128:$Vs, VectorIndexB:$idx2)
          )>;
def : Pat<(v8i16 (int_aarch64_neon_vcopy_lane
                   (v8i16 V128:$Vd), VectorIndexH:$idx, (v8i16 V128:$Vs),
                   VectorIndexH:$idx2)),
          (v8i16 (INSvi16lane
                   V128:$Vd, VectorIndexH:$idx, V128:$Vs, VectorIndexH:$idx2)
          )>;
def : Pat<(v4i32 (int_aarch64_neon_vcopy_lane
                   (v4i32 V128:$Vd), VectorIndexS:$idx, (v4i32 V128:$Vs),
                   VectorIndexS:$idx2)),
          (v4i32 (INSvi32lane
                   V128:$Vd, VectorIndexS:$idx, V128:$Vs, VectorIndexS:$idx2)
          )>;
def : Pat<(v2i64 (int_aarch64_neon_vcopy_lane
                   (v2i64 V128:$Vd), VectorIndexD:$idx, (v2i64 V128:$Vs),
                   VectorIndexD:$idx2)),
          (v2i64 (INSvi64lane
                   V128:$Vd, VectorIndexD:$idx, V128:$Vs, VectorIndexD:$idx2)
          )>;

multiclass Neon_INS_elt_pattern<ValueType VT128, ValueType VT64,
                                ValueType VTScal, Instruction INS> {
  def : Pat<(VT128 (vector_insert V128:$src,
                        (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
                        (i64 imm:$Immd))),
            (INS V128:$src, imm:$Immd, V128:$Rn, imm:$Immn)>;

  def : Pat<(VT128 (vector_insert V128:$src,
                        (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
                        (i64 imm:$Immd))),
            (INS V128:$src, imm:$Immd,
                 (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                        (VTScal (vector_extract (VT128 V128:$Rn), (i64 imm:$Immn))),
                        (i64 imm:$Immd))),
            (EXTRACT_SUBREG (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub),
                                 imm:$Immd, V128:$Rn, imm:$Immn),
                            dsub)>;

  def : Pat<(VT64 (vector_insert V64:$src,
                        (VTScal (vector_extract (VT64 V64:$Rn), (i64 imm:$Immn))),
                        (i64 imm:$Immd))),
            (EXTRACT_SUBREG
                (INS (SUBREG_TO_REG (i64 0), V64:$src, dsub), imm:$Immd,
                     (SUBREG_TO_REG (i64 0), V64:$Rn, dsub), imm:$Immn),
                dsub)>;
}

defm : Neon_INS_elt_pattern<v8f16, v4f16, f16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v8bf16, v4bf16, bf16, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4f32, v2f32, f32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2f64, v1f64, f64, INSvi64lane>;

defm : Neon_INS_elt_pattern<v16i8, v8i8, i32, INSvi8lane>;
defm : Neon_INS_elt_pattern<v8i16, v4i16, i32, INSvi16lane>;
defm : Neon_INS_elt_pattern<v4i32, v2i32, i32, INSvi32lane>;
defm : Neon_INS_elt_pattern<v2i64, v1i64, i64, INSvi64lane>;

// Insert from bitcast
// vector_insert(src, bitcast(f32 n), lane) -> INSvi32lane(src, lane, INSERT_SUBREG(-, n), 0)
def : Pat<(v4i32 (vector_insert v4i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), (i64 imm:$Immd))),
          (INSvi32lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0)>;
def : Pat<(v2i32 (vector_insert v2i32:$src, (i32 (bitconvert (f32 FPR32:$Sn))), (i64 imm:$Immd))),
          (EXTRACT_SUBREG
            (INSvi32lane (v4i32 (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), V64:$src, dsub)),
                         imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR32:$Sn, ssub), 0),
            dsub)>;
def : Pat<(v2i64 (vector_insert v2i64:$src, (i64 (bitconvert (f64 FPR64:$Sn))), (i64 imm:$Immd))),
          (INSvi64lane V128:$src, imm:$Immd, (INSERT_SUBREG (IMPLICIT_DEF), FPR64:$Sn, dsub), 0)>;

// bitcast of an extract
// f32 bitcast(vector_extract(v4i32 src, lane)) -> EXTRACT_SUBREG(INSvi32lane(-, 0, src, lane))
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, imm:$Immd)))),
          (EXTRACT_SUBREG (INSvi32lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), ssub)>;
def : Pat<(f32 (bitconvert (i32 (vector_extract v4i32:$src, (i64 0))))),
          (EXTRACT_SUBREG V128:$src, ssub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, imm:$Immd)))),
          (EXTRACT_SUBREG (INSvi64lane (IMPLICIT_DEF), 0, V128:$src, imm:$Immd), dsub)>;
def : Pat<(f64 (bitconvert (i64 (vector_extract v2i64:$src, (i64 0))))),
          (EXTRACT_SUBREG V128:$src, dsub)>;

// Floating point vector extractions are codegen'd as either a sequence of
// subregister extractions, or a MOV (aka DUP here) if the lane number is
// anything other than zero.
def : Pat<(f64 (vector_extract (v2f64 V128:$Rn), (i64 0))),
          (f64 (EXTRACT_SUBREG V128:$Rn, dsub))>;
def : Pat<(f32 (vector_extract (v4f32 V128:$Rn), (i64 0))),
          (f32 (EXTRACT_SUBREG V128:$Rn, ssub))>;
def : Pat<(f16 (vector_extract (v8f16 V128:$Rn), (i64 0))),
          (f16 (EXTRACT_SUBREG V128:$Rn, hsub))>;
def : Pat<(bf16 (vector_extract (v8bf16 V128:$Rn), (i64 0))),
          (bf16 (EXTRACT_SUBREG V128:$Rn, hsub))>;


def : Pat<(vector_extract (v2f64 V128:$Rn), VectorIndexD:$idx),
          (f64 (DUPi64 V128:$Rn, VectorIndexD:$idx))>;
def : Pat<(vector_extract (v4f32 V128:$Rn), VectorIndexS:$idx),
          (f32 (DUPi32 V128:$Rn, VectorIndexS:$idx))>;
def : Pat<(vector_extract (v8f16 V128:$Rn), VectorIndexH:$idx),
          (f16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;
def : Pat<(vector_extract (v8bf16 V128:$Rn), VectorIndexH:$idx),
          (bf16 (DUPi16 V128:$Rn, VectorIndexH:$idx))>;

// All concat_vectors operations are canonicalised to act on i64 vectors for
// AArch64. In the general case we need an instruction, which may just as well
// be INS.
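// For example (a sketch): concat_vectors(v2f32 %a, v2f32 %b) becomes
// "mov v0.d[1], v1.d[0]" once %a is already in the low half of v0.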
multiclass ConcatPat<ValueType DstTy, ValueType SrcTy> {
  def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rd), V64:$Rn)),
            (INSvi64lane (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), 1,
                         (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub), 0)>;

  // If the high lanes are zero we can instead emit a d->d register mov, which
  // will implicitly clear the upper bits.
  def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), immAllZerosV)),
            (SUBREG_TO_REG (i64 0), (FMOVDr V64:$Rn), dsub)>;

  // If the high lanes are undef we can just ignore them:
  def : Pat<(DstTy (concat_vectors (SrcTy V64:$Rn), undef)),
            (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rn, dsub)>;
}

defm : ConcatPat<v2i64, v1i64>;
defm : ConcatPat<v2f64, v1f64>;
defm : ConcatPat<v4i32, v2i32>;
defm : ConcatPat<v4f32, v2f32>;
defm : ConcatPat<v8i16, v4i16>;
defm : ConcatPat<v8f16, v4f16>;
defm : ConcatPat<v8bf16, v4bf16>;
defm : ConcatPat<v16i8, v8i8>;

//----------------------------------------------------------------------------
// AdvSIMD across lanes instructions
//----------------------------------------------------------------------------

defm ADDV    : SIMDAcrossLanesBHS<0, 0b11011, "addv">;
defm SMAXV   : SIMDAcrossLanesBHS<0, 0b01010, "smaxv">;
defm SMINV   : SIMDAcrossLanesBHS<0, 0b11010, "sminv">;
defm UMAXV   : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">;
defm UMINV   : SIMDAcrossLanesBHS<1, 0b11010, "uminv">;
defm SADDLV  : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">;
defm UADDLV  : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">;
defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", AArch64fmaxnmv>;
defm FMAXV   : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", AArch64fmaxv>;
defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", AArch64fminnmv>;
defm FMINV   : SIMDFPAcrossLanes<0b01111, 1, "fminv", AArch64fminv>;

multiclass SIMDAcrossLaneLongPairIntrinsic<string Opc, SDPatternOperator addlp> {
  // Patterns for addv(addlp(x)) ==> addlv
  def : Pat<(i32 (vector_extract (v8i16 (insert_subvector undef,
              (v4i16 (AArch64uaddv (v4i16 (addlp (v8i8 V64:$op))))),
              (i64 0))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v8i8v") V64:$op), hsub), ssub)>;
  def : Pat<(i32 (vector_extract (v8i16 (AArch64uaddv (v8i16 (addlp (v16i8 V128:$op))))), (i64 0))),
            (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
              (!cast<Instruction>(Opc#"v16i8v") V128:$op), hsub), ssub)>;
  def : Pat<(v4i32 (AArch64uaddv (v4i32 (addlp (v8i16 V128:$op))))),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v8i16v") V128:$op), ssub)>;

  // Patterns for addp(addlp(x)) ==> addlv
  def : Pat<(v2i32 (AArch64uaddv (v2i32 (addlp (v4i16 V64:$op))))),
            (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i16v") V64:$op), ssub)>;
  def : Pat<(v2i64 (AArch64uaddv (v2i64 (addlp (v4i32 V128:$op))))),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (!cast<Instruction>(Opc#"v4i32v") V128:$op), dsub)>;
}

defm : SIMDAcrossLaneLongPairIntrinsic<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsic<"SADDLV", AArch64saddlp>;
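// For example (a sketch): uaddv(uaddlp(v8i8 %x)) sums all eight bytes, so it
// selects to the single instruction "uaddlv h0, v0.8b" instead of a pairwise
// widening step followed by a separate reduction.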

// These patterns are used by GlobalISel
multiclass SIMDAcrossLaneLongPairIntrinsicGISel<string Opc, SDPatternOperator addlp> {
  // Patterns for addv(addlp(x)) ==> addlv
  def : Pat<(i16 (vecreduce_add (v4i16 (addlp (v8i8 V64:$Rn))))),
            (!cast<Instruction>(Opc#"v8i8v") V64:$Rn)>;
  def : Pat<(i16 (vecreduce_add (v8i16 (addlp (v16i8 V128:$Rn))))),
            (!cast<Instruction>(Opc#"v16i8v") V128:$Rn)>;
  def : Pat<(i32 (vecreduce_add (v4i32 (addlp (v8i16 V128:$Rn))))),
            (!cast<Instruction>(Opc#"v8i16v") V128:$Rn)>;

  // Patterns for addp(addlp(x)) ==> addlv
  def : Pat<(i32 (vecreduce_add (v2i32 (addlp (v4i16 V64:$Rn))))),
            (!cast<Instruction>(Opc#"v4i16v") V64:$Rn)>;
  def : Pat<(i64 (vecreduce_add (v2i64 (addlp (v4i32 V128:$Rn))))),
            (!cast<Instruction>(Opc#"v4i32v") V128:$Rn)>;
}

defm : SIMDAcrossLaneLongPairIntrinsicGISel<"UADDLV", AArch64uaddlp>;
defm : SIMDAcrossLaneLongPairIntrinsicGISel<"SADDLV", AArch64saddlp>;

// Patterns for uaddlv(uaddlp(x)) ==> uaddlv
def : Pat<(i64 (int_aarch64_neon_uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (i64 (EXTRACT_SUBREG
            (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub)),
            dsub))>;

def : Pat<(i32 (int_aarch64_neon_uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
          (i32 (EXTRACT_SUBREG
            (v8i16 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub)),
            ssub))>;

def : Pat<(v2i64 (AArch64uaddlv (v4i32 (AArch64uaddlp (v8i16 V128:$op))))),
          (v2i64 (SUBREG_TO_REG (i64 0), (UADDLVv8i16v V128:$op), ssub))>;

def : Pat<(v4i32 (AArch64uaddlv (v8i16 (AArch64uaddlp (v16i8 V128:$op))))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv16i8v V128:$op), hsub))>;

def : Pat<(v4i32 (AArch64uaddlv (v4i16 (AArch64uaddlp (v8i8 V64:$op))))),
          (v4i32 (SUBREG_TO_REG (i64 0), (UADDLVv8i8v V64:$op), hsub))>;

multiclass SIMDAcrossLaneLongReductionIntrinsic<string Opc, SDPatternOperator addlv> {
  def : Pat<(v4i32 (addlv (v8i8 V64:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v8i8v") V64:$Rn), hsub))>;

  def : Pat<(v4i32 (addlv (v4i16 V64:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v4i16v") V64:$Rn), ssub))>;

  def : Pat<(v4i32 (addlv (v16i8 V128:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v16i8v") V128:$Rn), hsub))>;

  def : Pat<(v4i32 (addlv (v8i16 V128:$Rn))),
            (v4i32 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v8i16v") V128:$Rn), ssub))>;

  def : Pat<(v2i64 (addlv (v4i32 V128:$Rn))),
            (v2i64 (SUBREG_TO_REG (i64 0), (!cast<Instruction>(Opc#"v4i32v") V128:$Rn), dsub))>;
}

defm : SIMDAcrossLaneLongReductionIntrinsic<"UADDLV", AArch64uaddlv>;
defm : SIMDAcrossLaneLongReductionIntrinsic<"SADDLV", AArch64saddlv>;

// Patterns for across-vector intrinsics that have a node equivalent which
// returns a vector (with only the low lane defined) instead of a scalar.
// In effect, opNode is the same as (scalar_to_vector (IntNode)).
multiclass SIMDAcrossLanesIntrinsic<string baseOpc,
                                    SDPatternOperator opNode> {
// If a lane instruction caught the vector_extract around opNode, we can
// directly match the latter to the instruction.
def : Pat<(v8i8 (opNode V64:$Rn)),
          (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub)>;
def : Pat<(v16i8 (opNode V128:$Rn)),
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub)>;
def : Pat<(v4i16 (opNode V64:$Rn)),
          (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub)>;
def : Pat<(v8i16 (opNode V128:$Rn)),
          (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub)>;
def : Pat<(v4i32 (opNode V128:$Rn)),
          (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), ssub)>;


// If none did, fall back to the explicit patterns, consuming the vector_extract.
def : Pat<(i32 (vector_extract (insert_subvector undef, (v8i8 (opNode V64:$Rn)),
            (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (v16i8 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn),
            bsub), ssub)>;
def : Pat<(i32 (vector_extract (insert_subvector undef,
            (v4i16 (opNode V64:$Rn)), (i64 0)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v8i16 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn),
            hsub), ssub)>;
def : Pat<(i32 (vector_extract (v4i32 (opNode V128:$Rn)), (i64 0))),
          (EXTRACT_SUBREG (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn),
            ssub), ssub)>;

}

multiclass SIMDAcrossLanesSignedIntrinsic<string baseOpc,
                                          SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a sign extension after this intrinsic, consume it, as SMOV
// already performed it.
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v16i8 V128:$Rn)), (i64 0))), i8)),
          (i32 (SMOVvi8to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
             (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
            (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
           (i64 0)))>;
def : Pat<(i32 (sext_inreg (i32 (vector_extract
            (opNode (v8i16 V128:$Rn)), (i64 0))), i16)),
          (i32 (SMOVvi16to32
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
             (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
            (i64 0)))>;
}

multiclass SIMDAcrossLanesUnsignedIntrinsic<string baseOpc,
                                            SDPatternOperator opNode>
    : SIMDAcrossLanesIntrinsic<baseOpc, opNode> {
// If there is a masking operation keeping only what has actually been
// generated, consume it.
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v8i8 V64:$Rn)), (i64 0)), (i64 0))), maski8_or_more)),
      (i32 (EXTRACT_SUBREG
        (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
          (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), bsub),
        ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v16i8 V128:$Rn)), (i64 0))),
            maski8_or_more)),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), bsub),
          ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (insert_subvector undef,
            (opNode (v4i16 V64:$Rn)), (i64 0)), (i64 0))), maski16_or_more)),
          (i32 (EXTRACT_SUBREG
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), hsub),
            ssub))>;
def : Pat<(i32 (and (i32 (vector_extract (opNode (v8i16 V128:$Rn)), (i64 0))),
            maski16_or_more)),
        (i32 (EXTRACT_SUBREG
          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), hsub),
          ssub))>;
}

// For vecreduce_add, used by GlobalISel, not SDAG
def : Pat<(i8 (vecreduce_add (v8i8 V64:$Rn))),
          (i8 (ADDVv8i8v V64:$Rn))>;
def : Pat<(i8 (vecreduce_add (v16i8 V128:$Rn))),
          (i8 (ADDVv16i8v V128:$Rn))>;
def : Pat<(i16 (vecreduce_add (v4i16 V64:$Rn))),
          (i16 (ADDVv4i16v V64:$Rn))>;
def : Pat<(i16 (vecreduce_add (v8i16 V128:$Rn))),
          (i16 (ADDVv8i16v V128:$Rn))>;
def : Pat<(i32 (vecreduce_add (v2i32 V64:$Rn))),
          (i32 (EXTRACT_SUBREG (ADDPv2i32 V64:$Rn, V64:$Rn), ssub))>;
def : Pat<(i32 (vecreduce_add (v4i32 V128:$Rn))),
          (i32 (ADDVv4i32v V128:$Rn))>;
def : Pat<(i64 (vecreduce_add (v2i64 V128:$Rn))),
          (i64 (ADDPv2i64p V128:$Rn))>;

defm : SIMDAcrossLanesSignedIntrinsic<"ADDV",  AArch64saddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S, Vn.2S, Vm.2S; return Vd.s[0]; Vn == Vm
def : Pat<(v2i32 (AArch64saddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"ADDV", AArch64uaddv>;
// vaddv_[su]32 is special; -> ADDP Vd.2S, Vn.2S, Vm.2S; return Vd.s[0]; Vn == Vm
def : Pat<(v2i32 (AArch64uaddv (v2i32 V64:$Rn))),
          (ADDPv2i32 V64:$Rn, V64:$Rn)>;
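// For example (a sketch): vaddv_u32(%a) becomes "addp v0.2s, v0.2s, v0.2s";
// both result lanes then hold a[0] + a[1], and lane 0 is returned.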

defm : SIMDAcrossLanesSignedIntrinsic<"SMAXV", AArch64smaxv>;
def : Pat<(v2i32 (AArch64smaxv (v2i32 V64:$Rn))),
          (SMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesSignedIntrinsic<"SMINV", AArch64sminv>;
def : Pat<(v2i32 (AArch64sminv (v2i32 V64:$Rn))),
          (SMINPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMAXV", AArch64umaxv>;
def : Pat<(v2i32 (AArch64umaxv (v2i32 V64:$Rn))),
          (UMAXPv2i32 V64:$Rn, V64:$Rn)>;

defm : SIMDAcrossLanesUnsignedIntrinsic<"UMINV", AArch64uminv>;
def : Pat<(v2i32 (AArch64uminv (v2i32 V64:$Rn))),
          (UMINPv2i32 V64:$Rn, V64:$Rn)>;

// For vecreduce_{opc} used by GlobalISel, not SDAG at the moment,
// because GlobalISel allows us to specify the return register to be an FPR
7371multiclass SIMDAcrossLanesVecReductionIntrinsic<string baseOpc,
7372                                               SDPatternOperator opNode> {
7373def : Pat<(i8 (opNode (v8i8 FPR64:$Rn))),
7374          (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) FPR64:$Rn)>;
7375
7376def : Pat<(i8 (opNode (v16i8 FPR128:$Rn))),
7377          (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) FPR128:$Rn)>;
7378
7379def : Pat<(i16 (opNode (v4i16 FPR64:$Rn))),
7380          (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) FPR64:$Rn)>;
7381
7382def : Pat<(i16 (opNode (v8i16 FPR128:$Rn))),
7383          (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) FPR128:$Rn)>;
7384
7385def : Pat<(i32 (opNode (v4i32 V128:$Rn))),
7386          (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn)>;
7387}
7388
7389// For v2i32 source type, the pairwise instruction can be used instead
7390defm : SIMDAcrossLanesVecReductionIntrinsic<"UMINV", vecreduce_umin>;
7391def : Pat<(i32 (vecreduce_umin (v2i32 V64:$Rn))),
7392          (i32 (EXTRACT_SUBREG (UMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;
7393
7394defm : SIMDAcrossLanesVecReductionIntrinsic<"UMAXV", vecreduce_umax>;
7395def : Pat<(i32 (vecreduce_umax (v2i32 V64:$Rn))),
7396          (i32 (EXTRACT_SUBREG (UMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;
7397
7398defm : SIMDAcrossLanesVecReductionIntrinsic<"SMINV", vecreduce_smin>;
7399def : Pat<(i32 (vecreduce_smin (v2i32 V64:$Rn))),
7400          (i32 (EXTRACT_SUBREG (SMINPv2i32 V64:$Rn, V64:$Rn), ssub))>;
7401
7402defm : SIMDAcrossLanesVecReductionIntrinsic<"SMAXV", vecreduce_smax>;
7403def : Pat<(i32 (vecreduce_smax (v2i32 V64:$Rn))),
7404          (i32 (EXTRACT_SUBREG (SMAXPv2i32 V64:$Rn, V64:$Rn), ssub))>;
7405
7406multiclass SIMDAcrossLanesSignedLongIntrinsic<string baseOpc, Intrinsic intOp> {
7407  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
7408        (i32 (SMOVvi16to32
7409          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7410            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
7411          (i64 0)))>;
7412def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
7413        (i32 (SMOVvi16to32
7414          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7415           (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
7416          (i64 0)))>;
7417
7418def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
7419          (i32 (EXTRACT_SUBREG
7420           (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7421            (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
7422           ssub))>;
7423def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
7424        (i32 (EXTRACT_SUBREG
7425          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7426           (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
7427          ssub))>;
7428
7429def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
7430        (i64 (EXTRACT_SUBREG
7431          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7432           (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
7433          dsub))>;
7434}
7435
7436multiclass SIMDAcrossLanesUnsignedLongIntrinsic<string baseOpc,
7437                                                Intrinsic intOp> {
7438  def : Pat<(i32 (intOp (v8i8 V64:$Rn))),
7439        (i32 (EXTRACT_SUBREG
7440          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7441            (!cast<Instruction>(!strconcat(baseOpc, "v8i8v")) V64:$Rn), hsub),
7442          ssub))>;
7443def : Pat<(i32 (intOp (v16i8 V128:$Rn))),
7444        (i32 (EXTRACT_SUBREG
7445          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7446            (!cast<Instruction>(!strconcat(baseOpc, "v16i8v")) V128:$Rn), hsub),
7447          ssub))>;
7448
7449def : Pat<(i32 (intOp (v4i16 V64:$Rn))),
7450          (i32 (EXTRACT_SUBREG
7451            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7452              (!cast<Instruction>(!strconcat(baseOpc, "v4i16v")) V64:$Rn), ssub),
7453            ssub))>;
7454def : Pat<(i32 (intOp (v8i16 V128:$Rn))),
7455        (i32 (EXTRACT_SUBREG
7456          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7457            (!cast<Instruction>(!strconcat(baseOpc, "v8i16v")) V128:$Rn), ssub),
7458          ssub))>;
7459
7460def : Pat<(i64 (intOp (v4i32 V128:$Rn))),
7461        (i64 (EXTRACT_SUBREG
7462          (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7463            (!cast<Instruction>(!strconcat(baseOpc, "v4i32v")) V128:$Rn), dsub),
7464          dsub))>;
7465}
7466
7467defm : SIMDAcrossLanesSignedLongIntrinsic<"SADDLV", int_aarch64_neon_saddlv>;
7468defm : SIMDAcrossLanesUnsignedLongIntrinsic<"UADDLV", int_aarch64_neon_uaddlv>;
7469
7470// The vaddlv_s32 intrinsic gets mapped to SADDLP.
7471def : Pat<(i64 (int_aarch64_neon_saddlv (v2i32 V64:$Rn))),
7472          (i64 (EXTRACT_SUBREG
7473            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7474              (SADDLPv2i32_v1i64 V64:$Rn), dsub),
7475            dsub))>;
7476// The vaddlv_u32 intrinsic gets mapped to UADDLP.
7477def : Pat<(i64 (int_aarch64_neon_uaddlv (v2i32 V64:$Rn))),
7478          (i64 (EXTRACT_SUBREG
7479            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)),
7480              (UADDLPv2i32_v1i64 V64:$Rn), dsub),
7481            dsub))>;
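
// For illustration: on a two-lane input the across-lanes long add degenerates
// to one pairwise long add, e.g. vaddlv_s32(<a, b>) == (i64)a + (i64)b, which
// is exactly what "saddlp v0.1d, v0.2s" leaves in d0.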
7482
7483//------------------------------------------------------------------------------
7484// AdvSIMD modified immediate instructions
7485//------------------------------------------------------------------------------
7486
7487// AdvSIMD BIC
7488defm BIC : SIMDModifiedImmVectorShiftTied<1, 0b11, 0b01, "bic", AArch64bici>;
7489// AdvSIMD ORR
7490defm ORR : SIMDModifiedImmVectorShiftTied<0, 0b11, 0b01, "orr", AArch64orri>;
7491
7492let Predicates = [HasNEON] in {
7493def : InstAlias<"bic $Vd.4h, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
7494def : InstAlias<"bic $Vd.8h, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
7495def : InstAlias<"bic $Vd.2s, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
7496def : InstAlias<"bic $Vd.4s, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
7497
7498def : InstAlias<"bic.4h $Vd, $imm", (BICv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
7499def : InstAlias<"bic.8h $Vd, $imm", (BICv8i16 V128:$Vd, imm0_255:$imm, 0)>;
7500def : InstAlias<"bic.2s $Vd, $imm", (BICv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
7501def : InstAlias<"bic.4s $Vd, $imm", (BICv4i32 V128:$Vd, imm0_255:$imm, 0)>;
7502
7503def : InstAlias<"orr $Vd.4h, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
7504def : InstAlias<"orr $Vd.8h, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
7505def : InstAlias<"orr $Vd.2s, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
7506def : InstAlias<"orr $Vd.4s, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
7507
7508def : InstAlias<"orr.4h $Vd, $imm", (ORRv4i16 V64:$Vd,  imm0_255:$imm, 0)>;
7509def : InstAlias<"orr.8h $Vd, $imm", (ORRv8i16 V128:$Vd, imm0_255:$imm, 0)>;
7510def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd,  imm0_255:$imm, 0)>;
7511def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0)>;
7512}
7513
7514// AdvSIMD FMOV
7515def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8,
7516                                              "fmov", ".2d",
7517                       [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7518def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64,  fpimm8,
7519                                              "fmov", ".2s",
7520                       [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7521def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8,
7522                                              "fmov", ".4s",
7523                       [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7524let Predicates = [HasNEON, HasFullFP16] in {
7525def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64,  fpimm8,
7526                                              "fmov", ".4h",
7527                       [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7528def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8,
7529                                              "fmov", ".8h",
7530                       [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>;
7531} // Predicates = [HasNEON, HasFullFP16]
7532
7533// AdvSIMD MOVI
7534
7535// EDIT byte mask: scalar
7536let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7537def MOVID      : SIMDModifiedImmScalarNoShift<0, 1, 0b1110, "movi",
7538                    [(set FPR64:$Rd, simdimmtype10:$imm8)]>;
7539// The movi_edit node has the immediate value already encoded, so we use
7540// a plain imm0_255 here.
7541def : Pat<(f64 (AArch64movi_edit imm0_255:$shift)),
7542          (MOVID imm0_255:$shift)>;
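
// For illustration of the type-10 encoding these patterns rely on: each bit
// of imm8 selects a 0x00 or 0xff byte of the 64-bit result, so imm8 =
// 0b01100101 (bytes 7..0 get 0,1,1,0,0,1,0,1) materializes
//
//   movi d0, #0x00ffff0000ff00ff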
7543
7544// EDIT byte mask: 2d
7545
7546// The movi_edit node has the immediate value already encoded, so we use
7547// a plain imm0_255 in the pattern.
7548let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7549def MOVIv2d_ns   : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128,
7550                                                simdimmtype10,
7551                                                "movi", ".2d",
7552                   [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>;
7553
7554def : Pat<(v2i64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7555def : Pat<(v4i32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7556def : Pat<(v8i16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7557def : Pat<(v16i8 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7558def : Pat<(v2f64 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7559def : Pat<(v4f32 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7560def : Pat<(v8f16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7561def : Pat<(v8bf16 immAllZerosV), (MOVIv2d_ns (i32 0))>;
7562
7563def : Pat<(v2i64 immAllOnesV), (MOVIv2d_ns (i32 255))>;
7564def : Pat<(v4i32 immAllOnesV), (MOVIv2d_ns (i32 255))>;
7565def : Pat<(v8i16 immAllOnesV), (MOVIv2d_ns (i32 255))>;
7566def : Pat<(v16i8 immAllOnesV), (MOVIv2d_ns (i32 255))>;
7567
7568// Set 64-bit vectors to all 0/1 by extracting from a 128-bit register as the
7569// extract is free and this gives better MachineCSE results.
7570def : Pat<(v1i64 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
7571def : Pat<(v2i32 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
7572def : Pat<(v4i16 immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
7573def : Pat<(v8i8  immAllZerosV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 0)), dsub)>;
7574def : Pat<(v1f64 immAllZerosV), (MOVID (i32 0))>;
7575def : Pat<(v2f32 immAllZerosV), (MOVID (i32 0))>;
7576def : Pat<(v4f16 immAllZerosV), (MOVID (i32 0))>;
7577def : Pat<(v4bf16 immAllZerosV), (MOVID (i32 0))>;
7578
7579def : Pat<(v1i64 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
7580def : Pat<(v2i32 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
7581def : Pat<(v4i16 immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
7582def : Pat<(v8i8  immAllOnesV), (EXTRACT_SUBREG (MOVIv2d_ns (i32 255)), dsub)>;
7583
7584// EDIT per word & halfword: 2s, 4h, 4s, & 8h
7585let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7586defm MOVI      : SIMDModifiedImmVectorShift<0, 0b10, 0b00, "movi">;
7587
7588let Predicates = [HasNEON] in {
7589  // Using the MOVI to materialize fp constants.
7590  def : Pat<(f32 fpimm32SIMDModImmType4:$in),
7591            (EXTRACT_SUBREG (MOVIv2i32 (fpimm32SIMDModImmType4XForm f32:$in),
7592                                       (i32 24)),
7593                            ssub)>;
7594}
7595
7596let Predicates = [HasNEON] in {
7597def : InstAlias<"movi $Vd.4h, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
7598def : InstAlias<"movi $Vd.8h, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
7599def : InstAlias<"movi $Vd.2s, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
7600def : InstAlias<"movi $Vd.4s, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
7601
7602def : InstAlias<"movi.4h $Vd, $imm", (MOVIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
7603def : InstAlias<"movi.8h $Vd, $imm", (MOVIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
7604def : InstAlias<"movi.2s $Vd, $imm", (MOVIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
7605def : InstAlias<"movi.4s $Vd, $imm", (MOVIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
7606}
7607
7608def : Pat<(v2i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
7609          (MOVIv2i32 imm0_255:$imm8, imm:$shift)>;
7610def : Pat<(v4i32 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
7611          (MOVIv4i32 imm0_255:$imm8, imm:$shift)>;
7612def : Pat<(v4i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
7613          (MOVIv4i16 imm0_255:$imm8, imm:$shift)>;
7614def : Pat<(v8i16 (AArch64movi_shift imm0_255:$imm8, (i32 imm:$shift))),
7615          (MOVIv8i16 imm0_255:$imm8, imm:$shift)>;
7616
7617let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
7618// EDIT per word: 2s & 4s with MSL shifter
7619def MOVIv2s_msl  : SIMDModifiedImmMoveMSL<0, 0, {1,1,0,?}, V64, "movi", ".2s",
7620                      [(set (v2i32 V64:$Rd),
7621                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
7622def MOVIv4s_msl  : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s",
7623                      [(set (v4i32 V128:$Rd),
7624                            (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
7625
7626// Per byte: 8b & 16b
7627def MOVIv8b_ns   : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64,  imm0_255,
7628                                                 "movi", ".8b",
7629                       [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>;
7630
7631def MOVIv16b_ns  : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255,
7632                                                 "movi", ".16b",
7633                       [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>;
7634}
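
// For illustration: the MSL shifter shifts ones (not zeroes) into the low
// bits, so e.g.
//
//   movi v0.2s, #1, msl #8    // each lane = 0x000001ff
//   movi v0.2s, #1, msl #16   // each lane = 0x0001ffff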
7635
7636// AdvSIMD MVNI
7637
7638// EDIT per word & halfword: 2s, 4h, 4s, & 8h
7639let isReMaterializable = 1, isAsCheapAsAMove = 1 in
7640defm MVNI      : SIMDModifiedImmVectorShift<1, 0b10, 0b00, "mvni">;
7641
7642let Predicates = [HasNEON] in {
7643def : InstAlias<"mvni $Vd.4h, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
7644def : InstAlias<"mvni $Vd.8h, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
7645def : InstAlias<"mvni $Vd.2s, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
7646def : InstAlias<"mvni $Vd.4s, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
7647
7648def : InstAlias<"mvni.4h $Vd, $imm", (MVNIv4i16 V64:$Vd,  imm0_255:$imm, 0), 0>;
7649def : InstAlias<"mvni.8h $Vd, $imm", (MVNIv8i16 V128:$Vd, imm0_255:$imm, 0), 0>;
7650def : InstAlias<"mvni.2s $Vd, $imm", (MVNIv2i32 V64:$Vd,  imm0_255:$imm, 0), 0>;
7651def : InstAlias<"mvni.4s $Vd, $imm", (MVNIv4i32 V128:$Vd, imm0_255:$imm, 0), 0>;
7652}
7653
7654def : Pat<(v2i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
7655          (MVNIv2i32 imm0_255:$imm8, imm:$shift)>;
7656def : Pat<(v4i32 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
7657          (MVNIv4i32 imm0_255:$imm8, imm:$shift)>;
7658def : Pat<(v4i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
7659          (MVNIv4i16 imm0_255:$imm8, imm:$shift)>;
7660def : Pat<(v8i16 (AArch64mvni_shift imm0_255:$imm8, (i32 imm:$shift))),
7661          (MVNIv8i16 imm0_255:$imm8, imm:$shift)>;
7662
7663// EDIT per word: 2s & 4s with MSL shifter
7664let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
7665def MVNIv2s_msl   : SIMDModifiedImmMoveMSL<0, 1, {1,1,0,?}, V64, "mvni", ".2s",
7666                      [(set (v2i32 V64:$Rd),
7667                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
7668def MVNIv4s_msl   : SIMDModifiedImmMoveMSL<1, 1, {1,1,0,?}, V128, "mvni", ".4s",
7669                      [(set (v4i32 V128:$Rd),
7670                            (AArch64mvni_msl imm0_255:$imm8, (i32 imm:$shift)))]>;
7671}
7672
7673//----------------------------------------------------------------------------
7674// AdvSIMD indexed element
7675//----------------------------------------------------------------------------
7676
7677let hasSideEffects = 0 in {
7678  defm FMLA  : SIMDFPIndexedTied<0, 0b0001, "fmla">;
7679  defm FMLS  : SIMDFPIndexedTied<0, 0b0101, "fmls">;
7680}
7681
7682// NOTE: Operands are reordered in the FMLA/FMLS PatFrags because the
7683// instruction expects the addend first, while the intrinsic expects it last.
7684
7685// On the other hand, there are quite a few valid combinatorial options due to
7686// the commutativity of multiplication and the fact that (-x) * y = x * (-y).
7687defm : SIMDFPIndexedTiedPatterns<"FMLA",
7688           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)>>;
7689defm : SIMDFPIndexedTiedPatterns<"FMLA",
7690           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)>>;
7691
7692defm : SIMDFPIndexedTiedPatterns<"FMLS",
7693           TriOpFrag<(any_fma node:$MHS, (fneg node:$RHS), node:$LHS)> >;
7694defm : SIMDFPIndexedTiedPatterns<"FMLS",
7695           TriOpFrag<(any_fma node:$RHS, (fneg node:$MHS), node:$LHS)> >;
7696defm : SIMDFPIndexedTiedPatterns<"FMLS",
7697           TriOpFrag<(any_fma (fneg node:$RHS), node:$MHS, node:$LHS)> >;
7698defm : SIMDFPIndexedTiedPatterns<"FMLS",
7699           TriOpFrag<(any_fma (fneg node:$MHS), node:$RHS, node:$LHS)> >;
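
// For illustration, the four FMLS fragments above all denote acc - x * y:
// fma(x, -y, acc) == fma(-y, x, acc) == fma(y, -x, acc) == fma(-x, y, acc);
// e.g. with x = 2.0, y = 3.0, acc = 10.0 each yields 10.0 - 6.0 = 4.0, which
// is what "fmls v0.4s, v1.4s, v2.4s" computes with v0 as the addend.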
7700
7701multiclass FMLSIndexedAfterNegPatterns<SDPatternOperator OpNode> {
7702  // 3 variants for the .2s version: DUPLANE from 128-bit, DUPLANE from 64-bit
7703  // and DUP scalar.
7704  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
7705                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
7706                                           VectorIndexS:$idx))),
7707            (FMLSv2i32_indexed V64:$Rd, V64:$Rn, V128:$Rm, VectorIndexS:$idx)>;
7708  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
7709                           (v2f32 (AArch64duplane32
7710                                      (v4f32 (insert_subvector undef,
7711                                                 (v2f32 (fneg V64:$Rm)),
7712                                                 (i64 0))),
7713                                      VectorIndexS:$idx)))),
7714            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
7715                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
7716                               VectorIndexS:$idx)>;
7717  def : Pat<(v2f32 (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn),
7718                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
7719            (FMLSv2i32_indexed V64:$Rd, V64:$Rn,
7720                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
7721
7722  // 3 variants for the .4s version: DUPLANE from 128-bit, DUPLANE from 64-bit
7723  // and DUP scalar.
7724  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
7725                           (AArch64duplane32 (v4f32 (fneg V128:$Rm)),
7726                                           VectorIndexS:$idx))),
7727            (FMLSv4i32_indexed V128:$Rd, V128:$Rn, V128:$Rm,
7728                               VectorIndexS:$idx)>;
7729  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
7730                           (v4f32 (AArch64duplane32
7731                                      (v4f32 (insert_subvector undef,
7732                                                 (v2f32 (fneg V64:$Rm)),
7733                                                 (i64 0))),
7734                                      VectorIndexS:$idx)))),
7735            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
7736                               (SUBREG_TO_REG (i32 0), V64:$Rm, dsub),
7737                               VectorIndexS:$idx)>;
7738  def : Pat<(v4f32 (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn),
7739                           (AArch64dup (f32 (fneg FPR32Op:$Rm))))),
7740            (FMLSv4i32_indexed V128:$Rd, V128:$Rn,
7741                (SUBREG_TO_REG (i32 0), FPR32Op:$Rm, ssub), (i64 0))>;
7742
7743  // 2 variants for the .2d version: DUPLANE from 128-bit, and DUP scalar
7744  // (DUPLANE from 64-bit would be trivial).
7745  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
7746                           (AArch64duplane64 (v2f64 (fneg V128:$Rm)),
7747                                           VectorIndexD:$idx))),
7748            (FMLSv2i64_indexed
7749                V128:$Rd, V128:$Rn, V128:$Rm, VectorIndexD:$idx)>;
7750  def : Pat<(v2f64 (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn),
7751                           (AArch64dup (f64 (fneg FPR64Op:$Rm))))),
7752            (FMLSv2i64_indexed V128:$Rd, V128:$Rn,
7753                (SUBREG_TO_REG (i32 0), FPR64Op:$Rm, dsub), (i64 0))>;
7754
7755  // 2 variants for 32-bit scalar version: extract from .2s or from .4s
7756  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
7757                         (vector_extract (v4f32 (fneg V128:$Rm)),
7758                                         VectorIndexS:$idx))),
7759            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
7760                V128:$Rm, VectorIndexS:$idx)>;
7761  def : Pat<(f32 (OpNode (f32 FPR32:$Rd), (f32 FPR32:$Rn),
7762                         (vector_extract (v4f32 (insert_subvector undef,
7763                                                    (v2f32 (fneg V64:$Rm)),
7764                                                    (i64 0))),
7765                                         VectorIndexS:$idx))),
7766            (FMLSv1i32_indexed FPR32:$Rd, FPR32:$Rn,
7767                (SUBREG_TO_REG (i32 0), V64:$Rm, dsub), VectorIndexS:$idx)>;
7768
7769  // 1 variant for 64-bit scalar version: extract from .1d or from .2d
7770  def : Pat<(f64 (OpNode (f64 FPR64:$Rd), (f64 FPR64:$Rn),
7771                         (vector_extract (v2f64 (fneg V128:$Rm)),
7772                                         VectorIndexS:$idx))),
7773            (FMLSv1i64_indexed FPR64:$Rd, FPR64:$Rn,
7774                V128:$Rm, VectorIndexS:$idx)>;
7775}
7776
7777defm : FMLSIndexedAfterNegPatterns<
7778           TriOpFrag<(any_fma node:$RHS, node:$MHS, node:$LHS)> >;
7779defm : FMLSIndexedAfterNegPatterns<
7780           TriOpFrag<(any_fma node:$MHS, node:$RHS, node:$LHS)> >;
7781
7782defm FMULX : SIMDFPIndexed<1, 0b1001, "fmulx", int_aarch64_neon_fmulx>;
7783defm FMUL  : SIMDFPIndexed<0, 0b1001, "fmul", any_fmul>;
7784
7785def : Pat<(v2f32 (any_fmul V64:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
7786          (FMULv2i32_indexed V64:$Rn,
7787            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
7788            (i64 0))>;
7789def : Pat<(v4f32 (any_fmul V128:$Rn, (AArch64dup (f32 FPR32:$Rm)))),
7790          (FMULv4i32_indexed V128:$Rn,
7791            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR32:$Rm, ssub),
7792            (i64 0))>;
7793def : Pat<(v2f64 (any_fmul V128:$Rn, (AArch64dup (f64 FPR64:$Rm)))),
7794          (FMULv2i64_indexed V128:$Rn,
7795            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$Rm, dsub),
7796            (i64 0))>;
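
// For illustration: a multiply by a scalar already in s2 selects the indexed
// form directly, with the scalar placed in lane 0 rather than broadcast, e.g.
//
//   (any_fmul v1.4s, (AArch64dup (f32 s2))) --> fmul v0.4s, v1.4s, v2.s[0]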
7797
7798defm SQDMULH : SIMDIndexedHS<0, 0b1100, "sqdmulh", int_aarch64_neon_sqdmulh>;
7799defm SQRDMULH : SIMDIndexedHS<0, 0b1101, "sqrdmulh", int_aarch64_neon_sqrdmulh>;
7800
7801defm SQDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqdmulh_lane,
7802                                     int_aarch64_neon_sqdmulh_laneq>;
7803defm SQRDMULH : SIMDIndexedHSPatterns<int_aarch64_neon_sqrdmulh_lane,
7804                                      int_aarch64_neon_sqrdmulh_laneq>;
7805
7806// Generated by MachineCombine
7807defm MLA   : SIMDVectorIndexedHSTied<1, 0b0000, "mla", null_frag>;
7808defm MLS   : SIMDVectorIndexedHSTied<1, 0b0100, "mls", null_frag>;
7809
7810defm MUL   : SIMDVectorIndexedHS<0, 0b1000, "mul", mul>;
7811defm SMLAL : SIMDVectorIndexedLongSDTied<0, 0b0010, "smlal",
7812    TriOpFrag<(add node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
7813defm SMLSL : SIMDVectorIndexedLongSDTied<0, 0b0110, "smlsl",
7814    TriOpFrag<(sub node:$LHS, (AArch64smull node:$MHS, node:$RHS))>>;
7815defm SMULL : SIMDVectorIndexedLongSD<0, 0b1010, "smull", AArch64smull>;
7816defm SQDMLAL : SIMDIndexedLongSQDMLXSDTied<0, 0b0011, "sqdmlal",
7817                                           int_aarch64_neon_sqadd>;
7818defm SQDMLSL : SIMDIndexedLongSQDMLXSDTied<0, 0b0111, "sqdmlsl",
7819                                           int_aarch64_neon_sqsub>;
7820defm SQRDMLAH : SIMDIndexedSQRDMLxHSDTied<1, 0b1101, "sqrdmlah",
7821                                          int_aarch64_neon_sqrdmlah>;
7822defm SQRDMLSH : SIMDIndexedSQRDMLxHSDTied<1, 0b1111, "sqrdmlsh",
7823                                          int_aarch64_neon_sqrdmlsh>;
7824defm SQDMULL : SIMDIndexedLongSD<0, 0b1011, "sqdmull", int_aarch64_neon_sqdmull>;
7825defm UMLAL   : SIMDVectorIndexedLongSDTied<1, 0b0010, "umlal",
7826    TriOpFrag<(add node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
7827defm UMLSL   : SIMDVectorIndexedLongSDTied<1, 0b0110, "umlsl",
7828    TriOpFrag<(sub node:$LHS, (AArch64umull node:$MHS, node:$RHS))>>;
7829defm UMULL   : SIMDVectorIndexedLongSD<1, 0b1010, "umull", AArch64umull>;
7830
7831// A scalar sqdmull with the second operand being a vector lane can be
7832// handled directly with the indexed instruction encoding.
7833def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn),
7834                                          (vector_extract (v4i32 V128:$Vm),
7835                                                           VectorIndexS:$idx)),
7836          (SQDMULLv1i64_indexed FPR32:$Rn, V128:$Vm, VectorIndexS:$idx)>;
7837
7838//----------------------------------------------------------------------------
7839// AdvSIMD scalar shift instructions
7840//----------------------------------------------------------------------------
7841defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">;
7842defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">;
7843defm SCVTF  : SIMDFPScalarRShift<0, 0b11100, "scvtf">;
7844defm UCVTF  : SIMDFPScalarRShift<1, 0b11100, "ucvtf">;
7845// Codegen patterns for the above. We don't put these directly on the
7846// instructions because TableGen's type inference can't handle the truth.
7847// Having the same base pattern for fp <--> int totally freaks it out.
7848def : Pat<(int_aarch64_neon_vcvtfp2fxs FPR32:$Rn, vecshiftR32:$imm),
7849          (FCVTZSs FPR32:$Rn, vecshiftR32:$imm)>;
7850def : Pat<(int_aarch64_neon_vcvtfp2fxu FPR32:$Rn, vecshiftR32:$imm),
7851          (FCVTZUs FPR32:$Rn, vecshiftR32:$imm)>;
7852def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f64 FPR64:$Rn), vecshiftR64:$imm)),
7853          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
7854def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f64 FPR64:$Rn), vecshiftR64:$imm)),
7855          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
7856def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxs (v1f64 FPR64:$Rn),
7857                                            vecshiftR64:$imm)),
7858          (FCVTZSd FPR64:$Rn, vecshiftR64:$imm)>;
7859def : Pat<(v1i64 (int_aarch64_neon_vcvtfp2fxu (v1f64 FPR64:$Rn),
7860                                            vecshiftR64:$imm)),
7861          (FCVTZUd FPR64:$Rn, vecshiftR64:$imm)>;
7862def : Pat<(int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR32:$imm),
7863          (UCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
7864def : Pat<(f64 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
7865          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
7866def : Pat<(v1f64 (int_aarch64_neon_vcvtfxs2fp (v1i64 FPR64:$Rn),
7867                                            vecshiftR64:$imm)),
7868          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
7869def : Pat<(f64 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR64:$imm)),
7870          (SCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
7871def : Pat<(v1f64 (int_aarch64_neon_vcvtfxu2fp (v1i64 FPR64:$Rn),
7872                                            vecshiftR64:$imm)),
7873          (UCVTFd FPR64:$Rn, vecshiftR64:$imm)>;
7874def : Pat<(int_aarch64_neon_vcvtfxs2fp FPR32:$Rn, vecshiftR32:$imm),
7875          (SCVTFs FPR32:$Rn, vecshiftR32:$imm)>;
7876
7877// Patterns for FP16 intrinsics - requires a reg copy to/from, as i16 is not supported.
7878
7879def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 (sext_inreg FPR32:$Rn, i16)), vecshiftR16:$imm)),
7880          (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
7881def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i32 FPR32:$Rn), vecshiftR16:$imm)),
7882          (SCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
7883def : Pat<(f16 (int_aarch64_neon_vcvtfxs2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
7884          (SCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
7885def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp
7886            (and FPR32:$Rn, (i32 65535)),
7887            vecshiftR16:$imm)),
7888          (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
7889def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp FPR32:$Rn, vecshiftR16:$imm)),
7890          (UCVTFh (f16 (EXTRACT_SUBREG FPR32:$Rn, hsub)), vecshiftR16:$imm)>;
7891def : Pat<(f16 (int_aarch64_neon_vcvtfxu2fp (i64 FPR64:$Rn), vecshiftR16:$imm)),
7892          (UCVTFh (f16 (EXTRACT_SUBREG FPR64:$Rn, hsub)), vecshiftR16:$imm)>;
7893def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR32:$imm)),
7894          (i32 (INSERT_SUBREG
7895            (i32 (IMPLICIT_DEF)),
7896            (FCVTZSh FPR16:$Rn, vecshiftR32:$imm),
7897            hsub))>;
7898def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxs (f16 FPR16:$Rn), vecshiftR64:$imm)),
7899          (i64 (INSERT_SUBREG
7900            (i64 (IMPLICIT_DEF)),
7901            (FCVTZSh FPR16:$Rn, vecshiftR64:$imm),
7902            hsub))>;
7903def : Pat<(i32 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR32:$imm)),
7904          (i32 (INSERT_SUBREG
7905            (i32 (IMPLICIT_DEF)),
7906            (FCVTZUh FPR16:$Rn, vecshiftR32:$imm),
7907            hsub))>;
7908def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
7909          (i64 (INSERT_SUBREG
7910            (i64 (IMPLICIT_DEF)),
7911            (FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
7912            hsub))>;
7913def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
7914          (i32 (INSERT_SUBREG
7915            (i32 (IMPLICIT_DEF)),
7916            (FACGE16 FPR16:$Rn, FPR16:$Rm),
7917            hsub))>;
7918def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
7919          (i32 (INSERT_SUBREG
7920            (i32 (IMPLICIT_DEF)),
7921            (FACGT16 FPR16:$Rn, FPR16:$Rm),
7922            hsub))>;
7923
7924defm SHL      : SIMDScalarLShiftD<   0, 0b01010, "shl", AArch64vshl>;
7925defm SLI      : SIMDScalarLShiftDTied<1, 0b01010, "sli">;
7926defm SQRSHRN  : SIMDScalarRShiftBHS< 0, 0b10011, "sqrshrn",
7927                                     int_aarch64_neon_sqrshrn>;
7928defm SQRSHRUN : SIMDScalarRShiftBHS< 1, 0b10001, "sqrshrun",
7929                                     int_aarch64_neon_sqrshrun>;
7930defm SQSHLU   : SIMDScalarLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
7931defm SQSHL    : SIMDScalarLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
7932defm SQSHRN   : SIMDScalarRShiftBHS< 0, 0b10010, "sqshrn",
7933                                     int_aarch64_neon_sqshrn>;
7934defm SQSHRUN  : SIMDScalarRShiftBHS< 1, 0b10000, "sqshrun",
7935                                     int_aarch64_neon_sqshrun>;
7936defm SRI      : SIMDScalarRShiftDTied<   1, 0b01000, "sri">;
7937defm SRSHR    : SIMDScalarRShiftD<   0, 0b00100, "srshr", AArch64srshri>;
7938defm SRSRA    : SIMDScalarRShiftDTied<   0, 0b00110, "srsra",
7939    TriOpFrag<(add node:$LHS,
7940                   (AArch64srshri node:$MHS, node:$RHS))>>;
7941defm SSHR     : SIMDScalarRShiftD<   0, 0b00000, "sshr", AArch64vashr>;
7942defm SSRA     : SIMDScalarRShiftDTied<   0, 0b00010, "ssra",
7943    TriOpFrag<(add_and_or_is_add node:$LHS,
7944                   (AArch64vashr node:$MHS, node:$RHS))>>;
7945defm UQRSHRN  : SIMDScalarRShiftBHS< 1, 0b10011, "uqrshrn",
7946                                     int_aarch64_neon_uqrshrn>;
7947defm UQSHL    : SIMDScalarLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
7948defm UQSHRN   : SIMDScalarRShiftBHS< 1, 0b10010, "uqshrn",
7949                                     int_aarch64_neon_uqshrn>;
7950defm URSHR    : SIMDScalarRShiftD<   1, 0b00100, "urshr", AArch64urshri>;
7951defm URSRA    : SIMDScalarRShiftDTied<   1, 0b00110, "ursra",
7952    TriOpFrag<(add node:$LHS,
7953                   (AArch64urshri node:$MHS, node:$RHS))>>;
7954defm USHR     : SIMDScalarRShiftD<   1, 0b00000, "ushr", AArch64vlshr>;
7955defm USRA     : SIMDScalarRShiftDTied<   1, 0b00010, "usra",
7956    TriOpFrag<(add_and_or_is_add node:$LHS,
7957                   (AArch64vlshr node:$MHS, node:$RHS))>>;
7958
7959//----------------------------------------------------------------------------
7960// AdvSIMD vector shift instructions
7961//----------------------------------------------------------------------------
7962defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>;
7963defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>;
7964defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf",
7965                                   int_aarch64_neon_vcvtfxs2fp>;
7966defm RSHRN   : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", AArch64rshrn>;
7967defm SHL     : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>;
7968
7969let Predicates = [HasNEON] in {
7970def : Pat<(v2f32 (sint_to_fp (v2i32 (AArch64vashr_exact v2i32:$Vn, i32:$shift)))),
7971          (SCVTFv2i32_shift $Vn, vecshiftR32:$shift)>;
7972
7973def : Pat<(v4f32 (sint_to_fp (v4i32 (AArch64vashr_exact v4i32:$Vn, i32:$shift)))),
7974          (SCVTFv4i32_shift $Vn, vecshiftR32:$shift)>;
7975
7976def : Pat<(v2f64 (sint_to_fp (v2i64 (AArch64vashr_exact v2i64:$Vn, i32:$shift)))),
7977          (SCVTFv2i64_shift $Vn, vecshiftR64:$shift)>;
7978}
7979
7980let Predicates = [HasNEON, HasFullFP16] in {
7981def : Pat<(v4f16 (sint_to_fp (v4i16 (AArch64vashr_exact v4i16:$Vn, i32:$shift)))),
7982          (SCVTFv4i16_shift $Vn, vecshiftR16:$shift)>;
7983
7984def : Pat<(v8f16 (sint_to_fp (v8i16 (AArch64vashr_exact v8i16:$Vn, i32:$shift)))),
7985          (SCVTFv8i16_shift $Vn, vecshiftR16:$shift)>;
7986}
7987
7988// X << 1 ==> X + X
7989class SHLToADDPat<ValueType ty, RegisterClass regtype>
7990  : Pat<(ty (AArch64vshl (ty regtype:$Rn), (i32 1))),
7991            (!cast<Instruction>("ADD"#ty) regtype:$Rn, regtype:$Rn)>;
7992
7993def : SHLToADDPat<v16i8, FPR128>;
7994def : SHLToADDPat<v8i16, FPR128>;
7995def : SHLToADDPat<v4i32, FPR128>;
7996def : SHLToADDPat<v2i64, FPR128>;
7997def : SHLToADDPat<v8i8,  FPR64>;
7998def : SHLToADDPat<v4i16, FPR64>;
7999def : SHLToADDPat<v2i32, FPR64>;
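
// For illustration: with v0.4s = <5, 6, 7, 8>, "shl v0.4s, v0.4s, #1" and
// "add v0.4s, v0.4s, v0.4s" both yield <10, 12, 14, 16>; the patterns above
// select the ADD form, which is usually at least as cheap.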
8000
8001defm SHRN    : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn",
8002                          BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>;
8003defm SLI     : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>;
8004def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
8005                                      (i32 vecshiftL64:$imm))),
8006          (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>;
8007defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn",
8008                                         int_aarch64_neon_sqrshrn>;
8009defm SQRSHRUN: SIMDVectorRShiftNarrowBHS<1, 0b10001, "sqrshrun",
8010                                         int_aarch64_neon_sqrshrun>;
8011defm SQSHLU : SIMDVectorLShiftBHSD<1, 0b01100, "sqshlu", AArch64sqshlui>;
8012defm SQSHL  : SIMDVectorLShiftBHSD<0, 0b01110, "sqshl", AArch64sqshli>;
8013defm SQSHRN  : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn",
8014                                         int_aarch64_neon_sqshrn>;
8015defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun",
8016                                         int_aarch64_neon_sqshrun>;
8017defm SRI     : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>;
8018def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn),
8019                                      (i32 vecshiftR64:$imm))),
8020          (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>;
8021defm SRSHR   : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>;
8022defm SRSRA   : SIMDVectorRShiftBHSDTied<0, 0b00110, "srsra",
8023                 TriOpFrag<(add node:$LHS,
8024                                (AArch64srshri node:$MHS, node:$RHS))> >;
8025defm SSHLL   : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll",
8026                BinOpFrag<(AArch64vshl (sext node:$LHS), node:$RHS)>>;
8027
8028defm SSHR    : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>;
8029defm SSRA    : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra",
8030                TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>;
8031defm UCVTF   : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf",
8032                        int_aarch64_neon_vcvtfxu2fp>;
8033defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn",
8034                                         int_aarch64_neon_uqrshrn>;
8035defm UQSHL   : SIMDVectorLShiftBHSD<1, 0b01110, "uqshl", AArch64uqshli>;
8036defm UQSHRN  : SIMDVectorRShiftNarrowBHS<1, 0b10010, "uqshrn",
8037                                         int_aarch64_neon_uqshrn>;
8038defm URSHR   : SIMDVectorRShiftBHSD<1, 0b00100, "urshr", AArch64urshri>;
8039defm URSRA   : SIMDVectorRShiftBHSDTied<1, 0b00110, "ursra",
8040                TriOpFrag<(add node:$LHS,
8041                               (AArch64urshri node:$MHS, node:$RHS))> >;
8042defm USHLL   : SIMDVectorLShiftLongBHSD<1, 0b10100, "ushll",
8043                BinOpFrag<(AArch64vshl (zext node:$LHS), node:$RHS)>>;
8044defm USHR    : SIMDVectorRShiftBHSD<1, 0b00000, "ushr", AArch64vlshr>;
8045defm USRA    : SIMDVectorRShiftBHSDTied<1, 0b00010, "usra",
8046                TriOpFrag<(add_and_or_is_add node:$LHS, (AArch64vlshr node:$MHS, node:$RHS))> >;
8047
8048def VImm0080:         PatLeaf<(AArch64movi_shift (i32 128), (i32 0))>;
8049def VImm00008000:     PatLeaf<(AArch64movi_shift (i32 128), (i32 8))>;
8050def VImm0000000080000000: PatLeaf<(AArch64NvCast (v2f64 (fneg (AArch64NvCast (v4i32 (AArch64movi_shift (i32 128), (i32 24)))))))>;
8051
8052// RADDHN patterns for when RSHRN shifts by half the size of the vector element
8053def : Pat<(v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))),
8054          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
8055def : Pat<(v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), VImm00008000), (i32 16)))),
8056          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
8057let AddedComplexity = 5 in
8058def : Pat<(v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))),
8059          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
8060def : Pat<(v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))),
8061          (RADDHNv8i16_v8i8 V128:$Vn, (v8i16 (MOVIv2d_ns (i32 0))))>;
8062def : Pat<(v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))),
8063          (RADDHNv4i32_v4i16 V128:$Vn, (v4i32 (MOVIv2d_ns (i32 0))))>;
8064def : Pat<(v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))),
8065          (RADDHNv2i64_v2i32 V128:$Vn, (v2i64 (MOVIv2d_ns (i32 0))))>;
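
// For illustration of the equivalence used above (v8i16 -> v8i8 case): rshrn
// adds the rounding constant 0x80 before shifting, and raddhn with a zero
// second operand does the same, e.g. for an element x = 0x1234:
//
//   (0x1234 + 0x80) >> 8 = 0x12b4 >> 8 = 0x12
//
// so RADDHN(Vn, 0) covers both the explicit add+shift form and the rshrn
// intrinsic whenever the shift is exactly half the element size.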
8066
8067// RADDHN2 patterns for when RSHRN shifts by half the size of the vector element
8068def : Pat<(v16i8 (concat_vectors
8069                 (v8i8 V64:$Vd),
8070                 (v8i8 (trunc (AArch64vlshr (add (v8i16 V128:$Vn), VImm0080), (i32 8)))))),
8071          (RADDHNv8i16_v16i8
8072                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
8073                 (v8i16 (MOVIv2d_ns (i32 0))))>;
8074def : Pat<(v8i16 (concat_vectors
8075                 (v4i16 V64:$Vd),
8076                 (v4i16 (trunc (AArch64vlshr (add (v4i32 V128:$Vn), VImm00008000), (i32 16)))))),
8077          (RADDHNv4i32_v8i16
8078                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
8079                 (v4i32 (MOVIv2d_ns (i32 0))))>;
8080let AddedComplexity = 5 in
8081def : Pat<(v4i32 (concat_vectors
8082                 (v2i32 V64:$Vd),
8083                 (v2i32 (trunc (AArch64vlshr (add (v2i64 V128:$Vn), VImm0000000080000000), (i32 32)))))),
8084          (RADDHNv2i64_v4i32
8085                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
8086                 (v2i64 (MOVIv2d_ns (i32 0))))>;
8087def : Pat<(v16i8 (concat_vectors
8088                 (v8i8 V64:$Vd),
8089                 (v8i8 (int_aarch64_neon_rshrn (v8i16 V128:$Vn), (i32 8))))),
8090          (RADDHNv8i16_v16i8
8091                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
8092                 (v8i16 (MOVIv2d_ns (i32 0))))>;
8093def : Pat<(v8i16 (concat_vectors
8094                 (v4i16 V64:$Vd),
8095                 (v4i16 (int_aarch64_neon_rshrn (v4i32 V128:$Vn), (i32 16))))),
8096          (RADDHNv4i32_v8i16
8097                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
8098                 (v4i32 (MOVIv2d_ns (i32 0))))>;
8099def : Pat<(v4i32 (concat_vectors
8100                 (v2i32 V64:$Vd),
8101                 (v2i32 (int_aarch64_neon_rshrn (v2i64 V128:$Vn), (i32 32))))),
8102          (RADDHNv2i64_v4i32
8103                 (INSERT_SUBREG (IMPLICIT_DEF), V64:$Vd, dsub), V128:$Vn,
8104                 (v2i64 (MOVIv2d_ns (i32 0))))>;
8105
8106// SHRN patterns for when a logical right shift was used instead of arithmetic
8107// (the immediate guarantees no sign bits actually end up in the result so it
8108// doesn't matter).
8109def : Pat<(v8i8 (trunc (AArch64vlshr (v8i16 V128:$Rn), vecshiftR16Narrow:$imm))),
8110          (SHRNv8i8_shift V128:$Rn, vecshiftR16Narrow:$imm)>;
8111def : Pat<(v4i16 (trunc (AArch64vlshr (v4i32 V128:$Rn), vecshiftR32Narrow:$imm))),
8112          (SHRNv4i16_shift V128:$Rn, vecshiftR32Narrow:$imm)>;
8113def : Pat<(v2i32 (trunc (AArch64vlshr (v2i64 V128:$Rn), vecshiftR64Narrow:$imm))),
8114          (SHRNv2i32_shift V128:$Rn, vecshiftR64Narrow:$imm)>;
8115
8116def : Pat<(v16i8 (concat_vectors (v8i8 V64:$Rd),
8117                                 (trunc (AArch64vlshr (v8i16 V128:$Rn),
8118                                                    vecshiftR16Narrow:$imm)))),
8119          (SHRNv16i8_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
8120                           V128:$Rn, vecshiftR16Narrow:$imm)>;
8121def : Pat<(v8i16 (concat_vectors (v4i16 V64:$Rd),
8122                                 (trunc (AArch64vlshr (v4i32 V128:$Rn),
8123                                                    vecshiftR32Narrow:$imm)))),
8124          (SHRNv8i16_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
8125                           V128:$Rn, vecshiftR32Narrow:$imm)>;
8126def : Pat<(v4i32 (concat_vectors (v2i32 V64:$Rd),
8127                                 (trunc (AArch64vlshr (v2i64 V128:$Rn),
8128                                                    vecshiftR64Narrow:$imm)))),
8129          (SHRNv4i32_shift (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub),
8130                           V128:$Rn, vecshiftR64Narrow:$imm)>;
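
// For illustration (v8i16 -> v8i8 case): trunc(lshr(x, imm)) keeps element
// bits [imm+7 : imm], and with imm <= 8 (the most vecshiftR16Narrow allows)
// the shifted-in bits never reach bit 7, so lshr and ashr truncate to the
// same value, e.g. for x = 0x8234, imm = 4:
//
//   trunc(lshr(0x8234, 4)) = trunc(0x0823) = 0x23
//   trunc(ashr(0x8234, 4)) = trunc(0xf823) = 0x23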
8131
8132// Vector sign and zero extensions are implemented with SSHLL and USHLL.
8133// Anyexts are implemented as zexts.
8134def : Pat<(v8i16 (sext   (v8i8 V64:$Rn))),  (SSHLLv8i8_shift  V64:$Rn, (i32 0))>;
8135def : Pat<(v8i16 (zext   (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
8136def : Pat<(v8i16 (anyext (v8i8 V64:$Rn))),  (USHLLv8i8_shift  V64:$Rn, (i32 0))>;
8137def : Pat<(v4i32 (sext   (v4i16 V64:$Rn))), (SSHLLv4i16_shift V64:$Rn, (i32 0))>;
8138def : Pat<(v4i32 (zext   (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
8139def : Pat<(v4i32 (anyext (v4i16 V64:$Rn))), (USHLLv4i16_shift V64:$Rn, (i32 0))>;
8140def : Pat<(v2i64 (sext   (v2i32 V64:$Rn))), (SSHLLv2i32_shift V64:$Rn, (i32 0))>;
8141def : Pat<(v2i64 (zext   (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
8142def : Pat<(v2i64 (anyext (v2i32 V64:$Rn))), (USHLLv2i32_shift V64:$Rn, (i32 0))>;
8143// Vector bf16 -> fp32 is implemented morally as a zext + shift.
8144def : Pat<(v4f32 (any_fpextend (v4bf16 V64:$Rn))), (SHLLv4i16 V64:$Rn)>;
8145// Also match an extend from the upper half of a 128-bit source register.
8146def : Pat<(v8i16 (anyext (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
8147          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
8148def : Pat<(v8i16 (zext   (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
8149          (USHLLv16i8_shift V128:$Rn, (i32 0))>;
8150def : Pat<(v8i16 (sext   (v8i8 (extract_high_v16i8 (v16i8 V128:$Rn)) ))),
8151          (SSHLLv16i8_shift V128:$Rn, (i32 0))>;
8152def : Pat<(v4i32 (anyext (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
8153          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
8154def : Pat<(v4i32 (zext   (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
8155          (USHLLv8i16_shift V128:$Rn, (i32 0))>;
8156def : Pat<(v4i32 (sext   (v4i16 (extract_high_v8i16 (v8i16 V128:$Rn)) ))),
8157          (SSHLLv8i16_shift V128:$Rn, (i32 0))>;
8158def : Pat<(v2i64 (anyext (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
8159          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
8160def : Pat<(v2i64 (zext   (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
8161          (USHLLv4i32_shift V128:$Rn, (i32 0))>;
8162def : Pat<(v2i64 (sext   (v2i32 (extract_high_v4i32 (v4i32 V128:$Rn)) ))),
8163          (SSHLLv4i32_shift V128:$Rn, (i32 0))>;
8164
8165let Predicates = [HasNEON] in {
8166// Vector shift sxtl aliases
8167def : InstAlias<"sxtl.8h $dst, $src1",
8168                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
8169def : InstAlias<"sxtl $dst.8h, $src1.8b",
8170                (SSHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
8171def : InstAlias<"sxtl.4s $dst, $src1",
8172                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
8173def : InstAlias<"sxtl $dst.4s, $src1.4h",
8174                (SSHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
8175def : InstAlias<"sxtl.2d $dst, $src1",
8176                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
8177def : InstAlias<"sxtl $dst.2d, $src1.2s",
8178                (SSHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
8179
8180// Vector shift sxtl2 aliases
8181def : InstAlias<"sxtl2.8h $dst, $src1",
8182                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
8183def : InstAlias<"sxtl2 $dst.8h, $src1.16b",
8184                (SSHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
8185def : InstAlias<"sxtl2.4s $dst, $src1",
8186                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
8187def : InstAlias<"sxtl2 $dst.4s, $src1.8h",
8188                (SSHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
8189def : InstAlias<"sxtl2.2d $dst, $src1",
8190                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
8191def : InstAlias<"sxtl2 $dst.2d, $src1.4s",
8192                (SSHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
8193
8194// Vector shift uxtl aliases
8195def : InstAlias<"uxtl.8h $dst, $src1",
8196                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
8197def : InstAlias<"uxtl $dst.8h, $src1.8b",
8198                (USHLLv8i8_shift V128:$dst, V64:$src1, 0)>;
8199def : InstAlias<"uxtl.4s $dst, $src1",
8200                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
8201def : InstAlias<"uxtl $dst.4s, $src1.4h",
8202                (USHLLv4i16_shift V128:$dst, V64:$src1, 0)>;
8203def : InstAlias<"uxtl.2d $dst, $src1",
8204                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
8205def : InstAlias<"uxtl $dst.2d, $src1.2s",
8206                (USHLLv2i32_shift V128:$dst, V64:$src1, 0)>;
8207
8208// Vector shift uxtl2 aliases
8209def : InstAlias<"uxtl2.8h $dst, $src1",
8210                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
8211def : InstAlias<"uxtl2 $dst.8h, $src1.16b",
8212                (USHLLv16i8_shift V128:$dst, V128:$src1, 0)>;
8213def : InstAlias<"uxtl2.4s $dst, $src1",
8214                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
8215def : InstAlias<"uxtl2 $dst.4s, $src1.8h",
8216                (USHLLv8i16_shift V128:$dst, V128:$src1, 0)>;
8217def : InstAlias<"uxtl2.2d $dst, $src1",
8218                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
8219def : InstAlias<"uxtl2 $dst.2d, $src1.4s",
8220                (USHLLv4i32_shift V128:$dst, V128:$src1, 0)>;
8221}
8222
8223def abs_f16 :
8224  OutPatFrag<(ops node:$Rn),
8225             (EXTRACT_SUBREG (f32 (COPY_TO_REGCLASS
8226               (i32 (ANDWri
8227                 (i32 (COPY_TO_REGCLASS (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
8228                                         node:$Rn, hsub), GPR32)),
8229                 (i32 (logical_imm32_XFORM (i32 0x7fff))))),
8230               FPR32)), hsub)>;
8231
8232def : Pat<(f16 (fabs (f16 FPR16:$Rn))), (f16 (abs_f16 (f16 FPR16:$Rn)))>;
8233def : Pat<(bf16 (fabs (bf16 FPR16:$Rn))), (bf16 (abs_f16 (bf16 FPR16:$Rn)))>;
8234
8235def neg_f16 :
8236  OutPatFrag<(ops node:$Rn),
8237             (EXTRACT_SUBREG (f32 (COPY_TO_REGCLASS
8238               (i32 (EORWri
8239                 (i32 (COPY_TO_REGCLASS (INSERT_SUBREG (f32 (IMPLICIT_DEF)),
8240                                         node:$Rn, hsub), GPR32)),
8241                 (i32 (logical_imm32_XFORM (i32 0x8000))))),
8242               FPR32)), hsub)>;
8243
8244def : Pat<(f16 (fneg (f16 FPR16:$Rn))), (f16 (neg_f16 (f16 FPR16:$Rn)))>;
8245def : Pat<(bf16 (fneg (bf16 FPR16:$Rn))), (bf16 (neg_f16 (bf16 FPR16:$Rn)))>;
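
// For illustration: fp16 fabs/fneg are pure sign-bit operations, done above
// as an integer AND/EOR on a GPR copy of the half, e.g.
//
//   0xc200 (-3.0) AND 0x7fff = 0x4200 (3.0)    // fabs clears bit 15
//   0x4200 ( 3.0) EOR 0x8000 = 0xc200 (-3.0)   // fneg flips bit 15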
8246
8247let Predicates = [HasNEON] in {
8248def : Pat<(v4f16 (fabs (v4f16 V64:$Rn))), (v4f16 (BICv4i16 (v4f16 V64:$Rn), (i32 128), (i32 8)))>;
8249def : Pat<(v4bf16 (fabs (v4bf16 V64:$Rn))), (v4bf16 (BICv4i16 (v4bf16 V64:$Rn), (i32 128), (i32 8)))>;
8250def : Pat<(v8f16 (fabs (v8f16 V128:$Rn))), (v8f16 (BICv8i16 (v8f16 V128:$Rn), (i32 128), (i32 8)))>;
8251def : Pat<(v8bf16 (fabs (v8bf16 V128:$Rn))), (v8bf16 (BICv8i16 (v8bf16 V128:$Rn), (i32 128), (i32 8)))>;
8252
8253def : Pat<(v4f16 (fneg (v4f16 V64:$Rn))), (v4f16 (EORv8i8 (v4f16 V64:$Rn), (MOVIv4i16 (i32 128), (i32 8))))>;
8254def : Pat<(v4bf16 (fneg (v4bf16 V64:$Rn))), (v4bf16 (EORv8i8 (v4bf16 V64:$Rn), (v4i16 (MOVIv4i16 (i32 0x80), (i32 8)))))>;
8255def : Pat<(v8f16 (fneg (v8f16 V128:$Rn))), (v8f16 (EORv16i8 (v8f16 V128:$Rn), (MOVIv8i16 (i32 128), (i32 8))))>;
8256def : Pat<(v8bf16 (fneg (v8bf16 V128:$Rn))), (v8bf16 (EORv16i8 (v8bf16 V128:$Rn), (v8i16 (MOVIv8i16 (i32 0x80), (i32 8)))))>;
8257}
8258
8259// If an integer is about to be converted to a floating point value,
8260// just load it on the floating point unit.
8261// These patterns are more complex because floating point loads do not
8262// support sign extension.
8263// The sign extension has to be explicitly added and is only supported for
8264// one step: byte-to-half, half-to-word, word-to-doubleword.
8265// SCVTF GPR -> FPR is 9 cycles.
8266// SCVTF FPR -> FPR is 4 cycles.
8267// (sign extension with lengthen) SXTL FPR -> FPR is 2 cycles.
8268// Therefore, we can do 2 sign extensions and one SCVTF FPR -> FPR
8269// (2 + 2 + 4 = 8 cycles) and still be faster than the 9-cycle GPR form.
8270// However, this is not good for code size.
8271// 8-bits -> float. 2 size steps up.
8272class SExtLoadi8CVTf32Pat<dag addrmode, dag INST>
8273  : Pat<(f32 (sint_to_fp (i32 (sextloadi8 addrmode)))),
8274        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
8275                            (SSHLLv4i16_shift
8276                              (f64
8277                                (EXTRACT_SUBREG
8278                                  (SSHLLv8i8_shift
8279                                    (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
8280                                        INST,
8281                                        bsub),
8282                                    0),
8283                                  dsub)),
8284                               0),
8285                             ssub)))>,
8286    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
8287
8288def : SExtLoadi8CVTf32Pat<(ro8.Wpat GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext),
8289                          (LDRBroW  GPR64sp:$Rn, GPR32:$Rm, ro8.Wext:$ext)>;
8290def : SExtLoadi8CVTf32Pat<(ro8.Xpat GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext),
8291                          (LDRBroX  GPR64sp:$Rn, GPR64:$Rm, ro8.Xext:$ext)>;
8292def : SExtLoadi8CVTf32Pat<(am_indexed8 GPR64sp:$Rn, uimm12s1:$offset),
8293                          (LDRBui GPR64sp:$Rn, uimm12s1:$offset)>;
8294def : SExtLoadi8CVTf32Pat<(am_unscaled8 GPR64sp:$Rn, simm9:$offset),
8295                          (LDURBi GPR64sp:$Rn, simm9:$offset)>;
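
// For illustration, the register-offset variant selects a sequence along the
// lines of (a sketch, ignoring scheduling):
//
//   ldr   b0, [x0, x1]        // 8-bit FP load (no sign extension available)
//   sshll v0.8h, v0.8b, #0    // explicit sext: byte -> half
//   sshll v0.4s, v0.4h, #0    // explicit sext: half -> word
//   scvtf s0, s0              // 4-cycle FPR -> FPR convert
//
// instead of "ldrsb w8, [x0, x1]; scvtf s0, w8" through the GPRs.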
8296
8297// 16-bits -> float. 1 size step up.
8298class SExtLoadi16CVTf32Pat<dag addrmode, dag INST>
8299  : Pat<(f32 (sint_to_fp (i32 (sextloadi16 addrmode)))),
8300        (SCVTFv1i32 (f32 (EXTRACT_SUBREG
8301                            (SSHLLv4i16_shift
8302                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
8303                                  INST,
8304                                  hsub),
8305                                0),
8306                            ssub)))>,
8307    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
8308
8309def : SExtLoadi16CVTf32Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
8310                           (LDRHroW   GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
8311def : SExtLoadi16CVTf32Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
8312                           (LDRHroX   GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
8313def : SExtLoadi16CVTf32Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
8314                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
8315def : SExtLoadi16CVTf32Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
8316                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
8317
8318// 32-bit to 32-bit conversions are handled in the target-specific
8319// dag combine: performIntToFpCombine.
8320// 64-bit integer to 32-bit floating point is not possible with
8321// SCVTF on floating point registers (both source and destination
8322// must have the same size).
8323
8324// Here are the patterns for 8, 16, 32, and 64-bits to double.
8325// 8-bits -> double. 3 size steps up: give up.
8326// 16-bits -> double. 2 size steps up.
8327class SExtLoadi16CVTf64Pat<dag addrmode, dag INST>
8328  : Pat <(f64 (sint_to_fp (i32 (sextloadi16 addrmode)))),
8329           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
8330                              (SSHLLv2i32_shift
8331                                 (f64
8332                                  (EXTRACT_SUBREG
8333                                    (SSHLLv4i16_shift
8334                                      (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
8335                                        INST,
8336                                        hsub),
8337                                     0),
8338                                   dsub)),
8339                               0),
8340                             dsub)))>,
8341    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
8342
8343def : SExtLoadi16CVTf64Pat<(ro16.Wpat GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext),
8344                           (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro16.Wext:$ext)>;
8345def : SExtLoadi16CVTf64Pat<(ro16.Xpat GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext),
8346                           (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro16.Xext:$ext)>;
8347def : SExtLoadi16CVTf64Pat<(am_indexed16 GPR64sp:$Rn, uimm12s2:$offset),
8348                           (LDRHui GPR64sp:$Rn, uimm12s2:$offset)>;
8349def : SExtLoadi16CVTf64Pat<(am_unscaled16 GPR64sp:$Rn, simm9:$offset),
8350                           (LDURHi GPR64sp:$Rn, simm9:$offset)>;
8351// 32-bits -> double. 1 size step up.
8352class SExtLoadi32CVTf64Pat<dag addrmode, dag INST>
8353  : Pat <(f64 (sint_to_fp (i32 (load addrmode)))),
8354           (SCVTFv1i64 (f64 (EXTRACT_SUBREG
8355                              (SSHLLv2i32_shift
8356                                (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
8357                                  INST,
8358                                  ssub),
8359                               0),
8360                             dsub)))>,
8361    Requires<[NotForCodeSize, UseAlternateSExtLoadCVTF32, HasNEON]>;
8362
8363def : SExtLoadi32CVTf64Pat<(ro32.Wpat GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext),
8364                           (LDRSroW GPR64sp:$Rn, GPR32:$Rm, ro32.Wext:$ext)>;
8365def : SExtLoadi32CVTf64Pat<(ro32.Xpat GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext),
8366                           (LDRSroX GPR64sp:$Rn, GPR64:$Rm, ro32.Xext:$ext)>;
8367def : SExtLoadi32CVTf64Pat<(am_indexed32 GPR64sp:$Rn, uimm12s4:$offset),
8368                           (LDRSui GPR64sp:$Rn, uimm12s4:$offset)>;
8369def : SExtLoadi32CVTf64Pat<(am_unscaled32 GPR64sp:$Rn, simm9:$offset),
8370                           (LDURSi GPR64sp:$Rn, simm9:$offset)>;
8371
8372// 64-bit -> double is handled in the target-specific dag combine:
8373// performIntToFpCombine.
8374
8375
8376//----------------------------------------------------------------------------
8377// AdvSIMD Load-Store Structure
8378//----------------------------------------------------------------------------
8379defm LD1 : SIMDLd1Multiple<"ld1">;
8380defm LD2 : SIMDLd2Multiple<"ld2">;
8381defm LD3 : SIMDLd3Multiple<"ld3">;
8382defm LD4 : SIMDLd4Multiple<"ld4">;
8383
8384defm ST1 : SIMDSt1Multiple<"st1">;
8385defm ST2 : SIMDSt2Multiple<"st2">;
8386defm ST3 : SIMDSt3Multiple<"st3">;
8387defm ST4 : SIMDSt4Multiple<"st4">;
8388
8389class Ld1Pat<ValueType ty, Instruction INST>
8390  : Pat<(ty (load GPR64sp:$Rn)), (INST GPR64sp:$Rn)>;
8391
8392def : Ld1Pat<v16i8, LD1Onev16b>;
8393def : Ld1Pat<v8i16, LD1Onev8h>;
8394def : Ld1Pat<v4i32, LD1Onev4s>;
8395def : Ld1Pat<v2i64, LD1Onev2d>;
8396def : Ld1Pat<v8i8,  LD1Onev8b>;
8397def : Ld1Pat<v4i16, LD1Onev4h>;
8398def : Ld1Pat<v2i32, LD1Onev2s>;
8399def : Ld1Pat<v1i64, LD1Onev1d>;
8400
8401class St1Pat<ValueType ty, Instruction INST>
8402  : Pat<(store ty:$Vt, GPR64sp:$Rn),
8403        (INST ty:$Vt, GPR64sp:$Rn)>;
8404
8405def : St1Pat<v16i8, ST1Onev16b>;
8406def : St1Pat<v8i16, ST1Onev8h>;
8407def : St1Pat<v4i32, ST1Onev4s>;
8408def : St1Pat<v2i64, ST1Onev2d>;
8409def : St1Pat<v8i8,  ST1Onev8b>;
8410def : St1Pat<v4i16, ST1Onev4h>;
8411def : St1Pat<v2i32, ST1Onev2s>;
8412def : St1Pat<v1i64, ST1Onev1d>;
8413
8414//---
8415// Single-element
8416//---
8417
8418defm LD1R          : SIMDLdR<0, 0b110, 0, "ld1r", "One", 1, 2, 4, 8>;
8419defm LD2R          : SIMDLdR<1, 0b110, 0, "ld2r", "Two", 2, 4, 8, 16>;
8420defm LD3R          : SIMDLdR<0, 0b111, 0, "ld3r", "Three", 3, 6, 12, 24>;
8421defm LD4R          : SIMDLdR<1, 0b111, 0, "ld4r", "Four", 4, 8, 16, 32>;
8422let mayLoad = 1, hasSideEffects = 0 in {
8423defm LD1 : SIMDLdSingleBTied<0, 0b000,       "ld1", VecListOneb,   GPR64pi1>;
8424defm LD1 : SIMDLdSingleHTied<0, 0b010, 0,    "ld1", VecListOneh,   GPR64pi2>;
8425defm LD1 : SIMDLdSingleSTied<0, 0b100, 0b00, "ld1", VecListOnes,   GPR64pi4>;
8426defm LD1 : SIMDLdSingleDTied<0, 0b100, 0b01, "ld1", VecListOned,   GPR64pi8>;
8427defm LD2 : SIMDLdSingleBTied<1, 0b000,       "ld2", VecListTwob,   GPR64pi2>;
8428defm LD2 : SIMDLdSingleHTied<1, 0b010, 0,    "ld2", VecListTwoh,   GPR64pi4>;
8429defm LD2 : SIMDLdSingleSTied<1, 0b100, 0b00, "ld2", VecListTwos,   GPR64pi8>;
8430defm LD2 : SIMDLdSingleDTied<1, 0b100, 0b01, "ld2", VecListTwod,   GPR64pi16>;
8431defm LD3 : SIMDLdSingleBTied<0, 0b001,       "ld3", VecListThreeb, GPR64pi3>;
8432defm LD3 : SIMDLdSingleHTied<0, 0b011, 0,    "ld3", VecListThreeh, GPR64pi6>;
8433defm LD3 : SIMDLdSingleSTied<0, 0b101, 0b00, "ld3", VecListThrees, GPR64pi12>;
8434defm LD3 : SIMDLdSingleDTied<0, 0b101, 0b01, "ld3", VecListThreed, GPR64pi24>;
8435defm LD4 : SIMDLdSingleBTied<1, 0b001,       "ld4", VecListFourb,  GPR64pi4>;
8436defm LD4 : SIMDLdSingleHTied<1, 0b011, 0,    "ld4", VecListFourh,  GPR64pi8>;
8437defm LD4 : SIMDLdSingleSTied<1, 0b101, 0b00, "ld4", VecListFours,  GPR64pi16>;
8438defm LD4 : SIMDLdSingleDTied<1, 0b101, 0b01, "ld4", VecListFourd,  GPR64pi32>;
8439}
8440
8441def : Pat<(v8i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
8442          (LD1Rv8b GPR64sp:$Rn)>;
8443def : Pat<(v16i8 (AArch64dup (i32 (extloadi8 GPR64sp:$Rn)))),
8444          (LD1Rv16b GPR64sp:$Rn)>;
8445def : Pat<(v4i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
8446          (LD1Rv4h GPR64sp:$Rn)>;
8447def : Pat<(v8i16 (AArch64dup (i32 (extloadi16 GPR64sp:$Rn)))),
8448          (LD1Rv8h GPR64sp:$Rn)>;
8449def : Pat<(v2i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
8450          (LD1Rv2s GPR64sp:$Rn)>;
8451def : Pat<(v4i32 (AArch64dup (i32 (load GPR64sp:$Rn)))),
8452          (LD1Rv4s GPR64sp:$Rn)>;
8453def : Pat<(v2i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
8454          (LD1Rv2d GPR64sp:$Rn)>;
8455def : Pat<(v1i64 (AArch64dup (i64 (load GPR64sp:$Rn)))),
8456          (LD1Rv1d GPR64sp:$Rn)>;
8457
8458def : Pat<(v8i8 (AArch64duplane8 (v16i8 (insert_subvector undef, (v8i8 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
8459          (LD1Rv8b GPR64sp:$Rn)>;
8460def : Pat<(v16i8 (AArch64duplane8 (v16i8 (load GPR64sp:$Rn)), (i64 0))),
8461          (LD1Rv16b GPR64sp:$Rn)>;
8462def : Pat<(v4i16 (AArch64duplane16 (v8i16 (insert_subvector undef, (v4i16 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
8463          (LD1Rv4h GPR64sp:$Rn)>;
8464def : Pat<(v8i16 (AArch64duplane16 (v8i16 (load GPR64sp:$Rn)), (i64 0))),
8465          (LD1Rv8h GPR64sp:$Rn)>;
8466def : Pat<(v2i32 (AArch64duplane32 (v4i32 (insert_subvector undef, (v2i32 (load GPR64sp:$Rn)), (i64 0))), (i64 0))),
8467          (LD1Rv2s GPR64sp:$Rn)>;
8468def : Pat<(v4i32 (AArch64duplane32 (v4i32 (load GPR64sp:$Rn)), (i64 0))),
8469          (LD1Rv4s GPR64sp:$Rn)>;
8470def : Pat<(v2i64 (AArch64duplane64 (v2i64 (load GPR64sp:$Rn)), (i64 0))),
8471          (LD1Rv2d GPR64sp:$Rn)>;
8472
8473// Grab the floating-point versions too.
8474def : Pat<(v2f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
8475          (LD1Rv2s GPR64sp:$Rn)>;
8476def : Pat<(v4f32 (AArch64dup (f32 (load GPR64sp:$Rn)))),
8477          (LD1Rv4s GPR64sp:$Rn)>;
8478def : Pat<(v2f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
8479          (LD1Rv2d GPR64sp:$Rn)>;
8480def : Pat<(v1f64 (AArch64dup (f64 (load GPR64sp:$Rn)))),
8481          (LD1Rv1d GPR64sp:$Rn)>;
8482def : Pat<(v4f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
8483          (LD1Rv4h GPR64sp:$Rn)>;
8484def : Pat<(v8f16 (AArch64dup (f16 (load GPR64sp:$Rn)))),
8485          (LD1Rv8h GPR64sp:$Rn)>;
8486def : Pat<(v4bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
8487          (LD1Rv4h GPR64sp:$Rn)>;
8488def : Pat<(v8bf16 (AArch64dup (bf16 (load GPR64sp:$Rn)))),
8489          (LD1Rv8h GPR64sp:$Rn)>;
8490
8491class Ld1Lane128Pat<SDPatternOperator scalar_load, Operand VecIndex,
8492                    ValueType VTy, ValueType STy, Instruction LD1>
8493  : Pat<(vector_insert (VTy VecListOne128:$Rd),
8494           (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
8495        (LD1 VecListOne128:$Rd, VecIndex:$idx, GPR64sp:$Rn)>;
8496
8497def : Ld1Lane128Pat<extloadi8,  VectorIndexB, v16i8, i32, LD1i8>;
8498def : Ld1Lane128Pat<extloadi16, VectorIndexH, v8i16, i32, LD1i16>;
8499def : Ld1Lane128Pat<load,       VectorIndexS, v4i32, i32, LD1i32>;
8500def : Ld1Lane128Pat<load,       VectorIndexS, v4f32, f32, LD1i32>;
8501def : Ld1Lane128Pat<load,       VectorIndexD, v2i64, i64, LD1i64>;
8502def : Ld1Lane128Pat<load,       VectorIndexD, v2f64, f64, LD1i64>;
8503def : Ld1Lane128Pat<load,       VectorIndexH, v8f16, f16, LD1i16>;
8504def : Ld1Lane128Pat<load,       VectorIndexH, v8bf16, bf16, LD1i16>;
8505
8506// Generate LD1 for an extload when the memory type does not match the
8507// destination element type, for example:
8508//
8509//   (v4i32 (insert_vector_elt (load anyext from i8) idx))
8510//
8511// In this case, the index must be adjusted to match the LD1 element type.
8512//
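// Concretely, the byte-lane index is the original lane index scaled by the
// ratio of the element sizes: inserting an extloadi8 into lane 2 of a v4i32
// uses LD1i8 with lane 2 * 4 = 8, which the SDNodeXForms below compute.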
8513class Ld1Lane128IdxOpPat<SDPatternOperator scalar_load, Operand
8514                         VecIndex, ValueType VTy, ValueType STy,
8515                         Instruction LD1, SDNodeXForm IdxOp>
8516  : Pat<(vector_insert (VTy VecListOne128:$Rd),
8517                       (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
8518        (LD1 VecListOne128:$Rd, (IdxOp VecIndex:$idx), GPR64sp:$Rn)>;
8519
8520class Ld1Lane64IdxOpPat<SDPatternOperator scalar_load, Operand VecIndex,
8521                        ValueType VTy, ValueType STy, Instruction LD1,
8522                        SDNodeXForm IdxOp>
8523  : Pat<(vector_insert (VTy VecListOne64:$Rd),
8524                       (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
8525        (EXTRACT_SUBREG
8526            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
8527                (IdxOp VecIndex:$idx), GPR64sp:$Rn),
8528            dsub)>;
8529
8530def VectorIndexStoH : SDNodeXForm<imm, [{
8531  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
8532}]>;
8533def VectorIndexStoB : SDNodeXForm<imm, [{
8534  return CurDAG->getTargetConstant(N->getZExtValue() * 4, SDLoc(N), MVT::i64);
8535}]>;
8536def VectorIndexHtoB : SDNodeXForm<imm, [{
8537  return CurDAG->getTargetConstant(N->getZExtValue() * 2, SDLoc(N), MVT::i64);
8538}]>;
8539
8540def : Ld1Lane128IdxOpPat<extloadi16, VectorIndexS, v4i32, i32, LD1i16, VectorIndexStoH>;
8541def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexS, v4i32, i32, LD1i8, VectorIndexStoB>;
8542def : Ld1Lane128IdxOpPat<extloadi8, VectorIndexH, v8i16, i32, LD1i8, VectorIndexHtoB>;
8543
8544def : Ld1Lane64IdxOpPat<extloadi16, VectorIndexS, v2i32, i32, LD1i16, VectorIndexStoH>;
8545def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexS, v2i32, i32, LD1i8, VectorIndexStoB>;
8546def : Ld1Lane64IdxOpPat<extloadi8, VectorIndexH, v4i16, i32, LD1i8, VectorIndexHtoB>;
8547
8548// Same as above, but the first element is populated using
8549// scalar_to_vector + insert_subvector instead of insert_vector_elt.
8550let Predicates = [HasNEON] in {
8551  class Ld1Lane128FirstElm<ValueType ResultTy, ValueType VecTy,
8552                          SDPatternOperator ExtLoad, Instruction LD1>
8553    : Pat<(ResultTy (scalar_to_vector (i32 (ExtLoad GPR64sp:$Rn)))),
8554            (ResultTy (EXTRACT_SUBREG
8555              (LD1 (VecTy (IMPLICIT_DEF)), 0, GPR64sp:$Rn), dsub))>;
8556
8557  def : Ld1Lane128FirstElm<v2i32, v8i16, extloadi16, LD1i16>;
8558  def : Ld1Lane128FirstElm<v2i32, v16i8, extloadi8, LD1i8>;
8559  def : Ld1Lane128FirstElm<v4i16, v16i8, extloadi8, LD1i8>;
8560}
8561class Ld1Lane64Pat<SDPatternOperator scalar_load, Operand VecIndex,
8562                   ValueType VTy, ValueType STy, Instruction LD1>
8563  : Pat<(vector_insert (VTy VecListOne64:$Rd),
8564           (STy (scalar_load GPR64sp:$Rn)), (i64 VecIndex:$idx)),
8565        (EXTRACT_SUBREG
8566            (LD1 (SUBREG_TO_REG (i32 0), VecListOne64:$Rd, dsub),
8567                          VecIndex:$idx, GPR64sp:$Rn),
8568            dsub)>;
8569
8570def : Ld1Lane64Pat<extloadi8,  VectorIndexB, v8i8,  i32, LD1i8>;
8571def : Ld1Lane64Pat<extloadi16, VectorIndexH, v4i16, i32, LD1i16>;
8572def : Ld1Lane64Pat<load,       VectorIndexS, v2i32, i32, LD1i32>;
8573def : Ld1Lane64Pat<load,       VectorIndexS, v2f32, f32, LD1i32>;
8574def : Ld1Lane64Pat<load,       VectorIndexH, v4f16, f16, LD1i16>;
8575def : Ld1Lane64Pat<load,       VectorIndexH, v4bf16, bf16, LD1i16>;
8576
8577
8578defm LD1 : SIMDLdSt1SingleAliases<"ld1">;
8579defm LD2 : SIMDLdSt2SingleAliases<"ld2">;
8580defm LD3 : SIMDLdSt3SingleAliases<"ld3">;
8581defm LD4 : SIMDLdSt4SingleAliases<"ld4">;
8582
8583// Stores
8584defm ST1 : SIMDStSingleB<0, 0b000,       "st1", VecListOneb, GPR64pi1>;
8585defm ST1 : SIMDStSingleH<0, 0b010, 0,    "st1", VecListOneh, GPR64pi2>;
8586defm ST1 : SIMDStSingleS<0, 0b100, 0b00, "st1", VecListOnes, GPR64pi4>;
8587defm ST1 : SIMDStSingleD<0, 0b100, 0b01, "st1", VecListOned, GPR64pi8>;
8588
8589let AddedComplexity = 19 in
8590class St1Lane128Pat<SDPatternOperator scalar_store, Operand VecIndex,
8591                    ValueType VTy, ValueType STy, Instruction ST1>
8592  : Pat<(scalar_store
8593             (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
8594             GPR64sp:$Rn),
8595        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn)>;
8596
8597def : St1Lane128Pat<truncstorei8,  VectorIndexB, v16i8, i32, ST1i8>;
8598def : St1Lane128Pat<truncstorei16, VectorIndexH, v8i16, i32, ST1i16>;
8599def : St1Lane128Pat<store,         VectorIndexS, v4i32, i32, ST1i32>;
8600def : St1Lane128Pat<store,         VectorIndexS, v4f32, f32, ST1i32>;
8601def : St1Lane128Pat<store,         VectorIndexD, v2i64, i64, ST1i64>;
8602def : St1Lane128Pat<store,         VectorIndexD, v2f64, f64, ST1i64>;
8603def : St1Lane128Pat<store,         VectorIndexH, v8f16, f16, ST1i16>;
8604def : St1Lane128Pat<store,         VectorIndexH, v8bf16, bf16, ST1i16>;
8605
8606let AddedComplexity = 19 in
8607class St1Lane64Pat<SDPatternOperator scalar_store, Operand VecIndex,
8608                   ValueType VTy, ValueType STy, Instruction ST1>
8609  : Pat<(scalar_store
8610             (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
8611             GPR64sp:$Rn),
8612        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
8613             VecIndex:$idx, GPR64sp:$Rn)>;
8614
8615def : St1Lane64Pat<truncstorei8,  VectorIndexB, v8i8, i32, ST1i8>;
8616def : St1Lane64Pat<truncstorei16, VectorIndexH, v4i16, i32, ST1i16>;
8617def : St1Lane64Pat<store,         VectorIndexS, v2i32, i32, ST1i32>;
8618def : St1Lane64Pat<store,         VectorIndexS, v2f32, f32, ST1i32>;
8619def : St1Lane64Pat<store,         VectorIndexH, v4f16, f16, ST1i16>;
8620def : St1Lane64Pat<store,         VectorIndexH, v4bf16, bf16, ST1i16>;
8621
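// Post-indexed ST1 of a single lane: the fixed-immediate offset form is the
// encoding with XZR as Rm, which is why the patterns below pass XZR for the
// constant-offset case.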
8622multiclass St1LanePost64Pat<SDPatternOperator scalar_store, Operand VecIndex,
8623                             ValueType VTy, ValueType STy, Instruction ST1,
8624                             int offset> {
8625  def : Pat<(scalar_store
8626              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
8627              GPR64sp:$Rn, offset),
8628        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
8629             VecIndex:$idx, GPR64sp:$Rn, XZR)>;
8630
8631  def : Pat<(scalar_store
8632              (STy (vector_extract (VTy VecListOne64:$Vt), VecIndex:$idx)),
8633              GPR64sp:$Rn, GPR64:$Rm),
8634        (ST1 (SUBREG_TO_REG (i32 0), VecListOne64:$Vt, dsub),
8635             VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
8636}
8637
8638defm : St1LanePost64Pat<post_truncsti8, VectorIndexB, v8i8, i32, ST1i8_POST, 1>;
8639defm : St1LanePost64Pat<post_truncsti16, VectorIndexH, v4i16, i32, ST1i16_POST,
8640                        2>;
8641defm : St1LanePost64Pat<post_store, VectorIndexS, v2i32, i32, ST1i32_POST, 4>;
8642defm : St1LanePost64Pat<post_store, VectorIndexS, v2f32, f32, ST1i32_POST, 4>;
8643defm : St1LanePost64Pat<post_store, VectorIndexD, v1i64, i64, ST1i64_POST, 8>;
8644defm : St1LanePost64Pat<post_store, VectorIndexD, v1f64, f64, ST1i64_POST, 8>;
8645defm : St1LanePost64Pat<post_store, VectorIndexH, v4f16, f16, ST1i16_POST, 2>;
8646defm : St1LanePost64Pat<post_store, VectorIndexH, v4bf16, bf16, ST1i16_POST, 2>;
8647
8648multiclass St1LanePost128Pat<SDPatternOperator scalar_store, Operand VecIndex,
8649                             ValueType VTy, ValueType STy, Instruction ST1,
8650                             int offset> {
8651  def : Pat<(scalar_store
8652              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
8653              GPR64sp:$Rn, offset),
8654        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, XZR)>;
8655
8656  def : Pat<(scalar_store
8657              (STy (vector_extract (VTy VecListOne128:$Vt), VecIndex:$idx)),
8658              GPR64sp:$Rn, GPR64:$Rm),
8659        (ST1 VecListOne128:$Vt, VecIndex:$idx, GPR64sp:$Rn, $Rm)>;
8660}
8661
8662defm : St1LanePost128Pat<post_truncsti8, VectorIndexB, v16i8, i32, ST1i8_POST,
8663                         1>;
8664defm : St1LanePost128Pat<post_truncsti16, VectorIndexH, v8i16, i32, ST1i16_POST,
8665                         2>;
8666defm : St1LanePost128Pat<post_store, VectorIndexS, v4i32, i32, ST1i32_POST, 4>;
8667defm : St1LanePost128Pat<post_store, VectorIndexS, v4f32, f32, ST1i32_POST, 4>;
8668defm : St1LanePost128Pat<post_store, VectorIndexD, v2i64, i64, ST1i64_POST, 8>;
8669defm : St1LanePost128Pat<post_store, VectorIndexD, v2f64, f64, ST1i64_POST, 8>;
8670defm : St1LanePost128Pat<post_store, VectorIndexH, v8f16, f16, ST1i16_POST, 2>;
8671defm : St1LanePost128Pat<post_store, VectorIndexH, v8bf16, bf16, ST1i16_POST, 2>;
8672
8673let mayStore = 1, hasSideEffects = 0 in {
8674defm ST2 : SIMDStSingleB<1, 0b000,       "st2", VecListTwob,   GPR64pi2>;
8675defm ST2 : SIMDStSingleH<1, 0b010, 0,    "st2", VecListTwoh,   GPR64pi4>;
8676defm ST2 : SIMDStSingleS<1, 0b100, 0b00, "st2", VecListTwos,   GPR64pi8>;
8677defm ST2 : SIMDStSingleD<1, 0b100, 0b01, "st2", VecListTwod,   GPR64pi16>;
8678defm ST3 : SIMDStSingleB<0, 0b001,       "st3", VecListThreeb, GPR64pi3>;
8679defm ST3 : SIMDStSingleH<0, 0b011, 0,    "st3", VecListThreeh, GPR64pi6>;
8680defm ST3 : SIMDStSingleS<0, 0b101, 0b00, "st3", VecListThrees, GPR64pi12>;
8681defm ST3 : SIMDStSingleD<0, 0b101, 0b01, "st3", VecListThreed, GPR64pi24>;
8682defm ST4 : SIMDStSingleB<1, 0b001,       "st4", VecListFourb,  GPR64pi4>;
8683defm ST4 : SIMDStSingleH<1, 0b011, 0,    "st4", VecListFourh,  GPR64pi8>;
8684defm ST4 : SIMDStSingleS<1, 0b101, 0b00, "st4", VecListFours,  GPR64pi16>;
8685defm ST4 : SIMDStSingleD<1, 0b101, 0b01, "st4", VecListFourd,  GPR64pi32>;
8686}
8687
8688defm ST1 : SIMDLdSt1SingleAliases<"st1">;
8689defm ST2 : SIMDLdSt2SingleAliases<"st2">;
8690defm ST3 : SIMDLdSt3SingleAliases<"st3">;
8691defm ST4 : SIMDLdSt4SingleAliases<"st4">;
8692
8693//----------------------------------------------------------------------------
8694// Crypto extensions
8695//----------------------------------------------------------------------------
8696
8697let Predicates = [HasAES] in {
8698let isCommutable = 1 in {
8699def AESErr   : AESTiedInst<0b0100, "aese",   int_aarch64_crypto_aese>;
8700def AESDrr   : AESTiedInst<0b0101, "aesd",   int_aarch64_crypto_aesd>;
8701}
8702def AESMCrr  : AESInst<    0b0110, "aesmc",  int_aarch64_crypto_aesmc>;
8703def AESIMCrr : AESInst<    0b0111, "aesimc", int_aarch64_crypto_aesimc>;
8704}
8705
8706// Pseudo instructions for AESMCrr/AESIMCrr with a register constraint required
8707// for AES fusion on some CPUs.
8708let hasSideEffects = 0, mayStore = 0, mayLoad = 0, Predicates = [HasAES] in {
8709def AESMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
8710                        Sched<[WriteVq]>;
8711def AESIMCrrTied: Pseudo<(outs V128:$Rd), (ins V128:$Rn), [], "$Rn = $Rd">,
8712                         Sched<[WriteVq]>;
8713}
8714
8715// Only use constrained versions of AES(I)MC instructions if they are paired with
8716// AESE/AESD.
8717def : Pat<(v16i8 (int_aarch64_crypto_aesmc
8718            (v16i8 (int_aarch64_crypto_aese (v16i8 V128:$src1),
8719                                            (v16i8 V128:$src2))))),
8720          (v16i8 (AESMCrrTied (v16i8 (AESErr (v16i8 V128:$src1),
8721                                             (v16i8 V128:$src2)))))>,
8722          Requires<[HasFuseAES]>;
8723
8724def : Pat<(v16i8 (int_aarch64_crypto_aesimc
8725            (v16i8 (int_aarch64_crypto_aesd (v16i8 V128:$src1),
8726                                            (v16i8 V128:$src2))))),
8727          (v16i8 (AESIMCrrTied (v16i8 (AESDrr (v16i8 V128:$src1),
8728                                              (v16i8 V128:$src2)))))>,
8729          Requires<[HasFuseAES]>;
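// With the tied pseudos the register allocator must keep the AESE/AESD result
// in the same register consumed by the MC/IMC, producing back-to-back pairs
// such as "aese v0.16b, v1.16b; aesmc v0.16b, v0.16b" that the core can fuse.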
8730
8731let Predicates = [HasSHA2] in {
8732def SHA1Crrr     : SHATiedInstQSV<0b000, "sha1c",   int_aarch64_crypto_sha1c>;
8733def SHA1Prrr     : SHATiedInstQSV<0b001, "sha1p",   int_aarch64_crypto_sha1p>;
8734def SHA1Mrrr     : SHATiedInstQSV<0b010, "sha1m",   int_aarch64_crypto_sha1m>;
8735def SHA1SU0rrr   : SHATiedInstVVV<0b011, "sha1su0", int_aarch64_crypto_sha1su0>;
8736def SHA256Hrrr   : SHATiedInstQQV<0b100, "sha256h", int_aarch64_crypto_sha256h>;
8737def SHA256H2rrr  : SHATiedInstQQV<0b101, "sha256h2", int_aarch64_crypto_sha256h2>;
8738def SHA256SU1rrr : SHATiedInstVVV<0b110, "sha256su1", int_aarch64_crypto_sha256su1>;
8739
8740def SHA1Hrr     : SHAInstSS<    0b0000, "sha1h",    int_aarch64_crypto_sha1h>;
8741def SHA1SU1rr   : SHATiedInstVV<0b0001, "sha1su1",  int_aarch64_crypto_sha1su1>;
8742def SHA256SU0rr : SHATiedInstVV<0b0010, "sha256su0", int_aarch64_crypto_sha256su0>;
8743}
8744
8745//----------------------------------------------------------------------------
8746// Compiler-pseudos
8747//----------------------------------------------------------------------------
8748// FIXME: Like for X86, these should go in their own separate .td file.
8749
8750// For an anyext, we don't care what the high bits are, so we can perform an
8751// INSERT_SUBREG into an IMPLICIT_DEF.
8752def : Pat<(i64 (anyext GPR32:$src)),
8753          (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32)>;
8754
8755// When we need to explicitly zero-extend, we use a 32-bit MOV instruction and
8756// then assert the extension has happened.
8757def : Pat<(i64 (zext GPR32:$src)),
8758          (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
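// For example, (i64 (zext i32 %x)) becomes "mov w0, w0"; writing a W register
// implicitly zeroes bits [63:32] of the corresponding X register.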
8759
8760// To sign extend, we use a signed bitfield move instruction (SBFM) on the
8761// containing super-reg.
8762def : Pat<(i64 (sext GPR32:$src)),
8763   (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$src, sub_32), 0, 31)>;
8764def : Pat<(i64 (sext_inreg GPR64:$src, i32)), (SBFMXri GPR64:$src, 0, 31)>;
8765def : Pat<(i64 (sext_inreg GPR64:$src, i16)), (SBFMXri GPR64:$src, 0, 15)>;
8766def : Pat<(i64 (sext_inreg GPR64:$src, i8)),  (SBFMXri GPR64:$src, 0, 7)>;
8767def : Pat<(i64 (sext_inreg GPR64:$src, i1)),  (SBFMXri GPR64:$src, 0, 0)>;
8768def : Pat<(i32 (sext_inreg GPR32:$src, i16)), (SBFMWri GPR32:$src, 0, 15)>;
8769def : Pat<(i32 (sext_inreg GPR32:$src, i8)),  (SBFMWri GPR32:$src, 0, 7)>;
8770def : Pat<(i32 (sext_inreg GPR32:$src, i1)),  (SBFMWri GPR32:$src, 0, 0)>;
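// These SBFM forms correspond to the sxtb/sxth/sxtw aliases, e.g.
// (SBFMXri $src, 0, 7) prints as "sxtb x0, w0".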
8771
8772def : Pat<(shl (sext_inreg GPR32:$Rn, i8), (i64 imm0_31:$imm)),
8773          (SBFMWri GPR32:$Rn, (i64 (i32shift_a       imm0_31:$imm)),
8774                              (i64 (i32shift_sext_i8 imm0_31:$imm)))>;
8775def : Pat<(shl (sext_inreg GPR64:$Rn, i8), (i64 imm0_63:$imm)),
8776          (SBFMXri GPR64:$Rn, (i64 (i64shift_a imm0_63:$imm)),
8777                              (i64 (i64shift_sext_i8 imm0_63:$imm)))>;
8778
8779def : Pat<(shl (sext_inreg GPR32:$Rn, i16), (i64 imm0_31:$imm)),
8780          (SBFMWri GPR32:$Rn, (i64 (i32shift_a        imm0_31:$imm)),
8781                              (i64 (i32shift_sext_i16 imm0_31:$imm)))>;
8782def : Pat<(shl (sext_inreg GPR64:$Rn, i16), (i64 imm0_63:$imm)),
8783          (SBFMXri GPR64:$Rn, (i64 (i64shift_a        imm0_63:$imm)),
8784                              (i64 (i64shift_sext_i16 imm0_63:$imm)))>;
8785
8786def : Pat<(shl (i64 (sext GPR32:$Rn)), (i64 imm0_63:$imm)),
8787          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
8788                   (i64 (i64shift_a        imm0_63:$imm)),
8789                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
8790
8791def : Pat<(shl (i64 (zext GPR32:$Rn)), (i64 imm0_63:$imm)),
8792          (UBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
8793                   (i64 (i64shift_a        imm0_63:$imm)),
8794                   (i64 (i64shift_sext_i32 imm0_63:$imm)))>;
8795
8796// sra patterns have an AddedComplexity of 10, so make sure we have a higher
8797// AddedComplexity for the following patterns since we want to match sext + sra
8798// patterns before we attempt to match a single sra node.
8799let AddedComplexity = 20 in {
8800// We support all sext + sra combinations that preserve at least one bit of
8801// the original value being sign extended, i.e. shifts of up to bitwidth-1
8802// bits.
8803def : Pat<(sra (sext_inreg GPR32:$Rn, i8), (i64 imm0_7:$imm)),
8804          (SBFMWri GPR32:$Rn, (i64 imm0_7:$imm), 7)>;
8805def : Pat<(sra (sext_inreg GPR64:$Rn, i8), (i64 imm0_7:$imm)),
8806          (SBFMXri GPR64:$Rn, (i64 imm0_7:$imm), 7)>;
8807
8808def : Pat<(sra (sext_inreg GPR32:$Rn, i16), (i64 imm0_15:$imm)),
8809          (SBFMWri GPR32:$Rn, (i64 imm0_15:$imm), 15)>;
8810def : Pat<(sra (sext_inreg GPR64:$Rn, i16), (i64 imm0_15:$imm)),
8811          (SBFMXri GPR64:$Rn, (i64 imm0_15:$imm), 15)>;
8812
8813def : Pat<(sra (i64 (sext GPR32:$Rn)), (i64 imm0_31:$imm)),
8814          (SBFMXri (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GPR32:$Rn, sub_32),
8815                   (i64 imm0_31:$imm), 31)>;
8816} // AddedComplexity = 20
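// For example, (sra (sext_inreg w0, i8), 3) selects to (SBFMWri w0, 3, 7),
// i.e. "sbfx w0, w0, #3, #5", extracting the 5 surviving bits of the byte.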
8817
8818// To truncate, we can simply extract from a subregister.
8819def : Pat<(i32 (trunc GPR64sp:$src)),
8820          (i32 (EXTRACT_SUBREG GPR64sp:$src, sub_32))>;
8821
8822// __builtin_trap() uses the BRK instruction on AArch64.
8823def : Pat<(trap), (BRK 1)>;
8824def : Pat<(debugtrap), (BRK 0xF000)>;
8825
8826def ubsan_trap_xform : SDNodeXForm<timm, [{
8827  return CurDAG->getTargetConstant(N->getZExtValue() | ('U' << 8), SDLoc(N), MVT::i32);
8828}]>;
8829
8830def gi_ubsan_trap_xform : GICustomOperandRenderer<"renderUbsanTrap">,
8831  GISDNodeXFormEquiv<ubsan_trap_xform>;
8832
8833def ubsan_trap_imm : TImmLeaf<i32, [{
8834  return isUInt<8>(Imm);
8835}], ubsan_trap_xform>;
8836
8837def : Pat<(ubsantrap ubsan_trap_imm:$kind), (BRK ubsan_trap_imm:$kind)>;
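// e.g. a ubsantrap with kind 0 is encoded as "brk #0x5500", since
// 'U' == 0x55 occupies the high byte of the 16-bit BRK immediate.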
8838
8839// Multiply-high patterns, which multiply the lower subvector using smull/umull
8840// and the upper subvector with smull2/umull2, then shuffle the high halves of
8841// both results together.
8842def : Pat<(v16i8 (mulhs V128:$Rn, V128:$Rm)),
8843          (UZP2v16i8
8844           (SMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
8845                            (EXTRACT_SUBREG V128:$Rm, dsub)),
8846           (SMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
8847def : Pat<(v8i16 (mulhs V128:$Rn, V128:$Rm)),
8848          (UZP2v8i16
8849           (SMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
8850                             (EXTRACT_SUBREG V128:$Rm, dsub)),
8851           (SMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
8852def : Pat<(v4i32 (mulhs V128:$Rn, V128:$Rm)),
8853          (UZP2v4i32
8854           (SMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
8855                             (EXTRACT_SUBREG V128:$Rm, dsub)),
8856           (SMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
8857
8858def : Pat<(v16i8 (mulhu V128:$Rn, V128:$Rm)),
8859          (UZP2v16i8
8860           (UMULLv8i8_v8i16 (EXTRACT_SUBREG V128:$Rn, dsub),
8861                            (EXTRACT_SUBREG V128:$Rm, dsub)),
8862           (UMULLv16i8_v8i16 V128:$Rn, V128:$Rm))>;
8863def : Pat<(v8i16 (mulhu V128:$Rn, V128:$Rm)),
8864          (UZP2v8i16
8865           (UMULLv4i16_v4i32 (EXTRACT_SUBREG V128:$Rn, dsub),
8866                             (EXTRACT_SUBREG V128:$Rm, dsub)),
8867           (UMULLv8i16_v4i32 V128:$Rn, V128:$Rm))>;
8868def : Pat<(v4i32 (mulhu V128:$Rn, V128:$Rm)),
8869          (UZP2v4i32
8870           (UMULLv2i32_v2i64 (EXTRACT_SUBREG V128:$Rn, dsub),
8871                             (EXTRACT_SUBREG V128:$Rm, dsub)),
8872           (UMULLv4i32_v2i64 V128:$Rn, V128:$Rm))>;
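// e.g. a v4i32 mulhs becomes roughly:
//   smull  v2.2d, v0.2s, v1.2s
//   smull2 v3.2d, v0.4s, v1.4s
//   uzp2   v0.4s, v2.4s, v3.4s
// where uzp2 collects the odd (high) 32-bit halves of the 64-bit products.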
8873
8874// Conversions between AdvSIMD types of the same register size are free. But
8875// because we need a consistent lane ordering, in big endian many conversions
8876// require one or more REV instructions.
8877//
8878// Consider a simple memory load followed by a bitconvert then a store.
8879//   v0 = load v2i32
8880//   v1 = BITCAST v2i32 v0 to v4i16
8881//        store v4i16 v1
8882//
8883// In big endian mode every memory access has an implicit byte swap. LDR and
8884// STR do a 64-bit byte swap, whereas LD1/ST1 do a byte swap per lane - that
8885// is, they treat the vector as a sequence of elements to be byte-swapped.
8886// The two pairs of instructions are fundamentally incompatible. We've decided
8887// to use only LD1/ST1 to simplify the compiler implementation.
8888//
8889// LD1/ST1 perform the equivalent of a sequence of LDR/STR + REV. This makes
8890// the original code sequence:
8891//   v0 = load v2i32
8892//   v1 = REV v2i32                  (implicit)
8893//   v2 = BITCAST v2i32 v1 to v4i16
8894//   v3 = REV v4i16 v2               (implicit)
8895//        store v4i16 v3
8896//
8897// But this is now broken - the value stored is different to the value loaded
8898// due to lane reordering. To fix this, on every BITCAST we must perform two
8899// other REVs:
8900//   v0 = load v2i32
8901//   v1 = REV v2i32                  (implicit)
8902//   v2 = REV v2i32
8903//   v3 = BITCAST v2i32 v2 to v4i16
8904//   v4 = REV v4i16
8905//   v5 = REV v4i16 v4               (implicit)
8906//        store v4i16 v5
8907//
8908// This means an extra two instructions, but actually in most cases the two REV
8909// instructions can be combined into one. For example:
8910//   (REV64_2s (REV64_4h X)) === (REV32_4h X)
8911//
8912// There is also no 128-bit REV instruction. This must be synthesized with an
8913// EXT instruction.
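// For example, the 128-bit patterns below synthesize it as
//   (EXTv16i8 (REV64 X), (REV64 X), (i32 8))
// i.e. reverse within each 64-bit half, then swap the halves.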
8914//
8915// Most bitconverts require some sort of conversion. The only exceptions are:
8916//   a) Identity conversions - vNfX <-> vNiX
8917//   b) Single-lane-to-scalar - v1fX <-> fX or v1iX <-> iX
8918//
8919
8920// Natural vector casts (64 bit)
8921foreach VT = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
8922  foreach VT2 = [ v8i8, v4i16, v4f16, v4bf16, v2i32, v2f32, v1i64, v1f64, f64 ] in
8923    def : Pat<(VT (AArch64NvCast (VT2 FPR64:$src))),
8924              (VT FPR64:$src)>;
8925
8926// Natural vector casts (128 bit)
8927foreach VT = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
8928  foreach VT2 = [ v16i8, v8i16, v8f16, v8bf16, v4i32, v4f32, v2i64, v2f64 ] in
8929    def : Pat<(VT (AArch64NvCast (VT2 FPR128:$src))),
8930              (VT FPR128:$src)>;
8931
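// Bitcasts between 64-bit GPRs and 64-bit vector registers are plain register
// copies in little endian; big endian additionally needs a REV64 to restore
// the expected lane order.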
8932let Predicates = [IsLE] in {
8933def : Pat<(v8i8  (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8934def : Pat<(v4i16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8935def : Pat<(v2i32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8936def : Pat<(v4f16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8937def : Pat<(v4bf16 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8938def : Pat<(v2f32 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8939
8940def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
8941          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8942def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
8943          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8944def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
8945          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8946def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
8947          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8948def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
8949          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8950def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
8951          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8952def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
8953          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8954}
8955let Predicates = [IsBE] in {
8956def : Pat<(v8i8  (bitconvert GPR64:$Xn)),
8957                 (REV64v8i8 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8958def : Pat<(v4i16 (bitconvert GPR64:$Xn)),
8959                 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8960def : Pat<(v2i32 (bitconvert GPR64:$Xn)),
8961                 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8962def : Pat<(v4f16 (bitconvert GPR64:$Xn)),
8963                 (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8964def : Pat<(v4bf16 (bitconvert GPR64:$Xn)),
8965                  (REV64v4i16 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8966def : Pat<(v2f32 (bitconvert GPR64:$Xn)),
8967                 (REV64v2i32 (COPY_TO_REGCLASS GPR64:$Xn, FPR64))>;
8968
8969def : Pat<(i64 (bitconvert (v8i8  V64:$Vn))),
8970          (REV64v8i8 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8971def : Pat<(i64 (bitconvert (v4i16 V64:$Vn))),
8972          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8973def : Pat<(i64 (bitconvert (v2i32 V64:$Vn))),
8974          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8975def : Pat<(i64 (bitconvert (v4f16 V64:$Vn))),
8976          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8977def : Pat<(i64 (bitconvert (v4bf16 V64:$Vn))),
8978          (REV64v4i16 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8979def : Pat<(i64 (bitconvert (v2f32 V64:$Vn))),
8980          (REV64v2i32 (COPY_TO_REGCLASS V64:$Vn, GPR64))>;
8981}
8982def : Pat<(v1i64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8983def : Pat<(v1f64 (bitconvert GPR64:$Xn)), (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8984def : Pat<(i64 (bitconvert (v1i64 V64:$Vn))),
8985          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
8986def : Pat<(v1i64 (scalar_to_vector GPR64:$Xn)),
8987          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8988def : Pat<(v1f64 (scalar_to_vector GPR64:$Xn)),
8989          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8990def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Xn))), (v1f64 FPR64:$Xn)>;
8991
8992def : Pat<(f32 (bitconvert (i32 GPR32:$Xn))),
8993          (COPY_TO_REGCLASS GPR32:$Xn, FPR32)>;
8994def : Pat<(i32 (bitconvert (f32 FPR32:$Xn))),
8995          (COPY_TO_REGCLASS FPR32:$Xn, GPR32)>;
8996def : Pat<(f64 (bitconvert (i64 GPR64:$Xn))),
8997          (COPY_TO_REGCLASS GPR64:$Xn, FPR64)>;
8998def : Pat<(i64 (bitconvert (f64 FPR64:$Xn))),
8999          (COPY_TO_REGCLASS FPR64:$Xn, GPR64)>;
9000def : Pat<(i64 (bitconvert (v1f64 V64:$Vn))),
9001          (COPY_TO_REGCLASS V64:$Vn, GPR64)>;
9002
9003def : Pat<(f16 (bitconvert (bf16 FPR16:$src))), (f16 FPR16:$src)>;
9004def : Pat<(bf16 (bitconvert (f16 FPR16:$src))), (bf16 FPR16:$src)>;
9005
9006let Predicates = [IsLE] in {
9007def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))), (v1i64 FPR64:$src)>;
9008def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))), (v1i64 FPR64:$src)>;
9009def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))), (v1i64 FPR64:$src)>;
9010def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))), (v1i64 FPR64:$src)>;
9011def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))), (v1i64 FPR64:$src)>;
9012def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))), (v1i64 FPR64:$src)>;
9013}
9014let Predicates = [IsBE] in {
9015def : Pat<(v1i64 (bitconvert (v2i32 FPR64:$src))),
9016                             (v1i64 (REV64v2i32 FPR64:$src))>;
9017def : Pat<(v1i64 (bitconvert (v4i16 FPR64:$src))),
9018                             (v1i64 (REV64v4i16 FPR64:$src))>;
9019def : Pat<(v1i64 (bitconvert (v8i8  FPR64:$src))),
9020                             (v1i64 (REV64v8i8 FPR64:$src))>;
9021def : Pat<(v1i64 (bitconvert (v4f16 FPR64:$src))),
9022                             (v1i64 (REV64v4i16 FPR64:$src))>;
9023def : Pat<(v1i64 (bitconvert (v4bf16 FPR64:$src))),
9024                             (v1i64 (REV64v4i16 FPR64:$src))>;
9025def : Pat<(v1i64 (bitconvert (v2f32 FPR64:$src))),
9026                             (v1i64 (REV64v2i32 FPR64:$src))>;
9027}
9028def : Pat<(v1i64 (bitconvert (v1f64 FPR64:$src))), (v1i64 FPR64:$src)>;
9029def : Pat<(v1i64 (bitconvert (f64   FPR64:$src))), (v1i64 FPR64:$src)>;
9030
9031let Predicates = [IsLE] in {
9032def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))), (v2i32 FPR64:$src)>;
9033def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))), (v2i32 FPR64:$src)>;
9034def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))), (v2i32 FPR64:$src)>;
9035def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))), (v2i32 FPR64:$src)>;
9036def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))), (v2i32 FPR64:$src)>;
9037def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))), (v2i32 FPR64:$src)>;
9038def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))), (v2i32 FPR64:$src)>;
9039}
9040let Predicates = [IsBE] in {
9041def : Pat<(v2i32 (bitconvert (v1i64 FPR64:$src))),
9042                             (v2i32 (REV64v2i32 FPR64:$src))>;
9043def : Pat<(v2i32 (bitconvert (v4i16 FPR64:$src))),
9044                             (v2i32 (REV32v4i16 FPR64:$src))>;
9045def : Pat<(v2i32 (bitconvert (v8i8  FPR64:$src))),
9046                             (v2i32 (REV32v8i8 FPR64:$src))>;
9047def : Pat<(v2i32 (bitconvert (f64   FPR64:$src))),
9048                             (v2i32 (REV64v2i32 FPR64:$src))>;
9049def : Pat<(v2i32 (bitconvert (v1f64 FPR64:$src))),
9050                             (v2i32 (REV64v2i32 FPR64:$src))>;
9051def : Pat<(v2i32 (bitconvert (v4f16 FPR64:$src))),
9052                             (v2i32 (REV32v4i16 FPR64:$src))>;
9053def : Pat<(v2i32 (bitconvert (v4bf16 FPR64:$src))),
9054                             (v2i32 (REV32v4i16 FPR64:$src))>;
9055}
9056def : Pat<(v2i32 (bitconvert (v2f32 FPR64:$src))), (v2i32 FPR64:$src)>;
9057
9058let Predicates = [IsLE] in {
9059def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))), (v4i16 FPR64:$src)>;
9060def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))), (v4i16 FPR64:$src)>;
9061def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))), (v4i16 FPR64:$src)>;
9062def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))), (v4i16 FPR64:$src)>;
9063def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))), (v4i16 FPR64:$src)>;
9064def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))), (v4i16 FPR64:$src)>;
9065}
9066let Predicates = [IsBE] in {
9067def : Pat<(v4i16 (bitconvert (v1i64 FPR64:$src))),
9068                             (v4i16 (REV64v4i16 FPR64:$src))>;
9069def : Pat<(v4i16 (bitconvert (v2i32 FPR64:$src))),
9070                             (v4i16 (REV32v4i16 FPR64:$src))>;
9071def : Pat<(v4i16 (bitconvert (v8i8  FPR64:$src))),
9072                             (v4i16 (REV16v8i8 FPR64:$src))>;
9073def : Pat<(v4i16 (bitconvert (f64   FPR64:$src))),
9074                             (v4i16 (REV64v4i16 FPR64:$src))>;
9075def : Pat<(v4i16 (bitconvert (v2f32 FPR64:$src))),
9076                             (v4i16 (REV32v4i16 FPR64:$src))>;
9077def : Pat<(v4i16 (bitconvert (v1f64 FPR64:$src))),
9078                             (v4i16 (REV64v4i16 FPR64:$src))>;
9079}
9080def : Pat<(v4i16 (bitconvert (v4f16 FPR64:$src))), (v4i16 FPR64:$src)>;
9081def : Pat<(v4i16 (bitconvert (v4bf16 FPR64:$src))), (v4i16 FPR64:$src)>;
9082
9083let Predicates = [IsLE] in {
9084def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))), (v4f16 FPR64:$src)>;
9085def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))), (v4f16 FPR64:$src)>;
9086def : Pat<(v4f16 (bitconvert (v8i8  FPR64:$src))), (v4f16 FPR64:$src)>;
9087def : Pat<(v4f16 (bitconvert (f64   FPR64:$src))), (v4f16 FPR64:$src)>;
9088def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))), (v4f16 FPR64:$src)>;
9089def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))), (v4f16 FPR64:$src)>;
9090
9091def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))), (v4bf16 FPR64:$src)>;
9092def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))), (v4bf16 FPR64:$src)>;
9093def : Pat<(v4bf16 (bitconvert (v8i8  FPR64:$src))), (v4bf16 FPR64:$src)>;
9094def : Pat<(v4bf16 (bitconvert (f64   FPR64:$src))), (v4bf16 FPR64:$src)>;
9095def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))), (v4bf16 FPR64:$src)>;
9096def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))), (v4bf16 FPR64:$src)>;
9097}
9098let Predicates = [IsBE] in {
9099def : Pat<(v4f16 (bitconvert (v1i64 FPR64:$src))),
9100                             (v4f16 (REV64v4i16 FPR64:$src))>;
9101def : Pat<(v4f16 (bitconvert (v2i32 FPR64:$src))),
9102                             (v4f16 (REV32v4i16 FPR64:$src))>;
9103def : Pat<(v4f16 (bitconvert (v8i8  FPR64:$src))),
9104                             (v4f16 (REV16v8i8 FPR64:$src))>;
9105def : Pat<(v4f16 (bitconvert (f64   FPR64:$src))),
9106                             (v4f16 (REV64v4i16 FPR64:$src))>;
9107def : Pat<(v4f16 (bitconvert (v2f32 FPR64:$src))),
9108                             (v4f16 (REV32v4i16 FPR64:$src))>;
9109def : Pat<(v4f16 (bitconvert (v1f64 FPR64:$src))),
9110                             (v4f16 (REV64v4i16 FPR64:$src))>;
9111
9112def : Pat<(v4bf16 (bitconvert (v1i64 FPR64:$src))),
9113                             (v4bf16 (REV64v4i16 FPR64:$src))>;
9114def : Pat<(v4bf16 (bitconvert (v2i32 FPR64:$src))),
9115                             (v4bf16 (REV32v4i16 FPR64:$src))>;
9116def : Pat<(v4bf16 (bitconvert (v8i8  FPR64:$src))),
9117                             (v4bf16 (REV16v8i8 FPR64:$src))>;
9118def : Pat<(v4bf16 (bitconvert (f64   FPR64:$src))),
9119                             (v4bf16 (REV64v4i16 FPR64:$src))>;
9120def : Pat<(v4bf16 (bitconvert (v2f32 FPR64:$src))),
9121                             (v4bf16 (REV32v4i16 FPR64:$src))>;
9122def : Pat<(v4bf16 (bitconvert (v1f64 FPR64:$src))),
9123                             (v4bf16 (REV64v4i16 FPR64:$src))>;
9124}
9125def : Pat<(v4f16 (bitconvert (v4i16 FPR64:$src))), (v4f16 FPR64:$src)>;
9126def : Pat<(v4bf16 (bitconvert (v4i16 FPR64:$src))), (v4bf16 FPR64:$src)>;
9127
9128let Predicates = [IsLE] in {
9129def : Pat<(v8i8  (bitconvert (v1i64 FPR64:$src))), (v8i8  FPR64:$src)>;
9130def : Pat<(v8i8  (bitconvert (v2i32 FPR64:$src))), (v8i8  FPR64:$src)>;
9131def : Pat<(v8i8  (bitconvert (v4i16 FPR64:$src))), (v8i8  FPR64:$src)>;
9132def : Pat<(v8i8  (bitconvert (f64   FPR64:$src))), (v8i8  FPR64:$src)>;
9133def : Pat<(v8i8  (bitconvert (v2f32 FPR64:$src))), (v8i8  FPR64:$src)>;
9134def : Pat<(v8i8  (bitconvert (v1f64 FPR64:$src))), (v8i8  FPR64:$src)>;
9135def : Pat<(v8i8  (bitconvert (v4f16 FPR64:$src))), (v8i8  FPR64:$src)>;
9136def : Pat<(v8i8  (bitconvert (v4bf16 FPR64:$src))), (v8i8  FPR64:$src)>;
9137}
9138let Predicates = [IsBE] in {
9139def : Pat<(v8i8  (bitconvert (v1i64 FPR64:$src))),
9140                             (v8i8 (REV64v8i8 FPR64:$src))>;
9141def : Pat<(v8i8  (bitconvert (v2i32 FPR64:$src))),
9142                             (v8i8 (REV32v8i8 FPR64:$src))>;
9143def : Pat<(v8i8  (bitconvert (v4i16 FPR64:$src))),
9144                             (v8i8 (REV16v8i8 FPR64:$src))>;
9145def : Pat<(v8i8  (bitconvert (f64   FPR64:$src))),
9146                             (v8i8 (REV64v8i8 FPR64:$src))>;
9147def : Pat<(v8i8  (bitconvert (v2f32 FPR64:$src))),
9148                             (v8i8 (REV32v8i8 FPR64:$src))>;
9149def : Pat<(v8i8  (bitconvert (v1f64 FPR64:$src))),
9150                             (v8i8 (REV64v8i8 FPR64:$src))>;
9151def : Pat<(v8i8  (bitconvert (v4f16 FPR64:$src))),
9152                             (v8i8 (REV16v8i8 FPR64:$src))>;
9153def : Pat<(v8i8  (bitconvert (v4bf16 FPR64:$src))),
9154                             (v8i8 (REV16v8i8 FPR64:$src))>;
9155}
9156
9157let Predicates = [IsLE] in {
9158def : Pat<(f64   (bitconvert (v2i32 FPR64:$src))), (f64   FPR64:$src)>;
9159def : Pat<(f64   (bitconvert (v4i16 FPR64:$src))), (f64   FPR64:$src)>;
9160def : Pat<(f64   (bitconvert (v2f32 FPR64:$src))), (f64   FPR64:$src)>;
9161def : Pat<(f64   (bitconvert (v8i8  FPR64:$src))), (f64   FPR64:$src)>;
9162def : Pat<(f64   (bitconvert (v4f16 FPR64:$src))), (f64   FPR64:$src)>;
9163def : Pat<(f64   (bitconvert (v4bf16 FPR64:$src))), (f64   FPR64:$src)>;
9164}
9165let Predicates = [IsBE] in {
9166def : Pat<(f64   (bitconvert (v2i32 FPR64:$src))),
9167                             (f64 (REV64v2i32 FPR64:$src))>;
9168def : Pat<(f64   (bitconvert (v4i16 FPR64:$src))),
9169                             (f64 (REV64v4i16 FPR64:$src))>;
9170def : Pat<(f64   (bitconvert (v2f32 FPR64:$src))),
9171                             (f64 (REV64v2i32 FPR64:$src))>;
9172def : Pat<(f64   (bitconvert (v8i8  FPR64:$src))),
9173                             (f64 (REV64v8i8 FPR64:$src))>;
9174def : Pat<(f64   (bitconvert (v4f16 FPR64:$src))),
9175                             (f64 (REV64v4i16 FPR64:$src))>;
9176def : Pat<(f64   (bitconvert (v4bf16 FPR64:$src))),
9177                             (f64 (REV64v4i16 FPR64:$src))>;
9178}
9179def : Pat<(f64   (bitconvert (v1i64 FPR64:$src))), (f64   FPR64:$src)>;
9180def : Pat<(f64   (bitconvert (v1f64 FPR64:$src))), (f64   FPR64:$src)>;
9181
9182let Predicates = [IsLE] in {
9183def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))), (v1f64 FPR64:$src)>;
9184def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))), (v1f64 FPR64:$src)>;
9185def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))), (v1f64 FPR64:$src)>;
9186def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))), (v1f64 FPR64:$src)>;
9187def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))), (v1f64 FPR64:$src)>;
9188def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))), (v1f64 FPR64:$src)>;
9189}
9190let Predicates = [IsBE] in {
9191def : Pat<(v1f64 (bitconvert (v2i32 FPR64:$src))),
9192                             (v1f64 (REV64v2i32 FPR64:$src))>;
9193def : Pat<(v1f64 (bitconvert (v4i16 FPR64:$src))),
9194                             (v1f64 (REV64v4i16 FPR64:$src))>;
9195def : Pat<(v1f64 (bitconvert (v8i8  FPR64:$src))),
9196                             (v1f64 (REV64v8i8 FPR64:$src))>;
9197def : Pat<(v1f64 (bitconvert (v2f32 FPR64:$src))),
9198                             (v1f64 (REV64v2i32 FPR64:$src))>;
9199def : Pat<(v1f64 (bitconvert (v4f16 FPR64:$src))),
9200                             (v1f64 (REV64v4i16 FPR64:$src))>;
9201def : Pat<(v1f64 (bitconvert (v4bf16 FPR64:$src))),
9202                             (v1f64 (REV64v4i16 FPR64:$src))>;
9203}
9204def : Pat<(v1f64 (bitconvert (v1i64 FPR64:$src))), (v1f64 FPR64:$src)>;
9205def : Pat<(v1f64 (bitconvert (f64   FPR64:$src))), (v1f64 FPR64:$src)>;
9206
9207let Predicates = [IsLE] in {
9208def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))), (v2f32 FPR64:$src)>;
9209def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))), (v2f32 FPR64:$src)>;
9210def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))), (v2f32 FPR64:$src)>;
9211def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))), (v2f32 FPR64:$src)>;
9212def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))), (v2f32 FPR64:$src)>;
9213def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))), (v2f32 FPR64:$src)>;
9214def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))), (v2f32 FPR64:$src)>;
9215}
9216let Predicates = [IsBE] in {
9217def : Pat<(v2f32 (bitconvert (v1i64 FPR64:$src))),
9218                             (v2f32 (REV64v2i32 FPR64:$src))>;
9219def : Pat<(v2f32 (bitconvert (v4i16 FPR64:$src))),
9220                             (v2f32 (REV32v4i16 FPR64:$src))>;
9221def : Pat<(v2f32 (bitconvert (v8i8  FPR64:$src))),
9222                             (v2f32 (REV32v8i8 FPR64:$src))>;
9223def : Pat<(v2f32 (bitconvert (v1f64 FPR64:$src))),
9224                             (v2f32 (REV64v2i32 FPR64:$src))>;
9225def : Pat<(v2f32 (bitconvert (f64   FPR64:$src))),
9226                             (v2f32 (REV64v2i32 FPR64:$src))>;
9227def : Pat<(v2f32 (bitconvert (v4f16 FPR64:$src))),
9228                             (v2f32 (REV32v4i16 FPR64:$src))>;
9229def : Pat<(v2f32 (bitconvert (v4bf16 FPR64:$src))),
9230                             (v2f32 (REV32v4i16 FPR64:$src))>;
9231}
9232def : Pat<(v2f32 (bitconvert (v2i32 FPR64:$src))), (v2f32 FPR64:$src)>;
9233
9234let Predicates = [IsLE] in {
9235def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))), (f128 FPR128:$src)>;
9236def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))), (f128 FPR128:$src)>;
9237def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))), (f128 FPR128:$src)>;
9238def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))), (f128 FPR128:$src)>;
9239def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))), (f128 FPR128:$src)>;
9240def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))), (f128 FPR128:$src)>;
9241def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))), (f128 FPR128:$src)>;
9242def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))), (f128 FPR128:$src)>;
9243}
9244let Predicates = [IsBE] in {
9245def : Pat<(f128 (bitconvert (v2i64 FPR128:$src))),
9246                            (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
9247def : Pat<(f128 (bitconvert (v4i32 FPR128:$src))),
9248                            (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
9249                                            (REV64v4i32 FPR128:$src), (i32 8)))>;
9250def : Pat<(f128 (bitconvert (v8i16 FPR128:$src))),
9251                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
9252                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
9253def : Pat<(f128 (bitconvert (v8f16 FPR128:$src))),
9254                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
9255                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
9256def : Pat<(f128 (bitconvert (v8bf16 FPR128:$src))),
9257                            (f128 (EXTv16i8 (REV64v8i16 FPR128:$src),
9258                                            (REV64v8i16 FPR128:$src), (i32 8)))>;
9259def : Pat<(f128 (bitconvert (v2f64 FPR128:$src))),
9260                            (f128 (EXTv16i8 FPR128:$src, FPR128:$src, (i32 8)))>;
9261def : Pat<(f128 (bitconvert (v4f32 FPR128:$src))),
9262                            (f128 (EXTv16i8 (REV64v4i32 FPR128:$src),
9263                                            (REV64v4i32 FPR128:$src), (i32 8)))>;
9264def : Pat<(f128 (bitconvert (v16i8 FPR128:$src))),
9265                            (f128 (EXTv16i8 (REV64v16i8 FPR128:$src),
9266                                            (REV64v16i8 FPR128:$src), (i32 8)))>;
9267}
9268
9269let Predicates = [IsLE] in {
9270def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))), (v2f64 FPR128:$src)>;
9271def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))), (v2f64 FPR128:$src)>;
9272def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))), (v2f64 FPR128:$src)>;
9273def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))), (v2f64 FPR128:$src)>;
9274def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))), (v2f64 FPR128:$src)>;
9275def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))), (v2f64 FPR128:$src)>;
9276def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))), (v2f64 FPR128:$src)>;
9277}
9278let Predicates = [IsBE] in {
9279def : Pat<(v2f64 (bitconvert (f128  FPR128:$src))),
9280                             (v2f64 (EXTv16i8 FPR128:$src,
9281                                              FPR128:$src, (i32 8)))>;
9282def : Pat<(v2f64 (bitconvert (v4i32 FPR128:$src))),
9283                             (v2f64 (REV64v4i32 FPR128:$src))>;
9284def : Pat<(v2f64 (bitconvert (v8i16 FPR128:$src))),
9285                             (v2f64 (REV64v8i16 FPR128:$src))>;
9286def : Pat<(v2f64 (bitconvert (v8f16 FPR128:$src))),
9287                             (v2f64 (REV64v8i16 FPR128:$src))>;
9288def : Pat<(v2f64 (bitconvert (v8bf16 FPR128:$src))),
9289                             (v2f64 (REV64v8i16 FPR128:$src))>;
9290def : Pat<(v2f64 (bitconvert (v16i8 FPR128:$src))),
9291                             (v2f64 (REV64v16i8 FPR128:$src))>;
9292def : Pat<(v2f64 (bitconvert (v4f32 FPR128:$src))),
9293                             (v2f64 (REV64v4i32 FPR128:$src))>;
9294}
9295def : Pat<(v2f64 (bitconvert (v2i64 FPR128:$src))), (v2f64 FPR128:$src)>;
9296
9297let Predicates = [IsLE] in {
9298def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))), (v4f32 FPR128:$src)>;
9299def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))), (v4f32 FPR128:$src)>;
9300def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))), (v4f32 FPR128:$src)>;
9301def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))), (v4f32 FPR128:$src)>;
9302def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))), (v4f32 FPR128:$src)>;
9303def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))), (v4f32 FPR128:$src)>;
9304def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))), (v4f32 FPR128:$src)>;
9305}
9306let Predicates = [IsBE] in {
9307def : Pat<(v4f32 (bitconvert (f128  FPR128:$src))),
9308                             (v4f32 (EXTv16i8 (REV64v4i32 FPR128:$src),
9309                                    (REV64v4i32 FPR128:$src), (i32 8)))>;
9310def : Pat<(v4f32 (bitconvert (v8i16 FPR128:$src))),
9311                             (v4f32 (REV32v8i16 FPR128:$src))>;
9312def : Pat<(v4f32 (bitconvert (v8f16 FPR128:$src))),
9313                             (v4f32 (REV32v8i16 FPR128:$src))>;
9314def : Pat<(v4f32 (bitconvert (v8bf16 FPR128:$src))),
9315                             (v4f32 (REV32v8i16 FPR128:$src))>;
9316def : Pat<(v4f32 (bitconvert (v16i8 FPR128:$src))),
9317                             (v4f32 (REV32v16i8 FPR128:$src))>;
9318def : Pat<(v4f32 (bitconvert (v2i64 FPR128:$src))),
9319                             (v4f32 (REV64v4i32 FPR128:$src))>;
9320def : Pat<(v4f32 (bitconvert (v2f64 FPR128:$src))),
9321                             (v4f32 (REV64v4i32 FPR128:$src))>;
9322}
9323def : Pat<(v4f32 (bitconvert (v4i32 FPR128:$src))), (v4f32 FPR128:$src)>;
9324
9325let Predicates = [IsLE] in {
9326def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))), (v2i64 FPR128:$src)>;
9327def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))), (v2i64 FPR128:$src)>;
9328def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))), (v2i64 FPR128:$src)>;
9329def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))), (v2i64 FPR128:$src)>;
9330def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))), (v2i64 FPR128:$src)>;
9331def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))), (v2i64 FPR128:$src)>;
9332def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))), (v2i64 FPR128:$src)>;
9333}
9334let Predicates = [IsBE] in {
9335def : Pat<(v2i64 (bitconvert (f128  FPR128:$src))),
9336                             (v2i64 (EXTv16i8 FPR128:$src,
9337                                              FPR128:$src, (i32 8)))>;
9338def : Pat<(v2i64 (bitconvert (v4i32 FPR128:$src))),
9339                             (v2i64 (REV64v4i32 FPR128:$src))>;
9340def : Pat<(v2i64 (bitconvert (v8i16 FPR128:$src))),
9341                             (v2i64 (REV64v8i16 FPR128:$src))>;
9342def : Pat<(v2i64 (bitconvert (v16i8 FPR128:$src))),
9343                             (v2i64 (REV64v16i8 FPR128:$src))>;
9344def : Pat<(v2i64 (bitconvert (v4f32 FPR128:$src))),
9345                             (v2i64 (REV64v4i32 FPR128:$src))>;
9346def : Pat<(v2i64 (bitconvert (v8f16 FPR128:$src))),
9347                             (v2i64 (REV64v8i16 FPR128:$src))>;
9348def : Pat<(v2i64 (bitconvert (v8bf16 FPR128:$src))),
9349                             (v2i64 (REV64v8i16 FPR128:$src))>;
9350}
9351def : Pat<(v2i64 (bitconvert (v2f64 FPR128:$src))), (v2i64 FPR128:$src)>;
9352
9353let Predicates = [IsLE] in {
9354def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))), (v4i32 FPR128:$src)>;
9355def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))), (v4i32 FPR128:$src)>;
9356def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))), (v4i32 FPR128:$src)>;
9357def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))), (v4i32 FPR128:$src)>;
9358def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))), (v4i32 FPR128:$src)>;
9359def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))), (v4i32 FPR128:$src)>;
9360def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))), (v4i32 FPR128:$src)>;
9361}
9362let Predicates = [IsBE] in {
9363def : Pat<(v4i32 (bitconvert (f128  FPR128:$src))),
9364                             (v4i32 (EXTv16i8 (REV64v4i32 FPR128:$src),
9365                                              (REV64v4i32 FPR128:$src),
9366                                              (i32 8)))>;
9367def : Pat<(v4i32 (bitconvert (v2i64 FPR128:$src))),
9368                             (v4i32 (REV64v4i32 FPR128:$src))>;
9369def : Pat<(v4i32 (bitconvert (v8i16 FPR128:$src))),
9370                             (v4i32 (REV32v8i16 FPR128:$src))>;
9371def : Pat<(v4i32 (bitconvert (v16i8 FPR128:$src))),
9372                             (v4i32 (REV32v16i8 FPR128:$src))>;
9373def : Pat<(v4i32 (bitconvert (v2f64 FPR128:$src))),
9374                             (v4i32 (REV64v4i32 FPR128:$src))>;
9375def : Pat<(v4i32 (bitconvert (v8f16 FPR128:$src))),
9376                             (v4i32 (REV32v8i16 FPR128:$src))>;
9377def : Pat<(v4i32 (bitconvert (v8bf16 FPR128:$src))),
9378                             (v4i32 (REV32v8i16 FPR128:$src))>;
9379}
9380def : Pat<(v4i32 (bitconvert (v4f32 FPR128:$src))), (v4i32 FPR128:$src)>;
9381
9382let Predicates = [IsLE] in {
9383def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))), (v8i16 FPR128:$src)>;
9384def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))), (v8i16 FPR128:$src)>;
9385def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))), (v8i16 FPR128:$src)>;
9386def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))), (v8i16 FPR128:$src)>;
9387def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))), (v8i16 FPR128:$src)>;
9388def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))), (v8i16 FPR128:$src)>;
9389}
9390let Predicates = [IsBE] in {
9391def : Pat<(v8i16 (bitconvert (f128  FPR128:$src))),
9392                             (v8i16 (EXTv16i8 (REV64v8i16 FPR128:$src),
9393                                              (REV64v8i16 FPR128:$src),
9394                                              (i32 8)))>;
9395def : Pat<(v8i16 (bitconvert (v2i64 FPR128:$src))),
9396                             (v8i16 (REV64v8i16 FPR128:$src))>;
9397def : Pat<(v8i16 (bitconvert (v4i32 FPR128:$src))),
9398                             (v8i16 (REV32v8i16 FPR128:$src))>;
9399def : Pat<(v8i16 (bitconvert (v16i8 FPR128:$src))),
9400                             (v8i16 (REV16v16i8 FPR128:$src))>;
9401def : Pat<(v8i16 (bitconvert (v2f64 FPR128:$src))),
9402                             (v8i16 (REV64v8i16 FPR128:$src))>;
9403def : Pat<(v8i16 (bitconvert (v4f32 FPR128:$src))),
9404                             (v8i16 (REV32v8i16 FPR128:$src))>;
9405}
9406def : Pat<(v8i16 (bitconvert (v8f16 FPR128:$src))), (v8i16 FPR128:$src)>;
9407def : Pat<(v8i16 (bitconvert (v8bf16 FPR128:$src))), (v8i16 FPR128:$src)>;
9408
9409let Predicates = [IsLE] in {
9410def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))), (v8f16 FPR128:$src)>;
9411def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))), (v8f16 FPR128:$src)>;
9412def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))), (v8f16 FPR128:$src)>;
9413def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))), (v8f16 FPR128:$src)>;
9414def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))), (v8f16 FPR128:$src)>;
9415def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))), (v8f16 FPR128:$src)>;
9416
9417def : Pat<(v8bf16 (bitconvert (f128  FPR128:$src))), (v8bf16 FPR128:$src)>;
9418def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))), (v8bf16 FPR128:$src)>;
9419def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))), (v8bf16 FPR128:$src)>;
9420def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))), (v8bf16 FPR128:$src)>;
9421def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))), (v8bf16 FPR128:$src)>;
9422def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))), (v8bf16 FPR128:$src)>;
9423}
9424let Predicates = [IsBE] in {
9425def : Pat<(v8f16 (bitconvert (f128  FPR128:$src))),
9426                             (v8f16 (EXTv16i8 (REV64v8i16 FPR128:$src),
9427                                              (REV64v8i16 FPR128:$src),
9428                                              (i32 8)))>;
9429def : Pat<(v8f16 (bitconvert (v2i64 FPR128:$src))),
9430                             (v8f16 (REV64v8i16 FPR128:$src))>;
9431def : Pat<(v8f16 (bitconvert (v4i32 FPR128:$src))),
9432                             (v8f16 (REV32v8i16 FPR128:$src))>;
9433def : Pat<(v8f16 (bitconvert (v16i8 FPR128:$src))),
9434                             (v8f16 (REV16v16i8 FPR128:$src))>;
9435def : Pat<(v8f16 (bitconvert (v2f64 FPR128:$src))),
9436                             (v8f16 (REV64v8i16 FPR128:$src))>;
9437def : Pat<(v8f16 (bitconvert (v4f32 FPR128:$src))),
9438                             (v8f16 (REV32v8i16 FPR128:$src))>;
9439
9440def : Pat<(v8bf16 (bitconvert (f128  FPR128:$src))),
9441                             (v8bf16 (EXTv16i8 (REV64v8i16 FPR128:$src),
9442                                              (REV64v8i16 FPR128:$src),
9443                                              (i32 8)))>;
9444def : Pat<(v8bf16 (bitconvert (v2i64 FPR128:$src))),
9445                             (v8bf16 (REV64v8i16 FPR128:$src))>;
9446def : Pat<(v8bf16 (bitconvert (v4i32 FPR128:$src))),
9447                             (v8bf16 (REV32v8i16 FPR128:$src))>;
9448def : Pat<(v8bf16 (bitconvert (v16i8 FPR128:$src))),
9449                             (v8bf16 (REV16v16i8 FPR128:$src))>;
9450def : Pat<(v8bf16 (bitconvert (v2f64 FPR128:$src))),
9451                             (v8bf16 (REV64v8i16 FPR128:$src))>;
9452def : Pat<(v8bf16 (bitconvert (v4f32 FPR128:$src))),
9453                             (v8bf16 (REV32v8i16 FPR128:$src))>;
9454}
9455def : Pat<(v8f16 (bitconvert (v8i16 FPR128:$src))), (v8f16 FPR128:$src)>;
9456def : Pat<(v8bf16 (bitconvert (v8i16 FPR128:$src))), (v8bf16 FPR128:$src)>;
9457
let Predicates = [IsLE] in {
def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))), (v16i8 FPR128:$src)>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))), (v16i8 FPR128:$src)>;
}
let Predicates = [IsBE] in {
def : Pat<(v16i8 (bitconvert (f128  FPR128:$src))),
                             (v16i8 (EXTv16i8 (REV64v16i8 FPR128:$src),
                                              (REV64v16i8 FPR128:$src),
                                              (i32 8)))>;
def : Pat<(v16i8 (bitconvert (v2i64 FPR128:$src))),
                             (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4i32 FPR128:$src))),
                             (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8i16 FPR128:$src))),
                             (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v2f64 FPR128:$src))),
                             (v16i8 (REV64v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v4f32 FPR128:$src))),
                             (v16i8 (REV32v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8f16 FPR128:$src))),
                             (v16i8 (REV16v16i8 FPR128:$src))>;
def : Pat<(v16i8 (bitconvert (v8bf16 FPR128:$src))),
                             (v16i8 (REV16v16i8 FPR128:$src))>;
}

def : Pat<(v4i16 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v8i8 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2f32 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4f16 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v4bf16 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v2i32 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1i64 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;
def : Pat<(v1f64 (extract_subvector V128:$Rn, (i64 0))),
           (EXTRACT_SUBREG V128:$Rn, dsub)>;

def : Pat<(v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
def : Pat<(v1i64 (extract_subvector (v2i64 FPR128:$Rn), (i64 1))),
          (EXTRACT_SUBREG (DUPv2i64lane FPR128:$Rn, 1), dsub)>;
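// Illustrative example: extracting the high half of a v8i16 via the patterns
// above selects to "dup v0.2d, v0.d[1]", with the result then read as d0
// through the dsub subregister.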

// A 64-bit subvector insert to the first 128-bit vector position
// is a subregister copy that needs no instruction.
multiclass InsertSubvectorUndef<ValueType Ty> {
  def : Pat<(insert_subvector undef, (v1i64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v1f64 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2i32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4i32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v2f32 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4i16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8i16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4f16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v4bf16 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v8bf16 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
  def : Pat<(insert_subvector undef, (v8i8 FPR64:$src), (Ty 0)),
            (INSERT_SUBREG (v16i8 (IMPLICIT_DEF)), FPR64:$src, dsub)>;
}

defm : InsertSubvectorUndef<i32>;
defm : InsertSubvectorUndef<i64>;

// Use pair-wise add instructions when summing up the lanes for v2f64, v2i64
// or v2f32.
def : Pat<(i64 (add (vector_extract (v2i64 FPR128:$Rn), (i64 0)),
                    (vector_extract (v2i64 FPR128:$Rn), (i64 1)))),
           (i64 (ADDPv2i64p (v2i64 FPR128:$Rn)))>;
def : Pat<(f64 (any_fadd (vector_extract (v2f64 FPR128:$Rn), (i64 0)),
                         (vector_extract (v2f64 FPR128:$Rn), (i64 1)))),
           (f64 (FADDPv2i64p (v2f64 FPR128:$Rn)))>;
// vector_extract on 64-bit vectors gets promoted to a 128-bit vector,
// so we match on v4f32 here, not v2f32. This will also catch adding
// the low two lanes of a true v4f32 vector.
def : Pat<(any_fadd (vector_extract (v4f32 FPR128:$Rn), (i64 0)),
                    (vector_extract (v4f32 FPR128:$Rn), (i64 1))),
          (f32 (FADDPv2i32p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
def : Pat<(any_fadd (vector_extract (v8f16 FPR128:$Rn), (i64 0)),
                    (vector_extract (v8f16 FPR128:$Rn), (i64 1))),
          (f16 (FADDPv2i16p (EXTRACT_SUBREG FPR128:$Rn, dsub)))>;
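// Illustrative example: summing both lanes of a v2i64 held in q0 becomes a
// single "addp d0, v0.2d" rather than two lane extracts and a scalar add.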

// Prefer using the bottom lanes of addp Rn, Rn over
// addp extractlow(Rn), extracthigh(Rn).
def : Pat<(AArch64addp (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 0))),
                       (v2i32 (extract_subvector (v4i32 FPR128:$Rn), (i64 2)))),
          (v2i32 (EXTRACT_SUBREG (ADDPv4i32 $Rn, $Rn), dsub))>;
def : Pat<(AArch64addp (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 0))),
                       (v4i16 (extract_subvector (v8i16 FPR128:$Rn), (i64 4)))),
          (v4i16 (EXTRACT_SUBREG (ADDPv8i16 $Rn, $Rn), dsub))>;
def : Pat<(AArch64addp (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 0))),
                       (v8i8 (extract_subvector (v16i8 FPR128:$Rn), (i64 8)))),
          (v8i8 (EXTRACT_SUBREG (ADDPv16i8 $Rn, $Rn), dsub))>;

def : Pat<(AArch64faddp (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 0))),
                        (v2f32 (extract_subvector (v4f32 FPR128:$Rn), (i64 2)))),
          (v2f32 (EXTRACT_SUBREG (FADDPv4f32 $Rn, $Rn), dsub))>;
def : Pat<(AArch64faddp (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 0))),
                        (v4f16 (extract_subvector (v8f16 FPR128:$Rn), (i64 4)))),
          (v4f16 (EXTRACT_SUBREG (FADDPv8f16 $Rn, $Rn), dsub))>;
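// Roughly: for the v4i32 case this selects "addp v0.4s, v0.4s, v0.4s" and
// reads the low 64 bits as d0, avoiding a separate extract of the two halves
// before the pairwise add.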

// add(uzp1(X, Y), uzp2(X, Y)) -> addp(X, Y)
// (For the v2i64 case, zip1/zip2 are used; on two-element vectors they are
// equivalent to uzp1/uzp2.)
def : Pat<(v2i64 (add (AArch64zip1 (v2i64 FPR128:$Rn), (v2i64 FPR128:$Rm)),
                      (AArch64zip2 (v2i64 FPR128:$Rn), (v2i64 FPR128:$Rm)))),
          (v2i64 (ADDPv2i64 $Rn, $Rm))>;
def : Pat<(v4i32 (add (AArch64uzp1 (v4i32 FPR128:$Rn), (v4i32 FPR128:$Rm)),
                      (AArch64uzp2 (v4i32 FPR128:$Rn), (v4i32 FPR128:$Rm)))),
          (v4i32 (ADDPv4i32 $Rn, $Rm))>;
def : Pat<(v8i16 (add (AArch64uzp1 (v8i16 FPR128:$Rn), (v8i16 FPR128:$Rm)),
                      (AArch64uzp2 (v8i16 FPR128:$Rn), (v8i16 FPR128:$Rm)))),
          (v8i16 (ADDPv8i16 $Rn, $Rm))>;
def : Pat<(v16i8 (add (AArch64uzp1 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)),
                      (AArch64uzp2 (v16i8 FPR128:$Rn), (v16i8 FPR128:$Rm)))),
          (v16i8 (ADDPv16i8 $Rn, $Rm))>;

def : Pat<(v2f64 (fadd (AArch64zip1 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)),
                       (AArch64zip2 (v2f64 FPR128:$Rn), (v2f64 FPR128:$Rm)))),
          (v2f64 (FADDPv2f64 $Rn, $Rm))>;
def : Pat<(v4f32 (fadd (AArch64uzp1 (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm)),
                       (AArch64uzp2 (v4f32 FPR128:$Rn), (v4f32 FPR128:$Rm)))),
          (v4f32 (FADDPv4f32 $Rn, $Rm))>;
let Predicates = [HasFullFP16] in
def : Pat<(v8f16 (fadd (AArch64uzp1 (v8f16 FPR128:$Rn), (v8f16 FPR128:$Rm)),
                       (AArch64uzp2 (v8f16 FPR128:$Rn), (v8f16 FPR128:$Rm)))),
          (v8f16 (FADDPv8f16 $Rn, $Rm))>;
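// This rewrite is sound because "addp Vd.4s, Vn.4s, Vm.4s" computes
// [n0+n1, n2+n3, m0+m1, m2+m3], which is exactly uzp1(N,M) + uzp2(N,M)
// lane by lane.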

// Scalar 64-bit shifts in FPR64 registers.
def : Pat<(i64 (int_aarch64_neon_sshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_ushl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (USHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_srshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (SRSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;
def : Pat<(i64 (int_aarch64_neon_urshl (i64 FPR64:$Rn), (i64 FPR64:$Rm))),
          (URSHLv1i64 FPR64:$Rn, FPR64:$Rm)>;

// Patterns for nontemporal/no-allocate stores.
// We have to resort to tricks to turn a single-input store into a store pair,
// because there is no single-input nontemporal store, only STNP.
let Predicates = [IsLE] in {
let AddedComplexity = 15 in {
class NTStore128Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR128:$Rt),
        (am_indexed7s64 GPR64sp:$Rn, simm7s8:$offset)),
      (STNPDi (EXTRACT_SUBREG FPR128:$Rt, dsub),
              (DUPi64 FPR128:$Rt, (i64 1)),
              GPR64sp:$Rn, simm7s8:$offset)>;

def : NTStore128Pat<v2i64>;
def : NTStore128Pat<v4i32>;
def : NTStore128Pat<v8i16>;
def : NTStore128Pat<v16i8>;

class NTStore64Pat<ValueType VT> :
  Pat<(nontemporalstore (VT FPR64:$Rt),
        (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
      (STNPSi (EXTRACT_SUBREG FPR64:$Rt, ssub),
              (DUPi32 (SUBREG_TO_REG (i64 0), FPR64:$Rt, dsub), (i64 1)),
              GPR64sp:$Rn, simm7s4:$offset)>;

// FIXME: Shouldn't v1f64 loads/stores be promoted to v1i64?
def : NTStore64Pat<v1f64>;
def : NTStore64Pat<v1i64>;
def : NTStore64Pat<v2i32>;
def : NTStore64Pat<v4i16>;
def : NTStore64Pat<v8i8>;

def : Pat<(nontemporalstore GPR64:$Rt,
            (am_indexed7s32 GPR64sp:$Rn, simm7s4:$offset)),
          (STNPWi (EXTRACT_SUBREG GPR64:$Rt, sub_32),
                  (EXTRACT_SUBREG (UBFMXri GPR64:$Rt, 32, 63), sub_32),
                  GPR64sp:$Rn, simm7s4:$offset)>;
} // AddedComplexity=15
} // Predicates = [IsLE]
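
// Illustrative example: a nontemporal store of a v2i64 in q0 to [x0] becomes
//   mov  d1, v0.d[1]
//   stnp d0, d1, [x0]
// i.e. the single 128-bit store is rewritten as an STNP of its two halves.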

// Tail call return handling. These are all compiler pseudo-instructions,
// so they carry no encoding information or anything like that.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in {
  def TCRETURNdi : Pseudo<(outs), (ins i64imm:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  def TCRETURNri : Pseudo<(outs), (ins tcGPR64:$dst, i32imm:$FPDiff), []>,
                   Sched<[WriteBrReg]>;
  // Indirect tail-call with any register allowed, used by MachineOutliner when
  // this is proven safe.
  // FIXME: If we have to add any more hacks like this, we should instead relax
  // some verifier checks for outlined functions.
  def TCRETURNriALL : Pseudo<(outs), (ins GPR64:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;

  // Indirect tail-calls with reduced register classes, needed for BTI and
  // PAuthLR.
  def TCRETURNrix16x17 : Pseudo<(outs), (ins tcGPRx16x17:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
  def TCRETURNrix17 : Pseudo<(outs), (ins tcGPRx17:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
  def TCRETURNrinotx16 : Pseudo<(outs), (ins tcGPRnotx16:$dst, i32imm:$FPDiff), []>,
                      Sched<[WriteBrReg]>;
}

def : Pat<(AArch64tcret tcGPR64:$dst, (i32 timm:$FPDiff)),
          (TCRETURNri tcGPR64:$dst, imm:$FPDiff)>,
      Requires<[TailCallAny]>;
def : Pat<(AArch64tcret tcGPRx16x17:$dst, (i32 timm:$FPDiff)),
          (TCRETURNrix16x17 tcGPRx16x17:$dst, imm:$FPDiff)>,
      Requires<[TailCallX16X17]>;
def : Pat<(AArch64tcret tcGPRx17:$dst, (i32 timm:$FPDiff)),
          (TCRETURNrix17 tcGPRx17:$dst, imm:$FPDiff)>,
      Requires<[TailCallX17]>;
def : Pat<(AArch64tcret tcGPRnotx16:$dst, (i32 timm:$FPDiff)),
          (TCRETURNrinotx16 tcGPRnotx16:$dst, imm:$FPDiff)>,
      Requires<[TailCallNotX16]>;

def : Pat<(AArch64tcret tglobaladdr:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;
def : Pat<(AArch64tcret texternalsym:$dst, (i32 timm:$FPDiff)),
          (TCRETURNdi texternalsym:$dst, imm:$FPDiff)>;

def MOVMCSym : Pseudo<(outs GPR64:$dst), (ins i64imm:$sym), []>, Sched<[]>;
def : Pat<(i64 (AArch64LocalRecover mcsym:$sym)), (MOVMCSym mcsym:$sym)>;

// Extracting lane zero is a special case where we can just use a plain
// EXTRACT_SUBREG instruction, which will become FMOV. This is easier for the
// rest of the compiler, especially the register allocator and copy
// propagation, to reason about, and is preferred where possible.
let AddedComplexity = 10 in {
  def : Pat<(i64 (extractelt (v2i64 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, dsub)>;
  def : Pat<(i32 (extractelt (v4i32 V128:$V), (i64 0))), (EXTRACT_SUBREG V128:$V, ssub)>;
  def : Pat<(i32 (extractelt (v2i32 V64:$V), (i64 0))), (EXTRACT_SUBREG V64:$V, ssub)>;
}
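// e.g. extracting lane 0 of a v2i64 into a GPR becomes a single
// "fmov x0, d0" once the dsub copy is lowered.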

// dot_v4i8
class mul_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm, node:$offset),
          (mul (ldop (add node:$Rn, node:$offset)),
               (ldop (add node:$Rm, node:$offset)))>;
class mulz_v4i8<SDPatternOperator ldop> :
  PatFrag<(ops node:$Rn, node:$Rm),
          (mul (ldop node:$Rn), (ldop node:$Rm))>;

def load_v4i8 :
  OutPatFrag<(ops node:$R),
             (INSERT_SUBREG
              (v2i32 (IMPLICIT_DEF)),
               (i32 (COPY_TO_REGCLASS (LDRWui node:$R, (i64 0)), FPR32)),
              ssub)>;

class dot_v4i8<Instruction DOT, SDPatternOperator ldop> :
  Pat<(i32 (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 3)),
           (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 2)),
           (add (mul_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm, (i64 1)),
                (mulz_v4i8<ldop> GPR64sp:$Rn, GPR64sp:$Rm))))),
      (EXTRACT_SUBREG (i64 (DOT (DUPv2i32gpr WZR),
                                (load_v4i8 GPR64sp:$Rn),
                                (load_v4i8 GPR64sp:$Rm))),
                      sub_32)>, Requires<[HasDotProd]>;

// dot_v8i8
class ee_v8i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K),
          (v4i16 (extract_subvector (v8i16 (extend node:$V)), node:$K))>;

class mul_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K),
          (mulop (v4i16 (ee_v8i8<extend> node:$M, node:$K)),
                 (v4i16 (ee_v8i8<extend> node:$N, node:$K)))>;

class idot_v8i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 0)),
                 (mul_v8i8<mulop, extend> node:$M, node:$N, (i64 4))))),
           (i64 0)))>;

// vaddv_[su]32 is special: it lowers to "ADDP Vd.2S, Vn.2S, Vm.2S" with
// Vn == Vm, and the result is read from Vd.s[0].
def VADDV_32 : OutPatFrag<(ops node:$R), (ADDPv2i32 node:$R, node:$R)>;

class odot_v8i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (EXTRACT_SUBREG
              (VADDV_32
               (i64 (DOT (DUPv2i32gpr WZR),
                         (v8i8 node:$Vm),
                         (v8i8 node:$Vn)))),
              sub_32)>;

class dot_v8i8<Instruction DOT, SDPatternOperator mulop,
                    SDPatternOperator extend> :
  Pat<(idot_v8i8<mulop, extend> V64:$Vm, V64:$Vn),
      (odot_v8i8<DOT> V64:$Vm, V64:$Vn)>,
  Requires<[HasDotProd]>;

// dot_v16i8
class ee_v16i8<SDPatternOperator extend> :
  PatFrag<(ops node:$V, node:$K1, node:$K2),
          (v4i16 (extract_subvector
           (v8i16 (extend
            (v8i8 (extract_subvector node:$V, node:$K1)))), node:$K2))>;

class mul_v16i8<SDPatternOperator mulop, SDPatternOperator extend> :
  PatFrag<(ops node:$M, node:$N, node:$K1, node:$K2),
          (v4i32
           (mulop (v4i16 (ee_v16i8<extend> node:$M, node:$K1, node:$K2)),
                  (v4i16 (ee_v16i8<extend> node:$N, node:$K1, node:$K2))))>;

class idot_v16i8<SDPatternOperator m, SDPatternOperator x> :
  PatFrag<(ops node:$M, node:$N),
          (i32 (extractelt
           (v4i32 (AArch64uaddv
            (add
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 0)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 0))),
             (add (mul_v16i8<m, x> node:$M, node:$N, (i64 0), (i64 4)),
                  (mul_v16i8<m, x> node:$M, node:$N, (i64 8), (i64 4)))))),
           (i64 0)))>;

class odot_v16i8<Instruction DOT> :
  OutPatFrag<(ops node:$Vm, node:$Vn),
             (i32 (ADDVv4i32v
              (DOT (DUPv4i32gpr WZR), node:$Vm, node:$Vn)))>;

class dot_v16i8<Instruction DOT, SDPatternOperator mulop,
                SDPatternOperator extend> :
  Pat<(idot_v16i8<mulop, extend> V128:$Vm, V128:$Vn),
      (odot_v16i8<DOT> V128:$Vm, V128:$Vn)>,
  Requires<[HasDotProd]>;

let AddedComplexity = 10 in {
  def : dot_v4i8<SDOTv8i8, sextloadi8>;
  def : dot_v4i8<UDOTv8i8, zextloadi8>;
  def : dot_v8i8<SDOTv8i8, AArch64smull, sext>;
  def : dot_v8i8<UDOTv8i8, AArch64umull, zext>;
  def : dot_v16i8<SDOTv16i8, AArch64smull, sext>;
  def : dot_v16i8<UDOTv16i8, AArch64umull, zext>;

  // FIXME: add patterns to generate vector by element dot product.
  // FIXME: add SVE dot-product patterns.
}
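
// Illustrative example: with +dotprod, a byte dot-product reduction such as
//   unsigned sum = 0;
//   for (int i = 0; i < 16; ++i) sum += (unsigned)a[i] * (unsigned)b[i];
// matches dot_v16i8 and selects to roughly:
//   dup  v0.4s, wzr
//   udot v0.4s, v1.16b, v2.16b
//   addv s0, v0.4s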

// Custom DAG nodes and isel rules to make a 64-byte block out of eight GPRs,
// so that it can be used as input to inline asm, and vice versa.
def LS64_BUILD : SDNode<"AArch64ISD::LS64_BUILD", SDTypeProfile<1, 8, []>>;
def LS64_EXTRACT : SDNode<"AArch64ISD::LS64_EXTRACT", SDTypeProfile<1, 2, []>>;
def : Pat<(i64x8 (LS64_BUILD GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3,
                             GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7)),
          (REG_SEQUENCE GPR64x8Class,
              $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3,
              $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7)>;
foreach i = 0-7 in {
  def : Pat<(i64 (LS64_EXTRACT (i64x8 GPR64x8:$val), (i32 i))),
            (EXTRACT_SUBREG $val, !cast<SubRegIndex>("x8sub_"#i))>;
}

let Predicates = [HasLS64] in {
  def LD64B: LoadStore64B<0b101, "ld64b", (ins GPR64sp:$Rn),
                                          (outs GPR64x8:$Rt)>;
  def ST64B: LoadStore64B<0b001, "st64b", (ins GPR64x8:$Rt, GPR64sp:$Rn),
                                          (outs)>;
  def ST64BV:   Store64BV<0b011, "st64bv">;
  def ST64BV0:  Store64BV<0b010, "st64bv0">;

  class ST64BPattern<Intrinsic intrinsic, Instruction instruction>
    : Pat<(intrinsic GPR64sp:$addr, GPR64:$x0, GPR64:$x1, GPR64:$x2, GPR64:$x3, GPR64:$x4, GPR64:$x5, GPR64:$x6, GPR64:$x7),
          (instruction (REG_SEQUENCE GPR64x8Class, $x0, x8sub_0, $x1, x8sub_1, $x2, x8sub_2, $x3, x8sub_3, $x4, x8sub_4, $x5, x8sub_5, $x6, x8sub_6, $x7, x8sub_7), $addr)>;

  def : ST64BPattern<int_aarch64_st64b, ST64B>;
  def : ST64BPattern<int_aarch64_st64bv, ST64BV>;
  def : ST64BPattern<int_aarch64_st64bv0, ST64BV0>;
}
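
// For reference: these back the ACLE LS64 operations (assuming arm_acle.h
// naming), e.g. data512_t __arm_ld64b(const void *addr) and
// void __arm_st64b(void *addr, data512_t value).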

let Predicates = [HasMOPS] in {
  let Defs = [NZCV] in {
    defm CPYFP : MOPSMemoryCopyInsns<0b00, "cpyfp">;

    defm CPYP : MOPSMemoryMoveInsns<0b00, "cpyp">;

    defm SETP : MOPSMemorySetInsns<0b00, "setp">;
  }
  let Uses = [NZCV] in {
    defm CPYFM : MOPSMemoryCopyInsns<0b01, "cpyfm">;
    defm CPYFE : MOPSMemoryCopyInsns<0b10, "cpyfe">;

    defm CPYM : MOPSMemoryMoveInsns<0b01, "cpym">;
    defm CPYE : MOPSMemoryMoveInsns<0b10, "cpye">;

    defm SETM : MOPSMemorySetInsns<0b01, "setm">;
    defm SETE : MOPSMemorySetInsns<0b10, "sete">;
  }
}
let Predicates = [HasMOPS, HasMTE] in {
  let Defs = [NZCV] in {
    defm SETGP     : MOPSMemorySetTaggingInsns<0b00, "setgp">;
  }
  let Uses = [NZCV] in {
    defm SETGM     : MOPSMemorySetTaggingInsns<0b01, "setgm">;
    // Can't use SETGE because it's a reserved name in TargetSelectionDAG.td
    defm MOPSSETGE : MOPSMemorySetTaggingInsns<0b10, "setge">;
  }
}

// MOPS Node operands: 0: Dst, 1: Src or Value, 2: Size, 3: Chain
// MOPS Node results: 0: Dst writeback, 1: Size writeback, 2: Chain
def SDT_AArch64mops : SDTypeProfile<2, 3, [ SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2> ]>;
def AArch64mops_memset : SDNode<"AArch64ISD::MOPS_MEMSET", SDT_AArch64mops>;
def AArch64mops_memset_tagging : SDNode<"AArch64ISD::MOPS_MEMSET_TAGGING", SDT_AArch64mops>;
def AArch64mops_memcopy : SDNode<"AArch64ISD::MOPS_MEMCOPY", SDT_AArch64mops>;
def AArch64mops_memmove : SDNode<"AArch64ISD::MOPS_MEMMOVE", SDT_AArch64mops>;

// MOPS operations are always expanded to three 4-byte instructions
// (prologue, main, epilogue), hence Size = 12 below.
let Predicates = [HasMOPS], Defs = [NZCV], Size = 12, mayStore = 1 in {
  let mayLoad = 1 in {
    def MOPSMemoryCopyPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
    def MOPSMemoryMovePseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64common:$Rs_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64common:$Rs, GPR64:$Rn),
                                      [], "$Rd = $Rd_wb,$Rs = $Rs_wb,$Rn = $Rn_wb">, Sched<[]>;
  }
  let mayLoad = 0 in {
    def MOPSMemorySetPseudo  : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                      (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                      [], "$Rd = $Rd_wb,$Rn = $Rn_wb,@earlyclobber $Rn_wb">, Sched<[]>;
  }
}
let Predicates = [HasMOPS, HasMTE], Defs = [NZCV], Size = 12, mayLoad = 0, mayStore = 1 in {
  def MOPSMemorySetTaggingPseudo : Pseudo<(outs GPR64common:$Rd_wb, GPR64:$Rn_wb),
                                          (ins GPR64common:$Rd, GPR64:$Rn, GPR64:$Rm),
                                          [], "$Rd = $Rd_wb,$Rn = $Rn_wb">, Sched<[]>;
}
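
// Illustrative expansion of MOPSMemorySetPseudo (a memset via MOPS):
//   setp [x0]!, x1!, x2
//   setm [x0]!, x1!, x2
//   sete [x0]!, x1!, x2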

//-----------------------------------------------------------------------------
// v8.3 Pointer Authentication late patterns

def : Pat<(int_ptrauth_blend GPR64:$Rd, imm64_0_65535:$imm),
          (PAUTH_BLEND GPR64:$Rd, (trunc_imm imm64_0_65535:$imm))>;
def : Pat<(int_ptrauth_blend GPR64:$Rd, GPR64:$Rn),
          (BFMXri GPR64:$Rd, GPR64:$Rn, 16, 15)>;
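
// e.g. (illustrative) a constant-discriminator blend becomes
//   movk x0, #1234, lsl #48       (via PAUTH_BLEND)
// and a register blend becomes
//   bfi  x0, x1, #48, #16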

//-----------------------------------------------------------------------------

// This gets lowered into an instruction sequence of 20 bytes
let Defs = [X16, X17], mayStore = 1, isCodeGenOnly = 1, Size = 20 in
def StoreSwiftAsyncContext
      : Pseudo<(outs), (ins GPR64:$ctx, GPR64sp:$base, simm9:$offset),
               []>, Sched<[]>;

def AArch64AssertZExtBool : SDNode<"AArch64ISD::ASSERT_ZEXT_BOOL", SDT_assert>;
def : Pat<(AArch64AssertZExtBool GPR32:$op),
          (i32 GPR32:$op)>;

//===----------------------------===//
// 2022 Architecture Extensions:
//===----------------------------===//

def : InstAlias<"clrbhb",  (HINT 22), 0>;
let Predicates = [HasCLRBHB] in {
  def : InstAlias<"clrbhb",  (HINT 22), 1>;
}

//===----------------------------------------------------------------------===//
// Translation Hardening Extension (FEAT_THE)
//===----------------------------------------------------------------------===//
defm RCW     : ReadCheckWriteCompareAndSwap;

defm RCWCLR  : ReadCheckWriteOperation<0b001, "clr">;
defm RCWSET  : ReadCheckWriteOperation<0b011, "set">;
defm RCWSWP  : ReadCheckWriteOperation<0b010, "swp">;

//===----------------------------------------------------------------------===//
// General Data-Processing Instructions (FEAT_V94_DP)
//===----------------------------------------------------------------------===//
defm ABS : OneOperandData<0b001000, "abs", abs>, Requires<[HasCSSC]>;
defm CNT : OneOperandData<0b000111, "cnt", ctpop>, Requires<[HasCSSC]>;
defm CTZ : OneOperandData<0b000110, "ctz", cttz>, Requires<[HasCSSC]>;

defm SMAX : ComparisonOp<0, 0, "smax", smax>, Requires<[HasCSSC]>;
defm SMIN : ComparisonOp<0, 1, "smin", smin>, Requires<[HasCSSC]>;
defm UMAX : ComparisonOp<1, 0, "umax", umax>, Requires<[HasCSSC]>;
defm UMIN : ComparisonOp<1, 1, "umin", umin>, Requires<[HasCSSC]>;

def RPRFM:
    I<(outs), (ins rprfop:$Rt, GPR64:$Rm, GPR64sp:$Rn),
      "rprfm", "\t$Rt, $Rm, [$Rn]", "", []>,
    Sched<[]> {
  bits<6> Rt;
  bits<5> Rn;
  bits<5> Rm;
  let Inst{2-0} = Rt{2-0};
  let Inst{4-3} = 0b11;
  let Inst{9-5} = Rn;
  let Inst{11-10} = 0b10;
  let Inst{13-12} = Rt{4-3};
  let Inst{14} = 0b1;
  let Inst{15} = Rt{5};
  let Inst{20-16} = Rm;
  let Inst{31-21} = 0b11111000101;
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 1;
  // RPRFM overlaps with PRFM (reg): when the decoder method of PRFM returns
  // Fail, the decoder should attempt to decode RPRFM instead. This requires
  // setting the decoder namespace to "Fallback".
  let DecoderNamespace = "Fallback";
}

//===----------------------------------------------------------------------===//
// 128-bit Atomics (FEAT_LSE128)
//===----------------------------------------------------------------------===//
let Predicates = [HasLSE128] in {
  def SWPP     : LSE128Base<0b000, 0b00, 0b1, "swpp">;
  def SWPPA    : LSE128Base<0b000, 0b10, 0b1, "swppa">;
  def SWPPAL   : LSE128Base<0b000, 0b11, 0b1, "swppal">;
  def SWPPL    : LSE128Base<0b000, 0b01, 0b1, "swppl">;
  def LDCLRP   : LSE128Base<0b001, 0b00, 0b0, "ldclrp">;
  def LDCLRPA  : LSE128Base<0b001, 0b10, 0b0, "ldclrpa">;
  def LDCLRPAL : LSE128Base<0b001, 0b11, 0b0, "ldclrpal">;
  def LDCLRPL  : LSE128Base<0b001, 0b01, 0b0, "ldclrpl">;
  def LDSETP   : LSE128Base<0b011, 0b00, 0b0, "ldsetp">;
  def LDSETPA  : LSE128Base<0b011, 0b10, 0b0, "ldsetpa">;
  def LDSETPAL : LSE128Base<0b011, 0b11, 0b0, "ldsetpal">;
  def LDSETPL  : LSE128Base<0b011, 0b01, 0b0, "ldsetpl">;
}

//===----------------------------------------------------------------------===//
// RCPC Instructions (FEAT_LRCPC3)
//===----------------------------------------------------------------------===//

let Predicates = [HasRCPC3] in {
  //                                              size   opc    opc2
  def STILPWpre:   BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-8]!", "$Rn = $wback">;
  def STILPXpre:   BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0000, (outs GPR64sp:$wback), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn, #-16]!", "$Rn = $wback">;
  def STILPW:      BaseLRCPC3IntegerLoadStorePair<0b10, 0b00, 0b0001, (outs), (ins GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def STILPX:      BaseLRCPC3IntegerLoadStorePair<0b11, 0b00, 0b0001, (outs), (ins GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), "stilp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPWpost: BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0000, (outs GPR64sp:$wback, GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #8", "$Rn = $wback">;
  def LDIAPPXpost: BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0000, (outs GPR64sp:$wback, GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn], #16", "$Rn = $wback">;
  def LDIAPPW:     BaseLRCPC3IntegerLoadStorePair<0b10, 0b01, 0b0001, (outs GPR32:$Rt, GPR32:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;
  def LDIAPPX:     BaseLRCPC3IntegerLoadStorePair<0b11, 0b01, 0b0001, (outs GPR64:$Rt, GPR64:$Rt2), (ins GPR64sp0:$Rn), "ldiapp", "\t$Rt, $Rt2, [$Rn]", "">;

  def : Pat<(AArch64ldiapp GPR64sp:$Rn), (LDIAPPX GPR64sp:$Rn)>;
  def : Pat<(AArch64stilp GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn), (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  // Aliases for when offset=0
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPW GPR32:$Rt, GPR32:$Rt2, GPR64sp:$Rn)>;
  def : InstAlias<"stilp\t$Rt, $Rt2, [$Rn, #0]", (STILPX GPR64:$Rt, GPR64:$Rt2, GPR64sp:$Rn)>;

  //                                         size   opc
  def STLRWpre:   BaseLRCPC3IntegerLoadStore<0b10, 0b10, (outs GPR64sp:$wback),            (ins GPR32:$Rt, GPR64sp:$Rn), "stlr",  "\t$Rt, [$Rn, #-4]!", "$Rn = $wback">;
  def STLRXpre:   BaseLRCPC3IntegerLoadStore<0b11, 0b10, (outs GPR64sp:$wback),            (ins GPR64:$Rt, GPR64sp:$Rn), "stlr",  "\t$Rt, [$Rn, #-8]!", "$Rn = $wback">;
  def LDAPRWpost: BaseLRCPC3IntegerLoadStore<0b10, 0b11, (outs GPR64sp:$wback, GPR32:$Rt), (ins GPR64sp:$Rn),            "ldapr", "\t$Rt, [$Rn], #4",   "$Rn = $wback">;
  def LDAPRXpost: BaseLRCPC3IntegerLoadStore<0b11, 0b11, (outs GPR64sp:$wback, GPR64:$Rt), (ins GPR64sp:$Rn),            "ldapr", "\t$Rt, [$Rn], #8",   "$Rn = $wback">;
}

let Predicates = [HasRCPC3, HasNEON] in {
  //                                              size   opc regtype
  defm STLURb:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b00, FPR8  , (outs), (ins FPR8  :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURh:  LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b00, FPR16 , (outs), (ins FPR16 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURs:  LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b00, FPR32 , (outs), (ins FPR32 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURd:  LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b00, FPR64 , (outs), (ins FPR64 :$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm STLURq:  LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b10, FPR128, (outs), (ins FPR128:$Rt, GPR64sp:$Rn, simm9:$simm), "stlur">;
  defm LDAPURb: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b01, FPR8  , (outs FPR8  :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURh: LRCPC3NEONLoadStoreUnscaledOffset<0b01, 0b01, FPR16 , (outs FPR16 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURs: LRCPC3NEONLoadStoreUnscaledOffset<0b10, 0b01, FPR32 , (outs FPR32 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURd: LRCPC3NEONLoadStoreUnscaledOffset<0b11, 0b01, FPR64 , (outs FPR64 :$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;
  defm LDAPURq: LRCPC3NEONLoadStoreUnscaledOffset<0b00, 0b11, FPR128, (outs FPR128:$Rt), (ins GPR64sp:$Rn, simm9:$simm), "ldapur">;

  //                                L
  def STL1:  LRCPC3NEONLdStSingle<0b0, (outs), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn), "stl1", "">;
  def LDAP1: LRCPC3NEONLdStSingle<0b1, (outs VecListOned:$dst), (ins VecListOned:$Vt, VectorIndexD:$Q, GPR64sp0:$Rn), "ldap1", "$Vt = $dst">;

  // Aliases for when offset=0
  def : InstAlias<"stl1\t$Vt$Q, [$Rn, #0]", (STL1 VecListOned:$Vt, VectorIndexD:$Q, GPR64sp:$Rn)>;
}

//===----------------------------------------------------------------------===//
// 128-bit System Instructions (FEAT_SYSINSTR128)
//===----------------------------------------------------------------------===//
let Predicates = [HasD128] in {
  def SYSPxt  : SystemPXtI<0, "sysp">;

  def SYSPxt_XZR
    : BaseSystemI<0, (outs),
        (ins imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, SyspXzrPairOperand:$xzr_pair),
        "sysp", "\t$op1, $Cn, $Cm, $op2, $xzr_pair">,
      Sched<[WriteSys]>
  {
    // A custom decoder is needed here: TableGen sees four encoded fields and
    // autogenerates a decoder (decodeToMCInst) that builds an MCInst with
    // four operands, but AArch64InstPrinter::printInstruction in
    // AArch64GenAsmWriter.inc is generated from the asm string and expects
    // five operands (one extra for the XZR pair). Adding a bits<5> xzr_pair
    // field does not help either: without a way to constrain it to 0b11111
    // here, it would overlap with the main SYSP instruction.
    let DecoderMethod = "DecodeSyspXzrInstruction";
    bits<3> op1;
    bits<4> Cn;
    bits<4> Cm;
    bits<3> op2;
    let Inst{22}    = 0b1; // override BaseSystemI
    let Inst{20-19} = 0b01;
    let Inst{18-16} = op1;
    let Inst{15-12} = Cn;
    let Inst{11-8}  = Cm;
    let Inst{7-5}   = op2;
    let Inst{4-0}   = 0b11111;
  }

  def : InstAlias<"sysp $op1, $Cn, $Cm, $op2",
                  (SYSPxt_XZR imm0_7:$op1, sys_cr_op:$Cn, sys_cr_op:$Cm, imm0_7:$op2, XZR)>;
}

//---
// 128-bit System Registers (FEAT_SYSREG128)
//---

// Instruction encoding:
//
//          31       22|21|20|19|18 16|15 12|11 8|7 5|4 0
// MRRS      1101010101| 1| 1|o0|  op1|   Cn|  Cm|op2| Rt
// MSRR      1101010101| 0| 1|o0|  op1|   Cn|  Cm|op2| Rt

// Instruction syntax:
//
// MRRS <Xt>, <Xt+1>, <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>
// MSRR <sysreg|S<op0>_<op1>_<Cn>_<Cm>_<op2>>, <Xt>, <Xt+1>
//
// ...where t is even (X0, X2, etc).

let Predicates = [HasD128] in {
  def MRRS : RtSystemI128<1,
    (outs MrrsMssrPairClassOperand:$Rt), (ins mrs_sysreg_op:$systemreg),
    "mrrs", "\t$Rt, $systemreg">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }

  def MSRR : RtSystemI128<0,
    (outs),  (ins msr_sysreg_op:$systemreg, MrrsMssrPairClassOperand:$Rt),
    "msrr", "\t$systemreg, $Rt">
  {
    bits<16> systemreg;
    let Inst{20-5} = systemreg;
  }
}

//===----------------------------===//
// 2023 Architecture Extensions:
//===----------------------------===//

let Predicates = [HasFP8] in {
  defm F1CVTL  : SIMDMixedTwoVectorFP8<0b00, "f1cvtl">;
  defm F2CVTL  : SIMDMixedTwoVectorFP8<0b01, "f2cvtl">;
  defm BF1CVTL : SIMDMixedTwoVectorFP8<0b10, "bf1cvtl">;
  defm BF2CVTL : SIMDMixedTwoVectorFP8<0b11, "bf2cvtl">;
  defm FCVTN_F16_F8 : SIMDThreeSameSizeVectorCvt<"fcvtn">;
  defm FCVTN_F32_F8 : SIMDThreeVectorCvt<"fcvtn">;
  defm FSCALE : SIMDThreeSameVectorFP<0b1, 0b1, 0b111, "fscale", null_frag>;
} // End let Predicates = [HasFP8]

let Predicates = [HasFAMINMAX] in {
 defm FAMAX : SIMDThreeSameVectorFP<0b0, 0b1, 0b011, "famax", null_frag>;
 defm FAMIN : SIMDThreeSameVectorFP<0b1, 0b1, 0b011, "famin", null_frag>;
} // End let Predicates = [HasFAMINMAX]

let Predicates = [HasFP8FMA] in {
 defm FMLALBlane : SIMDThreeSameVectorMLAIndex<0b0, "fmlalb">;
 defm FMLALTlane : SIMDThreeSameVectorMLAIndex<0b1, "fmlalt">;
 defm FMLALLBBlane : SIMDThreeSameVectorMLALIndex<0b0, 0b00, "fmlallbb">;
 defm FMLALLBTlane : SIMDThreeSameVectorMLALIndex<0b0, 0b01, "fmlallbt">;
 defm FMLALLTBlane : SIMDThreeSameVectorMLALIndex<0b1, 0b00, "fmlalltb">;
 defm FMLALLTTlane : SIMDThreeSameVectorMLALIndex<0b1, 0b01, "fmlalltt">;

 defm FMLALB : SIMDThreeSameVectorMLA<0b0, "fmlalb">;
 defm FMLALT : SIMDThreeSameVectorMLA<0b1, "fmlalt">;
 defm FMLALLBB : SIMDThreeSameVectorMLAL<0b0, 0b00, "fmlallbb">;
 defm FMLALLBT : SIMDThreeSameVectorMLAL<0b0, 0b01, "fmlallbt">;
 defm FMLALLTB : SIMDThreeSameVectorMLAL<0b1, 0b00, "fmlalltb">;
 defm FMLALLTT : SIMDThreeSameVectorMLAL<0b1, 0b01, "fmlalltt">;
} // End let Predicates = [HasFP8FMA]

let Predicates = [HasFP8DOT2] in {
 defm FDOTlane : SIMDThreeSameVectorFP8DOT2Index<"fdot">;
 defm FDOT : SIMDThreeSameVectorDOT2<"fdot">;
} // End let Predicates = [HasFP8DOT2]

let Predicates = [HasFP8DOT4] in {
 defm FDOTlane : SIMDThreeSameVectorFP8DOT4Index<"fdot">;
 defm FDOT : SIMDThreeSameVectorDOT4<"fdot">;
} // End let Predicates = [HasFP8DOT4]

//===----------------------------------------------------------------------===//
// Checked Pointer Arithmetic (FEAT_CPA)
//===----------------------------------------------------------------------===//
let Predicates = [HasCPA] in {
  // Scalar add/subtract
  defm ADDPT : AddSubCPA<0, "addpt">;
  defm SUBPT : AddSubCPA<1, "subpt">;

  // Scalar multiply-add/subtract
  def MADDPT : MulAccumCPA<0, "maddpt">;
  def MSUBPT : MulAccumCPA<1, "msubpt">;
}

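// Software rounding of v4f32 to v4bf16. Roughly, per lane:
//   bf16 = (x + 0x7FFF + ((x >> 16) & 1)) >> 16   // round to nearest even
// where NaN lanes instead take the input with its quiet bit set (the BSP
// selects between the two). The final ">> 16" is done by the UZP2 at each
// use site below.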
def round_v4fp32_to_v4bf16 :
  OutPatFrag<(ops node:$Rn),
             // NaN? Round : Quiet(NaN)
             (BSPv16i8 (FCMEQv4f32 $Rn, $Rn),
                       (ADDv4i32
                         (ADDv4i32 $Rn,
                           // Extract the LSB of the fp32 *truncated* to bf16.
                           (ANDv16i8 (USHRv4i32_shift V128:$Rn, (i32 16)),
                                     (MOVIv4i32 (i32 1), (i32 0)))),
                         // Bias which will help us break ties correctly.
                         (MOVIv4s_msl (i32 127), (i32 264))),
                       // Set the quiet bit in the NaN.
                       (ORRv4i32 $Rn, (i32 64), (i32 16)))>;

multiclass PromoteUnaryv8f16Tov4f32<SDPatternOperator InOp, Instruction OutInst> {
  let Predicates = [HasNoFullFP16] in
  def : Pat<(InOp (v8f16 V128:$Rn)),
            (v8f16 (FCVTNv8i16
              (INSERT_SUBREG (IMPLICIT_DEF),
                             (v4f16 (FCVTNv4i16
                               (v4f32 (OutInst
                                 (v4f32 (FCVTLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))))))),
               dsub),
              (v4f32 (OutInst (v4f32 (FCVTLv8i16 V128:$Rn))))))>;

  let Predicates = [HasBF16] in
  def : Pat<(InOp (v8bf16 V128:$Rn)),
            (v8bf16 (BFCVTN2
              (v8bf16 (BFCVTN
                (v4f32 (OutInst
                  (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))))))),
              (v4f32 (OutInst (v4f32 (SHLLv8i16 V128:$Rn))))))>;

  let Predicates = [HasNoBF16] in
  def : Pat<(InOp (v8bf16 V128:$Rn)),
            (UZP2v8i16
              (round_v4fp32_to_v4bf16 (v4f32 (OutInst
                  (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub))))))),
              (round_v4fp32_to_v4bf16 (v4f32 (OutInst
                  (v4f32 (SHLLv8i16 V128:$Rn))))))>;
}
defm : PromoteUnaryv8f16Tov4f32<any_fceil,      FRINTPv4f32>;
defm : PromoteUnaryv8f16Tov4f32<any_ffloor,     FRINTMv4f32>;
defm : PromoteUnaryv8f16Tov4f32<any_fnearbyint, FRINTIv4f32>;
defm : PromoteUnaryv8f16Tov4f32<any_fround,     FRINTAv4f32>;
defm : PromoteUnaryv8f16Tov4f32<any_froundeven, FRINTNv4f32>;
defm : PromoteUnaryv8f16Tov4f32<any_frint,      FRINTXv4f32>;
defm : PromoteUnaryv8f16Tov4f32<any_ftrunc,     FRINTZv4f32>;

multiclass PromoteBinaryv8f16Tov4f32<SDPatternOperator InOp, Instruction OutInst> {
  let Predicates = [HasNoFullFP16] in
  def : Pat<(InOp (v8f16 V128:$Rn), (v8f16 V128:$Rm)),
            (v8f16 (FCVTNv8i16
              (INSERT_SUBREG (IMPLICIT_DEF),
                             (v4f16 (FCVTNv4i16
                               (v4f32 (OutInst
                                 (v4f32 (FCVTLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))),
                                 (v4f32 (FCVTLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rm, dsub)))))))),
               dsub),
              (v4f32 (OutInst (v4f32 (FCVTLv8i16 V128:$Rn)),
                              (v4f32 (FCVTLv8i16 V128:$Rm))))))>;

  let Predicates = [HasBF16] in
  def : Pat<(InOp (v8bf16 V128:$Rn), (v8bf16 V128:$Rm)),
            (v8bf16 (BFCVTN2
              (v8bf16 (BFCVTN
                (v4f32 (OutInst
                  (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))),
                  (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rm, dsub)))))))),
              (v4f32 (OutInst (v4f32 (SHLLv8i16 V128:$Rn)),
                              (v4f32 (SHLLv8i16 V128:$Rm))))))>;

  let Predicates = [HasNoBF16] in
  def : Pat<(InOp (v8bf16 V128:$Rn), (v8bf16 V128:$Rm)),
            (UZP2v8i16
              (round_v4fp32_to_v4bf16 (v4f32 (OutInst
                  (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rn, dsub)))),
                  (v4f32 (SHLLv4i16 (v4i16 (EXTRACT_SUBREG V128:$Rm, dsub))))))),
              (round_v4fp32_to_v4bf16 (v4f32 (OutInst
                  (v4f32 (SHLLv8i16 V128:$Rn)),
                  (v4f32 (SHLLv8i16 V128:$Rm))))))>;
}
defm : PromoteBinaryv8f16Tov4f32<any_fadd, FADDv4f32>;
defm : PromoteBinaryv8f16Tov4f32<any_fdiv, FDIVv4f32>;
defm : PromoteBinaryv8f16Tov4f32<any_fmul, FMULv4f32>;
defm : PromoteBinaryv8f16Tov4f32<any_fsub, FSUBv4f32>;

include "AArch64InstrAtomics.td"
include "AArch64SVEInstrInfo.td"
include "AArch64SMEInstrInfo.td"
include "AArch64InstrGISel.td"
10269