xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV1.td (revision e1e636193db45630c7881246d25902e57c43d24e)
1//=- AArch64SchedNeoverseV1.td - NeoverseV1 Scheduling Model -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the scheduling model for the Arm Neoverse V1 processors.
10//
11// References:
12// - "Arm Neoverse V1 Software Optimization Guide"
13// - "Arm Neoverse V1 Platform: Unleashing a new performance tier for Arm-based computing"
14//   https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/neoverse-v1-platform-a-new-performance-tier-for-arm
15// - "Neoverse V1"
16//   https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_v1
17
18//
19//===----------------------------------------------------------------------===//
20
21def NeoverseV1Model : SchedMachineModel {
22  let IssueWidth            =  15; // Maximum micro-ops dispatch rate.
23  let MicroOpBufferSize     = 256; // Micro-op re-order buffer.
24  let LoadLatency           =   4; // Optimistic load latency.
25  let MispredictPenalty     =  11; // Cycle cost of a mispredicted branch.
26  let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
27  let CompleteModel         =   1; // NOTE(review): presumably requires scheduling info for every supported instruction -- confirm.
28
29  list<Predicate> UnsupportedFeatures = !listconcat(SVE2Unsupported.F,
30                                                    SMEUnsupported.F,
31                                                    [HasMTE, HasCPA,
32                                                    HasCSSC]); // Features this model does not cover.
33}
34
35//===----------------------------------------------------------------------===//
36// Define each kind of processor resource and number available on Neoverse V1.
37// Instructions are first fetched and then decoded into internal macro-ops
38// (MOPs).  From there, the MOPs proceed through register renaming and dispatch
39// stages.  A MOP can be split into one or more micro-ops further down the
40// pipeline, after the decode stage.  Once dispatched, micro-ops wait for their
41// operands and issue out-of-order to one of the issue pipelines.  Each issue
42// pipeline can accept one micro-op per cycle.
43
44let SchedModel = NeoverseV1Model in {
45
46// Define the issue ports.
47def V1UnitB   : ProcResource<2>;  // Branch 0/1
48def V1UnitS   : ProcResource<2>;  // Integer single cycle 0/1
49def V1UnitM0  : ProcResource<1>;  // Integer multicycle 0
50def V1UnitM1  : ProcResource<1>;  // Integer multicycle 1
51def V1UnitL01 : ProcResource<2>;  // Load/Store 0/1
52def V1UnitL2  : ProcResource<1>;  // Load 2
53def V1UnitD   : ProcResource<2>;  // Store data 0/1
54def V1UnitV0  : ProcResource<1>;  // FP/ASIMD 0
55def V1UnitV1  : ProcResource<1>;  // FP/ASIMD 1
56def V1UnitV2  : ProcResource<1>;  // FP/ASIMD 2
57def V1UnitV3  : ProcResource<1>;  // FP/ASIMD 3
58
59def V1UnitI   : ProcResGroup<[V1UnitS,
60                              V1UnitM0, V1UnitM1]>;   // All integer units (single cycle 0/1, multicycle 0/1)
61def V1UnitJ   : ProcResGroup<[V1UnitS, V1UnitM0]>;    // Integer single cycle 0/1 plus multicycle 0 only
62def V1UnitM   : ProcResGroup<[V1UnitM0, V1UnitM1]>;   // Integer multicycle units
63def V1UnitL   : ProcResGroup<[V1UnitL01, V1UnitL2]>;  // Load units (Load/Store 0/1 plus Load 2)
64def V1UnitV   : ProcResGroup<[V1UnitV0, V1UnitV1,
65                              V1UnitV2, V1UnitV3]>;   // All FP/ASIMD units
66def V1UnitV01 : ProcResGroup<[V1UnitV0, V1UnitV1]>;   // FP/ASIMD 0/1 units
67def V1UnitV02 : ProcResGroup<[V1UnitV0, V1UnitV2]>;   // FP/ASIMD 0/2 units
68def V1UnitV13 : ProcResGroup<[V1UnitV1, V1UnitV3]>;   // FP/ASIMD 1/3 units
69
70// Define commonly used read types.
71
72// No generic forwarding is provided for these types.
73def : ReadAdvance<ReadI,       0>;
74def : ReadAdvance<ReadISReg,   0>;
75def : ReadAdvance<ReadIEReg,   0>;
76def : ReadAdvance<ReadIM,      0>;
77def : ReadAdvance<ReadIMA,     0>;
78def : ReadAdvance<ReadID,      0>;
79def : ReadAdvance<ReadExtrHi,  0>;
80def : ReadAdvance<ReadAdrBase, 0>;
81def : ReadAdvance<ReadST,      0>;
82def : ReadAdvance<ReadVLD,     0>;
83
84def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; } // Atomics are marked unsupported in this model.
85def : WriteRes<WriteBarrier, []> { let Latency = 1; }     // 1-cycle latency, no issue port listed.
86def : WriteRes<WriteHint,    []> { let Latency = 1; }     // 1-cycle latency, no issue port listed.
87
88
89//===----------------------------------------------------------------------===//
90// Define generic 0 micro-op types
91
92def V1Write_0c_0Z : SchedWriteRes<[]>
93  { let Latency = 0; let NumMicroOps = 0; }  // Zero latency, zero micro-ops, no issue port.
94
95
96//===----------------------------------------------------------------------===//
97// Generic write types made of a single micro-op.
98// Names read V1Write_<latency>c[<release cycles>]_1<unit>.
99let Latency = 1 in def V1Write_1c_1B      : SchedWriteRes<[V1UnitB]>;
100let Latency = 1 in def V1Write_1c_1I      : SchedWriteRes<[V1UnitI]>;
101let Latency = 1 in def V1Write_1c_1J      : SchedWriteRes<[V1UnitJ]>;
102let Latency = 4 in def V1Write_4c_1L      : SchedWriteRes<[V1UnitL]>;
103let Latency = 6 in def V1Write_6c_1L      : SchedWriteRes<[V1UnitL]>;
104let Latency = 1 in def V1Write_1c_1L01    : SchedWriteRes<[V1UnitL01]>;
105let Latency = 4 in def V1Write_4c_1L01    : SchedWriteRes<[V1UnitL01]>;
106let Latency = 6 in def V1Write_6c_1L01    : SchedWriteRes<[V1UnitL01]>;
107let Latency = 2 in def V1Write_2c_1M      : SchedWriteRes<[V1UnitM]>;
108let Latency = 3 in def V1Write_3c_1M      : SchedWriteRes<[V1UnitM]>;
109let Latency = 4 in def V1Write_4c_1M      : SchedWriteRes<[V1UnitM]>;
110let Latency = 1 in def V1Write_1c_1M0     : SchedWriteRes<[V1UnitM0]>;
111let Latency = 2 in def V1Write_2c_1M0     : SchedWriteRes<[V1UnitM0]>;
112let Latency = 3 in def V1Write_3c_1M0     : SchedWriteRes<[V1UnitM0]>;
113let Latency = 5 in def V1Write_5c_1M0     : SchedWriteRes<[V1UnitM0]>;
114let Latency = 12, ReleaseAtCycles = [5] in
115def V1Write_12c5_1M0   : SchedWriteRes<[V1UnitM0]>;
116let Latency = 20, ReleaseAtCycles = [5] in
117def V1Write_20c5_1M0   : SchedWriteRes<[V1UnitM0]>;
118let Latency = 2 in def V1Write_2c_1V      : SchedWriteRes<[V1UnitV]>;
119let Latency = 3 in def V1Write_3c_1V      : SchedWriteRes<[V1UnitV]>;
120let Latency = 4 in def V1Write_4c_1V      : SchedWriteRes<[V1UnitV]>;
121let Latency = 5 in def V1Write_5c_1V      : SchedWriteRes<[V1UnitV]>;
122let Latency = 2 in def V1Write_2c_1V0     : SchedWriteRes<[V1UnitV0]>;
123let Latency = 3 in def V1Write_3c_1V0     : SchedWriteRes<[V1UnitV0]>;
124let Latency = 4 in def V1Write_4c_1V0     : SchedWriteRes<[V1UnitV0]>;
125let Latency = 6 in def V1Write_6c_1V0     : SchedWriteRes<[V1UnitV0]>;
126let Latency = 10, ReleaseAtCycles = [7] in
127def V1Write_10c7_1V0   : SchedWriteRes<[V1UnitV0]>;
128let Latency = 12, ReleaseAtCycles = [7] in
129def V1Write_12c7_1V0   : SchedWriteRes<[V1UnitV0]>;
130let Latency = 13, ReleaseAtCycles = [10] in
131def V1Write_13c10_1V0  : SchedWriteRes<[V1UnitV0]>;
132let Latency = 15, ReleaseAtCycles = [7] in
133def V1Write_15c7_1V0   : SchedWriteRes<[V1UnitV0]>;
134let Latency = 16, ReleaseAtCycles = [7] in
135def V1Write_16c7_1V0   : SchedWriteRes<[V1UnitV0]>;
136let Latency = 20, ReleaseAtCycles = [7] in
137def V1Write_20c7_1V0   : SchedWriteRes<[V1UnitV0]>;
138let Latency = 2 in def V1Write_2c_1V01    : SchedWriteRes<[V1UnitV01]>;
139let Latency = 3 in def V1Write_3c_1V01    : SchedWriteRes<[V1UnitV01]>;
140let Latency = 4 in def V1Write_4c_1V01    : SchedWriteRes<[V1UnitV01]>;
141let Latency = 5 in def V1Write_5c_1V01    : SchedWriteRes<[V1UnitV01]>;
142let Latency = 3 in def V1Write_3c_1V02    : SchedWriteRes<[V1UnitV02]>;
143let Latency = 4 in def V1Write_4c_1V02    : SchedWriteRes<[V1UnitV02]>;
144let Latency = 7, ReleaseAtCycles = [7] in
145def V1Write_7c7_1V02   : SchedWriteRes<[V1UnitV02]>;
146let Latency = 10, ReleaseAtCycles = [7] in
147def V1Write_10c7_1V02  : SchedWriteRes<[V1UnitV02]>;
148let Latency = 13, ReleaseAtCycles = [5] in
149def V1Write_13c5_1V02  : SchedWriteRes<[V1UnitV02]>;
150let Latency = 13, ReleaseAtCycles = [11] in
151def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]>;
152let Latency = 15, ReleaseAtCycles = [7] in
153def V1Write_15c7_1V02  : SchedWriteRes<[V1UnitV02]>;
154let Latency = 16, ReleaseAtCycles = [7] in
155def V1Write_16c7_1V02  : SchedWriteRes<[V1UnitV02]>;
156let Latency = 2 in def V1Write_2c_1V1     : SchedWriteRes<[V1UnitV1]>;
157let Latency = 3 in def V1Write_3c_1V1     : SchedWriteRes<[V1UnitV1]>;
158let Latency = 4 in def V1Write_4c_1V1     : SchedWriteRes<[V1UnitV1]>;
159let Latency = 2 in def V1Write_2c_1V13    : SchedWriteRes<[V1UnitV13]>;
160let Latency = 4 in def V1Write_4c_1V13    : SchedWriteRes<[V1UnitV13]>;
161
162//===----------------------------------------------------------------------===//
163// Generic write types made of 2 micro-ops, one issue resource per micro-op.
164
165def V1Write_1c_1B_1S     : SchedWriteRes<[V1UnitB, V1UnitS]>
166  { let Latency = 1; let NumMicroOps = 2; }
167def V1Write_6c_1B_1M0    : SchedWriteRes<[V1UnitB, V1UnitM0]>
168  { let Latency = 6; let NumMicroOps = 2; }
169def V1Write_3c_1I_1M     : SchedWriteRes<[V1UnitI, V1UnitM]>
170  { let Latency = 3; let NumMicroOps = 2; }
171def V1Write_5c_1I_1L     : SchedWriteRes<[V1UnitI, V1UnitL]>
172  { let Latency = 5; let NumMicroOps = 2; }
173def V1Write_7c_1I_1L     : SchedWriteRes<[V1UnitI, V1UnitL]>
174  { let Latency = 7; let NumMicroOps = 2; }
175def V1Write_6c_2L        : SchedWriteRes<[V1UnitL, V1UnitL]>
176  { let Latency = 6; let NumMicroOps = 2; }
177def V1Write_6c_1L_1M     : SchedWriteRes<[V1UnitL, V1UnitM]>
178  { let Latency = 6; let NumMicroOps = 2; }
179def V1Write_8c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>
180  { let Latency = 8; let NumMicroOps = 2; }
181def V1Write_9c_1L_1V     : SchedWriteRes<[V1UnitL, V1UnitV]>
182  { let Latency = 9; let NumMicroOps = 2; }
183def V1Write_11c_1L_1V    : SchedWriteRes<[V1UnitL, V1UnitV]>
184  { let Latency = 11; let NumMicroOps = 2; }
185def V1Write_1c_1L01_1D   : SchedWriteRes<[V1UnitL01, V1UnitD]>
186  { let Latency = 1; let NumMicroOps = 2; }
187def V1Write_6c_1L01_1S   : SchedWriteRes<[V1UnitL01, V1UnitS]>
188  { let Latency = 6; let NumMicroOps = 2; }
189def V1Write_7c_1L01_1S   : SchedWriteRes<[V1UnitL01, V1UnitS]>
190  { let Latency = 7; let NumMicroOps = 2; }
191def V1Write_2c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>
192  { let Latency = 2; let NumMicroOps = 2; }
193def V1Write_4c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>
194  { let Latency = 4; let NumMicroOps = 2; }
195def V1Write_6c_1L01_1V   : SchedWriteRes<[V1UnitL01, V1UnitV]>
196  { let Latency = 6; let NumMicroOps = 2; }
197def V1Write_2c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>
198  { let Latency = 2; let NumMicroOps = 2; }
199def V1Write_4c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>
200  { let Latency = 4; let NumMicroOps = 2; }
201def V1Write_2c_2M0       : SchedWriteRes<[V1UnitM0, V1UnitM0]>
202  { let Latency = 2; let NumMicroOps = 2; }
203def V1Write_3c_2M0       : SchedWriteRes<[V1UnitM0, V1UnitM0]>
204  { let Latency = 3; let NumMicroOps = 2; }
205def V1Write_9c_1M0_1L    : SchedWriteRes<[V1UnitM0, V1UnitL]>
206  { let Latency = 9; let NumMicroOps = 2; }
207def V1Write_5c_1M0_1V    : SchedWriteRes<[V1UnitM0, V1UnitV]>
208  { let Latency = 5; let NumMicroOps = 2; }
209def V1Write_4c_1M0_1V0   : SchedWriteRes<[V1UnitM0, V1UnitV0]>
210  { let Latency = 4; let NumMicroOps = 2; }
211let Latency = 7, NumMicroOps = 2 in  // 7-cycle, 2 micro-ops: one on M0, one on V0 (matches the "_1M0_1V0" suffix).
212def V1Write_7c_1M0_1V0   : SchedWriteRes<[V1UnitM0, V1UnitV0]>;
213def V1Write_5c_1M0_1V01  : SchedWriteRes<[V1UnitM0, V1UnitV01]>  // 2 micro-ops each, continued
214  { let Latency = 5; let NumMicroOps = 2; }
215def V1Write_6c_1M0_1V1   : SchedWriteRes<[V1UnitM0, V1UnitV1]>
216  { let Latency = 6; let NumMicroOps = 2; }
217def V1Write_9c_1M0_1V1   : SchedWriteRes<[V1UnitM0, V1UnitV1]>
218  { let Latency = 9; let NumMicroOps = 2; }
219def V1Write_4c_2V        : SchedWriteRes<[V1UnitV, V1UnitV]>
220  { let Latency = 4; let NumMicroOps = 2; }
221def V1Write_8c_1V_1V01   : SchedWriteRes<[V1UnitV, V1UnitV01]>
222  { let Latency = 8; let NumMicroOps = 2; }
223def V1Write_4c_2V0       : SchedWriteRes<[V1UnitV0, V1UnitV0]>
224  { let Latency = 4; let NumMicroOps = 2; }
225def V1Write_5c_2V0       : SchedWriteRes<[V1UnitV0, V1UnitV0]>
226  { let Latency = 5; let NumMicroOps = 2; }
227def V1Write_2c_2V01      : SchedWriteRes<[V1UnitV01, V1UnitV01]>
228  { let Latency = 2; let NumMicroOps = 2; }
229def V1Write_4c_2V01      : SchedWriteRes<[V1UnitV01, V1UnitV01]>
230  { let Latency = 4; let NumMicroOps = 2; }
231def V1Write_4c_2V02      : SchedWriteRes<[V1UnitV02, V1UnitV02]>
232  { let Latency = 4; let NumMicroOps = 2; }
233def V1Write_6c_2V02      : SchedWriteRes<[V1UnitV02, V1UnitV02]>
234  { let Latency = 6; let NumMicroOps = 2; }
235def V1Write_4c_1V13_1V   : SchedWriteRes<[V1UnitV13, V1UnitV]>
236  { let Latency = 4; let NumMicroOps = 2; }
237def V1Write_4c_2V13      : SchedWriteRes<[V1UnitV13, V1UnitV13]>
238  { let Latency = 4; let NumMicroOps = 2; }
239
240//===----------------------------------------------------------------------===//
241// Generic write types made of 3 micro-ops, one issue resource per micro-op.
242
243def V1Write_2c_1I_1L01_1V01 : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitV01]>
244  { let Latency = 2; let NumMicroOps = 3; }
245def V1Write_7c_2M0_1V01     : SchedWriteRes<[V1UnitM0, V1UnitM0, V1UnitV01]>
246  { let Latency = 7; let NumMicroOps = 3; }
247def V1Write_8c_1L_2V        : SchedWriteRes<[V1UnitL, V1UnitV, V1UnitV]>
248  { let Latency = 8; let NumMicroOps = 3; }
249def V1Write_6c_3L           : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL]>
250  { let Latency = 6; let NumMicroOps = 3; }
251def V1Write_2c_1L01_1S_1V   : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>
252  { let Latency = 2; let NumMicroOps = 3; }
253def V1Write_4c_1L01_1S_1V   : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>
254  { let Latency = 4; let NumMicroOps = 3; }
255def V1Write_2c_2L01_1V01    : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitV01]>
256  { let Latency = 2; let NumMicroOps = 3; }
257def V1Write_6c_3V           : SchedWriteRes<[V1UnitV, V1UnitV, V1UnitV]>
258  { let Latency = 6; let NumMicroOps = 3; }
259def V1Write_4c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>
260  { let Latency = 4; let NumMicroOps = 3; }
261def V1Write_6c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>
262  { let Latency = 6; let NumMicroOps = 3; }
263def V1Write_8c_3V01         : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>
264  { let Latency = 8; let NumMicroOps = 3; }
265
266//===----------------------------------------------------------------------===//
267// Generic write types made of 4 micro-ops, one issue resource per micro-op.
268
269def V1Write_8c_2M0_2V0      : SchedWriteRes<[V1UnitM0, V1UnitM0,
270                                             V1UnitV0, V1UnitV0]>
271  { let Latency = 8; let NumMicroOps = 4; }
272def V1Write_7c_4L           : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, V1UnitL]>
273  { let Latency = 7; let NumMicroOps = 4; }
274def V1Write_8c_2L_2V        : SchedWriteRes<[V1UnitL, V1UnitL,
275                                             V1UnitV, V1UnitV]>
276  { let Latency = 8; let NumMicroOps = 4; }
277def V1Write_9c_2L_2V        : SchedWriteRes<[V1UnitL, V1UnitL,
278                                             V1UnitV, V1UnitV]>
279  { let Latency = 9; let NumMicroOps = 4; }
280def V1Write_11c_2L_2V       : SchedWriteRes<[V1UnitL, V1UnitL,
281                                             V1UnitV, V1UnitV]>
282  { let Latency = 11; let NumMicroOps = 4; }
283def V1Write_10c_2L01_2V     : SchedWriteRes<[V1UnitL01, V1UnitL01,
284                                             V1UnitV, V1UnitV]>
285  { let Latency = 10; let NumMicroOps = 4; }
286def V1Write_2c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
287                                             V1UnitV01, V1UnitV01]>
288  { let Latency = 2; let NumMicroOps = 4; }
289def V1Write_4c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
290                                             V1UnitV01, V1UnitV01]>
291  { let Latency = 4; let NumMicroOps = 4; }
292def V1Write_8c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
293                                             V1UnitV01, V1UnitV01]>
294  { let Latency = 8; let NumMicroOps = 4; }
295def V1Write_9c_2L01_2V01    : SchedWriteRes<[V1UnitL01, V1UnitL01,
296                                             V1UnitV01, V1UnitV01]>
297  { let Latency = 9; let NumMicroOps = 4; }
298def V1Write_10c_2L01_2V01   : SchedWriteRes<[V1UnitL01, V1UnitL01,
299                                             V1UnitV01, V1UnitV01]>
300  { let Latency = 10; let NumMicroOps = 4; }
301def V1Write_10c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01,
302                                             V1UnitV1, V1UnitV1]>
303  { let Latency = 10; let NumMicroOps = 4; }
304def V1Write_12c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01,
305                                             V1UnitV1, V1UnitV1]>
306  { let Latency = 12; let NumMicroOps = 4; }
307def V1Write_6c_4V0          : SchedWriteRes<[V1UnitV0, V1UnitV0,
308                                             V1UnitV0, V1UnitV0]>
309  { let Latency = 6; let NumMicroOps = 4; }
310def V1Write_12c_4V01        : SchedWriteRes<[V1UnitV01, V1UnitV01,
311                                             V1UnitV01, V1UnitV01]>
312  { let Latency = 12; let NumMicroOps = 4; }
313let Latency = 6, NumMicroOps = 4 in  // 4 micro-ops, so V02 is listed four times (one resource entry per micro-op).
314def V1Write_6c_4V02         : SchedWriteRes<[V1UnitV02, V1UnitV02, V1UnitV02, V1UnitV02]>;
315
316//===----------------------------------------------------------------------===//
317// Generic write types made of 5 micro-ops, one issue resource per micro-op.
318
319def V1Write_8c_2L_3V            : SchedWriteRes<[V1UnitL, V1UnitL,
320                                                 V1UnitV, V1UnitV, V1UnitV]>
321  { let Latency = 8; let NumMicroOps = 5; }
322def V1Write_14c_1V_1V0_2V1_1V13 : SchedWriteRes<[V1UnitV,
323                                                 V1UnitV0,
324                                                 V1UnitV1, V1UnitV1,
325                                                 V1UnitV13]>
326  { let Latency = 14; let NumMicroOps = 5; }
327def V1Write_9c_1V_4V01          : SchedWriteRes<[V1UnitV,
328                                                 V1UnitV01, V1UnitV01,
329                                                 V1UnitV01, V1UnitV01]>
330  { let Latency = 9; let NumMicroOps = 5; }
331def V1Write_6c_5V01             : SchedWriteRes<[V1UnitV01, V1UnitV01,
332                                                 V1UnitV01, V1UnitV01, V1UnitV01]>
333  { let Latency = 6; let NumMicroOps = 5; }
334
335//===----------------------------------------------------------------------===//
336// Generic write types made of 6 micro-ops, one issue resource per micro-op.
337
338def V1Write_6c_3L_3V      : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
339                                           V1UnitV, V1UnitV, V1UnitV]>
340  { let Latency = 6; let NumMicroOps = 6; }
341def V1Write_8c_3L_3V      : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
342                                           V1UnitV, V1UnitV, V1UnitV]>
343  { let Latency = 8; let NumMicroOps = 6; }
344def V1Write_2c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
345                                           V1UnitV01, V1UnitV01, V1UnitV01]>
346  { let Latency = 2; let NumMicroOps = 6; }
347def V1Write_5c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
348                                           V1UnitV01, V1UnitV01, V1UnitV01]>
349  { let Latency = 5; let NumMicroOps = 6; }
350def V1Write_6c_3L01_3V01  : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
351                                           V1UnitV01, V1UnitV01, V1UnitV01]>
352  { let Latency = 6; let NumMicroOps = 6; }
353def V1Write_11c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
354                                           V1UnitV01, V1UnitV01, V1UnitV01]>
355  { let Latency = 11; let NumMicroOps = 6; }
356def V1Write_11c_1V_5V01   : SchedWriteRes<[V1UnitV,
357                                           V1UnitV01, V1UnitV01,
358                                           V1UnitV01, V1UnitV01, V1UnitV01]>
359  { let Latency = 11; let NumMicroOps = 6; }
360def V1Write_13c_6V01      : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01,
361                                           V1UnitV01, V1UnitV01, V1UnitV01]>
362  { let Latency = 13; let NumMicroOps = 6; }
363
364//===----------------------------------------------------------------------===//
365// Generic write types made of 7 micro-ops, one issue resource per micro-op.
366
367def V1Write_8c_3L_4V         : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL,
368                                              V1UnitV, V1UnitV, V1UnitV, V1UnitV]>
369  { let Latency = 8; let NumMicroOps = 7; }
370let Latency = 13, NumMicroOps = 7 in  // 13-cycle latency, as encoded by the "13c" in the name.
371def V1Write_13c_3L01_1S_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
372                                              V1UnitS,
373                                              V1UnitV01, V1UnitV01, V1UnitV01]>;
374
375//===----------------------------------------------------------------------===//
376// Generic write types made of 8 micro-ops, one issue resource per micro-op.
377
378def V1Write_9c_4L_4V      : SchedWriteRes<[V1UnitL, V1UnitL,
379                                           V1UnitL, V1UnitL,
380                                           V1UnitV, V1UnitV,
381                                           V1UnitV, V1UnitV]>
382  { let Latency = 9; let NumMicroOps = 8; }
383def V1Write_2c_4L01_4V01  : SchedWriteRes<[V1UnitL01, V1UnitL01,
384                                           V1UnitL01, V1UnitL01,
385                                           V1UnitV01, V1UnitV01,
386                                           V1UnitV01, V1UnitV01]>
387  { let Latency = 2; let NumMicroOps = 8; }
388def V1Write_4c_4L01_4V01  : SchedWriteRes<[V1UnitL01, V1UnitL01,
389                                           V1UnitL01, V1UnitL01,
390                                           V1UnitV01, V1UnitV01,
391                                           V1UnitV01, V1UnitV01]>
392  { let Latency = 4; let NumMicroOps = 8; }
393def V1Write_12c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
394                                           V1UnitL01, V1UnitL01,
395                                           V1UnitV01, V1UnitV01,
396                                           V1UnitV01, V1UnitV01]>
397  { let Latency = 12; let NumMicroOps = 8; }
398
399//===----------------------------------------------------------------------===//
400// Generic write types made of 10 micro-ops, one issue resource per micro-op.
401
402def V1Write_13c_4L01_2S_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01,
403                                              V1UnitL01, V1UnitL01,
404                                              V1UnitS, V1UnitS,
405                                              V1UnitV01, V1UnitV01,
406                                              V1UnitV01, V1UnitV01]>
407  { let Latency = 13; let NumMicroOps = 10; }
408def V1Write_7c_5L01_5V       : SchedWriteRes<[V1UnitL01, V1UnitL01,
409                                              V1UnitL01, V1UnitL01, V1UnitL01,
410                                              V1UnitV, V1UnitV,
411                                              V1UnitV, V1UnitV, V1UnitV]>
412  { let Latency = 7; let NumMicroOps = 10; }
413def V1Write_11c_10V0         : SchedWriteRes<[V1UnitV0,
414                                              V1UnitV0, V1UnitV0, V1UnitV0,
415                                              V1UnitV0, V1UnitV0, V1UnitV0,
416                                              V1UnitV0, V1UnitV0, V1UnitV0]>
417  { let Latency = 11; let NumMicroOps = 10; }
418
419//===----------------------------------------------------------------------===//
420// Generic write type made of 12 micro-ops: six on L01, six on V01.
421
422def V1Write_7c_6L01_6V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
423                                          V1UnitL01, V1UnitL01, V1UnitL01,
424                                          V1UnitV01, V1UnitV01, V1UnitV01,
425                                          V1UnitV01, V1UnitV01, V1UnitV01]>
426  { let Latency = 7; let NumMicroOps = 12; }
427
428//===----------------------------------------------------------------------===//
429// Generic write type made of 15 micro-ops: five each on L01, S and V.
430
431def V1Write_7c_5L01_5S_5V : SchedWriteRes<[V1UnitL01, V1UnitL01,
432                                           V1UnitL01, V1UnitL01, V1UnitL01,
433                                           V1UnitS, V1UnitS,
434                                           V1UnitS, V1UnitS, V1UnitS,
435                                           V1UnitV, V1UnitV,
436                                           V1UnitV, V1UnitV, V1UnitV]>
437  { let Latency = 7; let NumMicroOps = 15; }
438
439
440//===----------------------------------------------------------------------===//
441// Define generic 18 micro-op types
442
443let Latency = 11, NumMicroOps = 18 in  // 11-cycle latency, as encoded by the "11c" in the name; nine L01 plus nine V micro-ops.
444def V1Write_11c_9L01_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
445                                         V1UnitL01, V1UnitL01, V1UnitL01,
446                                         V1UnitL01, V1UnitL01, V1UnitL01,
447                                         V1UnitV, V1UnitV, V1UnitV,
448                                         V1UnitV, V1UnitV, V1UnitV,
449                                         V1UnitV, V1UnitV, V1UnitV]>;
450def V1Write_19c_18V0    : SchedWriteRes<[V1UnitV0, V1UnitV0, V1UnitV0,  // 18 micro-ops, all on V0
451                                         V1UnitV0, V1UnitV0, V1UnitV0,
452                                         V1UnitV0, V1UnitV0, V1UnitV0,
453                                         V1UnitV0, V1UnitV0, V1UnitV0,
454                                         V1UnitV0, V1UnitV0, V1UnitV0,
455                                         V1UnitV0, V1UnitV0, V1UnitV0]>
456  { let Latency = 19; let NumMicroOps = 18; }
457
458//===----------------------------------------------------------------------===//
459// Generic write type made of 27 micro-ops: nine each on L01, S and V.
460
461def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01,
462                                            V1UnitL01, V1UnitL01, V1UnitL01,
463                                            V1UnitL01, V1UnitL01, V1UnitL01,
464                                            V1UnitS, V1UnitS, V1UnitS,
465                                            V1UnitS, V1UnitS, V1UnitS,
466                                            V1UnitS, V1UnitS, V1UnitS,
467                                            V1UnitV, V1UnitV, V1UnitV,
468                                            V1UnitV, V1UnitV, V1UnitV,
469                                            V1UnitV, V1UnitV, V1UnitV]>
470  { let Latency = 11; let NumMicroOps = 27; }
471
472
473// Miscellaneous Instructions
474// -----------------------------------------------------------------------------
475
476// COPY: 1-cycle micro-op on any integer unit
477def : InstRW<[V1Write_1c_1I], (instrs COPY)>;
478
479// MSR (WriteSys): 1-cycle latency, no issue port listed
480def : WriteRes<WriteSys, []> { let Latency = 1; }
481
482
483// Branch Instructions
484// -----------------------------------------------------------------------------
485
486// Branch, immed
487// Compare and branch
488def : SchedAlias<WriteBr, V1Write_1c_1B>;
489
490// Branch, register
491def : SchedAlias<WriteBrReg, V1Write_1c_1B>;
492
493// Branch and link, immed
494// Branch and link, register (branch micro-op plus one single-cycle integer micro-op)
495def : InstRW<[V1Write_1c_1B_1S], (instrs BL, BLR)>;
496
497// Compare and branch, test and branch (CBZ/CBNZ/TBZ/TBNZ, W and X forms)
498def : InstRW<[V1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>;
499
500
501// Arithmetic and Logical Instructions
502// -----------------------------------------------------------------------------
503
504// ALU, basic
505// Conditional compare
506// Conditional select
507// Logical, basic
508// Address generation
509// Count leading
510// Reverse bits/bytes
511// Move immediate
512def : SchedAlias<WriteI, V1Write_1c_1I>;
513
514// ALU, basic, flagset (restricted to V1UnitJ: single cycle 0/1 plus multicycle 0)
515def : InstRW<[V1Write_1c_1J],
516             (instregex "^(ADD|SUB)S[WX]r[ir]$",
517                        "^(ADC|SBC)S[WX]r$",
518                        "^ANDS[WX]ri$",
519                        "^(AND|BIC)S[WX]rr$")>;
520
521// ALU, extend and shift
522def : SchedAlias<WriteIEReg, V1Write_2c_1M>;
523
524// Arithmetic, LSL shift, shift <= 4 (IsCheapLSL variant: 1 cycle, any integer unit)
525// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 (fallback variant: 2 cycles, multicycle unit)
526def V1WriteISReg : SchedWriteVariant<
527                     [SchedVar<IsCheapLSL,  [V1Write_1c_1I]>,
528                      SchedVar<NoSchedPred, [V1Write_2c_1M]>]>;
529def              : SchedAlias<WriteISReg, V1WriteISReg>;
530
531// Arithmetic, flagset, LSL shift, shift <= 4 (IsCheapLSL variant: 1 cycle, V1UnitJ)
532// Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 (fallback variant: 2 cycles, multicycle unit)
533def V1WriteISRegS : SchedWriteVariant<
534                      [SchedVar<IsCheapLSL,  [V1Write_1c_1J]>,
535                       SchedVar<NoSchedPred, [V1Write_2c_1M]>]>;
536def               : InstRW<[V1WriteISRegS],
537                           (instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>;
538
539// Logical, shift, no flagset
540def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
541
542// Logical, shift, flagset
543def : InstRW<[V1Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>;
544
545// Flag manipulation instructions
546def : InstRW<[V1Write_1c_1J], (instrs SETF8, SETF16, RMIF, CFINV)>;
547
548
549// Divide and multiply instructions
550// -----------------------------------------------------------------------------
551
552// Divide (32-bit: latency 12, 64-bit: latency 20; both on M0 with ReleaseAtCycles = 5)
553def : SchedAlias<WriteID32, V1Write_12c5_1M0>;
554def : SchedAlias<WriteID64, V1Write_20c5_1M0>;
555
556// Multiply
557// Multiply accumulate
558// Multiply accumulate, long
559// Multiply long
560def V1WriteIM : SchedWriteVariant<
561                  [SchedVar<NeoverseMULIdiomPred, [V1Write_2c_1M]>,    // NOTE(review): presumably the plain-MUL idiom, either multicycle unit -- confirm
562                   SchedVar<NoSchedPred,          [V1Write_2c_1M0]>]>; // Everything else: M0 only
563def           : SchedAlias<WriteIM32, V1WriteIM>;
564def           : SchedAlias<WriteIM64, V1WriteIM>;
565
566// Multiply high (3 cycles on either multicycle unit)
567def : InstRW<[V1Write_3c_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>;
568
569
570// Pointer Authentication Instructions (v8.3 PAC)
571// -----------------------------------------------------------------------------
572
573// Authenticate data address
574// Authenticate instruction address
575// Compute pointer authentication code for data address
576// Compute pointer authentication code, using generic key
577// Compute pointer authentication code for instruction address
578def : InstRW<[V1Write_5c_1M0], (instregex "^AUT",
579                                          "^PAC")>;
580
581// Branch and link, register, with pointer authentication
582// Branch, register, with pointer authentication
583// Branch, return, with pointer authentication
584def : InstRW<[V1Write_6c_1B_1M0], (instregex "^BL?RA[AB]Z?$",
585                                             "^E?RETA[AB]$")>;
586
587// Load register, with pointer authentication (LDRAA/LDRAB, indexed and writeback forms)
588def : InstRW<[V1Write_9c_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
589
590// Strip pointer authentication code (2 cycles on M0)
591def : InstRW<[V1Write_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>;
592
593
594// Miscellaneous data-processing instructions
595// -----------------------------------------------------------------------------
596
597// Bitfield extract, one reg (NOTE(review): presumably the IsRORImmIdiomPred variant -- confirm)
598// Bitfield extract, two regs (fallback variant: 3 cycles, one I plus one M micro-op)
599def V1WriteExtr : SchedWriteVariant<
600                    [SchedVar<IsRORImmIdiomPred, [V1Write_1c_1I]>,
601                     SchedVar<NoSchedPred,       [V1Write_3c_1I_1M]>]>;
602def : SchedAlias<WriteExtr, V1WriteExtr>;
603
604// Bitfield move, basic
605// Variable shift
606def : SchedAlias<WriteIS, V1Write_1c_1I>;
607
608// Bitfield move, insert (BFM, W and X forms)
609def : InstRW<[V1Write_2c_1M], (instregex "^BFM[WX]ri$")>;
610
611// Move immediate
612def : SchedAlias<WriteImm, V1Write_1c_1I>;
613
614
615// Load instructions
616// -----------------------------------------------------------------------------
617
618// Load register, immed offset
619def : SchedAlias<WriteLD, V1Write_4c_1L>;
620
621// Load register, immed offset, index
// WriteAdr is aliased here too; it is the write listed first in the
// pre/post-indexed records of this file.
622def : SchedAlias<WriteLDIdx, V1Write_4c_1L>;
623def : SchedAlias<WriteAdr,   V1Write_1c_1I>;
624
625// Load pair, immed offset
// WriteLDHi gets the same write as a single load. The W-register pair opcodes
// listed explicitly below use V1Write_0c_0Z for their second write instead.
// In the pre/post-indexed forms WriteAdr is listed first - presumably for the
// written-back base register (TODO confirm operand order).
626def : SchedAlias<WriteLDHi, V1Write_4c_1L>;
627def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z], (instrs LDPWi, LDNPWi)>;
628def : InstRW<[WriteAdr, V1Write_4c_1L, V1Write_0c_0Z],
629             (instrs LDPWpost, LDPWpre)>;
630
631// Load pair, signed immed offset, signed words
632def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z], (instrs LDPSWi)>;
633
634// Load pair, immed post or pre-index, signed words
635def : InstRW<[WriteAdr, V1Write_5c_1I_1L, V1Write_0c_0Z],
636             (instrs LDPSWpost, LDPSWpre)>;
637
638
639// Store instructions
640// -----------------------------------------------------------------------------
641
// All three generic store writes (plain, indexed and pair) are aliased to the
// same V1Write_1c_1L01_1D write.
642// Store register, immed offset
643def : SchedAlias<WriteST, V1Write_1c_1L01_1D>;
644
645// Store register, immed offset, index
646def : SchedAlias<WriteSTIdx, V1Write_1c_1L01_1D>;
647
648// Store pair, immed offset
649def : SchedAlias<WriteSTP, V1Write_1c_1L01_1D>;
650
651
652// FP data processing instructions
653// -----------------------------------------------------------------------------
654
655// FP absolute value
656// FP arithmetic
657// FP min/max
658// FP negate
659def : SchedAlias<WriteF, V1Write_2c_1V>;
660
661// FP compare
662def : SchedAlias<WriteFCmp, V1Write_2c_1V0>;
663
664// FP divide
665// FP square root
// Generic fallback for WriteFDiv; the H/S/D scalar opcodes get their own
// explicit InstRW records right below.
666def : SchedAlias<WriteFDiv, V1Write_10c7_1V02>;
667
668// FP divide, H-form
669// FP square root, H-form
670def : InstRW<[V1Write_7c7_1V02], (instrs FDIVHrr, FSQRTHr)>;
671
672// FP divide, S-form
673// FP square root, S-form
674def : InstRW<[V1Write_10c7_1V02], (instrs FDIVSrr, FSQRTSr)>;
675
676// FP divide, D-form
677def : InstRW<[V1Write_15c7_1V02], (instrs FDIVDrr)>;
678
679// FP square root, D-form
680def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTDr)>;
681
682// FP multiply
683def : SchedAlias<WriteFMul, V1Write_3c_1V>;
684
685// FP multiply accumulate
// Matches FMADD/FMSUB/FNMADD/FNMSUB in the H, S and D forms.
686def : InstRW<[V1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
687
688// FP round to integral
// The second pattern covers FRINT32/FRINT64 (X and Z variants, S and D forms).
689def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
690                                           "^FRINT(32|64)[XZ][SD]r$")>;
691
692// FP select
693def : InstRW<[V1Write_2c_1V01], (instregex "^FCSEL[HSD]rrr$")>;
694
695
696// FP miscellaneous instructions
697// -----------------------------------------------------------------------------
698
699// FP convert, from gen to vec reg
// Matches SCVTF/UCVTF opcodes of the form [SU]CVTF[SU][WX][HSD]ri.
700def : InstRW<[V1Write_3c_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
701
702// FP convert, from vec to gen reg
703def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
704
705// FP convert, Javascript from vec to gen reg
706def : InstRW<[V1Write_3c_1V0], (instrs FJCVTZS)>;
707
708// FP convert, from vec to vec reg
// Generic WriteFCvt fallback; the conversions listed explicitly above take
// their InstRW records instead.
709def : SchedAlias<WriteFCvt, V1Write_3c_1V02>;
710
711// FP move, immed
712def : SchedAlias<WriteFImm, V1Write_2c_1V>;
713
714// FP move, register
715def : InstRW<[V1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>;
716
717// FP transfer, from gen to low half of vec reg
718def : InstRW<[V1Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
719
720// FP transfer, from gen to high half of vec reg
721def : InstRW<[V1Write_5c_1M0_1V], (instrs FMOVXDHighr)>;
722
723// FP transfer, from vec to gen reg
724def : SchedAlias<WriteFCopy, V1Write_2c_1V1>;
725
726
727// FP load instructions
728// -----------------------------------------------------------------------------
729
730// Load vector reg, literal, S/D/Q forms
731// Load vector reg, unscaled immed
732// Load vector reg, unsigned immed
733def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$",
734                                                      "^LDUR[BHSDQ]i$",
735                                                      "^LDR[BHSDQ]ui$")>;
736
737// Load vector reg, immed post-index
738// Load vector reg, immed pre-index
// WriteAdr is listed ahead of the load write - presumably for the updated
// base register (TODO confirm operand order).
739def : InstRW<[WriteAdr, V1Write_6c_1L],
740             (instregex "^LDR[BHSDQ](post|pre)$")>;
741
742// Load vector reg, register offset, basic
743// Load vector reg, register offset, scale, S/D-form
744// Load vector reg, register offset, extend
745// Load vector reg, register offset, extend, scale, S/D-form
// B/S/D register-offset loads; the H and Q forms use the separate record below.
746def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
747
748// Load vector reg, register offset, scale, H/Q-form
749// Load vector reg, register offset, extend, scale, H/Q-form
750def : InstRW<[V1Write_7c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
751
752// Load vector pair, immed offset, S/D-form
// The optional N in the pattern also matches the LDNP opcodes.
753def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z], (instregex "^LDN?P[SD]i$")>;
754
755// Load vector pair, immed offset, Q-form
// Unlike the S/D pair forms, the second write here is WriteLDHi.
756def : InstRW<[V1Write_6c_1L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
757
758// Load vector pair, immed post-index, S/D-form
759// Load vector pair, immed pre-index, S/D-form
760def : InstRW<[WriteAdr, V1Write_6c_1L, V1Write_0c_0Z],
761             (instregex "^LDP[SD](pre|post)$")>;
762
763// Load vector pair, immed post-index, Q-form
764// Load vector pair, immed pre-index, Q-form
765def : InstRW<[WriteAdr, V1Write_6c_1L, WriteLDHi],
766             (instrs LDPQpost, LDPQpre)>;
767
768
769// FP store instructions
770// -----------------------------------------------------------------------------
771
772// Store vector reg, unscaled immed, B/H/S/D/Q-form
773def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>;
774
775// Store vector reg, immed post-index, B/H/S/D/Q-form
776// Store vector reg, immed pre-index, B/H/S/D/Q-form
// WriteAdr is listed ahead of the store write - presumably for the updated
// base register (TODO confirm operand order).
777def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
778             (instregex "^STR[BHSDQ](pre|post)$")>;
779
780// Store vector reg, unsigned immed, B/H/S/D/Q-form
781def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>;
782
783// Store vector reg, register offset, basic, B/S/D-form
784// Store vector reg, register offset, scale, B/S/D-form
785// Store vector reg, register offset, extend, B/S/D-form
786// Store vector reg, register offset, extend, scale, B/S/D-form
787def : InstRW<[V1Write_2c_1L01_1V01, ReadAdrBase],
788             (instregex "^STR[BSD]ro[WX]$")>;
789
790// Store vector reg, register offset, basic, H/Q-form
791// Store vector reg, register offset, scale, H/Q-form
792// Store vector reg, register offset, extend, H/Q-form
793// Store vector reg, register offset, extend, scale, H/Q-form
// Same latency as the B/S/D forms above, but the write name carries an extra
// "1I" component.
794def : InstRW<[V1Write_2c_1I_1L01_1V01, ReadAdrBase],
795             (instregex "^STR[HQ]ro[WX]$")>;
796
797// Store vector pair, immed offset, S/D/Q-form
// The optional N in the pattern also matches the STNP opcodes.
798def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STN?P[SDQ]i$")>;
799
800// Store vector pair, immed post-index, S/D-form
801// Store vector pair, immed pre-index, S/D-form
802def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
803             (instregex "^STP[SD](pre|post)$")>;
804
805// Store vector pair, immed post-index, Q-form
806// Store vector pair, immed pre-index, Q-form
// Uses the "2L01" write, whereas the non-writeback Q-form pair above shares
// the "1L01" write with the S/D forms.
807def : InstRW<[WriteAdr, V1Write_2c_2L01_1V01], (instrs STPQpre, STPQpost)>;
808
809
810// ASIMD integer instructions
811// -----------------------------------------------------------------------------
812
813// ASIMD absolute diff
814// ASIMD absolute diff long
815// ASIMD arith, basic
816// ASIMD arith, complex
817// ASIMD arith, pair-wise
818// ASIMD compare
819// ASIMD logical
820// ASIMD max/min, basic and pair-wise
// Default for both generic vector writes (WriteVd and WriteVq); the explicit
// InstRW records in the ASIMD sections give selected opcodes their own write.
821def : SchedAlias<WriteVd, V1Write_2c_1V>;
822def : SchedAlias<WriteVq, V1Write_2c_1V>;
823
824// ASIMD absolute diff accum
825// ASIMD absolute diff accum long
826// ASIMD pairwise add and accumulate long
// Both patterns are prefixes (no trailing `$`), so every vector arrangement
// of SABA/UABA/SABAL/UABAL and SADALP/UADALP is covered.
827def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]ABAL?v", "^[SU]ADALPv")>;
828
829// ASIMD arith, reduce, 4H/4S
830// ASIMD max/min, reduce, 4H/4S
831def : InstRW<[V1Write_2c_1V13], (instregex "^(ADD|[SU]ADDL)Vv4(i16|i32)v$",
832                                           "^[SU](MAX|MIN)Vv4(i16|i32)v$")>;
833
834// ASIMD arith, reduce, 8B/8H
835// ASIMD max/min, reduce, 8B/8H
836def : InstRW<[V1Write_4c_1V13_1V], (instregex "^(ADD|[SU]ADDL)Vv8(i8|i16)v$",
837                                              "^[SU](MAX|MIN)Vv8(i8|i16)v$")>;
838
839// ASIMD arith, reduce, 16B
840// ASIMD max/min, reduce, 16B
// Both patterns are explicitly anchored with `^`, matching the 4H/4S and
// 8B/8H reduce records above. instregex already matches from the start of the
// opcode name, so the anchor does not change which opcodes are selected.
841def : InstRW<[V1Write_4c_2V13], (instregex "^(ADD|[SU]ADDL)Vv16i8v$",
842                                           "^[SU](MAX|MIN)Vv16i8v$")>;
843
844// ASIMD dot product
845// ASIMD dot product using signed and unsigned integers
// Matches SDOT/UDOT/SUDOT/USDOT, with or without the "lane" suffix.
846def : InstRW<[V1Write_2c_1V], (instregex "^([SU]|SU|US)DOT(lane)?v(8|16)i8$")>;
847
848// ASIMD matrix multiply-accumulate
849def : InstRW<[V1Write_3c_1V], (instrs SMMLA, UMMLA, USMMLA)>;
850
851// ASIMD multiply
852// ASIMD multiply accumulate
853// ASIMD multiply accumulate long
854// ASIMD multiply accumulate high
855// ASIMD multiply accumulate saturating long
// The patterns ending in "Lv" ([SU]ML[AS]L, SQDML[AS]L) are prefixes and cover
// every arrangement; the others are anchored and enumerate only the i16/i32
// arrangements.
856def : InstRW<[V1Write_4c_1V02],
857             (instregex "^MUL(v[148]i16|v[124]i32)$",
858                        "^SQR?DMULH(v[48]i16|v[24]i32)$",
859                        "^ML[AS](v[148]i16|v[124]i32)$",
860                        "^[SU]ML[AS]Lv",
861                        "^SQRDML[AS]H(v[148]i16|v[124]i32)$",
862                        "^SQDML[AS]Lv")>;
863
864// ASIMD multiply/multiply long (8x8) polynomial
865def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>;
866
867// ASIMD multiply long
868def : InstRW<[V1Write_3c_1V02], (instregex "^([SU]|SQD)MULLv")>;
869
870// ASIMD shift accumulate
871// ASIMD shift by immed, complex
872// ASIMD shift by register, complex
// Scalar SQSHL/SQSHLU/UQSHL forms are matched by the [bhsd] pattern and the
// vector immediate forms by the "_shift" pattern; the remaining entries are
// opcode-name prefixes.
873def : InstRW<[V1Write_4c_1V13],
874             (instregex "^[SU]R?SRAv",
875                        "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$",
876                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
877                        "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv",
878                        "^[SU]Q?RSHLv", "^[SU]QSHLv")>;
879
880// ASIMD shift by immed, basic
881// ASIMD shift by immed and insert, basic
882// ASIMD shift by register, basic
883def : InstRW<[V1Write_2c_1V13], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv",
884                                          "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>;
885
886
887// ASIMD FP instructions
888// -----------------------------------------------------------------------------
889
890// ASIMD FP absolute value/difference
891// ASIMD FP arith, normal
892// ASIMD FP compare
893// ASIMD FP complex add
894// ASIMD FP max/min, normal
895// ASIMD FP max/min, pairwise
896// ASIMD FP negate
897// Covered by "SchedAlias (WriteV[dq]...)" above
898
899// ASIMD FP complex multiply add
900// ASIMD FP multiply accumulate
// The complex multiply-add opcode is FCMLA. FCADD is the complex *add*, which
// this section's header list places under the WriteV[dq] aliases, so it must
// not be given the 4-cycle write here. "^FCMLAv" has no trailing `$`, so the
// by-element ("_indexed") vector opcodes are included as well.
901def : InstRW<[V1Write_4c_1V], (instregex "^FCMLAv",
902                                         "^FML[AS]v")>;
903
904// ASIMD FP convert, long (F16 to F32)
905def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTLv[48]i16$")>;
906
907// ASIMD FP convert, long (F32 to F64)
908def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTLv[24]i32$")>;
909
910// ASIMD FP convert, narrow (F32 to F16)
911def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTNv[48]i16$")>;
912
913// ASIMD FP convert, narrow (F64 to F32)
// FCVTXN (vector and scalar v1i64 forms) shares this write.
914def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTNv[24]i32$",
915                                           "^FCVTXN(v[24]f32|v1i64)$")>;
916
// The three "convert, other" records below split the opcodes by arrangement:
// v2f32/v2f64, then v4f16/v4f32, then v8f16.
917// ASIMD FP convert, other, D-form F32 and Q-form F64
918def : InstRW<[V1Write_3c_1V02], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
919                                           "^[SU]CVTFv2f(32|64)$")>;
920
921// ASIMD FP convert, other, D-form F16 and Q-form F32
922def : InstRW<[V1Write_4c_2V02], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
923                                           "^[SU]CVTFv4f(16|32)$")>;
924
925// ASIMD FP convert, other, Q-form F16
926def : InstRW<[V1Write_6c_4V02], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
927                                           "^[SU]CVTFv8f16$")>;
928
929// ASIMD FP divide, D-form, F16
930// ASIMD FP square root, D-form, F16
931def : InstRW<[V1Write_7c7_1V02], (instrs FDIVv4f16, FSQRTv4f16)>;
932
933// ASIMD FP divide, F32
934// ASIMD FP square root, F32
935def : InstRW<[V1Write_10c7_1V02], (instrs FDIVv2f32, FDIVv4f32,
936                                          FSQRTv2f32, FSQRTv4f32)>;
937
938// ASIMD FP divide, Q-form, F16
939def : InstRW<[V1Write_13c5_1V02], (instrs FDIVv8f16)>;
940
941// ASIMD FP divide, Q-form, F64
942def : InstRW<[V1Write_15c7_1V02], (instrs FDIVv2f64)>;
943
944// ASIMD FP square root, Q-form, F16
945def : InstRW<[V1Write_13c11_1V02], (instrs FSQRTv8f16)>;
946
947// ASIMD FP square root, Q-form, F64
948def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTv2f64)>;
949
950// ASIMD FP max/min, reduce, F32 and D-form F16
// The optional "NM" covers the FMAXNMV/FMINNMV opcodes as well.
951def : InstRW<[V1Write_4c_2V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>;
952
953// ASIMD FP max/min, reduce, Q-form F16
954def : InstRW<[V1Write_6c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>;
955
956// ASIMD FP multiply
// Prefix pattern: all vector FMUL and FMULX arrangements.
957def : InstRW<[V1Write_3c_1V], (instregex "^FMULX?v")>;
958
959// ASIMD FP multiply accumulate long
// Prefix pattern: FMLAL/FMLSL and their "2" variants.
960def : InstRW<[V1Write_5c_1V], (instregex "^FML[AS]L2?v")>;
961
962// ASIMD FP round, D-form F32 and Q-form F64
963def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>;
964
965// ASIMD FP round, D-form F16 and Q-form F32
966def : InstRW<[V1Write_4c_2V02], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>;
967
968// ASIMD FP round, Q-form F16
969def : InstRW<[V1Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
970
971
972// ASIMD BF instructions
973// -----------------------------------------------------------------------------
974
975// ASIMD convert, F32 to BF16
976def : InstRW<[V1Write_4c_1V02], (instrs BFCVTN, BFCVTN2)>;
977
978// ASIMD dot product
// Matches BFDOTv4bf16/BFDOTv8bf16 and the BF16DOTlane equivalents.
979def : InstRW<[V1Write_4c_1V], (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>;
980
981// ASIMD matrix multiply accumulate
982def : InstRW<[V1Write_5c_1V], (instrs BFMMLA)>;
983
984// ASIMD multiply accumulate long
// Matches BFMLALB/BFMLALT with an optional "Idx" suffix.
985def : InstRW<[V1Write_4c_1V], (instregex "^BFMLAL[BT](Idx)?$")>;
986
987// Scalar convert, F32 to BF16
988def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>;
989
990
991// ASIMD miscellaneous instructions
992// -----------------------------------------------------------------------------
993
994// ASIMD bit reverse
995// ASIMD bitwise insert
996// ASIMD count
997// ASIMD duplicate, element
998// ASIMD extract
999// ASIMD extract narrow
1000// ASIMD insert, element to element
1001// ASIMD move, FP immed
1002// ASIMD move, integer immed
1003// ASIMD reverse
1004// ASIMD table lookup, 1 or 2 table regs
1005// ASIMD table lookup extension, 1 table reg
1006// ASIMD transfer, element to gen reg
1007// ASIMD transpose
1008// ASIMD unzip/zip
1009// Covered by "SchedAlias (WriteV[dq]...)" above
// NOTE(review): the two table-lookup entries and "transfer, element to gen
// reg" in this list also have explicit InstRW records further down in this
// section.
1010
1011// ASIMD duplicate, gen reg
1012def : InstRW<[V1Write_3c_1M0],
1013             (instregex "^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>;
1014
1015// ASIMD extract narrow, saturating
1016def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
1017
1018// ASIMD reciprocal and square root estimate, D-form U32
1019// ASIMD reciprocal and square root estimate, D-form F32 and F64
1020def : InstRW<[V1Write_3c_1V02], (instrs URECPEv2i32,
1021                                        URSQRTEv2i32,
1022                                        FRECPEv1i32, FRECPEv2f32, FRECPEv1i64,
1023                                        FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64)>;
1024
1025// ASIMD reciprocal and square root estimate, Q-form U32
1026// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 and F64
1027def : InstRW<[V1Write_4c_1V02], (instrs URECPEv4i32,
1028                                        URSQRTEv4i32,
1029                                        FRECPEv1f16, FRECPEv4f16,
1030                                        FRECPEv4f32, FRECPEv2f64,
1031                                        FRSQRTEv1f16, FRSQRTEv4f16,
1032                                        FRSQRTEv4f32, FRSQRTEv2f64)>;
1033
1034// ASIMD reciprocal and square root estimate, Q-form F16
1035def : InstRW<[V1Write_6c_2V02], (instrs FRECPEv8f16,
1036                                        FRSQRTEv8f16)>;
1037
1038// ASIMD reciprocal exponent
1039def : InstRW<[V1Write_3c_1V02], (instrs FRECPXv1f16, FRECPXv1i32, FRECPXv1i64)>;
1040
1041// ASIMD reciprocal step
// Scalar opcodes are matched by the anchored (16|32|64) patterns, vector
// opcodes by the "v" prefix patterns.
1042def : InstRW<[V1Write_4c_1V], (instregex "^FRECPS(16|32|64)$", "^FRECPSv",
1043                                         "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>;
1044
1045// ASIMD table lookup, 1 or 2 table regs
1046// ASIMD table lookup extension, 1 table reg
1047def : InstRW<[V1Write_2c_2V01], (instregex "^TBLv(8|16)i8(One|Two)$",
1048                                           "^TBXv(8|16)i8One$")>;
1049
1050// ASIMD table lookup, 3 table regs
1051// ASIMD table lookup extension, 2 table reg
1052def : InstRW<[V1Write_4c_2V01], (instrs TBLv8i8Three, TBLv16i8Three,
1053                                        TBXv8i8Two, TBXv16i8Two)>;
1054
1055// ASIMD table lookup, 4 table regs
1056def : InstRW<[V1Write_4c_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>;
1057
1058// ASIMD table lookup extension, 3 table reg
1059def : InstRW<[V1Write_6c_3V01], (instrs TBXv8i8Three, TBXv16i8Three)>;
1060
1061// ASIMD table lookup extension, 4 table reg
1062def : InstRW<[V1Write_6c_5V01], (instrs TBXv8i8Four, TBXv16i8Four)>;
1063
1064// ASIMD transfer, element to gen reg
// SMOV covers 8/16-bit elements to 32/64 bits plus 32-to-64; UMOV covers all
// four element sizes.
1065def : InstRW<[V1Write_2c_1V], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$",
1066                                         "^UMOVvi(8|16|32|64)$")>;
1067
1068// ASIMD transfer, gen reg to element
1069def : InstRW<[V1Write_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
1070
1071
1072// ASIMD load instructions
1073// -----------------------------------------------------------------------------
1074
// Every load in this section comes as a pair of records: the plain opcode,
// and the "_POST" opcode with WriteAdr listed ahead of the same load write
// (presumably for the post-incremented base register - TODO confirm).
1075// ASIMD load, 1 element, multiple, 1 reg
1076def : InstRW<[V1Write_6c_1L],
1077             (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1078def : InstRW<[WriteAdr, V1Write_6c_1L],
1079             (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1080
1081// ASIMD load, 1 element, multiple, 2 reg
1082def : InstRW<[V1Write_6c_2L],
1083             (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1084def : InstRW<[WriteAdr, V1Write_6c_2L],
1085             (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1086
1087// ASIMD load, 1 element, multiple, 3 reg
1088def : InstRW<[V1Write_6c_3L],
1089             (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1090def : InstRW<[WriteAdr, V1Write_6c_3L],
1091             (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1092
1093// ASIMD load, 1 element, multiple, 4 reg, D-form
1094def : InstRW<[V1Write_6c_2L],
1095             (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
1096def : InstRW<[WriteAdr, V1Write_6c_2L],
1097             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
1098
1099// ASIMD load, 1 element, multiple, 4 reg, Q-form
1100def : InstRW<[V1Write_7c_4L],
1101             (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
1102def : InstRW<[WriteAdr, V1Write_7c_4L],
1103             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
1104
1105// ASIMD load, 1 element, one lane
1106// ASIMD load, 1 element, all lanes
// NOTE(review): the "Rv" alternative in the first pattern requires the name
// to end right after the element size, while the second pattern matches the
// LD1Rv<arrangement> names; the former looks redundant - confirm.
1107def : InstRW<[V1Write_8c_1L_1V],
1108             (instregex "^LD1(i|Rv)(8|16|32|64)$",
1109                        "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1110def : InstRW<[WriteAdr, V1Write_8c_1L_1V],
1111             (instregex "^LD1i(8|16|32|64)_POST$",
1112                        "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1113
1114// ASIMD load, 2 element, multiple, D-form
1115def : InstRW<[V1Write_8c_1L_2V],
1116             (instregex "^LD2Twov(8b|4h|2s)$")>;
1117def : InstRW<[WriteAdr, V1Write_8c_1L_2V],
1118             (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
1119
1120// ASIMD load, 2 element, multiple, Q-form
1121def : InstRW<[V1Write_8c_2L_2V],
1122             (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
1123def : InstRW<[WriteAdr, V1Write_8c_2L_2V],
1124             (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
1125
1126// ASIMD load, 2 element, one lane
1127// ASIMD load, 2 element, all lanes
1128def : InstRW<[V1Write_8c_1L_2V],
1129             (instregex "^LD2i(8|16|32|64)$",
1130                        "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1131def : InstRW<[WriteAdr, V1Write_8c_1L_2V],
1132             (instregex "^LD2i(8|16|32|64)_POST$",
1133                        "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1134
1135// ASIMD load, 3 element, multiple, D-form
1136// ASIMD load, 3 element, one lane
1137// ASIMD load, 3 element, all lanes
1138def : InstRW<[V1Write_8c_2L_3V],
1139             (instregex "^LD3Threev(8b|4h|2s)$",
1140                        "^LD3i(8|16|32|64)$",
1141                        "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1142def : InstRW<[WriteAdr, V1Write_8c_2L_3V],
1143             (instregex "^LD3Threev(8b|4h|2s)_POST$",
1144                        "^LD3i(8|16|32|64)_POST$",
1145                        "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1146
1147// ASIMD load, 3 element, multiple, Q-form
1148def : InstRW<[V1Write_8c_3L_3V],
1149             (instregex "^LD3Threev(16b|8h|4s|2d)$")>;
1150def : InstRW<[WriteAdr, V1Write_8c_3L_3V],
1151             (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;
1152
1153// ASIMD load, 4 element, multiple, D-form
1154// ASIMD load, 4 element, one lane
1155// ASIMD load, 4 element, all lanes
1156def : InstRW<[V1Write_8c_3L_4V],
1157             (instregex "^LD4Fourv(8b|4h|2s)$",
1158                        "^LD4i(8|16|32|64)$",
1159                        "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>;
1160def : InstRW<[WriteAdr, V1Write_8c_3L_4V],
1161             (instregex "^LD4Fourv(8b|4h|2s)_POST$",
1162                        "^LD4i(8|16|32|64)_POST$",
1163                        "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>;
1164
1165// ASIMD load, 4 element, multiple, Q-form
1166def : InstRW<[V1Write_9c_4L_4V],
1167             (instregex "^LD4Fourv(16b|8h|4s|2d)$")>;
1168def : InstRW<[WriteAdr, V1Write_9c_4L_4V],
1169             (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>;
1170
1171
1172// ASIMD store instructions
1173// -----------------------------------------------------------------------------
1174
// Every store in this section comes as a pair of records: the plain opcode,
// and the "_POST" opcode with WriteAdr listed ahead of the same store write
// (presumably for the post-incremented base register - TODO confirm).
1175// ASIMD store, 1 element, multiple, 1 reg
1176// ASIMD store, 1 element, multiple, 2 reg, D-form
1177def : InstRW<[V1Write_2c_1L01_1V01],
1178             (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$",
1179                        "^ST1Twov(8b|4h|2s|1d)$")>;
1180def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01],
1181             (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$",
1182                        "^ST1Twov(8b|4h|2s|1d)_POST$")>;
1183
1184// ASIMD store, 1 element, multiple, 2 reg, Q-form
1185// ASIMD store, 1 element, multiple, 3 reg, D-form
1186// ASIMD store, 1 element, multiple, 4 reg, D-form
1187def : InstRW<[V1Write_2c_2L01_2V01],
1188             (instregex "^ST1Twov(16b|8h|4s|2d)$",
1189                        "^ST1Threev(8b|4h|2s|1d)$",
1190                        "^ST1Fourv(8b|4h|2s|1d)$")>;
1191def : InstRW<[WriteAdr, V1Write_2c_2L01_2V01],
1192             (instregex "^ST1Twov(16b|8h|4s|2d)_POST$",
1193                        "^ST1Threev(8b|4h|2s|1d)_POST$",
1194                        "^ST1Fourv(8b|4h|2s|1d)_POST$")>;
1195
1196// ASIMD store, 1 element, multiple, 3 reg, Q-form
1197def : InstRW<[V1Write_2c_3L01_3V01],
1198             (instregex "^ST1Threev(16b|8h|4s|2d)$")>;
1199def : InstRW<[WriteAdr, V1Write_2c_3L01_3V01],
1200             (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>;
1201
1202// ASIMD store, 1 element, multiple, 4 reg, Q-form
1203def : InstRW<[V1Write_2c_4L01_4V01],
1204             (instregex "^ST1Fourv(16b|8h|4s|2d)$")>;
1205def : InstRW<[WriteAdr, V1Write_2c_4L01_4V01],
1206             (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>;
1207
1208// ASIMD store, 1 element, one lane
1209// ASIMD store, 2 element, multiple, D-form
1210// ASIMD store, 2 element, one lane
1211def : InstRW<[V1Write_4c_1L01_1V01],
1212             (instregex "^ST1i(8|16|32|64)$",
1213                        "^ST2Twov(8b|4h|2s)$",
1214                        "^ST2i(8|16|32|64)$")>;
1215def : InstRW<[WriteAdr, V1Write_4c_1L01_1V01],
1216             (instregex "^ST1i(8|16|32|64)_POST$",
1217                        "^ST2Twov(8b|4h|2s)_POST$",
1218                        "^ST2i(8|16|32|64)_POST$")>;
1219
1220// ASIMD store, 2 element, multiple, Q-form
1221// ASIMD store, 3 element, multiple, D-form
1222// ASIMD store, 3 element, one lane
1223// ASIMD store, 4 element, one lane, D
// ST4i64 is grouped here; the B/H/S lane forms of ST4 have their own record
// at the end of this section.
1224def : InstRW<[V1Write_4c_2L01_2V01],
1225             (instregex "^ST2Twov(16b|8h|4s|2d)$",
1226                        "^ST3Threev(8b|4h|2s)$",
1227                        "^ST3i(8|16|32|64)$",
1228                        "^ST4i64$")>;
1229def : InstRW<[WriteAdr, V1Write_4c_2L01_2V01],
1230             (instregex "^ST2Twov(16b|8h|4s|2d)_POST$",
1231                        "^ST3Threev(8b|4h|2s)_POST$",
1232                        "^ST3i(8|16|32|64)_POST$",
1233                        "^ST4i64_POST$")>;
1234
1235// ASIMD store, 3 element, multiple, Q-form
1236def : InstRW<[V1Write_5c_3L01_3V01],
1237             (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
1238def : InstRW<[WriteAdr, V1Write_5c_3L01_3V01],
1239             (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;
1240
1241// ASIMD store, 4 element, multiple, D-form
1242def : InstRW<[V1Write_6c_3L01_3V01],
1243             (instregex "^ST4Fourv(8b|4h|2s)$")>;
1244def : InstRW<[WriteAdr, V1Write_6c_3L01_3V01],
1245             (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;
1246
1247// ASIMD store, 4 element, multiple, Q-form, B/H/S
1248def : InstRW<[V1Write_7c_6L01_6V01],
1249             (instregex "^ST4Fourv(16b|8h|4s)$")>;
1250def : InstRW<[WriteAdr, V1Write_7c_6L01_6V01],
1251             (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;
1252
1253// ASIMD store, 4 element, multiple, Q-form, D
1254def : InstRW<[V1Write_4c_4L01_4V01],
1255             (instrs ST4Fourv2d)>;
1256def : InstRW<[WriteAdr, V1Write_4c_4L01_4V01],
1257             (instrs ST4Fourv2d_POST)>;
1258
1259// ASIMD store, 4 element, one lane, B/H/S
// NOTE(review): this is the only store record in the section whose write is
// named with plain "L"/"V" rather than "L01"/"V01" - confirm it is intended.
1260def : InstRW<[V1Write_6c_3L_3V],
1261             (instregex "^ST4i(8|16|32)$")>;
1262def : InstRW<[WriteAdr, V1Write_6c_3L_3V],
1263             (instregex "^ST4i(8|16|32)_POST$")>;
1264
1265
1266// Cryptography extensions
1267// -----------------------------------------------------------------------------
1268
1269// Crypto polynomial (64x64) multiply long
1270// Covered by "SchedAlias (WriteV[dq]...)" above
1271
1272// Crypto AES ops
// V1WriteVC wraps V1Write_2c_1V so that V1ReadVC can name it as a producer:
// V1ReadVC is a 2-cycle SchedReadAdvance that applies only to values written
// through V1WriteVC. AESD/AESE write through V1WriteVC; AESMC/AESIMC read
// their source through V1ReadVC.
1273def V1WriteVC : WriteSequence<[V1Write_2c_1V]>;
1274def V1ReadVC  : SchedReadAdvance<2, [V1WriteVC]>;
1275def           : InstRW<[V1WriteVC], (instrs AESDrr, AESErr)>;
1276def           : InstRW<[V1Write_2c_1V, V1ReadVC], (instrs AESMCrr, AESIMCrr)>;
1277
1278// Crypto SHA1 hash acceleration op
1279// Crypto SHA1 schedule acceleration ops
1280// Crypto SHA256 schedule acceleration ops
1281// Crypto SHA512 hash acceleration ops
1282// Crypto SM3 ops
1283def : InstRW<[V1Write_2c_1V0], (instregex "^SHA1(H|SU[01])rr$",
1284                                          "^SHA256SU[01]rr$",
1285                                          "^SHA512(H2?|SU[01])$",
1286                                          "^SM3(PARTW(1|2SM3SS1)|TT[12][AB])$")>;
1287
1288// Crypto SHA1 hash acceleration ops
1289// Crypto SHA256 hash acceleration ops
1290// Crypto SM4 ops
1291def : InstRW<[V1Write_4c_1V0], (instregex "^SHA1[CMP]rrr$",
1292                                          "^SHA256H2?rrr$",
1293                                          "^SM4E(KEY)?$")>;
1294
1295// Crypto SHA3 ops
1296def : InstRW<[V1Write_2c_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
1297
1298
1299// CRC instructions
1300// -----------------------------------------------------------------------------
1301
1302// CRC checksum ops
// The optional "C" covers both the CRC32 and CRC32C opcodes, for all four
// operand sizes (B, H, W, X).
1303def : InstRW<[V1Write_2c_1M0], (instregex "^CRC32C?[BHWX]rr$")>;
1304
1305
1306// SVE Predicate instructions
1307// -----------------------------------------------------------------------------
1308
1309// Loop control, based on predicate
// BRKA/BRKB in merging and zeroing forms come from the pattern; BRKN, BRKPA
// and BRKPB are listed by exact opcode.
1310def : InstRW<[V1Write_2c_1M0], (instregex "^BRK[AB]_PP[mz]P$")>;
1311def : InstRW<[V1Write_2c_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
1312
1313// Loop control, based on predicate and flag setting
1314def : InstRW<[V1Write_3c_2M0], (instrs BRKAS_PPzP, BRKBS_PPzP, BRKNS_PPzP,
1315                                       BRKPAS_PPzPP, BRKPBS_PPzPP)>;
1316
1317// Loop control, based on GPR
1318def : InstRW<[V1Write_3c_2M0], (instregex "^WHILE(LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
1319
1320// Loop terminate
1321def : InstRW<[V1Write_1c_1M0], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
1322
1323// Predicate counting scalar
1324// Predicate counting scalar, active predicate
// The first record lists ADDPL/ADDVL/RDVL by exact opcode; the second matches
// the CNT*/INC*/DEC* element counters and the predicate-count (CNTP, *P_XP,
// *P_WP, *P_XPWd) opcodes by pattern.
1325def : InstRW<[V1Write_2c_1M0], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
1326def : InstRW<[V1Write_2c_1M0], (instregex "^(CNT|([SU]Q)?(DEC|INC))[BHWD]_XPiI$",
1327                                          "^SQ(DEC|INC)[BHWD]_XPiWdI$",
1328                                          "^UQ(DEC|INC)[BHWD]_WPiI$",
1329                                          "^CNTP_XPP_[BHSD]$",
1330                                          "^([SU]Q)?(DEC|INC)P_XP_[BHSD]$",
1331                                          "^UQ(DEC|INC)P_WP_[BHSD]$",
1332                                          "^[SU]Q(DEC|INC)P_XPWd_[BHSD]$")>;
1333
1334// Predicate counting vector, active predicate
1335def : InstRW<[V1Write_7c_2M0_1V01], (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]$")>;
1336
1337// Predicate logical
1338def : InstRW<[V1Write_1c_1M0],
1339             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
1340
1341// Predicate logical, flag setting
// Same opcode set as the record above, with the flag-setting "S" suffix.
1342def : InstRW<[V1Write_2c_2M0],
1343             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)S_PPzPP$")>;
1344
1345// Predicate reverse
1346// Predicate set/initialize/find next
1347// Predicate transpose
1348// Predicate unpack and widen
1349// Predicate zip/unzip
1350def : InstRW<[V1Write_2c_1M0], (instregex "^REV_PP_[BHSD]$",
1351                                          "^PFALSE$", "^PFIRST_B$",
1352                                          "^PNEXT_[BHSD]$", "^PTRUE_[BHSD]$",
1353                                          "^TRN[12]_PPP_[BHSDQ]$",
1354                                          "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
1355
1356// Predicate set/initialize/find next
1357// Predicate unpack and widen
// Exact-opcode companions to the pattern record above (same write).
1358def : InstRW<[V1Write_2c_1M0], (instrs PTEST_PP,
1359                                       PUNPKHI_PP, PUNPKLO_PP)>;
1360
1361// Predicate select
1362def : InstRW<[V1Write_1c_1M0], (instrs SEL_PPPP)>;
1363
1364// Predicate set/initialize, set flags
1365def : InstRW<[V1Write_3c_2M0], (instregex "^PTRUES_[BHSD]$")>;
1366
1367
1368
1369// SVE integer instructions
1370// -----------------------------------------------------------------------------
1371
1372// Arithmetic, basic
1373// Logical
1374def : InstRW<[V1Write_2c_1V01],
1375             (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]$",
1376                        "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]$",
1377                        "^ADR_[SU]XTW_ZZZ_D_[0123]$",
1378                        "^ADR_LSL_ZZZ_[SD]_[0123]$",
1379                        "^[SU]ABD_ZP[mZ]Z_[BHSD]$",
1380                        "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]$",
1381                        "^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]$",
1382                        "^SUBR_Z(I|P[mZ]Z)_[BHSD]$",
1383                        "^(AND|EOR|ORR)_ZI$",
1384                        "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$",
1385                        "^EOR(BT|TB)_ZZZ_[BHSD]$",
1386                        "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;
1387
1388// Arithmetic, shift (NOTE(review): no trailing '$', so these are prefix matches)
1389def : InstRW<[V1Write_2c_1V1],
1390             (instregex "^(ASR|LSL|LSR)_WIDE_Z(Pm|Z)Z_[BHS]",
1391                        "^(ASR|LSL|LSR)_ZPm[IZ]_[BHSD]",
1392                        "^(ASR|LSL|LSR)_ZZI_[BHSD]",
1393                        "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
1394                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
1395
1396// Arithmetic, shift right for divide
1397def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_ZP[mZ]I_[BHSD]$")>;
1398
1399// Count/reverse bits
1400def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>;
1401
1402// Broadcast logical bitmask immediate to vector
1403def : InstRW<[V1Write_2c_1V01], (instrs DUPM_ZI)>;
1404
1405// Compare and set flags
1406def : InstRW<[V1Write_4c_1M0_1V0],
1407             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
1408                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
1409
1410// Conditional extract operations, scalar form
1411def : InstRW<[V1Write_9c_1M0_1V1], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
1412
1413// Conditional extract operations, SIMD&FP scalar and vector forms; COMPACT, SPLICE
1414def : InstRW<[V1Write_3c_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
1415                                          "^COMPACT_ZPZ_[SD]$",
1416                                          "^SPLICE_ZPZZ?_[BHSD]$")>;
1417
1418// Convert to floating point, 64b to float or convert to double
1419def : InstRW<[V1Write_3c_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
1420                                          "^[SU]CVTF_ZPmZ_StoD")>;
1421
1422// Convert to floating point, 32b to single or half
1423def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>;
1424
1425// Convert to floating point, 16b to half
1426def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>;
1427
1428// Copy, scalar
1429def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>;
1430
1431// Copy, scalar SIMD&FP or imm
1432def : InstRW<[V1Write_2c_1V01], (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>;
1433
1434// Divides, 32 bit
1435def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>;
1436
1437// Divides, 64 bit
1438def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>;
1439
1440// Dot product, 8 bit
1441def : InstRW<[V1Write_3c_1V01], (instregex "^[SU]DOT_ZZZI?_S$")>;
1442
1443// Dot product, 8 bit, using signed and unsigned integers
1444def : InstRW<[V1Write_3c_1V], (instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>;
1445
1446// Dot product, 16 bit
1447def : InstRW<[V1Write_4c_1V01], (instregex "^[SU]DOT_ZZZI?_D$")>;
1448
1449// Duplicate, immediate and indexed form
1450def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$",
1451                                           "^DUP_ZZI_[BHSDQ]$")>;
1452
1453// Duplicate, scalar form
1454def : InstRW<[V1Write_3c_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
1455
1456// Extend, sign or zero
1457def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$",
1458                                          "^[SU]XTH_ZPmZ_[SD]$",
1459                                          "^[SU]XTW_ZPmZ_[D]$")>;
1460
1461// Extract
1462def : InstRW<[V1Write_2c_1V01], (instrs EXT_ZZI)>;
1463
1464// Extract/insert operation (LASTA/LASTB, INSR), SIMD and FP scalar form
1465def : InstRW<[V1Write_3c_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
1466                                          "^INSR_ZV_[BHSD]$")>;
1467
1468// Extract/insert operation (LASTA/LASTB, INSR), scalar
1469def : InstRW<[V1Write_6c_1M0_1V1], (instregex "^LAST[AB]_RPZ_[BHSD]$",
1470                                              "^INSR_ZR_[BHSD]$")>;
1471
1472// Horizontal operations (INDEX), B, H, S form, imm, imm
1473def : InstRW<[V1Write_4c_1V0], (instregex "^INDEX_II_[BHS]$")>;
1474
1475// Horizontal operations (INDEX), B, H, S form, scalar, imm / scalar / imm, scalar
1476def : InstRW<[V1Write_7c_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
1477
1478// Horizontal operations (INDEX), D form, imm, imm
1479def : InstRW<[V1Write_5c_2V0], (instrs INDEX_II_D)>;
1480
1481// Horizontal operations (INDEX), D form, scalar, imm / scalar / imm, scalar
1482def : InstRW<[V1Write_8c_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
1483
1484// Move prefix
1485def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
1486                                           "^MOVPRFX_ZZ$")>;
1487
1488// Matrix multiply-accumulate
1489def : InstRW<[V1Write_3c_1V01], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
1490
1491// Multiply, B, H, S element size
1492def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ)_[BHS]$",
1493                                          "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;
1494
1495// Multiply, D element size
1496// Multiply accumulate, D element size
1497def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ)_D$",
1498                                          "^[SU]MULH_ZPmZ_D$",
1499                                          "^(MLA|MLS|MAD|MSB)_ZPmZZ_D$")>;
1500
1501// Multiply accumulate, B, H, S element size
1502// NOTE: This is not specified in the SOG.
1503def : InstRW<[V1Write_4c_1V0], (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
1504
1505// Predicate counting vector
1506def : InstRW<[V1Write_2c_1V0], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>;
1507
1508// Reduction, arithmetic, B form
1509def : InstRW<[V1Write_14c_1V_1V0_2V1_1V13],
1510             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
1511
1512// Reduction, arithmetic, H form
1513def : InstRW<[V1Write_12c_1V_1V01_2V1],
1514             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
1515
1516// Reduction, arithmetic, S form
1517def : InstRW<[V1Write_10c_1V_1V01_2V1],
1518             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
1519
1520// Reduction, arithmetic, D form
1521def : InstRW<[V1Write_8c_1V_1V01],
1522             (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
1523
1524// Reduction, logical
1525def : InstRW<[V1Write_12c_4V01], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>;
1526
1527// Reverse, vector
1528def : InstRW<[V1Write_2c_1V01], (instregex "^REV_ZZ_[BHSD]$",
1529                                           "^REVB_ZPmZ_[HSD]$",
1530                                           "^REVH_ZPmZ_[SD]$",
1531                                           "^REVW_ZPmZ_D$")>;
1532
1533// Select, vector form
1534// Table lookup
1535// Table lookup extension
1536// Transpose, vector form
1537// Unpack and extend
1538// Zip/unzip
1539def : InstRW<[V1Write_2c_1V01], (instregex "^SEL_ZPZZ_[BHSD]$",
1540                                           "^TB[LX]_ZZZ_[BHSD]$",
1541                                           "^TRN[12]_ZZZ_[BHSDQ]$",
1542                                           "^[SU]UNPK(HI|LO)_ZZ_[HSD]$",
1543                                           "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
1544
1545
1546// SVE floating-point instructions
1547// -----------------------------------------------------------------------------
1548
1549// Floating point absolute value/difference
1550// Floating point arithmetic
1551def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]$",
1552                                           "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
1553                                           "^FADDP_ZPmZZ_[HSD]$",
1554                                           "^FNEG_ZPmZ_[HSD]$",
1555                                           "^FSUBR_ZPm[IZ]_[HSD]$")>;
1556
1557// Floating point associative add, F16
1558def : InstRW<[V1Write_19c_18V0], (instrs FADDA_VPZ_H)>;
1559
1560// Floating point associative add, F32
1561def : InstRW<[V1Write_11c_10V0], (instrs FADDA_VPZ_S)>;
1562
1563// Floating point associative add, F64
1564def : InstRW<[V1Write_8c_3V01], (instrs FADDA_VPZ_D)>;
1565
1566// Floating point compare
1567def : InstRW<[V1Write_2c_1V0], (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$",
1568                                          "^FCM(EQ|GE|GT|NE|UO)_PPzZZ_[HSD]$",
1569                                          "^FCM(EQ|GE|GT|LE|LT|NE)_PPzZ0_[HSD]$")>;
1570
1571// Floating point complex add
1572def : InstRW<[V1Write_3c_1V01], (instregex "^FCADD_ZPmZ_[HSD]$")>;
1573
1574// Floating point complex multiply add
1575def : InstRW<[V1Write_5c_1V01], (instregex "^FCMLA_ZPmZZ_[HSD]$",
1576                                           "^FCMLA_ZZZI_[HS]$")>;
1577
1578// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
1579// Floating point convert to integer, F32
1580def : InstRW<[V1Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$",
1581                                          "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>;
1582
1583// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16)
1584// Floating point convert to integer, F64
1585def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$",
1586                                          "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>;
1587
1588// Floating point convert to integer, F16
1589def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>;
1590
1591// Floating point copy
1592def : InstRW<[V1Write_2c_1V01], (instregex "^FCPY_ZPmI_[HSD]$",
1593                                           "^FDUP_ZI_[HSD]$")>;
1594
1595// Floating point divide, F16
1596def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_ZPmZ_H$")>;
1597
1598// Floating point divide, F32
1599def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_ZPmZ_S$")>;
1600
1601// Floating point divide, F64
1602def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_ZPmZ_D$")>;
1603
1604// Floating point min/max
1605def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;
1606
1607// Floating point multiply
1608def : InstRW<[V1Write_3c_1V01], (instregex "^F(SCALE|MULX)_ZPmZ_[HSD]$",
1609                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;
1610
1611// Floating point multiply accumulate
1612// Floating point reciprocal step (FRECPS, FRSQRTS)
1613def : InstRW<[V1Write_4c_1V01], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$",
1614                                           "^FML[AS]_ZZZI_[HSD]$",
1615                                           "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
1616
1617// Floating point reciprocal estimate (FRECPE, FRSQRTE), F16
1618def : InstRW<[V1Write_6c_4V0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>;
1619
1620// Floating point reciprocal estimate (FRECPE, FRSQRTE), F32
1621def : InstRW<[V1Write_4c_2V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>;
1622
1623// Floating point reciprocal estimate (FRECPE, FRSQRTE), F64
1624def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>;
1625
1626// Floating point reciprocal exponent
1627def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]$")>;
1628
1629// Floating point reduction, F16
1630def : InstRW<[V1Write_13c_6V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>;
1631
1632// Floating point reduction, F32
1633def : InstRW<[V1Write_11c_1V_5V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$")>;
1634
1635// Floating point reduction, F64
1636def : InstRW<[V1Write_9c_1V_4V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>;
1637
1638// Floating point round to integral, F16
1639def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;
1640
1641// Floating point round to integral, F32
1642def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;
1643
1644// Floating point round to integral, F64
1645def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;
1646
1647// Floating point square root, F16
1648def : InstRW<[V1Write_13c10_1V0], (instrs FSQRT_ZPmZ_H)>;
1649
1650// Floating point square root, F32
1651def : InstRW<[V1Write_10c7_1V0], (instrs FSQRT_ZPmZ_S)>;
1652
1653// Floating point square root, F64
1654def : InstRW<[V1Write_16c7_1V0], (instrs FSQRT_ZPmZ_D)>;
1655
1656// Floating point trigonometric
1657def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$",
1658                                           "^FTMAD_ZZI_[HSD]$",
1659                                           "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
1660
1661
1662// SVE BFloat16 (BF16) instructions
1663// -----------------------------------------------------------------------------
1664
1665// Convert, F32 to BF16
1666def : InstRW<[V1Write_4c_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
1667
1668// Dot product
1669def : InstRW<[V1Write_4c_1V01], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
1670
1671// Matrix multiply accumulate
1672def : InstRW<[V1Write_5c_1V01], (instrs BFMMLA_ZZZ)>;
1673
1674// Multiply accumulate long (BFMLALB/BFMLALT, _ZZZ and _ZZZI opcodes)
1675def : InstRW<[V1Write_5c_1V01], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
1676
1677
1678// SVE Load instructions
1679// -----------------------------------------------------------------------------
1680
1681// Load vector
1682def : InstRW<[V1Write_6c_1L01], (instrs LDR_ZXI)>;
1683
1684// Load predicate
1685def : InstRW<[V1Write_6c_1L_1M], (instrs LDR_PXI)>;
1686
1687// Contiguous load, scalar + imm
1688// Contiguous load, scalar + scalar (LD1H*, LD1SH*, LD1RQ_H opcodes: second def)
1689// Contiguous load broadcast, scalar + imm
1690// Contiguous load broadcast, scalar + scalar
1691def : InstRW<[V1Write_6c_1L01], (instregex "^LD1[BHWD]_IMM$",
1692                                           "^LD1S?B_[HSD]_IMM$",
1693                                           "^LD1S?H_[SD]_IMM$",
1694                                           "^LD1S?W_D_IMM$",
1695                                           "^LD1[BWD]$",
1696                                           "^LD1S?B_[HSD]$",
1697                                           "^LD1S?W_D$",
1698                                           "^LD1R[BHWD]_IMM$",
1699                                           "^LD1RSW_IMM$",
1700                                           "^LD1RS?B_[HSD]_IMM$",
1701                                           "^LD1RS?H_[SD]_IMM$",
1702                                           "^LD1RS?W_D_IMM$",
1703                                           "^LD1RQ_[BHWD]_IMM$",
1704                                           "^LD1RQ_[BWD]$")>;
1705def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LD1H$",
1706                                              "^LD1S?H_[SD]$",
1707                                              "^LD1RQ_H$")>;
1708
1709// Non temporal load, scalar + imm
1710def : InstRW<[V1Write_6c_1L01], (instregex "^LDNT1[BHWD]_ZRI$")>;
1711
1712// Non temporal load, scalar + scalar
1713def : InstRW<[V1Write_7c_1L01_1S], (instrs LDNT1H_ZRR)>;
1714def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDNT1[BWD]_ZRR$")>;
1715
1716// Contiguous first faulting load, scalar + scalar
1717def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H_REAL$",
1718                                              "^LDFF1S?H_[SD]_REAL$")>;
1719def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]_REAL$",
1720                                              "^LDFF1S?B_[HSD]_REAL$",
1721                                              "^LDFF1S?W_D_REAL$")>;
1722
1723// Contiguous non faulting load, scalar + imm
1724def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM_REAL$",
1725                                           "^LDNF1S?B_[HSD]_IMM_REAL$",
1726                                           "^LDNF1S?H_[SD]_IMM_REAL$",
1727                                           "^LDNF1S?W_D_IMM_REAL$")>;
1728
1729// Contiguous Load two structures to two vectors, scalar + imm
1730def : InstRW<[V1Write_8c_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>;
1731
1732// Contiguous Load two structures to two vectors, scalar + scalar
1733def : InstRW<[V1Write_10c_2L01_2V01], (instrs LD2H)>;
1734def : InstRW<[V1Write_9c_2L01_2V01],  (instregex "^LD2[BWD]$")>;
1735
1736// Contiguous Load three structures to three vectors, scalar + imm
1737def : InstRW<[V1Write_11c_3L01_3V01], (instregex "^LD3[BHWD]_IMM$")>;
1738
1739// Contiguous Load three structures to three vectors, scalar + scalar
1740def : InstRW<[V1Write_13c_3L01_1S_3V01], (instregex "^LD3[BHWD]$")>;
1741
1742// Contiguous Load four structures to four vectors, scalar + imm
1743def : InstRW<[V1Write_12c_4L01_4V01], (instregex "^LD4[BHWD]_IMM$")>;
1744
1745// Contiguous Load four structures to four vectors, scalar + scalar
1746def : InstRW<[V1Write_13c_4L01_2S_4V01], (instregex "^LD4[BHWD]$")>;
1747
1748// Gather load, vector + imm, 32-bit element size
1749def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
1750                                             "^GLD(FF)?1W_IMM_REAL$")>;
1751
1752// Gather load, vector + imm, 64-bit element size (and the non-IMM 64-bit opcodes)
1753def : InstRW<[V1Write_9c_2L_2V],
1754             (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
1755                        "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?(SCALED_)?REAL$",
1756                        "^GLD(FF)?1D_IMM_REAL$",
1757                        "^GLD(FF)?1D_([SU]XTW_)?(SCALED_)?REAL$")>;
1758
1759// Gather load, 32-bit scaled offset
1760def : InstRW<[V1Write_11c_2L_2V],
1761             (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
1762                        "^GLD(FF)?1W_[SU]XTW_SCALED_REAL$")>;
1763
1764// Gather load, 32-bit unpacked unscaled offset
1765def : InstRW<[V1Write_9c_1L_1V],
1766             (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
1767                        "^GLD(FF)?1W_[SU]XTW_REAL$")>;
1768
1769// Prefetch (pattern is deliberately a prefix: no trailing '$')
1770// NOTE: This is not specified in the SOG.
1771def : InstRW<[V1Write_4c_1L01], (instregex "^PRF[BHWD]")>;
1772
1773
1774// SVE Store instructions
1775// -----------------------------------------------------------------------------
1776
1777// Store from predicate reg
1778def : InstRW<[V1Write_1c_1L01], (instrs STR_PXI)>;
1779
1780// Store from vector reg
1781def : InstRW<[V1Write_2c_1L01_1V], (instrs STR_ZXI)>;
1782
1783// Contiguous store, scalar + imm
1784// Contiguous store, scalar + scalar (ST1H, ST1H_S, ST1H_D are in the second def)
1785def : InstRW<[V1Write_2c_1L01_1V], (instregex "^ST1[BHWD]_IMM$",
1786                                              "^ST1B_[HSD]_IMM$",
1787                                              "^ST1H_[SD]_IMM$",
1788                                              "^ST1W_D_IMM$",
1789                                              "^ST1[BWD]$",
1790                                              "^ST1B_[HSD]$",
1791                                              "^ST1W_D$")>;
1792def : InstRW<[V1Write_2c_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
1793
1794// Contiguous store two structures from two vectors, scalar + imm
1795// Contiguous store two structures from two vectors, scalar + scalar
1796def : InstRW<[V1Write_4c_1L01_1V], (instregex "^ST2[BHWD]_IMM$",
1797                                              "^ST2[BWD]$")>;
1798def : InstRW<[V1Write_4c_1L01_1S_1V], (instrs ST2H)>;
1799
1800// Contiguous store three structures from three vectors, scalar + imm
1801def : InstRW<[V1Write_7c_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;
1802
1803// Contiguous store three structures from three vectors, scalar + scalar
1804def : InstRW<[V1Write_7c_5L01_5S_5V], (instregex "^ST3[BHWD]$")>;
1805
1806// Contiguous store four structures from four vectors, scalar + imm
1807def : InstRW<[V1Write_11c_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;
1808
1809// Contiguous store four structures from four vectors, scalar + scalar
1810def : InstRW<[V1Write_11c_9L01_9S_9V], (instregex "^ST4[BHWD]$")>;
1811
1812// Non temporal store, scalar + imm
1813// Non temporal store, scalar + scalar (STNT1H_ZRR is in the second def)
1814def : InstRW<[V1Write_2c_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$",
1815                                              "^STNT1[BWD]_ZRR$")>;
1816def : InstRW<[V1Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>;
1817
1818// Scatter store vector + imm 32-bit element size
1819// Scatter store, 32-bit scaled offset
1820// Scatter store, 32-bit unscaled offset
1821def : InstRW<[V1Write_10c_2L01_2V], (instregex "^SST1[BH]_S_IMM$",
1822                                               "^SST1W_IMM$",
1823                                               "^SST1(H_S|W)_[SU]XTW_SCALED$",
1824                                               "^SST1[BH]_S_[SU]XTW$",
1825                                               "^SST1W_[SU]XTW$")>;
1826
1827// Scatter store, 32-bit unpacked unscaled offset
1828// Scatter store, 32-bit unpacked scaled offset
1829def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$",
1830                                              "^SST1D_[SU]XTW$",
1831                                              "^SST1[HW]_D_[SU]XTW_SCALED$",
1832                                              "^SST1D_[SU]XTW_SCALED$")>;
1833
1834// Scatter store vector + imm 64-bit element size
1835// Scatter store, 64-bit scaled offset
1836// Scatter store, 64-bit unscaled offset
1837def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
1838                                              "^SST1D_IMM$",
1839                                              "^SST1[HW]_D_SCALED$",
1840                                              "^SST1D_SCALED$",
1841                                              "^SST1[BHW]_D$",
1842                                              "^SST1D$")>;
1843
1844
1845// SVE Miscellaneous instructions
1846// -----------------------------------------------------------------------------
1847
1848// Read first fault register, unpredicated
1849// Set first fault register
1850// Write to first fault register
1851def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P_REAL,
1852                                       SETFFR,
1853                                       WRFFR)>;
1854
1855// Read first fault register, predicated
1856def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz_REAL)>;
1857
1858// Read first fault register, predicated, and set flags
1859def : InstRW<[V1Write_4c_1M], (instrs RDFFRS_PPz)>;
1860
1861
1862}
1863