// xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td (revision 2e3507c25e42292b45a5482e116d278f5515d04d)
1//=- AArch64SchedNeoverseV2.td - NeoverseV2 Scheduling Defs --*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the scheduling model for the Arm Neoverse V2 processors.
10// All information is taken from the V2 Software Optimisation guide:
11//
12// https://developer.arm.com/documentation/PJDOC-466751330-593177/r0p2
13//
14//===----------------------------------------------------------------------===//
15
// Top-level machine model record for Neoverse V2. Several parameters are
// borrowed from other cores; see the per-field notes.
def NeoverseV2Model : SchedMachineModel {
  // Micro-ops dispatched at a time.
  let IssueWidth = 16;

  // Entries in micro-op re-order buffer. NOTE: Copied from N2.
  let MicroOpBufferSize = 160;

  // Optimistic load latency.
  let LoadLatency = 4;

  // Extra cycles for mispredicted branch. NOTE: Copied from N2.
  let MispredictPenalty = 10;

  // NOTE: Copied from Cortex-A57.
  let LoopMicroOpBufferSize = 16;

  let CompleteModel = 1;

  // Everything in SMEUnsupported.F plus HasSVE2p1 is marked unsupported.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SMEUnsupported.F, [HasSVE2p1]);
}
27
28//===----------------------------------------------------------------------===//
29// Define each kind of processor resource and number available on Neoverse V2.
30// Instructions are first fetched and then decoded into internal macro-ops
31// (MOPs). From there, the MOPs proceed through register renaming and dispatch
32// stages. A MOP can be split into two micro-ops further down the pipeline
33// after the decode stage. Once dispatched, micro-ops wait for their operands
34// and issue out-of-order to one of seventeen issue pipelines. Each issue
35// pipeline can accept one micro-op per cycle.
36
37let SchedModel = NeoverseV2Model in {
38
39// Define the (17) issue ports.
// Branch 0/1
def V2UnitB : ProcResource<2>;

// Integer single-cycle 0/1/2/3 (V2UnitS0 .. V2UnitS3)
foreach i = 0...3 in
  def "V2UnitS"#i : ProcResource<1>;

// Integer single/multicycle 0/1 (V2UnitM0, V2UnitM1)
foreach i = 0...1 in
  def "V2UnitM"#i : ProcResource<1>;

// FP/ASIMD 0/1/2/3 (V2UnitV0 .. V2UnitV3)
foreach i = 0...3 in
  def "V2UnitV"#i : ProcResource<1>;

// Load/Store 0/1
def V2UnitL01 : ProcResource<2>;

// Load 2
def V2UnitL2 : ProcResource<1>;

// Store data 0/1
def V2UnitD : ProcResource<2>;
54
// Integer single-cycle 0/1
def V2UnitR : ProcResGroup<[V2UnitS0, V2UnitS1]>;

// Integer single-cycle 0/1/2/3
def V2UnitS : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3]>;

// Integer single-cycle 0/1 and single/multicycle 0/1
def V2UnitF : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitM0, V2UnitM1]>;

// Integer single-cycle 0/1/2/3 and single/multicycle 0/1
def V2UnitI : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3,
                            V2UnitM0, V2UnitM1]>;

// Integer single/multicycle 0/1
def V2UnitM : ProcResGroup<[V2UnitM0, V2UnitM1]>;

// Load/Store 0/1 and Load 2
def V2UnitL : ProcResGroup<[V2UnitL01, V2UnitL2]>;

// FP/ASIMD 0/1/2/3
def V2UnitV : ProcResGroup<[V2UnitV0, V2UnitV1, V2UnitV2, V2UnitV3]>;

// FP/ASIMD 0/1
def V2UnitV01 : ProcResGroup<[V2UnitV0, V2UnitV1]>;

// FP/ASIMD 0/2
def V2UnitV02 : ProcResGroup<[V2UnitV0, V2UnitV2]>;

// FP/ASIMD 1/3
def V2UnitV13 : ProcResGroup<[V2UnitV1, V2UnitV3]>;

// FP/ASIMD 2/3
def V2UnitV23 : ProcResGroup<[V2UnitV2, V2UnitV3]>;
66
67// Define commonly used read types.
68
// No forwarding is provided for these types: each one gets a ReadAdvance of
// 0 cycles.
foreach NoFwdRead = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID,
                     ReadExtrHi, ReadAdrBase, ReadST, ReadVLD] in
  def : ReadAdvance<NoFwdRead, 0>;
80
// NOTE: Copied from N2.
// None of these writes is bound to a processor resource (empty list).
let Unsupported = 1 in
  def : WriteRes<WriteAtomic, []>;
let Latency = 1 in {
  def : WriteRes<WriteBarrier, []>;
  def : WriteRes<WriteHint, []>;
}
let Latency = 4 in
  def : WriteRes<WriteLDHi, []>;
86
87//===----------------------------------------------------------------------===//
88// Define customized scheduler read/write types specific to the Neoverse V2.
89
90//===----------------------------------------------------------------------===//
91// Define generic 1 micro-op types
92
// Single micro-op writes. Latency is bound by the enclosing `let`;
// ResourceCycles is bound only where it is listed explicitly.
let Latency = 1 in {
def V2Write_1cyc_1B   : SchedWriteRes<[V2UnitB]>;
def V2Write_1cyc_1F   : SchedWriteRes<[V2UnitF]>;
def V2Write_1cyc_1I   : SchedWriteRes<[V2UnitI]>;
def V2Write_1cyc_1M   : SchedWriteRes<[V2UnitM]>;
def V2Write_1cyc_1M0  : SchedWriteRes<[V2UnitM0]>;
def V2Write_1cyc_1L01 : SchedWriteRes<[V2UnitL01]>;
}
let Latency = 2 in def V2Write_2cyc_1M  : SchedWriteRes<[V2UnitM]>;
let Latency = 3 in def V2Write_3cyc_1M  : SchedWriteRes<[V2UnitM]>;
let Latency = 2 in def V2Write_2cyc_1M0 : SchedWriteRes<[V2UnitM0]>;
let Latency = 3 in def V2Write_3cyc_1M0 : SchedWriteRes<[V2UnitM0]>;
let Latency = 5 in def V2Write_5cyc_1M0 : SchedWriteRes<[V2UnitM0]>;
let Latency = 12, ResourceCycles = [12] in
def V2Write_12cyc_1M0 : SchedWriteRes<[V2UnitM0]>;
let Latency = 20, ResourceCycles = [20] in
def V2Write_20cyc_1M0 : SchedWriteRes<[V2UnitM0]>;
let Latency = 4 in def V2Write_4cyc_1L : SchedWriteRes<[V2UnitL]>;
let Latency = 6 in def V2Write_6cyc_1L : SchedWriteRes<[V2UnitL]>;
let Latency = 2 in {
def V2Write_2cyc_1V   : SchedWriteRes<[V2UnitV]>;
def V2Write_2cyc_1V0  : SchedWriteRes<[V2UnitV0]>;
def V2Write_2cyc_1V01 : SchedWriteRes<[V2UnitV01]>;
def V2Write_2cyc_1V23 : SchedWriteRes<[V2UnitV23]>;
}
let Latency = 3 in def V2Write_3cyc_1V : SchedWriteRes<[V2UnitV]>;
let Latency = 3, ResourceCycles = [2] in
def V2Write_3cyc_1V01 : SchedWriteRes<[V2UnitV01]>;
let Latency = 3  in def V2Write_3cyc_1V23 : SchedWriteRes<[V2UnitV23]>;
let Latency = 4  in def V2Write_4cyc_1V   : SchedWriteRes<[V2UnitV]>;
let Latency = 5  in def V2Write_5cyc_1V   : SchedWriteRes<[V2UnitV]>;
let Latency = 6  in def V2Write_6cyc_1V   : SchedWriteRes<[V2UnitV]>;
let Latency = 12 in def V2Write_12cyc_1V  : SchedWriteRes<[V2UnitV]>;
let Latency = 3 in {
def V2Write_3cyc_1V0  : SchedWriteRes<[V2UnitV0]>;
def V2Write_3cyc_1V02 : SchedWriteRes<[V2UnitV02]>;
}
let Latency = 4 in {
def V2Write_4cyc_1V0  : SchedWriteRes<[V2UnitV0]>;
def V2Write_4cyc_1V02 : SchedWriteRes<[V2UnitV02]>;
}
let Latency = 7, ResourceCycles = [7] in
def V2Write_7cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 7, ResourceCycles = [2] in
def V2Write_7cyc_1V02 : SchedWriteRes<[V2UnitV02]>;
let Latency = 9 in def V2Write_9cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 9, ResourceCycles = [2] in
def V2Write_9cyc_1V02 : SchedWriteRes<[V2UnitV02]>;
let Latency = 10 in def V2Write_10cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 10, ResourceCycles = [2] in
def V2Write_10cyc_1V02 : SchedWriteRes<[V2UnitV02]>;
let Latency = 12, ResourceCycles = [11] in
def V2Write_12cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 13 in def V2Write_13cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 15 in def V2Write_15cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 15, ResourceCycles = [8] in
def V2Write_15cyc_1V02 : SchedWriteRes<[V2UnitV02]>;
let Latency = 16 in def V2Write_16cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 16, ResourceCycles = [8] in
def V2Write_16cyc_1V02 : SchedWriteRes<[V2UnitV02]>;
let Latency = 20, ResourceCycles = [20] in
def V2Write_20cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 2 in {
def V2Write_2cyc_1V1  : SchedWriteRes<[V2UnitV1]>;
def V2Write_2cyc_1V13 : SchedWriteRes<[V2UnitV13]>;
}
let Latency = 3 in def V2Write_3cyc_1V1 : SchedWriteRes<[V2UnitV1]>;
let Latency = 4 in {
def V2Write_4cyc_1V1  : SchedWriteRes<[V2UnitV1]>;
def V2Write_4cyc_1V13 : SchedWriteRes<[V2UnitV13]>;
}
let Latency = 6  in def V2Write_6cyc_1V1  : SchedWriteRes<[V2UnitV1]>;
let Latency = 10 in def V2Write_10cyc_1V1 : SchedWriteRes<[V2UnitV1]>;
let Latency = 6  in def V2Write_6cyc_1L01 : SchedWriteRes<[V2UnitL01]>;
155
156//===----------------------------------------------------------------------===//
157// Define generic 2 micro-op types
158
// Every write in this group is two micro-ops; only the resource pair and the
// latency vary from def to def.
let NumMicroOps = 2 in {
def V2Write_1cyc_1B_1R : SchedWriteRes<[V2UnitB, V2UnitR]> { let Latency = 1; }
def V2Write_6cyc_1M0_1B : SchedWriteRes<[V2UnitM0, V2UnitB]> { let Latency = 6; }
def V2Write_9cyc_1M0_1L : SchedWriteRes<[V2UnitM0, V2UnitL]> { let Latency = 9; }
def V2Write_3cyc_1I_1M : SchedWriteRes<[V2UnitI, V2UnitM]> { let Latency = 3; }
def V2Write_1cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { let Latency = 1; }
def V2Write_3cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { let Latency = 3; }
def V2Write_4cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { let Latency = 4; }
def V2Write_5cyc_1L_1F : SchedWriteRes<[V2UnitL, V2UnitF]> { let Latency = 5; }
def V2Write_6cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> { let Latency = 6; }
def V2Write_7cyc_1F_1L : SchedWriteRes<[V2UnitF, V2UnitL]> { let Latency = 7; }
def V2Write_7cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> { let Latency = 7; }
def V2Write_1cyc_1L01_1D : SchedWriteRes<[V2UnitL01, V2UnitD]> { let Latency = 1; }
def V2Write_5cyc_1M0_1V : SchedWriteRes<[V2UnitM0, V2UnitV]> { let Latency = 5; }
def V2Write_2cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> { let Latency = 2; }
def V2Write_2cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> { let Latency = 2; }
def V2Write_2cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> { let Latency = 2; }
def V2Write_4cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> { let Latency = 4; }
def V2Write_4cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> { let Latency = 4; }
def V2Write_4cyc_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> { let Latency = 4; }
def V2Write_4cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> { let Latency = 4; }
def V2Write_4cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
def V2Write_4cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> { let Latency = 4; }
def V2Write_6cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> { let Latency = 6; }
def V2Write_6cyc_2L : SchedWriteRes<[V2UnitL, V2UnitL]> { let Latency = 6; }
def V2Write_8cyc_1L_1V : SchedWriteRes<[V2UnitL, V2UnitV]> { let Latency = 8; }
def V2Write_4cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> { let Latency = 4; }
def V2Write_3cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { let Latency = 3; }
def V2Write_4cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { let Latency = 4; }
def V2Write_1cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { let Latency = 1; }
def V2Write_2cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { let Latency = 2; }
def V2Write_6cyc_2V1 : SchedWriteRes<[V2UnitV1, V2UnitV1]> { let Latency = 6; }
def V2Write_4cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> { let Latency = 4; }
def V2Write_5cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> { let Latency = 5; }
def V2Write_5cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> { let Latency = 5; }
def V2Write_5cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
def V2Write_6cyc_1V1_1M0 : SchedWriteRes<[V2UnitV1, V2UnitM0]> { let Latency = 6; }
def V2Write_7cyc_1M0_1V02 : SchedWriteRes<[V2UnitM0, V2UnitV02]> { let Latency = 7; }
def V2Write_2cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> { let Latency = 2; }
def V2Write_3cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> { let Latency = 3; }
def V2Write_6cyc_1V_1V13 : SchedWriteRes<[V2UnitV, V2UnitV13]> { let Latency = 6; }
def V2Write_6cyc_1L_1M : SchedWriteRes<[V2UnitL, V2UnitM]> { let Latency = 6; }
def V2Write_6cyc_1L_1S : SchedWriteRes<[V2UnitL, V2UnitS]> { let Latency = 6; }
def V2Write_4cyc_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> { let Latency = 4; }
def V2Write_8cyc_1M0_1V01 : SchedWriteRes<[V2UnitM0, V2UnitV01]> { let Latency = 8; }
} // NumMicroOps = 2
378
379//===----------------------------------------------------------------------===//
380// Define generic 3 micro-op types
381
// Every write in this group is three micro-ops.
let NumMicroOps = 3 in {
def V2Write_1cyc_1L01_1D_1I
    : SchedWriteRes<[V2UnitL01, V2UnitD, V2UnitI]> { let Latency = 1; }
def V2Write_2cyc_1L01_1V01_1I
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitI]> { let Latency = 2; }
def V2Write_2cyc_1L01_2V01
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> { let Latency = 2; }
def V2Write_4cyc_1L01_2V01
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> { let Latency = 4; }
def V2Write_9cyc_1L_2V
    : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> { let Latency = 9; }
def V2Write_4cyc_3V01
    : SchedWriteRes<[V2UnitV01, V2UnitV01, V2UnitV01]> { let Latency = 4; }
def V2Write_7cyc_1M_1M0_1V
    : SchedWriteRes<[V2UnitM, V2UnitM0, V2UnitV]> { let Latency = 7; }
def V2Write_2cyc_1L01_1S_1V
    : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV]> { let Latency = 2; }
def V2Write_2cyc_1L01_1S_1V01
    : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV01]> { let Latency = 2; }
def V2Write_6cyc_3L
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL]> { let Latency = 6; }
def V2Write_6cyc_3V
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV]> { let Latency = 6; }
def V2Write_8cyc_1L_2V
    : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> { let Latency = 8; }
} // NumMicroOps = 3
441
442//===----------------------------------------------------------------------===//
443// Define generic 4 micro-op types
444
// Every write in this group is four micro-ops.
let NumMicroOps = 4 in {
def V2Write_2cyc_1L01_2V01_1I
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01, V2UnitI]> {
  let Latency = 2;
}
def V2Write_2cyc_2L01_2V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01, V2UnitV01]> {
  let Latency = 2;
}
def V2Write_4cyc_2L01_2V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01, V2UnitV01]> {
  let Latency = 4;
}
def V2Write_5cyc_1I_3L
    : SchedWriteRes<[V2UnitI, V2UnitL, V2UnitL, V2UnitL]> {
  let Latency = 5;
}
def V2Write_9cyc_2L_2V1
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV1, V2UnitV1]> {
  let Latency = 9;
}
def V2Write_6cyc_4V0
    : SchedWriteRes<[V2UnitV0, V2UnitV0, V2UnitV0, V2UnitV0]> {
  let Latency = 6;
}
def V2Write_8cyc_4V
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 8;
}
def V2Write_6cyc_2V_2V13
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13, V2UnitV13]> {
  let Latency = 6;
}
def V2Write_8cyc_2V_2V13
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13, V2UnitV13]> {
  let Latency = 8;
}
def V2Write_6cyc_4V02
    : SchedWriteRes<[V2UnitV02, V2UnitV02, V2UnitV02, V2UnitV02]> {
  let Latency = 6;
}
def V2Write_6cyc_4V
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 6;
}
def V2Write_8cyc_2L_2V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> {
  let Latency = 8;
}
def V2Write_9cyc_2L_2V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> {
  let Latency = 9;
}
def V2Write_2cyc_2L01_2V
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV, V2UnitV]> {
  let Latency = 2;
}
def V2Write_4cyc_2L01_2V
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV, V2UnitV]> {
  let Latency = 4;
}
def V2Write_8cyc_2M0_2V02
    : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitV02, V2UnitV02]> {
  let Latency = 8;
}
def V2Write_8cyc_2V_2V1
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV1, V2UnitV1]> {
  let Latency = 8;
}
def V2Write_4cyc_2M0_2M
    : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM, V2UnitM]> {
  let Latency = 4;
}
def V2Write_5cyc_2M0_2M
    : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM, V2UnitM]> {
  let Latency = 5;
}
def V2Write_6cyc_2I_2L
    : SchedWriteRes<[V2UnitI, V2UnitI, V2UnitL, V2UnitL]> {
  let Latency = 6;
}
def V2Write_7cyc_4L
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL]> {
  let Latency = 7;
}
def V2Write_6cyc_1L01_3V01
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 6;
}
} // NumMicroOps = 4
568
569//===----------------------------------------------------------------------===//
570// Define generic 5 micro-op types
571
// Every write in this group is five micro-ops.
let NumMicroOps = 5 in {
def V2Write_2cyc_1L01_2V01_2I
    : SchedWriteRes<[V2UnitL01,
                     V2UnitV01, V2UnitV01,
                     V2UnitI, V2UnitI]> {
  let Latency = 2;
}
def V2Write_8cyc_2L_3V
    : SchedWriteRes<[V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 8;
}
def V2Write_9cyc_1L_4V
    : SchedWriteRes<[V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 9;
}
def V2Write_10cyc_1L_4V
    : SchedWriteRes<[V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 10;
}
def V2Write_6cyc_5V
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 6;
}
} // NumMicroOps = 5
601
602//===----------------------------------------------------------------------===//
603// Define generic 6 micro-op types
604
// Every write in this group is six micro-ops. Resource lists are laid out
// one unit kind per line.
let NumMicroOps = 6 in {
def V2Write_8cyc_3L_3V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 8;
}
def V2Write_9cyc_3L_3V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 9;
}
def V2Write_9cyc_2L_4V
    : SchedWriteRes<[V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 9;
}
def V2Write_9cyc_2L_2V_2S
    : SchedWriteRes<[V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV,
                     V2UnitS, V2UnitS]> {
  let Latency = 9;
}
def V2Write_9cyc_2V_4V13
    : SchedWriteRes<[V2UnitV, V2UnitV,
                     V2UnitV13, V2UnitV13, V2UnitV13, V2UnitV13]> {
  let Latency = 9;
}
def V2Write_2cyc_3L01_3V
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 2;
}
def V2Write_4cyc_2L01_4V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 4;
}
def V2Write_5cyc_2L01_4V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 5;
}
def V2Write_2cyc_3L01_3V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 2;
}
def V2Write_4cyc_2L01_2S_2V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01,
                     V2UnitS, V2UnitS,
                     V2UnitV01, V2UnitV01]> {
  let Latency = 4;
}
} // NumMicroOps = 6
664
665//===----------------------------------------------------------------------===//
666// Define generic 7 micro-op types
667
// Seven micro-ops: three on V2UnitL followed by four on V2UnitV.
let Latency = 8, NumMicroOps = 7 in
def V2Write_8cyc_3L_4V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]>;
673
674//===----------------------------------------------------------------------===//
675// Define generic 8 micro-op types
676
// Every write in this group is eight micro-ops.
let NumMicroOps = 8 in {
def V2Write_2cyc_4L01_4V
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 2;
}
def V2Write_2cyc_4L01_4V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 2;
}
def V2Write_4cyc_4L01_4V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 4;
}
def V2Write_6cyc_2L01_6V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01,
                     V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 6;
}
def V2Write_8cyc_4L_4V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 8;
}
} // NumMicroOps = 8
710
711//===----------------------------------------------------------------------===//
712// Define generic 9 micro-op types
713
// Every write in this group is nine micro-ops.
let NumMicroOps = 9 in {
def V2Write_6cyc_3L01_6V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01,
                     V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 6;
}
def V2Write_10cyc_1L_8V
    : SchedWriteRes<[V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 10;
}
def V2Write_10cyc_3V_3L_3S
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV,
                     V2UnitL, V2UnitL, V2UnitL,
                     V2UnitS, V2UnitS, V2UnitS]> {
  let Latency = 10;
}
} // NumMicroOps = 9
734
735//===----------------------------------------------------------------------===//
736// Define generic 10 micro-op types
737
// Ten micro-ops: six on V2UnitL followed by four on V2UnitV.
let Latency = 9, NumMicroOps = 10 in
def V2Write_9cyc_6L_4V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
                     V2UnitL, V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]>;
744
745//===----------------------------------------------------------------------===//
746// Define generic 12 micro-op types
747
// Every write in this group is twelve micro-ops: four load/store-side units
// followed by eight vector-side units.
let NumMicroOps = 12 in {
def V2Write_5cyc_4L01_8V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
                     V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 5;
}
def V2Write_9cyc_4L_8V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 9;
}
def V2Write_10cyc_4L_8V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 10;
}
} // NumMicroOps = 12
771
772//===----------------------------------------------------------------------===//
773// Define generic 16 micro-op types
774
// Every write in this group is sixteen micro-ops.
let NumMicroOps = 16 in {
def V2Write_7cyc_4L01_12V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
                     V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
                     V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 7;
}
def V2Write_10cyc_4L_8V_4S
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV,
                     V2UnitS, V2UnitS, V2UnitS, V2UnitS]> {
  let Latency = 10;
}
} // NumMicroOps = 16
794
795//===----------------------------------------------------------------------===//
796// Define generic 18 micro-op types
797
// Eighteen micro-ops: nine on V2UnitL01 followed by nine on V2UnitV01.
let Latency = 7, NumMicroOps = 18 in
def V2Write_7cyc_9L01_9V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01,
                     V2UnitV01, V2UnitV01, V2UnitV01,
                     V2UnitV01, V2UnitV01, V2UnitV01]>;
807
808//===----------------------------------------------------------------------===//
809// Define generic 27 micro-op types
810
// Twenty-seven micro-ops: nine each on V2UnitL01, V2UnitS and V2UnitV01,
// listed in that order.
let Latency = 7, NumMicroOps = 27 in
def V2Write_7cyc_9L01_9S_9V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitS, V2UnitS, V2UnitS,
                     V2UnitS, V2UnitS, V2UnitS,
                     V2UnitS, V2UnitS, V2UnitS,
                     V2UnitV01, V2UnitV01, V2UnitV01,
                     V2UnitV01, V2UnitV01, V2UnitV01,
                     V2UnitV01, V2UnitV01, V2UnitV01]>;
824
825//===----------------------------------------------------------------------===//
826// Define generic 36 micro-op types
827
// Latency-11 write that lists V2UnitL01 eighteen times and V2UnitV01 eighteen
// times; NumMicroOps (36) equals the number of listed resources.
828def V2Write_11cyc_18L01_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
829                                               V2UnitL01, V2UnitL01, V2UnitL01,
830                                               V2UnitL01, V2UnitL01, V2UnitL01,
831                                               V2UnitL01, V2UnitL01, V2UnitL01,
832                                               V2UnitL01, V2UnitL01, V2UnitL01,
833                                               V2UnitL01, V2UnitL01, V2UnitL01,
834                                               V2UnitV01, V2UnitV01, V2UnitV01,
835                                               V2UnitV01, V2UnitV01, V2UnitV01,
836                                               V2UnitV01, V2UnitV01, V2UnitV01,
837                                               V2UnitV01, V2UnitV01, V2UnitV01,
838                                               V2UnitV01, V2UnitV01, V2UnitV01,
839                                               V2UnitV01, V2UnitV01,
840                                               V2UnitV01]> {
841  let Latency     = 11;
842  let NumMicroOps = 36;
843}
844
845//===----------------------------------------------------------------------===//
846// Define generic 54 micro-op types
847
// Latency-11 write that lists V2UnitL01, V2UnitS and V2UnitV01 eighteen times
// each; NumMicroOps (54) equals the number of listed resources.
848def V2Write_11cyc_18L01_18S_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
849                                                   V2UnitL01, V2UnitL01,
850                                                   V2UnitL01, V2UnitL01,
851                                                   V2UnitL01, V2UnitL01,
852                                                   V2UnitL01, V2UnitL01,
853                                                   V2UnitL01, V2UnitL01,
854                                                   V2UnitL01, V2UnitL01,
855                                                   V2UnitL01, V2UnitL01,
856                                                   V2UnitL01, V2UnitL01,
857                                                   V2UnitS, V2UnitS, V2UnitS,
858                                                   V2UnitS, V2UnitS, V2UnitS,
859                                                   V2UnitS, V2UnitS, V2UnitS,
860                                                   V2UnitS, V2UnitS, V2UnitS,
861                                                   V2UnitS, V2UnitS, V2UnitS,
862                                                   V2UnitS, V2UnitS, V2UnitS,
863                                                   V2UnitV01, V2UnitV01,
864                                                   V2UnitV01, V2UnitV01,
865                                                   V2UnitV01, V2UnitV01,
866                                                   V2UnitV01, V2UnitV01,
867                                                   V2UnitV01, V2UnitV01,
868                                                   V2UnitV01, V2UnitV01,
869                                                   V2UnitV01, V2UnitV01,
870                                                   V2UnitV01, V2UnitV01,
871                                                   V2UnitV01, V2UnitV01]> {
872  let Latency     = 11;
873  let NumMicroOps = 54;
874}
875
876//===----------------------------------------------------------------------===//
877// Define predicate-controlled types
878
// Each variant below has exactly two SchedVar entries: a predicated one and a
// final NoSchedPred one (presumably the fallback when the predicate fails).
// For the "XorYcyc" variants, the NeoversePdIsPg entry selects the Y-cycle
// write and the NoSchedPred entry selects the X-cycle write on the same units.
879def V2Write_ArithI : SchedWriteVariant<[
880                       SchedVar<IsCheapLSL,  [V2Write_1cyc_1I]>,
881                       SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
882
// Same selection as V2Write_ArithI, but the cheap case uses V2Write_1cyc_1F.
883def V2Write_ArithF : SchedWriteVariant<[
884                       SchedVar<IsCheapLSL,  [V2Write_1cyc_1F]>,
885                       SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
886
887def V2Write_Logical : SchedWriteVariant<[
888                        SchedVar<NeoverseNoLSL, [V2Write_1cyc_1F]>,
889                        SchedVar<NoSchedPred,   [V2Write_2cyc_1M]>]>;
890
891def V2Write_Extr : SchedWriteVariant<[
892                     SchedVar<IsRORImmIdiomPred, [V2Write_1cyc_1I]>,
893                     SchedVar<NoSchedPred,       [V2Write_3cyc_1I_1M]>]>;
894
// The NeoverseHQForm case of the next two variants adds one I unit to the
// write (and one cycle of latency for the load).
895def V2Write_LdrHQ : SchedWriteVariant<[
896                      SchedVar<NeoverseHQForm,  [V2Write_7cyc_1I_1L]>,
897                      SchedVar<NoSchedPred,     [V2Write_6cyc_1L]>]>;
898
899def V2Write_StrHQ : SchedWriteVariant<[
900                      SchedVar<NeoverseHQForm,  [V2Write_2cyc_1L01_1V01_1I]>,
901                      SchedVar<NoSchedPred,     [V2Write_2cyc_1L01_1V01]>]>;
902
903def V2Write_2or3cyc_1M : SchedWriteVariant<[
904                      SchedVar<NeoversePdIsPg,  [V2Write_3cyc_1M]>,
905                      SchedVar<NoSchedPred,     [V2Write_2cyc_1M]>]>;
906
907def V2Write_3or4cyc_2M : SchedWriteVariant<[
908                      SchedVar<NeoversePdIsPg,  [V2Write_4cyc_2M]>,
909                      SchedVar<NoSchedPred,     [V2Write_3cyc_2M]>]>;
910
911def V2Write_1or2cyc_1M0 : SchedWriteVariant<[
912                      SchedVar<NeoversePdIsPg,  [V2Write_2cyc_1M0]>,
913                      SchedVar<NoSchedPred,     [V2Write_1cyc_1M0]>]>;
914
915def V2Write_2or3cyc_1M0 : SchedWriteVariant<[
916                      SchedVar<NeoversePdIsPg,  [V2Write_3cyc_1M0]>,
917                      SchedVar<NoSchedPred,     [V2Write_2cyc_1M0]>]>;
918
919def V2Write_1or2cyc_1M0_1M : SchedWriteVariant<[
920                      SchedVar<NeoversePdIsPg,  [V2Write_2cyc_1M0_1M]>,
921                      SchedVar<NoSchedPred,     [V2Write_1cyc_1M0_1M]>]>;
922
923def V2Write_3or4cyc_1M0_1M : SchedWriteVariant<[
924                      SchedVar<NeoversePdIsPg,  [V2Write_4cyc_1M0_1M]>,
925                      SchedVar<NoSchedPred,     [V2Write_3cyc_1M0_1M]>]>;
926
927def V2Write_4or5cyc_2M0_2M : SchedWriteVariant<[
928                      SchedVar<NeoversePdIsPg,  [V2Write_5cyc_2M0_2M]>,
929                      SchedVar<NoSchedPred,     [V2Write_4cyc_2M0_2M]>]>;
930
931def V2Write_4or5cyc_1V0_1M0 : SchedWriteVariant<[
932                      SchedVar<NeoversePdIsPg,  [V2Write_5cyc_1V0_1M0]>,
933                      SchedVar<NoSchedPred,     [V2Write_4cyc_1V0_1M0]>]>;
934
935def V2Write_2or3cyc_1V0_1M : SchedWriteVariant<[
936                      SchedVar<NeoversePdIsPg,  [V2Write_3cyc_1V0_1M]>,
937                      SchedVar<NoSchedPred,     [V2Write_2cyc_1V0_1M]>]>;
938
939def V2Write_IncDec : SchedWriteVariant<[
940                      SchedVar<NeoverseCheapIncDec, [V2Write_1cyc_1F]>,
941                      SchedVar<NoSchedPred,         [V2Write_2cyc_1M]>]>;
942
943//===----------------------------------------------------------------------===//
944// Define forwarded types
945
946// NOTE: SOG, p. 16, n. 2: Accumulator forwarding is not supported for
947// consumers of 64 bit multiply high operations?
// V2Wr_IMUL selects V2Wr_IM when IsReg3ZeroPred holds and V2Wr_IMA otherwise.
// V2Rd_IMA is a read advance of 1 that names only V2Wr_IMA as its producer.
948def V2Wr_IM   : SchedWriteRes<[V2UnitM]>  { let Latency = 2; }
949def V2Wr_IMA  : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
950def V2Wr_IMUL : SchedWriteVariant<[
951                  SchedVar<IsReg3ZeroPred, [V2Wr_IM]>,
952                  SchedVar<NoSchedPred,    [V2Wr_IMA]>]>;
953def V2Rd_IMA  : SchedReadAdvance<1, [V2Wr_IMA]>;
954
// Write / read-advance pairs. Each V2Rd_* is a SchedReadAdvance whose second
// argument lists the write(s) it applies to; the first argument is the number
// of cycles of advance.
// V2Rd_FMA also applies to the generic WriteFMul.
955def V2Wr_FMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
956def V2Rd_FMA : SchedReadAdvance<2, [WriteFMul, V2Wr_FMA]>;
957
958def V2Wr_VA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
959def V2Rd_VA : SchedReadAdvance<3, [V2Wr_VA]>;
960
961def V2Wr_VDOT : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
962def V2Rd_VDOT : SchedReadAdvance<2, [V2Wr_VDOT]>;
963
964def V2Wr_VMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
965def V2Rd_VMMA : SchedReadAdvance<2, [V2Wr_VMMA]>;
966
967def V2Wr_VMA : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
968def V2Rd_VMA : SchedReadAdvance<3, [V2Wr_VMA]>;
969
// NOTE(review): V2Wr_VMAH lists V2UnitV02 twice but does not set NumMicroOps,
// unlike the generic multi-micro-op types in this file - confirm intended.
970def V2Wr_VMAH : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
971def V2Rd_VMAH : SchedReadAdvance<2, [V2Wr_VMAH]>;
972
973def V2Wr_VMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
974def V2Rd_VMAL : SchedReadAdvance<3, [V2Wr_VMAL]>;
975
976def V2Wr_VPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
977def V2Rd_VPA : SchedReadAdvance<3, [V2Wr_VPA]>;
978
979def V2Wr_VSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
980def V2Rd_VSA : SchedReadAdvance<3, [V2Wr_VSA]>;
981
982def V2Wr_VFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
983def V2Rd_VFCMA : SchedReadAdvance<2, [V2Wr_VFCMA]>;
984
// V2Rd_VFMA applies to both V2Wr_VFM and V2Wr_VFMA.
985def V2Wr_VFM  : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
986def V2Wr_VFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
987def V2Rd_VFMA : SchedReadAdvance<2, [V2Wr_VFM, V2Wr_VFMA]>;
988
989def V2Wr_VFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
990def V2Rd_VFMAL : SchedReadAdvance<2, [V2Wr_VFMAL]>;
991
992def V2Wr_VBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
993def V2Rd_VBFDOT : SchedReadAdvance<2, [V2Wr_VBFDOT]>;
994def V2Wr_VBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
995def V2Rd_VBFMMA : SchedReadAdvance<2, [V2Wr_VBFMMA]>;
996def V2Wr_VBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
997def V2Rd_VBFMAL : SchedReadAdvance<3, [V2Wr_VBFMAL]>;
998
999def V2Wr_CRC : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
1000def V2Rd_CRC : SchedReadAdvance<1, [V2Wr_CRC]>;
1001
// Z-prefixed write / read-advance pairs (presumably the SVE counterparts of
// the V-prefixed pairs - confirm against the users of these names).
// NOTE(review): V2Wr_ZCMAD, V2Wr_ZMABHS, V2Wr_ZMAD and V2Wr_ZMASQD list
// V2UnitV02 twice but do not set NumMicroOps - confirm intended.
1002def V2Wr_ZA  : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
1003def V2Rd_ZA  : SchedReadAdvance<3, [V2Wr_ZA]>;
1004def V2Wr_ZPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
1005def V2Rd_ZPA : SchedReadAdvance<3, [V2Wr_ZPA]>;
1006def V2Wr_ZSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
1007def V2Rd_ZSA : SchedReadAdvance<3, [V2Wr_ZSA]>;
1008
1009def V2Wr_ZDOTB : SchedWriteRes<[V2UnitV]>   { let Latency = 3; }
1010def V2Rd_ZDOTB : SchedReadAdvance<2, [V2Wr_ZDOTB]>;
1011def V2Wr_ZDOTH : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
1012def V2Rd_ZDOTH : SchedReadAdvance<3, [V2Wr_ZDOTH]>;
1013
1014// NOTE: SOG p. 43: Complex multiply-add B, H, S element size: How to reduce
1015// throughput to 1 in case of forwarding?
1016def V2Wr_ZCMABHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
1017def V2Rd_ZCMABHS : SchedReadAdvance<3, [V2Wr_ZCMABHS]>;
1018def V2Wr_ZCMAD   : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
1019def V2Rd_ZCMAD   : SchedReadAdvance<2, [V2Wr_ZCMAD]>;
1020
1021def V2Wr_ZMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
1022def V2Rd_ZMMA : SchedReadAdvance<2, [V2Wr_ZMMA]>;
1023
1024def V2Wr_ZMABHS : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
1025def V2Rd_ZMABHS : SchedReadAdvance<3, [V2Wr_ZMABHS]>;
1026def V2Wr_ZMAD  : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
1027def V2Rd_ZMAD  : SchedReadAdvance<2, [V2Wr_ZMAD]>;
1028
1029def V2Wr_ZMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
1030def V2Rd_ZMAL : SchedReadAdvance<3, [V2Wr_ZMAL]>;
1031
// A single read advance (V2Rd_ZMASQ) is shared by the three ZMASQ* writes.
1032def V2Wr_ZMASQL   : SchedWriteRes<[V2UnitV02]>            { let Latency = 4; }
1033def V2Wr_ZMASQBHS : SchedWriteRes<[V2UnitV02]>            { let Latency = 4; }
1034def V2Wr_ZMASQD   : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
1035def V2Rd_ZMASQ    : SchedReadAdvance<2, [V2Wr_ZMASQL, V2Wr_ZMASQBHS,
1036                                         V2Wr_ZMASQD]>;
1037
1038def V2Wr_ZFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1039def V2Rd_ZFCMA : SchedReadAdvance<3, [V2Wr_ZFCMA]>;
1040
1041def V2Wr_ZFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1042def V2Rd_ZFMA : SchedReadAdvance<2, [V2Wr_ZFMA]>;
1043
1044def V2Wr_ZFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1045def V2Rd_ZFMAL : SchedReadAdvance<2, [V2Wr_ZFMAL]>;
1046
1047def V2Wr_ZBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1048def V2Rd_ZBFDOT : SchedReadAdvance<2, [V2Wr_ZBFDOT]>;
1049def V2Wr_ZBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
1050def V2Rd_ZBFMMA : SchedReadAdvance<2, [V2Wr_ZBFMMA]>;
1051def V2Wr_ZBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1052def V2Rd_ZBFMAL : SchedReadAdvance<3, [V2Wr_ZBFMAL]>;
1053
1054//===----------------------------------------------------------------------===//
1055// Define types with long resource cycles (rc)
1056
// Naming: V2Write_<latency>cyc_1<unit>_<n>rc. Each def uses a single resource,
// sets Latency to <latency> and ResourceCycles to [<n>].
1057def V2Write_6cyc_1V1_5rc    : SchedWriteRes<[V2UnitV1]>  { let Latency =  6; let ResourceCycles = [ 5]; }
1058def V2Write_7cyc_1V02_7rc   : SchedWriteRes<[V2UnitV02]> { let Latency =  7; let ResourceCycles = [ 7]; }
1059def V2Write_10cyc_1V02_5rc  : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ResourceCycles = [ 5]; }
1060def V2Write_10cyc_1V02_9rc  : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ResourceCycles = [ 9]; }
1061def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ResourceCycles = [10]; }
1062def V2Write_10cyc_1V0_9rc   : SchedWriteRes<[V2UnitV0]>  { let Latency = 10; let ResourceCycles = [ 9]; }
1063def V2Write_10cyc_1V1_9rc   : SchedWriteRes<[V2UnitV1]>  { let Latency = 10; let ResourceCycles = [ 9]; }
1064def V2Write_13cyc_1V0_12rc  : SchedWriteRes<[V2UnitV0]>  { let Latency = 13; let ResourceCycles = [12]; }
1065def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ResourceCycles = [12]; }
1066def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ResourceCycles = [13]; }
1067def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ResourceCycles = [14]; }
1068def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ResourceCycles = [15]; }
1069def V2Write_16cyc_1V0_14rc  : SchedWriteRes<[V2UnitV0]>  { let Latency = 16; let ResourceCycles = [14]; }
1070
1071// Miscellaneous
1072// -----------------------------------------------------------------------------
1073
// COPY is given the generic WriteI write.
1074def : InstRW<[WriteI], (instrs COPY)>;
1075
1076// §3.3 Branch instructions
1077// -----------------------------------------------------------------------------
1078
// Both generic branch writes map to V2Write_1cyc_1B; BL and BLR use
// V2Write_1cyc_1B_1R instead.
1079// Branch, immed
1080// Compare and branch
1081def : SchedAlias<WriteBr,    V2Write_1cyc_1B>;
1082
1083// Branch, register
1084def : SchedAlias<WriteBrReg, V2Write_1cyc_1B>;
1085
1086// Branch and link, immed
1087// Branch and link, register
1088def : InstRW<[V2Write_1cyc_1B_1R], (instrs BL, BLR)>;
1089
1090// §3.4 Arithmetic and Logical Instructions
1091// -----------------------------------------------------------------------------
1092
// SchedAlias entries map a generic write to a V2 write type; InstRW entries
// assign a write type to the opcodes named or matched by the given regexes.
1093// ALU, basic
1094// ALU, basic, flagset
1095def : SchedAlias<WriteI,     V2Write_1cyc_1I>;
1096def : InstRW<[V2Write_1cyc_1F],
1097             (instregex "^(ADC|SBC)S[WX]r$")>;
1098
1099// ALU, extend and shift
1100def : SchedAlias<WriteIEReg, V2Write_2cyc_1M>;
1101
1102// Arithmetic, LSL shift, shift <= 4
1103// Arithmetic, flagset, LSL shift, shift <= 4
1104// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
// Non-flag-setting forms go through V2Write_ArithI; the flag-setting ADDS/SUBS
// shifted-register forms use V2Write_ArithF.
1105def : SchedAlias<WriteISReg, V2Write_ArithI>;
1106def : InstRW<[V2Write_ArithF],
1107             (instregex "^(ADD|SUB)S[WX]rs$")>;
1108
1109// Arithmetic, immediate to logical address tag
1110def : InstRW<[V2Write_2cyc_1M], (instrs ADDG, SUBG)>;
1111
1112// Convert floating-point condition flags
1113// Flag manipulation instructions
// WriteSys is given latency 1 and an empty resource list.
1114def : WriteRes<WriteSys, []> { let Latency = 1; }
1115
1116// Insert Random Tags
1117def : InstRW<[V2Write_2cyc_1M], (instrs IRG, IRGstack)>;
1118
1119// Insert Tag Mask
1120// Subtract Pointer
1121// Subtract Pointer, flagset
1122def : InstRW<[V2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;
1123
1124// Logical, shift, no flagset
1125def : InstRW<[V2Write_1cyc_1I],
1126             (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
1127
1128// Logical, shift, flagset
1129def : InstRW<[V2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
1130
1131// Move and shift instructions
1132// -----------------------------------------------------------------------------
1133
// The generic WriteImm write maps to V2Write_1cyc_1I.
1134def : SchedAlias<WriteImm, V2Write_1cyc_1I>;
1135
1136// §3.5 Divide and multiply instructions
1137// -----------------------------------------------------------------------------
1138
1139// SDIV, UDIV
1140def : SchedAlias<WriteID32,  V2Write_12cyc_1M0>;
1141def : SchedAlias<WriteID64,  V2Write_20cyc_1M0>;
1142
// Both generic integer multiply writes map to V2Write_2cyc_1M.
1143def : SchedAlias<WriteIM32, V2Write_2cyc_1M>;
1144def : SchedAlias<WriteIM64, V2Write_2cyc_1M>;
1145
1146// Multiply
1147// Multiply accumulate, W-form
1148// Multiply accumulate, X-form
// The last read in the list (V2Rd_IMA) is the read advance paired with
// V2Wr_IMA; the two preceding reads are the generic ReadIM.
1149def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA],
1150             (instregex "^M(ADD|SUB)[WX]rrr$")>;
1151
1152// Multiply accumulate long
1153// Multiply long
1154def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA],
1155             (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
1156
1157// Multiply high
1158def : InstRW<[V2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>;
1159
1160// Pointer Authentication Instructions (v8.3 PAC)
1161// -----------------------------------------------------------------------------
1162
1163// Authenticate data address
1164// Authenticate instruction address
1165// Compute pointer authentication code for data address
1166// Compute pointer authentication code, using generic key
1167// Compute pointer authentication code for instruction address
// Both patterns are anchored at the start only (no trailing "$").
1168def : InstRW<[V2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>;
1169
1170// Branch and link, register, with pointer authentication
1171// Branch, register, with pointer authentication
1172// Branch, return, with pointer authentication
1173def : InstRW<[V2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
1174                                            BRAAZ, BRAB, BRABZ, RETAA, RETAB,
1175                                            ERETAA, ERETAB)>;
1176
1177
1178// Load register, with pointer authentication
1179def : InstRW<[V2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
1180
1181// Strip pointer authentication code
1182def : InstRW<[V2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>;
1183
1184// Miscellaneous data-processing instructions
1185// -----------------------------------------------------------------------------
1186
1187// Address generation
1188def : InstRW<[V2Write_1cyc_1F], (instrs ADR, ADRP)>;
1189
1190// Bitfield extract, one reg
1191// Bitfield extract, two regs
// V2Write_Extr is applied both through the generic WriteExtr alias and through
// an explicit InstRW on EXTRWrri/EXTRXrri.
1192def : SchedAlias<WriteExtr, V2Write_Extr>;
1193def : InstRW<[V2Write_Extr], (instrs EXTRWrri, EXTRXrri)>;
1194
1195// Bitfield move, basic
1196def : SchedAlias<WriteIS, V2Write_1cyc_1I>;
1197
1198// Bitfield move, insert
1199def : InstRW<[V2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>;
1200
1201// Load instructions
1202// -----------------------------------------------------------------------------
1203
1204// NOTE: SOG p. 19: Throughput of LDN?P X-form should be 2, but reported as 3.
1205
// Both generic load writes map to V2Write_4cyc_1L.
1206def : SchedAlias<WriteLD,    V2Write_4cyc_1L>;
1207def : SchedAlias<WriteLDIdx, V2Write_4cyc_1L>;
1208
1209// Load register, literal
1210def : InstRW<[V2Write_5cyc_1L_1F], (instrs LDRWl, LDRXl, LDRSWl, PRFMl)>;
1211
1212// Load pair, signed immed offset, signed words
1213def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi], (instrs LDPSWi)>;
1214
1215// Load pair, immed post-index or immed pre-index, signed words
// Same writes as LDPSWi plus a trailing WriteAdr.
1216def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi, WriteAdr],
1217             (instregex "^LDPSW(post|pre)$")>;
1218
1219// Store instructions
1220// -----------------------------------------------------------------------------
1221
1222// NOTE: SOG, p. 20: Unsure if STRH uses pipeline I.
1223
// All three generic store writes map to V2Write_1cyc_1L01_1D; the generic
// address-writeback write (WriteAdr) maps to V2Write_1cyc_1I.
1224def : SchedAlias<WriteST,    V2Write_1cyc_1L01_1D>;
1225def : SchedAlias<WriteSTIdx, V2Write_1cyc_1L01_1D>;
1226def : SchedAlias<WriteSTP,   V2Write_1cyc_1L01_1D>;
1227def : SchedAlias<WriteAdr,   V2Write_1cyc_1I>; // copied from A57.
1228
1229// Tag load instructions
1230// -----------------------------------------------------------------------------
1231
1232// Load allocation tag
1233// Load multiple allocation tags
// LDG and LDGM use the same write type as the generic WriteLD alias in this
// file (V2Write_4cyc_1L).
1234def : InstRW<[V2Write_4cyc_1L], (instrs LDG, LDGM)>;
1235
1236// Tag store instructions
1237// -----------------------------------------------------------------------------
1238
1239// Store allocation tags to one or two granules, post-index
1240// Store allocation tags to one or two granules, pre-index
1241// Store allocation tag to one or two granules, zeroing, post-index
1242// Store allocation tag to one or two granules, zeroing, pre-index
1243// Store allocation tag and reg pair to memory, post-index
1244// Store allocation tag and reg pair to memory, pre-index
// Pre/post-index forms use the write type with an additional I unit.
1245def : InstRW<[V2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex,
1246                                                ST2GPreIndex, ST2GPostIndex,
1247                                                STZGPreIndex, STZGPostIndex,
1248                                                STZ2GPreIndex, STZ2GPostIndex,
1249                                                STGPpre, STGPpost)>;
1250
1251// Store allocation tags to one or two granules, signed offset
1252// Store allocation tag to two granules, zeroing, signed offset
1253// Store allocation tag and reg pair to memory, signed offset
1254// Store multiple allocation tags
1255def : InstRW<[V2Write_1cyc_1L01_1D], (instrs STGi, ST2Gi, STZGi,
1256                                             STZ2Gi, STGPi, STGM, STZGM)>;
1257
1258// FP data processing instructions
1259// -----------------------------------------------------------------------------
1260
1261// FP absolute value
1262// FP arithmetic
1263// FP min/max
1264// FP negate
1265// FP select
1266def : SchedAlias<WriteF,     V2Write_2cyc_1V>;
1267
1268// FP compare
1269def : SchedAlias<WriteFCmp,  V2Write_2cyc_1V0>;
1270
1271// FP divide, square root
// The generic WriteFDiv alias uses the 7-cycle type; the InstRW entries below
// assign per-form latencies to the scalar FDIV/FSQRT opcodes they name.
1272def : SchedAlias<WriteFDiv,  V2Write_7cyc_1V02>;
1273
1274// FP divide, H-form
1275def : InstRW<[V2Write_7cyc_1V02],  (instrs FDIVHrr)>;
1276// FP divide, S-form
1277def : InstRW<[V2Write_10cyc_1V02], (instrs FDIVSrr)>;
1278// FP divide, D-form
1279def : InstRW<[V2Write_15cyc_1V02], (instrs FDIVDrr)>;
1280
1281// FP square root, H-form
1282def : InstRW<[V2Write_7cyc_1V02],  (instrs FSQRTHr)>;
1283// FP square root, S-form
1284def : InstRW<[V2Write_9cyc_1V02],  (instrs FSQRTSr)>;
1285// FP square root, D-form
1286def : InstRW<[V2Write_16cyc_1V02], (instrs FSQRTDr)>;
1287
1288// FP multiply
// Defined with WriteRes (not SchedAlias); V2Rd_FMA lists WriteFMul as one of
// the writes it advances against.
1289def : WriteRes<WriteFMul, [V2UnitV]> { let Latency = 3; }
1290
1291// FP multiply accumulate
// Only the last read in the list (V2Rd_FMA) is a read advance; the first two
// reads are ReadDefault.
1292def : InstRW<[V2Wr_FMA, ReadDefault, ReadDefault, V2Rd_FMA],
1293             (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
1294
1295// FP round to integral
1296def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
1297                                             "^FRINT(32|64)[XZ][SD]r$")>;
1298
1299// FP miscellaneous instructions
1300// -----------------------------------------------------------------------------
1301
1302// FP convert, from gen to vec reg
1303def : InstRW<[V2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
1304
1305// FP convert, from vec to gen reg
1306def : InstRW<[V2Write_3cyc_1V01],
1307             (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]ri?$")>;
1308
1309// FP convert, JavaScript from vec to gen reg
// Applied to the generic WriteFCvt write, so it covers every user of WriteFCvt
// that has no InstRW entry of its own.
1310def : SchedAlias<WriteFCvt, V2Write_3cyc_1V0>;
1311
1312// FP convert, from vec to vec reg
1313def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr,
1314                                          FCVTHDr, FCVTSDr, FCVTXNv1i64)>;
1315
1316// FP move, immed
1317// FP move, register
1318def : SchedAlias<WriteFImm, V2Write_2cyc_1V>;
1319
1320// FP transfer, from gen to low half of vec reg
1321def : InstRW<[V2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
1322
1323// FP transfer, from gen to high half of vec reg
1324def : InstRW<[V2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;
1325
1326// FP transfer, from vec to gen reg
1327def : SchedAlias<WriteFCopy, V2Write_2cyc_2V01>;
1328
1329// FP load instructions
1330// -----------------------------------------------------------------------------
1331
1332// Load vector reg, literal, S/D/Q forms
1333def : InstRW<[V2Write_7cyc_1F_1L], (instregex "^LDR[SDQ]l$")>;
1334
1335// Load vector reg, unscaled immed
1336def : InstRW<[V2Write_6cyc_1L], (instregex "^LDUR[BHSDQ]i$")>;
1337
1338// Load vector reg, immed post-index
1339// Load vector reg, immed pre-index
// NOTE(review): WriteAdr is listed last here and in the pair forms below,
// whereas the FP store pre/post-index entries in this file list it first -
// confirm the intended operand order.
1340def : InstRW<[V2Write_6cyc_1I_1L, WriteAdr],
1341             (instregex "^LDR[BHSDQ](pre|post)$")>;
1342
1343// Load vector reg, unsigned immed
1344def : InstRW<[V2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;
1345
1346// Load vector reg, register offset, basic
1347// Load vector reg, register offset, scale, S/D-form
1348// Load vector reg, register offset, scale, H/Q-form
1349// Load vector reg, register offset, extend
1350// Load vector reg, register offset, extend, scale, S/D-form
1351// Load vector reg, register offset, extend, scale, H/Q-form
// V2Write_LdrHQ picks the 7-cycle I+L write when NeoverseHQForm holds and the
// 6-cycle L write otherwise.
1352def : InstRW<[V2Write_LdrHQ, ReadAdrBase], (instregex "^LDR[BHSDQ]ro[WX]$")>;
1353
1354// Load vector pair, immed offset, S/D-form
1355def : InstRW<[V2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
1356
1357// Load vector pair, immed offset, Q-form
1358def : InstRW<[V2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
1359
1360// Load vector pair, immed post-index, S/D-form
1361// Load vector pair, immed pre-index, S/D-form
1362def : InstRW<[V2Write_6cyc_1I_1L, WriteLDHi, WriteAdr],
1363             (instregex "^LDP[SD](pre|post)$")>;
1364
1365// Load vector pair, immed post-index, Q-form
1366// Load vector pair, immed pre-index, Q-form
1367def : InstRW<[V2Write_6cyc_2I_2L, WriteLDHi, WriteAdr], (instrs LDPQpost,
1368                                                                LDPQpre)>;
1369
1370// FP store instructions
1371// -----------------------------------------------------------------------------
1372
1373// Store vector reg, unscaled immed, B/H/S/D-form
1374// Store vector reg, unscaled immed, Q-form
1375def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>;
1376
1377// Store vector reg, immed post-index, B/H/S/D-form
1378// Store vector reg, immed post-index, Q-form
1379// Store vector reg, immed pre-index, B/H/S/D-form
1380// Store vector reg, immed pre-index, Q-form
// WriteAdr is listed first, followed by the store write with an extra I unit.
1381def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I],
1382             (instregex "^STR[BHSDQ](pre|post)$")>;
1383
1384// Store vector reg, unsigned immed, B/H/S/D-form
1385// Store vector reg, unsigned immed, Q-form
1386def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>;
1387
1388// Store vector reg, register offset, basic, B/H/S/D-form
1389// Store vector reg, register offset, basic, Q-form
1390// Store vector reg, register offset, scale, H-form
1391// Store vector reg, register offset, scale, S/D-form
1392// Store vector reg, register offset, scale, Q-form
1393// Store vector reg, register offset, extend, B/H/S/D-form
1394// Store vector reg, register offset, extend, Q-form
1395// Store vector reg, register offset, extend, scale, H-form
1396// Store vector reg, register offset, extend, scale, S/D-form
1397// Store vector reg, register offset, extend, scale, Q-form
// V2Write_StrHQ adds an I unit to the store write when NeoverseHQForm holds.
1398def : InstRW<[V2Write_StrHQ, ReadAdrBase],
1399             (instregex "^STR[BHSDQ]ro[WX]$")>;
1400
1401// Store vector pair, immed offset, S-form
1402// Store vector pair, immed offset, D-form
1403def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STN?P[SD]i$")>;
1404
1405// Store vector pair, immed offset, Q-form
1406def : InstRW<[V2Write_2cyc_1L01_2V01], (instrs STPQi, STNPQi)>;
1407
1408// Store vector pair, immed post-index, S-form
1409// Store vector pair, immed post-index, D-form
1410// Store vector pair, immed pre-index, S-form
1411// Store vector pair, immed pre-index, D-form
1412def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I],
1413             (instregex "^STP[SD](pre|post)$")>;
1414
// NOTE(review): unlike the S/D-form pre/post-index entries above, the two
// Q-form entries below do not list WriteAdr - confirm intended.
1415// Store vector pair, immed post-index, Q-form
1416def : InstRW<[V2Write_2cyc_1L01_2V01_1I], (instrs STPQpost)>;
1417
1418// Store vector pair, immed pre-index, Q-form
1419def : InstRW<[V2Write_2cyc_1L01_2V01_2I], (instrs STPQpre)>;
1420
1421// ASIMD integer instructions
1422// -----------------------------------------------------------------------------
1423
1424// ASIMD absolute diff
1425// ASIMD absolute diff long
1426// ASIMD arith, basic
1427// ASIMD arith, complex
1428// ASIMD arith, pair-wise
1429// ASIMD compare
1430// ASIMD logical
1431// ASIMD max/min, basic and pair-wise
// Both generic vector writes (WriteVd and WriteVq) map to V2Write_2cyc_1V.
1432def : SchedAlias<WriteVd, V2Write_2cyc_1V>;
1433def : SchedAlias<WriteVq, V2Write_2cyc_1V>;
1434
1435// ASIMD absolute diff accum
1436// ASIMD absolute diff accum long
// Entries of the form [V2Wr_X, V2Rd_X] pair a write with its read advance.
1437def : InstRW<[V2Wr_VA, V2Rd_VA], (instregex "^[SU]ABAL?v")>;
1438
1439// ASIMD arith, reduce, 4H/4S
1440def : InstRW<[V2Write_2cyc_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
1441
1442// ASIMD arith, reduce, 8B/8H
1443def : InstRW<[V2Write_4cyc_1V13_1V],
1444             (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
1445
1446// ASIMD arith, reduce, 16B
1447def : InstRW<[V2Write_4cyc_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
1448
1449// ASIMD dot product
1450// ASIMD dot product using signed and unsigned integers
1451def : InstRW<[V2Wr_VDOT, V2Rd_VDOT],
1452             (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
1453
1454// ASIMD matrix multiply-accumulate
1455def : InstRW<[V2Wr_VMMA, V2Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;
1456
1457// ASIMD max/min, reduce, 4H/4S
1458def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$",
1459                                             "^[SU](MAX|MIN)Vv4i32v$")>;
1460
1461// ASIMD max/min, reduce, 8B/8H
1462def : InstRW<[V2Write_4cyc_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
1463                                                "^[SU](MAX|MIN)Vv8i16v$")>;
1464
1465// ASIMD max/min, reduce, 16B
// The pattern is written with an explicit "^" anchor, matching the 4H/4S and
// 8B/8H max/min reduce patterns in this file.
1466def : InstRW<[V2Write_4cyc_2V13], (instregex "^[SU](MAX|MIN)Vv16i8v$")>;
1467
1468// ASIMD multiply
1469def : InstRW<[V2Write_4cyc_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;
1470
1471// ASIMD multiply accumulate
// Entries of the form [V2Wr_X, V2Rd_X] pair a write with its read advance.
1472def : InstRW<[V2Wr_VMA, V2Rd_VMA], (instregex "^MLAv", "^MLSv")>;
1473
1474// ASIMD multiply accumulate high
1475def : InstRW<[V2Wr_VMAH, V2Rd_VMAH], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
1476
1477// ASIMD multiply accumulate long
1478def : InstRW<[V2Wr_VMAL, V2Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
1479
1480// ASIMD multiply accumulate saturating long
// Uses a plain write type with no read advance, unlike the other
// multiply-accumulate entries above.
1481def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDML[AS]L[iv]")>;
1482
1483// ASIMD multiply/multiply long (8x8) polynomial, D-form
1484// ASIMD multiply/multiply long (8x8) polynomial, Q-form
1485def : InstRW<[V2Write_3cyc_1V23], (instregex "^PMULL?(v8i8|v16i8)$")>;
1486
1487// ASIMD multiply long
1488def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]MULLv", "^SQDMULL[iv]")>;
1489
1490// ASIMD pairwise add and accumulate long
1491def : InstRW<[V2Wr_VPA, V2Rd_VPA], (instregex "^[SU]ADALPv")>;
1492
1493// ASIMD shift accumulate
1494def : InstRW<[V2Wr_VSA, V2Rd_VSA], (instregex "^[SU]SRA[dv]", "^[SU]RSRA[dv]")>;
1495
1496// ASIMD shift by immed, basic
1497def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHL[dv]", "^SHLLv", "^SHRNv",
1498                                             "^SSHLLv", "^SSHR[dv]", "^USHLLv",
1499                                             "^USHR[dv]")>;
1500
1501// ASIMD shift by immed and insert, basic
1502def : InstRW<[V2Write_2cyc_1V13], (instregex "^SLI[dv]", "^SRI[dv]")>;
1503
1504// ASIMD shift by immed, complex
1505def : InstRW<[V2Write_4cyc_1V13],
1506             (instregex "^RSHRNv", "^SQRSHRU?N[bhsv]", "^(SQSHLU?|UQSHL)[bhsd]$",
1507                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
1508                        "^SQSHRU?N[bhsv]", "^SRSHR[dv]", "^UQRSHRN[bhsv]",
1509                        "^UQSHRN[bhsv]", "^URSHR[dv]")>;
1510
1511// ASIMD shift by register, basic
1512def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]SHLv")>;
1513
1514// ASIMD shift by register, complex
1515def : InstRW<[V2Write_4cyc_1V13],
1516             (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
1517                        "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
1518
1519// ASIMD floating-point instructions
1520// -----------------------------------------------------------------------------
1521
1522// ASIMD FP absolute value/difference
1523// ASIMD FP arith, normal
1524// ASIMD FP compare
1525// ASIMD FP complex add
1526// ASIMD FP max/min, normal
1527// ASIMD FP max/min, pairwise
1528// ASIMD FP negate
1529// Handled by SchedAlias<WriteV[dq], ...>
1530
1531// ASIMD FP complex multiply add
1532def : InstRW<[V2Wr_VFCMA, V2Rd_VFCMA], (instregex "^FCMLAv")>;
1533
// In the convert entries below, the groups involving F16 use the two-micro-op
// 4-cycle type; the F32/F64 groups use the single-micro-op 3-cycle type.
1534// ASIMD FP convert, long (F16 to F32)
1535def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTL(v4|v8)i16")>;
1536
1537// ASIMD FP convert, long (F32 to F64)
1538def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTL(v2|v4)i32")>;
1539
1540// ASIMD FP convert, narrow (F32 to F16)
1541def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTN(v4|v8)i16")>;
1542
1543// ASIMD FP convert, narrow (F64 to F32)
1544def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTN(v2|v4)i32",
1545                                             "^FCVTXN(v2|v4)f32")>;
1546
1547// ASIMD FP convert, other, D-form F32 and Q-form F64
1548def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
1549                                             "^FCVT[AMNPZ][SU]v1i64$",
1550                                             "^FCVTZ[SU]d$",
1551                                             "^[SU]CVTFv2f(32|64)$",
1552                                             "^[SU]CVTFv1i64$",
1553                                             "^[SU]CVTFd$")>;
1554
1555// ASIMD FP convert, other, D-form F16 and Q-form F32
1556def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
1557                                             "^FCVT[AMNPZ][SU]v1i32$",
1558                                             "^FCVTZ[SU]s$",
1559                                             "^[SU]CVTFv4f(16|32)$",
1560                                             "^[SU]CVTFv1i32$",
1561                                             "^[SU]CVTFs$")>;
1562
1563// ASIMD FP convert, other, Q-form F16
1564def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
1565                                             "^FCVT[AMNPZ][SU]v1f16$",
1566                                             "^FCVTZ[SU]h$",
1567                                             "^[SU]CVTFv8f16$",
1568                                             "^[SU]CVTFv1i16$",
1569                                             "^[SU]CVTFh$")>;
1570
// ASIMD FP divide, reduce, multiply, round and square-root entries.
// The divide and square-root entries name exactly one instruction each through
// `instrs` instead of a regular expression.
// NOTE(review): the trailing "_<n>rc" in those write-type names presumably
// encodes how many cycles the unit stays occupied; confirm at the definitions.
1571// ASIMD FP divide, D-form, F16
1572def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FDIVv4f16)>;
1573
1574// ASIMD FP divide, D-form, F32
1575def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FDIVv2f32)>;
1576
1577// ASIMD FP divide, Q-form, F16
1578def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FDIVv8f16)>;
1579
1580// ASIMD FP divide, Q-form, F32
1581def : InstRW<[V2Write_10cyc_1V02_10rc], (instrs FDIVv4f32)>;
1582
1583// ASIMD FP divide, Q-form, F64
1584def : InstRW<[V2Write_15cyc_1V02_14rc], (instrs FDIVv2f64)>;
1585
1586// ASIMD FP max/min, reduce, F32 and D-form F16
// "(NM)?" makes one pattern cover FMAXV/FMINV and FMAXNMV/FMINNMV.
1587def : InstRW<[V2Write_4cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
1588
1589// ASIMD FP max/min, reduce, Q-form F16
1590def : InstRW<[V2Write_6cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
1591
1592// ASIMD FP multiply
// Only a write type is listed here; the accumulate groups below add a read
// type as well.
1593def : InstRW<[V2Wr_VFM], (instregex "^FMULv", "^FMULXv")>;
1594
1595// ASIMD FP multiply accumulate
1596def : InstRW<[V2Wr_VFMA, V2Rd_VFMA], (instregex "^FMLAv", "^FMLSv")>;
1597
1598// ASIMD FP multiply accumulate long
// "2?" and "(lane)?" let a single pattern cover the FML[AS]L and FML[AS]L2
// names, with or without "lane" before the arrangement.
1599def : InstRW<[V2Wr_VFMAL, V2Rd_VFMAL], (instregex "^FML[AS]L2?(lane)?v")>;
1600
1601// ASIMD FP round, D-form F32 and Q-form F64
1602def : InstRW<[V2Write_3cyc_1V02],
1603             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
1604                        "^FRINT(32|64)[XZ]v2f(32|64)$")>;
1605
1606// ASIMD FP round, D-form F16 and Q-form F32
// The FRINT(32|64)[XZ] pattern in this group only names the v4f32 form.
1607def : InstRW<[V2Write_4cyc_2V02],
1608             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
1609                        "^FRINT(32|64)[XZ]v4f32$")>;
1610
1611// ASIMD FP round, Q-form F16
1612def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
1613
1614// ASIMD FP square root, D-form, F16
1615def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FSQRTv4f16)>;
1616
1617// ASIMD FP square root, D-form, F32
1618def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FSQRTv2f32)>;
1619
1620// ASIMD FP square root, Q-form, F16
1621def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FSQRTv8f16)>;
1622
1623// ASIMD FP square root, Q-form, F32
// NOTE(review): this uses "_9rc" whereas the Q-form F32 divide above uses
// "_10rc"; check both against the optimisation guide.
1624def : InstRW<[V2Write_10cyc_1V02_9rc], (instrs FSQRTv4f32)>;
1625
1626// ASIMD FP square root, Q-form, F64
1627def : InstRW<[V2Write_16cyc_1V02_15rc], (instrs FSQRTv2f64)>;
1628
1629// ASIMD BFloat16 (BF16) instructions
1630// -----------------------------------------------------------------------------
// Every entry in this section names its instructions explicitly with `instrs`.
// The dot-product, matrix-multiply and multiply-accumulate-long entries list a
// write type together with a matching read type; the two converts list only a
// write type.
1631
1632// ASIMD convert, F32 to BF16
1633def : InstRW<[V2Write_4cyc_2V02], (instrs BFCVTN, BFCVTN2)>;
1634
1635// ASIMD dot product
1636def : InstRW<[V2Wr_VBFDOT, V2Rd_VBFDOT], (instrs BFDOTv4bf16, BFDOTv8bf16)>;
1637
1638// ASIMD matrix multiply accumulate
1639def : InstRW<[V2Wr_VBFMMA, V2Rd_VBFMMA], (instrs BFMMLA)>;
1640
1641// ASIMD multiply accumulate long
// Covers the bottom (B) and top (T) variants, each in plain and "Idx" form.
1642def : InstRW<[V2Wr_VBFMAL, V2Rd_VBFMAL], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
1643                                                 BFMLALTIdx)>;
1644
1645// Scalar convert, F32 to BF16
// The scalar BFCVT is kept in this BF16 section next to the vector converts.
1646def : InstRW<[V2Write_3cyc_1V02], (instrs BFCVT)>;
1647
1648// ASIMD miscellaneous instructions
1649// -----------------------------------------------------------------------------
// First part of the miscellaneous ASIMD entries: duplicate from a general
// register, saturating extract-narrow, and the reciprocal / reciprocal
// square-root estimate and exponent groups.
1650
1651// ASIMD bit reverse
1652// ASIMD bitwise insert
1653// ASIMD count
1654// ASIMD duplicate, element
1655// ASIMD extract
1656// ASIMD extract narrow
1657// ASIMD insert, element to element
1658// ASIMD move, FP immed
1659// ASIMD move, integer immed
1660// ASIMD reverse
1661// ASIMD table lookup extension, 1 table reg
1662// ASIMD transpose
1663// ASIMD unzip/zip
1664// Handled by SchedAlias<WriteV[dq], ...>
// (The groups listed above therefore have no InstRW entry of their own here.)
1665
1666// ASIMD duplicate, gen reg
// ".+" spans whatever sits between "DUPv" and "gpr" in the record name.
1667def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;
1668
1669// ASIMD extract narrow, saturating
1670def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
1671
// The estimate groups below use `instrs`, so each instruction is named
// explicitly rather than matched by pattern.
1672// ASIMD reciprocal and square root estimate, D-form U32
1673def : InstRW<[V2Write_3cyc_1V02], (instrs URECPEv2i32, URSQRTEv2i32)>;
1674
1675// ASIMD reciprocal and square root estimate, Q-form U32
1676def : InstRW<[V2Write_4cyc_2V02], (instrs URECPEv4i32, URSQRTEv4i32)>;
1677
1678// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
// The v1f16/v1i32/v1i64 names are the scalar forms mentioned in the heading.
1679def : InstRW<[V2Write_3cyc_1V02], (instrs FRECPEv1f16, FRECPEv1i32,
1680                                          FRECPEv1i64, FRECPEv2f32,
1681                                          FRSQRTEv1f16, FRSQRTEv1i32,
1682                                          FRSQRTEv1i64, FRSQRTEv2f32)>;
1683
1684// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
1685def : InstRW<[V2Write_4cyc_2V02], (instrs FRECPEv4f16, FRECPEv4f32,
1686                                          FRSQRTEv4f16, FRSQRTEv4f32)>;
1687
1688// ASIMD reciprocal and square root estimate, Q-form F16
1689def : InstRW<[V2Write_6cyc_4V02], (instrs FRECPEv8f16, FRSQRTEv8f16)>;
1690
1691// ASIMD reciprocal exponent
1692def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRECPXv")>;
1693
1694// ASIMD reciprocal step
// Binds the reciprocal-step write type to FRECPS and FRSQRTS. The alternation
// covers the scalar forms whose names end in 16, 32 or 64 as well as the
// vector forms (names continuing with "v"). The half-precision scalar names
// (FRECPS16 / FRSQRTS16) are listed explicitly so they receive the same write
// type as the other widths instead of falling back to a generic alias.
// NOTE(review): confirm FRECPS16/FRSQRTS16 are not already matched by another
// InstRW in this model.
1695def : InstRW<[V2Write_4cyc_1V], (instregex "^FRECPS(16|32|64|v)",
1696                                           "^FRSQRTS(16|32|64|v)")>;
1697
// Table lookup (TBL), table lookup extension (TBX) and vector/general-register
// transfer entries. The TBL/TBX entries name each instruction explicitly and
// are grouped by the number of table registers in the record name
// (One/Two/Three/Four).
1698// ASIMD table lookup, 1 or 2 table regs
1699def : InstRW<[V2Write_2cyc_1V01], (instrs TBLv8i8One, TBLv16i8One,
1700                                          TBLv8i8Two, TBLv16i8Two)>;
1701
1702// ASIMD table lookup, 3 table regs
1703def : InstRW<[V2Write_4cyc_2V01], (instrs TBLv8i8Three, TBLv16i8Three)>;
1704
1705// ASIMD table lookup, 4 table regs
1706def : InstRW<[V2Write_4cyc_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>;
1707
// TBX with a single table register has no entry here; it appears in the
// "Handled by SchedAlias" list of the miscellaneous section.
1708// ASIMD table lookup extension, 2 table reg
1709def : InstRW<[V2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;
1710
1711// ASIMD table lookup extension, 3 table reg
1712def : InstRW<[V2Write_6cyc_3V], (instrs TBXv8i8Three, TBXv16i8Three)>;
1713
1714// ASIMD table lookup extension, 4 table reg
1715def : InstRW<[V2Write_6cyc_5V], (instrs TBXv8i8Four, TBXv16i8Four)>;
1716
1717// ASIMD transfer, element to gen reg
1718def : InstRW<[V2Write_2cyc_2V01], (instregex "^[SU]MOVv")>;
1719
1720// ASIMD transfer, gen reg to element
// "$"-anchored so that only the INSvi<size>gpr names are selected.
1721def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
1722
1723// ASIMD load instructions
1724// -----------------------------------------------------------------------------
// LD1 multiple-register loads. Every form has two entries: one for the plain
// name and one for the "_POST" name, which lists WriteAdr as a second write.
// NOTE(review): presumably WriteAdr covers the base-register update of the
// post-indexed form -- confirm at its definition.
// Within each register count the D-form and Q-form entries use the same write
// type.
1725
1726// ASIMD load, 1 element, multiple, 1 reg, D-form
1727def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
1728def : InstRW<[V2Write_6cyc_1L, WriteAdr],
1729             (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
1730
1731// ASIMD load, 1 element, multiple, 1 reg, Q-form
1732def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
1733def : InstRW<[V2Write_6cyc_1L, WriteAdr],
1734             (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;
1735
1736// ASIMD load, 1 element, multiple, 2 reg, D-form
1737def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
1738def : InstRW<[V2Write_6cyc_2L, WriteAdr],
1739             (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
1740
1741// ASIMD load, 1 element, multiple, 2 reg, Q-form
1742def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
1743def : InstRW<[V2Write_6cyc_2L, WriteAdr],
1744             (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
1745
1746// ASIMD load, 1 element, multiple, 3 reg, D-form
1747def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
1748def : InstRW<[V2Write_6cyc_3L, WriteAdr],
1749             (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
1750
1751// ASIMD load, 1 element, multiple, 3 reg, Q-form
1752def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
1753def : InstRW<[V2Write_6cyc_3L, WriteAdr],
1754             (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
1755
1756// ASIMD load, 1 element, multiple, 4 reg, D-form
// The four-register forms are the only LD1 multiple entries using a 7cyc
// write type; the others use 6cyc types.
1757def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
1758def : InstRW<[V2Write_7cyc_4L, WriteAdr],
1759             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
1760
1761// ASIMD load, 1 element, multiple, 4 reg, Q-form
1762def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
1763def : InstRW<[V2Write_7cyc_4L, WriteAdr],
1764             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
1765
// LD1 single-lane / all-lanes loads and the LD2 family. As with the entries
// above, each form is listed twice: plain, and "_POST" with an added WriteAdr.
// NOTE(review): from here on the patterns carry no leading "^"; instregex is
// understood to match from the start of the name regardless, so this should be
// equivalent to the anchored spelling used earlier -- confirm.
1766// ASIMD load, 1 element, one lane, B/H/S
1767// ASIMD load, 1 element, one lane, D
1768def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "LD1i(8|16|32|64)$")>;
1769def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
1770
1771// ASIMD load, 1 element, all lanes, D-form, B/H/S
1772// ASIMD load, 1 element, all lanes, D-form, D
1773def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "LD1Rv(8b|4h|2s|1d)$")>;
1774def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
1775
1776// ASIMD load, 1 element, all lanes, Q-form
1777def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "LD1Rv(16b|8h|4s|2d)$")>;
1778def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
1779
1780// ASIMD load, 2 element, multiple, D-form, B/H/S
// The D-form pattern lists only 8b/4h/2s (no 1d alternative).
1781def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "LD2Twov(8b|4h|2s)$")>;
1782def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
1783
1784// ASIMD load, 2 element, multiple, Q-form, B/H/S
1785// ASIMD load, 2 element, multiple, Q-form, D
1786def : InstRW<[V2Write_8cyc_2L_2V],           (instregex "LD2Twov(16b|8h|4s|2d)$")>;
1787def : InstRW<[V2Write_8cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
1788
1789// ASIMD load, 2 element, one lane, B/H
1790// ASIMD load, 2 element, one lane, S
1791// ASIMD load, 2 element, one lane, D
1792def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "LD2i(8|16|32|64)$")>;
1793def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
1794
1795// ASIMD load, 2 element, all lanes, D-form, B/H/S
1796// ASIMD load, 2 element, all lanes, D-form, D
1797def : InstRW<[V2Write_8cyc_1L_2V],            (instregex "LD2Rv(8b|4h|2s|1d)$")>;
1798def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr],  (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
1799
1800// ASIMD load, 2 element, all lanes, Q-form
1801def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "LD2Rv(16b|8h|4s|2d)$")>;
1802def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
1803
// LD3 and LD4 families. Each form is listed twice: once for the plain name and
// once for the "_POST" name, which adds WriteAdr as a second write.
// The multiple-structure D-form patterns list only 8b/4h/2s, while the
// all-lanes (LD3R/LD4R) D-form patterns also include 1d.
1804// ASIMD load, 3 element, multiple, D-form, B/H/S
1805def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "LD3Threev(8b|4h|2s)$")>;
1806def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
1807
1808// ASIMD load, 3 element, multiple, Q-form, B/H/S
1809// ASIMD load, 3 element, multiple, Q-form, D
1810def : InstRW<[V2Write_8cyc_3L_3V],           (instregex "LD3Threev(16b|8h|4s|2d)$")>;
1811def : InstRW<[V2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
1812
1813// ASIMD load, 3 element, one lane, B/H
1814// ASIMD load, 3 element, one lane, S
1815// ASIMD load, 3 element, one lane, D
1816def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "LD3i(8|16|32|64)$")>;
1817def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
1818
1819// ASIMD load, 3 element, all lanes, D-form, B/H/S
1820// ASIMD load, 3 element, all lanes, D-form, D
1821def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "LD3Rv(8b|4h|2s|1d)$")>;
1822def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
1823
1824// ASIMD load, 3 element, all lanes, Q-form, B/H/S
1825// ASIMD load, 3 element, all lanes, Q-form, D
1826def : InstRW<[V2Write_8cyc_3L_3V],           (instregex "LD3Rv(16b|8h|4s|2d)$")>;
1827def : InstRW<[V2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
1828
1829// ASIMD load, 4 element, multiple, D-form, B/H/S
1830def : InstRW<[V2Write_8cyc_3L_4V],           (instregex "LD4Fourv(8b|4h|2s)$")>;
1831def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
1832
1833// ASIMD load, 4 element, multiple, Q-form, B/H/S
1834// ASIMD load, 4 element, multiple, Q-form, D
// This is the only entry in the ASIMD load section that uses a 9cyc write
// type.
1835def : InstRW<[V2Write_9cyc_6L_4V],           (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
1836def : InstRW<[V2Write_9cyc_6L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
1837
1838// ASIMD load, 4 element, one lane, B/H
1839// ASIMD load, 4 element, one lane, S
1840// ASIMD load, 4 element, one lane, D
1841def : InstRW<[V2Write_8cyc_3L_4V],           (instregex "LD4i(8|16|32|64)$")>;
1842def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
1843
1844// ASIMD load, 4 element, all lanes, D-form, B/H/S
1845// ASIMD load, 4 element, all lanes, D-form, D
1846def : InstRW<[V2Write_8cyc_3L_4V],              (instregex "LD4Rv(8b|4h|2s|1d)$")>;
1847def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr],    (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
1848
1849// ASIMD load, 4 element, all lanes, Q-form, B/H/S
1850// ASIMD load, 4 element, all lanes, Q-form, D
1851def : InstRW<[V2Write_8cyc_4L_4V],            (instregex "LD4Rv(16b|8h|4s|2d)$")>;
1852def : InstRW<[V2Write_8cyc_4L_4V, WriteAdr],  (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
1853
1854// ASIMD store instructions
1855// -----------------------------------------------------------------------------
// ST1 multiple-register stores. Each form has a plain entry and a "_POST"
// entry; the "_POST" entry lists WriteAdr as an additional write.
// Unlike the LD1 multiple-register loads, the D-form and Q-form entries use
// different write types once more than one register is stored.
1856
1857// ASIMD store, 1 element, multiple, 1 reg, D-form
1858def : InstRW<[V2Write_2cyc_1L01_1V01],           (instregex "ST1Onev(8b|4h|2s|1d)$")>;
1859def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
1860
1861// ASIMD store, 1 element, multiple, 1 reg, Q-form
1862def : InstRW<[V2Write_2cyc_1L01_1V01],           (instregex "ST1Onev(16b|8h|4s|2d)$")>;
1863def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
1864
1865// ASIMD store, 1 element, multiple, 2 reg, D-form
// Uses the same write type as the one-register forms above.
1866def : InstRW<[V2Write_2cyc_1L01_1V01],           (instregex "ST1Twov(8b|4h|2s|1d)$")>;
1867def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
1868
1869// ASIMD store, 1 element, multiple, 2 reg, Q-form
1870def : InstRW<[V2Write_2cyc_2L01_2V01],           (instregex "ST1Twov(16b|8h|4s|2d)$")>;
1871def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
1872
1873// ASIMD store, 1 element, multiple, 3 reg, D-form
1874def : InstRW<[V2Write_2cyc_2L01_2V01],           (instregex "ST1Threev(8b|4h|2s|1d)$")>;
1875def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
1876
1877// ASIMD store, 1 element, multiple, 3 reg, Q-form
1878def : InstRW<[V2Write_2cyc_3L01_3V01],           (instregex "ST1Threev(16b|8h|4s|2d)$")>;
1879def : InstRW<[V2Write_2cyc_3L01_3V01, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
1880
1881// ASIMD store, 1 element, multiple, 4 reg, D-form
// Uses the same write type as the three-register D-form and two-register
// Q-form entries.
1882def : InstRW<[V2Write_2cyc_2L01_2V01],           (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
1883def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
1884
1885// ASIMD store, 1 element, multiple, 4 reg, Q-form
1886def : InstRW<[V2Write_2cyc_4L01_4V01],           (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
1887def : InstRW<[V2Write_2cyc_4L01_4V01, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
1888
// ST1 single-lane stores and the ST2/ST3/ST4 families. Each form has a plain
// entry and a "_POST" entry that adds WriteAdr as a second write.
// The multiple-structure D-form patterns list only 8b/4h/2s.
1889// ASIMD store, 1 element, one lane, B/H/S
1890// ASIMD store, 1 element, one lane, D
1891def : InstRW<[V2Write_4cyc_1L01_2V01],           (instregex "ST1i(8|16|32|64)$")>;
1892def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
1893
1894// ASIMD store, 2 element, multiple, D-form, B/H/S
1895def : InstRW<[V2Write_4cyc_1L01_2V01],           (instregex "ST2Twov(8b|4h|2s)$")>;
1896def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
1897
1898// ASIMD store, 2 element, multiple, Q-form, B/H/S
1899// ASIMD store, 2 element, multiple, Q-form, D
1900def : InstRW<[V2Write_4cyc_2L01_4V01],           (instregex "ST2Twov(16b|8h|4s|2d)$")>;
1901def : InstRW<[V2Write_4cyc_2L01_4V01, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
1902
1903// ASIMD store, 2 element, one lane, B/H/S
1904// ASIMD store, 2 element, one lane, D
1905def : InstRW<[V2Write_4cyc_1L01_2V01],           (instregex "ST2i(8|16|32|64)$")>;
1906def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
1907
1908// ASIMD store, 3 element, multiple, D-form, B/H/S
1909def : InstRW<[V2Write_5cyc_2L01_4V01],           (instregex "ST3Threev(8b|4h|2s)$")>;
1910def : InstRW<[V2Write_5cyc_2L01_4V01, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
1911
1912// ASIMD store, 3 element, multiple, Q-form, B/H/S
1913// ASIMD store, 3 element, multiple, Q-form, D
1914def : InstRW<[V2Write_6cyc_3L01_6V01],           (instregex "ST3Threev(16b|8h|4s|2d)$")>;
1915def : InstRW<[V2Write_6cyc_3L01_6V01, WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
1916
1917// ASIMD store, 3 element, one lane, B/H
1918// ASIMD store, 3 element, one lane, S
1919// ASIMD store, 3 element, one lane, D
1920def : InstRW<[V2Write_5cyc_2L01_4V01],           (instregex "ST3i(8|16|32|64)$")>;
1921def : InstRW<[V2Write_5cyc_2L01_4V01, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
1922
1923// ASIMD store, 4 element, multiple, D-form, B/H/S
1924def : InstRW<[V2Write_6cyc_2L01_6V01],           (instregex "ST4Fourv(8b|4h|2s)$")>;
1925def : InstRW<[V2Write_6cyc_2L01_6V01, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
1926
// ST4 is the one family in this section where the Q-form D (2d) arrangement
// and the 64-bit lane form get entries separate from the B/H/S forms.
1927// ASIMD store, 4 element, multiple, Q-form, B/H/S
1928def : InstRW<[V2Write_7cyc_4L01_12V01],           (instregex "ST4Fourv(16b|8h|4s)$")>;
1929def : InstRW<[V2Write_7cyc_4L01_12V01, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
1930
1931// ASIMD store, 4 element, multiple, Q-form, D
1932def : InstRW<[V2Write_5cyc_4L01_8V01],           (instregex "ST4Fourv(2d)$")>;
1933def : InstRW<[V2Write_5cyc_4L01_8V01, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
1934
1935// ASIMD store, 4 element, one lane, B/H/S
1936def : InstRW<[V2Write_6cyc_1L01_3V01],           (instregex "ST4i(8|16|32)$")>;
1937def : InstRW<[V2Write_6cyc_1L01_3V01, WriteAdr], (instregex "ST4i(8|16|32)_POST$")>;
1938
1939// ASIMD store, 4 element, one lane, D
1940def : InstRW<[V2Write_4cyc_2L01_4V01],            (instregex "ST4i(64)$")>;
1941def : InstRW<[V2Write_4cyc_2L01_4V01, WriteAdr],  (instregex "ST4i(64)_POST$")>;
1942
1943// Cryptography extensions
1944// -----------------------------------------------------------------------------
// Scheduling entries for the AES, polynomial multiply, SHA1/SHA256/SHA512,
// SHA3, SM3 and SM4 instructions. Apart from the AES and 64x64 polynomial
// multiply entries, every entry in this section uses a *_1V0 write type.
1945
1946// Crypto AES ops
// "I?" lets the second pattern cover both AESMCrr and AESIMCrr.
1947def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;
1948
1949// Crypto polynomial (64x64) multiply long
// The 8-bit PMUL/PMULL forms are scheduled with the ASIMD integer entries.
1950def : InstRW<[V2Write_2cyc_1V], (instrs PMULLv1i64, PMULLv2i64)>;
1951
1952// Crypto SHA1 hash acceleration op
1953// Crypto SHA1 schedule acceleration ops
// Selects SHA1H, SHA1SU0 and SHA1SU1; SHA1C/SHA1M/SHA1P are matched by the
// next entry.
1954def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>;
1955
1956// Crypto SHA1 hash acceleration ops
1957// Crypto SHA256 hash acceleration ops
1958def : InstRW<[V2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
1959
1960// Crypto SHA256 schedule acceleration ops
1961def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>;
1962
1963// Crypto SHA512 hash acceleration ops
// Unlike SHA1/SHA256, the SHA512 hash and schedule-update names share one
// entry.
1964def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;
1965
1966// Crypto SHA3 ops
1967def : InstRW<[V2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
1968
1969// Crypto SM3 ops
1970def : InstRW<[V2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
1971                                            "^SM3TT[12][AB]$")>;
1972
1973// Crypto SM4 ops
1974def : InstRW<[V2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>;
1975
1976// CRC
1977// -----------------------------------------------------------------------------
// A single entry covers every instruction whose name starts with "CRC32". It
// lists a write type and a matching read type.
// NOTE(review): presumably the read type models forwarding between dependent
// CRC operations -- confirm at the V2Rd_CRC definition.
1978
1979def : InstRW<[V2Wr_CRC, V2Rd_CRC], (instregex "^CRC32")>;
1980
1981// SVE Predicate instructions
1982// -----------------------------------------------------------------------------
// Scheduling entries for SVE predicate instructions: loop control, predicate
// counting, predicate logical operations and predicate permutes.
// NOTE(review): write types named "<a>or<b>cyc" presumably resolve to one of
// two latencies; their definitions are outside this excerpt -- confirm there.
1983
1984// Loop control, based on predicate
1985def : InstRW<[V2Write_2or3cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP,
1986                                           BRKB_PPmP, BRKB_PPzP)>;
1987
1988// Loop control, based on predicate and flag setting
1989def : InstRW<[V2Write_3or4cyc_2M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
1990
1991// Loop control, propagating
1992def : InstRW<[V2Write_2or3cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP,
1993                                            BRKPB_PPzPP)>;
1994
1995// Loop control, propagating and flag setting
1996def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
1997                                               BRKPBS_PPzPP)>;
1998
1999// Loop control, based on GPR
// Two entries with the same write type: the comparison WHILE* forms (WW and XX
// operands) and the WHILERW/WHILEWR forms (XX operands only).
2000def : InstRW<[V2Write_3cyc_2M],
2001             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
2002def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;
2003
2004// Loop terminate
2005def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;
2006
2007// Predicate counting scalar
// The plain (DEC|INC)[BHWD]_XPiI names are not matched here; they have their
// own entry under "Predicate counting scalar, ALL, {1,2,4}".
2008def : InstRW<[V2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
2009def : InstRW<[V2Write_2cyc_1M],
2010             (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI",
2011                        "^SQ(DEC|INC)[BHWD]_XPiWdI",
2012                        "^UQ(DEC|INC)[BHWD]_WPiI")>;
2013
2014// Predicate counting scalar, ALL, {1,2,4}
// NOTE(review): V2Write_IncDec is defined elsewhere; going by the heading it
// presumably chooses a latency from the pattern/multiplier operands -- confirm.
2015def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>;
2016
2017// Predicate counting scalar, active predicate
2018def : InstRW<[V2Write_2cyc_1M],
2019             (instregex "^CNTP_XPP_[BHSD]",
2020                        "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
2021                        "^(UQDEC|UQINC)P_WP_[BHSD]",
2022                        "^(SQDEC|SQINC)P_XPWd_[BHSD]")>;
2023
2024// Predicate counting vector, active predicate
// The element-size class is [HSD]; no _B names are listed for the vector form.
2025def : InstRW<[V2Write_7cyc_1M_1M0_1V],
2026             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;
2027
2028// Predicate logical
2029def : InstRW<[V2Write_1or2cyc_1M0],
2030             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;
2031
2032// Predicate logical, flag setting
// Same operations as above with the flag-setting "S" suffix on the mnemonic.
2033def : InstRW<[V2Write_1or2cyc_1M0_1M],
2034             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;
2035
2036// Predicate reverse
2037def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]")>;
2038
2039// Predicate select
2040def : InstRW<[V2Write_1cyc_1M0], (instrs SEL_PPPP)>;
2041
2042// Predicate set
2043def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;
2044
2045// Predicate set/initialize, set flags
2046def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]")>;
2047
2048// Predicate find first/next
2049def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;
2050
2051// Predicate test
2052def : InstRW<[V2Write_1cyc_1M], (instrs PTEST_PP)>;
2053
2054// Predicate transpose
2055def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]")>;
2056
2057// Predicate unpack and widen
2058def : InstRW<[V2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
2059
2060// Predicate zip/unzip
2061def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>;
2062
2063// SVE integer instructions
2064// -----------------------------------------------------------------------------
// First part of the SVE integer entries: arithmetic and shift groups. Name
// suffixes such as _ZPmZ, _ZPZZ, _ZZZ, _ZI and _ZZI in the patterns select the
// individual operand-form variants of each mnemonic.
2065
2066// Arithmetic, absolute diff
2067def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
2068                                           "^[SU]ABD_ZPZZ_[BHSD]")>;
2069
2070// Arithmetic, absolute diff accum
2071def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;
2072
2073// Arithmetic, absolute diff accum long
// Shares the V2Wr_ZA/V2Rd_ZA pair with the non-long accumulate entry above.
2074def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;
2075
2076// Arithmetic, absolute diff long
2077def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;
2078
2079// Arithmetic, basic
// Besides add/sub/abs/neg this group also lists the ADR address-generation
// names and the halving and long/wide add/sub names.
2080def : InstRW<[V2Write_2cyc_1V],
2081             (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
2082                        "^(ADD|SUB)_ZZZ_[BHSD]",
2083                        "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
2084                        "^(ADD|SUB|SUBR)_ZI_[BHSD]",
2085                        "^ADR_[SU]XTW_ZZZ_D_[0123]",
2086                        "^ADR_LSL_ZZZ_[SD]_[0123]",
2087                        "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
2088                        "^SADDLBT_ZZZ_[HSD]",
2089                        "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
2090                        "^SSUBL(BT|TB)_ZZZ_[HSD]")>;
2091
2092// Arithmetic, complex
// Uses the same write type as "Arithmetic, basic"; the groups are kept
// separate under their own headings.
2093def : InstRW<[V2Write_2cyc_1V],
2094             (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
2095                        "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
2096                        "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
2097                        "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
2098                        "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
2099                        "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;
2100
2101// Arithmetic, large integer
2102def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;
2103
2104// Arithmetic, pairwise add
2105def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>;
2106
2107// Arithmetic, pairwise add and accum long
// Two read types are listed: ReadDefault first, then V2Rd_ZPA.
// NOTE(review): presumably ReadDefault fills the slot of the governing
// predicate so that V2Rd_ZPA lands on the accumulator operand -- confirm
// against the operand order of [SU]ADALP_ZPmZ.
2108def : InstRW<[V2Wr_ZPA, ReadDefault, V2Rd_ZPA],
2109             (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;
2110
2111// Arithmetic, shift
2112def : InstRW<[V2Write_2cyc_1V13],
2113             (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
2114                        "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
2115                        "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
2116                        "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
2117                        "^(ASR|LSL|LSR)_ZZI_[BHSD]",
2118                        "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
2119                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
2120
2121// Arithmetic, shift and accumulate
// "R?" covers both the plain and the rounding accumulate names.
2122def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>;
2123
2124// Arithmetic, shift by immediate
2125def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]",
2126                                             "^[SU]SHLL[BT]_ZZI_[HSD]")>;
2127
2128// Arithmetic, shift by immediate and insert
2129def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>;
2130
2131// Arithmetic, shift complex
// The remaining shift groups below (complex, right-for-divide, rounding) all
// use V2Write_4cyc_1V13.
2132def : InstRW<[V2Write_4cyc_1V13],
2133             (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
2134                        "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
2135                        "^[SU]QR?SHL_ZPZZ_[BHSD]",
2136                        "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
2137                        "^SQSHRU?N[BT]_ZZI_[BHS]",
2138                        "^UQR?SHRN[BT]_ZZI_[BHS]")>;
2139
2140// Arithmetic, shift right for divide
2141def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;
2142
2143// Arithmetic, shift rounding
2144def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
2145                                             "^[SU]RSHL_ZPZZ_[BHSD]",
2146                                             "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;
2147
// Second part of the SVE integer entries: bit manipulation, compares, complex
// arithmetic, conversions to floating point, copies, divides, dot products,
// duplicates, extends, extract/insert, histogram and the first "horizontal
// operations" group. Each InstRW binds its scheduling types to the names
// selected by the listed patterns or explicit `instrs` names.
2148// Bit manipulation
2149def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;
2150
2151// Bitwise select
2152def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;
2153
2154// Count/reverse bits
2155def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;
2156
2157// Broadcast logical bitmask immediate to vector
2158def : InstRW<[V2Write_2cyc_1V], (instrs DUPM_ZI)>;
2159
2160// Compare and set flags
// "Z[IZ]" covers the compare-with-immediate and compare-with-vector forms;
// the _WIDE forms are listed separately and only for B/H/S.
2161def : InstRW<[V2Write_4or5cyc_1V0_1M0],
2162             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
2163                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;
2164
2165// Complex add
2166def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>;
2167
2168// Complex dot product 8-bit element
// CDOT uses the same V2Wr_ZDOTB/V2Wr_ZDOTH types as the integer dot-product
// entries further down: the _S names go with 8-bit, the _D names with 16-bit.
2169def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
2170
2171// Complex dot product 16-bit element
2172def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
2173
2174// Complex multiply-add B, H, S element size
// The indexed (_ZZZI) pattern lists only H and S.
2175def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]",
2176                                                      "^CMLA_ZZZI_[HS]")>;
2177
2178// Complex multiply-add D element size
2179def : InstRW<[V2Wr_ZCMAD, V2Rd_ZCMAD], (instrs CMLA_ZZZ_D)>;
2180
2181// Conditional extract operations, scalar form
2182def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]")>;
2183
2184// Conditional extract operations, SIMD&FP scalar and vector forms
// COMPACT and SPLICE are scheduled together with the CLAST[AB] V/Z forms.
2185def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
2186                                            "^COMPACT_ZPZ_[SD]",
2187                                            "^SPLICE_ZPZZ?_[BHSD]")>;
2188
// The three convert-to-floating-point groups are split by the "<src>to<dst>"
// suffix of the record name.
2189// Convert to floating point, 64b to float or convert to double
2190def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
2191                                             "^[SU]CVTF_ZPmZ_StoD")>;
2192
2193// Convert to floating point, 32b to single or half
2194def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
2195
2196// Convert to floating point, 16b to half
2197def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
2198
2199// Copy, scalar
// CPY_ZPmR is scheduled separately from the CPY_ZPmI/CPY_ZPmV/CPY_ZPzI forms
// below.
2200def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>;
2201
2202// Copy, scalar SIMD&FP or imm
2203def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]",
2204                                           "^CPY_ZPzI_[BHSD]")>;
2205
2206// Divides, 32 bit
// "R?" includes the reversed-operand DIVR names for the _ZPmZ form.
2207def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
2208                                             "^[SU]DIV_ZPZZ_S")>;
2209
2210// Divides, 64 bit
2211def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
2212                                             "^[SU]DIV_ZPZZ_D")>;
2213
2214// Dot product, 8 bit
// "I?" covers both the vector (_ZZZ) and indexed (_ZZZI) names.
2215def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>;
2216
2217// Dot product, 8 bit, using signed and unsigned integers
2218def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
2219
2220// Dot product, 16 bit
2221def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>;
2222
2223// Duplicate, immediate and indexed form
// The indexed form additionally lists a Q element size.
2224def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]",
2225                                           "^DUP_ZZI_[BHSDQ]")>;
2226
2227// Duplicate, scalar form
2228def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]")>;
2229
2230// Extend, sign or zero
// The element-size class narrows with the source width: XTB [HSD], XTH [SD],
// XTW [D].
2231def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ_[HSD]",
2232                                             "^[SU]XTH_ZPmZ_[SD]",
2233                                             "^[SU]XTW_ZPmZ_[D]")>;
2234
2235// Extract
2236def : InstRW<[V2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
2237
2238// Extract narrow saturating
2239def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
2240                                             "^SQXTUN[BT]_ZZ_[BHS]")>;
2241
2242// Extract/insert operation, SIMD and FP scalar form
2243def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]",
2244                                            "^INSR_ZV_[BHSD]")>;
2245
2246// Extract/insert operation, scalar
2247def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]",
2248                                                "^INSR_ZR_[BHSD]")>;
2249
2250// Histogram operations
2251def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]",
2252                                           "^HISTSEG_ZZZ")>;
2253
2254// Horizontal operations, B, H, S form, immediate operands only
// Only the INDEX_II names (both operands immediate) for B/H/S are selected
// here.
2255def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]")>;
2255def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]")>;
2256
2257// Horizontal operations, B, H, S form, scalar, immediate operands / scalar
2258// operands only / immediate, scalar operands
2259def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;
2260
2261// Horizontal operations, D form, immediate operands only
2262def : InstRW<[V2Write_5cyc_2V02], (instrs INDEX_II_D)>;
2263
2264// Horizontal operations, D form, scalar, immediate operands / scalar operands
2265// only / immediate, scalar operands
2266def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D")>;
2267
2268// Logical
2269def : InstRW<[V2Write_2cyc_1V],
2270             (instregex "^(AND|EOR|ORR)_ZI",
2271                        "^(AND|BIC|EOR|ORR)_ZZZ",
2272                        "^EOR(BT|TB)_ZZZ_[BHSD]",
2273                        "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]")>;
2275
2276// Max/min, basic and pairwise
2277def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
2278                                           "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
2279                                           "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;
2280
2281// Matching operations
2282// FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the
2283// latency for this instruction is 4 cycles.
2284def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>;
2285
2286// Matrix multiply-accumulate
2287def : InstRW<[V2Wr_ZMMA, V2Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
2288
2289// Move prefix
2290def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
2291                                           "^MOVPRFX_ZZ")>;
2292
2293// Multiply, B, H, S element size
2294def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
2295                                             "^MUL_ZPZZ_[BHS]",
2296                                             "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
2297                                             "^[SU]MULH_ZPZZ_[BHS]")>;
2298
2299// Multiply, D element size
2300def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
2301                                             "^MUL_ZPZZ_D",
2302                                             "^[SU]MULH_(ZPmZ|ZZZ)_D",
2303                                             "^[SU]MULH_ZPZZ_D")>;
2304
2305// Multiply long
2306def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
2307                                             "^[SU]MULL[BT]_ZZZ_[HSD]")>;
2308
2309// Multiply accumulate, B, H, S element size
2310def : InstRW<[V2Wr_ZMABHS, V2Rd_ZMABHS],
2311             (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>;
2312def : InstRW<[V2Wr_ZMABHS, ReadDefault, V2Rd_ZMABHS],
2313             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
2314
2315// Multiply accumulate, D element size
2316def : InstRW<[V2Wr_ZMAD, V2Rd_ZMAD],
2317             (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>;
2318def : InstRW<[V2Wr_ZMAD, ReadDefault, V2Rd_ZMAD],
2319             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
2320
2321// Multiply accumulate long
2322def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
2323                                                "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
2324
2325// Multiply accumulate saturating doubling long regular
2326def : InstRW<[V2Wr_ZMASQL, V2Rd_ZMASQ],
2327             (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]",
2328                        "^SQDML[AS]L[BT]_ZZZI_[SD]")>;
2329
2330// Multiply saturating doubling high, B, H, S element size
2331def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]",
2332                                             "^SQDMULH_ZZZI_[HS]")>;
2333
2334// Multiply saturating doubling high, D element size
2335def : InstRW<[V2Write_5cyc_2V02], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
2336
2337// Multiply saturating doubling long
2338def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
2339                                             "^SQDMULL[BT]_ZZZI_[SD]")>;
2340
2341// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
2342// element size
2343def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
2344                                                     "^SQRDCMLAH_ZZZ_[BHS]",
2345                                                     "^SQRDML[AS]H_ZZZI_[HS]",
2346                                                     "^SQRDCMLAH_ZZZI_[HS]")>;
2347
2348// Multiply saturating rounding doubling regular/complex accumulate, D element
2349// size
2350def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D",
2351                                                   "^SQRDCMLAH_ZZZ_D")>;
2352
2353// Multiply saturating rounding doubling regular/complex, B, H, S element size
2354def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]",
2355                                             "^SQRDMULH_ZZZI_[HS]")>;
2356
2357// Multiply saturating rounding doubling regular/complex, D element size
2358def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D")>;
2359
2360// Multiply/multiply long, (8x8) polynomial
2361def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B",
2362                                             "^PMULL[BT]_ZZZ_[HDQ]")>;
2363
2364// Predicate counting vector
2365def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>;
2366
2367// Reciprocal estimate
2368def : InstRW<[V2Write_4cyc_2V02], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
2369
2370// Reduction, arithmetic, B form
2371def : InstRW<[V2Write_9cyc_2V_4V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
2372
2373// Reduction, arithmetic, H form
2374def : InstRW<[V2Write_8cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
2375
2376// Reduction, arithmetic, S form
2377def : InstRW<[V2Write_6cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
2378
2379// Reduction, arithmetic, D form
2380def : InstRW<[V2Write_4cyc_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
2381
2382// Reduction, logical
2383def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>;
2384
2385// Reverse, vector
2386def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]",
2387                                           "^REVB_ZPmZ_[HSD]",
2388                                           "^REVH_ZPmZ_[SD]",
2389                                           "^REVW_ZPmZ_D")>;
2390
2391// Select, vector form
2392def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]")>;
2393
2394// Table lookup
2395def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>;
2396
2397// Table lookup extension
2398def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]")>;
2399
2400// Transpose, vector form
2401def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
2402
2403// Unpack and extend
2404def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;
2405
2406// Zip/unzip
2407def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
2408
2409// SVE floating-point instructions
2410// -----------------------------------------------------------------------------
2411
2412// Floating point absolute value/difference
2413def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]",
2414                                           "^FABD_ZPZZ_[HSD]")>;
2416
2417// Floating point arithmetic
2418def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
2419                                           "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
2420                                           "^FADDP_ZPmZZ_[HSD]",
2421                                           "^FNEG_ZPmZ_[HSD]",
2422                                           "^FSUBR_ZPm[IZ]_[HSD]",
2423                                           "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
2424
2425// Floating point associative add, F16
2426def : InstRW<[V2Write_10cyc_1V1_9rc], (instrs FADDA_VPZ_H)>;
2427
2428// Floating point associative add, F32
2429def : InstRW<[V2Write_6cyc_1V1_5rc], (instrs FADDA_VPZ_S)>;
2430
2431// Floating point associative add, F64
2432def : InstRW<[V2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
2433
2434// Floating point compare
2435def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]",
2436                                            "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
2437                                            "^FCM(LE|LT)_PPzZ0_[HSD]",
2438                                            "^FCMUO_PPzZZ_[HSD]")>;
2439
2440// Floating point complex add
2441def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]")>;
2442
2443// Floating point complex multiply add
2444def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>;
2445def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA],              (instregex "^FCMLA_ZZZI_[HS]")>;
2446
2447// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
2448def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
2449                                             "^FCVTLT_ZPmZ_HtoS",
2450                                             "^FCVTNT_ZPmZ_StoH")>;
2451
2452// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
2453// or F64 to F16)
2454def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
2455                                             "^FCVTLT_ZPmZ_StoD",
2456                                             "^FCVTNT_ZPmZ_DtoS")>;
2457
2458// Floating point convert, round to odd
2459def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
2460
2461// Floating point base2 log, F16
2462def : InstRW<[V2Write_6cyc_4V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
2463
2464// Floating point base2 log, F32
2465def : InstRW<[V2Write_4cyc_2V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
2466
2467// Floating point base2 log, F64
2468def : InstRW<[V2Write_3cyc_1V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
2469
2470// Floating point convert to integer, F16
2471def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
2472
2473// Floating point convert to integer, F32
2474def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
2475
2476// Floating point convert to integer, F64
2477def : InstRW<[V2Write_3cyc_1V02],
2478             (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
2479
2480// Floating point copy
2481def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]",
2482                                           "^FDUP_ZI_[HSD]")>;
2483
2484// Floating point divide, F16
2485def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
2486
2487// Floating point divide, F32
2488def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
2489
2490// Floating point divide, F64
2491def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
2492
2493// Floating point min/max pairwise
2494def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
2495
2496// Floating point min/max
2497def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
2498                                           "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
2499
2500// Floating point multiply
2501def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
2502                                           "^FMULX_ZPZZ_[HSD]",
2503                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
2504                                           "^FMUL_ZPZ[IZ]_[HSD]")>;
2505
2506// Floating point multiply accumulate
2507def : InstRW<[V2Wr_ZFMA, ReadDefault, V2Rd_ZFMA],
2508             (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
2509                        "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
2510def : InstRW<[V2Wr_ZFMA, V2Rd_ZFMA],
2511             (instregex "^FML[AS]_ZZZI_[HSD]",
2512                        "^FN?ML[AS]_ZPZZZ_[HSD]")>;
2513
2514// Floating point multiply add/sub accumulate long
2515def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
2516
2517// Floating point reciprocal estimate, F16
2518def : InstRW<[V2Write_6cyc_4V02], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
2519
2520// Floating point reciprocal estimate, F32
2521def : InstRW<[V2Write_4cyc_2V02], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
2522
2523// Floating point reciprocal estimate, F64
2524def : InstRW<[V2Write_3cyc_1V02], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
2525
2526// Floating point reciprocal step
2527def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
2528
2529// Floating point reduction, F16
2530def : InstRW<[V2Write_8cyc_4V],
2531             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>;
2532
2533// Floating point reduction, F32
2534def : InstRW<[V2Write_6cyc_3V],
2535             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>;
2536
2537// Floating point reduction, F64
2538def : InstRW<[V2Write_4cyc_2V],
2539             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>;
2540
2541// Floating point round to integral, F16
2542def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
2543
2544// Floating point round to integral, F32
2545def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
2546
2547// Floating point round to integral, F64
2548def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
2549
2550// Floating point square root, F16
2551def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>;
2552
2553// Floating point square root, F32
2554def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>;
2555
2556// Floating point square root, F64
2557def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>;
2558
2559// Floating point trigonometric exponentiation
2560def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;
2561
2562// Floating point trigonometric multiply add
2563def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]")>;
2564
2565// Floating point trigonometric, miscellaneous
2566def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>;
2567
2568// SVE BFloat16 (BF16) instructions
2569// -----------------------------------------------------------------------------
2570
2571// Convert, F32 to BF16
2572def : InstRW<[V2Write_4cyc_1V02], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
2573
2574// Dot product
2575def : InstRW<[V2Wr_ZBFDOT, V2Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
2576
2577// Matrix multiply accumulate
2578def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>;
2579
2580// Multiply accumulate long
2581def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>;
2582
2583// SVE Load instructions
2584// -----------------------------------------------------------------------------
2585
2586// Load vector
2587def : InstRW<[V2Write_6cyc_1L], (instrs LDR_ZXI)>;
2588
2589// Load predicate
2590def : InstRW<[V2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
2591
2592// Contiguous load, scalar + imm
2593def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM_REAL$",
2594                                           "^LD1S?B_[HSD]_IMM_REAL$",
2595                                           "^LD1S?H_[SD]_IMM_REAL$",
2596                                           "^LD1S?W_D_IMM_REAL$" )>;
2597// Contiguous load, scalar + scalar
2598def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]$",
2599                                           "^LD1S?B_[HSD]$",
2600                                           "^LD1S?H_[SD]$",
2601                                           "^LD1S?W_D$" )>;
2602
2603// Contiguous load broadcast, scalar + imm
2604def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
2605                                           "^LD1RS?B_[HSD]_IMM$",
2606                                           "^LD1RS?H_[SD]_IMM$",
2607                                           "^LD1RW_D_IMM$",
2608                                           "^LD1RSW_IMM$",
2609                                           "^LD1RQ_[BHWD]_IMM$")>;
2610
2611// Contiguous load broadcast, scalar + scalar
2612def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
2613
2614// Non temporal load, scalar + imm
2615// Non temporal load, scalar + scalar
2616def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZR[IR]$")>;
2617
2618// Non temporal gather load, vector + scalar 32-bit element size
2619def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
2620                                              "^LDNT1S[BH]_ZZR_S_REAL$")>;
2621
2622// Non temporal gather load, vector + scalar 64-bit element size
2623def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
2624def : InstRW<[V2Write_9cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;
2625
2626// Contiguous first faulting load, scalar + scalar
2627def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
2628                                              "^LDFF1S?B_[HSD]_REAL$",
2629                                              "^LDFF1S?H_[SD]_REAL$",
2630                                              "^LDFF1S?W_D_REAL$")>;
2631
2632// Contiguous non faulting load, scalar + imm
2633def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
2634                                           "^LDNF1S?B_[HSD]_IMM_REAL$",
2635                                           "^LDNF1S?H_[SD]_IMM_REAL$",
2636                                           "^LDNF1S?W_D_IMM_REAL$")>;
2637
2638// Contiguous Load two structures to two vectors, scalar + imm
2639def : InstRW<[V2Write_8cyc_2L_2V], (instregex "^LD2[BHWD]_IMM$")>;
2640
2641// Contiguous Load two structures to two vectors, scalar + scalar
2642def : InstRW<[V2Write_9cyc_2L_2V_2S], (instregex "^LD2[BHWD]$")>;
2643
2644// Contiguous Load three structures to three vectors, scalar + imm
2645def : InstRW<[V2Write_9cyc_3L_3V], (instregex "^LD3[BHWD]_IMM$")>;
2646
2647// Contiguous Load three structures to three vectors, scalar + scalar
2648def : InstRW<[V2Write_10cyc_3V_3L_3S], (instregex "^LD3[BHWD]$")>;
2649
2650// Contiguous Load four structures to four vectors, scalar + imm
2651def : InstRW<[V2Write_9cyc_4L_8V], (instregex "^LD4[BHWD]_IMM$")>;
2652
2653// Contiguous Load four structures to four vectors, scalar + scalar
2654def : InstRW<[V2Write_10cyc_4L_8V_4S], (instregex "^LD4[BHWD]$")>;
2655
2656// Gather load, vector + imm, 32-bit element size
2657def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
2658                                              "^GLD(FF)?1W_IMM_REAL$")>;
2659
2660// Gather load, vector + imm, 64-bit element size
2661def : InstRW<[V2Write_9cyc_1L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
2662                                              "^GLD(FF)?1D_IMM_REAL$")>;
2663
2664// Gather load, 32-bit scaled offset
2665def : InstRW<[V2Write_10cyc_1L_8V],
2666             (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED_REAL$",
2667                        "^GLD(FF)?1W_[SU]XTW_SCALED_REAL$")>;
2668
2669// Gather load, 64-bit scaled offset
2670// NOTE: These instructions are not specified in the SOG.
2671def : InstRW<[V2Write_10cyc_1L_4V],
2672             (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED_REAL$",
2673                        "^GLD(FF)?1D_([SU]XTW_)?SCALED_REAL$")>;
2674
2675// Gather load, 32-bit unpacked unscaled offset
2676def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
2677                                              "^GLD(FF)?1W_[SU]XTW_REAL$")>;
2678
2679// Gather load, 64-bit unpacked unscaled offset
2680// NOTE: These instructions are not specified in the SOG.
2681def : InstRW<[V2Write_9cyc_1L_2V],
2682             (instregex "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?REAL$",
2683                        "^GLD(FF)?1D_([SU]XTW_)?REAL$")>;
2684
2685// SVE Store instructions
2686// -----------------------------------------------------------------------------
2687
2688// Store from predicate reg
2689def : InstRW<[V2Write_1cyc_1L01], (instrs STR_PXI)>;
2690
2691// Store from vector reg
2692def : InstRW<[V2Write_2cyc_1L01_1V01], (instrs STR_ZXI)>;
2693
2694// Contiguous store, scalar + imm
2695def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BHWD]_IMM$",
2696                                                  "^ST1B_[HSD]_IMM$",
2697                                                  "^ST1H_[SD]_IMM$",
2698                                                  "^ST1W_D_IMM$")>;
2699
2700// Contiguous store, scalar + scalar
2701def : InstRW<[V2Write_2cyc_1L01_1S_1V01], (instregex "^ST1H(_[SD])?$")>;
2702def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BWD]$",
2703                                                  "^ST1B_[HSD]$",
2704                                                  "^ST1W_D$")>;
2705
2706// Contiguous store two structures from two vectors, scalar + imm
2707def : InstRW<[V2Write_4cyc_1L01_1V01], (instregex "^ST2[BHWD]_IMM$")>;
2708
2709// Contiguous store two structures from two vectors, scalar + scalar
2710def : InstRW<[V2Write_4cyc_2L01_2S_2V01], (instrs ST2H)>;
2711def : InstRW<[V2Write_4cyc_2L01_2V01], (instregex "^ST2[BWD]$")>;
2712
2713// Contiguous store three structures from three vectors, scalar + imm
2714def : InstRW<[V2Write_7cyc_9L01_9V01], (instregex "^ST3[BHWD]_IMM$")>;
2715
2716// Contiguous store three structures from three vectors, scalar + scalar
2717def : InstRW<[V2Write_7cyc_9L01_9S_9V01], (instregex "^ST3[BHWD]$")>;
2718
2719// Contiguous store four structures from four vectors, scalar + imm
2720def : InstRW<[V2Write_11cyc_18L01_18V01], (instregex "^ST4[BHWD]_IMM$")>;
2721
2722// Contiguous store four structures from four vectors, scalar + scalar
2723def : InstRW<[V2Write_11cyc_18L01_18S_18V01], (instregex "^ST4[BHWD]$")>;
2724
2725// Non temporal store, scalar + imm
2726def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
2727
2728// Non temporal store, scalar + scalar
2729def : InstRW<[V2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
2730def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
2731
2732// Scatter non temporal store, vector + scalar 32-bit element size
2733def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^STNT1[BHW]_ZZR_S")>;
2734
2735// Scatter non temporal store, vector + scalar 64-bit element size
2736def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^STNT1[BHWD]_ZZR_D")>;
2737
2738// Scatter store vector + imm 32-bit element size
2739def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_IMM$",
2740                                                  "^SST1W_IMM$")>;
2741
2742// Scatter store vector + imm 64-bit element size
2743def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_IMM$",
2744                                                  "^SST1D_IMM$")>;
2745
2746// Scatter store, 32-bit scaled offset
2747def : InstRW<[V2Write_4cyc_4L01_4V01],
2748             (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
2749
2750// Scatter store, 32-bit unpacked unscaled offset
2751def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_[SU]XTW$",
2752                                                  "^SST1D_[SU]XTW$")>;
2753
2754// Scatter store, 32-bit unpacked scaled offset
2755def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
2756                                                  "^SST1D_[SU]XTW_SCALED$")>;
2757
2758// Scatter store, 32-bit unscaled offset
2759def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_[SU]XTW$",
2760                                                  "^SST1W_[SU]XTW$")>;
2761
2762// Scatter store, 64-bit scaled offset
2763def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_SCALED$",
2764                                                  "^SST1D_SCALED$")>;
2765
2766// Scatter store, 64-bit unscaled offset
2767def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D$",
2768                                                  "^SST1D$")>;
2769
2770// SVE Miscellaneous instructions
2771// -----------------------------------------------------------------------------
2772
2773// Read first fault register, unpredicated
2774def : InstRW<[V2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;
2775
2776// Read first fault register, predicated
2777def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;
2778
2779// Read first fault register and set flags
2780def : InstRW<[V2Write_4or5cyc_2M0_2M], (instrs RDFFRS_PPz)>;
2781
2782// Set first fault register
2783// Write to first fault register
2784def : InstRW<[V2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;
2785
2786// Prefetch
2787// NOTE: This is not specified in the SOG.
2788def : InstRW<[V2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;
2789
2790// SVE Cryptographic instructions
2791// -----------------------------------------------------------------------------
2792
2793// Crypto AES ops
2794def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$",
2795                                           "^AESI?MC_ZZ_B$")>;
2796
2797// Crypto SHA3 ops
2798def : InstRW<[V2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$",
2799                                            "^RAX1_ZZZ_D$",
2800                                            "^XAR_ZZZI_[BHSD]$")>;
2801
2802// Crypto SM4 ops
2803def : InstRW<[V2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;
2804
2805}
2806