xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1//=- AArch64SchedNeoverseN2.td - NeoverseN2 Scheduling Defs --*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the scheduling model for the Arm Neoverse N2 processors.
10//
11//===----------------------------------------------------------------------===//
12
13def NeoverseN2Model : SchedMachineModel {
14  let IssueWidth            =  10; // Micro-ops dispatched at a time.
15  let MicroOpBufferSize     = 160; // Entries in micro-op re-order buffer.
16  let LoadLatency           =   4; // Optimistic load latency.
17  let MispredictPenalty     =  10; // Extra cycles for mispredicted branch.
18  let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
19  let CompleteModel         =   1;
20
21  list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
22    [HasSVE2p1, HasPAuthLR, HasCPA]);
23}
24
25//===----------------------------------------------------------------------===//
26// Define each kind of processor resource and number available on Neoverse N2.
27// Instructions are first fetched and then decoded into internal macro-ops
28// (MOPs). From there, the MOPs proceed through register renaming and dispatch
29// stages. A MOP can be split into two micro-ops further down the pipeline
30// after the decode stage. Once dispatched, micro-ops wait for their operands
31// and issue out-of-order to one of thirteen issue pipelines. Each issue
32// pipeline can accept one micro-op per cycle.
33
34let SchedModel = NeoverseN2Model in {
35
36// Define the (13) issue ports.
37def N2UnitB   : ProcResource<2>;  // Branch 0/1
38def N2UnitS   : ProcResource<2>;  // Integer single Cycle 0/1
39def N2UnitM0  : ProcResource<1>;  // Integer multicycle 0
40def N2UnitM1  : ProcResource<1>;  // Integer multicycle 1
41def N2UnitL01 : ProcResource<2>;  // Load/Store 0/1
42def N2UnitL2  : ProcResource<1>;  // Load 2
43def N2UnitD   : ProcResource<2>;  // Store data 0/1
44def N2UnitV0  : ProcResource<1>;  // FP/ASIMD 0
45def N2UnitV1  : ProcResource<1>;  // FP/ASIMD 1
46
47def N2UnitV : ProcResGroup<[N2UnitV0, N2UnitV1]>;  // FP/ASIMD 0/1
48def N2UnitM : ProcResGroup<[N2UnitM0, N2UnitM1]>;  // Integer single/multicycle 0/1
49def N2UnitL : ProcResGroup<[N2UnitL01, N2UnitL2]>; // Load/Store 0/1 and Load 2
50def N2UnitI : ProcResGroup<[N2UnitS, N2UnitM0, N2UnitM1]>; // Integer single cycle 0/1 and single/multicycle 0/1
51
52// Define commonly used read types.
53
54// No forwarding is provided for these types.
55def : ReadAdvance<ReadI,       0>;
56def : ReadAdvance<ReadISReg,   0>;
57def : ReadAdvance<ReadIEReg,   0>;
58def : ReadAdvance<ReadIM,      0>;
59def : ReadAdvance<ReadIMA,     0>;
60def : ReadAdvance<ReadID,      0>;
61def : ReadAdvance<ReadExtrHi,  0>;
62def : ReadAdvance<ReadAdrBase, 0>;
63def : ReadAdvance<ReadST,      0>;
64def : ReadAdvance<ReadVLD,     0>;
65
66def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
67def : WriteRes<WriteBarrier, []> { let Latency = 1; }
68def : WriteRes<WriteHint,    []> { let Latency = 1; }
69def : WriteRes<WriteLDHi,    []> { let Latency = 4; }
70
71//===----------------------------------------------------------------------===//
72// Define customized scheduler read/write types specific to the Neoverse N2.
73
74//===----------------------------------------------------------------------===//
75// Define generic 1 micro-op types
76
77def N2Write_1cyc_1B   : SchedWriteRes<[N2UnitB]>   { let Latency = 1; }
78def N2Write_1cyc_1I   : SchedWriteRes<[N2UnitI]>   { let Latency = 1; }
79def N2Write_1cyc_1M   : SchedWriteRes<[N2UnitM]>   { let Latency = 1; }
80def N2Write_1cyc_1M0  : SchedWriteRes<[N2UnitM0]>  { let Latency = 1; }
81def N2Write_1cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 1; }
82def N2Write_2cyc_1M   : SchedWriteRes<[N2UnitM]>   { let Latency = 2; }
83def N2Write_3cyc_1M   : SchedWriteRes<[N2UnitM]>   { let Latency = 3; }
84def N2Write_2cyc_1M0  : SchedWriteRes<[N2UnitM0]>  { let Latency = 2;
85                                                     let ReleaseAtCycles = [2]; }
86def N2Write_3cyc_1M0  : SchedWriteRes<[N2UnitM0]>  { let Latency = 3;
87                                                     let ReleaseAtCycles = [3]; }
88def N2Write_5cyc_1M0  : SchedWriteRes<[N2UnitM0]>  { let Latency = 5;
89                                                     let ReleaseAtCycles = [5]; }
90def N2Write_12cyc_1M0 : SchedWriteRes<[N2UnitM0]>  { let Latency = 12;
91                                                     let ReleaseAtCycles = [12]; }
92def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]>  { let Latency = 20;
93                                                     let ReleaseAtCycles = [20]; }
94def N2Write_4cyc_1L   : SchedWriteRes<[N2UnitL]>   { let Latency = 4; }
95def N2Write_6cyc_1L   : SchedWriteRes<[N2UnitL]>   { let Latency = 6; }
96def N2Write_2cyc_1V   : SchedWriteRes<[N2UnitV]>   { let Latency = 2; }
97def N2Write_3cyc_1V   : SchedWriteRes<[N2UnitV]>   { let Latency = 3; }
98def N2Write_4cyc_1V   : SchedWriteRes<[N2UnitV]>   { let Latency = 4; }
99def N2Write_5cyc_1V   : SchedWriteRes<[N2UnitV]>   { let Latency = 5; }
100def N2Write_12cyc_1V  : SchedWriteRes<[N2UnitV]>   { let Latency = 12; }
101def N2Write_2cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 2; }
102def N2Write_3cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 3; }
103def N2Write_4cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 4; }
104def N2Write_7cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 7;
105                                                     let ReleaseAtCycles = [7]; }
106def N2Write_9cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 9; }
107def N2Write_10cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 10; }
108def N2Write_12cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 12; }
109def N2Write_13cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 13; }
110def N2Write_15cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 15; }
111def N2Write_16cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 16; }
112def N2Write_20cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 20; }
113def N2Write_2cyc_1V1  : SchedWriteRes<[N2UnitV1]>  { let Latency = 2; }
114def N2Write_3cyc_1V1  : SchedWriteRes<[N2UnitV1]>  { let Latency = 3; }
115def N2Write_4cyc_1V1  : SchedWriteRes<[N2UnitV1]>  { let Latency = 4; }
116def N2Write_6cyc_1V1  : SchedWriteRes<[N2UnitV1]>  { let Latency = 6; }
117def N2Write_10cyc_1V1 : SchedWriteRes<[N2UnitV1]>  { let Latency = 10; }
118def N2Write_6cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 6; }
119
120//===----------------------------------------------------------------------===//
121// Define generic 2 micro-op types
122
123def N2Write_1cyc_1B_1S : SchedWriteRes<[N2UnitB, N2UnitS]> {
124  let Latency     = 1;
125  let NumMicroOps = 2;
126}
127
128def N2Write_6cyc_1M0_1B : SchedWriteRes<[N2UnitM0, N2UnitB]> {
129  let Latency     = 6;
130  let NumMicroOps = 2;
131}
132
133def N2Write_9cyc_1M0_1L : SchedWriteRes<[N2UnitM0, N2UnitL]> {
134  let Latency     = 9;
135  let NumMicroOps = 2;
136}
137
138def N2Write_3cyc_1I_1M : SchedWriteRes<[N2UnitI, N2UnitM]> {
139  let Latency     = 3;
140  let NumMicroOps = 2;
141}
142
143def N2Write_4cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
144  let Latency     = 4;
145  let NumMicroOps = 2;
146}
147
148def N2Write_5cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
149  let Latency     = 5;
150  let NumMicroOps = 2;
151}
152
153def N2Write_6cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
154  let Latency     = 6;
155  let NumMicroOps = 2;
156}
157
158def N2Write_7cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
159  let Latency     = 7;
160  let NumMicroOps = 2;
161}
162
163def N2Write_1cyc_1L01_1D : SchedWriteRes<[N2UnitL01, N2UnitD]> {
164  let Latency     = 1;
165  let NumMicroOps = 2;
166}
167
168def N2Write_5cyc_1M0_1V : SchedWriteRes<[N2UnitM0, N2UnitV]> {
169  let Latency     = 5;
170  let NumMicroOps = 2;
171}
172
173def N2Write_2cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> {
174  let Latency     = 2;
175  let NumMicroOps = 2;
176}
177
178def N2Write_4cyc_1V1_1V : SchedWriteRes<[N2UnitV1, N2UnitV]> {
179  let Latency     = 4;
180  let NumMicroOps = 2;
181}
182
183def N2Write_4cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
184  let Latency     = 4;
185  let NumMicroOps = 2;
186}
187
188def N2Write_10cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
189  let Latency = 10;
190  let NumMicroOps = 2;
191  let ReleaseAtCycles = [5, 5];
192}
193
194def N2Write_13cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
195  let Latency = 13;
196  let NumMicroOps = 2;
197  let ReleaseAtCycles = [6, 7];
198}
199
200def N2Write_15cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
201  let Latency = 15;
202  let NumMicroOps = 2;
203  let ReleaseAtCycles = [7, 8];
204}
205
206def N2Write_16cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
207  let Latency = 16;
208  let NumMicroOps = 2;
209  let ReleaseAtCycles = [8, 8];
210}
211
212def N2Write_4cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
213  let Latency     = 4;
214  let NumMicroOps = 2;
215}
216
217def N2Write_6cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
218  let Latency     = 6;
219  let NumMicroOps = 2;
220}
221
222def N2Write_6cyc_2L : SchedWriteRes<[N2UnitL, N2UnitL]> {
223  let Latency     = 6;
224  let NumMicroOps = 2;
225}
226
227def N2Write_8cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
228  let Latency     = 8;
229  let NumMicroOps = 2;
230}
231
232def N2Write_4cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> {
233  let Latency     = 4;
234  let NumMicroOps = 2;
235}
236
237def N2Write_3cyc_1M0_1M  : SchedWriteRes<[N2UnitM0, N2UnitM]> {
238  let Latency     = 3;
239  let NumMicroOps = 2;
240}
241
242def N2Write_2cyc_1M0_1M  : SchedWriteRes<[N2UnitM0, N2UnitM]> {
243  let Latency     = 2;
244  let NumMicroOps = 2;
245}
246
247def N2Write_6cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> {
248  let Latency     = 6;
249  let NumMicroOps = 2;
250}
251
252def N2Write_4cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> {
253  let Latency     = 4;
254  let NumMicroOps = 2;
255}
256
257def N2Write_5cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
258  let Latency     = 5;
259  let NumMicroOps = 2;
260}
261
262def N2Write_5cyc_1V1_1M0 : SchedWriteRes<[N2UnitV1, N2UnitM0]> {
263  let Latency     = 5;
264  let NumMicroOps = 2;
265}
266
267def N2Write_7cyc_1M0_1V0 : SchedWriteRes<[N2UnitM0, N2UnitV0]> {
268  let Latency     = 7;
269  let NumMicroOps = 2;
270}
271
272def N2Write_2cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> {
273  let Latency     = 2;
274  let NumMicroOps = 2;
275}
276
277def N2Write_6cyc_1V_1V1 : SchedWriteRes<[N2UnitV, N2UnitV1]> {
278  let Latency     = 6;
279  let NumMicroOps = 2;
280}
281
282def N2Write_6cyc_1L_1M : SchedWriteRes<[N2UnitL, N2UnitM]> {
283  let Latency     = 6;
284  let NumMicroOps = 2;
285}
286
287def N2Write_6cyc_1L_1S : SchedWriteRes<[N2UnitL, N2UnitS]> {
288  let Latency     = 6;
289  let NumMicroOps = 2;
290}
291
292def N2Write_9cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
293  let Latency     = 9;
294  let NumMicroOps = 2;
295}
296
297def N2Write_4cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> {
298  let Latency     = 4;
299  let NumMicroOps = 2;
300}
301
302//===----------------------------------------------------------------------===//
303// Define generic 3 micro-op types
304
305def N2Write_1cyc_1L01_1D_1I : SchedWriteRes<[N2UnitL01, N2UnitD, N2UnitI]> {
306  let Latency     = 1;
307  let NumMicroOps = 3;
308}
309
310def N2Write_2cyc_1L01_1V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitI]> {
311  let Latency     = 2;
312  let NumMicroOps = 3;
313}
314
315def N2Write_2cyc_1L01_2V : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV]> {
316  let Latency     = 2;
317  let NumMicroOps = 3;
318}
319
320def N2Write_7cyc_1M_1M0_1V : SchedWriteRes<[N2UnitM, N2UnitM0, N2UnitV]> {
321  let Latency     = 7;
322  let NumMicroOps = 3;
323}
324
325def N2Write_8cyc_1M0_1V1_1V : SchedWriteRes<[N2UnitM0, N2UnitV1, N2UnitV]> {
326  let Latency     = 8;
327  let NumMicroOps = 3;
328}
329
330def N2Write_10cyc_1V_1L_1S : SchedWriteRes<[N2UnitV, N2UnitL, N2UnitL]> {
331  let Latency     = 10;
332  let NumMicroOps = 3;
333}
334
335def N2Write_2cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
336  let Latency     = 2;
337  let NumMicroOps = 3;
338}
339
340def N2Write_4cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
341  let Latency     = 4;
342  let NumMicroOps = 3;
343}
344
345def N2Write_6cyc_3L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL]> {
346  let Latency     = 6;
347  let NumMicroOps = 3;
348}
349
350def N2Write_8cyc_1L_2V : SchedWriteRes<[N2UnitL, N2UnitV, N2UnitV]> {
351  let Latency     = 8;
352  let NumMicroOps = 3;
353}
354
355//===----------------------------------------------------------------------===//
356// Define generic 4 micro-op types
357
358def N2Write_2cyc_1L01_2V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV,
359                                             N2UnitI]> {
360  let Latency     = 2;
361  let NumMicroOps = 4;
362}
363
364def N2Write_6cyc_4V0 : SchedWriteRes<[N2UnitV0, N2UnitV0, N2UnitV0, N2UnitV0]> {
365  let Latency     = 6;
366  let NumMicroOps = 4;
367}
368
369def N2Write_4cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
370  let Latency     = 4;
371  let NumMicroOps = 4;
372}
373
374def N2Write_6cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
375  let Latency     = 6;
376  let NumMicroOps = 4;
377}
378
379def N2Write_8cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
380  let Latency     = 8;
381  let NumMicroOps = 4;
382}
383
384def N2Write_9cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
385  let Latency     = 9;
386  let NumMicroOps = 4;
387}
388
389def N2Write_2cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
390                                          N2UnitV]> {
391  let Latency     = 2;
392  let NumMicroOps = 4;
393}
394
395def N2Write_4cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
396                                          N2UnitV]> {
397  let Latency     = 4;
398  let NumMicroOps = 4;
399}
400
401def N2Write_5cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
402                                          N2UnitV]> {
403  let Latency     = 5;
404  let NumMicroOps = 4;
405}
406
407def N2Write_8cyc_2M0_2V0 : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitV0,
408                                          N2UnitV0]> {
409  let Latency     = 8;
410  let NumMicroOps = 4;
411}
412
413def N2Write_11cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
414                                          N2UnitV1]> {
415  let Latency     = 11;
416  let NumMicroOps = 4;
417}
418
419def N2Write_9cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
420                                         N2UnitV1]> {
421  let Latency     = 9;
422  let NumMicroOps = 4;
423}
424
425def N2Write_8cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
426                                         N2UnitV1]> {
427  let Latency     = 8;
428  let NumMicroOps = 4;
429}
430
431def N2Write_10cyc_2L_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
432                                          N2UnitV1]> {
433  let Latency     = 10;
434  let NumMicroOps = 4;
435}
436
437def N2Write_10cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
438  let Latency     = 10;
439  let NumMicroOps = 4;
440}
441
442def N2Write_4cyc_2M0_2M : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitM,
443                                         N2UnitM]> {
444  let Latency     = 4;
445  let NumMicroOps = 4;
446}
447
448def N2Write_6cyc_2I_2L : SchedWriteRes<[N2UnitI, N2UnitI, N2UnitL, N2UnitL]> {
449  let Latency     = 6;
450  let NumMicroOps = 4;
451}
452
453def N2Write_7cyc_4L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL]> {
454  let Latency     = 7;
455  let NumMicroOps = 4;
456}
457
458//===----------------------------------------------------------------------===//
459// Define generic 5 micro-op types
460
461def N2Write_2cyc_1L01_2V_2I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV,
462                                             N2UnitI, N2UnitI]> {
463  let Latency     = 2;
464  let NumMicroOps = 5;
465}
466
467def N2Write_8cyc_2L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV,
468                                        N2UnitV]> {
469  let Latency     = 8;
470  let NumMicroOps = 5;
471}
472
473//===----------------------------------------------------------------------===//
474// Define generic 6 micro-op types
475
476def N2Write_8cyc_3L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
477                                        N2UnitV, N2UnitV, N2UnitV]> {
478  let Latency     = 8;
479  let NumMicroOps = 6;
480}
481
482def N2Write_2cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
483                                          N2UnitV, N2UnitV, N2UnitV]> {
484  let Latency     = 2;
485  let NumMicroOps = 6;
486}
487
488def N2Write_6cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
489                                          N2UnitV, N2UnitV, N2UnitV]> {
490  let Latency     = 6;
491  let NumMicroOps = 6;
492}
493
494def N2Write_4cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
495                                          N2UnitV, N2UnitV, N2UnitV]> {
496  let Latency     = 4;
497  let NumMicroOps = 6;
498}
499
500def N2Write_10cyc_2L_2V_2S : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV,
501                                            N2UnitS, N2UnitS]> {
502  let Latency     = 10;
503  let NumMicroOps = 6;
504}
505
506//===----------------------------------------------------------------------===//
507// Define generic 7 micro-op types
508
509def N2Write_8cyc_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
510                                        N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
511  let Latency     = 8;
512  let NumMicroOps = 7;
513}
514
515//===----------------------------------------------------------------------===//
516// Define generic 8 micro-op types
517
518def N2Write_6cyc_8V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV,
519                                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
520  let Latency     = 6;
521  let NumMicroOps = 8;
522}
523
524def N2Write_2cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
525                                          N2UnitL01, N2UnitV, N2UnitV, N2UnitV,
526                                          N2UnitV]> {
527  let Latency     = 2;
528  let NumMicroOps = 8;
529}
530
531def N2Write_5cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
532                                          N2UnitL01, N2UnitV, N2UnitV, N2UnitV,
533                                          N2UnitV]> {
534  let Latency     = 5;
535  let NumMicroOps = 8;
536}
537
538def N2Write_8cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
539                                        N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
540  let Latency     = 8;
541  let NumMicroOps = 8;
542}
543
544def N2Write_9cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
545                                        N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
546  let Latency     = 9;
547  let NumMicroOps = 8;
548}
549
550//===----------------------------------------------------------------------===//
551// Define generic 10 micro-op types
552
553def N2Write_7cyc_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
554                                          N2UnitL01, N2UnitL01, N2UnitV,
555                                          N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
556  let Latency     = 7;
557  let NumMicroOps = 10;
558}
559
560//===----------------------------------------------------------------------===//
561// Define generic 12 micro-op types
562
563def N2Write_7cyc_6L01_6V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
564                                          N2UnitL01, N2UnitL01, N2UnitL01,
565                                          N2UnitV, N2UnitV, N2UnitV, N2UnitV,
566                                          N2UnitV, N2UnitV]> {
567  let Latency     = 7;
568  let NumMicroOps = 12;
569}
570
571//===----------------------------------------------------------------------===//
572// Define generic 15 micro-op types
573
574def N2Write_7cyc_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
575                                             N2UnitL01, N2UnitL01, N2UnitS,
576                                             N2UnitS, N2UnitS, N2UnitS,
577                                             N2UnitS, N2UnitV, N2UnitV,
578                                             N2UnitV, N2UnitV, N2UnitV]> {
579  let Latency     = 7;
580  let NumMicroOps = 15;
581}
582
583//===----------------------------------------------------------------------===//
584// Define generic 18 micro-op types
585
586def N2Write_11cyc_9L01_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
587                                           N2UnitL01, N2UnitL01, N2UnitL01,
588                                           N2UnitL01, N2UnitL01, N2UnitL01,
589                                           N2UnitV, N2UnitV, N2UnitV,
590                                           N2UnitV, N2UnitV, N2UnitV,
591                                           N2UnitV, N2UnitV, N2UnitV]> {
592  let Latency     = 11;
593  let NumMicroOps = 18;
594}
595
596//===----------------------------------------------------------------------===//
597// Define generic 27 micro-op types
598
599def N2Write_11cyc_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
600                                              N2UnitL01, N2UnitL01, N2UnitL01,
601                                              N2UnitL01, N2UnitL01, N2UnitL01,
602                                              N2UnitS, N2UnitS, N2UnitS,
603                                              N2UnitS, N2UnitS, N2UnitS,
604                                              N2UnitS, N2UnitS, N2UnitS,
605                                              N2UnitV, N2UnitV, N2UnitV,
606                                              N2UnitV, N2UnitV, N2UnitV,
607                                              N2UnitV, N2UnitV, N2UnitV]> {
608  let Latency     = 11;
609  let NumMicroOps = 27;
610}
611
612//===----------------------------------------------------------------------===//
613// Define types for arithmetic and logical ops with short shifts
614def N2Write_Arith : SchedWriteVariant<[
615                      SchedVar<IsCheapLSL,  [N2Write_1cyc_1I]>,
616                      SchedVar<NoSchedPred, [N2Write_2cyc_1M]>]>;
617
618def N2Write_Logical: SchedWriteVariant<[
619                       SchedVar<NeoverseNoLSL, [N2Write_1cyc_1I]>,
620                       SchedVar<NoSchedPred,   [N2Write_2cyc_1M]>]>;
621
622// Miscellaneous
623// -----------------------------------------------------------------------------
624
625def : InstRW<[WriteI], (instrs COPY)>;
626
627// Branch Instructions
628// -----------------------------------------------------------------------------
629
630// Branch, immed
631// Compare and branch
632def : SchedAlias<WriteBr,    N2Write_1cyc_1B>;
633
634// Branch, register
635def : SchedAlias<WriteBrReg, N2Write_1cyc_1B>;
636
637// Branch and link, immed
638// Branch and link, register
639def : InstRW<[N2Write_1cyc_1B_1S], (instrs BL, BLR)>;
640
641// Arithmetic and Logical Instructions
642// -----------------------------------------------------------------------------
643
644// ALU, basic
645// ALU, basic, flagset
646def : SchedAlias<WriteI,     N2Write_1cyc_1I>;
647
648// ALU, extend and shift
649def : SchedAlias<WriteIEReg, N2Write_2cyc_1M>;
650
651// Arithmetic, LSL shift, shift <= 4
652// Arithmetic, flagset, LSL shift, shift <= 4
653// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
654def : SchedAlias<WriteISReg, N2Write_Arith>;
655
656// Logical, shift, no flagset
657def : InstRW<[N2Write_1cyc_1I],
658             (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
659
660// Logical, shift, flagset
661def : InstRW<[N2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
662
663// Arithmetic, immediate to logical address tag
664def : InstRW<[N2Write_2cyc_1M], (instrs ADDG, SUBG)>;
665
666// Convert floating-point condition flags
667// Flag manipulation instructions
668def : WriteRes<WriteSys, []> { let Latency = 1; }
669
670// Insert Random Tags
671def : InstRW<[N2Write_2cyc_1M], (instrs IRG, IRGstack)>;
672
673// Insert Tag Mask
674// Subtract Pointer
675// Subtract Pointer, flagset
676def : InstRW<[N2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;
677
678// Move and shift instructions
679// -----------------------------------------------------------------------------
680
681def : SchedAlias<WriteImm, N2Write_1cyc_1I>;
682
683// Divide and Multiply Instructions
684// -----------------------------------------------------------------------------
685
686// SDIV, UDIV
687def : SchedAlias<WriteID32,  N2Write_12cyc_1M0>;
688def : SchedAlias<WriteID64,  N2Write_20cyc_1M0>;
689
690def : WriteRes<WriteIM32, [N2UnitM]> { let Latency = 2; }
691def : WriteRes<WriteIM64, [N2UnitM]> { let Latency = 2; }
692
693// Multiply high
694def : InstRW<[N2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>;
695
696// Pointer Authentication Instructions (v8.3 PAC)
697// -----------------------------------------------------------------------------
698
699// Authenticate data address
700// Authenticate instruction address
701// Compute pointer authentication code for data address
702// Compute pointer authentication code, using generic key
703// Compute pointer authentication code for instruction address
704def : InstRW<[N2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>;
705
706// Branch and link, register, with pointer authentication
707// Branch, register, with pointer authentication
708// Branch, return, with pointer authentication
709def : InstRW<[N2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
710                                            BRAAZ, BRAB, BRABZ, RETAA, RETAB,
711                                            ERETAA, ERETAB)>;
712
713
714// Load register, with pointer authentication
715def : InstRW<[N2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
716
717// Strip pointer authentication code
718def : InstRW<[N2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>;
719
720// Miscellaneous data-processing instructions
721// -----------------------------------------------------------------------------
722
723// Bitfield extract, one reg
724// Bitfield extract, two regs
725// NOTE: We don't model the difference between EXTR where both operands are the
726// same (one reg).
727def : SchedAlias<WriteExtr, N2Write_3cyc_1I_1M>;
728def : InstRW<[N2Write_3cyc_1I_1M], (instrs EXTRWrri, EXTRXrri)>;
729
730// Bitfield move, basic
731def : SchedAlias<WriteIS, N2Write_1cyc_1I>;
732
733// Bitfield move, insert
734def : InstRW<[N2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>;
735
736// Load instructions
737// -----------------------------------------------------------------------------
738
739def : SchedAlias<WriteLD,    N2Write_4cyc_1L>;
740def : SchedAlias<WriteLDIdx, N2Write_4cyc_1I_1L>;
741
742// Load pair, signed immed offset, signed words
743def : InstRW<[N2Write_5cyc_1M0, WriteLDHi], (instrs LDPSWi)>;
744// Load pair, immed post-index or immed pre-index, signed words
745def : InstRW<[WriteAdr, N2Write_5cyc_1M0, WriteLDHi],
746             (instregex "^LDPSW(post|pre)$")>;
747
748// Store instructions
749// -----------------------------------------------------------------------------
750
751def : SchedAlias<WriteST,    N2Write_1cyc_1L01_1D>;
752def : SchedAlias<WriteSTIdx, N2Write_1cyc_1L01_1D_1I>;
753def : SchedAlias<WriteSTP,   N2Write_1cyc_1L01_1D>;
754def : SchedAlias<WriteAdr,   N2Write_1cyc_1I>; // copied from A57.
755
756// Tag load instructions
757// -----------------------------------------------------------------------------
758
759// Load allocation tag
760// Load multiple allocation tags
761def : InstRW<[N2Write_4cyc_1L], (instrs LDG, LDGM)>;
762
763// Tag store instructions
764// -----------------------------------------------------------------------------
765
766// Store allocation tags to one or two granules, post-index
767// Store allocation tags to one or two granules, pre-index
768// Store allocation tag to one or two granules, zeroing, post-index
769// Store Allocation Tag to one or two granules, zeroing, pre-index
770// Store allocation tag and reg pair to memory, post-Index
771// Store allocation tag and reg pair to memory, pre-Index
772def : InstRW<[N2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex,
773                                                ST2GPreIndex, ST2GPostIndex,
774                                                STZGPreIndex, STZGPostIndex,
775                                                STZ2GPreIndex, STZ2GPostIndex,
776                                                STGPpre, STGPpost)>;
777
778// Store allocation tags to one or two granules, signed offset
779// Store allocation tag to two granules, zeroing, signed offset
780// Store allocation tag and reg pair to memory, signed offset
781// Store multiple allocation tags
782def : InstRW<[N2Write_1cyc_1L01_1D], (instrs STGi, ST2Gi, STZGi,
783                                             STZ2Gi, STGPi, STGM, STZGM)>;
784
785// FP data processing instructions
786// -----------------------------------------------------------------------------
787
788// FP absolute value
789// FP arithmetic
790// FP min/max
791// FP negate
792// FP select
793def : SchedAlias<WriteF,     N2Write_2cyc_1V>;
794
795// FP compare
796def : SchedAlias<WriteFCmp,  N2Write_2cyc_1V0>;
797
798// FP divide, square root
799def : SchedAlias<WriteFDiv,  N2Write_7cyc_1V0>;
800
801// FP divide, H-form
802def : InstRW<[N2Write_7cyc_1V0],  (instrs FDIVHrr)>;
803// FP divide, S-form
804def : InstRW<[N2Write_10cyc_1V0], (instrs FDIVSrr)>;
805// FP divide, D-form
806def : InstRW<[N2Write_15cyc_1V0], (instrs FDIVDrr)>;
807
808// FP square root, H-form
809def : InstRW<[N2Write_7cyc_1V0],  (instrs FSQRTHr)>;
810// FP square root, S-form
811def : InstRW<[N2Write_9cyc_1V0],  (instrs FSQRTSr)>;
812// FP square root, D-form
813def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRTDr)>;
814
815// FP multiply
816def : WriteRes<WriteFMul, [N2UnitV]> { let Latency = 3; }
817
818// FP multiply accumulate
819def : InstRW<[N2Write_4cyc_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
820
821// FP round to integral
822def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ][HSD]r$",
823                                            "^FRINT(32|64)[XZ][SD]r$")>;
824
825// FP miscellaneous instructions
826// -----------------------------------------------------------------------------
827
828// FP convert, from gen to vec reg
829def : InstRW<[N2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
830
831// FP convert, from vec to gen reg
832def : InstRW<[N2Write_3cyc_1V], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
833
834// FP convert, Javascript from vec to gen reg
835// FP convert, from vec to vec reg
836def : SchedAlias<WriteFCvt, N2Write_3cyc_1V0>;
837
838// FP move, immed
839// FP move, register
840def : SchedAlias<WriteFImm, N2Write_2cyc_1V>;
841
842// FP transfer, from gen to low half of vec reg
843def : InstRW<[N2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
844                                         FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;
845
846// FP transfer, from gen to high half of vec reg
847def : InstRW<[N2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;
848
849// FP transfer, from vec to gen reg
850def : SchedAlias<WriteFCopy, N2Write_2cyc_1V>;
851
852// FP load instructions
853// -----------------------------------------------------------------------------
854
855// Load vector reg, literal, S/D/Q forms
856// Load vector reg, unscaled immed
857def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[SDQ]l$",
858                                           "^LDUR[BHSDQ]i$")>;
859
860// Load vector reg, immed post-index
861def : InstRW<[N2Write_6cyc_1I_1L, WriteI], (instregex "^LDR[BHSDQ]post$")>;
862// Load vector reg, immed pre-index
863def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L], (instregex "^LDR[BHSDQ]pre$")>;
864
865// Load vector reg, unsigned immed
866def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;
867
868// Load vector reg, register offset, basic
869// Load vector reg, register offset, scale, S/D-form
870// Load vector reg, register offset, extend
871// Load vector reg, register offset, extend, scale, S/D-form
872def : InstRW<[N2Write_6cyc_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
873
874// Load vector reg, register offset, scale, H/Q-form
875// Load vector reg, register offset, extend, scale, H/Q-form
876def : InstRW<[N2Write_7cyc_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
877
878// Load vector pair, immed offset, S/D-form
879def : InstRW<[N2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
880
881// Load vector pair, immed offset, Q-form
882def : InstRW<[N2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
883
884// Load vector pair, immed post-index, S/D-form
885// Load vector pair, immed pre-index, S/D-form
886def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L, WriteLDHi],
887             (instregex "^LDP[SD](pre|post)$")>;
888
889// Load vector pair, immed post-index, Q-form
890// Load vector pair, immed pre-index, Q-form
891def : InstRW<[WriteAdr, N2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost,
892                                                                LDPQpre)>;
893
894// FP store instructions
895// -----------------------------------------------------------------------------
896
897// Store vector reg, unscaled immed, B/H/S/D-form
898// Store vector reg, unscaled immed, Q-form
899def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STUR[BHSDQ]i$")>;
900
901// Store vector reg, immed post-index, B/H/S/D-form
902// Store vector reg, immed post-index, Q-form
903// Store vector reg, immed pre-index, B/H/S/D-form
904// Store vector reg, immed pre-index, Q-form
905def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I, ReadAdrBase],
906             (instregex "^STR[BHSDQ](pre|post)$")>;
907
908// Store vector reg, unsigned immed, B/H/S/D-form
909// Store vector reg, unsigned immed, Q-form
910def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STR[BHSDQ]ui$")>;
911
912// Store vector reg, register offset, basic, B/H/S/D-form
913// Store vector reg, register offset, basic, Q-form
914// Store vector reg, register offset, scale, S/D-form
915// Store vector reg, register offset, extend, B/H/S/D-form
916// Store vector reg, register offset, extend, Q-form
917// Store vector reg, register offset, extend, scale, S/D-form
918def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
919             (instregex "^STR[BSD]ro[WX]$")>;
920
921// Store vector reg, register offset, scale, H-form
922// Store vector reg, register offset, scale, Q-form
923// Store vector reg, register offset, extend, scale, H-form
924// Store vector reg, register offset, extend, scale, Q-form
925def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
926             (instregex "^STR[HQ]ro[WX]$")>;
927
928// Store vector pair, immed offset, S-form
929// Store vector pair, immed offset, D-form
930def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STN?P[SD]i$")>;
931
932// Store vector pair, immed offset, Q-form
933def : InstRW<[N2Write_2cyc_1L01_2V], (instrs STPQi, STNPQi)>;
934
935// Store vector pair, immed post-index, S-form
936// Store vector pair, immed post-index, D-form
937// Store vector pair, immed pre-index, S-form
938// Store vector pair, immed pre-index, D-form
939def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I],
940             (instregex "^STP[SD](pre|post)$")>;
941
942// Store vector pair, immed post-index, Q-form
943def : InstRW<[N2Write_2cyc_1L01_2V_1I], (instrs STPQpost)>;
944
945// Store vector pair, immed pre-index, Q-form
946def : InstRW<[N2Write_2cyc_1L01_2V_2I], (instrs STPQpre)>;
947
948// ASIMD integer instructions
949// -----------------------------------------------------------------------------
950
951// ASIMD absolute diff
952// ASIMD absolute diff long
953// ASIMD arith, basic
954// ASIMD arith, complex
955// ASIMD arith, pair-wise
956// ASIMD compare
957// ASIMD logical
958// ASIMD max/min, basic and pair-wise
959def : SchedAlias<WriteVd, N2Write_2cyc_1V>;
960def : SchedAlias<WriteVq, N2Write_2cyc_1V>;
961
962// ASIMD absolute diff accum
963// ASIMD absolute diff accum long
964def : InstRW<[N2Write_4cyc_1V1],
965             (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
966
967// ASIMD arith, reduce, 4H/4S
968def : InstRW<[N2Write_2cyc_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
969
970// ASIMD arith, reduce, 8B/8H
971def : InstRW<[N2Write_4cyc_1V1_1V],
972             (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
973
974// ASIMD arith, reduce, 16B
975def : InstRW<[N2Write_4cyc_1V1], (instrs ADDVv16i8v, SADDLVv16i8v,
976                                         UADDLVv16i8v)>;
977
978// ASIMD dot product
979// ASIMD dot product using signed and unsigned integers
980def : InstRW<[N2Write_3cyc_1V],
981             (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
982
983// ASIMD matrix multiply-accumulate
984def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA, UMMLA, USMMLA)>;
985
986// ASIMD max/min, reduce, 4H/4S
987def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU](MAX|MIN)Vv4i16v$",
988                                            "^[SU](MAX|MIN)Vv4i32v$")>;
989
990// ASIMD max/min, reduce, 8B/8H
991def : InstRW<[N2Write_4cyc_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
992                                               "^[SU](MAX|MIN)Vv8i16v$")>;
993
994// ASIMD max/min, reduce, 16B
995def : InstRW<[N2Write_4cyc_2V1], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
996
997// ASIMD multiply
998def : InstRW<[N2Write_4cyc_1V0], (instregex "^MULv", "^SQ(R)?DMULHv")>;
999
1000// ASIMD multiply accumulate
1001def : InstRW<[N2Write_4cyc_1V0], (instregex "^MLAv", "^MLSv")>;
1002
1003// ASIMD multiply accumulate high
1004def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
1005
1006// ASIMD multiply accumulate long
1007def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
1008
1009// ASIMD multiply accumulate saturating long
1010def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMLALv", "^SQDMLSLv")>;
1011
1012// ASIMD multiply/multiply long (8x8) polynomial, D-form
1013// ASIMD multiply/multiply long (8x8) polynomial, Q-form
1014def : InstRW<[N2Write_3cyc_1V0], (instregex "^PMULL?(v8i8|v16i8)$")>;
1015
1016// ASIMD multiply long
1017def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]MULLv", "^SQDMULLv")>;
1018
1019// ASIMD pairwise add and accumulate long
1020def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALPv")>;
1021
1022// ASIMD shift accumulate
1023def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;
1024
1025// ASIMD shift by immed, basic
1026def : InstRW<[N2Write_2cyc_1V1], (instregex "^SHLv", "^SHLLv", "^SHRNv",
1027                                            "^SSHLLv", "^SSHRv", "^USHLLv",
1028                                            "^USHRv")>;
1029
1030// ASIMD shift by immed and insert, basic
1031def : InstRW<[N2Write_2cyc_1V1], (instregex "^SLIv", "^SRIv")>;
1032
1033// ASIMD shift by immed, complex
1034def : InstRW<[N2Write_4cyc_1V1],
1035             (instregex "^RSHRNv", "^SQRSHRNv", "^SQRSHRUNv",
1036                        "^(SQSHLU?|UQSHL)[bhsd]$",
1037                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
1038                        "^SQSHRNv", "^SQSHRUNv", "^SRSHRv", "^UQRSHRNv",
1039                        "^UQSHRNv", "^URSHRv")>;
1040
1041// ASIMD shift by register, basic
1042def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]SHLv")>;
1043
1044// ASIMD shift by register, complex
1045def : InstRW<[N2Write_4cyc_1V1],
1046             (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
1047                        "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
1048
1049// ASIMD floating-point instructions
1050// -----------------------------------------------------------------------------
1051
1052// ASIMD FP absolute value/difference
1053// ASIMD FP arith, normal
1054// ASIMD FP compare
1055// ASIMD FP complex add
1056// ASIMD FP max/min, normal
1057// ASIMD FP max/min, pairwise
1058// ASIMD FP negate
1059// Handled by SchedAlias<WriteV[dq], ...>
1060
1061// ASIMD FP complex multiply add
1062def : InstRW<[N2Write_4cyc_1V], (instregex "^FCMLAv")>;
1063
1064// ASIMD FP convert, long (F16 to F32)
1065def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTL(v4|v8)i16")>;
1066
1067// ASIMD FP convert, long (F32 to F64)
1068def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTL(v2|v4)i32")>;
1069
1070// ASIMD FP convert, narrow (F32 to F16)
1071def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTN(v4|v8)i16")>;
1072
1073// ASIMD FP convert, narrow (F64 to F32)
1074def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTN(v2|v4)i32",
1075                                            "^FCVTXN(v2|v4)f32")>;
1076
1077// ASIMD FP convert, other, D-form F32 and Q-form F64
1078def : InstRW<[N2Write_3cyc_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
1079                                            "^[SU]CVTFv2f(32|64)$")>;
1080
1081// ASIMD FP convert, other, D-form F16 and Q-form F32
1082def : InstRW<[N2Write_4cyc_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
1083                                            "^[SU]CVTFv4f(16|32)$")>;
1084
1085// ASIMD FP convert, other, Q-form F16
1086def : InstRW<[N2Write_6cyc_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
1087                                            "^[SU]CVTFv8f16$")>;
1088
1089// ASIMD FP divide, D-form, F16
1090def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVv4f16)>;
1091
1092// ASIMD FP divide, D-form, F32
1093def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv2f32)>;
1094
1095// ASIMD FP divide, Q-form, F16
1096def : InstRW<[N2Write_13cyc_2V0], (instrs FDIVv8f16)>;
1097
1098// ASIMD FP divide, Q-form, F32
1099def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv4f32)>;
1100
1101// ASIMD FP divide, Q-form, F64
1102def : InstRW<[N2Write_15cyc_2V0], (instrs FDIVv2f64)>;
1103
1104// ASIMD FP max/min, reduce, F32 and D-form F16
1105def : InstRW<[N2Write_4cyc_1V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
1106
1107// ASIMD FP max/min, reduce, Q-form F16
1108def : InstRW<[N2Write_6cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
1109
1110// ASIMD FP multiply
1111def : InstRW<[N2Write_3cyc_1V], (instregex "^FMULv", "^FMULXv")>;
1112
1113// ASIMD FP multiply accumulate
1114def : InstRW<[N2Write_4cyc_1V], (instregex "^FMLAv", "^FMLSv")>;
1115
1116// ASIMD FP multiply accumulate long
1117def : InstRW<[N2Write_5cyc_1V], (instregex "^FMLALv", "^FMLSLv")>;
1118
1119// ASIMD FP round, D-form F32 and Q-form F64
1120def : InstRW<[N2Write_3cyc_1V0],
1121             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
1122                        "^FRINT[32|64)[XZ]v2f(32|64)$")>;
1123
1124// ASIMD FP round, D-form F16 and Q-form F32
1125def : InstRW<[N2Write_4cyc_2V0],
1126             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
1127                        "^FRINT(32|64)[XZ]v4f32$")>;
1128
1129
1130// ASIMD FP round, Q-form F16
1131def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
1132
1133// ASIMD FP square root, D-form, F16
1134def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTv4f16)>;
1135
1136// ASIMD FP square root, D-form, F32
1137def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv2f32)>;
1138
1139// ASIMD FP square root, Q-form, F16
1140def : InstRW<[N2Write_13cyc_2V0], (instrs FSQRTv8f16)>;
1141
1142// ASIMD FP square root, Q-form, F32
1143def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv4f32)>;
1144
1145// ASIMD FP square root, Q-form, F64
1146def : InstRW<[N2Write_16cyc_2V0], (instrs FSQRTv2f64)>;
1147
1148// ASIMD BFloat16 (BF16) instructions
1149// -----------------------------------------------------------------------------
1150
1151// ASIMD convert, F32 to BF16
1152def : InstRW<[N2Write_4cyc_1V0], (instrs BFCVTN, BFCVTN2)>;
1153
1154// ASIMD dot product
1155def : InstRW<[N2Write_4cyc_1V], (instrs BFDOTv4bf16, BFDOTv8bf16)>;
1156
1157// ASIMD matrix multiply accumulate
1158def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA)>;
1159
1160// ASIMD multiply accumulate long
1161def : InstRW<[N2Write_4cyc_1V], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
1162                                        BFMLALTIdx)>;
1163
1164// Scalar convert, F32 to BF16
1165def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT)>;
1166
1167// ASIMD miscellaneous instructions
1168// -----------------------------------------------------------------------------
1169
1170// ASIMD bit reverse
1171// ASIMD bitwise insert
1172// ASIMD count
1173// ASIMD duplicate, element
1174// ASIMD extract
1175// ASIMD extract narrow
1176// ASIMD insert, element to element
1177// ASIMD move, FP immed
1178// ASIMD move, integer immed
1179// ASIMD reverse
1180// ASIMD table lookup, 1 or 2 table regs
1181// ASIMD table lookup extension, 1 table reg
1182// ASIMD transfer, element to gen reg
1183// ASIMD transpose
1184// ASIMD unzip/zip
1185// Handled by SchedAlias<WriteV[dq], ...>
1186
1187// ASIMD duplicate, gen reg
1188def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;
1189
1190// ASIMD extract narrow, saturating
1191def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
1192
1193// ASIMD reciprocal and square root estimate, D-form U32
1194def : InstRW<[N2Write_3cyc_1V0], (instrs URECPEv2i32, URSQRTEv2i32)>;
1195
1196// ASIMD reciprocal and square root estimate, Q-form U32
1197def : InstRW<[N2Write_4cyc_2V0], (instrs URECPEv4i32, URSQRTEv4i32)>;
1198
1199// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
1200def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPEv1f16, FRECPEv1i32,
1201                                         FRECPEv1i64, FRECPEv2f32,
1202                                         FRSQRTEv1f16, FRSQRTEv1i32,
1203                                         FRSQRTEv1i64, FRSQRTEv2f32)>;
1204
1205// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
1206def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPEv4f16, FRECPEv4f32,
1207                                         FRSQRTEv4f16, FRSQRTEv4f32)>;
1208
1209// ASIMD reciprocal and square root estimate, Q-form F16
1210def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPEv8f16, FRSQRTEv8f16)>;
1211
1212// ASIMD reciprocal exponent
1213def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRECPXv")>;
1214
1215// ASIMD reciprocal step
1216def : InstRW<[N2Write_4cyc_1V], (instregex "^FRECPSv", "^FRSQRTSv")>;
1217
1218// ASIMD table lookup, 3 table regs
1219def : InstRW<[N2Write_4cyc_2V], (instrs TBLv8i8Three, TBLv16i8Three)>;
1220
1221// ASIMD table lookup, 4 table regs
1222def : InstRW<[N2Write_4cyc_4V], (instrs TBLv8i8Four, TBLv16i8Four)>;
1223
1224// ASIMD table lookup extension, 2 table reg
1225def : InstRW<[N2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;
1226
1227// ASIMD table lookup extension, 3 table reg
1228def : InstRW<[N2Write_6cyc_4V], (instrs TBXv8i8Three, TBXv16i8Three)>;
1229
1230// ASIMD table lookup extension, 4 table reg
1231def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>;
1232
1233// ASIMD transfer, gen reg to element
1234def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
1235
1236// ASIMD load instructions
1237// -----------------------------------------------------------------------------
1238
1239// ASIMD load, 1 element, multiple, 1 reg, D-form
1240def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
1241def : InstRW<[WriteAdr, N2Write_6cyc_1L],
1242             (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
1243
1244// ASIMD load, 1 element, multiple, 1 reg, Q-form
1245def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
1246def : InstRW<[WriteAdr, N2Write_6cyc_1L],
1247             (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;
1248
1249// ASIMD load, 1 element, multiple, 2 reg, D-form
1250def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
1251def : InstRW<[WriteAdr, N2Write_6cyc_2L],
1252             (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
1253
1254// ASIMD load, 1 element, multiple, 2 reg, Q-form
1255def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
1256def : InstRW<[WriteAdr, N2Write_6cyc_2L],
1257             (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
1258
1259// ASIMD load, 1 element, multiple, 3 reg, D-form
1260def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
1261def : InstRW<[WriteAdr, N2Write_6cyc_3L],
1262             (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
1263
1264// ASIMD load, 1 element, multiple, 3 reg, Q-form
1265def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
1266def : InstRW<[WriteAdr, N2Write_6cyc_3L],
1267             (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
1268
1269// ASIMD load, 1 element, multiple, 4 reg, D-form
1270def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
1271def : InstRW<[WriteAdr, N2Write_7cyc_4L],
1272             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
1273
1274// ASIMD load, 1 element, multiple, 4 reg, Q-form
1275def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
1276def : InstRW<[WriteAdr, N2Write_7cyc_4L],
1277             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
1278
1279// ASIMD load, 1 element, one lane, B/H/S
1280// ASIMD load, 1 element, one lane, D
1281def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "LD1i(8|16|32|64)$")>;
1282def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)_POST$")>;
1283
1284// ASIMD load, 1 element, all lanes, D-form, B/H/S
1285// ASIMD load, 1 element, all lanes, D-form, D
1286def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "LD1Rv(8b|4h|2s|1d)$")>;
1287def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
1288
1289// ASIMD load, 1 element, all lanes, Q-form
1290def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "LD1Rv(16b|8h|4s|2d)$")>;
1291def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
1292
1293// ASIMD load, 2 element, multiple, D-form, B/H/S
1294def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "LD2Twov(8b|4h|2s)$")>;
1295def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
1296
1297// ASIMD load, 2 element, multiple, Q-form, B/H/S
1298// ASIMD load, 2 element, multiple, Q-form, D
1299def : InstRW<[N2Write_8cyc_2L_2V],           (instregex "LD2Twov(16b|8h|4s|2d)$")>;
1300def : InstRW<[WriteAdr, N2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
1301
1302// ASIMD load, 2 element, one lane, B/H
1303// ASIMD load, 2 element, one lane, S
1304// ASIMD load, 2 element, one lane, D
1305def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "LD2i(8|16|32|64)$")>;
1306def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)_POST$")>;
1307
1308// ASIMD load, 2 element, all lanes, D-form, B/H/S
1309// ASIMD load, 2 element, all lanes, D-form, D
1310def : InstRW<[N2Write_8cyc_1L_2V],            (instregex "LD2Rv(8b|4h|2s|1d)$")>;
1311def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V],  (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
1312
1313// ASIMD load, 2 element, all lanes, Q-form
1314def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "LD2Rv(16b|8h|4s|2d)$")>;
1315def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
1316
1317// ASIMD load, 3 element, multiple, D-form, B/H/S
1318def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "LD3Threev(8b|4h|2s)$")>;
1319def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
1320
1321// ASIMD load, 3 element, multiple, Q-form, B/H/S
1322def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "LD3Threev(16b|8h|4s)$")>;
1323def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
1324
1325// ASIMD load, 3 element, multiple, Q-form, D
1326def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "LD3Threev(2d)$")>;
1327def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Threev(2d)_POST$")>;
1328
1329// ASIMD load, 3 element, one lane, B/H
1330// ASIMD load, 3 element, one lane, S
1331// ASIMD load, 3 element, one lane, D
1332def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "LD3i(8|16|32|64)$")>;
1333def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)_POST$")>;
1334
1335// ASIMD load, 3 element, all lanes, D-form, B/H/S
1336// ASIMD load, 3 element, all lanes, D-form, D
1337def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "LD3Rv(8b|4h|2s|1d)$")>;
1338def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
1339
1340// ASIMD load, 3 element, all lanes, Q-form, B/H/S
1341// ASIMD load, 3 element, all lanes, Q-form, D
1342def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "LD3Rv(16b|8h|4s|2d)$")>;
1343def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
1344
1345// ASIMD load, 4 element, multiple, D-form, B/H/S
1346def : InstRW<[N2Write_8cyc_3L_4V],           (instregex "LD4Fourv(8b|4h|2s)$")>;
1347def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
1348
1349// ASIMD load, 4 element, multiple, Q-form, B/H/S
1350// ASIMD load, 4 element, multiple, Q-form, D
1351def : InstRW<[N2Write_9cyc_4L_4V],           (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
1352def : InstRW<[WriteAdr, N2Write_9cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
1353
1354// ASIMD load, 4 element, one lane, B/H
1355// ASIMD load, 4 element, one lane, S
1356// ASIMD load, 4 element, one lane, D
1357def : InstRW<[N2Write_8cyc_3L_4V],           (instregex "LD4i(8|16|32|64)$")>;
1358def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)_POST$")>;
1359
1360// ASIMD load, 4 element, all lanes, D-form, B/H/S
1361// ASIMD load, 4 element, all lanes, D-form, D
1362def : InstRW<[N2Write_8cyc_3L_4V],              (instregex "LD4Rv(8b|4h|2s|1d)$")>;
1363def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V],    (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
1364
1365// ASIMD load, 4 element, all lanes, Q-form, B/H/S
1366// ASIMD load, 4 element, all lanes, Q-form, D
1367def : InstRW<[N2Write_8cyc_4L_4V],            (instregex "LD4Rv(16b|8h|4s|2d)$")>;
1368def : InstRW<[WriteAdr, N2Write_8cyc_4L_4V],  (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
1369
1370// ASIMD store instructions
1371// -----------------------------------------------------------------------------
1372
1373// ASIMD store, 1 element, multiple, 1 reg, D-form
1374def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "ST1Onev(8b|4h|2s|1d)$")>;
1375def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
1376
1377// ASIMD store, 1 element, multiple, 1 reg, Q-form
1378def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "ST1Onev(16b|8h|4s|2d)$")>;
1379def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
1380
1381// ASIMD store, 1 element, multiple, 2 reg, D-form
1382def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "ST1Twov(8b|4h|2s|1d)$")>;
1383def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
1384
1385// ASIMD store, 1 element, multiple, 2 reg, Q-form
1386def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "ST1Twov(16b|8h|4s|2d)$")>;
1387def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
1388
1389// ASIMD store, 1 element, multiple, 3 reg, D-form
1390def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "ST1Threev(8b|4h|2s|1d)$")>;
1391def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
1392
1393// ASIMD store, 1 element, multiple, 3 reg, Q-form
1394def : InstRW<[N2Write_2cyc_3L01_3V],           (instregex "ST1Threev(16b|8h|4s|2d)$")>;
1395def : InstRW<[WriteAdr, N2Write_2cyc_3L01_3V], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
1396
1397// ASIMD store, 1 element, multiple, 4 reg, D-form
1398def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
1399def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
1400
1401// ASIMD store, 1 element, multiple, 4 reg, Q-form
1402def : InstRW<[N2Write_2cyc_4L01_4V],           (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
1403def : InstRW<[WriteAdr, N2Write_2cyc_4L01_4V], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
1404
1405// ASIMD store, 1 element, one lane, B/H/S
1406// ASIMD store, 1 element, one lane, D
1407def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "ST1i(8|16|32|64)$")>;
1408def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST1i(8|16|32|64)_POST$")>;
1409
1410// ASIMD store, 2 element, multiple, D-form, B/H/S
1411def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "ST2Twov(8b|4h|2s)$")>;
1412def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
1413
1414// ASIMD store, 2 element, multiple, Q-form, B/H/S
1415// ASIMD store, 2 element, multiple, Q-form, D
1416def : InstRW<[N2Write_4cyc_2L01_2V],           (instregex "ST2Twov(16b|8h|4s|2d)$")>;
1417def : InstRW<[WriteAdr, N2Write_4cyc_2L01_2V], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
1418
1419// ASIMD store, 2 element, one lane, B/H/S
1420// ASIMD store, 2 element, one lane, D
1421def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "ST2i(8|16|32|64)$")>;
1422def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST2i(8|16|32|64)_POST$")>;
1423
1424// ASIMD store, 3 element, multiple, D-form, B/H/S
1425def : InstRW<[N2Write_5cyc_2L01_2V],           (instregex "ST3Threev(8b|4h|2s)$")>;
1426def : InstRW<[WriteAdr, N2Write_5cyc_2L01_2V], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
1427
1428// ASIMD store, 3 element, multiple, Q-form, B/H/S
1429// ASIMD store, 3 element, multiple, Q-form, D
1430def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST3Threev(16b|8h|4s|2d)$")>;
1431def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
1432
1433// ASIMD store, 3 element, one lane, B/H
1434// ASIMD store, 3 element, one lane, S
1435// ASIMD store, 3 element, one lane, D
1436def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST3i(8|16|32|64)$")>;
1437def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST3i(8|16|32|64)_POST$")>;
1438
1439// ASIMD store, 4 element, multiple, D-form, B/H/S
1440def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST4Fourv(8b|4h|2s)$")>;
1441def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
1442
1443// ASIMD store, 4 element, multiple, Q-form, B/H/S
1444def : InstRW<[N2Write_7cyc_6L01_6V],           (instregex "ST4Fourv(16b|8h|4s)$")>;
1445def : InstRW<[WriteAdr, N2Write_7cyc_6L01_6V], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
1446
1447// ASIMD store, 4 element, multiple, Q-form, D
1448def : InstRW<[N2Write_5cyc_4L01_4V],           (instregex "ST4Fourv(2d)$")>;
1449def : InstRW<[WriteAdr, N2Write_5cyc_4L01_4V], (instregex "ST4Fourv(2d)_POST$")>;
1450
1451// ASIMD store, 4 element, one lane, B/H/S
1452def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST4i(8|16|32)$")>;
1453def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST4i(8|16|32)_POST$")>;
1454
1455// ASIMD store, 4 element, one lane, D
1456def : InstRW<[N2Write_4cyc_3L01_3V],            (instregex "ST4i(64)$")>;
1457def : InstRW<[WriteAdr, N2Write_4cyc_3L01_3V],  (instregex "ST4i(64)_POST$")>;
1458
1459// Cryptography extensions
1460// -----------------------------------------------------------------------------
1461
1462// Crypto AES ops
1463def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;
1464
1465// Crypto polynomial (64x64) multiply long
1466def : InstRW<[N2Write_2cyc_1V0], (instrs PMULLv1i64, PMULLv2i64)>;
1467
1468// Crypto SHA1 hash acceleration op
1469// Crypto SHA1 schedule acceleration ops
1470def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>;
1471
1472// Crypto SHA1 hash acceleration ops
1473// Crypto SHA256 hash acceleration ops
1474def : InstRW<[N2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
1475
1476// Crypto SHA256 schedule acceleration ops
1477def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>;
1478
1479// Crypto SHA512 hash acceleration ops
1480def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;
1481
1482// Crypto SHA3 ops
1483def : InstRW<[N2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
1484
1485// Crypto SM3 ops
1486def : InstRW<[N2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
1487                                            "^SM3TT[12][AB]$")>;
1488
1489// Crypto SM4 ops
1490def : InstRW<[N2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>;
1491
1492// CRC
1493// -----------------------------------------------------------------------------
1494
1495def : InstRW<[N2Write_2cyc_1M0], (instregex "^CRC32")>;
1496
1497// SVE Predicate instructions
1498// -----------------------------------------------------------------------------
1499
1500// Loop control, based on predicate
1501def : InstRW<[N2Write_2cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP,
1502                                        BRKB_PPmP, BRKB_PPzP)>;
1503
1504// Loop control, based on predicate and flag setting
1505def : InstRW<[N2Write_3cyc_1M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
1506
1507// Loop control, propagating
1508def : InstRW<[N2Write_2cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
1509
1510// Loop control, propagating and flag setting
1511def : InstRW<[N2Write_3cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
1512                                            BRKPBS_PPzPP)>;
1513
1514// Loop control, based on GPR
1515def : InstRW<[N2Write_3cyc_1M],
1516             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
1517
1518def : InstRW<[N2Write_3cyc_1M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>;
1519
1520// Loop terminate
1521def : InstRW<[N2Write_1cyc_1M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
1522
1523// Predicate counting scalar
1524def : InstRW<[N2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
1525def : InstRW<[N2Write_2cyc_1M],
1526             (instregex "^(CNT|DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$",
1527                        "^SQ(DEC|INC)[BHWD]_XPiWdI$",
1528                        "^(UQDEC|UQINC)[BHWD]_WPiI$")>;
1529
1530// Predicate counting scalar, active predicate
1531def : InstRW<[N2Write_2cyc_1M],
1532             (instregex "^CNTP_XPP_[BHSD]$",
1533                        "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$",
1534                        "^(UQDEC|UQINC)P_WP_[BHSD]$",
1535                        "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>;
1536
1537// Predicate counting vector, active predicate
1538def : InstRW<[N2Write_7cyc_1M_1M0_1V],
1539             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>;
1540
1541// Predicate logical
1542def : InstRW<[N2Write_1cyc_1M0],
1543             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
1544
1545// Predicate logical, flag setting
1546def : InstRW<[N2Write_2cyc_1M0_1M],
1547             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>;
1548
1549// Predicate reverse
1550def : InstRW<[N2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>;
1551
1552// Predicate select
1553def : InstRW<[N2Write_1cyc_1M0], (instrs SEL_PPPP)>;
1554
1555// Predicate set
1556def : InstRW<[N2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>;
1557
1558// Predicate set/initialize, set flags
1559def : InstRW<[N2Write_3cyc_1M], (instregex "^PTRUES_[BHSD]$")>;
1560
1561// Predicate find first/next
1562def : InstRW<[N2Write_3cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>;
1563
1564// Predicate test
1565def : InstRW<[N2Write_1cyc_1M], (instrs PTEST_PP)>;
1566
1567// Predicate transpose
1568def : InstRW<[N2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSDQ]$")>;
1569
1570// Predicate unpack and widen
1571def : InstRW<[N2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
1572
1573// Predicate zip/unzip
1574def : InstRW<[N2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
1575
1576// SVE integer instructions
1577// -----------------------------------------------------------------------------
1578
1579// Arithmetic, absolute diff
1580def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]$")>;
1581
1582// Arithmetic, absolute diff accum
1583def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;
1584
1585// Arithmetic, absolute diff accum long
1586def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;
1587
1588// Arithmetic, absolute diff long
1589def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;
1590
1591// Arithmetic, basic
1592def : InstRW<[N2Write_2cyc_1V],
1593             (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
1594                        "^(ADD|SUB)_ZZZ_[BHSD]$",
1595                        "^(ADD|SUB|SUBR)_ZI_[BHSD]$",
1596                        "^ADR_[SU]XTW_ZZZ_D_[0123]$",
1597                        "^ADR_LSL_ZZZ_[SD]_[0123]$",
1598                        "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$",
1599                        "^SADDLBT_ZZZ_[HSD]$",
1600                        "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$",
1601                        "^SSUBL(BT|TB)_ZZZ_[HSD]$")>;
1602
1603// Arithmetic, complex
1604def : InstRW<[N2Write_2cyc_1V],
1605             (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$",
1606                        "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
1607                        "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$",
1608                        "^[SU]Q(ADD|SUB)_ZI_[BHSD]$",
1609                        "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$",
1610                        "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>;
1611
1612// Arithmetic, large integer
1613def : InstRW<[N2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;
1614
1615// Arithmetic, pairwise add
1616def : InstRW<[N2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>;
1617
1618// Arithmetic, pairwise add and accum long
1619def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
1620
1621// Arithmetic, shift
1622def : InstRW<[N2Write_2cyc_1V1],
1623             (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$",
1624                        "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$",
1625                        "^(ASR|LSL|LSR)_ZPmI_[BHSD]$",
1626                        "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$",
1627                        "^(ASR|LSL|LSR)_ZZI_[BHSD]$",
1628                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>;
1629
1630// Arithmetic, shift and accumulate
1631def : InstRW<[N2Write_4cyc_1V1],
1632             (instregex "^(SRSRA|SSRA|URSRA|USRA)_ZZI_[BHSD]$")>;
1633
1634// Arithmetic, shift by immediate
1635// Arithmetic, shift by immediate and insert
1636def : InstRW<[N2Write_2cyc_1V1],
1637             (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>;
1638
1639// Arithmetic, shift complex
1640def : InstRW<[N2Write_4cyc_1V1],
1641             (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$",
1642                        "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$",
1643                        "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$",
1644                        "^SQSHRU?N[BT]_ZZI_[BHS]$",
1645                        "^UQR?SHRN[BT]_ZZI_[BHS]$")>;
1646
1647// Arithmetic, shift right for divide
1648def : InstRW<[N2Write_4cyc_1V1], (instregex "^ASRD_ZPmI_[BHSD]$")>;
1649
1650// Arithmetic, shift rounding
1651def : InstRW<[N2Write_4cyc_1V1],
1652             (instregex "^(SRSHL|SRSHLR|URSHL|URSHLR)_ZPmZ_[BHSD]$",
1653                        "^[SU]RSHR_ZPmI_[BHSD]$")>;
1654
1655// Bit manipulation
1656def : InstRW<[N2Write_6cyc_2V1],
1657             (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]$")>;
1658
1659// Bitwise select
1660def : InstRW<[N2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;
1661
1662// Count/reverse bits
1663def : InstRW<[N2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>;
1664
1665// Broadcast logical bitmask immediate to vector
1666def : InstRW<[N2Write_2cyc_1V], (instrs DUPM_ZI)>;
1667
1668// Compare and set flags
1669def : InstRW<[N2Write_4cyc_1V0_1M],
1670             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
1671                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
1672
1673// Complex add
1674def : InstRW<[N2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>;
1675
1676// Complex dot product 8-bit element
1677def : InstRW<[N2Write_3cyc_1V], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
1678
1679// Complex dot product 16-bit element
1680def : InstRW<[N2Write_4cyc_1V0], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
1681
1682// Complex multiply-add B, H, S element size
1683def : InstRW<[N2Write_4cyc_1V0], (instregex "^CMLA_ZZZ_[BHS]$",
1684                                            "^CMLA_ZZZI_[HS]$")>;
1685
1686// Complex multiply-add D element size
1687def : InstRW<[N2Write_5cyc_2V0], (instrs CMLA_ZZZ_D)>;
1688
1689// Conditional extract operations, scalar form
1690def : InstRW<[N2Write_8cyc_1M0_1V1_1V], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
1691
1692// Conditional extract operations, SIMD&FP scalar and vector forms
1693def : InstRW<[N2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
1694                                            "^COMPACT_ZPZ_[SD]$",
1695                                            "^SPLICE_ZPZZ?_[BHSD]$")>;
1696
1697// Convert to floating point, 64b to float or convert to double
1698def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]$")>;
1699
1700// Convert to floating point, 64b to half
1701def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_DtoH$")>;
1702
1703// Convert to floating point, 32b to single or half
1704def : InstRW<[N2Write_4cyc_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>;
1705
1706// Convert to floating point, 32b to double
1707def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_StoD$")>;
1708
1709// Convert to floating point, 16b to half
1710def : InstRW<[N2Write_6cyc_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>;
1711
1712// Copy, scalar
1713def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>;
1714
1715// Copy, scalar SIMD&FP or imm
1716def : InstRW<[N2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$",
1717                                           "^CPY_ZPzI_[BHSD]$")>;
1718
1719// Divides, 32 bit
1720def : InstRW<[N2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>;
1721
1722// Divides, 64 bit
1723def : InstRW<[N2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>;
1724
1725// Dot product, 8 bit
1726def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]DOT_ZZZI?_S$")>;
1727
1728// Dot product, 8 bit, using signed and unsigned integers
1729def : InstRW<[N2Write_3cyc_1V], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
1730
1731// Dot product, 16 bit
1732def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>;
1733
1734// Duplicate, immediate and indexed form
1735def : InstRW<[N2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$",
1736                                           "^DUP_ZZI_[BHSDQ]$")>;
1737
1738// Duplicate, scalar form
1739def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
1740
1741// Extend, sign or zero
1742def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$",
1743                                            "^[SU]XTH_ZPmZ_[SD]$",
1744                                            "^[SU]XTW_ZPmZ_[D]$")>;
1745
1746// Extract
1747def : InstRW<[N2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
1748
1749// Extract narrow saturating
1750def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$",
1751                                            "^SQXTUN[BT]_ZZ_[BHS]$")>;
1752
1753// Extract/insert operation, SIMD and FP scalar form
1754def : InstRW<[N2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
1755                                            "^INSR_ZV_[BHSD]$")>;
1756
1757// Extract/insert operation, scalar
1758def : InstRW<[N2Write_5cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]$",
1759                                                "^INSR_ZR_[BHSD]$")>;
1760
1761// Histogram operations
1762def : InstRW<[N2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$",
1763                                           "^HISTSEG_ZZZ$")>;
1764
1765// Horizontal operations, B, H, S form, immediate operands only
1766def : InstRW<[N2Write_4cyc_1V0], (instregex "^INDEX_II_[BHS]$")>;
1767
1768// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar
1769// operands only / immediate, scalar operands
1770def : InstRW<[N2Write_7cyc_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
1771
1772// Horizontal operations, D form, immediate operands only
1773def : InstRW<[N2Write_5cyc_2V0], (instrs INDEX_II_D)>;
1774
1775// Horizontal operations, D form, scalar, immediate operands)/ scalar operands
1776// only / immediate, scalar operands
1777def : InstRW<[N2Write_8cyc_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
1778
1779// Logical
1780def : InstRW<[N2Write_2cyc_1V],
1781             (instregex "^(AND|EOR|ORR)_ZI$",
1782                        "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$",
1783                        "^EOR(BT|TB)_ZZZ_[BHSD]$",
1784                        "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;
1785
1786// Max/min, basic and pairwise
1787def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$",
1788                                           "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$")>;
1789
1790// Matching operations
1791def : InstRW<[N2Write_2cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>;
1792
1793// Matrix multiply-accumulate
1794def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
1795
1796// Move prefix
1797def : InstRW<[N2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
1798                                           "^MOVPRFX_ZZ$")>;
1799
1800// Multiply, B, H, S element size
1801def : InstRW<[N2Write_4cyc_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$",
1802                                            "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;
1803
1804// Multiply, D element size
1805def : InstRW<[N2Write_5cyc_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$",
1806                                            "^[SU]MULH_(ZPmZ|ZZZ)_D$")>;
1807
1808// Multiply long
1809def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
1810                                            "^[SU]MULL[BT]_ZZZ_[HSD]$")>;
1811
1812// Multiply accumulate, B, H, S element size
1813def : InstRW<[N2Write_4cyc_1V0], (instregex "^ML[AS]_ZZZI_[BHS]$",
1814                                            "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>;
1815
1816// Multiply accumulate, D element size
1817def : InstRW<[N2Write_5cyc_2V0], (instregex "^ML[AS]_ZZZI_D$",
1818                                            "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>;
1819
1820// Multiply accumulate long
1821def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
1822                                            "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;
1823
1824// Multiply accumulate saturating doubling long regular
1825def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
1826                                            "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;
1827
1828// Multiply saturating doubling high, B, H, S element size
1829def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULH_ZZZ_[BHS]$",
1830                                            "^SQDMULH_ZZZI_[HS]$")>;
1831
1832// Multiply saturating doubling high, D element size
1833def : InstRW<[N2Write_5cyc_2V0], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
1834
1835// Multiply saturating doubling long
1836def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
1837                                            "^SQDMULL[BT]_ZZZI_[SD]$")>;
1838
1839// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
1840// element size
1841def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
1842                                            "^SQRDCMLAH_ZZZ_[BHS]$",
1843                                            "^SQRDML[AS]H_ZZZI_[HS]$",
1844                                            "^SQRDCMLAH_ZZZI_[HS]$")>;
1845
1846// Multiply saturating rounding doubling regular/complex accumulate, D element
1847// size
1848def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDML[AS]H_ZZZI?_D$",
1849                                            "^SQRDCMLAH_ZZZ_D$")>;
1850
1851// Multiply saturating rounding doubling regular/complex, B, H, S element size
1852def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMULH_ZZZ_[BHS]$",
1853                                            "^SQRDMULH_ZZZI_[HS]$")>;
1854
1855// Multiply saturating rounding doubling regular/complex, D element size
1856def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDMULH_ZZZI?_D$")>;
1857
1858// Multiply/multiply long, (8x8) polynomial
1859def : InstRW<[N2Write_2cyc_1V0], (instregex "^PMUL_ZZZ_B$",
1860                                            "^PMULL[BT]_ZZZ_[HDQ]$")>;
1861
1862// Predicate counting vector
1863def : InstRW<[N2Write_2cyc_1V0],
1864             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>;
1865
1866// Reciprocal estimate
1867def : InstRW<[N2Write_4cyc_2V0], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S)>;
1868
1869// Reduction, arithmetic, B form
1870def : InstRW<[N2Write_11cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
1871
1872// Reduction, arithmetic, H form
1873def : InstRW<[N2Write_9cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
1874
1875// Reduction, arithmetic, S form
1876def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
1877
1878// Reduction, arithmetic, D form
1879def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
1880
1881// Reduction, logical
1882def : InstRW<[N2Write_6cyc_1V_1V1], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>;
1883
1884// Reverse, vector
1885def : InstRW<[N2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$",
1886                                           "^REVB_ZPmZ_[HSD]$",
1887                                           "^REVH_ZPmZ_[SD]$",
1888                                           "^REVW_ZPmZ_D$")>;
1889
1890// Select, vector form
1891def : InstRW<[N2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>;
1892
1893// Table lookup
1894def : InstRW<[N2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>;
1895
1896// Table lookup extension
1897def : InstRW<[N2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>;
1898
1899// Transpose, vector form
1900def : InstRW<[N2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>;
1901
1902// Unpack and extend
1903def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>;
1904
1905// Zip/unzip
1906def : InstRW<[N2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
1907
1908// SVE floating-point instructions
1909// -----------------------------------------------------------------------------
1910
1911// Floating point absolute value/difference
1912def : InstRW<[N2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]$")>;
1913
1914// Floating point arithmetic
1915def : InstRW<[N2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
1916                                           "^FADDP_ZPmZZ_[HSD]$",
1917                                           "^FNEG_ZPmZ_[HSD]$",
1918                                           "^FSUBR_ZPm[IZ]_[HSD]$")>;
1919
1920// Floating point associative add, F16
1921def : InstRW<[N2Write_10cyc_1V1], (instrs FADDA_VPZ_H)>;
1922
1923// Floating point associative add, F32
1924def : InstRW<[N2Write_6cyc_1V1], (instrs FADDA_VPZ_S)>;
1925
1926// Floating point associative add, F64
1927def : InstRW<[N2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
1928
1929// Floating point compare
1930def : InstRW<[N2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]$",
1931                                            "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$",
1932                                            "^FCM(LE|LT)_PPzZ0_[HSD]$",
1933                                            "^FCMUO_PPzZZ_[HSD]$")>;
1934
1935// Floating point complex add
1936def : InstRW<[N2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>;
1937
1938// Floating point complex multiply add
1939def : InstRW<[N2Write_5cyc_1V], (instregex "^FCMLA_ZPmZZ_[HSD]$",
1940                                           "^FCMLA_ZZZI_[HS]$")>;
1941
1942// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
1943def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$",
1944                                            "^FCVTLT_ZPmZ_HtoS$",
1945                                            "^FCVTNT_ZPmZ_StoH$")>;
1946
1947// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
1948// or F64 to F16)
1949def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$",
1950                                            "^FCVTLT_ZPmZ_StoD$",
1951                                            "^FCVTNT_ZPmZ_DtoS$")>;
1952
1953// Floating point convert, round to odd
1954def : InstRW<[N2Write_3cyc_1V0], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
1955
1956// Floating point base2 log, F16
1957def : InstRW<[N2Write_6cyc_4V0], (instrs FLOGB_ZPmZ_H)>;
1958
1959// Floating point base2 log, F32
1960def : InstRW<[N2Write_4cyc_2V0], (instrs FLOGB_ZPmZ_S)>;
1961
1962// Floating point base2 log, F64
1963def : InstRW<[N2Write_3cyc_1V0], (instrs FLOGB_ZPmZ_D)>;
1964
1965// Floating point convert to integer, F16
1966def : InstRW<[N2Write_6cyc_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>;
1967
1968// Floating point convert to integer, F32
1969def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>;
1970
1971// Floating point convert to integer, F64
1972def : InstRW<[N2Write_3cyc_1V0],
1973             (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>;
1974
1975// Floating point copy
1976def : InstRW<[N2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$",
1977                                           "^FDUP_ZI_[HSD]$")>;
1978
1979// Floating point divide, F16
1980def : InstRW<[N2Write_13cyc_1V0], (instregex "^FDIVR?_ZPmZ_H$")>;
1981
1982// Floating point divide, F32
1983def : InstRW<[N2Write_10cyc_1V0], (instregex "^FDIVR?_ZPmZ_S$")>;
1984
1985// Floating point divide, F64
1986def : InstRW<[N2Write_15cyc_1V0], (instregex "^FDIVR?_ZPmZ_D$")>;
1987
1988// Floating point min/max pairwise
1989def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>;
1990
1991// Floating point min/max
1992def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;
1993
1994// Floating point multiply
1995def : InstRW<[N2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$",
1996                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;
1997
1998// Floating point multiply accumulate
1999def : InstRW<[N2Write_4cyc_1V],
2000             (instregex "^FML[AS]_(ZPmZZ|ZZZI)_[HSD]$",
2001                        "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_ZPmZZ_[HSD]$")>;
2002
2003// Floating point multiply add/sub accumulate long
2004def : InstRW<[N2Write_4cyc_1V], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
2005
2006// Floating point reciprocal estimate, F16
2007def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H,
2008                                         FRSQRTE_ZZ_H)>;
2009
2010// Floating point reciprocal estimate, F32
2011def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S,
2012                                         FRSQRTE_ZZ_S)>;
2013
2014// Floating point reciprocal estimate, F64
2015def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D,
2016                                         FRSQRTE_ZZ_D)>;
2017
2018// Floating point reciprocal step
2019def : InstRW<[N2Write_4cyc_1V0], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
2020
2021// Floating point reduction, F16
2022def : InstRW<[N2Write_6cyc_2V],
2023             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>;
2024
2025// Floating point reduction, F32
2026def : InstRW<[N2Write_4cyc_1V],
2027             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>;
2028
2029// Floating point reduction, F64
2030def : InstRW<[N2Write_2cyc_1V],
2031             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;
2032
2033// Floating point round to integral, F16
2034def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;
2035
2036// Floating point round to integral, F32
2037def : InstRW<[N2Write_4cyc_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;
2038
2039// Floating point round to integral, F64
2040def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;
2041
2042// Floating point square root, F16
2043def : InstRW<[N2Write_13cyc_1V0], (instrs FSQRT_ZPmZ_H)>;
2044
2045// Floating point square root, F32
2046def : InstRW<[N2Write_10cyc_1V0], (instrs FSQRT_ZPmZ_S)>;
2047
2048// Floating point square root, F64
2049def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRT_ZPmZ_D)>;
2050
2051// Floating point trigonometric exponentiation
2052def : InstRW<[N2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>;
2053
2054// Floating point trigonometric multiply add
2055def : InstRW<[N2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>;
2056
2057// Floating point trigonometric, miscellaneous
2058def : InstRW<[N2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
2059
2060// SVE BFloat16 (BF16) instructions
2061// -----------------------------------------------------------------------------
2062
2063// Convert, F32 to BF16
2064def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
2065
2066// Dot product
2067def : InstRW<[N2Write_4cyc_1V], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
2068
2069// Matrix multiply accumulate
2070def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA_ZZZ)>;
2071
2072// Multiply accumulate long
2073def : InstRW<[N2Write_4cyc_1V], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
2074
2075// SVE Load instructions
2076// -----------------------------------------------------------------------------
2077
2078// Load vector
2079def : InstRW<[N2Write_6cyc_1L], (instrs LDR_ZXI)>;
2080
2081// Load predicate
2082def : InstRW<[N2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
2083
2084// Contiguous load, scalar + imm
2085def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM$",
2086                                           "^LD1S?B_[HSD]_IMM$",
2087                                           "^LD1S?H_[SD]_IMM$",
2088                                           "^LD1S?W_D_IMM$" )>;
2089// Contiguous load, scalar + scalar
2090def : InstRW<[N2Write_6cyc_1L01], (instregex "^LD1[BHWD]$",
2091                                             "^LD1S?B_[HSD]$",
2092                                             "^LD1S?H_[SD]$",
2093                                             "^LD1S?W_D$" )>;
2094
2095// Contiguous load broadcast, scalar + imm
2096def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
2097                                           "^LD1RSW_IMM$",
2098                                           "^LD1RS?B_[HSD]_IMM$",
2099                                           "^LD1RS?H_[SD]_IMM$",
2100                                           "^LD1RS?W_D_IMM$",
2101                                           "^LD1RQ_[BHWD]_IMM$")>;
2102
2103// Contiguous load broadcast, scalar + scalar
2104def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
2105
2106// Non temporal load, scalar + imm
2107def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>;
2108
2109// Non temporal load, scalar + scalar
2110def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>;
2111
2112// Non temporal gather load, vector + scalar 32-bit element size
2113def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
2114                                              "^LDNT1S[BH]_ZZR_S_REAL$")>;
2115
2116// Non temporal gather load, vector + scalar 64-bit element size
2117def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
2118def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;
2119
2120// Contiguous first faulting load, scalar + scalar
2121def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
2122                                              "^LDFF1S?B_[HSD]_REAL$",
2123                                              "^LDFF1S?H_[SD]_REAL$",
2124                                              "^LDFF1S?W_D_REAL$")>;
2125
2126// Contiguous non faulting load, scalar + imm
2127def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
2128                                           "^LDNF1S?B_[HSD]_IMM_REAL$",
2129                                           "^LDNF1S?H_[SD]_IMM_REAL$",
2130                                           "^LDNF1S?W_D_IMM_REAL$")>;
2131
2132// Contiguous Load two structures to two vectors, scalar + imm
2133def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>;
2134
2135// Contiguous Load two structures to two vectors, scalar + scalar
2136def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD2[BHWD]$")>;
2137
2138// Contiguous Load three structures to three vectors, scalar + imm
2139def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD3[BHWD]_IMM$")>;
2140
2141// Contiguous Load three structures to three vectors, scalar + scalar
2142def : InstRW<[N2Write_10cyc_1V_1L_1S], (instregex "^LD3[BHWD]$")>;
2143
2144// Contiguous Load four structures to four vectors, scalar + imm
2145def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>;
2146
2147// Contiguous Load four structures to four vectors, scalar + scalar
2148def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>;
2149
2150// Gather load, vector + imm, 32-bit element size
2151def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
2152                                              "^GLD(FF)?1W_IMM_REAL$")>;
2153
2154// Gather load, vector + imm, 64-bit element size
2155def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
2156                                              "^GLD(FF)?1D_IMM_REAL$")>;
2157
2158// Gather load, 64-bit element size
2159def : InstRW<[N2Write_9cyc_2L_2V],
2160             (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
2161                        "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
2162                        "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
2163                        "^GLD(FF)?1D_(SCALED_)?REAL$")>;
2164
2165// Gather load, 32-bit scaled offset
2166def : InstRW<[N2Write_10cyc_2L_2V],
2167             (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
2168                        "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;
2169
2170// Gather load, 32-bit unpacked unscaled offset
2171def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
2172                                              "^GLD(FF)?1W_[SU]XTW_REAL$")>;
2173
2174// SVE Store instructions
2175// -----------------------------------------------------------------------------
2176
2177// Store from predicate reg
2178def : InstRW<[N2Write_1cyc_1L01], (instrs STR_PXI)>;
2179
2180// Store from vector reg
2181def : InstRW<[N2Write_2cyc_1L01_1V], (instrs STR_ZXI)>;
2182
2183// Contiguous store, scalar + imm
2184def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BHWD]_IMM$",
2185                                                "^ST1B_[HSD]_IMM$",
2186                                                "^ST1H_[SD]_IMM$",
2187                                                "^ST1W_D_IMM$")>;
2188
2189// Contiguous store, scalar + scalar
2190def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
2191def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BWD]$",
2192                                                "^ST1B_[HSD]$",
2193                                                "^ST1W_D$")>;
2194
2195// Contiguous store two structures from two vectors, scalar + imm
2196def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BHWD]_IMM$")>;
2197
2198// Contiguous store two structures from two vectors, scalar + scalar
2199def : InstRW<[N2Write_4cyc_1L01_1S_1V], (instrs ST2H)>;
2200
2201// Contiguous store two structures from two vectors, scalar + scalar
2202def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BWD]$")>;
2203
2204// Contiguous store three structures from three vectors, scalar + imm
2205def : InstRW<[N2Write_7cyc_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;
2206
2207// Contiguous store three structures from three vectors, scalar + scalar
2208def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instrs ST3H)>;
2209
2210// Contiguous store three structures from three vectors, scalar + scalar
2211def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instregex "^ST3[BWD]$")>;
2212
2213// Contiguous store four structures from four vectors, scalar + imm
2214def : InstRW<[N2Write_11cyc_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;
2215
2216// Contiguous store four structures from four vectors, scalar + scalar
2217def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instrs ST4H)>;
2218
2219// Contiguous store four structures from four vectors, scalar + scalar
2220def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instregex "^ST4[BWD]$")>;
2221
2222// Non temporal store, scalar + imm
2223def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
2224
2225// Non temporal store, scalar + scalar
2226def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
2227def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
2228
2229// Scatter non temporal store, vector + scalar 32-bit element size
2230def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^STNT1[BHW]_ZZR_S")>;
2231
2232// Scatter non temporal store, vector + scalar 64-bit element size
2233def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZZR_D")>;
2234
2235// Scatter store vector + imm 32-bit element size
2236def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_IMM$",
2237                                                "^SST1W_IMM$")>;
2238
2239// Scatter store vector + imm 64-bit element size
2240def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
2241                                                "^SST1D_IMM$")>;
2242
2243// Scatter store, 32-bit scaled offset
2244def : InstRW<[N2Write_4cyc_2L01_2V],
2245             (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
2246
2247// Scatter store, 32-bit unpacked unscaled offset
2248def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$",
2249                                                "^SST1D_[SU]XTW$")>;
2250
2251// Scatter store, 32-bit unpacked scaled offset
2252def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
2253                                                "^SST1D_[SU]XTW_SCALED$")>;
2254
2255// Scatter store, 32-bit unscaled offset
2256def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_[SU]XTW$",
2257                                                "^SST1W_[SU]XTW$")>;
2258
2259// Scatter store, 64-bit scaled offset
2260def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_SCALED$",
2261                                                "^SST1D_SCALED$")>;
2262
2263// Scatter store, 64-bit unscaled offset
2264def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D$",
2265                                                "^SST1D$")>;
2266
2267// SVE Miscellaneous instructions
2268// -----------------------------------------------------------------------------
2269
2270// Read first fault register, unpredicated
2271def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;
2272
2273// Read first fault register, predicated
2274def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;
2275
2276// Read first fault register and set flags
2277def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>;
2278
2279// Set first fault register
2280// Write to first fault register
2281def : InstRW<[N2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;
2282
2283// Prefetch
2284def : InstRW<[N2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;
2285
2286// SVE Cryptographic instructions
2287// -----------------------------------------------------------------------------
2288
2289// Crypto AES ops
2290def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$",
2291                                           "^AESI?MC_ZZ_B$")>;
2292
2293// Crypto SHA3 ops
2294def : InstRW<[N2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$",
2295                                            "^RAX1_ZZZ_D$",
2296                                            "^XAR_ZZZI_[BHSD]$")>;
2297
2298// Crypto SM4 ops
2299def : InstRW<[N2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;
2300
2301}
2302