xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td (revision 3ceba58a7509418b47b8fca2d2b6bbf088714e26)
1//=- AArch64SchedNeoverseN2.td - NeoverseN2 Scheduling Defs --*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the scheduling model for the Arm Neoverse N2 processors.
10//
11//===----------------------------------------------------------------------===//
12
13def NeoverseN2Model : SchedMachineModel {
14  let IssueWidth            =  10; // Micro-ops dispatched at a time.
15  let MicroOpBufferSize     = 160; // Entries in micro-op re-order buffer.
16  let LoadLatency           =   4; // Optimistic load latency.
17  let MispredictPenalty     =  10; // Extra cycles for mispredicted branch.
18  let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
19  let CompleteModel         =   1;
20
21  list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
22    [HasSVE2p1, HasPAuthLR, HasCPA, HasCSSC]);
23}
24
25//===----------------------------------------------------------------------===//
26// Define each kind of processor resource and number available on Neoverse N2.
27// Instructions are first fetched and then decoded into internal macro-ops
28// (MOPs). From there, the MOPs proceed through register renaming and dispatch
29// stages. A MOP can be split into two micro-ops further down the pipeline
30// after the decode stage. Once dispatched, micro-ops wait for their operands
31// and issue out-of-order to one of thirteen issue pipelines. Each issue
32// pipeline can accept one micro-op per cycle.
33
34let SchedModel = NeoverseN2Model in {
35
36// Define the (13) issue ports.
37def N2UnitB   : ProcResource<2>;  // Branch 0/1
38def N2UnitS   : ProcResource<2>;  // Integer single Cycle 0/1
39def N2UnitM0  : ProcResource<1>;  // Integer multicycle 0
40def N2UnitM1  : ProcResource<1>;  // Integer multicycle 1
41def N2UnitL01 : ProcResource<2>;  // Load/Store 0/1
42def N2UnitL2  : ProcResource<1>;  // Load 2
43def N2UnitD   : ProcResource<2>;  // Store data 0/1
44def N2UnitV0  : ProcResource<1>;  // FP/ASIMD 0
45def N2UnitV1  : ProcResource<1>;  // FP/ASIMD 1
46
47def N2UnitV : ProcResGroup<[N2UnitV0, N2UnitV1]>;  // FP/ASIMD 0/1
48def N2UnitM : ProcResGroup<[N2UnitM0, N2UnitM1]>;  // Integer single/multicycle 0/1
49def N2UnitL : ProcResGroup<[N2UnitL01, N2UnitL2]>; // Load/Store 0/1 and Load 2
50def N2UnitI : ProcResGroup<[N2UnitS, N2UnitM0, N2UnitM1]>; // Integer single cycle 0/1 and single/multicycle 0/1
51
52// Define commonly used read types.
53
54// No forwarding is provided for these types.
55def : ReadAdvance<ReadI,       0>;
56def : ReadAdvance<ReadISReg,   0>;
57def : ReadAdvance<ReadIEReg,   0>;
58def : ReadAdvance<ReadIM,      0>;
59def : ReadAdvance<ReadIMA,     0>;
60def : ReadAdvance<ReadID,      0>;
61def : ReadAdvance<ReadExtrHi,  0>;
62def : ReadAdvance<ReadAdrBase, 0>;
63def : ReadAdvance<ReadST,      0>;
64def : ReadAdvance<ReadVLD,     0>;
65
66def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
67def : WriteRes<WriteBarrier, []> { let Latency = 1; }
68def : WriteRes<WriteHint,    []> { let Latency = 1; }
69def : WriteRes<WriteLDHi,    []> { let Latency = 4; }
70
71//===----------------------------------------------------------------------===//
72// Define customized scheduler read/write types specific to the Neoverse N2.
73
74//===----------------------------------------------------------------------===//
75// Define generic 1 micro-op types
76
77def N2Write_1cyc_1B   : SchedWriteRes<[N2UnitB]>   { let Latency = 1; }
78def N2Write_1cyc_1I   : SchedWriteRes<[N2UnitI]>   { let Latency = 1; }
79def N2Write_1cyc_1M   : SchedWriteRes<[N2UnitM]>   { let Latency = 1; }
80def N2Write_1cyc_1M0  : SchedWriteRes<[N2UnitM0]>  { let Latency = 1; }
81def N2Write_1cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 1; }
82def N2Write_2cyc_1M   : SchedWriteRes<[N2UnitM]>   { let Latency = 2; }
83def N2Write_3cyc_1M   : SchedWriteRes<[N2UnitM]>   { let Latency = 3; }
84def N2Write_2cyc_1M0  : SchedWriteRes<[N2UnitM0]>  { let Latency = 2;
85                                                     let ReleaseAtCycles = [2]; }
86def N2Write_3cyc_1M0  : SchedWriteRes<[N2UnitM0]>  { let Latency = 3;
87                                                     let ReleaseAtCycles = [3]; }
88def N2Write_5cyc_1M0  : SchedWriteRes<[N2UnitM0]>  { let Latency = 5;
89                                                     let ReleaseAtCycles = [5]; }
90def N2Write_12cyc_1M0 : SchedWriteRes<[N2UnitM0]>  { let Latency = 12;
91                                                     let ReleaseAtCycles = [12]; }
92def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]>  { let Latency = 20;
93                                                     let ReleaseAtCycles = [20]; }
94def N2Write_4cyc_1L   : SchedWriteRes<[N2UnitL]>   { let Latency = 4; }
95def N2Write_6cyc_1L   : SchedWriteRes<[N2UnitL]>   { let Latency = 6; }
96def N2Write_2cyc_1V   : SchedWriteRes<[N2UnitV]>   { let Latency = 2; }
97def N2Write_3cyc_1V   : SchedWriteRes<[N2UnitV]>   { let Latency = 3; }
98def N2Write_4cyc_1V   : SchedWriteRes<[N2UnitV]>   { let Latency = 4; }
99def N2Write_5cyc_1V   : SchedWriteRes<[N2UnitV]>   { let Latency = 5; }
100def N2Write_12cyc_1V  : SchedWriteRes<[N2UnitV]>   { let Latency = 12; }
101def N2Write_2cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 2; }
102def N2Write_3cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 3; }
103def N2Write_4cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 4; }
104def N2Write_7cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 7;
105                                                     let ReleaseAtCycles = [7]; }
106def N2Write_9cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 9; }
107def N2Write_10cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 10; }
108def N2Write_12cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 12; }
109def N2Write_13cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 13; }
110def N2Write_15cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 15; }
111def N2Write_16cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 16; }
112def N2Write_20cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 20; }
113def N2Write_2cyc_1V1  : SchedWriteRes<[N2UnitV1]>  { let Latency = 2; }
114def N2Write_3cyc_1V1  : SchedWriteRes<[N2UnitV1]>  { let Latency = 3; }
115def N2Write_4cyc_1V1  : SchedWriteRes<[N2UnitV1]>  { let Latency = 4; }
116def N2Write_6cyc_1V1  : SchedWriteRes<[N2UnitV1]>  { let Latency = 6; }
117def N2Write_10cyc_1V1 : SchedWriteRes<[N2UnitV1]>  { let Latency = 10; }
118def N2Write_6cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 6; }
119
120//===----------------------------------------------------------------------===//
121// Define generic 2 micro-op types
122
123def N2Write_1cyc_1B_1S : SchedWriteRes<[N2UnitB, N2UnitS]> {
124  let Latency     = 1;
125  let NumMicroOps = 2;
126}
127
128def N2Write_6cyc_1M0_1B : SchedWriteRes<[N2UnitM0, N2UnitB]> {
129  let Latency     = 6;
130  let NumMicroOps = 2;
131}
132
133def N2Write_9cyc_1M0_1L : SchedWriteRes<[N2UnitM0, N2UnitL]> {
134  let Latency     = 9;
135  let NumMicroOps = 2;
136}
137
138def N2Write_3cyc_1I_1M : SchedWriteRes<[N2UnitI, N2UnitM]> {
139  let Latency     = 3;
140  let NumMicroOps = 2;
141}
142
143def N2Write_4cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
144  let Latency     = 4;
145  let NumMicroOps = 2;
146}
147
148def N2Write_5cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
149  let Latency     = 5;
150  let NumMicroOps = 2;
151}
152
153def N2Write_6cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
154  let Latency     = 6;
155  let NumMicroOps = 2;
156}
157
158def N2Write_7cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
159  let Latency     = 7;
160  let NumMicroOps = 2;
161}
162
163def N2Write_1cyc_1L01_1D : SchedWriteRes<[N2UnitL01, N2UnitD]> {
164  let Latency     = 1;
165  let NumMicroOps = 2;
166}
167
168def N2Write_5cyc_1M0_1V : SchedWriteRes<[N2UnitM0, N2UnitV]> {
169  let Latency     = 5;
170  let NumMicroOps = 2;
171}
172
173def N2Write_2cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> {
174  let Latency     = 2;
175  let NumMicroOps = 2;
176}
177
178def N2Write_4cyc_1V1_1V : SchedWriteRes<[N2UnitV1, N2UnitV]> {
179  let Latency     = 4;
180  let NumMicroOps = 2;
181}
182
183def N2Write_4cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
184  let Latency     = 4;
185  let NumMicroOps = 2;
186}
187
188def N2Write_10cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
189  let Latency = 10;
190  let NumMicroOps = 2;
191  let ReleaseAtCycles = [5, 5];
192}
193
194def N2Write_13cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
195  let Latency = 13;
196  let NumMicroOps = 2;
197  let ReleaseAtCycles = [6, 7];
198}
199
200def N2Write_15cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
201  let Latency = 15;
202  let NumMicroOps = 2;
203  let ReleaseAtCycles = [7, 8];
204}
205
206def N2Write_16cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
207  let Latency = 16;
208  let NumMicroOps = 2;
209  let ReleaseAtCycles = [8, 8];
210}
211
212def N2Write_4cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
213  let Latency     = 4;
214  let NumMicroOps = 2;
215}
216
217def N2Write_6cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
218  let Latency     = 6;
219  let NumMicroOps = 2;
220}
221
222def N2Write_6cyc_2L : SchedWriteRes<[N2UnitL, N2UnitL]> {
223  let Latency     = 6;
224  let NumMicroOps = 2;
225}
226
227def N2Write_8cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
228  let Latency     = 8;
229  let NumMicroOps = 2;
230}
231
232def N2Write_4cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> {
233  let Latency     = 4;
234  let NumMicroOps = 2;
235}
236
237def N2Write_3cyc_1M0_1M  : SchedWriteRes<[N2UnitM0, N2UnitM]> {
238  let Latency     = 3;
239  let NumMicroOps = 2;
240}
241
242def N2Write_2cyc_1M0_1M  : SchedWriteRes<[N2UnitM0, N2UnitM]> {
243  let Latency     = 2;
244  let NumMicroOps = 2;
245}
246
247def N2Write_6cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> {
248  let Latency     = 6;
249  let NumMicroOps = 2;
250}
251
252def N2Write_4cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> {
253  let Latency     = 4;
254  let NumMicroOps = 2;
255}
256
257def N2Write_5cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
258  let Latency     = 5;
259  let NumMicroOps = 2;
260}
261
262def N2Write_5cyc_1V1_1M0 : SchedWriteRes<[N2UnitV1, N2UnitM0]> {
263  let Latency     = 5;
264  let NumMicroOps = 2;
265}
266
267def N2Write_7cyc_1M0_1V0 : SchedWriteRes<[N2UnitM0, N2UnitV0]> {
268  let Latency     = 7;
269  let NumMicroOps = 2;
270}
271
272def N2Write_2cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> {
273  let Latency     = 2;
274  let NumMicroOps = 2;
275}
276
277def N2Write_6cyc_1V_1V1 : SchedWriteRes<[N2UnitV, N2UnitV1]> {
278  let Latency     = 6;
279  let NumMicroOps = 2;
280}
281
282def N2Write_6cyc_1L_1M : SchedWriteRes<[N2UnitL, N2UnitM]> {
283  let Latency     = 6;
284  let NumMicroOps = 2;
285}
286
287def N2Write_6cyc_1L_1S : SchedWriteRes<[N2UnitL, N2UnitS]> {
288  let Latency     = 6;
289  let NumMicroOps = 2;
290}
291
292def N2Write_9cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
293  let Latency     = 9;
294  let NumMicroOps = 2;
295}
296
297def N2Write_4cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> {
298  let Latency     = 4;
299  let NumMicroOps = 2;
300}
301
302//===----------------------------------------------------------------------===//
303// Define generic 3 micro-op types
304
305def N2Write_1cyc_1L01_1D_1I : SchedWriteRes<[N2UnitL01, N2UnitD, N2UnitI]> {
306  let Latency     = 1;
307  let NumMicroOps = 3;
308}
309
310def N2Write_2cyc_1L01_1V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitI]> {
311  let Latency     = 2;
312  let NumMicroOps = 3;
313}
314
315def N2Write_2cyc_1L01_2V : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV]> {
316  let Latency     = 2;
317  let NumMicroOps = 3;
318}
319
320def N2Write_7cyc_1M_1M0_1V : SchedWriteRes<[N2UnitM, N2UnitM0, N2UnitV]> {
321  let Latency     = 7;
322  let NumMicroOps = 3;
323}
324
325def N2Write_8cyc_1M0_1V1_1V : SchedWriteRes<[N2UnitM0, N2UnitV1, N2UnitV]> {
326  let Latency     = 8;
327  let NumMicroOps = 3;
328}
329
330def N2Write_10cyc_1V_1L_1S : SchedWriteRes<[N2UnitV, N2UnitL, N2UnitL]> {
331  let Latency     = 10;
332  let NumMicroOps = 3;
333}
334
335def N2Write_2cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
336  let Latency     = 2;
337  let NumMicroOps = 3;
338}
339
340def N2Write_4cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
341  let Latency     = 4;
342  let NumMicroOps = 3;
343}
344
345def N2Write_6cyc_3L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL]> {
346  let Latency     = 6;
347  let NumMicroOps = 3;
348}
349
350def N2Write_8cyc_1L_2V : SchedWriteRes<[N2UnitL, N2UnitV, N2UnitV]> {
351  let Latency     = 8;
352  let NumMicroOps = 3;
353}
354
355//===----------------------------------------------------------------------===//
356// Define generic 4 micro-op types
357
358def N2Write_2cyc_1L01_2V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV,
359                                             N2UnitI]> {
360  let Latency     = 2;
361  let NumMicroOps = 4;
362}
363
364def N2Write_6cyc_4V0 : SchedWriteRes<[N2UnitV0, N2UnitV0, N2UnitV0, N2UnitV0]> {
365  let Latency     = 6;
366  let NumMicroOps = 4;
367}
368
369def N2Write_4cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
370  let Latency     = 4;
371  let NumMicroOps = 4;
372}
373
374def N2Write_6cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
375  let Latency     = 6;
376  let NumMicroOps = 4;
377}
378
379def N2Write_8cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
380  let Latency     = 8;
381  let NumMicroOps = 4;
382}
383
384def N2Write_9cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
385  let Latency     = 9;
386  let NumMicroOps = 4;
387}
388
389def N2Write_2cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
390                                          N2UnitV]> {
391  let Latency     = 2;
392  let NumMicroOps = 4;
393}
394
395def N2Write_4cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
396                                          N2UnitV]> {
397  let Latency     = 4;
398  let NumMicroOps = 4;
399}
400
401def N2Write_5cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
402                                          N2UnitV]> {
403  let Latency     = 5;
404  let NumMicroOps = 4;
405}
406
407def N2Write_8cyc_2M0_2V0 : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitV0,
408                                          N2UnitV0]> {
409  let Latency     = 8;
410  let NumMicroOps = 4;
411}
412
413def N2Write_11cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
414                                          N2UnitV1]> {
415  let Latency     = 11;
416  let NumMicroOps = 4;
417}
418
419def N2Write_9cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
420                                         N2UnitV1]> {
421  let Latency     = 9;
422  let NumMicroOps = 4;
423}
424
425def N2Write_8cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
426                                         N2UnitV1]> {
427  let Latency     = 8;
428  let NumMicroOps = 4;
429}
430
431def N2Write_10cyc_2L_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
432                                          N2UnitV1]> {
433  let Latency     = 10;
434  let NumMicroOps = 4;
435}
436
437def N2Write_10cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
438  let Latency     = 10;
439  let NumMicroOps = 4;
440}
441
442def N2Write_4cyc_2M0_2M : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitM,
443                                         N2UnitM]> {
444  let Latency     = 4;
445  let NumMicroOps = 4;
446}
447
448def N2Write_6cyc_2I_2L : SchedWriteRes<[N2UnitI, N2UnitI, N2UnitL, N2UnitL]> {
449  let Latency     = 6;
450  let NumMicroOps = 4;
451}
452
453def N2Write_7cyc_4L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL]> {
454  let Latency     = 7;
455  let NumMicroOps = 4;
456}
457
458//===----------------------------------------------------------------------===//
459// Define generic 5 micro-op types
460
461def N2Write_2cyc_1L01_2V_2I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV,
462                                             N2UnitI, N2UnitI]> {
463  let Latency     = 2;
464  let NumMicroOps = 5;
465}
466
467def N2Write_8cyc_2L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV,
468                                        N2UnitV]> {
469  let Latency     = 8;
470  let NumMicroOps = 5;
471}
472
473//===----------------------------------------------------------------------===//
474// Define generic 6 micro-op types
475
476def N2Write_8cyc_3L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
477                                        N2UnitV, N2UnitV, N2UnitV]> {
478  let Latency     = 8;
479  let NumMicroOps = 6;
480}
481
482def N2Write_2cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
483                                          N2UnitV, N2UnitV, N2UnitV]> {
484  let Latency     = 2;
485  let NumMicroOps = 6;
486}
487
488def N2Write_6cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
489                                          N2UnitV, N2UnitV, N2UnitV]> {
490  let Latency     = 6;
491  let NumMicroOps = 6;
492}
493
494def N2Write_4cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
495                                          N2UnitV, N2UnitV, N2UnitV]> {
496  let Latency     = 4;
497  let NumMicroOps = 6;
498}
499
500def N2Write_10cyc_2L_2V_2S : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV,
501                                            N2UnitS, N2UnitS]> {
502  let Latency     = 10;
503  let NumMicroOps = 6;
504}
505
506//===----------------------------------------------------------------------===//
507// Define generic 7 micro-op types
508
509def N2Write_8cyc_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
510                                        N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
511  let Latency     = 8;
512  let NumMicroOps = 7;
513}
514
515//===----------------------------------------------------------------------===//
516// Define generic 8 micro-op types
517
518def N2Write_6cyc_8V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV,
519                                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
520  let Latency     = 6;
521  let NumMicroOps = 8;
522}
523
524def N2Write_2cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
525                                          N2UnitL01, N2UnitV, N2UnitV, N2UnitV,
526                                          N2UnitV]> {
527  let Latency     = 2;
528  let NumMicroOps = 8;
529}
530
531def N2Write_5cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
532                                          N2UnitL01, N2UnitV, N2UnitV, N2UnitV,
533                                          N2UnitV]> {
534  let Latency     = 5;
535  let NumMicroOps = 8;
536}
537
538def N2Write_8cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
539                                        N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
540  let Latency     = 8;
541  let NumMicroOps = 8;
542}
543
544def N2Write_9cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
545                                        N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
546  let Latency     = 9;
547  let NumMicroOps = 8;
548}
549
550//===----------------------------------------------------------------------===//
551// Define generic 10 micro-op types
552
553def N2Write_7cyc_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
554                                          N2UnitL01, N2UnitL01, N2UnitV,
555                                          N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
556  let Latency     = 7;
557  let NumMicroOps = 10;
558}
559
560//===----------------------------------------------------------------------===//
561// Define generic 12 micro-op types
562
563def N2Write_7cyc_6L01_6V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
564                                          N2UnitL01, N2UnitL01, N2UnitL01,
565                                          N2UnitV, N2UnitV, N2UnitV, N2UnitV,
566                                          N2UnitV, N2UnitV]> {
567  let Latency     = 7;
568  let NumMicroOps = 12;
569}
570
571//===----------------------------------------------------------------------===//
572// Define generic 15 micro-op types
573
574def N2Write_7cyc_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
575                                             N2UnitL01, N2UnitL01, N2UnitS,
576                                             N2UnitS, N2UnitS, N2UnitS,
577                                             N2UnitS, N2UnitV, N2UnitV,
578                                             N2UnitV, N2UnitV, N2UnitV]> {
579  let Latency     = 7;
580  let NumMicroOps = 15;
581}
582
583//===----------------------------------------------------------------------===//
584// Define generic 18 micro-op types
585
586def N2Write_11cyc_9L01_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
587                                           N2UnitL01, N2UnitL01, N2UnitL01,
588                                           N2UnitL01, N2UnitL01, N2UnitL01,
589                                           N2UnitV, N2UnitV, N2UnitV,
590                                           N2UnitV, N2UnitV, N2UnitV,
591                                           N2UnitV, N2UnitV, N2UnitV]> {
592  let Latency     = 11;
593  let NumMicroOps = 18;
594}
595
596//===----------------------------------------------------------------------===//
597// Define generic 27 micro-op types
598
599def N2Write_11cyc_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
600                                              N2UnitL01, N2UnitL01, N2UnitL01,
601                                              N2UnitL01, N2UnitL01, N2UnitL01,
602                                              N2UnitS, N2UnitS, N2UnitS,
603                                              N2UnitS, N2UnitS, N2UnitS,
604                                              N2UnitS, N2UnitS, N2UnitS,
605                                              N2UnitV, N2UnitV, N2UnitV,
606                                              N2UnitV, N2UnitV, N2UnitV,
607                                              N2UnitV, N2UnitV, N2UnitV]> {
608  let Latency     = 11;
609  let NumMicroOps = 27;
610}
611
612//===----------------------------------------------------------------------===//
613// Define types for arithmetic and logical ops with short shifts
614def N2Write_Arith : SchedWriteVariant<[
615                      SchedVar<IsCheapLSL,  [N2Write_1cyc_1I]>,
616                      SchedVar<NoSchedPred, [N2Write_2cyc_1M]>]>;
617
618def N2Write_Logical: SchedWriteVariant<[
619                       SchedVar<NeoverseNoLSL, [N2Write_1cyc_1I]>,
620                       SchedVar<NoSchedPred,   [N2Write_2cyc_1M]>]>;
621
622// Miscellaneous
623// -----------------------------------------------------------------------------
624
625def : InstRW<[WriteI], (instrs COPY)>;
626
627// Branch Instructions
628// -----------------------------------------------------------------------------
629
630// Branch, immed
631// Compare and branch
632def : SchedAlias<WriteBr,    N2Write_1cyc_1B>;
633
634// Branch, register
635def : SchedAlias<WriteBrReg, N2Write_1cyc_1B>;
636
637// Branch and link, immed
638// Branch and link, register
639def : InstRW<[N2Write_1cyc_1B_1S], (instrs BL, BLR)>;
640
641// Arithmetic and Logical Instructions
642// -----------------------------------------------------------------------------
643
644// ALU, basic
645// ALU, basic, flagset
646def : SchedAlias<WriteI,     N2Write_1cyc_1I>;
647
648// ALU, extend and shift
649def : SchedAlias<WriteIEReg, N2Write_2cyc_1M>;
650
651// Arithmetic, LSL shift, shift <= 4
652// Arithmetic, flagset, LSL shift, shift <= 4
653// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
654def : SchedAlias<WriteISReg, N2Write_Arith>;
655
656// Logical, shift, no flagset
657def : InstRW<[N2Write_1cyc_1I],
658             (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;
659
660// Logical, shift, flagset
661def : InstRW<[N2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
662
663// Arithmetic, immediate to logical address tag
664def : InstRW<[N2Write_2cyc_1M], (instrs ADDG, SUBG)>;
665
666// Convert floating-point condition flags
667// Flag manipulation instructions
668def : WriteRes<WriteSys, []> { let Latency = 1; }
669
670// Insert Random Tags
671def : InstRW<[N2Write_2cyc_1M], (instrs IRG, IRGstack)>;
672
673// Insert Tag Mask
674// Subtract Pointer
675// Subtract Pointer, flagset
676def : InstRW<[N2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;
677
678// Move and shift instructions
679// -----------------------------------------------------------------------------
680
681def : SchedAlias<WriteImm, N2Write_1cyc_1I>;
682
683// Divide and Multiply Instructions
684// -----------------------------------------------------------------------------
685
686// SDIV, UDIV
687def : SchedAlias<WriteID32,  N2Write_12cyc_1M0>;
688def : SchedAlias<WriteID64,  N2Write_20cyc_1M0>;
689
690def : WriteRes<WriteIM32, [N2UnitM]> { let Latency = 2; }
691def : WriteRes<WriteIM64, [N2UnitM]> { let Latency = 2; }
692
693// Multiply high
694def : InstRW<[N2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>;
695
696// Pointer Authentication Instructions (v8.3 PAC)
697// -----------------------------------------------------------------------------
698
699// Authenticate data address
700// Authenticate instruction address
701// Compute pointer authentication code for data address
702// Compute pointer authentication code, using generic key
703// Compute pointer authentication code for instruction address
704def : InstRW<[N2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>;
705
706// Branch and link, register, with pointer authentication
707// Branch, register, with pointer authentication
708// Branch, return, with pointer authentication
709def : InstRW<[N2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
710                                            BRAAZ, BRAB, BRABZ, RETAA, RETAB,
711                                            ERETAA, ERETAB)>;
712
713
714// Load register, with pointer authentication
715def : InstRW<[N2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
716
717// Strip pointer authentication code
718def : InstRW<[N2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>;
719
720// Miscellaneous data-processing instructions
721// -----------------------------------------------------------------------------
722
723// Bitfield extract, one reg
724// Bitfield extract, two regs
725// NOTE: We don't model the difference between EXTR where both operands are the
726// same (one reg).
727def : SchedAlias<WriteExtr, N2Write_3cyc_1I_1M>;
728def : InstRW<[N2Write_3cyc_1I_1M], (instrs EXTRWrri, EXTRXrri)>;
729
730// Bitfield move, basic
731def : SchedAlias<WriteIS, N2Write_1cyc_1I>;
732
733// Bitfield move, insert
734def : InstRW<[N2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>;
735
736// Load instructions
737// -----------------------------------------------------------------------------
738
739def : SchedAlias<WriteLD,    N2Write_4cyc_1L>;
740def : SchedAlias<WriteLDIdx, N2Write_4cyc_1I_1L>;
741
742// Load pair, signed immed offset, signed words
743def : InstRW<[N2Write_5cyc_1M0, WriteLDHi], (instrs LDPSWi)>;
744// Load pair, immed post-index or immed pre-index, signed words
745def : InstRW<[WriteAdr, N2Write_5cyc_1M0, WriteLDHi],
746             (instregex "^LDPSW(post|pre)$")>;
747
748// Store instructions
749// -----------------------------------------------------------------------------
750
751def : SchedAlias<WriteST,    N2Write_1cyc_1L01_1D>;
752def : SchedAlias<WriteSTIdx, N2Write_1cyc_1L01_1D_1I>;
753def : SchedAlias<WriteSTP,   N2Write_1cyc_1L01_1D>;
754def : SchedAlias<WriteAdr,   N2Write_1cyc_1I>; // copied from A57.
755
756// Tag load instructions
757// -----------------------------------------------------------------------------
758
759// Load allocation tag
760// Load multiple allocation tags
761def : InstRW<[N2Write_4cyc_1L], (instrs LDG, LDGM)>;
762
763// Tag store instructions
764// -----------------------------------------------------------------------------
765
766// Store allocation tags to one or two granules, post-index
767// Store allocation tags to one or two granules, pre-index
768// Store allocation tag to one or two granules, zeroing, post-index
769// Store Allocation Tag to one or two granules, zeroing, pre-index
770// Store allocation tag and reg pair to memory, post-Index
771// Store allocation tag and reg pair to memory, pre-Index
772def : InstRW<[N2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex,
773                                                ST2GPreIndex, ST2GPostIndex,
774                                                STZGPreIndex, STZGPostIndex,
775                                                STZ2GPreIndex, STZ2GPostIndex,
776                                                STGPpre, STGPpost)>;
777
778// Store allocation tags to one or two granules, signed offset
779// Store allocation tag to two granules, zeroing, signed offset
780// Store allocation tag and reg pair to memory, signed offset
781// Store multiple allocation tags
782def : InstRW<[N2Write_1cyc_1L01_1D], (instrs STGi, ST2Gi, STZGi,
783                                             STZ2Gi, STGPi, STGM, STZGM)>;
784
785// FP data processing instructions
786// -----------------------------------------------------------------------------
787
788// FP absolute value
789// FP arithmetic
790// FP min/max
791// FP negate
792// FP select
793def : SchedAlias<WriteF,     N2Write_2cyc_1V>;
794
795// FP compare
796def : SchedAlias<WriteFCmp,  N2Write_2cyc_1V0>;
797
798// FP divide, square root
799def : SchedAlias<WriteFDiv,  N2Write_7cyc_1V0>;
800
801// FP divide, H-form
802def : InstRW<[N2Write_7cyc_1V0],  (instrs FDIVHrr)>;
803// FP divide, S-form
804def : InstRW<[N2Write_10cyc_1V0], (instrs FDIVSrr)>;
805// FP divide, D-form
806def : InstRW<[N2Write_15cyc_1V0], (instrs FDIVDrr)>;
807
808// FP square root, H-form
809def : InstRW<[N2Write_7cyc_1V0],  (instrs FSQRTHr)>;
810// FP square root, S-form
811def : InstRW<[N2Write_9cyc_1V0],  (instrs FSQRTSr)>;
812// FP square root, D-form
813def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRTDr)>;
814
815// FP multiply
816def : WriteRes<WriteFMul, [N2UnitV]> { let Latency = 3; }
817
818// FP multiply accumulate
819def : InstRW<[N2Write_4cyc_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
820
821// FP round to integral
822def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ][HSD]r$",
823                                            "^FRINT(32|64)[XZ][SD]r$")>;
824
825// FP miscellaneous instructions
826// -----------------------------------------------------------------------------
827
828// FP convert, from gen to vec reg
829def : InstRW<[N2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
830
831// FP convert, from vec to gen reg
832def : InstRW<[N2Write_3cyc_1V], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;
833
834// FP convert, Javascript from vec to gen reg
835// FP convert, from vec to vec reg
836def : SchedAlias<WriteFCvt, N2Write_3cyc_1V0>;
837
838// FP move, immed
839// FP move, register
840def : SchedAlias<WriteFImm, N2Write_2cyc_1V>;
841
842// FP transfer, from gen to low half of vec reg
843def : InstRW<[N2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
844                                         FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;
845
846// FP transfer, from gen to high half of vec reg
847def : InstRW<[N2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;
848
849// FP transfer, from vec to gen reg
850def : SchedAlias<WriteFCopy, N2Write_2cyc_1V>;
851
852// FP load instructions
853// -----------------------------------------------------------------------------
854
855// Load vector reg, literal, S/D/Q forms
856// Load vector reg, unscaled immed
857def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[SDQ]l$",
858                                           "^LDUR[BHSDQ]i$")>;
859
860// Load vector reg, immed post-index
861def : InstRW<[N2Write_6cyc_1I_1L, WriteI], (instregex "^LDR[BHSDQ]post$")>;
862// Load vector reg, immed pre-index
863def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L], (instregex "^LDR[BHSDQ]pre$")>;
864
865// Load vector reg, unsigned immed
866def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;
867
868// Load vector reg, register offset, basic
869// Load vector reg, register offset, scale, S/D-form
870// Load vector reg, register offset, extend
871// Load vector reg, register offset, extend, scale, S/D-form
872def : InstRW<[N2Write_6cyc_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;
873
874// Load vector reg, register offset, scale, H/Q-form
875// Load vector reg, register offset, extend, scale, H/Q-form
876def : InstRW<[N2Write_7cyc_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;
877
878// Load vector pair, immed offset, S/D-form
879def : InstRW<[N2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
880
881// Load vector pair, immed offset, Q-form
882def : InstRW<[N2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
883
884// Load vector pair, immed post-index, S/D-form
885// Load vector pair, immed pre-index, S/D-form
886def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L, WriteLDHi],
887             (instregex "^LDP[SD](pre|post)$")>;
888
889// Load vector pair, immed post-index, Q-form
890// Load vector pair, immed pre-index, Q-form
891def : InstRW<[WriteAdr, N2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost,
892                                                                LDPQpre)>;
893
894// FP store instructions
895// -----------------------------------------------------------------------------
896
897// Store vector reg, unscaled immed, B/H/S/D-form
898// Store vector reg, unscaled immed, Q-form
899def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STUR[BHSDQ]i$")>;
900
901// Store vector reg, immed post-index, B/H/S/D-form
902// Store vector reg, immed post-index, Q-form
903// Store vector reg, immed pre-index, B/H/S/D-form
904// Store vector reg, immed pre-index, Q-form
905def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I, ReadAdrBase],
906             (instregex "^STR[BHSDQ](pre|post)$")>;
907
908// Store vector reg, unsigned immed, B/H/S/D-form
909// Store vector reg, unsigned immed, Q-form
910def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STR[BHSDQ]ui$")>;
911
912// Store vector reg, register offset, basic, B/H/S/D-form
913// Store vector reg, register offset, basic, Q-form
914// Store vector reg, register offset, scale, S/D-form
915// Store vector reg, register offset, extend, B/H/S/D-form
916// Store vector reg, register offset, extend, Q-form
917// Store vector reg, register offset, extend, scale, S/D-form
918def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
919             (instregex "^STR[BSD]ro[WX]$")>;
920
921// Store vector reg, register offset, scale, H-form
922// Store vector reg, register offset, scale, Q-form
923// Store vector reg, register offset, extend, scale, H-form
924// Store vector reg, register offset, extend, scale, Q-form
925def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
926             (instregex "^STR[HQ]ro[WX]$")>;
927
928// Store vector pair, immed offset, S-form
929// Store vector pair, immed offset, D-form
930def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STN?P[SD]i$")>;
931
932// Store vector pair, immed offset, Q-form
933def : InstRW<[N2Write_2cyc_1L01_2V], (instrs STPQi, STNPQi)>;
934
935// Store vector pair, immed post-index, S-form
936// Store vector pair, immed post-index, D-form
937// Store vector pair, immed pre-index, S-form
938// Store vector pair, immed pre-index, D-form
939def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I],
940             (instregex "^STP[SD](pre|post)$")>;
941
942// Store vector pair, immed post-index, Q-form
943def : InstRW<[N2Write_2cyc_1L01_2V_1I], (instrs STPQpost)>;
944
945// Store vector pair, immed pre-index, Q-form
946def : InstRW<[N2Write_2cyc_1L01_2V_2I], (instrs STPQpre)>;
947
948// ASIMD integer instructions
949// -----------------------------------------------------------------------------
950
951// ASIMD absolute diff
952// ASIMD absolute diff long
953// ASIMD arith, basic
954// ASIMD arith, complex
955// ASIMD arith, pair-wise
956// ASIMD compare
957// ASIMD logical
958// ASIMD max/min, basic and pair-wise
959def : SchedAlias<WriteVd, N2Write_2cyc_1V>;
960def : SchedAlias<WriteVq, N2Write_2cyc_1V>;
961
962// ASIMD absolute diff accum
963// ASIMD absolute diff accum long
964def : InstRW<[N2Write_4cyc_1V1],
965             (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
966
967// ASIMD arith, reduce, 4H/4S
968def : InstRW<[N2Write_2cyc_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
969
970// ASIMD arith, reduce, 8B/8H
971def : InstRW<[N2Write_4cyc_1V1_1V],
972             (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
973
974// ASIMD arith, reduce, 16B
975def : InstRW<[N2Write_4cyc_1V1], (instrs ADDVv16i8v, SADDLVv16i8v,
976                                         UADDLVv16i8v)>;
977
978// ASIMD dot product
979// ASIMD dot product using signed and unsigned integers
980def : InstRW<[N2Write_3cyc_1V],
981             (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
982
983// ASIMD matrix multiply-accumulate
984def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA, UMMLA, USMMLA)>;
985
986// ASIMD max/min, reduce, 4H/4S
987def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU](MAX|MIN)Vv4i16v$",
988                                            "^[SU](MAX|MIN)Vv4i32v$")>;
989
990// ASIMD max/min, reduce, 8B/8H
991def : InstRW<[N2Write_4cyc_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
992                                               "^[SU](MAX|MIN)Vv8i16v$")>;
993
994// ASIMD max/min, reduce, 16B
995def : InstRW<[N2Write_4cyc_2V1], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
996
997// ASIMD multiply
998def : InstRW<[N2Write_4cyc_1V0], (instregex "^MULv", "^SQ(R)?DMULHv")>;
999
1000// ASIMD multiply accumulate
1001def : InstRW<[N2Write_4cyc_1V0], (instregex "^MLAv", "^MLSv")>;
1002
1003// ASIMD multiply accumulate high
1004def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
1005
1006// ASIMD multiply accumulate long
1007def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
1008
1009// ASIMD multiply accumulate saturating long
1010def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMLALv", "^SQDMLSLv")>;
1011
1012// ASIMD multiply/multiply long (8x8) polynomial, D-form
1013// ASIMD multiply/multiply long (8x8) polynomial, Q-form
1014def : InstRW<[N2Write_3cyc_1V0], (instregex "^PMULL?(v8i8|v16i8)$")>;
1015
1016// ASIMD multiply long
1017def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]MULLv", "^SQDMULLv")>;
1018
1019// ASIMD pairwise add and accumulate long
1020def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALPv")>;
1021
1022// ASIMD shift accumulate
1023def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;
1024
1025// ASIMD shift by immed, basic
1026def : InstRW<[N2Write_2cyc_1V1], (instregex "^SHLv", "^SHLLv", "^SHRNv",
1027                                            "^SSHLLv", "^SSHRv", "^USHLLv",
1028                                            "^USHRv")>;
1029
1030// ASIMD shift by immed and insert, basic
1031def : InstRW<[N2Write_2cyc_1V1], (instregex "^SLIv", "^SRIv")>;
1032
1033// ASIMD shift by immed, complex
1034def : InstRW<[N2Write_4cyc_1V1],
1035             (instregex "^RSHRNv", "^SQRSHRNv", "^SQRSHRUNv",
1036                        "^(SQSHLU?|UQSHL)[bhsd]$",
1037                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
1038                        "^SQSHRNv", "^SQSHRUNv", "^SRSHRv", "^UQRSHRNv",
1039                        "^UQSHRNv", "^URSHRv")>;
1040
1041// ASIMD shift by register, basic
1042def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]SHLv")>;
1043
1044// ASIMD shift by register, complex
1045def : InstRW<[N2Write_4cyc_1V1],
1046             (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
1047                        "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
1048
1049// ASIMD floating-point instructions
1050// -----------------------------------------------------------------------------
1051
1052// ASIMD FP absolute value/difference
1053// ASIMD FP arith, normal
1054// ASIMD FP compare
1055// ASIMD FP complex add
1056// ASIMD FP max/min, normal
1057// ASIMD FP max/min, pairwise
1058// ASIMD FP negate
1059// Handled by SchedAlias<WriteV[dq], ...>
1060
1061// ASIMD FP complex multiply add
1062def : InstRW<[N2Write_4cyc_1V], (instregex "^FCMLAv")>;
1063
1064// ASIMD FP convert, long (F16 to F32)
1065def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTL(v4|v8)i16")>;
1066
1067// ASIMD FP convert, long (F32 to F64)
1068def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTL(v2|v4)i32")>;
1069
1070// ASIMD FP convert, narrow (F32 to F16)
1071def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTN(v4|v8)i16")>;
1072
1073// ASIMD FP convert, narrow (F64 to F32)
1074def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTN(v2|v4)i32",
1075                                            "^FCVTXN(v2|v4)f32")>;
1076
1077// ASIMD FP convert, other, D-form F32 and Q-form F64
1078def : InstRW<[N2Write_3cyc_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
1079                                            "^[SU]CVTFv2f(32|64)$")>;
1080
1081// ASIMD FP convert, other, D-form F16 and Q-form F32
1082def : InstRW<[N2Write_4cyc_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
1083                                            "^[SU]CVTFv4f(16|32)$")>;
1084
1085// ASIMD FP convert, other, Q-form F16
1086def : InstRW<[N2Write_6cyc_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
1087                                            "^[SU]CVTFv8f16$")>;
1088
1089// ASIMD FP divide, D-form, F16
1090def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVv4f16)>;
1091
1092// ASIMD FP divide, D-form, F32
1093def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv2f32)>;
1094
1095// ASIMD FP divide, Q-form, F16
1096def : InstRW<[N2Write_13cyc_2V0], (instrs FDIVv8f16)>;
1097
1098// ASIMD FP divide, Q-form, F32
1099def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv4f32)>;
1100
1101// ASIMD FP divide, Q-form, F64
1102def : InstRW<[N2Write_15cyc_2V0], (instrs FDIVv2f64)>;
1103
1104// ASIMD FP max/min, reduce, F32 and D-form F16
1105def : InstRW<[N2Write_4cyc_1V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
1106
1107// ASIMD FP max/min, reduce, Q-form F16
1108def : InstRW<[N2Write_6cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
1109
1110// ASIMD FP multiply
1111def : InstRW<[N2Write_3cyc_1V], (instregex "^FMULv", "^FMULXv")>;
1112
1113// ASIMD FP multiply accumulate
1114def : InstRW<[N2Write_4cyc_1V], (instregex "^FMLAv", "^FMLSv")>;
1115
1116// ASIMD FP multiply accumulate long
1117def : InstRW<[N2Write_5cyc_1V], (instregex "^FMLALv", "^FMLSLv")>;
1118
1119// ASIMD FP round, D-form F32 and Q-form F64
1120def : InstRW<[N2Write_3cyc_1V0],
1121             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
1122                        "^FRINT[32|64)[XZ]v2f(32|64)$")>;
1123
1124// ASIMD FP round, D-form F16 and Q-form F32
1125def : InstRW<[N2Write_4cyc_2V0],
1126             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
1127                        "^FRINT(32|64)[XZ]v4f32$")>;
1128
1129
1130// ASIMD FP round, Q-form F16
1131def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
1132
1133// ASIMD FP square root, D-form, F16
1134def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTv4f16)>;
1135
1136// ASIMD FP square root, D-form, F32
1137def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv2f32)>;
1138
1139// ASIMD FP square root, Q-form, F16
1140def : InstRW<[N2Write_13cyc_2V0], (instrs FSQRTv8f16)>;
1141
1142// ASIMD FP square root, Q-form, F32
1143def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv4f32)>;
1144
1145// ASIMD FP square root, Q-form, F64
1146def : InstRW<[N2Write_16cyc_2V0], (instrs FSQRTv2f64)>;
1147
1148// ASIMD BFloat16 (BF16) instructions
1149// -----------------------------------------------------------------------------
1150
1151// ASIMD convert, F32 to BF16
1152def : InstRW<[N2Write_4cyc_1V0], (instrs BFCVTN, BFCVTN2)>;
1153
1154// ASIMD dot product
1155def : InstRW<[N2Write_4cyc_1V], (instrs BFDOTv4bf16, BFDOTv8bf16)>;
1156
1157// ASIMD matrix multiply accumulate
1158def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA)>;
1159
1160// ASIMD multiply accumulate long
1161def : InstRW<[N2Write_4cyc_1V], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
1162                                        BFMLALTIdx)>;
1163
1164// Scalar convert, F32 to BF16
1165def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT)>;
1166
1167// ASIMD miscellaneous instructions
1168// -----------------------------------------------------------------------------
1169
1170// ASIMD bit reverse
1171// ASIMD bitwise insert
1172// ASIMD count
1173// ASIMD duplicate, element
1174// ASIMD extract
1175// ASIMD extract narrow
1176// ASIMD insert, element to element
1177// ASIMD move, FP immed
1178// ASIMD move, integer immed
1179// ASIMD reverse
1180// ASIMD table lookup, 1 or 2 table regs
1181// ASIMD table lookup extension, 1 table reg
1182// ASIMD transfer, element to gen reg
1183// ASIMD transpose
1184// ASIMD unzip/zip
1185// Handled by SchedAlias<WriteV[dq], ...>
1186
1187// ASIMD duplicate, gen reg
1188def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;
1189
1190// ASIMD extract narrow, saturating
1191def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
1192
1193// ASIMD reciprocal and square root estimate, D-form U32
1194def : InstRW<[N2Write_3cyc_1V0], (instrs URECPEv2i32, URSQRTEv2i32)>;
1195
1196// ASIMD reciprocal and square root estimate, Q-form U32
1197def : InstRW<[N2Write_4cyc_2V0], (instrs URECPEv4i32, URSQRTEv4i32)>;
1198
1199// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
1200def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPEv1f16, FRECPEv1i32,
1201                                         FRECPEv1i64, FRECPEv2f32,
1202                                         FRSQRTEv1f16, FRSQRTEv1i32,
1203                                         FRSQRTEv1i64, FRSQRTEv2f32)>;
1204
1205// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
1206def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPEv4f16, FRECPEv4f32,
1207                                         FRSQRTEv4f16, FRSQRTEv4f32)>;
1208
1209// ASIMD reciprocal and square root estimate, Q-form F16
1210def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPEv8f16, FRSQRTEv8f16)>;
1211
1212// ASIMD reciprocal exponent
1213def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRECPXv")>;
1214
1215// ASIMD reciprocal step
1216def : InstRW<[N2Write_4cyc_1V], (instregex "^FRECPSv", "^FRSQRTSv")>;
1217
1218// ASIMD table lookup, 3 table regs
1219def : InstRW<[N2Write_4cyc_2V], (instrs TBLv8i8Three, TBLv16i8Three)>;
1220
1221// ASIMD table lookup, 4 table regs
1222def : InstRW<[N2Write_4cyc_4V], (instrs TBLv8i8Four, TBLv16i8Four)>;
1223
1224// ASIMD table lookup extension, 2 table reg
1225def : InstRW<[N2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;
1226
1227// ASIMD table lookup extension, 3 table reg
1228def : InstRW<[N2Write_6cyc_4V], (instrs TBXv8i8Three, TBXv16i8Three)>;
1229
1230// ASIMD table lookup extension, 4 table reg
1231def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>;
1232
1233// ASIMD transfer, gen reg to element
1234def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;
1235
1236// ASIMD load instructions
1237// -----------------------------------------------------------------------------
1238
1239// ASIMD load, 1 element, multiple, 1 reg, D-form
1240def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
1241def : InstRW<[WriteAdr, N2Write_6cyc_1L],
1242             (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
1243
1244// ASIMD load, 1 element, multiple, 1 reg, Q-form
1245def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
1246def : InstRW<[WriteAdr, N2Write_6cyc_1L],
1247             (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;
1248
1249// ASIMD load, 1 element, multiple, 2 reg, D-form
1250def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
1251def : InstRW<[WriteAdr, N2Write_6cyc_2L],
1252             (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
1253
1254// ASIMD load, 1 element, multiple, 2 reg, Q-form
1255def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
1256def : InstRW<[WriteAdr, N2Write_6cyc_2L],
1257             (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
1258
1259// ASIMD load, 1 element, multiple, 3 reg, D-form
1260def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
1261def : InstRW<[WriteAdr, N2Write_6cyc_3L],
1262             (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
1263
1264// ASIMD load, 1 element, multiple, 3 reg, Q-form
1265def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
1266def : InstRW<[WriteAdr, N2Write_6cyc_3L],
1267             (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
1268
1269// ASIMD load, 1 element, multiple, 4 reg, D-form
1270def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
1271def : InstRW<[WriteAdr, N2Write_7cyc_4L],
1272             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
1273
1274// ASIMD load, 1 element, multiple, 4 reg, Q-form
1275def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
1276def : InstRW<[WriteAdr, N2Write_7cyc_4L],
1277             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
1278
1279// ASIMD load, 1 element, one lane, B/H/S
1280// ASIMD load, 1 element, one lane, D
1281def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "LD1i(8|16|32|64)$")>;
1282def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)_POST$")>;
1283
1284// ASIMD load, 1 element, all lanes, D-form, B/H/S
1285// ASIMD load, 1 element, all lanes, D-form, D
1286def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "LD1Rv(8b|4h|2s|1d)$")>;
1287def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
1288
1289// ASIMD load, 1 element, all lanes, Q-form
1290def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "LD1Rv(16b|8h|4s|2d)$")>;
1291def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
1292
1293// ASIMD load, 2 element, multiple, D-form, B/H/S
1294def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "LD2Twov(8b|4h|2s)$")>;
1295def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
1296
1297// ASIMD load, 2 element, multiple, Q-form, B/H/S
1298// ASIMD load, 2 element, multiple, Q-form, D
1299def : InstRW<[N2Write_8cyc_2L_2V],           (instregex "LD2Twov(16b|8h|4s|2d)$")>;
1300def : InstRW<[WriteAdr, N2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
1301
1302// ASIMD load, 2 element, one lane, B/H
1303// ASIMD load, 2 element, one lane, S
1304// ASIMD load, 2 element, one lane, D
1305def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "LD2i(8|16|32|64)$")>;
1306def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)_POST$")>;
1307
1308// ASIMD load, 2 element, all lanes, D-form, B/H/S
1309// ASIMD load, 2 element, all lanes, D-form, D
1310def : InstRW<[N2Write_8cyc_1L_2V],            (instregex "LD2Rv(8b|4h|2s|1d)$")>;
1311def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V],  (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
1312
1313// ASIMD load, 2 element, all lanes, Q-form
1314def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "LD2Rv(16b|8h|4s|2d)$")>;
1315def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
1316
1317// ASIMD load, 3 element, multiple, D-form, B/H/S
1318def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "LD3Threev(8b|4h|2s)$")>;
1319def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
1320
1321// ASIMD load, 3 element, multiple, Q-form, B/H/S
1322def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "LD3Threev(16b|8h|4s)$")>;
1323def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
1324
1325// ASIMD load, 3 element, multiple, Q-form, D
1326def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "LD3Threev(2d)$")>;
1327def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Threev(2d)_POST$")>;
1328
1329// ASIMD load, 3 element, one lane, B/H
1330// ASIMD load, 3 element, one lane, S
1331// ASIMD load, 3 element, one lane, D
1332def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "LD3i(8|16|32|64)$")>;
1333def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)_POST$")>;
1334
1335// ASIMD load, 3 element, all lanes, D-form, B/H/S
1336// ASIMD load, 3 element, all lanes, D-form, D
1337def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "LD3Rv(8b|4h|2s|1d)$")>;
1338def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
1339
1340// ASIMD load, 3 element, all lanes, Q-form, B/H/S
1341// ASIMD load, 3 element, all lanes, Q-form, D
1342def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "LD3Rv(16b|8h|4s|2d)$")>;
1343def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
1344
1345// ASIMD load, 4 element, multiple, D-form, B/H/S
1346def : InstRW<[N2Write_8cyc_3L_4V],           (instregex "LD4Fourv(8b|4h|2s)$")>;
1347def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
1348
1349// ASIMD load, 4 element, multiple, Q-form, B/H/S
1350// ASIMD load, 4 element, multiple, Q-form, D
1351def : InstRW<[N2Write_9cyc_4L_4V],           (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
1352def : InstRW<[WriteAdr, N2Write_9cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
1353
1354// ASIMD load, 4 element, one lane, B/H
1355// ASIMD load, 4 element, one lane, S
1356// ASIMD load, 4 element, one lane, D
1357def : InstRW<[N2Write_8cyc_3L_4V],           (instregex "LD4i(8|16|32|64)$")>;
1358def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)_POST$")>;
1359
1360// ASIMD load, 4 element, all lanes, D-form, B/H/S
1361// ASIMD load, 4 element, all lanes, D-form, D
1362def : InstRW<[N2Write_8cyc_3L_4V],              (instregex "LD4Rv(8b|4h|2s|1d)$")>;
1363def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V],    (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
1364
1365// ASIMD load, 4 element, all lanes, Q-form, B/H/S
1366// ASIMD load, 4 element, all lanes, Q-form, D
1367def : InstRW<[N2Write_8cyc_4L_4V],            (instregex "LD4Rv(16b|8h|4s|2d)$")>;
1368def : InstRW<[WriteAdr, N2Write_8cyc_4L_4V],  (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
1369
1370// ASIMD store instructions
1371// -----------------------------------------------------------------------------
1372
1373// ASIMD store, 1 element, multiple, 1 reg, D-form
1374def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "ST1Onev(8b|4h|2s|1d)$")>;
1375def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
1376
1377// ASIMD store, 1 element, multiple, 1 reg, Q-form
1378def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "ST1Onev(16b|8h|4s|2d)$")>;
1379def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
1380
1381// ASIMD store, 1 element, multiple, 2 reg, D-form
1382def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "ST1Twov(8b|4h|2s|1d)$")>;
1383def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
1384
1385// ASIMD store, 1 element, multiple, 2 reg, Q-form
1386def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "ST1Twov(16b|8h|4s|2d)$")>;
1387def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
1388
1389// ASIMD store, 1 element, multiple, 3 reg, D-form
1390def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "ST1Threev(8b|4h|2s|1d)$")>;
1391def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
1392
1393// ASIMD store, 1 element, multiple, 3 reg, Q-form
1394def : InstRW<[N2Write_2cyc_3L01_3V],           (instregex "ST1Threev(16b|8h|4s|2d)$")>;
1395def : InstRW<[WriteAdr, N2Write_2cyc_3L01_3V], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
1396
1397// ASIMD store, 1 element, multiple, 4 reg, D-form
1398def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
1399def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
1400
1401// ASIMD store, 1 element, multiple, 4 reg, Q-form
1402def : InstRW<[N2Write_2cyc_4L01_4V],           (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
1403def : InstRW<[WriteAdr, N2Write_2cyc_4L01_4V], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
1404
1405// ASIMD store, 1 element, one lane, B/H/S
1406// ASIMD store, 1 element, one lane, D
1407def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "ST1i(8|16|32|64)$")>;
1408def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST1i(8|16|32|64)_POST$")>;
1409
1410// ASIMD store, 2 element, multiple, D-form, B/H/S
1411def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "ST2Twov(8b|4h|2s)$")>;
1412def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
1413
1414// ASIMD store, 2 element, multiple, Q-form, B/H/S
1415// ASIMD store, 2 element, multiple, Q-form, D
1416def : InstRW<[N2Write_4cyc_2L01_2V],           (instregex "ST2Twov(16b|8h|4s|2d)$")>;
1417def : InstRW<[WriteAdr, N2Write_4cyc_2L01_2V], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
1418
1419// ASIMD store, 2 element, one lane, B/H/S
1420// ASIMD store, 2 element, one lane, D
1421def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "ST2i(8|16|32|64)$")>;
1422def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST2i(8|16|32|64)_POST$")>;
1423
1424// ASIMD store, 3 element, multiple, D-form, B/H/S
1425def : InstRW<[N2Write_5cyc_2L01_2V],           (instregex "ST3Threev(8b|4h|2s)$")>;
1426def : InstRW<[WriteAdr, N2Write_5cyc_2L01_2V], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
1427
1428// ASIMD store, 3 element, multiple, Q-form, B/H/S
1429// ASIMD store, 3 element, multiple, Q-form, D
1430def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST3Threev(16b|8h|4s|2d)$")>;
1431def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
1432
1433// ASIMD store, 3 element, one lane, B/H
1434// ASIMD store, 3 element, one lane, S
1435// ASIMD store, 3 element, one lane, D
1436def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST3i(8|16|32|64)$")>;
1437def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST3i(8|16|32|64)_POST$")>;
1438
1439// ASIMD store, 4 element, multiple, D-form, B/H/S
1440def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST4Fourv(8b|4h|2s)$")>;
1441def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
1442
1443// ASIMD store, 4 element, multiple, Q-form, B/H/S
1444def : InstRW<[N2Write_7cyc_6L01_6V],           (instregex "ST4Fourv(16b|8h|4s)$")>;
1445def : InstRW<[WriteAdr, N2Write_7cyc_6L01_6V], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
1446
1447// ASIMD store, 4 element, multiple, Q-form, D
1448def : InstRW<[N2Write_5cyc_4L01_4V],           (instregex "ST4Fourv(2d)$")>;
1449def : InstRW<[WriteAdr, N2Write_5cyc_4L01_4V], (instregex "ST4Fourv(2d)_POST$")>;
1450
1451// ASIMD store, 4 element, one lane, B/H/S
1452def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST4i(8|16|32)$")>;
1453def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST4i(8|16|32)_POST$")>;
1454
1455// ASIMD store, 4 element, one lane, D
1456def : InstRW<[N2Write_4cyc_3L01_3V],            (instregex "ST4i(64)$")>;
1457def : InstRW<[WriteAdr, N2Write_4cyc_3L01_3V],  (instregex "ST4i(64)_POST$")>;
1458
1459// Cryptography extensions
1460// -----------------------------------------------------------------------------
1461
1462// Crypto AES ops
1463def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;
1464
1465// Crypto polynomial (64x64) multiply long
1466def : InstRW<[N2Write_2cyc_1V0], (instrs PMULLv1i64, PMULLv2i64)>;
1467
1468// Crypto SHA1 hash acceleration op
1469// Crypto SHA1 schedule acceleration ops
1470def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>;
1471
1472// Crypto SHA1 hash acceleration ops
1473// Crypto SHA256 hash acceleration ops
1474def : InstRW<[N2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
1475
1476// Crypto SHA256 schedule acceleration ops
1477def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>;
1478
1479// Crypto SHA512 hash acceleration ops
1480def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;
1481
1482// Crypto SHA3 ops
1483def : InstRW<[N2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
1484
1485// Crypto SM3 ops
1486def : InstRW<[N2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
1487                                            "^SM3TT[12][AB]$")>;
1488
1489// Crypto SM4 ops
1490def : InstRW<[N2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>;
1491
1492// CRC
1493// -----------------------------------------------------------------------------
1494
1495def : InstRW<[N2Write_2cyc_1M0], (instregex "^CRC32")>;
1496
1497// SVE Predicate instructions
1498// -----------------------------------------------------------------------------
1499
1500// Loop control, based on predicate
1501def : InstRW<[N2Write_2cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP,
1502                                        BRKB_PPmP, BRKB_PPzP)>;
1503
1504// Loop control, based on predicate and flag setting
1505def : InstRW<[N2Write_3cyc_1M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
1506
1507// Loop control, propagating
1508def : InstRW<[N2Write_2cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
1509
1510// Loop control, propagating and flag setting
1511def : InstRW<[N2Write_3cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
1512                                            BRKPBS_PPzPP)>;
1513
1514// Loop control, based on GPR
1515def : InstRW<[N2Write_3cyc_1M],
1516             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
1517
1518def : InstRW<[N2Write_3cyc_1M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>;
1519
1520// Loop terminate
1521def : InstRW<[N2Write_1cyc_1M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
1522
1523// Predicate counting scalar
1524def : InstRW<[N2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
1525def : InstRW<[N2Write_2cyc_1M],
1526             (instregex "^(CNT|DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$",
1527                        "^SQ(DEC|INC)[BHWD]_XPiWdI$",
1528                        "^(UQDEC|UQINC)[BHWD]_WPiI$")>;
1529
1530// Predicate counting scalar, active predicate
1531def : InstRW<[N2Write_2cyc_1M],
1532             (instregex "^CNTP_XPP_[BHSD]$",
1533                        "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$",
1534                        "^(UQDEC|UQINC)P_WP_[BHSD]$",
1535                        "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>;
1536
1537// Predicate counting vector, active predicate
1538def : InstRW<[N2Write_7cyc_1M_1M0_1V],
1539             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>;
1540
1541// Predicate logical
1542def : InstRW<[N2Write_1cyc_1M0],
1543             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
1544
1545// Predicate logical, flag setting
1546def : InstRW<[N2Write_2cyc_1M0_1M],
1547             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>;
1548
1549// Predicate reverse
1550def : InstRW<[N2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>;
1551
1552// Predicate select
1553def : InstRW<[N2Write_1cyc_1M0], (instrs SEL_PPPP)>;
1554
1555// Predicate set
1556def : InstRW<[N2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>;
1557
1558// Predicate set/initialize, set flags
1559def : InstRW<[N2Write_3cyc_1M], (instregex "^PTRUES_[BHSD]$")>;
1560
1561// Predicate find first/next
1562def : InstRW<[N2Write_3cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>;
1563
1564// Predicate test
1565def : InstRW<[N2Write_1cyc_1M], (instrs PTEST_PP)>;
1566
1567// Predicate transpose
1568def : InstRW<[N2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSDQ]$")>;
1569
1570// Predicate unpack and widen
1571def : InstRW<[N2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
1572
1573// Predicate zip/unzip
1574def : InstRW<[N2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
1575
1576// SVE integer instructions
1577// -----------------------------------------------------------------------------
1578
1579// Arithmetic, absolute diff
1580def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
1581                                           "^[SU]ABD_ZPZZ_[BHSD]")>;
1582
1583// Arithmetic, absolute diff accum
1584def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;
1585
1586// Arithmetic, absolute diff accum long
1587def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;
1588
1589// Arithmetic, absolute diff long
1590def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;
1591
1592// Arithmetic, basic
1593def : InstRW<[N2Write_2cyc_1V],
1594             (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
1595                        "^(ADD|SUB)_ZZZ_[BHSD]",
1596                        "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
1597                        "^(ADD|SUB|SUBR)_ZI_[BHSD]",
1598                        "^ADR_[SU]XTW_ZZZ_D_[0123]",
1599                        "^ADR_LSL_ZZZ_[SD]_[0123]",
1600                        "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
1601                        "^SADDLBT_ZZZ_[HSD]",
1602                        "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
1603                        "^SSUBL(BT|TB)_ZZZ_[HSD]")>;
1604
1605// Arithmetic, complex
1606def : InstRW<[N2Write_2cyc_1V],
1607             (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
1608                        "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
1609                        "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
1610                        "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
1611                        "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
1612                        "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;
1613
1614// Arithmetic, large integer
1615def : InstRW<[N2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;
1616
1617// Arithmetic, pairwise add
1618def : InstRW<[N2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>;
1619
1620// Arithmetic, pairwise add and accum long
1621def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
1622
1623// Arithmetic, shift
1624def : InstRW<[N2Write_2cyc_1V1],
1625             (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
1626                        "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
1627                        "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
1628                        "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
1629                        "^(ASR|LSL|LSR)_ZZI_[BHSD]",
1630                        "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
1631                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;
1632
1633// Arithmetic, shift and accumulate
1634def : InstRW<[N2Write_4cyc_1V1],
1635             (instregex "^(SRSRA|SSRA|URSRA|USRA)_ZZI_[BHSD]$")>;
1636
1637// Arithmetic, shift by immediate
1638// Arithmetic, shift by immediate and insert
1639def : InstRW<[N2Write_2cyc_1V1],
1640             (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>;
1641
1642// Arithmetic, shift complex
1643def : InstRW<[N2Write_4cyc_1V1],
1644             (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
1645                        "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
1646                        "^[SU]QR?SHL_ZPZZ_[BHSD]",
1647                        "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
1648                        "^SQSHRU?N[BT]_ZZI_[BHS]",
1649                        "^UQR?SHRN[BT]_ZZI_[BHS]")>;
1650
1651// Arithmetic, shift right for divide
1652def : InstRW<[N2Write_4cyc_1V1], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;
1653
1654// Arithmetic, shift rounding
1655def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
1656                                             "^[SU]RSHL_ZPZZ_[BHSD]",
1657                                             "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;
1658
1659// Bit manipulation
1660def : InstRW<[N2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;
1661
1662// Bitwise select
1663def : InstRW<[N2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;
1664
1665// Count/reverse bits
1666def : InstRW<[N2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;
1667
1668// Broadcast logical bitmask immediate to vector
1669def : InstRW<[N2Write_2cyc_1V], (instrs DUPM_ZI)>;
1670
1671// Compare and set flags
1672def : InstRW<[N2Write_4cyc_1V0_1M],
1673             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
1674                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
1675
1676// Complex add
1677def : InstRW<[N2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>;
1678
1679// Complex dot product 8-bit element
1680def : InstRW<[N2Write_3cyc_1V], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
1681
1682// Complex dot product 16-bit element
1683def : InstRW<[N2Write_4cyc_1V0], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
1684
1685// Complex multiply-add B, H, S element size
1686def : InstRW<[N2Write_4cyc_1V0], (instregex "^CMLA_ZZZ_[BHS]$",
1687                                            "^CMLA_ZZZI_[HS]$")>;
1688
1689// Complex multiply-add D element size
1690def : InstRW<[N2Write_5cyc_2V0], (instrs CMLA_ZZZ_D)>;
1691
1692// Conditional extract operations, scalar form
1693def : InstRW<[N2Write_8cyc_1M0_1V1_1V], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
1694
1695// Conditional extract operations, SIMD&FP scalar and vector forms
1696def : InstRW<[N2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
1697                                            "^COMPACT_ZPZ_[SD]$",
1698                                            "^SPLICE_ZPZZ?_[BHSD]$")>;
1699
1700// Convert to floating point, 64b to float or convert to double
1701def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
1702                                            "^[SU]CVTF_ZPmZ_StoD")>;
1703
1704// Convert to floating point, 32b to single or half
1705def : InstRW<[N2Write_4cyc_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
1706
1707// Convert to floating point, 16b to half
1708def : InstRW<[N2Write_6cyc_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
1709
1710// Copy, scalar
1711def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>;
1712
1713// Copy, scalar SIMD&FP or imm
1714def : InstRW<[N2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$",
1715                                           "^CPY_ZPzI_[BHSD]$")>;
1716
1717// Divides, 32 bit
1718def : InstRW<[N2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
1719                                             "^[SU]DIV_ZPZZ_S")>;
1720
1721// Divides, 64 bit
1722def : InstRW<[N2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
1723                                             "^[SU]DIV_ZPZZ_D")>;
1724
1725// Dot product, 8 bit
1726def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]DOT_ZZZI?_S$")>;
1727
1728// Dot product, 8 bit, using signed and unsigned integers
1729def : InstRW<[N2Write_3cyc_1V], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
1730
1731// Dot product, 16 bit
1732def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>;
1733
1734// Duplicate, immediate and indexed form
1735def : InstRW<[N2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$",
1736                                           "^DUP_ZZI_[BHSDQ]$")>;
1737
1738// Duplicate, scalar form
1739def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
1740
1741// Extend, sign or zero
1742def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]",
1743                                            "^[SU]XTH_ZPmZ_[SD]",
1744                                            "^[SU]XTW_ZPmZ_[D]")>;
1745
1746// Extract
1747def : InstRW<[N2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
1748
1749// Extract narrow saturating
1750def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$",
1751                                            "^SQXTUN[BT]_ZZ_[BHS]$")>;
1752
1753// Extract/insert operation, SIMD and FP scalar form
1754def : InstRW<[N2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
1755                                            "^INSR_ZV_[BHSD]$")>;
1756
1757// Extract/insert operation, scalar
1758def : InstRW<[N2Write_5cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]$",
1759                                                "^INSR_ZR_[BHSD]$")>;
1760
1761// Histogram operations
1762def : InstRW<[N2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$",
1763                                           "^HISTSEG_ZZZ$")>;
1764
1765// Horizontal operations, B, H, S form, immediate operands only
1766def : InstRW<[N2Write_4cyc_1V0], (instregex "^INDEX_II_[BHS]$")>;
1767
1768// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar
1769// operands only / immediate, scalar operands
1770def : InstRW<[N2Write_7cyc_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
1771
1772// Horizontal operations, D form, immediate operands only
1773def : InstRW<[N2Write_5cyc_2V0], (instrs INDEX_II_D)>;
1774
1775// Horizontal operations, D form, scalar, immediate operands)/ scalar operands
1776// only / immediate, scalar operands
1777def : InstRW<[N2Write_8cyc_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
1778
1779// Logical
1780def : InstRW<[N2Write_2cyc_1V],
1781             (instregex "^(AND|EOR|ORR)_ZI",
1782                        "^(AND|BIC|EOR|ORR)_ZZZ",
1783                        "^EOR(BT|TB)_ZZZ_[BHSD]",
1784                        "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]",
1785                        "^NOT_ZPmZ_[BHSD]")>;
1786
1787// Max/min, basic and pairwise
1788def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
1789                                           "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
1790                                           "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;
1791
1792// Matching operations
1793def : InstRW<[N2Write_2cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>;
1794
1795// Matrix multiply-accumulate
1796def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
1797
1798// Move prefix
1799def : InstRW<[N2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
1800                                           "^MOVPRFX_ZZ$")>;
1801
1802// Multiply, B, H, S element size
1803def : InstRW<[N2Write_4cyc_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
1804                                             "^MUL_ZPZZ_[BHS]",
1805                                             "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
1806                                             "^[SU]MULH_ZPZZ_[BHS]")>;
1807
1808// Multiply, D element size
1809def : InstRW<[N2Write_5cyc_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
1810                                             "^MUL_ZPZZ_D",
1811                                             "^[SU]MULH_(ZPmZ|ZZZ)_D",
1812                                             "^[SU]MULH_ZPZZ_D")>;
1813
1814// Multiply long
1815def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
1816                                            "^[SU]MULL[BT]_ZZZ_[HSD]$")>;
1817
1818// Multiply accumulate, B, H, S element size
1819def : InstRW<[N2Write_4cyc_1V0], (instregex "^ML[AS]_ZZZI_[BHS]$",
1820                                            "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>;
1821
1822// Multiply accumulate, D element size
1823def : InstRW<[N2Write_5cyc_2V0], (instregex "^ML[AS]_ZZZI_D$",
1824                                            "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_D")>;
1825
1826// Multiply accumulate long
1827def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
1828                                            "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;
1829
1830// Multiply accumulate saturating doubling long regular
1831def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
1832                                            "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;
1833
1834// Multiply saturating doubling high, B, H, S element size
1835def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULH_ZZZ_[BHS]$",
1836                                            "^SQDMULH_ZZZI_[HS]$")>;
1837
1838// Multiply saturating doubling high, D element size
1839def : InstRW<[N2Write_5cyc_2V0], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
1840
1841// Multiply saturating doubling long
1842def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
1843                                            "^SQDMULL[BT]_ZZZI_[SD]$")>;
1844
1845// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
1846// element size
1847def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
1848                                            "^SQRDCMLAH_ZZZ_[BHS]$",
1849                                            "^SQRDML[AS]H_ZZZI_[HS]$",
1850                                            "^SQRDCMLAH_ZZZI_[HS]$")>;
1851
1852// Multiply saturating rounding doubling regular/complex accumulate, D element
1853// size
1854def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDML[AS]H_ZZZI?_D$",
1855                                            "^SQRDCMLAH_ZZZ_D$")>;
1856
1857// Multiply saturating rounding doubling regular/complex, B, H, S element size
1858def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMULH_ZZZ_[BHS]$",
1859                                            "^SQRDMULH_ZZZI_[HS]$")>;
1860
1861// Multiply saturating rounding doubling regular/complex, D element size
1862def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDMULH_ZZZI?_D$")>;
1863
1864// Multiply/multiply long, (8x8) polynomial
1865def : InstRW<[N2Write_2cyc_1V0], (instregex "^PMUL_ZZZ_B$",
1866                                            "^PMULL[BT]_ZZZ_[HDQ]$")>;
1867
1868// Predicate counting vector
1869def : InstRW<[N2Write_2cyc_1V0],
1870             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>;
1871
1872// Reciprocal estimate
1873def : InstRW<[N2Write_4cyc_2V0], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
1874
1875// Reduction, arithmetic, B form
1876def : InstRW<[N2Write_11cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
1877
1878// Reduction, arithmetic, H form
1879def : InstRW<[N2Write_9cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
1880
1881// Reduction, arithmetic, S form
1882def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
1883
1884// Reduction, arithmetic, D form
1885def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
1886
1887// Reduction, logical
1888def : InstRW<[N2Write_6cyc_1V_1V1], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>;
1889
1890// Reverse, vector
1891def : InstRW<[N2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$",
1892                                           "^REVB_ZPmZ_[HSD]$",
1893                                           "^REVH_ZPmZ_[SD]$",
1894                                           "^REVW_ZPmZ_D$")>;
1895
1896// Select, vector form
1897def : InstRW<[N2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>;
1898
1899// Table lookup
1900def : InstRW<[N2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>;
1901
1902// Table lookup extension
1903def : InstRW<[N2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>;
1904
1905// Transpose, vector form
1906def : InstRW<[N2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>;
1907
1908// Unpack and extend
1909def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>;
1910
1911// Zip/unzip
1912def : InstRW<[N2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
1913
1914// SVE floating-point instructions
1915// -----------------------------------------------------------------------------
1916
1917// Floating point absolute value/difference
1918def : InstRW<[N2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]",
1919                                           "^FABD_ZPZZ_[HSD]",
1920                                           "^FABS_ZPmZ_[HSD]")>;
1921
1922// Floating point arithmetic
1923def : InstRW<[N2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
1924                                           "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
1925                                           "^FADDP_ZPmZZ_[HSD]",
1926                                           "^FNEG_ZPmZ_[HSD]",
1927                                           "^FSUBR_ZPm[IZ]_[HSD]",
1928                                           "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
1929
1930// Floating point associative add, F16
1931def : InstRW<[N2Write_10cyc_1V1], (instrs FADDA_VPZ_H)>;
1932
1933// Floating point associative add, F32
1934def : InstRW<[N2Write_6cyc_1V1], (instrs FADDA_VPZ_S)>;
1935
1936// Floating point associative add, F64
1937def : InstRW<[N2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
1938
1939// Floating point compare
1940def : InstRW<[N2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]$",
1941                                            "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$",
1942                                            "^FCM(LE|LT)_PPzZ0_[HSD]$",
1943                                            "^FCMUO_PPzZZ_[HSD]$")>;
1944
1945// Floating point complex add
1946def : InstRW<[N2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>;
1947
1948// Floating point complex multiply add
1949def : InstRW<[N2Write_5cyc_1V], (instregex "^FCMLA_ZPmZZ_[HSD]$",
1950                                           "^FCMLA_ZZZI_[HS]$")>;
1951
1952// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
1953def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
1954                                            "^FCVTLT_ZPmZ_HtoS",
1955                                            "^FCVTNT_ZPmZ_StoH")>;
1956
1957// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
1958// or F64 to F16)
1959def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
1960                                            "^FCVTLT_ZPmZ_StoD",
1961                                            "^FCVTNT_ZPmZ_DtoS")>;
1962
1963// Floating point convert, round to odd
1964def : InstRW<[N2Write_3cyc_1V0], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
1965
1966// Floating point base2 log, F16
1967def : InstRW<[N2Write_6cyc_4V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
1968
1969// Floating point base2 log, F32
1970def : InstRW<[N2Write_4cyc_2V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
1971
1972// Floating point base2 log, F64
1973def : InstRW<[N2Write_3cyc_1V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
1974
1975// Floating point convert to integer, F16
1976def : InstRW<[N2Write_6cyc_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
1977
1978// Floating point convert to integer, F32
1979def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
1980
1981// Floating point convert to integer, F64
1982def : InstRW<[N2Write_3cyc_1V0],
1983             (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
1984
1985// Floating point copy
1986def : InstRW<[N2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$",
1987                                           "^FDUP_ZI_[HSD]$")>;
1988
1989// Floating point divide, F16
1990def : InstRW<[N2Write_13cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
1991
1992// Floating point divide, F32
1993def : InstRW<[N2Write_10cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
1994
1995// Floating point divide, F64
1996def : InstRW<[N2Write_15cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
1997
1998// Floating point min/max pairwise
1999def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
2000
2001// Floating point min/max
2002def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
2003                                           "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
2004
2005// Floating point multiply
2006def : InstRW<[N2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
2007                                           "^FMULX_ZPZZ_[HSD]",
2008                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
2009                                           "^FMUL_ZPZ[IZ]_[HSD]")>;
2010
2011// Floating point multiply accumulate
2012def : InstRW<[N2Write_4cyc_1V], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$",
2013                                           "^FN?ML[AS]_ZPZZZ_[HSD]",
2014                                           "^FML[AS]_ZZZI_[HSD]$")>;
2015
2016// Floating point multiply add/sub accumulate long
2017def : InstRW<[N2Write_4cyc_1V], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
2018
2019// Floating point reciprocal estimate, F16
2020def : InstRW<[N2Write_6cyc_4V0], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
2021
2022// Floating point reciprocal estimate, F32
2023def : InstRW<[N2Write_4cyc_2V0], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
2024
2025// Floating point reciprocal estimate, F64
2026def : InstRW<[N2Write_3cyc_1V0], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
2027
2028// Floating point reciprocal step
2029def : InstRW<[N2Write_4cyc_1V0], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
2030
2031// Floating point reduction, F16
2032def : InstRW<[N2Write_6cyc_2V],
2033             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>;
2034
2035// Floating point reduction, F32
2036def : InstRW<[N2Write_4cyc_1V],
2037             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>;
2038
2039// Floating point reduction, F64
2040def : InstRW<[N2Write_2cyc_1V],
2041             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;
2042
2043// Floating point round to integral, F16
2044def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
2045
2046// Floating point round to integral, F32
2047def : InstRW<[N2Write_4cyc_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
2048
2049// Floating point round to integral, F64
2050def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
2051
2052// Floating point square root, F16
2053def : InstRW<[N2Write_13cyc_1V0], (instregex "^FSQRT_ZPmZ_H")>;
2054
2055// Floating point square root, F32
2056def : InstRW<[N2Write_10cyc_1V0], (instregex "^FSQRT_ZPmZ_S")>;
2057
2058// Floating point square root, F64
2059def : InstRW<[N2Write_16cyc_1V0], (instregex "^FSQRT_ZPmZ_D")>;
2060
2061// Floating point trigonometric exponentiation
2062def : InstRW<[N2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>;
2063
2064// Floating point trigonometric multiply add
2065def : InstRW<[N2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>;
2066
2067// Floating point trigonometric, miscellaneous
2068def : InstRW<[N2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
2069
2070// SVE BFloat16 (BF16) instructions
2071// -----------------------------------------------------------------------------
2072
2073// Convert, F32 to BF16
2074def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
2075
2076// Dot product
2077def : InstRW<[N2Write_4cyc_1V], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
2078
2079// Matrix multiply accumulate
2080def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA_ZZZ)>;
2081
2082// Multiply accumulate long
2083def : InstRW<[N2Write_4cyc_1V], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
2084
2085// SVE Load instructions
2086// -----------------------------------------------------------------------------
2087
2088// Load vector
2089def : InstRW<[N2Write_6cyc_1L], (instrs LDR_ZXI)>;
2090
2091// Load predicate
2092def : InstRW<[N2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
2093
2094// Contiguous load, scalar + imm
2095def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM$",
2096                                           "^LD1S?B_[HSD]_IMM$",
2097                                           "^LD1S?H_[SD]_IMM$",
2098                                           "^LD1S?W_D_IMM$" )>;
2099// Contiguous load, scalar + scalar
2100def : InstRW<[N2Write_6cyc_1L01], (instregex "^LD1[BHWD]$",
2101                                             "^LD1S?B_[HSD]$",
2102                                             "^LD1S?H_[SD]$",
2103                                             "^LD1S?W_D$" )>;
2104
2105// Contiguous load broadcast, scalar + imm
2106def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
2107                                           "^LD1RSW_IMM$",
2108                                           "^LD1RS?B_[HSD]_IMM$",
2109                                           "^LD1RS?H_[SD]_IMM$",
2110                                           "^LD1RS?W_D_IMM$",
2111                                           "^LD1RQ_[BHWD]_IMM$")>;
2112
2113// Contiguous load broadcast, scalar + scalar
2114def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
2115
2116// Non temporal load, scalar + imm
2117def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>;
2118
2119// Non temporal load, scalar + scalar
2120def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>;
2121
2122// Non temporal gather load, vector + scalar 32-bit element size
2123def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S$",
2124                                              "^LDNT1S[BH]_ZZR_S$")>;
2125
2126// Non temporal gather load, vector + scalar 64-bit element size
2127def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
2128def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D)>;
2129
2130// Contiguous first faulting load, scalar + scalar
2131def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]$",
2132                                              "^LDFF1S?B_[HSD]$",
2133                                              "^LDFF1S?H_[SD]$",
2134                                              "^LDFF1S?W_D$")>;
2135
2136// Contiguous non faulting load, scalar + imm
2137def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM$",
2138                                           "^LDNF1S?B_[HSD]_IMM$",
2139                                           "^LDNF1S?H_[SD]_IMM$",
2140                                           "^LDNF1S?W_D_IMM$")>;
2141
2142// Contiguous Load two structures to two vectors, scalar + imm
2143def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>;
2144
2145// Contiguous Load two structures to two vectors, scalar + scalar
2146def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD2[BHWD]$")>;
2147
2148// Contiguous Load three structures to three vectors, scalar + imm
2149def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD3[BHWD]_IMM$")>;
2150
2151// Contiguous Load three structures to three vectors, scalar + scalar
2152def : InstRW<[N2Write_10cyc_1V_1L_1S], (instregex "^LD3[BHWD]$")>;
2153
2154// Contiguous Load four structures to four vectors, scalar + imm
2155def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>;
2156
2157// Contiguous Load four structures to four vectors, scalar + scalar
2158def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>;
2159
2160// Gather load, vector + imm, 32-bit element size
2161def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
2162                                              "^GLD(FF)?1W_IMM$")>;
2163
2164// Gather load, vector + imm, 64-bit element size
2165def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
2166                                              "^GLD(FF)?1D_IMM$")>;
2167
2168// Gather load, 64-bit element size
2169def : InstRW<[N2Write_9cyc_2L_2V],
2170             (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW(_SCALED)?$",
2171                        "^GLD(FF)?1S?[BHW]_D(_SCALED)?$",
2172                        "^GLD(FF)?1D_[SU]XTW(_SCALED)?$",
2173                        "^GLD(FF)?1D(_SCALED)?$")>;
2174
2175// Gather load, 32-bit scaled offset
2176def : InstRW<[N2Write_10cyc_2L_2V],
2177             (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$",
2178                        "^GLD(FF)?1W_[SU]XTW_SCALED")>;
2179
2180// Gather load, 32-bit unpacked unscaled offset
2181def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
2182                                              "^GLD(FF)?1W_[SU]XTW$")>;
2183
2184// SVE Store instructions
2185// -----------------------------------------------------------------------------
2186
2187// Store from predicate reg
2188def : InstRW<[N2Write_1cyc_1L01], (instrs STR_PXI)>;
2189
2190// Store from vector reg
2191def : InstRW<[N2Write_2cyc_1L01_1V], (instrs STR_ZXI)>;
2192
2193// Contiguous store, scalar + imm
2194def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BHWD]_IMM$",
2195                                                "^ST1B_[HSD]_IMM$",
2196                                                "^ST1H_[SD]_IMM$",
2197                                                "^ST1W_D_IMM$")>;
2198
2199// Contiguous store, scalar + scalar
2200def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
2201def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BWD]$",
2202                                                "^ST1B_[HSD]$",
2203                                                "^ST1W_D$")>;
2204
2205// Contiguous store two structures from two vectors, scalar + imm
2206def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BHWD]_IMM$")>;
2207
2208// Contiguous store two structures from two vectors, scalar + scalar
2209def : InstRW<[N2Write_4cyc_1L01_1S_1V], (instrs ST2H)>;
2210
2211// Contiguous store two structures from two vectors, scalar + scalar
2212def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BWD]$")>;
2213
2214// Contiguous store three structures from three vectors, scalar + imm
2215def : InstRW<[N2Write_7cyc_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;
2216
2217// Contiguous store three structures from three vectors, scalar + scalar
2218def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instrs ST3H)>;
2219
2220// Contiguous store three structures from three vectors, scalar + scalar
2221def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instregex "^ST3[BWD]$")>;
2222
2223// Contiguous store four structures from four vectors, scalar + imm
2224def : InstRW<[N2Write_11cyc_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;
2225
2226// Contiguous store four structures from four vectors, scalar + scalar
2227def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instrs ST4H)>;
2228
2229// Contiguous store four structures from four vectors, scalar + scalar
2230def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instregex "^ST4[BWD]$")>;
2231
2232// Non temporal store, scalar + imm
2233def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
2234
2235// Non temporal store, scalar + scalar
2236def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
2237def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
2238
2239// Scatter non temporal store, vector + scalar 32-bit element size
2240def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^STNT1[BHW]_ZZR_S")>;
2241
2242// Scatter non temporal store, vector + scalar 64-bit element size
2243def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZZR_D")>;
2244
2245// Scatter store vector + imm 32-bit element size
2246def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_IMM$",
2247                                                "^SST1W_IMM$")>;
2248
2249// Scatter store vector + imm 64-bit element size
2250def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
2251                                                "^SST1D_IMM$")>;
2252
2253// Scatter store, 32-bit scaled offset
2254def : InstRW<[N2Write_4cyc_2L01_2V],
2255             (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
2256
2257// Scatter store, 32-bit unpacked unscaled offset
2258def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$",
2259                                                "^SST1D_[SU]XTW$")>;
2260
2261// Scatter store, 32-bit unpacked scaled offset
2262def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
2263                                                "^SST1D_[SU]XTW_SCALED$")>;
2264
2265// Scatter store, 32-bit unscaled offset
2266def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_[SU]XTW$",
2267                                                "^SST1W_[SU]XTW$")>;
2268
2269// Scatter store, 64-bit scaled offset
2270def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_SCALED$",
2271                                                "^SST1D_SCALED$")>;
2272
2273// Scatter store, 64-bit unscaled offset
2274def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D$",
2275                                                "^SST1D$")>;
2276
2277// SVE Miscellaneous instructions
2278// -----------------------------------------------------------------------------
2279
2280// Read first fault register, unpredicated
2281def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P)>;
2282
2283// Read first fault register, predicated
2284def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz)>;
2285
2286// Read first fault register and set flags
2287def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>;
2288
2289// Set first fault register
2290// Write to first fault register
2291def : InstRW<[N2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;
2292
2293// Prefetch
2294def : InstRW<[N2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;
2295
2296// SVE Cryptographic instructions
2297// -----------------------------------------------------------------------------
2298
2299// Crypto AES ops
2300def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$",
2301                                           "^AESI?MC_ZZ_B$")>;
2302
2303// Crypto SHA3 ops
2304def : InstRW<[N2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$",
2305                                            "^RAX1_ZZZ_D$",
2306                                            "^XAR_ZZZI_[BHSD]$")>;
2307
2308// Crypto SM4 ops
2309def : InstRW<[N2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;
2310
2311}
2312