xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td (revision 9f23cbd6cae82fd77edfad7173432fa8dccd0a95)
//=- AArch64SchedNeoverseN2.td - NeoverseN2 Scheduling Defs --*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Arm Neoverse N2 processors.
//
//===----------------------------------------------------------------------===//
12
// Machine-wide parameters of the Neoverse N2 scheduling model.
def NeoverseN2Model : SchedMachineModel {
  let CompleteModel = 1;

  // Micro-ops dispatched at a time.
  let IssueWidth = 10;
  // Entries in micro-op re-order buffer.
  let MicroOpBufferSize = 160;
  // NOTE: Copied from Cortex-A57.
  let LoopMicroOpBufferSize = 16;

  // Optimistic load latency.
  let LoadLatency = 4;
  // Extra cycles for mispredicted branch.
  let MispredictPenalty = 10;

  // Everything listed in SMEUnsupported.F, plus HasSVE2p1.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SMEUnsupported.F, [HasSVE2p1]);
}
24
//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Neoverse N2.
// Instructions are first fetched and then decoded into internal macro-ops
// (MOPs). From there, the MOPs proceed through register renaming and dispatch
// stages. A MOP can be split into two micro-ops further down the pipeline
// after the decode stage. Once dispatched, micro-ops wait for their operands
// and issue out-of-order to one of thirteen issue pipelines. Each issue
// pipeline can accept one micro-op per cycle.
33
34let SchedModel = NeoverseN2Model in {
35
// Issue ports: the unit counts below add up to 13.

// Branch 0/1
def N2UnitB : ProcResource<2>;
// Integer single cycle 0/1
def N2UnitS : ProcResource<2>;
// Integer multicycle 0
def N2UnitM0 : ProcResource<1>;
// Integer multicycle 1
def N2UnitM1 : ProcResource<1>;
// Load/Store 0/1
def N2UnitL01 : ProcResource<2>;
// Load 2
def N2UnitL2 : ProcResource<1>;
// Store data 0/1
def N2UnitD : ProcResource<2>;
// FP/ASIMD 0
def N2UnitV0 : ProcResource<1>;
// FP/ASIMD 1
def N2UnitV1 : ProcResource<1>;

// Resource groups built from the ports above.

// FP/ASIMD 0/1
def N2UnitV : ProcResGroup<[N2UnitV0, N2UnitV1]>;
// Integer single/multicycle 0/1
def N2UnitM : ProcResGroup<[N2UnitM0, N2UnitM1]>;
// Load/Store 0/1 and Load 2
def N2UnitL : ProcResGroup<[N2UnitL01, N2UnitL2]>;
// Integer single cycle 0/1 and single/multicycle 0/1
def N2UnitI : ProcResGroup<[N2UnitS, N2UnitM0, N2UnitM1]>;
51
// Define commonly used read types.

// None of these read types gets forwarding: each has a read advance of 0.
foreach Rd = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID,
              ReadExtrHi, ReadAdrBase, ReadST, ReadVLD] in
def : ReadAdvance<Rd, 0>;
65
// Generic writes that are given an empty resource list.
def : WriteRes<WriteAtomic, []> {
  let Unsupported = 1;
}
let Latency = 1 in {
def : WriteRes<WriteBarrier, []>;
def : WriteRes<WriteHint, []>;
}
let Latency = 4 in
def : WriteRes<WriteLDHi, []>;
70
71//===----------------------------------------------------------------------===//
72// Define customized scheduler read/write types specific to the Neoverse N2.
73
//===----------------------------------------------------------------------===//
// Define generic 1 micro-op types

// Single-cycle writes, one per resource.
let Latency = 1 in {
def N2Write_1cyc_1B   : SchedWriteRes<[N2UnitB]>;
def N2Write_1cyc_1I   : SchedWriteRes<[N2UnitI]>;
def N2Write_1cyc_1M   : SchedWriteRes<[N2UnitM]>;
def N2Write_1cyc_1M0  : SchedWriteRes<[N2UnitM0]>;
def N2Write_1cyc_1L01 : SchedWriteRes<[N2UnitL01]>;
}

// Multi-cycle writes on the N2UnitM group.
let Latency = 2 in def N2Write_2cyc_1M : SchedWriteRes<[N2UnitM]>;
let Latency = 3 in def N2Write_3cyc_1M : SchedWriteRes<[N2UnitM]>;
// Writes on N2UnitM0 whose ResourceCycles is set equal to the latency.
let Latency = 2, ResourceCycles = [2] in
def N2Write_2cyc_1M0 : SchedWriteRes<[N2UnitM0]>;

let Latency = 3, ResourceCycles = [3] in
def N2Write_3cyc_1M0 : SchedWriteRes<[N2UnitM0]>;

let Latency = 5, ResourceCycles = [5] in
def N2Write_5cyc_1M0 : SchedWriteRes<[N2UnitM0]>;

let Latency = 12, ResourceCycles = [12] in
def N2Write_12cyc_1M0 : SchedWriteRes<[N2UnitM0]>;

let Latency = 20, ResourceCycles = [20] in
def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]>;
// Writes on the N2UnitL group.
let Latency = 4 in def N2Write_4cyc_1L : SchedWriteRes<[N2UnitL]>;
let Latency = 6 in def N2Write_6cyc_1L : SchedWriteRes<[N2UnitL]>;

// Writes on the N2UnitV group.
let Latency = 2  in def N2Write_2cyc_1V  : SchedWriteRes<[N2UnitV]>;
let Latency = 3  in def N2Write_3cyc_1V  : SchedWriteRes<[N2UnitV]>;
let Latency = 4  in def N2Write_4cyc_1V  : SchedWriteRes<[N2UnitV]>;
let Latency = 5  in def N2Write_5cyc_1V  : SchedWriteRes<[N2UnitV]>;
let Latency = 12 in def N2Write_12cyc_1V : SchedWriteRes<[N2UnitV]>;
// Writes on N2UnitV0. Only the 7-cycle variant overrides ResourceCycles.
let Latency = 2 in def N2Write_2cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 3 in def N2Write_3cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 4 in def N2Write_4cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 7, ResourceCycles = [7] in
def N2Write_7cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 9  in def N2Write_9cyc_1V0  : SchedWriteRes<[N2UnitV0]>;
let Latency = 10 in def N2Write_10cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 12 in def N2Write_12cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 13 in def N2Write_13cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 15 in def N2Write_15cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 16 in def N2Write_16cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 20 in def N2Write_20cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
// Writes on N2UnitV1.
let Latency = 2  in def N2Write_2cyc_1V1  : SchedWriteRes<[N2UnitV1]>;
let Latency = 3  in def N2Write_3cyc_1V1  : SchedWriteRes<[N2UnitV1]>;
let Latency = 4  in def N2Write_4cyc_1V1  : SchedWriteRes<[N2UnitV1]>;
let Latency = 6  in def N2Write_6cyc_1V1  : SchedWriteRes<[N2UnitV1]>;
let Latency = 10 in def N2Write_10cyc_1V1 : SchedWriteRes<[N2UnitV1]>;

// Six-cycle write on N2UnitL01.
let Latency = 6 in def N2Write_6cyc_1L01 : SchedWriteRes<[N2UnitL01]>;
119
//===----------------------------------------------------------------------===//
// Define generic 2 micro-op types

// NumMicroOps is shared by every def in this group; only Latency varies.
let NumMicroOps = 2 in {
def N2Write_1cyc_1B_1S : SchedWriteRes<[N2UnitB, N2UnitS]> { let Latency = 1; }

def N2Write_6cyc_1M0_1B : SchedWriteRes<[N2UnitM0, N2UnitB]> { let Latency = 6; }

def N2Write_9cyc_1M0_1L : SchedWriteRes<[N2UnitM0, N2UnitL]> { let Latency = 9; }

def N2Write_3cyc_1I_1M : SchedWriteRes<[N2UnitI, N2UnitM]> { let Latency = 3; }

def N2Write_4cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { let Latency = 4; }

def N2Write_5cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { let Latency = 5; }

def N2Write_6cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { let Latency = 6; }

def N2Write_7cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { let Latency = 7; }

def N2Write_1cyc_1L01_1D : SchedWriteRes<[N2UnitL01, N2UnitD]> { let Latency = 1; }

def N2Write_5cyc_1M0_1V : SchedWriteRes<[N2UnitM0, N2UnitV]> { let Latency = 5; }

def N2Write_2cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> { let Latency = 2; }

def N2Write_4cyc_1V1_1V : SchedWriteRes<[N2UnitV1, N2UnitV]> { let Latency = 4; }

def N2Write_4cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { let Latency = 4; }
}
187
// Two-micro-op writes on N2UnitV0 with explicit per-micro-op ResourceCycles.
let NumMicroOps = 2 in {
def N2Write_10cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let ResourceCycles = [5, 5];
  let Latency = 10;
}

def N2Write_13cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let ResourceCycles = [6, 7];
  let Latency = 13;
}

def N2Write_15cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let ResourceCycles = [7, 8];
  let Latency = 15;
}

def N2Write_16cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let ResourceCycles = [8, 8];
  let Latency = 16;
}
}
211
// More two-micro-op writes; NumMicroOps is shared, Latency is per def.
let NumMicroOps = 2 in {
def N2Write_4cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> { let Latency = 4; }

def N2Write_6cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> { let Latency = 6; }

def N2Write_6cyc_2L : SchedWriteRes<[N2UnitL, N2UnitL]> { let Latency = 6; }

def N2Write_8cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> { let Latency = 8; }

def N2Write_4cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> { let Latency = 4; }

def N2Write_3cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> { let Latency = 3; }

def N2Write_2cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> { let Latency = 2; }

def N2Write_6cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> { let Latency = 6; }

def N2Write_4cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> { let Latency = 4; }

def N2Write_5cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { let Latency = 5; }
}
261
// Remaining two-micro-op writes; NumMicroOps is shared, Latency is per def.
let NumMicroOps = 2 in {
def N2Write_5cyc_1V1_1M0 : SchedWriteRes<[N2UnitV1, N2UnitM0]> { let Latency = 5; }

def N2Write_7cyc_1M0_1V0 : SchedWriteRes<[N2UnitM0, N2UnitV0]> { let Latency = 7; }

def N2Write_2cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> { let Latency = 2; }

def N2Write_6cyc_1V_1V1 : SchedWriteRes<[N2UnitV, N2UnitV1]> { let Latency = 6; }

def N2Write_6cyc_1L_1M : SchedWriteRes<[N2UnitL, N2UnitM]> { let Latency = 6; }

def N2Write_6cyc_1L_1S : SchedWriteRes<[N2UnitL, N2UnitS]> { let Latency = 6; }

def N2Write_9cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> { let Latency = 9; }

def N2Write_4cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> { let Latency = 4; }
}
301
//===----------------------------------------------------------------------===//
// Define generic 3 micro-op types

// NumMicroOps is shared by every def in this group; only Latency varies.
let NumMicroOps = 3 in {
def N2Write_1cyc_1L01_1D_1I : SchedWriteRes<[N2UnitL01, N2UnitD, N2UnitI]> {
  let Latency = 1;
}

def N2Write_2cyc_1L01_1V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitI]> {
  let Latency = 2;
}

def N2Write_2cyc_1L01_2V : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV]> {
  let Latency = 2;
}

def N2Write_7cyc_1M_1M0_1V : SchedWriteRes<[N2UnitM, N2UnitM0, N2UnitV]> {
  let Latency = 7;
}

def N2Write_8cyc_1M0_1V1_1V : SchedWriteRes<[N2UnitM0, N2UnitV1, N2UnitV]> {
  let Latency = 8;
}
}
329
// Three micro-ops with a 10-cycle latency: one each on N2UnitV, N2UnitL and
// N2UnitS, as the def name spells out. The third resource was previously a
// second N2UnitL, which contradicted the "_1S" in the name and left the S
// port unaccounted for.
def N2Write_10cyc_1V_1L_1S : SchedWriteRes<[N2UnitV, N2UnitL, N2UnitS]> {
  let Latency     = 10;
  let NumMicroOps = 3;
}
334
// Remaining three-micro-op writes; NumMicroOps is shared, Latency is per def.
let NumMicroOps = 3 in {
def N2Write_2cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
  let Latency = 2;
}

def N2Write_4cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
  let Latency = 4;
}

def N2Write_6cyc_3L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL]> {
  let Latency = 6;
}

def N2Write_8cyc_1L_2V : SchedWriteRes<[N2UnitL, N2UnitV, N2UnitV]> {
  let Latency = 8;
}
}
354
//===----------------------------------------------------------------------===//
// Define generic 4 micro-op types

// NumMicroOps is shared by every def in this group; only Latency varies.
let NumMicroOps = 4 in {
def N2Write_2cyc_1L01_2V_1I
    : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV, N2UnitI]> {
  let Latency = 2;
}

def N2Write_6cyc_4V0
    : SchedWriteRes<[N2UnitV0, N2UnitV0, N2UnitV0, N2UnitV0]> {
  let Latency = 6;
}

def N2Write_4cyc_4V
    : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 4;
}

def N2Write_6cyc_4V
    : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 6;
}

def N2Write_8cyc_2L_2V
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  let Latency = 8;
}

def N2Write_9cyc_2L_2V
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  let Latency = 9;
}

def N2Write_2cyc_2L01_2V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, N2UnitV]> {
  let Latency = 2;
}

def N2Write_4cyc_2L01_2V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, N2UnitV]> {
  let Latency = 4;
}

def N2Write_5cyc_2L01_2V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, N2UnitV]> {
  let Latency = 5;
}

def N2Write_8cyc_2M0_2V0
    : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitV0, N2UnitV0]> {
  let Latency = 8;
}

def N2Write_11cyc_2V_2V1
    : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, N2UnitV1]> {
  let Latency = 11;
}

def N2Write_9cyc_2V_2V1
    : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, N2UnitV1]> {
  let Latency = 9;
}

def N2Write_8cyc_2V_2V1
    : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, N2UnitV1]> {
  let Latency = 8;
}
}
430
// Four micro-ops with a 10-cycle latency: two on N2UnitL and two on
// N2UnitV1, as the def name spells out. The first two resources were
// previously N2UnitV, which duplicated the "2V_2V1" shape and contradicted
// the "_2L" in the name.
def N2Write_10cyc_2L_2V1 : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV1,
                                          N2UnitV1]> {
  let Latency     = 10;
  let NumMicroOps = 4;
}
436
// Remaining four-micro-op writes; NumMicroOps is shared, Latency is per def.
let NumMicroOps = 4 in {
def N2Write_10cyc_2L_2V
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  let Latency = 10;
}

def N2Write_4cyc_2M0_2M
    : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitM, N2UnitM]> {
  let Latency = 4;
}

def N2Write_6cyc_2I_2L
    : SchedWriteRes<[N2UnitI, N2UnitI, N2UnitL, N2UnitL]> {
  let Latency = 6;
}

def N2Write_7cyc_4L
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL]> {
  let Latency = 7;
}
}
457
//===----------------------------------------------------------------------===//
// Define generic 5 micro-op types

// Resource lists are laid out one resource kind per line.
let NumMicroOps = 5 in {
def N2Write_2cyc_1L01_2V_2I : SchedWriteRes<[N2UnitL01,
                                             N2UnitV, N2UnitV,
                                             N2UnitI, N2UnitI]> {
  let Latency = 2;
}

def N2Write_8cyc_2L_3V : SchedWriteRes<[N2UnitL, N2UnitL,
                                        N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 8;
}
}
472
//===----------------------------------------------------------------------===//
// Define generic 6 micro-op types

// Resource lists are laid out one resource kind per line.
let NumMicroOps = 6 in {
def N2Write_8cyc_3L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
                                        N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 8;
}

def N2Write_2cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                          N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 2;
}

def N2Write_6cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                          N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 6;
}

def N2Write_4cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                          N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 4;
}

def N2Write_10cyc_2L_2V_2S : SchedWriteRes<[N2UnitL, N2UnitL,
                                            N2UnitV, N2UnitV,
                                            N2UnitS, N2UnitS]> {
  let Latency = 10;
}
}
505
//===----------------------------------------------------------------------===//
// Define generic 7 micro-op types

let NumMicroOps = 7 in
def N2Write_8cyc_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
                                        N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 8;
}

//===----------------------------------------------------------------------===//
// Define generic 8 micro-op types

// Resource lists are laid out one resource kind per line.
let NumMicroOps = 8 in {
def N2Write_6cyc_8V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV,
                                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 6;
}

def N2Write_2cyc_4L01_4V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 2;
}

def N2Write_5cyc_4L01_4V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 5;
}

def N2Write_8cyc_4L_4V
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 8;
}

def N2Write_9cyc_4L_4V
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 9;
}
}
549
//===----------------------------------------------------------------------===//
// Define generic 10 micro-op types

let NumMicroOps = 10 in
def N2Write_7cyc_5L01_5V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 7;
}

//===----------------------------------------------------------------------===//
// Define generic 12 micro-op types

let NumMicroOps = 12 in
def N2Write_7cyc_6L01_6V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 7;
}
570
//===----------------------------------------------------------------------===//
// Define generic 15 micro-op types

// Five micro-ops on each of N2UnitL01, N2UnitS and N2UnitV.
let NumMicroOps = 15 in
def N2Write_7cyc_5L01_5S_5V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitS, N2UnitS, N2UnitS, N2UnitS, N2UnitS,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 7;
}
582
//===----------------------------------------------------------------------===//
// Define generic 18 micro-op types

// Nine micro-ops on N2UnitL01 followed by nine on N2UnitV.
let NumMicroOps = 18 in
def N2Write_11cyc_9L01_9V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 11;
}
595
//===----------------------------------------------------------------------===//
// Define generic 27 micro-op types

// Nine micro-ops on each of N2UnitL01, N2UnitS and N2UnitV.
let NumMicroOps = 27 in
def N2Write_11cyc_9L01_9S_9V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitS, N2UnitS, N2UnitS,
                     N2UnitS, N2UnitS, N2UnitS,
                     N2UnitS, N2UnitS, N2UnitS,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 11;
}
611
// Miscellaneous
// -----------------------------------------------------------------------------

// COPY uses the generic WriteI write.
def : InstRW<[WriteI],
             (instrs COPY)>;
616
// Branch Instructions
// -----------------------------------------------------------------------------

// Branch, immed
// Compare and branch
// Branch, register
foreach BrWrite = [WriteBr, WriteBrReg] in
def : SchedAlias<BrWrite, N2Write_1cyc_1B>;

// Branch and link, immed
// Branch and link, register
def : InstRW<[N2Write_1cyc_1B_1S],
             (instrs BL, BLR)>;
630
// Arithmetic and Logical Instructions
// -----------------------------------------------------------------------------

// ALU, basic
// ALU, basic, flagset
def : SchedAlias<WriteI, N2Write_1cyc_1I>;

// ALU, extend and shift
foreach ShiftExtWrite = [WriteISReg, WriteIEReg] in
def : SchedAlias<ShiftExtWrite, N2Write_2cyc_1M>;

// Arithmetic, immediate to logical address tag
def : InstRW<[N2Write_2cyc_1M],
             (instrs ADDG, SUBG)>;

// Convert floating-point condition flags
// Flag manipulation instructions
let Latency = 1 in
def : WriteRes<WriteSys, []>;

// Insert Random Tags
def : InstRW<[N2Write_2cyc_1M],
             (instrs IRG, IRGstack)>;

// Insert Tag Mask
// Subtract Pointer
// Subtract Pointer, flagset
def : InstRW<[N2Write_1cyc_1I],
             (instrs GMI, SUBP, SUBPS)>;
656
// Move and shift instructions
// -----------------------------------------------------------------------------

// WriteImm is modelled as a single-cycle micro-op on N2UnitI.
def : SchedAlias<WriteImm,
                 N2Write_1cyc_1I>;
661
// Divide and Multiply Instructions
// -----------------------------------------------------------------------------

// SDIV, UDIV
def : SchedAlias<WriteID32, N2Write_12cyc_1M0>;
def : SchedAlias<WriteID64, N2Write_20cyc_1M0>;

// WriteIM32 / WriteIM64: two cycles on the N2UnitM group.
let Latency = 2 in {
def : WriteRes<WriteIM32, [N2UnitM]>;
def : WriteRes<WriteIM64, [N2UnitM]>;
}

// Multiply high
def : InstRW<[N2Write_3cyc_1M],
             (instrs SMULHrr, UMULHrr)>;
674
// Pointer Authentication Instructions (v8.3 PAC)
// -----------------------------------------------------------------------------

// Authenticate data address
// Authenticate instruction address
// Compute pointer authentication code for data address
// Compute pointer authentication code, using generic key
// Compute pointer authentication code for instruction address
def : InstRW<[N2Write_5cyc_1M0],
             (instregex "^AUT", "^PAC")>;

// Branch and link, register, with pointer authentication
// Branch, register, with pointer authentication
// Branch, return, with pointer authentication
def : InstRW<[N2Write_6cyc_1M0_1B],
             (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ,
                     BRAA, BRAAZ, BRAB, BRABZ,
                     RETAA, RETAB,
                     ERETAA, ERETAB)>;

// Load register, with pointer authentication
def : InstRW<[N2Write_9cyc_1M0_1L],
             (instregex "^LDRA[AB](indexed|writeback)")>;

// Strip pointer authentication code
def : InstRW<[N2Write_2cyc_1M0],
             (instrs XPACD, XPACI, XPACLRI)>;
698
// Miscellaneous data-processing instructions
// -----------------------------------------------------------------------------

// Bitfield extract, one reg
// Bitfield extract, two regs
// NOTE: The one-reg case (EXTR with both operands the same) is not modelled
// separately; both forms get the same write.
def : SchedAlias<WriteExtr,
                 N2Write_3cyc_1I_1M>;
def : InstRW<[N2Write_3cyc_1I_1M],
             (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move, basic
def : SchedAlias<WriteIS,
                 N2Write_1cyc_1I>;

// Bitfield move, insert
def : InstRW<[N2Write_2cyc_1M],
             (instregex "^BFM[WX]ri$")>;
714
// Load instructions
// -----------------------------------------------------------------------------

// Generic load: four cycles on the N2UnitL group.
def : SchedAlias<WriteLD,
                 N2Write_4cyc_1L>;
// Generic indexed load: an extra micro-op on the N2UnitI group.
def : SchedAlias<WriteLDIdx,
                 N2Write_4cyc_1I_1L>;
720
// Load pair, signed immed offset, signed words
def : InstRW<[N2Write_5cyc_1M0, WriteLDHi], (instrs LDPSWi)>;
// Load pair, immed post-index or immed pre-index, signed words
// The write list is matched positionally against the instruction's defs.
// WriteAdr is listed first so that the written-back base register gets the
// address-update write and the two loaded registers get the load writes,
// matching the WriteAdr-first order used for pre/post-index stores in this
// file. With WriteAdr last, the load latency landed on the writeback.
// NOTE(review): assumes $wback is the first def of LDPSWpre/LDPSWpost —
// confirm against AArch64InstrFormats.td.
def : InstRW<[WriteAdr, N2Write_5cyc_1M0, WriteLDHi],
             (instregex "^LDPSW(post|pre)$")>;
726
// Store instructions
// -----------------------------------------------------------------------------

// Generic store: one micro-op on N2UnitL01 and one on N2UnitD.
def : SchedAlias<WriteST,
                 N2Write_1cyc_1L01_1D>;
// Generic indexed store: an extra micro-op on the N2UnitI group.
def : SchedAlias<WriteSTIdx,
                 N2Write_1cyc_1L01_1D_1I>;
// Generic store pair: same write as the generic store.
def : SchedAlias<WriteSTP,
                 N2Write_1cyc_1L01_1D>;
// Address writeback; copied from A57.
def : SchedAlias<WriteAdr,
                 N2Write_1cyc_1I>;
734
// Tag load instructions
// -----------------------------------------------------------------------------

// Load allocation tag
// Load multiple allocation tags
def : InstRW<[N2Write_4cyc_1L],
             (instrs LDG, LDGM)>;
741
// Tag store instructions
// -----------------------------------------------------------------------------

// Store allocation tags to one or two granules, post-index
// Store allocation tags to one or two granules, pre-index
// Store allocation tag to one or two granules, zeroing, post-index
// Store Allocation Tag to one or two granules, zeroing, pre-index
// Store allocation tag and reg pair to memory, post-Index
// Store allocation tag and reg pair to memory, pre-Index
def : InstRW<[N2Write_1cyc_1L01_1D_1I],
             (instrs STGPreIndex,   STGPostIndex,
                     ST2GPreIndex,  ST2GPostIndex,
                     STZGPreIndex,  STZGPostIndex,
                     STZ2GPreIndex, STZ2GPostIndex,
                     STGPpre,       STGPpost)>;

// Store allocation tags to one or two granules, signed offset
// Store allocation tag to two granules, zeroing, signed offset
// Store allocation tag and reg pair to memory, signed offset
// Store multiple allocation tags
def : InstRW<[N2Write_1cyc_1L01_1D],
             (instrs STGOffset, ST2GOffset,
                     STZGOffset, STZ2GOffset,
                     STGPi,
                     STGM, STZGM)>;
763
// FP data processing instructions
// -----------------------------------------------------------------------------

// FP absolute value
// FP arithmetic
// FP min/max
// FP negate
// FP select
def : SchedAlias<WriteF,
                 N2Write_2cyc_1V>;

// FP compare
def : SchedAlias<WriteFCmp,
                 N2Write_2cyc_1V0>;

// FP divide, square root
def : SchedAlias<WriteFDiv,
                 N2Write_7cyc_1V0>;
779
// FP divide, by form: H = 7 cycles, S = 10 cycles, D = 15 cycles.
def : InstRW<[N2Write_7cyc_1V0],
             (instrs FDIVHrr)>;
def : InstRW<[N2Write_10cyc_1V0],
             (instrs FDIVSrr)>;
def : InstRW<[N2Write_15cyc_1V0],
             (instrs FDIVDrr)>;

// FP square root, by form: H = 7 cycles, S = 9 cycles, D = 16 cycles.
def : InstRW<[N2Write_7cyc_1V0],
             (instrs FSQRTHr)>;
def : InstRW<[N2Write_9cyc_1V0],
             (instrs FSQRTSr)>;
def : InstRW<[N2Write_16cyc_1V0],
             (instrs FSQRTDr)>;
793
// FP multiply
let Latency = 3 in
def : WriteRes<WriteFMul, [N2UnitV]>;

// FP multiply accumulate
def : InstRW<[N2Write_4cyc_1V],
             (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;

// FP round to integral
def : InstRW<[N2Write_3cyc_1V0],
             (instregex "^FRINT[AIMNPXZ][HSD]r$",
                        "^FRINT(32|64)[XZ][SD]r$")>;
803
// FP miscellaneous instructions
// -----------------------------------------------------------------------------

// FP convert, from gen to vec reg
def : InstRW<[N2Write_3cyc_1M0],
             (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;

// FP convert, from vec to gen reg
def : InstRW<[N2Write_3cyc_1V],
             (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;

// FP convert, Javascript from vec to gen reg
// FP convert, from vec to vec reg
def : SchedAlias<WriteFCvt,
                 N2Write_3cyc_1V0>;

// FP move, immed
// FP move, register
def : SchedAlias<WriteFImm,
                 N2Write_2cyc_1V>;

// FP transfer, from gen to low half of vec reg
def : InstRW<[N2Write_3cyc_1M0],
             (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
                     FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;

// FP transfer, from gen to high half of vec reg
def : InstRW<[N2Write_5cyc_1M0_1V],
             (instrs FMOVXDHighr)>;

// FP transfer, from vec to gen reg
def : SchedAlias<WriteFCopy,
                 N2Write_2cyc_1V>;
830
// FP load instructions
// -----------------------------------------------------------------------------

// Load vector reg, literal, S/D/Q forms
// Load vector reg, unscaled immed
def : InstRW<[N2Write_6cyc_1L],
             (instregex "^LDR[SDQ]l$", "^LDUR[BHSDQ]i$")>;
838
// The write list is matched positionally against the instruction's defs.
// WriteAdr is listed first so that the written-back base register gets the
// address-update write and the loaded register gets the six-cycle load
// write, matching the WriteAdr-first order used for STR pre/post in this
// file. With the load write first, the load latency landed on the writeback.
// The post-index form previously used WriteI for the address update; WriteI
// and WriteAdr are both aliased to N2Write_1cyc_1I in this model, so using
// WriteAdr for both forms does not change the resources or latency charged.
// NOTE(review): assumes $wback is the first def of LDR*pre/LDR*post —
// confirm against AArch64InstrFormats.td.

// Load vector reg, immed post-index
def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L], (instregex "^LDR[BHSDQ]post$")>;
// Load vector reg, immed pre-index
def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L], (instregex "^LDR[BHSDQ]pre$")>;
843
// Load vector reg, unsigned immed
def : InstRW<[N2Write_6cyc_1L],
             (instregex "^LDR[BHSDQ]ui$")>;

// Load vector reg, register offset, basic
// Load vector reg, register offset, scale, S/D-form
// Load vector reg, register offset, extend
// Load vector reg, register offset, extend, scale, S/D-form
def : InstRW<[N2Write_6cyc_1L, ReadAdrBase],
             (instregex "^LDR[BSD]ro[WX]$")>;

// Load vector reg, register offset, scale, H/Q-form
// Load vector reg, register offset, extend, scale, H/Q-form
def : InstRW<[N2Write_7cyc_1I_1L, ReadAdrBase],
             (instregex "^LDR[HQ]ro[WX]$")>;

// Load vector pair, immed offset, S/D-form
def : InstRW<[N2Write_6cyc_1L, WriteLDHi],
             (instregex "^LDN?P[SD]i$")>;

// Load vector pair, immed offset, Q-form
def : InstRW<[N2Write_6cyc_2L, WriteLDHi],
             (instrs LDPQi, LDNPQi)>;
862
863// Load vector pair, immed post-index, S/D-form
864// Load vector pair, immed pre-index, S/D-form
865def : InstRW<[N2Write_6cyc_1I_1L, WriteLDHi, WriteAdr],
866             (instregex "^LDP[SD](pre|post)$")>;
867
868// Load vector pair, immed post-index, Q-form
869// Load vector pair, immed pre-index, Q-form
870def : InstRW<[N2Write_6cyc_2I_2L, WriteLDHi, WriteAdr], (instrs LDPQpost,
871                                                                LDPQpre)>;
872
873// FP store instructions
874// -----------------------------------------------------------------------------
875
876// Store vector reg, unscaled immed, B/H/S/D-form
877// Store vector reg, unscaled immed, Q-form
878def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STUR[BHSDQ]i$")>;
879
880// Store vector reg, immed post-index, B/H/S/D-form
881// Store vector reg, immed post-index, Q-form
882// Store vector reg, immed pre-index, B/H/S/D-form
883// Store vector reg, immed pre-index, Q-form
884def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I, ReadAdrBase],
885             (instregex "^STR[BHSDQ](pre|post)$")>;
886
887// Store vector reg, unsigned immed, B/H/S/D-form
888// Store vector reg, unsigned immed, Q-form
889def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STR[BHSDQ]ui$")>;
890
891// Store vector reg, register offset, basic, B/H/S/D-form
892// Store vector reg, register offset, basic, Q-form
893// Store vector reg, register offset, scale, S/D-form
894// Store vector reg, register offset, extend, B/H/S/D-form
895// Store vector reg, register offset, extend, Q-form
896// Store vector reg, register offset, extend, scale, S/D-form
897def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
898             (instregex "^STR[BSD]ro[WX]$")>;
899
900// Store vector reg, register offset, scale, H-form
901// Store vector reg, register offset, scale, Q-form
902// Store vector reg, register offset, extend, scale, H-form
903// Store vector reg, register offset, extend, scale, Q-form
904def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
905             (instregex "^STR[HQ]ro[WX]$")>;
906
907// Store vector pair, immed offset, S-form
908// Store vector pair, immed offset, D-form
909def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STN?P[SD]i$")>;
910
911// Store vector pair, immed offset, Q-form
912def : InstRW<[N2Write_2cyc_1L01_2V], (instrs STPQi, STNPQi)>;
913
914// Store vector pair, immed post-index, S-form
915// Store vector pair, immed post-index, D-form
916// Store vector pair, immed pre-index, S-form
917// Store vector pair, immed pre-index, D-form
918def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I],
919             (instregex "^STP[SD](pre|post)$")>;
920
921// Store vector pair, immed post-index, Q-form
// NOTE(review): unlike the S/D-form pre/post pair stores, which list WriteAdr
// ahead of the store write, the two Q-form records here list a single write
// each, and post/pre use different resources (_1I vs _2I) -- confirm both
// points against the optimization guide.
922def : InstRW<[N2Write_2cyc_1L01_2V_1I], (instrs STPQpost)>;
923
924// Store vector pair, immed pre-index, Q-form
925def : InstRW<[N2Write_2cyc_1L01_2V_2I], (instrs STPQpre)>;
926
927// ASIMD integer instructions
928// -----------------------------------------------------------------------------
929
930// ASIMD absolute diff
931// ASIMD absolute diff long
932// ASIMD arith, basic
933// ASIMD arith, complex
934// ASIMD arith, pair-wise
935// ASIMD compare
936// ASIMD logical
937// ASIMD max/min, basic and pair-wise
938def : SchedAlias<WriteVd, N2Write_2cyc_1V>;
939def : SchedAlias<WriteVq, N2Write_2cyc_1V>;
940
941// ASIMD absolute diff accum
942// ASIMD absolute diff accum long
943def : InstRW<[N2Write_4cyc_1V1],
944             (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
945
946// ASIMD arith, reduce, 4H/4S
947def : InstRW<[N2Write_2cyc_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
948
949// ASIMD arith, reduce, 8B/8H
950def : InstRW<[N2Write_4cyc_1V1_1V],
951             (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
952
953// ASIMD arith, reduce, 16B
954def : InstRW<[N2Write_4cyc_1V1], (instrs ADDVv16i8v, SADDLVv16i8v,
955                                         UADDLVv16i8v)>;
956
957// ASIMD dot product
958// ASIMD dot product using signed and unsigned integers
959def : InstRW<[N2Write_3cyc_1V],
960             (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
961
962// ASIMD matrix multiply-accumulate
963def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA, UMMLA, USMMLA)>;
964
965// ASIMD max/min, reduce, 4H/4S
966def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU](MAX|MIN)Vv4i16v$",
967                                            "^[SU](MAX|MIN)Vv4i32v$")>;
968
969// ASIMD max/min, reduce, 8B/8H
970def : InstRW<[N2Write_4cyc_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
971                                               "^[SU](MAX|MIN)Vv8i16v$")>;
972
973// ASIMD max/min, reduce, 16B
// NOTE(review): this pattern has no leading '^', unlike the 4H/4S and 8B/8H
// max/min reduce patterns above -- presumably harmless if instregex anchors
// at the start of the name; TODO confirm.
974def : InstRW<[N2Write_4cyc_2V1], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
975
976// ASIMD multiply
977def : InstRW<[N2Write_4cyc_1V0], (instregex "^MULv", "^SQ(R)?DMULHv")>;
978
979// ASIMD multiply accumulate
980def : InstRW<[N2Write_4cyc_1V0], (instregex "^MLAv", "^MLSv")>;
981
982// ASIMD multiply accumulate high
983def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
984
985// ASIMD multiply accumulate long
986def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
987
988// ASIMD multiply accumulate saturating long
989def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMLALv", "^SQDMLSLv")>;
990
991// ASIMD multiply/multiply long (8x8) polynomial, D-form
992// ASIMD multiply/multiply long (8x8) polynomial, Q-form
993def : InstRW<[N2Write_3cyc_1V0], (instregex "^PMULL?(v8i8|v16i8)$")>;
994
995// ASIMD multiply long
996def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]MULLv", "^SQDMULLv")>;
997
998// ASIMD pairwise add and accumulate long
999def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALPv")>;
1000
1001// ASIMD shift accumulate
1002def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;
1003
1004// ASIMD shift by immed, basic
1005def : InstRW<[N2Write_2cyc_1V1], (instregex "^SHLv", "^SHLLv", "^SHRNv",
1006                                            "^SSHLLv", "^SSHRv", "^USHLLv",
1007                                            "^USHRv")>;
1008
1009// ASIMD shift by immed and insert, basic
1010def : InstRW<[N2Write_2cyc_1V1], (instregex "^SLIv", "^SRIv")>;
1011
1012// ASIMD shift by immed, complex
1013def : InstRW<[N2Write_4cyc_1V1],
1014             (instregex "^RSHRNv", "^SQRSHRNv", "^SQRSHRUNv",
1015                        "^(SQSHLU?|UQSHL)[bhsd]$",
1016                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
1017                        "^SQSHRNv", "^SQSHRUNv", "^SRSHRv", "^UQRSHRNv",
1018                        "^UQSHRNv", "^URSHRv")>;
1019
1020// ASIMD shift by register, basic
1021def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]SHLv")>;
1022
1023// ASIMD shift by register, complex
1024def : InstRW<[N2Write_4cyc_1V1],
1025             (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
1026                        "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
1027
1028// ASIMD floating-point instructions
1029// -----------------------------------------------------------------------------
1030
1031// ASIMD FP absolute value/difference
1032// ASIMD FP arith, normal
1033// ASIMD FP compare
1034// ASIMD FP complex add
1035// ASIMD FP max/min, normal
1036// ASIMD FP max/min, pairwise
1037// ASIMD FP negate
1038// Handled by SchedAlias<WriteV[dq], ...>
1039
1040// ASIMD FP complex multiply add
1041def : InstRW<[N2Write_4cyc_1V], (instregex "^FCMLAv")>;
1042
1043// ASIMD FP convert, long (F16 to F32)
1044def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTL(v4|v8)i16")>;
1045
1046// ASIMD FP convert, long (F32 to F64)
1047def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTL(v2|v4)i32")>;
1048
1049// ASIMD FP convert, narrow (F32 to F16)
1050def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTN(v4|v8)i16")>;
1051
1052// ASIMD FP convert, narrow (F64 to F32)
1053def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTN(v2|v4)i32",
1054                                            "^FCVTXN(v2|v4)f32")>;
1055
1056// ASIMD FP convert, other, D-form F32 and Q-form F64
1057def : InstRW<[N2Write_3cyc_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
1058                                            "^[SU]CVTFv2f(32|64)$")>;
1059
1060// ASIMD FP convert, other, D-form F16 and Q-form F32
1061def : InstRW<[N2Write_4cyc_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
1062                                            "^[SU]CVTFv4f(16|32)$")>;
1063
1064// ASIMD FP convert, other, Q-form F16
1065def : InstRW<[N2Write_6cyc_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
1066                                            "^[SU]CVTFv8f16$")>;
1067
1068// ASIMD FP divide, D-form, F16
1069def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVv4f16)>;
1070
1071// ASIMD FP divide, D-form, F32
1072def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv2f32)>;
1073
1074// ASIMD FP divide, Q-form, F16
1075def : InstRW<[N2Write_13cyc_2V0], (instrs FDIVv8f16)>;
1076
1077// ASIMD FP divide, Q-form, F32
1078def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv4f32)>;
1079
1080// ASIMD FP divide, Q-form, F64
1081def : InstRW<[N2Write_15cyc_2V0], (instrs FDIVv2f64)>;
1082
1083// ASIMD FP max/min, reduce, F32 and D-form F16
1084def : InstRW<[N2Write_4cyc_1V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
1085
1086// ASIMD FP max/min, reduce, Q-form F16
1087def : InstRW<[N2Write_6cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
1088
1089// ASIMD FP multiply
1090def : InstRW<[N2Write_3cyc_1V], (instregex "^FMULv", "^FMULXv")>;
1091
1092// ASIMD FP multiply accumulate
1093def : InstRW<[N2Write_4cyc_1V], (instregex "^FMLAv", "^FMLSv")>;
1094
1095// ASIMD FP multiply accumulate long
1096def : InstRW<[N2Write_5cyc_1V], (instregex "^FMLALv", "^FMLSLv")>;
1097
1098// ASIMD FP round, D-form F32 and Q-form F64 (FRINT* and FRINT32/64[XZ])
1099def : InstRW<[N2Write_3cyc_1V0],
1100             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
1101                        "^FRINT(32|64)[XZ]v2f(32|64)$")>;
1102
1103// ASIMD FP round, D-form F16 and Q-form F32
1104def : InstRW<[N2Write_4cyc_2V0],
1105             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
1106                        "^FRINT(32|64)[XZ]v4f32$")>;
1107
1108
1109// ASIMD FP round, Q-form F16
1110def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
1111
1112// ASIMD FP square root, D-form, F16
1113def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTv4f16)>;
1114
1115// ASIMD FP square root, D-form, F32
1116def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv2f32)>;
1117
1118// ASIMD FP square root, Q-form, F16
1119def : InstRW<[N2Write_13cyc_2V0], (instrs FSQRTv8f16)>;
1120
1121// ASIMD FP square root, Q-form, F32
1122def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv4f32)>;
1123
1124// ASIMD FP square root, Q-form, F64
1125def : InstRW<[N2Write_16cyc_2V0], (instrs FSQRTv2f64)>;
1126
1127// ASIMD BFloat16 (BF16) instructions
1128// -----------------------------------------------------------------------------
1129
1130// ASIMD convert, F32 to BF16
1131def : InstRW<[N2Write_4cyc_1V0], (instrs BFCVTN, BFCVTN2)>;
1132
1133// ASIMD dot product
1134def : InstRW<[N2Write_4cyc_1V], (instrs BFDOTv4bf16, BFDOTv8bf16)>;
1135
1136// ASIMD matrix multiply accumulate
1137def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA)>;
1138
1139// ASIMD multiply accumulate long
1140def : InstRW<[N2Write_4cyc_1V], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
1141                                        BFMLALTIdx)>;
1142
1143// Scalar convert, F32 to BF16
1144def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT)>;
1145
1146// ASIMD miscellaneous instructions
1147// -----------------------------------------------------------------------------
1148
1149// ASIMD bit reverse
1150// ASIMD bitwise insert
1151// ASIMD count
1152// ASIMD duplicate, element
1153// ASIMD extract
1154// ASIMD extract narrow
1155// ASIMD insert, element to element
1156// ASIMD move, FP immed
1157// ASIMD move, integer immed
1158// ASIMD reverse
1159// ASIMD table lookup, 1 or 2 table regs
1160// ASIMD table lookup extension, 1 table reg
1161// ASIMD transfer, element to gen reg
1162// ASIMD transpose
1163// ASIMD unzip/zip
1164// Handled by SchedAlias<WriteV[dq], ...>
1165
1166// ASIMD duplicate, gen reg
1167def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;
1168
1169// ASIMD extract narrow, saturating
1170def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
1171
1172// ASIMD reciprocal and square root estimate, D-form U32
1173def : InstRW<[N2Write_3cyc_1V0], (instrs URECPEv2i32, URSQRTEv2i32)>;
1174
1175// ASIMD reciprocal and square root estimate, Q-form U32
1176def : InstRW<[N2Write_4cyc_2V0], (instrs URECPEv4i32, URSQRTEv4i32)>;
1177
1178// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
1179def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPEv1f16, FRECPEv1i32,
1180                                         FRECPEv1i64, FRECPEv2f32,
1181                                         FRSQRTEv1f16, FRSQRTEv1i32,
1182                                         FRSQRTEv1i64, FRSQRTEv2f32)>;
1183
1184// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
1185def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPEv4f16, FRECPEv4f32,
1186                                         FRSQRTEv4f16, FRSQRTEv4f32)>;
1187
1188// ASIMD reciprocal and square root estimate, Q-form F16
1189def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPEv8f16, FRSQRTEv8f16)>;
1190
1191// ASIMD reciprocal exponent
1192def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRECPXv")>;
1193
1194// ASIMD reciprocal step
1195def : InstRW<[N2Write_4cyc_1V], (instregex "^FRECPSv", "^FRSQRTSv")>;
1196
1197// ASIMD table lookup, 3 table regs
1198def : InstRW<[N2Write_4cyc_2V], (instrs TBLv8i8Three, TBLv16i8Three)>;
1199
1200// ASIMD table lookup, 4 table regs
1201def : InstRW<[N2Write_4cyc_4V], (instrs TBLv8i8Four, TBLv16i8Four)>;
1202
1203// ASIMD table lookup extension, 2 table reg
1204def : InstRW<[N2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;
1205
1206// ASIMD table lookup extension, 3 table reg
1207def : InstRW<[N2Write_6cyc_4V], (instrs TBXv8i8Three, TBXv16i8Three)>;
1208
1209// ASIMD table lookup extension, 4 table reg
1210def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>;
1211
1212// ASIMD transfer, gen reg to element
1213def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSv")>;
1214
1215// ASIMD load instructions
1216// -----------------------------------------------------------------------------
1217
1218// ASIMD load, 1 element, multiple, 1 reg, D-form
1219def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
1220def : InstRW<[N2Write_6cyc_1L, WriteAdr],
1221             (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
1222
1223// ASIMD load, 1 element, multiple, 1 reg, Q-form
1224def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
1225def : InstRW<[N2Write_6cyc_1L, WriteAdr],
1226             (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;
1227
1228// ASIMD load, 1 element, multiple, 2 reg, D-form
1229def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
1230def : InstRW<[N2Write_6cyc_2L, WriteAdr],
1231             (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
1232
1233// ASIMD load, 1 element, multiple, 2 reg, Q-form
1234def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
1235def : InstRW<[N2Write_6cyc_2L, WriteAdr],
1236             (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
1237
1238// ASIMD load, 1 element, multiple, 3 reg, D-form
1239def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
1240def : InstRW<[N2Write_6cyc_3L, WriteAdr],
1241             (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
1242
1243// ASIMD load, 1 element, multiple, 3 reg, Q-form
1244def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
1245def : InstRW<[N2Write_6cyc_3L, WriteAdr],
1246             (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
1247
1248// ASIMD load, 1 element, multiple, 4 reg, D-form
1249def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
1250def : InstRW<[N2Write_7cyc_4L, WriteAdr],
1251             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
1252
1253// ASIMD load, 1 element, multiple, 4 reg, Q-form
1254def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
1255def : InstRW<[N2Write_7cyc_4L, WriteAdr],
1256             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
1257
1258// ASIMD load, 1 element, one lane, B/H/S
1259// ASIMD load, 1 element, one lane, D
// First record is the plain form; the _POST record adds WriteAdr as a second
// write.
// NOTE(review): these patterns have no leading '^', unlike the
// "LD1(One|Two|Three|Four)v" patterns above -- presumably equivalent if
// instregex anchors at the start of the name; TODO confirm.
1260def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "LD1i(8|16|32|64)$")>;
1261def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
1262
1263// ASIMD load, 1 element, all lanes, D-form, B/H/S
1264// ASIMD load, 1 element, all lanes, D-form, D
1265def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "LD1Rv(8b|4h|2s|1d)$")>;
1266def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
1267
1268// ASIMD load, 1 element, all lanes, Q-form
1269def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "LD1Rv(16b|8h|4s|2d)$")>;
1270def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
1271
1272// ASIMD load, 2 element, multiple, D-form, B/H/S
1273def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "LD2Twov(8b|4h|2s)$")>;
1274def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
1275
1276// ASIMD load, 2 element, multiple, Q-form, B/H/S
1277// ASIMD load, 2 element, multiple, Q-form, D
1278def : InstRW<[N2Write_8cyc_2L_2V],           (instregex "LD2Twov(16b|8h|4s|2d)$")>;
1279def : InstRW<[N2Write_8cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
1280
1281// ASIMD load, 2 element, one lane, B/H
1282// ASIMD load, 2 element, one lane, S
1283// ASIMD load, 2 element, one lane, D
1284def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "LD2i(8|16|32|64)$")>;
1285def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
1286
1287// ASIMD load, 2 element, all lanes, D-form, B/H/S
1288// ASIMD load, 2 element, all lanes, D-form, D
1289def : InstRW<[N2Write_8cyc_1L_2V],            (instregex "LD2Rv(8b|4h|2s|1d)$")>;
1290def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr],  (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
1291
1292// ASIMD load, 2 element, all lanes, Q-form
1293def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "LD2Rv(16b|8h|4s|2d)$")>;
1294def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
1295
1296// ASIMD load, 3 element, multiple, D-form, B/H/S
1297def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "LD3Threev(8b|4h|2s)$")>;
1298def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
1299
1300// ASIMD load, 3 element, multiple, Q-form, B/H/S
1301def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "LD3Threev(16b|8h|4s)$")>;
1302def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
1303
1304// ASIMD load, 3 element, multiple, Q-form, D
1305def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "LD3Threev(2d)$")>;
1306def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
1307
1308// ASIMD load, 3 element, one lane, B/H
1309// ASIMD load, 3 element, one lane, S
1310// ASIMD load, 3 element, one lane, D
1311def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "LD3i(8|16|32|64)$")>;
1312def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
1313
1314// ASIMD load, 3 element, all lanes, D-form, B/H/S
1315// ASIMD load, 3 element, all lanes, D-form, D
1316def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "LD3Rv(8b|4h|2s|1d)$")>;
1317def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
1318
1319// ASIMD load, 3 element, all lanes, Q-form, B/H/S
1320// ASIMD load, 3 element, all lanes, Q-form, D
1321def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "LD3Rv(16b|8h|4s|2d)$")>;
1322def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
1323
1324// ASIMD load, 4 element, multiple, D-form, B/H/S
1325def : InstRW<[N2Write_8cyc_3L_4V],           (instregex "LD4Fourv(8b|4h|2s)$")>;
1326def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
1327
1328// ASIMD load, 4 element, multiple, Q-form, B/H/S
1329// ASIMD load, 4 element, multiple, Q-form, D
1330def : InstRW<[N2Write_9cyc_4L_4V],           (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
1331def : InstRW<[N2Write_9cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
1332
1333// ASIMD load, 4 element, one lane, B/H
1334// ASIMD load, 4 element, one lane, S
1335// ASIMD load, 4 element, one lane, D
1336def : InstRW<[N2Write_8cyc_3L_4V],           (instregex "LD4i(8|16|32|64)$")>;
1337def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
1338
1339// ASIMD load, 4 element, all lanes, D-form, B/H/S
1340// ASIMD load, 4 element, all lanes, D-form, D
1341def : InstRW<[N2Write_8cyc_3L_4V],              (instregex "LD4Rv(8b|4h|2s|1d)$")>;
1342def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr],    (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
1343
1344// ASIMD load, 4 element, all lanes, Q-form, B/H/S
1345// ASIMD load, 4 element, all lanes, Q-form, D
1346def : InstRW<[N2Write_8cyc_4L_4V],            (instregex "LD4Rv(16b|8h|4s|2d)$")>;
1347def : InstRW<[N2Write_8cyc_4L_4V, WriteAdr],  (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
1348
1349// ASIMD store instructions
1350// -----------------------------------------------------------------------------
1351
1352// ASIMD store, 1 element, multiple, 1 reg, D-form
1353def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "ST1Onev(8b|4h|2s|1d)$")>;
1354def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
1355
1356// ASIMD store, 1 element, multiple, 1 reg, Q-form
1357def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "ST1Onev(16b|8h|4s|2d)$")>;
1358def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
1359
1360// ASIMD store, 1 element, multiple, 2 reg, D-form
1361def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "ST1Twov(8b|4h|2s|1d)$")>;
1362def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
1363
1364// ASIMD store, 1 element, multiple, 2 reg, Q-form
1365def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "ST1Twov(16b|8h|4s|2d)$")>;
1366def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
1367
1368// ASIMD store, 1 element, multiple, 3 reg, D-form
1369def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "ST1Threev(8b|4h|2s|1d)$")>;
1370def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
1371
1372// ASIMD store, 1 element, multiple, 3 reg, Q-form
1373def : InstRW<[N2Write_2cyc_3L01_3V],           (instregex "ST1Threev(16b|8h|4s|2d)$")>;
1374def : InstRW<[N2Write_2cyc_3L01_3V, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
1375
1376// ASIMD store, 1 element, multiple, 4 reg, D-form
1377def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
1378def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
1379
1380// ASIMD store, 1 element, multiple, 4 reg, Q-form
1381def : InstRW<[N2Write_2cyc_4L01_4V],           (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
1382def : InstRW<[N2Write_2cyc_4L01_4V, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
1383
1384// ASIMD store, 1 element, one lane, B/H/S
1385// ASIMD store, 1 element, one lane, D
1386def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "ST1i(8|16|32|64)$")>;
1387def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
1388
1389// ASIMD store, 2 element, multiple, D-form, B/H/S
1390def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "ST2Twov(8b|4h|2s)$")>;
1391def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
1392
1393// ASIMD store, 2 element, multiple, Q-form, B/H/S
1394// ASIMD store, 2 element, multiple, Q-form, D
1395def : InstRW<[N2Write_4cyc_2L01_2V],           (instregex "ST2Twov(16b|8h|4s|2d)$")>;
1396def : InstRW<[N2Write_4cyc_2L01_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
1397
1398// ASIMD store, 2 element, one lane, B/H/S
1399// ASIMD store, 2 element, one lane, D
1400def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "ST2i(8|16|32|64)$")>;
1401def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
1402
1403// ASIMD store, 3 element, multiple, D-form, B/H/S
1404def : InstRW<[N2Write_5cyc_2L01_2V],           (instregex "ST3Threev(8b|4h|2s)$")>;
1405def : InstRW<[N2Write_5cyc_2L01_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
1406
1407// ASIMD store, 3 element, multiple, Q-form, B/H/S
1408// ASIMD store, 3 element, multiple, Q-form, D
1409def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST3Threev(16b|8h|4s|2d)$")>;
1410def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
1411
1412// ASIMD store, 3 element, one lane, B/H
1413// ASIMD store, 3 element, one lane, S
1414// ASIMD store, 3 element, one lane, D
1415def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST3i(8|16|32|64)$")>;
1416def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
1417
1418// ASIMD store, 4 element, multiple, D-form, B/H/S
1419def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST4Fourv(8b|4h|2s)$")>;
1420def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
1421
1422// ASIMD store, 4 element, multiple, Q-form, B/H/S
1423def : InstRW<[N2Write_7cyc_6L01_6V],           (instregex "ST4Fourv(16b|8h|4s)$")>;
1424def : InstRW<[N2Write_7cyc_6L01_6V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
1425
1426// ASIMD store, 4 element, multiple, Q-form, D
1427def : InstRW<[N2Write_5cyc_4L01_4V],           (instregex "ST4Fourv(2d)$")>;
1428def : InstRW<[N2Write_5cyc_4L01_4V, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
1429
1430// ASIMD store, 4 element, one lane, B/H/S
1431def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST4i(8|16|32)$")>;
1432def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST4i(8|16|32)_POST$")>;
1433
1434// ASIMD store, 4 element, one lane, D
1435def : InstRW<[N2Write_4cyc_3L01_3V],            (instregex "ST4i(64)$")>;
1436def : InstRW<[N2Write_4cyc_3L01_3V, WriteAdr],  (instregex "ST4i(64)_POST$")>;
1437
1438// Cryptography extensions
1439// -----------------------------------------------------------------------------
1440
1441// Crypto AES ops
1442def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;
1443
1444// Crypto polynomial (64x64) multiply long
1445def : InstRW<[N2Write_2cyc_1V0], (instrs PMULLv1i64, PMULLv2i64)>;
1446
1447// Crypto SHA1 hash acceleration op
1448// Crypto SHA1 schedule acceleration ops
1449def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>;
1450
1451// Crypto SHA1 hash acceleration ops
1452// Crypto SHA256 hash acceleration ops
1453def : InstRW<[N2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
1454
1455// Crypto SHA256 schedule acceleration ops
1456def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>;
1457
1458// Crypto SHA512 hash acceleration ops
1459def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;
1460
1461// Crypto SHA3 ops
1462def : InstRW<[N2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
1463
1464// Crypto SM3 ops
1465def : InstRW<[N2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
1466                                            "^SM3TT[12][AB]$")>;
1467
1468// Crypto SM4 ops
1469def : InstRW<[N2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>;
1470
1471// CRC
1472// -----------------------------------------------------------------------------
1473
1474def : InstRW<[N2Write_2cyc_1M0], (instregex "^CRC32")>;
1475
1476// SVE Predicate instructions
1477// -----------------------------------------------------------------------------
1478
1479// Loop control, based on predicate
1480def : InstRW<[N2Write_2cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP,
1481                                        BRKB_PPmP, BRKB_PPzP)>;
1482
1483// Loop control, based on predicate and flag setting
1484def : InstRW<[N2Write_3cyc_1M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
1485
1486// Loop control, propagating
1487def : InstRW<[N2Write_2cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
1488
1489// Loop control, propagating and flag setting
1490def : InstRW<[N2Write_3cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
1491                                            BRKPBS_PPzPP)>;
1492
1493// Loop control, based on GPR
1494def : InstRW<[N2Write_3cyc_1M],
1495             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
1496
1497def : InstRW<[N2Write_3cyc_1M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>;
1498
1499// Loop terminate
1500def : InstRW<[N2Write_1cyc_1M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
1501
1502// Predicate counting scalar
1503def : InstRW<[N2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
1504def : InstRW<[N2Write_2cyc_1M],
1505             (instregex "^(CNT|DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$",
1506                        "^SQ(DEC|INC)[BHWD]_XPiWdI$",
1507                        "^(UQDEC|UQINC)[BHWD]_WPiI$")>;
1508
1509// Predicate counting scalar, active predicate
1510def : InstRW<[N2Write_2cyc_1M],
1511             (instregex "^CNTP_XPP_[BHSD]$",
1512                        "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$",
1513                        "^(UQDEC|UQINC)P_WP_[BHSD]$",
1514                        "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>;
1515
1516// Predicate counting vector, active predicate
1517def : InstRW<[N2Write_7cyc_1M_1M0_1V],
1518             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>;
1519
1520// Predicate logical
1521def : InstRW<[N2Write_1cyc_1M0],
1522             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
1523
1524// Predicate logical, flag setting
1525def : InstRW<[N2Write_2cyc_1M0_1M],
1526             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>;
1527
1528// Predicate reverse
1529def : InstRW<[N2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>;
1530
1531// Predicate select
1532def : InstRW<[N2Write_1cyc_1M0], (instrs SEL_PPPP)>;
1533
1534// Predicate set
1535def : InstRW<[N2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>;
1536
1537// Predicate set/initialize, set flags
1538def : InstRW<[N2Write_3cyc_1M], (instregex "^PTRUES_[BHSD]$")>;
1539
1540// Predicate find first/next
1541def : InstRW<[N2Write_3cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>;
1542
1543// Predicate test
1544def : InstRW<[N2Write_1cyc_1M], (instrs PTEST_PP)>;
1545
1546// Predicate transpose
1547def : InstRW<[N2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSDQ]$")>;
1548
1549// Predicate unpack and widen
1550def : InstRW<[N2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
1551
1552// Predicate zip/unzip
1553def : InstRW<[N2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
1554
1555// SVE integer instructions
1556// -----------------------------------------------------------------------------
1557
1558// Arithmetic, absolute diff
1559def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]$")>;
1560
1561// Arithmetic, absolute diff accum
1562def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;
1563
1564// Arithmetic, absolute diff accum long
1565def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;
1566
1567// Arithmetic, absolute diff long
1568def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;
1569
1570// Arithmetic, basic
1571def : InstRW<[N2Write_2cyc_1V],
1572             (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
1573                        "^(ADD|SUB)_ZZZ_[BHSD]$",
1574                        "^(ADD|SUB|SUBR)_ZI_[BHSD]$",
1575                        "^ADR_[SU]XTW_ZZZ_D_[0123]$",
1576                        "^ADR_LSL_ZZZ_[SD]_[0123]$",
1577                        "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$",
1578                        "^SADDLBT_ZZZ_[HSD]$",
1579                        "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$",
1580                        "^SSUBL(BT|TB)_ZZZ_[HSD]$")>;
1581
1582// Arithmetic, complex
1583def : InstRW<[N2Write_2cyc_1V],
1584             (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$",
1585                        "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
1586                        "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$",
1587                        "^[SU]Q(ADD|SUB)_ZI_[BHSD]$",
1588                        "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$",
1589                        "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>;
1590
1591// Arithmetic, large integer
1592def : InstRW<[N2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;
1593
1594// Arithmetic, pairwise add
1595def : InstRW<[N2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>;
1596
1597// Arithmetic, pairwise add and accum long
1598def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;
1599
1600// Arithmetic, shift
1601def : InstRW<[N2Write_2cyc_1V1],
1602             (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$",
1603                        "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$",
1604                        "^(ASR|LSL|LSR)_ZPmI_[BHSD]$",
1605                        "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$",
1606                        "^(ASR|LSL|LSR)_ZZI_[BHSD]$",
1607                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>;
1608
1609// Arithmetic, shift and accumulate
1610def : InstRW<[N2Write_4cyc_1V1],
1611             (instregex "^(SRSRA|SSRA|URSRA|USRA)_ZZI_[BHSD]$")>;
1612
1613// Arithmetic, shift by immediate
1614// Arithmetic, shift by immediate and insert
1615def : InstRW<[N2Write_2cyc_1V1],
1616             (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>;
1617
1618// Arithmetic, shift complex
1619def : InstRW<[N2Write_4cyc_1V1],
1620             (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$",
1621                        "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$",
1622                        "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$",
1623                        "^SQSHRU?N[BT]_ZZI_[BHS]$",
1624                        "^UQR?SHRN[BT]_ZZI_[BHS]$")>;
1625
1626// Arithmetic, shift right for divide
1627def : InstRW<[N2Write_4cyc_1V1], (instregex "^ASRD_ZPmI_[BHSD]$")>;
1628
1629// Arithmetic, shift rounding
1630def : InstRW<[N2Write_4cyc_1V1],
1631             (instregex "^(SRSHL|SRSHLR|URSHL|URSHLR)_ZPmZ_[BHSD]$",
1632                        "^[SU]RSHR_ZPmI_[BHSD]$")>;
1633
1634// Bit manipulation
1635def : InstRW<[N2Write_6cyc_2V1],
1636             (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]$")>;
1637
1638// Bitwise select
1639def : InstRW<[N2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;
1640
1641// Count/reverse bits
1642def : InstRW<[N2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>;
1643
1644// Broadcast logical bitmask immediate to vector
1645def : InstRW<[N2Write_2cyc_1V], (instrs DUPM_ZI)>;
1646
1647// Compare and set flags
1648def : InstRW<[N2Write_4cyc_1V0_1M],
1649             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
1650                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;
1651
1652// Complex add
1653def : InstRW<[N2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>;
1654
1655// Complex dot product 8-bit element
1656def : InstRW<[N2Write_3cyc_1V], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
1657
1658// Complex dot product 16-bit element
1659def : InstRW<[N2Write_4cyc_1V0], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
1660
1661// Complex multiply-add B, H, S element size
1662def : InstRW<[N2Write_4cyc_1V0], (instregex "^CMLA_ZZZ_[BHS]$",
1663                                            "^CMLA_ZZZI_[HS]$")>;
1664
1665// Complex multiply-add D element size
1666def : InstRW<[N2Write_5cyc_2V0], (instrs CMLA_ZZZ_D)>;
1667
1668// Conditional extract operations, scalar form
1669def : InstRW<[N2Write_8cyc_1M0_1V1_1V], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;
1670
1671// Conditional extract operations, SIMD&FP scalar and vector forms
1672def : InstRW<[N2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
1673                                            "^COMPACT_ZPZ_[SD]$",
1674                                            "^SPLICE_ZPZZ?_[BHSD]$")>;
1675
1676// Convert to floating point, 64b to float or convert to double
1677def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]$")>;
1678
1679// Convert to floating point, 64b to half
1680def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_DtoH$")>;
1681
1682// Convert to floating point, 32b to single or half
1683def : InstRW<[N2Write_4cyc_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>;
1684
1685// Convert to floating point, 32b to double
1686def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_StoD$")>;
1687
1688// Convert to floating point, 16b to half
1689def : InstRW<[N2Write_6cyc_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>;
1690
1691// Copy, scalar
1692def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>;
1693
1694// Copy, scalar SIMD&FP or imm
1695def : InstRW<[N2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$",
1696                                           "^CPY_ZPzI_[BHSD]$")>;
1697
1698// Divides, 32 bit
1699def : InstRW<[N2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>;
1700
1701// Divides, 64 bit
1702def : InstRW<[N2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>;
1703
1704// Dot product, 8 bit
1705def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]DOT_ZZZI?_S$")>;
1706
1707// Dot product, 8 bit, using signed and unsigned integers
1708def : InstRW<[N2Write_3cyc_1V], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
1709
1710// Dot product, 16 bit
1711def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>;
1712
1713// Duplicate, immediate and indexed form
1714def : InstRW<[N2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$",
1715                                           "^DUP_ZZI_[BHSDQ]$")>;
1716
1717// Duplicate, scalar form
1718def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
1719
1720// Extend, sign or zero
1721def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$",
1722                                            "^[SU]XTH_ZPmZ_[SD]$",
1723                                            "^[SU]XTW_ZPmZ_D$")>;
1724
1725// Extract
1726def : InstRW<[N2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
1727
1728// Extract narrow saturating
1729def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$",
1730                                            "^SQXTUN[BT]_ZZ_[BHS]$")>;
1731
1732// Extract/insert operation, SIMD and FP scalar form
1733def : InstRW<[N2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
1734                                            "^INSR_ZV_[BHSD]$")>;
1735
1736// Extract/insert operation, scalar
1737def : InstRW<[N2Write_5cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]$",
1738                                                "^INSR_ZR_[BHSD]$")>;
1739
1740// Histogram operations
1741def : InstRW<[N2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$",
1742                                           "^HISTSEG_ZZZ$")>;
1743
1744// Horizontal operations, B, H, S form, immediate operands only
1745def : InstRW<[N2Write_4cyc_1V0], (instregex "^INDEX_II_[BHS]$")>;
1746
1747// Horizontal operations, B, H, S form, with a scalar operand: immediate +
1748// scalar, scalar + immediate, or scalar + scalar operands
1749def : InstRW<[N2Write_7cyc_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;
1750
1751// Horizontal operations, D form, immediate operands only
1752def : InstRW<[N2Write_5cyc_2V0], (instrs INDEX_II_D)>;
1753
1754// Horizontal operations, D form, with a scalar operand: immediate +
1755// scalar, scalar + immediate, or scalar + scalar operands
1756def : InstRW<[N2Write_8cyc_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
1757
1758// Logical
1759def : InstRW<[N2Write_2cyc_1V],
1760             (instregex "^(AND|EOR|ORR)_ZI$",
1761                        "^(AND|BIC|EOR|ORR)_ZZZ$",
1762                        "^EOR(BT|TB)_ZZZ_[BHSD]$",
1763                        "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;
1764
1765// Max/min, basic and pairwise
1766def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$",
1767                                           "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$")>;
1768
1769// Matching operations
1770def : InstRW<[N2Write_2cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>;
1771
1772// Matrix multiply-accumulate
1773def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
1774
1775// Move prefix
1776def : InstRW<[N2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
1777                                           "^MOVPRFX_ZZ$")>;
1778
1779// Multiply, B, H, S element size
1780def : InstRW<[N2Write_4cyc_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$",
1781                                            "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;
1782
1783// Multiply, D element size
1784def : InstRW<[N2Write_5cyc_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$",
1785                                            "^[SU]MULH_(ZPmZ|ZZZ)_D$")>;
1786
1787// Multiply long
1788def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
1789                                            "^[SU]MULL[BT]_ZZZ_[HSD]$")>;
1790
1791// Multiply accumulate, B, H, S element size
1792def : InstRW<[N2Write_4cyc_1V0], (instregex "^ML[AS]_ZZZI_[BHS]$",
1793                                            "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>;
1794
1795// Multiply accumulate, D element size
1796def : InstRW<[N2Write_5cyc_2V0], (instregex "^ML[AS]_ZZZI_D$",
1797                                            "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>;
1798
1799// Multiply accumulate long
1800def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
1801                                            "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;
1802
1803// Multiply accumulate saturating doubling long regular
1804def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
1805                                            "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;
1806
1807// Multiply saturating doubling high, B, H, S element size
1808def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULH_ZZZ_[BHS]$",
1809                                            "^SQDMULH_ZZZI_[HS]$")>;
1810
1811// Multiply saturating doubling high, D element size
1812def : InstRW<[N2Write_5cyc_2V0], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
1813
1814// Multiply saturating doubling long
1815def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
1816                                            "^SQDMULL[BT]_ZZZI_[SD]$")>;
1817
1818// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
1819// element size
1820def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
1821                                            "^SQRDCMLAH_ZZZ_[BHS]$",
1822                                            "^SQRDML[AS]H_ZZZI_[HS]$",
1823                                            "^SQRDCMLAH_ZZZI_[HS]$")>;
1824
1825// Multiply saturating rounding doubling regular/complex accumulate, D element
1826// size
1827def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDML[AS]H_ZZZI?_D$",
1828                                            "^SQRDCMLAH_ZZZ_D$")>;
1829
1830// Multiply saturating rounding doubling regular/complex, B, H, S element size
1831def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMULH_ZZZ_[BHS]$",
1832                                            "^SQRDMULH_ZZZI_[HS]$")>;
1833
1834// Multiply saturating rounding doubling regular/complex, D element size
1835def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDMULH_ZZZI?_D$")>;
1836
1837// Multiply/multiply long, (8x8) polynomial
1838def : InstRW<[N2Write_2cyc_1V0], (instregex "^PMUL_ZZZ_B$",
1839                                            "^PMULL[BT]_ZZZ_[HDQ]$")>;
1840
1841// Predicate counting vector
1842def : InstRW<[N2Write_2cyc_1V0],
1843             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>;
1844
1845// Reciprocal estimate
1846def : InstRW<[N2Write_4cyc_2V0], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S)>;
1847
1848// Reduction, arithmetic, B form
1849def : InstRW<[N2Write_11cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B$")>;
1850
1851// Reduction, arithmetic, H form
1852def : InstRW<[N2Write_9cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H$")>;
1853
1854// Reduction, arithmetic, S form
1855def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S$")>;
1856
1857// Reduction, arithmetic, D form
1858def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D$")>;
1859
1860// Reduction, logical
1861def : InstRW<[N2Write_6cyc_1V_1V1], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>;
1862
1863// Reverse, vector
1864def : InstRW<[N2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$",
1865                                           "^REVB_ZPmZ_[HSD]$",
1866                                           "^REVH_ZPmZ_[SD]$",
1867                                           "^REVW_ZPmZ_D$")>;
1868
1869// Select, vector form
1870def : InstRW<[N2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>;
1871
1872// Table lookup
1873def : InstRW<[N2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>;
1874
1875// Table lookup extension
1876def : InstRW<[N2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>;
1877
1878// Transpose, vector form
1879def : InstRW<[N2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>;
1880
1881// Unpack and extend
1882def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>;
1883
1884// Zip/unzip
1885def : InstRW<[N2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
1886
1887// SVE floating-point instructions
1888// -----------------------------------------------------------------------------
1889
1890// Floating point absolute value/difference
1891def : InstRW<[N2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]$")>;
1892
1893// Floating point arithmetic
1894def : InstRW<[N2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
1895                                           "^FADDP_ZPmZZ_[HSD]$",
1896                                           "^FNEG_ZPmZ_[HSD]$",
1897                                           "^FSUBR_ZPm[IZ]_[HSD]$")>;
1898
1899// Floating point associative add, F16
1900def : InstRW<[N2Write_10cyc_1V1], (instrs FADDA_VPZ_H)>;
1901
1902// Floating point associative add, F32
1903def : InstRW<[N2Write_6cyc_1V1], (instrs FADDA_VPZ_S)>;
1904
1905// Floating point associative add, F64
1906def : InstRW<[N2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
1907
1908// Floating point compare
1909def : InstRW<[N2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]$",
1910                                            "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$",
1911                                            "^FCM(LE|LT)_PPzZ0_[HSD]$",
1912                                            "^FCMUO_PPzZZ_[HSD]$")>;
1913
1914// Floating point complex add
1915def : InstRW<[N2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>;
1916
1917// Floating point complex multiply add
1918def : InstRW<[N2Write_5cyc_1V], (instregex "^FCMLA_ZPmZZ_[HSD]$",
1919                                           "^FCMLA_ZZZI_[HS]$")>;
1920
1921// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
1922def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$",
1923                                            "^FCVTLT_ZPmZ_HtoS$",
1924                                            "^FCVTNT_ZPmZ_StoH$")>;
1925
1926// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
1927// or F64 to F16)
1928def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$",
1929                                            "^FCVTLT_ZPmZ_StoD$",
1930                                            "^FCVTNT_ZPmZ_DtoS$")>;
1931
1932// Floating point convert, round to odd
1933def : InstRW<[N2Write_3cyc_1V0], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
1934
1935// Floating point base2 log, F16
1936def : InstRW<[N2Write_6cyc_4V0], (instrs FLOGB_ZPmZ_H)>;
1937
1938// Floating point base2 log, F32
1939def : InstRW<[N2Write_4cyc_2V0], (instrs FLOGB_ZPmZ_S)>;
1940
1941// Floating point base2 log, F64
1942def : InstRW<[N2Write_3cyc_1V0], (instrs FLOGB_ZPmZ_D)>;
1943
1944// Floating point convert to integer, F16
1945def : InstRW<[N2Write_6cyc_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>;
1946
1947// Floating point convert to integer, F32
1948def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>;
1949
1950// Floating point convert to integer, F64
1951def : InstRW<[N2Write_3cyc_1V0],
1952             (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>;
1953
1954// Floating point copy
1955def : InstRW<[N2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$",
1956                                           "^FDUP_ZI_[HSD]$")>;
1957
1958// Floating point divide, F16
1959def : InstRW<[N2Write_13cyc_1V0], (instregex "^FDIVR?_ZPmZ_H$")>;
1960
1961// Floating point divide, F32
1962def : InstRW<[N2Write_10cyc_1V0], (instregex "^FDIVR?_ZPmZ_S$")>;
1963
1964// Floating point divide, F64
1965def : InstRW<[N2Write_15cyc_1V0], (instregex "^FDIVR?_ZPmZ_D$")>;
1966
1967// Floating point min/max pairwise
1968def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>;
1969
1970// Floating point min/max
1971def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;
1972
1973// Floating point multiply
1974def : InstRW<[N2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$",
1975                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;
1976
1977// Floating point multiply accumulate
1978def : InstRW<[N2Write_4cyc_1V],
1979             (instregex "^FML[AS]_(ZPmZZ|ZZZI)_[HSD]$",
1980                        "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_ZPmZZ_[HSD]$")>;
1981
1982// Floating point multiply add/sub accumulate long
1983def : InstRW<[N2Write_4cyc_1V], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;
1984
1985// Floating point reciprocal estimate, F16
1986def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H,
1987                                         FRSQRTE_ZZ_H)>;
1988
1989// Floating point reciprocal estimate, F32
1990def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S,
1991                                         FRSQRTE_ZZ_S)>;
1992
1993// Floating point reciprocal estimate, F64
1994def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D,
1995                                         FRSQRTE_ZZ_D)>;
1996
1997// Floating point reciprocal step
1998def : InstRW<[N2Write_4cyc_1V0], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;
1999
2000// Floating point reduction, F16
2001def : InstRW<[N2Write_6cyc_2V],
2002             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>;
2003
2004// Floating point reduction, F32
2005def : InstRW<[N2Write_4cyc_1V],
2006             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>;
2007
2008// Floating point reduction, F64
2009def : InstRW<[N2Write_2cyc_1V],
2010             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;
2011
2012// Floating point round to integral, F16
2013def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;
2014
2015// Floating point round to integral, F32
2016def : InstRW<[N2Write_4cyc_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;
2017
2018// Floating point round to integral, F64
2019def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;
2020
2021// Floating point square root, F16
2022def : InstRW<[N2Write_13cyc_1V0], (instrs FSQRT_ZPmZ_H)>;
2023
2024// Floating point square root, F32
2025def : InstRW<[N2Write_10cyc_1V0], (instrs FSQRT_ZPmZ_S)>;
2026
2027// Floating point square root, F64
2028def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRT_ZPmZ_D)>;
2029
2030// Floating point trigonometric exponentiation
2031def : InstRW<[N2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>;
2032
2033// Floating point trigonometric multiply add
2034def : InstRW<[N2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>;
2035
2036// Floating point trigonometric, miscellaneous
2037def : InstRW<[N2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
2038
2039// SVE BFloat16 (BF16) instructions
2040// -----------------------------------------------------------------------------
2041
2042// Convert, F32 to BF16
2043def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
2044
2045// Dot product
2046def : InstRW<[N2Write_4cyc_1V], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
2047
2048// Matrix multiply accumulate
2049def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA_ZZZ)>;
2050
2051// Multiply accumulate long
2052def : InstRW<[N2Write_4cyc_1V], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;
2053
2054// SVE Load instructions
2055// -----------------------------------------------------------------------------
2056
2057// Load vector
2058def : InstRW<[N2Write_6cyc_1L], (instrs LDR_ZXI)>;
2059
2060// Load predicate
2061def : InstRW<[N2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
2062
2063// Contiguous load, scalar + imm
2064def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM_REAL$",
2065                                           "^LD1S?B_[HSD]_IMM_REAL$",
2066                                           "^LD1S?H_[SD]_IMM_REAL$",
2067                                           "^LD1S?W_D_IMM_REAL$" )>;
2068// Contiguous load, scalar + scalar
2069def : InstRW<[N2Write_6cyc_1L01], (instregex "^LD1[BHWD]$",
2070                                             "^LD1S?B_[HSD]$",
2071                                             "^LD1S?H_[SD]$",
2072                                             "^LD1S?W_D$" )>;
2073
2074// Contiguous load broadcast, scalar + imm
2075def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
2076                                           "^LD1RSW_IMM$",
2077                                           "^LD1RS?B_[HSD]_IMM$",
2078                                           "^LD1RS?H_[SD]_IMM$",
2079                                           "^LD1RS?W_D_IMM$",
2080                                           "^LD1RQ_[BHWD]_IMM$")>;
2081
2082// Contiguous load broadcast, scalar + scalar
2083def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
2084
2085// Non temporal load, scalar + imm
2086def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>;
2087
2088// Non temporal load, scalar + scalar
2089def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>;
2090
2091// Non temporal gather load, vector + scalar 32-bit element size
2092def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
2093                                              "^LDNT1S[BH]_ZZR_S_REAL$")>;
2094
2095// Non temporal gather load, vector + scalar 64-bit element size
2096def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
2097def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;
2098
2099// Contiguous first faulting load, scalar + scalar
2100def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
2101                                              "^LDFF1S?B_[HSD]_REAL$",
2102                                              "^LDFF1S?H_[SD]_REAL$",
2103                                              "^LDFF1S?W_D_REAL$")>;
2104
2105// Contiguous non faulting load, scalar + imm
2106def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
2107                                           "^LDNF1S?B_[HSD]_IMM_REAL$",
2108                                           "^LDNF1S?H_[SD]_IMM_REAL$",
2109                                           "^LDNF1S?W_D_IMM_REAL$")>;
2110
2111// Contiguous Load two structures to two vectors, scalar + imm
2112def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>;
2113
2114// Contiguous Load two structures to two vectors, scalar + scalar
2115def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD2[BHWD]$")>;
2116
2117// Contiguous Load three structures to three vectors, scalar + imm
2118def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD3[BHWD]_IMM$")>;
2119
2120// Contiguous Load three structures to three vectors, scalar + scalar
2121def : InstRW<[N2Write_10cyc_1V_1L_1S], (instregex "^LD3[BHWD]$")>;
2122
2123// Contiguous Load four structures to four vectors, scalar + imm
2124def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>;
2125
2126// Contiguous Load four structures to four vectors, scalar + scalar
2127def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>;
2128
2129// Gather load, vector + imm, 32-bit element size
2130def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
2131                                              "^GLD(FF)?1W_IMM_REAL$")>;
2132
2133// Gather load, vector + imm, 64-bit element size
2134def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
2135                                              "^GLD(FF)?1D_IMM_REAL$")>;
2136
2137// Gather load, 64-bit element size
2138def : InstRW<[N2Write_9cyc_2L_2V],
2139             (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
2140                        "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
2141                        "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
2142                        "^GLD(FF)?1D_(SCALED_)?REAL$")>;
2143
2144// Gather load, 32-bit scaled offset
2145def : InstRW<[N2Write_10cyc_2L_2V],
2146             (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
2147                        "^GLD(FF)?1W_[SU]XTW_SCALED_REAL$")>;
2148
2149// Gather load, 32-bit unpacked unscaled offset
2150def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
2151                                              "^GLD(FF)?1W_[SU]XTW_REAL$")>;
2152
2153// SVE Store instructions
2154// -----------------------------------------------------------------------------
2155
2156// Store from predicate reg
2157def : InstRW<[N2Write_1cyc_1L01], (instrs STR_PXI)>;
2158
2159// Store from vector reg
2160def : InstRW<[N2Write_2cyc_1L01_1V], (instrs STR_ZXI)>;
2161
2162// Contiguous store, scalar + imm
2163def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BHWD]_IMM$",
2164                                                "^ST1B_[HSD]_IMM$",
2165                                                "^ST1H_[SD]_IMM$",
2166                                                "^ST1W_D_IMM$")>;
2167
2168// Contiguous store, scalar + scalar
2169def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
2170def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BWD]$",
2171                                                "^ST1B_[HSD]$",
2172                                                "^ST1W_D$")>;
2173
2174// Contiguous store two structures from two vectors, scalar + imm
2175def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BHWD]_IMM$")>;
2176
2177// Contiguous store two structures from two vectors, scalar + scalar (ST2H)
2178def : InstRW<[N2Write_4cyc_1L01_1S_1V], (instrs ST2H)>;
2179
2180// Contiguous store two structures from two vectors, scalar + scalar (ST2[BWD])
2181def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BWD]$")>;
2182
2183// Contiguous store three structures from three vectors, scalar + imm
2184def : InstRW<[N2Write_7cyc_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;
2185
2186// Contiguous store three structures from three vectors, scalar + scalar
2187def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instrs ST3H)>;
2188
2189// Contiguous store three structures from three vectors, scalar + scalar
2190def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instregex "^ST3[BWD]$")>;
2191
2192// Contiguous store four structures from four vectors, scalar + imm
2193def : InstRW<[N2Write_11cyc_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;
2194
2195// Contiguous store four structures from four vectors, scalar + scalar
2196def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instrs ST4H)>;
2197
2198// Contiguous store four structures from four vectors, scalar + scalar
2199def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instregex "^ST4[BWD]$")>;
2200
2201// Non temporal store, scalar + imm
2202def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
2203
2204// Non temporal store, scalar + scalar
2205def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
2206def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
2207
2208// Scatter non temporal store, vector + scalar 32-bit element size
2209def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^STNT1[BHW]_ZZR_S")>;
2210
2211// Scatter non temporal store, vector + scalar 64-bit element size
2212def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZZR_D")>;
2213
2214// Scatter store vector + imm 32-bit element size
2215def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_IMM$",
2216                                                "^SST1W_IMM$")>;
2217
2218// Scatter store vector + imm 64-bit element size
2219def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
2220                                                "^SST1D_IMM$")>;
2221
2222// Scatter store, 32-bit scaled offset
2223def : InstRW<[N2Write_4cyc_2L01_2V],
2224             (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
2225
2226// Scatter store, 32-bit unpacked unscaled offset
2227def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$",
2228                                                "^SST1D_[SU]XTW$")>;
2229
2230// Scatter store, 32-bit unpacked scaled offset
2231def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
2232                                                "^SST1D_[SU]XTW_SCALED$")>;
2233
2234// Scatter store, 32-bit unscaled offset
2235def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_[SU]XTW$",
2236                                                "^SST1W_[SU]XTW$")>;
2237
2238// Scatter store, 64-bit scaled offset
2239def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_SCALED$",
2240                                                "^SST1D_SCALED$")>;
2241
2242// Scatter store, 64-bit unscaled offset
2243def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D$",
2244                                                "^SST1D$")>;
2245
2246// SVE Miscellaneous instructions
2247// -----------------------------------------------------------------------------
2248
2249// Read first fault register, unpredicated
2250def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;
2251
2252// Read first fault register, predicated
2253def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;
2254
2255// Read first fault register and set flags
2256def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>;
2257
2258// Set first fault register
2259// Write to first fault register
2260def : InstRW<[N2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;
2261
2262// Prefetch
2263def : InstRW<[N2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;
2264
2265// SVE Cryptographic instructions
2266// -----------------------------------------------------------------------------
2267
2268// Crypto AES ops
2269def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$",
2270                                           "^AESI?MC_ZZ_B$")>;
2271
2272// Crypto SHA3 ops
2273def : InstRW<[N2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$",
2274                                            "^RAX1_ZZZ_D$",
2275                                            "^XAR_ZZZI_[BHSD]$")>;
2276
2277// Crypto SM4 ops
2278def : InstRW<[N2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;
2279
2280}
2281