xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td (revision e6bfd18d21b225af6a0ed67ceeaf1293b7b9eba5)
1//=- AArch64SchedNeoverseN2.td - NeoverseN2 Scheduling Defs --*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the scheduling model for the Arm Neoverse N2 processors.
10//
11//===----------------------------------------------------------------------===//
12
// Machine-model record for Neoverse N2. Every scheduling definition that is
// wrapped in `let SchedModel = NeoverseN2Model` is attached to this record.
def NeoverseN2Model : SchedMachineModel {
  // Micro-ops dispatched at a time.
  let IssueWidth = 10;
  // Entries in micro-op re-order buffer.
  let MicroOpBufferSize = 160;
  // Optimistic load latency.
  let LoadLatency = 4;
  // Extra cycles for mispredicted branch.
  let MispredictPenalty = 10;
  // NOTE: Copied from Cortex-A57.
  let LoopMicroOpBufferSize = 16;
  let CompleteModel = 1;

  list<Predicate> UnsupportedFeatures = SMEUnsupported.F;
}
23
24//===----------------------------------------------------------------------===//
25// Define each kind of processor resource and number available on Neoverse N2.
26// Instructions are first fetched and then decoded into internal macro-ops
27// (MOPs). From there, the MOPs proceed through register renaming and dispatch
28// stages. A MOP can be split into two micro-ops further down the pipeline
29// after the decode stage. Once dispatched, micro-ops wait for their operands
30// and issue out-of-order to one of thirteen issue pipelines. Each issue
31// pipeline can accept one micro-op per cycle.
32
33let SchedModel = NeoverseN2Model in {
34
35// Define the (13) issue ports.
// Issue ports. The unit counts below add up to the 13 pipelines:
// 2 + 2 + 1 + 1 + 2 + 1 + 2 + 1 + 1.

// Branch 0/1
def N2UnitB : ProcResource<2>;
// Integer single Cycle 0/1
def N2UnitS : ProcResource<2>;
// Integer multicycle 0
def N2UnitM0 : ProcResource<1>;
// Integer multicycle 1
def N2UnitM1 : ProcResource<1>;
// Load/Store 0/1
def N2UnitL01 : ProcResource<2>;
// Load 2
def N2UnitL2 : ProcResource<1>;
// Store data 0/1
def N2UnitD : ProcResource<2>;
// FP/ASIMD 0
def N2UnitV0 : ProcResource<1>;
// FP/ASIMD 1
def N2UnitV1 : ProcResource<1>;

// Groups: a write that names a group may use any member unit.

// FP/ASIMD 0/1
def N2UnitV : ProcResGroup<[N2UnitV0, N2UnitV1]>;
// Integer single/multicycle 0/1
def N2UnitM : ProcResGroup<[N2UnitM0, N2UnitM1]>;
// Load/Store 0/1 and Load 2
def N2UnitL : ProcResGroup<[N2UnitL01, N2UnitL2]>;
// Integer single cycle 0/1 and single/multicycle 0/1
def N2UnitI : ProcResGroup<[N2UnitS, N2UnitM0, N2UnitM1]>;
50
51// Define commonly used read types.
52
53// No forwarding is provided for these types.
// None of the common read types gets forwarding: each one is given a
// ReadAdvance of zero cycles.
foreach N2Read = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID,
                  ReadExtrHi, ReadAdrBase, ReadST, ReadVLD] in
def : ReadAdvance<N2Read, 0>;

// Generic writes that are given no processor resources in this model.
let Unsupported = 1 in
def : WriteRes<WriteAtomic, []>;

let Latency = 1 in {
  def : WriteRes<WriteBarrier, []>;
  def : WriteRes<WriteHint, []>;
}

let Latency = 4 in
def : WriteRes<WriteLDHi, []>;
69
70//===----------------------------------------------------------------------===//
71// Define customized scheduler read/write types specific to the Neoverse N2.
72
73//===----------------------------------------------------------------------===//
74// Define generic 1 micro-op types
75
// Single micro-op writes, named N2Write_<latency>cyc_1<unit>. Where an entry
// sets ResourceCycles, the value equals its latency.
let Latency = 1 in {
  def N2Write_1cyc_1B   : SchedWriteRes<[N2UnitB]>;
  def N2Write_1cyc_1I   : SchedWriteRes<[N2UnitI]>;
  def N2Write_1cyc_1M   : SchedWriteRes<[N2UnitM]>;
  def N2Write_1cyc_1M0  : SchedWriteRes<[N2UnitM0]>;
  def N2Write_1cyc_1L01 : SchedWriteRes<[N2UnitL01]>;
}
let Latency = 2 in def N2Write_2cyc_1M : SchedWriteRes<[N2UnitM]>;
let Latency = 3 in def N2Write_3cyc_1M : SchedWriteRes<[N2UnitM]>;

// Integer multicycle unit 0, with ResourceCycles set.
let Latency = 2, ResourceCycles = [2] in
def N2Write_2cyc_1M0 : SchedWriteRes<[N2UnitM0]>;
let Latency = 3, ResourceCycles = [3] in
def N2Write_3cyc_1M0 : SchedWriteRes<[N2UnitM0]>;
let Latency = 5, ResourceCycles = [5] in
def N2Write_5cyc_1M0 : SchedWriteRes<[N2UnitM0]>;
let Latency = 12, ResourceCycles = [12] in
def N2Write_12cyc_1M0 : SchedWriteRes<[N2UnitM0]>;
let Latency = 20, ResourceCycles = [20] in
def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]>;

// Any load pipeline.
let Latency = 4 in def N2Write_4cyc_1L : SchedWriteRes<[N2UnitL]>;
let Latency = 6 in def N2Write_6cyc_1L : SchedWriteRes<[N2UnitL]>;

// Either FP/ASIMD pipeline.
let Latency = 2  in def N2Write_2cyc_1V  : SchedWriteRes<[N2UnitV]>;
let Latency = 3  in def N2Write_3cyc_1V  : SchedWriteRes<[N2UnitV]>;
let Latency = 4  in def N2Write_4cyc_1V  : SchedWriteRes<[N2UnitV]>;
let Latency = 5  in def N2Write_5cyc_1V  : SchedWriteRes<[N2UnitV]>;
let Latency = 12 in def N2Write_12cyc_1V : SchedWriteRes<[N2UnitV]>;

// FP/ASIMD pipeline 0 only.
let Latency = 2 in def N2Write_2cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 3 in def N2Write_3cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 4 in def N2Write_4cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 7, ResourceCycles = [7] in
def N2Write_7cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 9  in def N2Write_9cyc_1V0  : SchedWriteRes<[N2UnitV0]>;
let Latency = 10 in def N2Write_10cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 12 in def N2Write_12cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 13 in def N2Write_13cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 15 in def N2Write_15cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 16 in def N2Write_16cyc_1V0 : SchedWriteRes<[N2UnitV0]>;
let Latency = 20 in def N2Write_20cyc_1V0 : SchedWriteRes<[N2UnitV0]>;

// FP/ASIMD pipeline 1 only.
let Latency = 2  in def N2Write_2cyc_1V1  : SchedWriteRes<[N2UnitV1]>;
let Latency = 3  in def N2Write_3cyc_1V1  : SchedWriteRes<[N2UnitV1]>;
let Latency = 4  in def N2Write_4cyc_1V1  : SchedWriteRes<[N2UnitV1]>;
let Latency = 6  in def N2Write_6cyc_1V1  : SchedWriteRes<[N2UnitV1]>;
let Latency = 10 in def N2Write_10cyc_1V1 : SchedWriteRes<[N2UnitV1]>;

// Load/Store 0/1.
let Latency = 6 in def N2Write_6cyc_1L01 : SchedWriteRes<[N2UnitL01]>;
118
119//===----------------------------------------------------------------------===//
120// Define generic 2 micro-op types
121
// Two micro-op writes. NumMicroOps is shared by the whole group; each entry
// supplies its own latency, plus ResourceCycles where the original set them.
let NumMicroOps = 2 in {
  let Latency = 1 in
  def N2Write_1cyc_1B_1S : SchedWriteRes<[N2UnitB, N2UnitS]>;

  let Latency = 6 in
  def N2Write_6cyc_1M0_1B : SchedWriteRes<[N2UnitM0, N2UnitB]>;

  let Latency = 9 in
  def N2Write_9cyc_1M0_1L : SchedWriteRes<[N2UnitM0, N2UnitL]>;

  let Latency = 3 in
  def N2Write_3cyc_1I_1M : SchedWriteRes<[N2UnitI, N2UnitM]>;

  let Latency = 4 in
  def N2Write_4cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]>;

  let Latency = 5 in
  def N2Write_5cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]>;

  let Latency = 6 in
  def N2Write_6cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]>;

  let Latency = 7 in
  def N2Write_7cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]>;

  let Latency = 1 in
  def N2Write_1cyc_1L01_1D : SchedWriteRes<[N2UnitL01, N2UnitD]>;

  let Latency = 5 in
  def N2Write_5cyc_1M0_1V : SchedWriteRes<[N2UnitM0, N2UnitV]>;

  let Latency = 2 in
  def N2Write_2cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]>;

  let Latency = 4 in
  def N2Write_4cyc_1V1_1V : SchedWriteRes<[N2UnitV1, N2UnitV]>;

  let Latency = 4 in
  def N2Write_4cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]>;

  let Latency = 10, ResourceCycles = [5, 5] in
  def N2Write_10cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]>;

  let Latency = 13, ResourceCycles = [6, 7] in
  def N2Write_13cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]>;

  let Latency = 15, ResourceCycles = [7, 8] in
  def N2Write_15cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]>;

  let Latency = 16, ResourceCycles = [8, 8] in
  def N2Write_16cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]>;

  let Latency = 4 in
  def N2Write_4cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]>;

  let Latency = 6 in
  def N2Write_6cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]>;

  let Latency = 6 in
  def N2Write_6cyc_2L : SchedWriteRes<[N2UnitL, N2UnitL]>;

  let Latency = 8 in
  def N2Write_8cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]>;

  let Latency = 4 in
  def N2Write_4cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]>;

  let Latency = 3 in
  def N2Write_3cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]>;

  let Latency = 2 in
  def N2Write_2cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]>;

  let Latency = 6 in
  def N2Write_6cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]>;

  let Latency = 4 in
  def N2Write_4cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]>;

  let Latency = 5 in
  def N2Write_5cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]>;

  let Latency = 5 in
  def N2Write_5cyc_1V1_1M0 : SchedWriteRes<[N2UnitV1, N2UnitM0]>;

  let Latency = 7 in
  def N2Write_7cyc_1M0_1V0 : SchedWriteRes<[N2UnitM0, N2UnitV0]>;

  let Latency = 2 in
  def N2Write_2cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]>;

  let Latency = 6 in
  def N2Write_6cyc_1V_1V1 : SchedWriteRes<[N2UnitV, N2UnitV1]>;

  let Latency = 6 in
  def N2Write_6cyc_1L_1M : SchedWriteRes<[N2UnitL, N2UnitM]>;

  let Latency = 6 in
  def N2Write_6cyc_1L_1S : SchedWriteRes<[N2UnitL, N2UnitS]>;

  let Latency = 9 in
  def N2Write_9cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]>;

  let Latency = 4 in
  def N2Write_4cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]>;
}
300
301//===----------------------------------------------------------------------===//
302// Define generic 3 micro-op types
303
// Three micro-op writes (first group); NumMicroOps is shared.
let NumMicroOps = 3 in {
  let Latency = 1 in
  def N2Write_1cyc_1L01_1D_1I
      : SchedWriteRes<[N2UnitL01, N2UnitD, N2UnitI]>;

  let Latency = 2 in
  def N2Write_2cyc_1L01_1V_1I
      : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitI]>;

  let Latency = 2 in
  def N2Write_2cyc_1L01_2V
      : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV]>;

  let Latency = 7 in
  def N2Write_7cyc_1M_1M0_1V
      : SchedWriteRes<[N2UnitM, N2UnitM0, N2UnitV]>;

  let Latency = 8 in
  def N2Write_8cyc_1M0_1V1_1V
      : SchedWriteRes<[N2UnitM0, N2UnitV1, N2UnitV]>;
}
328
// Three micro-ops with a 10-cycle latency: one on each of the V, L and S
// units, as the _1V_1L_1S suffix states. The third resource was previously a
// second N2UnitL, which contradicted the name (every other write in this file
// lists exactly the units its name spells out) and booked no S pipe.
def N2Write_10cyc_1V_1L_1S : SchedWriteRes<[N2UnitV, N2UnitL, N2UnitS]> {
  let Latency     = 10;
  let NumMicroOps = 3;
}
333
// Three micro-op writes (second group); NumMicroOps is shared.
let NumMicroOps = 3 in {
  let Latency = 2 in
  def N2Write_2cyc_1L01_1S_1V
      : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]>;

  let Latency = 4 in
  def N2Write_4cyc_1L01_1S_1V
      : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]>;

  let Latency = 6 in
  def N2Write_6cyc_3L
      : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL]>;

  let Latency = 8 in
  def N2Write_8cyc_1L_2V
      : SchedWriteRes<[N2UnitL, N2UnitV, N2UnitV]>;
}
353
354//===----------------------------------------------------------------------===//
355// Define generic 4 micro-op types
356
// Four micro-op writes (first group); NumMicroOps is shared.
let NumMicroOps = 4 in {
  let Latency = 2 in
  def N2Write_2cyc_1L01_2V_1I
      : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV, N2UnitI]>;

  let Latency = 6 in
  def N2Write_6cyc_4V0
      : SchedWriteRes<[N2UnitV0, N2UnitV0, N2UnitV0, N2UnitV0]>;

  let Latency = 4 in
  def N2Write_4cyc_4V
      : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;

  let Latency = 6 in
  def N2Write_6cyc_4V
      : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;

  let Latency = 8 in
  def N2Write_8cyc_2L_2V
      : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]>;

  let Latency = 9 in
  def N2Write_9cyc_2L_2V
      : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]>;

  let Latency = 2 in
  def N2Write_2cyc_2L01_2V
      : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, N2UnitV]>;

  let Latency = 4 in
  def N2Write_4cyc_2L01_2V
      : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, N2UnitV]>;

  let Latency = 5 in
  def N2Write_5cyc_2L01_2V
      : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, N2UnitV]>;

  let Latency = 8 in
  def N2Write_8cyc_2M0_2V0
      : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitV0, N2UnitV0]>;

  let Latency = 11 in
  def N2Write_11cyc_2V_2V1
      : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, N2UnitV1]>;

  let Latency = 9 in
  def N2Write_9cyc_2V_2V1
      : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, N2UnitV1]>;

  let Latency = 8 in
  def N2Write_8cyc_2V_2V1
      : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, N2UnitV1]>;
}
429
// Four micro-ops with a 10-cycle latency: two on the load pipes (L) and two
// on FP/ASIMD pipe 1 (V1), as the _2L_2V1 suffix states. The list previously
// read [V, V, V1, V1] -- identical to the N2Write_*_2V_2V1 entries defined
// just before it -- so no load pipe was booked despite the name.
def N2Write_10cyc_2L_2V1 : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV1,
                                          N2UnitV1]> {
  let Latency     = 10;
  let NumMicroOps = 4;
}
435
// Four micro-op writes (second group); NumMicroOps is shared.
let NumMicroOps = 4 in {
  let Latency = 10 in
  def N2Write_10cyc_2L_2V
      : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]>;

  let Latency = 4 in
  def N2Write_4cyc_2M0_2M
      : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitM, N2UnitM]>;

  let Latency = 6 in
  def N2Write_6cyc_2I_2L
      : SchedWriteRes<[N2UnitI, N2UnitI, N2UnitL, N2UnitL]>;

  let Latency = 7 in
  def N2Write_7cyc_4L
      : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL]>;
}
456
457//===----------------------------------------------------------------------===//
458// Define generic 5 micro-op types
459
// Five micro-op writes; NumMicroOps is shared.
let NumMicroOps = 5 in {
  let Latency = 2 in
  def N2Write_2cyc_1L01_2V_2I
      : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV, N2UnitI, N2UnitI]>;

  let Latency = 8 in
  def N2Write_8cyc_2L_3V
      : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV, N2UnitV]>;
}
471
472//===----------------------------------------------------------------------===//
473// Define generic 6 micro-op types
474
// Six micro-op writes; NumMicroOps is shared.
let NumMicroOps = 6 in {
  let Latency = 8 in
  def N2Write_8cyc_3L_3V
      : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
                       N2UnitV, N2UnitV, N2UnitV]>;

  let Latency = 2 in
  def N2Write_2cyc_3L01_3V
      : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                       N2UnitV, N2UnitV, N2UnitV]>;

  let Latency = 6 in
  def N2Write_6cyc_3L01_3V
      : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                       N2UnitV, N2UnitV, N2UnitV]>;

  let Latency = 4 in
  def N2Write_4cyc_3L01_3V
      : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                       N2UnitV, N2UnitV, N2UnitV]>;

  let Latency = 10 in
  def N2Write_10cyc_2L_2V_2S
      : SchedWriteRes<[N2UnitL, N2UnitL,
                       N2UnitV, N2UnitV,
                       N2UnitS, N2UnitS]>;
}
504
505//===----------------------------------------------------------------------===//
506// Define generic 7 micro-op types
507
// Seven micro-ops: three on L followed by four on V.
let Latency = 8, NumMicroOps = 7 in
def N2Write_8cyc_3L_4V
    : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;
513
514//===----------------------------------------------------------------------===//
515// Define generic 8 micro-op types
516
// Eight micro-op writes; NumMicroOps is shared.
let NumMicroOps = 8 in {
  let Latency = 6 in
  def N2Write_6cyc_8V
      : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV,
                       N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;

  let Latency = 2 in
  def N2Write_2cyc_4L01_4V
      : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01,
                       N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;

  let Latency = 5 in
  def N2Write_5cyc_4L01_4V
      : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01,
                       N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;

  let Latency = 8 in
  def N2Write_8cyc_4L_4V
      : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
                       N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;

  let Latency = 9 in
  def N2Write_9cyc_4L_4V
      : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
                       N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;
}
548
549//===----------------------------------------------------------------------===//
550// Define generic 10 micro-op types
551
// Ten micro-ops: five on L01 followed by five on V.
let Latency = 7, NumMicroOps = 10 in
def N2Write_7cyc_5L01_5V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;
558
559//===----------------------------------------------------------------------===//
560// Define generic 12 micro-op types
561
// Twelve micro-ops: six on L01 followed by six on V.
let Latency = 7, NumMicroOps = 12 in
def N2Write_7cyc_6L01_6V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV]>;
569
570//===----------------------------------------------------------------------===//
571// Define generic 15 micro-op types
572
// Fifteen micro-ops: five each on L01, S and V, in that order.
let Latency = 7, NumMicroOps = 15 in
def N2Write_7cyc_5L01_5S_5V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitS, N2UnitS, N2UnitS, N2UnitS, N2UnitS,
                     N2UnitV, N2UnitV, N2UnitV, N2UnitV, N2UnitV]>;
581
582//===----------------------------------------------------------------------===//
583// Define generic 18 micro-op types
584
// Eighteen micro-ops: nine on L01 followed by nine on V.
let Latency = 11, NumMicroOps = 18 in
def N2Write_11cyc_9L01_9V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV]>;
594
595//===----------------------------------------------------------------------===//
596// Define generic 27 micro-op types
597
// Twenty-seven micro-ops: nine each on L01, S and V, in that order.
let Latency = 11, NumMicroOps = 27 in
def N2Write_11cyc_9L01_9S_9V
    : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitL01, N2UnitL01, N2UnitL01,
                     N2UnitS, N2UnitS, N2UnitS,
                     N2UnitS, N2UnitS, N2UnitS,
                     N2UnitS, N2UnitS, N2UnitS,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV,
                     N2UnitV, N2UnitV, N2UnitV]>;
610
611// Miscellaneous
612// -----------------------------------------------------------------------------
613
// COPY is given the generic integer ALU write.
def : InstRW<[WriteI],
             (instrs COPY)>;

// Branch Instructions
// -----------------------------------------------------------------------------

// Branch, immed
// Compare and branch
def : SchedAlias<WriteBr, N2Write_1cyc_1B>;

// Branch, register
def : SchedAlias<WriteBrReg, N2Write_1cyc_1B>;

// Branch and link, immed
// Branch and link, register
// (two micro-ops: one on B and one on S)
def : InstRW<[N2Write_1cyc_1B_1S],
             (instrs BL,
                     BLR)>;
629
630// Arithmetic and Logical Instructions
631// -----------------------------------------------------------------------------
632
633// ALU, basic
634// ALU, basic, flagset
// ALU, basic (including the flag-setting forms)
def : SchedAlias<WriteI, N2Write_1cyc_1I>;

// ALU, extend and shift
def : SchedAlias<WriteISReg, N2Write_2cyc_1M>;
def : SchedAlias<WriteIEReg, N2Write_2cyc_1M>;

// Arithmetic, immediate to logical address tag
def : InstRW<[N2Write_2cyc_1M],
             (instrs ADDG,
                     SUBG)>;

// Convert floating-point condition flags
// Flag manipulation instructions
// (no processor resource is booked; latency only)
let Latency = 1 in
def : WriteRes<WriteSys, []>;

// Insert Random Tags
def : InstRW<[N2Write_2cyc_1M],
             (instrs IRG,
                     IRGstack)>;

// Insert Tag Mask
// Subtract Pointer
// Subtract Pointer, flagset
def : InstRW<[N2Write_1cyc_1I],
             (instrs GMI,
                     SUBP,
                     SUBPS)>;
655
656// Move and shift instructions
657// -----------------------------------------------------------------------------
658
// Move and shift: WriteImm uses the 1-cycle integer write.
def : SchedAlias<WriteImm, N2Write_1cyc_1I>;

// Divide and Multiply Instructions
// -----------------------------------------------------------------------------

// SDIV, UDIV (32-bit and 64-bit forms carry different latencies)
def : SchedAlias<WriteID32, N2Write_12cyc_1M0>;
def : SchedAlias<WriteID64, N2Write_20cyc_1M0>;

// Multiply, 32-bit and 64-bit: both are 2 cycles on an M pipe.
let Latency = 2 in {
  def : WriteRes<WriteIM32, [N2UnitM]>;
  def : WriteRes<WriteIM64, [N2UnitM]>;
}

// Multiply high
def : InstRW<[N2Write_3cyc_1M],
             (instrs SMULHrr,
                     UMULHrr)>;
673
674// Pointer Authentication Instructions (v8.3 PAC)
675// -----------------------------------------------------------------------------
676
677// Authenticate data address
678// Authenticate instruction address
679// Compute pointer authentication code for data address
680// Compute pointer authentication code, using generic key
681// Compute pointer authentication code for instruction address
// Authenticate / compute pointer authentication code, all forms.
// Matched by prefix: every opcode whose name starts with AUT or PAC.
def : InstRW<[N2Write_5cyc_1M0],
             (instregex "^AUT",
                        "^PAC")>;

// Branch and link, register, with pointer authentication
// Branch, register, with pointer authentication
// Branch, return, with pointer authentication
def : InstRW<[N2Write_6cyc_1M0_1B],
             (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ,
                     BRAA, BRAAZ, BRAB, BRABZ,
                     RETAA, RETAB,
                     ERETAA, ERETAB)>;

// Load register, with pointer authentication
def : InstRW<[N2Write_9cyc_1M0_1L],
             (instregex "^LDRA[AB](indexed|writeback)")>;

// Strip pointer authentication code
def : InstRW<[N2Write_2cyc_1M0],
             (instrs XPACD,
                     XPACI,
                     XPACLRI)>;
697
698// Miscellaneous data-processing instructions
699// -----------------------------------------------------------------------------
700
701// Bitfield extract, one reg
702// Bitfield extract, two regs
// NOTE: We don't model the difference between EXTR where both operands are the
// same (one reg) and EXTR where they differ (two regs).
// EXTR: the generic WriteExtr alias and the two explicit opcodes use the same
// write.
def : SchedAlias<WriteExtr, N2Write_3cyc_1I_1M>;
def : InstRW<[N2Write_3cyc_1I_1M],
             (instrs EXTRWrri,
                     EXTRXrri)>;

// Bitfield move, basic
def : SchedAlias<WriteIS, N2Write_1cyc_1I>;

// Bitfield move, insert
def : InstRW<[N2Write_2cyc_1M],
             (instregex "^BFM[WX]ri$")>;
713
714// Load instructions
715// -----------------------------------------------------------------------------
716
// Generic loads: plain and register-indexed.
def : SchedAlias<WriteLD, N2Write_4cyc_1L>;
def : SchedAlias<WriteLDIdx, N2Write_4cyc_1I_1L>;

// Load pair, signed immed offset, signed words
def : InstRW<[N2Write_5cyc_1M0, WriteLDHi],
             (instrs LDPSWi)>;

// Load pair, immed post-index or immed pre-index, signed words
def : InstRW<[N2Write_5cyc_1M0, WriteLDHi, WriteAdr],
             (instregex "^LDPSW(post|pre)$")>;
725
726// Store instructions
727// -----------------------------------------------------------------------------
728
// Generic stores: plain, register-indexed and pair.
def : SchedAlias<WriteST, N2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteSTIdx, N2Write_1cyc_1L01_1D_1I>;
def : SchedAlias<WriteSTP, N2Write_1cyc_1L01_1D>;
// Address writeback; copied from A57.
def : SchedAlias<WriteAdr, N2Write_1cyc_1I>;
733
734// Tag load instructions
735// -----------------------------------------------------------------------------
736
737// Load allocation tag
738// Load multiple allocation tags
// Load allocation tag
// Load multiple allocation tags
def : InstRW<[N2Write_4cyc_1L],
             (instrs LDG,
                     LDGM)>;

// Tag store instructions
// -----------------------------------------------------------------------------

// Pre-index and post-index tag stores:
//   - allocation tags to one or two granules
//   - allocation tags to one or two granules, zeroing
//   - allocation tag and reg pair to memory
def : InstRW<[N2Write_1cyc_1L01_1D_1I],
             (instrs STGPreIndex, STGPostIndex,
                     ST2GPreIndex, ST2GPostIndex,
                     STZGPreIndex, STZGPostIndex,
                     STZ2GPreIndex, STZ2GPostIndex,
                     STGPpre, STGPpost)>;

// Signed-offset tag stores:
//   - allocation tags to one or two granules
//   - allocation tag to two granules, zeroing
//   - allocation tag and reg pair to memory
// plus store multiple allocation tags.
def : InstRW<[N2Write_1cyc_1L01_1D],
             (instrs STGOffset, ST2GOffset,
                     STZGOffset, STZ2GOffset,
                     STGPi,
                     STGM, STZGM)>;
762
763// FP data processing instructions
764// -----------------------------------------------------------------------------
765
766// FP absolute value
767// FP arithmetic
768// FP min/max
769// FP negate
770// FP select
// FP absolute value, arithmetic, min/max, negate and select.
def : SchedAlias<WriteF, N2Write_2cyc_1V>;

// FP compare
def : SchedAlias<WriteFCmp, N2Write_2cyc_1V0>;

// FP divide, square root: default for WriteFDiv. The H/S/D forms of FDIV and
// FSQRT each get an explicit InstRW entry below.
def : SchedAlias<WriteFDiv, N2Write_7cyc_1V0>;

// FP divide, by form.
def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVHrr)>;   // H-form
def : InstRW<[N2Write_10cyc_1V0], (instrs FDIVSrr)>;  // S-form
def : InstRW<[N2Write_15cyc_1V0], (instrs FDIVDrr)>;  // D-form

// FP square root, by form.
def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTHr)>;   // H-form
def : InstRW<[N2Write_9cyc_1V0], (instrs FSQRTSr)>;   // S-form
def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRTDr)>;  // D-form

// FP multiply
let Latency = 3 in
def : WriteRes<WriteFMul, [N2UnitV]>;

// FP multiply accumulate
def : InstRW<[N2Write_4cyc_1V],
             (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;

// FP round to integral
def : InstRW<[N2Write_3cyc_1V0],
             (instregex "^FRINT[AIMNPXZ][HSD]r$",
                        "^FRINT(32|64)[XZ][SD]r$")>;
802
803// FP miscellaneous instructions
804// -----------------------------------------------------------------------------
805
806// FP convert, from gen to vec reg
// FP convert, from gen to vec reg
def : InstRW<[N2Write_3cyc_1M0],
             (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;

// FP convert, from vec to gen reg
def : InstRW<[N2Write_3cyc_1V],
             (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;

// FP convert, Javascript from vec to gen reg
// FP convert, from vec to vec reg
def : SchedAlias<WriteFCvt, N2Write_3cyc_1V0>;

// FP move, immed
// FP move, register
def : SchedAlias<WriteFImm, N2Write_2cyc_1V>;

// FP transfer, from gen to low half of vec reg
// NOTE(review): the list also names FMOVHWr, FMOVHXr, FMOVSWr and FMOVDXr,
// which by their names look like vec-to-gen moves -- confirm they belong
// under this heading.
def : InstRW<[N2Write_3cyc_1M0],
             (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
                     FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;

// FP transfer, from gen to high half of vec reg
def : InstRW<[N2Write_5cyc_1M0_1V],
             (instrs FMOVXDHighr)>;

// FP transfer, from vec to gen reg
def : SchedAlias<WriteFCopy, N2Write_2cyc_1V>;
829
830// FP load instructions
831// -----------------------------------------------------------------------------
832
833// Load vector reg, literal, S/D/Q forms
834// Load vector reg, unscaled immed
// Load vector reg, literal, S/D/Q forms
// Load vector reg, unscaled immed
def : InstRW<[N2Write_6cyc_1L],
             (instregex "^LDR[SDQ]l$",
                        "^LDUR[BHSDQ]i$")>;

// Load vector reg, immed post-index
def : InstRW<[N2Write_6cyc_1I_1L, WriteI],
             (instregex "^LDR[BHSDQ]post$")>;

// Load vector reg, immed pre-index
def : InstRW<[N2Write_6cyc_1I_1L, WriteAdr],
             (instregex "^LDR[BHSDQ]pre$")>;

// Load vector reg, unsigned immed
def : InstRW<[N2Write_6cyc_1L],
             (instregex "^LDR[BHSDQ]ui$")>;

// Load vector reg, register offset:
//   basic; scale, S/D-form; extend; extend + scale, S/D-form
def : InstRW<[N2Write_6cyc_1L, ReadAdrBase],
             (instregex "^LDR[BSD]ro[WX]$")>;

// Load vector reg, register offset:
//   scale, H/Q-form; extend + scale, H/Q-form
def : InstRW<[N2Write_7cyc_1I_1L, ReadAdrBase],
             (instregex "^LDR[HQ]ro[WX]$")>;

// Load vector pair, immed offset, S/D-form
def : InstRW<[N2Write_6cyc_1L, WriteLDHi],
             (instregex "^LDN?P[SD]i$")>;

// Load vector pair, immed offset, Q-form
def : InstRW<[N2Write_6cyc_2L, WriteLDHi],
             (instrs LDPQi,
                     LDNPQi)>;

// Load vector pair, immed post-index or pre-index, S/D-form
def : InstRW<[N2Write_6cyc_1I_1L, WriteLDHi, WriteAdr],
             (instregex "^LDP[SD](pre|post)$")>;

// Load vector pair, immed post-index or pre-index, Q-form
def : InstRW<[N2Write_6cyc_2I_2L, WriteLDHi, WriteAdr],
             (instrs LDPQpost,
                     LDPQpre)>;
871
872// FP store instructions
873// -----------------------------------------------------------------------------
874
875// Store vector reg, unscaled immed, B/H/S/D-form
876// Store vector reg, unscaled immed, Q-form
877def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STUR[BHSDQ]i$")>;
878
879// Store vector reg, immed post-index, B/H/S/D-form
880// Store vector reg, immed post-index, Q-form
881// Store vector reg, immed pre-index, B/H/S/D-form
882// Store vector reg, immed pre-index, Q-form
883def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I, ReadAdrBase],
884             (instregex "^STR[BHSDQ](pre|post)$")>;
885
886// Store vector reg, unsigned immed, B/H/S/D-form
887// Store vector reg, unsigned immed, Q-form
888def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STR[BHSDQ]ui$")>;
889
890// Store vector reg, register offset, basic, B/H/S/D-form
891// Store vector reg, register offset, basic, Q-form
892// Store vector reg, register offset, scale, S/D-form
893// Store vector reg, register offset, extend, B/H/S/D-form
894// Store vector reg, register offset, extend, Q-form
895// Store vector reg, register offset, extend, scale, S/D-form
896def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
897             (instregex "^STR[BSD]ro[WX]$")>;
898
899// Store vector reg, register offset, scale, H-form
900// Store vector reg, register offset, scale, Q-form
901// Store vector reg, register offset, extend, scale, H-form
902// Store vector reg, register offset, extend, scale, Q-form
903def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
904             (instregex "^STR[HQ]ro[WX]$")>;
905
906// Store vector pair, immed offset, S-form
907// Store vector pair, immed offset, D-form
908def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STN?P[SD]i$")>;
909
910// Store vector pair, immed offset, Q-form
911def : InstRW<[N2Write_2cyc_1L01_2V], (instrs STPQi, STNPQi)>;
912
913// Store vector pair, immed post-index, S-form
914// Store vector pair, immed post-index, D-form
915// Store vector pair, immed pre-index, S-form
916// Store vector pair, immed pre-index, D-form
917def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I],
918             (instregex "^STP[SD](pre|post)$")>;
919
920// Store vector pair, immed post-index, Q-form
921def : InstRW<[N2Write_2cyc_1L01_2V_1I], (instrs STPQpost)>;
922
923// Store vector pair, immed pre-index, Q-form
924def : InstRW<[N2Write_2cyc_1L01_2V_2I], (instrs STPQpre)>;
925
926// ASIMD integer instructions
927// -----------------------------------------------------------------------------
928
929// ASIMD absolute diff
930// ASIMD absolute diff long
931// ASIMD arith, basic
932// ASIMD arith, complex
933// ASIMD arith, pair-wise
934// ASIMD compare
935// ASIMD logical
936// ASIMD max/min, basic and pair-wise
937def : SchedAlias<WriteVd, N2Write_2cyc_1V>;
938def : SchedAlias<WriteVq, N2Write_2cyc_1V>;
939
940// ASIMD absolute diff accum
941// ASIMD absolute diff accum long
942def : InstRW<[N2Write_4cyc_1V1],
943             (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
944
945// ASIMD arith, reduce, 4H/4S
946def : InstRW<[N2Write_2cyc_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
947
948// ASIMD arith, reduce, 8B/8H
949def : InstRW<[N2Write_4cyc_1V1_1V],
950             (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
951
952// ASIMD arith, reduce, 16B
953def : InstRW<[N2Write_4cyc_1V1], (instrs ADDVv16i8v, SADDLVv16i8v,
954                                         UADDLVv16i8v)>;
955
956// ASIMD dot product
957// ASIMD dot product using signed and unsigned integers
958def : InstRW<[N2Write_3cyc_1V],
959             (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
960
961// ASIMD matrix multiply-accumulate
962def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA, UMMLA, USMMLA)>;
963
964// ASIMD max/min, reduce, 4H/4S
965def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU](MAX|MIN)Vv4i16v$",
966                                            "^[SU](MAX|MIN)Vv4i32v$")>;
967
968// ASIMD max/min, reduce, 8B/8H
969def : InstRW<[N2Write_4cyc_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
970                                               "^[SU](MAX|MIN)Vv8i16v$")>;
971
// ASIMD max/min, reduce, 16B
// Pattern is written with an explicit leading '^' like the 4H/4S and 8B/8H
// reduce entries.
def : InstRW<[N2Write_4cyc_2V1], (instregex "^[SU](MAX|MIN)Vv16i8v$")>;
974
975// ASIMD multiply
976def : InstRW<[N2Write_4cyc_1V0], (instregex "^MULv", "^SQ(R)?DMULHv")>;
977
978// ASIMD multiply accumulate
979def : InstRW<[N2Write_4cyc_1V0], (instregex "^MLAv", "^MLSv")>;
980
981// ASIMD multiply accumulate high
982def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
983
984// ASIMD multiply accumulate long
985def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
986
987// ASIMD multiply accumulate saturating long
988def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMLALv", "^SQDMLSLv")>;
989
990// ASIMD multiply/multiply long (8x8) polynomial, D-form
991// ASIMD multiply/multiply long (8x8) polynomial, Q-form
992def : InstRW<[N2Write_3cyc_1V0], (instregex "^PMULL?(v8i8|v16i8)$")>;
993
994// ASIMD multiply long
995def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]MULLv", "^SQDMULLv")>;
996
997// ASIMD pairwise add and accumulate long
998def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALPv")>;
999
1000// ASIMD shift accumulate
1001def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;
1002
1003// ASIMD shift by immed, basic
1004def : InstRW<[N2Write_2cyc_1V1], (instregex "^SHLv", "^SHLLv", "^SHRNv",
1005                                            "^SSHLLv", "^SSHRv", "^USHLLv",
1006                                            "^USHRv")>;
1007
1008// ASIMD shift by immed and insert, basic
1009def : InstRW<[N2Write_2cyc_1V1], (instregex "^SLIv", "^SRIv")>;
1010
// ASIMD shift by immed, complex
// One pattern per line: narrowing/rounding/saturating shifts by immediate,
// including the scalar ([bhsd]) and vector (_shift) SQSHL/SQSHLU/UQSHL forms.
def : InstRW<[N2Write_4cyc_1V1], (instregex
    "^RSHRNv",
    "^SQRSHRNv",
    "^SQRSHRUNv",
    "^(SQSHLU?|UQSHL)[bhsd]$",
    "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
    "^SQSHRNv",
    "^SQSHRUNv",
    "^SRSHRv",
    "^UQRSHRNv",
    "^UQSHRNv",
    "^URSHRv")>;
1018
1019// ASIMD shift by register, basic
1020def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]SHLv")>;
1021
1022// ASIMD shift by register, complex
1023def : InstRW<[N2Write_4cyc_1V1],
1024             (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
1025                        "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
1026
1027// ASIMD floating-point instructions
1028// -----------------------------------------------------------------------------
1029
1030// ASIMD FP absolute value/difference
1031// ASIMD FP arith, normal
1032// ASIMD FP compare
1033// ASIMD FP complex add
1034// ASIMD FP max/min, normal
1035// ASIMD FP max/min, pairwise
1036// ASIMD FP negate
1037// Handled by SchedAlias<WriteV[dq], ...>
1038
1039// ASIMD FP complex multiply add
1040def : InstRW<[N2Write_4cyc_1V], (instregex "^FCMLAv")>;
1041
1042// ASIMD FP convert, long (F16 to F32)
1043def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTL(v4|v8)i16")>;
1044
1045// ASIMD FP convert, long (F32 to F64)
1046def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTL(v2|v4)i32")>;
1047
1048// ASIMD FP convert, narrow (F32 to F16)
1049def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTN(v4|v8)i16")>;
1050
1051// ASIMD FP convert, narrow (F64 to F32)
1052def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTN(v2|v4)i32",
1053                                            "^FCVTXN(v2|v4)f32")>;
1054
1055// ASIMD FP convert, other, D-form F32 and Q-form F64
1056def : InstRW<[N2Write_3cyc_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
1057                                            "^[SU]CVTFv2f(32|64)$")>;
1058
1059// ASIMD FP convert, other, D-form F16 and Q-form F32
1060def : InstRW<[N2Write_4cyc_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
1061                                            "^[SU]CVTFv4f(16|32)$")>;
1062
1063// ASIMD FP convert, other, Q-form F16
1064def : InstRW<[N2Write_6cyc_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
1065                                            "^[SU]CVTFv8f16$")>;
1066
1067// ASIMD FP divide, D-form, F16
1068def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVv4f16)>;
1069
1070// ASIMD FP divide, D-form, F32
1071def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv2f32)>;
1072
1073// ASIMD FP divide, Q-form, F16
1074def : InstRW<[N2Write_13cyc_2V0], (instrs FDIVv8f16)>;
1075
1076// ASIMD FP divide, Q-form, F32
1077def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv4f32)>;
1078
1079// ASIMD FP divide, Q-form, F64
1080def : InstRW<[N2Write_15cyc_2V0], (instrs FDIVv2f64)>;
1081
1082// ASIMD FP max/min, reduce, F32 and D-form F16
1083def : InstRW<[N2Write_4cyc_1V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;
1084
1085// ASIMD FP max/min, reduce, Q-form F16
1086def : InstRW<[N2Write_6cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;
1087
1088// ASIMD FP multiply
1089def : InstRW<[N2Write_3cyc_1V], (instregex "^FMULv", "^FMULXv")>;
1090
1091// ASIMD FP multiply accumulate
1092def : InstRW<[N2Write_4cyc_1V], (instregex "^FMLAv", "^FMLSv")>;
1093
1094// ASIMD FP multiply accumulate long
1095def : InstRW<[N2Write_5cyc_1V], (instregex "^FMLALv", "^FMLSLv")>;
1096
// ASIMD FP round, D-form F32 and Q-form F64
// Covers both the FRINT{A,I,M,N,P,X,Z} forms and the FRINT32{X,Z} /
// FRINT64{X,Z} forms. The size selector must be the group "(32|64)": a
// bracket expression in that position matches only a single character, so
// the FRINT32*/FRINT64* v2f32/v2f64 instructions would never be picked up by
// this entry.
def : InstRW<[N2Write_3cyc_1V0],
             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
                        "^FRINT(32|64)[XZ]v2f(32|64)$")>;
1101
1102// ASIMD FP round, D-form F16 and Q-form F32
1103def : InstRW<[N2Write_4cyc_2V0],
1104             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
1105                        "^FRINT(32|64)[XZ]v4f32$")>;
1106
1107
1108// ASIMD FP round, Q-form F16
1109def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>;
1110
1111// ASIMD FP square root, D-form, F16
1112def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTv4f16)>;
1113
1114// ASIMD FP square root, D-form, F32
1115def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv2f32)>;
1116
1117// ASIMD FP square root, Q-form, F16
1118def : InstRW<[N2Write_13cyc_2V0], (instrs FSQRTv8f16)>;
1119
1120// ASIMD FP square root, Q-form, F32
1121def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv4f32)>;
1122
1123// ASIMD FP square root, Q-form, F64
1124def : InstRW<[N2Write_16cyc_2V0], (instrs FSQRTv2f64)>;
1125
1126// ASIMD BFloat16 (BF16) instructions
1127// -----------------------------------------------------------------------------
1128
1129// ASIMD convert, F32 to BF16
1130def : InstRW<[N2Write_4cyc_1V0], (instrs BFCVTN, BFCVTN2)>;
1131
1132// ASIMD dot product
1133def : InstRW<[N2Write_4cyc_1V], (instrs BFDOTv4bf16, BFDOTv8bf16)>;
1134
1135// ASIMD matrix multiply accumulate
1136def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA)>;
1137
1138// ASIMD multiply accumulate long
1139def : InstRW<[N2Write_4cyc_1V], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
1140                                        BFMLALTIdx)>;
1141
1142// Scalar convert, F32 to BF16
1143def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT)>;
1144
1145// ASIMD miscellaneous instructions
1146// -----------------------------------------------------------------------------
1147
1148// ASIMD bit reverse
1149// ASIMD bitwise insert
1150// ASIMD count
1151// ASIMD duplicate, element
1152// ASIMD extract
1153// ASIMD extract narrow
1154// ASIMD insert, element to element
1155// ASIMD move, FP immed
1156// ASIMD move, integer immed
1157// ASIMD reverse
1158// ASIMD table lookup, 1 or 2 table regs
1159// ASIMD table lookup extension, 1 table reg
1160// ASIMD transfer, element to gen reg
1161// ASIMD transpose
1162// ASIMD unzip/zip
1163// Handled by SchedAlias<WriteV[dq], ...>
1164
1165// ASIMD duplicate, gen reg
1166def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;
1167
1168// ASIMD extract narrow, saturating
1169def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>;
1170
1171// ASIMD reciprocal and square root estimate, D-form U32
1172def : InstRW<[N2Write_3cyc_1V0], (instrs URECPEv2i32, URSQRTEv2i32)>;
1173
1174// ASIMD reciprocal and square root estimate, Q-form U32
1175def : InstRW<[N2Write_4cyc_2V0], (instrs URECPEv4i32, URSQRTEv4i32)>;
1176
1177// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
1178def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPEv1f16, FRECPEv1i32,
1179                                         FRECPEv1i64, FRECPEv2f32,
1180                                         FRSQRTEv1f16, FRSQRTEv1i32,
1181                                         FRSQRTEv1i64, FRSQRTEv2f32)>;
1182
1183// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
1184def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPEv4f16, FRECPEv4f32,
1185                                         FRSQRTEv4f16, FRSQRTEv4f32)>;
1186
1187// ASIMD reciprocal and square root estimate, Q-form F16
1188def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPEv8f16, FRSQRTEv8f16)>;
1189
1190// ASIMD reciprocal exponent
1191def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRECPXv")>;
1192
1193// ASIMD reciprocal step
1194def : InstRW<[N2Write_4cyc_1V], (instregex "^FRECPSv", "^FRSQRTSv")>;
1195
1196// ASIMD table lookup, 3 table regs
1197def : InstRW<[N2Write_4cyc_2V], (instrs TBLv8i8Three, TBLv16i8Three)>;
1198
1199// ASIMD table lookup, 4 table regs
1200def : InstRW<[N2Write_4cyc_4V], (instrs TBLv8i8Four, TBLv16i8Four)>;
1201
1202// ASIMD table lookup extension, 2 table reg
1203def : InstRW<[N2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;
1204
1205// ASIMD table lookup extension, 3 table reg
1206def : InstRW<[N2Write_6cyc_4V], (instrs TBXv8i8Three, TBXv16i8Three)>;
1207
1208// ASIMD table lookup extension, 4 table reg
1209def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>;
1210
1211// ASIMD transfer, gen reg to element
1212def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSv")>;
1213
1214// ASIMD load instructions
1215// -----------------------------------------------------------------------------
1216
1217// ASIMD load, 1 element, multiple, 1 reg, D-form
1218def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
1219def : InstRW<[N2Write_6cyc_1L, WriteAdr],
1220             (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;
1221
1222// ASIMD load, 1 element, multiple, 1 reg, Q-form
1223def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
1224def : InstRW<[N2Write_6cyc_1L, WriteAdr],
1225             (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;
1226
1227// ASIMD load, 1 element, multiple, 2 reg, D-form
1228def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
1229def : InstRW<[N2Write_6cyc_2L, WriteAdr],
1230             (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
1231
1232// ASIMD load, 1 element, multiple, 2 reg, Q-form
1233def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
1234def : InstRW<[N2Write_6cyc_2L, WriteAdr],
1235             (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
1236
1237// ASIMD load, 1 element, multiple, 3 reg, D-form
1238def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
1239def : InstRW<[N2Write_6cyc_3L, WriteAdr],
1240             (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
1241
1242// ASIMD load, 1 element, multiple, 3 reg, Q-form
1243def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
1244def : InstRW<[N2Write_6cyc_3L, WriteAdr],
1245             (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
1246
1247// ASIMD load, 1 element, multiple, 4 reg, D-form
1248def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
1249def : InstRW<[N2Write_7cyc_4L, WriteAdr],
1250             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
1251
1252// ASIMD load, 1 element, multiple, 4 reg, Q-form
1253def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
1254def : InstRW<[N2Write_7cyc_4L, WriteAdr],
1255             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
1256
1257// ASIMD load, 1 element, one lane, B/H/S
1258// ASIMD load, 1 element, one lane, D
1259def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "LD1i(8|16|32|64)$")>;
1260def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
1261
1262// ASIMD load, 1 element, all lanes, D-form, B/H/S
1263// ASIMD load, 1 element, all lanes, D-form, D
1264def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "LD1Rv(8b|4h|2s|1d)$")>;
1265def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;
1266
1267// ASIMD load, 1 element, all lanes, Q-form
1268def : InstRW<[N2Write_8cyc_1L_1V],           (instregex "LD1Rv(16b|8h|4s|2d)$")>;
1269def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;
1270
1271// ASIMD load, 2 element, multiple, D-form, B/H/S
1272def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "LD2Twov(8b|4h|2s)$")>;
1273def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
1274
1275// ASIMD load, 2 element, multiple, Q-form, B/H/S
1276// ASIMD load, 2 element, multiple, Q-form, D
1277def : InstRW<[N2Write_8cyc_2L_2V],           (instregex "LD2Twov(16b|8h|4s|2d)$")>;
1278def : InstRW<[N2Write_8cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
1279
1280// ASIMD load, 2 element, one lane, B/H
1281// ASIMD load, 2 element, one lane, S
1282// ASIMD load, 2 element, one lane, D
1283def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "LD2i(8|16|32|64)$")>;
1284def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
1285
1286// ASIMD load, 2 element, all lanes, D-form, B/H/S
1287// ASIMD load, 2 element, all lanes, D-form, D
1288def : InstRW<[N2Write_8cyc_1L_2V],            (instregex "LD2Rv(8b|4h|2s|1d)$")>;
1289def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr],  (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;
1290
1291// ASIMD load, 2 element, all lanes, Q-form
1292def : InstRW<[N2Write_8cyc_1L_2V],           (instregex "LD2Rv(16b|8h|4s|2d)$")>;
1293def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;
1294
1295// ASIMD load, 3 element, multiple, D-form, B/H/S
1296def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "LD3Threev(8b|4h|2s)$")>;
1297def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;
1298
1299// ASIMD load, 3 element, multiple, Q-form, B/H/S
1300def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "LD3Threev(16b|8h|4s)$")>;
1301def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;
1302
1303// ASIMD load, 3 element, multiple, Q-form, D
1304def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "LD3Threev(2d)$")>;
1305def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;
1306
1307// ASIMD load, 3 element, one lane, B/H
1308// ASIMD load, 3 element, one lane, S
1309// ASIMD load, 3 element, one lane, D
1310def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "LD3i(8|16|32|64)$")>;
1311def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
1312
1313// ASIMD load, 3 element, all lanes, D-form, B/H/S
1314// ASIMD load, 3 element, all lanes, D-form, D
1315def : InstRW<[N2Write_8cyc_2L_3V],           (instregex "LD3Rv(8b|4h|2s|1d)$")>;
1316def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;
1317
1318// ASIMD load, 3 element, all lanes, Q-form, B/H/S
1319// ASIMD load, 3 element, all lanes, Q-form, D
1320def : InstRW<[N2Write_8cyc_3L_3V],           (instregex "LD3Rv(16b|8h|4s|2d)$")>;
1321def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;
1322
1323// ASIMD load, 4 element, multiple, D-form, B/H/S
1324def : InstRW<[N2Write_8cyc_3L_4V],           (instregex "LD4Fourv(8b|4h|2s)$")>;
1325def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;
1326
1327// ASIMD load, 4 element, multiple, Q-form, B/H/S
1328// ASIMD load, 4 element, multiple, Q-form, D
1329def : InstRW<[N2Write_9cyc_4L_4V],           (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
1330def : InstRW<[N2Write_9cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
1331
1332// ASIMD load, 4 element, one lane, B/H
1333// ASIMD load, 4 element, one lane, S
1334// ASIMD load, 4 element, one lane, D
1335def : InstRW<[N2Write_8cyc_3L_4V],           (instregex "LD4i(8|16|32|64)$")>;
1336def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
1337
1338// ASIMD load, 4 element, all lanes, D-form, B/H/S
1339// ASIMD load, 4 element, all lanes, D-form, D
1340def : InstRW<[N2Write_8cyc_3L_4V],              (instregex "LD4Rv(8b|4h|2s|1d)$")>;
1341def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr],    (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;
1342
1343// ASIMD load, 4 element, all lanes, Q-form, B/H/S
1344// ASIMD load, 4 element, all lanes, Q-form, D
1345def : InstRW<[N2Write_8cyc_4L_4V],            (instregex "LD4Rv(16b|8h|4s|2d)$")>;
1346def : InstRW<[N2Write_8cyc_4L_4V, WriteAdr],  (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;
1347
1348// ASIMD store instructions
1349// -----------------------------------------------------------------------------
1350
1351// ASIMD store, 1 element, multiple, 1 reg, D-form
1352def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "ST1Onev(8b|4h|2s|1d)$")>;
1353def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;
1354
1355// ASIMD store, 1 element, multiple, 1 reg, Q-form
1356def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "ST1Onev(16b|8h|4s|2d)$")>;
1357def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;
1358
1359// ASIMD store, 1 element, multiple, 2 reg, D-form
1360def : InstRW<[N2Write_2cyc_1L01_1V],           (instregex "ST1Twov(8b|4h|2s|1d)$")>;
1361def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;
1362
1363// ASIMD store, 1 element, multiple, 2 reg, Q-form
1364def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "ST1Twov(16b|8h|4s|2d)$")>;
1365def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;
1366
1367// ASIMD store, 1 element, multiple, 3 reg, D-form
1368def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "ST1Threev(8b|4h|2s|1d)$")>;
1369def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;
1370
1371// ASIMD store, 1 element, multiple, 3 reg, Q-form
1372def : InstRW<[N2Write_2cyc_3L01_3V],           (instregex "ST1Threev(16b|8h|4s|2d)$")>;
1373def : InstRW<[N2Write_2cyc_3L01_3V, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;
1374
1375// ASIMD store, 1 element, multiple, 4 reg, D-form
1376def : InstRW<[N2Write_2cyc_2L01_2V],           (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
1377def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;
1378
1379// ASIMD store, 1 element, multiple, 4 reg, Q-form
1380def : InstRW<[N2Write_2cyc_4L01_4V],           (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
1381def : InstRW<[N2Write_2cyc_4L01_4V, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;
1382
1383// ASIMD store, 1 element, one lane, B/H/S
1384// ASIMD store, 1 element, one lane, D
1385def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "ST1i(8|16|32|64)$")>;
1386def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
1387
1388// ASIMD store, 2 element, multiple, D-form, B/H/S
1389def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "ST2Twov(8b|4h|2s)$")>;
1390def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
1391
1392// ASIMD store, 2 element, multiple, Q-form, B/H/S
1393// ASIMD store, 2 element, multiple, Q-form, D
1394def : InstRW<[N2Write_4cyc_2L01_2V],           (instregex "ST2Twov(16b|8h|4s|2d)$")>;
1395def : InstRW<[N2Write_4cyc_2L01_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
1396
1397// ASIMD store, 2 element, one lane, B/H/S
1398// ASIMD store, 2 element, one lane, D
1399def : InstRW<[N2Write_4cyc_1L01_1V],           (instregex "ST2i(8|16|32|64)$")>;
1400def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
1401
1402// ASIMD store, 3 element, multiple, D-form, B/H/S
1403def : InstRW<[N2Write_5cyc_2L01_2V],           (instregex "ST3Threev(8b|4h|2s)$")>;
1404def : InstRW<[N2Write_5cyc_2L01_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;
1405
1406// ASIMD store, 3 element, multiple, Q-form, B/H/S
1407// ASIMD store, 3 element, multiple, Q-form, D
1408def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST3Threev(16b|8h|4s|2d)$")>;
1409def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;
1410
1411// ASIMD store, 3 element, one lane, B/H
1412// ASIMD store, 3 element, one lane, S
1413// ASIMD store, 3 element, one lane, D
1414def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST3i(8|16|32|64)$")>;
1415def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
1416
1417// ASIMD store, 4 element, multiple, D-form, B/H/S
1418def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST4Fourv(8b|4h|2s)$")>;
1419def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;
1420
1421// ASIMD store, 4 element, multiple, Q-form, B/H/S
1422def : InstRW<[N2Write_7cyc_6L01_6V],           (instregex "ST4Fourv(16b|8h|4s)$")>;
1423def : InstRW<[N2Write_7cyc_6L01_6V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;
1424
1425// ASIMD store, 4 element, multiple, Q-form, D
1426def : InstRW<[N2Write_5cyc_4L01_4V],           (instregex "ST4Fourv(2d)$")>;
1427def : InstRW<[N2Write_5cyc_4L01_4V, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;
1428
1429// ASIMD store, 4 element, one lane, B/H/S
1430def : InstRW<[N2Write_6cyc_3L01_3V],           (instregex "ST4i(8|16|32)$")>;
1431def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST4i(8|16|32)_POST$")>;
1432
1433// ASIMD store, 4 element, one lane, D
1434def : InstRW<[N2Write_4cyc_3L01_3V],            (instregex "ST4i(64)$")>;
1435def : InstRW<[N2Write_4cyc_3L01_3V, WriteAdr],  (instregex "ST4i(64)_POST$")>;
1436
1437// Cryptography extensions
1438// -----------------------------------------------------------------------------
1439
1440// Crypto AES ops
1441def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;
1442
1443// Crypto polynomial (64x64) multiply long
1444def : InstRW<[N2Write_2cyc_1V0], (instrs PMULLv1i64, PMULLv2i64)>;
1445
1446// Crypto SHA1 hash acceleration op
1447// Crypto SHA1 schedule acceleration ops
1448def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>;
1449
1450// Crypto SHA1 hash acceleration ops
1451// Crypto SHA256 hash acceleration ops
1452def : InstRW<[N2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;
1453
1454// Crypto SHA256 schedule acceleration ops
1455def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>;
1456
1457// Crypto SHA512 hash acceleration ops
1458def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;
1459
1460// Crypto SHA3 ops
1461def : InstRW<[N2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;
1462
1463// Crypto SM3 ops
1464def : InstRW<[N2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
1465                                            "^SM3TT[12][AB]$")>;
1466
1467// Crypto SM4 ops
1468def : InstRW<[N2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>;
1469
1470// CRC
1471// -----------------------------------------------------------------------------
1472
1473def : InstRW<[N2Write_2cyc_1M0], (instregex "^CRC32")>;
1474
1475// SVE Predicate instructions
1476// -----------------------------------------------------------------------------
1477
1478// Loop control, based on predicate
1479def : InstRW<[N2Write_2cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP,
1480                                        BRKB_PPmP, BRKB_PPzP)>;
1481
1482// Loop control, based on predicate and flag setting
1483def : InstRW<[N2Write_3cyc_1M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;
1484
1485// Loop control, propagating
1486def : InstRW<[N2Write_2cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;
1487
1488// Loop control, propagating and flag setting
1489def : InstRW<[N2Write_3cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
1490                                            BRKPBS_PPzPP)>;
1491
1492// Loop control, based on GPR
1493def : InstRW<[N2Write_3cyc_1M],
1494             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;
1495
1496def : InstRW<[N2Write_3cyc_1M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>;
1497
1498// Loop terminate
1499def : InstRW<[N2Write_1cyc_1M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;
1500
1501// Predicate counting scalar
1502def : InstRW<[N2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
1503def : InstRW<[N2Write_2cyc_1M],
1504             (instregex "^(CNT|DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$",
1505                        "^SQ(DEC|INC)[BHWD]_XPiWdI$",
1506                        "^(UQDEC|UQINC)[BHWD]_WPiI$")>;
1507
1508// Predicate counting scalar, active predicate
1509def : InstRW<[N2Write_2cyc_1M],
1510             (instregex "^CNTP_XPP_[BHSD]$",
1511                        "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$",
1512                        "^(UQDEC|UQINC)P_WP_[BHSD]$",
1513                        "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>;
1514
1515// Predicate counting vector, active predicate
1516def : InstRW<[N2Write_7cyc_1M_1M0_1V],
1517             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>;
1518
1519// Predicate logical
1520def : InstRW<[N2Write_1cyc_1M0],
1521             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;
1522
1523// Predicate logical, flag setting
1524def : InstRW<[N2Write_2cyc_1M0_1M],
1525             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>;
1526
1527// Predicate reverse
1528def : InstRW<[N2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>;
1529
1530// Predicate select
1531def : InstRW<[N2Write_1cyc_1M0], (instrs SEL_PPPP)>;
1532
1533// Predicate set
1534def : InstRW<[N2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>;
1535
1536// Predicate set/initialize, set flags
1537def : InstRW<[N2Write_3cyc_1M], (instregex "^PTRUES_[BHSD]$")>;
1538
1539// Predicate find first/next
1540def : InstRW<[N2Write_3cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>;
1541
1542// Predicate test
1543def : InstRW<[N2Write_1cyc_1M], (instrs PTEST_PP)>;
1544
1545// Predicate transpose
1546def : InstRW<[N2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSDQ]$")>;
1547
1548// Predicate unpack and widen
1549def : InstRW<[N2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;
1550
1551// Predicate zip/unzip
1552def : InstRW<[N2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;
1553
1554// SVE integer instructions
1555// -----------------------------------------------------------------------------
1556
// Arithmetic, absolute diff
def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]$")>;

// Arithmetic, absolute diff accum
def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;

// Arithmetic, absolute diff accum long
def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;

// Arithmetic, absolute diff long
def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;

// Arithmetic, basic
// (add/sub in predicated, unpredicated and immediate forms, ADR address
// generation, long/wide add/sub and halving add/sub)
def : InstRW<[N2Write_2cyc_1V], (instregex
    "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
    "^(ADD|SUB)_ZZZ_[BHSD]$",
    "^(ADD|SUB|SUBR)_ZI_[BHSD]$",
    "^ADR_[SU]XTW_ZZZ_D_[0123]$",
    "^ADR_LSL_ZZZ_[SD]_[0123]$",
    "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$",
    "^SADDLBT_ZZZ_[HSD]$",
    "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$",
    "^SSUBL(BT|TB)_ZZZ_[HSD]$")>;

// Arithmetic, complex
// (narrowing-high add/sub, saturating and rounding-halving arithmetic)
def : InstRW<[N2Write_2cyc_1V], (instregex
    "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$",
    "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
    "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$",
    "^[SU]Q(ADD|SUB)_ZI_[BHSD]$",
    "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$",
    "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>;

// Arithmetic, large integer
def : InstRW<[N2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;

// Arithmetic, pairwise add
def : InstRW<[N2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>;

// Arithmetic, pairwise add and accum long
def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;

// Arithmetic, shift
// (ASR/LSL/LSR in wide, immediate, vector and reversed-operand forms)
def : InstRW<[N2Write_2cyc_1V1], (instregex
    "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$",
    "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$",
    "^(ASR|LSL|LSR)_ZPmI_[BHSD]$",
    "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$",
    "^(ASR|LSL|LSR)_ZZI_[BHSD]$",
    "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>;

// Arithmetic, shift and accumulate
def : InstRW<[N2Write_4cyc_1V1], (instregex
    "^(SRSRA|SSRA|URSRA|USRA)_ZZI_[BHSD]$")>;

// Arithmetic, shift by immediate
// Arithmetic, shift by immediate and insert
def : InstRW<[N2Write_2cyc_1V1], (instregex
    "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>;

// Arithmetic, shift complex
// (saturating and rounding shifts, including the narrowing right shifts)
def : InstRW<[N2Write_4cyc_1V1], (instregex
    "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$",
    "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$",
    "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$",
    "^SQSHRU?N[BT]_ZZI_[BHS]$",
    "^UQR?SHRN[BT]_ZZI_[BHS]$")>;

// Arithmetic, shift right for divide
def : InstRW<[N2Write_4cyc_1V1], (instregex "^ASRD_ZPmI_[BHSD]$")>;

// Arithmetic, shift rounding
def : InstRW<[N2Write_4cyc_1V1], (instregex
    "^(SRSHL|SRSHLR|URSHL|URSHLR)_ZPmZ_[BHSD]$",
    "^[SU]RSHR_ZPmI_[BHSD]$")>;
1632
// Bit manipulation
def : InstRW<[N2Write_6cyc_2V1], (instregex
    "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]$")>;

// Bitwise select
def : InstRW<[N2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;

// Count/reverse bits
def : InstRW<[N2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>;

// Broadcast logical bitmask immediate to vector
def : InstRW<[N2Write_2cyc_1V], (instrs DUPM_ZI)>;

// Compare and set flags
// (integer compares against a vector, an immediate, or a wide vector)
def : InstRW<[N2Write_4cyc_1V0_1M], (instregex
    "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
    "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;

// Complex add
def : InstRW<[N2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>;

// Complex dot product 8-bit element
def : InstRW<[N2Write_3cyc_1V], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;

// Complex dot product 16-bit element
def : InstRW<[N2Write_4cyc_1V0], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;

// Complex multiply-add B, H, S element size
def : InstRW<[N2Write_4cyc_1V0], (instregex
    "^CMLA_ZZZ_[BHS]$",
    "^CMLA_ZZZI_[HS]$")>;

// Complex multiply-add D element size
def : InstRW<[N2Write_5cyc_2V0], (instrs CMLA_ZZZ_D)>;

// Conditional extract operations, scalar form
def : InstRW<[N2Write_8cyc_1M0_1V1_1V], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;

// Conditional extract operations, SIMD&FP scalar and vector forms
// (COMPACT and SPLICE share this entry)
def : InstRW<[N2Write_3cyc_1V1], (instregex
    "^CLAST[AB]_[VZ]PZ_[BHSD]$",
    "^COMPACT_ZPZ_[SD]$",
    "^SPLICE_ZPZZ?_[BHSD]$")>;

// Convert to floating point, 64b to float or convert to double
def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]$")>;

// Convert to floating point, 64b to half
def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_DtoH$")>;

// Convert to floating point, 32b to single or half
def : InstRW<[N2Write_4cyc_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>;

// Convert to floating point, 32b to double
def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_StoD$")>;

// Convert to floating point, 16b to half
def : InstRW<[N2Write_6cyc_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>;

// Copy, scalar
def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>;

// Copy, scalar SIMD&FP or imm
def : InstRW<[N2Write_2cyc_1V], (instregex
    "^CPY_ZPm[IV]_[BHSD]$",
    "^CPY_ZPzI_[BHSD]$")>;
1696
// Divides, 32 bit
def : InstRW<[N2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>;

// Divides, 64 bit
def : InstRW<[N2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>;

// Dot product, 8 bit
def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]DOT_ZZZI?_S$")>;

// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[N2Write_3cyc_1V], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;

// Dot product, 16 bit
def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>;

// Duplicate, immediate and indexed form
def : InstRW<[N2Write_2cyc_1V], (instregex
    "^DUP_ZI_[BHSD]$",
    "^DUP_ZZI_[BHSDQ]$")>;

// Duplicate, scalar form
def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>;
1718
// Extend, sign or zero
// (SXTB/UXTB, SXTH/UXTH and SXTW/UXTW, predicated merging forms.  The XTW
// pattern names a single element size, so the suffix is written as a plain
// literal rather than a one-character class.)
def : InstRW<[N2Write_2cyc_1V1], (instregex
    "^[SU]XTB_ZPmZ_[HSD]$",
    "^[SU]XTH_ZPmZ_[SD]$",
    "^[SU]XTW_ZPmZ_D$")>;
1723
// Extract
def : InstRW<[N2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;

// Extract narrow saturating
def : InstRW<[N2Write_4cyc_1V1], (instregex
    "^[SU]QXTN[BT]_ZZ_[BHS]$",
    "^SQXTUN[BT]_ZZ_[BHS]$")>;

// Extract/insert operation, SIMD and FP scalar form
def : InstRW<[N2Write_3cyc_1V1], (instregex
    "^LAST[AB]_VPZ_[BHSD]$",
    "^INSR_ZV_[BHSD]$")>;

// Extract/insert operation, scalar
def : InstRW<[N2Write_5cyc_1V1_1M0], (instregex
    "^LAST[AB]_RPZ_[BHSD]$",
    "^INSR_ZR_[BHSD]$")>;

// Histogram operations
def : InstRW<[N2Write_2cyc_1V], (instregex
    "^HISTCNT_ZPzZZ_[SD]$",
    "^HISTSEG_ZZZ$")>;

// Horizontal operations, B, H, S form, immediate operands only
def : InstRW<[N2Write_4cyc_1V0], (instregex "^INDEX_II_[BHS]$")>;

// Horizontal operations, B, H, S form, scalar, immediate operands / scalar
// operands only / immediate, scalar operands
def : InstRW<[N2Write_7cyc_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;

// Horizontal operations, D form, immediate operands only
def : InstRW<[N2Write_5cyc_2V0], (instrs INDEX_II_D)>;

// Horizontal operations, D form, scalar, immediate operands / scalar operands
// only / immediate, scalar operands
def : InstRW<[N2Write_8cyc_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;
1756
// Logical
// (immediate, unpredicated and predicated bitwise operations, plus the
// interleaving EORBT/EORTB forms.  In the unpredicated pattern the optional
// group EOR(BT|TB)? already accepts plain EOR, so EOR is not listed as a
// separate alternative; the set of matched opcode names is unchanged.)
def : InstRW<[N2Write_2cyc_1V], (instregex
    "^(AND|EOR|ORR)_ZI$",
    "^(AND|BIC|EOR(BT|TB)?|ORR)_ZZZ$",
    "^EOR(BT|TB)_ZZZ_[BHSD]$",
    "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;
1763
// Max/min, basic and pairwise
def : InstRW<[N2Write_2cyc_1V], (instregex
    "^[SU](MAX|MIN)_ZI_[BHSD]$",
    "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$")>;

// Matching operations
def : InstRW<[N2Write_2cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>;

// Matrix multiply-accumulate
def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;

// Move prefix
def : InstRW<[N2Write_2cyc_1V], (instregex
    "^MOVPRFX_ZP[mz]Z_[BHSD]$",
    "^MOVPRFX_ZZ$")>;

// Multiply, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0], (instregex
    "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$",
    "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;

// Multiply, D element size
def : InstRW<[N2Write_5cyc_2V0], (instregex
    "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$",
    "^[SU]MULH_(ZPmZ|ZZZ)_D$")>;

// Multiply long
def : InstRW<[N2Write_4cyc_1V0], (instregex
    "^[SU]MULL[BT]_ZZZI_[SD]$",
    "^[SU]MULL[BT]_ZZZ_[HSD]$")>;

// Multiply accumulate, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0], (instregex
    "^ML[AS]_ZZZI_[BHS]$",
    "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>;

// Multiply accumulate, D element size
def : InstRW<[N2Write_5cyc_2V0], (instregex
    "^ML[AS]_ZZZI_D$",
    "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>;

// Multiply accumulate long
def : InstRW<[N2Write_4cyc_1V0], (instregex
    "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
    "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;

// Multiply accumulate saturating doubling long regular
def : InstRW<[N2Write_4cyc_1V0], (instregex
    "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
    "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;

// Multiply saturating doubling high, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0], (instregex
    "^SQDMULH_ZZZ_[BHS]$",
    "^SQDMULH_ZZZI_[HS]$")>;

// Multiply saturating doubling high, D element size
def : InstRW<[N2Write_5cyc_2V0], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;

// Multiply saturating doubling long
def : InstRW<[N2Write_4cyc_1V0], (instregex
    "^SQDMULL[BT]_ZZZ_[HSD]$",
    "^SQDMULL[BT]_ZZZI_[SD]$")>;

// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
// element size
def : InstRW<[N2Write_4cyc_1V0], (instregex
    "^SQRDML[AS]H_ZZZ_[BHS]$",
    "^SQRDCMLAH_ZZZ_[BHS]$",
    "^SQRDML[AS]H_ZZZI_[HS]$",
    "^SQRDCMLAH_ZZZI_[HS]$")>;

// Multiply saturating rounding doubling regular/complex accumulate, D element
// size
def : InstRW<[N2Write_5cyc_2V0], (instregex
    "^SQRDML[AS]H_ZZZI?_D$",
    "^SQRDCMLAH_ZZZ_D$")>;

// Multiply saturating rounding doubling regular/complex, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0], (instregex
    "^SQRDMULH_ZZZ_[BHS]$",
    "^SQRDMULH_ZZZI_[HS]$")>;

// Multiply saturating rounding doubling regular/complex, D element size
def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDMULH_ZZZI?_D$")>;

// Multiply/multiply long, (8x8) polynomial
def : InstRW<[N2Write_2cyc_1V0], (instregex
    "^PMUL_ZZZ_B$",
    "^PMULL[BT]_ZZZ_[HDQ]$")>;

// Predicate counting vector
def : InstRW<[N2Write_2cyc_1V0], (instregex
    "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>;

// Reciprocal estimate
def : InstRW<[N2Write_4cyc_2V0], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S)>;
1846
// Integer arithmetic reductions ([SU]ADDV, [SU]MAXV, [SU]MINV), one entry per
// element size.  Each pattern is closed with '$' like the neighbouring
// "Reduction, logical" entry, so it names exactly the listed opcodes instead
// of every opcode that merely starts with the same text.
// NOTE(review): assumes no suffixed variants of these opcodes exist that were
// relying on the previous prefix match -- confirm with a build, since the
// model sets CompleteModel.

// Reduction, arithmetic, B form
def : InstRW<[N2Write_11cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B$")>;

// Reduction, arithmetic, H form
def : InstRW<[N2Write_9cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H$")>;

// Reduction, arithmetic, S form
def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S$")>;

// Reduction, arithmetic, D form
def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D$")>;
1858
// Reduction, logical
def : InstRW<[N2Write_6cyc_1V_1V1], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>;

// Reverse, vector
// (whole-vector REV plus the byte/halfword/word reversals within elements)
def : InstRW<[N2Write_2cyc_1V], (instregex
    "^REV_ZZ_[BHSD]$",
    "^REVB_ZPmZ_[HSD]$",
    "^REVH_ZPmZ_[SD]$",
    "^REVW_ZPmZ_D$")>;

// Select, vector form
def : InstRW<[N2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>;

// Table lookup
def : InstRW<[N2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>;

// Table lookup extension
def : InstRW<[N2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>;

// Transpose, vector form
def : InstRW<[N2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>;

// Unpack and extend
def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>;

// Zip/unzip
def : InstRW<[N2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;
1885
1886// SVE floating-point instructions
1887// -----------------------------------------------------------------------------
1888
// Floating point absolute value/difference
def : InstRW<[N2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]$")>;

// Floating point arithmetic
// (FADD/FSUB/FSUBR, pairwise FADDP and FNEG)
def : InstRW<[N2Write_2cyc_1V], (instregex
    "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
    "^FADDP_ZPmZZ_[HSD]$",
    "^FNEG_ZPmZ_[HSD]$",
    "^FSUBR_ZPm[IZ]_[HSD]$")>;

// Floating point associative add, F16
def : InstRW<[N2Write_10cyc_1V1], (instrs FADDA_VPZ_H)>;

// Floating point associative add, F32
def : InstRW<[N2Write_6cyc_1V1], (instrs FADDA_VPZ_S)>;

// Floating point associative add, F64
def : InstRW<[N2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;

// Floating point compare
// (absolute compares, compares against a vector or zero, and FCMUO)
def : InstRW<[N2Write_2cyc_1V0], (instregex
    "^FACG[ET]_PPzZZ_[HSD]$",
    "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$",
    "^FCM(LE|LT)_PPzZ0_[HSD]$",
    "^FCMUO_PPzZZ_[HSD]$")>;

// Floating point complex add
def : InstRW<[N2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>;

// Floating point complex multiply add
def : InstRW<[N2Write_5cyc_1V], (instregex
    "^FCMLA_ZPmZZ_[HSD]$",
    "^FCMLA_ZZZI_[HS]$")>;

// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
def : InstRW<[N2Write_4cyc_2V0], (instregex
    "^FCVT_ZPmZ_(HtoS|StoH)$",
    "^FCVTLT_ZPmZ_HtoS$",
    "^FCVTNT_ZPmZ_StoH$")>;

// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
// or F64 to F16)
def : InstRW<[N2Write_3cyc_1V0], (instregex
    "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$",
    "^FCVTLT_ZPmZ_StoD$",
    "^FCVTNT_ZPmZ_DtoS$")>;

// Floating point convert, round to odd
def : InstRW<[N2Write_3cyc_1V0], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;

// Floating point base2 log, F16
def : InstRW<[N2Write_6cyc_4V0], (instrs FLOGB_ZPmZ_H)>;

// Floating point base2 log, F32
def : InstRW<[N2Write_4cyc_2V0], (instrs FLOGB_ZPmZ_S)>;

// Floating point base2 log, F64
def : InstRW<[N2Write_3cyc_1V0], (instrs FLOGB_ZPmZ_D)>;

// Floating point convert to integer, F16
def : InstRW<[N2Write_6cyc_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>;

// Floating point convert to integer, F32
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>;

// Floating point convert to integer, F64
def : InstRW<[N2Write_3cyc_1V0], (instregex
    "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>;

// Floating point copy
def : InstRW<[N2Write_2cyc_1V], (instregex
    "^FCPY_ZPmI_[HSD]$",
    "^FDUP_ZI_[HSD]$")>;
1956
// Floating point divide, F16
def : InstRW<[N2Write_13cyc_1V0], (instregex "^FDIVR?_ZPmZ_H$")>;

// Floating point divide, F32
def : InstRW<[N2Write_10cyc_1V0], (instregex "^FDIVR?_ZPmZ_S$")>;

// Floating point divide, F64
def : InstRW<[N2Write_15cyc_1V0], (instregex "^FDIVR?_ZPmZ_D$")>;

// Floating point min/max pairwise
def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>;

// Floating point min/max
def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;

// Floating point multiply
def : InstRW<[N2Write_3cyc_1V], (instregex
    "^(FSCALE|FMULX)_ZPmZ_[HSD]$",
    "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;

// Floating point multiply accumulate
def : InstRW<[N2Write_4cyc_1V], (instregex
    "^FML[AS]_(ZPmZZ|ZZZI)_[HSD]$",
    "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_ZPmZZ_[HSD]$")>;

// Floating point multiply add/sub accumulate long
def : InstRW<[N2Write_4cyc_1V], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;

// Floating point reciprocal estimate, F16
def : InstRW<[N2Write_6cyc_4V0], (instrs
    FRECPE_ZZ_H, FRECPX_ZPmZ_H, FRSQRTE_ZZ_H)>;

// Floating point reciprocal estimate, F32
def : InstRW<[N2Write_4cyc_2V0], (instrs
    FRECPE_ZZ_S, FRECPX_ZPmZ_S, FRSQRTE_ZZ_S)>;

// Floating point reciprocal estimate, F64
def : InstRW<[N2Write_3cyc_1V0], (instrs
    FRECPE_ZZ_D, FRECPX_ZPmZ_D, FRSQRTE_ZZ_D)>;

// Floating point reciprocal step
def : InstRW<[N2Write_4cyc_1V0], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;

// Floating point reduction, F16
def : InstRW<[N2Write_6cyc_2V], (instregex
    "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>;

// Floating point reduction, F32
def : InstRW<[N2Write_4cyc_1V], (instregex
    "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>;

// Floating point reduction, F64
def : InstRW<[N2Write_2cyc_1V], (instregex
    "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;

// Floating point round to integral, F16
def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;

// Floating point round to integral, F32
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;

// Floating point round to integral, F64
def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;

// Floating point square root, F16
def : InstRW<[N2Write_13cyc_1V0], (instrs FSQRT_ZPmZ_H)>;

// Floating point square root, F32
def : InstRW<[N2Write_10cyc_1V0], (instrs FSQRT_ZPmZ_S)>;

// Floating point square root, F64
def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRT_ZPmZ_D)>;

// Floating point trigonometric exponentiation
def : InstRW<[N2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>;

// Floating point trigonometric multiply add
def : InstRW<[N2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>;

// Floating point trigonometric, miscellaneous
def : InstRW<[N2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;
2037
2038// SVE BFloat16 (BF16) instructions
2039// -----------------------------------------------------------------------------
2040
// Convert, F32 to BF16
def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;

// Dot product
def : InstRW<[N2Write_4cyc_1V], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;

// Matrix multiply accumulate
def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA_ZZZ)>;

// Multiply accumulate long
// (BFMLALB/BFMLALT, vector and indexed forms)
def : InstRW<[N2Write_4cyc_1V], (instregex "^BFMLAL[BT]_ZZ[ZI]$")>;
2052
2053// SVE Load instructions
2054// -----------------------------------------------------------------------------
2055
// Load vector
def : InstRW<[N2Write_6cyc_1L], (instrs LDR_ZXI)>;

// Load predicate
def : InstRW<[N2Write_6cyc_1L_1M], (instrs LDR_PXI)>;

// Contiguous load, scalar + imm
def : InstRW<[N2Write_6cyc_1L], (instregex
    "^LD1[BHWD]_IMM_REAL$",
    "^LD1S?B_[HSD]_IMM_REAL$",
    "^LD1S?H_[SD]_IMM_REAL$",
    "^LD1S?W_D_IMM_REAL$")>;

// Contiguous load, scalar + scalar
def : InstRW<[N2Write_6cyc_1L01], (instregex
    "^LD1[BHWD]$",
    "^LD1S?B_[HSD]$",
    "^LD1S?H_[SD]$",
    "^LD1S?W_D$")>;

// Contiguous load broadcast, scalar + imm
// (LD1R* replicate-element loads and the LD1RQ quadword replicate)
def : InstRW<[N2Write_6cyc_1L], (instregex
    "^LD1R[BHWD]_IMM$",
    "^LD1RSW_IMM$",
    "^LD1RS?B_[HSD]_IMM$",
    "^LD1RS?H_[SD]_IMM$",
    "^LD1RS?W_D_IMM$",
    "^LD1RQ_[BHWD]_IMM$")>;

// Contiguous load broadcast, scalar + scalar
def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;

// Non temporal load, scalar + imm
def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>;

// Non temporal load, scalar + scalar
def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>;

// Non temporal gather load, vector + scalar 32-bit element size
def : InstRW<[N2Write_9cyc_1L_1V], (instregex
    "^LDNT1[BHW]_ZZR_S_REAL$",
    "^LDNT1S[BH]_ZZR_S_REAL$")>;

// Non temporal gather load, vector + scalar 64-bit element size
// (the regex covers the B/H/W element loads; LDNT1D is listed by name)
def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;

// Contiguous first faulting load, scalar + scalar
def : InstRW<[N2Write_6cyc_1L_1S], (instregex
    "^LDFF1[BHWD]_REAL$",
    "^LDFF1S?B_[HSD]_REAL$",
    "^LDFF1S?H_[SD]_REAL$",
    "^LDFF1S?W_D_REAL$")>;

// Contiguous non faulting load, scalar + imm
def : InstRW<[N2Write_6cyc_1L], (instregex
    "^LDNF1[BHWD]_IMM_REAL$",
    "^LDNF1S?B_[HSD]_IMM_REAL$",
    "^LDNF1S?H_[SD]_IMM_REAL$",
    "^LDNF1S?W_D_IMM_REAL$")>;
2109
// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>;

// Contiguous Load two structures to two vectors, scalar + scalar
def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD2[BHWD]$")>;

// Contiguous Load three structures to three vectors, scalar + imm
def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD3[BHWD]_IMM$")>;

// Contiguous Load three structures to three vectors, scalar + scalar
def : InstRW<[N2Write_10cyc_1V_1L_1S], (instregex "^LD3[BHWD]$")>;

// Contiguous Load four structures to four vectors, scalar + imm
def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>;

// Contiguous Load four structures to four vectors, scalar + scalar
def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>;

// Gather load, vector + imm, 32-bit element size
// (GLD1 and the first-faulting GLDFF1 forms share each gather entry)
def : InstRW<[N2Write_9cyc_1L_1V], (instregex
    "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
    "^GLD(FF)?1W_IMM_REAL$")>;

// Gather load, vector + imm, 64-bit element size
def : InstRW<[N2Write_9cyc_2L_2V], (instregex
    "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
    "^GLD(FF)?1D_IMM_REAL$")>;

// Gather load, 64-bit element size
def : InstRW<[N2Write_9cyc_2L_2V], (instregex
    "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
    "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
    "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
    "^GLD(FF)?1D_(SCALED_)?REAL$")>;
2142
// Gather load, 32-bit scaled offset
// (GLD1/GLDFF1 with a sign- or zero-extended 32-bit scaled offset.  Both
// patterns end in '$': "_REAL" is the final suffix of these opcode names, so
// the end anchor keeps the second pattern consistent with the first without
// changing which opcodes it names.)
def : InstRW<[N2Write_10cyc_2L_2V], (instregex
    "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
    "^GLD(FF)?1W_[SU]XTW_SCALED_REAL$")>;
2147
// Gather load, 32-bit unpacked unscaled offset
def : InstRW<[N2Write_9cyc_1L_1V], (instregex
    "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
    "^GLD(FF)?1W_[SU]XTW_REAL$")>;
2151
2152// SVE Store instructions
2153// -----------------------------------------------------------------------------
2154
// Store from predicate reg
def : InstRW<[N2Write_1cyc_1L01], (instrs STR_PXI)>;

// Store from vector reg
def : InstRW<[N2Write_2cyc_1L01_1V], (instrs STR_ZXI)>;

// Contiguous store, scalar + imm
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex
    "^ST1[BHWD]_IMM$",
    "^ST1B_[HSD]_IMM$",
    "^ST1H_[SD]_IMM$",
    "^ST1W_D_IMM$")>;

// Contiguous store, scalar + scalar
// (the ST1H forms get their own write, which also names the S pipeline)
def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex
    "^ST1[BWD]$",
    "^ST1B_[HSD]$",
    "^ST1W_D$")>;

// Contiguous store two structures from two vectors, scalar + imm
def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BHWD]_IMM$")>;

// Contiguous store two structures from two vectors, scalar + scalar, H form
def : InstRW<[N2Write_4cyc_1L01_1S_1V], (instrs ST2H)>;

// Contiguous store two structures from two vectors, scalar + scalar,
// B, W, D forms
def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BWD]$")>;

// Contiguous store three structures from three vectors, scalar + imm
def : InstRW<[N2Write_7cyc_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;

// Contiguous store three structures from three vectors, scalar + scalar,
// H form
def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instrs ST3H)>;

// Contiguous store three structures from three vectors, scalar + scalar,
// B, W, D forms
def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instregex "^ST3[BWD]$")>;

// Contiguous store four structures from four vectors, scalar + imm
def : InstRW<[N2Write_11cyc_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;

// Contiguous store four structures from four vectors, scalar + scalar,
// H form
def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instrs ST4H)>;

// Contiguous store four structures from four vectors, scalar + scalar,
// B, W, D forms
def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instregex "^ST4[BWD]$")>;

// Non temporal store, scalar + imm
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;

// Non temporal store, scalar + scalar
// (H form first, then the B, W, D forms)
def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
2206
// Scatter non temporal store, vector + scalar 32-bit element size
// (pattern has no trailing '$', so it matches on the opcode-name prefix)
def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^STNT1[BHW]_ZZR_S")>;

// Scatter non temporal store, vector + scalar 64-bit element size
// (pattern has no trailing '$', so it matches on the opcode-name prefix)
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZZR_D")>;

// Scatter store vector + imm 32-bit element size
def : InstRW<[N2Write_4cyc_2L01_2V], (instregex
    "^SST1[BH]_S_IMM$",
    "^SST1W_IMM$")>;

// Scatter store vector + imm 64-bit element size
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex
    "^SST1[BHW]_D_IMM$",
    "^SST1D_IMM$")>;

// Scatter store, 32-bit scaled offset
def : InstRW<[N2Write_4cyc_2L01_2V], (instregex
    "^SST1(H_S|W)_[SU]XTW_SCALED$")>;

// Scatter store, 32-bit unpacked unscaled offset
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex
    "^SST1[BHW]_D_[SU]XTW$",
    "^SST1D_[SU]XTW$")>;

// Scatter store, 32-bit unpacked scaled offset
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex
    "^SST1[HW]_D_[SU]XTW_SCALED$",
    "^SST1D_[SU]XTW_SCALED$")>;

// Scatter store, 32-bit unscaled offset
def : InstRW<[N2Write_4cyc_2L01_2V], (instregex
    "^SST1[BH]_S_[SU]XTW$",
    "^SST1W_[SU]XTW$")>;

// Scatter store, 64-bit scaled offset
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex
    "^SST1[HW]_D_SCALED$",
    "^SST1D_SCALED$")>;

// Scatter store, 64-bit unscaled offset
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex
    "^SST1[BHW]_D$",
    "^SST1D$")>;
2244
2245// SVE Miscellaneous instructions
2246// -----------------------------------------------------------------------------
2247
// Read first fault register, unpredicated
def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;

// Read first fault register, predicated
def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;

// Read first fault register and set flags
def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>;

// Set first fault register
// Write to first fault register
def : InstRW<[N2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;

// Prefetch
// (pattern has no trailing '$': every opcode whose name starts with
// PRFB/PRFH/PRFW/PRFD is covered)
def : InstRW<[N2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;
2263
2264// SVE Cryptographic instructions
2265// -----------------------------------------------------------------------------
2266
// Crypto AES ops
def : InstRW<[N2Write_2cyc_1V], (instregex
    "^AES[DE]_ZZZ_B$",
    "^AESI?MC_ZZ_B$")>;

// Crypto SHA3 ops
def : InstRW<[N2Write_2cyc_1V0], (instregex
    "^(BCAX|EOR3)_ZZZZ$",
    "^RAX1_ZZZ_D$",
    "^XAR_ZZZI_[BHSD]$")>;

// Crypto SM4 ops
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;
2278
2279}
2280