xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1//=- AArch64SchedNeoverseV2.td - NeoverseV2 Scheduling Defs --*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the scheduling model for the Arm Neoverse V2 processors.
10// All information is taken from the V2 Software Optimisation guide:
11//
12// https://developer.arm.com/documentation/PJDOC-466751330-593177/r0p2
13//
14//===----------------------------------------------------------------------===//
15
// Machine model record for the Neoverse V2. Several values are borrowed from
// other cores; the per-field notes say which.
def NeoverseV2Model : SchedMachineModel {
  // Micro-ops dispatched at a time.
  let IssueWidth = 16;
  // Entries in micro-op re-order buffer. NOTE: Copied from N2.
  let MicroOpBufferSize = 160;
  // Optimistic load latency.
  let LoadLatency = 4;
  // Extra cycles for mispredicted branch. NOTE: Copied from N2.
  let MispredictPenalty = 10;
  // NOTE: Copied from Cortex-A57.
  let LoopMicroOpBufferSize = 16;
  let CompleteModel = 1;

  // Unsupported predicates: SMEUnsupported.F extended with HasSVE2p1 and
  // HasCPA.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SMEUnsupported.F, [HasSVE2p1, HasCPA]);
}
27
28//===----------------------------------------------------------------------===//
29// Define each kind of processor resource and number available on Neoverse V2.
30// Instructions are first fetched and then decoded into internal macro-ops
31// (MOPs). From there, the MOPs proceed through register renaming and dispatch
32// stages. A MOP can be split into two micro-ops further down the pipeline
33// after the decode stage. Once dispatched, micro-ops wait for their operands
34// and issue out-of-order to one of seventeen issue pipelines. Each issue
35// pipeline can accept one micro-op per cycle.
36
37let SchedModel = NeoverseV2Model in {
38
// Define the (17) issue ports. The ProcResource argument is the number of
// units of that kind: 2 + 4x1 + 2x1 + 4x1 + 2 + 1 + 2 = 17.

// Branch 0/1
def V2UnitB : ProcResource<2>;

// Integer single-cycle 0, 1, 2 and 3
def V2UnitS0 : ProcResource<1>;
def V2UnitS1 : ProcResource<1>;
def V2UnitS2 : ProcResource<1>;
def V2UnitS3 : ProcResource<1>;

// Integer single/multicycle 0 and 1
def V2UnitM0 : ProcResource<1>;
def V2UnitM1 : ProcResource<1>;

// FP/ASIMD 0, 1, 2 and 3
def V2UnitV0 : ProcResource<1>;
def V2UnitV1 : ProcResource<1>;
def V2UnitV2 : ProcResource<1>;
def V2UnitV3 : ProcResource<1>;

// Load/Store 0/1
def V2UnitL01 : ProcResource<2>;
// Load 2
def V2UnitL2 : ProcResource<1>;
// Store data 0/1
def V2UnitD : ProcResource<2>;
54
// Resource groups: each one names a set of the issue ports defined for this
// model, so a write type can target "any of these pipelines".

// Integer single-cycle 0/1
def V2UnitR : ProcResGroup<[V2UnitS0, V2UnitS1]>;
// Integer single-cycle 0/1/2/3
def V2UnitS : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3]>;
// Integer single-cycle 0/1 and single/multicycle 0/1
def V2UnitF : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitM0, V2UnitM1]>;
// Integer single-cycle 0/1/2/3 and single/multicycle 0/1
def V2UnitI : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3,
                            V2UnitM0, V2UnitM1]>;
// Integer single/multicycle 0/1
def V2UnitM : ProcResGroup<[V2UnitM0, V2UnitM1]>;
// Load/Store 0/1 and Load 2
def V2UnitL : ProcResGroup<[V2UnitL01, V2UnitL2]>;
// FP/ASIMD 0/1/2/3
def V2UnitV : ProcResGroup<[V2UnitV0, V2UnitV1, V2UnitV2, V2UnitV3]>;
// FP/ASIMD 0/1
def V2UnitV01 : ProcResGroup<[V2UnitV0, V2UnitV1]>;
// FP/ASIMD 0/2
def V2UnitV02 : ProcResGroup<[V2UnitV0, V2UnitV2]>;
// FP/ASIMD 1/3
def V2UnitV13 : ProcResGroup<[V2UnitV1, V2UnitV3]>;
// FP/ASIMD 2/3
def V2UnitV23 : ProcResGroup<[V2UnitV2, V2UnitV3]>;
66
// Define commonly used read types.

// No forwarding is provided for these types: every read listed below gets an
// anonymous ReadAdvance record with an advance of 0 cycles. The list order
// matches the order in which the records were originally written out.
foreach Rd = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID,
              ReadExtrHi, ReadAdrBase, ReadST, ReadVLD] in
  def : ReadAdvance<Rd, 0>;
80
// NOTE: Copied from N2.
// None of these generic writes is bound to an issue resource (empty list).
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
// Barrier and hint share the same single-cycle latency.
let Latency = 1 in {
def : WriteRes<WriteBarrier, []>;
def : WriteRes<WriteHint, []>;
}
def : WriteRes<WriteLDHi, []> { let Latency = 4; }
86
87//===----------------------------------------------------------------------===//
88// Define customized scheduler read/write types specific to the Neoverse V2.
89
90//===----------------------------------------------------------------------===//
91
// Define generic 0 micro-op types: empty resource list, zero latency.
def V2Write_0cyc : SchedWriteRes<[]> {
  let Latency = 0;
}
94
// Define generic 1 micro-op types
//
// Names follow V2Write_<latency>cyc_1<unit>: the latency in the name equals
// the Latency field and the unit suffix names the single resource in the
// list. Field overrides are supplied through a leading `let ... in`.
// NOTE(review): the ReleaseAtCycles overrides presumably model how long the
// unit stays occupied - confirm against the optimisation guide.

let Latency = 1 in def V2Write_1cyc_1B : SchedWriteRes<[V2UnitB]>;
let Latency = 1 in def V2Write_1cyc_1F : SchedWriteRes<[V2UnitF]>;
let Latency = 1 in def V2Write_1cyc_1I : SchedWriteRes<[V2UnitI]>;
let Latency = 1 in def V2Write_1cyc_1M : SchedWriteRes<[V2UnitM]>;
let Latency = 1 in def V2Write_1cyc_1M0 : SchedWriteRes<[V2UnitM0]>;
let Latency = 1 in def V2Write_1cyc_1L01 : SchedWriteRes<[V2UnitL01]>;
let Latency = 2 in def V2Write_2cyc_1M : SchedWriteRes<[V2UnitM]>;
let Latency = 3 in def V2Write_3cyc_1M : SchedWriteRes<[V2UnitM]>;
let Latency = 2 in def V2Write_2cyc_1M0 : SchedWriteRes<[V2UnitM0]>;
let Latency = 3 in def V2Write_3cyc_1M0 : SchedWriteRes<[V2UnitM0]>;
let Latency = 5 in def V2Write_5cyc_1M0 : SchedWriteRes<[V2UnitM0]>;
let Latency = 12, ReleaseAtCycles = [12] in
def V2Write_12cyc_1M0 : SchedWriteRes<[V2UnitM0]>;
let Latency = 20, ReleaseAtCycles = [20] in
def V2Write_20cyc_1M0 : SchedWriteRes<[V2UnitM0]>;
let Latency = 4 in def V2Write_4cyc_1L : SchedWriteRes<[V2UnitL]>;
let Latency = 6 in def V2Write_6cyc_1L : SchedWriteRes<[V2UnitL]>;
let Latency = 2 in def V2Write_2cyc_1V : SchedWriteRes<[V2UnitV]>;
let Latency = 2 in def V2Write_2cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 2 in def V2Write_2cyc_1V01 : SchedWriteRes<[V2UnitV01]>;
let Latency = 2 in def V2Write_2cyc_1V23 : SchedWriteRes<[V2UnitV23]>;
let Latency = 3 in def V2Write_3cyc_1V : SchedWriteRes<[V2UnitV]>;
let Latency = 3, ReleaseAtCycles = [2] in
def V2Write_3cyc_1V01 : SchedWriteRes<[V2UnitV01]>;
let Latency = 3 in def V2Write_3cyc_1V23 : SchedWriteRes<[V2UnitV23]>;
let Latency = 4 in def V2Write_4cyc_1V : SchedWriteRes<[V2UnitV]>;
let Latency = 5 in def V2Write_5cyc_1V : SchedWriteRes<[V2UnitV]>;
let Latency = 6 in def V2Write_6cyc_1V : SchedWriteRes<[V2UnitV]>;
let Latency = 12 in def V2Write_12cyc_1V : SchedWriteRes<[V2UnitV]>;
let Latency = 3 in def V2Write_3cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 3 in def V2Write_3cyc_1V02 : SchedWriteRes<[V2UnitV02]>;
let Latency = 4 in def V2Write_4cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 4 in def V2Write_4cyc_1V02 : SchedWriteRes<[V2UnitV02]>;
let Latency = 7, ReleaseAtCycles = [7] in
def V2Write_7cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 7, ReleaseAtCycles = [2] in
def V2Write_7cyc_1V02 : SchedWriteRes<[V2UnitV02]>;
let Latency = 9 in def V2Write_9cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 9, ReleaseAtCycles = [2] in
def V2Write_9cyc_1V02 : SchedWriteRes<[V2UnitV02]>;
let Latency = 10 in def V2Write_10cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 10, ReleaseAtCycles = [2] in
def V2Write_10cyc_1V02 : SchedWriteRes<[V2UnitV02]>;
let Latency = 12, ReleaseAtCycles = [11] in
def V2Write_12cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 13 in def V2Write_13cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 15 in def V2Write_15cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 15, ReleaseAtCycles = [8] in
def V2Write_15cyc_1V02 : SchedWriteRes<[V2UnitV02]>;
let Latency = 16 in def V2Write_16cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 16, ReleaseAtCycles = [8] in
def V2Write_16cyc_1V02 : SchedWriteRes<[V2UnitV02]>;
let Latency = 20, ReleaseAtCycles = [20] in
def V2Write_20cyc_1V0 : SchedWriteRes<[V2UnitV0]>;
let Latency = 2 in def V2Write_2cyc_1V1 : SchedWriteRes<[V2UnitV1]>;
let Latency = 2 in def V2Write_2cyc_1V13 : SchedWriteRes<[V2UnitV13]>;
let Latency = 3 in def V2Write_3cyc_1V1 : SchedWriteRes<[V2UnitV1]>;
let Latency = 4 in def V2Write_4cyc_1V1 : SchedWriteRes<[V2UnitV1]>;
let Latency = 4 in def V2Write_4cyc_1V13 : SchedWriteRes<[V2UnitV13]>;
let Latency = 6 in def V2Write_6cyc_1V1 : SchedWriteRes<[V2UnitV1]>;
let Latency = 10 in def V2Write_10cyc_1V1 : SchedWriteRes<[V2UnitV1]>;
let Latency = 6 in def V2Write_6cyc_1L01 : SchedWriteRes<[V2UnitL01]>;
159
//===----------------------------------------------------------------------===//
// Define generic 2 micro-op types
//
// NumMicroOps is shared by the whole section, so it is set once by the
// enclosing `let`; each record only states its own Latency. Names follow
// V2Write_<latency>cyc_<count><unit>..., mirroring the resource list.

let NumMicroOps = 2 in {

def V2Write_1cyc_1B_1R : SchedWriteRes<[V2UnitB, V2UnitR]> { let Latency = 1; }
def V2Write_6cyc_1M0_1B : SchedWriteRes<[V2UnitM0, V2UnitB]> { let Latency = 6; }
def V2Write_9cyc_1M0_1L : SchedWriteRes<[V2UnitM0, V2UnitL]> { let Latency = 9; }
def V2Write_3cyc_1I_1M : SchedWriteRes<[V2UnitI, V2UnitM]> { let Latency = 3; }
def V2Write_1cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { let Latency = 1; }
def V2Write_3cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { let Latency = 3; }
def V2Write_4cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { let Latency = 4; }
def V2Write_5cyc_1L_1F : SchedWriteRes<[V2UnitL, V2UnitF]> { let Latency = 5; }
def V2Write_6cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> { let Latency = 6; }
def V2Write_7cyc_1F_1L : SchedWriteRes<[V2UnitF, V2UnitL]> { let Latency = 7; }
def V2Write_7cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> { let Latency = 7; }
def V2Write_1cyc_1L01_1D : SchedWriteRes<[V2UnitL01, V2UnitD]> { let Latency = 1; }
def V2Write_5cyc_1M0_1V : SchedWriteRes<[V2UnitM0, V2UnitV]> { let Latency = 5; }
def V2Write_2cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> { let Latency = 2; }
def V2Write_2cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> { let Latency = 2; }
def V2Write_2cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> { let Latency = 2; }
def V2Write_4cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> { let Latency = 4; }
def V2Write_4cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> { let Latency = 4; }
def V2Write_4cyc_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> { let Latency = 4; }
def V2Write_4cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> { let Latency = 4; }
def V2Write_4cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
def V2Write_4cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> { let Latency = 4; }
def V2Write_6cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> { let Latency = 6; }
def V2Write_6cyc_2L : SchedWriteRes<[V2UnitL, V2UnitL]> { let Latency = 6; }
def V2Write_8cyc_1L_1V : SchedWriteRes<[V2UnitL, V2UnitV]> { let Latency = 8; }
def V2Write_4cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> { let Latency = 4; }
def V2Write_3cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { let Latency = 3; }
def V2Write_4cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { let Latency = 4; }
def V2Write_1cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { let Latency = 1; }
def V2Write_2cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { let Latency = 2; }
def V2Write_6cyc_2V1 : SchedWriteRes<[V2UnitV1, V2UnitV1]> { let Latency = 6; }
def V2Write_4cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> { let Latency = 4; }
def V2Write_5cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> { let Latency = 5; }
def V2Write_5cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> { let Latency = 5; }
def V2Write_5cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
def V2Write_6cyc_1V1_1M0 : SchedWriteRes<[V2UnitV1, V2UnitM0]> { let Latency = 6; }
def V2Write_7cyc_1M0_1V02 : SchedWriteRes<[V2UnitM0, V2UnitV02]> { let Latency = 7; }
def V2Write_2cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> { let Latency = 2; }
def V2Write_3cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> { let Latency = 3; }
def V2Write_6cyc_1V_1V13 : SchedWriteRes<[V2UnitV, V2UnitV13]> { let Latency = 6; }
def V2Write_6cyc_1L_1M : SchedWriteRes<[V2UnitL, V2UnitM]> { let Latency = 6; }
def V2Write_6cyc_1L_1S : SchedWriteRes<[V2UnitL, V2UnitS]> { let Latency = 6; }
def V2Write_4cyc_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> { let Latency = 4; }
def V2Write_8cyc_1M0_1V01 : SchedWriteRes<[V2UnitM0, V2UnitV01]> { let Latency = 8; }

} // NumMicroOps = 2
382
//===----------------------------------------------------------------------===//
// Define generic 3 micro-op types
//
// NumMicroOps comes from the enclosing `let`; each record states only its
// Latency. Every resource list below has exactly three entries.

let NumMicroOps = 3 in {

def V2Write_1cyc_1L01_1D_1I
    : SchedWriteRes<[V2UnitL01, V2UnitD, V2UnitI]> { let Latency = 1; }
def V2Write_2cyc_1L01_1V01_1I
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitI]> { let Latency = 2; }
def V2Write_2cyc_1L01_2V01
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> { let Latency = 2; }
def V2Write_4cyc_1L01_2V01
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> { let Latency = 4; }
def V2Write_9cyc_1L_2V
    : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> { let Latency = 9; }
def V2Write_4cyc_3V01
    : SchedWriteRes<[V2UnitV01, V2UnitV01, V2UnitV01]> { let Latency = 4; }
def V2Write_7cyc_1M_1M0_1V
    : SchedWriteRes<[V2UnitM, V2UnitM0, V2UnitV]> { let Latency = 7; }
def V2Write_2cyc_1L01_1S_1V
    : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV]> { let Latency = 2; }
def V2Write_2cyc_1L01_1S_1V01
    : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV01]> { let Latency = 2; }
def V2Write_6cyc_3L
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL]> { let Latency = 6; }
def V2Write_6cyc_3V
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV]> { let Latency = 6; }
def V2Write_8cyc_1L_2V
    : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> { let Latency = 8; }

} // NumMicroOps = 3
445
//===----------------------------------------------------------------------===//
// Define generic 4 micro-op types
//
// NumMicroOps comes from the enclosing `let`; each record states only its
// Latency. Every resource list below has exactly four entries.

let NumMicroOps = 4 in {

def V2Write_2cyc_1L01_2V01_1I
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01, V2UnitI]> {
  let Latency = 2;
}
def V2Write_2cyc_2L01_2V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01, V2UnitV01]> {
  let Latency = 2;
}
def V2Write_4cyc_2L01_2V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01, V2UnitV01]> {
  let Latency = 4;
}
def V2Write_5cyc_1I_3L
    : SchedWriteRes<[V2UnitI, V2UnitL, V2UnitL, V2UnitL]> {
  let Latency = 5;
}
def V2Write_9cyc_2L_2V1
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV1, V2UnitV1]> {
  let Latency = 9;
}
def V2Write_6cyc_4V0
    : SchedWriteRes<[V2UnitV0, V2UnitV0, V2UnitV0, V2UnitV0]> {
  let Latency = 6;
}
def V2Write_8cyc_4V
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 8;
}
def V2Write_6cyc_2V_2V13
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13, V2UnitV13]> {
  let Latency = 6;
}
def V2Write_8cyc_2V_2V13
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13, V2UnitV13]> {
  let Latency = 8;
}
def V2Write_6cyc_4V02
    : SchedWriteRes<[V2UnitV02, V2UnitV02, V2UnitV02, V2UnitV02]> {
  let Latency = 6;
}
def V2Write_6cyc_4V
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 6;
}
def V2Write_8cyc_2L_2V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> {
  let Latency = 8;
}
def V2Write_9cyc_2L_2V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> {
  let Latency = 9;
}
def V2Write_2cyc_2L01_2V
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV, V2UnitV]> {
  let Latency = 2;
}
def V2Write_4cyc_2L01_2V
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV, V2UnitV]> {
  let Latency = 4;
}
def V2Write_8cyc_2M0_2V02
    : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitV02, V2UnitV02]> {
  let Latency = 8;
}
def V2Write_8cyc_2V_2V1
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV1, V2UnitV1]> {
  let Latency = 8;
}
def V2Write_4cyc_2M0_2M
    : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM, V2UnitM]> {
  let Latency = 4;
}
def V2Write_5cyc_2M0_2M
    : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM, V2UnitM]> {
  let Latency = 5;
}
def V2Write_6cyc_2I_2L
    : SchedWriteRes<[V2UnitI, V2UnitI, V2UnitL, V2UnitL]> {
  let Latency = 6;
}
def V2Write_7cyc_4L
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL]> {
  let Latency = 7;
}
def V2Write_6cyc_1L01_3V01
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 6;
}

} // NumMicroOps = 4
572
//===----------------------------------------------------------------------===//
// Define generic 5 micro-op types
//
// NumMicroOps comes from the enclosing `let`; each record states only its
// Latency. Every resource list below has exactly five entries.

let NumMicroOps = 5 in {

def V2Write_2cyc_1L01_2V01_2I : SchedWriteRes<[
    V2UnitL01,
    V2UnitV01, V2UnitV01,
    V2UnitI, V2UnitI]> {
  let Latency = 2;
}
def V2Write_8cyc_2L_3V : SchedWriteRes<[
    V2UnitL, V2UnitL,
    V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 8;
}
def V2Write_9cyc_1L_4V : SchedWriteRes<[
    V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 9;
}
def V2Write_10cyc_1L_4V : SchedWriteRes<[
    V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 10;
}
def V2Write_6cyc_5V : SchedWriteRes<[
    V2UnitV, V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 6;
}

} // NumMicroOps = 5
605
//===----------------------------------------------------------------------===//
// Define generic 6 micro-op types
//
// NumMicroOps comes from the enclosing `let`; each record states only its
// Latency. Resource lists are laid out one unit kind per row.

let NumMicroOps = 6 in {

def V2Write_8cyc_3L_3V : SchedWriteRes<[
    V2UnitL, V2UnitL, V2UnitL,
    V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 8;
}
def V2Write_9cyc_3L_3V : SchedWriteRes<[
    V2UnitL, V2UnitL, V2UnitL,
    V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 9;
}
def V2Write_9cyc_2L_4V : SchedWriteRes<[
    V2UnitL, V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 9;
}
def V2Write_9cyc_2L_2V_2S : SchedWriteRes<[
    V2UnitL, V2UnitL,
    V2UnitV, V2UnitV,
    V2UnitS, V2UnitS]> {
  let Latency = 9;
}
def V2Write_9cyc_2V_4V13 : SchedWriteRes<[
    V2UnitV, V2UnitV,
    V2UnitV13, V2UnitV13, V2UnitV13, V2UnitV13]> {
  let Latency = 9;
}
def V2Write_2cyc_3L01_3V : SchedWriteRes<[
    V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 2;
}
def V2Write_4cyc_2L01_4V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 4;
}
def V2Write_5cyc_2L01_4V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 5;
}
def V2Write_2cyc_3L01_3V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 2;
}
def V2Write_4cyc_2L01_2S_2V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01,
    V2UnitS, V2UnitS,
    V2UnitV01, V2UnitV01]> {
  let Latency = 4;
}

} // NumMicroOps = 6
668
//===----------------------------------------------------------------------===//
// Define generic 7 micro-op types
//
// Three load-group entries followed by four FP/ASIMD-group entries.

let NumMicroOps = 7 in
def V2Write_8cyc_3L_4V : SchedWriteRes<[
    V2UnitL, V2UnitL, V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 8;
}
677
//===----------------------------------------------------------------------===//
// Define generic 8 micro-op types
//
// NumMicroOps comes from the enclosing `let`; each record states only its
// Latency. Resource lists are laid out one unit kind per row group.

let NumMicroOps = 8 in {

def V2Write_2cyc_4L01_4V : SchedWriteRes<[
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 2;
}
def V2Write_2cyc_4L01_4V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 2;
}
def V2Write_4cyc_4L01_4V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 4;
}
def V2Write_6cyc_2L01_6V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01,
    V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 6;
}
def V2Write_8cyc_4L_4V : SchedWriteRes<[
    V2UnitL, V2UnitL, V2UnitL, V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 8;
}

} // NumMicroOps = 8
714
//===----------------------------------------------------------------------===//
// Define generic 9 micro-op types
//
// NumMicroOps comes from the enclosing `let`; each record states only its
// Latency. Resource lists are laid out one unit kind per row group.

let NumMicroOps = 9 in {

def V2Write_6cyc_3L01_6V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 6;
}
def V2Write_10cyc_1L_8V : SchedWriteRes<[
    V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 10;
}
def V2Write_10cyc_3V_3L_3S : SchedWriteRes<[
    V2UnitV, V2UnitV, V2UnitV,
    V2UnitL, V2UnitL, V2UnitL,
    V2UnitS, V2UnitS, V2UnitS]> {
  let Latency = 10;
}

} // NumMicroOps = 9
738
//===----------------------------------------------------------------------===//
// Define generic 10 micro-op types
//
// Six load-group entries followed by four FP/ASIMD-group entries.

let NumMicroOps = 10 in
def V2Write_9cyc_6L_4V : SchedWriteRes<[
    V2UnitL, V2UnitL, V2UnitL,
    V2UnitL, V2UnitL, V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 9;
}
748
//===----------------------------------------------------------------------===//
// Define generic 12 micro-op types
//
// NumMicroOps comes from the enclosing `let`; each record states only its
// Latency. Each list is four load-side entries followed by eight FP/ASIMD
// entries (two rows of four).

let NumMicroOps = 12 in {

def V2Write_5cyc_4L01_8V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 5;
}
def V2Write_9cyc_4L_8V : SchedWriteRes<[
    V2UnitL, V2UnitL, V2UnitL, V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 9;
}
def V2Write_10cyc_4L_8V : SchedWriteRes<[
    V2UnitL, V2UnitL, V2UnitL, V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 10;
}

} // NumMicroOps = 12
775
//===----------------------------------------------------------------------===//
// Define generic 16 micro-op types
//
// NumMicroOps comes from the enclosing `let`; each record states only its
// Latency. Rows hold four entries each, grouped by unit kind.

let NumMicroOps = 16 in {

def V2Write_7cyc_4L01_12V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 7;
}
def V2Write_10cyc_4L_8V_4S : SchedWriteRes<[
    V2UnitL, V2UnitL, V2UnitL, V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV,
    V2UnitS, V2UnitS, V2UnitS, V2UnitS]> {
  let Latency = 10;
}

} // NumMicroOps = 16
798
//===----------------------------------------------------------------------===//
// Define generic 18 micro-op types
//
// Nine V2UnitL01 entries (rows of 5 + 4) followed by nine V2UnitV01 entries
// (rows of 5 + 4).

let NumMicroOps = 18 in
def V2Write_7cyc_9L01_9V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 7;
}
811
//===----------------------------------------------------------------------===//
// Define generic 27 micro-op types
//
// Nine entries each of V2UnitL01, V2UnitS and V2UnitV01, in that order; every
// kind is laid out as rows of 5 + 4.

let NumMicroOps = 27 in
def V2Write_7cyc_9L01_9S_9V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitS, V2UnitS, V2UnitS, V2UnitS, V2UnitS,
    V2UnitS, V2UnitS, V2UnitS, V2UnitS,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 7;
}
828
//===----------------------------------------------------------------------===//
// Define generic 36 micro-op types

// 11-cycle write whose resource list has one entry per micro-op:
// 18 x V2UnitL01 followed by 18 x V2UnitV01, matching NumMicroOps = 36.
def V2Write_11cyc_18L01_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                                               V2UnitL01, V2UnitL01, V2UnitL01,
                                               V2UnitL01, V2UnitL01, V2UnitL01,
                                               V2UnitL01, V2UnitL01, V2UnitL01,
                                               V2UnitL01, V2UnitL01, V2UnitL01,
                                               V2UnitL01, V2UnitL01, V2UnitL01,
                                               V2UnitV01, V2UnitV01, V2UnitV01,
                                               V2UnitV01, V2UnitV01, V2UnitV01,
                                               V2UnitV01, V2UnitV01, V2UnitV01,
                                               V2UnitV01, V2UnitV01, V2UnitV01,
                                               V2UnitV01, V2UnitV01, V2UnitV01,
                                               V2UnitV01, V2UnitV01,
                                               V2UnitV01]> {
  let Latency     = 11;
  let NumMicroOps = 36;
}

//===----------------------------------------------------------------------===//
// Define generic 54 micro-op types

// 11-cycle write whose resource list has one entry per micro-op:
// 18 x V2UnitL01, 18 x V2UnitS and 18 x V2UnitV01, matching NumMicroOps = 54.
def V2Write_11cyc_18L01_18S_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
                                                   V2UnitL01, V2UnitL01,
                                                   V2UnitL01, V2UnitL01,
                                                   V2UnitL01, V2UnitL01,
                                                   V2UnitL01, V2UnitL01,
                                                   V2UnitL01, V2UnitL01,
                                                   V2UnitL01, V2UnitL01,
                                                   V2UnitL01, V2UnitL01,
                                                   V2UnitL01, V2UnitL01,
                                                   V2UnitS, V2UnitS, V2UnitS,
                                                   V2UnitS, V2UnitS, V2UnitS,
                                                   V2UnitS, V2UnitS, V2UnitS,
                                                   V2UnitS, V2UnitS, V2UnitS,
                                                   V2UnitS, V2UnitS, V2UnitS,
                                                   V2UnitS, V2UnitS, V2UnitS,
                                                   V2UnitV01, V2UnitV01,
                                                   V2UnitV01, V2UnitV01,
                                                   V2UnitV01, V2UnitV01,
                                                   V2UnitV01, V2UnitV01,
                                                   V2UnitV01, V2UnitV01,
                                                   V2UnitV01, V2UnitV01,
                                                   V2UnitV01, V2UnitV01,
                                                   V2UnitV01, V2UnitV01,
                                                   V2UnitV01, V2UnitV01]> {
  let Latency     = 11;
  let NumMicroOps = 54;
}

//===----------------------------------------------------------------------===//
// Define predicate-controlled types
//
// Each SchedWriteVariant below lists a predicated SchedVar first and a
// NoSchedPred SchedVar last, which acts as the fallback write.

// Shifted-register arithmetic: 1-cycle I write when IsCheapLSL holds,
// otherwise the 2-cycle M write.
def V2Write_ArithI : SchedWriteVariant<[
                       SchedVar<IsCheapLSL,  [V2Write_1cyc_1I]>,
                       SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;

// Same selection as V2Write_ArithI, but the cheap case uses the F write.
def V2Write_ArithF : SchedWriteVariant<[
                       SchedVar<IsCheapLSL,  [V2Write_1cyc_1F]>,
                       SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;

// Logical ops: 1-cycle F write under NeoverseNoLSL, else 2-cycle M write.
def V2Write_Logical : SchedWriteVariant<[
                        SchedVar<NeoverseNoLSL, [V2Write_1cyc_1F]>,
                        SchedVar<NoSchedPred,   [V2Write_2cyc_1M]>]>;

// EXTR: 1-cycle I write under IsRORImmIdiomPred, else 3-cycle I+M write.
def V2Write_Extr : SchedWriteVariant<[
                     SchedVar<IsRORImmIdiomPred, [V2Write_1cyc_1I]>,
                     SchedVar<NoSchedPred,       [V2Write_3cyc_1I_1M]>]>;

// Vector register-offset load: NeoverseHQForm adds an I micro-op and a cycle.
def V2Write_LdrHQ : SchedWriteVariant<[
                      SchedVar<NeoverseHQForm,  [V2Write_7cyc_1I_1L]>,
                      SchedVar<NoSchedPred,     [V2Write_6cyc_1L]>]>;

// Vector register-offset store: NeoverseHQForm adds an I micro-op.
def V2Write_StrHQ : SchedWriteVariant<[
                      SchedVar<NeoverseHQForm,  [V2Write_2cyc_1L01_1V01_1I]>,
                      SchedVar<NoSchedPred,     [V2Write_2cyc_1L01_1V01]>]>;

// Zero-move variants: when NeoverseZeroMove holds the write is V2Write_0cyc,
// otherwise the regular write named in the "NorMcyc" suffix is used.
def V2Write_0or1cyc_1I : SchedWriteVariant<[
                      SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
                      SchedVar<NoSchedPred,      [V2Write_1cyc_1I]>]>;

def V2Write_0or2cyc_1V : SchedWriteVariant<[
                      SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
                      SchedVar<NoSchedPred,      [V2Write_2cyc_1V]>]>;

def V2Write_0or3cyc_1M0 : SchedWriteVariant<[
                      SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
                      SchedVar<NoSchedPred,      [V2Write_3cyc_1M0]>]>;

// NeoversePdIsPg variants: in every definition below the predicated case
// selects the write that is one cycle slower than the NoSchedPred fallback;
// both writes use the same units.
def V2Write_2or3cyc_1M : SchedWriteVariant<[
                      SchedVar<NeoversePdIsPg,  [V2Write_3cyc_1M]>,
                      SchedVar<NoSchedPred,     [V2Write_2cyc_1M]>]>;

def V2Write_3or4cyc_2M : SchedWriteVariant<[
                      SchedVar<NeoversePdIsPg,  [V2Write_4cyc_2M]>,
                      SchedVar<NoSchedPred,     [V2Write_3cyc_2M]>]>;

def V2Write_1or2cyc_1M0 : SchedWriteVariant<[
                      SchedVar<NeoversePdIsPg,  [V2Write_2cyc_1M0]>,
                      SchedVar<NoSchedPred,     [V2Write_1cyc_1M0]>]>;

def V2Write_2or3cyc_1M0 : SchedWriteVariant<[
                      SchedVar<NeoversePdIsPg,  [V2Write_3cyc_1M0]>,
                      SchedVar<NoSchedPred,     [V2Write_2cyc_1M0]>]>;

def V2Write_1or2cyc_1M0_1M : SchedWriteVariant<[
                      SchedVar<NeoversePdIsPg,  [V2Write_2cyc_1M0_1M]>,
                      SchedVar<NoSchedPred,     [V2Write_1cyc_1M0_1M]>]>;

def V2Write_3or4cyc_1M0_1M : SchedWriteVariant<[
                      SchedVar<NeoversePdIsPg,  [V2Write_4cyc_1M0_1M]>,
                      SchedVar<NoSchedPred,     [V2Write_3cyc_1M0_1M]>]>;

def V2Write_4or5cyc_2M0_2M : SchedWriteVariant<[
                      SchedVar<NeoversePdIsPg,  [V2Write_5cyc_2M0_2M]>,
                      SchedVar<NoSchedPred,     [V2Write_4cyc_2M0_2M]>]>;

def V2Write_4or5cyc_1V0_1M0 : SchedWriteVariant<[
                      SchedVar<NeoversePdIsPg,  [V2Write_5cyc_1V0_1M0]>,
                      SchedVar<NoSchedPred,     [V2Write_4cyc_1V0_1M0]>]>;

def V2Write_2or3cyc_1V0_1M : SchedWriteVariant<[
                      SchedVar<NeoversePdIsPg,  [V2Write_3cyc_1V0_1M]>,
                      SchedVar<NoSchedPred,     [V2Write_2cyc_1V0_1M]>]>;

// Increment/decrement: 1-cycle F write under NeoverseCheapIncDec, else the
// 2-cycle M write.
def V2Write_IncDec : SchedWriteVariant<[
                      SchedVar<NeoverseCheapIncDec, [V2Write_1cyc_1F]>,
                      SchedVar<NoSchedPred,         [V2Write_2cyc_1M]>]>;

//===----------------------------------------------------------------------===//
// Define forwarded types
//
// Each V2Rd_* below is a SchedReadAdvance<N, [writes]>: a read that advances
// by N cycles with respect to the listed writes only.

// NOTE: SOG, p. 16, n. 2: Accumulator forwarding is not supported for
// consumers of 64 bit multiply high operations?

// Integer multiply (V2Wr_IM, unit M) and multiply-accumulate (V2Wr_IMA,
// unit M0). V2Wr_IMUL picks V2Wr_IM when IsReg3ZeroPred holds, else V2Wr_IMA.
def V2Wr_IM   : SchedWriteRes<[V2UnitM]>  { let Latency = 2; }
def V2Wr_IMA  : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
def V2Wr_IMUL : SchedWriteVariant<[
                  SchedVar<IsReg3ZeroPred, [V2Wr_IM]>,
                  SchedVar<NoSchedPred,    [V2Wr_IMA]>]>;
// Only V2Wr_IMA producers are forwarded (V2Wr_IM is not in the list).
def V2Rd_IMA  : SchedReadAdvance<1, [V2Wr_IMA]>;

// Scalar FP multiply-accumulate; the read also advances past WriteFMul.
def V2Wr_FMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
def V2Rd_FMA : SchedReadAdvance<2, [WriteFMul, V2Wr_FMA]>;

// ASIMD (V2Wr_V*/V2Rd_V*) write/read-advance pairs.
def V2Wr_VA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Rd_VA : SchedReadAdvance<3, [V2Wr_VA]>;

def V2Wr_VDOT : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
def V2Rd_VDOT : SchedReadAdvance<2, [V2Wr_VDOT]>;

def V2Wr_VMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
def V2Rd_VMMA : SchedReadAdvance<2, [V2Wr_VMMA]>;

def V2Wr_VMA : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Rd_VMA : SchedReadAdvance<3, [V2Wr_VMA]>;

// Two V02 micro-ops.
def V2Wr_VMAH : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
def V2Rd_VMAH : SchedReadAdvance<2, [V2Wr_VMAH]>;

def V2Wr_VMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Rd_VMAL : SchedReadAdvance<3, [V2Wr_VMAL]>;

def V2Wr_VPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Rd_VPA : SchedReadAdvance<3, [V2Wr_VPA]>;

def V2Wr_VSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Rd_VSA : SchedReadAdvance<3, [V2Wr_VSA]>;

def V2Wr_VFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
def V2Rd_VFCMA : SchedReadAdvance<2, [V2Wr_VFCMA]>;

// V2Rd_VFMA advances past both the multiply (V2Wr_VFM) and the
// multiply-accumulate (V2Wr_VFMA) writes.
def V2Wr_VFM  : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
def V2Wr_VFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
def V2Rd_VFMA : SchedReadAdvance<2, [V2Wr_VFM, V2Wr_VFMA]>;

def V2Wr_VFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
def V2Rd_VFMAL : SchedReadAdvance<2, [V2Wr_VFMAL]>;

def V2Wr_VBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
def V2Rd_VBFDOT : SchedReadAdvance<2, [V2Wr_VBFDOT]>;
def V2Wr_VBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
def V2Rd_VBFMMA : SchedReadAdvance<2, [V2Wr_VBFMMA]>;
def V2Wr_VBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
def V2Rd_VBFMAL : SchedReadAdvance<3, [V2Wr_VBFMAL]>;

// CRC on unit M0.
def V2Wr_CRC : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
def V2Rd_CRC : SchedReadAdvance<1, [V2Wr_CRC]>;

// V2Wr_Z*/V2Rd_Z* write/read-advance pairs (presumably the SVE counterparts
// of the V2Wr_V* ASIMD types; the users are outside this part of the file).
def V2Wr_ZA  : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Rd_ZA  : SchedReadAdvance<3, [V2Wr_ZA]>;
def V2Wr_ZPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Rd_ZPA : SchedReadAdvance<3, [V2Wr_ZPA]>;
def V2Wr_ZSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Rd_ZSA : SchedReadAdvance<3, [V2Wr_ZSA]>;

def V2Wr_ZDOTB : SchedWriteRes<[V2UnitV]>   { let Latency = 3; }
def V2Rd_ZDOTB : SchedReadAdvance<2, [V2Wr_ZDOTB]>;
def V2Wr_ZDOTH : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Rd_ZDOTH : SchedReadAdvance<3, [V2Wr_ZDOTH]>;

// NOTE: SOG p. 43: Complex multiply-add B, H, S element size: How to reduce
// throughput to 1 in case of forwarding?
def V2Wr_ZCMABHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Rd_ZCMABHS : SchedReadAdvance<3, [V2Wr_ZCMABHS]>;
def V2Wr_ZCMAD   : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
def V2Rd_ZCMAD   : SchedReadAdvance<2, [V2Wr_ZCMAD]>;

def V2Wr_ZMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
def V2Rd_ZMMA : SchedReadAdvance<2, [V2Wr_ZMMA]>;

def V2Wr_ZMABHS : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
def V2Rd_ZMABHS : SchedReadAdvance<3, [V2Wr_ZMABHS]>;
def V2Wr_ZMAD  : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
def V2Rd_ZMAD  : SchedReadAdvance<2, [V2Wr_ZMAD]>;

def V2Wr_ZMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Rd_ZMAL : SchedReadAdvance<3, [V2Wr_ZMAL]>;

// One shared read advance (V2Rd_ZMASQ) covers all three V2Wr_ZMASQ* writes.
def V2Wr_ZMASQL   : SchedWriteRes<[V2UnitV02]>            { let Latency = 4; }
def V2Wr_ZMASQBHS : SchedWriteRes<[V2UnitV02]>            { let Latency = 4; }
def V2Wr_ZMASQD   : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
def V2Rd_ZMASQ    : SchedReadAdvance<2, [V2Wr_ZMASQL, V2Wr_ZMASQBHS,
                                         V2Wr_ZMASQD]>;

def V2Wr_ZFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
def V2Rd_ZFCMA : SchedReadAdvance<3, [V2Wr_ZFCMA]>;

def V2Wr_ZFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
def V2Rd_ZFMA : SchedReadAdvance<2, [V2Wr_ZFMA]>;

def V2Wr_ZFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
def V2Rd_ZFMAL : SchedReadAdvance<2, [V2Wr_ZFMAL]>;

def V2Wr_ZBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
def V2Rd_ZBFDOT : SchedReadAdvance<2, [V2Wr_ZBFDOT]>;
def V2Wr_ZBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
def V2Rd_ZBFMMA : SchedReadAdvance<2, [V2Wr_ZBFMMA]>;
def V2Wr_ZBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
def V2Rd_ZBFMAL : SchedReadAdvance<3, [V2Wr_ZBFMAL]>;

//===----------------------------------------------------------------------===//
// Define types with long resource cycles (rc)
//
// Naming: V2Write_<Latency>cyc_1<Unit>_<N>rc, where N is the single
// ReleaseAtCycles entry for the one listed unit.

def V2Write_6cyc_1V1_5rc    : SchedWriteRes<[V2UnitV1]>  { let Latency =  6; let ReleaseAtCycles = [ 5]; }
def V2Write_7cyc_1V02_7rc   : SchedWriteRes<[V2UnitV02]> { let Latency =  7; let ReleaseAtCycles = [ 7]; }
def V2Write_10cyc_1V02_5rc  : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 5]; }
def V2Write_10cyc_1V02_9rc  : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [10]; }
def V2Write_10cyc_1V0_9rc   : SchedWriteRes<[V2UnitV0]>  { let Latency = 10; let ReleaseAtCycles = [ 9]; }
def V2Write_10cyc_1V1_9rc   : SchedWriteRes<[V2UnitV1]>  { let Latency = 10; let ReleaseAtCycles = [ 9]; }
def V2Write_13cyc_1V0_12rc  : SchedWriteRes<[V2UnitV0]>  { let Latency = 13; let ReleaseAtCycles = [12]; }
def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [12]; }
def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [13]; }
def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ReleaseAtCycles = [14]; }
def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [15]; }
def V2Write_16cyc_1V0_14rc  : SchedWriteRes<[V2UnitV0]>  { let Latency = 16; let ReleaseAtCycles = [14]; }

// Miscellaneous
// -----------------------------------------------------------------------------

// COPY uses the generic WriteI, which this model aliases to V2Write_1cyc_1I.
def : InstRW<[WriteI], (instrs COPY)>;

// §3.3 Branch instructions
// -----------------------------------------------------------------------------

// Branch, immed
// Compare and branch
def : SchedAlias<WriteBr,    V2Write_1cyc_1B>;

// Branch, register
def : SchedAlias<WriteBrReg, V2Write_1cyc_1B>;

// Branch and link, immed
// Branch and link, register
def : InstRW<[V2Write_1cyc_1B_1R], (instrs BL, BLR)>;

// §3.4 Arithmetic and Logical Instructions
// -----------------------------------------------------------------------------

// ALU, basic
// ALU, basic, flagset
def : SchedAlias<WriteI, V2Write_1cyc_1I>;
def : InstRW<[V2Write_1cyc_1F], (instregex "^(ADC|SBC)S[WX]r$")>;
def : InstRW<[V2Write_0or1cyc_1I], (instregex "^MOVZ[WX]i$")>;

// ALU, extend and shift
def : SchedAlias<WriteIEReg, V2Write_2cyc_1M>;

// Arithmetic, LSL shift, shift <= 4
// Arithmetic, flagset, LSL shift, shift <= 4
// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
def : SchedAlias<WriteISReg, V2Write_ArithI>;
def : InstRW<[V2Write_ArithF],
             (instregex "^(ADD|SUB)S[WX]rs$")>;

// Arithmetic, immediate to logical address tag
def : InstRW<[V2Write_2cyc_1M], (instrs ADDG, SUBG)>;

// Convert floating-point condition flags
// Flag manipulation instructions
// (no processor resources are listed for WriteSys; only a 1-cycle latency)
def : WriteRes<WriteSys, []> { let Latency = 1; }

// Insert Random Tags
def : InstRW<[V2Write_2cyc_1M], (instrs IRG, IRGstack)>;

// Insert Tag Mask
// Subtract Pointer
// Subtract Pointer, flagset
def : InstRW<[V2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;

// Logical, shift, no flagset
def : InstRW<[V2Write_1cyc_1I],    (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
def : InstRW<[V2Write_0or1cyc_1I], (instregex "^ORR[WX]rs$")>;

// Logical, shift, flagset
def : InstRW<[V2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;

// Move and shift instructions
// -----------------------------------------------------------------------------

def : SchedAlias<WriteImm, V2Write_1cyc_1I>;

// §3.5 Divide and multiply instructions
// -----------------------------------------------------------------------------

// SDIV, UDIV
def : SchedAlias<WriteID32,  V2Write_12cyc_1M0>;
def : SchedAlias<WriteID64,  V2Write_20cyc_1M0>;

def : SchedAlias<WriteIM32, V2Write_2cyc_1M>;
def : SchedAlias<WriteIM64, V2Write_2cyc_1M>;

// Multiply
// Multiply accumulate, W-form
// Multiply accumulate, X-form
// Operand list: one write, then one read per source operand; only the last
// (third) source uses the forwarding read V2Rd_IMA.
def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA],
             (instregex "^M(ADD|SUB)[WX]rrr$")>;

// Multiply accumulate long
// Multiply long
def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA],
             (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;

// Multiply high
def : InstRW<[V2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>;

// Pointer Authentication Instructions (v8.3 PAC)
// -----------------------------------------------------------------------------

// Authenticate data address
// Authenticate instruction address
// Compute pointer authentication code for data address
// Compute pointer authentication code, using generic key
// Compute pointer authentication code for instruction address
// (matches every opcode whose name starts with AUT or PAC)
def : InstRW<[V2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>;

// Branch and link, register, with pointer authentication
// Branch, register, with pointer authentication
// Branch, return, with pointer authentication
def : InstRW<[V2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
                                            BRAAZ, BRAB, BRABZ, RETAA, RETAB,
                                            ERETAA, ERETAB)>;


// Load register, with pointer authentication
def : InstRW<[V2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;

// Strip pointer authentication code
def : InstRW<[V2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>;

// Miscellaneous data-processing instructions
// -----------------------------------------------------------------------------

// Address generation
def : InstRW<[V2Write_1cyc_1F], (instrs ADR, ADRP)>;

// Bitfield extract, one reg
// Bitfield extract, two regs
// (WriteExtr is aliased and EXTR[WX]rri is additionally mapped explicitly,
// both to the same V2Write_Extr variant)
def : SchedAlias<WriteExtr, V2Write_Extr>;
def : InstRW<[V2Write_Extr], (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move, basic
def : SchedAlias<WriteIS, V2Write_1cyc_1I>;

// Bitfield move, insert
def : InstRW<[V2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>;

// Load instructions
// -----------------------------------------------------------------------------

// NOTE: SOG p. 19: Throughput of LDN?P X-form should be 2, but reported as 3.

def : SchedAlias<WriteLD,    V2Write_4cyc_1L>;
def : SchedAlias<WriteLDIdx, V2Write_4cyc_1L>;

// Load register, literal
def : InstRW<[V2Write_5cyc_1L_1F], (instrs LDRWl, LDRXl, LDRSWl, PRFMl)>;

// Load pair, signed immed offset, signed words
def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi], (instrs LDPSWi)>;

// Load pair, immed post-index or immed pre-index, signed words
// (the leading WriteAdr is the write for the updated base register)
def : InstRW<[WriteAdr, V2Write_5cyc_1I_3L, WriteLDHi],
             (instregex "^LDPSW(post|pre)$")>;

// Store instructions
// -----------------------------------------------------------------------------

// NOTE: SOG, p. 20: Unsure if STRH uses pipeline I.

def : SchedAlias<WriteST,    V2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteSTIdx, V2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteSTP,   V2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteAdr,   V2Write_1cyc_1I>;

// Tag load instructions
// -----------------------------------------------------------------------------

// Load allocation tag
// Load multiple allocation tags
def : InstRW<[V2Write_4cyc_1L], (instrs LDG, LDGM)>;

// Tag store instructions
// -----------------------------------------------------------------------------

// Store allocation tags to one or two granules, post-index
// Store allocation tags to one or two granules, pre-index
// Store allocation tag to one or two granules, zeroing, post-index
// Store Allocation Tag to one or two granules, zeroing, pre-index
// Store allocation tag and reg pair to memory, post-Index
// Store allocation tag and reg pair to memory, pre-Index
// (indexed forms use an extra I micro-op compared with the offset forms below)
def : InstRW<[V2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex,
                                                ST2GPreIndex, ST2GPostIndex,
                                                STZGPreIndex, STZGPostIndex,
                                                STZ2GPreIndex, STZ2GPostIndex,
                                                STGPpre, STGPpost)>;

// Store allocation tags to one or two granules, signed offset
// Store allocation tag to two granules, zeroing, signed offset
// Store allocation tag and reg pair to memory, signed offset
// Store multiple allocation tags
def : InstRW<[V2Write_1cyc_1L01_1D], (instrs STGi, ST2Gi, STZGi,
                                             STZ2Gi, STGPi, STGM, STZGM)>;

// FP data processing instructions
// -----------------------------------------------------------------------------

// FP absolute value
// FP arithmetic
// FP min/max
// FP negate
// FP select
def : SchedAlias<WriteF,     V2Write_2cyc_1V>;

// FP compare
def : SchedAlias<WriteFCmp,  V2Write_2cyc_1V0>;

// FP divide, square root
// (default for WriteFDiv; the per-form InstRW entries below name the scalar
// FDIV/FSQRT opcodes explicitly)
def : SchedAlias<WriteFDiv,  V2Write_7cyc_1V02>;

// FP divide, H-form
def : InstRW<[V2Write_7cyc_1V02],  (instrs FDIVHrr)>;
// FP divide, S-form
def : InstRW<[V2Write_10cyc_1V02], (instrs FDIVSrr)>;
// FP divide, D-form
def : InstRW<[V2Write_15cyc_1V02], (instrs FDIVDrr)>;

// FP square root, H-form
def : InstRW<[V2Write_7cyc_1V02],  (instrs FSQRTHr)>;
// FP square root, S-form
def : InstRW<[V2Write_9cyc_1V02],  (instrs FSQRTSr)>;
// FP square root, D-form
def : InstRW<[V2Write_16cyc_1V02], (instrs FSQRTDr)>;

// FP multiply
def : WriteRes<WriteFMul, [V2UnitV]> { let Latency = 3; }

// FP multiply accumulate
// (only the third source operand uses the forwarding read V2Rd_FMA)
def : InstRW<[V2Wr_FMA, ReadDefault, ReadDefault, V2Rd_FMA],
             (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;

// FP round to integral
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
                                             "^FRINT(32|64)[XZ][SD]r$")>;

// FP miscellaneous instructions
// -----------------------------------------------------------------------------

// FP convert, from gen to vec reg
def : InstRW<[V2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;

// FP convert, from vec to gen reg
def : InstRW<[V2Write_3cyc_1V01],
             (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]ri?$")>;

// FP convert, JavaScript from vec to gen reg
def : SchedAlias<WriteFCvt, V2Write_3cyc_1V0>;

// FP convert, from vec to vec reg
def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr,
                                          FCVTHDr, FCVTSDr, FCVTXNv1i64)>;

// FP move, immed
// FP move, register
def : SchedAlias<WriteFImm, V2Write_2cyc_1V>;

// FP transfer, from gen to low half of vec reg
def : InstRW<[V2Write_0or3cyc_1M0],
             (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;

// FP transfer, from gen to high half of vec reg
def : InstRW<[V2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;

// FP transfer, from vec to gen reg
def : SchedAlias<WriteFCopy, V2Write_2cyc_2V01>;

// FP load instructions
// -----------------------------------------------------------------------------

// Load vector reg, literal, S/D/Q forms
def : InstRW<[V2Write_7cyc_1F_1L], (instregex "^LDR[SDQ]l$")>;

// Load vector reg, unscaled immed
def : InstRW<[V2Write_6cyc_1L], (instregex "^LDUR[BHSDQ]i$")>;

// Load vector reg, immed post-index
// Load vector reg, immed pre-index
def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L],
             (instregex "^LDR[BHSDQ](pre|post)$")>;

// Load vector reg, unsigned immed
def : InstRW<[V2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;

// Load vector reg, register offset, basic
// Load vector reg, register offset, scale, S/D-form
// Load vector reg, register offset, scale, H/Q-form
// Load vector reg, register offset, extend
// Load vector reg, register offset, extend, scale, S/D-form
// Load vector reg, register offset, extend, scale, H/Q-form
// (all rows share one entry; V2Write_LdrHQ selects between its two writes
// via the NeoverseHQForm predicate)
def : InstRW<[V2Write_LdrHQ, ReadAdrBase], (instregex "^LDR[BHSDQ]ro[WX]$")>;

// Load vector pair, immed offset, S/D-form
def : InstRW<[V2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;

// Load vector pair, immed offset, Q-form
def : InstRW<[V2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;

// Load vector pair, immed post-index, S/D-form
// Load vector pair, immed pre-index, S/D-form
def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L, WriteLDHi],
             (instregex "^LDP[SD](pre|post)$")>;

// Load vector pair, immed post-index, Q-form
// Load vector pair, immed pre-index, Q-form
def : InstRW<[WriteAdr, V2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost,
                                                                LDPQpre)>;

// FP store instructions
// -----------------------------------------------------------------------------

// Store vector reg, unscaled immed, B/H/S/D-form
// Store vector reg, unscaled immed, Q-form
def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>;

// Store vector reg, immed post-index, B/H/S/D-form
// Store vector reg, immed post-index, Q-form
// Store vector reg, immed pre-index, B/H/S/D-form
// Store vector reg, immed pre-index, Q-form
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I],
             (instregex "^STR[BHSDQ](pre|post)$")>;

// Store vector reg, unsigned immed, B/H/S/D-form
// Store vector reg, unsigned immed, Q-form
def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>;

// Store vector reg, register offset, basic, B/H/S/D-form
// Store vector reg, register offset, basic, Q-form
// Store vector reg, register offset, scale, H-form
// Store vector reg, register offset, scale, S/D-form
// Store vector reg, register offset, scale, Q-form
// Store vector reg, register offset, extend, B/H/S/D-form
// Store vector reg, register offset, extend, Q-form
// Store vector reg, register offset, extend, scale, H-form
// Store vector reg, register offset, extend, scale, S/D-form
// Store vector reg, register offset, extend, scale, Q-form
// (all rows share one entry; V2Write_StrHQ selects between its two writes
// via the NeoverseHQForm predicate)
def : InstRW<[V2Write_StrHQ, ReadAdrBase],
             (instregex "^STR[BHSDQ]ro[WX]$")>;

// Store vector pair, immed offset, S-form
// Store vector pair, immed offset, D-form
def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STN?P[SD]i$")>;

// Store vector pair, immed offset, Q-form
def : InstRW<[V2Write_2cyc_1L01_2V01], (instrs STPQi, STNPQi)>;

// Store vector pair, immed post-index, S-form
// Store vector pair, immed post-index, D-form
// Store vector pair, immed pre-index, S-form
// Store vector pair, immed pre-index, D-form
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I],
             (instregex "^STP[SD](pre|post)$")>;

// Store vector pair, immed post-index, Q-form
// NOTE(review): unlike the S/D-form entry above, the two Q-form indexed
// entries list no separate WriteAdr — confirm this is intended.
def : InstRW<[V2Write_2cyc_1L01_2V01_1I], (instrs STPQpost)>;

// Store vector pair, immed pre-index, Q-form
def : InstRW<[V2Write_2cyc_1L01_2V01_2I], (instrs STPQpre)>;

// ASIMD integer instructions
// -----------------------------------------------------------------------------

// ASIMD absolute diff
// ASIMD absolute diff long
// ASIMD arith, basic
// ASIMD arith, complex
// ASIMD arith, pair-wise
// ASIMD compare
// ASIMD logical
// ASIMD max/min, basic and pair-wise
def : SchedAlias<WriteVd, V2Write_2cyc_1V>;
def : SchedAlias<WriteVq, V2Write_2cyc_1V>;

// ASIMD absolute diff accum
// ASIMD absolute diff accum long
def : InstRW<[V2Wr_VA, V2Rd_VA], (instregex "^[SU]ABAL?v")>;

// ASIMD arith, reduce, 4H/4S
def : InstRW<[V2Write_2cyc_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;

// ASIMD arith, reduce, 8B/8H
def : InstRW<[V2Write_4cyc_1V13_1V],
             (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;

// ASIMD arith, reduce, 16B
def : InstRW<[V2Write_4cyc_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;

// ASIMD dot product
// ASIMD dot product using signed and unsigned integers
def : InstRW<[V2Wr_VDOT, V2Rd_VDOT],
             (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;

// ASIMD matrix multiply-accumulate
def : InstRW<[V2Wr_VMMA, V2Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;

// ASIMD max/min, reduce, 4H/4S
def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$",
                                             "^[SU](MAX|MIN)Vv4i32v$")>;

// ASIMD max/min, reduce, 8B/8H
def : InstRW<[V2Write_4cyc_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
                                                "^[SU](MAX|MIN)Vv8i16v$")>;

// ASIMD max/min, reduce, 16B
// The explicit '^' matches the sibling patterns above; instregex already
// anchors at the start of the opcode name, so the matched set is unchanged.
def : InstRW<[V2Write_4cyc_2V13], (instregex "^[SU](MAX|MIN)Vv16i8v$")>;

// ASIMD multiply
def : InstRW<[V2Write_4cyc_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;

// ASIMD multiply accumulate
def : InstRW<[V2Wr_VMA, V2Rd_VMA], (instregex "^MLAv", "^MLSv")>;

// ASIMD multiply accumulate high
def : InstRW<[V2Wr_VMAH, V2Rd_VMAH], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;

// ASIMD multiply accumulate long
def : InstRW<[V2Wr_VMAL, V2Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;

// ASIMD multiply accumulate saturating long
// (plain write; no forwarding read is attached to this entry)
def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDML[AS]L[iv]")>;

// ASIMD multiply/multiply long (8x8) polynomial, D-form
// ASIMD multiply/multiply long (8x8) polynomial, Q-form
def : InstRW<[V2Write_3cyc_1V23], (instregex "^PMULL?(v8i8|v16i8)$")>;

// ASIMD multiply long
def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]MULLv", "^SQDMULL[iv]")>;

// ASIMD pairwise add and accumulate long
def : InstRW<[V2Wr_VPA, V2Rd_VPA], (instregex "^[SU]ADALPv")>;

// ASIMD shift accumulate
def : InstRW<[V2Wr_VSA, V2Rd_VSA], (instregex "^[SU]SRA[dv]", "^[SU]RSRA[dv]")>;

// ASIMD shift by immed, basic
def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHL[dv]", "^SHLLv", "^SHRNv",
                                             "^SSHLLv", "^SSHR[dv]", "^USHLLv",
                                             "^USHR[dv]")>;

// ASIMD shift by immed and insert, basic
def : InstRW<[V2Write_2cyc_1V13], (instregex "^SLI[dv]", "^SRI[dv]")>;

// ASIMD shift by immed, complex
def : InstRW<[V2Write_4cyc_1V13],
             (instregex "^RSHRNv", "^SQRSHRU?N[bhsv]", "^(SQSHLU?|UQSHL)[bhsd]$",
                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
                        "^SQSHRU?N[bhsv]", "^SRSHR[dv]", "^UQRSHRN[bhsv]",
                        "^UQSHRN[bhsv]", "^URSHR[dv]")>;

// ASIMD shift by register, basic
def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]SHLv")>;

// ASIMD shift by register, complex
def : InstRW<[V2Write_4cyc_1V13],
             (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
                        "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;

// ASIMD floating-point instructions
// -----------------------------------------------------------------------------
//
// Each anonymous `def : InstRW<[...], (...)>` record in this section assigns
// the listed scheduling write/read types to the opcodes named by `instrs` or
// matched by the `instregex` patterns. The V2Write_*/V2Wr_*/V2Rd_* types are
// defined elsewhere in this file. By their names, V2Write_<N>cyc_<K>V02 style
// types presumably encode latency and pipeline usage, and a trailing `_<M>rc`
// presumably encodes how long the pipeline stays occupied -- confirm against
// the definitions before relying on either reading.

// ASIMD FP absolute value/difference
// ASIMD FP arith, normal
// ASIMD FP compare
// ASIMD FP complex add
// ASIMD FP max/min, normal
// ASIMD FP max/min, pairwise
// ASIMD FP negate
// Handled by SchedAlias<WriteV[dq], ...>

// ASIMD FP complex multiply add
def : InstRW<[V2Wr_VFCMA, V2Rd_VFCMA], (instregex "^FCMLAv")>;

// ASIMD FP convert, long (F16 to F32)
def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTL(v4|v8)i16")>;

// ASIMD FP convert, long (F32 to F64)
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTL(v2|v4)i32")>;

// ASIMD FP convert, narrow (F32 to F16)
def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTN(v4|v8)i16")>;

// ASIMD FP convert, narrow (F64 to F32)
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTN(v2|v4)i32",
                                             "^FCVTXN(v2|v4)f32")>;

// ASIMD FP convert, other, D-form F32 and Q-form F64
// Also lists the 64-bit scalar opcodes (v1i64 and `d` suffixes).
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
                                             "^FCVT[AMNPZ][SU]v1i64$",
                                             "^FCVTZ[SU]d$",
                                             "^[SU]CVTFv2f(32|64)$",
                                             "^[SU]CVTFv1i64$",
                                             "^[SU]CVTFd$")>;

// ASIMD FP convert, other, D-form F16 and Q-form F32
// Also lists the 32-bit scalar opcodes (v1i32 and `s` suffixes).
def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
                                             "^FCVT[AMNPZ][SU]v1i32$",
                                             "^FCVTZ[SU]s$",
                                             "^[SU]CVTFv4f(16|32)$",
                                             "^[SU]CVTFv1i32$",
                                             "^[SU]CVTFs$")>;

// ASIMD FP convert, other, Q-form F16
// Also lists the 16-bit scalar opcodes (v1f16/v1i16 and `h` suffixes).
def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
                                             "^FCVT[AMNPZ][SU]v1f16$",
                                             "^FCVTZ[SU]h$",
                                             "^[SU]CVTFv8f16$",
                                             "^[SU]CVTFv1i16$",
                                             "^[SU]CVTFh$")>;

// ASIMD FP divide, D-form, F16
def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FDIVv4f16)>;

// ASIMD FP divide, D-form, F32
def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FDIVv2f32)>;

// ASIMD FP divide, Q-form, F16
def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FDIVv8f16)>;

// ASIMD FP divide, Q-form, F32
def : InstRW<[V2Write_10cyc_1V02_10rc], (instrs FDIVv4f32)>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[V2Write_15cyc_1V02_14rc], (instrs FDIVv2f64)>;

// ASIMD FP max/min, reduce, F32 and D-form F16
def : InstRW<[V2Write_4cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;

// ASIMD FP max/min, reduce, Q-form F16
def : InstRW<[V2Write_6cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;

// ASIMD FP multiply
def : InstRW<[V2Wr_VFM], (instregex "^FMULv", "^FMULXv")>;

// ASIMD FP multiply accumulate
def : InstRW<[V2Wr_VFMA, V2Rd_VFMA], (instregex "^FMLAv", "^FMLSv")>;

// ASIMD FP multiply accumulate long
def : InstRW<[V2Wr_VFMAL, V2Rd_VFMAL], (instregex "^FML[AS]L2?(lane)?v")>;

// ASIMD FP round, D-form F32 and Q-form F64
def : InstRW<[V2Write_3cyc_1V02],
             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
                        "^FRINT(32|64)[XZ]v2f(32|64)$")>;

// ASIMD FP round, D-form F16 and Q-form F32
def : InstRW<[V2Write_4cyc_2V02],
             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
                        "^FRINT(32|64)[XZ]v4f32$")>;

// ASIMD FP round, Q-form F16
def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;

// ASIMD FP square root, D-form, F16
def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FSQRTv4f16)>;

// ASIMD FP square root, D-form, F32
def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FSQRTv2f32)>;

// ASIMD FP square root, Q-form, F16
def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FSQRTv8f16)>;

// ASIMD FP square root, Q-form, F32
def : InstRW<[V2Write_10cyc_1V02_9rc], (instrs FSQRTv4f32)>;

// ASIMD FP square root, Q-form, F64
def : InstRW<[V2Write_16cyc_1V02_15rc], (instrs FSQRTv2f64)>;

// ASIMD BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------
//
// Opcodes are listed explicitly with `instrs` rather than matched by regex.
// The V2Wr_VBF*/V2Rd_VBF* pairs are defined elsewhere in this file.

// ASIMD convert, F32 to BF16
def : InstRW<[V2Write_4cyc_2V02], (instrs BFCVTN, BFCVTN2)>;

// ASIMD dot product
def : InstRW<[V2Wr_VBFDOT, V2Rd_VBFDOT], (instrs BFDOTv4bf16, BFDOTv8bf16)>;

// ASIMD matrix multiply accumulate
def : InstRW<[V2Wr_VBFMMA, V2Rd_VBFMMA], (instrs BFMMLA)>;

// ASIMD multiply accumulate long
def : InstRW<[V2Wr_VBFMAL, V2Rd_VBFMAL], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
                                                 BFMLALTIdx)>;

// Scalar convert, F32 to BF16
def : InstRW<[V2Write_3cyc_1V02], (instrs BFCVT)>;

// ASIMD miscellaneous instructions
// -----------------------------------------------------------------------------

// ASIMD bit reverse
// ASIMD bitwise insert
// ASIMD count
// ASIMD duplicate, element
// ASIMD extract
// ASIMD extract narrow
// ASIMD insert, element to element
// ASIMD move, FP immed
// ASIMD move, integer immed
// ASIMD reverse
// ASIMD table lookup extension, 1 table reg
// ASIMD transpose
// ASIMD unzip/zip
// Handled by SchedAlias<WriteV[dq], ...>
// MOVID and MOVIv2d_ns are given their own write type instead of the alias.
// NOTE(review): by its name V2Write_0or2cyc_1V selects between a 0- and a
// 2-cycle latency; the selecting condition lives in its definition elsewhere
// in this file -- confirm there.
def : InstRW<[V2Write_0or2cyc_1V], (instrs MOVID, MOVIv2d_ns)>;

// ASIMD duplicate, gen reg
def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;

// ASIMD extract narrow, saturating
def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>;

// ASIMD reciprocal and square root estimate, D-form U32
def : InstRW<[V2Write_3cyc_1V02], (instrs URECPEv2i32, URSQRTEv2i32)>;

// ASIMD reciprocal and square root estimate, Q-form U32
def : InstRW<[V2Write_4cyc_2V02], (instrs URECPEv4i32, URSQRTEv4i32)>;

// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
def : InstRW<[V2Write_3cyc_1V02], (instrs FRECPEv1f16, FRECPEv1i32,
                                          FRECPEv1i64, FRECPEv2f32,
                                          FRSQRTEv1f16, FRSQRTEv1i32,
                                          FRSQRTEv1i64, FRSQRTEv2f32)>;

// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
def : InstRW<[V2Write_4cyc_2V02], (instrs FRECPEv4f16, FRECPEv4f32,
                                          FRSQRTEv4f16, FRSQRTEv4f32)>;

// ASIMD reciprocal and square root estimate, Q-form F16
def : InstRW<[V2Write_6cyc_4V02], (instrs FRECPEv8f16, FRSQRTEv8f16)>;

// ASIMD reciprocal exponent
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRECPXv")>;

// ASIMD reciprocal step
def : InstRW<[V2Write_4cyc_1V], (instregex "^FRECPS(32|64|v)",
                                           "^FRSQRTS(32|64|v)")>;

// ASIMD table lookup, 1 or 2 table regs
def : InstRW<[V2Write_2cyc_1V01], (instrs TBLv8i8One, TBLv16i8One,
                                          TBLv8i8Two, TBLv16i8Two)>;

// ASIMD table lookup, 3 table regs
def : InstRW<[V2Write_4cyc_2V01], (instrs TBLv8i8Three, TBLv16i8Three)>;

// ASIMD table lookup, 4 table regs
def : InstRW<[V2Write_4cyc_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>;

// ASIMD table lookup extension, 2 table reg
def : InstRW<[V2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;

// ASIMD table lookup extension, 3 table reg
def : InstRW<[V2Write_6cyc_3V], (instrs TBXv8i8Three, TBXv16i8Three)>;

// ASIMD table lookup extension, 4 table reg
def : InstRW<[V2Write_6cyc_5V], (instrs TBXv8i8Four, TBXv16i8Four)>;

// ASIMD transfer, element to gen reg
def : InstRW<[V2Write_2cyc_2V01], (instregex "^[SU]MOVv")>;

// ASIMD transfer, gen reg to element
def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;

// ASIMD load instructions
// -----------------------------------------------------------------------------
//
// Every load class below has two records: one for the plain opcodes and one
// for the `_POST` opcodes. The `_POST` record lists WriteAdr ahead of the same
// load write type; WriteAdr presumably covers the updated base register of
// the post-indexed form -- confirm against the instruction definitions.

// ASIMD load, 1 element, multiple, 1 reg, D-form
def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_1L],
             (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_1L],
             (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, D-form
def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_2L],
             (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_2L],
             (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, D-form
def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_3L],
             (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_3L],
             (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, D-form
def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_7cyc_4L],
             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_7cyc_4L],
             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "LD1i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "LD1Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, all lanes, Q-form
def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "LD1Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)_POST$")>;

// ASIMD load, 2 element, multiple, Q-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
def : InstRW<[V2Write_8cyc_2L_2V],           (instregex "LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "LD2i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_1L_2V],            (instregex "LD2Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V],  (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 2 element, all lanes, Q-form
def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "LD2Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "LD3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)_POST$")>;

// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
def : InstRW<[V2Write_8cyc_3L_3V],           (instregex "LD3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "LD3i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)_POST$")>;

// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "LD3Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
def : InstRW<[V2Write_8cyc_3L_3V],           (instregex "LD3Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_3L_4V],           (instregex "LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;

// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
def : InstRW<[V2Write_9cyc_6L_4V],           (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_9cyc_6L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[V2Write_8cyc_3L_4V],           (instregex "LD4i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_3L_4V],           (instregex "LD4Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
def : InstRW<[V2Write_8cyc_4L_4V],           (instregex "LD4Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD store instructions
// -----------------------------------------------------------------------------
//
// Every store class below has two records: one for the plain opcodes and one
// for the `_POST` opcodes. The `_POST` record lists WriteAdr ahead of the same
// store write type; WriteAdr presumably covers the updated base register of
// the post-indexed form -- confirm against the instruction definitions.

// ASIMD store, 1 element, multiple, 1 reg, D-form
def : InstRW<[V2Write_2cyc_1L01_1V01],           (instregex "ST1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[V2Write_2cyc_1L01_1V01],           (instregex "ST1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, D-form
def : InstRW<[V2Write_2cyc_1L01_1V01],           (instregex "ST1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[V2Write_2cyc_2L01_2V01],           (instregex "ST1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, D-form
def : InstRW<[V2Write_2cyc_2L01_2V01],           (instregex "ST1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[V2Write_2cyc_3L01_3V01],           (instregex "ST1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_3L01_3V01], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, D-form
def : InstRW<[V2Write_2cyc_2L01_2V01],           (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V2Write_2cyc_4L01_4V01],           (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_4L01_4V01], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
def : InstRW<[V2Write_4cyc_1L01_2V01],           (instregex "ST1i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST1i(8|16|32|64)_POST$")>;

// ASIMD store, 2 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_4cyc_1L01_2V01],           (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST2Twov(8b|4h|2s)_POST$")>;

// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
def : InstRW<[V2Write_4cyc_2L01_4V01],           (instregex "ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
def : InstRW<[V2Write_4cyc_1L01_2V01],           (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST2i(8|16|32|64)_POST$")>;

// ASIMD store, 3 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_5cyc_2L01_4V01],           (instregex "ST3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "ST3Threev(8b|4h|2s)_POST$")>;

// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
def : InstRW<[V2Write_6cyc_3L01_6V01],           (instregex "ST3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_3L01_6V01], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
def : InstRW<[V2Write_5cyc_2L01_4V01],           (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "ST3i(8|16|32|64)_POST$")>;

// ASIMD store, 4 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_6cyc_2L01_6V01],           (instregex "ST4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_2L01_6V01], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, B/H/S
def : InstRW<[V2Write_7cyc_4L01_12V01],           (instregex "ST4Fourv(16b|8h|4s)$")>;
def : InstRW<[WriteAdr, V2Write_7cyc_4L01_12V01], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[V2Write_5cyc_4L01_8V01],           (instregex "ST4Fourv(2d)$")>;
def : InstRW<[WriteAdr, V2Write_5cyc_4L01_8V01], (instregex "ST4Fourv(2d)_POST$")>;

// ASIMD store, 4 element, one lane, B/H/S
def : InstRW<[V2Write_6cyc_1L01_3V01],           (instregex "ST4i(8|16|32)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_1L01_3V01], (instregex "ST4i(8|16|32)_POST$")>;

// ASIMD store, 4 element, one lane, D
def : InstRW<[V2Write_4cyc_2L01_4V01],            (instregex "ST4i(64)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01],  (instregex "ST4i(64)_POST$")>;

// Cryptography extensions
// -----------------------------------------------------------------------------
//
// Each record assigns one write type to a family of crypto opcodes. All of the
// SHA, SHA3, SM3 and SM4 records use a `..._1V0` write type, while AES and the
// 64x64 polynomial multiply use `..._1V`.

// Crypto AES ops
def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;

// Crypto polynomial (64x64) multiply long
def : InstRW<[V2Write_2cyc_1V], (instrs PMULLv1i64, PMULLv2i64)>;

// Crypto SHA1 hash acceleration op
// Crypto SHA1 schedule acceleration ops
def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>;

// Crypto SHA1 hash acceleration ops
// Crypto SHA256 hash acceleration ops
def : InstRW<[V2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;

// Crypto SHA256 schedule acceleration ops
def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>;

// Crypto SHA512 hash acceleration ops
def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;

// Crypto SHA3 ops
def : InstRW<[V2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;

// Crypto SM3 ops
def : InstRW<[V2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
                                            "^SM3TT[12][AB]$")>;

// Crypto SM4 ops
def : InstRW<[V2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>;

// CRC
// -----------------------------------------------------------------------------

// All opcodes whose name starts with CRC32 share one write/read pair; both
// types are defined elsewhere in this file.
def : InstRW<[V2Wr_CRC, V2Rd_CRC], (instregex "^CRC32")>;

// SVE Predicate instructions
// -----------------------------------------------------------------------------
//
// Several records here use write types named `..._<A>or<B>cyc_...`.
// NOTE(review): by name these select between two latencies; the selecting
// condition is in their definitions elsewhere in this file -- confirm there.

// Loop control, based on predicate
def : InstRW<[V2Write_2or3cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP,
                                           BRKB_PPmP, BRKB_PPzP)>;

// Loop control, based on predicate and flag setting
def : InstRW<[V2Write_3or4cyc_2M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;

// Loop control, propagating
def : InstRW<[V2Write_2or3cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP,
                                            BRKPB_PPzPP)>;

// Loop control, propagating and flag setting
def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
                                               BRKPBS_PPzPP)>;

// Loop control, based on GPR
def : InstRW<[V2Write_3cyc_2M],
             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;

// Loop terminate
def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;

// Predicate counting scalar
def : InstRW<[V2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
def : InstRW<[V2Write_2cyc_1M],
             (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI",
                        "^SQ(DEC|INC)[BHWD]_XPiWdI",
                        "^UQ(DEC|INC)[BHWD]_WPiI")>;

// Predicate counting scalar, ALL, {1,2,4}
// Plain (non-saturating) DEC/INC get the dedicated V2Write_IncDec type, which
// is defined elsewhere in this file.
def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>;

// Predicate counting scalar, active predicate
def : InstRW<[V2Write_2cyc_1M],
             (instregex "^CNTP_XPP_[BHSD]",
                        "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
                        "^(UQDEC|UQINC)P_WP_[BHSD]",
                        "^(SQDEC|SQINC)P_XPWd_[BHSD]")>;

// Predicate counting vector, active predicate
def : InstRW<[V2Write_7cyc_1M_1M0_1V],
             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;

// Predicate logical
def : InstRW<[V2Write_1or2cyc_1M0],
             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;

// Predicate logical, flag setting
def : InstRW<[V2Write_1or2cyc_1M0_1M],
             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;

// Predicate reverse
def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]")>;

// Predicate select
def : InstRW<[V2Write_1cyc_1M0], (instrs SEL_PPPP)>;

// Predicate set
def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;

// Predicate set/initialize, set flags
def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]")>;

// Predicate find first/next
def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;

// Predicate test
def : InstRW<[V2Write_1cyc_1M], (instrs PTEST_PP)>;

// Predicate transpose
def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]")>;

// Predicate unpack and widen
def : InstRW<[V2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;

// Predicate zip/unzip
def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>;

// SVE integer instructions
// -----------------------------------------------------------------------------
//
// The opcode suffixes in the patterns below (_ZPmZ_, _ZPZZ_, _ZZZ_, _ZI_, ...)
// are part of the instruction record names matched by `instregex`; the
// trailing [BHSD] class selects the element-size variants covered by a record.

// Arithmetic, absolute diff
def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
                                           "^[SU]ABD_ZPZZ_[BHSD]")>;

// Arithmetic, absolute diff accum
def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;

// Arithmetic, absolute diff accum long
def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;

// Arithmetic, absolute diff long
def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;

// Arithmetic, basic
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
                        "^(ADD|SUB)_ZZZ_[BHSD]",
                        "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
                        "^(ADD|SUB|SUBR)_ZI_[BHSD]",
                        "^ADR_[SU]XTW_ZZZ_D_[0123]",
                        "^ADR_LSL_ZZZ_[SD]_[0123]",
                        "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
                        "^SADDLBT_ZZZ_[HSD]",
                        "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
                        "^SSUBL(BT|TB)_ZZZ_[HSD]")>;

// Arithmetic, complex
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
                        "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
                        "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
                        "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
                        "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
                        "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;

// Arithmetic, large integer
def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;

// Arithmetic, pairwise add
def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>;

// Arithmetic, pairwise add and accum long
// NOTE(review): ReadDefault fills one read slot ahead of V2Rd_ZPA, presumably
// so that V2Rd_ZPA lines up with the accumulator operand rather than the
// governing predicate -- confirm against the instruction's operand order.
def : InstRW<[V2Wr_ZPA, ReadDefault, V2Rd_ZPA],
             (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;

// Arithmetic, shift
def : InstRW<[V2Write_2cyc_1V13],
             (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
                        "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
                        "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
                        "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
                        "^(ASR|LSL|LSR)_ZZI_[BHSD]",
                        "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;

// Arithmetic, shift and accumulate
def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>;

// Arithmetic, shift by immediate
def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]",
                                             "^[SU]SHLL[BT]_ZZI_[HSD]")>;

// Arithmetic, shift by immediate and insert
def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>;

// Arithmetic, shift complex
def : InstRW<[V2Write_4cyc_1V13],
             (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
                        "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
                        "^[SU]QR?SHL_ZPZZ_[BHSD]",
                        "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
                        "^SQSHRU?N[BT]_ZZI_[BHS]",
                        "^UQR?SHRN[BT]_ZZI_[BHS]")>;

// Arithmetic, shift right for divide
def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;

// Arithmetic, shift rounding
def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
                                             "^[SU]RSHL_ZPZZ_[BHSD]",
                                             "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;

// Bit manipulation
def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;

// Bitwise select
def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;

// Count/reverse bits
def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;

// Broadcast logical bitmask immediate to vector
def : InstRW<[V2Write_2cyc_1V], (instrs DUPM_ZI)>;

// Compare and set flags
def : InstRW<[V2Write_4or5cyc_1V0_1M0],
             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;

// Complex add
def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>;

// Complex dot product 8-bit element
def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;

// Complex dot product 16-bit element
def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;

// Complex multiply-add B, H, S element size
def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]",
                                                      "^CMLA_ZZZI_[HS]")>;

// Complex multiply-add D element size
def : InstRW<[V2Wr_ZCMAD, V2Rd_ZCMAD], (instrs CMLA_ZZZ_D)>;

// Conditional extract operations, scalar form
def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]")>;

// Conditional extract operations, SIMD&FP scalar and vector forms
// COMPACT and SPLICE opcodes are listed in this record as well.
def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
                                            "^COMPACT_ZPZ_[SD]",
                                            "^SPLICE_ZPZZ?_[BHSD]")>;

// Convert to floating point, 64b to float or convert to double
def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
                                             "^[SU]CVTF_ZPmZ_StoD")>;

// Convert to floating point, 32b to single or half
def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;

// Convert to floating point, 16b to half
def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;

// Copy, scalar
def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>;

// Copy, scalar SIMD&FP or imm
def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]",
                                           "^CPY_ZPzI_[BHSD]")>;

// Divides, 32 bit
def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
                                             "^[SU]DIV_ZPZZ_S")>;

// Divides, 64 bit
def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
                                             "^[SU]DIV_ZPZZ_D")>;

// Dot product, 8 bit
def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>;

// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;

// Dot product, 16 bit
def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>;

// Duplicate, immediate and indexed form
def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]",
                                           "^DUP_ZZI_[BHSDQ]")>;

// Duplicate, scalar form
def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]")>;

// Extend, sign or zero
def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ_[HSD]",
                                             "^[SU]XTH_ZPmZ_[SD]",
                                             "^[SU]XTW_ZPmZ_[D]")>;

// Extract
def : InstRW<[V2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
2255
2256// Extract narrow saturating
2257def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
2258                                             "^SQXTUN[BT]_ZZ_[BHS]")>;
2259
2260// Extract/insert operation, SIMD and FP scalar form
2261def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]",
2262                                            "^INSR_ZV_[BHSD]")>;
2263
2264// Extract/insert operation, scalar
2265def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]",
2266                                                "^INSR_ZR_[BHSD]")>;
2267
2268// Histogram operations
2269def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]",
2270                                           "^HISTSEG_ZZZ")>;
2271
2272// Horizontal operations, B, H, S form, immediate operands only
2273def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]")>;
2274
2275// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar
2276// operands only / immediate, scalar operands
2277def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;
2278
2279// Horizontal operations, D form, immediate operands only
2280def : InstRW<[V2Write_5cyc_2V02], (instrs INDEX_II_D)>;
2281
2282// Horizontal operations, D form, scalar, immediate operands / scalar operands
2283// only / immediate, scalar operands
2284def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D")>;
2285
2286// Logical
// NOT_ZPmZ_[BHSD] is matched by the (AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ) pattern
// below, so it does not need a pattern of its own.
2287def : InstRW<[V2Write_2cyc_1V],
2288             (instregex "^(AND|EOR|ORR)_ZI",
2289                        "^(AND|BIC|EOR|ORR)_ZZZ",
2290                        "^EOR(BT|TB)_ZZZ_[BHSD]",
2291                        "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]")>;
2293
2294// Max/min, basic and pairwise
2295def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
2296                                           "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
2297                                           "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;
2298
2299// Matching operations
2300// FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the
2301// latency for this instruction is 4 cycles.
2302def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>;
2303
2304// Matrix multiply-accumulate
2305def : InstRW<[V2Wr_ZMMA, V2Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
2306
2307// Move prefix
2308def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
2309                                           "^MOVPRFX_ZZ")>;
2310
2311// Multiply, B, H, S element size
2312def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
2313                                             "^MUL_ZPZZ_[BHS]",
2314                                             "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
2315                                             "^[SU]MULH_ZPZZ_[BHS]")>;
2316
2317// Multiply, D element size
2318def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
2319                                             "^MUL_ZPZZ_D",
2320                                             "^[SU]MULH_(ZPmZ|ZZZ)_D",
2321                                             "^[SU]MULH_ZPZZ_D")>;
2322
2323// Multiply long
2324def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
2325                                             "^[SU]MULL[BT]_ZZZ_[HSD]")>;
2326
2327// Multiply accumulate, B, H, S element size
// Two defs share the V2Wr_ZMABHS write: the first covers the indexed (ZZZI)
// and ZPZZZ forms, the second covers the predicated merging (ZPmZZ) forms of
// MLA/MLS/MAD/MSB and lists ReadDefault ahead of V2Rd_ZMABHS.
// NOTE(review): ReadDefault presumably occupies the governing-predicate source
// slot so that V2Rd_ZMABHS lands on the following source operand -- confirm
// against the operand order of the ZPmZZ instructions.
2328def : InstRW<[V2Wr_ZMABHS, V2Rd_ZMABHS],
2329             (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>;
2330def : InstRW<[V2Wr_ZMABHS, ReadDefault, V2Rd_ZMABHS],
2331             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
2332
2333// Multiply accumulate, D element size
// Same split as the B/H/S entry: the first def covers the indexed (ZZZI) and
// ZPZZZ forms, the second covers the predicated merging (ZPmZZ) forms and
// lists ReadDefault ahead of V2Rd_ZMAD.
// NOTE(review): ReadDefault presumably occupies the governing-predicate source
// slot so that V2Rd_ZMAD lands on the following source operand -- confirm
// against the operand order of the ZPmZZ instructions.
2334def : InstRW<[V2Wr_ZMAD, V2Rd_ZMAD],
2335             (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>;
2336def : InstRW<[V2Wr_ZMAD, ReadDefault, V2Rd_ZMAD],
2337             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
2338
2339// Multiply accumulate long
2340def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
2341                                                "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
2342
2343// Multiply accumulate saturating doubling long regular
2344def : InstRW<[V2Wr_ZMASQL, V2Rd_ZMASQ],
2345             (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]",
2346                        "^SQDML[AS]L[BT]_ZZZI_[SD]")>;
2347
2348// Multiply saturating doubling high, B, H, S element size
2349def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]",
2350                                             "^SQDMULH_ZZZI_[HS]")>;
2351
2352// Multiply saturating doubling high, D element size
2353def : InstRW<[V2Write_5cyc_2V02], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
2354
2355// Multiply saturating doubling long
2356def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
2357                                             "^SQDMULL[BT]_ZZZI_[SD]")>;
2358
2359// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
2360// element size
2361def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
2362                                                     "^SQRDCMLAH_ZZZ_[BHS]",
2363                                                     "^SQRDML[AS]H_ZZZI_[HS]",
2364                                                     "^SQRDCMLAH_ZZZI_[HS]")>;
2365
2366// Multiply saturating rounding doubling regular/complex accumulate, D element
2367// size
2368def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D",
2369                                                   "^SQRDCMLAH_ZZZ_D")>;
2370
2371// Multiply saturating rounding doubling regular/complex, B, H, S element size
2372def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]",
2373                                             "^SQRDMULH_ZZZI_[HS]")>;
2374
2375// Multiply saturating rounding doubling regular/complex, D element size
2376def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D")>;
2377
2378// Multiply/multiply long, (8x8) polynomial
2379def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B",
2380                                             "^PMULL[BT]_ZZZ_[HDQ]")>;
2381
2382// Predicate counting vector
2383def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>;
2384
2385// Reciprocal estimate
2386def : InstRW<[V2Write_4cyc_2V02], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
2387
2388// Reduction, arithmetic, B form
2389def : InstRW<[V2Write_9cyc_2V_4V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
2390
2391// Reduction, arithmetic, H form
2392def : InstRW<[V2Write_8cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
2393
2394// Reduction, arithmetic, S form
2395def : InstRW<[V2Write_6cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
2396
2397// Reduction, arithmetic, D form
2398def : InstRW<[V2Write_4cyc_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
2399
2400// Reduction, logical
2401def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>;
2402
2403// Reverse, vector
2404def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]",
2405                                           "^REVB_ZPmZ_[HSD]",
2406                                           "^REVH_ZPmZ_[SD]",
2407                                           "^REVW_ZPmZ_D")>;
2408
2409// Select, vector form
2410def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]")>;
2411
2412// Table lookup
2413def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>;
2414
2415// Table lookup extension
2416def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]")>;
2417
2418// Transpose, vector form
2419def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
2420
2421// Unpack and extend
2422def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;
2423
2424// Zip/unzip
2425def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
2426
2427// SVE floating-point instructions
2428// -----------------------------------------------------------------------------
2429
2430// Floating point absolute value/difference
// "^FAB[SD]_ZPmZ" matches both the FABS and FABD merging forms, so FABS needs
// no separate pattern; FABD_ZPZZ is listed on its own.
2431def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]",
2432                                           "^FABD_ZPZZ_[HSD]")>;
2434
2435// Floating point arithmetic
2436def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
2437                                           "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
2438                                           "^FADDP_ZPmZZ_[HSD]",
2439                                           "^FNEG_ZPmZ_[HSD]",
2440                                           "^FSUBR_ZPm[IZ]_[HSD]",
2441                                           "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
2442
2443// Floating point associative add, F16
2444def : InstRW<[V2Write_10cyc_1V1_9rc], (instrs FADDA_VPZ_H)>;
2445
2446// Floating point associative add, F32
2447def : InstRW<[V2Write_6cyc_1V1_5rc], (instrs FADDA_VPZ_S)>;
2448
2449// Floating point associative add, F64
2450def : InstRW<[V2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
2451
2452// Floating point compare
2453def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]",
2454                                            "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
2455                                            "^FCM(LE|LT)_PPzZ0_[HSD]",
2456                                            "^FCMUO_PPzZZ_[HSD]")>;
2457
2458// Floating point complex add
2459def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]")>;
2460
2461// Floating point complex multiply add
// The predicated (ZPmZZ) form lists ReadDefault ahead of V2Rd_ZFCMA; the
// indexed (ZZZI) form, which only exists for H and S here, does not.
// NOTE(review): ReadDefault presumably occupies the governing-predicate source
// slot so that V2Rd_ZFCMA lands on the following source operand -- confirm
// against the operand order of FCMLA_ZPmZZ.
2462def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>;
2463def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA],              (instregex "^FCMLA_ZZZI_[HS]")>;
2464
2465// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
2466def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
2467                                             "^FCVTLT_ZPmZ_HtoS",
2468                                             "^FCVTNT_ZPmZ_StoH")>;
2469
2470// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
2471// or F64 to F16)
2472def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
2473                                             "^FCVTLT_ZPmZ_StoD",
2474                                             "^FCVTNT_ZPmZ_DtoS")>;
2475
2476// Floating point convert, round to odd
2477def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
2478
2479// Floating point base2 log, F16
2480def : InstRW<[V2Write_6cyc_4V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
2481
2482// Floating point base2 log, F32
2483def : InstRW<[V2Write_4cyc_2V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
2484
2485// Floating point base2 log, F64
2486def : InstRW<[V2Write_3cyc_1V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
2487
2488// Floating point convert to integer, F16
2489def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
2490
2491// Floating point convert to integer, F32
2492def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
2493
2494// Floating point convert to integer, F64
2495def : InstRW<[V2Write_3cyc_1V02],
2496             (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
2497
2498// Floating point copy
2499def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]",
2500                                           "^FDUP_ZI_[HSD]")>;
2501
2502// Floating point divide, F16
2503def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
2504
2505// Floating point divide, F32
2506def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
2507
2508// Floating point divide, F64
2509def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
2510
2511// Floating point min/max pairwise
2512def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
2513
2514// Floating point min/max
2515def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
2516                                           "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
2517
2518// Floating point multiply
2519def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
2520                                           "^FMULX_ZPZZ_[HSD]",
2521                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
2522                                           "^FMUL_ZPZ[IZ]_[HSD]")>;
2523
2524// Floating point multiply accumulate
// Two defs share the V2Wr_ZFMA write: the first covers the predicated merging
// (ZPmZZ) forms of FMLA/FMLS/FNMLA/FNMLS/FMAD/FMSB/FNMAD/FNMSB and lists
// ReadDefault ahead of V2Rd_ZFMA; the second covers the indexed (ZZZI) and
// ZPZZZ forms.
// NOTE(review): ReadDefault presumably occupies the governing-predicate source
// slot so that V2Rd_ZFMA lands on the following source operand -- confirm
// against the operand order of the ZPmZZ instructions.
2525def : InstRW<[V2Wr_ZFMA, ReadDefault, V2Rd_ZFMA],
2526             (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
2527                        "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
2528def : InstRW<[V2Wr_ZFMA, V2Rd_ZFMA],
2529             (instregex "^FML[AS]_ZZZI_[HSD]",
2530                        "^FN?ML[AS]_ZPZZZ_[HSD]")>;
2531
2532// Floating point multiply add/sub accumulate long
2533def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
2534
2535// Floating point reciprocal estimate, F16
2536def : InstRW<[V2Write_6cyc_4V02], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
2537
2538// Floating point reciprocal estimate, F32
2539def : InstRW<[V2Write_4cyc_2V02], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
2540
2541// Floating point reciprocal estimate, F64
2542def : InstRW<[V2Write_3cyc_1V02], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
2543
2544// Floating point reciprocal step
2545def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
2546
2547// Floating point reduction, F16
2548def : InstRW<[V2Write_8cyc_4V],
2549             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>;
2550
2551// Floating point reduction, F32
2552def : InstRW<[V2Write_6cyc_3V],
2553             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>;
2554
2555// Floating point reduction, F64
2556def : InstRW<[V2Write_4cyc_2V],
2557             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>;
2558
2559// Floating point round to integral, F16
2560def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
2561
2562// Floating point round to integral, F32
2563def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
2564
2565// Floating point round to integral, F64
2566def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
2567
2568// Floating point square root, F16
2569def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>;
2570
2571// Floating point square root, F32
2572def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>;
2573
2574// Floating point square root, F64
2575def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>;
2576
2577// Floating point trigonometric exponentiation
2578def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;
2579
2580// Floating point trigonometric multiply add
2581def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]")>;
2582
2583// Floating point trigonometric, miscellaneous
2584def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>;
2585
2586// SVE BFloat16 (BF16) instructions
2587// -----------------------------------------------------------------------------
2588
2589// Convert, F32 to BF16
2590def : InstRW<[V2Write_4cyc_1V02], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
2591
2592// Dot product
2593def : InstRW<[V2Wr_ZBFDOT, V2Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
2594
2595// Matrix multiply accumulate
2596def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>;
2597
2598// Multiply accumulate long
2599def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>;
2600
2601// SVE Load instructions
2602// -----------------------------------------------------------------------------
2603
2604// Load vector
2605def : InstRW<[V2Write_6cyc_1L], (instrs LDR_ZXI)>;
2606
2607// Load predicate
2608def : InstRW<[V2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
2609
2610// Contiguous load, scalar + imm
2611def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM$",
2612                                           "^LD1S?B_[HSD]_IMM$",
2613                                           "^LD1S?H_[SD]_IMM$",
2614                                           "^LD1S?W_D_IMM$" )>;
2615// Contiguous load, scalar + scalar
2616def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]$",
2617                                           "^LD1S?B_[HSD]$",
2618                                           "^LD1S?H_[SD]$",
2619                                           "^LD1S?W_D$" )>;
2620
2621// Contiguous load broadcast, scalar + imm
2622def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
2623                                           "^LD1RS?B_[HSD]_IMM$",
2624                                           "^LD1RS?H_[SD]_IMM$",
2625                                           "^LD1RW_D_IMM$",
2626                                           "^LD1RSW_IMM$",
2627                                           "^LD1RQ_[BHWD]_IMM$")>;
2628
2629// Contiguous load broadcast, scalar + scalar
2630def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
2631
2632// Non temporal load, scalar + imm
2633// Non temporal load, scalar + scalar
2634def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZR[IR]$")>;
2635
2636// Non temporal gather load, vector + scalar 32-bit element size
2637def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
2638                                              "^LDNT1S[BH]_ZZR_S_REAL$")>;
2639
2640// Non temporal gather load, vector + scalar 64-bit element size
2641def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
2642def : InstRW<[V2Write_9cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;
2643
2644// Contiguous first faulting load, scalar + scalar
2645def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
2646                                              "^LDFF1S?B_[HSD]_REAL$",
2647                                              "^LDFF1S?H_[SD]_REAL$",
2648                                              "^LDFF1S?W_D_REAL$")>;
2649
2650// Contiguous non faulting load, scalar + imm
2651def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
2652                                           "^LDNF1S?B_[HSD]_IMM_REAL$",
2653                                           "^LDNF1S?H_[SD]_IMM_REAL$",
2654                                           "^LDNF1S?W_D_IMM_REAL$")>;
2655
2656// Contiguous Load two structures to two vectors, scalar + imm
2657def : InstRW<[V2Write_8cyc_2L_2V], (instregex "^LD2[BHWD]_IMM$")>;
2658
2659// Contiguous Load two structures to two vectors, scalar + scalar
2660def : InstRW<[V2Write_9cyc_2L_2V_2S], (instregex "^LD2[BHWD]$")>;
2661
2662// Contiguous Load three structures to three vectors, scalar + imm
2663def : InstRW<[V2Write_9cyc_3L_3V], (instregex "^LD3[BHWD]_IMM$")>;
2664
2665// Contiguous Load three structures to three vectors, scalar + scalar
2666def : InstRW<[V2Write_10cyc_3V_3L_3S], (instregex "^LD3[BHWD]$")>;
2667
2668// Contiguous Load four structures to four vectors, scalar + imm
2669def : InstRW<[V2Write_9cyc_4L_8V], (instregex "^LD4[BHWD]_IMM$")>;
2670
2671// Contiguous Load four structures to four vectors, scalar + scalar
2672def : InstRW<[V2Write_10cyc_4L_8V_4S], (instregex "^LD4[BHWD]$")>;
2673
2674// Gather load, vector + imm, 32-bit element size
2675def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
2676                                              "^GLD(FF)?1W_IMM_REAL$")>;
2677
2678// Gather load, vector + imm, 64-bit element size
2679def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
2680                                              "^GLD(FF)?1D_IMM_REAL$")>;
2681
2682// Gather load, 32-bit scaled offset
// Both patterns are anchored with a trailing '$', like the other gather-load
// entries, so each one names exactly the _REAL instructions it lists.
2683def : InstRW<[V2Write_10cyc_1L_8V],
2684             (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED_REAL$",
2685                        "^GLD(FF)?1W_[SU]XTW_SCALED_REAL$")>;
2686
2687// Gather load, 64-bit scaled offset
2688// NOTE: These instructions are not specified in the SOG.
2689def : InstRW<[V2Write_10cyc_1L_4V],
2690             (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED_REAL$",
2691                        "^GLD(FF)?1D_([SU]XTW_)?SCALED_REAL$")>;
2692
2693// Gather load, 32-bit unpacked unscaled offset
2694def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
2695                                              "^GLD(FF)?1W_[SU]XTW_REAL$")>;
2696
2697// Gather load, 64-bit unpacked unscaled offset
2698// NOTE: These instructions are not specified in the SOG.
2699def : InstRW<[V2Write_9cyc_1L_2V],
2700             (instregex "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?REAL$",
2701                        "^GLD(FF)?1D_([SU]XTW_)?REAL$")>;
2702
2703// SVE Store instructions
2704// -----------------------------------------------------------------------------
2705
2706// Store from predicate reg
2707def : InstRW<[V2Write_1cyc_1L01], (instrs STR_PXI)>;
2708
2709// Store from vector reg
2710def : InstRW<[V2Write_2cyc_1L01_1V01], (instrs STR_ZXI)>;
2711
2712// Contiguous store, scalar + imm
2713def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BHWD]_IMM$",
2714                                                  "^ST1B_[HSD]_IMM$",
2715                                                  "^ST1H_[SD]_IMM$",
2716                                                  "^ST1W_D_IMM$")>;
2717
2718// Contiguous store, scalar + scalar
// The halfword forms (ST1H, ST1H_S, ST1H_D) get their own def because their
// write, V2Write_2cyc_1L01_1S_1V01, differs from the V2Write_2cyc_1L01_1V01
// used for the byte, word and doubleword forms.
// NOTE(review): the extra "_1S" in the write name presumably models an
// additional micro-op for the scaled halfword index -- confirm against the SOG.
2719def : InstRW<[V2Write_2cyc_1L01_1S_1V01], (instregex "^ST1H(_[SD])?$")>;
2720def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BWD]$",
2721                                                  "^ST1B_[HSD]$",
2722                                                  "^ST1W_D$")>;
2723
2724// Contiguous store two structures from two vectors, scalar + imm
2725def : InstRW<[V2Write_4cyc_1L01_1V01], (instregex "^ST2[BHWD]_IMM$")>;
2726
2727// Contiguous store two structures from two vectors, scalar + scalar
2728def : InstRW<[V2Write_4cyc_2L01_2S_2V01], (instrs ST2H)>;
2729def : InstRW<[V2Write_4cyc_2L01_2V01], (instregex "^ST2[BWD]$")>;
2730
2731// Contiguous store three structures from three vectors, scalar + imm
2732def : InstRW<[V2Write_7cyc_9L01_9V01], (instregex "^ST3[BHWD]_IMM$")>;
2733
2734// Contiguous store three structures from three vectors, scalar + scalar
2735def : InstRW<[V2Write_7cyc_9L01_9S_9V01], (instregex "^ST3[BHWD]$")>;
2736
2737// Contiguous store four structures from four vectors, scalar + imm
2738def : InstRW<[V2Write_11cyc_18L01_18V01], (instregex "^ST4[BHWD]_IMM$")>;
2739
2740// Contiguous store four structures from four vectors, scalar + scalar
2741def : InstRW<[V2Write_11cyc_18L01_18S_18V01], (instregex "^ST4[BHWD]$")>;
2742
2743// Non temporal store, scalar + imm
2744def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
2745
2746// Non temporal store, scalar + scalar
2747def : InstRW<[V2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
2748def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
2749
2750// Scatter non temporal store, vector + scalar 32-bit element size
2751def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^STNT1[BHW]_ZZR_S")>;
2752
2753// Scatter non temporal store, vector + scalar 64-bit element size
2754def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^STNT1[BHWD]_ZZR_D")>;
2755
2756// Scatter store vector + imm 32-bit element size
2757def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_IMM$",
2758                                                  "^SST1W_IMM$")>;
2759
2760// Scatter store vector + imm 64-bit element size
2761def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_IMM$",
2762                                                  "^SST1D_IMM$")>;
2763
2764// Scatter store, 32-bit scaled offset
2765def : InstRW<[V2Write_4cyc_4L01_4V01],
2766             (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
2767
2768// Scatter store, 32-bit unpacked unscaled offset
2769def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_[SU]XTW$",
2770                                                  "^SST1D_[SU]XTW$")>;
2771
2772// Scatter store, 32-bit unpacked scaled offset
2773def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
2774                                                  "^SST1D_[SU]XTW_SCALED$")>;
2775
2776// Scatter store, 32-bit unscaled offset
2777def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_[SU]XTW$",
2778                                                  "^SST1W_[SU]XTW$")>;
2779
2780// Scatter store, 64-bit scaled offset
2781def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_SCALED$",
2782                                                  "^SST1D_SCALED$")>;
2783
2784// Scatter store, 64-bit unscaled offset
2785def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D$",
2786                                                  "^SST1D$")>;
2787
2788// SVE Miscellaneous instructions
2789// -----------------------------------------------------------------------------
2790
2791// Read first fault register, unpredicated
2792def : InstRW<[V2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;
2793
2794// Read first fault register, predicated
2795def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;
2796
2797// Read first fault register and set flags
2798def : InstRW<[V2Write_4or5cyc_2M0_2M], (instrs RDFFRS_PPz)>;
2799
2800// Set first fault register
2801// Write to first fault register
2802def : InstRW<[V2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;
2803
2804// Prefetch
2805// NOTE: This is not specified in the SOG.
// The pattern deliberately has no trailing '$': one entry is meant to cover
// every instruction whose name starts with PRFB, PRFH, PRFW or PRFD,
// whatever its addressing-mode suffix.
2806def : InstRW<[V2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;
2807
2808// SVE Cryptographic instructions
2809// -----------------------------------------------------------------------------
2810
2811// Crypto AES ops
2812def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$",
2813                                           "^AESI?MC_ZZ_B$")>;
2814
2815// Crypto SHA3 ops
2816def : InstRW<[V2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$",
2817                                            "^RAX1_ZZZ_D$",
2818                                            "^XAR_ZZZI_[BHSD]$")>;
2819
2820// Crypto SM4 ops
2821def : InstRW<[V2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;
2822
2823}
2824