// Origin: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td (revision b59017c5cad90d0f09a59e68c00457b7faf93e7c)
//=- AArch64SchedNeoverseV2.td - NeoverseV2 Scheduling Defs --*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Arm Neoverse V2 processors.
// All information is taken from the V2 Software Optimisation guide:
//
// https://developer.arm.com/documentation/PJDOC-466751330-593177/r0p2
//
//===----------------------------------------------------------------------===//

// Machine model record for the Arm Neoverse V2 core. The processor resources
// and scheduling read/write types in this file are attached to it through the
// `let SchedModel = NeoverseV2Model in { ... }` scope that follows.
def NeoverseV2Model : SchedMachineModel {
  let IssueWidth            =  16; // Micro-ops dispatched at a time.
  let MicroOpBufferSize     = 320; // Entries in micro-op re-order buffer.
  let LoadLatency           =   4; // Optimistic load latency.
  let MispredictPenalty     =  10; // Extra cycles for mispredicted branch.  NOTE: Copied from N2.
  let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
  let CompleteModel         =   1;

  // Features not covered by this model: everything listed in SMEUnsupported.F
  // plus SVE2p1, CPA and CSSC.
  list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
                                                    [HasSVE2p1, HasCPA,
                                                    HasCSSC]);
}

29//===----------------------------------------------------------------------===//
30// Define each kind of processor resource and number available on Neoverse V2.
31// Instructions are first fetched and then decoded into internal macro-ops
32// (MOPs). From there, the MOPs proceed through register renaming and dispatch
33// stages. A MOP can be split into two micro-ops further down the pipeline
34// after the decode stage. Once dispatched, micro-ops wait for their operands
35// and issue out-of-order to one of seventeen issue pipelines. Each issue
36// pipeline can accept one micro-op per cycle.
37
// Every record declared from here up to the matching closing brace is bound
// to the Neoverse V2 machine model.
let SchedModel = NeoverseV2Model in {

// Define the (17) issue ports.
// The ProcResource<N> argument is the number of identical units behind each
// name: 2 (B) + 4 (S0-S3) + 2 (M0/M1) + 4 (V0-V3) + 2 (L01) + 1 (L2) + 2 (D)
// = 17.
def V2UnitB   : ProcResource<2>;  // Branch 0/1
def V2UnitS0  : ProcResource<1>;  // Integer single-cycle 0
def V2UnitS1  : ProcResource<1>;  // Integer single-cycle 1
def V2UnitS2  : ProcResource<1>;  // Integer single-cycle 2
def V2UnitS3  : ProcResource<1>;  // Integer single-cycle 3
def V2UnitM0  : ProcResource<1>;  // Integer single/multicycle 0
def V2UnitM1  : ProcResource<1>;  // Integer single/multicycle 1
def V2UnitV0  : ProcResource<1>;  // FP/ASIMD 0
def V2UnitV1  : ProcResource<1>;  // FP/ASIMD 1
def V2UnitV2  : ProcResource<1>;  // FP/ASIMD 2
def V2UnitV3  : ProcResource<1>;  // FP/ASIMD 3
def V2UnitL01 : ProcResource<2>;  // Load/Store 0/1
def V2UnitL2  : ProcResource<1>;  // Load 2
def V2UnitD   : ProcResource<2>;  // Store data 0/1

// Resource groups: each names a set of issue ports, any one of which may
// execute a micro-op that is assigned to the group.
def V2UnitR   : ProcResGroup<[V2UnitS0, V2UnitS1]>;  // Integer single-cycle 0/1
def V2UnitS   : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3]>;  // Integer single-cycle 0/1/2/3
def V2UnitF   : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitM0, V2UnitM1]>;  // Integer single-cycle 0/1 and single/multicycle 0/1
def V2UnitI   : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3, V2UnitM0, V2UnitM1]>;  // Integer single-cycle 0/1/2/3 and single/multicycle 0/1
def V2UnitM   : ProcResGroup<[V2UnitM0, V2UnitM1]>;  // Integer single/multicycle 0/1
def V2UnitL   : ProcResGroup<[V2UnitL01, V2UnitL2]>; // Load/Store 0/1 and Load 2
def V2UnitV   : ProcResGroup<[V2UnitV0, V2UnitV1, V2UnitV2, V2UnitV3]>;  // FP/ASIMD 0/1/2/3
def V2UnitV01 : ProcResGroup<[V2UnitV0, V2UnitV1]>;  // FP/ASIMD 0/1
def V2UnitV02 : ProcResGroup<[V2UnitV0, V2UnitV2]>;  // FP/ASIMD 0/2
def V2UnitV13 : ProcResGroup<[V2UnitV1, V2UnitV3]>;  // FP/ASIMD 1/3
def V2UnitV23 : ProcResGroup<[V2UnitV2, V2UnitV3]>;  // FP/ASIMD 2/3

// Define commonly used read types.

// No forwarding is provided for these types.
// Each generic read type is given a ReadAdvance of 0 cycles.
def : ReadAdvance<ReadI,       0>;
def : ReadAdvance<ReadISReg,   0>;
def : ReadAdvance<ReadIEReg,   0>;
def : ReadAdvance<ReadIM,      0>;
def : ReadAdvance<ReadIMA,     0>;
def : ReadAdvance<ReadID,      0>;
def : ReadAdvance<ReadExtrHi,  0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadST,      0>;
def : ReadAdvance<ReadVLD,     0>;

// NOTE: Copied from N2.
// These generic write types are bound to an empty resource list: WriteAtomic
// is flagged as unsupported, the others only carry a latency.
def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint,    []> { let Latency = 1; }
def : WriteRes<WriteLDHi,    []> { let Latency = 4; }

88//===----------------------------------------------------------------------===//
89// Define customized scheduler read/write types specific to the Neoverse V2.
90
91//===----------------------------------------------------------------------===//
92
93// Define generic 0 micro-op types
94def V2Write_0cyc : SchedWriteRes<[]> { let Latency = 0; }
95
// Define generic 1 micro-op types
//
// Naming: V2Write_<latency>cyc_1<unit>, where <unit> is the V2Unit* resource
// (or resource group) the single micro-op is issued to. Entries that set
// ReleaseAtCycles override the per-resource release cycle (see
// ProcWriteResources in LLVM's TargetSchedule.td).

def V2Write_1cyc_1B    : SchedWriteRes<[V2UnitB]>   { let Latency = 1; }
def V2Write_1cyc_1F    : SchedWriteRes<[V2UnitF]>   { let Latency = 1; }
def V2Write_1cyc_1I    : SchedWriteRes<[V2UnitI]>   { let Latency = 1; }
def V2Write_1cyc_1M    : SchedWriteRes<[V2UnitM]>   { let Latency = 1; }
def V2Write_1cyc_1M0   : SchedWriteRes<[V2UnitM0]>  { let Latency = 1; }
def V2Write_1cyc_1L01  : SchedWriteRes<[V2UnitL01]> { let Latency = 1; }
def V2Write_2cyc_1M    : SchedWriteRes<[V2UnitM]>   { let Latency = 2; }
def V2Write_3cyc_1M    : SchedWriteRes<[V2UnitM]>   { let Latency = 3; }
def V2Write_2cyc_1M0   : SchedWriteRes<[V2UnitM0]>  { let Latency = 2; }
def V2Write_3cyc_1M0   : SchedWriteRes<[V2UnitM0]>  { let Latency = 3; }
def V2Write_5cyc_1M0   : SchedWriteRes<[V2UnitM0]>  { let Latency = 5; }
def V2Write_12cyc_1M0  : SchedWriteRes<[V2UnitM0]>  { let Latency = 12;
                                                      let ReleaseAtCycles = [12]; }
def V2Write_20cyc_1M0  : SchedWriteRes<[V2UnitM0]>  { let Latency = 20;
                                                      let ReleaseAtCycles = [20]; }
def V2Write_4cyc_1L    : SchedWriteRes<[V2UnitL]>   { let Latency = 4; }
def V2Write_6cyc_1L    : SchedWriteRes<[V2UnitL]>   { let Latency = 6; }
def V2Write_2cyc_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 2; }
def V2Write_2cyc_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 2; }
def V2Write_2cyc_1V01  : SchedWriteRes<[V2UnitV01]> { let Latency = 2; }
def V2Write_2cyc_1V23  : SchedWriteRes<[V2UnitV23]> { let Latency = 2; }
def V2Write_3cyc_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 3; }
def V2Write_3cyc_1V01  : SchedWriteRes<[V2UnitV01]> { let Latency = 3;
                                                      let ReleaseAtCycles = [2]; }
def V2Write_3cyc_1V23  : SchedWriteRes<[V2UnitV23]> { let Latency = 3; }
def V2Write_4cyc_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 4; }
def V2Write_5cyc_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 5; }
def V2Write_6cyc_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 6; }
def V2Write_12cyc_1V   : SchedWriteRes<[V2UnitV]>   { let Latency = 12; }
def V2Write_3cyc_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 3; }
def V2Write_3cyc_1V02  : SchedWriteRes<[V2UnitV02]> { let Latency = 3; }
def V2Write_4cyc_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 4; }
def V2Write_4cyc_1V02  : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Write_7cyc_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 7;
                                                      let ReleaseAtCycles = [7]; }
def V2Write_7cyc_1V02  : SchedWriteRes<[V2UnitV02]> { let Latency = 7;
                                                      let ReleaseAtCycles = [2]; }
def V2Write_9cyc_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 9; }
def V2Write_9cyc_1V02  : SchedWriteRes<[V2UnitV02]> { let Latency = 9;
                                                      let ReleaseAtCycles = [2]; }
def V2Write_10cyc_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 10; }
def V2Write_10cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 10;
                                                      let ReleaseAtCycles = [2]; }
// NOTE(review): release cycle (11) is one less than the latency (12), unlike
// the 7- and 20-cycle V0 entries where the two match - confirm against the
// optimisation guide.
def V2Write_12cyc_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 12;
                                                      let ReleaseAtCycles = [11]; }
def V2Write_13cyc_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 13; }
def V2Write_15cyc_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 15; }
def V2Write_15cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 15;
                                                      let ReleaseAtCycles = [8]; }
def V2Write_16cyc_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 16; }
def V2Write_16cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 16;
                                                      let ReleaseAtCycles = [8]; }
def V2Write_20cyc_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 20;
                                                      let ReleaseAtCycles = [20]; }
def V2Write_2cyc_1V1   : SchedWriteRes<[V2UnitV1]>  { let Latency = 2; }
def V2Write_2cyc_1V13  : SchedWriteRes<[V2UnitV13]> { let Latency = 2; }
def V2Write_3cyc_1V1   : SchedWriteRes<[V2UnitV1]>  { let Latency = 3; }
def V2Write_4cyc_1V1   : SchedWriteRes<[V2UnitV1]>  { let Latency = 4; }
def V2Write_4cyc_1V13  : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Write_6cyc_1V1   : SchedWriteRes<[V2UnitV1]>  { let Latency = 6; }
def V2Write_10cyc_1V1  : SchedWriteRes<[V2UnitV1]>  { let Latency = 10; }
def V2Write_6cyc_1L01  : SchedWriteRes<[V2UnitL01]> { let Latency = 6; }

161//===----------------------------------------------------------------------===//
162// Define generic 2 micro-op types
163
164def V2Write_1cyc_1B_1R : SchedWriteRes<[V2UnitB, V2UnitR]> {
165  let Latency     = 1;
166  let NumMicroOps = 2;
167}
168
169def V2Write_6cyc_1M0_1B : SchedWriteRes<[V2UnitM0, V2UnitB]> {
170  let Latency     = 6;
171  let NumMicroOps = 2;
172}
173
174def V2Write_9cyc_1M0_1L : SchedWriteRes<[V2UnitM0, V2UnitL]> {
175  let Latency     = 9;
176  let NumMicroOps = 2;
177}
178
179def V2Write_3cyc_1I_1M : SchedWriteRes<[V2UnitI, V2UnitM]> {
180  let Latency     = 3;
181  let NumMicroOps = 2;
182}
183
184def V2Write_1cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
185  let Latency     = 1;
186  let NumMicroOps = 2;
187}
188
189def V2Write_3cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
190  let Latency     = 3;
191  let NumMicroOps = 2;
192}
193
194def V2Write_4cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
195  let Latency     = 4;
196  let NumMicroOps = 2;
197}
198
199def V2Write_5cyc_1L_1F : SchedWriteRes<[V2UnitL, V2UnitF]> {
200  let Latency     = 5;
201  let NumMicroOps = 2;
202}
203
204def V2Write_6cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> {
205  let Latency     = 6;
206  let NumMicroOps = 2;
207}
208
209def V2Write_7cyc_1F_1L : SchedWriteRes<[V2UnitF, V2UnitL]> {
210  let Latency     = 7;
211  let NumMicroOps = 2;
212}
213
214def V2Write_7cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> {
215  let Latency     = 7;
216  let NumMicroOps = 2;
217}
218
219def V2Write_1cyc_1L01_1D : SchedWriteRes<[V2UnitL01, V2UnitD]> {
220  let Latency     = 1;
221  let NumMicroOps = 2;
222}
223
224def V2Write_5cyc_1M0_1V : SchedWriteRes<[V2UnitM0, V2UnitV]> {
225  let Latency     = 5;
226  let NumMicroOps = 2;
227}
228
229def V2Write_2cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> {
230  let Latency     = 2;
231  let NumMicroOps = 2;
232}
233
234def V2Write_2cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> {
235  let Latency     = 2;
236  let NumMicroOps = 2;
237}
238
239def V2Write_2cyc_2V01  : SchedWriteRes<[V2UnitV01, V2UnitV01]> {
240  let Latency = 2;
241  let NumMicroOps = 2;
242}
243
// 2 micro-op write types: V01, L01, V and M unit combinations.
def V2Write_4cyc_2V01  : SchedWriteRes<[V2UnitV01, V2UnitV01]> {
  let Latency = 4;
  let NumMicroOps = 2;
}

def V2Write_4cyc_1L01_1V01  : SchedWriteRes<[V2UnitL01, V2UnitV01]> {
  let Latency = 4;
  let NumMicroOps = 2;
}

def V2Write_4cyc_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_4cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_4cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_4cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_6cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_6cyc_2L : SchedWriteRes<[V2UnitL, V2UnitL]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_8cyc_1L_1V : SchedWriteRes<[V2UnitL, V2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 2;
}

def V2Write_4cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_3cyc_1M0_1M  : SchedWriteRes<[V2UnitM0, V2UnitM]> {
  let Latency     = 3;
  let NumMicroOps = 2;
}

def V2Write_4cyc_1M0_1M  : SchedWriteRes<[V2UnitM0, V2UnitM]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_1cyc_1M0_1M  : SchedWriteRes<[V2UnitM0, V2UnitM]> {
  let Latency     = 1;
  let NumMicroOps = 2;
}

def V2Write_2cyc_1M0_1M  : SchedWriteRes<[V2UnitM0, V2UnitM]> {
  let Latency     = 2;
  let NumMicroOps = 2;
}

// 2 micro-op write types: V/M0, V/V13 and L/M, L/S unit combinations.
def V2Write_6cyc_2V1 : SchedWriteRes<[V2UnitV1, V2UnitV1]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_4cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_5cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> {
  let Latency     = 5;
  let NumMicroOps = 2;
}

def V2Write_5cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> {
  let Latency     = 5;
  let NumMicroOps = 2;
}

def V2Write_5cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> {
  let Latency     = 5;
  let NumMicroOps = 2;
}

def V2Write_6cyc_1V1_1M0 : SchedWriteRes<[V2UnitV1, V2UnitM0]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_7cyc_1M0_1V02 : SchedWriteRes<[V2UnitM0, V2UnitV02]> {
  let Latency     = 7;
  let NumMicroOps = 2;
}

def V2Write_2cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> {
  let Latency     = 2;
  let NumMicroOps = 2;
}

def V2Write_3cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> {
  let Latency     = 3;
  let NumMicroOps = 2;
}

def V2Write_6cyc_1V_1V13 : SchedWriteRes<[V2UnitV, V2UnitV13]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_6cyc_1L_1M : SchedWriteRes<[V2UnitL, V2UnitM]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_6cyc_1L_1S : SchedWriteRes<[V2UnitL, V2UnitS]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_4cyc_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_8cyc_1M0_1V01 : SchedWriteRes<[V2UnitM0, V2UnitV01]> {
  let Latency     = 8;
  let NumMicroOps = 2;
}

384//===----------------------------------------------------------------------===//
385// Define generic 3 micro-op types
386
387def V2Write_1cyc_1L01_1D_1I : SchedWriteRes<[V2UnitL01, V2UnitD, V2UnitI]> {
388  let Latency     = 1;
389  let NumMicroOps = 3;
390}
391
392def V2Write_2cyc_1L01_1V01_1I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitI]> {
393  let Latency     = 2;
394  let NumMicroOps = 3;
395}
396
397def V2Write_2cyc_1L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> {
398  let Latency     = 2;
399  let NumMicroOps = 3;
400}
401
402def V2Write_4cyc_1L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> {
403  let Latency     = 4;
404  let NumMicroOps = 3;
405}
406
407def V2Write_9cyc_1L_2V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> {
408  let Latency     = 9;
409  let NumMicroOps = 3;
410}
411
412def V2Write_4cyc_3V01  : SchedWriteRes<[V2UnitV01, V2UnitV01, V2UnitV01]> {
413  let Latency = 4;
414  let NumMicroOps = 3;
415}
416
417def V2Write_7cyc_1M_1M0_1V : SchedWriteRes<[V2UnitM, V2UnitM0, V2UnitV]> {
418  let Latency     = 7;
419  let NumMicroOps = 3;
420}
421
422def V2Write_2cyc_1L01_1S_1V : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV]> {
423  let Latency     = 2;
424  let NumMicroOps = 3;
425}
426
427def V2Write_2cyc_1L01_1S_1V01 : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV01]> {
428  let Latency     = 2;
429  let NumMicroOps = 3;
430}
431
432def V2Write_6cyc_3L : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL]> {
433  let Latency     = 6;
434  let NumMicroOps = 3;
435}
436
437def V2Write_6cyc_3V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV]> {
438  let Latency     = 6;
439  let NumMicroOps = 3;
440}
441
442def V2Write_8cyc_1L_2V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> {
443  let Latency     = 8;
444  let NumMicroOps = 3;
445}
446
447//===----------------------------------------------------------------------===//
448// Define generic 4 micro-op types
449
450def V2Write_2cyc_1L01_2V01_1I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
451                                               V2UnitI]> {
452  let Latency     = 2;
453  let NumMicroOps = 4;
454}
455
456def V2Write_2cyc_2L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
457                                            V2UnitV01, V2UnitV01]> {
458  let Latency     = 2;
459  let NumMicroOps = 4;
460}
461
462def V2Write_4cyc_2L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
463                                            V2UnitV01, V2UnitV01]> {
464  let Latency     = 4;
465  let NumMicroOps = 4;
466}
467
468def V2Write_5cyc_1I_3L : SchedWriteRes<[V2UnitI, V2UnitL, V2UnitL, V2UnitL]> {
469  let Latency     = 5;
470  let NumMicroOps = 4;
471}
472
473def V2Write_9cyc_2L_2V1 : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV1,
474                                         V2UnitV1]> {
475  let Latency     = 9;
476  let NumMicroOps = 4;
477}
478
479def V2Write_6cyc_4V0 : SchedWriteRes<[V2UnitV0, V2UnitV0, V2UnitV0, V2UnitV0]> {
480  let Latency     = 6;
481  let NumMicroOps = 4;
482}
483
484def V2Write_8cyc_4V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
485  let Latency     = 8;
486  let NumMicroOps = 4;
487}
488
489def V2Write_6cyc_2V_2V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
490                                          V2UnitV13]> {
491  let Latency     = 6;
492  let NumMicroOps = 4;
493}
494
495def V2Write_8cyc_2V_2V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
496                                          V2UnitV13]> {
497  let Latency     = 8;
498  let NumMicroOps = 4;
499}
500
501def V2Write_6cyc_4V02 : SchedWriteRes<[V2UnitV02, V2UnitV02, V2UnitV02,
502                                       V2UnitV02]> {
503  let Latency     = 6;
504  let NumMicroOps = 4;
505}
506
507def V2Write_6cyc_4V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
508  let Latency     = 6;
509  let NumMicroOps = 4;
510}
511
// 4 micro-op write types: load/store, M and V unit combinations.
def V2Write_8cyc_2L_2V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 4;
}

def V2Write_9cyc_2L_2V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> {
  let Latency     = 9;
  let NumMicroOps = 4;
}

def V2Write_2cyc_2L01_2V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV,
                                          V2UnitV]> {
  let Latency     = 2;
  let NumMicroOps = 4;
}

def V2Write_4cyc_2L01_2V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV,
                                          V2UnitV]> {
  let Latency     = 4;
  let NumMicroOps = 4;
}

def V2Write_8cyc_2M0_2V02 : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitV02,
                                          V2UnitV02]> {
  let Latency     = 8;
  let NumMicroOps = 4;
}

def V2Write_8cyc_2V_2V1 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV1,
                                         V2UnitV1]> {
  let Latency     = 8;
  let NumMicroOps = 4;
}

def V2Write_4cyc_2M0_2M : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM,
                                         V2UnitM]> {
  let Latency     = 4;
  let NumMicroOps = 4;
}

def V2Write_5cyc_2M0_2M : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM,
                                         V2UnitM]> {
  let Latency     = 5;
  let NumMicroOps = 4;
}

def V2Write_6cyc_2I_2L : SchedWriteRes<[V2UnitI, V2UnitI, V2UnitL, V2UnitL]> {
  let Latency     = 6;
  let NumMicroOps = 4;
}

def V2Write_7cyc_4L : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL]> {
  let Latency     = 7;
  let NumMicroOps = 4;
}

def V2Write_6cyc_1L01_3V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
                                            V2UnitV01]> {
  let Latency     = 6;
  let NumMicroOps = 4;
}

574//===----------------------------------------------------------------------===//
575// Define generic 5 micro-op types
576
577def V2Write_2cyc_1L01_2V01_2I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
578                                               V2UnitI, V2UnitI]> {
579  let Latency     = 2;
580  let NumMicroOps = 5;
581}
582
583def V2Write_8cyc_2L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV,
584                                        V2UnitV]> {
585  let Latency     = 8;
586  let NumMicroOps = 5;
587}
588
589def V2Write_9cyc_1L_4V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
590                                        V2UnitV]> {
591  let Latency     = 9;
592  let NumMicroOps = 5;
593}
594
595def V2Write_10cyc_1L_4V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
596                                         V2UnitV]> {
597  let Latency     = 10;
598  let NumMicroOps = 5;
599}
600
601def V2Write_6cyc_5V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV,
602                                     V2UnitV]> {
603  let Latency     = 6;
604  let NumMicroOps = 5;
605}
606
607//===----------------------------------------------------------------------===//
608// Define generic 6 micro-op types
609
610def V2Write_8cyc_3L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
611                                        V2UnitV, V2UnitV, V2UnitV]> {
612  let Latency     = 8;
613  let NumMicroOps = 6;
614}
615
616def V2Write_9cyc_3L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
617                                        V2UnitV, V2UnitV, V2UnitV]> {
618  let Latency     = 9;
619  let NumMicroOps = 6;
620}
621
622def V2Write_9cyc_2L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV,
623                                        V2UnitV, V2UnitV, V2UnitV]> {
624  let Latency     = 9;
625  let NumMicroOps = 6;
626}
627
628def V2Write_9cyc_2L_2V_2S : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV,
629                                           V2UnitV, V2UnitS, V2UnitS]> {
630  let Latency     = 9;
631  let NumMicroOps = 6;
632}
633
634def V2Write_9cyc_2V_4V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
635                                          V2UnitV13, V2UnitV13, V2UnitV13]> {
636  let Latency     = 9;
637  let NumMicroOps = 6;
638}
639
640def V2Write_2cyc_3L01_3V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
641                                          V2UnitV, V2UnitV, V2UnitV]> {
642  let Latency     = 2;
643  let NumMicroOps = 6;
644}
645
646def V2Write_4cyc_2L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
647                                            V2UnitV01, V2UnitV01, V2UnitV01]> {
648  let Latency     = 4;
649  let NumMicroOps = 6;
650}
651
652def V2Write_5cyc_2L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
653                                            V2UnitV01, V2UnitV01, V2UnitV01]> {
654  let Latency     = 5;
655  let NumMicroOps = 6;
656}
657
658def V2Write_2cyc_3L01_3V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
659                                            V2UnitV01, V2UnitV01, V2UnitV01]> {
660  let Latency     = 2;
661  let NumMicroOps = 6;
662}
663
664def V2Write_4cyc_2L01_2S_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitS,
665                                               V2UnitS, V2UnitV01, V2UnitV01]> {
666  let Latency     = 4;
667  let NumMicroOps = 6;
668}
669
670//===----------------------------------------------------------------------===//
671// Define generic 7 micro-op types
672
673def V2Write_8cyc_3L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
674                                        V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
675  let Latency     = 8;
676  let NumMicroOps = 7;
677}
678
679//===----------------------------------------------------------------------===//
680// Define generic 8 micro-op types
681
682def V2Write_2cyc_4L01_4V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
683                                          V2UnitL01, V2UnitV, V2UnitV, V2UnitV,
684                                          V2UnitV]> {
685  let Latency     = 2;
686  let NumMicroOps = 8;
687}
688
689def V2Write_2cyc_4L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
690                                            V2UnitL01, V2UnitV01, V2UnitV01,
691                                            V2UnitV01, V2UnitV01]> {
692  let Latency     = 2;
693  let NumMicroOps = 8;
694}
695
696def V2Write_4cyc_4L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
697                                            V2UnitL01, V2UnitV01, V2UnitV01,
698                                            V2UnitV01, V2UnitV01]> {
699  let Latency     = 4;
700  let NumMicroOps = 8;
701}
702
703def V2Write_6cyc_2L01_6V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
704                                            V2UnitV01, V2UnitV01, V2UnitV01,
705                                            V2UnitV01, V2UnitV01]> {
706  let Latency     = 6;
707  let NumMicroOps = 8;
708}
709
710def V2Write_8cyc_4L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
711                                        V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
712  let Latency     = 8;
713  let NumMicroOps = 8;
714}
715
716//===----------------------------------------------------------------------===//
717// Define generic 9 micro-op types
718
719def V2Write_6cyc_3L01_6V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
720                                            V2UnitV01, V2UnitV01, V2UnitV01,
721                                            V2UnitV01, V2UnitV01, V2UnitV01]> {
722  let Latency     = 6;
723  let NumMicroOps = 9;
724}
725
726def V2Write_10cyc_1L_8V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
727                                         V2UnitV, V2UnitV, V2UnitV, V2UnitV,
728                                         V2UnitV]> {
729  let Latency     = 10;
730  let NumMicroOps = 9;
731}
732
733def V2Write_10cyc_3V_3L_3S : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV,
734                                            V2UnitL, V2UnitL, V2UnitL,
735                                            V2UnitS, V2UnitS, V2UnitS]> {
736  let Latency     = 10;
737  let NumMicroOps = 9;
738}
739
740//===----------------------------------------------------------------------===//
741// Define generic 10 micro-op types
742
743def V2Write_9cyc_6L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
744                                        V2UnitL, V2UnitL, V2UnitV, V2UnitV,
745                                        V2UnitV, V2UnitV]> {
746  let Latency     = 9;
747  let NumMicroOps = 10;
748}
749
750//===----------------------------------------------------------------------===//
751// Define generic 12 micro-op types
752
753def V2Write_5cyc_4L01_8V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
754                                            V2UnitL01, V2UnitV01, V2UnitV01,
755                                            V2UnitV01, V2UnitV01, V2UnitV01,
756                                            V2UnitV01, V2UnitV01, V2UnitV01]> {
757  let Latency     = 5;
758  let NumMicroOps = 12;
759}
760
761def V2Write_9cyc_4L_8V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
762                                        V2UnitL, V2UnitV, V2UnitV,
763                                        V2UnitV, V2UnitV, V2UnitV,
764                                        V2UnitV, V2UnitV, V2UnitV]> {
765  let Latency     = 9;
766  let NumMicroOps = 12;
767}
768
769def V2Write_10cyc_4L_8V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
770                                         V2UnitL, V2UnitV, V2UnitV,
771                                         V2UnitV, V2UnitV, V2UnitV,
772                                         V2UnitV, V2UnitV, V2UnitV]> {
773  let Latency     = 10;
774  let NumMicroOps = 12;
775}
776
777//===----------------------------------------------------------------------===//
778// Define generic 16 micro-op types
779
780def V2Write_7cyc_4L01_12V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
781                                             V2UnitL01, V2UnitV01, V2UnitV01,
782                                             V2UnitV01, V2UnitV01, V2UnitV01,
783                                             V2UnitV01, V2UnitV01, V2UnitV01,
784                                             V2UnitV01, V2UnitV01, V2UnitV01,
785                                             V2UnitV01]> {
786  let Latency     = 7;
787  let NumMicroOps = 16;
788}
789
790def V2Write_10cyc_4L_8V_4S : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
791                                            V2UnitL, V2UnitV, V2UnitV,
792                                            V2UnitV, V2UnitV, V2UnitV,
793                                            V2UnitV, V2UnitV, V2UnitV,
794                                            V2UnitS, V2UnitS, V2UnitS,
795                                            V2UnitS]> {
796  let Latency     = 10;
797  let NumMicroOps = 16;
798}
799
800//===----------------------------------------------------------------------===//
801// Define generic 18 micro-op types
802
803def V2Write_7cyc_9L01_9V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
804                                            V2UnitL01, V2UnitL01, V2UnitL01,
805                                            V2UnitL01, V2UnitL01, V2UnitL01,
806                                            V2UnitV01, V2UnitV01, V2UnitV01,
807                                            V2UnitV01, V2UnitV01, V2UnitV01,
808                                            V2UnitV01, V2UnitV01, V2UnitV01]> {
809  let Latency     = 7;
810  let NumMicroOps = 18;
811}
812
813//===----------------------------------------------------------------------===//
814// Define generic 27 micro-op types
815
816def V2Write_7cyc_9L01_9S_9V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
817                                               V2UnitL01, V2UnitL01, V2UnitL01,
818                                               V2UnitL01, V2UnitL01, V2UnitL01,
819                                               V2UnitS, V2UnitS, V2UnitS,
820                                               V2UnitS, V2UnitS, V2UnitS,
821                                               V2UnitS, V2UnitS, V2UnitS,
822                                               V2UnitV01, V2UnitV01, V2UnitV01,
823                                               V2UnitV01, V2UnitV01, V2UnitV01,
824                                               V2UnitV01, V2UnitV01,
825                                               V2UnitV01]> {
826  let Latency     = 7;
827  let NumMicroOps = 27;
828}
829
830//===----------------------------------------------------------------------===//
831// Define generic 36 micro-op types
832
// 36 micro-ops with an 11-cycle latency, consuming 18x V2UnitL01 and
// 18x V2UnitV01 (six entries per row below).
def V2Write_11cyc_18L01_18V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let NumMicroOps = 36;
  let Latency     = 11;
}
849
850//===----------------------------------------------------------------------===//
851// Define generic 54 micro-op types
852
// 54 micro-ops with an 11-cycle latency, consuming 18x V2UnitL01, 18x V2UnitS
// and 18x V2UnitV01 (six entries per row below).
def V2Write_11cyc_18L01_18S_18V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitS, V2UnitS, V2UnitS, V2UnitS, V2UnitS, V2UnitS,
    V2UnitS, V2UnitS, V2UnitS, V2UnitS, V2UnitS, V2UnitS,
    V2UnitS, V2UnitS, V2UnitS, V2UnitS, V2UnitS, V2UnitS,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let NumMicroOps = 54;
  let Latency     = 11;
}
880
881//===----------------------------------------------------------------------===//
882// Define predicate-controlled types
883
// Each variant lists (predicate, writes) alternatives; the NoSchedPred entry
// is the fallback used when the preceding predicate does not match.

// IsCheapLSL -> V2Write_1cyc_1I, otherwise V2Write_2cyc_1M.
def V2Write_ArithI : SchedWriteVariant<[
  SchedVar<IsCheapLSL,  [V2Write_1cyc_1I]>,
  SchedVar<NoSchedPred, [V2Write_2cyc_1M]>
]>;

// IsCheapLSL -> V2Write_1cyc_1F, otherwise V2Write_2cyc_1M.
def V2Write_ArithF : SchedWriteVariant<[
  SchedVar<IsCheapLSL,  [V2Write_1cyc_1F]>,
  SchedVar<NoSchedPred, [V2Write_2cyc_1M]>
]>;

// NeoverseNoLSL -> V2Write_1cyc_1F, otherwise V2Write_2cyc_1M.
def V2Write_Logical : SchedWriteVariant<[
  SchedVar<NeoverseNoLSL, [V2Write_1cyc_1F]>,
  SchedVar<NoSchedPred,   [V2Write_2cyc_1M]>
]>;

// IsRORImmIdiomPred -> V2Write_1cyc_1I, otherwise V2Write_3cyc_1I_1M.
def V2Write_Extr : SchedWriteVariant<[
  SchedVar<IsRORImmIdiomPred, [V2Write_1cyc_1I]>,
  SchedVar<NoSchedPred,       [V2Write_3cyc_1I_1M]>
]>;
899
// Vector load: when NeoverseHQForm matches, the write that also includes an
// I micro-op is selected; otherwise the plain L write is used.
def V2Write_LdrHQ : SchedWriteVariant<[
  SchedVar<NeoverseHQForm, [V2Write_7cyc_1I_1L]>,
  SchedVar<NoSchedPred,    [V2Write_6cyc_1L]>
]>;

// Vector store counterpart of V2Write_LdrHQ.
def V2Write_StrHQ : SchedWriteVariant<[
  SchedVar<NeoverseHQForm, [V2Write_2cyc_1L01_1V01_1I]>,
  SchedVar<NoSchedPred,    [V2Write_2cyc_1L01_1V01]>
]>;
907
// Variants keyed on NeoverseZeroMove: a match selects V2Write_0cyc, anything
// else falls back to the write named after "or" in the variant's name.
def V2Write_0or1cyc_1I : SchedWriteVariant<[
  SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
  SchedVar<NoSchedPred,      [V2Write_1cyc_1I]>
]>;

def V2Write_0or2cyc_1V : SchedWriteVariant<[
  SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
  SchedVar<NoSchedPred,      [V2Write_2cyc_1V]>
]>;

def V2Write_0or3cyc_1M0 : SchedWriteVariant<[
  SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
  SchedVar<NoSchedPred,      [V2Write_3cyc_1M0]>
]>;
919
// Variants keyed on NeoversePdIsPg: a match selects the write whose latency is
// one cycle longer; otherwise the shorter-latency write with the same resource
// suffix is used.
def V2Write_2or3cyc_1M : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_3cyc_1M]>,
  SchedVar<NoSchedPred,    [V2Write_2cyc_1M]>
]>;

def V2Write_3or4cyc_2M : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_4cyc_2M]>,
  SchedVar<NoSchedPred,    [V2Write_3cyc_2M]>
]>;

def V2Write_1or2cyc_1M0 : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_2cyc_1M0]>,
  SchedVar<NoSchedPred,    [V2Write_1cyc_1M0]>
]>;

def V2Write_2or3cyc_1M0 : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_3cyc_1M0]>,
  SchedVar<NoSchedPred,    [V2Write_2cyc_1M0]>
]>;

def V2Write_1or2cyc_1M0_1M : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_2cyc_1M0_1M]>,
  SchedVar<NoSchedPred,    [V2Write_1cyc_1M0_1M]>
]>;

def V2Write_3or4cyc_1M0_1M : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_4cyc_1M0_1M]>,
  SchedVar<NoSchedPred,    [V2Write_3cyc_1M0_1M]>
]>;

def V2Write_4or5cyc_2M0_2M : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_5cyc_2M0_2M]>,
  SchedVar<NoSchedPred,    [V2Write_4cyc_2M0_2M]>
]>;

def V2Write_4or5cyc_1V0_1M0 : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_5cyc_1V0_1M0]>,
  SchedVar<NoSchedPred,    [V2Write_4cyc_1V0_1M0]>
]>;

def V2Write_2or3cyc_1V0_1M : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_3cyc_1V0_1M]>,
  SchedVar<NoSchedPred,    [V2Write_2cyc_1V0_1M]>
]>;
955
// NeoverseCheapIncDec -> V2Write_1cyc_1F, otherwise V2Write_2cyc_1M.
def V2Write_IncDec : SchedWriteVariant<[
  SchedVar<NeoverseCheapIncDec, [V2Write_1cyc_1F]>,
  SchedVar<NoSchedPred,         [V2Write_2cyc_1M]>
]>;
959
960//===----------------------------------------------------------------------===//
961// Define forwarded types
962
// NOTE: SOG, p. 16, n. 2: Accumulator forwarding is not supported for
// consumers of 64 bit multiply high operations?

// Integer multiply (V2UnitM) and multiply-accumulate (V2UnitM0) writes, both
// with a 2-cycle latency.
let Latency = 2 in {
  def V2Wr_IM  : SchedWriteRes<[V2UnitM]>;
  def V2Wr_IMA : SchedWriteRes<[V2UnitM0]>;
}

// IsReg3ZeroPred selects the plain multiply write; everything else is treated
// as a multiply-accumulate.
def V2Wr_IMUL : SchedWriteVariant<[
  SchedVar<IsReg3ZeroPred, [V2Wr_IM]>,
  SchedVar<NoSchedPred,    [V2Wr_IMA]>
]>;

// Read advance of 1 cycle for operands produced by V2Wr_IMA.
def V2Rd_IMA : SchedReadAdvance<1, [V2Wr_IMA]>;
971
// Each pair below is a write with its latency plus a read advance that applies
// when the consumed operand was produced by one of the listed writes.

let Latency = 4 in def V2Wr_FMA : SchedWriteRes<[V2UnitV]>;
def V2Rd_FMA : SchedReadAdvance<2, [WriteFMul, V2Wr_FMA]>;

let Latency = 4 in def V2Wr_VA : SchedWriteRes<[V2UnitV13]>;
def V2Rd_VA : SchedReadAdvance<3, [V2Wr_VA]>;

let Latency = 3 in def V2Wr_VDOT : SchedWriteRes<[V2UnitV]>;
def V2Rd_VDOT : SchedReadAdvance<2, [V2Wr_VDOT]>;

let Latency = 3 in def V2Wr_VMMA : SchedWriteRes<[V2UnitV]>;
def V2Rd_VMMA : SchedReadAdvance<2, [V2Wr_VMMA]>;

let Latency = 4 in def V2Wr_VMA : SchedWriteRes<[V2UnitV02]>;
def V2Rd_VMA : SchedReadAdvance<3, [V2Wr_VMA]>;

let Latency = 4 in def V2Wr_VMAH : SchedWriteRes<[V2UnitV02, V2UnitV02]>;
def V2Rd_VMAH : SchedReadAdvance<2, [V2Wr_VMAH]>;

let Latency = 4 in def V2Wr_VMAL : SchedWriteRes<[V2UnitV02]>;
def V2Rd_VMAL : SchedReadAdvance<3, [V2Wr_VMAL]>;

let Latency = 4 in def V2Wr_VPA : SchedWriteRes<[V2UnitV13]>;
def V2Rd_VPA : SchedReadAdvance<3, [V2Wr_VPA]>;

let Latency = 4 in def V2Wr_VSA : SchedWriteRes<[V2UnitV13]>;
def V2Rd_VSA : SchedReadAdvance<3, [V2Wr_VSA]>;

let Latency = 4 in def V2Wr_VFCMA : SchedWriteRes<[V2UnitV]>;
def V2Rd_VFCMA : SchedReadAdvance<2, [V2Wr_VFCMA]>;

// V2Rd_VFMA forwards from both the 3-cycle and the 4-cycle write.
let Latency = 3 in def V2Wr_VFM  : SchedWriteRes<[V2UnitV]>;
let Latency = 4 in def V2Wr_VFMA : SchedWriteRes<[V2UnitV]>;
def V2Rd_VFMA : SchedReadAdvance<2, [V2Wr_VFM, V2Wr_VFMA]>;

let Latency = 4 in def V2Wr_VFMAL : SchedWriteRes<[V2UnitV]>;
def V2Rd_VFMAL : SchedReadAdvance<2, [V2Wr_VFMAL]>;

let Latency = 5 in def V2Wr_VBFDOT : SchedWriteRes<[V2UnitV]>;
def V2Rd_VBFDOT : SchedReadAdvance<2, [V2Wr_VBFDOT]>;
let Latency = 6 in def V2Wr_VBFMMA : SchedWriteRes<[V2UnitV]>;
def V2Rd_VBFMMA : SchedReadAdvance<2, [V2Wr_VBFMMA]>;
let Latency = 5 in def V2Wr_VBFMAL : SchedWriteRes<[V2UnitV]>;
def V2Rd_VBFMAL : SchedReadAdvance<3, [V2Wr_VBFMAL]>;

let Latency = 2 in def V2Wr_CRC : SchedWriteRes<[V2UnitM0]>;
def V2Rd_CRC : SchedReadAdvance<1, [V2Wr_CRC]>;
1018
// Write / read-advance pairs; each read advance applies only to operands
// produced by the write(s) it lists.

let Latency = 4 in def V2Wr_ZA : SchedWriteRes<[V2UnitV13]>;
def V2Rd_ZA : SchedReadAdvance<3, [V2Wr_ZA]>;
let Latency = 4 in def V2Wr_ZPA : SchedWriteRes<[V2UnitV13]>;
def V2Rd_ZPA : SchedReadAdvance<3, [V2Wr_ZPA]>;
let Latency = 4 in def V2Wr_ZSA : SchedWriteRes<[V2UnitV13]>;
def V2Rd_ZSA : SchedReadAdvance<3, [V2Wr_ZSA]>;

let Latency = 3 in def V2Wr_ZDOTB : SchedWriteRes<[V2UnitV]>;
def V2Rd_ZDOTB : SchedReadAdvance<2, [V2Wr_ZDOTB]>;
let Latency = 4 in def V2Wr_ZDOTH : SchedWriteRes<[V2UnitV02]>;
def V2Rd_ZDOTH : SchedReadAdvance<3, [V2Wr_ZDOTH]>;

// NOTE: SOG p. 43: Complex multiply-add B, H, S element size: How to reduce
// throughput to 1 in case of forwarding?
let Latency = 4 in def V2Wr_ZCMABHS : SchedWriteRes<[V2UnitV02]>;
def V2Rd_ZCMABHS : SchedReadAdvance<3, [V2Wr_ZCMABHS]>;
let Latency = 5 in def V2Wr_ZCMAD : SchedWriteRes<[V2UnitV02, V2UnitV02]>;
def V2Rd_ZCMAD : SchedReadAdvance<2, [V2Wr_ZCMAD]>;

let Latency = 3 in def V2Wr_ZMMA : SchedWriteRes<[V2UnitV]>;
def V2Rd_ZMMA : SchedReadAdvance<2, [V2Wr_ZMMA]>;
1040
// Multiply-accumulate, B/H/S element size: a single V2UnitV02 micro-op with a
// 4-cycle latency, matching the other B/H/S writes in this file
// (V2Wr_ZCMABHS, V2Wr_ZMASQBHS). Listing V2UnitV02 twice here would halve the
// modelled throughput of the B/H/S forms.
def V2Wr_ZMABHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Rd_ZMABHS : SchedReadAdvance<3, [V2Wr_ZMABHS]>;
// Multiply-accumulate, D element size: two V2UnitV02 resources and a 5-cycle
// latency, like the other D-element writes (V2Wr_ZCMAD, V2Wr_ZMASQD).
def V2Wr_ZMAD  : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
def V2Rd_ZMAD  : SchedReadAdvance<2, [V2Wr_ZMAD]>;
1045
// Write / read-advance pairs; each read advance applies only to operands
// produced by the write(s) it lists.

let Latency = 4 in def V2Wr_ZMAL : SchedWriteRes<[V2UnitV02]>;
def V2Rd_ZMAL : SchedReadAdvance<3, [V2Wr_ZMAL]>;

// One shared read advance covers all three V2Wr_ZMASQ* writes.
let Latency = 4 in def V2Wr_ZMASQL   : SchedWriteRes<[V2UnitV02]>;
let Latency = 4 in def V2Wr_ZMASQBHS : SchedWriteRes<[V2UnitV02]>;
let Latency = 5 in def V2Wr_ZMASQD   : SchedWriteRes<[V2UnitV02, V2UnitV02]>;
def V2Rd_ZMASQ : SchedReadAdvance<2, [V2Wr_ZMASQL, V2Wr_ZMASQBHS, V2Wr_ZMASQD]>;

let Latency = 5 in def V2Wr_ZFCMA : SchedWriteRes<[V2UnitV]>;
def V2Rd_ZFCMA : SchedReadAdvance<3, [V2Wr_ZFCMA]>;

let Latency = 4 in def V2Wr_ZFMA : SchedWriteRes<[V2UnitV]>;
def V2Rd_ZFMA : SchedReadAdvance<2, [V2Wr_ZFMA]>;

let Latency = 4 in def V2Wr_ZFMAL : SchedWriteRes<[V2UnitV]>;
def V2Rd_ZFMAL : SchedReadAdvance<2, [V2Wr_ZFMAL]>;

let Latency = 5 in def V2Wr_ZBFDOT : SchedWriteRes<[V2UnitV]>;
def V2Rd_ZBFDOT : SchedReadAdvance<2, [V2Wr_ZBFDOT]>;
let Latency = 6 in def V2Wr_ZBFMMA : SchedWriteRes<[V2UnitV]>;
def V2Rd_ZBFMMA : SchedReadAdvance<2, [V2Wr_ZBFMMA]>;
let Latency = 5 in def V2Wr_ZBFMAL : SchedWriteRes<[V2UnitV]>;
def V2Rd_ZBFMAL : SchedReadAdvance<3, [V2Wr_ZBFMAL]>;
1070
1071//===----------------------------------------------------------------------===//
1072// Define types with long resource cycles (rc)
1073
// Naming: V2Write_<latency>cyc_1<unit>_<n>rc, where <n> is the value placed in
// ReleaseAtCycles for the single resource the write uses.

def V2Write_6cyc_1V1_5rc : SchedWriteRes<[V2UnitV1]> {
  let Latency         = 6;
  let ReleaseAtCycles = [5];
}

def V2Write_7cyc_1V02_7rc : SchedWriteRes<[V2UnitV02]> {
  let Latency         = 7;
  let ReleaseAtCycles = [7];
}

def V2Write_10cyc_1V02_5rc : SchedWriteRes<[V2UnitV02]> {
  let Latency         = 10;
  let ReleaseAtCycles = [5];
}

def V2Write_10cyc_1V02_9rc : SchedWriteRes<[V2UnitV02]> {
  let Latency         = 10;
  let ReleaseAtCycles = [9];
}

def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> {
  let Latency         = 10;
  let ReleaseAtCycles = [10];
}

def V2Write_10cyc_1V1_9rc : SchedWriteRes<[V2UnitV1]> {
  let Latency         = 10;
  let ReleaseAtCycles = [9];
}

def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> {
  let Latency         = 13;
  let ReleaseAtCycles = [12];
}

def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> {
  let Latency         = 13;
  let ReleaseAtCycles = [13];
}

def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> {
  let Latency         = 15;
  let ReleaseAtCycles = [14];
}

def V2Write_16cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> {
  let Latency         = 16;
  let ReleaseAtCycles = [14];
}

def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> {
  let Latency         = 16;
  let ReleaseAtCycles = [15];
}
1085
1086// Miscellaneous
1087// -----------------------------------------------------------------------------
1088
// COPY is scheduled with the generic WriteI write.
def : InstRW<[WriteI],
             (instrs COPY)>;

// §3.3 Branch instructions
// -----------------------------------------------------------------------------

// Branch, immed
// Compare and branch
def : SchedAlias<WriteBr, V2Write_1cyc_1B>;

// Branch, register
def : SchedAlias<WriteBrReg, V2Write_1cyc_1B>;

// Branch and link, immed
// Branch and link, register
def : InstRW<[V2Write_1cyc_1B_1R],
             (instrs BL, BLR)>;
1104
1105// §3.4 Arithmetic and Logical Instructions
1106// -----------------------------------------------------------------------------
1107
// ALU, basic
// ALU, basic, flagset
def : SchedAlias<WriteI, V2Write_1cyc_1I>;
def : InstRW<[V2Write_1cyc_1F],
             (instregex "^(ADD|SUB)S[WX]r[ir]$", "^(ADC|SBC)S[WX]r$",
                        "^ANDS[WX]ri$")>;
def : InstRW<[V2Write_0or1cyc_1I],
             (instregex "^MOVZ[WX]i$")>;

// ALU, extend and shift
def : SchedAlias<WriteIEReg, V2Write_2cyc_1M>;

// Conditional compare
def : InstRW<[V2Write_1cyc_1F],
             (instregex "^CCM[NP][WX][ir]")>;

// Arithmetic, LSL shift, shift <= 4
// Arithmetic, flagset, LSL shift, shift <= 4
// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
def : SchedAlias<WriteISReg, V2Write_ArithI>;
def : InstRW<[V2Write_ArithF], (instregex "^(ADD|SUB)S[WX]rs$")>;

// Arithmetic, immediate to logical address tag
def : InstRW<[V2Write_2cyc_1M],
             (instrs ADDG, SUBG)>;

// Convert floating-point condition flags
// Flag manipulation instructions
// (No processor resources are listed for WriteSys; only the latency is set.)
let Latency = 1 in def : WriteRes<WriteSys, []>;

// Insert Random Tags
def : InstRW<[V2Write_2cyc_1M],
             (instrs IRG, IRGstack)>;

// Insert Tag Mask
// Subtract Pointer
// Subtract Pointer, flagset
def : InstRW<[V2Write_1cyc_1I],
             (instrs GMI, SUBP, SUBPS)>;

// Logical, shift, no flagset
def : InstRW<[V2Write_1cyc_1I],
             (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
def : InstRW<[V2Write_0or1cyc_1I],
             (instregex "^ORR[WX]rs$")>;

// Logical, shift, flagset
def : InstRW<[V2Write_Logical],
             (instregex "^(AND|BIC)S[WX]rs$")>;
1150
1151// Move and shift instructions
1152// -----------------------------------------------------------------------------
1153
def : SchedAlias<WriteImm, V2Write_1cyc_1I>;

// §3.5 Divide and multiply instructions
// -----------------------------------------------------------------------------

// SDIV, UDIV
def : SchedAlias<WriteID32, V2Write_12cyc_1M0>;
def : SchedAlias<WriteID64, V2Write_20cyc_1M0>;

// Generic 32- and 64-bit integer multiply writes.
def : SchedAlias<WriteIM32, V2Write_2cyc_1M>;
def : SchedAlias<WriteIM64, V2Write_2cyc_1M>;

// Multiply
// Multiply accumulate, W-form
// Multiply accumulate, X-form
// (The third source operand reads through V2Rd_IMA.)
def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA], (instregex "^M(ADD|SUB)[WX]rrr$")>;

// Multiply accumulate long
// Multiply long
def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA], (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;

// Multiply high
def : InstRW<[V2Write_3cyc_1M],
             (instrs SMULHrr, UMULHrr)>;
1179
1180// Pointer Authentication Instructions (v8.3 PAC)
1181// -----------------------------------------------------------------------------
1182
// Authenticate data address
// Authenticate instruction address
// Compute pointer authentication code for data address
// Compute pointer authentication code, using generic key
// Compute pointer authentication code for instruction address
def : InstRW<[V2Write_5cyc_1M0],
             (instregex "^AUT", "^PAC")>;

// Branch and link, register, with pointer authentication
// Branch, register, with pointer authentication
// Branch, return, with pointer authentication
def : InstRW<[V2Write_6cyc_1M0_1B],
             (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ,
                     BRAA, BRAAZ, BRAB, BRABZ,
                     RETAA, RETAB, ERETAA, ERETAB)>;

// Load register, with pointer authentication
def : InstRW<[V2Write_9cyc_1M0_1L],
             (instregex "^LDRA[AB](indexed|writeback)")>;

// Strip pointer authentication code
def : InstRW<[V2Write_2cyc_1M0],
             (instrs XPACD, XPACI, XPACLRI)>;
1203
1204// Miscellaneous data-processing instructions
1205// -----------------------------------------------------------------------------
1206
// Address generation
def : InstRW<[V2Write_1cyc_1F],
             (instrs ADR, ADRP)>;

// Bitfield extract, one reg
// Bitfield extract, two regs
// (Both the generic WriteExtr alias and the explicit EXTR mapping use the
// V2Write_Extr variant.)
def : SchedAlias<WriteExtr, V2Write_Extr>;
def : InstRW<[V2Write_Extr],
             (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move, basic
def : SchedAlias<WriteIS, V2Write_1cyc_1I>;

// Bitfield move, insert
def : InstRW<[V2Write_2cyc_1M],
             (instregex "^BFM[WX]ri$")>;
1220
1221// Load instructions
1222// -----------------------------------------------------------------------------
1223
// NOTE: SOG p. 19: Throughput of LDN?P X-form should be 2, but reported as 3.

// Generic load writes.
def : SchedAlias<WriteLD, V2Write_4cyc_1L>;
def : SchedAlias<WriteLDIdx, V2Write_4cyc_1L>;

// Load register, literal
def : InstRW<[V2Write_5cyc_1L_1F],
             (instrs LDRWl, LDRXl, LDRSWl, PRFMl)>;

// Load pair, signed immed offset, signed words
def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi],
             (instrs LDPSWi)>;

// Load pair, immed post-index or immed pre-index, signed words
def : InstRW<[WriteAdr, V2Write_5cyc_1I_3L, WriteLDHi], (instregex "^LDPSW(post|pre)$")>;
1238
1239// Store instructions
1240// -----------------------------------------------------------------------------
1241
// NOTE: SOG, p. 20: Unsure if STRH uses pipeline I.

// The three generic store writes share one definition; WriteAdr is mapped to
// a single-cycle I write.
def : SchedAlias<WriteST, V2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteSTIdx, V2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteSTP, V2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteAdr, V2Write_1cyc_1I>;
1248
1249// Tag load instructions
1250// -----------------------------------------------------------------------------
1251
// Load allocation tag
// Load multiple allocation tags
def : InstRW<[V2Write_4cyc_1L],
             (instrs LDG, LDGM)>;

// Tag store instructions
// -----------------------------------------------------------------------------

// Store allocation tags to one or two granules, post-index
// Store allocation tags to one or two granules, pre-index
// Store allocation tag to one or two granules, zeroing, post-index
// Store Allocation Tag to one or two granules, zeroing, pre-index
// Store allocation tag and reg pair to memory, post-Index
// Store allocation tag and reg pair to memory, pre-Index
def : InstRW<[V2Write_1cyc_1L01_1D_1I],
             (instrs STGPreIndex, STGPostIndex,
                     ST2GPreIndex, ST2GPostIndex,
                     STZGPreIndex, STZGPostIndex,
                     STZ2GPreIndex, STZ2GPostIndex,
                     STGPpre, STGPpost)>;

// Store allocation tags to one or two granules, signed offset
// Store allocation tag to two granules, zeroing, signed offset
// Store allocation tag and reg pair to memory, signed offset
// Store multiple allocation tags
def : InstRW<[V2Write_1cyc_1L01_1D],
             (instrs STGi, ST2Gi, STZGi, STZ2Gi, STGPi, STGM, STZGM)>;
1277
1278// FP data processing instructions
1279// -----------------------------------------------------------------------------
1280
// FP absolute value
// FP arithmetic
// FP min/max
// FP negate
// FP select
def : SchedAlias<WriteF, V2Write_2cyc_1V>;

// FP compare
def : SchedAlias<WriteFCmp, V2Write_2cyc_1V0>;

// FP divide, square root
// (Generic fallback; the per-form InstRW entries below give explicit writes
// for the H/S/D divide and square-root instructions.)
def : SchedAlias<WriteFDiv, V2Write_7cyc_1V02>;

// FP divide, H-form / S-form / D-form
def : InstRW<[V2Write_7cyc_1V02], (instrs FDIVHrr)>;
def : InstRW<[V2Write_10cyc_1V02], (instrs FDIVSrr)>;
def : InstRW<[V2Write_15cyc_1V02], (instrs FDIVDrr)>;

// FP square root, H-form / S-form / D-form
def : InstRW<[V2Write_7cyc_1V02], (instrs FSQRTHr)>;
def : InstRW<[V2Write_9cyc_1V02], (instrs FSQRTSr)>;
def : InstRW<[V2Write_16cyc_1V02], (instrs FSQRTDr)>;

// FP multiply
let Latency = 3 in def : WriteRes<WriteFMul, [V2UnitV]>;

// FP multiply accumulate
// (The third source operand reads through V2Rd_FMA.)
def : InstRW<[V2Wr_FMA, ReadDefault, ReadDefault, V2Rd_FMA], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;

// FP round to integral
def : InstRW<[V2Write_3cyc_1V02],
             (instregex "^FRINT[AIMNPXZ][HSD]r$",
                        "^FRINT(32|64)[XZ][SD]r$")>;
1318
1319// FP miscellaneous instructions
1320// -----------------------------------------------------------------------------
1321
// FP convert, from gen to vec reg
def : InstRW<[V2Write_3cyc_1M0],
             (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;

// FP convert, from vec to gen reg
def : InstRW<[V2Write_3cyc_1V01], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]ri?$")>;

// FP convert, Javascript from vec to gen reg
def : SchedAlias<WriteFCvt, V2Write_3cyc_1V0>;

// FP convert, from vec to vec reg
def : InstRW<[V2Write_3cyc_1V02],
             (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr, FCVTHDr, FCVTSDr,
                     FCVTXNv1i64)>;

// FP move, immed
// FP move, register
def : SchedAlias<WriteFImm, V2Write_2cyc_1V>;

// FP transfer, from gen to low half of vec reg
def : InstRW<[V2Write_0or3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;

// FP transfer, from gen to high half of vec reg
def : InstRW<[V2Write_5cyc_1M0_1V],
             (instrs FMOVXDHighr)>;

// FP transfer, from vec to gen reg
def : SchedAlias<WriteFCopy, V2Write_2cyc_2V01>;
1349
1350// FP load instructions
1351// -----------------------------------------------------------------------------
1352
// Load vector reg, literal, S/D/Q forms
def : InstRW<[V2Write_7cyc_1F_1L],
             (instregex "^LDR[SDQ]l$")>;

// Load vector reg, unscaled immed
def : InstRW<[V2Write_6cyc_1L],
             (instregex "^LDUR[BHSDQ]i$")>;

// Load vector reg, immed post-index
// Load vector reg, immed pre-index
def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L], (instregex "^LDR[BHSDQ](pre|post)$")>;

// Load vector reg, unsigned immed
def : InstRW<[V2Write_6cyc_1L],
             (instregex "^LDR[BHSDQ]ui$")>;

// Load vector reg, register offset, basic
// Load vector reg, register offset, scale, S/D-form
// Load vector reg, register offset, scale, H/Q-form
// Load vector reg, register offset, extend
// Load vector reg, register offset, extend, scale, S/D-form
// Load vector reg, register offset, extend, scale, H/Q-form
// (All six rows share the V2Write_LdrHQ variant.)
def : InstRW<[V2Write_LdrHQ, ReadAdrBase],
             (instregex "^LDR[BHSDQ]ro[WX]$")>;

// Load vector pair, immed offset, S/D-form
def : InstRW<[V2Write_6cyc_1L, WriteLDHi],
             (instregex "^LDN?P[SD]i$")>;

// Load vector pair, immed offset, Q-form
def : InstRW<[V2Write_6cyc_2L, WriteLDHi],
             (instrs LDPQi, LDNPQi)>;

// Load vector pair, immed post-index, S/D-form
// Load vector pair, immed pre-index, S/D-form
def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L, WriteLDHi], (instregex "^LDP[SD](pre|post)$")>;

// Load vector pair, immed post-index, Q-form
// Load vector pair, immed pre-index, Q-form
def : InstRW<[WriteAdr, V2Write_6cyc_2I_2L, WriteLDHi],
             (instrs LDPQpost, LDPQpre)>;
1390
1391// FP store instructions
1392// -----------------------------------------------------------------------------
1393
// Store vector reg, unscaled immed, B/H/S/D-form
// Store vector reg, unscaled immed, Q-form
def : InstRW<[V2Write_2cyc_1L01_1V01],
             (instregex "^STUR[BHSDQ]i$")>;

// Store vector reg, immed post-index, B/H/S/D-form
// Store vector reg, immed post-index, Q-form
// Store vector reg, immed pre-index, B/H/S/D-form
// Store vector reg, immed pre-index, Q-form
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I], (instregex "^STR[BHSDQ](pre|post)$")>;

// Store vector reg, unsigned immed, B/H/S/D-form
// Store vector reg, unsigned immed, Q-form
def : InstRW<[V2Write_2cyc_1L01_1V01],
             (instregex "^STR[BHSDQ]ui$")>;

// Store vector reg, register offset, basic, B/H/S/D-form
// Store vector reg, register offset, basic, Q-form
// Store vector reg, register offset, scale, H-form
// Store vector reg, register offset, scale, S/D-form
// Store vector reg, register offset, scale, Q-form
// Store vector reg, register offset, extend, B/H/S/D-form
// Store vector reg, register offset, extend, Q-form
// Store vector reg, register offset, extend, scale, H-form
// Store vector reg, register offset, extend, scale, S/D-form
// Store vector reg, register offset, extend, scale, Q-form
// (All ten rows share the V2Write_StrHQ variant.)
def : InstRW<[V2Write_StrHQ, ReadAdrBase], (instregex "^STR[BHSDQ]ro[WX]$")>;

// Store vector pair, immed offset, S-form
// Store vector pair, immed offset, D-form
def : InstRW<[V2Write_2cyc_1L01_1V01],
             (instregex "^STN?P[SD]i$")>;

// Store vector pair, immed offset, Q-form
def : InstRW<[V2Write_2cyc_1L01_2V01],
             (instrs STPQi, STNPQi)>;

// Store vector pair, immed post-index, S-form
// Store vector pair, immed post-index, D-form
// Store vector pair, immed pre-index, S-form
// Store vector pair, immed pre-index, D-form
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I], (instregex "^STP[SD](pre|post)$")>;

// NOTE(review): unlike the S/D-form pre/post entries above, the two Q-form
// entries below do not list WriteAdr first -- confirm whether that is
// intentional.

// Store vector pair, immed post-index, Q-form
def : InstRW<[V2Write_2cyc_1L01_2V01_1I],
             (instrs STPQpost)>;

// Store vector pair, immed pre-index, Q-form
def : InstRW<[V2Write_2cyc_1L01_2V01_2I],
             (instrs STPQpre)>;
1441
1442// ASIMD integer instructions
1443// -----------------------------------------------------------------------------
1444
// ASIMD absolute diff
// ASIMD absolute diff long
// ASIMD arith, basic
// ASIMD arith, complex
// ASIMD arith, pair-wise
// ASIMD compare
// ASIMD logical
// ASIMD max/min, basic and pair-wise
// (D-form and Q-form generic vector writes share one definition.)
def : SchedAlias<WriteVd, V2Write_2cyc_1V>;
def : SchedAlias<WriteVq, V2Write_2cyc_1V>;

// ASIMD absolute diff accum
// ASIMD absolute diff accum long
def : InstRW<[V2Wr_VA, V2Rd_VA],
             (instregex "^[SU]ABAL?v")>;

// ASIMD arith, reduce, 4H/4S
def : InstRW<[V2Write_2cyc_1V13],
             (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;

// ASIMD arith, reduce, 8B/8H
def : InstRW<[V2Write_4cyc_1V13_1V], (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;

// ASIMD arith, reduce, 16B
def : InstRW<[V2Write_4cyc_2V13],
             (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;

// ASIMD dot product
// ASIMD dot product using signed and unsigned integers
def : InstRW<[V2Wr_VDOT, V2Rd_VDOT], (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;

// ASIMD matrix multiply-accumulate
def : InstRW<[V2Wr_VMMA, V2Rd_VMMA],
             (instrs SMMLA, UMMLA, USMMLA)>;
1478// ASIMD max/min, reduce, 4H/4S
1479def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$",
1480                                             "^[SU](MAX|MIN)Vv4i32v$")>;
1481
1482// ASIMD max/min, reduce, 8B/8H
1483def : InstRW<[V2Write_4cyc_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
1484                                                "^[SU](MAX|MIN)Vv8i16v$")>;
1485
1486// ASIMD max/min, reduce, 16B
1487def : InstRW<[V2Write_4cyc_2V13], (instregex "[SU](MAX|MIN)Vv16i8v$")>;
1488
// ASIMD multiply
def : InstRW<[V2Write_4cyc_1V02],
             (instregex "^MULv", "^SQ(R)?DMULHv")>;

// ASIMD multiply accumulate
def : InstRW<[V2Wr_VMA, V2Rd_VMA],
             (instregex "^MLAv", "^MLSv")>;

// ASIMD multiply accumulate high
def : InstRW<[V2Wr_VMAH, V2Rd_VMAH],
             (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;

// ASIMD multiply accumulate long
def : InstRW<[V2Wr_VMAL, V2Rd_VMAL],
             (instregex "^[SU]MLALv", "^[SU]MLSLv")>;

// ASIMD multiply accumulate saturating long
def : InstRW<[V2Write_4cyc_1V02],
             (instregex "^SQDML[AS]L[iv]")>;

// ASIMD multiply/multiply long (8x8) polynomial, D-form
// ASIMD multiply/multiply long (8x8) polynomial, Q-form
def : InstRW<[V2Write_3cyc_1V23],
             (instregex "^PMULL?(v8i8|v16i8)$")>;

// ASIMD multiply long
def : InstRW<[V2Write_3cyc_1V02],
             (instregex "^[SU]MULLv", "^SQDMULL[iv]")>;

// ASIMD pairwise add and accumulate long
def : InstRW<[V2Wr_VPA, V2Rd_VPA],
             (instregex "^[SU]ADALPv")>;

// ASIMD shift accumulate
def : InstRW<[V2Wr_VSA, V2Rd_VSA],
             (instregex "^[SU]SRA[dv]", "^[SU]RSRA[dv]")>;

// ASIMD shift by immed, basic
def : InstRW<[V2Write_2cyc_1V13],
             (instregex "^SHL[dv]", "^SHLLv", "^SHRNv", "^SSHLLv",
                        "^SSHR[dv]", "^USHLLv", "^USHR[dv]")>;

// ASIMD shift by immed and insert, basic
def : InstRW<[V2Write_2cyc_1V13],
             (instregex "^SLI[dv]", "^SRI[dv]")>;

// ASIMD shift by immed, complex
def : InstRW<[V2Write_4cyc_1V13],
             (instregex "^RSHRNv",
                        "^SQRSHRU?N[bhsv]",
                        "^(SQSHLU?|UQSHL)[bhsd]$",
                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
                        "^SQSHRU?N[bhsv]",
                        "^SRSHR[dv]",
                        "^UQRSHRN[bhsv]",
                        "^UQSHRN[bhsv]",
                        "^URSHR[dv]")>;

// ASIMD shift by register, basic
def : InstRW<[V2Write_2cyc_1V13],
             (instregex "^[SU]SHLv")>;

// ASIMD shift by register, complex
def : InstRW<[V2Write_4cyc_1V13],
             (instregex "^[SU]RSHLv",
                        "^[SU]QRSHLv",
                        "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
1539
// ASIMD floating-point instructions
// -----------------------------------------------------------------------------

// The categories below have no InstRW entry here; they are
// handled by SchedAlias<WriteV[dq], ...>:
//   ASIMD FP absolute value/difference
//   ASIMD FP arith, normal
//   ASIMD FP compare
//   ASIMD FP complex add
//   ASIMD FP max/min, normal
//   ASIMD FP max/min, pairwise
//   ASIMD FP negate

// ASIMD FP complex multiply add (FCMLA)
def : InstRW<[V2Wr_VFCMA, V2Rd_VFCMA],
             (instregex "^FCMLAv")>;

// ASIMD FP convert, long (F16 to F32)
def : InstRW<[V2Write_4cyc_2V02],
             (instregex "^FCVTL(v4|v8)i16")>;

// ASIMD FP convert, long (F32 to F64)
def : InstRW<[V2Write_3cyc_1V02],
             (instregex "^FCVTL(v2|v4)i32")>;

// ASIMD FP convert, narrow (F32 to F16)
def : InstRW<[V2Write_4cyc_2V02],
             (instregex "^FCVTN(v4|v8)i16")>;

// ASIMD FP convert, narrow (F64 to F32), including FCVTXN
def : InstRW<[V2Write_3cyc_1V02],
             (instregex "^FCVTN(v2|v4)i32",
                        "^FCVTXN(v2|v4)f32")>;

// ASIMD FP convert, other, D-form F32 and Q-form F64
// (FP<->integer conversions; also the v1i64 and "d" scalar forms)
def : InstRW<[V2Write_3cyc_1V02],
             (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
                        "^FCVT[AMNPZ][SU]v1i64$",
                        "^FCVTZ[SU]d$",
                        "^[SU]CVTFv2f(32|64)$",
                        "^[SU]CVTFv1i64$",
                        "^[SU]CVTFd$")>;

// ASIMD FP convert, other, D-form F16 and Q-form F32
// (also the v1i32 and "s" scalar forms)
def : InstRW<[V2Write_4cyc_2V02],
             (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
                        "^FCVT[AMNPZ][SU]v1i32$",
                        "^FCVTZ[SU]s$",
                        "^[SU]CVTFv4f(16|32)$",
                        "^[SU]CVTFv1i32$",
                        "^[SU]CVTFs$")>;

// ASIMD FP convert, other, Q-form F16
// (also the v1f16/v1i16 and "h" scalar forms)
def : InstRW<[V2Write_6cyc_4V02],
             (instregex "^FCVT[AMNPZ][SU]v8f16$",
                        "^FCVT[AMNPZ][SU]v1f16$",
                        "^FCVTZ[SU]h$",
                        "^[SU]CVTFv8f16$",
                        "^[SU]CVTFv1i16$",
                        "^[SU]CVTFh$")>;
1591
// ASIMD FP divide (FDIV), one entry per vector arrangement.

// D-form, F16
def : InstRW<[V2Write_7cyc_1V02_7rc],
             (instrs FDIVv4f16)>;

// D-form, F32
def : InstRW<[V2Write_10cyc_1V02_5rc],
             (instrs FDIVv2f32)>;

// Q-form, F16
def : InstRW<[V2Write_13cyc_1V02_13rc],
             (instrs FDIVv8f16)>;

// Q-form, F32
def : InstRW<[V2Write_10cyc_1V02_10rc],
             (instrs FDIVv4f32)>;

// Q-form, F64
def : InstRW<[V2Write_15cyc_1V02_14rc],
             (instrs FDIVv2f64)>;
1606
// ASIMD FP max/min, reduce, F32 and D-form F16
// (FMAXV, FMINV, FMAXNMV, FMINNMV on v4i16/v4i32)
def : InstRW<[V2Write_4cyc_2V],
             (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;

// ASIMD FP max/min, reduce, Q-form F16
def : InstRW<[V2Write_6cyc_3V],
             (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;

// ASIMD FP multiply (FMUL, FMULX)
def : InstRW<[V2Wr_VFM],
             (instregex "^FMULv",
                        "^FMULXv")>;

// ASIMD FP multiply accumulate (FMLA, FMLS)
def : InstRW<[V2Wr_VFMA, V2Rd_VFMA],
             (instregex "^FMLAv",
                        "^FMLSv")>;

// ASIMD FP multiply accumulate long (FMLAL, FMLSL, their "2" and lane forms)
def : InstRW<[V2Wr_VFMAL, V2Rd_VFMAL],
             (instregex "^FML[AS]L2?(lane)?v")>;
1621
// ASIMD FP round (FRINT*), grouped by vector arrangement.

// D-form F32 and Q-form F64
def : InstRW<[V2Write_3cyc_1V02],
             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
                        "^FRINT(32|64)[XZ]v2f(32|64)$")>;

// D-form F16 and Q-form F32
def : InstRW<[V2Write_4cyc_2V02],
             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
                        "^FRINT(32|64)[XZ]v4f32$")>;

// Q-form F16
def : InstRW<[V2Write_6cyc_4V02],
             (instregex "^FRINT[AIMNPXZ]v8f16$")>;
1634
// ASIMD FP square root (FSQRT), one entry per vector arrangement.

// D-form, F16
def : InstRW<[V2Write_7cyc_1V02_7rc],
             (instrs FSQRTv4f16)>;

// D-form, F32
def : InstRW<[V2Write_10cyc_1V02_5rc],
             (instrs FSQRTv2f32)>;

// Q-form, F16
def : InstRW<[V2Write_13cyc_1V02_13rc],
             (instrs FSQRTv8f16)>;

// Q-form, F32
def : InstRW<[V2Write_10cyc_1V02_9rc],
             (instrs FSQRTv4f32)>;

// Q-form, F64
def : InstRW<[V2Write_16cyc_1V02_15rc],
             (instrs FSQRTv2f64)>;
1649
// ASIMD BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------

// ASIMD convert, F32 to BF16 (BFCVTN, BFCVTN2)
def : InstRW<[V2Write_4cyc_2V02],
             (instrs BFCVTN,
                     BFCVTN2)>;

// ASIMD dot product (BFDOT)
def : InstRW<[V2Wr_VBFDOT, V2Rd_VBFDOT],
             (instrs BFDOTv4bf16,
                     BFDOTv8bf16)>;

// ASIMD matrix multiply accumulate (BFMMLA)
def : InstRW<[V2Wr_VBFMMA, V2Rd_VBFMMA],
             (instrs BFMMLA)>;

// ASIMD multiply accumulate long (BFMLALB/BFMLALT, plain and indexed)
def : InstRW<[V2Wr_VBFMAL, V2Rd_VBFMAL],
             (instrs BFMLALB,
                     BFMLALBIdx,
                     BFMLALT,
                     BFMLALTIdx)>;

// Scalar convert, F32 to BF16 (BFCVT)
def : InstRW<[V2Write_3cyc_1V02],
             (instrs BFCVT)>;
1668
// ASIMD miscellaneous instructions
// -----------------------------------------------------------------------------

// The categories below have no InstRW entry here; they are
// handled by SchedAlias<WriteV[dq], ...>:
//   ASIMD bit reverse
//   ASIMD bitwise insert
//   ASIMD count
//   ASIMD duplicate, element
//   ASIMD extract
//   ASIMD extract narrow
//   ASIMD insert, element to element
//   ASIMD move, FP immed
//   ASIMD move, integer immed
//   ASIMD reverse
//   ASIMD table lookup extension, 1 table reg
//   ASIMD transpose
//   ASIMD unzip/zip

// MOVID and MOVIv2d_ns are mapped explicitly to V2Write_0or2cyc_1V rather
// than being left to the alias above.
def : InstRW<[V2Write_0or2cyc_1V],
             (instrs MOVID,
                     MOVIv2d_ns)>;

// ASIMD duplicate, gen reg (DUP from a general-purpose register)
def : InstRW<[V2Write_3cyc_1M0],
             (instregex "^DUPv.+gpr")>;

// ASIMD extract narrow, saturating (SQXTN, UQXTN, SQXTUN)
def : InstRW<[V2Write_4cyc_1V13],
             (instregex "^[SU]QXTNv",
                        "^SQXTUNv")>;
1693
// ASIMD reciprocal and square root estimate, D-form U32 (URECPE, URSQRTE)
def : InstRW<[V2Write_3cyc_1V02],
             (instrs URECPEv2i32,
                     URSQRTEv2i32)>;

// ASIMD reciprocal and square root estimate, Q-form U32
def : InstRW<[V2Write_4cyc_2V02],
             (instrs URECPEv4i32,
                     URSQRTEv4i32)>;

// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
// (FRECPE, FRSQRTE)
def : InstRW<[V2Write_3cyc_1V02],
             (instrs FRECPEv1f16,
                     FRECPEv1i32,
                     FRECPEv1i64,
                     FRECPEv2f32,
                     FRSQRTEv1f16,
                     FRSQRTEv1i32,
                     FRSQRTEv1i64,
                     FRSQRTEv2f32)>;

// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
def : InstRW<[V2Write_4cyc_2V02],
             (instrs FRECPEv4f16,
                     FRECPEv4f32,
                     FRSQRTEv4f16,
                     FRSQRTEv4f32)>;

// ASIMD reciprocal and square root estimate, Q-form F16
def : InstRW<[V2Write_6cyc_4V02],
             (instrs FRECPEv8f16,
                     FRSQRTEv8f16)>;

// ASIMD reciprocal exponent (FRECPX)
def : InstRW<[V2Write_3cyc_1V02],
             (instregex "^FRECPXv")>;
1715
// ASIMD reciprocal step (FRECPS, FRSQRTS)
// Matches the scalar forms suffixed 16/32/64 as well as the vector forms
// (suffix starting with "v"). The half-precision scalar suffix "16" is listed
// explicitly so that FRECPS16/FRSQRTS16 receive the same write as the other
// element sizes instead of falling back to the generic alias.
def : InstRW<[V2Write_4cyc_1V], (instregex "^FRECPS(16|32|64|v)",
                                           "^FRSQRTS(16|32|64|v)")>;
1719
// ASIMD table lookup (TBL), by number of table registers.

// 1 or 2 table regs
def : InstRW<[V2Write_2cyc_1V01],
             (instrs TBLv8i8One,
                     TBLv16i8One,
                     TBLv8i8Two,
                     TBLv16i8Two)>;

// 3 table regs
def : InstRW<[V2Write_4cyc_2V01],
             (instrs TBLv8i8Three,
                     TBLv16i8Three)>;

// 4 table regs
def : InstRW<[V2Write_4cyc_3V01],
             (instrs TBLv8i8Four,
                     TBLv16i8Four)>;

// ASIMD table lookup extension (TBX), by number of table registers.

// 2 table regs
def : InstRW<[V2Write_4cyc_2V],
             (instrs TBXv8i8Two,
                     TBXv16i8Two)>;

// 3 table regs
def : InstRW<[V2Write_6cyc_3V],
             (instrs TBXv8i8Three,
                     TBXv16i8Three)>;

// 4 table regs
def : InstRW<[V2Write_6cyc_5V],
             (instrs TBXv8i8Four,
                     TBXv16i8Four)>;
1738
// ASIMD transfer, element to gen reg (SMOV, UMOV)
def : InstRW<[V2Write_2cyc_2V01],
             (instregex "^[SU]MOVv")>;

// ASIMD transfer, gen reg to element (INS from a general-purpose register)
def : InstRW<[V2Write_5cyc_1M0_1V],
             (instregex "^INSvi(8|16|32|64)gpr$")>;
1744
// ASIMD load instructions
// -----------------------------------------------------------------------------
// Each category has two entries: the plain form, and the _POST form whose
// write list additionally starts with WriteAdr.

// ASIMD load, 1 element, multiple, 1 reg, D-form
def : InstRW<[V2Write_6cyc_1L],
             (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_1L],
             (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[V2Write_6cyc_1L],
             (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_1L],
             (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, D-form
def : InstRW<[V2Write_6cyc_2L],
             (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_2L],
             (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[V2Write_6cyc_2L],
             (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_2L],
             (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, D-form
def : InstRW<[V2Write_6cyc_3L],
             (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_3L],
             (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[V2Write_6cyc_3L],
             (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_3L],
             (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, D-form
def : InstRW<[V2Write_7cyc_4L],
             (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_7cyc_4L],
             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V2Write_7cyc_4L],
             (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_7cyc_4L],
             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
1787
// Patterns are written with an explicit ^ anchor, matching the LD1 "multiple"
// entries. The _POST forms list WriteAdr ahead of the data write.

// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "^LD1i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "^LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "^LD1Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "^LD1Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, all lanes, Q-form
def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "^LD1Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "^LD1Rv(16b|8h|4s|2d)_POST$")>;
1801
// LD2 entries. Patterns are written with an explicit ^ anchor; the _POST
// forms list WriteAdr ahead of the data write.

// ASIMD load, 2 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "^LD2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "^LD2Twov(8b|4h|2s)_POST$")>;

// ASIMD load, 2 element, multiple, Q-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
def : InstRW<[V2Write_8cyc_2L_2V],           (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_2V], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "^LD2i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "^LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 2 element, all lanes, Q-form
def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
1825
// LD3 entries. Patterns are written with an explicit ^ anchor; the _POST
// forms list WriteAdr ahead of the data write.

// ASIMD load, 3 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "^LD3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "^LD3Threev(8b|4h|2s)_POST$")>;

// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
def : InstRW<[V2Write_8cyc_3L_3V],           (instregex "^LD3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "^LD3i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "^LD3i(8|16|32|64)_POST$")>;

// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
def : InstRW<[V2Write_8cyc_3L_3V],           (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
1850
// LD4 entries. Patterns are written with an explicit ^ anchor; the _POST
// forms list WriteAdr ahead of the data write.

// ASIMD load, 4 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_3L_4V],           (instregex "^LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;

// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
def : InstRW<[V2Write_9cyc_6L_4V],           (instregex "^LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_9cyc_6L_4V], (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[V2Write_8cyc_3L_4V],           (instregex "^LD4i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "^LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_3L_4V],           (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
def : InstRW<[V2Write_8cyc_4L_4V],           (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_4L_4V], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
1875
// ASIMD store instructions
// -----------------------------------------------------------------------------
// Patterns are written with an explicit ^ anchor. Each category has a plain
// entry and a _POST entry whose write list additionally starts with WriteAdr.

// ASIMD store, 1 element, multiple, 1 reg, D-form
def : InstRW<[V2Write_2cyc_1L01_1V01],           (instregex "^ST1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "^ST1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[V2Write_2cyc_1L01_1V01],           (instregex "^ST1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "^ST1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, D-form
def : InstRW<[V2Write_2cyc_1L01_1V01],           (instregex "^ST1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "^ST1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[V2Write_2cyc_2L01_2V01],           (instregex "^ST1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "^ST1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, D-form
def : InstRW<[V2Write_2cyc_2L01_2V01],           (instregex "^ST1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "^ST1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[V2Write_2cyc_3L01_3V01],           (instregex "^ST1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_3L01_3V01], (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, D-form
def : InstRW<[V2Write_2cyc_2L01_2V01],           (instregex "^ST1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "^ST1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V2Write_2cyc_4L01_4V01],           (instregex "^ST1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_4L01_4V01], (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>;
1910
// ST1 single-lane and ST2 entries. Patterns are written with an explicit ^
// anchor; the _POST forms list WriteAdr ahead of the store write.

// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
def : InstRW<[V2Write_4cyc_1L01_2V01],           (instregex "^ST1i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "^ST1i(8|16|32|64)_POST$")>;

// ASIMD store, 2 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_4cyc_1L01_2V01],           (instregex "^ST2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "^ST2Twov(8b|4h|2s)_POST$")>;

// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
def : InstRW<[V2Write_4cyc_2L01_4V01],           (instregex "^ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01], (instregex "^ST2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
def : InstRW<[V2Write_4cyc_1L01_2V01],           (instregex "^ST2i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "^ST2i(8|16|32|64)_POST$")>;
1929
// ST3 entries. Patterns are written with an explicit ^ anchor; the _POST
// forms list WriteAdr ahead of the store write.

// ASIMD store, 3 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_5cyc_2L01_4V01],           (instregex "^ST3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "^ST3Threev(8b|4h|2s)_POST$")>;

// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
def : InstRW<[V2Write_6cyc_3L01_6V01],           (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_3L01_6V01], (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
def : InstRW<[V2Write_5cyc_2L01_4V01],           (instregex "^ST3i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "^ST3i(8|16|32|64)_POST$")>;
1944
// ST4 entries. Patterns are written with an explicit ^ anchor; the _POST
// forms list WriteAdr ahead of the store write.

// ASIMD store, 4 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_6cyc_2L01_6V01],           (instregex "^ST4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_2L01_6V01], (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, B/H/S
def : InstRW<[V2Write_7cyc_4L01_12V01],           (instregex "^ST4Fourv(16b|8h|4s)$")>;
def : InstRW<[WriteAdr, V2Write_7cyc_4L01_12V01], (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[V2Write_5cyc_4L01_8V01],           (instregex "^ST4Fourv(2d)$")>;
def : InstRW<[WriteAdr, V2Write_5cyc_4L01_8V01], (instregex "^ST4Fourv(2d)_POST$")>;

// ASIMD store, 4 element, one lane, B/H/S
def : InstRW<[V2Write_6cyc_1L01_3V01],           (instregex "^ST4i(8|16|32)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_1L01_3V01], (instregex "^ST4i(8|16|32)_POST$")>;

// ASIMD store, 4 element, one lane, D
def : InstRW<[V2Write_4cyc_2L01_4V01],            (instregex "^ST4i(64)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01],  (instregex "^ST4i(64)_POST$")>;
1964
// Cryptography extensions
// -----------------------------------------------------------------------------

// Crypto AES ops (AESD, AESE, AESMC, AESIMC)
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^AES[DE]rr$",
                        "^AESI?MCrr")>;

// Crypto polynomial (64x64) multiply long (PMULL on 64-bit elements)
def : InstRW<[V2Write_2cyc_1V],
             (instrs PMULLv1i64,
                     PMULLv2i64)>;

// Crypto SHA1 hash acceleration op (SHA1H)
// Crypto SHA1 schedule acceleration ops (SHA1SU0, SHA1SU1)
def : InstRW<[V2Write_2cyc_1V0],
             (instregex "^SHA1(H|SU0|SU1)")>;

// Crypto SHA1 hash acceleration ops (SHA1C, SHA1M, SHA1P)
// Crypto SHA256 hash acceleration ops (SHA256H, SHA256H2)
def : InstRW<[V2Write_4cyc_1V0],
             (instregex "^SHA1[CMP]",
                        "^SHA256H2?")>;

// Crypto SHA256 schedule acceleration ops (SHA256SU0, SHA256SU1)
def : InstRW<[V2Write_2cyc_1V0],
             (instregex "^SHA256SU[01]")>;

// Crypto SHA512 hash acceleration ops
def : InstRW<[V2Write_2cyc_1V0],
             (instregex "^SHA512(H|H2|SU0|SU1)")>;

// Crypto SHA3 ops
def : InstRW<[V2Write_2cyc_1V0],
             (instrs BCAX,
                     EOR3,
                     RAX1,
                     XAR)>;

// Crypto SM3 ops
def : InstRW<[V2Write_2cyc_1V0],
             (instregex "^SM3PARTW[12]$",
                        "^SM3SS1$",
                        "^SM3TT[12][AB]$")>;

// Crypto SM4 ops
def : InstRW<[V2Write_4cyc_1V0],
             (instrs SM4E,
                     SM4ENCKEY)>;
1997
// CRC
// -----------------------------------------------------------------------------

// Every instruction whose name starts with CRC32 shares one write/read pair.
def : InstRW<[V2Wr_CRC, V2Rd_CRC],
             (instregex "^CRC32")>;
2002
// SVE Predicate instructions
// -----------------------------------------------------------------------------

// Loop control, based on predicate (BRKA, BRKB)
def : InstRW<[V2Write_2or3cyc_1M],
             (instrs BRKA_PPmP,
                     BRKA_PPzP,
                     BRKB_PPmP,
                     BRKB_PPzP)>;

// Loop control, based on predicate and flag setting (BRKAS, BRKBS)
def : InstRW<[V2Write_3or4cyc_2M],
             (instrs BRKAS_PPzP,
                     BRKBS_PPzP)>;

// Loop control, propagating (BRKN, BRKPA, BRKPB)
def : InstRW<[V2Write_2or3cyc_1M0],
             (instrs BRKN_PPzP,
                     BRKPA_PPzPP,
                     BRKPB_PPzPP)>;

// Loop control, propagating and flag setting (BRKNS, BRKPAS, BRKPBS)
def : InstRW<[V2Write_3or4cyc_1M0_1M],
             (instrs BRKNS_PPzP,
                     BRKPAS_PPzPP,
                     BRKPBS_PPzPP)>;

// Loop control, based on GPR (WHILE<cc>, WHILERW, WHILEWR)
def : InstRW<[V2Write_3cyc_2M],
             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]",
                        "^WHILE(RW|WR)_PXX_[BHSD]")>;

// Loop terminate (CTERMEQ, CTERMNE)
def : InstRW<[V2Write_1cyc_2M],
             (instregex "^CTERM(EQ|NE)_(WW|XX)")>;
2028
// Predicate counting scalar
//   - ADDPL/ADDVL/RDVL are listed by name,
//   - the element-count and saturating inc/dec forms are matched by regex.
def : InstRW<[V2Write_2cyc_1M],
             (instrs ADDPL_XXI,
                     ADDVL_XXI,
                     RDVLI_XI)>;
def : InstRW<[V2Write_2cyc_1M],
             (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI",
                        "^SQ(DEC|INC)[BHWD]_XPiWdI",
                        "^UQ(DEC|INC)[BHWD]_WPiI")>;

// Predicate counting scalar, ALL, {1,2,4}
def : InstRW<[V2Write_IncDec],
             (instregex "^(DEC|INC)[BHWD]_XPiI")>;

// Predicate counting scalar, active predicate
def : InstRW<[V2Write_2cyc_1M],
             (instregex "^CNTP_XPP_[BHSD]",
                        "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
                        "^(UQDEC|UQINC)P_WP_[BHSD]",
                        "^(SQDEC|SQINC)P_XPWd_[BHSD]")>;

// Predicate counting vector, active predicate
def : InstRW<[V2Write_7cyc_1M_1M0_1V],
             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;
2049
// Predicate logical
def : InstRW<[V2Write_1or2cyc_1M0],
             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;

// Predicate logical, flag setting
def : InstRW<[V2Write_1or2cyc_1M0_1M],
             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;

// Predicate reverse (REV)
def : InstRW<[V2Write_2cyc_1M],
             (instregex "^REV_PP_[BHSD]")>;

// Predicate select (SEL)
def : InstRW<[V2Write_1cyc_1M0],
             (instrs SEL_PPPP)>;

// Predicate set (PFALSE, PTRUE)
def : InstRW<[V2Write_2cyc_1M],
             (instregex "^PFALSE",
                        "^PTRUE_[BHSD]")>;

// Predicate set/initialize, set flags (PTRUES)
def : InstRW<[V2Write_3cyc_2M],
             (instregex "^PTRUES_[BHSD]")>;

// Predicate find first/next (PFIRST, PNEXT)
def : InstRW<[V2Write_2cyc_1M],
             (instregex "^PFIRST_B",
                        "^PNEXT_[BHSD]")>;

// Predicate test (PTEST)
def : InstRW<[V2Write_1cyc_1M],
             (instrs PTEST_PP)>;

// Predicate transpose (TRN1, TRN2)
def : InstRW<[V2Write_2cyc_1M],
             (instregex "^TRN[12]_PPP_[BHSD]")>;

// Predicate unpack and widen (PUNPKHI, PUNPKLO)
def : InstRW<[V2Write_2cyc_1M],
             (instrs PUNPKHI_PP,
                     PUNPKLO_PP)>;

// Predicate zip/unzip (ZIP1/2, UZP1/2)
def : InstRW<[V2Write_2cyc_1M],
             (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>;
2084
// SVE integer instructions
// -----------------------------------------------------------------------------

// Arithmetic, absolute diff (SABD, UABD)
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^[SU]ABD_ZPmZ_[BHSD]",
                        "^[SU]ABD_ZPZZ_[BHSD]")>;

// Arithmetic, absolute diff accum (SABA, UABA)
def : InstRW<[V2Wr_ZA, V2Rd_ZA],
             (instregex "^[SU]ABA_ZZZ_[BHSD]")>;

// Arithmetic, absolute diff accum long (SABAL[BT], UABAL[BT])
def : InstRW<[V2Wr_ZA, V2Rd_ZA],
             (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;

// Arithmetic, absolute diff long (SABDL[BT], UABDL[BT])
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;

// Arithmetic, basic
def : InstRW<[V2Write_2cyc_1V],
             (instregex
                "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
                "^(ADD|SUB)_ZZZ_[BHSD]",
                "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
                "^(ADD|SUB|SUBR)_ZI_[BHSD]",
                "^ADR_[SU]XTW_ZZZ_D_[0123]",
                "^ADR_LSL_ZZZ_[SD]_[0123]",
                "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
                "^SADDLBT_ZZZ_[HSD]",
                "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
                "^SSUBL(BT|TB)_ZZZ_[HSD]")>;

// Arithmetic, complex (narrowing-high, saturating and halving-rounding forms)
def : InstRW<[V2Write_2cyc_1V],
             (instregex
                "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
                "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
                "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
                "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
                "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
                "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;

// Arithmetic, large integer (ADCL[BT], SBCL[BT])
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;

// Arithmetic, pairwise add (ADDP)
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^ADDP_ZPmZ_[BHSD]")>;

// Arithmetic, pairwise add and accum long (SADALP, UADALP)
// The read list places ReadDefault ahead of V2Rd_ZPA.
def : InstRW<[V2Wr_ZPA, ReadDefault, V2Rd_ZPA],
             (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;
2132
// Arithmetic, shift (ASR, LSL, LSR and their reversed/wide forms)
def : InstRW<[V2Write_2cyc_1V13],
             (instregex
                "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
                "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
                "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
                "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
                "^(ASR|LSL|LSR)_ZZI_[BHSD]",
                "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
                "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;

// Arithmetic, shift and accumulate (SSRA, USRA, SRSRA, URSRA)
def : InstRW<[V2Wr_ZSA, V2Rd_ZSA],
             (instregex "^[SU]R?SRA_ZZI_[BHSD]")>;

// Arithmetic, shift by immediate (SHRN[BT], SSHLL[BT], USHLL[BT])
def : InstRW<[V2Write_2cyc_1V13],
             (instregex "^SHRN[BT]_ZZI_[BHS]",
                        "^[SU]SHLL[BT]_ZZI_[HSD]")>;

// Arithmetic, shift by immediate and insert (SLI, SRI)
def : InstRW<[V2Write_2cyc_1V13],
             (instregex "^(SLI|SRI)_ZZI_[BHSD]")>;

// Arithmetic, shift complex (saturating and/or rounding shifts)
def : InstRW<[V2Write_4cyc_1V13],
             (instregex
                "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
                "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
                "^[SU]QR?SHL_ZPZZ_[BHSD]",
                "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
                "^SQSHRU?N[BT]_ZZI_[BHS]",
                "^UQR?SHRN[BT]_ZZI_[BHS]")>;

// Arithmetic, shift right for divide (ASRD)
def : InstRW<[V2Write_4cyc_1V13],
             (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;

// Arithmetic, shift rounding (SRSHL, URSHL, SRSHLR, URSHLR, SRSHR, URSHR)
def : InstRW<[V2Write_4cyc_1V13],
             (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
                        "^[SU]RSHL_ZPZZ_[BHSD]",
                        "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;
2169
// Bit manipulation (BDEP, BEXT, BGRP)
def : InstRW<[V2Write_6cyc_2V1],
             (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;

// Bitwise select (BSL, BSL1N, BSL2N, NBSL)
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;

// Count/reverse bits (CLS, CLZ, CNT, RBIT)
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;

// Broadcast logical bitmask immediate to vector (DUPM)
def : InstRW<[V2Write_2cyc_1V],
             (instrs DUPM_ZI)>;

// Compare and set flags (CMP<cc>, including the wide-element forms)
def : InstRW<[V2Write_4or5cyc_1V0_1M0],
             (instregex
                "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
                "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;

// Complex add (CADD, SQCADD)
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^(SQ)?CADD_ZZI_[BHSD]")>;

// Complex dot product 8-bit element (CDOT, S destination)
def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB],
             (instrs CDOT_ZZZ_S,
                     CDOT_ZZZI_S)>;

// Complex dot product 16-bit element (CDOT, D destination)
def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH],
             (instrs CDOT_ZZZ_D,
                     CDOT_ZZZI_D)>;

// Complex multiply-add B, H, S element size (CMLA)
def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS],
             (instregex "^CMLA_ZZZ_[BHS]",
                        "^CMLA_ZZZI_[HS]")>;

// Complex multiply-add D element size (CMLA)
def : InstRW<[V2Wr_ZCMAD, V2Rd_ZCMAD],
             (instrs CMLA_ZZZ_D)>;

// Conditional extract operations, scalar form (CLASTA/CLASTB to a GPR)
def : InstRW<[V2Write_8cyc_1M0_1V01],
             (instregex "^CLAST[AB]_RPZ_[BHSD]")>;

// Conditional extract operations, SIMD&FP scalar and vector forms
// (CLASTA/CLASTB, COMPACT, SPLICE)
def : InstRW<[V2Write_3cyc_1V1],
             (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
                        "^COMPACT_ZPZ_[SD]",
                        "^SPLICE_ZPZZ?_[BHSD]")>;
2210
2211// Convert to floating point, 64b to float or convert to double
2212def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
2213                                             "^[SU]CVTF_ZPmZ_StoD")>;
2214
2215// Convert to floating point, 32b to single or half
2216def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
2217
2218// Convert to floating point, 16b to half
2219def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
2220
2221// Copy, scalar
2222def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>;
2223
2224// Copy, scalar SIMD&FP or imm
2225def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]",
2226                                           "^CPY_ZPzI_[BHSD]")>;
2227
2228// Divides, 32 bit
2229def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
2230                                             "^[SU]DIV_ZPZZ_S")>;
2231
2232// Divides, 64 bit
2233def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
2234                                             "^[SU]DIV_ZPZZ_D")>;
2235
2236// Dot product, 8 bit
2237def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>;
2238
2239// Dot product, 8 bit, using signed and unsigned integers
2240def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
2241
2242// Dot product, 16 bit
2243def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>;
2244
2245// Duplicate, immediate and indexed form
2246def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]",
2247                                           "^DUP_ZZI_[BHSDQ]")>;
2248
2249// Duplicate, scalar form
2250def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]")>;
2251
2252// Extend, sign or zero
// Byte extends exist for H/S/D elements, halfword extends for S/D, and word
// extends for D only. "[D]" in the last pattern is a one-character class and
// matches exactly the same names as a plain "D" suffix would.
2253def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ_[HSD]",
2254                                             "^[SU]XTH_ZPmZ_[SD]",
2255                                             "^[SU]XTW_ZPmZ_[D]")>;
2256
2257// Extract
2258def : InstRW<[V2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
2259
2260// Extract narrow saturating
2261def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
2262                                             "^SQXTUN[BT]_ZZ_[BHS]")>;
2263
2264// Extract/insert operation, SIMD and FP scalar form
2265def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]",
2266                                            "^INSR_ZV_[BHSD]")>;
2267
2268// Extract/insert operation, scalar
2269def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]",
2270                                                "^INSR_ZR_[BHSD]")>;
2271
2272// Histogram operations
2273def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]",
2274                                           "^HISTSEG_ZZZ")>;
2275
2276// Horizontal operations, B, H, S form, immediate operands only
2277def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]")>;
2278
2279// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar
2280// operands only / immediate, scalar operands
2281def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;
2282
2283// Horizontal operations, D form, immediate operands only
2284def : InstRW<[V2Write_5cyc_2V02], (instrs INDEX_II_D)>;
2285
2286// Horizontal operations, D form, scalar, immediate operands/ scalar operands
2287// only / immediate, scalar operands
2288def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D")>;
2289
2290// Logical
// Covers the immediate (_ZI), unpredicated (_ZZZ), interleaved EORBT/EORTB and
// predicated (_ZPmZ/_ZPZZ) forms. The predicated pattern already matches
// NOT_ZPmZ_[BHSD], so NOT needs no separate entry.
2291def : InstRW<[V2Write_2cyc_1V],
2292             (instregex "^(AND|EOR|ORR)_ZI",
2293                        "^(AND|BIC|EOR|ORR)_ZZZ",
2294                        "^EOR(BT|TB)_ZZZ_[BHSD]",
2295                        "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]")>;
2297
2298// Max/min, basic and pairwise
2299def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
2300                                           "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
2301                                           "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;
2302
2303// Matching operations
2304// FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the
2305// latency for this instruction is 4 cycles.
2306def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>;
2307
2308// Matrix multiply-accumulate
2309def : InstRW<[V2Wr_ZMMA, V2Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
2310
2311// Move prefix
2312def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
2313                                           "^MOVPRFX_ZZ")>;
2314
2315// Multiply, B, H, S element size
2316def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
2317                                             "^MUL_ZPZZ_[BHS]",
2318                                             "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
2319                                             "^[SU]MULH_ZPZZ_[BHS]")>;
2320
2321// Multiply, D element size
2322def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
2323                                             "^MUL_ZPZZ_D",
2324                                             "^[SU]MULH_(ZPmZ|ZZZ)_D",
2325                                             "^[SU]MULH_ZPZZ_D")>;
2326
2327// Multiply long
2328def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
2329                                             "^[SU]MULL[BT]_ZZZ_[HSD]")>;
2330
2331// Multiply accumulate, B, H, S element size
// The merging-predicated (_ZPmZZ) forms list an extra ReadDefault ahead of the
// V2Rd_* read. Presumably this consumes the governing-predicate source operand
// so that V2Rd_* lines up with the accumulator operand - TODO confirm against
// the instruction operand order.
// NOTE(review): the _ZPZZZ forms share the list without ReadDefault; confirm
// that V2Rd_* lands on the intended source operand for those forms as well.
2332def : InstRW<[V2Wr_ZMABHS, V2Rd_ZMABHS],
2333             (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>;
2334def : InstRW<[V2Wr_ZMABHS, ReadDefault, V2Rd_ZMABHS],
2335             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
2336
2337// Multiply accumulate, D element size
// Same split as the B/H/S entries: _ZPmZZ forms carry the extra ReadDefault.
2338def : InstRW<[V2Wr_ZMAD, V2Rd_ZMAD],
2339             (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>;
2340def : InstRW<[V2Wr_ZMAD, ReadDefault, V2Rd_ZMAD],
2341             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
2342
2343// Multiply accumulate long
2344def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
2345                                                "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
2346
2347// Multiply accumulate saturating doubling long regular
2348def : InstRW<[V2Wr_ZMASQL, V2Rd_ZMASQ],
2349             (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]",
2350                        "^SQDML[AS]L[BT]_ZZZI_[SD]")>;
2351
2352// Multiply saturating doubling high, B, H, S element size
2353def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]",
2354                                             "^SQDMULH_ZZZI_[HS]")>;
2355
2356// Multiply saturating doubling high, D element size
2357def : InstRW<[V2Write_5cyc_2V02], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
2358
2359// Multiply saturating doubling long
2360def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
2361                                             "^SQDMULL[BT]_ZZZI_[SD]")>;
2362
2363// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
2364// element size
2365def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
2366                                                     "^SQRDCMLAH_ZZZ_[BHS]",
2367                                                     "^SQRDML[AS]H_ZZZI_[HS]",
2368                                                     "^SQRDCMLAH_ZZZI_[HS]")>;
2369
2370// Multiply saturating rounding doubling regular/complex accumulate, D element
2371// size
2372def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D",
2373                                                   "^SQRDCMLAH_ZZZ_D")>;
2374
2375// Multiply saturating rounding doubling regular/complex, B, H, S element size
2376def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]",
2377                                             "^SQRDMULH_ZZZI_[HS]")>;
2378
2379// Multiply saturating rounding doubling regular/complex, D element size
2380def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D")>;
2381
2382// Multiply/multiply long, (8x8) polynomial
2383def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B",
2384                                             "^PMULL[BT]_ZZZ_[HDQ]")>;
2385
2386// Predicate counting vector
2387def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>;
2388
2389// Reciprocal estimate
2390def : InstRW<[V2Write_4cyc_2V02], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
2391
2392// Reduction, arithmetic, B form
2393def : InstRW<[V2Write_9cyc_2V_4V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
2394
2395// Reduction, arithmetic, H form
2396def : InstRW<[V2Write_8cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
2397
2398// Reduction, arithmetic, S form
2399def : InstRW<[V2Write_6cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
2400
2401// Reduction, arithmetic, D form
2402def : InstRW<[V2Write_4cyc_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
2403
2404// Reduction, logical
2405def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>;
2406
2407// Reverse, vector
2408def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]",
2409                                           "^REVB_ZPmZ_[HSD]",
2410                                           "^REVH_ZPmZ_[SD]",
2411                                           "^REVW_ZPmZ_D")>;
2412
2413// Select, vector form
2414def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]")>;
2415
2416// Table lookup
2417def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>;
2418
2419// Table lookup extension
2420def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]")>;
2421
2422// Transpose, vector form
2423def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
2424
2425// Unpack and extend
2426def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;
2427
2428// Zip/unzip
2429def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
2430
2431// SVE floating-point instructions
2432// -----------------------------------------------------------------------------
2433
2434// Floating point absolute value/difference
// "FAB[SD]" covers both FABS and FABD for the _ZPmZ form, so FABS_ZPmZ needs
// no separate pattern; FABD additionally has a _ZPZZ form.
2435def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]",
2436                                           "^FABD_ZPZZ_[HSD]")>;
2438
2439// Floating point arithmetic
2440def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
2441                                           "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
2442                                           "^FADDP_ZPmZZ_[HSD]",
2443                                           "^FNEG_ZPmZ_[HSD]",
2444                                           "^FSUBR_ZPm[IZ]_[HSD]",
2445                                           "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
2446
2447// Floating point associative add, F16
2448def : InstRW<[V2Write_10cyc_1V1_9rc], (instrs FADDA_VPZ_H)>;
2449
2450// Floating point associative add, F32
2451def : InstRW<[V2Write_6cyc_1V1_5rc], (instrs FADDA_VPZ_S)>;
2452
2453// Floating point associative add, F64
2454def : InstRW<[V2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
2455
2456// Floating point compare
2457def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]",
2458                                            "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
2459                                            "^FCM(LE|LT)_PPzZ0_[HSD]",
2460                                            "^FCMUO_PPzZZ_[HSD]")>;
2461
2462// Floating point complex add
2463def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]")>;
2464
2465// Floating point complex multiply add
2466def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>;
2467def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA],              (instregex "^FCMLA_ZZZI_[HS]")>;
2468
2469// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
2470def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
2471                                             "^FCVTLT_ZPmZ_HtoS",
2472                                             "^FCVTNT_ZPmZ_StoH")>;
2473
2474// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
2475// or F64 to F16)
2476def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
2477                                             "^FCVTLT_ZPmZ_StoD",
2478                                             "^FCVTNT_ZPmZ_DtoS")>;
2479
2480// Floating point convert, round to odd
2481def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
2482
2483// Floating point base2 log, F16
2484def : InstRW<[V2Write_6cyc_4V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
2485
2486// Floating point base2 log, F32
2487def : InstRW<[V2Write_4cyc_2V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
2488
2489// Floating point base2 log, F64
2490def : InstRW<[V2Write_3cyc_1V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
2491
2492// Floating point convert to integer, F16
2493def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
2494
2495// Floating point convert to integer, F32
2496def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
2497
2498// Floating point convert to integer, F64
2499def : InstRW<[V2Write_3cyc_1V02],
2500             (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
2501
2502// Floating point copy
2503def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]",
2504                                           "^FDUP_ZI_[HSD]")>;
2505
2506// Floating point divide, F16
2507def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
2508
2509// Floating point divide, F32
2510def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
2511
2512// Floating point divide, F64
2513def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
2514
2515// Floating point min/max pairwise
2516def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
2517
2518// Floating point min/max
2519def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
2520                                           "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
2521
2522// Floating point multiply
2523def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
2524                                           "^FMULX_ZPZZ_[HSD]",
2525                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
2526                                           "^FMUL_ZPZ[IZ]_[HSD]")>;
2527
2528// Floating point multiply accumulate
// The merging-predicated (_ZPmZZ) forms list an extra ReadDefault ahead of
// V2Rd_ZFMA. Presumably this consumes the governing-predicate source operand
// so that V2Rd_ZFMA lines up with the accumulator operand - TODO confirm
// against the instruction operand order.
// NOTE(review): the _ZPZZZ forms use the list without ReadDefault; confirm
// that V2Rd_ZFMA lands on the intended source operand for those forms.
2529def : InstRW<[V2Wr_ZFMA, ReadDefault, V2Rd_ZFMA],
2530             (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
2531                        "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
2532def : InstRW<[V2Wr_ZFMA, V2Rd_ZFMA],
2533             (instregex "^FML[AS]_ZZZI_[HSD]",
2534                        "^FN?ML[AS]_ZPZZZ_[HSD]")>;
2535
2536// Floating point multiply add/sub accumulate long
2537def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
2538
2539// Floating point reciprocal estimate, F16
2540def : InstRW<[V2Write_6cyc_4V02], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
2541
2542// Floating point reciprocal estimate, F32
2543def : InstRW<[V2Write_4cyc_2V02], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
2544
2545// Floating point reciprocal estimate, F64
2546def : InstRW<[V2Write_3cyc_1V02], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
2547
2548// Floating point reciprocal step
2549def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
2550
2551// Floating point reduction, F16
2552def : InstRW<[V2Write_8cyc_4V],
2553             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>;
2554
2555// Floating point reduction, F32
2556def : InstRW<[V2Write_6cyc_3V],
2557             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>;
2558
2559// Floating point reduction, F64
2560def : InstRW<[V2Write_4cyc_2V],
2561             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>;
2562
2563// Floating point round to integral, F16
2564def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
2565
2566// Floating point round to integral, F32
2567def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
2568
2569// Floating point round to integral, F64
2570def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
2571
2572// Floating point square root, F16
// Uses the same write as the F16 floating point divide entry.
2573def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FSQRT_ZPmZ_H")>;
2574
2575// Floating point square root, F32
// Uses the same write as the F32 floating point divide entry.
2576def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FSQRT_ZPmZ_S")>;
2577
2578// Floating point square root, F64
// Unlike F16/F32 this does not reuse the divide write: the F64 divide entry
// uses V2Write_15cyc_1V02_14rc, this one V2Write_16cyc_1V02_14rc.
2579def : InstRW<[V2Write_16cyc_1V02_14rc], (instregex "^FSQRT_ZPmZ_D")>;
2580
2581// Floating point trigonometric exponentiation
2582def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;
2583
2584// Floating point trigonometric multiply add
2585def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]")>;
2586
2587// Floating point trigonometric, miscellaneous
2588def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>;
2589
2590// SVE BFloat16 (BF16) instructions
2591// -----------------------------------------------------------------------------
2592
2593// Convert, F32 to BF16
2594def : InstRW<[V2Write_4cyc_1V02], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
2595
2596// Dot product
2597def : InstRW<[V2Wr_ZBFDOT, V2Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
2598
2599// Matrix multiply accumulate
2600def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>;
2601
2602// Multiply accumulate long
2603def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>;
2604
2605// SVE Load instructions
2606// -----------------------------------------------------------------------------
2607
2608// Load vector
2609def : InstRW<[V2Write_6cyc_1L], (instrs LDR_ZXI)>;
2610
2611// Load predicate
2612def : InstRW<[V2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
2613
2614// Contiguous load, scalar + imm
2615def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM$",
2616                                           "^LD1S?B_[HSD]_IMM$",
2617                                           "^LD1S?H_[SD]_IMM$",
2618                                           "^LD1S?W_D_IMM$" )>;
2619// Contiguous load, scalar + scalar
2620def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]$",
2621                                           "^LD1S?B_[HSD]$",
2622                                           "^LD1S?H_[SD]$",
2623                                           "^LD1S?W_D$" )>;
2624
2625// Contiguous load broadcast, scalar + imm
2626def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
2627                                           "^LD1RS?B_[HSD]_IMM$",
2628                                           "^LD1RS?H_[SD]_IMM$",
2629                                           "^LD1RW_D_IMM$",
2630                                           "^LD1RSW_IMM$",
2631                                           "^LD1RQ_[BHWD]_IMM$")>;
2632
2633// Contiguous load broadcast, scalar + scalar
2634def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
2635
2636// Non temporal load, scalar + imm
2637// Non temporal load, scalar + scalar
// Both addressing modes share one entry: "[IR]" selects the _ZRI and the _ZRR
// instruction names.
2638def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZR[IR]$")>;
2639
2640// Non temporal gather load, vector + scalar 32-bit element size
2641def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S$",
2642                                              "^LDNT1S[BH]_ZZR_S$")>;
2643
2644// Non temporal gather load, vector + scalar 64-bit element size
// The regex covers the B/H/W loads and their sign-extending "S" variants; its
// class "[BHW]" cannot match LDNT1D_ZZR_D, which is therefore listed by name
// with the same write.
2645def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D$")>;
2646def : InstRW<[V2Write_9cyc_2L_2V1], (instrs LDNT1D_ZZR_D)>;
2647
2648// Contiguous first faulting load, scalar + scalar
2649def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]$",
2650                                              "^LDFF1S?B_[HSD]$",
2651                                              "^LDFF1S?H_[SD]$",
2652                                              "^LDFF1S?W_D$")>;
2653
2654// Contiguous non faulting load, scalar + imm
2655def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM$",
2656                                           "^LDNF1S?B_[HSD]_IMM$",
2657                                           "^LDNF1S?H_[SD]_IMM$",
2658                                           "^LDNF1S?W_D_IMM$")>;
2659
2660// Contiguous Load two structures to two vectors, scalar + imm
2661def : InstRW<[V2Write_8cyc_2L_2V], (instregex "^LD2[BHWD]_IMM$")>;
2662
2663// Contiguous Load two structures to two vectors, scalar + scalar
2664def : InstRW<[V2Write_9cyc_2L_2V_2S], (instregex "^LD2[BHWD]$")>;
2665
2666// Contiguous Load three structures to three vectors, scalar + imm
2667def : InstRW<[V2Write_9cyc_3L_3V], (instregex "^LD3[BHWD]_IMM$")>;
2668
2669// Contiguous Load three structures to three vectors, scalar + scalar
2670def : InstRW<[V2Write_10cyc_3V_3L_3S], (instregex "^LD3[BHWD]$")>;
2671
2672// Contiguous Load four structures to four vectors, scalar + imm
2673def : InstRW<[V2Write_9cyc_4L_8V], (instregex "^LD4[BHWD]_IMM$")>;
2674
2675// Contiguous Load four structures to four vectors, scalar + scalar
2676def : InstRW<[V2Write_10cyc_4L_8V_4S], (instregex "^LD4[BHWD]$")>;
2677
2678// Gather load, vector + imm, 32-bit element size
// "(FF)?" makes each pattern cover the first-faulting GLDFF1* variant too.
2679def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$",
2680                                              "^GLD(FF)?1W_IMM$")>;
2681
2682// Gather load, vector + imm, 64-bit element size
// NOTE(review): this uses the same V2Write_9cyc_1L_4V write as the 32-bit
// entry, while the 64-bit unpacked unscaled offset entry uses
// V2Write_9cyc_1L_2V - confirm against the SOG.
2683def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$",
2684                                              "^GLD(FF)?1D_IMM$")>;
2685
2686// Gather load, 32-bit scaled offset
// NOTE(review): the second pattern has no trailing "$", unlike the first and
// unlike the neighbouring gather entries, so it also selects any instruction
// name that merely starts with GLD(FF)?1W_[SU]XTW_SCALED - confirm whether
// that is intended.
2687def : InstRW<[V2Write_10cyc_1L_8V],
2688             (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED$",
2689                        "^GLD(FF)?1W_[SU]XTW_SCALED")>;
2690
2691// Gather load, 64-bit scaled offset
2692// NOTE: These instructions are not specified in the SOG.
2693def : InstRW<[V2Write_10cyc_1L_4V],
2694             (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED$",
2695                        "^GLD(FF)?1D_([SU]XTW_)?SCALED$")>;
2696
2697// Gather load, 32-bit unpacked unscaled offset
2698def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$",
2699                                              "^GLD(FF)?1W_[SU]XTW$")>;
2700
2701// Gather load, 64-bit unpacked unscaled offset
2702// NOTE: These instructions are not specified in the SOG.
2703def : InstRW<[V2Write_9cyc_1L_2V],
2704             (instregex "^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?$",
2705                        "^GLD(FF)?1D(_[SU]XTW)?$")>;
2706
2707// SVE Store instructions
2708// -----------------------------------------------------------------------------
2709
2710// Store from predicate reg
2711def : InstRW<[V2Write_1cyc_1L01], (instrs STR_PXI)>;
2712
2713// Store from vector reg
2714def : InstRW<[V2Write_2cyc_1L01_1V01], (instrs STR_ZXI)>;
2715
2716// Contiguous store, scalar + imm
2717def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BHWD]_IMM$",
2718                                                  "^ST1B_[HSD]_IMM$",
2719                                                  "^ST1H_[SD]_IMM$",
2720                                                  "^ST1W_D_IMM$")>;
2721
2722// Contiguous store, scalar + scalar
// The halfword stores (ST1H, ST1H_S, ST1H_D) get their own write, whose name
// adds a "1S" component; all other element sizes use the same write as the
// scalar + imm forms.
// NOTE(review): presumably the halfword forms need an extra S-pipe micro-op
// for the scaled index - confirm against the SOG.
2723def : InstRW<[V2Write_2cyc_1L01_1S_1V01], (instregex "^ST1H(_[SD])?$")>;
2724def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BWD]$",
2725                                                  "^ST1B_[HSD]$",
2726                                                  "^ST1W_D$")>;
2727
2728// Contiguous store two structures from two vectors, scalar + imm
// NOTE(review): this write is named 1L01_1V01 while the scalar + scalar
// B/W/D entry for the same instruction is named 2L01_2V01 - confirm against
// the SOG that the differing resource counts are intended.
2729def : InstRW<[V2Write_4cyc_1L01_1V01], (instregex "^ST2[BHWD]_IMM$")>;
2730
2731// Contiguous store two structures from two vectors, scalar + scalar
// Only ST2H uses the write with the additional "2S" component; ST2B, ST2W
// and ST2D use the variant without it.
2732def : InstRW<[V2Write_4cyc_2L01_2S_2V01], (instrs ST2H)>;
2733def : InstRW<[V2Write_4cyc_2L01_2V01], (instregex "^ST2[BWD]$")>;
2734
2735// Contiguous store three structures from three vectors, scalar + imm
2736def : InstRW<[V2Write_7cyc_9L01_9V01], (instregex "^ST3[BHWD]_IMM$")>;
2737
2738// Contiguous store three structures from three vectors, scalar + scalar
2739def : InstRW<[V2Write_7cyc_9L01_9S_9V01], (instregex "^ST3[BHWD]$")>;
2740
2741// Contiguous store four structures from four vectors, scalar + imm
2742def : InstRW<[V2Write_11cyc_18L01_18V01], (instregex "^ST4[BHWD]_IMM$")>;
2743
2744// Contiguous store four structures from four vectors, scalar + scalar
2745def : InstRW<[V2Write_11cyc_18L01_18S_18V01], (instregex "^ST4[BHWD]$")>;
2746
2747// Non temporal store, scalar + imm
2748def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
2749
2750// Non temporal store, scalar + scalar
// Only the halfword form uses the write with the additional "1S" component;
// the B, W and D forms use the same write as the scalar + imm stores.
2751def : InstRW<[V2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
2752def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
2753
2754// Scatter non temporal store, vector + scalar 32-bit element size
// NOTE(review): neither pattern in this group ends in "$", so each also
// selects any instruction whose name merely starts with the given text -
// confirm whether that is intended.
2755def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^STNT1[BHW]_ZZR_S")>;
2756
2757// Scatter non temporal store, vector + scalar 64-bit element size
2758def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^STNT1[BHWD]_ZZR_D")>;
2759
2760// Scatter store vector + imm 32-bit element size
2761def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_IMM$",
2762                                                  "^SST1W_IMM$")>;
2763
2764// Scatter store vector + imm 64-bit element size
2765def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_IMM$",
2766                                                  "^SST1D_IMM$")>;
2767
2768// Scatter store, 32-bit scaled offset
2769def : InstRW<[V2Write_4cyc_4L01_4V01],
2770             (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
2771
2772// Scatter store, 32-bit unpacked unscaled offset
2773def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_[SU]XTW$",
2774                                                  "^SST1D_[SU]XTW$")>;
2775
2776// Scatter store, 32-bit unpacked scaled offset
2777def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
2778                                                  "^SST1D_[SU]XTW_SCALED$")>;
2779
2780// Scatter store, 32-bit unscaled offset
2781def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_[SU]XTW$",
2782                                                  "^SST1W_[SU]XTW$")>;
2783
2784// Scatter store, 64-bit scaled offset
2785def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_SCALED$",
2786                                                  "^SST1D_SCALED$")>;
2787
2788// Scatter store, 64-bit unscaled offset
2789def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D$",
2790                                                  "^SST1D$")>;
2791
2792// SVE Miscellaneous instructions
2793// -----------------------------------------------------------------------------
2794
2795// Read first fault register, unpredicated
2796def : InstRW<[V2Write_2cyc_1M0], (instrs RDFFR_P)>;
2797
2798// Read first fault register, predicated
2799def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs RDFFR_PPz)>;
2800
2801// Read first fault register and set flags
2802def : InstRW<[V2Write_4or5cyc_2M0_2M], (instrs RDFFRS_PPz)>;
2803
2804// Set first fault register
2805// Write to first fault register
2806def : InstRW<[V2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;
2807
2808// Prefetch
2809// NOTE: This is not specified in the SOG.
// The pattern has no trailing "$", so it selects every instruction whose
// name begins with PRFB, PRFH, PRFW or PRFD, whatever suffix follows.
2810def : InstRW<[V2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;
2811
2812// SVE Cryptographic instructions
2813// -----------------------------------------------------------------------------
2814
2815// Crypto AES ops
2816def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$",
2817                                           "^AESI?MC_ZZ_B$")>;
2818
2819// Crypto SHA3 ops
2820def : InstRW<[V2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$",
2821                                            "^RAX1_ZZZ_D$",
2822                                            "^XAR_ZZZI_[BHSD]$")>;
2823
2824// Crypto SM4 ops
2825def : InstRW<[V2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;
2826
2827}
2828