// xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td (revision a90b9d0159070121c221b966469c3e36d912bf82)
//=- AArch64SchedNeoverseV2.td - NeoverseV2 Scheduling Defs --*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Arm Neoverse V2 processors.
// All information is taken from the V2 Software Optimisation guide:
//
// https://developer.arm.com/documentation/PJDOC-466751330-593177/r0p2
//
//===----------------------------------------------------------------------===//

// Machine model record for Neoverse V2. The processor resources and the
// scheduling read/write types defined later in this file are attached to it
// through `let SchedModel = NeoverseV2Model in { ... }`.
def NeoverseV2Model : SchedMachineModel {
  let IssueWidth            =  16; // Micro-ops dispatched at a time.
  let MicroOpBufferSize     = 160; // Entries in micro-op re-order buffer. NOTE: Copied from N2.
  let LoadLatency           =   4; // Optimistic load latency.
  let MispredictPenalty     =  10; // Extra cycles for mispredicted branch.  NOTE: Copied from N2.
  let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.
  let CompleteModel         =   1;

  // Feature predicates this model does not describe: everything listed in
  // SMEUnsupported.F plus SVE2p1, CPA and CSSC.
  list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
                                                    [HasSVE2p1, HasCPA,
                                                    HasCSSC]);
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Neoverse V2.
// Instructions are first fetched and then decoded into internal macro-ops
// (MOPs). From there, the MOPs proceed through register renaming and dispatch
// stages. A MOP can be split into two micro-ops further down the pipeline
// after the decode stage. Once dispatched, micro-ops wait for their operands
// and issue out-of-order to one of seventeen issue pipelines. Each issue
// pipeline can accept one micro-op per cycle.

// Everything inside this scope is attached to the Neoverse V2 machine model.
let SchedModel = NeoverseV2Model in {

// Define the (17) issue ports.
// The ProcResource<N> argument is the number of pipelines the record stands
// for: 2 + 4 + 2 + 4 + 2 + 1 + 2 = 17.
def V2UnitB   : ProcResource<2>;  // Branch 0/1
def V2UnitS0  : ProcResource<1>;  // Integer single-cycle 0
def V2UnitS1  : ProcResource<1>;  // Integer single-cycle 1
def V2UnitS2  : ProcResource<1>;  // Integer single-cycle 2
def V2UnitS3  : ProcResource<1>;  // Integer single-cycle 3
def V2UnitM0  : ProcResource<1>;  // Integer single/multicycle 0
def V2UnitM1  : ProcResource<1>;  // Integer single/multicycle 1
def V2UnitV0  : ProcResource<1>;  // FP/ASIMD 0
def V2UnitV1  : ProcResource<1>;  // FP/ASIMD 1
def V2UnitV2  : ProcResource<1>;  // FP/ASIMD 2
def V2UnitV3  : ProcResource<1>;  // FP/ASIMD 3
def V2UnitL01 : ProcResource<2>;  // Load/Store 0/1
def V2UnitL2  : ProcResource<1>;  // Load 2
def V2UnitD   : ProcResource<2>;  // Store data 0/1

// Groups of the ports above. The write types below name a group when a
// micro-op may use any one of the group's member ports.
def V2UnitR   : ProcResGroup<[V2UnitS0, V2UnitS1]>;  // Integer single-cycle 0/1
def V2UnitS   : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3]>;  // Integer single-cycle 0/1/2/3
def V2UnitF   : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitM0, V2UnitM1]>;  // Integer single-cycle 0/1 and single/multicycle 0/1
def V2UnitI   : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3, V2UnitM0, V2UnitM1]>;  // Integer single-cycle 0/1/2/3 and single/multicycle 0/1
def V2UnitM   : ProcResGroup<[V2UnitM0, V2UnitM1]>;  // Integer single/multicycle 0/1
def V2UnitL   : ProcResGroup<[V2UnitL01, V2UnitL2]>; // Load/Store 0/1 and Load 2
def V2UnitV   : ProcResGroup<[V2UnitV0, V2UnitV1, V2UnitV2, V2UnitV3]>;  // FP/ASIMD 0/1/2/3
def V2UnitV01 : ProcResGroup<[V2UnitV0, V2UnitV1]>;  // FP/ASIMD 0/1
def V2UnitV02 : ProcResGroup<[V2UnitV0, V2UnitV2]>;  // FP/ASIMD 0/2
def V2UnitV13 : ProcResGroup<[V2UnitV1, V2UnitV3]>;  // FP/ASIMD 1/3
def V2UnitV23 : ProcResGroup<[V2UnitV2, V2UnitV3]>;  // FP/ASIMD 2/3

// Define commonly used read types.

// No forwarding is provided for these types.
// (Every generic read class below is given a ReadAdvance of 0 cycles.)
def : ReadAdvance<ReadI,       0>;
def : ReadAdvance<ReadISReg,   0>;
def : ReadAdvance<ReadIEReg,   0>;
def : ReadAdvance<ReadIM,      0>;
def : ReadAdvance<ReadIMA,     0>;
def : ReadAdvance<ReadID,      0>;
def : ReadAdvance<ReadExtrHi,  0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadST,      0>;
def : ReadAdvance<ReadVLD,     0>;

// NOTE: Copied from N2.
// These generic write classes are bound to an empty resource list, so they
// occupy none of the V2Unit* ports; WriteAtomic is marked unsupported.
def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint,    []> { let Latency = 1; }
def : WriteRes<WriteLDHi,    []> { let Latency = 4; }

//===----------------------------------------------------------------------===//
// Define customized scheduler read/write types specific to the Neoverse V2.

//===----------------------------------------------------------------------===//

// Define generic 0 micro-op types
// V2Write_0cyc uses no processor resource and has zero latency.
def V2Write_0cyc : SchedWriteRes<[]> { let Latency = 0; }

// Define generic 1 micro-op types
//
// Record names follow V2Write_<latency>cyc_1<unit>: the number before "cyc"
// is the Latency value and the suffix names the single V2Unit* resource used.
// Where ReleaseAtCycles is given it sets how many cycles that resource stays
// occupied by the micro-op; the other records leave it at its default.

def V2Write_1cyc_1B    : SchedWriteRes<[V2UnitB]>   { let Latency = 1; }
def V2Write_1cyc_1F    : SchedWriteRes<[V2UnitF]>   { let Latency = 1; }
def V2Write_1cyc_1I    : SchedWriteRes<[V2UnitI]>   { let Latency = 1; }
def V2Write_1cyc_1M    : SchedWriteRes<[V2UnitM]>   { let Latency = 1; }
def V2Write_1cyc_1M0   : SchedWriteRes<[V2UnitM0]>  { let Latency = 1; }
def V2Write_1cyc_1L01  : SchedWriteRes<[V2UnitL01]> { let Latency = 1; }
def V2Write_2cyc_1M    : SchedWriteRes<[V2UnitM]>   { let Latency = 2; }
def V2Write_3cyc_1M    : SchedWriteRes<[V2UnitM]>   { let Latency = 3; }
def V2Write_2cyc_1M0   : SchedWriteRes<[V2UnitM0]>  { let Latency = 2; }
def V2Write_3cyc_1M0   : SchedWriteRes<[V2UnitM0]>  { let Latency = 3; }
def V2Write_5cyc_1M0   : SchedWriteRes<[V2UnitM0]>  { let Latency = 5; }
def V2Write_12cyc_1M0  : SchedWriteRes<[V2UnitM0]>  { let Latency = 12;
                                                      let ReleaseAtCycles = [12]; }
def V2Write_20cyc_1M0  : SchedWriteRes<[V2UnitM0]>  { let Latency = 20;
                                                      let ReleaseAtCycles = [20]; }
def V2Write_4cyc_1L    : SchedWriteRes<[V2UnitL]>   { let Latency = 4; }
def V2Write_6cyc_1L    : SchedWriteRes<[V2UnitL]>   { let Latency = 6; }
def V2Write_2cyc_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 2; }
def V2Write_2cyc_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 2; }
def V2Write_2cyc_1V01  : SchedWriteRes<[V2UnitV01]> { let Latency = 2; }
def V2Write_2cyc_1V23  : SchedWriteRes<[V2UnitV23]> { let Latency = 2; }
def V2Write_3cyc_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 3; }
def V2Write_3cyc_1V01  : SchedWriteRes<[V2UnitV01]> { let Latency = 3;
                                                      let ReleaseAtCycles = [2]; }
def V2Write_3cyc_1V23  : SchedWriteRes<[V2UnitV23]> { let Latency = 3; }
def V2Write_4cyc_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 4; }
def V2Write_5cyc_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 5; }
def V2Write_6cyc_1V    : SchedWriteRes<[V2UnitV]>   { let Latency = 6; }
def V2Write_12cyc_1V   : SchedWriteRes<[V2UnitV]>   { let Latency = 12; }
def V2Write_3cyc_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 3; }
def V2Write_3cyc_1V02  : SchedWriteRes<[V2UnitV02]> { let Latency = 3; }
def V2Write_4cyc_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 4; }
def V2Write_4cyc_1V02  : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Write_7cyc_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 7;
                                                      let ReleaseAtCycles = [7]; }
def V2Write_7cyc_1V02  : SchedWriteRes<[V2UnitV02]> { let Latency = 7;
                                                      let ReleaseAtCycles = [2]; }
def V2Write_9cyc_1V0   : SchedWriteRes<[V2UnitV0]>  { let Latency = 9; }
def V2Write_9cyc_1V02  : SchedWriteRes<[V2UnitV02]> { let Latency = 9;
                                                      let ReleaseAtCycles = [2]; }
def V2Write_10cyc_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 10; }
def V2Write_10cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 10;
                                                      let ReleaseAtCycles = [2]; }
def V2Write_12cyc_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 12;
                                                      let ReleaseAtCycles = [11]; }
def V2Write_13cyc_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 13; }
def V2Write_15cyc_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 15; }
def V2Write_15cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 15;
                                                      let ReleaseAtCycles = [8]; }
def V2Write_16cyc_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 16; }
def V2Write_16cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 16;
                                                      let ReleaseAtCycles = [8]; }
def V2Write_20cyc_1V0  : SchedWriteRes<[V2UnitV0]>  { let Latency = 20;
                                                      let ReleaseAtCycles = [20]; }
def V2Write_2cyc_1V1   : SchedWriteRes<[V2UnitV1]>  { let Latency = 2; }
def V2Write_2cyc_1V13  : SchedWriteRes<[V2UnitV13]> { let Latency = 2; }
def V2Write_3cyc_1V1   : SchedWriteRes<[V2UnitV1]>  { let Latency = 3; }
def V2Write_4cyc_1V1   : SchedWriteRes<[V2UnitV1]>  { let Latency = 4; }
def V2Write_4cyc_1V13  : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Write_6cyc_1V1   : SchedWriteRes<[V2UnitV1]>  { let Latency = 6; }
def V2Write_10cyc_1V1  : SchedWriteRes<[V2UnitV1]>  { let Latency = 10; }
def V2Write_6cyc_1L01  : SchedWriteRes<[V2UnitL01]> { let Latency = 6; }

//===----------------------------------------------------------------------===//
// Define generic 2 micro-op types
//
// Names read V2Write_<latency>cyc_<count><unit>_...: each record lists one
// processor resource per micro-op, so the two list entries match
// NumMicroOps = 2, and Latency equals the number before "cyc".

def V2Write_1cyc_1B_1R : SchedWriteRes<[V2UnitB, V2UnitR]> {
  let Latency     = 1;
  let NumMicroOps = 2;
}

def V2Write_6cyc_1M0_1B : SchedWriteRes<[V2UnitM0, V2UnitB]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_9cyc_1M0_1L : SchedWriteRes<[V2UnitM0, V2UnitL]> {
  let Latency     = 9;
  let NumMicroOps = 2;
}

def V2Write_3cyc_1I_1M : SchedWriteRes<[V2UnitI, V2UnitM]> {
  let Latency     = 3;
  let NumMicroOps = 2;
}

def V2Write_1cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
  let Latency     = 1;
  let NumMicroOps = 2;
}

def V2Write_3cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
  let Latency     = 3;
  let NumMicroOps = 2;
}

def V2Write_4cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_5cyc_1L_1F : SchedWriteRes<[V2UnitL, V2UnitF]> {
  let Latency     = 5;
  let NumMicroOps = 2;
}

def V2Write_6cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_7cyc_1F_1L : SchedWriteRes<[V2UnitF, V2UnitL]> {
  let Latency     = 7;
  let NumMicroOps = 2;
}

def V2Write_7cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> {
  let Latency     = 7;
  let NumMicroOps = 2;
}

def V2Write_1cyc_1L01_1D : SchedWriteRes<[V2UnitL01, V2UnitD]> {
  let Latency     = 1;
  let NumMicroOps = 2;
}

def V2Write_5cyc_1M0_1V : SchedWriteRes<[V2UnitM0, V2UnitV]> {
  let Latency     = 5;
  let NumMicroOps = 2;
}

def V2Write_2cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> {
  let Latency     = 2;
  let NumMicroOps = 2;
}

def V2Write_2cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> {
  let Latency     = 2;
  let NumMicroOps = 2;
}

def V2Write_2cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> {
  let Latency     = 2;
  let NumMicroOps = 2;
}

def V2Write_4cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_4cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_4cyc_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_4cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_4cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_4cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_6cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_6cyc_2L : SchedWriteRes<[V2UnitL, V2UnitL]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_8cyc_1L_1V : SchedWriteRes<[V2UnitL, V2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 2;
}

def V2Write_4cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_3cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> {
  let Latency     = 3;
  let NumMicroOps = 2;
}

def V2Write_4cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_1cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> {
  let Latency     = 1;
  let NumMicroOps = 2;
}

def V2Write_2cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> {
  let Latency     = 2;
  let NumMicroOps = 2;
}

def V2Write_6cyc_2V1 : SchedWriteRes<[V2UnitV1, V2UnitV1]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_4cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_5cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> {
  let Latency     = 5;
  let NumMicroOps = 2;
}

def V2Write_5cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> {
  let Latency     = 5;
  let NumMicroOps = 2;
}

def V2Write_5cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> {
  let Latency     = 5;
  let NumMicroOps = 2;
}

def V2Write_6cyc_1V1_1M0 : SchedWriteRes<[V2UnitV1, V2UnitM0]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_7cyc_1M0_1V02 : SchedWriteRes<[V2UnitM0, V2UnitV02]> {
  let Latency     = 7;
  let NumMicroOps = 2;
}

def V2Write_2cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> {
  let Latency     = 2;
  let NumMicroOps = 2;
}

def V2Write_3cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> {
  let Latency     = 3;
  let NumMicroOps = 2;
}

def V2Write_6cyc_1V_1V13 : SchedWriteRes<[V2UnitV, V2UnitV13]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_6cyc_1L_1M : SchedWriteRes<[V2UnitL, V2UnitM]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_6cyc_1L_1S : SchedWriteRes<[V2UnitL, V2UnitS]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def V2Write_4cyc_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def V2Write_8cyc_1M0_1V01 : SchedWriteRes<[V2UnitM0, V2UnitV01]> {
  let Latency     = 8;
  let NumMicroOps = 2;
}

//===----------------------------------------------------------------------===//
// Define generic 3 micro-op types
//
// One processor resource is listed per micro-op (three entries each); the
// number before "cyc" in the record name is its Latency.

def V2Write_1cyc_1L01_1D_1I : SchedWriteRes<[V2UnitL01, V2UnitD, V2UnitI]> {
  let Latency     = 1;
  let NumMicroOps = 3;
}

def V2Write_2cyc_1L01_1V01_1I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitI]> {
  let Latency     = 2;
  let NumMicroOps = 3;
}

def V2Write_2cyc_1L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> {
  let Latency     = 2;
  let NumMicroOps = 3;
}

def V2Write_4cyc_1L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> {
  let Latency     = 4;
  let NumMicroOps = 3;
}

def V2Write_9cyc_1L_2V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> {
  let Latency     = 9;
  let NumMicroOps = 3;
}

def V2Write_4cyc_3V01 : SchedWriteRes<[V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency     = 4;
  let NumMicroOps = 3;
}

def V2Write_7cyc_1M_1M0_1V : SchedWriteRes<[V2UnitM, V2UnitM0, V2UnitV]> {
  let Latency     = 7;
  let NumMicroOps = 3;
}

def V2Write_2cyc_1L01_1S_1V : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV]> {
  let Latency     = 2;
  let NumMicroOps = 3;
}

def V2Write_2cyc_1L01_1S_1V01 : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV01]> {
  let Latency     = 2;
  let NumMicroOps = 3;
}

def V2Write_6cyc_3L : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL]> {
  let Latency     = 6;
  let NumMicroOps = 3;
}

def V2Write_6cyc_3V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV]> {
  let Latency     = 6;
  let NumMicroOps = 3;
}

def V2Write_8cyc_1L_2V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 3;
}

//===----------------------------------------------------------------------===//
// Define generic 4 micro-op types
//
// Each record names four processor resources, one per micro-op. The unit
// counts spelled in the record name add up to NumMicroOps = 4.

def V2Write_2cyc_1L01_2V01_1I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
                                               V2UnitI]> {
  let Latency     = 2;
  let NumMicroOps = 4;
}

def V2Write_2cyc_2L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
                                            V2UnitV01, V2UnitV01]> {
  let Latency     = 2;
  let NumMicroOps = 4;
}

def V2Write_4cyc_2L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
                                            V2UnitV01, V2UnitV01]> {
  let Latency     = 4;
  let NumMicroOps = 4;
}

def V2Write_5cyc_1I_3L : SchedWriteRes<[V2UnitI, V2UnitL, V2UnitL, V2UnitL]> {
  let Latency     = 5;
  let NumMicroOps = 4;
}

def V2Write_9cyc_2L_2V1 : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV1,
                                         V2UnitV1]> {
  let Latency     = 9;
  let NumMicroOps = 4;
}

def V2Write_6cyc_4V0 : SchedWriteRes<[V2UnitV0, V2UnitV0, V2UnitV0, V2UnitV0]> {
  let Latency     = 6;
  let NumMicroOps = 4;
}

def V2Write_8cyc_4V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 4;
}

def V2Write_6cyc_2V_2V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
                                          V2UnitV13]> {
  let Latency     = 6;
  let NumMicroOps = 4;
}

def V2Write_8cyc_2V_2V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
                                          V2UnitV13]> {
  let Latency     = 8;
  let NumMicroOps = 4;
}

def V2Write_6cyc_4V02 : SchedWriteRes<[V2UnitV02, V2UnitV02, V2UnitV02,
                                       V2UnitV02]> {
  let Latency     = 6;
  let NumMicroOps = 4;
}

def V2Write_6cyc_4V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency     = 6;
  let NumMicroOps = 4;
}

def V2Write_8cyc_2L_2V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 4;
}

def V2Write_9cyc_2L_2V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> {
  let Latency     = 9;
  let NumMicroOps = 4;
}

def V2Write_2cyc_2L01_2V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV,
                                          V2UnitV]> {
  let Latency     = 2;
  let NumMicroOps = 4;
}

def V2Write_4cyc_2L01_2V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV,
                                          V2UnitV]> {
  let Latency     = 4;
  let NumMicroOps = 4;
}

def V2Write_8cyc_2M0_2V02 : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitV02,
                                           V2UnitV02]> {
  let Latency     = 8;
  let NumMicroOps = 4;
}

def V2Write_8cyc_2V_2V1 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV1,
                                         V2UnitV1]> {
  let Latency     = 8;
  let NumMicroOps = 4;
}

def V2Write_4cyc_2M0_2M : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM,
                                         V2UnitM]> {
  let Latency     = 4;
  let NumMicroOps = 4;
}

def V2Write_5cyc_2M0_2M : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM,
                                         V2UnitM]> {
  let Latency     = 5;
  let NumMicroOps = 4;
}

def V2Write_6cyc_2I_2L : SchedWriteRes<[V2UnitI, V2UnitI, V2UnitL, V2UnitL]> {
  let Latency     = 6;
  let NumMicroOps = 4;
}

def V2Write_7cyc_4L : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL]> {
  let Latency     = 7;
  let NumMicroOps = 4;
}

def V2Write_6cyc_1L01_3V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
                                            V2UnitV01]> {
  let Latency     = 6;
  let NumMicroOps = 4;
}

//===----------------------------------------------------------------------===//
// Define generic 5 micro-op types
//
// Five processor resources per record, one for each micro-op.

def V2Write_2cyc_1L01_2V01_2I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
                                               V2UnitI, V2UnitI]> {
  let Latency     = 2;
  let NumMicroOps = 5;
}

def V2Write_8cyc_2L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV,
                                        V2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 5;
}

def V2Write_9cyc_1L_4V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
                                        V2UnitV]> {
  let Latency     = 9;
  let NumMicroOps = 5;
}

def V2Write_10cyc_1L_4V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
                                         V2UnitV]> {
  let Latency     = 10;
  let NumMicroOps = 5;
}

def V2Write_6cyc_5V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV,
                                     V2UnitV]> {
  let Latency     = 6;
  let NumMicroOps = 5;
}

//===----------------------------------------------------------------------===//
// Define generic 6 micro-op types
//
// Six processor resources per record, one for each micro-op; the per-unit
// counts appear in the record name.

def V2Write_8cyc_3L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
                                        V2UnitV, V2UnitV, V2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 6;
}

def V2Write_9cyc_3L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
                                        V2UnitV, V2UnitV, V2UnitV]> {
  let Latency     = 9;
  let NumMicroOps = 6;
}

def V2Write_9cyc_2L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV,
                                        V2UnitV, V2UnitV, V2UnitV]> {
  let Latency     = 9;
  let NumMicroOps = 6;
}

def V2Write_9cyc_2L_2V_2S : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV,
                                           V2UnitV, V2UnitS, V2UnitS]> {
  let Latency     = 9;
  let NumMicroOps = 6;
}

def V2Write_9cyc_2V_4V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13,
                                          V2UnitV13, V2UnitV13, V2UnitV13]> {
  let Latency     = 9;
  let NumMicroOps = 6;
}

def V2Write_2cyc_3L01_3V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                                          V2UnitV, V2UnitV, V2UnitV]> {
  let Latency     = 2;
  let NumMicroOps = 6;
}

def V2Write_4cyc_2L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
                                            V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency     = 4;
  let NumMicroOps = 6;
}

def V2Write_5cyc_2L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
                                            V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency     = 5;
  let NumMicroOps = 6;
}

def V2Write_2cyc_3L01_3V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                                            V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency     = 2;
  let NumMicroOps = 6;
}

def V2Write_4cyc_2L01_2S_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitS,
                                               V2UnitS, V2UnitV01, V2UnitV01]> {
  let Latency     = 4;
  let NumMicroOps = 6;
}

//===----------------------------------------------------------------------===//
// Define generic 7 micro-op types
//
// Three V2UnitL entries plus four V2UnitV entries, one per micro-op.

def V2Write_8cyc_3L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
                                        V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 7;
}

//===----------------------------------------------------------------------===//
// Define generic 8 micro-op types
//
// Eight processor resources per record, one for each micro-op.

def V2Write_2cyc_4L01_4V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                                          V2UnitL01, V2UnitV, V2UnitV, V2UnitV,
                                          V2UnitV]> {
  let Latency     = 2;
  let NumMicroOps = 8;
}

def V2Write_2cyc_4L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                                            V2UnitL01, V2UnitV01, V2UnitV01,
                                            V2UnitV01, V2UnitV01]> {
  let Latency     = 2;
  let NumMicroOps = 8;
}

def V2Write_4cyc_4L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                                            V2UnitL01, V2UnitV01, V2UnitV01,
                                            V2UnitV01, V2UnitV01]> {
  let Latency     = 4;
  let NumMicroOps = 8;
}

def V2Write_6cyc_2L01_6V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01,
                                            V2UnitV01, V2UnitV01, V2UnitV01,
                                            V2UnitV01, V2UnitV01]> {
  let Latency     = 6;
  let NumMicroOps = 8;
}

def V2Write_8cyc_4L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
                                        V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 8;
}

//===----------------------------------------------------------------------===//
// Define generic 9 micro-op types
//
// Nine processor resources per record, one for each micro-op.

def V2Write_6cyc_3L01_6V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                                            V2UnitV01, V2UnitV01, V2UnitV01,
                                            V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency     = 6;
  let NumMicroOps = 9;
}

def V2Write_10cyc_1L_8V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV,
                                         V2UnitV, V2UnitV, V2UnitV, V2UnitV,
                                         V2UnitV]> {
  let Latency     = 10;
  let NumMicroOps = 9;
}

def V2Write_10cyc_3V_3L_3S : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV,
                                            V2UnitL, V2UnitL, V2UnitL,
                                            V2UnitS, V2UnitS, V2UnitS]> {
  let Latency     = 10;
  let NumMicroOps = 9;
}

//===----------------------------------------------------------------------===//
// Define generic 10 micro-op types
//
// Six V2UnitL entries plus four V2UnitV entries, one per micro-op.

def V2Write_9cyc_6L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
                                        V2UnitL, V2UnitL, V2UnitV, V2UnitV,
                                        V2UnitV, V2UnitV]> {
  let Latency     = 9;
  let NumMicroOps = 10;
}

//===----------------------------------------------------------------------===//
// Define generic 12 micro-op types
//
// Twelve processor resources per record (four load/store entries followed by
// eight FP/ASIMD entries), one for each micro-op.

def V2Write_5cyc_4L01_8V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                                            V2UnitL01, V2UnitV01, V2UnitV01,
                                            V2UnitV01, V2UnitV01, V2UnitV01,
                                            V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency     = 5;
  let NumMicroOps = 12;
}

def V2Write_9cyc_4L_8V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
                                        V2UnitL, V2UnitV, V2UnitV,
                                        V2UnitV, V2UnitV, V2UnitV,
                                        V2UnitV, V2UnitV, V2UnitV]> {
  let Latency     = 9;
  let NumMicroOps = 12;
}

def V2Write_10cyc_4L_8V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
                                         V2UnitL, V2UnitV, V2UnitV,
                                         V2UnitV, V2UnitV, V2UnitV,
                                         V2UnitV, V2UnitV, V2UnitV]> {
  let Latency     = 10;
  let NumMicroOps = 12;
}

//===----------------------------------------------------------------------===//
// Define generic 16 micro-op types
//
// Sixteen processor resources per record, one for each micro-op.

def V2Write_7cyc_4L01_12V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                                             V2UnitL01, V2UnitV01, V2UnitV01,
                                             V2UnitV01, V2UnitV01, V2UnitV01,
                                             V2UnitV01, V2UnitV01, V2UnitV01,
                                             V2UnitV01, V2UnitV01, V2UnitV01,
                                             V2UnitV01]> {
  let Latency     = 7;
  let NumMicroOps = 16;
}

def V2Write_10cyc_4L_8V_4S : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
                                            V2UnitL, V2UnitV, V2UnitV,
                                            V2UnitV, V2UnitV, V2UnitV,
                                            V2UnitV, V2UnitV, V2UnitV,
                                            V2UnitS, V2UnitS, V2UnitS,
                                            V2UnitS]> {
  let Latency     = 10;
  let NumMicroOps = 16;
}

//===----------------------------------------------------------------------===//
// Define generic 18 micro-op types
//
// Nine V2UnitL01 entries followed by nine V2UnitV01 entries, one per
// micro-op.

def V2Write_7cyc_9L01_9V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                                            V2UnitL01, V2UnitL01, V2UnitL01,
                                            V2UnitL01, V2UnitL01, V2UnitL01,
                                            V2UnitV01, V2UnitV01, V2UnitV01,
                                            V2UnitV01, V2UnitV01, V2UnitV01,
                                            V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency     = 7;
  let NumMicroOps = 18;
}

//===----------------------------------------------------------------------===//
// Define generic 27 micro-op types
//
// Nine entries each of V2UnitL01, V2UnitS and V2UnitV01, one per micro-op.

def V2Write_7cyc_9L01_9S_9V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                                               V2UnitL01, V2UnitL01, V2UnitL01,
                                               V2UnitL01, V2UnitL01, V2UnitL01,
                                               V2UnitS, V2UnitS, V2UnitS,
                                               V2UnitS, V2UnitS, V2UnitS,
                                               V2UnitS, V2UnitS, V2UnitS,
                                               V2UnitV01, V2UnitV01, V2UnitV01,
                                               V2UnitV01, V2UnitV01, V2UnitV01,
                                               V2UnitV01, V2UnitV01,
                                               V2UnitV01]> {
  let Latency     = 7;
  let NumMicroOps = 27;
}

830//===----------------------------------------------------------------------===//
831// Define generic 36 micro-op types
832
// 36 micro-ops with an 11-cycle latency: eighteen entries of V2UnitL01
// followed by eighteen entries of V2UnitV01.
833def V2Write_11cyc_18L01_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
834                                               V2UnitL01, V2UnitL01, V2UnitL01,
835                                               V2UnitL01, V2UnitL01, V2UnitL01,
836                                               V2UnitL01, V2UnitL01, V2UnitL01,
837                                               V2UnitL01, V2UnitL01, V2UnitL01,
838                                               V2UnitL01, V2UnitL01, V2UnitL01,
839                                               V2UnitV01, V2UnitV01, V2UnitV01,
840                                               V2UnitV01, V2UnitV01, V2UnitV01,
841                                               V2UnitV01, V2UnitV01, V2UnitV01,
842                                               V2UnitV01, V2UnitV01, V2UnitV01,
843                                               V2UnitV01, V2UnitV01, V2UnitV01,
844                                               V2UnitV01, V2UnitV01,
845                                               V2UnitV01]> {
846  let Latency     = 11;
847  let NumMicroOps = 36;
848}
849
850//===----------------------------------------------------------------------===//
851// Define generic 54 micro-op types
852
// 54 micro-ops with an 11-cycle latency: eighteen entries each of V2UnitL01,
// V2UnitS and V2UnitV01, in that order.
853def V2Write_11cyc_18L01_18S_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01,
854                                                   V2UnitL01, V2UnitL01,
855                                                   V2UnitL01, V2UnitL01,
856                                                   V2UnitL01, V2UnitL01,
857                                                   V2UnitL01, V2UnitL01,
858                                                   V2UnitL01, V2UnitL01,
859                                                   V2UnitL01, V2UnitL01,
860                                                   V2UnitL01, V2UnitL01,
861                                                   V2UnitL01, V2UnitL01,
862                                                   V2UnitS, V2UnitS, V2UnitS,
863                                                   V2UnitS, V2UnitS, V2UnitS,
864                                                   V2UnitS, V2UnitS, V2UnitS,
865                                                   V2UnitS, V2UnitS, V2UnitS,
866                                                   V2UnitS, V2UnitS, V2UnitS,
867                                                   V2UnitS, V2UnitS, V2UnitS,
868                                                   V2UnitV01, V2UnitV01,
869                                                   V2UnitV01, V2UnitV01,
870                                                   V2UnitV01, V2UnitV01,
871                                                   V2UnitV01, V2UnitV01,
872                                                   V2UnitV01, V2UnitV01,
873                                                   V2UnitV01, V2UnitV01,
874                                                   V2UnitV01, V2UnitV01,
875                                                   V2UnitV01, V2UnitV01,
876                                                   V2UnitV01, V2UnitV01]> {
877  let Latency     = 11;
878  let NumMicroOps = 54;
879}
880
881//===----------------------------------------------------------------------===//
882// Define predicate-controlled types
883
// Each variant below pairs a predicate with the write used when it holds; the
// trailing NoSchedPred entry supplies the write used otherwise.

// Shifted-register arithmetic: V2Write_1cyc_1I under IsCheapLSL, else
// V2Write_2cyc_1M.
884def V2Write_ArithI : SchedWriteVariant<[
885                       SchedVar<IsCheapLSL,  [V2Write_1cyc_1I]>,
886                       SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
887
// Same selection as V2Write_ArithI, but the cheap case uses V2Write_1cyc_1F.
888def V2Write_ArithF : SchedWriteVariant<[
889                       SchedVar<IsCheapLSL,  [V2Write_1cyc_1F]>,
890                       SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>;
891
// V2Write_1cyc_1F under NeoverseNoLSL, else V2Write_2cyc_1M.
892def V2Write_Logical : SchedWriteVariant<[
893                        SchedVar<NeoverseNoLSL, [V2Write_1cyc_1F]>,
894                        SchedVar<NoSchedPred,   [V2Write_2cyc_1M]>]>;
895
// V2Write_1cyc_1I under IsRORImmIdiomPred, else V2Write_3cyc_1I_1M.
896def V2Write_Extr : SchedWriteVariant<[
897                     SchedVar<IsRORImmIdiomPred, [V2Write_1cyc_1I]>,
898                     SchedVar<NoSchedPred,       [V2Write_3cyc_1I_1M]>]>;
899
// Vector register-offset load: the NeoverseHQForm case costs one more cycle
// and an extra I micro-op (V2Write_7cyc_1I_1L versus V2Write_6cyc_1L).
900def V2Write_LdrHQ : SchedWriteVariant<[
901                      SchedVar<NeoverseHQForm,  [V2Write_7cyc_1I_1L]>,
902                      SchedVar<NoSchedPred,     [V2Write_6cyc_1L]>]>;
903
// Vector register-offset store: the NeoverseHQForm case adds an I micro-op at
// the same 2-cycle latency.
904def V2Write_StrHQ : SchedWriteVariant<[
905                      SchedVar<NeoverseHQForm,  [V2Write_2cyc_1L01_1V01_1I]>,
906                      SchedVar<NoSchedPred,     [V2Write_2cyc_1L01_1V01]>]>;
907
// Zero-move variants: all three select V2Write_0cyc under NeoverseZeroMove and
// differ only in the fallback write, which the record name spells out.
908def V2Write_0or1cyc_1I : SchedWriteVariant<[
909                      SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
910                      SchedVar<NoSchedPred,      [V2Write_1cyc_1I]>]>;
911
912def V2Write_0or2cyc_1V : SchedWriteVariant<[
913                      SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
914                      SchedVar<NoSchedPred,      [V2Write_2cyc_1V]>]>;
915
916def V2Write_0or3cyc_1M0 : SchedWriteVariant<[
917                      SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
918                      SchedVar<NoSchedPred,      [V2Write_3cyc_1M0]>]>;
919
// "XorYcyc" variants keyed on NeoversePdIsPg: when the predicate holds the
// write with the larger latency (Y) is used, otherwise the one with the
// smaller latency (X). The resources are the same in both cases.
// NOTE(review): the predicate name suggests "destination predicate equals the
// governing predicate"; confirm against its definition.
920def V2Write_2or3cyc_1M : SchedWriteVariant<[
921                      SchedVar<NeoversePdIsPg,  [V2Write_3cyc_1M]>,
922                      SchedVar<NoSchedPred,     [V2Write_2cyc_1M]>]>;
923
924def V2Write_3or4cyc_2M : SchedWriteVariant<[
925                      SchedVar<NeoversePdIsPg,  [V2Write_4cyc_2M]>,
926                      SchedVar<NoSchedPred,     [V2Write_3cyc_2M]>]>;
927
928def V2Write_1or2cyc_1M0 : SchedWriteVariant<[
929                      SchedVar<NeoversePdIsPg,  [V2Write_2cyc_1M0]>,
930                      SchedVar<NoSchedPred,     [V2Write_1cyc_1M0]>]>;
931
932def V2Write_2or3cyc_1M0 : SchedWriteVariant<[
933                      SchedVar<NeoversePdIsPg,  [V2Write_3cyc_1M0]>,
934                      SchedVar<NoSchedPred,     [V2Write_2cyc_1M0]>]>;
935
936def V2Write_1or2cyc_1M0_1M : SchedWriteVariant<[
937                      SchedVar<NeoversePdIsPg,  [V2Write_2cyc_1M0_1M]>,
938                      SchedVar<NoSchedPred,     [V2Write_1cyc_1M0_1M]>]>;
939
940def V2Write_3or4cyc_1M0_1M : SchedWriteVariant<[
941                      SchedVar<NeoversePdIsPg,  [V2Write_4cyc_1M0_1M]>,
942                      SchedVar<NoSchedPred,     [V2Write_3cyc_1M0_1M]>]>;
943
944def V2Write_4or5cyc_2M0_2M : SchedWriteVariant<[
945                      SchedVar<NeoversePdIsPg,  [V2Write_5cyc_2M0_2M]>,
946                      SchedVar<NoSchedPred,     [V2Write_4cyc_2M0_2M]>]>;
947
948def V2Write_4or5cyc_1V0_1M0 : SchedWriteVariant<[
949                      SchedVar<NeoversePdIsPg,  [V2Write_5cyc_1V0_1M0]>,
950                      SchedVar<NoSchedPred,     [V2Write_4cyc_1V0_1M0]>]>;
951
952def V2Write_2or3cyc_1V0_1M : SchedWriteVariant<[
953                      SchedVar<NeoversePdIsPg,  [V2Write_3cyc_1V0_1M]>,
954                      SchedVar<NoSchedPred,     [V2Write_2cyc_1V0_1M]>]>;
955
// V2Write_1cyc_1F under NeoverseCheapIncDec, else V2Write_2cyc_1M.
956def V2Write_IncDec : SchedWriteVariant<[
957                      SchedVar<NeoverseCheapIncDec, [V2Write_1cyc_1F]>,
958                      SchedVar<NoSchedPred,         [V2Write_2cyc_1M]>]>;
959
960//===----------------------------------------------------------------------===//
961// Define forwarded types
962
963// NOTE: SOG, p. 16, n. 2: Accumulator forwarding is not supported for
964// consumers of 64 bit multiply high operations?
// V2Wr_IM and V2Wr_IMA are both 2-cycle writes, on V2UnitM and V2UnitM0
// respectively. V2Wr_IMUL selects V2Wr_IM under IsReg3ZeroPred and V2Wr_IMA
// otherwise. V2Rd_IMA is a read advance of 1 cycle that applies only to values
// produced by V2Wr_IMA.
965def V2Wr_IM   : SchedWriteRes<[V2UnitM]>  { let Latency = 2; }
966def V2Wr_IMA  : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
967def V2Wr_IMUL : SchedWriteVariant<[
968                  SchedVar<IsReg3ZeroPred, [V2Wr_IM]>,
969                  SchedVar<NoSchedPred,    [V2Wr_IMA]>]>;
970def V2Rd_IMA  : SchedReadAdvance<1, [V2Wr_IMA]>;
971
// Scalar FP, ASIMD and CRC forwarding pairs. Each V2Wr_* record gives the
// producer's unit(s) and latency; the matching V2Rd_* record is a
// SchedReadAdvance whose first argument is the number of cycles of advance and
// whose list restricts it to the named producer writes.

// V2Rd_FMA also applies to WriteFMul producers, not only V2Wr_FMA.
972def V2Wr_FMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
973def V2Rd_FMA : SchedReadAdvance<2, [WriteFMul, V2Wr_FMA]>;
974
975def V2Wr_VA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
976def V2Rd_VA : SchedReadAdvance<3, [V2Wr_VA]>;
977
978def V2Wr_VDOT : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
979def V2Rd_VDOT : SchedReadAdvance<2, [V2Wr_VDOT]>;
980
981def V2Wr_VMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
982def V2Rd_VMMA : SchedReadAdvance<2, [V2Wr_VMMA]>;
983
984def V2Wr_VMA : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
985def V2Rd_VMA : SchedReadAdvance<3, [V2Wr_VMA]>;
986
// V2Wr_VMAH lists V2UnitV02 twice and advances by 2, unlike V2Wr_VMA and
// V2Wr_VMAL, which list it once and advance by 3.
987def V2Wr_VMAH : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
988def V2Rd_VMAH : SchedReadAdvance<2, [V2Wr_VMAH]>;
989
990def V2Wr_VMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
991def V2Rd_VMAL : SchedReadAdvance<3, [V2Wr_VMAL]>;
992
993def V2Wr_VPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
994def V2Rd_VPA : SchedReadAdvance<3, [V2Wr_VPA]>;
995
996def V2Wr_VSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
997def V2Rd_VSA : SchedReadAdvance<3, [V2Wr_VSA]>;
998
999def V2Wr_VFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1000def V2Rd_VFCMA : SchedReadAdvance<2, [V2Wr_VFCMA]>;
1001
// V2Rd_VFMA applies to both the 3-cycle V2Wr_VFM and the 4-cycle V2Wr_VFMA.
1002def V2Wr_VFM  : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
1003def V2Wr_VFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1004def V2Rd_VFMA : SchedReadAdvance<2, [V2Wr_VFM, V2Wr_VFMA]>;
1005
1006def V2Wr_VFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1007def V2Rd_VFMAL : SchedReadAdvance<2, [V2Wr_VFMAL]>;
1008
1009def V2Wr_VBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1010def V2Rd_VBFDOT : SchedReadAdvance<2, [V2Wr_VBFDOT]>;
1011def V2Wr_VBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
1012def V2Rd_VBFMMA : SchedReadAdvance<2, [V2Wr_VBFMMA]>;
1013def V2Wr_VBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1014def V2Rd_VBFMAL : SchedReadAdvance<3, [V2Wr_VBFMAL]>;
1015
1016def V2Wr_CRC : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
1017def V2Rd_CRC : SchedReadAdvance<1, [V2Wr_CRC]>;
1018
// Forwarding pairs with a "Z" prefix (presumably the SVE counterparts of the
// "V" records; confirm against the InstRW users). As with the other forwarded
// types, each V2Wr_* gives the producer's unit(s) and latency and the matching
// V2Rd_* is a read advance restricted to the listed producer writes.
1019def V2Wr_ZA  : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
1020def V2Rd_ZA  : SchedReadAdvance<3, [V2Wr_ZA]>;
1021def V2Wr_ZPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
1022def V2Rd_ZPA : SchedReadAdvance<3, [V2Wr_ZPA]>;
1023def V2Wr_ZSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
1024def V2Rd_ZSA : SchedReadAdvance<3, [V2Wr_ZSA]>;
1025
1026def V2Wr_ZDOTB : SchedWriteRes<[V2UnitV]>   { let Latency = 3; }
1027def V2Rd_ZDOTB : SchedReadAdvance<2, [V2Wr_ZDOTB]>;
1028def V2Wr_ZDOTH : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
1029def V2Rd_ZDOTH : SchedReadAdvance<3, [V2Wr_ZDOTH]>;
1030
1031// NOTE: SOG p. 43: Complex multiply-add B, H, S element size: How to reduce
1032// throughput to 1 in case of forwarding?
1033def V2Wr_ZCMABHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
1034def V2Rd_ZCMABHS : SchedReadAdvance<3, [V2Wr_ZCMABHS]>;
1035def V2Wr_ZCMAD   : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
1036def V2Rd_ZCMAD   : SchedReadAdvance<2, [V2Wr_ZCMAD]>;
1037
1038def V2Wr_ZMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
1039def V2Rd_ZMMA : SchedReadAdvance<2, [V2Wr_ZMMA]>;
1040
1041def V2Wr_ZMABHS : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
1042def V2Rd_ZMABHS : SchedReadAdvance<3, [V2Wr_ZMABHS]>;
1043def V2Wr_ZMAD  : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
1044def V2Rd_ZMAD  : SchedReadAdvance<2, [V2Wr_ZMAD]>;
1045
1046def V2Wr_ZMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
1047def V2Rd_ZMAL : SchedReadAdvance<3, [V2Wr_ZMAL]>;
1048
// A single read advance (V2Rd_ZMASQ) is shared by the three V2Wr_ZMASQ* writes.
1049def V2Wr_ZMASQL   : SchedWriteRes<[V2UnitV02]>            { let Latency = 4; }
1050def V2Wr_ZMASQBHS : SchedWriteRes<[V2UnitV02]>            { let Latency = 4; }
1051def V2Wr_ZMASQD   : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
1052def V2Rd_ZMASQ    : SchedReadAdvance<2, [V2Wr_ZMASQL, V2Wr_ZMASQBHS,
1053                                         V2Wr_ZMASQD]>;
1054
1055def V2Wr_ZFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1056def V2Rd_ZFCMA : SchedReadAdvance<3, [V2Wr_ZFCMA]>;
1057
1058def V2Wr_ZFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1059def V2Rd_ZFMA : SchedReadAdvance<2, [V2Wr_ZFMA]>;
1060
1061def V2Wr_ZFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
1062def V2Rd_ZFMAL : SchedReadAdvance<2, [V2Wr_ZFMAL]>;
1063
1064def V2Wr_ZBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1065def V2Rd_ZBFDOT : SchedReadAdvance<2, [V2Wr_ZBFDOT]>;
1066def V2Wr_ZBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
1067def V2Rd_ZBFMMA : SchedReadAdvance<2, [V2Wr_ZBFMMA]>;
1068def V2Wr_ZBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
1069def V2Rd_ZBFMAL : SchedReadAdvance<3, [V2Wr_ZBFMAL]>;
1070
1071//===----------------------------------------------------------------------===//
1072// Define types with long resource cycles (rc)
1073
// Naming: V2Write_<latency>cyc_1<unit>_<N>rc. Each record uses a single
// resource, sets Latency to <latency> and sets the one-element ReleaseAtCycles
// list to [N].
1074def V2Write_6cyc_1V1_5rc    : SchedWriteRes<[V2UnitV1]>  { let Latency =  6; let ReleaseAtCycles = [ 5]; }
1075def V2Write_7cyc_1V02_7rc   : SchedWriteRes<[V2UnitV02]> { let Latency =  7; let ReleaseAtCycles = [ 7]; }
1076def V2Write_10cyc_1V02_5rc  : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 5]; }
1077def V2Write_10cyc_1V02_9rc  : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
1078def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [10]; }
1079def V2Write_10cyc_1V0_9rc   : SchedWriteRes<[V2UnitV0]>  { let Latency = 10; let ReleaseAtCycles = [ 9]; }
1080def V2Write_10cyc_1V1_9rc   : SchedWriteRes<[V2UnitV1]>  { let Latency = 10; let ReleaseAtCycles = [ 9]; }
1081def V2Write_13cyc_1V0_12rc  : SchedWriteRes<[V2UnitV0]>  { let Latency = 13; let ReleaseAtCycles = [12]; }
1082def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [12]; }
1083def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [13]; }
1084def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ReleaseAtCycles = [14]; }
1085def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [15]; }
1086def V2Write_16cyc_1V0_14rc  : SchedWriteRes<[V2UnitV0]>  { let Latency = 16; let ReleaseAtCycles = [14]; }
1087
1088// Miscellaneous
1089// -----------------------------------------------------------------------------
1090
// COPY is scheduled with the generic WriteI class.
1091def : InstRW<[WriteI], (instrs COPY)>;
1092
1093// §3.3 Branch instructions
1094// -----------------------------------------------------------------------------
1095
1096// Branch, immed
1097// Compare and branch
1098def : SchedAlias<WriteBr,    V2Write_1cyc_1B>;
1099
1100// Branch, register
1101def : SchedAlias<WriteBrReg, V2Write_1cyc_1B>;
1102
1103// Branch and link, immed
1104// Branch and link, register
// BL and BLR are listed by exact opcode and use V2Write_1cyc_1B_1R instead of
// the WriteBr / WriteBrReg aliases above.
1105def : InstRW<[V2Write_1cyc_1B_1R], (instrs BL, BLR)>;
1106
1107// §3.4 Arithmetic and Logical Instructions
1108// -----------------------------------------------------------------------------
1109
1110// ALU, basic
1111// ALU, basic, flagset
// WriteI maps to V2Write_1cyc_1I; the InstRW entries that follow give
// different writes to the register-form ADCS/SBCS and to MOVZ.
1112def : SchedAlias<WriteI, V2Write_1cyc_1I>;
1113def : InstRW<[V2Write_1cyc_1F], (instregex "^(ADC|SBC)S[WX]r$")>;
1114def : InstRW<[V2Write_0or1cyc_1I], (instregex "^MOVZ[WX]i$")>;
1115
1116// ALU, extend and shift
1117def : SchedAlias<WriteIEReg, V2Write_2cyc_1M>;
1118
1119// Arithmetic, LSL shift, shift <= 4
1120// Arithmetic, flagset, LSL shift, shift <= 4
1121// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
// WriteISReg uses the V2Write_ArithI variant; the flag-setting shifted-register
// ADDS/SUBS forms use the V2Write_ArithF variant instead.
1122def : SchedAlias<WriteISReg, V2Write_ArithI>;
1123def : InstRW<[V2Write_ArithF],
1124             (instregex "^(ADD|SUB)S[WX]rs$")>;
1125
1126// Arithmetic, immediate to logical address tag
1127def : InstRW<[V2Write_2cyc_1M], (instrs ADDG, SUBG)>;
1128
1129// Convert floating-point condition flags
1130// Flag manipulation instructions
// WriteSys is given an empty resource list and a latency of 1.
1131def : WriteRes<WriteSys, []> { let Latency = 1; }
1132
1133// Insert Random Tags
1134def : InstRW<[V2Write_2cyc_1M], (instrs IRG, IRGstack)>;
1135
1136// Insert Tag Mask
1137// Subtract Pointer
1138// Subtract Pointer, flagset
1139def : InstRW<[V2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;
1140
1141// Logical, shift, no flagset
// ORR is split out from the other shifted logicals so that it can use the
// zero-move variant V2Write_0or1cyc_1I.
1142def : InstRW<[V2Write_1cyc_1I],    (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
1143def : InstRW<[V2Write_0or1cyc_1I], (instregex "^ORR[WX]rs$")>;
1144
1145// Logical, shift, flagset
1146def : InstRW<[V2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;
1147
1148// Move and shift instructions
1149// -----------------------------------------------------------------------------
1150
1151def : SchedAlias<WriteImm, V2Write_1cyc_1I>;
1152
1153// §3.5 Divide and multiply instructions
1154// -----------------------------------------------------------------------------
1155
1156// SDIV, UDIV
// 32-bit divides use the 12-cycle write and 64-bit divides the 20-cycle write,
// both on M0.
1157def : SchedAlias<WriteID32,  V2Write_12cyc_1M0>;
1158def : SchedAlias<WriteID64,  V2Write_20cyc_1M0>;
1159
1160def : SchedAlias<WriteIM32, V2Write_2cyc_1M>;
1161def : SchedAlias<WriteIM64, V2Write_2cyc_1M>;
1162
1163// Multiply
1164// Multiply accumulate, W-form
1165// Multiply accumulate, X-form
// The list holds one write followed by three reads. Only the last read uses
// V2Rd_IMA, the read advance tied to V2Wr_IMA producers (presumably the
// accumulator operand; confirm against the instruction operand order).
1166def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA],
1167             (instregex "^M(ADD|SUB)[WX]rrr$")>;
1168
1169// Multiply accumulate long
1170// Multiply long
1171def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA],
1172             (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
1173
1174// Multiply high
// Multiply-high uses a plain 3-cycle write with no forwarding read.
1175def : InstRW<[V2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>;
1176
1177// Pointer Authentication Instructions (v8.3 PAC)
1178// -----------------------------------------------------------------------------
1179
1180// Authenticate data address
1181// Authenticate instruction address
1182// Compute pointer authentication code for data address
1183// Compute pointer authentication code, using generic key
1184// Compute pointer authentication code for instruction address
// Matches every opcode whose name begins with AUT or PAC.
1185def : InstRW<[V2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>;
1186
1187// Branch and link, register, with pointer authentication
1188// Branch, register, with pointer authentication
1189// Branch, return, with pointer authentication
1190def : InstRW<[V2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
1191                                            BRAAZ, BRAB, BRABZ, RETAA, RETAB,
1192                                            ERETAA, ERETAB)>;
1193
1194
1195// Load register, with pointer authentication
// The pattern has no trailing '$', so it matches any opcode that starts with
// LDRAA or LDRAB followed by "indexed" or "writeback".
1196def : InstRW<[V2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;
1197
1198// Strip pointer authentication code
1199def : InstRW<[V2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>;
1200
1201// Miscellaneous data-processing instructions
1202// -----------------------------------------------------------------------------
1203
1204// Address generation
1205def : InstRW<[V2Write_1cyc_1F], (instrs ADR, ADRP)>;
1206
1207// Bitfield extract, one reg
1208// Bitfield extract, two regs
// NOTE(review): EXTRWrri/EXTRXrri are given V2Write_Extr explicitly even though
// WriteExtr is aliased to the same variant on the line before; confirm whether
// the explicit InstRW is still required.
1209def : SchedAlias<WriteExtr, V2Write_Extr>;
1210def : InstRW<[V2Write_Extr], (instrs EXTRWrri, EXTRXrri)>;
1211
1212// Bitfield move, basic
1213def : SchedAlias<WriteIS, V2Write_1cyc_1I>;
1214
1215// Bitfield move, insert
// BFM overrides the 1-cycle write chosen above for WriteIS with a 2-cycle
// write on M.
1216def : InstRW<[V2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>;
1217
1218// Load instructions
1219// -----------------------------------------------------------------------------
1220
1221// NOTE: SOG p. 19: Throughput of LDN?P X-form should be 2, but reported as 3.
1222
// Generic loads (WriteLD, WriteLDIdx) use the 4-cycle single-L write.
1223def : SchedAlias<WriteLD,    V2Write_4cyc_1L>;
1224def : SchedAlias<WriteLDIdx, V2Write_4cyc_1L>;
1225
1226// Load register, literal
1227def : InstRW<[V2Write_5cyc_1L_1F], (instrs LDRWl, LDRXl, LDRSWl, PRFMl)>;
1228
1229// Load pair, signed immed offset, signed words
// Two writes are listed: V2Write_5cyc_1I_3L followed by WriteLDHi (presumably
// one per destination register of the pair).
1230def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi], (instrs LDPSWi)>;
1231
1232// Load pair, immed post-index or immed pre-index, signed words
// Same as LDPSWi with a leading WriteAdr added for the indexed forms.
1233def : InstRW<[WriteAdr, V2Write_5cyc_1I_3L, WriteLDHi],
1234             (instregex "^LDPSW(post|pre)$")>;
1235
1236// Store instructions
1237// -----------------------------------------------------------------------------
1238
1239// NOTE: SOG, p. 20: Unsure if STRH uses pipeline I.
1240
// All three generic store classes share V2Write_1cyc_1L01_1D; WriteAdr maps to
// a 1-cycle I write.
1241def : SchedAlias<WriteST,    V2Write_1cyc_1L01_1D>;
1242def : SchedAlias<WriteSTIdx, V2Write_1cyc_1L01_1D>;
1243def : SchedAlias<WriteSTP,   V2Write_1cyc_1L01_1D>;
1244def : SchedAlias<WriteAdr,   V2Write_1cyc_1I>;
1245
1246// Tag load instructions
1247// -----------------------------------------------------------------------------
1248
1249// Load allocation tag
1250// Load multiple allocation tags
1251def : InstRW<[V2Write_4cyc_1L], (instrs LDG, LDGM)>;
1252
1253// Tag store instructions
1254// -----------------------------------------------------------------------------
1255
1256// Store allocation tags to one or two granules, post-index
1257// Store allocation tags to one or two granules, pre-index
1258// Store allocation tag to one or two granules, zeroing, post-index
1259// Store allocation tag to one or two granules, zeroing, pre-index
1260// Store allocation tag and reg pair to memory, post-index
1261// Store allocation tag and reg pair to memory, pre-index
// The indexed tag stores use the same L01 and D resources as the signed-offset
// forms listed after them, plus one I micro-op.
1262def : InstRW<[V2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex,
1263                                                ST2GPreIndex, ST2GPostIndex,
1264                                                STZGPreIndex, STZGPostIndex,
1265                                                STZ2GPreIndex, STZ2GPostIndex,
1266                                                STGPpre, STGPpost)>;
1267
1268// Store allocation tags to one or two granules, signed offset
1269// Store allocation tag to two granules, zeroing, signed offset
1270// Store allocation tag and reg pair to memory, signed offset
1271// Store multiple allocation tags
1272def : InstRW<[V2Write_1cyc_1L01_1D], (instrs STGi, ST2Gi, STZGi,
1273                                             STZ2Gi, STGPi, STGM, STZGM)>;
1274
1275// FP data processing instructions
1276// -----------------------------------------------------------------------------
1277
1278// FP absolute value
1279// FP arithmetic
1280// FP min/max
1281// FP negate
1282// FP select
1283def : SchedAlias<WriteF,     V2Write_2cyc_1V>;
1284
1285// FP compare
1286def : SchedAlias<WriteFCmp,  V2Write_2cyc_1V0>;
1287
1288// FP divide, square root
// WriteFDiv maps to the 7-cycle write; the InstRW entries that follow give
// each scalar FDIV and FSQRT opcode its own latency by element size.
1289def : SchedAlias<WriteFDiv,  V2Write_7cyc_1V02>;
1290
1291// FP divide, H-form
1292def : InstRW<[V2Write_7cyc_1V02],  (instrs FDIVHrr)>;
1293// FP divide, S-form
1294def : InstRW<[V2Write_10cyc_1V02], (instrs FDIVSrr)>;
1295// FP divide, D-form
1296def : InstRW<[V2Write_15cyc_1V02], (instrs FDIVDrr)>;
1297
1298// FP square root, H-form
1299def : InstRW<[V2Write_7cyc_1V02],  (instrs FSQRTHr)>;
1300// FP square root, S-form
1301def : InstRW<[V2Write_9cyc_1V02],  (instrs FSQRTSr)>;
1302// FP square root, D-form
1303def : InstRW<[V2Write_16cyc_1V02], (instrs FSQRTDr)>;
1304
1305// FP multiply
// WriteFMul is defined directly as a 3-cycle write on V2UnitV rather than
// aliased to a named V2Write_* record.
1306def : WriteRes<WriteFMul, [V2UnitV]> { let Latency = 3; }
1307
1308// FP multiply accumulate
// One write followed by three reads; only the last read uses V2Rd_FMA, the
// read advance tied to WriteFMul and V2Wr_FMA producers.
1309def : InstRW<[V2Wr_FMA, ReadDefault, ReadDefault, V2Rd_FMA],
1310             (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;
1311
1312// FP round to integral
1313def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
1314                                             "^FRINT(32|64)[XZ][SD]r$")>;
1315
1316// FP miscellaneous instructions
1317// -----------------------------------------------------------------------------
1318
1319// FP convert, from gen to vec reg
1320def : InstRW<[V2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;
1321
1322// FP convert, from vec to gen reg
// The trailing "ri?$" lets the pattern match opcode names ending in either "r"
// or "ri".
1323def : InstRW<[V2Write_3cyc_1V01],
1324             (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]ri?$")>;
1325
1326// FP convert, JavaScript from vec to gen reg
// This is an alias for the whole WriteFCvt class, so it also applies to any
// other WriteFCvt instruction not matched by an InstRW entry.
1327def : SchedAlias<WriteFCvt, V2Write_3cyc_1V0>;
1328
1329// FP convert, from vec to vec reg
1330def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr,
1331                                          FCVTHDr, FCVTSDr, FCVTXNv1i64)>;
1332
1333// FP move, immed
1334// FP move, register
1335def : SchedAlias<WriteFImm, V2Write_2cyc_1V>;
1336
1337// FP transfer, from gen to low half of vec reg
// Uses the zero-move variant: V2Write_0cyc under NeoverseZeroMove, otherwise
// V2Write_3cyc_1M0.
1338def : InstRW<[V2Write_0or3cyc_1M0],
1339             (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;
1340
1341// FP transfer, from gen to high half of vec reg
1342def : InstRW<[V2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;
1343
1344// FP transfer, from vec to gen reg
1345def : SchedAlias<WriteFCopy, V2Write_2cyc_2V01>;
1346
1347// FP load instructions
1348// -----------------------------------------------------------------------------
1349
1350// Load vector reg, literal, S/D/Q forms
1351def : InstRW<[V2Write_7cyc_1F_1L], (instregex "^LDR[SDQ]l$")>;
1352
1353// Load vector reg, unscaled immed
1354def : InstRW<[V2Write_6cyc_1L], (instregex "^LDUR[BHSDQ]i$")>;
1355
1356// Load vector reg, immed post-index
1357// Load vector reg, immed pre-index
// The indexed forms list WriteAdr first, then the load write.
1358def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L],
1359             (instregex "^LDR[BHSDQ](pre|post)$")>;
1360
1361// Load vector reg, unsigned immed
1362def : InstRW<[V2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;
1363
1364// Load vector reg, register offset, basic
1365// Load vector reg, register offset, scale, S/D-form
1366// Load vector reg, register offset, scale, H/Q-form
1367// Load vector reg, register offset, extend
1368// Load vector reg, register offset, extend, scale, S/D-form
1369// Load vector reg, register offset, extend, scale, H/Q-form
// All register-offset forms share the V2Write_LdrHQ variant, which selects the
// 7-cycle write under NeoverseHQForm and the 6-cycle write otherwise.
1370def : InstRW<[V2Write_LdrHQ, ReadAdrBase], (instregex "^LDR[BHSDQ]ro[WX]$")>;
1371
1372// Load vector pair, immed offset, S/D-form
1373def : InstRW<[V2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;
1374
1375// Load vector pair, immed offset, Q-form
1376def : InstRW<[V2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;
1377
1378// Load vector pair, immed post-index, S/D-form
1379// Load vector pair, immed pre-index, S/D-form
1380def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L, WriteLDHi],
1381             (instregex "^LDP[SD](pre|post)$")>;
1382
1383// Load vector pair, immed post-index, Q-form
1384// Load vector pair, immed pre-index, Q-form
1385def : InstRW<[WriteAdr, V2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost,
1386                                                                LDPQpre)>;
1387
1388// FP store instructions
1389// -----------------------------------------------------------------------------
1390
1391// Store vector reg, unscaled immed, B/H/S/D-form
1392// Store vector reg, unscaled immed, Q-form
1393def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>;
1394
1395// Store vector reg, immed post-index, B/H/S/D-form
1396// Store vector reg, immed post-index, Q-form
1397// Store vector reg, immed pre-index, B/H/S/D-form
1398// Store vector reg, immed pre-index, Q-form
1399def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I],
1400             (instregex "^STR[BHSDQ](pre|post)$")>;
1401
1402// Store vector reg, unsigned immed, B/H/S/D-form
1403// Store vector reg, unsigned immed, Q-form
1404def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>;
1405
1406// Store vector reg, register offset, basic, B/H/S/D-form
1407// Store vector reg, register offset, basic, Q-form
1408// Store vector reg, register offset, scale, H-form
1409// Store vector reg, register offset, scale, S/D-form
1410// Store vector reg, register offset, scale, Q-form
1411// Store vector reg, register offset, extend, B/H/S/D-form
1412// Store vector reg, register offset, extend, Q-form
1413// Store vector reg, register offset, extend, scale, H-form
1414// Store vector reg, register offset, extend, scale, S/D-form
1415// Store vector reg, register offset, extend, scale, Q-form
// All register-offset forms share the V2Write_StrHQ variant, which adds an I
// micro-op under NeoverseHQForm.
1416def : InstRW<[V2Write_StrHQ, ReadAdrBase],
1417             (instregex "^STR[BHSDQ]ro[WX]$")>;
1418
1419// Store vector pair, immed offset, S-form
1420// Store vector pair, immed offset, D-form
1421def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STN?P[SD]i$")>;
1422
1423// Store vector pair, immed offset, Q-form
1424def : InstRW<[V2Write_2cyc_1L01_2V01], (instrs STPQi, STNPQi)>;
1425
1426// Store vector pair, immed post-index, S-form
1427// Store vector pair, immed post-index, D-form
1428// Store vector pair, immed pre-index, S-form
1429// Store vector pair, immed pre-index, D-form
1430def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I],
1431             (instregex "^STP[SD](pre|post)$")>;
1432
1433// Store vector pair, immed post-index, Q-form
// NOTE(review): unlike the S/D-form indexed pair stores, the two Q-form
// entries do not list a leading WriteAdr; confirm this is intentional.
1434def : InstRW<[V2Write_2cyc_1L01_2V01_1I], (instrs STPQpost)>;
1435
1436// Store vector pair, immed pre-index, Q-form
// The pre-index form uses two I micro-ops where the post-index form uses one.
1437def : InstRW<[V2Write_2cyc_1L01_2V01_2I], (instrs STPQpre)>;
1438
1439// ASIMD integer instructions
1440// -----------------------------------------------------------------------------
1441
1442// ASIMD absolute diff
1443// ASIMD absolute diff long
1444// ASIMD arith, basic
1445// ASIMD arith, complex
1446// ASIMD arith, pair-wise
1447// ASIMD compare
1448// ASIMD logical
1449// ASIMD max/min, basic and pair-wise
// WriteVd and WriteVq map to the same 2-cycle V write.
1450def : SchedAlias<WriteVd, V2Write_2cyc_1V>;
1451def : SchedAlias<WriteVq, V2Write_2cyc_1V>;
1452
1453// ASIMD absolute diff accum
1454// ASIMD absolute diff accum long
// The pattern has no '$', so it matches every opcode starting with SABA, UABA,
// SABAL or UABAL followed by "v".
1455def : InstRW<[V2Wr_VA, V2Rd_VA], (instregex "^[SU]ABAL?v")>;
1456
1457// ASIMD arith, reduce, 4H/4S
1458def : InstRW<[V2Write_2cyc_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;
1459
1460// ASIMD arith, reduce, 8B/8H
1461def : InstRW<[V2Write_4cyc_1V13_1V],
1462             (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;
1463
1464// ASIMD arith, reduce, 16B
1465def : InstRW<[V2Write_4cyc_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;
1466
1467// ASIMD dot product
1468// ASIMD dot product using signed and unsigned integers
1469def : InstRW<[V2Wr_VDOT, V2Rd_VDOT],
1470             (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;
1471
1472// ASIMD matrix multiply-accumulate
1473def : InstRW<[V2Wr_VMMA, V2Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;
1474
1475// ASIMD max/min, reduce, 4H/4S
1476def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$",
1477                                             "^[SU](MAX|MIN)Vv4i32v$")>;
1478
1479// ASIMD max/min, reduce, 8B/8H
1480def : InstRW<[V2Write_4cyc_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
1481                                                "^[SU](MAX|MIN)Vv8i16v$")>;
1482
1483// ASIMD max/min, reduce, 16B
// The pattern carries an explicit '^' like the 4H/4S and 8B/8H max/min reduce
// patterns. instregex already matches from the start of the opcode name, so
// the anchor does not change which opcodes are selected.
1484def : InstRW<[V2Write_4cyc_2V13], (instregex "^[SU](MAX|MIN)Vv16i8v$")>;
1485
1486// ASIMD multiply
1487def : InstRW<[V2Write_4cyc_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;
1488
1489// ASIMD multiply accumulate
// The accumulate entries below pair a V2Wr_* write with its V2Rd_* read
// advance, both taken from the forwarded-type definitions.
1490def : InstRW<[V2Wr_VMA, V2Rd_VMA], (instregex "^MLAv", "^MLSv")>;
1491
1492// ASIMD multiply accumulate high
1493def : InstRW<[V2Wr_VMAH, V2Rd_VMAH], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;
1494
1495// ASIMD multiply accumulate long
1496def : InstRW<[V2Wr_VMAL, V2Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;
1497
1498// ASIMD multiply accumulate saturating long
// Unlike the accumulate entries above, this one lists a plain write with no
// forwarding read.
1499def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDML[AS]L[iv]")>;
1500
1501// ASIMD multiply/multiply long (8x8) polynomial, D-form
1502// ASIMD multiply/multiply long (8x8) polynomial, Q-form
1503def : InstRW<[V2Write_3cyc_1V23], (instregex "^PMULL?(v8i8|v16i8)$")>;
1504
1505// ASIMD multiply long
1506def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]MULLv", "^SQDMULL[iv]")>;
1507
1508// ASIMD pairwise add and accumulate long
1509def : InstRW<[V2Wr_VPA, V2Rd_VPA], (instregex "^[SU]ADALPv")>;
1510
1511// ASIMD shift accumulate
1512def : InstRW<[V2Wr_VSA, V2Rd_VSA], (instregex "^[SU]SRA[dv]", "^[SU]RSRA[dv]")>;
1513
1514// ASIMD shift by immed, basic
1515def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHL[dv]", "^SHLLv", "^SHRNv",
1516                                             "^SSHLLv", "^SSHR[dv]", "^USHLLv",
1517                                             "^USHR[dv]")>;
1518
1519// ASIMD shift by immed and insert, basic
1520def : InstRW<[V2Write_2cyc_1V13], (instregex "^SLI[dv]", "^SRI[dv]")>;
1521
1522// ASIMD shift by immed, complex
// The immediate-shift SQSHL/SQSHLU/UQSHL opcodes are matched by full name: the
// scalar ones by their [bhsd] suffix and the vector ones by the "_shift"
// suffix.
1523def : InstRW<[V2Write_4cyc_1V13],
1524             (instregex "^RSHRNv", "^SQRSHRU?N[bhsv]", "^(SQSHLU?|UQSHL)[bhsd]$",
1525                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
1526                        "^SQSHRU?N[bhsv]", "^SRSHR[dv]", "^UQRSHRN[bhsv]",
1527                        "^UQSHRN[bhsv]", "^URSHR[dv]")>;
1528
1529// ASIMD shift by register, basic
1530def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]SHLv")>;
1531
1532// ASIMD shift by register, complex
// The [SU]QSHL pattern ends in '$' directly after the arrangement, so opcode
// names that carry a "_shift" suffix are not matched here.
1533def : InstRW<[V2Write_4cyc_1V13],
1534             (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
1535                        "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;
1536
// ASIMD floating-point instructions
// -----------------------------------------------------------------------------

// NOTE: Records in this section select opcodes either by name (instrs) or by
// regular expression over the opcode name (instregex) and bind them to the
// scheduling resources listed in InstRW. Categories that need no dedicated
// record are listed in comment-only groups ending in "Handled by SchedAlias".

// ASIMD FP absolute value/difference
// ASIMD FP arith, normal
// ASIMD FP compare
// ASIMD FP complex add
// ASIMD FP max/min, normal
// ASIMD FP max/min, pairwise
// ASIMD FP negate
// Handled by SchedAlias<WriteV[dq], ...>

// ASIMD FP complex multiply add
def : InstRW<[V2Wr_VFCMA, V2Rd_VFCMA], (instregex "^FCMLAv")>;

// ASIMD FP convert, long (F16 to F32)
def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTL(v4|v8)i16")>;

// ASIMD FP convert, long (F32 to F64)
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTL(v2|v4)i32")>;

// ASIMD FP convert, narrow (F32 to F16)
def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTN(v4|v8)i16")>;

// ASIMD FP convert, narrow (F64 to F32)
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTN(v2|v4)i32",
                                             "^FCVTXN(v2|v4)f32")>;

// ASIMD FP convert, other, D-form F32 and Q-form F64
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
                                             "^FCVT[AMNPZ][SU]v1i64$",
                                             "^FCVTZ[SU]d$",
                                             "^[SU]CVTFv2f(32|64)$",
                                             "^[SU]CVTFv1i64$",
                                             "^[SU]CVTFd$")>;

// ASIMD FP convert, other, D-form F16 and Q-form F32
def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
                                             "^FCVT[AMNPZ][SU]v1i32$",
                                             "^FCVTZ[SU]s$",
                                             "^[SU]CVTFv4f(16|32)$",
                                             "^[SU]CVTFv1i32$",
                                             "^[SU]CVTFs$")>;

// ASIMD FP convert, other, Q-form F16
def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
                                             "^FCVT[AMNPZ][SU]v1f16$",
                                             "^FCVTZ[SU]h$",
                                             "^[SU]CVTFv8f16$",
                                             "^[SU]CVTFv1i16$",
                                             "^[SU]CVTFh$")>;

// ASIMD FP divide, D-form, F16
def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FDIVv4f16)>;

// ASIMD FP divide, D-form, F32
def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FDIVv2f32)>;

// ASIMD FP divide, Q-form, F16
def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FDIVv8f16)>;

// ASIMD FP divide, Q-form, F32
def : InstRW<[V2Write_10cyc_1V02_10rc], (instrs FDIVv4f32)>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[V2Write_15cyc_1V02_14rc], (instrs FDIVv2f64)>;

// ASIMD FP max/min, reduce, F32 and D-form F16
def : InstRW<[V2Write_4cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;

// ASIMD FP max/min, reduce, Q-form F16
def : InstRW<[V2Write_6cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;

// ASIMD FP multiply
def : InstRW<[V2Wr_VFM], (instregex "^FMULv", "^FMULXv")>;

// ASIMD FP multiply accumulate
def : InstRW<[V2Wr_VFMA, V2Rd_VFMA], (instregex "^FMLAv", "^FMLSv")>;

// ASIMD FP multiply accumulate long
def : InstRW<[V2Wr_VFMAL, V2Rd_VFMAL], (instregex "^FML[AS]L2?(lane)?v")>;

// ASIMD FP round, D-form F32 and Q-form F64
def : InstRW<[V2Write_3cyc_1V02],
             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
                        "^FRINT(32|64)[XZ]v2f(32|64)$")>;

// ASIMD FP round, D-form F16 and Q-form F32
def : InstRW<[V2Write_4cyc_2V02],
             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
                        "^FRINT(32|64)[XZ]v4f32$")>;

// ASIMD FP round, Q-form F16
def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;

// ASIMD FP square root, D-form, F16
def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FSQRTv4f16)>;

// ASIMD FP square root, D-form, F32
def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FSQRTv2f32)>;

// ASIMD FP square root, Q-form, F16
def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FSQRTv8f16)>;

// ASIMD FP square root, Q-form, F32
def : InstRW<[V2Write_10cyc_1V02_9rc], (instrs FSQRTv4f32)>;

// ASIMD FP square root, Q-form, F64
def : InstRW<[V2Write_16cyc_1V02_15rc], (instrs FSQRTv2f64)>;

// ASIMD BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------

// NOTE: All BF16 opcodes are named explicitly with instrs (no regex matching).
// Records with a Wr/Rd pair supply both a write and a read resource; the
// remaining records supply a write resource only.

// ASIMD convert, F32 to BF16
def : InstRW<[V2Write_4cyc_2V02], (instrs BFCVTN, BFCVTN2)>;

// ASIMD dot product
def : InstRW<[V2Wr_VBFDOT, V2Rd_VBFDOT], (instrs BFDOTv4bf16, BFDOTv8bf16)>;

// ASIMD matrix multiply accumulate
def : InstRW<[V2Wr_VBFMMA, V2Rd_VBFMMA], (instrs BFMMLA)>;

// ASIMD multiply accumulate long
def : InstRW<[V2Wr_VBFMAL, V2Rd_VBFMAL], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
                                                 BFMLALTIdx)>;

// Scalar convert, F32 to BF16
def : InstRW<[V2Write_3cyc_1V02], (instrs BFCVT)>;

// ASIMD miscellaneous instructions
// -----------------------------------------------------------------------------

// ASIMD bit reverse
// ASIMD bitwise insert
// ASIMD count
// ASIMD duplicate, element
// ASIMD extract
// ASIMD extract narrow
// ASIMD insert, element to element
// ASIMD move, FP immed
// ASIMD move, integer immed
// ASIMD reverse
// ASIMD table lookup extension, 1 table reg
// ASIMD transpose
// ASIMD unzip/zip
// Handled by SchedAlias<WriteV[dq], ...>

// MOVID and MOVIv2d_ns are bound explicitly here instead of relying on the
// alias mentioned above.
def : InstRW<[V2Write_0or2cyc_1V], (instrs MOVID, MOVIv2d_ns)>;

// ASIMD duplicate, gen reg
def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;

// ASIMD extract narrow, saturating
def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>;

// ASIMD reciprocal and square root estimate, D-form U32
def : InstRW<[V2Write_3cyc_1V02], (instrs URECPEv2i32, URSQRTEv2i32)>;

// ASIMD reciprocal and square root estimate, Q-form U32
def : InstRW<[V2Write_4cyc_2V02], (instrs URECPEv4i32, URSQRTEv4i32)>;

// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
def : InstRW<[V2Write_3cyc_1V02], (instrs FRECPEv1f16, FRECPEv1i32,
                                          FRECPEv1i64, FRECPEv2f32,
                                          FRSQRTEv1f16, FRSQRTEv1i32,
                                          FRSQRTEv1i64, FRSQRTEv2f32)>;

// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
def : InstRW<[V2Write_4cyc_2V02], (instrs FRECPEv4f16, FRECPEv4f32,
                                          FRSQRTEv4f16, FRSQRTEv4f32)>;

// ASIMD reciprocal and square root estimate, Q-form F16
def : InstRW<[V2Write_6cyc_4V02], (instrs FRECPEv8f16, FRSQRTEv8f16)>;

// ASIMD reciprocal exponent
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRECPXv")>;

// ASIMD reciprocal step
def : InstRW<[V2Write_4cyc_1V], (instregex "^FRECPS(32|64|v)",
                                           "^FRSQRTS(32|64|v)")>;

// ASIMD table lookup, 1 or 2 table regs
def : InstRW<[V2Write_2cyc_1V01], (instrs TBLv8i8One, TBLv16i8One,
                                          TBLv8i8Two, TBLv16i8Two)>;

// ASIMD table lookup, 3 table regs
def : InstRW<[V2Write_4cyc_2V01], (instrs TBLv8i8Three, TBLv16i8Three)>;

// ASIMD table lookup, 4 table regs
def : InstRW<[V2Write_4cyc_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>;

// ASIMD table lookup extension, 2 table reg
def : InstRW<[V2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;

// ASIMD table lookup extension, 3 table reg
def : InstRW<[V2Write_6cyc_3V], (instrs TBXv8i8Three, TBXv16i8Three)>;

// ASIMD table lookup extension, 4 table reg
def : InstRW<[V2Write_6cyc_5V], (instrs TBXv8i8Four, TBXv16i8Four)>;

// ASIMD transfer, element to gen reg
def : InstRW<[V2Write_2cyc_2V01], (instregex "^[SU]MOVv")>;

// ASIMD transfer, gen reg to element
def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;

// ASIMD load instructions
// -----------------------------------------------------------------------------

// NOTE: Every load form appears twice: the plain opcode, and its _POST
// (post-indexed) variant, which lists WriteAdr ahead of the load's own write
// resource. All patterns carry an explicit '^'; instregex already matches
// from the start of the opcode name, so the anchor is for uniformity only.

// ASIMD load, 1 element, multiple, 1 reg, D-form
def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_1L],
             (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_1L],
             (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, D-form
def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_2L],
             (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_2L],
             (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, D-form
def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_3L],
             (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_3L],
             (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, D-form
def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_7cyc_4L],
             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_7cyc_4L],
             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "^LD1i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "^LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "^LD1Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "^LD1Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, all lanes, Q-form
def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "^LD1Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "^LD1Rv(16b|8h|4s|2d)_POST$")>;

// NOTE: LD2 forms. Each plain opcode is paired with its _POST variant, which
// additionally lists WriteAdr. The '^' anchors are explicit for uniformity;
// instregex matches from the start of the opcode name regardless.

// ASIMD load, 2 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "^LD2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "^LD2Twov(8b|4h|2s)_POST$")>;

// ASIMD load, 2 element, multiple, Q-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
def : InstRW<[V2Write_8cyc_2L_2V],           (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_2V], (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "^LD2i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "^LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_1L_2V],            (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V],  (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 2 element, all lanes, Q-form
def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;

// NOTE: LD3 forms. Each plain opcode is paired with its _POST variant, which
// additionally lists WriteAdr. The '^' anchors are explicit for uniformity;
// instregex matches from the start of the opcode name regardless.

// ASIMD load, 3 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "^LD3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "^LD3Threev(8b|4h|2s)_POST$")>;

// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
def : InstRW<[V2Write_8cyc_3L_3V],           (instregex "^LD3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "^LD3i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "^LD3i(8|16|32|64)_POST$")>;

// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
def : InstRW<[V2Write_8cyc_3L_3V],           (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;

// NOTE: LD4 forms. Each plain opcode is paired with its _POST variant, which
// additionally lists WriteAdr. The '^' anchors are explicit for uniformity;
// instregex matches from the start of the opcode name regardless.

// ASIMD load, 4 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_3L_4V],           (instregex "^LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;

// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
def : InstRW<[V2Write_9cyc_6L_4V],           (instregex "^LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_9cyc_6L_4V], (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[V2Write_8cyc_3L_4V],           (instregex "^LD4i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "^LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_3L_4V],           (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
def : InstRW<[V2Write_8cyc_4L_4V],           (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_4L_4V], (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD store instructions
// -----------------------------------------------------------------------------

// NOTE: Every store form appears twice: the plain opcode, and its _POST
// (post-indexed) variant, which lists WriteAdr ahead of the store's own write
// resource. The '^' anchors are explicit for uniformity; instregex matches
// from the start of the opcode name regardless.

// ASIMD store, 1 element, multiple, 1 reg, D-form
def : InstRW<[V2Write_2cyc_1L01_1V01],           (instregex "^ST1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "^ST1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[V2Write_2cyc_1L01_1V01],           (instregex "^ST1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "^ST1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, D-form
def : InstRW<[V2Write_2cyc_1L01_1V01],           (instregex "^ST1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "^ST1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[V2Write_2cyc_2L01_2V01],           (instregex "^ST1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "^ST1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, D-form
def : InstRW<[V2Write_2cyc_2L01_2V01],           (instregex "^ST1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "^ST1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[V2Write_2cyc_3L01_3V01],           (instregex "^ST1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_3L01_3V01], (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, D-form
def : InstRW<[V2Write_2cyc_2L01_2V01],           (instregex "^ST1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "^ST1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V2Write_2cyc_4L01_4V01],           (instregex "^ST1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_4L01_4V01], (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
def : InstRW<[V2Write_4cyc_1L01_2V01],           (instregex "^ST1i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "^ST1i(8|16|32|64)_POST$")>;

// NOTE: ST2 forms. Each plain opcode is paired with its _POST variant, which
// additionally lists WriteAdr. The '^' anchors are explicit for uniformity;
// instregex matches from the start of the opcode name regardless.

// ASIMD store, 2 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_4cyc_1L01_2V01],           (instregex "^ST2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "^ST2Twov(8b|4h|2s)_POST$")>;

// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
def : InstRW<[V2Write_4cyc_2L01_4V01],           (instregex "^ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01], (instregex "^ST2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
def : InstRW<[V2Write_4cyc_1L01_2V01],           (instregex "^ST2i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "^ST2i(8|16|32|64)_POST$")>;

// NOTE: ST3 forms. Each plain opcode is paired with its _POST variant, which
// additionally lists WriteAdr. The '^' anchors are explicit for uniformity;
// instregex matches from the start of the opcode name regardless.

// ASIMD store, 3 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_5cyc_2L01_4V01],           (instregex "^ST3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "^ST3Threev(8b|4h|2s)_POST$")>;

// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
def : InstRW<[V2Write_6cyc_3L01_6V01],           (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_3L01_6V01], (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
def : InstRW<[V2Write_5cyc_2L01_4V01],           (instregex "^ST3i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "^ST3i(8|16|32|64)_POST$")>;

// NOTE: ST4 forms. Each plain opcode is paired with its _POST variant, which
// additionally lists WriteAdr. Unlike the other store groups, the Q-form D
// and one-lane D variants get their own records with different resources.
// The '^' anchors are explicit for uniformity; instregex matches from the
// start of the opcode name regardless.

// ASIMD store, 4 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_6cyc_2L01_6V01],           (instregex "^ST4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_2L01_6V01], (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, B/H/S
def : InstRW<[V2Write_7cyc_4L01_12V01],           (instregex "^ST4Fourv(16b|8h|4s)$")>;
def : InstRW<[WriteAdr, V2Write_7cyc_4L01_12V01], (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[V2Write_5cyc_4L01_8V01],           (instregex "^ST4Fourv(2d)$")>;
def : InstRW<[WriteAdr, V2Write_5cyc_4L01_8V01], (instregex "^ST4Fourv(2d)_POST$")>;

// ASIMD store, 4 element, one lane, B/H/S
def : InstRW<[V2Write_6cyc_1L01_3V01],           (instregex "^ST4i(8|16|32)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_1L01_3V01], (instregex "^ST4i(8|16|32)_POST$")>;

// ASIMD store, 4 element, one lane, D
def : InstRW<[V2Write_4cyc_2L01_4V01],            (instregex "^ST4i(64)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01],  (instregex "^ST4i(64)_POST$")>;

// Cryptography extensions
// -----------------------------------------------------------------------------

// NOTE: Each record binds the listed crypto opcodes (by name via instrs, or by
// opcode-name regex via instregex) to a single write resource; no read
// resources are attached in this section.

// Crypto AES ops
def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;

// Crypto polynomial (64x64) multiply long
def : InstRW<[V2Write_2cyc_1V], (instrs PMULLv1i64, PMULLv2i64)>;

// Crypto SHA1 hash acceleration op
// Crypto SHA1 schedule acceleration ops
def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>;

// Crypto SHA1 hash acceleration ops
// Crypto SHA256 hash acceleration ops
def : InstRW<[V2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;

// Crypto SHA256 schedule acceleration ops
def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>;

// Crypto SHA512 hash acceleration ops
def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;

// Crypto SHA3 ops
def : InstRW<[V2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;

// Crypto SM3 ops
def : InstRW<[V2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
                                            "^SM3TT[12][AB]$")>;

// Crypto SM4 ops
def : InstRW<[V2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>;

// CRC
// -----------------------------------------------------------------------------

// Every opcode whose name starts with CRC32 gets the V2Wr_CRC write resource
// together with the V2Rd_CRC read resource.
def : InstRW<[V2Wr_CRC, V2Rd_CRC], (instregex "^CRC32")>;

// SVE Predicate instructions
// -----------------------------------------------------------------------------

// NOTE: Records in this section select SVE predicate opcodes by name (instrs)
// or by opcode-name regex (instregex) and bind each group to one write
// resource. Resource names such as V2Write_2or3cyc_1M are defined outside
// this excerpt.

// Loop control, based on predicate
def : InstRW<[V2Write_2or3cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP,
                                           BRKB_PPmP, BRKB_PPzP)>;

// Loop control, based on predicate and flag setting
def : InstRW<[V2Write_3or4cyc_2M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;

// Loop control, propagating
def : InstRW<[V2Write_2or3cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP,
                                            BRKPB_PPzPP)>;

// Loop control, propagating and flag setting
def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
                                               BRKPBS_PPzPP)>;

// Loop control, based on GPR
def : InstRW<[V2Write_3cyc_2M],
             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;

// Loop terminate
def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;

// Predicate counting scalar
def : InstRW<[V2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
def : InstRW<[V2Write_2cyc_1M],
             (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI",
                        "^SQ(DEC|INC)[BHWD]_XPiWdI",
                        "^UQ(DEC|INC)[BHWD]_WPiI")>;

// Predicate counting scalar, ALL, {1,2,4}
def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>;

// Predicate counting scalar, active predicate
def : InstRW<[V2Write_2cyc_1M],
             (instregex "^CNTP_XPP_[BHSD]",
                        "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
                        "^(UQDEC|UQINC)P_WP_[BHSD]",
                        "^(SQDEC|SQINC)P_XPWd_[BHSD]")>;

// Predicate counting vector, active predicate
def : InstRW<[V2Write_7cyc_1M_1M0_1V],
             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;

// Predicate logical
def : InstRW<[V2Write_1or2cyc_1M0],
             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;

// Predicate logical, flag setting
def : InstRW<[V2Write_1or2cyc_1M0_1M],
             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;

// Predicate reverse
def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]")>;

// Predicate select
def : InstRW<[V2Write_1cyc_1M0], (instrs SEL_PPPP)>;

// Predicate set
def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;

// Predicate set/initialize, set flags
def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]")>;

// Predicate find first/next
def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;

// Predicate test
def : InstRW<[V2Write_1cyc_1M], (instrs PTEST_PP)>;

// Predicate transpose
def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]")>;

// Predicate unpack and widen
def : InstRW<[V2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;

// Predicate zip/unzip
def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>;

// SVE integer instructions
// -----------------------------------------------------------------------------

// NOTE: Arithmetic and shift groups. Accumulating forms (absolute-diff accum,
// pairwise add-accumulate, shift-accumulate) pair a write resource with a
// read resource; the pairwise add-accumulate record additionally places
// ReadDefault between them. All other groups list a write resource only.

// Arithmetic, absolute diff
def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
                                           "^[SU]ABD_ZPZZ_[BHSD]")>;

// Arithmetic, absolute diff accum
def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;

// Arithmetic, absolute diff accum long
def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;

// Arithmetic, absolute diff long
def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;

// Arithmetic, basic
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
                        "^(ADD|SUB)_ZZZ_[BHSD]",
                        "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
                        "^(ADD|SUB|SUBR)_ZI_[BHSD]",
                        "^ADR_[SU]XTW_ZZZ_D_[0123]",
                        "^ADR_LSL_ZZZ_[SD]_[0123]",
                        "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
                        "^SADDLBT_ZZZ_[HSD]",
                        "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
                        "^SSUBL(BT|TB)_ZZZ_[HSD]")>;

// Arithmetic, complex
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
                        "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
                        "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
                        "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
                        "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
                        "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;

// Arithmetic, large integer
def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;

// Arithmetic, pairwise add
def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>;

// Arithmetic, pairwise add and accum long
def : InstRW<[V2Wr_ZPA, ReadDefault, V2Rd_ZPA],
             (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;

// Arithmetic, shift
def : InstRW<[V2Write_2cyc_1V13],
             (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
                        "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
                        "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
                        "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
                        "^(ASR|LSL|LSR)_ZZI_[BHSD]",
                        "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;

// Arithmetic, shift and accumulate
def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>;

// Arithmetic, shift by immediate
def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]",
                                             "^[SU]SHLL[BT]_ZZI_[HSD]")>;

// Arithmetic, shift by immediate and insert
def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>;

// Arithmetic, shift complex
def : InstRW<[V2Write_4cyc_1V13],
             (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
                        "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
                        "^[SU]QR?SHL_ZPZZ_[BHSD]",
                        "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
                        "^SQSHRU?N[BT]_ZZI_[BHS]",
                        "^UQR?SHRN[BT]_ZZI_[BHS]")>;

// Arithmetic, shift right for divide
def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;

// Arithmetic, shift rounding
def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
                                             "^[SU]RSHL_ZPZZ_[BHSD]",
                                             "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;

2167// Bit manipulation
2168def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;
2169
2170// Bitwise select
2171def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;
2172
2173// Count/reverse bits
2174def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;
2175
2176// Broadcast logical bitmask immediate to vector
2177def : InstRW<[V2Write_2cyc_1V], (instrs DUPM_ZI)>;
2178
2179// Compare and set flags
2180def : InstRW<[V2Write_4or5cyc_1V0_1M0],
2181             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
2182                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;
2183
2184// Complex add
2185def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>;
2186
2187// Complex dot product 8-bit element
2188def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;
2189
2190// Complex dot product 16-bit element
2191def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;
2192
2193// Complex multiply-add B, H, S element size
2194def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]",
2195                                                      "^CMLA_ZZZI_[HS]")>;
2196
2197// Complex multiply-add D element size
2198def : InstRW<[V2Wr_ZCMAD, V2Rd_ZCMAD], (instrs CMLA_ZZZ_D)>;
2199
2200// Conditional extract operations, scalar form
2201def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]")>;
2202
2203// Conditional extract operations, SIMD&FP scalar and vector forms
2204def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
2205                                            "^COMPACT_ZPZ_[SD]",
2206                                            "^SPLICE_ZPZZ?_[BHSD]")>;
2207
2208// Convert to floating point, 64b to float or convert to double
2209def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
2210                                             "^[SU]CVTF_ZPmZ_StoD")>;
2211
2212// Convert to floating point, 32b to single or half
2213def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;
2214
2215// Convert to floating point, 16b to half
2216def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;
2217
2218// Copy, scalar
2219def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>;
2220
2221// Copy, scalar SIMD&FP or imm
2222def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]",
2223                                           "^CPY_ZPzI_[BHSD]")>;
2224
2225// Divides, 32 bit
2226def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
2227                                             "^[SU]DIV_ZPZZ_S")>;
2228
2229// Divides, 64 bit
2230def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
2231                                             "^[SU]DIV_ZPZZ_D")>;
2232
2233// Dot product, 8 bit
2234def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>;
2235
2236// Dot product, 8 bit, using signed and unsigned integers
2237def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;
2238
2239// Dot product, 16 bit
2240def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>;
2241
2242// Duplicate, immediate and indexed form
2243def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]",
2244                                           "^DUP_ZZI_[BHSDQ]")>;
2245
2246// Duplicate, scalar form
2247def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]")>;
2248
2249// Extend, sign or zero
2250def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ_[HSD]",
2251                                             "^[SU]XTH_ZPmZ_[SD]",
2252                                             "^[SU]XTW_ZPmZ_D")>;
2253
2254// Extract
2255def : InstRW<[V2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;
2256
2257// Extract narrow saturating
2258def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
2259                                             "^SQXTUN[BT]_ZZ_[BHS]")>;
2260
2261// Extract/insert operation, SIMD and FP scalar form
2262def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]",
2263                                            "^INSR_ZV_[BHSD]")>;
2264
2265// Extract/insert operation, scalar
2266def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]",
2267                                                "^INSR_ZR_[BHSD]")>;
2268
2269// Histogram operations
2270def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]",
2271                                           "^HISTSEG_ZZZ")>;
2272
2273// Horizontal operations, B, H, S form, immediate operands only
2274def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]")>;
2275
2276// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar
2277// operands only / immediate, scalar operands
2278def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;
2279
2280// Horizontal operations, D form, immediate operands only
2281def : InstRW<[V2Write_5cyc_2V02], (instrs INDEX_II_D)>;
2282
2283// Horizontal operations, D form, scalar, immediate operands/ scalar operands
2284// only / immediate, scalar operands
2285def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D")>;
2286
2287// Logical
2288def : InstRW<[V2Write_2cyc_1V],
2289             (instregex "^(AND|EOR|ORR)_ZI",
2290                        "^(AND|BIC|EOR|ORR)_ZZZ",
2291                        "^EOR(BT|TB)_ZZZ_[BHSD]",
2292                        "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]")>;
2294
2295// Max/min, basic and pairwise
2296def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
2297                                           "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
2298                                           "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;
2299
2300// Matching operations
2301// FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the
2302// latency for this instruction is 4 cycles.
2303def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>;
2304
2305// Matrix multiply-accumulate
2306def : InstRW<[V2Wr_ZMMA, V2Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;
2307
2308// Move prefix
2309def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
2310                                           "^MOVPRFX_ZZ")>;
2311
2312// Multiply, B, H, S element size
2313def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
2314                                             "^MUL_ZPZZ_[BHS]",
2315                                             "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
2316                                             "^[SU]MULH_ZPZZ_[BHS]")>;
2317
2318// Multiply, D element size
2319def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
2320                                             "^MUL_ZPZZ_D",
2321                                             "^[SU]MULH_(ZPmZ|ZZZ)_D",
2322                                             "^[SU]MULH_ZPZZ_D")>;
2323
2324// Multiply long
2325def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
2326                                             "^[SU]MULL[BT]_ZZZ_[HSD]")>;
2327
2328// Multiply accumulate, B, H, S element size
2329def : InstRW<[V2Wr_ZMABHS, V2Rd_ZMABHS],
2330             (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>;
2331def : InstRW<[V2Wr_ZMABHS, ReadDefault, V2Rd_ZMABHS],
2332             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;
2333
2334// Multiply accumulate, D element size
2335def : InstRW<[V2Wr_ZMAD, V2Rd_ZMAD],
2336             (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>;
2337def : InstRW<[V2Wr_ZMAD, ReadDefault, V2Rd_ZMAD],
2338             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;
2339
2340// Multiply accumulate long
2341def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
2342                                                "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;
2343
2344// Multiply accumulate saturating doubling long regular
2345def : InstRW<[V2Wr_ZMASQL, V2Rd_ZMASQ],
2346             (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]",
2347                        "^SQDML[AS]L[BT]_ZZZI_[SD]")>;
2348
2349// Multiply saturating doubling high, B, H, S element size
2350def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]",
2351                                             "^SQDMULH_ZZZI_[HS]")>;
2352
2353// Multiply saturating doubling high, D element size
2354def : InstRW<[V2Write_5cyc_2V02], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;
2355
2356// Multiply saturating doubling long
2357def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
2358                                             "^SQDMULL[BT]_ZZZI_[SD]")>;
2359
2360// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
2361// element size
2362def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
2363                                                     "^SQRDCMLAH_ZZZ_[BHS]",
2364                                                     "^SQRDML[AS]H_ZZZI_[HS]",
2365                                                     "^SQRDCMLAH_ZZZI_[HS]")>;
2366
2367// Multiply saturating rounding doubling regular/complex accumulate, D element
2368// size
2369def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D",
2370                                                   "^SQRDCMLAH_ZZZ_D")>;
2371
2372// Multiply saturating rounding doubling regular/complex, B, H, S element size
2373def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]",
2374                                             "^SQRDMULH_ZZZI_[HS]")>;
2375
2376// Multiply saturating rounding doubling regular/complex, D element size
2377def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D")>;
2378
2379// Multiply/multiply long, (8x8) polynomial
2380def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B",
2381                                             "^PMULL[BT]_ZZZ_[HDQ]")>;
2382
2383// Predicate counting vector
2384def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>;
2385
2386// Reciprocal estimate
2387def : InstRW<[V2Write_4cyc_2V02], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;
2388
2389// Reduction, arithmetic, B form
2390def : InstRW<[V2Write_9cyc_2V_4V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;
2391
2392// Reduction, arithmetic, H form
2393def : InstRW<[V2Write_8cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;
2394
2395// Reduction, arithmetic, S form
2396def : InstRW<[V2Write_6cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;
2397
2398// Reduction, arithmetic, D form
2399def : InstRW<[V2Write_4cyc_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;
2400
2401// Reduction, logical
2402def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>;
2403
2404// Reverse, vector
2405def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]",
2406                                           "^REVB_ZPmZ_[HSD]",
2407                                           "^REVH_ZPmZ_[SD]",
2408                                           "^REVW_ZPmZ_D")>;
2409
2410// Select, vector form
2411def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]")>;
2412
2413// Table lookup
2414def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>;
2415
2416// Table lookup extension
2417def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]")>;
2418
2419// Transpose, vector form
2420def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;
2421
2422// Unpack and extend
2423def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;
2424
2425// Zip/unzip
2426def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;
2427
2428// SVE floating-point instructions
2429// -----------------------------------------------------------------------------
2430
2431// Floating point absolute value/difference
2432def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]",
2433                                           "^FABD_ZPZZ_[HSD]")>;
2435
2436// Floating point arithmetic
2437def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
2438                                           "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
2439                                           "^FADDP_ZPmZZ_[HSD]",
2440                                           "^FNEG_ZPmZ_[HSD]",
2441                                           "^FSUBR_ZPm[IZ]_[HSD]",
2442                                           "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;
2443
2444// Floating point associative add, F16
2445def : InstRW<[V2Write_10cyc_1V1_9rc], (instrs FADDA_VPZ_H)>;
2446
2447// Floating point associative add, F32
2448def : InstRW<[V2Write_6cyc_1V1_5rc], (instrs FADDA_VPZ_S)>;
2449
2450// Floating point associative add, F64
2451def : InstRW<[V2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;
2452
2453// Floating point compare
2454def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]",
2455                                            "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
2456                                            "^FCM(LE|LT)_PPzZ0_[HSD]",
2457                                            "^FCMUO_PPzZZ_[HSD]")>;
2458
2459// Floating point complex add
2460def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]")>;
2461
2462// Floating point complex multiply add
2463def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>;
2464def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA],              (instregex "^FCMLA_ZZZI_[HS]")>;
2465
2466// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
2467def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
2468                                             "^FCVTLT_ZPmZ_HtoS",
2469                                             "^FCVTNT_ZPmZ_StoH")>;
2470
2471// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
2472// or F64 to F16)
2473def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
2474                                             "^FCVTLT_ZPmZ_StoD",
2475                                             "^FCVTNT_ZPmZ_DtoS")>;
2476
2477// Floating point convert, round to odd
2478def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;
2479
2480// Floating point base2 log, F16
2481def : InstRW<[V2Write_6cyc_4V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;
2482
2483// Floating point base2 log, F32
2484def : InstRW<[V2Write_4cyc_2V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;
2485
2486// Floating point base2 log, F64
2487def : InstRW<[V2Write_3cyc_1V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;
2488
2489// Floating point convert to integer, F16
2490def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;
2491
2492// Floating point convert to integer, F32
2493def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;
2494
2495// Floating point convert to integer, F64
2496def : InstRW<[V2Write_3cyc_1V02],
2497             (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;
2498
2499// Floating point copy
2500def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]",
2501                                           "^FDUP_ZI_[HSD]")>;
2502
2503// Floating point divide, F16
2504def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;
2505
2506// Floating point divide, F32
2507def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;
2508
2509// Floating point divide, F64
2510def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;
2511
2512// Floating point min/max pairwise
2513def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;
2514
2515// Floating point min/max
2516def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
2517                                           "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;
2518
2519// Floating point multiply
2520def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
2521                                           "^FMULX_ZPZZ_[HSD]",
2522                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
2523                                           "^FMUL_ZPZ[IZ]_[HSD]")>;
2524
2525// Floating point multiply accumulate
2526def : InstRW<[V2Wr_ZFMA, ReadDefault, V2Rd_ZFMA],
2527             (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
2528                        "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
2529def : InstRW<[V2Wr_ZFMA, V2Rd_ZFMA],
2530             (instregex "^FML[AS]_ZZZI_[HSD]",
2531                        "^FN?ML[AS]_ZPZZZ_[HSD]")>;
2532
2533// Floating point multiply add/sub accumulate long
2534def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;
2535
2536// Floating point reciprocal estimate, F16
2537def : InstRW<[V2Write_6cyc_4V02], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;
2538
2539// Floating point reciprocal estimate, F32
2540def : InstRW<[V2Write_4cyc_2V02], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;
2541
2542// Floating point reciprocal estimate, F64
2543def : InstRW<[V2Write_3cyc_1V02], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;
2544
2545// Floating point reciprocal step
2546def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;
2547
2548// Floating point reduction, F16
2549def : InstRW<[V2Write_8cyc_4V],
2550             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>;
2551
2552// Floating point reduction, F32
2553def : InstRW<[V2Write_6cyc_3V],
2554             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>;
2555
2556// Floating point reduction, F64
2557def : InstRW<[V2Write_4cyc_2V],
2558             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>;
2559
2560// Floating point round to integral, F16
2561def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;
2562
2563// Floating point round to integral, F32
2564def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;
2565
2566// Floating point round to integral, F64
2567def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;
2568
2569// Floating point square root, F16
2570def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>;
2571
2572// Floating point square root, F32
2573def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>;
2574
2575// Floating point square root, F64
2576def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>;
2577
2578// Floating point trigonometric exponentiation
2579def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;
2580
2581// Floating point trigonometric multiply add
2582def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]")>;
2583
2584// Floating point trigonometric, miscellaneous
2585def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>;
2586
2587// SVE BFloat16 (BF16) instructions
2588// -----------------------------------------------------------------------------
2589
2590// Convert, F32 to BF16
2591def : InstRW<[V2Write_4cyc_1V02], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;
2592
2593// Dot product
2594def : InstRW<[V2Wr_ZBFDOT, V2Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;
2595
2596// Matrix multiply accumulate
2597def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>;
2598
2599// Multiply accumulate long
2600def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>;
2601
2602// SVE Load instructions
2603// -----------------------------------------------------------------------------
2604
2605// Load vector
2606def : InstRW<[V2Write_6cyc_1L], (instrs LDR_ZXI)>;
2607
2608// Load predicate
2609def : InstRW<[V2Write_6cyc_1L_1M], (instrs LDR_PXI)>;
2610
2611// Contiguous load, scalar + imm
2612def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM$",
2613                                           "^LD1S?B_[HSD]_IMM$",
2614                                           "^LD1S?H_[SD]_IMM$",
2615                                           "^LD1S?W_D_IMM$" )>;
2616// Contiguous load, scalar + scalar
2617def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]$",
2618                                           "^LD1S?B_[HSD]$",
2619                                           "^LD1S?H_[SD]$",
2620                                           "^LD1S?W_D$" )>;
2621
2622// Contiguous load broadcast, scalar + imm
2623def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
2624                                           "^LD1RS?B_[HSD]_IMM$",
2625                                           "^LD1RS?H_[SD]_IMM$",
2626                                           "^LD1RW_D_IMM$",
2627                                           "^LD1RSW_IMM$",
2628                                           "^LD1RQ_[BHWD]_IMM$")>;
2629
2630// Contiguous load broadcast, scalar + scalar
2631def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;
2632
2633// Non temporal load, scalar + imm
2634// Non temporal load, scalar + scalar
2635def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZR[IR]$")>;
2636
2637// Non temporal gather load, vector + scalar 32-bit element size
2638def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
2639                                              "^LDNT1S[BH]_ZZR_S_REAL$")>;
2640
2641// Non temporal gather load, vector + scalar 64-bit element size
2642def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$",
2643                                               "^LDNT1D_ZZR_D_REAL$")>;
2644
2645// Contiguous first faulting load, scalar + scalar
2646def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
2647                                              "^LDFF1S?B_[HSD]_REAL$",
2648                                              "^LDFF1S?H_[SD]_REAL$",
2649                                              "^LDFF1S?W_D_REAL$")>;
2650
2651// Contiguous non faulting load, scalar + imm
2652def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
2653                                           "^LDNF1S?B_[HSD]_IMM_REAL$",
2654                                           "^LDNF1S?H_[SD]_IMM_REAL$",
2655                                           "^LDNF1S?W_D_IMM_REAL$")>;
2656
2657// Contiguous Load two structures to two vectors, scalar + imm
2658def : InstRW<[V2Write_8cyc_2L_2V], (instregex "^LD2[BHWD]_IMM$")>;
2659
2660// Contiguous Load two structures to two vectors, scalar + scalar
2661def : InstRW<[V2Write_9cyc_2L_2V_2S], (instregex "^LD2[BHWD]$")>;
2662
2663// Contiguous Load three structures to three vectors, scalar + imm
2664def : InstRW<[V2Write_9cyc_3L_3V], (instregex "^LD3[BHWD]_IMM$")>;
2665
2666// Contiguous Load three structures to three vectors, scalar + scalar
2667def : InstRW<[V2Write_10cyc_3V_3L_3S], (instregex "^LD3[BHWD]$")>;
2668
2669// Contiguous Load four structures to four vectors, scalar + imm
2670def : InstRW<[V2Write_9cyc_4L_8V], (instregex "^LD4[BHWD]_IMM$")>;
2671
2672// Contiguous Load four structures to four vectors, scalar + scalar
2673def : InstRW<[V2Write_10cyc_4L_8V_4S], (instregex "^LD4[BHWD]$")>;
2674
2675// Gather load, vector + imm, 32-bit element size
2676def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
2677                                              "^GLD(FF)?1W_IMM_REAL$")>;
2678
2679// Gather load, vector + imm, 64-bit element size
2680def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
2681                                              "^GLD(FF)?1D_IMM_REAL$")>;
2682
2683// Gather load, 32-bit scaled offset
2684def : InstRW<[V2Write_10cyc_1L_8V],
2685             (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED_REAL$",
2686                        "^GLD(FF)?1W_[SU]XTW_SCALED_REAL$")>;
2687
2688// Gather load, 64-bit scaled offset
2689// NOTE: These instructions are not specified in the SOG.
2690def : InstRW<[V2Write_10cyc_1L_4V],
2691             (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED_REAL$",
2692                        "^GLD(FF)?1D_([SU]XTW_)?SCALED_REAL$")>;
2693
2694// Gather load, 32-bit unpacked unscaled offset
2695def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
2696                                              "^GLD(FF)?1W_[SU]XTW_REAL$")>;
2697
2698// Gather load, 64-bit unpacked unscaled offset
2699// NOTE: These instructions are not specified in the SOG.
2700def : InstRW<[V2Write_9cyc_1L_2V],
2701             (instregex "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?REAL$",
2702                        "^GLD(FF)?1D_([SU]XTW_)?REAL$")>;
2703
2704// SVE Store instructions
2705// -----------------------------------------------------------------------------
2706
2707// Store from predicate reg
2708def : InstRW<[V2Write_1cyc_1L01], (instrs STR_PXI)>;
2709
2710// Store from vector reg
2711def : InstRW<[V2Write_2cyc_1L01_1V01], (instrs STR_ZXI)>;
2712
2713// Contiguous store, scalar + imm
2714def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BHWD]_IMM$",
2715                                                  "^ST1B_[HSD]_IMM$",
2716                                                  "^ST1H_[SD]_IMM$",
2717                                                  "^ST1W_D_IMM$")>;
2718
2719// Contiguous store, scalar + scalar (ST1H forms add a 1S term)
2720def : InstRW<[V2Write_2cyc_1L01_1S_1V01], (instregex "^ST1H(_[SD])?$")>;
2721def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BWD]$",
2722                                                  "^ST1B_[HSD]$",
2723                                                  "^ST1W_D$")>;
2724
2725// Contiguous store two structures from two vectors, scalar + imm
2726def : InstRW<[V2Write_4cyc_1L01_1V01], (instregex "^ST2[BHWD]_IMM$")>;
2727
2728// Contiguous store two structures from two vectors, scalar + scalar
2729def : InstRW<[V2Write_4cyc_2L01_2S_2V01], (instrs ST2H)>;
2730def : InstRW<[V2Write_4cyc_2L01_2V01], (instregex "^ST2[BWD]$")>;
2731
2732// Contiguous store three structures from three vectors, scalar + imm
2733def : InstRW<[V2Write_7cyc_9L01_9V01], (instregex "^ST3[BHWD]_IMM$")>;
2734
2735// Contiguous store three structures from three vectors, scalar + scalar
2736def : InstRW<[V2Write_7cyc_9L01_9S_9V01], (instregex "^ST3[BHWD]$")>;
2737
2738// Contiguous store four structures from four vectors, scalar + imm
2739def : InstRW<[V2Write_11cyc_18L01_18V01], (instregex "^ST4[BHWD]_IMM$")>;
2740
2741// Contiguous store four structures from four vectors, scalar + scalar
2742def : InstRW<[V2Write_11cyc_18L01_18S_18V01], (instregex "^ST4[BHWD]$")>;
2743
2744// Non temporal store, scalar + imm
2745def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;
2746
2747// Non temporal store, scalar + scalar (STNT1H_ZRR adds a 1S term)
2748def : InstRW<[V2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
2749def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;
2750
2751// Scatter non temporal store, vector + scalar 32-bit element size
2752def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^STNT1[BHW]_ZZR_S")>;
2753
2754// Scatter non temporal store, vector + scalar 64-bit element size
2755def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^STNT1[BHWD]_ZZR_D")>;
2756
2757// Scatter store vector + imm 32-bit element size
2758def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_IMM$",
2759                                                  "^SST1W_IMM$")>;
2760
2761// Scatter store vector + imm 64-bit element size
2762def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_IMM$",
2763                                                  "^SST1D_IMM$")>;
2764
2765// Scatter store, 32-bit scaled offset
2766def : InstRW<[V2Write_4cyc_4L01_4V01],
2767             (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;
2768
2769// Scatter store, 32-bit unpacked unscaled offset
2770def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_[SU]XTW$",
2771                                                  "^SST1D_[SU]XTW$")>;
2772
2773// Scatter store, 32-bit unpacked scaled offset
2774def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
2775                                                  "^SST1D_[SU]XTW_SCALED$")>;
2776
2777// Scatter store, 32-bit unscaled offset
2778def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_[SU]XTW$",
2779                                                  "^SST1W_[SU]XTW$")>;
2780
2781// Scatter store, 64-bit scaled offset
2782def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_SCALED$",
2783                                                  "^SST1D_SCALED$")>;
2784
2785// Scatter store, 64-bit unscaled offset
2786def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D$",
2787                                                  "^SST1D$")>;
2788
2789// SVE Miscellaneous instructions
2790// -----------------------------------------------------------------------------
2791
2792// Read first fault register, unpredicated
2793def : InstRW<[V2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;
2794
2795// Read first fault register, predicated
2796def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;
2797
2798// Read first fault register and set flags
2799def : InstRW<[V2Write_4or5cyc_2M0_2M], (instrs RDFFRS_PPz)>;
2800
2801// Set first fault register
2802// Write to first fault register
2803def : InstRW<[V2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;
2804
2805// Prefetch
2806// NOTE: This is not specified in the SOG.
2807def : InstRW<[V2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;
2808
2809// SVE Cryptographic instructions
2810// -----------------------------------------------------------------------------
2811
2812// Crypto AES ops
2813def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$",
2814                                           "^AESI?MC_ZZ_B$")>;
2815
2816// Crypto SHA3 ops
2817def : InstRW<[V2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$",
2818                                            "^RAX1_ZZZ_D$",
2819                                            "^XAR_ZZZI_[BHSD]$")>;
2820
2821// Crypto SM4 ops
2822def : InstRW<[V2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;
2823
2824}
2825