xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1//=- AArch64SchedAmpere1.td - Ampere-1 scheduling def -----*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the machine model for the Ampere Computing Ampere-1 to
10// support instruction scheduling and other instruction cost heuristics.
11//
12//===----------------------------------------------------------------------===//
13
14// The Ampere-1 core is an out-of-order micro-architecture.  The front
15// end has branch prediction, with a 10-cycle recovery time from a
16// mispredicted branch.  Instructions coming out of the front end are
17// decoded into internal micro-ops (uops).
18
19def Ampere1Model : SchedMachineModel {
20  let IssueWidth            =   4;  // 4-way decode and dispatch
21  let MicroOpBufferSize     = 192;  // re-order buffer size
22  let LoadLatency           =   4;  // Optimistic load latency
23  let MispredictPenalty     =  10;  // Branch mispredict penalty
24  let LoopMicroOpBufferSize =  32;  // Instruction queue size
25  let CompleteModel = 0;  // this model is not flagged as complete
26
27  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
28                                                    SMEUnsupported.F,
29                                                    PAUnsupported.F,
30                                                    [HasMTE]);  // feature predicates this model leaves out
31}
32
33let SchedModel = Ampere1Model in {
34
35//===----------------------------------------------------------------------===//
36// Define each kind of processor resource and number available on Ampere-1.
37// Ampere-1 has 12 pipelines that 8 independent schedulers (4 integer, 2 FP,
38// and 2 memory) issue into.  The integer and FP schedulers can each issue
39// one uop per cycle, while the memory schedulers can each issue one load
40// and one store address calculation per cycle.
41
42def Ampere1UnitA  : ProcResource<2>;  // integer single-cycle, branch, and flags r/w
43def Ampere1UnitB  : ProcResource<2>;  // integer single-cycle, and complex shifts
44def Ampere1UnitBS : ProcResource<1>;  // integer multi-cycle
45def Ampere1UnitL  : ProcResource<2>;  // load
46def Ampere1UnitS  : ProcResource<2>;  // store address calculation
47def Ampere1UnitX  : ProcResource<1>;  // FP and vector operations, and flag write
48def Ampere1UnitY  : ProcResource<1>;  // FP and vector operations, and crypto
49def Ampere1UnitZ  : ProcResource<1>;  // FP store data and FP-to-integer moves
50
51def Ampere1UnitAB : ProcResGroup<[Ampere1UnitA, Ampere1UnitB]>;  // any integer single-cycle unit (A or B)
52def Ampere1UnitXY : ProcResGroup<[Ampere1UnitX, Ampere1UnitY]>;  // any FP/vector unit (X or Y)
53
54//===----------------------------------------------------------------------===//
55// Define customized scheduler read/write types specific to the Ampere-1.
56
57def Ampere1Write_1cyc_1A : SchedWriteRes<[Ampere1UnitA]> {
58  let Latency = 1;
59  let NumMicroOps = 1;
60}
61
62def Ampere1Write_1cyc_2A : SchedWriteRes<[Ampere1UnitA, Ampere1UnitA]> {
63  let Latency = 1;
64  let NumMicroOps = 2;
65}
66
67def Ampere1Write_1cyc_1B : SchedWriteRes<[Ampere1UnitB]> {
68  let Latency = 1;
69  let NumMicroOps = 1;
70}
71
72def Ampere1Write_1cyc_1AB : SchedWriteRes<[Ampere1UnitAB]> {
73  let Latency = 1;
74  let NumMicroOps = 1;
75}
76
77def Ampere1Write_1cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
78  let Latency = 1;
79  let NumMicroOps = 1;
80}
81
82def Ampere1Write_1cyc_1S : SchedWriteRes<[Ampere1UnitS]> {
83  let Latency = 1;
84  let NumMicroOps = 1;
85}
86
87def Ampere1Write_1cyc_2S : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS]> {
88  let Latency = 1;
89  let NumMicroOps = 2;
90}
91
92def Ampere1Write_2cyc_1Y : SchedWriteRes<[Ampere1UnitY]> {
93  let Latency = 2;
94  let NumMicroOps = 1;
95}
96
97def Ampere1Write_2cyc_2AB : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitAB]> {
98  let Latency = 2;
99  let NumMicroOps = 2;
100}
101
102def Ampere1Write_2cyc_1B_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitAB]> {
103  let Latency = 2;
104  let NumMicroOps = 2;
105}
106
107def Ampere1Write_2cyc_1B_1A : SchedWriteRes<[Ampere1UnitB, Ampere1UnitA]> {
108  let Latency = 2;
109  let NumMicroOps = 2;
110}
111
112def Ampere1Write_2cyc_1AB_1A : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitA]> {
113  let Latency = 2;
114  let NumMicroOps = 2;
115}
116
117def Ampere1Write_2cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
118  let Latency = 2;
119  let NumMicroOps = 2;
120}
121
122def Ampere1Write_2cyc_1AB_2S : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS,
123                                                             Ampere1UnitS]> {
124  let Latency = 2;
125  let NumMicroOps = 3;
126}
127
128def Ampere1Write_2cyc_1AB_1S_1Z : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS,
129                                                                Ampere1UnitZ]> {
130  let Latency = 2;
131  let NumMicroOps = 3;
132}
133
134def Ampere1Write_2cyc_1B_1S : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS]> {
135  let Latency = 2;
136  let NumMicroOps = 2;
137}
138
139def Ampere1Write_2cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
140  let Latency = 2;
141  let NumMicroOps = 1;
142}
143
144def Ampere1Write_2cyc_1S_1Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ]> {
145  let Latency = 2;
146  let NumMicroOps = 2;
147}
148
149def Ampere1Write_3cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
150  let Latency = 3;
151  let NumMicroOps = 1;
152}
153
154def Ampere1Write_3cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
155  let Latency = 3;
156  let NumMicroOps = 1;
157}
158
159def Ampere1Write_3cyc_1B_1S_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS,
160                                                               Ampere1UnitAB]> {
161  let Latency = 3;  // must agree with the "3cyc" encoded in the def name
162  let NumMicroOps = 3;  // one uop per listed unit: B, S, AB
163}
164
165def Ampere1Write_3cyc_1S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ, Ampere1UnitZ]> {
166  let Latency = 3;  // must agree with the "3cyc" encoded in the def name
167  let NumMicroOps = 3;  // one uop per listed unit: S, Z, Z
168}
169
170def Ampere1Write_3cyc_2S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
171                                             Ampere1UnitZ, Ampere1UnitZ]> {
172  let Latency = 3;  // must agree with the "3cyc" in the name (ST1 1/2/3/4-reg stores use 2/3/4/5 cycles)
173  let NumMicroOps = 4;  // one uop per listed unit: S, S, Z, Z
174}
175
176def Ampere1Write_4cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
177  let Latency = 4;
178  let NumMicroOps = 1;
179}
180
181def Ampere1Write_4cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
182  let Latency = 4;
183  let NumMicroOps = 1;
184}
185
186def Ampere1Write_4cyc_1X : SchedWriteRes<[Ampere1UnitX]> {
187  let Latency = 4;
188  let NumMicroOps = 1;
189}
190
191def Ampere1Write_4cyc_1Y : SchedWriteRes<[Ampere1UnitY]> {
192  let Latency = 4;
193  let NumMicroOps = 1;
194}
195
196def Ampere1Write_4cyc_1Z : SchedWriteRes<[Ampere1UnitZ]> {
197  let Latency = 4;
198  let NumMicroOps = 1;
199}
200
201def Ampere1Write_4cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> {
202  let Latency = 4;
203  let NumMicroOps = 2;
204}
205
206def Ampere1Write_4cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
207  let Latency = 4;
208  let NumMicroOps = 1;
209}
210
211def Ampere1Write_4cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
212  let Latency = 4;
213  let NumMicroOps = 2;
214}
215
216def Ampere1Write_4cyc_1XY_1S_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitS, Ampere1UnitZ]> {
217  let Latency = 4;
218  let NumMicroOps = 3;
219}
220
221def Ampere1Write_4cyc_3S_3Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS, Ampere1UnitS,
222                                             Ampere1UnitZ, Ampere1UnitZ, Ampere1UnitZ]> {
223  let Latency = 4;
224  let NumMicroOps = 6;
225}
226
227def Ampere1Write_5cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
228  let Latency = 5;
229  let NumMicroOps = 2;
230}
231
232def Ampere1Write_5cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
233  let Latency = 5;
234  let NumMicroOps = 1;
235}
236
237def Ampere1Write_5cyc_1X : SchedWriteRes<[Ampere1UnitX]> {
238  let Latency = 5;
239  let NumMicroOps = 1;
240}
241
242def Ampere1Write_5cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
243  let Latency = 5;
244  let NumMicroOps = 1;
245}
246
247def Ampere1Write_5cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> {
248  let Latency = 5;
249  let NumMicroOps = 2;
250}
251
252def Ampere1Write_5cyc_1L_1BS : SchedWriteRes<[Ampere1UnitL, Ampere1UnitBS]> {
253  let Latency = 5;
254  let NumMicroOps = 2;
255}
256
257def Ampere1Write_5cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
258  let Latency = 5;
259  let NumMicroOps = 1;
260}
261
262def Ampere1Write_5cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
263  let Latency = 5;
264  let NumMicroOps = 2;
265}
266
267def Ampere1Write_5cyc_4S_4Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
268                                             Ampere1UnitS, Ampere1UnitS,
269                                             Ampere1UnitZ, Ampere1UnitZ,
270                                             Ampere1UnitZ, Ampere1UnitZ]> {
271  let Latency = 5;
272  let NumMicroOps = 8;
273}
274
275def Ampere1Write_5cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
276                                                 Ampere1UnitS, Ampere1UnitS,
277                                                 Ampere1UnitZ, Ampere1UnitZ]> {
278  let Latency = 5;
279  let NumMicroOps = 6;
280}
281
282def Ampere1Write_6cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
283                                                 Ampere1UnitS, Ampere1UnitS,
284                                                 Ampere1UnitZ, Ampere1UnitZ]> {
285  let Latency = 6;
286  let NumMicroOps = 6;
287}
288
289def Ampere1Write_6cyc_3XY_3S_3Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY,
290                                                 Ampere1UnitS, Ampere1UnitS, Ampere1UnitS,
291                                                 Ampere1UnitZ, Ampere1UnitZ, Ampere1UnitZ]> {
292  let Latency = 6;
293  let NumMicroOps = 9;
294}
295
296def Ampere1Write_6cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
297  let Latency = 6;
298  let NumMicroOps = 2;
299}
300
301def Ampere1Write_6cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
302  let Latency = 6;
303  let NumMicroOps = 1;
304}
305
306def Ampere1Write_6cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
307  let Latency = 6;
308  let NumMicroOps = 2;
309}
310
311def Ampere1Write_6cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
312  let Latency = 6;
313  let NumMicroOps = 3;
314}
315
316def Ampere1Write_6cyc_3L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL]> {
317  let Latency = 6;
318  let NumMicroOps = 3;
319}
320
321def Ampere1Write_6cyc_4L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
322                                          Ampere1UnitL, Ampere1UnitL]> {
323  let Latency = 6;
324  let NumMicroOps = 4;
325}
326
327def Ampere1Write_6cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> {
328  let Latency = 6;
329  let NumMicroOps = 2;
330}
331
332def Ampere1Write_7cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
333  let Latency = 7;
334  let NumMicroOps = 1;
335}
336
337def Ampere1Write_7cyc_1BS_1XY : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitXY]> {
338  let Latency = 7;
339  let NumMicroOps = 2;
340}
341
342def Ampere1Write_7cyc_1L_1XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitXY]> {
343  let Latency = 7;
344  let NumMicroOps = 2;
345}
346
347def Ampere1Write_7cyc_2L_2XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
348                                              Ampere1UnitXY, Ampere1UnitXY]> {
349  let Latency = 7;
350  let NumMicroOps = 4;
351}
352
353def Ampere1Write_7cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
354  let Latency = 7;
355  let NumMicroOps = 2;
356}
357
358def Ampere1Write_7cyc_4XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
359                                                 Ampere1UnitXY, Ampere1UnitXY,
360                                                 Ampere1UnitS, Ampere1UnitS,
361                                                 Ampere1UnitS, Ampere1UnitS,
362                                                 Ampere1UnitZ, Ampere1UnitZ,
363                                                 Ampere1UnitZ, Ampere1UnitZ]> {
364  let Latency = 7;
365  let NumMicroOps = 12;
366}
367
368def Ampere1Write_8cyc_1BS_1A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA]> {
369  let Latency = 8;
370  let NumMicroOps = 2;
371}
372
373def Ampere1Write_8cyc_1BS_2A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA,
374                                                             Ampere1UnitA]> {
375  let Latency = 8;
376  let NumMicroOps = 3;
377}
378
379def Ampere1Write_8cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
380  let Latency = 8;
381  let NumMicroOps = 2;
382}
383
384def Ampere1Write_8cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
385                                           Ampere1UnitXY, Ampere1UnitXY]> {
386  let Latency = 8;
387  let NumMicroOps = 4;
388}
389
390def Ampere1Write_8cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
391                                              Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
392  let Latency = 8;
393  let NumMicroOps = 6;
394}
395
396def Ampere1Write_8cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
397                                              Ampere1UnitL, Ampere1UnitL,
398                                              Ampere1UnitXY, Ampere1UnitXY,
399                                              Ampere1UnitXY, Ampere1UnitXY]> {
400  let Latency = 8;
401  let NumMicroOps = 8;
402}
403
404def Ampere1Write_9cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
405                                              Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
406  let Latency = 9;
407  let NumMicroOps = 6;
408}
409
410def Ampere1Write_9cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
411                                              Ampere1UnitL, Ampere1UnitL,
412                                              Ampere1UnitXY, Ampere1UnitXY,
413                                              Ampere1UnitXY, Ampere1UnitXY]> {
414  let Latency = 9;
415  let NumMicroOps = 8;
416}
417
418def Ampere1Write_9cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
419  let Latency = 9;
420  let NumMicroOps = 3;
421}
422
423def Ampere1Write_9cyc_2L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
424                                              Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
425  let Latency = 9;
426  let NumMicroOps = 5;
427}
428
429def Ampere1Write_9cyc_6XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
430                                                 Ampere1UnitXY, Ampere1UnitXY,
431                                                 Ampere1UnitXY, Ampere1UnitXY,
432                                                 Ampere1UnitS, Ampere1UnitS,
433                                                 Ampere1UnitS, Ampere1UnitS,
434                                                 Ampere1UnitZ, Ampere1UnitZ,
435                                                 Ampere1UnitZ, Ampere1UnitZ]> {
436  let Latency = 9;
437  let NumMicroOps = 14;
438}
439
440def Ampere1Write_9cyc_8XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
441                                                 Ampere1UnitXY, Ampere1UnitXY,
442                                                 Ampere1UnitXY, Ampere1UnitXY,
443                                                 Ampere1UnitXY, Ampere1UnitXY,
444                                                 Ampere1UnitS, Ampere1UnitS,
445                                                 Ampere1UnitS, Ampere1UnitS,
446                                                 Ampere1UnitZ, Ampere1UnitZ,
447                                                 Ampere1UnitZ, Ampere1UnitZ]> {
448  let Latency = 9;
449  let NumMicroOps = 16;
450}
451
452def Ampere1Write_10cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
453  let Latency = 10;
454  let NumMicroOps = 2;
455}
456
457def Ampere1Write_10cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> {
458  let Latency = 10;
459  let NumMicroOps = 2;
460}
461
462def Ampere1Write_10cyc_1X_1Z : SchedWriteRes<[Ampere1UnitX, Ampere1UnitZ]> {
463  let Latency = 10;
464  let NumMicroOps = 2;
465}
466
467def Ampere1Write_10cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
468                                               Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
469  let Latency = 10;
470  let NumMicroOps = 6;
471}
472
473def Ampere1Write_10cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitX]> {
474  let Latency = 10;
475  let NumMicroOps = 3;
476}
477
478def Ampere1Write_10cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitXY]> {
479  let Latency = 10;
480  let NumMicroOps = 3;
481}
482
483def Ampere1Write_11cyc_1BS_1L : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitL]> {
484  let Latency = 11;
485  let NumMicroOps = 2;
486}
487
488def Ampere1Write_11cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitX]> {
489  let Latency = 11;
490  let NumMicroOps = 3;
491}
492
493def Ampere1Write_11cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitXY]> {
494  let Latency = 11;
495  let NumMicroOps = 3;
496}
497
498def Ampere1Write_11cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
499                                               Ampere1UnitL, Ampere1UnitL,
500                                               Ampere1UnitXY, Ampere1UnitXY,
501                                               Ampere1UnitXY, Ampere1UnitXY,
502                                               Ampere1UnitXY, Ampere1UnitXY,
503                                               Ampere1UnitXY, Ampere1UnitXY]> {
504  let Latency = 11;
505  let NumMicroOps = 12;
506}
507
508def Ampere1Write_12cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
509                                               Ampere1UnitL, Ampere1UnitL,
510                                               Ampere1UnitXY, Ampere1UnitXY,
511                                               Ampere1UnitXY, Ampere1UnitXY,
512                                               Ampere1UnitXY, Ampere1UnitXY,
513                                               Ampere1UnitXY, Ampere1UnitXY]> {
514  let Latency = 12;
515  let NumMicroOps = 12;
516}
517
518def Ampere1Write_12cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
519  let Latency = 12;
520  let NumMicroOps = 3;
521}
522
523def Ampere1Write_12cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
524                                            Ampere1UnitXY, Ampere1UnitXY]> {
525  let Latency = 12;
526  let NumMicroOps = 4;
527}
528
529def Ampere1Write_18cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
530  let Latency = 18;
531  let NumMicroOps = 1;
532}
533
534def Ampere1Write_19cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
535  let Latency = 19;
536  let NumMicroOps = 1;
537}
538
539def Ampere1Write_25cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
540  let Latency = 25;
541  let NumMicroOps = 1;
542}
543
544def Ampere1Write_32cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
545  let Latency = 32;
546  let NumMicroOps = 1;
547}
548
549def Ampere1Write_34cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
550  let Latency = 34;
551  let NumMicroOps = 1;
552}
553
554def Ampere1Write_34cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
555  let Latency = 34;
556  let NumMicroOps = 1;
557}
558
559def Ampere1Write_39cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
560  let Latency = 39;
561  let NumMicroOps = 1;
562}
563
564def Ampere1Write_62cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
565  let Latency = 62;
566  let NumMicroOps = 1;
567}
568
569// For basic arithmetic, we have more flexibility for short shifts (LSL shift <= 4),
570// which are a single uop, and for extended registers, which have full flexibility
571// across Unit A or B for both uops.
572def Ampere1Write_Arith : SchedWriteVariant<[
573                                SchedVar<RegExtendedPred, [Ampere1Write_2cyc_2AB]>,
574                                SchedVar<IsCheapLSL,      [Ampere1Write_1cyc_1AB]>,
575                                SchedVar<NoSchedPred,     [Ampere1Write_2cyc_1B_1AB]>]>;
576
577def Ampere1Write_ArithFlagsetting : SchedWriteVariant<[
578                                SchedVar<RegExtendedPred, [Ampere1Write_2cyc_1AB_1A]>,
579                                SchedVar<IsCheapLSL,      [Ampere1Write_1cyc_1A]>,
580                                SchedVar<NoSchedPred,     [Ampere1Write_2cyc_1B_1A]>]>;
581
582//===----------------------------------------------------------------------===//
583// Map the target-defined scheduler read/write resources and latencies for Ampere-1.
584// This provides a coarse model, which is then specialised below.
585
586def : WriteRes<WriteImm,   [Ampere1UnitAB]>;  // MOVN, MOVZ
587def : WriteRes<WriteI,     [Ampere1UnitAB]>;  // ALU
588def : WriteRes<WriteISReg, [Ampere1UnitB, Ampere1UnitA]> {
589  let Latency = 2;
590  let NumMicroOps = 2;
591}  // ALU of Shifted-Reg
592def : WriteRes<WriteIEReg, [Ampere1UnitAB, Ampere1UnitA]> {
593  let Latency = 2;
594  let NumMicroOps = 2;
595}  // ALU of Extended-Reg
596def : WriteRes<WriteExtr,  [Ampere1UnitB]>;  // EXTR shifts a reg pair
597def : WriteRes<WriteIS,    [Ampere1UnitB]>;  // Shift/Scale
598def : WriteRes<WriteID32,  [Ampere1UnitBS]> {
599  let Latency = 18;
600}  // 32-bit Divide
601def : WriteRes<WriteID64,  [Ampere1UnitBS]> {
602  let Latency = 34;
603}  // 64-bit Divide
604def : WriteRes<WriteIM32,  [Ampere1UnitBS]> {
605  let Latency = 3;
606}  // 32-bit Multiply
607def : WriteRes<WriteIM64,  [Ampere1UnitBS]> {
608  let Latency = 3;
609}  // 64-bit Multiply
610def : WriteRes<WriteBr,    [Ampere1UnitA]>;
611def : WriteRes<WriteBrReg, [Ampere1UnitA, Ampere1UnitA]>;  // NOTE(review): two A units but no NumMicroOps = 2, unlike Ampere1Write_1cyc_2A -- confirm
612def : WriteRes<WriteLD,    [Ampere1UnitL]> {
613  let Latency = 4;
614}  // Load from base addr plus immediate offset
615def : WriteRes<WriteST,    [Ampere1UnitS]> {
616  let Latency = 1;
617}  // Store to base addr plus immediate offset
618def : WriteRes<WriteSTP,   [Ampere1UnitS, Ampere1UnitS]> {
619  let Latency = 1;
620  let NumMicroOps = 2;
621}  // Store a register pair.
622def : WriteRes<WriteAdr,   [Ampere1UnitAB]>;
623def : WriteRes<WriteLDIdx, [Ampere1UnitAB, Ampere1UnitS]> {
624  let Latency = 5;
625  let NumMicroOps = 2;
626}  // Load from a register index (maybe scaled).  NOTE(review): uses store-address unit S, while the Ampere1Write_*_1AB_1L load writes use L -- confirm
627def : WriteRes<WriteSTIdx, [Ampere1UnitS, Ampere1UnitS]> {
628  let Latency = 1;
629  let NumMicroOps = 2;
630}  // Store to a register index (maybe scaled).
631def : WriteRes<WriteF,  [Ampere1UnitXY]> {
632  let Latency = 2;
633}  // General floating-point ops.
634def : WriteRes<WriteFCmp,  [Ampere1UnitX]> {
635  let Latency = 5;
636}  // Floating-point compare.
637def : WriteRes<WriteFCvt,  [Ampere1UnitXY]> {
638  let Latency = 6;
639}  // Float conversion.
640def : WriteRes<WriteFCopy, [Ampere1UnitXY]> {
641}  // Float-int register copy.  NOTE(review): no Latency is set here, unlike the neighbouring entries -- confirm the default is intended
642def : WriteRes<WriteFImm,  [Ampere1UnitXY]> {
643  let Latency = 2;
644}  // Floating-point immediate.
645def : WriteRes<WriteFMul,  [Ampere1UnitXY]> {
646  let Latency = 5;
647}  // Floating-point multiply.
648def : WriteRes<WriteFDiv,  [Ampere1UnitXY]> {
649  let Latency = 34;
650}  // Floating-point division.
651def : WriteRes<WriteVd,    [Ampere1UnitXY]> {
652  let Latency = 3;
653}  // 64bit Vector D ops.
654def : WriteRes<WriteVq,    [Ampere1UnitXY]> {
655  let Latency = 3;
656}  // 128bit Vector Q ops.
657def : WriteRes<WriteVLD,   [Ampere1UnitL, Ampere1UnitL]> {
658  let Latency = 5;
659}  // Vector loads.  NOTE(review): two L units but NumMicroOps left at its default, unlike Ampere1Write_5cyc_2L -- confirm
660def : WriteRes<WriteVST,   [Ampere1UnitS, Ampere1UnitZ]> {
661  let Latency = 2;
662}  // Vector stores.  NOTE(review): S and Z units but NumMicroOps left at its default, unlike Ampere1Write_2cyc_1S_1Z -- confirm
663
664def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
665
666def : WriteRes<WriteSys,     []> { let Latency = 1; }
667def : WriteRes<WriteBarrier, []> { let Latency = 1; }
668def : WriteRes<WriteHint,    []> { let Latency = 1; }
669
670def : WriteRes<WriteLDHi,    []> {
671  let Latency = 4;
672}  // The second register of a load-pair: LDP,LDPSW,LDNP,LDXP,LDAXP
673
674// Forwarding logic.
675def : ReadAdvance<ReadI,       0>;
676def : ReadAdvance<ReadISReg,   0>;
677def : ReadAdvance<ReadIEReg,   0>;
678def : ReadAdvance<ReadIM,      0>;
679def : ReadAdvance<ReadIMA,     1, [WriteIM32, WriteIM64]>;
680def : ReadAdvance<ReadID,      0>;
681def : ReadAdvance<ReadExtrHi,  0>;
682def : ReadAdvance<ReadST,      0>;
683def : ReadAdvance<ReadAdrBase, 0>;
684def : ReadAdvance<ReadVLD,     0>;
685
686//===----------------------------------------------------------------------===//
687// Specialising the scheduling model further for Ampere-1.
688
689def : InstRW<[Ampere1Write_1cyc_1AB], (instrs COPY)>;
690
691// Branch instructions
692def : InstRW<[Ampere1Write_1cyc_1A], (instrs Bcc, BL, RET)>;
693def : InstRW<[Ampere1Write_1cyc_1A],
694        (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>;
695def : InstRW<[Ampere1Write_1cyc_2A], (instrs BLR)>;
696
697// Cryptography instructions
698// -- AES encryption/decryption
699def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AES[DE]")>;
700def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AESI?MC")>;
701// -- Polynomial multiplication
702def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^PMUL", "^PMULL")>;
703// -- SHA-256 hash
704def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA256(H|H2)")>;
705// -- SHA-256 schedule update
706def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA256SU[01]")>;
707// -- SHA-3 instructions
708def : InstRW<[Ampere1Write_2cyc_1XY],
709        (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>;
710// -- SHA-512 hash
711def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA512(H|H2)")>;
712// -- SHA-512 schedule update
713def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA512SU[01]")>;
714// -- SHA1 choose/majority/parity
715def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA1[CMP]")>;
716// -- SHA1 hash/schedule update
717def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1SU[01]")>;
718def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1H")>;
719
720// FP and vector load instructions
721// -- Load 1-element structure to one/all lanes
722// ---- all lanes
723def : InstRW<[Ampere1Write_7cyc_1L_1XY],
724        (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>;
725// ---- one lane
726def : InstRW<[Ampere1Write_7cyc_1L_1XY],
727        (instregex "^LD1i(8|16|32|64)")>;
728// -- Load 1-element structure to one/all lanes, 1D size
729def : InstRW<[Ampere1Write_5cyc_1L],
730        (instregex "^LD1Rv1d")>;
731// -- Load 1-element structures to 1 register
732def : InstRW<[Ampere1Write_5cyc_1L],
733        (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
734// -- Load 1-element structures to 2 registers
735def : InstRW<[Ampere1Write_5cyc_2L],
736        (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
737// -- Load 1-element structures to 3 registers
738def : InstRW<[Ampere1Write_6cyc_3L],
739        (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
740// -- Load 1-element structures to 4 registers
741def : InstRW<[Ampere1Write_6cyc_4L],
742        (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
743// -- Load 2-element structure to all lanes of 2 registers, 1D size
744def : InstRW<[Ampere1Write_5cyc_2L],
745        (instregex "^LD2Rv1d")>;
746// -- Load 2-element structure to all lanes of 2 registers, other sizes
747def : InstRW<[Ampere1Write_7cyc_2L_2XY],
748        (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>;
749// -- Load 2-element structure to one lane of 2 registers
750def : InstRW<[Ampere1Write_7cyc_2L_2XY],
751        (instregex "^LD2i(8|16|32|64)")>;
752// -- Load 2-element structures to 2 registers, 16B/8H/4S/2D size
753def : InstRW<[Ampere1Write_7cyc_2L_2XY],
754        (instregex "^LD2Twov(16b|8h|4s|2d)")>;
755// -- Load 2-element structures to 2 registers, 8B/4H/2S size
756def : InstRW<[Ampere1Write_9cyc_2L_3XY],
757        (instregex "^LD2Twov(8b|4h|2s)")>;
758// -- Load 3-element structure to all lanes of 3 registers, 1D size
759def : InstRW<[Ampere1Write_6cyc_3L],
760        (instregex "^LD3Rv1d")>;
761// -- Load 3-element structure to all lanes of 3 registers, other sizes
762def : InstRW<[Ampere1Write_8cyc_3L_3XY],
763        (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>;
764// -- Load 3-element structure to one lane of 3 registers
765def : InstRW<[Ampere1Write_8cyc_3L_3XY],
766        (instregex "^LD3i(8|16|32|64)")>;
767// -- Load 3-element structures to 3 registers, 16B/8H/4S sizes
768def : InstRW<[Ampere1Write_9cyc_3L_3XY],
769        (instregex "^LD3Threev(16b|8h|4s)")>;
770// -- Load 3-element structures to 3 registers, 2D size
771def : InstRW<[Ampere1Write_8cyc_3L_3XY],
772        (instregex "^LD3Threev2d")>;
773// -- Load 3-element structures to 3 registers, 8B/4H/2S sizes
774def : InstRW<[Ampere1Write_10cyc_3L_3XY],
775        (instregex "^LD3Threev(8b|4h|2s)")>;
776// -- Load 4-element structure to all lanes of 4 registers, 1D size
777def : InstRW<[Ampere1Write_6cyc_4L],
778        (instregex "^LD4Rv1d")>;
779// -- Load 4-element structure to all lanes of 4 registers, other sizes
780def : InstRW<[Ampere1Write_8cyc_4L_4XY],
781        (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>;
782// -- Load 4-element structure to one lane of 4 registers
783def : InstRW<[Ampere1Write_6cyc_4L],
784        (instregex "^LD4i(8|16|32|64)")>;
785// -- Load 4-element structures to 4 registers, 2D size
786def : InstRW<[Ampere1Write_9cyc_4L_4XY],
787        (instregex "^LD4Fourv2d")>;
788// -- Load 4-element structures to 4 registers, 2S size
789def : InstRW<[Ampere1Write_12cyc_4L_8XY],
790        (instregex "^LD4Fourv2s")>;
791// -- Load 4-element structures to 4 registers, other sizes
792def : InstRW<[Ampere1Write_11cyc_4L_8XY],
793        (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>;
794// -- Load pair, Q-form
795def : InstRW<[Ampere1Write_5cyc_2L], (instregex "^LDN?PQ")>;
796// -- Load pair, S/D-form
797def : InstRW<[Ampere1Write_5cyc_1L_1BS], (instregex "^LDN?P(S|D)")>;
798// -- Load register
799def : InstRW<[Ampere1Write_5cyc_1L], (instregex "^LDU?R[BHSDQ]i")>;
800// -- Load register, sign-extended register offset
801def : InstRW<[Ampere1Write_6cyc_1AB_1L], (instregex "^LDR[BHSDQ]ro(W|X)")>;
802
// FP and vector store instructions
//
// Structure stores: one record per register count, split further by
// arrangement wherever a different write class is used.
// -- ST1, one lane of 1 register
def : InstRW<[Ampere1Write_4cyc_1XY_1S_1Z], (instregex "^ST1i(8|16|32|64)")>;
// -- ST1, 1 register
def : InstRW<[Ampere1Write_2cyc_1S_1Z],
             (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- ST1, 2 registers
def : InstRW<[Ampere1Write_3cyc_2S_2Z],
             (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- ST1, 3 registers
def : InstRW<[Ampere1Write_4cyc_3S_3Z],
             (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- ST1, 4 registers
def : InstRW<[Ampere1Write_5cyc_4S_4Z],
             (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- ST2, one lane of 2 registers
def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z], (instregex "^ST2i(8|16|32|64)")>;
// -- ST2, 2 registers, 16B/8H/4S/2D arrangements
def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z],
             (instregex "^ST2Twov(16b|8h|4s|2d)")>;
// -- ST2, 2 registers, 8B/4H/2S arrangements
def : InstRW<[Ampere1Write_6cyc_2XY_2S_2Z], (instregex "^ST2Twov(8b|4h|2s)")>;
// -- ST3, one lane of 3 registers
def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z], (instregex "^ST3i(8|16|32|64)")>;
// -- ST3, 3 registers
def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z],
             (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- ST4, one lane of 4 registers
def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z], (instregex "^ST4i(8|16|32|64)")>;
// -- ST4, 4 registers, 16B/8H/4S arrangements
def : InstRW<[Ampere1Write_9cyc_8XY_4S_4Z],
             (instregex "^ST4Fourv(16b|8h|4s)")>;
// -- ST4, 4 registers, 2D arrangement
def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z], (instregex "^ST4Fourv2d")>;
// -- ST4, 4 registers, 8B/4H/2S arrangements
def : InstRW<[Ampere1Write_9cyc_6XY_4S_4Z], (instregex "^ST4Fourv(8b|4h|2s)")>;
// -- Store pair (STP/STNP), Q registers
def : InstRW<[Ampere1Write_3cyc_2S_2Z],
             (instregex "^STN?PQ")>;
// -- Store pair (STP/STNP), S or D registers
def : InstRW<[Ampere1Write_3cyc_1S_2Z],
             (instregex "^STN?P[SD]")>;
// -- Store register (STR/STUR), immediate-offset opcodes
def : InstRW<[Ampere1Write_2cyc_1S_1Z],
             (instregex "^STU?R[BHSDQ](ui|i)")>;
// -- Store register, register offset (X- or W-register index)
def : InstRW<[Ampere1Write_2cyc_1AB_1S_1Z],
             (instregex "^STR[BHSDQ]ro[XW]")>;
854
// FP data processing, bfloat16 format
// -- convert (scalar)
def : InstRW<[Ampere1Write_5cyc_1XY],
             (instrs BFCVT)>;
// -- convert and narrow (vector)
def : InstRW<[Ampere1Write_7cyc_2XY],
             (instrs BFCVTN, BFCVTN2)>;
// -- dot product
def : InstRW<[Ampere1Write_2cyc_1XY],
             (instregex "^BFDOTv", "^BF16DOT")>;
// -- matrix multiply-accumulate
def : InstRW<[Ampere1Write_4cyc_2XY],
             (instrs BFMMLA)>;
// -- widening multiply-accumulate
def : InstRW<[Ampere1Write_5cyc_1XY],
             (instregex "^BFMLAL")>;
861
// FP data processing, scalar/vector, half precision
// -- absolute value / absolute difference
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(ABD|ABS)v.[fi]16")>;
// -- add, subtract, negate, negated multiply
def : InstRW<[Ampere1Write_4cyc_1XY],
             (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>;
// -- compare producing a mask (vector, then scalar opcodes)
def : InstRW<[Ampere1Write_4cyc_1XY],
             (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY],
             (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>;
// -- compare, conditional compare, conditional select
def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^FCMPE?H")>;
def : InstRW<[Ampere1Write_10cyc_1A_1BS_1X], (instregex "^FCCMPE?H")>;
def : InstRW<[Ampere1Write_10cyc_1A_1BS_1XY], (instregex "^FCSELH")>;
// -- convert to/from integer
def : InstRW<[Ampere1Write_4cyc_1XY],
             (instregex "^FCVT[AMNPZ][SU]v.[if]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY],
             (instregex "^[SU]CVTFv.[fi]16")>;
// -- divide
def : InstRW<[Ampere1Write_25cyc_1XY],
             (instregex "^FDIVv.[if]16", "FDIVH")>;
// -- max/min, element-wise and pairwise
def : InstRW<[Ampere1Write_4cyc_1XY],
             (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>;
// -- max/min across vector, v4 then v8 opcodes
def : InstRW<[Ampere1Write_8cyc_2XY],
             (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>;
def : InstRW<[Ampere1Write_12cyc_3XY],
             (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>;
// -- multiply
def : InstRW<[Ampere1Write_4cyc_1XY],
             (instregex "^FMULX?v.[if]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY],
             (instrs FMULX16)>;
// -- multiply-accumulate, scalar then vector
def : InstRW<[Ampere1Write_4cyc_1XY],
             (instregex "^FN?M(ADD|SUB)[H]rrr")>;
def : InstRW<[Ampere1Write_4cyc_1XY],
             (instregex "^FML[AS]v.[if]16")>;
// -- reciprocal exponent and reciprocal/rsqrt step
def : InstRW<[Ampere1Write_4cyc_1XY],
             (instregex "^FRECPXv.[if]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY],
             (instregex "^F(RECP|RSQRT)S16")>;
// -- round to integral
def : InstRW<[Ampere1Write_4cyc_1XY],
             (instregex "^FRINT[AIMNPXZ]v.[if]16")>;
// -- square root
def : InstRW<[Ampere1Write_39cyc_1XY],
             (instregex "^FSQRTv.f16", "^FSQRTHr")>;
890
// FP data processing, scalar/vector, single/double precision
// -- absolute value / absolute difference
def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^F(ABD|ABS)v.[fi](32|64)")>;
// -- add, subtract, negate, negated multiply
def : InstRW<[Ampere1Write_5cyc_1XY],
             (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>;
// -- compare producing a mask (vector, then scalar opcodes)
def : InstRW<[Ampere1Write_5cyc_1XY],
             (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>;
def : InstRW<[Ampere1Write_5cyc_1XY],
             (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>;
// -- compare, conditional compare, conditional select
def : InstRW<[Ampere1Write_5cyc_1X], (instregex "^FCMPE?(S|D)")>;
def : InstRW<[Ampere1Write_11cyc_1A_1BS_1X], (instregex "^FCCMPE?(S|D)")>;
def : InstRW<[Ampere1Write_11cyc_1A_1BS_1XY], (instregex "^FCSEL(S|D)")>;
// -- convert to/from integer
def : InstRW<[Ampere1Write_6cyc_1XY],
             (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY],
             (instregex "^[SU]CVTFv.[fi](32|64)")>;
// -- divide, double then single precision
def : InstRW<[Ampere1Write_34cyc_1XY],
             (instregex "^FDIVv.[if](64)", "FDIVD")>;
def : InstRW<[Ampere1Write_19cyc_1XY],
             (instregex "^FDIVv.[if](32)", "FDIVS")>;
// -- max/min, element-wise and pairwise
def : InstRW<[Ampere1Write_5cyc_1XY],
             (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>;
// -- max/min across vector
def : InstRW<[Ampere1Write_10cyc_2XY],
             (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>;
// -- multiply
def : InstRW<[Ampere1Write_6cyc_1XY],
             (instregex "^FMULX?v.[if](32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY],
             (instrs FMULX32, FMULX64)>;
// -- multiply-accumulate, scalar then vector
def : InstRW<[Ampere1Write_5cyc_1XY],
             (instregex "^FN?M(ADD|SUB)[SD]rrr")>;
def : InstRW<[Ampere1Write_5cyc_1XY],
             (instregex "^FML[AS]v.[if](32|64)")>;
// -- reciprocal exponent and reciprocal/rsqrt step
def : InstRW<[Ampere1Write_5cyc_1XY],
             (instregex "^FRECPXv.[if](32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY],
             (instregex "^F(RECP|RSQRT)S(32|64)")>;
// -- round to integral
def : InstRW<[Ampere1Write_6cyc_1XY],
             (instregex "^FRINT[AIMNPXZ]v.[if](32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY],
             (instregex "^FRINT(32|64)")>;
// -- square root, double then single precision
def : InstRW<[Ampere1Write_62cyc_1XY],
             (instregex "^FSQRTv.f64", "^FSQRTDr")>;
def : InstRW<[Ampere1Write_32cyc_1XY],
             (instregex "^FSQRTv.f32", "^FSQRTSr")>;
921
// FP miscellaneous instructions
// -- convert FP to integer in a general-purpose register
def : InstRW<[Ampere1Write_10cyc_1XY_1Z],
             (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
// -- convert between FP precisions (scalar)
def : InstRW<[Ampere1Write_5cyc_1XY],
             (instregex "^FCVT[HSD]Hr")>;
def : InstRW<[Ampere1Write_6cyc_1XY],
             (instregex "^FCVT[HSD][SD]r")>;
// -- convert between FP precisions (vector, lengthening / narrowing)
def : InstRW<[Ampere1Write_6cyc_1XY],
             (instregex "^FCVTLv")>;
def : InstRW<[Ampere1Write_8cyc_2XY],
             (instregex "^FCVT(N|XN)v")>;
// -- JavaScript convert
def : InstRW<[Ampere1Write_10cyc_1X_1Z],
             (instrs FJCVTZS)>;
// -- FMOV, general-purpose register to FP register
def : InstRW<[Ampere1Write_5cyc_1BS],
             (instregex "^FMOV[HSD][WX]r")>;
def : InstRW<[Ampere1Write_7cyc_1BS_1XY],
             (instregex "^FMOVDXHighr")>;
// -- FMOV, FP register or immediate to FP register
def : InstRW<[Ampere1Write_2cyc_1XY],
             (instregex "^FMOV[HSD][ri]")>;
// -- FMOV, FP register to general-purpose register
def : InstRW<[Ampere1Write_6cyc_1XY_1Z],
             (instregex "^FMOVXDHighr")>;
def : InstRW<[Ampere1Write_4cyc_1Z],
             (instregex "^FMOV[WX][HSD]r")>;
934
// Integer arithmetic and logical instructions
// -- add/subtract with carry
def : InstRW<[Ampere1Write_1cyc_1A], (instregex "ADC(W|X)r", "SBC(W|X)r")>;
// -- arithmetic/logical, shifted- or extended-register operand
def : InstRW<[Ampere1Write_Arith],
             (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r[sx]")>;
// -- arithmetic/logical, plain register or immediate operand
def : InstRW<[Ampere1Write_1cyc_1AB],
             (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r[ri]")>;
// -- flag-setting forms of the two groups above
def : InstRW<[Ampere1Write_ArithFlagsetting],
             (instregex "(ADD|AND|BIC|SUB)S(W|X)r[sx]")>;
def : InstRW<[Ampere1Write_1cyc_1A],
             (instregex "(ADD|AND|BIC|SUB)S(W|X)r[ri]")>;
// -- flag-setting add/subtract with carry
def : InstRW<[Ampere1Write_1cyc_1A], (instregex "(ADC|SBC)S(W|X)r")>;
// -- rotate, mask and insert into flags
def : InstRW<[Ampere1Write_1cyc_1A], (instrs RMIF)>;
// -- conditional compare
def : InstRW<[Ampere1Write_1cyc_1A], (instregex "(CCMN|CCMP)(X|W)")>;
// -- conditional select
def : InstRW<[Ampere1Write_1cyc_1A],
             (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>;
// -- divide, W then X operands
def : InstRW<[Ampere1Write_18cyc_1BS], (instrs SDIVWr, UDIVWr)>;
def : InstRW<[Ampere1Write_34cyc_1BS], (instrs SDIVXr, UDIVXr)>;
// -- multiply high
def : InstRW<[Ampere1Write_3cyc_1BS], (instregex "(S|U)MULHr")>;
// -- multiply-add / multiply-subtract, including long forms
def : InstRW<[Ampere1Write_4cyc_1BS], (instregex "(S|U)?M(ADD|SUB)L?r")>;
958        (instregex "(S|U)?M(ADD|SUB)L?r")>;
959
960// Integer load instructions
961def : InstRW<[Ampere1Write_4cyc_2L],
962        (instregex "(LDNP|LDP|LDPSW)(X|W)")>;
963def : InstRW<[Ampere1Write_4cyc_1L],
964        (instregex "LDR(B|D|H|Q|S)ui")>;
965def : InstRW<[Ampere1Write_4cyc_1L],
966        (instregex "LDR(D|Q|W|X)l")>;
967def : InstRW<[Ampere1Write_4cyc_1L],
968        (instregex "LDTR(B|H|W|X)i")>;
969def : InstRW<[Ampere1Write_4cyc_1L],
970        (instregex "LDTRS(BW|BX|HW|HX|W)i")>;
971def : InstRW<[Ampere1Write_4cyc_1L],
972        (instregex "LDUR(BB|HH|X|W)i")>;
973def : InstRW<[Ampere1Write_4cyc_1L],
974        (instregex "LDURS(BW|BX|HW|HX|W)i")>;
975def : InstRW<[Ampere1Write_5cyc_1AB_1L],
976        (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>;
977def : InstRW<[Ampere1Write_1cyc_1L],
978        (instrs PRFMl, PRFUMi, PRFUMi)>;
979def : InstRW<[Ampere1Write_2cyc_1AB_1L],
980        (instrs PRFMroW, PRFMroX)>;
981
// Integer miscellaneous instructions
// -- PC-relative address generation
def : InstRW<[Ampere1Write_1cyc_1A],
             (instrs ADR, ADRP)>;
// -- extract register
def : InstRW<[Ampere1Write_1cyc_1B],
             (instregex "EXTR(W|X)")>;
// -- bitfield move (plain, signed, unsigned)
def : InstRW<[Ampere1Write_1cyc_1B],
             (instregex "(S|U)?BFM(W|X)")>;
// -- CRC32 / CRC32C checksum
def : InstRW<[Ampere1Write_3cyc_1BS],
             (instregex "^CRC32C?[BHWX]")>;
// -- count leading sign bits
def : InstRW<[Ampere1Write_1cyc_1B],
             (instregex "CLS(W|X)")>;
// -- set flags from an 8- or 16-bit value
def : InstRW<[Ampere1Write_1cyc_1A],
             (instrs SETF8, SETF16)>;
// -- move wide immediate
def : InstRW<[Ampere1Write_1cyc_1AB],
             (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
// -- reverse bits/bytes
def : InstRW<[Ampere1Write_1cyc_1B],
             (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>;
// -- shift/rotate by register
def : InstRW<[Ampere1Write_1cyc_1B],
             (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>;
995
// Integer store instructions
// -- store pair, non-temporal
def : InstRW<[Ampere1Write_1cyc_2S],
             (instregex "STNP(X|W)i")>;
// -- store pair, signed immediate offset
def : InstRW<[Ampere1Write_2cyc_1B_1S],
             (instrs STPWi, STPXi)>;
// -- store pair, pre/post-indexed
def : InstRW<[Ampere1Write_3cyc_1B_1S_1AB],
             (instregex "STP(W|X)(pre|post)")>;
// -- store register, unprivileged
def : InstRW<[Ampere1Write_1cyc_1S],
             (instrs STTRBi, STTRHi, STTRWi, STTRXi)>;
// -- store register, unscaled and unsigned immediate offset.
// The STUR pattern used to be listed twice; the redundant copy is dropped,
// which leaves the set of matched opcodes unchanged.
// NOTE(review): the duplicate may have been meant to cover other STR*ui
// opcodes (e.g. byte/halfword forms) -- confirm against the core's
// documentation before adding a pattern.
def : InstRW<[Ampere1Write_1cyc_1S],
             (instregex "STUR(BB|HH|X|W)i",
                        "STR(X|W)ui")>;
// -- store register, register offset (X index, then W index)
def : InstRW<[Ampere1Write_1cyc_2S],
             (instrs STRWroX, STRXroX)>;
def : InstRW<[Ampere1Write_2cyc_1AB_2S],
             (instrs STRWroW, STRXroW)>;
1010
// Pointer authentication
//
// The two commented-out records below (hint-space AUT* and PAC* opcodes) are
// kept exactly as disabled entries; this file does not say why they are off.
//def : InstRW<[Ampere1Write_7cyc_1BS],
//        (instrs AUTIAZ, AUTIBZ, AUTIASP, AUTIBSP, AUTIA1716, AUTIB1716)>;
//def : InstRW<[Ampere1Write_7cyc_1BS],
//        (instrs PACIAZ, PACIBZ, PACIASP, PACIBSP, PACIA1716, PACIB1716)>;
// -- authenticating branch / return / exception return
def : InstRW<[Ampere1Write_8cyc_1BS_1A],
             (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)", "ERETA(A|B)")>;
// -- authenticating branch with link
def : InstRW<[Ampere1Write_8cyc_1BS_2A],
             (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>;
// -- authenticating load
def : InstRW<[Ampere1Write_11cyc_1BS_1L],
             (instregex "^LDRA(A|B)")>;
// -- strip pointer authentication code
def : InstRW<[Ampere1Write_7cyc_1BS],
             (instrs XPACD, XPACI)>;
1022
// Vector integer instructions
// -- absolute difference (signed/unsigned pairs)
def : InstRW<[Ampere1Write_3cyc_1XY],
             (instregex "^SABAv", "^UABAv",
                        "^SABALv", "^UABALv",
                        "^SABDv", "^UABDv",
                        "^SABDLv", "^UABDLv")>;
// -- arithmetic
def : InstRW<[Ampere1Write_3cyc_1XY],
             (instregex "^ABSv", "^(ADD|SUB)v",
                        "^SADDLv", "^SADDW", "SHADD", "SHSUB",
                        "^SRHADD", "^URHADD", "SSUBL", "SSUBW",
                        "^UADDLv", "^UADDW", "UHADD", "UHSUB",
                        "USUBL", "USUBW")>;
// -- arithmetic, horizontal, 16B
def : InstRW<[Ampere1Write_12cyc_4XY],
             (instregex "^ADDVv16i8v", "^SADDLVv16i8v", "^UADDLVv16i8v")>;
def : InstRW<[Ampere1Write_12cyc_4XY],
             (instregex "^[SU](MIN|MAX)Vv16i8v")>;
// -- arithmetic, horizontal, 4H/4S
// NOTE(review): the add pattern selects v8i8/v4i16/v2i32 while the min/max
// pattern selects v4i16/v4i32; only the latter agrees with the 4H/4S heading.
// Confirm the intended arrangement split before relying on these latencies.
def : InstRW<[Ampere1Write_6cyc_2XY],
             (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>;
def : InstRW<[Ampere1Write_6cyc_2XY],
             (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>;
// -- arithmetic, horizontal, 8B/8H
// NOTE(review): same mismatch as above -- the add pattern selects
// v8i16/v4i32, the min/max pattern v8i8/v8i16.
def : InstRW<[Ampere1Write_9cyc_3XY],
             (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>;
def : InstRW<[Ampere1Write_9cyc_3XY],
             (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>;
// -- arithmetic, narrowing (plain, then rounding)
def : InstRW<[Ampere1Write_5cyc_2XY],
             (instregex "(ADD|SUB)HNv.*")>;
def : InstRW<[Ampere1Write_5cyc_2XY],
             (instregex "(RADD|RSUB)HNv.*")>;
// -- arithmetic, pairwise
def : InstRW<[Ampere1Write_3cyc_1XY],
             (instregex "^ADDPv", "^SADALP", "^UADALP",
                        "^SADDLPv", "^UADDLPv")>;
// -- arithmetic, saturating
def : InstRW<[Ampere1Write_3cyc_1XY],
             (instregex "^SQADD", "^SQSUB", "^SUQADD",
                        "^UQADD", "^UQSUB", "^USQADD")>;
// -- bit count (leading sign/zero bits, population count)
def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^(CLS|CLZ|CNT)v")>;
// -- compare
def : InstRW<[Ampere1Write_3cyc_1XY],
             (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv",
                        "^CMHIv", "^CMHSv")>;
// -- compare non-zero (bitwise test)
def : InstRW<[Ampere1Write_2cyc_1XY],
             (instregex "^CMTSTv")>;
// -- dot product
def : InstRW<[Ampere1Write_3cyc_1XY],
             (instregex "^(S|SU|U|US)DOTv")>;
// -- fp reciprocal / reciprocal square-root estimate
def : InstRW<[Ampere1Write_5cyc_1XY],
             (instregex "^FRECPEv", "^FRSQRTEv")>;
// -- integer reciprocal / reciprocal square-root estimate
def : InstRW<[Ampere1Write_5cyc_1XY],
             (instregex "^URECPEv", "^URSQRTEv")>;
// -- logical
def : InstRW<[Ampere1Write_2cyc_1XY],
             (instregex "^ANDv", "^BICv", "^EORv",
                        "^ORRv", "^ORNv", "^NOTv")>;
// -- logical, narrowing
def : InstRW<[Ampere1Write_5cyc_2XY],
             (instregex "RSHRNv", "SHRNv", "SQSHRNv", "SQSHRUNv", "UQXTNv")>;
// -- matrix multiply
def : InstRW<[Ampere1Write_6cyc_2XY], (instrs SMMLA, UMMLA, USMMLA)>;
// -- max/min, element-wise
def : InstRW<[Ampere1Write_3cyc_1XY],
             (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
// -- max/min, pairwise
def : InstRW<[Ampere1Write_3cyc_1XY],
             (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
// -- move immediate
def : InstRW<[Ampere1Write_2cyc_1XY],
             (instregex "^MOVIv", "^MVNIv")>;
// -- multiply
def : InstRW<[Ampere1Write_3cyc_1XY],
             (instregex "MULv", "SMULLv", "UMULLv",
                        "SQDMUL(H|L)v", "SQRDMULHv")>;
// -- multiply accumulate
def : InstRW<[Ampere1Write_3cyc_1XY],
             (instregex "MLAv", "MLSv",
                        "(S|U|SQD)(MLAL|MLSL)v", "SQRDML(A|S)Hv")>;
// -- negation, saturating
def : InstRW<[Ampere1Write_2cyc_1XY],
             (instregex "^SQABS", "^SQNEG")>;
// -- reverse bits/bytes
def : InstRW<[Ampere1Write_2cyc_1XY],
             (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>;
// -- shift (the four arrangements listed in the pattern only)
def : InstRW<[Ampere1Write_3cyc_1XY],
             (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
// -- shift and accumulate
def : InstRW<[Ampere1Write_3cyc_1XY],
             (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>;
// -- shift, saturating
def : InstRW<[Ampere1Write_3cyc_1XY],
             (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv",
                        "^SQSHL", "^SQSHLU",
                        "^SQXTNv", "^SQXTUNv",
                        "^UQSHRNv", "UQRSHRNv",
                        "^UQRSHL", "^UQSHL")>;
1111
// Vector miscellaneous instructions
// -- duplicate a vector element
def : InstRW<[Ampere1Write_2cyc_1XY],
             (instregex "^DUPv.+lane")>;
// -- duplicate a general-purpose register
def : InstRW<[Ampere1Write_5cyc_1BS],
             (instregex "^DUPv.+gpr")>;
// -- extract narrow
def : InstRW<[Ampere1Write_2cyc_1XY],
             (instregex "^XTNv")>;
// -- extract from register pair / insert element from another element
def : InstRW<[Ampere1Write_2cyc_1XY],
             (instregex "^EXTv", "^INSv.+lane")>;
// -- move FP immediate
def : InstRW<[Ampere1Write_2cyc_1XY],
             (instregex "^FMOVv")>;
// -- move element to a general-purpose register
def : InstRW<[Ampere1Write_6cyc_1XY_1Z],
             (instregex "(S|U)MOVv")>;
// -- insert element from a general-purpose register
def : InstRW<[Ampere1Write_7cyc_1BS_1XY],
             (instregex "^INSv.+gpr")>;
// -- table lookup, one record per table size (One through Four)
def : InstRW<[Ampere1Write_2cyc_1XY],
             (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>;
def : InstRW<[Ampere1Write_4cyc_2XY],
             (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>;
def : InstRW<[Ampere1Write_6cyc_3XY],
             (instrs TBLv8i8Three, TBLv16i8Three,
                     TBXv8i8Three, TBXv16i8Three)>;
def : InstRW<[Ampere1Write_8cyc_4XY],
             (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>;
// -- transpose and unzip
def : InstRW<[Ampere1Write_2cyc_1XY],
             (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>;
// -- zip
def : InstRW<[Ampere1Write_2cyc_1XY],
             (instregex "^ZIP1v", "^ZIP2v")>;
1141
1142} // SchedModel = Ampere1Model
1143