xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedAmpere1.td (revision 1f1e2261e341e6ca6862f82261066ef1705f0a7a)
1//=- AArch64SchedAmpere1.td - Ampere-1 scheduling def -----*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the machine model for the Ampere Computing Ampere-1 to
10// support instruction scheduling and other instruction cost heuristics.
11//
12//===----------------------------------------------------------------------===//
13
14// The Ampere-1 core is an out-of-order micro-architecture.  The front
15// end has branch prediction, with a 10-cycle recovery time from a
16// mispredicted branch.  Instructions coming out of the front end are
17// decoded into internal micro-ops (uops).
18
19def Ampere1Model : SchedMachineModel {
20  let IssueWidth            =   4;  // 4-way decode and dispatch
21  let MicroOpBufferSize     = 174;  // micro-op re-order buffer size
22  let LoadLatency           =   4;  // Optimistic load latency
23  let MispredictPenalty     =  10;  // Branch mispredict penalty
24  let LoopMicroOpBufferSize =  32;  // Instruction queue size
25  let CompleteModel = 1;
26
27  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
28                                                    SMEUnsupported.F);
29}
30
31let SchedModel = Ampere1Model in {
32
33//===----------------------------------------------------------------------===//
34// Define each kind of processor resource and number available on Ampere-1.
35// Ampere-1 has 12 pipelines that 8 independent scheduler (4 integer, 2 FP,
36// and 2 memory) issue into.  The integer and FP schedulers can each issue
37// one uop per cycle, while the memory schedulers can each issue one load
38// and one store address calculation per cycle.
39
40def Ampere1UnitA  : ProcResource<2>;  // integer single-cycle, branch, and flags r/w
41def Ampere1UnitB  : ProcResource<2>;  // integer single-cycle, and complex shifts
42def Ampere1UnitBS : ProcResource<1>;  // integer multi-cycle
43def Ampere1UnitL  : ProcResource<2>;  // load
44def Ampere1UnitS  : ProcResource<2>;  // store address calculation
45def Ampere1UnitX  : ProcResource<1>;  // FP and vector operations, and flag write
46def Ampere1UnitY  : ProcResource<1>;  // FP and vector operations, and crypto
47def Ampere1UnitZ  : ProcResource<1>;  // FP store data and FP-to-integer moves
48
49def Ampere1UnitAB : ProcResGroup<[Ampere1UnitA, Ampere1UnitB]>;
50def Ampere1UnitXY : ProcResGroup<[Ampere1UnitX, Ampere1UnitY]>;
51
52//===----------------------------------------------------------------------===//
53// Define customized scheduler read/write types specific to the Ampere-1.
54
55def Ampere1Write_1cyc_1A : SchedWriteRes<[Ampere1UnitA]> {
56  let Latency = 1;
57  let NumMicroOps = 1;
58}
59
60def Ampere1Write_1cyc_2A : SchedWriteRes<[Ampere1UnitA, Ampere1UnitA]> {
61  let Latency = 1;
62  let NumMicroOps = 2;
63}
64
65def Ampere1Write_1cyc_1B : SchedWriteRes<[Ampere1UnitB]> {
66  let Latency = 1;
67  let NumMicroOps = 1;
68}
69
70def Ampere1Write_1cyc_1AB : SchedWriteRes<[Ampere1UnitAB]> {
71  let Latency = 1;
72  let NumMicroOps = 1;
73}
74
75def Ampere1Write_1cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
76  let Latency = 1;
77  let NumMicroOps = 1;
78}
79
80def Ampere1Write_1cyc_1S : SchedWriteRes<[Ampere1UnitS]> {
81  let Latency = 1;
82  let NumMicroOps = 1;
83}
84
85def Ampere1Write_1cyc_2S : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS]> {
86  let Latency = 1;
87  let NumMicroOps = 2;
88}
89
90def Ampere1Write_2cyc_1Y : SchedWriteRes<[Ampere1UnitY]> {
91  let Latency = 2;
92  let NumMicroOps = 1;
93}
94
95def Ampere1Write_2cyc_2AB : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitAB]> {
96  let Latency = 2;
97  let NumMicroOps = 2;
98}
99
100def Ampere1Write_2cyc_1B_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitAB]> {
101  let Latency = 2;
102  let NumMicroOps = 2;
103}
104
105def Ampere1Write_2cyc_1B_1A : SchedWriteRes<[Ampere1UnitB, Ampere1UnitA]> {
106  let Latency = 2;
107  let NumMicroOps = 2;
108}
109
110def Ampere1Write_2cyc_1AB_1A : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitA]> {
111  let Latency = 2;
112  let NumMicroOps = 2;
113}
114
115def Ampere1Write_2cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
116  let Latency = 2;
117  let NumMicroOps = 2;
118}
119
120def Ampere1Write_2cyc_1AB_2S : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS,
121                                                             Ampere1UnitS]> {
122  let Latency = 2;
123  let NumMicroOps = 3;
124}
125
126def Ampere1Write_2cyc_1AB_1S_1Z : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS,
127                                                                Ampere1UnitZ]> {
128  let Latency = 2;
129  let NumMicroOps = 3;
130}
131
132def Ampere1Write_2cyc_1B_1S : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS]> {
133  let Latency = 2;
134  let NumMicroOps = 2;
135}
136
137def Ampere1Write_2cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
138  let Latency = 2;
139  let NumMicroOps = 1;
140}
141
142def Ampere1Write_2cyc_1S_1Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ]> {
143  let Latency = 2;
144  let NumMicroOps = 2;
145}
146
147def Ampere1Write_3cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
148  let Latency = 3;
149  let NumMicroOps = 1;
150}
151
152def Ampere1Write_3cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
153  let Latency = 3;
154  let NumMicroOps = 1;
155}
156
157def Ampere1Write_3cyc_1B_1S_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS,
158                                                               Ampere1UnitAB]> {
159  let Latency = 2;
160  let NumMicroOps = 3;
161}
162
163def Ampere1Write_3cyc_1S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ, Ampere1UnitZ]> {
164  let Latency = 2;
165  let NumMicroOps = 3;
166}
167
168def Ampere1Write_3cyc_2S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
169                                             Ampere1UnitZ, Ampere1UnitZ]> {
170  let Latency = 2;
171  let NumMicroOps = 4;
172}
173
174def Ampere1Write_4cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
175  let Latency = 4;
176  let NumMicroOps = 1;
177}
178
179def Ampere1Write_4cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
180  let Latency = 4;
181  let NumMicroOps = 1;
182}
183
184def Ampere1Write_4cyc_1X : SchedWriteRes<[Ampere1UnitX]> {
185  let Latency = 4;
186  let NumMicroOps = 1;
187}
188
189def Ampere1Write_4cyc_1Y : SchedWriteRes<[Ampere1UnitY]> {
190  let Latency = 4;
191  let NumMicroOps = 1;
192}
193
194def Ampere1Write_4cyc_1Z : SchedWriteRes<[Ampere1UnitZ]> {
195  let Latency = 4;
196  let NumMicroOps = 1;
197}
198
199def Ampere1Write_4cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> {
200  let Latency = 4;
201  let NumMicroOps = 2;
202}
203
204def Ampere1Write_4cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
205  let Latency = 4;
206  let NumMicroOps = 1;
207}
208
209def Ampere1Write_4cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
210  let Latency = 4;
211  let NumMicroOps = 2;
212}
213
214def Ampere1Write_4cyc_1XY_1S_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitS, Ampere1UnitZ]> {
215  let Latency = 4;
216  let NumMicroOps = 3;
217}
218
219def Ampere1Write_4cyc_3S_3Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS, Ampere1UnitS,
220                                             Ampere1UnitZ, Ampere1UnitZ, Ampere1UnitZ]> {
221  let Latency = 4;
222  let NumMicroOps = 6;
223}
224
225def Ampere1Write_5cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
226  let Latency = 5;
227  let NumMicroOps = 2;
228}
229
230def Ampere1Write_5cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
231  let Latency = 5;
232  let NumMicroOps = 1;
233}
234
235def Ampere1Write_5cyc_1X : SchedWriteRes<[Ampere1UnitX]> {
236  let Latency = 5;
237  let NumMicroOps = 1;
238}
239
240def Ampere1Write_5cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
241  let Latency = 5;
242  let NumMicroOps = 1;
243}
244
245def Ampere1Write_5cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> {
246  let Latency = 5;
247  let NumMicroOps = 2;
248}
249
250def Ampere1Write_5cyc_1L_1BS : SchedWriteRes<[Ampere1UnitL, Ampere1UnitBS]> {
251  let Latency = 5;
252  let NumMicroOps = 2;
253}
254
255def Ampere1Write_5cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
256  let Latency = 5;
257  let NumMicroOps = 1;
258}
259
260def Ampere1Write_5cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
261  let Latency = 5;
262  let NumMicroOps = 2;
263}
264
265def Ampere1Write_5cyc_4S_4Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
266                                             Ampere1UnitS, Ampere1UnitS,
267                                             Ampere1UnitZ, Ampere1UnitZ,
268                                             Ampere1UnitZ, Ampere1UnitZ]> {
269  let Latency = 5;
270  let NumMicroOps = 8;
271}
272
273def Ampere1Write_5cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
274                                                 Ampere1UnitS, Ampere1UnitS,
275                                                 Ampere1UnitZ, Ampere1UnitZ]> {
276  let Latency = 5;
277  let NumMicroOps = 6;
278}
279
280def Ampere1Write_6cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
281                                                 Ampere1UnitS, Ampere1UnitS,
282                                                 Ampere1UnitZ, Ampere1UnitZ]> {
283  let Latency = 6;
284  let NumMicroOps = 6;
285}
286
287def Ampere1Write_6cyc_3XY_3S_3Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY,
288                                                 Ampere1UnitS, Ampere1UnitS, Ampere1UnitS,
289                                                 Ampere1UnitZ, Ampere1UnitZ, Ampere1UnitZ]> {
290  let Latency = 6;
291  let NumMicroOps = 9;
292}
293
294def Ampere1Write_6cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
295  let Latency = 6;
296  let NumMicroOps = 2;
297}
298
299def Ampere1Write_6cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
300  let Latency = 6;
301  let NumMicroOps = 1;
302}
303
304def Ampere1Write_6cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
305  let Latency = 6;
306  let NumMicroOps = 2;
307}
308
309def Ampere1Write_6cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
310  let Latency = 6;
311  let NumMicroOps = 3;
312}
313
314def Ampere1Write_6cyc_3L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL]> {
315  let Latency = 6;
316  let NumMicroOps = 3;
317}
318
319def Ampere1Write_6cyc_4L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
320                                          Ampere1UnitL, Ampere1UnitL]> {
321  let Latency = 6;
322  let NumMicroOps = 4;
323}
324
325def Ampere1Write_6cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> {
326  let Latency = 6;
327  let NumMicroOps = 2;
328}
329
330def Ampere1Write_7cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
331  let Latency = 7;
332  let NumMicroOps = 1;
333}
334
335def Ampere1Write_7cyc_1BS_1XY : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitXY]> {
336  let Latency = 7;
337  let NumMicroOps = 2;
338}
339
340def Ampere1Write_7cyc_1L_1XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitXY]> {
341  let Latency = 7;
342  let NumMicroOps = 2;
343}
344
345def Ampere1Write_7cyc_2L_2XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
346                                              Ampere1UnitXY, Ampere1UnitXY]> {
347  let Latency = 7;
348  let NumMicroOps = 4;
349}
350
351def Ampere1Write_7cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
352  let Latency = 7;
353  let NumMicroOps = 2;
354}
355
356def Ampere1Write_7cyc_4XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
357                                                 Ampere1UnitXY, Ampere1UnitXY,
358                                                 Ampere1UnitS, Ampere1UnitS,
359                                                 Ampere1UnitS, Ampere1UnitS,
360                                                 Ampere1UnitZ, Ampere1UnitZ,
361                                                 Ampere1UnitZ, Ampere1UnitZ]> {
362  let Latency = 7;
363  let NumMicroOps = 12;
364}
365
366def Ampere1Write_8cyc_1BS_1A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA]> {
367  let Latency = 8;
368  let NumMicroOps = 2;
369}
370
371def Ampere1Write_8cyc_1BS_2A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA,
372                                                             Ampere1UnitA]> {
373  let Latency = 8;
374  let NumMicroOps = 3;
375}
376
377def Ampere1Write_8cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
378  let Latency = 8;
379  let NumMicroOps = 2;
380}
381
382def Ampere1Write_8cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
383                                           Ampere1UnitXY, Ampere1UnitXY]> {
384  let Latency = 8;
385  let NumMicroOps = 4;
386}
387
388def Ampere1Write_8cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
389                                              Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
390  let Latency = 8;
391  let NumMicroOps = 6;
392}
393
394def Ampere1Write_8cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
395                                              Ampere1UnitL, Ampere1UnitL,
396                                              Ampere1UnitXY, Ampere1UnitXY,
397                                              Ampere1UnitXY, Ampere1UnitXY]> {
398  let Latency = 8;
399  let NumMicroOps = 8;
400}
401
402def Ampere1Write_9cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
403                                              Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
404  let Latency = 9;
405  let NumMicroOps = 6;
406}
407
408def Ampere1Write_9cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
409                                              Ampere1UnitL, Ampere1UnitL,
410                                              Ampere1UnitXY, Ampere1UnitXY,
411                                              Ampere1UnitXY, Ampere1UnitXY]> {
412  let Latency = 9;
413  let NumMicroOps = 8;
414}
415
416def Ampere1Write_9cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
417  let Latency = 9;
418  let NumMicroOps = 3;
419}
420
421def Ampere1Write_9cyc_2L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
422                                              Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
423  let Latency = 9;
424  let NumMicroOps = 5;
425}
426
427def Ampere1Write_9cyc_6XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
428                                                 Ampere1UnitXY, Ampere1UnitXY,
429                                                 Ampere1UnitXY, Ampere1UnitXY,
430                                                 Ampere1UnitS, Ampere1UnitS,
431                                                 Ampere1UnitS, Ampere1UnitS,
432                                                 Ampere1UnitZ, Ampere1UnitZ,
433                                                 Ampere1UnitZ, Ampere1UnitZ]> {
434  let Latency = 9;
435  let NumMicroOps = 14;
436}
437
438def Ampere1Write_9cyc_8XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
439                                                 Ampere1UnitXY, Ampere1UnitXY,
440                                                 Ampere1UnitXY, Ampere1UnitXY,
441                                                 Ampere1UnitXY, Ampere1UnitXY,
442                                                 Ampere1UnitS, Ampere1UnitS,
443                                                 Ampere1UnitS, Ampere1UnitS,
444                                                 Ampere1UnitZ, Ampere1UnitZ,
445                                                 Ampere1UnitZ, Ampere1UnitZ]> {
446  let Latency = 9;
447  let NumMicroOps = 16;
448}
449
450def Ampere1Write_10cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
451  let Latency = 10;
452  let NumMicroOps = 2;
453}
454
455def Ampere1Write_10cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> {
456  let Latency = 10;
457  let NumMicroOps = 2;
458}
459
460def Ampere1Write_10cyc_1X_1Z : SchedWriteRes<[Ampere1UnitX, Ampere1UnitZ]> {
461  let Latency = 10;
462  let NumMicroOps = 2;
463}
464
465def Ampere1Write_10cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL, Ampere1UnitL,
466                                               Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
467  let Latency = 10;
468  let NumMicroOps = 6;
469}
470
471def Ampere1Write_10cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitX]> {
472  let Latency = 10;
473  let NumMicroOps = 3;
474}
475
476def Ampere1Write_10cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitXY]> {
477  let Latency = 10;
478  let NumMicroOps = 3;
479}
480
481def Ampere1Write_11cyc_1BS_1L : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitL]> {
482  let Latency = 11;
483  let NumMicroOps = 2;
484}
485
486def Ampere1Write_11cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitX]> {
487  let Latency = 11;
488  let NumMicroOps = 3;
489}
490
491def Ampere1Write_11cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS, Ampere1UnitXY]> {
492  let Latency = 11;
493  let NumMicroOps = 3;
494}
495
496def Ampere1Write_11cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
497                                               Ampere1UnitL, Ampere1UnitL,
498                                               Ampere1UnitXY, Ampere1UnitXY,
499                                               Ampere1UnitXY, Ampere1UnitXY,
500                                               Ampere1UnitXY, Ampere1UnitXY,
501                                               Ampere1UnitXY, Ampere1UnitXY]> {
502  let Latency = 11;
503  let NumMicroOps = 12;
504}
505
506def Ampere1Write_12cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
507                                               Ampere1UnitL, Ampere1UnitL,
508                                               Ampere1UnitXY, Ampere1UnitXY,
509                                               Ampere1UnitXY, Ampere1UnitXY,
510                                               Ampere1UnitXY, Ampere1UnitXY,
511                                               Ampere1UnitXY, Ampere1UnitXY]> {
512  let Latency = 12;
513  let NumMicroOps = 12;
514}
515
516def Ampere1Write_12cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY, Ampere1UnitXY]> {
517  let Latency = 12;
518  let NumMicroOps = 3;
519}
520
521def Ampere1Write_12cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
522                                            Ampere1UnitXY, Ampere1UnitXY]> {
523  let Latency = 12;
524  let NumMicroOps = 4;
525}
526
527def Ampere1Write_18cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
528  let Latency = 18;
529  let NumMicroOps = 1;
530}
531
532def Ampere1Write_19cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
533  let Latency = 19;
534  let NumMicroOps = 1;
535}
536
537def Ampere1Write_25cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
538  let Latency = 25;
539  let NumMicroOps = 1;
540}
541
542def Ampere1Write_32cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
543  let Latency = 32;
544  let NumMicroOps = 1;
545}
546
547def Ampere1Write_34cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
548  let Latency = 34;
549  let NumMicroOps = 1;
550}
551
552def Ampere1Write_34cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
553  let Latency = 34;
554  let NumMicroOps = 1;
555}
556
557def Ampere1Write_39cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
558  let Latency = 39;
559  let NumMicroOps = 1;
560}
561
562def Ampere1Write_62cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
563  let Latency = 62;
564  let NumMicroOps = 1;
565}
566
567// For basic arithmetic, we have more flexibility for short shifts (LSL shift <= 4),
568// which are a single uop, and for extended registers, which have full flexibility
569// across Unit A or B for both uops.
570def Ampere1Write_Arith : SchedWriteVariant<[
571                                SchedVar<RegExtendedPred, [Ampere1Write_2cyc_2AB]>,
572                                SchedVar<AmpereCheapLSL,  [Ampere1Write_1cyc_1AB]>,
573                                SchedVar<NoSchedPred,     [Ampere1Write_2cyc_1B_1AB]>]>;
574
575def Ampere1Write_ArithFlagsetting : SchedWriteVariant<[
576                                SchedVar<RegExtendedPred, [Ampere1Write_2cyc_1AB_1A]>,
577                                SchedVar<AmpereCheapLSL,  [Ampere1Write_1cyc_1A]>,
578                                SchedVar<NoSchedPred,     [Ampere1Write_2cyc_1B_1A]>]>;
579
580//===----------------------------------------------------------------------===//
581// Map the target-defined scheduler read/write resources and latencies for Ampere-1.
582// This provides a coarse model, which is then specialised below.
583
584def : WriteRes<WriteImm,   [Ampere1UnitAB]>;  // MOVN, MOVZ
585def : WriteRes<WriteI,     [Ampere1UnitAB]>;  // ALU
586def : WriteRes<WriteISReg, [Ampere1UnitB, Ampere1UnitA]> {
587  let Latency = 2;
588  let NumMicroOps = 2;
589}  // ALU of Shifted-Reg
590def : WriteRes<WriteIEReg, [Ampere1UnitAB, Ampere1UnitA]> {
591  let Latency = 2;
592  let NumMicroOps = 2;
593}  // ALU of Extended-Reg
594def : WriteRes<WriteExtr,  [Ampere1UnitB]>;  // EXTR shifts a reg pair
595def : WriteRes<WriteIS,    [Ampere1UnitB]>;  // Shift/Scale
596def : WriteRes<WriteID32,  [Ampere1UnitBS]> {
597  let Latency = 18;
598}  // 32-bit Divide
599def : WriteRes<WriteID64,  [Ampere1UnitBS]> {
600  let Latency = 34;
601}  // 64-bit Divide
602def : WriteRes<WriteIM32,  [Ampere1UnitBS]> {
603  let Latency = 3;
604}  // 32-bit Multiply
605def : WriteRes<WriteIM64,  [Ampere1UnitBS]> {
606  let Latency = 3;
607}  // 32-bit Multiply
608def : WriteRes<WriteBr,    [Ampere1UnitA]>;
609def : WriteRes<WriteBrReg, [Ampere1UnitA, Ampere1UnitA]>;
610def : WriteRes<WriteLD,    [Ampere1UnitL]> {
611  let Latency = 4;
612}  // Load from base addr plus immediate offset
613def : WriteRes<WriteST,    [Ampere1UnitS]> {
614  let Latency = 1;
615}  // Store to base addr plus immediate offset
616def : WriteRes<WriteSTP,   [Ampere1UnitS, Ampere1UnitS]> {
617  let Latency = 1;
618  let NumMicroOps = 2;
619}  // Store a register pair.
620def : WriteRes<WriteAdr,   [Ampere1UnitAB]>;
621def : WriteRes<WriteLDIdx, [Ampere1UnitAB, Ampere1UnitS]> {
622  let Latency = 5;
623  let NumMicroOps = 2;
624}  // Load from a register index (maybe scaled).
625def : WriteRes<WriteSTIdx, [Ampere1UnitS, Ampere1UnitS]> {
626  let Latency = 1;
627  let NumMicroOps = 2;
628}  // Store to a register index (maybe scaled).
629def : WriteRes<WriteF,  [Ampere1UnitXY]> {
630  let Latency = 2;
631}  // General floating-point ops.
632def : WriteRes<WriteFCmp,  [Ampere1UnitX]> {
633  let Latency = 5;
634}  // Floating-point compare.
635def : WriteRes<WriteFCvt,  [Ampere1UnitXY]> {
636  let Latency = 6;
637}  // Float conversion.
638def : WriteRes<WriteFCopy, [Ampere1UnitXY]> {
639}  // Float-int register copy.
640def : WriteRes<WriteFImm,  [Ampere1UnitXY]> {
641  let Latency = 2;
642}  // Float-int register copy.
643def : WriteRes<WriteFMul,  [Ampere1UnitXY]> {
644  let Latency = 5;
645}  // Floating-point multiply.
646def : WriteRes<WriteFDiv,  [Ampere1UnitXY]> {
647  let Latency = 34;
648}  // Floating-point division.
649def : WriteRes<WriteVd,    [Ampere1UnitXY]> {
650  let Latency = 3;
651}  // 64bit Vector D ops.
652def : WriteRes<WriteVq,    [Ampere1UnitXY]> {
653  let Latency = 3;
654}  // 128bit Vector Q ops.
655def : WriteRes<WriteVLD,   [Ampere1UnitL, Ampere1UnitL]> {
656  let Latency = 5;
657}  // Vector loads.
658def : WriteRes<WriteVST,   [Ampere1UnitS, Ampere1UnitZ]> {
659  let Latency = 2;
660}  // Vector stores.
661
662def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
663
664def : WriteRes<WriteSys,     []> { let Latency = 1; }
665def : WriteRes<WriteBarrier, []> { let Latency = 1; }
666def : WriteRes<WriteHint,    []> { let Latency = 1; }
667
668def : WriteRes<WriteLDHi,    []> {
669  let Latency = 4;
670}  // The second register of a load-pair: LDP,LDPSW,LDNP,LDXP,LDAXP
671
672// Forwarding logic.
673def : ReadAdvance<ReadI,       0>;
674def : ReadAdvance<ReadISReg,   0>;
675def : ReadAdvance<ReadIEReg,   0>;
676def : ReadAdvance<ReadIM,      0>;
677def : ReadAdvance<ReadIMA,     1, [WriteIM32, WriteIM64]>;
678def : ReadAdvance<ReadID,      0>;
679def : ReadAdvance<ReadExtrHi,  0>;
680def : ReadAdvance<ReadST,      0>;
681def : ReadAdvance<ReadAdrBase, 0>;
682def : ReadAdvance<ReadVLD,     0>;
683
684//===----------------------------------------------------------------------===//
685// Specialising the scheduling model further for Ampere-1.
686
687def : InstRW<[Ampere1Write_1cyc_1AB], (instrs COPY)>;
688
689// Branch instructions
690def : InstRW<[Ampere1Write_1cyc_1A], (instrs Bcc, BL, RET)>;
691def : InstRW<[Ampere1Write_1cyc_1A],
692        (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>;
693def : InstRW<[Ampere1Write_1cyc_2A], (instrs BLR)>;
694
695// Cryptography instructions
696// -- AES encryption/decryption
697def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AES[DE]")>;
698def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AESI?MC")>;
699// -- Polynomial multiplication
700def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^PMUL", "^PMULL")>;
701// -- SHA-256 hash
702def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA256(H|H2)")>;
703// -- SHA-256 schedule update
704def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA256SU[01]")>;
705// -- SHA-3 instructions
706def : InstRW<[Ampere1Write_2cyc_1XY],
707        (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>;
708// -- SHA-512 hash
709def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA512(H|H2)")>;
710// -- SHA-512 schedule update
711def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA512SU[01]")>;
712// -- SHA1 choose/majority/parity
713def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA1[CMP]")>;
714// -- SHA1 hash/schedule update
715def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1SU[01]")>;
716def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1H")>;
717
718// FP and vector load instructions
719// -- Load 1-element structure to one/all lanes
720// ---- all lanes
721def : InstRW<[Ampere1Write_7cyc_1L_1XY],
722        (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>;
723// ---- one lane
724def : InstRW<[Ampere1Write_7cyc_1L_1XY],
725        (instregex "^LD1i(8|16|32|64)")>;
726// -- Load 1-element structure to one/all lanes, 1D size
727def : InstRW<[Ampere1Write_5cyc_1L],
728        (instregex "^LD1Rv1d")>;
729// -- Load 1-element structures to 1 register
730def : InstRW<[Ampere1Write_5cyc_1L],
731        (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
732// -- Load 1-element structures to 2 registers
733def : InstRW<[Ampere1Write_5cyc_2L],
734        (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
735// -- Load 1-element structures to 3 registers
736def : InstRW<[Ampere1Write_6cyc_3L],
737        (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
738// -- Load 1-element structures to 4 registers
739def : InstRW<[Ampere1Write_6cyc_4L],
740        (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
741// -- Load 2-element structure to all lanes of 2 registers, 1D size
742def : InstRW<[Ampere1Write_5cyc_2L],
743        (instregex "^LD2Rv1d")>;
744// -- Load 2-element structure to all lanes of 2 registers, other sizes
745def : InstRW<[Ampere1Write_7cyc_2L_2XY],
746        (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>;
747// -- Load 2-element structure to one lane of 2 registers
748def : InstRW<[Ampere1Write_7cyc_2L_2XY],
749        (instregex "^LD2i(8|16|32|64)")>;
750// -- Load 2-element structures to 2 registers, 16B/8H/4S/2D size
751def : InstRW<[Ampere1Write_7cyc_2L_2XY],
752        (instregex "^LD2Twov(16b|8h|4s|2d)")>;
753// -- Load 2-element structures to 2 registers, 8B/4H/2S size
754def : InstRW<[Ampere1Write_9cyc_2L_3XY],
755        (instregex "^LD2Twov(8b|4h|2s)")>;
756// -- Load 3-element structure to all lanes of 3 registers, 1D size
757def : InstRW<[Ampere1Write_6cyc_3L],
758        (instregex "^LD3Rv1d")>;
759// -- Load 3-element structure to all lanes of 3 registers, other sizes
760def : InstRW<[Ampere1Write_8cyc_3L_3XY],
761        (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>;
762// -- Load 3-element structure to one lane of 3 registers
763def : InstRW<[Ampere1Write_8cyc_3L_3XY],
764        (instregex "^LD3i(8|16|32|64)")>;
765// -- Load 3-element structures to 3 registers, 16B/8H/4S sizes
766def : InstRW<[Ampere1Write_9cyc_3L_3XY],
767        (instregex "^LD3Threev(16b|8h|4s)")>;
768// -- Load 3-element structures to 3 registers, 2D size
769def : InstRW<[Ampere1Write_8cyc_3L_3XY],
770        (instregex "^LD3Threev2d")>;
771// -- Load 3-element structures to 3 registers, 8B/4H/2S sizes
772def : InstRW<[Ampere1Write_10cyc_3L_3XY],
773        (instregex "^LD3Threev(8b|4h|2s)")>;
774// -- Load 4-element structure to all lanes of 4 registers, 1D size
775def : InstRW<[Ampere1Write_6cyc_4L],
776        (instregex "^LD4Rv1d")>;
777// -- Load 4-element structure to all lanes of 4 registers, other sizes
778def : InstRW<[Ampere1Write_8cyc_4L_4XY],
779        (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>;
780// -- Load 4-element structure to one lane of 4 registers
781def : InstRW<[Ampere1Write_6cyc_4L],
782        (instregex "^LD4i(8|16|32|64)")>;
783// -- Load 4-element structures to 4 registers, 2D size
784def : InstRW<[Ampere1Write_9cyc_4L_4XY],
785        (instregex "^LD4Fourv2d")>;
786// -- Load 4-element structures to 4 registers, 2S size
787def : InstRW<[Ampere1Write_12cyc_4L_8XY],
788        (instregex "^LD4Fourv2s")>;
789// -- Load 4-element structures to 4 registers, other sizes
790def : InstRW<[Ampere1Write_11cyc_4L_8XY],
791        (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>;
792// -- Load pair, Q-form
793def : InstRW<[Ampere1Write_5cyc_2L], (instregex "LDN?PQ")>;
794// -- Load pair, S/D-form
795def : InstRW<[Ampere1Write_5cyc_1L_1BS], (instregex "LDN?P(S|D)")>;
796// -- Load register
797def : InstRW<[Ampere1Write_5cyc_1L], (instregex "LDU?R[BHSDQ]i")>;
798// -- Load register, sign-extended register
799def : InstRW<[Ampere1Write_6cyc_1AB_1L], (instregex "LDR[BHSDQ]ro(W|X)")>;
800
801// FP and vector store instructions
802// -- Store 1-element structure from one lane of 1 register
803def : InstRW<[Ampere1Write_4cyc_1XY_1S_1Z],
804        (instregex "^ST1i(8|16|32|64)")>;
805// -- Store 1-element structures from 1 register
806def : InstRW<[Ampere1Write_2cyc_1S_1Z],
807        (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
808// -- Store 1-element structures from 2 registers
809def : InstRW<[Ampere1Write_3cyc_2S_2Z],
810        (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
811// -- Store 1-element structures from 3 registers
812def : InstRW<[Ampere1Write_4cyc_3S_3Z],
813        (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
814// -- Store 1-element structures from 4 registers
815def : InstRW<[Ampere1Write_5cyc_4S_4Z],
816        (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
817// -- Store 2-element structure from one lane of 2 registers
818def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z],
819        (instregex "^ST2i(8|16|32|64)")>;
820// -- Store 2-element structures from 2 registers, 16B/8H/4S/2D sizes
821def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z],
822        (instregex "^ST2Twov(16b|8h|4s|2d)")>;
823// -- Store 2-element structures from 2 registers, 8B/4H/2S sizes
824def : InstRW<[Ampere1Write_6cyc_2XY_2S_2Z],
825        (instregex "^ST2Twov(8b|4h|2s)")>;
826// -- Store 3-element structure from one lane of 3 registers
827def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z],
828        (instregex "^ST3i(8|16|32|64)")>;
829// -- Store 3-element structures from 3 registers
830def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z],
831        (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
832// -- Store 4-element structure from one lane of 4 registers
833def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z],
834        (instregex "^ST4i(8|16|32|64)")>;
835// -- Store 4-element structures from 4 registers, 16B/8H/4S sizes
836def : InstRW<[Ampere1Write_9cyc_8XY_4S_4Z],
837        (instregex "^ST4Fourv(16b|8h|4s)")>;
838// -- Store 4-element structures from 4 registers, 2D sizes
839def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z],
840        (instregex "^ST4Fourv2d")>;
841// -- Store 4-element structures from 4 registers, 8B/4H/2S sizes
842def : InstRW<[Ampere1Write_9cyc_6XY_4S_4Z],
843        (instregex "^ST4Fourv(8b|4h|2s)")>;
844// -- Store pair, Q-form
845def : InstRW<[Ampere1Write_3cyc_2S_2Z], (instregex "^STN?PQ")>;
846// -- Store pair, S/D-form
847def : InstRW<[Ampere1Write_3cyc_1S_2Z],	(instregex "^STN?P[SD]")>;
848// -- Store register
849def : InstRW<[Ampere1Write_2cyc_1S_1Z],	(instregex "^STU?R[BHSDQ](ui|i)")>;
850// -- Store register, sign-extended register offset
851def : InstRW<[Ampere1Write_2cyc_1AB_1S_1Z], (instregex "^STR[BHSDQ]ro[XW]")>;
852
853// FP data processing, bfloat16 format
854def : InstRW<[Ampere1Write_5cyc_1XY], (instrs BFCVT)>;
855def : InstRW<[Ampere1Write_7cyc_2XY], (instrs BFCVTN, BFCVTN2)>;
856def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^BFDOTv", "^BF16DOT")>;
857def : InstRW<[Ampere1Write_4cyc_2XY], (instrs BFMMLA)>;
858def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^BFMLAL")>;
859
860// FP data processing, scalar/vector, half precision
861def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(ABD|ABS)v.[fi]16")>;
862def : InstRW<[Ampere1Write_4cyc_1XY],
863        (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>;
864def : InstRW<[Ampere1Write_4cyc_1XY],
865        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>;
866def : InstRW<[Ampere1Write_4cyc_1XY],
867        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>;
868def : InstRW<[Ampere1Write_4cyc_1X],
869        (instregex "^FCMPE?H")>;
870def : InstRW<[Ampere1Write_10cyc_1A_1BS_1X],
871        (instregex "^FCCMPE?H")>;
872def : InstRW<[Ampere1Write_10cyc_1A_1BS_1XY],
873        (instregex "^FCSELH")>;
874def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if]16")>;
875def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^[SU]CVTFv.[fi]16")>;
876def : InstRW<[Ampere1Write_25cyc_1XY], (instregex "^FDIVv.[if]16", "FDIVH")>;
877def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>;
878def : InstRW<[Ampere1Write_8cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>;
879def : InstRW<[Ampere1Write_12cyc_3XY], (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>;
880def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FMULX?v.[if]16")>;
881def : InstRW<[Ampere1Write_4cyc_1XY], (instrs FMULX16)>;
882def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[H]rrr")>;
883def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FML[AS]v.[if]16")>;
884def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FRECPXv.[if]16")>;
885def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(RECP|RSQRT)S16")>;
886def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if]16")>;
887def : InstRW<[Ampere1Write_39cyc_1XY], (instregex "^FSQRTv.f16", "^FSQRTHr")>;
888
889// FP data processing, scalar/vector, single/double precision
890def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^F(ABD|ABS)v.[fi](32|64)")>;
891def : InstRW<[Ampere1Write_5cyc_1XY],
892        (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>;
893def : InstRW<[Ampere1Write_5cyc_1XY],
894        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>;
895def : InstRW<[Ampere1Write_5cyc_1XY],
896        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>;
897def : InstRW<[Ampere1Write_5cyc_1X],
898        (instregex "^FCMPE?(S|D)")>;
899def : InstRW<[Ampere1Write_11cyc_1A_1BS_1X],
900        (instregex "^FCCMPE?(S|D)")>;
901def : InstRW<[Ampere1Write_11cyc_1A_1BS_1XY],
902        (instregex "^FCSEL(S|D)")>;
903def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>;
904def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^[SU]CVTFv.[fi](32|64)")>;
905def : InstRW<[Ampere1Write_34cyc_1XY], (instregex "^FDIVv.[if](64)", "FDIVD")>;
906def : InstRW<[Ampere1Write_19cyc_1XY], (instregex "^FDIVv.[if](32)", "FDIVS")>;
907def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>;
908def : InstRW<[Ampere1Write_10cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>;
909def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FMULX?v.[if](32|64)")>;
910def : InstRW<[Ampere1Write_6cyc_1XY], (instrs FMULX32, FMULX64)>;
911def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FN?M(ADD|SUB)[SD]rrr")>;
912def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FML[AS]v.[if](32|64)")>;
913def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FRECPXv.[if](32|64)")>;
914def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^F(RECP|RSQRT)S(32|64)")>;
915def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if](32|64)")>;
916def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FRINT(32|64)")>;
917def : InstRW<[Ampere1Write_62cyc_1XY], (instregex "^FSQRTv.f64", "^FSQRTDr")>;
918def : InstRW<[Ampere1Write_32cyc_1XY], (instregex "^FSQRTv.f32", "^FSQRTSr")>;
919
920// FP miscellaneous instructions
921def : InstRW<[Ampere1Write_10cyc_1XY_1Z], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
922def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FCVT[HSD]Hr")>;
923def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVT[HSD][SD]r")>;
924def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVTLv")>;
925def : InstRW<[Ampere1Write_8cyc_2XY], (instregex "^FCVT(N|XN)v")>;
926def : InstRW<[Ampere1Write_10cyc_1X_1Z], (instrs FJCVTZS)>;
927def : InstRW<[Ampere1Write_5cyc_1BS], (instregex "^FMOV[HSD][WX]r")>;
928def : InstRW<[Ampere1Write_7cyc_1BS_1XY], (instregex "^FMOVDXHighr")>;
929def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^FMOV[HSD][ri]")>;
930def : InstRW<[Ampere1Write_6cyc_1XY_1Z], (instregex "^FMOVXDHighr")>;
931def : InstRW<[Ampere1Write_4cyc_1Z], (instregex "^FMOV[WX][HSD]r")>;
932
933// Integer arithmetic and logical instructions
934def : InstRW<[Ampere1Write_1cyc_1A],
935        (instregex "ADC(W|X)r", "SBC(W|X)r")>;
936def : InstRW<[Ampere1Write_Arith],
937        (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r")>;
938def : InstRW<[Ampere1Write_ArithFlagsetting],
939        (instregex "(ADD|AND|BIC|SUB)S(W|X)r")>;
940def : InstRW<[Ampere1Write_1cyc_1A],
941        (instregex "(ADC|SBC)S(W|X)r")>;
942def : InstRW<[Ampere1Write_1cyc_1A], (instrs RMIF)>;
943def : InstRW<[Ampere1Write_1cyc_1A],
944        (instregex "(CCMN|CCMP)(X|W)")>;
945def : InstRW<[Ampere1Write_1cyc_1A],
946        (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>;
947def : InstRW<[Ampere1Write_18cyc_1BS], (instrs SDIVWr, UDIVWr)>;
948def : InstRW<[Ampere1Write_34cyc_1BS], (instrs SDIVXr, UDIVXr)>;
949def : InstRW<[Ampere1Write_3cyc_1BS],
950        (instregex "(S|U)MULHr")>;
951def : InstRW<[Ampere1Write_4cyc_1BS],
952        (instregex "(S|U)?M(ADD|SUB)L?r")>;
953
954// Integer load instructions
955def : InstRW<[Ampere1Write_4cyc_2L],
956        (instregex "(LDNP|LDP|LDPSW)(X|W)")>;
957def : InstRW<[Ampere1Write_4cyc_1L],
958        (instregex "LDR(B|D|H|Q|S)ui")>;
959def : InstRW<[Ampere1Write_4cyc_1L],
960        (instregex "LDR(D|Q|W|X)l")>;
961def : InstRW<[Ampere1Write_4cyc_1L],
962        (instregex "LDTR(B|H|W|X)i")>;
963def : InstRW<[Ampere1Write_4cyc_1L],
964        (instregex "LDTRS(BW|BX|HW|HX|W)i")>;
965def : InstRW<[Ampere1Write_4cyc_1L],
966        (instregex "LDUR(BB|HH|X|W)i")>;
967def : InstRW<[Ampere1Write_4cyc_1L],
968        (instregex "LDURS(BW|BX|HW|HX|W)i")>;
969def : InstRW<[Ampere1Write_5cyc_1AB_1L],
970        (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>;
971def : InstRW<[Ampere1Write_1cyc_1L],
972        (instrs PRFMl, PRFUMi, PRFUMi)>;
973def : InstRW<[Ampere1Write_2cyc_1AB_1L],
974        (instrs PRFMroW, PRFMroX)>;
975
976// Integer miscellaneous instructions
977def : InstRW<[Ampere1Write_1cyc_1A],  (instrs ADR, ADRP)>;
978def : InstRW<[Ampere1Write_1cyc_1B],  (instregex "EXTR(W|X)")>;
979def : InstRW<[Ampere1Write_1cyc_1B],  (instregex "(S|U)?BFM(W|X)")>;
980def : InstRW<[Ampere1Write_3cyc_1BS], (instregex "^CRC32C?[BHWX]")>;
981def : InstRW<[Ampere1Write_1cyc_1B],  (instregex "CLS(W|X)")>;
982def : InstRW<[Ampere1Write_1cyc_1A],  (instrs SETF8, SETF16)>;
983def : InstRW<[Ampere1Write_1cyc_1AB],
984        (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
985def : InstRW<[Ampere1Write_1cyc_1B],
986        (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>;
987def : InstRW<[Ampere1Write_1cyc_1B],
988        (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>;
989
990// Integer store instructions
991def : InstRW<[Ampere1Write_1cyc_2S],  (instregex "STNP(X|W)i")>;
992def : InstRW<[Ampere1Write_2cyc_1B_1S],
993        (instrs STPWi, STPXi)>;
994def : InstRW<[Ampere1Write_3cyc_1B_1S_1AB],
995        (instregex "STP(W|X)(pre|post)")>;
996def : InstRW<[Ampere1Write_1cyc_1S],
997        (instrs STTRBi, STTRHi, STTRWi, STTRXi)>;
998def : InstRW<[Ampere1Write_1cyc_1S],
999        (instregex "STUR(BB|HH|X|W)i",
1000                   "STR(X|W)ui",
1001                   "STUR(BB|HH|X|W)i")>;
1002def : InstRW<[Ampere1Write_1cyc_2S], (instrs STRWroX, STRXroX)>;
1003def : InstRW<[Ampere1Write_2cyc_1AB_2S], (instrs STRWroW, STRXroW)>;
1004
1005// Pointer authentication
1006//def : InstRW<[Ampere1Write_7cyc_1BS],
1007//	(instrs AUTIAZ, AUTIBZ, AUTIASP, AUTIBSP, AUTIA1716, AUTIB1716)>;
1008def : InstRW<[Ampere1Write_8cyc_1BS_1A],
1009        (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)", "ERETA(A|B)")>;
1010def : InstRW<[Ampere1Write_8cyc_1BS_2A],
1011        (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>;
1012//def : InstRW<[Ampere1Write_7cyc_1BS],
1013//	(instrs PACIAZ, PACIBZ, PACIASP, PACIBSP, PACIA1716, PACIB1716)>;
1014def : InstRW<[Ampere1Write_11cyc_1BS_1L], (instregex "^LDRA(A|B)")>;
1015def : InstRW<[Ampere1Write_7cyc_1BS], (instrs XPACD, XPACI)>;
1016
1017// Vector integer instructions
1018// -- absolute difference
1019def : InstRW<[Ampere1Write_3cyc_1XY],
1020             (instregex "^SABAv", "^SABALv", "^SABDv", "^SABDLv",
1021                        "^UABAv", "^UABALv", "^UABDv", "^UABDLv")>;
1022// -- arithmetic
1023def : InstRW<[Ampere1Write_3cyc_1XY],
1024        (instregex "^ABSv", "^(ADD|SUB)v", "^SADDLv", "^SADDW", "SHADD",
1025                   "SHSUB", "^SRHADD", "^URHADD", "SSUBL", "SSUBW",
1026                   "^UADDLv", "^UADDW", "UHADD", "UHSUB", "USUBL", "USUBW")>;
1027// -- arithmetic, horizontal, 16B
1028def : InstRW<[Ampere1Write_12cyc_4XY],
1029            (instregex "^ADDVv16i8v", "^SADDLVv16i8v", "^UADDLVv16i8v")>;
1030def : InstRW<[Ampere1Write_12cyc_4XY],
1031            (instregex "^[SU](MIN|MAX)Vv16i8v")>;
1032// -- arithmetic, horizontal, 4H/4S
1033def : InstRW<[Ampere1Write_6cyc_2XY],
1034            (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>;
1035def : InstRW<[Ampere1Write_6cyc_2XY],
1036            (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>;
1037// -- arithmetic, horizontal, 8B/8H
1038def : InstRW<[Ampere1Write_9cyc_3XY],
1039            (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>;
1040def : InstRW<[Ampere1Write_9cyc_3XY],
1041            (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>;
1042// -- arithmetic, narrowing
1043def : InstRW<[Ampere1Write_5cyc_2XY], (instregex "(ADD|SUB)HNv.*")>;
1044def : InstRW<[Ampere1Write_5cyc_2XY], (instregex "(RADD|RSUB)HNv.*")>;
1045// -- arithmetic, pairwise
1046def : InstRW<[Ampere1Write_3cyc_1XY],
1047        (instregex "^ADDPv", "^SADALP", "^UADALP", "^SADDLPv", "^UADDLPv")>;
1048// -- arithmetic, saturating
1049def : InstRW<[Ampere1Write_3cyc_1XY],
1050        (instregex "^SQADD", "^SQSUB", "^SUQADD", "^UQADD", "^UQSUB", "^USQADD")>;
1051// -- bit count
1052def : InstRW<[Ampere1Write_2cyc_1XY],
1053        (instregex "^(CLS|CLZ|CNT)v")>;
1054// -- compare
1055def : InstRW<[Ampere1Write_3cyc_1XY],
1056        (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv",
1057                   "^CMHIv", "^CMHSv")>;
1058// -- compare non-zero
1059def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^CMTSTv")>;
1060// -- dot product
1061def : InstRW<[Ampere1Write_3cyc_1XY], (instregex "^(S|SU|U|US)DOTv")>;
1062// -- fp reciprocal estimate
1063def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FRECPEv", "^FRSQRTEv")>;
1064// -- integer reciprocal estimate
1065def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^URECPEv", "^URSQRTEv")>;
1066// -- logical
1067def : InstRW<[Ampere1Write_2cyc_1XY],
1068        (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
1069// -- logical, narrowing
1070def : InstRW<[Ampere1Write_5cyc_2XY],
1071        (instregex "RSHRNv",
1072                   "SHRNv", "SQSHRNv", "SQSHRUNv",
1073                   "UQXTNv")>;
1074// -- matrix multiply
1075def : InstRW<[Ampere1Write_6cyc_2XY],
1076        (instrs SMMLA, UMMLA, USMMLA)>;
1077// -- max/min
1078def : InstRW<[Ampere1Write_3cyc_1XY],
1079        (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
1080def : InstRW<[Ampere1Write_3cyc_1XY],
1081        (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
1082// -- move immediate
1083def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^MOVIv", "^MVNIv")>;
1084// -- multiply
1085def : InstRW<[Ampere1Write_3cyc_1XY],
1086        (instregex "MULv", "SMULLv", "UMULLv", "SQDMUL(H|L)v", "SQRDMULHv")>;
1087// -- multiply accumulate
1088def : InstRW<[Ampere1Write_3cyc_1XY],
1089        (instregex "MLAv", "MLSv", "(S|U|SQD)(MLAL|MLSL)v", "SQRDML(A|S)Hv")>;
1090// -- negation, saturating
1091def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^SQABS", "^SQNEG")>;
1092// -- reverse bits/bytes
1093def : InstRW<[Ampere1Write_2cyc_1XY],
1094        (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>;
1095// -- shift
1096def : InstRW<[Ampere1Write_3cyc_1XY], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
1097// -- shift and accumulate
1098def : InstRW<[Ampere1Write_3cyc_1XY],
1099        (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>;
1100// -- shift, saturating
1101def : InstRW<[Ampere1Write_3cyc_1XY],
1102        (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv", "^SQSHL", "^SQSHLU",
1103                   "^SQXTNv", "^SQXTUNv", "^UQSHRNv", "UQRSHRNv", "^UQRSHL",
1104                   "^UQSHL")>;
1105
1106// Vector miscellaneous instructions
1107// -- duplicate element
1108def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^DUPv.+lane")>;
1109// -- duplicate from GPR
1110def : InstRW<[Ampere1Write_5cyc_1BS], (instregex "^DUPv.+gpr")>;
1111// -- extract narrow
1112def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^XTNv")>;
1113// -- insert/extract element
1114def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^EXTv", "^INSv.+lane")>;
1115// -- move FP immediate
1116def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^FMOVv")>;
1117// -- move element to GPR
1118def : InstRW<[Ampere1Write_6cyc_1XY_1Z], (instregex "(S|U)MOVv")>;
1119// -- move from GPR to any element
1120def : InstRW<[Ampere1Write_7cyc_1BS_1XY], (instregex "^INSv.+gpr")>;
1121// -- table lookup
1122def : InstRW<[Ampere1Write_2cyc_1XY],
1123            (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>;
1124def : InstRW<[Ampere1Write_4cyc_2XY],
1125            (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>;
1126def : InstRW<[Ampere1Write_6cyc_3XY],
1127            (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>;
1128def : InstRW<[Ampere1Write_8cyc_4XY],
1129            (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>;
1130// -- transpose
1131def : InstRW<[Ampere1Write_2cyc_1XY],
1132              (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>;
1133// -- zip/unzip
1134def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^ZIP1v", "^ZIP2v")>;
1135
1136} // SchedModel = Ampere1Model
1137