// xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
//=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Fujitsu A64FX processors.
//
//===----------------------------------------------------------------------===//
12
// Machine model for the Fujitsu A64FX core.
def A64FXModel : SchedMachineModel {
  // Dispatch and buffering parameters.
  let IssueWidth = 6;          // Up to 6 micro-ops are dispatched at a time.
  let MicroOpBufferSize = 180; // Micro-op re-order buffer holds 180 entries.
  // Chosen from a mix of micro-arch details and experimentation.
  let LoopMicroOpBufferSize = 128;

  // Default latencies and penalties.
  let LoadLatency = 5;         // Optimistic load latency.
  let MispredictPenalty = 12;  // Extra cycles for a mispredicted branch.

  // Scheduler configuration.
  let PostRAScheduler = 1;     // Use the post-RA scheduler.
  let CompleteModel = 1;
  let FullInstRWOverlapCheck = 0;

  // Features whose instructions this model does not describe.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SMEUnsupported.F, SVEUnsupported.F,
                  [HasMTE, HasMatMulInt8, HasBF16, HasPAuth, HasPAuthLR,
                   HasCPA, HasCSSC]);
}
29
30let SchedModel = A64FXModel in {
31
// Issue ports (A64FXIP*): one single-unit processor resource per port.
def A64FXIPFLA  : ProcResource<1>; // Port 0
def A64FXIPPR   : ProcResource<1>; // Port 1
def A64FXIPEXA  : ProcResource<1>; // Port 2
def A64FXIPFLB  : ProcResource<1>; // Port 3
def A64FXIPEXB  : ProcResource<1>; // Port 4
def A64FXIPEAGA : ProcResource<1>; // Port 5
def A64FXIPEAGB : ProcResource<1>; // Port 6
def A64FXIPBR   : ProcResource<1>; // Port 7
59
// Groups for the functional units on each issue port.  The digits in a
// group name are the numbers of the ports it contains.  Each group is meant
// to be used by a WriteRes later on.

// Single-port groups.
def A64FXGI7 : ProcResGroup<[A64FXIPBR]>;
def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>;
def A64FXGI1 : ProcResGroup<[A64FXIPPR]>;
def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>;
def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>;
def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>;
def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>;
def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>;

// Multi-port groups.
def A64FXGI03   : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>;
def A64FXGI01   : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>;
def A64FXGI24   : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>;
def A64FXGI56   : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>;
def A64FXGI056  : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>;
def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB,
                                A64FXIPEAGA, A64FXIPEAGB]>;

// All eight ports.
def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB,
                             A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB,
                             A64FXIPBR]>;
93
// Generic write resources named A64FXWrite_<N>Cyc_<group>: a result latency
// of N cycles on the named port group.  Only Latency is set, except for
// A64FXWrite_4Cyc_NGI24, which also sets NumMicroOps.

// Branch port.
let Latency = 1 in def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]>;

// Port 0 (FLA).
let Latency = 2 in def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]>;
let Latency = 4 in def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]>;
let Latency = 6 in def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]>;
let Latency = 8 in def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]>;
let Latency = 9 in def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]>;

// Port 1 (PR) and port 2 (EXA).
let Latency = 3 in def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]>;
let Latency = 5 in def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]>;

// Port 3 (FLB).
let Latency = 4 in def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]>;
let Latency = 6 in def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]>;

// Ports 0 and 3 (FLA, FLB).
let Latency = 4 in def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]>;
let Latency = 8 in def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]>;
let Latency = 9 in def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]>;

// Port 4 (EXB).
let Latency = 10 in def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]>;
let Latency = 12 in def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]>;
let Latency = 20 in def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]>;

// Port 5 (EAGA) and port 6 (EAGB).
let Latency = 5  in def A64FXWrite_5Cyc_GI5  : SchedWriteRes<[A64FXGI5]>;
let Latency = 11 in def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]>;
let Latency = 5  in def A64FXWrite_5Cyc_GI6  : SchedWriteRes<[A64FXGI6]>;

// Ports 2 and 4 (EXA, EXB).
let Latency = 1 in def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]>;
let Latency = 2 in def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]>;
let Latency = 4, NumMicroOps = 4 in
def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]>;

// Ports 5 and 6 (EAGA, EAGB).
let Latency = 1  in def A64FXWrite_1Cyc_GI56  : SchedWriteRes<[A64FXGI56]>;
let Latency = 5  in def A64FXWrite_5Cyc_GI56  : SchedWriteRes<[A64FXGI56]>;
let Latency = 8  in def A64FXWrite_8Cyc_GI56  : SchedWriteRes<[A64FXGI56]>;
let Latency = 11 in def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]>;
198
// Load write resources.  Each one sets both the result latency and the
// number of micro-ops.

// Latency 5: pair and single-register loads.
let Latency = 5 in {
  let NumMicroOps = 2 in def A64FXWrite_LDNP  : SchedWriteRes<[A64FXGI56]>;
  let NumMicroOps = 3 in def A64FXWrite_LDP01 : SchedWriteRes<[A64FXGI2456]>;
  let NumMicroOps = 2 in def A64FXWrite_LDR01 : SchedWriteRes<[A64FXGI2456]>;
}

// LD102..LD115 on ports 5/6: pairs with latency 8 and 11 that share a
// micro-op count.
let NumMicroOps = 2 in {
  let Latency = 8  in def A64FXWrite_LD102 : SchedWriteRes<[A64FXGI56]>;
  let Latency = 11 in def A64FXWrite_LD103 : SchedWriteRes<[A64FXGI56]>;
}
let NumMicroOps = 3 in {
  let Latency = 8  in def A64FXWrite_LD104 : SchedWriteRes<[A64FXGI56]>;
  let Latency = 11 in def A64FXWrite_LD105 : SchedWriteRes<[A64FXGI56]>;
}
let NumMicroOps = 4 in {
  let Latency = 8  in def A64FXWrite_LD106 : SchedWriteRes<[A64FXGI56]>;
  let Latency = 11 in def A64FXWrite_LD107 : SchedWriteRes<[A64FXGI56]>;
}
let NumMicroOps = 2 in {
  let Latency = 8  in def A64FXWrite_LD108 : SchedWriteRes<[A64FXGI56]>;
  let Latency = 11 in def A64FXWrite_LD109 : SchedWriteRes<[A64FXGI56]>;
}
let NumMicroOps = 3 in {
  let Latency = 8  in def A64FXWrite_LD110 : SchedWriteRes<[A64FXGI56]>;
  let Latency = 11 in def A64FXWrite_LD111 : SchedWriteRes<[A64FXGI56]>;
}
let NumMicroOps = 4 in {
  let Latency = 8  in def A64FXWrite_LD112 : SchedWriteRes<[A64FXGI56]>;
  let Latency = 11 in def A64FXWrite_LD113 : SchedWriteRes<[A64FXGI56]>;
}
let NumMicroOps = 5 in {
  let Latency = 8  in def A64FXWrite_LD114 : SchedWriteRes<[A64FXGI56]>;
  let Latency = 11 in def A64FXWrite_LD115 : SchedWriteRes<[A64FXGI56]>;
}

// LD<n>I<k> on ports 0/5/6: latency 8, micro-op count rising from 2 to 9.
let Latency = 8 in {
  let NumMicroOps = 2 in def A64FXWrite_LD1I0 : SchedWriteRes<[A64FXGI056]>;
  let NumMicroOps = 3 in def A64FXWrite_LD1I1 : SchedWriteRes<[A64FXGI056]>;
  let NumMicroOps = 4 in def A64FXWrite_LD2I0 : SchedWriteRes<[A64FXGI056]>;
  let NumMicroOps = 5 in def A64FXWrite_LD2I1 : SchedWriteRes<[A64FXGI056]>;
  let NumMicroOps = 6 in def A64FXWrite_LD3I0 : SchedWriteRes<[A64FXGI056]>;
  let NumMicroOps = 7 in def A64FXWrite_LD3I1 : SchedWriteRes<[A64FXGI056]>;
  let NumMicroOps = 8 in def A64FXWrite_LD4I0 : SchedWriteRes<[A64FXGI056]>;
  let NumMicroOps = 9 in def A64FXWrite_LD4I1 : SchedWriteRes<[A64FXGI056]>;
}
324
// One-cycle write on any of ports 2, 4, 5 and 6.
let Latency = 1 in def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]>;

// Named write resources on ports 0/3 (A64FXGI03) that set only Latency.
let Latency = 10 in def A64FXWrite_FMOV_GV   : SchedWriteRes<[A64FXGI03]>;
let Latency = 14 in def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]>;
let Latency = 12 in def A64FXWrite_ADDLV     : SchedWriteRes<[A64FXGI03]>;
let Latency = 14 in def A64FXWrite_MULLE     : SchedWriteRes<[A64FXGI03]>;
let Latency = 14 in def A64FXWrite_MULLV     : SchedWriteRes<[A64FXGI03]>;
let Latency = 6  in def A64FXWrite_MADDL     : SchedWriteRes<[A64FXGI03]>;
let Latency = 8  in def A64FXWrite_ABA       : SchedWriteRes<[A64FXGI03]>;
let Latency = 10 in def A64FXWrite_ABAL      : SchedWriteRes<[A64FXGI03]>;

// Multi-micro-op reductions.
let Latency = 12, NumMicroOps = 6 in
def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]>;
let Latency = 14, NumMicroOps = 6 in
def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]>;

let Latency = 9 in def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]>;
let Latency = 8 in def A64FXWrite_PMUL     : SchedWriteRes<[A64FXGI03]>;

// Shift-and-accumulate and narrowing-shift writes.
let Latency = 8, NumMicroOps = 3 in
def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]>;
let Latency = 8, NumMicroOps = 2 in
def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]>;
let Latency = 10, NumMicroOps = 3 in
def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]>;
let Latency = 10, NumMicroOps = 2 in
def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]>;

// Pairwise, widening and conversion writes.
let Latency = 10, NumMicroOps = 3 in
def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]>;
let Latency = 15, NumMicroOps = 2 in
def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]>;
let Latency = 15, NumMicroOps = 3 in
def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]>;
let Latency = 10, NumMicroOps = 3 in
def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]>;
let Latency = 10, NumMicroOps = 2 in
def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]>;
let Latency = 15, NumMicroOps = 2 in
def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]>;
let Latency = 14, NumMicroOps = 7 in
def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]>;

let Latency = 5  in def A64FXWrite_BIF        : SchedWriteRes<[A64FXGI03]>;
let Latency = 10 in def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]>;

// SHA writes are restricted to port 0 (A64FXGI0).
let Latency = 9  in def A64FXWrite_SHA00 : SchedWriteRes<[A64FXGI0]>;
let Latency = 12 in def A64FXWrite_SHA01 : SchedWriteRes<[A64FXGI0]>;

let Latency = 25 in def A64FXWrite_SMOV : SchedWriteRes<[A64FXGI03]>;

// TBX1..TBX4: latency 10, micro-op count 3, 5, 7 and 9 respectively.
let Latency = 10 in {
  let NumMicroOps = 3 in def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]>;
  let NumMicroOps = 5 in def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]>;
  let NumMicroOps = 7 in def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]>;
  let NumMicroOps = 9 in def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]>;
}
475
// Prefetch, swap, store and compare-and-swap write resources.  All of them
// use ports 5/6 (A64FXGI56) and differ only in latency.

// Latency 0.
let Latency = 0 in {
  def A64FXWrite_PREF0 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_PREF1 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_SWP   : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_STUR  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_STNP  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_STP01 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST10  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST11  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST12  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST13  : SchedWriteRes<[A64FXGI56]>;
}

// Latency 1.
let Latency = 1 in {
  def A64FXWrite_ST14 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST15 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST16 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST17 : SchedWriteRes<[A64FXGI56]>;
}

// Latency 7.
let Latency = 7 in def A64FXWrite_CAS : SchedWriteRes<[A64FXGI56]>;
535
// Commonly used read types.  No forwarding is provided for any of them, so
// each one gets a ReadAdvance of zero cycles.
foreach Rd = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID,
              ReadExtrHi, ReadAdrBase, ReadST, ReadVLD] in
  def : ReadAdvance<Rd, 0>;
549
//===----------------------------------------------------------------------===//
// 3. Instruction Tables.

//---
// 3.1 Branch Instructions
//---

let Latency = 1 in {
  // Branch, immed
  // Branch and link, immed
  // Compare and branch
  def : WriteRes<WriteBr, [A64FXGI7]>;

  // Branch, register
  // Branch and link, register != LR
  // Branch and link, register = LR
  def : WriteRes<WriteBrReg, [A64FXGI7]>;

  // System, barrier and hint writes name no processor resource.
  def : WriteRes<WriteSys, []>;
  def : WriteRes<WriteBarrier, []>;
  def : WriteRes<WriteHint, []>;
}

// Atomic writes name no processor resource either.
let Latency = 4 in
def : WriteRes<WriteAtomic, []>;
578
//---
// Branch
//---
// Every instruction set below is given the one-cycle write on the branch
// port group (A64FXGI7).  One InstRW record is produced per list element,
// in the order written.
foreach BrInsts = [(instrs B, BL, BR, BLR),
                   (instrs RET),
                   (instregex "^B..$"),
                   (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")] in
  def : InstRW<[A64FXWrite_1Cyc_GI7], BrInsts>;
587
//---
// 3.2 Arithmetic and Logical Instructions
// 3.3 Move and Shift Instructions
//---

// ALU, basic
// Conditional compare
// Conditional select
// Address generation
let Latency = 1 in
def : WriteRes<WriteI, [A64FXGI2456]>;

// Basic ALU, conditional-compare and conditional-select opcodes.
def : InstRW<[WriteI], (instregex
    "ADD?(W|X)r(i|r|s|x)",
    "ADDS?(W|X)r(i|r|s|x)(64)?",
    "AND?(W|X)r(i|r|s|x)",
    "ANDS?(W|X)r(i|r|s|x)",
    "ADC(W|X)r",
    "BIC?(W|X)r(i|r|s|x)",
    "BICS?(W|X)r(i|r|s|x)",
    "EON?(W|X)r(i|r|s|x)",
    "ORN?(W|X)r(i|r|s|x)",
    "ORR?(W|X)r(i|r|s|x)",
    "SUB?(W|X)r(i|r|s|x)",
    "SUBS?(W|X)r(i|r|s|x)",
    "SBC(W|X)r",
    "SBCS(W|X)r",
    "CCMN(W|X)(i|r)",
    "CCMP(W|X)(i|r)",
    "CSEL(W|X)r",
    "CSINC(W|X)r",
    "CSINV(W|X)r",
    "CSNEG(W|X)r")>;

// Register copies use the same write as basic ALU operations.
def : InstRW<[WriteI], (instrs COPY)>;
615
// ALU, extend and/or shift
let Latency = 2 in
def : WriteRes<WriteISReg, [A64FXGI2456]>;

let Latency = 1 in
def : WriteRes<WriteIEReg, [A64FXGI2456]>;

// The same opcode patterns are attached first to WriteISReg and then to
// WriteIEReg: one InstRW record per write, in that order.
foreach AluWr = [WriteISReg, WriteIEReg] in
  def : InstRW<[AluWr], (instregex
      "ADD?(W|X)r(i|r|s|x)",
      "ADDS?(W|X)r(i|r|s|x)(64)?",
      "AND?(W|X)r(i|r|s|x)",
      "ANDS?(W|X)r(i|r|s|x)",
      "ADC(W|X)r",
      "BIC?(W|X)r(i|r|s|x)",
      "BICS?(W|X)r(i|r|s|x)",
      "EON?(W|X)r(i|r|s|x)",
      "ORN?(W|X)r(i|r|s|x)",
      "ORR?(W|X)r(i|r|s|x)",
      "SUB?(W|X)r(i|r|s|x)",
      "SUBS?(W|X)r(i|r|s|x)",
      "SBC(W|X)r",
      "SBCS(W|X)r",
      "CCMN(W|X)(i|r)",
      "CCMP(W|X)(i|r)",
      "CSEL(W|X)r",
      "CSINC(W|X)r",
      "CSINV(W|X)r",
      "CSNEG(W|X)r")>;
650
// Move immed
let Latency = 1 in
def : WriteRes<WriteImm, [A64FXGI2456]>;

// MOVK / MOVN / MOVZ, W and X forms.
def : InstRW<[A64FXWrite_1Cyc_GI2456], (instrs MOVKWi, MOVKXi,
                                               MOVNWi, MOVNXi,
                                               MOVZWi, MOVZXi)>;

// ASRV / LSLV / RORV, W and X forms.
def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs ASRVWr, ASRVXr,
                                             LSLVWr, LSLVXr,
                                             RORVWr, RORVXr)>;

// Variable shift
let Latency = 1 in
def : WriteRes<WriteIS, [A64FXGI2456]>;
666
//---
// 3.4 Divide and Multiply Instructions
//---

// Divide, W-form.  Latency and ReleaseAtCycles carry the same value.
let Latency = 39, ReleaseAtCycles = [39] in
def : WriteRes<WriteID32, [A64FXGI4]>;

// Divide, X-form.  Latency and ReleaseAtCycles carry the same value.
let Latency = 23, ReleaseAtCycles = [23] in
def : WriteRes<WriteID64, [A64FXGI4]>;

// Multiply accumulate, W-form
let Latency = 5 in
def : WriteRes<WriteIM32, [A64FXGI2456]>;

// Multiply accumulate, X-form
let Latency = 5 in
def : WriteRes<WriteIM64, [A64FXGI2456]>;

// W-form multiply-accumulate uses WriteIM32 and X-form uses WriteIM64,
// matching the 32/64 split used for the divides below.  Both writes are
// defined above with the same resources and latency.
def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
def : InstRW<[WriteIM64], (instrs MADDXrrr, MSUBXrrr)>;

// Widening multiply-accumulate (SMADDL, UMADDL, SMSUBL, UMSUBL).
def : InstRW<[A64FXWrite_MADDL], (instregex "(S|U)(MADDL|MSUBL)rrr")>;

def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;
700
// Bitfield extract, two reg
let Latency = 1 in
def : WriteRes<WriteExtr, [A64FXGI2456]>;

// Multiply high
def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>;

// Miscellaneous Data-Processing Instructions

// Bitfield extract
def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move - basic
def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs SBFMWri, SBFMXri,
                                             UBFMWri, UBFMXri)>;

// Bitfield move, insert.  These two patterns are kept as separate records
// in this order.
def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>;
def : InstRW<[A64FXWrite_1Cyc_GI24], (instregex "(S|U)?BFM.*")>;

// Count leading
def : InstRW<[A64FXWrite_2Cyc_GI0],
             (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$")>;

// Reverse bits
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>;
727
// Cryptography Extensions
// AES and polynomial multiply long: 8 cycles on port 0.
foreach CryptoRe = ["^AES[DE]", "^AESI?MC", "^PMULL"] in
  def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex CryptoRe)>;

// SHA-1.
def : InstRW<[A64FXWrite_SHA00], (instregex "^SHA1SU0")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA1(H|SU1)")>;
def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA1[CMP]")>;

// SHA-256.
foreach CryptoRe = ["^SHA256SU0", "^SHA256SU1"] in
  def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex CryptoRe)>;
def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA256(H|H2)")>;

// CRC Instructions
// CRC32: byte and halfword forms share a write; word and doubleword forms
// each have their own.
def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32Brr, CRC32Hrr)>;
def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr)>;
def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr)>;

// CRC32C: same writes as the corresponding CRC32 forms.
def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32CBrr, CRC32CHrr)>;
def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32CWrr)>;
def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32CXrr)>;

// Reverse bits/bytes
// NOTE: Handled by WriteI.
750
//---
// 3.6 Load Instructions
// 3.10 FP Load Instructions
//---

// Load register, literal
// Load register, unscaled immed
// Load register, immed unprivileged
// Load register, unsigned immed
let Latency = 4 in
def : WriteRes<WriteLD, [A64FXGI56]>;

// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
let Latency = 1 in
def : WriteRes<WriteAdr, [A64FXGI2456]>;

// Load pair, immed offset, normal
// Load pair, immed offset, signed words, base != SP
// Load pair, immed offset signed words, base = SP
// LDP only breaks into *one* LS micro-op.  Thus
// the resources are handled by WriteLD.
let Latency = 5 in
def : WriteRes<WriteLDHi, []>;

// Load register offset, basic
// Load register, register offset, scale by 4/8
// Load register, register offset, scale by 2
// Load register offset, extend
// Load register, register offset, extend, scale by 4/8
// Load register, register offset, extend, scale by 2
//
// Both variants (scaled index and the default) select the same write.
def A64FXWriteLDIdx : SchedWriteVariant<
    [SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>,
     SchedVar<NoSchedPred, [A64FXWrite_1Cyc_GI56]>]>;
def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>;

// Both variants (scaled index and the default) select ReadDefault.
def A64FXReadAdrBase : SchedReadVariant<
    [SchedVar<ScaledIdxPred, [ReadDefault]>,
     SchedVar<NoSchedPred, [ReadDefault]>]>;
def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>;
796
// Load pair, immed pre-index, normal
// Load pair, immed pre-index, signed words
// Load pair, immed post-index, normal
// Load pair, immed post-index, signed words
// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.

// Each foreach below produces one InstRW record per listed instruction, in
// list order.

// Load pair, immediate offset (non-temporal and normal).
foreach LdOp = [LDNPDi, LDNPQi, LDNPSi, LDNPWi, LDNPXi,
                LDPDi, LDPQi, LDPSi, LDPSWi, LDPWi, LDPXi] in
  def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LdOp)>;

// Load register, unsigned immediate.
foreach LdOp = [LDRBui, LDRDui, LDRHui, LDRQui, LDRSui] in
  def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LdOp)>;

// Load register, literal: restricted to port 6.
foreach LdOp = [LDRDl, LDRQl, LDRWl, LDRXl] in
  def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LdOp)>;

// Load register, unprivileged (plain and sign-extending forms).
foreach LdOp = [LDTRBi, LDTRHi, LDTRWi, LDTRXi,
                LDTRSBWi, LDTRSBXi, LDTRSHWi, LDTRSHXi, LDTRSWi] in
  def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LdOp)>;
837
// Pre- and post-indexed loads.  Each foreach produces one InstRW record per
// listed instruction, in list order, and every instruction is listed once.

// Load pair, immed pre-index (D, Q, S, W and X variants).
foreach LdOp = [LDPDpre, LDPQpre, LDPSpre, LDPWpre, LDPXpre] in
  def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs LdOp)>;

// Load register with WriteAdr as the second write: the pre-index forms,
// plus the pre- and post-index forms of the LDRSB*, LDRSH*, LDRBB and LDRHH
// variants.
foreach LdOp = [LDRBpre, LDRDpre, LDRHpre, LDRQpre, LDRSpre, LDRWpre,
                LDRXpre,
                LDRSBWpre, LDRSBXpre, LDRSBWpost, LDRSBXpost,
                LDRSHWpre, LDRSHXpre, LDRSHWpost, LDRSHXpost,
                LDRBBpre, LDRBBpost,
                LDRHHpre, LDRHHpost] in
  def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LdOp)>;

// Load pair, immed post-index (D, Q, S, W and X variants).
foreach LdOp = [LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost] in
  def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs LdOp)>;

// Load register, immed post-index: WriteI is the second write.
foreach LdOp = [LDRBpost, LDRDpost, LDRHpost, LDRQpost, LDRSpost, LDRWpost,
                LDRXpost] in
  def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LdOp)>;
929
// Load register, register offset.  One InstRW record is produced per listed
// instruction, in list order, and every instruction is listed once.
// NOTE(review): LDRBBroW / LDRBBroX are not listed here, so they presumably
// fall back to the WriteLDIdx alias -- confirm whether they should be added.

// W-register offset forms.
foreach LdOp = [LDRBroW, LDRDroW, LDRHroW, LDRHHroW, LDRQroW, LDRSroW,
                LDRSHWroW, LDRSHXroW, LDRWroW, LDRXroW] in
  def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LdOp)>;

// X-register offset forms.
foreach LdOp = [LDRBroX, LDRDroX, LDRHHroX, LDRHroX, LDRQroX, LDRSroX,
                LDRSHWroX, LDRSHXroX, LDRWroX, LDRXroX] in
  def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LdOp)>;
994
// Load register, unscaled immediate: 5 cycles on ports 5/6.  One InstRW
// record is produced per listed instruction, in list order.
foreach LdOp = [LDURBi, LDURBBi, LDURDi, LDURHi, LDURHHi, LDURQi, LDURSi,
                LDURXi,
                LDURSBWi, LDURSBXi, LDURSHWi, LDURSHXi, LDURSWi] in
  def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LdOp)>;
1008
//---
// Prefetch
//---
// PRFMl uses A64FXWrite_PREF0; every other prefetch opcode listed here uses
// A64FXWrite_PREF1. One InstRW record is created per opcode.
def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>;
foreach PrefetchOp = [PRFUMi, PRFMui, PRFMroW, PRFMroX] in
  def : InstRW<[A64FXWrite_PREF1], (instrs PrefetchOp)>;
1017
//--
// 3.7 Store Instructions
// 3.11 FP Store Instructions
//--

// The three generic store writes below all have a latency of 1.

// Store register, unscaled immed
// Store register, immed unprivileged
// Store register, unsigned immed
let Latency = 1 in
def : WriteRes<WriteST, [A64FXGI56]>;

// Store register, immed post-index
// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase

// Store register, immed pre-index
// NOTE: Handled by WriteAdr, WriteST

// Store register, register offset, basic
// Store register, register offset, scaled by 4/8
// Store register, register offset, scaled by 2
// Store register, register offset, extend
// Store register, register offset, extend, scale by 4/8
// Store register, register offset, extend, scale by 1
// (uses A64FXGI2456 in addition to A64FXGI56)
let Latency = 1 in
def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]>;

// Store pair, immed offset, W-form
// Store pair, immed offset, X-form
let Latency = 1 in
def : WriteRes<WriteSTP, [A64FXGI56]>;

// Store pair, immed post-index, W-form
// Store pair, immed post-index, X-form
// Store pair, immed pre-index, W-form
// Store pair, immed pre-index, X-form
// NOTE: Handled by WriteAdr, WriteSTP.
1057
// Per-opcode overrides for immediate-offset stores. Each foreach emits one
// InstRW record per opcode, in list order.

// STUR* opcodes.
foreach StoreOp = [STURBi, STURBBi, STURDi, STURHi, STURHHi, STURQi, STURSi,
                   STURWi, STURXi] in
  def : InstRW<[A64FXWrite_STUR], (instrs StoreOp)>;

// STTR* opcodes: same store write, preceded by WriteAdr.
foreach StoreOp = [STTRBi, STTRHi, STTRWi, STTRXi] in
  def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs StoreOp)>;

// STNP* opcodes.
foreach StoreOp = [STNPDi, STNPQi, STNPXi, STNPWi] in
  def : InstRW<[A64FXWrite_STNP], (instrs StoreOp)>;

// STP* immediate-offset opcodes; these use the same write as the STNP*
// opcodes above.
foreach StoreOp = [STPDi, STPQi, STPXi, STPWi] in
  def : InstRW<[A64FXWrite_STNP], (instrs StoreOp)>;
1082
// Store register, unsigned immediate offset (STR*ui opcodes).
// One InstRW record per opcode, each using A64FXWrite_STUR; every opcode is
// listed exactly once.
// NOTE(review): no STRBBui, STRHHui or STRSui entry appears in this list --
// confirm whether those forms are meant to be covered here.
foreach StoreOp = [STRBui, STRDui, STRHui, STRQui, STRXui, STRWui] in
  def : InstRW<[A64FXWrite_STUR], (instrs StoreOp)>;
1095
// Store pair, immediate pre-/post-index.
// Each opcode pair is given two records, in this order: one with only
// A64FXWrite_STP01 and one with A64FXWrite_STP01 followed by ReadAdrBase.
// NOTE(review): the two records of a pair match the same opcodes with
// different operand lists, so presumably only one of them takes effect --
// confirm which one is intended and drop the other.
def : InstRW<[A64FXWrite_STP01], (instrs STPDpre, STPDpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase], (instrs STPDpre, STPDpost)>;

def : InstRW<[A64FXWrite_STP01], (instrs STPQpre, STPQpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase], (instrs STPQpre, STPQpost)>;

def : InstRW<[A64FXWrite_STP01], (instrs STPSpre, STPSpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase], (instrs STPSpre, STPSpost)>;

def : InstRW<[A64FXWrite_STP01], (instrs STPWpre, STPWpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase], (instrs STPWpre, STPWpost)>;

def : InstRW<[A64FXWrite_STP01], (instrs STPXpre, STPXpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase], (instrs STPXpre, STPXpost)>;
1136
// Store register, immediate pre-/post-index.
// Each opcode pair is given two records, in this order: one with
// [WriteAdr, A64FXWrite_STP01] and one with the same list followed by
// ReadAdrBase.
// NOTE(review): the two records of a pair match the same opcodes with
// different operand lists, so presumably only one of them takes effect --
// confirm which one is intended and drop the other.
def : InstRW<[WriteAdr, A64FXWrite_STP01],
    (instrs STRBpre, STRBpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
    (instrs STRBpre, STRBpost)>;

def : InstRW<[WriteAdr, A64FXWrite_STP01],
    (instrs STRBBpre, STRBBpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
    (instrs STRBBpre, STRBBpost)>;

def : InstRW<[WriteAdr, A64FXWrite_STP01],
    (instrs STRDpre, STRDpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
    (instrs STRDpre, STRDpost)>;

def : InstRW<[WriteAdr, A64FXWrite_STP01],
    (instrs STRHpre, STRHpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
    (instrs STRHpre, STRHpost)>;

def : InstRW<[WriteAdr, A64FXWrite_STP01],
    (instrs STRHHpre, STRHHpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
    (instrs STRHHpre, STRHHpost)>;

def : InstRW<[WriteAdr, A64FXWrite_STP01],
    (instrs STRQpre, STRQpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
    (instrs STRQpre, STRQpost)>;

def : InstRW<[WriteAdr, A64FXWrite_STP01],
    (instrs STRSpre, STRSpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
    (instrs STRSpre, STRSpost)>;

def : InstRW<[WriteAdr, A64FXWrite_STP01],
    (instrs STRWpre, STRWpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
    (instrs STRWpre, STRWpost)>;

def : InstRW<[WriteAdr, A64FXWrite_STP01],
    (instrs STRXpre, STRXpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
    (instrs STRXpre, STRXpost)>;
1209
// Store register, register offset (W- and X-register index forms).
// One record per roW/roX opcode pair, each with the operand list
// [A64FXWrite_STUR, ReadAdrBase]; every pair is listed exactly once.
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRBroW, STRBroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRBBroW, STRBBroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRDroW, STRDroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRHroW, STRHroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRHHroW, STRHHroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRQroW, STRQroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRSroW, STRSroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRWroW, STRWroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRXroW, STRXroX)>;
1246
//---
// 3.8 FP Data Processing Instructions
//---

// FP absolute value
// FP min/max
// FP negate
let Latency = 4, ReleaseAtCycles = [2] in
def : WriteRes<WriteF, [A64FXGI03]>;

// FP arithmetic: the D and H register forms of FADD/FSUB are overridden here.
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FADDDrr, FADDHrr)>;
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FSUBDrr, FSUBHrr)>;

// FP compare: same latency and release cycle as WriteF above.
let Latency = 4, ReleaseAtCycles = [2] in
def : WriteRes<WriteFCmp, [A64FXGI03]>;
1269
// FP Div, Sqrt
let Latency = 43 in
def : WriteRes<WriteFDiv, [A64FXGI0]>;

// Named writes for divide and square root. All of them use A64FXGI0 and
// differ only in latency.
let Latency = 38 in
def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]>;

let Latency = 29 in
def A64FXXWriteFDivSP : SchedWriteRes<[A64FXGI0]>;

let Latency = 43 in
def A64FXXWriteFDivDP : SchedWriteRes<[A64FXGI0]>;

let Latency = 29 in
def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]>;

let Latency = 43 in
def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]>;

// FP divide, S-form
// FP square root, S-form
// The scalar opcodes are named directly; the patterns cover opcode names
// ending in "32".
// NOTE(review): "^.*SQRT.*32$" is not limited to FSQRT -- it matches every
// opcode whose name contains "SQRT" and ends in "32". Confirm that is
// intended.
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>;
def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>;
def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;

// FP divide, D-form
// FP square root, D-form
// Same layout as the S-form group, for opcode names ending in "64".
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>;
def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>;
def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
1312
// FP round to integral (scalar Sr/Dr forms)
def : InstRW<[A64FXWrite_9Cyc_GI03],
    (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;

// FP select
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>;

//---
// 3.9 FP Miscellaneous Instructions
//---

// FP convert, from vec to vec reg
// FP convert, from gen to vec reg
// FP convert, from vec to gen reg
let Latency = 9, ReleaseAtCycles = [2] in
def : WriteRes<WriteFCvt, [A64FXGI03]>;

// FP move, immed
// FP move, register
let Latency = 4, ReleaseAtCycles = [2] in
def : WriteRes<WriteFImm, [A64FXGI0]>;

// FP transfer, from gen to vec reg
// FP transfer, from vec to gen reg
let Latency = 4, ReleaseAtCycles = [2] in
def : WriteRes<WriteFCopy, [A64FXGI0]>;

// The two "High" FMOV opcodes get dedicated writes instead of WriteFCopy.
def : InstRW<[A64FXWrite_FMOV_GV], (instrs FMOVXDHighr)>;
def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;
1348
//---
// 3.12 ASIMD Integer Instructions
//---

// Generic D-form (WriteVd) and Q-form (WriteVq) vector writes. Both use
// A64FXGI03 with a latency of 4. They cover the following categories:
//
// ASIMD absolute diff, D-form
// ASIMD absolute diff, Q-form
// ASIMD absolute diff accum, D-form
// ASIMD absolute diff accum, Q-form
// ASIMD absolute diff accum long
// ASIMD absolute diff long
// ASIMD arith, basic
// ASIMD arith, complex
// ASIMD compare
// ASIMD logical (AND, BIC, EOR)
// ASIMD max/min, basic
// ASIMD max/min, reduce, 4H/4S
// ASIMD max/min, reduce, 8B/8H
// ASIMD max/min, reduce, 16B
// ASIMD multiply, D-form
// ASIMD multiply, Q-form
// ASIMD multiply accumulate long
// ASIMD multiply accumulate saturating long
// ASIMD multiply long
// ASIMD pairwise add and accumulate
// ASIMD shift accumulate
// ASIMD shift by immed, basic
// ASIMD shift by immed and insert, basic, D-form
// ASIMD shift by immed and insert, basic, Q-form
// ASIMD shift by immed, complex
// ASIMD shift by register, basic, D-form
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
let Latency = 4 in {
  def : WriteRes<WriteVd, [A64FXGI03]>;
  def : WriteRes<WriteVq, [A64FXGI03]>;
}
1388
// Per-opcode overrides for ASIMD integer instructions. Several patterns in
// this group overlap, so the order of the records is kept as is.

// ASIMD arith, reduce, 4H/4S
// ASIMD arith, reduce, 8B/8H
// ASIMD arith, reduce, 16B

// ASIMD logical (AND, BIC, EOR, ORR, ORN, NOT)
def : InstRW<[A64FXWrite_4Cyc_GI03],
    (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;

// ASIMD arith, reduce
def : InstRW<[A64FXWrite_ADDLV],
    (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;

// ASIMD multiply long and multiply-accumulate long (S, U and SQD prefixes)
def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>;
def : InstRW<[A64FXWrite_MULLV],
    (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;

// ASIMD polynomial multiply long (v8i8/v16i8, then v1i64/v2i64)
def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v8i8|v16i8)")>;
def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v1i64|v2i64)")>;

// ASIMD absolute diff accum, D-form
def : InstRW<[A64FXWrite_ABA],
    (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
// ASIMD absolute diff accum, Q-form
def : InstRW<[A64FXWrite_ABA],
    (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
// ASIMD absolute diff accum long
def : InstRW<[A64FXWrite_ABAL],
    (instregex "^[SU]ABAL")>;

// ASIMD arith, reduce: v8i8/v4i16/v2i32 arrangements
def : InstRW<[A64FXWrite_ADDLV1],
    (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
// ASIMD arith, reduce: v8i16/v4i32 arrangements
def : InstRW<[A64FXWrite_ADDLV1],
    (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
// ASIMD arith, reduce: v16i8 arrangement
def : InstRW<[A64FXWrite_ADDLV1],
    (instregex "^[SU]?ADDL?Vv16i8v$")>;

// ASIMD max/min, reduce: v4i16/v4i32 arrangements
def : InstRW<[A64FXWrite_MINMAXV],
    (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
// ASIMD max/min, reduce: v8i8/v8i16 arrangements
def : InstRW<[A64FXWrite_MINMAXV],
    (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
// ASIMD max/min, reduce: v16i8 arrangement
def : InstRW<[A64FXWrite_MINMAXV],
    (instregex "^[SU](MIN|MAX)Vv16i8v$")>;

// ASIMD multiply, D-form (and the v1* arrangements)
def : InstRW<[A64FXWrite_PMUL],
    (instregex "^(P?MUL|SQR?DMUL)" #
               "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
               "(_indexed)?$")>;

// ASIMD multiply, Q-form
def : InstRW<[A64FXWrite_PMUL],
    (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD saturating doubling multiply returning high half, Q-form
def : InstRW<[A64FXWrite_SQRDMULH],
    (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD multiply accumulate, D-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
    (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
// ASIMD multiply accumulate, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
    (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD shift accumulate (rounding forms first, then plain forms)
def : InstRW<[A64FXWrite_SRSRAV],
    (instregex "SRSRAv", "URSRAv")>;
def : InstRW<[A64FXWrite_SSRAV],
    (instregex "SSRAv", "USRAv")>;
1459            (instregex "SSRAv", "USRAv")>;
1460
1461// ASIMD shift by immed, basic
1462def : InstRW<[A64FXWrite_RSHRN],
1463            (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>;
1464def : InstRW<[A64FXWrite_SHRN],
1465            (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>;
1466
1467def : InstRW<[A64FXWrite_6Cyc_GI3],
1468            (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>;
1469
1470// ASIMD shift by immed, complex
1471def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>;
1472def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^SQSHLU")>;
1473// ASIMD shift by register, basic, Q-form
1474def : InstRW<[A64FXWrite_6Cyc_GI3],
1475            (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
1476// ASIMD shift by register, complex, D-form
1477def : InstRW<[A64FXWrite_6Cyc_GI3],
1478            (instregex "^[SU][QR]{1,2}SHL" #
1479                       "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
1480// ASIMD shift by register, complex, Q-form
1481def : InstRW<[A64FXWrite_6Cyc_GI3],
1482            (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
1483
// ASIMD Arithmetic
// Each pattern and write pairing is given once in this group. Several
// patterns here overlap with patterns earlier in section 3.12, so the order
// of the records is kept as is.

// Integer add/sub, D-form then Q-form
def : InstRW<[A64FXWrite_4Cyc_GI03],
    (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
    (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;

// ADDHN/SUBHN and their rounding variants RADDHN/RSUBHN
def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>;
def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>;

// SQADD/SQSUB/UQADD/UQSUB, SUQADD/USQADD, SRHADD/URHADD and SQNEG
def : InstRW<[A64FXWrite_4Cyc_GI03],
    (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
               "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;

// Pairwise add, Q-form
def : InstRW<[A64FXWrite_ADDP],
    (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;

// Logical operations, X-register and Q-form arrangements
def : InstRW<[A64FXWrite_4Cyc_GI03],
    (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
               "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;

// CLS/CLZ/CNT, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI0],
    (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;

// Pairwise add long, with and without accumulate; add long across vector
def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>;
def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>;
def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>;

// Add and max/min across vector
def : InstRW<[A64FXWrite_MINMAXV],
    (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv", "^UMINVv")>;

// Absolute difference accumulate, including the long forms
def : InstRW<[A64FXWrite_ABA],
    (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;

// SQABS; the other saturating add/sub patterns are listed above with the
// same write.
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^SQABS")>;

// Integer compare
def : InstRW<[A64FXWrite_4Cyc_GI03],
    (instregex "^CMEQv", "^CMGEv", "^CMGTv",
               "^CMLEv", "^CMLTv", "^CMHIv", "^CMHSv")>;

// Max/min, element-wise then pairwise
def : InstRW<[A64FXWrite_MINMAXV],
    (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
def : InstRW<[A64FXWrite_ADDP],
    (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;

// Absolute difference, then absolute difference long
def : InstRW<[A64FXWrite_4Cyc_GI03],
    (instregex "^SABDv", "^UABDv")>;
def : InstRW<[A64FXWrite_TBX1],
    (instregex "^SABDLv", "^UABDLv")>;
1531
//---
// 3.13 ASIMD Floating-point Instructions
//---

// Generic FP multiply write.
let Latency = 9 in
def : WriteRes<WriteFMul, [A64FXGI03]>;

// ASIMD FP absolute value
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FABSv")>;

// ASIMD FP arith, normal, D-form
// ASIMD FP arith, normal, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
    (instregex "^FABDv", "^FADDv", "^FSUBv")>;

// ASIMD FP arith, pairwise, D-form
// ASIMD FP arith, pairwise, Q-form
def : InstRW<[A64FXWrite_FADDPV], (instregex "^FADDPv")>;

// ASIMD FP compare, D-form
// ASIMD FP compare, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FACGEv", "^FACGTv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
    (instregex "^FCMEQv", "^FCMGEv", "^FCMGTv", "^FCMLEv", "^FCMLTv")>;

// ASIMD FP round, D-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
    (instregex "^FRINT[AIMNPXZ](v2f32)")>;
// ASIMD FP round, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
    (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;

// ASIMD FP convert, long
// ASIMD FP convert, narrow
// ASIMD FP convert, other, D-form
// ASIMD FP convert, other, Q-form
// All three records below use A64FXWrite_FCVTXNV.

// ASIMD FP convert, long and narrow
def : InstRW<[A64FXWrite_FCVTXNV], (instregex "^FCVT(L|N|XN)v")>;
// ASIMD FP convert, other, D-form
def : InstRW<[A64FXWrite_FCVTXNV],
    (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
// ASIMD FP convert, other, Q-form
def : InstRW<[A64FXWrite_FCVTXNV],
    (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
1578
// ASIMD FP divide. Each opcode is named once.
// NOTE(review): the "^FDIVv.*32$" and "^FDIVv.*64$" patterns in section 3.8
// also match these opcodes, and for FDIVv4f32 that earlier record uses
// A64FXXWriteFDivSP rather than A64FXXWriteFDiv -- confirm which write is
// meant to apply.

// ASIMD FP divide, D-form, F32
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>;

// ASIMD FP divide, Q-form, F32
def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>;
1590
// ASIMD FP max/min, normal, D-form
// ASIMD FP max/min, normal, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI0],
    (instregex "^FMAXv", "^FMAXNMv", "^FMINv", "^FMINNMv")>;

// ASIMD FP max/min, pairwise, D-form
// ASIMD FP max/min, pairwise, Q-form
def : InstRW<[A64FXWrite_ADDP],
    (instregex "^FMAXPv", "^FMAXNMPv", "^FMINPv", "^FMINNMPv")>;

// ASIMD FP max/min, reduce
def : InstRW<[A64FXWrite_FMAXVVH],
    (instregex "^FMAXVv", "^FMAXNMVv", "^FMINVv", "^FMINNMVv")>;

// ASIMD FP multiply, D-form, FZ
// ASIMD FP multiply, D-form, no FZ
// ASIMD FP multiply, Q-form, FZ
// ASIMD FP multiply, Q-form, no FZ
// The broad "^FMULv"/"^FMULXv" record comes first; the two records after it
// name specific arrangements and use A64FXWrite_FMULXE.
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>;
def : InstRW<[A64FXWrite_FMULXE],
    (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
def : InstRW<[A64FXWrite_FMULXE],
    (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP multiply accumulate, Dform, FZ
// ASIMD FP multiply accumulate, Dform, no FZ
// ASIMD FP multiply accumulate, Qform, FZ
// ASIMD FP multiply accumulate, Qform, no FZ
// Same layout as the multiply group: broad record first, then the
// arrangement-specific records.
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>;
def : InstRW<[A64FXWrite_FMULXE],
    (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
def : InstRW<[A64FXWrite_FMULXE],
    (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP negate
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>;
1627
//--
// 3.14 ASIMD Miscellaneous Instructions
//--

// ASIMD bit reverse
def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>;

// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
def : InstRW<[A64FXWrite_BIF],
    (instregex "^BIFv", "^BITv", "^BSLv")>;

// ASIMD count, D-form
// ASIMD count, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI0],
    (instregex "^CLSv", "^CLZv", "^CNTv")>;

// ASIMD duplicate, gen reg
// ASIMD duplicate, element
// The broad "^DUPv" record comes first; "^DUPv.+gpr" after it matches a
// subset of the same opcode names with a different write.
def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>;
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUP(i8|i16|i32|i64)$")>;
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>;

// ASIMD extract
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>;

// ASIMD extract narrow
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>;

// ASIMD extract narrow, saturating
def : InstRW<[A64FXWrite_6Cyc_GI3],
    (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;

// ASIMD insert, element to element
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;

// ASIMD transfer, element to gen reg
def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;

// ASIMD move, integer immed
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv")>;

// ASIMD move, FP immed
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMOVv")>;
1672
// ASIMD table lookup, by table size (One..Four registers).
// For TBL the writes run 6Cyc_GI3, TBX1, TBX2, TBX3; for TBX they run
// TBX1, TBX2, TBX3, TBX4. The D-form and Q-form groups use the same writes.

// ASIMD table lookup, D-form
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>;
def : InstRW<[A64FXWrite_TBX1],     (instregex "^TBLv8i8Two")>;
def : InstRW<[A64FXWrite_TBX2],     (instregex "^TBLv8i8Three")>;
def : InstRW<[A64FXWrite_TBX3],     (instregex "^TBLv8i8Four")>;
def : InstRW<[A64FXWrite_TBX1],     (instregex "^TBXv8i8One")>;
def : InstRW<[A64FXWrite_TBX2],     (instregex "^TBXv8i8Two")>;
def : InstRW<[A64FXWrite_TBX3],     (instregex "^TBXv8i8Three")>;
def : InstRW<[A64FXWrite_TBX4],     (instregex "^TBXv8i8Four")>;

// ASIMD table lookup, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>;
def : InstRW<[A64FXWrite_TBX1],     (instregex "^TBLv16i8Two")>;
def : InstRW<[A64FXWrite_TBX2],     (instregex "^TBLv16i8Three")>;
def : InstRW<[A64FXWrite_TBX3],     (instregex "^TBLv16i8Four")>;
def : InstRW<[A64FXWrite_TBX1],     (instregex "^TBXv16i8One")>;
def : InstRW<[A64FXWrite_TBX2],     (instregex "^TBXv16i8Two")>;
def : InstRW<[A64FXWrite_TBX3],     (instregex "^TBXv16i8Three")>;
def : InstRW<[A64FXWrite_TBX4],     (instregex "^TBXv16i8Four")>;
1692
// ASIMD unzip/zip
// These patterns have no trailing "v", so they also cover the "^ZIP1v" and
// "^ZIP2v" opcode names.
def : InstRW<[A64FXWrite_6Cyc_GI0],
    (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;

// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI03],
    (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
               "^FRSQRTEv", "^URSQRTEv")>;

// ASIMD reciprocal step, D-form, FZ
// ASIMD reciprocal step, D-form, no FZ
// ASIMD reciprocal step, Q-form, FZ
// ASIMD reciprocal step, Q-form, no FZ
def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex "^FRECPSv", "^FRSQRTSv")>;

// ASIMD reverse
def : InstRW<[A64FXWrite_4Cyc_GI03],
    (instregex "^REV16v", "^REV32v", "^REV64v")>;

// ASIMD table lookup, D-form
// ASIMD table lookup, Q-form
// NOTE(review): the per-size "^TBLv8i8*", "^TBLv16i8*", "^TBXv8i8*" and
// "^TBXv16i8*" records earlier in section 3.14 match the same opcode names
// with different writes -- confirm which record is meant to apply.
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>;

// ASIMD transfer, element to word or word
// ASIMD transfer, element to gen reg
// ASIMD transfer gen reg to element
// The "^[SU]MOVv" (A64FXWrite_SMOV) and "^INSv" (A64FXWrite_6Cyc_GI0)
// records for these categories are given earlier in section 3.14.

// ASIMD transpose
def : InstRW<[A64FXWrite_6Cyc_GI0],
    (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>;
1732
1733//--
1734// 3.15 ASIMD Load Instructions
1735//--
1736
1737// ASIMD load, 1 element, multiple, 1 reg, D-form
1738// ASIMD load, 1 element, multiple, 1 reg, Q-form
1739def : InstRW<[A64FXWrite_8Cyc_GI56],
1740            (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>;
1741def : InstRW<[A64FXWrite_11Cyc_GI56],
1742            (instregex "^LD1Onev(16b|8h|4s)$")>;
1743def : InstRW<[A64FXWrite_LD108, WriteAdr],
1744            (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>;
1745def : InstRW<[A64FXWrite_LD109, WriteAdr],
1746            (instregex "^LD1Onev(16b|8h|4s)_POST$")>;
1747
1748// ASIMD load, 1 element, multiple, 2 reg, D-form
1749// ASIMD load, 1 element, multiple, 2 reg, Q-form
1750def : InstRW<[A64FXWrite_LD102],
1751            (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>;
1752def : InstRW<[A64FXWrite_LD103],
1753            (instregex "^LD1Twov(16b|8h|4s)$")>;
1754def : InstRW<[A64FXWrite_LD110, WriteAdr],
1755            (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>;
1756def : InstRW<[A64FXWrite_LD111, WriteAdr],
1757            (instregex "^LD1Twov(16b|8h|4s)_POST$")>;
1758
1759// ASIMD load, 1 element, multiple, 3 reg, D-form
1760// ASIMD load, 1 element, multiple, 3 reg, Q-form
1761def : InstRW<[A64FXWrite_LD104],
1762            (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>;
1763def : InstRW<[A64FXWrite_LD105],
1764            (instregex "^LD1Threev(16b|8h|4s)$")>;
1765def : InstRW<[A64FXWrite_LD112, WriteAdr],
1766            (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>;
1767def : InstRW<[A64FXWrite_LD113, WriteAdr],
1768            (instregex "^LD1Threev(16b|8h|4s)_POST$")>;
1769
// ASIMD load, 1 element, multiple, 4 reg (D-form and Q-form).
// The _POST entries pair the load write with WriteAdr.
def : InstRW<[A64FXWrite_LD106], (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD107], (instregex "^LD1Fourv(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
             (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
             (instregex "^LD1Fourv(16b|8h|4s)_POST$")>;
1780
// ASIMD load, 1 element, one lane (B/H/S and D).
def : InstRW<[A64FXWrite_LD1I0], (instregex "^LD1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD1I1, WriteAdr],
             (instregex "^LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes (D-form B/H/S, D-form D, and Q-form).
def : InstRW<[A64FXWrite_8Cyc_GI03],
             (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
             (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1794
// ASIMD load, 2 element, multiple (D-form B/H/S and Q-form, including D).
def : InstRW<[A64FXWrite_LD103],
             (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
             (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane (B/H, S and D).
def : InstRW<[A64FXWrite_LD2I0], (instregex "^LD2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD2I1, WriteAdr],
             (instregex "^LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes (D-form B/H/S, D-form D, and Q-form).
def : InstRW<[A64FXWrite_LD102],
             (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
             (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1816
// ASIMD load, 3 element, multiple (D-form B/H/S, Q-form B/H/S, Q-form D).
def : InstRW<[A64FXWrite_LD105],
             (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
             (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, one lane (B/H, S and D).
def : InstRW<[A64FXWrite_LD3I0], (instregex "^LD3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD3I1, WriteAdr],
             (instregex "^LD3i(8|16|32|64)_POST$")>;

// ASIMD load, 3 element, all lanes (D-form and Q-form; B/H/S and D).
def : InstRW<[A64FXWrite_LD104],
             (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
             (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1840
// ASIMD load, 4 element, multiple (D-form B/H/S, Q-form B/H/S, Q-form D).
def : InstRW<[A64FXWrite_LD107],
             (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
             (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane (B/H, S and D).
def : InstRW<[A64FXWrite_LD4I0], (instregex "^LD4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD4I1, WriteAdr],
             (instregex "^LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes (D-form and Q-form; B/H/S and D).
def : InstRW<[A64FXWrite_LD106],
             (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
             (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1864
1865//--
1866// 3.16 ASIMD Store Instructions
1867//--
1868
// ASIMD store, 1 element, multiple, 1 reg (D-form and Q-form).
def : InstRW<[A64FXWrite_ST10],
             (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST14, WriteAdr],
             (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg (D-form and Q-form).
def : InstRW<[A64FXWrite_ST11],
             (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
             (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg (D-form and Q-form).
def : InstRW<[A64FXWrite_ST12],
             (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
             (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg (D-form and Q-form).
def : InstRW<[A64FXWrite_ST13],
             (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
             (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane (B/H/S and D).
def : InstRW<[A64FXWrite_ST10], (instregex "^ST1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST14, WriteAdr],
             (instregex "^ST1i(8|16|32|64)_POST$")>;
1903
// ASIMD store, 2 element, multiple (D-form B/H/S, Q-form B/H/S, Q-form D).
def : InstRW<[A64FXWrite_ST11],
             (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
             (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane (B/H/S and D).
def : InstRW<[A64FXWrite_ST11], (instregex "^ST2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
             (instregex "^ST2i(8|16|32|64)_POST$")>;
1918
// ASIMD store, 3 element, multiple (D-form B/H/S, Q-form B/H/S, Q-form D).
def : InstRW<[A64FXWrite_ST12],
             (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
             (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 3 element, one lane (B/H, S and D).
def : InstRW<[A64FXWrite_ST12],
             (instregex "^ST3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
             (instregex "^ST3i(8|16|32|64)_POST$")>;
1933
// ASIMD store, 4 element, multiple (D-form B/H/S, Q-form B/H/S, Q-form D).
def : InstRW<[A64FXWrite_ST13],
             (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
             (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 4 element, one lane (B/H, S and D).
def : InstRW<[A64FXWrite_ST13],
             (instregex "^ST4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
             (instregex "^ST4i(8|16|32|64)_POST$")>;
1948
// V8.1a Atomics (LSE)

// CAS, CASA, CASL and CASAL in all four sizes share one write list.
def : InstRW<[A64FXWrite_CAS, WriteAtomic],
            (instrs CASB, CASH, CASW, CASX,
             CASAB, CASAH, CASAW, CASAX,
             CASLB, CASLH, CASLW, CASLX,
             CASALB, CASALH, CASALW, CASALX)>;
1961
// LDLAR in all four sizes.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDLARB,
                     LDLARH,
                     LDLARW,
                     LDLARX)>;
1964
// LDADD, LDADDA, LDADDL and LDADDAL in all four sizes share one write list.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
            (instrs LDADDB, LDADDH, LDADDW, LDADDX,
             LDADDAB, LDADDAH, LDADDAW, LDADDAX,
             LDADDLB, LDADDLH, LDADDLW, LDADDLX,
             LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;
1976
// LDCLR, LDCLRA, LDCLRL and LDCLRAL in all four sizes share one write list.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
            (instrs LDCLRB, LDCLRH, LDCLRW, LDCLRX,
             LDCLRAB, LDCLRAH, LDCLRAW, LDCLRAX,
             LDCLRLB, LDCLRLH, LDCLRLW, LDCLRLX,
             LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;
1988
// LDEOR, LDEORA, LDEORL and LDEORAL in all four sizes share one write list.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
            (instrs LDEORB, LDEORH, LDEORW, LDEORX,
             LDEORAB, LDEORAH, LDEORAW, LDEORAX,
             LDEORLB, LDEORLH, LDEORLW, LDEORLX,
             LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;
2000
// LDSET, LDSETA, LDSETL and LDSETAL in all four sizes share one write list.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
            (instrs LDSETB, LDSETH, LDSETW, LDSETX,
             LDSETAB, LDSETAH, LDSETAW, LDSETAX,
             LDSETLB, LDSETLH, LDSETLW, LDSETLX,
             LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;
2012
// Signed/unsigned max/min atomics. Each def lists the plain, A, L and AL
// variants (one row per variant) in the B/H/W/X sizes.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], (instrs
    LDSMAXB,   LDSMAXH,   LDSMAXW,   LDSMAXX,
    LDSMAXAB,  LDSMAXAH,  LDSMAXAW,  LDSMAXAX,
    LDSMAXLB,  LDSMAXLH,  LDSMAXLW,  LDSMAXLX,
    LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], (instrs
    LDSMINB,   LDSMINH,   LDSMINW,   LDSMINX,
    LDSMINAB,  LDSMINAH,  LDSMINAW,  LDSMINAX,
    LDSMINLB,  LDSMINLH,  LDSMINLW,  LDSMINLX,
    LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], (instrs
    LDUMAXB,   LDUMAXH,   LDUMAXW,   LDUMAXX,
    LDUMAXAB,  LDUMAXAH,  LDUMAXAW,  LDUMAXAX,
    LDUMAXLB,  LDUMAXLH,  LDUMAXLW,  LDUMAXLX,
    LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic], (instrs
    LDUMINB,   LDUMINH,   LDUMINW,   LDUMINX,
    LDUMINAB,  LDUMINAH,  LDUMINAW,  LDUMINAX,
    LDUMINLB,  LDUMINLH,  LDUMINLW,  LDUMINLX,
    LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;
2036
// SWP, SWPA, SWPL and SWPAL in all four sizes share one write list.
def : InstRW<[A64FXWrite_SWP, WriteAtomic],
            (instrs SWPB, SWPH, SWPW, SWPX,
             SWPAB, SWPAH, SWPAW, SWPAX,
             SWPLB, SWPLH, SWPLW, SWPLX,
             SWPALB, SWPALH, SWPALW, SWPALX)>;
2048
// STLLR in all four sizes.
def : InstRW<[A64FXWrite_STUR, WriteAtomic],
             (instrs STLLRB,
                     STLLRH,
                     STLLRW,
                     STLLRX)>;
2051
2052// SVE instructions
2053
2054// The modeling method for SVE instructions is more accurate than others.
2055// TODO: modify the model of other instructions similarly.
2056
// SVE opcodes mapped to A64FXWrite_4Cyc_GI0.
def : InstRW<[A64FXWrite_4Cyc_GI0],
    (instregex
        "^AND_ZI", "^CL[SZ]_Z", "^CPY_ZP[mz]I", "^DUP_ZZ?I",
        "^DUPM_Z", "^EOR_ZI", "^ORR_ZI",
        "^FCM(EQ|GT|GE|LT|LE|NE|UO)_P", "^FCPY_Z",
        "^F(MAX|MIN).*I_", "^NEG_Z", "^[SU](MAX|MIN)_ZI",
        "^SUBR?_ZI")>;

// SVE opcodes mapped to A64FXWrite_6Cyc_GI0.
def : InstRW<[A64FXWrite_6Cyc_GI0],
    (instregex
        "^CLAST[AB]_[VZ]", "^COMPACT_Z", "^CPY_ZPmV", "^DUP_ZR",
        "^EXT_Z", "^FDUP_Z", "^INSR_ZV", "^LAST[AB]_V",
        "^REV_Z", "^SPLICE_Z", "^[SU]UNPK(HI|LO)_Z",
        "^TBL_Z", "^TRN[12]_Z")>;

// SVE opcodes mapped to A64FXWrite_9Cyc_GI0.
def : InstRW<[A64FXWrite_9Cyc_GI0],
    (instregex
        "^F(ADD|SUBR?)_.*I_", "^FRECPS_Z", "^FRSQRTS_Z",
        "^INDEX_II_[SD]", "^MUL_ZI")>;
2071
// SVE opcodes mapped to A64FXWrite_4Cyc_GI3.
def : InstRW<[A64FXWrite_4Cyc_GI3], (instregex "^CNT_Z")>;

// SVE opcodes mapped to A64FXWrite_4Cyc_GI03.
def : InstRW<[A64FXWrite_4Cyc_GI03],
    (instregex
        "^ABS_Z", "^ADD_Z", "^AND_Z[^I]", "^ASRR?_(WIDE_)?Z",
        "^BIC_Z", "^ADR_[SU]XTW_Z", "^CNOT_Z", "^DEC[BHWD]_Z",
        "^EOR_Z[^I]", "^INC[BHWD]_Z", "^ORR_Z[^I]", "^FABS_Z",
        "^FACG[ET]_P", "^FEXPA_Z", "^F(MAX|MIN)[^V]*Z_",
        "^FNEG_Z", "^FRECP[EX]_Z", "^FRSQRTE_Z", "^FTSSEL_Z",
        "^LS[LR]R?(_WIDE)?_Z", "^NOT_Z", "^RBIT_Z",
        "^REV[BHW]_Z", "^SABD_Z", "^SEL_Z",
        "^[SU](MAX|MIN)_ZP", "^[SU]Q(INC|DEC)[^P]_Z",
        "^SUBR?_Z[^I]", "^[SU]XT._Z", "^UABD_Z")>;

// SVE opcodes mapped to A64FXWrite_9Cyc_GI03.
def : InstRW<[A64FXWrite_9Cyc_GI03],
    (instregex
        "^FABD_Z", "^F(ADD|SUBR?)_.*Z_",
        "^FN?(MAD|MLA|MLS|MSB)_ZP", "^FMUL_(ZP|ZZZ_)",
        "^FMULX_Z", "^FCVT(ZS|ZU)?_Z", "^FRINT._Z",
        "^FSCALE_Z", "^FTMAD_Z", "^FTSMUL_Z",
        "^MAD_Z", "^MLA_Z", "^MLS_Z", "^MSB_Z", "^MUL_ZP",
        "^[SU]CVTF_Z", "^[SU]DOT_ZZZ_", "^[SU]MULH_Z")>;
2091
// SVE opcodes mapped to A64FXWrite_3Cyc_GI1.
def : InstRW<[A64FXWrite_3Cyc_GI1],
    (instregex
        "^ANDS?_P", "^BICS?_P", "^BRK.*_P", "^EORS?_P",
        "^ORRS?_P", "^NANDS?_P", "^NORS?_P", "^ORNS?_P",
        "^PFALSE", "^PNEXT", "^PFIRST", "^PTEST", "^PTRUES?",
        "^PUNPK(HI|LO)", "^RDFFRS?", "^REV_P", "^SEL_P",
        "^TRN[12]_P")>;

// SVE opcodes mapped to A64FXWrite_1Cyc_GI24.
def : InstRW<[A64FXWrite_1Cyc_GI24],
    (instregex
        "^ADD[PV]L", "^CNT[BHWD]_X", "^DEC[BHWD]_X",
        "^INC[BHWD]_X", "^RDVLI")>;

// LDR of a predicate or vector register (LDR_PXI / LDR_ZXI).
def : InstRW<[A64FXWrite_11Cyc_GI5], (instregex "^LDR_[PZ]XI")>;

// LD1-family opcodes matched by the pattern below (optional NF/FF/NT prefix,
// optional R and S infixes, element letter B/H/S/W/D/Q).
def : InstRW<[A64FXWrite_11Cyc_GI56],
    (instregex "^LD(NF|FF|NT)?1R?S?[BHSWDQ]")>;
2107
// Write with an empty resource list and no field overrides; bound to the
// SETFFR and MOVPRFX opcodes.
def A64FXWrite_None : SchedWriteRes<[]>;
def : InstRW<[A64FXWrite_None],
             (instregex "^SETFFR", "^MOVPRFX")>;
2111
// FMLA/FMLS/FMUL opcodes with the _ZZZI suffix.
let Latency = 15, NumMicroOps = 2, ReleaseAtCycles = [2] in
def A64FXWrite_FMAIndexed : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FMAIndexed],
             (instregex "^F(MLA|MLS|MUL)_ZZZI")>;

// ADR_LSL_Z* opcodes.
let Latency = 5, NumMicroOps = 2, ReleaseAtCycles = [2] in
def A64FXWrite_ADR_LSL_Z : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_ADR_LSL_Z],
             (instregex "^ADR_LSL_Z")>;

// ASRD_Z* opcodes; no ReleaseAtCycles override is given for this write.
let Latency = 8, NumMicroOps = 2 in
def A64FXWrite_ASRD : SchedWriteRes<[A64FXGI0, A64FXGI01]>;
def : InstRW<[A64FXWrite_ASRD],
             (instregex "^ASRD_Z")>;
2131
// ANDV/EORV/ORV/SADDV/SMAXV/SMINV/UADDV/UMAXV/UMINV reductions, one write per
// element-size suffix (_B, _H, _S, _D). All four use A64FXGI03.

// _B elements.
let Latency = 46, NumMicroOps = 10, ReleaseAtCycles = [10] in
def A64FXWrite_Reduction4CycB : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_Reduction4CycB],
    (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_B")>;

// _H elements.
let Latency = 42, NumMicroOps = 9, ReleaseAtCycles = [9] in
def A64FXWrite_Reduction4CycH : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_Reduction4CycH],
    (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_H")>;

// _S elements.
let Latency = 38, NumMicroOps = 8, ReleaseAtCycles = [8] in
def A64FXWrite_Reduction4CycS : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_Reduction4CycS],
    (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_S")>;

// _D elements.
let Latency = 34, NumMicroOps = 7, ReleaseAtCycles = [7] in
def A64FXWrite_Reduction4CycD : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_Reduction4CycD],
    (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_D")>;
2163
// CLASTA_R* / CLASTB_R* opcodes.
let Latency = 29 in
def A64FXWrite_CLAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_CLAST_R],
             (instregex "^CLAST[AB]_R")>;

// Opcodes matching ^CMP.*_P.
let Latency = 4 in
def A64FXWrite_CMP : SchedWriteRes<[A64FXGI0, A64FXGI1]>;
def : InstRW<[A64FXWrite_CMP],
             (instregex "^CMP.*_P")>;

// CNTP_X* opcodes.
let Latency = 6 in
def A64FXWrite_CNTP : SchedWriteRes<[A64FXGI1, A64FXGI2]>;
def : InstRW<[A64FXWrite_CNTP],
             (instregex "^CNTP_X")>;

// CPY_ZPmR* opcodes.
let Latency = 8 in
def A64FXWrite_CPYScalar : SchedWriteRes<[A64FXGI0, A64FXGI2]>;
def : InstRW<[A64FXWrite_CPYScalar],
             (instregex "^CPY_ZPmR")>;
2183
// CTERM* opcodes.
let Latency = 2, ReleaseAtCycles = [2] in
def A64FXWrite_CTERM : SchedWriteRes<[A64FXGI24]>;
def : InstRW<[A64FXWrite_CTERM],
             (instregex "^CTERM")>;

// DECP_X* / INCP_X* opcodes.
let Latency = 7, NumMicroOps = 2 in
def A64FXWrite_INCPScalar : SchedWriteRes<[A64FXGI1, A64FXGI2, A64FXGI4]>;
def : InstRW<[A64FXWrite_INCPScalar],
             (instregex "^DECP_X", "^INCP_X")>;

// DECP_Z* / INCP_Z* opcodes.
let Latency = 12 in
def A64FXWrite_INCPVector : SchedWriteRes<[A64FXGI0, A64FXGI1]>;
def : InstRW<[A64FXWrite_INCPVector],
             (instregex "^DECP_Z", "^INCP_Z")>;
2200
// FADDV, one write per element size; each is bound to a single opcode.

// H elements.
let Latency = 75, NumMicroOps = 11, ReleaseAtCycles = [11] in
def A64FXWrite_FADDVH : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDVH],
             (instrs FADDV_VPZ_H)>;

// S elements.
let Latency = 60, NumMicroOps = 9, ReleaseAtCycles = [9] in
def A64FXWrite_FADDVS : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDVS],
             (instrs FADDV_VPZ_S)>;

// D elements.
let Latency = 45, NumMicroOps = 7, ReleaseAtCycles = [7] in
def A64FXWrite_FADDVD : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDVD],
             (instrs FADDV_VPZ_D)>;
2221
// FADDA, one write per element size; each is bound to a single opcode.

// H elements.
let Latency = 468, NumMicroOps = 63, ReleaseAtCycles = [63] in
def A64FXWrite_FADDAH : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDAH],
             (instrs FADDA_VPZ_H)>;

// S elements.
let Latency = 228, NumMicroOps = 31, ReleaseAtCycles = [31] in
def A64FXWrite_FADDAS : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDAS],
             (instrs FADDA_VPZ_S)>;

// D elements.
let Latency = 108, NumMicroOps = 15, ReleaseAtCycles = [15] in
def A64FXWrite_FADDAD : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDAD],
             (instrs FADDA_VPZ_D)>;
2242
// FCADD_Z* opcodes; no ReleaseAtCycles override is given for this write.
let Latency = 15, NumMicroOps = 2 in
def A64FXWrite_FCADDZ : SchedWriteRes<[A64FXGI0, A64FXGI3]>;
def : InstRW<[A64FXWrite_FCADDZ],
             (instregex "^FCADD_Z")>;

// FCMLA_Z* opcodes.
let Latency = 15, NumMicroOps = 3, ReleaseAtCycles = [3] in
def A64FXWrite_FCMLAZ : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FCMLAZ],
             (instregex "^FCMLA_Z")>;
2255
// FDIV / FDIVR / FSQRT vector opcodes, one write per element-size suffix.
// In each write, ReleaseAtCycles for A64FXGI0 equals the latency.

// _H elements.
let Latency = 134, ReleaseAtCycles = [134] in
def A64FXWrite_FDIVH : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_FDIVH],
             (instregex "^F(DIVR?|SQRT)_Z.*_H")>;

// _S elements.
let Latency = 98, ReleaseAtCycles = [98] in
def A64FXWrite_FDIVS : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_FDIVS],
             (instregex "^F(DIVR?|SQRT)_Z.*_S")>;

// _D elements.
let Latency = 154, ReleaseAtCycles = [154] in
def A64FXWrite_FDIVD : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_FDIVD],
             (instregex "^F(DIVR?|SQRT)_Z.*_D")>;
2273
// FMAXV / FMINV / FMAXNMV / FMINNMV reductions on H elements.
let Latency = 54, NumMicroOps = 11, ReleaseAtCycles = [11] in
def A64FXWrite_FMAXVH : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FMAXVH],
             (instregex "^F(MAX|MIN)(NM)?V_VPZ_H")>;

// FMAXV / FMINV / FMAXNMV / FMINNMV reductions on S elements.
let Latency = 44, NumMicroOps = 9, ReleaseAtCycles = [9] in
def A64FXWrite_FMAXVS : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FMAXVS],
             (instregex "^F(MAX|MIN)(NM)?V_VPZ_S")>;
2287
// FMAXV / FMINV / FMAXNMV / FMINNMV reductions on D elements:
// latency 34, 7 micro-ops, A64FXGI03 held for 7 cycles.
def A64FXWrite_FMAXVD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 34;
  let NumMicroOps = 7;
  let ReleaseAtCycles = [7];
}
// Bind the _D opcodes to the D-element write defined just above. This entry
// previously referenced A64FXWrite_FMAXVH, which left A64FXWrite_FMAXVD unused
// and gave the D reductions the H-element latency and micro-op count.
def : InstRW<[A64FXWrite_FMAXVD], (instregex "^F(MAX|MIN)(NM)?V_VPZ_D")>;
2294
// INDEX opcodes, split by operand form (RI/IR, II, RR) and element size.
// The II forms with S/D elements are not matched by any pattern in this group.

// INDEX_RI / INDEX_IR, B and H elements.
let Latency = 17, NumMicroOps = 2, ReleaseAtCycles = [2, 2] in
def A64FXWrite_INDEX_RI_BH : SchedWriteRes<[A64FXGI0, A64FXGI2]>;
def : InstRW<[A64FXWrite_INDEX_RI_BH],
             (instregex "^INDEX_(RI|IR)_[BH]")>;

// INDEX_RI / INDEX_IR, S and D elements.
let Latency = 13, NumMicroOps = 1 in
def A64FXWrite_INDEX_RI_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]>;
def : InstRW<[A64FXWrite_INDEX_RI_SD],
             (instregex "^INDEX_(RI|IR)_[SD]")>;

// INDEX_II, B and H elements.
let Latency = 13, NumMicroOps = 2, ReleaseAtCycles = [2] in
def A64FXWrite_INDEX_II_BH : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_INDEX_II_BH],
             (instregex "^INDEX_II_[BH]")>;

// INDEX_RR, B and H elements.
let Latency = 17, NumMicroOps = 3, ReleaseAtCycles = [2, 2, 1] in
def A64FXWrite_INDEX_RR_BH : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI3]>;
def : InstRW<[A64FXWrite_INDEX_RR_BH],
             (instregex "^INDEX_RR_[BH]")>;

// INDEX_RR, S and D elements.
let Latency = 17, NumMicroOps = 2, ReleaseAtCycles = [2, 1] in
def A64FXWrite_INDEX_RR_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]>;
def : InstRW<[A64FXWrite_INDEX_RR_SD],
             (instregex "^INDEX_RR_[SD]")>;
2328
// INSR_ZR* opcodes.
let Latency = 10 in
def A64FXWrite_INSR_ZR : SchedWriteRes<[A64FXGI0, A64FXGI2]>;
def : InstRW<[A64FXWrite_INSR_ZR],
             (instregex "^INSR_ZR")>;
2333
// LASTA_R* / LASTB_R* opcodes: latency 25 on A64FXGI0 + A64FXGI56.
def A64FXWrite_LAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 25;
}
// Bind LAST[AB]_R to the write defined just above. This entry previously
// referenced A64FXWrite_CLAST_R (latency 29), which left A64FXWrite_LAST_R
// unused.
def : InstRW<[A64FXWrite_LAST_R], (instregex "^LAST[AB]_R")>;
2338
// GLD1 / GLDFF1 opcodes, split by element size (S vs. D) and by whether
// the opcode name carries the _IMM suffix (ZI writes) or not (RZ writes).
// The RZ writes additionally use A64FXGI2.

// S elements, _IMM opcodes.
let Latency = 19, ReleaseAtCycles = [2, 4, 4] in
def A64FXWrite_GLD_S_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_GLD_S_ZI],
    (instregex "^GLD(FF)?1W_IMM",
               "^GLD(FF)?1S?[BHW]_S_IMM")>;

// D elements, _IMM opcodes.
let Latency = 16, ReleaseAtCycles = [1, 2, 2] in
def A64FXWrite_GLD_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_GLD_D_ZI],
    (instregex "^GLD(FF)?1D_IMM",
               "^GLD(FF)?1S?[BHW]_D_IMM")>;

// S elements, non-_IMM opcodes.
let Latency = 23, ReleaseAtCycles = [2, 1, 4, 4] in
def A64FXWrite_GLD_S_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_GLD_S_RZ],
    (instregex "^GLD(FF)?1W_[^DI]",
               "^GLD(FF)?1S?[BHW]_S_[^I]")>;

// D elements, non-_IMM opcodes (including the bare, unsuffixed names).
let Latency = 20, ReleaseAtCycles = [1, 1, 2, 2] in
def A64FXWrite_GLD_D_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_GLD_D_RZ],
    (instregex "^GLD(FF)?1D_[^I]",
               "^GLD(FF)?1D$",
               "^GLD(FF)?1S?[BHW]_D_[^I]",
               "^GLD(FF)?1S?[BHW]_D$")>;
2367
// SVE LD2: B/H elements, then W/D elements split into the _IMM opcodes and
// the bare (unsuffixed) opcodes. All three writes use A64FXGI56.

// LD2B / LD2H (any suffix).
let Latency = 15, NumMicroOps = 3, ReleaseAtCycles = [9] in
def A64FXWrite_LD2_BH : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD2_BH],
             (instregex "^LD2[BH]")>;

// LD2W_IMM / LD2D_IMM.
let Latency = 11, NumMicroOps = 2, ReleaseAtCycles = [2] in
def A64FXWrite_LD2_WD_IMM : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD2_WD_IMM],
             (instregex "^LD2[WD]_IMM")>;

// LD2W / LD2D.
let Latency = 12, NumMicroOps = 3, ReleaseAtCycles = [3] in
def A64FXWrite_LD2_WD : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD2_WD],
             (instregex "^LD2[WD]$")>;
2388
// SVE LD3: B/H elements, then W/D elements split into the _IMM opcodes and
// the bare (unsuffixed) opcodes. All three writes use A64FXGI56.

// LD3B / LD3H (any suffix).
let Latency = 15, NumMicroOps = 4, ReleaseAtCycles = [13] in
def A64FXWrite_LD3_BH : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD3_BH],
             (instregex "^LD3[BH]")>;

// LD3W_IMM / LD3D_IMM.
let Latency = 11, NumMicroOps = 3, ReleaseAtCycles = [3] in
def A64FXWrite_LD3_WD_IMM : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD3_WD_IMM],
             (instregex "^LD3[WD]_IMM")>;

// LD3W / LD3D.
let Latency = 12, NumMicroOps = 4, ReleaseAtCycles = [4] in
def A64FXWrite_LD3_WD : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD3_WD],
             (instregex "^LD3[WD]$")>;
2409
// SVE LD4: B/H elements, then W/D elements split into the _IMM opcodes and
// the bare (unsuffixed) opcodes. All three writes use A64FXGI56.

// LD4B / LD4H (any suffix).
let Latency = 15, NumMicroOps = 5, ReleaseAtCycles = [17] in
def A64FXWrite_LD4_BH : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD4_BH],
             (instregex "^LD4[BH]")>;

// LD4W_IMM / LD4D_IMM.
let Latency = 11, NumMicroOps = 4, ReleaseAtCycles = [4] in
def A64FXWrite_LD4_WD_IMM : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD4_WD_IMM],
             (instregex "^LD4[WD]_IMM")>;

// LD4W / LD4D.
let Latency = 12, NumMicroOps = 5, ReleaseAtCycles = [5] in
def A64FXWrite_LD4_WD : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD4_WD],
             (instregex "^LD4[WD]$")>;
2430
// SVE prefetch opcodes. None of these writes overrides Latency or
// NumMicroOps; only the resource lists and ReleaseAtCycles differ.

// PRF?_PR* opcodes: no field overrides at all.
def A64FXWrite_PRF : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_PRF],
             (instregex "^PRF._PR")>;

// PRF?_S_* opcodes whose next character is not 'P'.
let ReleaseAtCycles = [2, 1, 4] in
def A64FXWrite_PRF_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]>;
def : InstRW<[A64FXWrite_PRF_W_RZ],
             (instregex "^PRF._S_[^P]")>;

// PRF?_S_PZI* opcodes.
let ReleaseAtCycles = [2, 4] in
def A64FXWrite_PRF_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_PRF_W_ZI],
             (instregex "^PRF._S_PZI")>;

// PRF?_D_* opcodes whose next character is not 'P'.
let ReleaseAtCycles = [1, 1, 2] in
def A64FXWrite_PRF_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]>;
def : InstRW<[A64FXWrite_PRF_D_RZ],
             (instregex "^PRF._D_[^P]")>;

// PRF?_D_PZI* opcodes.
let ReleaseAtCycles = [1, 2] in
def A64FXWrite_PRF_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_PRF_D_ZI],
             (instregex "^PRF._D_PZI")>;
2454
// SDIV / UDIV / SDIVR / UDIVR on S elements. ReleaseAtCycles for A64FXGI0
// equals the latency.
let Latency = 114, ReleaseAtCycles = [114] in
def A64FXWrite_SDIV_S : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_SDIV_S],
             (instregex "^[SU]DIVR?.*_S")>;

// SDIV / UDIV / SDIVR / UDIVR on D elements. ReleaseAtCycles for A64FXGI0
// equals the latency.
let Latency = 178, ReleaseAtCycles = [178] in
def A64FXWrite_SDIV_D : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_SDIV_D],
             (instregex "^[SU]DIVR?.*_D")>;

// SDOT_ZZZI* / UDOT_ZZZI* opcodes.
let Latency = 15, NumMicroOps = 2 in
def A64FXWrite_SDOT_I : SchedWriteRes<[A64FXGI0, A64FXGI3]>;
def : InstRW<[A64FXWrite_SDOT_I],
             (instregex "^[SU]DOT_ZZZI")>;
2472
// SQINC/SQDEC/UQINC/UQDEC with a B/H/W/D element letter and a _W or _X
// suffix.
let Latency = 2, ReleaseAtCycles = [2] in
def A64FXWrite_SQINC_Scalar : SchedWriteRes<[A64FXGI24]>;
def : InstRW<[A64FXWrite_SQINC_Scalar],
             (instregex "^[SU]Q(INC|DEC)[BHWD]_[WX]")>;

// SQINCP/SQDECP/UQINCP/UQDECP with a _W or _X suffix.
let Latency = 6, NumMicroOps = 2, ReleaseAtCycles = [3, 1] in
def A64FXWrite_SQINCP_X : SchedWriteRes<[A64FXGI24, A64FXGI3]>;
def : InstRW<[A64FXWrite_SQINCP_X],
             (instregex "^[SU]Q(INC|DEC)P_[WX]")>;

// SQINCP/SQDECP/UQINCP/UQDECP with a _Z suffix.
let Latency = 12 in
def A64FXWrite_SQINCP_Z : SchedWriteRes<[A64FXGI24, A64FXGI3]>;
def : InstRW<[A64FXWrite_SQINCP_Z],
             (instregex "^[SU]Q(INC|DEC)P_Z")>;
2490
// SVE ST1 / STNT1 opcodes with a B/H/W/D element letter.
let Latency = 11 in
def A64FXWrite_ST1 : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_ST1],
             (instregex "^ST(NT)?1[BHWD]")>;
2495
// SST1 opcodes, split by element size (W vs. D writes) and by whether
// the opcode name continues with "_I" (ZI writes) or not (RZ writes).
// The RZ writes additionally use A64FXGI2.

// S elements, opcodes not continuing with "_I".
let Latency = 20, NumMicroOps = 8, ReleaseAtCycles = [8, 8, 8, 8] in
def A64FXWrite_SST1_W_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_SST1_W_RZ],
    (instregex "^SST1[BH]_S(_[^I]|$)",
               "^SST1W(_[^ID]|$)")>;

// D elements, opcodes not continuing with "_I".
let Latency = 20, NumMicroOps = 4, ReleaseAtCycles = [4, 4, 4, 4] in
def A64FXWrite_SST1_D_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_SST1_D_RZ],
    (instregex "^SST1[BHW]_D(_[^I]|$)",
               "^SST1D(_[^I]|$)")>;

// S elements, "_I" opcodes.
let Latency = 16, NumMicroOps = 8, ReleaseAtCycles = [12, 8, 8] in
def A64FXWrite_SST1_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_SST1_W_ZI],
    (instregex "^SST1[BH]_S_I",
               "^SST1W_I")>;

// D elements, "_I" opcodes.
let Latency = 16, NumMicroOps = 4, ReleaseAtCycles = [4, 4, 4] in
def A64FXWrite_SST1_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_SST1_D_ZI],
    (instregex "^SST1[BHW]_D_I",
               "^SST1D_I")>;
2527
// SVE ST2B / ST2H (any suffix).
let Latency = 12, NumMicroOps = 3, ReleaseAtCycles = [8, 9] in
def A64FXWrite_ST2_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_ST2_BH],
             (instregex "^ST2[BH]")>;
2534
// SVE ST2W / ST2D, split into the register+immediate (_RI) and
// register+register (_RR) writes.
//
// The two instregex patterns were previously swapped relative to the write
// names: the _RI write was bound to the bare opcode names and the _RR write to
// the "_I..." names. The LD2[WD] entries in this file give the _IMM opcodes
// the 11-cycle / fewer-micro-op write and the bare opcodes the 12-cycle one;
// the bindings below follow the same convention.
// NOTE(review): assumes the "_IMM"-suffixed opcodes are the register+immediate
// addressing forms, as for the loads -- confirm against the SVE instruction
// definitions.
def A64FXWrite_ST2_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2, 2];
}
def : InstRW<[A64FXWrite_ST2_WD_RI], (instregex "^ST2[WD]_I")>;

def A64FXWrite_ST2_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2, 3];
}
def : InstRW<[A64FXWrite_ST2_WD_RR], (instregex "^ST2[WD]$")>;
2548
// SVE ST3B / ST3H (any suffix).
let Latency = 15, NumMicroOps = 4, ReleaseAtCycles = [12, 13] in
def A64FXWrite_ST3_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_ST3_BH],
             (instregex "^ST3[BH]")>;
2555
// SVE ST3W / ST3D, split into the register+immediate (_RI) and
// register+register (_RR) writes.
//
// The two instregex patterns were previously swapped relative to the write
// names: the _RI write was bound to the bare opcode names and the _RR write to
// the "_I..." names. The LD3[WD] entries in this file give the _IMM opcodes
// the 11-cycle / fewer-micro-op write and the bare opcodes the 12-cycle one;
// the bindings below follow the same convention.
// NOTE(review): assumes the "_IMM"-suffixed opcodes are the register+immediate
// addressing forms, as for the loads -- confirm against the SVE instruction
// definitions.
def A64FXWrite_ST3_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3, 3];
}
def : InstRW<[A64FXWrite_ST3_WD_RI], (instregex "^ST3[WD]_I")>;

def A64FXWrite_ST3_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3, 4];
}
def : InstRW<[A64FXWrite_ST3_WD_RR], (instregex "^ST3[WD]$")>;
2569
// SVE ST4B / ST4H (any suffix).
let Latency = 15, NumMicroOps = 5, ReleaseAtCycles = [16, 17] in
def A64FXWrite_ST4_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_ST4_BH],
             (instregex "^ST4[BH]")>;
2576
// SVE ST4W / ST4D, split into the register+immediate (_RI) and
// register+register (_RR) writes.
//
// The two instregex patterns were previously swapped relative to the write
// names: the _RI write was bound to the bare opcode names and the _RR write to
// the "_I..." names. The LD4[WD] entries in this file give the _IMM opcodes
// the 11-cycle / fewer-micro-op write and the bare opcodes the 12-cycle one;
// the bindings below follow the same convention.
// NOTE(review): assumes the "_IMM"-suffixed opcodes are the register+immediate
// addressing forms, as for the loads -- confirm against the SVE instruction
// definitions.
def A64FXWrite_ST4_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4, 4];
}
def : InstRW<[A64FXWrite_ST4_WD_RI], (instregex "^ST4[WD]_I")>;

def A64FXWrite_ST4_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [4, 5];
}
def : InstRW<[A64FXWrite_ST4_WD_RR], (instregex "^ST4[WD]$")>;
2590
// STR_PXI: bound by exact opcode name.
let Latency = 11 in
def A64FXWrite_STR_P : SchedWriteRes<[A64FXGI3, A64FXGI5]>;
def : InstRW<[A64FXWrite_STR_P],
             (instrs STR_PXI)>;

// STR_ZXI: bound by exact opcode name; uses A64FXGI0 where STR_P uses
// A64FXGI3.
let Latency = 11 in
def A64FXWrite_STR_Z : SchedWriteRes<[A64FXGI0, A64FXGI5]>;
def : InstRW<[A64FXWrite_STR_Z],
             (instrs STR_ZXI)>;
2600
// Opcodes matching ^WHILEL._P (WHILEL followed by any one character, then _P).
let Latency = 4 in
def A64FXWrite_WHILE : SchedWriteRes<[A64FXGI3, A64FXGI5]>;
def : InstRW<[A64FXWrite_WHILE],
             (instregex "^WHILEL._P")>;

// WRFFR: bound by exact opcode name.
let Latency = 3, NumMicroOps = 2 in
def A64FXWrite_WRFFR : SchedWriteRes<[A64FXGI3, A64FXGI5]>;
def : InstRW<[A64FXWrite_WRFFR],
             (instrs WRFFR)>;
2611
2612} // SchedModel = A64FXModel
2613