xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td (revision 1db9f3b21e39176dd5b67cf8ac378633b172463e)
1//=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the scheduling model for the Fujitsu A64FX processors.
10//
11//===----------------------------------------------------------------------===//
12
// Machine model record for the A64FX.  Every field below is a plain
// override of a SchedMachineModel field.
def A64FXModel : SchedMachineModel {
  // 6 micro-ops dispatched at a time.
  let IssueWidth = 6;
  // 180 entries in the micro-op re-order buffer.
  let MicroOpBufferSize = 180;
  // Optimistic load latency.
  let LoadLatency = 5;
  // Extra cycles for a mispredicted branch.
  let MispredictPenalty = 12;
  // Determined via a mix of micro-arch details and experimentation.
  let LoopMicroOpBufferSize = 128;
  // Use the post-RA scheduler.
  let PostRAScheduler = 1;
  let CompleteModel = 1;

  // Features this model does not describe.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SMEUnsupported.F, SVEUnsupported.F,
                  [HasMTE, HasMatMulInt8, HasBF16,
                   HasPAuth, HasPAuthLR, HasCPA]);

  // NOTE(review): presumably disabled because several InstRW entries in this
  // file name the same instruction more than once -- confirm before enabling.
  let FullInstRWOverlapCheck = 0;
}
28
29let SchedModel = A64FXModel in {
30
31// Define the issue ports.
32
33// A64FXIP*
34
// One single-unit processor resource per issue port (ports 0 through 7).
def A64FXIPFLA  : ProcResource<1>; // Port 0
def A64FXIPPR   : ProcResource<1>; // Port 1
def A64FXIPEXA  : ProcResource<1>; // Port 2
def A64FXIPFLB  : ProcResource<1>; // Port 3
def A64FXIPEXB  : ProcResource<1>; // Port 4
def A64FXIPEAGA : ProcResource<1>; // Port 5
def A64FXIPEAGB : ProcResource<1>; // Port 6
def A64FXIPBR   : ProcResource<1>; // Port 7
58
// Resource groups over the issue ports.  The digits in a group name are the
// port numbers it contains; the groups are referenced by the WriteRes and
// SchedWriteRes definitions later in this file.

// Single-port groups.
def A64FXGI7 : ProcResGroup<[A64FXIPBR]>;
def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>;
def A64FXGI1 : ProcResGroup<[A64FXIPPR]>;
def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>;
def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>;
def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>;
def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>;
def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>;

// Multi-port groups.
def A64FXGI03   : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>;
def A64FXGI01   : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>;
def A64FXGI24   : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>;
def A64FXGI56   : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>;
def A64FXGI056  : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>;
def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB,
                                A64FXIPEAGA, A64FXIPEAGB]>;

// Every issue port.
def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB,
                             A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB,
                             A64FXIPBR]>;
92
// Basic write types, named A64FXWrite_<latency>Cyc_<resource group>.
// Each one sets only Latency, except the NGI24 variant which also sets
// NumMicroOps.

// Group GI7.
def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]> { let Latency = 1; }

// Group GI0.
def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 2; }
def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 4; }
def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 6; }
def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 8; }
def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 9; }

// Groups GI1 and GI2.
def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]> { let Latency = 3; }
def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { let Latency = 5; }

// Group GI3.
def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { let Latency = 4; }
def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { let Latency = 6; }

// Group GI03.
def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 4; }
def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }
def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 9; }

// Group GI4.
def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 10; }
def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 12; }
def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 20; }

// Groups GI5 and GI6.
def A64FXWrite_5Cyc_GI5  : SchedWriteRes<[A64FXGI5]> { let Latency = 5; }
def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]> { let Latency = 11; }
def A64FXWrite_5Cyc_GI6  : SchedWriteRes<[A64FXGI6]> { let Latency = 5; }

// Group GI24.
def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { let Latency = 1; }
def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { let Latency = 2; }
def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]> {
  let Latency = 4;
  let NumMicroOps = 4;
}

// Group GI56.
def A64FXWrite_1Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_5Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 5; }
def A64FXWrite_8Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 8; }
def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { let Latency = 11; }
197
// Load write types.  Each one sets both a latency and a micro-op count.

// Scalar load pair / pre- and post-indexed loads.
def A64FXWrite_LDNP : SchedWriteRes<[A64FXGI56]> {
  let Latency = 5; let NumMicroOps = 2;
}
def A64FXWrite_LDP01 : SchedWriteRes<[A64FXGI2456]> {
  let Latency = 5; let NumMicroOps = 3;
}
def A64FXWrite_LDR01 : SchedWriteRes<[A64FXGI2456]> {
  let Latency = 5; let NumMicroOps = 2;
}

// LD102 .. LD115: group GI56, latency alternating between 8 and 11.
def A64FXWrite_LD102 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD103 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 2;
}
def A64FXWrite_LD104 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD105 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 3;
}
def A64FXWrite_LD106 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD107 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 4;
}
def A64FXWrite_LD108 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD109 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 2;
}
def A64FXWrite_LD110 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD111 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 3;
}
def A64FXWrite_LD112 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD113 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 4;
}
def A64FXWrite_LD114 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 5;
}
def A64FXWrite_LD115 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 5;
}

// LD1I0 .. LD4I1: group GI056, latency 8, micro-op count from 2 up to 9.
def A64FXWrite_LD1I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD1I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD2I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD2I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 5;
}
def A64FXWrite_LD3I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 6;
}
def A64FXWrite_LD3I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 7;
}
def A64FXWrite_LD4I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 8;
}
def A64FXWrite_LD4I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 9;
}
323
// Single-cycle write on the GI2456 group.
def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]> { let Latency = 1; }

// Named write types on the GI03 group (GI0 for the two SHA entries).
// Entries without a NumMicroOps override set Latency only.
def A64FXWrite_FMOV_GV   : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }
def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_ADDLV     : SchedWriteRes<[A64FXGI03]> { let Latency = 12; }
def A64FXWrite_MULLE     : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_MULLV     : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_MADDL     : SchedWriteRes<[A64FXGI03]> { let Latency = 6; }
def A64FXWrite_ABA       : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }
def A64FXWrite_ABAL      : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }

def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 12; let NumMicroOps = 6;
}
def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 14; let NumMicroOps = 6;
}

def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]> { let Latency = 9; }
def A64FXWrite_PMUL     : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }

def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 2;
}

def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 2;
}
def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 3;
}
def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 2;
}
def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 2;
}
def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 14; let NumMicroOps = 7;
}

def A64FXWrite_BIF        : SchedWriteRes<[A64FXGI03]> { let Latency = 5; }
def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }

// The two SHA write types use group GI0 rather than GI03.
def A64FXWrite_SHA00 : SchedWriteRes<[A64FXGI0]> { let Latency = 9; }
def A64FXWrite_SHA01 : SchedWriteRes<[A64FXGI0]> { let Latency = 12; }

def A64FXWrite_SMOV : SchedWriteRes<[A64FXGI03]> { let Latency = 25; }

// TBX1 .. TBX4: latency 10, micro-op count 3, 5, 7, 9.
def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 5;
}
def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 7;
}
def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 9;
}
474
// Prefetch, swap, store and compare-and-swap write types.  All of them use
// the GI56 group; they are grouped here by the latency they set.

// Latency 0.
let Latency = 0 in {
  def A64FXWrite_PREF0 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_PREF1 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_SWP   : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_STUR  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_STNP  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_STP01 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST10  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST11  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST12  : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST13  : SchedWriteRes<[A64FXGI56]>;
}

// Latency 1.
let Latency = 1 in {
  def A64FXWrite_ST14 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST15 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST16 : SchedWriteRes<[A64FXGI56]>;
  def A64FXWrite_ST17 : SchedWriteRes<[A64FXGI56]>;
}

// Latency 7.
let Latency = 7 in
def A64FXWrite_CAS : SchedWriteRes<[A64FXGI56]>;
534
535// Define commonly used read types.
536
// No forwarding is provided for these read types: each one gets a
// ReadAdvance of 0 cycles.
foreach Rd = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID,
              ReadExtrHi, ReadAdrBase, ReadST, ReadVLD] in
def : ReadAdvance<Rd, 0>;
548
549//===----------------------------------------------------------------------===//
550// 3. Instruction Tables.
551
552//---
553// 3.1 Branch Instructions
554//---
555
// Single-cycle writes for branches and for system, barrier and hint
// instructions.
let Latency = 1 in {
  // Branch, immed
  // Branch and link, immed
  // Compare and branch
  def : WriteRes<WriteBr, [A64FXGI7]>;

  // Branch, register
  // Branch and link, register != LR
  // Branch and link, register = LR
  def : WriteRes<WriteBrReg, [A64FXGI7]>;

  // These three consume no processor resources.
  def : WriteRes<WriteSys,     []>;
  def : WriteRes<WriteBarrier, []>;
  def : WriteRes<WriteHint,    []>;
}

// Atomics: latency 4, no processor resources.
def : WriteRes<WriteAtomic, []> { let Latency = 4; }
577
578//---
579// Branch
580//---
// All branch forms use the 1-cycle write on group GI7.
def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR, RET)>;
def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^B..$")>;
def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^CBZ",
                                               "^CBNZ",
                                               "^TBZ",
                                               "^TBNZ")>;
586
587//---
588// 3.2 Arithmetic and Logical Instructions
589// 3.3 Move and Shift Instructions
590//---
591
// ALU, basic
// Conditional compare
// Conditional select
// Address generation
def : WriteRes<WriteI, [A64FXGI2456]> { let Latency = 1; }

// Opcode-name patterns mapped to WriteI.
def : InstRW<[WriteI], (instregex
    // Add / subtract and the carry forms.
    "ADD?(W|X)r(i|r|s|x)",
    "ADDS?(W|X)r(i|r|s|x)(64)?",
    "AND?(W|X)r(i|r|s|x)",
    "ANDS?(W|X)r(i|r|s|x)",
    "ADC(W|X)r",
    // Logical.
    "BIC?(W|X)r(i|r|s|x)",
    "BICS?(W|X)r(i|r|s|x)",
    "EON?(W|X)r(i|r|s|x)",
    "ORN?(W|X)r(i|r|s|x)",
    "ORR?(W|X)r(i|r|s|x)",
    "SUB?(W|X)r(i|r|s|x)",
    "SUBS?(W|X)r(i|r|s|x)",
    "SBC(W|X)r",
    "SBCS(W|X)r",
    // Conditional compare / select.
    "CCMN(W|X)(i|r)",
    "CCMP(W|X)(i|r)",
    "CSEL(W|X)r",
    "CSINC(W|X)r",
    "CSINV(W|X)r",
    "CSNEG(W|X)r")>;

// Register copies are treated as basic ALU operations.
def : InstRW<[WriteI], (instrs COPY)>;
614
// ALU, extend and/or shift
def : WriteRes<WriteISReg, [A64FXGI2456]> { let Latency = 2; }

// NOTE(review): this pattern list is the same one used for the WriteI
// mapping earlier in this file -- confirm which mapping is meant to apply.
def : InstRW<[WriteISReg], (instregex
    "ADD?(W|X)r(i|r|s|x)",
    "ADDS?(W|X)r(i|r|s|x)(64)?",
    "AND?(W|X)r(i|r|s|x)",
    "ANDS?(W|X)r(i|r|s|x)",
    "ADC(W|X)r",
    "BIC?(W|X)r(i|r|s|x)",
    "BICS?(W|X)r(i|r|s|x)",
    "EON?(W|X)r(i|r|s|x)",
    "ORN?(W|X)r(i|r|s|x)",
    "ORR?(W|X)r(i|r|s|x)",
    "SUB?(W|X)r(i|r|s|x)",
    "SUBS?(W|X)r(i|r|s|x)",
    "SBC(W|X)r",
    "SBCS(W|X)r",
    "CCMN(W|X)(i|r)",
    "CCMP(W|X)(i|r)",
    "CSEL(W|X)r",
    "CSINC(W|X)r",
    "CSINV(W|X)r",
    "CSNEG(W|X)r")>;
632
// WriteIEReg: group GI2456, latency 1.
def : WriteRes<WriteIEReg, [A64FXGI2456]> { let Latency = 1; }

// NOTE(review): this pattern list is the same one used for the WriteI and
// WriteISReg mappings earlier in this file -- confirm which mapping is
// meant to apply.
def : InstRW<[WriteIEReg], (instregex
    "ADD?(W|X)r(i|r|s|x)",
    "ADDS?(W|X)r(i|r|s|x)(64)?",
    "AND?(W|X)r(i|r|s|x)",
    "ANDS?(W|X)r(i|r|s|x)",
    "ADC(W|X)r",
    "BIC?(W|X)r(i|r|s|x)",
    "BICS?(W|X)r(i|r|s|x)",
    "EON?(W|X)r(i|r|s|x)",
    "ORN?(W|X)r(i|r|s|x)",
    "ORR?(W|X)r(i|r|s|x)",
    "SUB?(W|X)r(i|r|s|x)",
    "SUBS?(W|X)r(i|r|s|x)",
    "SBC(W|X)r",
    "SBCS(W|X)r",
    "CCMN(W|X)(i|r)",
    "CCMP(W|X)(i|r)",
    "CSEL(W|X)r",
    "CSINC(W|X)r",
    "CSINV(W|X)r",
    "CSNEG(W|X)r")>;
649
// Move immed
def : WriteRes<WriteImm, [A64FXGI2456]> { let Latency = 1; }

// MOVK / MOVN / MOVZ, W and X forms.
def : InstRW<[A64FXWrite_1Cyc_GI2456], (instrs MOVKWi, MOVKXi,
                                               MOVNWi, MOVNXi,
                                               MOVZWi, MOVZXi)>;

// ASRV / LSLV / RORV, W and X forms.
def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs ASRVWr, ASRVXr,
                                             LSLVWr, LSLVXr,
                                             RORVWr, RORVXr)>;

// Variable shift
def : WriteRes<WriteIS, [A64FXGI2456]> { let Latency = 1; }
665
666//---
667// 3.4 Divide and Multiply Instructions
668//---
669
// Integer divide.  Both forms use group GI4, and ReleaseAtCycles is set to
// the same value as the latency.

// Divide, W-form
def : WriteRes<WriteID32, [A64FXGI4]> {
  let Latency         = 39;
  let ReleaseAtCycles = [39];
}

// Divide, X-form
def : WriteRes<WriteID64, [A64FXGI4]> {
  let Latency         = 23;
  let ReleaseAtCycles = [23];
}
681
// Multiply accumulate, W-form
def : WriteRes<WriteIM32, [A64FXGI2456]> { let Latency = 5; }

// Multiply accumulate, X-form
def : WriteRes<WriteIM64, [A64FXGI2456]> { let Latency = 5; }

// Map each multiply-accumulate form to the write type of matching width.
// WriteIM32 and WriteIM64 are defined identically just above (same resource
// group, same latency), so the X-form entry differs from the W-form entry
// in name only.
def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
def : InstRW<[WriteIM64], (instrs MADDXrrr, MSUBXrrr)>;

// Long multiply-accumulate (SMADDL, UMADDL, SMSUBL, UMSUBL).
def : InstRW<[A64FXWrite_MADDL],
            (instregex "(S|U)(MADDL|MSUBL)rrr")>;

// Integer divides use the WriteID32 / WriteID64 write of matching width.
def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;
699
// Bitfield extract, two reg
def : WriteRes<WriteExtr, [A64FXGI2456]> { let Latency = 1; }

// Multiply high
def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>;

// Miscellaneous Data-Processing Instructions

// Bitfield extract
def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move - basic
def : InstRW<[A64FXWrite_1Cyc_GI24], (instrs SBFMWri, SBFMXri,
                                             UBFMWri, UBFMXri)>;

// Bitfield move, insert.  The "^BFM" entry is listed ahead of the broader
// "(S|U)?BFM.*" pattern, whose optional prefix also matches BFM names.
def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>;
def : InstRW<[A64FXWrite_1Cyc_GI24],  (instregex "(S|U)?BFM.*")>;

// Count leading
def : InstRW<[A64FXWrite_2Cyc_GI0], (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$")>;

// Reverse bits
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>;
726
// Cryptography Extensions, grouped by the write type they use.

// 8-cycle write on group GI0.
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AES[DE]",
                                               "^AESI?MC",
                                               "^PMULL",
                                               "^SHA1(H|SU1)",
                                               "^SHA256SU0",
                                               "^SHA256SU1")>;

// A64FXWrite_SHA00.
def : InstRW<[A64FXWrite_SHA00], (instregex "^SHA1SU0")>;

// A64FXWrite_SHA01.
def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA1[CMP]",
                                            "^SHA256(H|H2)")>;

// CRC Instructions: byte and halfword forms use the 10-cycle write, word
// forms the 12-cycle write and doubleword forms the 20-cycle write, for
// both the CRC32 and the CRC32C variants.
def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32Brr,  CRC32Hrr,
                                             CRC32CBrr, CRC32CHrr)>;
def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr, CRC32CWrr)>;
def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr, CRC32CXrr)>;
746
747// Reverse bits/bytes
748// NOTE: Handled by WriteI.
749
750//---
751// 3.6 Load Instructions
752// 3.10 FP Load Instructions
753//---
754
// Load register, literal
// Load register, unscaled immed
// Load register, immed unprivileged
// Load register, unsigned immed
def : WriteRes<WriteLD, [A64FXGI56]> { let Latency = 4; }

// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
def : WriteRes<WriteAdr, [A64FXGI2456]> { let Latency = 1; }

// Load pair, immed offset, normal
// Load pair, immed offset, signed words, base != SP
// Load pair, immed offset signed words, base = SP
// LDP only breaks into *one* LS micro-op, so WriteLDHi carries no resources
// of its own; the resources are handled by WriteLD.
def : WriteRes<WriteLDHi, []> { let Latency = 5; }

// Load register offset, basic
// Load register, register offset, scale by 4/8
// Load register, register offset, scale by 2
// Load register offset, extend
// Load register, register offset, extend, scale by 4/8
// Load register, register offset, extend, scale by 2
//
// Both variants resolve to the same write type.
def A64FXWriteLDIdx : SchedWriteVariant<[
    SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>,
    SchedVar<NoSchedPred,   [A64FXWrite_1Cyc_GI56]>
  ]>;
def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>;

// Address-base read: both variants resolve to ReadDefault.
def A64FXReadAdrBase : SchedReadVariant<[
    SchedVar<ScaledIdxPred, [ReadDefault]>,
    SchedVar<NoSchedPred,   [ReadDefault]>
  ]>;
def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>;
795
796// Load pair, immed pre-index, normal
797// Load pair, immed pre-index, signed words
798// Load pair, immed post-index, normal
799// Load pair, immed post-index, signed words
800// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.
801
// Load pair, immediate offset (LDNP and LDP forms).
def : InstRW<[A64FXWrite_LDNP, WriteLDHi],
            (instrs LDNPDi, LDNPQi, LDNPSi, LDNPWi, LDNPXi,
                    LDPDi, LDPQi, LDPSi, LDPSWi, LDPWi, LDPXi)>;

// Load register, unsigned immediate.
def : InstRW<[A64FXWrite_5Cyc_GI56],
            (instrs LDRBui, LDRDui, LDRHui, LDRQui, LDRSui)>;

// Load register, literal.
def : InstRW<[A64FXWrite_5Cyc_GI6],
            (instrs LDRDl, LDRQl, LDRWl, LDRXl)>;

// LDTR forms, including the sign-extending ones.
def : InstRW<[A64FXWrite_5Cyc_GI56],
            (instrs LDTRBi, LDTRHi, LDTRWi, LDTRXi,
                    LDTRSBWi, LDTRSBXi, LDTRSHWi, LDTRSHXi, LDTRSWi)>;
836
// Load pair, immed pre-index: one entry per register class (D, Q, S, W, X).
// The last entry names LDPXpre so that the X form is covered alongside the
// D/Q/S/W forms instead of LDPWpre being listed a second time.
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPDpre)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPQpre)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPSpre)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPWpre)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPXpre)>;
847
// Load register, pre-index.
def : InstRW<[A64FXWrite_LDR01, WriteAdr],
            (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre, LDRSpre,
                    LDRWpre, LDRXpre)>;

// Sign-extending byte and halfword loads, pre- and post-index.
def : InstRW<[A64FXWrite_LDR01, WriteAdr],
            (instrs LDRSBWpre, LDRSBXpre, LDRSBWpost, LDRSBXpost,
                    LDRSHWpre, LDRSHXpre, LDRSHWpost, LDRSHXpost)>;

// LDRBB / LDRHH, pre- and post-index.
def : InstRW<[A64FXWrite_LDR01, WriteAdr],
            (instrs LDRBBpre, LDRBBpost, LDRHHpre, LDRHHpost)>;
871
// Load pair, immed post-index.
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost)>;

// Load register, immed post-index.  The second write is WriteI here,
// not WriteAdr.
def : InstRW<[A64FXWrite_LDR01, WriteI],
            (instrs LDRBpost, LDRDpost, LDRHpost, LDRQpost, LDRSpost,
                    LDRWpost, LDRXpost)>;
890
// Pre- and post-indexed load pair / load register.  Mappings for these
// instruction names also appear earlier in this file.

// Load pair, immed pre-index.
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPDpre, LDPQpre, LDPSpre, LDPWpre, LDPXpre)>;

// Load register, immed pre-index.
def : InstRW<[A64FXWrite_LDR01, WriteAdr],
            (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre, LDRSpre,
                    LDRWpre, LDRXpre)>;

// Load pair, immed post-index.
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost)>;

// Load register, immed post-index.
def : InstRW<[A64FXWrite_LDR01, WriteI],
            (instrs LDRBpost, LDRDpost, LDRHpost, LDRQpost, LDRSpost,
                    LDRWpost, LDRXpost)>;
928
// Load register, register offset.  Every entry pairs the 5-cycle GI56 write
// with the ReadAdrBase read.

// roW forms.
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
            (instrs LDRBroW, LDRDroW, LDRHroW, LDRHHroW, LDRQroW,
                    LDRSroW, LDRSHWroW, LDRSHXroW, LDRWroW, LDRXroW)>;

// roX forms.
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
            (instrs LDRBroX, LDRDroX, LDRHHroX, LDRHroX, LDRQroX,
                    LDRSroX, LDRSHWroX, LDRSHXroX, LDRWroX, LDRXroX)>;
950
// Load register, register offset (roW and roX forms), each using the
// 5-cycle GI56 write together with the ReadAdrBase read.  The same
// instruction names are also mapped, with the same write/read list, by the
// per-instruction entries earlier in this file.
//
// LDRBroW is listed once here; it used to be named by two consecutive
// entries.
// NOTE(review): the repeated entry may have been meant for LDRBBroW, which
// this group does not cover -- confirm against the A64FX documentation
// before adding it.
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
            (instrs LDRBroW, LDRDroW, LDRHroW, LDRHHroW, LDRQroW,
                    LDRSroW, LDRSHWroW, LDRSHXroW, LDRWroW, LDRXroW)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
            (instrs LDRBroX, LDRDroX, LDRHroX, LDRHHroX, LDRQroX,
                    LDRSroX, LDRSHWroX, LDRSHXroX, LDRWroX, LDRXroX)>;
993
// Load register, unscaled immediate (LDUR forms), including the
// sign-extending variants.
def : InstRW<[A64FXWrite_5Cyc_GI56],
            (instrs LDURBi, LDURBBi, LDURDi, LDURHi, LDURHHi, LDURQi,
                    LDURSi, LDURXi,
                    LDURSBWi, LDURSBXi, LDURSHWi, LDURSHXi, LDURSWi)>;
1007
1008//---
1009// Prefetch
1010//---
// PRFMl has its own write; the remaining prefetch opcodes share
// A64FXWrite_PREF1.
def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>;
def : InstRW<[A64FXWrite_PREF1],
            (instrs PRFUMi, PRFMui, PRFMroW, PRFMroX)>;
1016
1017//--
1018// 3.7 Store Instructions
1019// 3.11 FP Store Instructions
1020//--
1021
// Store register, unscaled immed
// Store register, immed unprivileged
// Store register, unsigned immed
// Generic store write: resource A64FXGI56, latency 1.
let Latency = 1 in
def : WriteRes<WriteST, [A64FXGI56]>;
1028
1029// Store register, immed post-index
1030// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase
1031
1032// Store register, immed pre-index
1033// NOTE: Handled by WriteAdr, WriteST
1034
// Store register, register offset, basic
// Store register, register offset, scaled by 4/8
// Store register, register offset, scaled by 2
// Store register, register offset, extend
// Store register, register offset, extend, scale by 4/8
// Store register, register offset, extend, scale by 1
// Indexed store write: resources A64FXGI56 and A64FXGI2456, latency 1.
let Latency = 1 in
def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]>;
1044
// Store pair, immed offset, W-form
// Store pair, immed offset, X-form
// Store-pair write: resource A64FXGI56, latency 1.
let Latency = 1 in
def : WriteRes<WriteSTP, [A64FXGI56]>;
1050
1051// Store pair, immed post-index, W-form
1052// Store pair, immed post-index, X-form
1053// Store pair, immed pre-index, W-form
1054// Store pair, immed pre-index, X-form
1055// NOTE: Handled by WriteAdr, WriteSTP.
1056
// STUR* stores: one shared write for every opcode in the list.
def : InstRW<[A64FXWrite_STUR],
            (instrs STURBi, STURBBi, STURDi, STURHi, STURHHi, STURQi,
                    STURSi, STURWi, STURXi)>;
1066
// STTR* stores: WriteAdr followed by the STUR write.
def : InstRW<[WriteAdr, A64FXWrite_STUR],
            (instrs STTRBi, STTRHi, STTRWi, STTRXi)>;
1071
// STNP* and STP* (immediate-offset forms): both groups use A64FXWrite_STNP.
def : InstRW<[A64FXWrite_STNP],
            (instrs STNPDi, STNPQi, STNPXi, STNPWi,
                    STPDi, STPQi, STPXi, STPWi)>;
1081
// STR*ui stores: each opcode is listed exactly once with the STUR write.
// NOTE(review): STRBBui, STRHHui and STRSui are not covered by this list -
// confirm whether they were meant to be included.
def : InstRW<[A64FXWrite_STUR],
            (instrs STRBui, STRDui, STRHui, STRQui, STRXui, STRWui)>;
1094
// Store pair, immed pre-index / post-index.
// Two variants are registered for the same opcodes, in this order: first
// without ReadAdrBase, then with it. Each variant is stated once.
// NOTE(review): both variants match every opcode below, and this model sets
// FullInstRWOverlapCheck = 0 - confirm which of the two is the intended one.
def : InstRW<[A64FXWrite_STP01],
            (instrs STPDpre, STPDpost, STPQpre, STPQpost, STPSpre, STPSpost,
                    STPWpre, STPWpost, STPXpre, STPXpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
            (instrs STPDpre, STPDpost, STPQpre, STPQpost, STPSpre, STPSpost,
                    STPWpre, STPWpost, STPXpre, STPXpost)>;
1135
// Store register, immed pre-index / post-index.
// Two variants are registered for the same opcodes, in this order: first
// without ReadAdrBase, then with it. Each variant is stated once.
// NOTE(review): both variants match every opcode below, and this model sets
// FullInstRWOverlapCheck = 0 - confirm which of the two is the intended one.
def : InstRW<[WriteAdr, A64FXWrite_STP01],
            (instrs STRBpre, STRBpost, STRBBpre, STRBBpost, STRDpre, STRDpost,
                    STRHpre, STRHpost, STRHHpre, STRHHpost, STRQpre, STRQpost,
                    STRSpre, STRSpost, STRWpre, STRWpost, STRXpre, STRXpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
            (instrs STRBpre, STRBpost, STRBBpre, STRBBpost, STRDpre, STRDpost,
                    STRHpre, STRHpost, STRHHpre, STRHHpost, STRQpre, STRQpost,
                    STRSpre, STRSpost, STRWpre, STRWpost, STRXpre, STRXpost)>;
1208
// Store register, register offset (W- and X-register index forms).
// Every opcode is listed exactly once; all share the STUR write together
// with ReadAdrBase.
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
            (instrs STRBroW, STRBroX, STRBBroW, STRBBroX, STRDroW, STRDroX,
                    STRHroW, STRHroX, STRHHroW, STRHHroX, STRQroW, STRQroX,
                    STRSroW, STRSroX, STRWroW, STRWroX, STRXroW, STRXroX)>;
1245
1246//---
1247// 3.8 FP Data Processing Instructions
1248//---
1249
// FP absolute value
// FP min/max
// FP negate
// Generic FP write: resource A64FXGI03, latency 4, ReleaseAtCycles [2].
let Latency = 4, ReleaseAtCycles = [2] in
def : WriteRes<WriteF, [A64FXGI03]>;
1257
// FP arithmetic: FADD/FSUB, D and H register forms.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instrs FADDDrr, FADDHrr, FSUBDrr, FSUBHrr)>;
1262
// FP compare: resource A64FXGI03, latency 4, ReleaseAtCycles [2].
let Latency = 4, ReleaseAtCycles = [2] in
def : WriteRes<WriteFCmp, [A64FXGI03]>;
1268
// FP Div, Sqrt
// The generic WriteFDiv and the named writes below all use A64FXGI0 and
// differ only in latency.
let Latency = 43 in
def : WriteRes<WriteFDiv, [A64FXGI0]>;

let Latency = 38 in
def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]>;

let Latency = 29 in
def A64FXXWriteFDivSP : SchedWriteRes<[A64FXGI0]>;

let Latency = 43 in
def A64FXXWriteFDivDP : SchedWriteRes<[A64FXGI0]>;

let Latency = 29 in
def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]>;

let Latency = 43 in
def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]>;
1293
// FP divide, S-form
// FP square root, S-form
// The scalar opcodes are named directly via instrs; the regex entries cover
// the names ending in "32".
// NOTE(review): "^.*SQRT.*32$" matches any name that contains SQRT and ends
// in 32, which textually includes names starting with FRSQRTE/URSQRTE that
// are given a different write by the "reciprocal estimate" entry of this
// file - confirm that this breadth is intended.
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>;
def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>;
def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
1302
// FP divide, D-form
// FP square root, D-form
// The scalar opcodes are named directly via instrs; the regex entries cover
// the names ending in "64".
// NOTE(review): "^.*SQRT.*64$" matches any name that contains SQRT and ends
// in 64, not only FSQRT - confirm that this breadth is intended.
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>;
def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>;
def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
1311
// FP round to integral: FRINT{A,I,M,N,P,X,Z} on the Sr/Dr forms.
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex
                                      "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;

// FP select: every name starting with FCSEL.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^FCSEL")>;
1318
1319//---
1320// 3.9 FP Miscellaneous Instructions
1321//---
1322
// FP convert, from vec to vec reg
// FP convert, from gen to vec reg
// FP convert, from vec to gen reg
// Resource A64FXGI03, latency 9, ReleaseAtCycles [2].
let Latency = 9, ReleaseAtCycles = [2] in
def : WriteRes<WriteFCvt, [A64FXGI03]>;
1330
// WriteFImm and WriteFCopy have identical settings: resource A64FXGI0,
// latency 4, ReleaseAtCycles [2].
let Latency = 4, ReleaseAtCycles = [2] in {
  // FP move, immed
  // FP move, register
  def : WriteRes<WriteFImm, [A64FXGI0]>;

  // FP transfer, from gen to vec reg
  // FP transfer, from vec to gen reg
  def : WriteRes<WriteFCopy, [A64FXGI0]>;
}
1344
// The two "High" FMOV opcodes each get a dedicated write.
def : InstRW<[A64FXWrite_FMOV_GV],
            (instrs FMOVXDHighr)>;
def : InstRW<[A64FXWrite_FMOV_VG14],
            (instrs FMOVDXHighr)>;
1347
1348//---
1349// 3.12 ASIMD Integer Instructions
1350//---
1351
1352// ASIMD absolute diff, D-form
1353// ASIMD absolute diff, Q-form
1354// ASIMD absolute diff accum, D-form
1355// ASIMD absolute diff accum, Q-form
1356// ASIMD absolute diff accum long
1357// ASIMD absolute diff long
1358// ASIMD arith, basic
1359// ASIMD arith, complex
1360// ASIMD compare
1361// ASIMD logical (AND, BIC, EOR)
1362// ASIMD max/min, basic
1363// ASIMD max/min, reduce, 4H/4S
1364// ASIMD max/min, reduce, 8B/8H
1365// ASIMD max/min, reduce, 16B
1366// ASIMD multiply, D-form
1367// ASIMD multiply, Q-form
1368// ASIMD multiply accumulate long
1369// ASIMD multiply accumulate saturating long
1370// ASIMD multiply long
1371// ASIMD pairwise add and accumulate
1372// ASIMD shift accumulate
1373// ASIMD shift by immed, basic
1374// ASIMD shift by immed and insert, basic, D-form
1375// ASIMD shift by immed and insert, basic, Q-form
1376// ASIMD shift by immed, complex
1377// ASIMD shift by register, basic, D-form
1378// ASIMD shift by register, basic, Q-form
1379// ASIMD shift by register, complex, D-form
1380// ASIMD shift by register, complex, Q-form
// Generic ASIMD writes (WriteVd and WriteVq): resource A64FXGI03, latency 4.
let Latency = 4 in {
  def : WriteRes<WriteVd, [A64FXGI03]>;
  def : WriteRes<WriteVq, [A64FXGI03]>;
}
1387
1388// ASIMD arith, reduce, 4H/4S
1389// ASIMD arith, reduce, 8B/8H
1390// ASIMD arith, reduce, 16B
1391
// ASIMD logical: names starting with ANDv, BICv, EORv, ORRv, ORNv or NOTv.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^ANDv", "^BICv", "^EORv",
                       "^ORRv", "^ORNv", "^NOTv")>;

// ASIMD arith, reduce: names starting with ADDVv, SADDLVv or UADDLVv.
def : InstRW<[A64FXWrite_ADDLV], (instregex "^ADDVv", "^SADDLVv",
                                            "^UADDLVv")>;
1399
// ASIMD multiply long / multiply-accumulate long.
// The "^(S|U|SQD)MULL" entry stays ahead of the MLAL/MLSL/MULL entry: the
// two patterns can match the same names, so their relative order is kept.
def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>;
def : InstRW<[A64FXWrite_MULLV],
            (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;

// ASIMD polynomial multiply long (PMULL), all listed arrangements.
def : InstRW<[A64FXWrite_8Cyc_GI03],
            (instregex "^PMULL(v8i8|v16i8)", "^PMULL(v1i64|v2i64)")>;
1406
// ASIMD absolute diff accum, D-form and Q-form
def : InstRW<[A64FXWrite_ABA],
            (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$",
                       "^[SU]ABA(v16i8|v8i16|v4i32)$")>;

// ASIMD absolute diff accum long
def : InstRW<[A64FXWrite_ABAL], (instregex "^[SU]ABAL")>;

// ASIMD arith, reduce: the same write for every arrangement listed.
def : InstRW<[A64FXWrite_ADDLV1],
            (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$",
                       "^[SU]?ADDL?V(v8i16|v4i32)v$",
                       "^[SU]?ADDL?Vv16i8v$")>;

// ASIMD max/min, reduce: the same write for every arrangement listed.
def : InstRW<[A64FXWrite_MINMAXV],
            (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$",
                       "^[SU](MIN|MAX)V(v8i8|v8i16)v$",
                       "^[SU](MIN|MAX)Vv16i8v$")>;
// ASIMD multiply, D-form (first pattern) and Q-form (second pattern)
def : InstRW<[A64FXWrite_PMUL],
            (instregex "^(P?MUL|SQR?DMUL)" #
                       "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
                       "(_indexed)?$",
                       "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD SQDMULH / SQRDMULH, Q-form
def : InstRW<[A64FXWrite_SQRDMULH],
            (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD multiply accumulate, D-form and Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
            (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$",
                       "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
// ASIMD shift accumulate: rounding (SRSRA/URSRA) and plain (SSRA/USRA).
def : InstRW<[A64FXWrite_SRSRAV], (instregex "SRSRAv", "URSRAv")>;
def : InstRW<[A64FXWrite_SSRAV], (instregex "SSRAv", "USRAv")>;

// ASIMD shift by immed, basic: rounding narrow, then plain narrow.
def : InstRW<[A64FXWrite_RSHRN], (instregex "RSHRNv", "SQRSHRNv",
                                            "SQRSHRUNv", "UQRSHRNv")>;
def : InstRW<[A64FXWrite_SHRN], (instregex "SHRNv", "SQSHRNv",
                                           "SQSHRUNv", "UQSHRNv")>;

// Saturating extract narrow.
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "SQXTNv", "SQXTUNv",
                                               "UQXTNv")>;
1468
// ASIMD shift by immed, complex
def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>;

// One write shared by, in order:
//   SQSHLU
//   ASIMD shift by register, basic, Q-form
//   ASIMD shift by register, complex, D-form
//   ASIMD shift by register, complex, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3],
            (instregex "^SQSHLU",
                       "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)",
                       "^[SU][QR]{1,2}SHL" #
                       "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)",
                       "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
1482
// ASIMD Arithmetic
// Several patterns in this group can match the same names, so the entries
// keep their relative order; only neighbouring entries that use the same
// write are folded together.

// ADD/SUB, D-form and Q-form arrangements.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)",
                       "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;

// Narrowing high-half add/sub, plain and rounding.
def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>;
def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>;

// Saturating / rounding-halving arithmetic.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD", "^SUQADD",
                       "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;

// Pairwise add, Q-form arrangements.
def : InstRW<[A64FXWrite_ADDP],
            (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;

// Logical operations (the pattern also names the Xr* forms).
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
                       "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;

// Count leading sign/zero bits, population count.
def : InstRW<[A64FXWrite_4Cyc_GI0],
            (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;

// Pairwise add-long (accumulating and plain) and add-long across vector.
def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>;
def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>;
def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>;

// Across-vector add and min/max.
def : InstRW<[A64FXWrite_MINMAXV],
            (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv",
                       "^UMINVv")>;

// Absolute difference accumulate (and long).
def : InstRW<[A64FXWrite_ABA],
            (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;

// Saturating add/sub, vector forms.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^SQADDv", "^SQSUBv", "^UQADDv", "^UQSUBv",
                       "^SUQADDv", "^USQADDv")>;

// Narrowing high-half add/sub, plain and rounding (vector-prefixed names).
def : InstRW<[A64FXWrite_SHRN], (instregex "^ADDHNv", "^SUBHNv")>;
def : InstRW<[A64FXWrite_RSHRN], (instregex "^RADDHNv", "^RSUBHNv")>;

// Saturating / rounding-halving arithmetic, including SQABS.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^SQABS", "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
                       "^SUQADD", "^UQADD", "^UQSUB", "^URHADD",
                       "^USQADD")>;
1518
// ASIMD integer compare.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv",
                       "^CMLTv", "^CMHIv", "^CMHSv")>;

// ASIMD integer max/min, element-wise and pairwise.
def : InstRW<[A64FXWrite_MINMAXV], (instregex "^SMAXv", "^SMINv",
                                              "^UMAXv", "^UMINv")>;
def : InstRW<[A64FXWrite_ADDP], (instregex "^SMAXPv", "^SMINPv",
                                           "^UMAXPv", "^UMINPv")>;

// ASIMD absolute difference, plain and long.
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^SABDv", "^UABDv")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^SABDLv", "^UABDLv")>;
1530
1531//---
1532// 3.13 ASIMD Floating-point Instructions
1533//---
1534
// Generic FP multiply write: resource A64FXGI03, latency 9.
let Latency = 9 in
def : WriteRes<WriteFMul, [A64FXGI03]>;
1538
// ASIMD FP absolute value
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^FABSv")>;

// ASIMD FP arith, normal, D-form
// ASIMD FP arith, normal, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FABDv", "^FADDv",
                                                "^FSUBv")>;

// ASIMD FP arith, pairwise, D-form
// ASIMD FP arith, pairwise, Q-form
def : InstRW<[A64FXWrite_FADDPV],
            (instregex "^FADDPv")>;

// ASIMD FP compare, D-form
// ASIMD FP compare, Q-form
// Absolute compares (FACGE/FACGT) and ordinary compares share one write.
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^FACGEv", "^FACGTv",
                       "^FCMEQv", "^FCMGEv", "^FCMGTv", "^FCMLEv",
                       "^FCMLTv")>;

// ASIMD FP round, D-form (first pattern) and Q-form (second pattern)
def : InstRW<[A64FXWrite_9Cyc_GI03],
            (instregex "^FRINT[AIMNPXZ](v2f32)",
                       "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
1563
// ASIMD FP convert, long
// ASIMD FP convert, narrow
// ASIMD FP convert, other, D-form
// ASIMD FP convert, other, Q-form
// All of the above share A64FXWrite_FCVTXNV; the patterns are, in order:
// long/narrow, other D-form, other Q-form.
def : InstRW<[A64FXWrite_FCVTXNV],
      (instregex
       "^FCVT(L|N|XN)v",
       "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)",
       "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
1577
// ASIMD FP divide. Each opcode is named once via instrs.
// NOTE(review): the scalar FP divide section of this file also has
// "^FDIVv.*32$" and "^FDIVv.*64$" entries that match these opcode names; for
// FDIVv4f32 the write given there (A64FXXWriteFDivSP) differs from the one
// given here (A64FXXWriteFDiv) - confirm which is intended.

// ASIMD FP divide, D-form, F32
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>;

// ASIMD FP divide, Q-form, F32
def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>;
1589
// ASIMD FP max/min, normal, D-form
// ASIMD FP max/min, normal, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI0],
            (instregex "^FMAXv", "^FMAXNMv", "^FMINv", "^FMINNMv")>;

// ASIMD FP max/min, pairwise, D-form
// ASIMD FP max/min, pairwise, Q-form
def : InstRW<[A64FXWrite_ADDP],
            (instregex "^FMAXPv", "^FMAXNMPv", "^FMINPv", "^FMINNMPv")>;

// ASIMD FP max/min, reduce
def : InstRW<[A64FXWrite_FMAXVVH],
            (instregex "^FMAXVv", "^FMAXNMVv", "^FMINVv", "^FMINNMVv")>;
1603
// ASIMD FP multiply, D-form, FZ
// ASIMD FP multiply, D-form, no FZ
// ASIMD FP multiply, Q-form, FZ
// ASIMD FP multiply, Q-form, no FZ
// The generic FMULv/FMULXv entry comes first and is followed by the
// per-arrangement entry; both can match the same names, so the order is
// kept.
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>;
def : InstRW<[A64FXWrite_FMULXE],
            (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)",
                       "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP multiply accumulate, Dform, FZ
// ASIMD FP multiply accumulate, Dform, no FZ
// ASIMD FP multiply accumulate, Qform, FZ
// ASIMD FP multiply accumulate, Qform, no FZ
// Same layout as above: generic entry first, per-arrangement entry second.
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>;
def : InstRW<[A64FXWrite_FMULXE],
            (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)",
                       "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP negate
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^FNEGv")>;
1626
1627//--
1628// 3.14 ASIMD Miscellaneous Instructions
1629//--
1630
// ASIMD bit reverse
def : InstRW<[A64FXWrite_1Cyc_GI2456],
            (instregex "^RBITv")>;

// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
def : InstRW<[A64FXWrite_BIF], (instregex "^BIFv", "^BITv", "^BSLv")>;

// ASIMD count, D-form
// ASIMD count, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^CLSv", "^CLZv", "^CNTv")>;

// ASIMD duplicate, gen reg
// ASIMD duplicate, element
// The generic "^DUPv" entry comes first; the following entry names the
// element forms and the names that contain "gpr". "^DUPv" also matches the
// latter, so the order is kept.
def : InstRW<[A64FXWrite_DUPGENERAL],
            (instregex "^DUPv")>;
def : InstRW<[A64FXWrite_6Cyc_GI0],
            (instregex "^DUP(i8|i16|i32|i64)$", "^DUPv.+gpr")>;

// ASIMD extract
def : InstRW<[A64FXWrite_6Cyc_GI0],
            (instregex "^EXTv")>;

// ASIMD extract narrow, plain and saturating
def : InstRW<[A64FXWrite_6Cyc_GI3],
            (instregex "^XTNv", "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;

// ASIMD insert, element to element
def : InstRW<[A64FXWrite_6Cyc_GI0],
            (instregex "^INSv")>;

// ASIMD transfer, element to gen reg
def : InstRW<[A64FXWrite_SMOV],
            (instregex "^[SU]MOVv")>;

// ASIMD move, integer immed
// ASIMD move, FP immed
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv", "^FMOVv")>;
1671
// ASIMD table lookup, D-form (v8i8) and Q-form (v16i8).
// Entries are grouped by write. Every pattern names a distinct
// opcode-name prefix, so the grouping does not change which write a given
// name receives from this group.
def : InstRW<[A64FXWrite_6Cyc_GI3],
            (instregex "^TBLv8i8One", "^TBLv16i8One")>;
def : InstRW<[A64FXWrite_TBX1],
            (instregex "^TBLv8i8Two", "^TBXv8i8One",
                       "^TBLv16i8Two", "^TBXv16i8One")>;
def : InstRW<[A64FXWrite_TBX2],
            (instregex "^TBLv8i8Three", "^TBXv8i8Two",
                       "^TBLv16i8Three", "^TBXv16i8Two")>;
def : InstRW<[A64FXWrite_TBX3],
            (instregex "^TBLv8i8Four", "^TBXv8i8Three",
                       "^TBLv16i8Four", "^TBXv16i8Three")>;
def : InstRW<[A64FXWrite_TBX4],
            (instregex "^TBXv8i8Four", "^TBXv16i8Four")>;
1691
// ASIMD unzip/zip
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^UZP1", "^UZP2",
                                               "^ZIP1", "^ZIP2")>;

// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^FRECPEv", "^FRECPXv", "^URECPEv", "^FRSQRTEv",
                       "^URSQRTEv")>;

// ASIMD reciprocal step, D-form, FZ
// ASIMD reciprocal step, D-form, no FZ
// ASIMD reciprocal step, Q-form, FZ
// ASIMD reciprocal step, Q-form, no FZ
def : InstRW<[A64FXWrite_9Cyc_GI0],
            (instregex "^FRECPSv", "^FRSQRTSv")>;

// ASIMD reverse
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^REV16v", "^REV32v",
                                                "^REV64v")>;
1711
// ASIMD table lookup, D-form
// ASIMD table lookup, Q-form
// Catch-all for names starting with TBLv / TBXv.
def : InstRW<[A64FXWrite_6Cyc_GI0],
            (instregex "^TBLv", "^TBXv")>;

// ASIMD transfer, element to word or word
// ASIMD transfer, element to gen reg
def : InstRW<[A64FXWrite_SMOV],
            (instregex "^[SU]MOVv", "(S|U)MOVv.*")>;

// ASIMD transfer gen reg to element
def : InstRW<[A64FXWrite_6Cyc_GI0],
            (instregex "^INSv")>;

// ASIMD transpose
// ASIMD unzip/zip
def : InstRW<[A64FXWrite_6Cyc_GI0],
            (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v",
                       "^ZIP1v", "^ZIP2v")>;
1731
1732//--
1733// 3.15 ASIMD Load Instructions
1734//--
1735
// ASIMD load, 1 element, multiple registers.
// For each register count there are four entries: two arrangement groups,
// (8b|4h|2s|1d|2d) and (16b|8h|4s), each in a plain form and a _POST form.
// The _POST entries additionally list WriteAdr.

// 1 reg
def : InstRW<[A64FXWrite_8Cyc_GI56], (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_11Cyc_GI56], (instregex "^LD1Onev(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
            (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD109, WriteAdr],
            (instregex "^LD1Onev(16b|8h|4s)_POST$")>;

// 2 reg
def : InstRW<[A64FXWrite_LD102], (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD103], (instregex "^LD1Twov(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
            (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
            (instregex "^LD1Twov(16b|8h|4s)_POST$")>;

// 3 reg
def : InstRW<[A64FXWrite_LD104], (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD105], (instregex "^LD1Threev(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
            (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
            (instregex "^LD1Threev(16b|8h|4s)_POST$")>;

// 4 reg
def : InstRW<[A64FXWrite_LD106], (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD107], (instregex "^LD1Fourv(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
            (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
            (instregex "^LD1Fourv(16b|8h|4s)_POST$")>;
1779
// ASIMD LD1 to a single lane (LD1i*), all element sizes share one write;
// the post-index opcodes use a separate write plus WriteAdr.
def : InstRW<[A64FXWrite_LD1I0],
      (instregex "^LD1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD1I1, WriteAdr],
      (instregex "^LD1i(8|16|32|64)_POST$")>;

// ASIMD LD1R (load one element and replicate to all lanes).  D-form and
// Q-form arrangements are not distinguished here.
def : InstRW<[A64FXWrite_8Cyc_GI03],
      (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
      (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1793
// ASIMD LD2 (multiple 2-element structures).  Every supported arrangement,
// D-form or Q-form, maps to the same write.
def : InstRW<[A64FXWrite_LD103],
      (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
      (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD LD2 to a single lane, B/H/S/D element sizes.
def : InstRW<[A64FXWrite_LD2I0],
      (instregex "^LD2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD2I1, WriteAdr],
      (instregex "^LD2i(8|16|32|64)_POST$")>;

// ASIMD LD2R (load one 2-element structure and replicate to all lanes).
def : InstRW<[A64FXWrite_LD102],
      (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
      (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1815
// ASIMD LD3 (multiple 3-element structures).  Every supported arrangement,
// D-form or Q-form, maps to the same write.
def : InstRW<[A64FXWrite_LD105],
      (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
      (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD LD3 to a single lane, B/H/S/D element sizes.
def : InstRW<[A64FXWrite_LD3I0],
      (instregex "^LD3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD3I1, WriteAdr],
      (instregex "^LD3i(8|16|32|64)_POST$")>;

// ASIMD LD3R (load one 3-element structure and replicate to all lanes).
def : InstRW<[A64FXWrite_LD104],
      (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
      (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1839
// ASIMD LD4 (multiple 4-element structures).  Every supported arrangement,
// D-form or Q-form, maps to the same write.
def : InstRW<[A64FXWrite_LD107],
      (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
      (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD LD4 to a single lane, B/H/S/D element sizes.
def : InstRW<[A64FXWrite_LD4I0],
      (instregex "^LD4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD4I1, WriteAdr],
      (instregex "^LD4i(8|16|32|64)_POST$")>;

// ASIMD LD4R (load one 4-element structure and replicate to all lanes).
def : InstRW<[A64FXWrite_LD106],
      (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
      (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1863
1864//--
1865// 3.16 ASIMD Store Instructions
1866//--
1867
// ASIMD stores, grouped by the number of vector registers/elements stored.
// Within a group the multiple-structure and single-lane opcodes share one
// write; D-form and Q-form arrangements are not distinguished.  The _POST
// opcodes use a separate write and additionally list WriteAdr.

// One register: ST1 (multiple, 1 reg) and ST1 (one lane).
def : InstRW<[A64FXWrite_ST10],
      (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$",
                 "^ST1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST14, WriteAdr],
      (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$",
                 "^ST1i(8|16|32|64)_POST$")>;

// Two registers: ST1 (multiple, 2 reg), ST2 (multiple) and ST2 (one lane).
def : InstRW<[A64FXWrite_ST11],
      (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$",
                 "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$",
                 "^ST2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
      (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$",
                 "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$",
                 "^ST2i(8|16|32|64)_POST$")>;

// Three registers: ST1 (multiple, 3 reg), ST3 (multiple) and ST3 (one lane).
def : InstRW<[A64FXWrite_ST12],
      (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$",
                 "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$",
                 "^ST3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
      (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$",
                 "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$",
                 "^ST3i(8|16|32|64)_POST$")>;

// Four registers: ST1 (multiple, 4 reg), ST4 (multiple) and ST4 (one lane).
def : InstRW<[A64FXWrite_ST13],
      (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$",
                 "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$",
                 "^ST4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
      (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$",
                 "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$",
                 "^ST4i(8|16|32|64)_POST$")>;
1947
// V8.1a Atomics (LSE)

// Compare-and-swap: the plain, A, L and AL variants of every access size
// all use A64FXWrite_CAS together with WriteAtomic.
def : InstRW<[A64FXWrite_CAS, WriteAtomic],
      (instrs CASB,   CASH,   CASW,   CASX,
              CASAB,  CASAH,  CASAW,  CASAX,
              CASLB,  CASLH,  CASLW,  CASLX,
              CASALB, CASALH, CASALW, CASALX)>;
1960
// LDLAR and the LD<op> atomic memory operations.  Every opcode listed here
// (all access sizes; plain, A, L and AL variants) is given the same pair of
// writes: A64FXWrite_5Cyc_GI5 and WriteAtomic.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs
         // LDLAR
         LDLARB, LDLARH, LDLARW, LDLARX,
         // LDADD
         LDADDB,   LDADDH,   LDADDW,   LDADDX,
         LDADDAB,  LDADDAH,  LDADDAW,  LDADDAX,
         LDADDLB,  LDADDLH,  LDADDLW,  LDADDLX,
         LDADDALB, LDADDALH, LDADDALW, LDADDALX,
         // LDCLR
         LDCLRB,   LDCLRH,   LDCLRW,   LDCLRX,
         LDCLRAB,  LDCLRAH,  LDCLRAW,  LDCLRAX,
         LDCLRLB,  LDCLRLH,  LDCLRLW,  LDCLRLX,
         LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX,
         // LDEOR
         LDEORB,   LDEORH,   LDEORW,   LDEORX,
         LDEORAB,  LDEORAH,  LDEORAW,  LDEORAX,
         LDEORLB,  LDEORLH,  LDEORLW,  LDEORLX,
         LDEORALB, LDEORALH, LDEORALW, LDEORALX,
         // LDSET
         LDSETB,   LDSETH,   LDSETW,   LDSETX,
         LDSETAB,  LDSETAH,  LDSETAW,  LDSETAX,
         LDSETLB,  LDSETLH,  LDSETLW,  LDSETLX,
         LDSETALB, LDSETALH, LDSETALW, LDSETALX,
         // LDSMAX
         LDSMAXB,   LDSMAXH,   LDSMAXW,   LDSMAXX,
         LDSMAXAB,  LDSMAXAH,  LDSMAXAW,  LDSMAXAX,
         LDSMAXLB,  LDSMAXLH,  LDSMAXLW,  LDSMAXLX,
         LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX,
         // LDSMIN
         LDSMINB,   LDSMINH,   LDSMINW,   LDSMINX,
         LDSMINAB,  LDSMINAH,  LDSMINAW,  LDSMINAX,
         LDSMINLB,  LDSMINLH,  LDSMINLW,  LDSMINLX,
         LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX,
         // LDUMAX
         LDUMAXB,   LDUMAXH,   LDUMAXW,   LDUMAXX,
         LDUMAXAB,  LDUMAXAH,  LDUMAXAW,  LDUMAXAX,
         LDUMAXLB,  LDUMAXLH,  LDUMAXLW,  LDUMAXLX,
         LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX,
         // LDUMIN
         LDUMINB,   LDUMINH,   LDUMINW,   LDUMINX,
         LDUMINAB,  LDUMINAH,  LDUMINAW,  LDUMINAX,
         LDUMINLB,  LDUMINLH,  LDUMINLW,  LDUMINLX,
         LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;
2035
// SWP: the plain, A, L and AL variants of every access size share
// A64FXWrite_SWP plus WriteAtomic.
def : InstRW<[A64FXWrite_SWP, WriteAtomic],
      (instrs SWPB,   SWPH,   SWPW,   SWPX,
              SWPAB,  SWPAH,  SWPAW,  SWPAX,
              SWPLB,  SWPLH,  SWPLW,  SWPLX,
              SWPALB, SWPALH, SWPALW, SWPALX)>;

// STLLR uses the A64FXWrite_STUR write plus WriteAtomic.
def : InstRW<[A64FXWrite_STUR, WriteAtomic],
      (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;
2050
2051// SVE instructions
2052
2053// The modeling method for SVE instructions is more accurate than others.
2054// TODO: modify the model of other instructions similarly.
2055
// SVE opcodes mapped to the shared A64FXWrite_4Cyc_GI0 write.
def : InstRW<[A64FXWrite_4Cyc_GI0],
      (instregex "^AND_ZI", "^CL[SZ]_Z", "^CPY_ZP[mz]I",
                 "^DUP_ZZ?I", "^DUPM_Z", "^EOR_ZI", "^ORR_ZI",
                 "^FCM(EQ|GT|GE|LT|LE|NE|UO)_P", "^FCPY_Z",
                 "^F(MAX|MIN).*I_", "^NEG_Z",
                 "^[SU](MAX|MIN)_ZI", "^SUBR?_ZI")>;

// SVE opcodes mapped to the shared A64FXWrite_6Cyc_GI0 write.
def : InstRW<[A64FXWrite_6Cyc_GI0],
      (instregex "^CLAST[AB]_[VZ]", "^COMPACT_Z", "^CPY_ZPmV",
                 "^DUP_ZR", "^EXT_Z", "^FDUP_Z", "^INSR_ZV",
                 "^LAST[AB]_V", "^REV_Z", "^SPLICE_Z",
                 "^[SU]UNPK(HI|LO)_Z", "^TBL_Z", "^TRN[12]_Z")>;

// SVE opcodes mapped to the shared A64FXWrite_9Cyc_GI0 write.
def : InstRW<[A64FXWrite_9Cyc_GI0],
      (instregex "^F(ADD|SUBR?)_.*I_", "^FRECPS_Z",
                 "^FRSQRTS_Z", "^INDEX_II_[SD]", "^MUL_ZI")>;

// CNT_Z* is the only pattern mapped to A64FXWrite_4Cyc_GI3.
def : InstRW<[A64FXWrite_4Cyc_GI3], (instregex "^CNT_Z")>;
2073
// SVE opcodes mapped to the shared A64FXWrite_4Cyc_GI03 write.
// NOTE(review): "^ADD_Z" would also match any ADD_ZI* opcode, whereas the
// AND/EOR/ORR/SUB patterns here exclude the immediate forms with "[^I]" --
// confirm the immediate ADD is meant to be in this group.
def : InstRW<[A64FXWrite_4Cyc_GI03],
      (instregex "^ABS_Z", "^ADD_Z", "^AND_Z[^I]",
                 "^ASRR?_(WIDE_)?Z", "^BIC_Z", "^ADR_[SU]XTW_Z",
                 "^CNOT_Z", "^DEC[BHWD]_Z", "^EOR_Z[^I]",
                 "^INC[BHWD]_Z", "^ORR_Z[^I]", "^FABS_Z",
                 "^FACG[ET]_P", "^FEXPA_Z", "^F(MAX|MIN)[^V]*Z_",
                 "^FNEG_Z", "^FRECP[EX]_Z", "^FRSQRTE_Z",
                 "^FTSSEL_Z", "^LS[LR]R?(_WIDE)?_Z", "^NOT_Z",
                 "^RBIT_Z", "^REV[BHW]_Z", "^SABD_Z", "^SEL_Z",
                 "^[SU](MAX|MIN)_ZP", "^[SU]Q(INC|DEC)[^P]_Z",
                 "^SUBR?_Z[^I]", "^[SU]XT._Z", "^UABD_Z")>;

// SVE opcodes mapped to the shared A64FXWrite_9Cyc_GI03 write.
def : InstRW<[A64FXWrite_9Cyc_GI03],
      (instregex "^FABD_Z", "^F(ADD|SUBR?)_.*Z_",
                 "^FN?(MAD|MLA|MLS|MSB)_ZP", "^FMUL_(ZP|ZZZ_)",
                 "^FMULX_Z", "^FCVT(ZS|ZU)?_Z", "^FRINT._Z",
                 "^FSCALE_Z", "^FTMAD_Z", "^FTSMUL_Z",
                 "^MAD_Z", "^MLA_Z", "^MLS_Z", "^MSB_Z", "^MUL_ZP",
                 "^[SU]CVTF_Z", "^[SU]DOT_ZZZ_", "^[SU]MULH_Z")>;
2090
// Predicate-register opcodes (the *_P patterns plus PFALSE/PNEXT/PFIRST/
// PTEST/PTRUE/PUNPK/RDFFR) mapped to A64FXWrite_3Cyc_GI1.
def : InstRW<[A64FXWrite_3Cyc_GI1],
      (instregex "^ANDS?_P", "^BICS?_P", "^BRK.*_P", "^EORS?_P",
                 "^ORRS?_P", "^NANDS?_P", "^NORS?_P", "^ORNS?_P",
                 "^PFALSE", "^PNEXT", "^PFIRST", "^PTEST",
                 "^PTRUES?", "^PUNPK(HI|LO)", "^RDFFRS?",
                 "^REV_P", "^SEL_P", "^TRN[12]_P")>;

// ADDPL/ADDVL, RDVL and the *_X count/increment/decrement opcodes mapped to
// A64FXWrite_1Cyc_GI24.
def : InstRW<[A64FXWrite_1Cyc_GI24],
      (instregex "^ADD[PV]L", "^CNT[BHWD]_X", "^DEC[BHWD]_X",
                 "^INC[BHWD]_X", "^RDVLI")>;

// LDR of a predicate or vector register (LDR_PXI / LDR_ZXI).
def : InstRW<[A64FXWrite_11Cyc_GI5], (instregex "^LDR_[PZ]XI")>;

// LD1-style loads, including the NF/FF/NT prefixed and the R/S infixed
// opcode names accepted by the pattern.
def : InstRW<[A64FXWrite_11Cyc_GI56],
      (instregex "^LD(NF|FF|NT)?1R?S?[BHSWDQ]")>;
2106
// Write that names no processor resources and overrides no SchedWriteRes
// field; applied to SETFFR* and MOVPRFX*.
def A64FXWrite_None : SchedWriteRes<[]>;
def : InstRW<[A64FXWrite_None],
      (instregex "^SETFFR", "^MOVPRFX")>;
2110
// Indexed FMLA/FMLS/FMUL (*_ZZZI): 2 micro-ops, A64FXGI03 held for 2 cycles,
// 15-cycle latency.
def A64FXWrite_FMAIndexed : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps     = 2;
  let ReleaseAtCycles = [2];
  let Latency         = 15;
}
def : InstRW<[A64FXWrite_FMAIndexed],
      (instregex "^F(MLA|MLS|MUL)_ZZZI")>;

// ADR with LSL-scaled offsets: 2 micro-ops, A64FXGI0 held for 2 cycles,
// 5-cycle latency.
def A64FXWrite_ADR_LSL_Z : SchedWriteRes<[A64FXGI0]> {
  let NumMicroOps     = 2;
  let ReleaseAtCycles = [2];
  let Latency         = 5;
}
def : InstRW<[A64FXWrite_ADR_LSL_Z],
      (instregex "^ADR_LSL_Z")>;

// ASRD: 2 micro-ops using A64FXGI0 and A64FXGI01, 8-cycle latency.
// ReleaseAtCycles is left at its default.
def A64FXWrite_ASRD : SchedWriteRes<[A64FXGI0, A64FXGI01]> {
  let NumMicroOps = 2;
  let Latency     = 8;
}
def : InstRW<[A64FXWrite_ASRD],
      (instregex "^ASRD_Z")>;
2130
// Integer/logical horizontal reductions (ANDV, EORV, ORV, SADDV, SMAXV,
// SMINV, UADDV, UMAXV, UMINV), one write per element size.  All run on
// A64FXGI03; the micro-op count equals the cycles the resource is held, and
// both it and the latency decrease as the element size grows:
//   B: 10 uops / 46 cycles    H: 9 uops / 42 cycles
//   S:  8 uops / 38 cycles    D: 7 uops / 34 cycles

def A64FXWrite_Reduction4CycB : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps     = 10;
  let ReleaseAtCycles = [10];
  let Latency         = 46;
}
def : InstRW<[A64FXWrite_Reduction4CycB],
      (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_B")>;

def A64FXWrite_Reduction4CycH : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps     = 9;
  let ReleaseAtCycles = [9];
  let Latency         = 42;
}
def : InstRW<[A64FXWrite_Reduction4CycH],
      (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_H")>;

def A64FXWrite_Reduction4CycS : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps     = 8;
  let ReleaseAtCycles = [8];
  let Latency         = 38;
}
def : InstRW<[A64FXWrite_Reduction4CycS],
      (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_S")>;

def A64FXWrite_Reduction4CycD : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps     = 7;
  let ReleaseAtCycles = [7];
  let Latency         = 34;
}
def : InstRW<[A64FXWrite_Reduction4CycD],
      (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_D")>;
2162
// CLASTA/CLASTB writing a general-purpose register: 29-cycle latency.
def A64FXWrite_CLAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 29;
}
def : InstRW<[A64FXWrite_CLAST_R],
      (instregex "^CLAST[AB]_R")>;

// CMP* producing a predicate: 4-cycle latency.
def A64FXWrite_CMP : SchedWriteRes<[A64FXGI0, A64FXGI1]> {
  let Latency = 4;
}
def : InstRW<[A64FXWrite_CMP],
      (instregex "^CMP.*_P")>;

// CNTP (predicate count into an X register): 6-cycle latency.
def A64FXWrite_CNTP : SchedWriteRes<[A64FXGI1, A64FXGI2]> {
  let Latency = 6;
}
def : InstRW<[A64FXWrite_CNTP],
      (instregex "^CNTP_X")>;

// CPY_ZPmR* (copy from a scalar register): 8-cycle latency.
def A64FXWrite_CPYScalar : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 8;
}
def : InstRW<[A64FXWrite_CPYScalar],
      (instregex "^CPY_ZPmR")>;

// CTERM*: 2-cycle latency, A64FXGI24 held for 2 cycles.
def A64FXWrite_CTERM : SchedWriteRes<[A64FXGI24]> {
  let ReleaseAtCycles = [2];
  let Latency         = 2;
}
def : InstRW<[A64FXWrite_CTERM],
      (instregex "^CTERM")>;

// INCP/DECP on an X register: 2 micro-ops, 7-cycle latency.
def A64FXWrite_INCPScalar : SchedWriteRes<[A64FXGI1, A64FXGI2, A64FXGI4]> {
  let NumMicroOps = 2;
  let Latency     = 7;
}
def : InstRW<[A64FXWrite_INCPScalar],
      (instregex "^DECP_X", "^INCP_X")>;

// INCP/DECP on a vector register: 12-cycle latency.
def A64FXWrite_INCPVector : SchedWriteRes<[A64FXGI0, A64FXGI1]> {
  let Latency = 12;
}
def : InstRW<[A64FXWrite_INCPVector],
      (instregex "^DECP_Z", "^INCP_Z")>;
2199
// FADDV, one write per element size.  All use A64FXGI03, with the micro-op
// count equal to the cycles the resource is held:
//   H: 11 uops / 75 cycles, S: 9 uops / 60 cycles, D: 7 uops / 45 cycles.

def A64FXWrite_FADDVH : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps     = 11;
  let ReleaseAtCycles = [11];
  let Latency         = 75;
}
def : InstRW<[A64FXWrite_FADDVH], (instrs FADDV_VPZ_H)>;

def A64FXWrite_FADDVS : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps     = 9;
  let ReleaseAtCycles = [9];
  let Latency         = 60;
}
def : InstRW<[A64FXWrite_FADDVS], (instrs FADDV_VPZ_S)>;

def A64FXWrite_FADDVD : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps     = 7;
  let ReleaseAtCycles = [7];
  let Latency         = 45;
}
def : InstRW<[A64FXWrite_FADDVD], (instrs FADDV_VPZ_D)>;

// FADDA, one write per element size, same resource and same
// uops == held-cycles convention as FADDV above:
//   H: 63 uops / 468 cycles, S: 31 uops / 228 cycles, D: 15 uops / 108 cycles.

def A64FXWrite_FADDAH : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps     = 63;
  let ReleaseAtCycles = [63];
  let Latency         = 468;
}
def : InstRW<[A64FXWrite_FADDAH], (instrs FADDA_VPZ_H)>;

def A64FXWrite_FADDAS : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps     = 31;
  let ReleaseAtCycles = [31];
  let Latency         = 228;
}
def : InstRW<[A64FXWrite_FADDAS], (instrs FADDA_VPZ_S)>;

def A64FXWrite_FADDAD : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps     = 15;
  let ReleaseAtCycles = [15];
  let Latency         = 108;
}
def : InstRW<[A64FXWrite_FADDAD], (instrs FADDA_VPZ_D)>;
2241
// FCADD: 2 micro-ops using A64FXGI0 and A64FXGI3, 15-cycle latency.
def A64FXWrite_FCADDZ : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
  let NumMicroOps = 2;
  let Latency     = 15;
}
def : InstRW<[A64FXWrite_FCADDZ],
      (instregex "^FCADD_Z")>;

// FCMLA: 3 micro-ops, A64FXGI03 held for 3 cycles, 15-cycle latency.
def A64FXWrite_FCMLAZ : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps     = 3;
  let ReleaseAtCycles = [3];
  let Latency         = 15;
}
def : InstRW<[A64FXWrite_FCMLAZ],
      (instregex "^FCMLA_Z")>;
2254
// FDIV, FDIVR and FSQRT, one write per element size.  Each holds A64FXGI0
// for its whole latency (ReleaseAtCycles == Latency):
//   H: 134 cycles, S: 98 cycles, D: 154 cycles.

def A64FXWrite_FDIVH : SchedWriteRes<[A64FXGI0]> {
  let ReleaseAtCycles = [134];
  let Latency         = 134;
}
def : InstRW<[A64FXWrite_FDIVH],
      (instregex "^F(DIVR?|SQRT)_Z.*_H")>;

def A64FXWrite_FDIVS : SchedWriteRes<[A64FXGI0]> {
  let ReleaseAtCycles = [98];
  let Latency         = 98;
}
def : InstRW<[A64FXWrite_FDIVS],
      (instregex "^F(DIVR?|SQRT)_Z.*_S")>;

def A64FXWrite_FDIVD : SchedWriteRes<[A64FXGI0]> {
  let ReleaseAtCycles = [154];
  let Latency         = 154;
}
def : InstRW<[A64FXWrite_FDIVD],
      (instregex "^F(DIVR?|SQRT)_Z.*_D")>;
2272
// FMAXV/FMINV/FMAXNMV/FMINNMV on H elements: 11 micro-ops, A64FXGI03 held
// for 11 cycles, 54-cycle latency.
def A64FXWrite_FMAXVH : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps     = 11;
  let ReleaseAtCycles = [11];
  let Latency         = 54;
}
def : InstRW<[A64FXWrite_FMAXVH],
      (instregex "^F(MAX|MIN)(NM)?V_VPZ_H")>;

// Same reductions on S elements: 9 micro-ops, A64FXGI03 held for 9 cycles,
// 44-cycle latency.
def A64FXWrite_FMAXVS : SchedWriteRes<[A64FXGI03]> {
  let NumMicroOps     = 9;
  let ReleaseAtCycles = [9];
  let Latency         = 44;
}
def : InstRW<[A64FXWrite_FMAXVS],
      (instregex "^F(MAX|MIN)(NM)?V_VPZ_S")>;
2286
// FMAXV/FMINV/FMAXNMV/FMINNMV on D elements: 7 micro-ops, A64FXGI03 held for
// 7 cycles, 34-cycle latency.  The _D opcodes must be bound to this write,
// not to the H-element one, so that these numbers are actually used.
def A64FXWrite_FMAXVD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 34;
  let NumMicroOps = 7;
  let ReleaseAtCycles = [7];
}
def : InstRW<[A64FXWrite_FMAXVD], (instregex "^F(MAX|MIN)(NM)?V_VPZ_D")>;
2293
// INDEX with one register and one immediate operand (RI or IR), B/H elements.
def A64FXWrite_INDEX_RI_BH : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let NumMicroOps     = 2;
  let ReleaseAtCycles = [2, 2];
  let Latency         = 17;
}
def : InstRW<[A64FXWrite_INDEX_RI_BH],
      (instregex "^INDEX_(RI|IR)_[BH]")>;

// INDEX with one register and one immediate operand (RI or IR), S/D elements.
// ReleaseAtCycles is left at its default.
def A64FXWrite_INDEX_RI_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let NumMicroOps = 1;
  let Latency     = 13;
}
def : InstRW<[A64FXWrite_INDEX_RI_SD],
      (instregex "^INDEX_(RI|IR)_[SD]")>;

// INDEX with two immediates, B/H elements.  (The S/D two-immediate forms are
// matched by "^INDEX_II_[SD]" in the A64FXWrite_9Cyc_GI0 InstRW.)
def A64FXWrite_INDEX_II_BH : SchedWriteRes<[A64FXGI0]> {
  let NumMicroOps     = 2;
  let ReleaseAtCycles = [2];
  let Latency         = 13;
}
def : InstRW<[A64FXWrite_INDEX_II_BH],
      (instregex "^INDEX_II_[BH]")>;

// INDEX with two register operands, B/H elements.
def A64FXWrite_INDEX_RR_BH : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI3]> {
  let NumMicroOps     = 3;
  let ReleaseAtCycles = [2, 2, 1];
  let Latency         = 17;
}
def : InstRW<[A64FXWrite_INDEX_RR_BH],
      (instregex "^INDEX_RR_[BH]")>;

// INDEX with two register operands, S/D elements.
def A64FXWrite_INDEX_RR_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let NumMicroOps     = 2;
  let ReleaseAtCycles = [2, 1];
  let Latency         = 17;
}
def : InstRW<[A64FXWrite_INDEX_RR_SD],
      (instregex "^INDEX_RR_[SD]")>;
2327
// INSR_ZR*: uses A64FXGI0 and A64FXGI2 with a 10-cycle latency.  (The
// INSR_ZV* opcodes are matched by the A64FXWrite_6Cyc_GI0 InstRW.)
def A64FXWrite_INSR_ZR : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 10;
}
def : InstRW<[A64FXWrite_INSR_ZR],
      (instregex "^INSR_ZR")>;
2332
// LASTA/LASTB writing a general-purpose register: uses A64FXGI0 and
// A64FXGI56 with a 25-cycle latency.  These opcodes are bound to this write
// rather than to A64FXWrite_CLAST_R (the 29-cycle write for CLASTA/CLASTB),
// so the latency defined here is the one that takes effect.
def A64FXWrite_LAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 25;
}
def : InstRW<[A64FXWrite_LAST_R], (instregex "^LAST[AB]_R")>;
2337
// Gather loads (including the first-faulting GLDFF1* opcodes), split by
// element container (S vs D) and by addressing form (_IMM opcodes vs all
// others).  The non-_IMM writes use A64FXGI2 in addition to A64FXGI0,
// A64FXGI5 and A64FXGI6 and are 4 cycles slower than their _IMM counterpart.

// S container, _IMM opcodes.
def A64FXWrite_GLD_S_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let ReleaseAtCycles = [2, 4, 4];
  let Latency         = 19;
}
def : InstRW<[A64FXWrite_GLD_S_ZI],
      (instregex "^GLD(FF)?1W_IMM",
                 "^GLD(FF)?1S?[BHW]_S_IMM")>;

// D container, _IMM opcodes.
def A64FXWrite_GLD_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let ReleaseAtCycles = [1, 2, 2];
  let Latency         = 16;
}
def : InstRW<[A64FXWrite_GLD_D_ZI],
      (instregex "^GLD(FF)?1D_IMM",
                 "^GLD(FF)?1S?[BHW]_D_IMM")>;

// S container, non-_IMM opcodes.
def A64FXWrite_GLD_S_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let ReleaseAtCycles = [2, 1, 4, 4];
  let Latency         = 23;
}
def : InstRW<[A64FXWrite_GLD_S_RZ],
      (instregex "^GLD(FF)?1W_[^DI]",
                 "^GLD(FF)?1S?[BHW]_S_[^I]")>;

// D container, non-_IMM opcodes (including the unsuffixed names).
def A64FXWrite_GLD_D_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let ReleaseAtCycles = [1, 1, 2, 2];
  let Latency         = 20;
}
def : InstRW<[A64FXWrite_GLD_D_RZ],
      (instregex "^GLD(FF)?1D_[^I]",
                 "^GLD(FF)?1D$",
                 "^GLD(FF)?1S?[BHW]_D_[^I]",
                 "^GLD(FF)?1S?[BHW]_D$")>;
2366
// SVE structure loads LD2/LD3/LD4.  All use A64FXGI56.  For each structure
// size there are three writes:
//   *_BH     - B/H elements, any addressing form (latency 15)
//   *_WD_IMM - W/D elements, _IMM opcodes        (latency 11)
//   *_WD     - W/D elements, unsuffixed opcodes  (latency 12, one more
//              micro-op than the _IMM form)

// ---- LD2 ----
def A64FXWrite_LD2_BH : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps     = 3;
  let ReleaseAtCycles = [9];
  let Latency         = 15;
}
def : InstRW<[A64FXWrite_LD2_BH],
      (instregex "^LD2[BH]")>;

def A64FXWrite_LD2_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps     = 2;
  let ReleaseAtCycles = [2];
  let Latency         = 11;
}
def : InstRW<[A64FXWrite_LD2_WD_IMM],
      (instregex "^LD2[WD]_IMM")>;

def A64FXWrite_LD2_WD : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps     = 3;
  let ReleaseAtCycles = [3];
  let Latency         = 12;
}
def : InstRW<[A64FXWrite_LD2_WD],
      (instregex "^LD2[WD]$")>;

// ---- LD3 ----
def A64FXWrite_LD3_BH : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps     = 4;
  let ReleaseAtCycles = [13];
  let Latency         = 15;
}
def : InstRW<[A64FXWrite_LD3_BH],
      (instregex "^LD3[BH]")>;

def A64FXWrite_LD3_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps     = 3;
  let ReleaseAtCycles = [3];
  let Latency         = 11;
}
def : InstRW<[A64FXWrite_LD3_WD_IMM],
      (instregex "^LD3[WD]_IMM")>;

def A64FXWrite_LD3_WD : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps     = 4;
  let ReleaseAtCycles = [4];
  let Latency         = 12;
}
def : InstRW<[A64FXWrite_LD3_WD],
      (instregex "^LD3[WD]$")>;

// ---- LD4 ----
def A64FXWrite_LD4_BH : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps     = 5;
  let ReleaseAtCycles = [17];
  let Latency         = 15;
}
def : InstRW<[A64FXWrite_LD4_BH],
      (instregex "^LD4[BH]")>;

def A64FXWrite_LD4_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps     = 4;
  let ReleaseAtCycles = [4];
  let Latency         = 11;
}
def : InstRW<[A64FXWrite_LD4_WD_IMM],
      (instregex "^LD4[WD]_IMM")>;

def A64FXWrite_LD4_WD : SchedWriteRes<[A64FXGI56]> {
  let NumMicroOps     = 5;
  let ReleaseAtCycles = [5];
  let Latency         = 12;
}
def : InstRW<[A64FXWrite_LD4_WD],
      (instregex "^LD4[WD]$")>;
2429
// SVE prefetches.  None of these writes sets Latency or NumMicroOps; only the
// resources and, for the _S_/_D_ opcode families, the cycles each resource
// is held are specified.

// PRF?_PR* opcodes: A64FXGI56 only, all fields at their defaults.
def A64FXWrite_PRF : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_PRF],
      (instregex "^PRF._PR")>;

// PRF?_S_* opcodes other than _S_PZI.
def A64FXWrite_PRF_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
  let ReleaseAtCycles = [2, 1, 4];
}
def : InstRW<[A64FXWrite_PRF_W_RZ],
      (instregex "^PRF._S_[^P]")>;

// PRF?_S_PZI opcodes.
def A64FXWrite_PRF_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let ReleaseAtCycles = [2, 4];
}
def : InstRW<[A64FXWrite_PRF_W_ZI],
      (instregex "^PRF._S_PZI")>;

// PRF?_D_* opcodes other than _D_PZI.
def A64FXWrite_PRF_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
  let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[A64FXWrite_PRF_D_RZ],
      (instregex "^PRF._D_[^P]")>;

// PRF?_D_PZI opcodes.
def A64FXWrite_PRF_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let ReleaseAtCycles = [1, 2];
}
def : InstRW<[A64FXWrite_PRF_D_ZI],
      (instregex "^PRF._D_PZI")>;
2453
// SDIV/UDIV (and the reversed SDIVR/UDIVR) on S elements: A64FXGI0 is held
// for the full 114-cycle latency.
def A64FXWrite_SDIV_S : SchedWriteRes<[A64FXGI0]> {
  let ReleaseAtCycles = [114];
  let Latency         = 114;
}
def : InstRW<[A64FXWrite_SDIV_S],
      (instregex "^[SU]DIVR?.*_S")>;

// Same divides on D elements: A64FXGI0 is held for the full 178-cycle
// latency.
def A64FXWrite_SDIV_D : SchedWriteRes<[A64FXGI0]> {
  let ReleaseAtCycles = [178];
  let Latency         = 178;
}
def : InstRW<[A64FXWrite_SDIV_D],
      (instregex "^[SU]DIVR?.*_D")>;

// Indexed SDOT/UDOT (*_ZZZI): 2 micro-ops using A64FXGI0 and A64FXGI3,
// 15-cycle latency.  (The non-indexed *_ZZZ_ forms are matched by the
// A64FXWrite_9Cyc_GI03 InstRW.)
def A64FXWrite_SDOT_I : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
  let NumMicroOps = 2;
  let Latency     = 15;
}
def : InstRW<[A64FXWrite_SDOT_I],
      (instregex "^[SU]DOT_ZZZI")>;
2471
// Saturating INC/DEC by element count on a W or X register: 2-cycle latency,
// A64FXGI24 held for 2 cycles.
def A64FXWrite_SQINC_Scalar : SchedWriteRes<[A64FXGI24]> {
  let ReleaseAtCycles = [2];
  let Latency         = 2;
}
def : InstRW<[A64FXWrite_SQINC_Scalar],
      (instregex "^[SU]Q(INC|DEC)[BHWD]_[WX]")>;

// Saturating INCP/DECP on a W or X register: 2 micro-ops, 6-cycle latency.
def A64FXWrite_SQINCP_X : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
  let NumMicroOps     = 2;
  let ReleaseAtCycles = [3, 1];
  let Latency         = 6;
}
def : InstRW<[A64FXWrite_SQINCP_X],
      (instregex "^[SU]Q(INC|DEC)P_[WX]")>;

// Saturating INCP/DECP on a vector register: 12-cycle latency.
def A64FXWrite_SQINCP_Z : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
  let Latency = 12;
}
def : InstRW<[A64FXWrite_SQINCP_Z],
      (instregex "^[SU]Q(INC|DEC)P_Z")>;
2489
// SVE ST1 and STNT1 stores of B/H/W/D elements: A64FXGI0 plus A64FXGI56,
// 11-cycle latency.
def A64FXWrite_ST1 : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_ST1],
      (instregex "^ST(NT)?1[BHWD]")>;
2494
// Scatter stores (SST1*), split by element container (S/W vs D) and by
// addressing form (_I* opcodes vs all others).  The S/W-container writes use
// 8 micro-ops, the D-container writes 4.  The non-_I writes also use
// A64FXGI2 and have a latency of 20 instead of 16.

// S/W container, opcodes without an _I suffix.
def A64FXWrite_SST1_W_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let NumMicroOps     = 8;
  let ReleaseAtCycles = [8, 8, 8, 8];
  let Latency         = 20;
}
def : InstRW<[A64FXWrite_SST1_W_RZ],
      (instregex "^SST1[BH]_S(_[^I]|$)",
                 "^SST1W(_[^ID]|$)")>;

// D container, opcodes without an _I suffix.
def A64FXWrite_SST1_D_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let NumMicroOps     = 4;
  let ReleaseAtCycles = [4, 4, 4, 4];
  let Latency         = 20;
}
def : InstRW<[A64FXWrite_SST1_D_RZ],
      (instregex "^SST1[BHW]_D(_[^I]|$)",
                 "^SST1D(_[^I]|$)")>;

// S/W container, _I* opcodes.
def A64FXWrite_SST1_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let NumMicroOps     = 8;
  let ReleaseAtCycles = [12, 8, 8];
  let Latency         = 16;
}
def : InstRW<[A64FXWrite_SST1_W_ZI],
      (instregex "^SST1[BH]_S_I",
                 "^SST1W_I")>;

// D container, _I* opcodes.
def A64FXWrite_SST1_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let NumMicroOps     = 4;
  let ReleaseAtCycles = [4, 4, 4];
  let Latency         = 16;
}
def : InstRW<[A64FXWrite_SST1_D_ZI],
      (instregex "^SST1[BHW]_D_I",
                 "^SST1D_I")>;
2526
// SVE ST2 structure stores.  All use A64FXGI0 and A64FXGI56.

// B/H elements, any addressing form.
// NOTE(review): latency is 12 here but 15 for ST3_BH/ST4_BH and for LD2_BH --
// confirm against the A64FX documentation.
def A64FXWrite_ST2_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [8, 9];
}
def : InstRW<[A64FXWrite_ST2_BH], (instregex "^ST2[BH]")>;

// W/D elements, register + immediate addressing (the ST2W_IMM / ST2D_IMM
// opcodes).  Mirrors A64FXWrite_LD2_WD_IMM: latency 11, 2 micro-ops.
def A64FXWrite_ST2_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2, 2];
}
def : InstRW<[A64FXWrite_ST2_WD_RI], (instregex "^ST2[WD]_I")>;

// W/D elements, register + register addressing (the unsuffixed ST2W / ST2D
// opcodes).  Mirrors A64FXWrite_LD2_WD: latency 12, 3 micro-ops.
def A64FXWrite_ST2_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2, 3];
}
def : InstRW<[A64FXWrite_ST2_WD_RR], (instregex "^ST2[WD]$")>;
2547
// SVE ST3 structure stores.  All use A64FXGI0 and A64FXGI56.

// B/H elements, any addressing form.
def A64FXWrite_ST3_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [12, 13];
}
def : InstRW<[A64FXWrite_ST3_BH], (instregex "^ST3[BH]")>;

// W/D elements, register + immediate addressing (the ST3W_IMM / ST3D_IMM
// opcodes).  Mirrors A64FXWrite_LD3_WD_IMM: latency 11, 3 micro-ops.
def A64FXWrite_ST3_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3, 3];
}
def : InstRW<[A64FXWrite_ST3_WD_RI], (instregex "^ST3[WD]_I")>;

// W/D elements, register + register addressing (the unsuffixed ST3W / ST3D
// opcodes).  Mirrors A64FXWrite_LD3_WD: latency 12, 4 micro-ops.
def A64FXWrite_ST3_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3, 4];
}
def : InstRW<[A64FXWrite_ST3_WD_RR], (instregex "^ST3[WD]$")>;
2568
// SVE ST4 structure stores.  All use A64FXGI0 and A64FXGI56.

// B/H elements, any addressing form.
def A64FXWrite_ST4_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [16, 17];
}
def : InstRW<[A64FXWrite_ST4_BH], (instregex "^ST4[BH]")>;

// W/D elements, register + immediate addressing (the ST4W_IMM / ST4D_IMM
// opcodes).  Mirrors A64FXWrite_LD4_WD_IMM: latency 11, 4 micro-ops.
def A64FXWrite_ST4_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4, 4];
}
def : InstRW<[A64FXWrite_ST4_WD_RI], (instregex "^ST4[WD]_I")>;

// W/D elements, register + register addressing (the unsuffixed ST4W / ST4D
// opcodes).  Mirrors A64FXWrite_LD4_WD: latency 12, 5 micro-ops.
def A64FXWrite_ST4_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [4, 5];
}
def : InstRW<[A64FXWrite_ST4_WD_RR], (instregex "^ST4[WD]$")>;
2589
// STR of a predicate register: A64FXGI3 plus A64FXGI5, 11-cycle latency.
def A64FXWrite_STR_P : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_STR_P],
      (instrs STR_PXI)>;

// STR of a vector register: A64FXGI0 plus A64FXGI5, 11-cycle latency.
def A64FXWrite_STR_Z : SchedWriteRes<[A64FXGI0, A64FXGI5]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_STR_Z],
      (instrs STR_ZXI)>;

// WHILEL? predicate generation (the opcodes matching "^WHILEL._P"):
// 4-cycle latency.
def A64FXWrite_WHILE : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let Latency = 4;
}
def : InstRW<[A64FXWrite_WHILE],
      (instregex "^WHILEL._P")>;

// WRFFR: 2 micro-ops, 3-cycle latency.
def A64FXWrite_WRFFR : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let NumMicroOps = 2;
  let Latency     = 3;
}
def : InstRW<[A64FXWrite_WRFFR],
      (instrs WRFFR)>;
2610
2611} // SchedModel = A64FXModel
2612