xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA64FX.td (revision e0c4386e7e71d93b0edc0c8fa156263fc4a8b0b6)
1//=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the scheduling model for the Fujitsu A64FX processors.
10//
11//===----------------------------------------------------------------------===//
12
// Top-level machine description for the A64FX scheduling model.
def A64FXModel : SchedMachineModel {
  // 6 micro-ops dispatched at a time.
  let IssueWidth = 6;
  // 180 entries in micro-op re-order buffer.
  let MicroOpBufferSize = 180;
  // Optimistic load latency.
  let LoadLatency = 5;
  // Extra cycles for mispredicted branch.
  let MispredictPenalty = 12;
  // Determined via a mix of micro-arch details and experimentation.
  let LoopMicroOpBufferSize = 128;
  // Using PostRA sched.
  let PostRAScheduler = 1;
  let CompleteModel = 1;

  // Subtarget features listed as unsupported by this model.
  list<Predicate> UnsupportedFeatures = [
    HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
    HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1,
    HasSVE2p1, HasSVE2p1_or_HasSME2p1, HasSMEF16F16
  ];

  // The full InstRW overlap check is switched off for this model.
  let FullInstRWOverlapCheck = 0;
}
30
31let SchedModel = A64FXModel in {
32
// Issue ports.  Each A64FXIP* resource below is declared with a unit count
// of 1; the trailing comment gives the port number.

def A64FXIPFLA  : ProcResource<1>; // Port 0
def A64FXIPPR   : ProcResource<1>; // Port 1
def A64FXIPEXA  : ProcResource<1>; // Port 2
def A64FXIPFLB  : ProcResource<1>; // Port 3
def A64FXIPEXB  : ProcResource<1>; // Port 4
def A64FXIPEAGA : ProcResource<1>; // Port 5
def A64FXIPEAGB : ProcResource<1>; // Port 6
def A64FXIPBR   : ProcResource<1>; // Port 7
60
// Functional-unit groups built from the issue ports.  The digits in each
// A64FXGI<n...> name are the port numbers the group contains.  The groups are
// what the WriteRes / SchedWriteRes definitions later in the file refer to.

// Groups containing a single port.
def A64FXGI7 : ProcResGroup<[A64FXIPBR]>;
def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>;
def A64FXGI1 : ProcResGroup<[A64FXIPPR]>;
def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>;
def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>;
def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>;
def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>;
def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>;

// Groups spanning several ports.
def A64FXGI03   : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>;
def A64FXGI01   : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>;
def A64FXGI24   : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>;
def A64FXGI56   : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>;
def A64FXGI056  : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>;
def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB,
                                A64FXIPEAGA, A64FXIPEAGB]>;

// All eight issue ports.
def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB,
                             A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB,
                             A64FXIPBR]>;
94
// Generic write resources.  A64FXWrite_<N>Cyc_<group> has latency N on the
// named A64FXGI group; NumMicroOps is only set where it is written out.

// Port 7 (branch).
def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]> { let Latency = 1; }

// Port 0.
def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 2; }
def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 4; }
def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 6; }
def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 8; }
def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 9; }

// Port 1.
def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]> { let Latency = 3; }

// Port 2.
def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { let Latency = 5; }

// Port 3.
def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { let Latency = 4; }
def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { let Latency = 6; }

// Ports 0 and 3.
def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 4; }
def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }
def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 9; }

// Port 4.
def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 10; }
def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 12; }
def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 20; }

// Port 5.
def A64FXWrite_5Cyc_GI5  : SchedWriteRes<[A64FXGI5]> { let Latency = 5; }
def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]> { let Latency = 11; }

// Port 6.
def A64FXWrite_5Cyc_GI6 : SchedWriteRes<[A64FXGI6]> { let Latency = 5; }

// Ports 2 and 4.
def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { let Latency = 1; }
def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { let Latency = 2; }
def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]> {
  let Latency = 4;
  let NumMicroOps = 4;
}

// Ports 5 and 6.
def A64FXWrite_1Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_5Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 5; }
def A64FXWrite_8Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 8; }
def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { let Latency = 11; }
199
// Load write resources.  Each entry gives a latency and a micro-op count.

// Pair and pre/post-index loads.
def A64FXWrite_LDNP : SchedWriteRes<[A64FXGI56]> {
  let Latency = 5; let NumMicroOps = 2;
}
def A64FXWrite_LDP01 : SchedWriteRes<[A64FXGI2456]> {
  let Latency = 5; let NumMicroOps = 3;
}
def A64FXWrite_LDR01 : SchedWriteRes<[A64FXGI2456]> {
  let Latency = 5; let NumMicroOps = 2;
}

// A64FXWrite_LD102 .. A64FXWrite_LD115, all on ports 5/6.
def A64FXWrite_LD102 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD103 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 2;
}
def A64FXWrite_LD104 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD105 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 3;
}
def A64FXWrite_LD106 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD107 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 4;
}
def A64FXWrite_LD108 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD109 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 2;
}
def A64FXWrite_LD110 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD111 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 3;
}
def A64FXWrite_LD112 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD113 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 4;
}
def A64FXWrite_LD114 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 5;
}
def A64FXWrite_LD115 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 5;
}

// A64FXWrite_LD<k>I<0|1>, all on ports 0/5/6 with latency 8.
def A64FXWrite_LD1I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD1I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD2I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD2I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 5;
}
def A64FXWrite_LD3I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 6;
}
def A64FXWrite_LD3I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 7;
}
def A64FXWrite_LD4I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 8;
}
def A64FXWrite_LD4I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 9;
}
325
// Single-cycle write on ports 2/4/5/6.
def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]> { let Latency = 1; }

// Named write resources on ports 0/3 (SHA00/SHA01 use port 0 only).
// Entries without NumMicroOps keep the class default.
def A64FXWrite_FMOV_GV   : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }
def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_ADDLV     : SchedWriteRes<[A64FXGI03]> { let Latency = 12; }
def A64FXWrite_MULLE     : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_MULLV     : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_MADDL     : SchedWriteRes<[A64FXGI03]> { let Latency = 6; }
def A64FXWrite_ABA       : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }
def A64FXWrite_ABAL      : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }

def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 12; let NumMicroOps = 6;
}
def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 14; let NumMicroOps = 6;
}

def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]> { let Latency = 9; }
def A64FXWrite_PMUL     : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }

def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 2;
}

def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 2;
}
def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 3;
}
def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 2;
}
def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 2;
}
def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 14; let NumMicroOps = 7;
}

def A64FXWrite_BIF        : SchedWriteRes<[A64FXGI03]> { let Latency = 5; }
def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }
def A64FXWrite_SHA00      : SchedWriteRes<[A64FXGI0]>  { let Latency = 9; }
def A64FXWrite_SHA01      : SchedWriteRes<[A64FXGI0]>  { let Latency = 12; }
def A64FXWrite_SMOV       : SchedWriteRes<[A64FXGI03]> { let Latency = 25; }

// A64FXWrite_TBX<k>: latency 10, micro-op counts 3, 5, 7, 9.
def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 5;
}
def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 7;
}
def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 9;
}
476
// Prefetch, swap, store and compare-and-swap write resources, all on
// ports 5/6.

// Zero-latency writes.
def A64FXWrite_PREF0 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_PREF1 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_SWP   : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STUR  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STNP  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STP01 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST10  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST11  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST12  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST13  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }

// One-cycle writes.
def A64FXWrite_ST14 : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST15 : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST16 : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST17 : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }

// Compare-and-swap.
def A64FXWrite_CAS : SchedWriteRes<[A64FXGI56]> { let Latency = 7; }
536
// Commonly used read types.  No forwarding is provided for any of them: each
// one gets a ReadAdvance of 0 cycles.
foreach Rd = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID,
              ReadExtrHi, ReadAdrBase, ReadST, ReadVLD] in
  def : ReadAdvance<Rd, 0>;
550
551//===----------------------------------------------------------------------===//
552// 3. Instruction Tables.
553
554//---
555// 3.1 Branch Instructions
556//---
557
558// Branch, immed
559// Branch and link, immed
560// Compare and branch
561def : WriteRes<WriteBr,      [A64FXGI7]> {
562  let Latency = 1;
563}
564
565// Branch, register
566// Branch and link, register != LR
567// Branch and link, register = LR
568def : WriteRes<WriteBrReg,   [A64FXGI7]> {
569  let Latency = 1;
570}
571
572def : WriteRes<WriteSys,     []> { let Latency = 1; }
573def : WriteRes<WriteBarrier, []> { let Latency = 1; }
574def : WriteRes<WriteHint,    []> { let Latency = 1; }
575
576def : WriteRes<WriteAtomic,  []> {
577  let Latency = 4;
578}
579
580//---
581// Branch
582//---
583def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR)>;
584def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs RET)>;
585def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^B..$")>;
586def : InstRW<[A64FXWrite_1Cyc_GI7],
587            (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;
588
//---
// 3.2 Arithmetic and Logical Instructions
// 3.3 Move and Shift Instructions
//---

// NOTE(review): the three instregex lists in this section are identical, so
// the WriteI, WriteISReg and WriteIEReg entries all name the same
// instructions.  Confirm which write is intended to apply to each form.

// ALU, basic; conditional compare; conditional select; address generation.
def : WriteRes<WriteI, [A64FXGI2456]> { let Latency = 1; }

def : InstRW<[WriteI], (instregex
    "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
    "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", "ADC(W|X)r",
    "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
    "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", "ORR?(W|X)r(i|r|s|x)",
    "SUB?(W|X)r(i|r|s|x)", "SUBS?(W|X)r(i|r|s|x)",
    "SBC(W|X)r", "SBCS(W|X)r",
    "CCMN(W|X)(i|r)", "CCMP(W|X)(i|r)",
    "CSEL(W|X)r", "CSINC(W|X)r", "CSINV(W|X)r", "CSNEG(W|X)r")>;

// Register copies are scheduled like a basic ALU operation.
def : InstRW<[WriteI], (instrs COPY)>;

// ALU, extend and/or shift.
def : WriteRes<WriteISReg, [A64FXGI2456]> { let Latency = 2; }

def : InstRW<[WriteISReg], (instregex
    "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
    "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", "ADC(W|X)r",
    "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
    "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", "ORR?(W|X)r(i|r|s|x)",
    "SUB?(W|X)r(i|r|s|x)", "SUBS?(W|X)r(i|r|s|x)",
    "SBC(W|X)r", "SBCS(W|X)r",
    "CCMN(W|X)(i|r)", "CCMP(W|X)(i|r)",
    "CSEL(W|X)r", "CSINC(W|X)r", "CSINV(W|X)r", "CSNEG(W|X)r")>;

def : WriteRes<WriteIEReg, [A64FXGI2456]> { let Latency = 1; }

def : InstRW<[WriteIEReg], (instregex
    "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
    "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)", "ADC(W|X)r",
    "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
    "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)", "ORR?(W|X)r(i|r|s|x)",
    "SUB?(W|X)r(i|r|s|x)", "SUBS?(W|X)r(i|r|s|x)",
    "SBC(W|X)r", "SBCS(W|X)r",
    "CCMN(W|X)(i|r)", "CCMP(W|X)(i|r)",
    "CSEL(W|X)r", "CSINC(W|X)r", "CSINV(W|X)r", "CSNEG(W|X)r")>;
651
// Move immed
def : WriteRes<WriteImm, [A64FXGI2456]> { let Latency = 1; }

// MOVK / MOVN / MOVZ, W and X forms.
def : InstRW<[A64FXWrite_1Cyc_GI2456], (instrs
    MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;

// Shift by register amount: two cycles on ports 2/4.
def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs
    ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;

// Variable shift
def : WriteRes<WriteIS, [A64FXGI2456]> { let Latency = 1; }
667
//---
// 3.4 Divide and Multiply Instructions
//---

// Divide, W-form.  The divider on port 4 is held for the whole latency.
// NOTE(review): the W-form is modelled as slower than the X-form (39 vs. 23
// cycles); confirm against the A64FX microarchitecture manual.
def : WriteRes<WriteID32,    [A64FXGI4]> {
  let Latency = 39;
  let ResourceCycles = [39];
}

// Divide, X-form
def : WriteRes<WriteID64,    [A64FXGI4]> {
  let Latency = 23;
  let ResourceCycles = [23];
}

// Multiply accumulate, W-form
def : WriteRes<WriteIM32,    [A64FXGI2456]> {
  let Latency = 5;
}

// Multiply accumulate, X-form
def : WriteRes<WriteIM64,    [A64FXGI2456]> {
  let Latency = 5;
}

// W-form multiply-accumulate uses the 32-bit write, X-form the 64-bit write.
// Both writes have the same resources and latency in this model, so tagging
// the X-form with WriteIM64 keeps timing unchanged and makes the table agree
// with the definitions above.
def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
def : InstRW<[WriteIM64], (instrs MADDXrrr, MSUBXrrr)>;
def : InstRW<[A64FXWrite_MADDL],
            (instregex "(S|U)(MADDL|MSUBL)rrr")>;

def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;
701
// Bitfield extract, two reg
def : WriteRes<WriteExtr, [A64FXGI2456]> { let Latency = 1; }

// Multiply high
def : InstRW<[A64FXWrite_5Cyc_GI2],
             (instrs SMULHrr, UMULHrr)>;

// Miscellaneous Data-Processing Instructions

// Bitfield extract
def : InstRW<[A64FXWrite_2Cyc_GI24],
             (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move - basic
def : InstRW<[A64FXWrite_1Cyc_GI24],
             (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;

// Bitfield move, insert
def : InstRW<[A64FXWrite_4Cyc_NGI24],
             (instregex "^BFM")>;
def : InstRW<[A64FXWrite_1Cyc_GI24],
             (instregex "(S|U)?BFM.*")>;

// Count leading
def : InstRW<[A64FXWrite_2Cyc_GI0],
             (instregex "^CLS(W|X)r$", "^CLZ(W|X)r$")>;

// Reverse bits
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instrs RBITWr, RBITXr)>;
728
// Cryptography Extensions

// AES and polynomial multiply long: 8 cycles on port 0.
def : InstRW<[A64FXWrite_8Cyc_GI0],
             (instregex "^AES[DE]")>;
def : InstRW<[A64FXWrite_8Cyc_GI0],
             (instregex "^AESI?MC")>;
def : InstRW<[A64FXWrite_8Cyc_GI0],
             (instregex "^PMULL")>;

// SHA1.
def : InstRW<[A64FXWrite_SHA00],
             (instregex "^SHA1SU0")>;
def : InstRW<[A64FXWrite_8Cyc_GI0],
             (instregex "^SHA1(H|SU1)")>;
def : InstRW<[A64FXWrite_SHA01],
             (instregex "^SHA1[CMP]")>;

// SHA256.
def : InstRW<[A64FXWrite_8Cyc_GI0],
             (instregex "^SHA256SU0")>;
def : InstRW<[A64FXWrite_8Cyc_GI0],
             (instregex "^SHA256SU1")>;
def : InstRW<[A64FXWrite_SHA01],
             (instregex "^SHA256(H|H2)")>;

// CRC Instructions (all on port 4; latency grows with operand width).
def : InstRW<[A64FXWrite_10Cyc_GI4],
             (instrs CRC32Brr, CRC32Hrr)>;
def : InstRW<[A64FXWrite_12Cyc_GI4],
             (instrs CRC32Wrr)>;
def : InstRW<[A64FXWrite_20Cyc_GI4],
             (instrs CRC32Xrr)>;

def : InstRW<[A64FXWrite_10Cyc_GI4],
             (instrs CRC32CBrr, CRC32CHrr)>;
def : InstRW<[A64FXWrite_12Cyc_GI4],
             (instrs CRC32CWrr)>;
def : InstRW<[A64FXWrite_20Cyc_GI4],
             (instrs CRC32CXrr)>;

// Reverse bits/bytes
// NOTE: Handled by WriteI.
751
//---
// 3.6 Load Instructions
// 3.10 FP Load Instructions
//---

// Load register: literal, unscaled immed, immed unprivileged, unsigned immed.
def : WriteRes<WriteLD, [A64FXGI56]> { let Latency = 4; }

// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
def : WriteRes<WriteAdr, [A64FXGI2456]> { let Latency = 1; }

// Load pair, immed offset: normal; signed words with base != SP; signed
// words with base = SP.
// LDP only breaks into *one* LS micro-op.  Thus the resources are handled by
// WriteLD and WriteLDHi carries a latency but no resource of its own.
def : WriteRes<WriteLDHi, []> { let Latency = 5; }

// Load register, register offset: basic, scale by 4/8, scale by 2, and the
// extended forms of each.
// Both the scaled-index variant and the fallback select the same write.
def A64FXWriteLDIdx : SchedWriteVariant<[
    SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>,
    SchedVar<NoSchedPred,   [A64FXWrite_1Cyc_GI56]>
  ]>;
def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>;

// Both variants of the address-base read resolve to ReadDefault.
def A64FXReadAdrBase : SchedReadVariant<[
    SchedVar<ScaledIdxPred, [ReadDefault]>,
    SchedVar<NoSchedPred,   [ReadDefault]>
  ]>;
def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>;
797
// Load pair, immed pre-index, normal
// Load pair, immed pre-index, signed words
// Load pair, immed post-index, normal
// Load pair, immed post-index, signed words
// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.

// Each foreach below emits one InstRW per listed instruction, in list order.

// Load pair (non-temporal and normal), immediate offset.
foreach Opc = [LDNPDi, LDNPQi, LDNPSi, LDNPWi, LDNPXi,
               LDPDi, LDPQi, LDPSi, LDPSWi, LDPWi, LDPXi] in
  def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs Opc)>;

// Load register, unsigned immediate.
foreach Opc = [LDRBui, LDRDui, LDRHui, LDRQui, LDRSui] in
  def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs Opc)>;

// Load register, literal (port 6 only).
foreach Opc = [LDRDl, LDRQl, LDRWl, LDRXl] in
  def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs Opc)>;

// Load register, unprivileged (plain and sign-extending).
foreach Opc = [LDTRBi, LDTRHi, LDTRWi, LDTRXi,
               LDTRSBWi, LDTRSBXi, LDTRSHWi, LDTRSHXi, LDTRSWi] in
  def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs Opc)>;
838
// Load pair, immed pre-index: D, Q, S, W and X forms.
// The last entry previously repeated LDPWpre; it now names LDPXpre so the
// list covers each register form once, like the post-index list.
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPDpre)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPQpre)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPSpre)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPWpre)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPXpre)>;
849
// Load register, pre-index, plus the sign- and zero-extending byte/halfword
// pre- and post-index forms.  One InstRW is emitted per instruction, in list
// order, all with the same write list.
foreach Opc = [LDRBpre, LDRDpre, LDRHpre, LDRQpre, LDRSpre, LDRWpre, LDRXpre,
               LDRSBWpre, LDRSBXpre, LDRSBWpost, LDRSBXpost,
               LDRSHWpre, LDRSHXpre, LDRSHWpost, LDRSHXpost,
               LDRBBpre, LDRBBpost,
               LDRHHpre, LDRHHpost] in
  def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs Opc)>;
873
// Load pair / load register, post- and pre-index forms.  Each foreach emits
// one InstRW per instruction, in list order.  Entries that repeat an earlier
// instruction are kept as they were.

// Load pair, post-index.
foreach Opc = [LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost] in
  def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs Opc)>;

// Load register, post-index.
foreach Opc = [LDRBpost, LDRDpost, LDRHpost, LDRQpost, LDRSpost, LDRWpost,
               LDRXpost] in
  def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs Opc)>;

// Load pair, pre-index.
foreach Opc = [LDPDpre, LDPQpre, LDPSpre, LDPWpre, LDPXpre] in
  def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs Opc)>;

// Load register, pre-index.
foreach Opc = [LDRBpre, LDRDpre, LDRHpre, LDRQpre, LDRSpre, LDRWpre,
               LDRXpre] in
  def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs Opc)>;

// Load pair, post-index (second listing).
foreach Opc = [LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost] in
  def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs Opc)>;

// Load register, post-index (second listing).
foreach Opc = [LDRBpost, LDRDpost, LDRHpost, LDRQpost, LDRSpost, LDRWpost,
               LDRXpost] in
  def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs Opc)>;
930
// Load register, register offset (W- and X-register index).  Each foreach
// emits one InstRW per instruction, in list order.

foreach Opc = [LDRBroW, LDRDroW, LDRHroW, LDRHHroW, LDRQroW, LDRSroW,
               LDRSHWroW, LDRSHXroW, LDRWroW, LDRXroW] in
  def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs Opc)>;

foreach Opc = [LDRBroX, LDRDroX, LDRHHroX, LDRHroX, LDRQroX, LDRSroX,
               LDRSHWroX, LDRSHXroX, LDRWroX, LDRXroX] in
  def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs Opc)>;

// Second listing of the same forms.
// NOTE(review): LDRBroW appears twice in a row here; check whether another
// instruction was intended for one of the two entries.
foreach Opc = [LDRBroW, LDRBroW, LDRDroW, LDRHroW, LDRHHroW, LDRQroW,
               LDRSroW, LDRSHWroW, LDRSHXroW, LDRWroW, LDRXroW] in
  def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs Opc)>;

foreach Opc = [LDRBroX, LDRDroX, LDRHroX, LDRHHroX, LDRQroX, LDRSroX,
               LDRSHWroX, LDRSHXroX, LDRWroX, LDRXroX] in
  def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs Opc)>;
995
// Load register, unscaled immediate (plain, zero- and sign-extending).
// One InstRW is emitted per instruction, in list order.
foreach Opc = [LDURBi, LDURBBi, LDURDi, LDURHi, LDURHHi, LDURQi, LDURSi,
               LDURXi, LDURSBWi, LDURSBXi, LDURSHWi, LDURSHXi, LDURSWi] in
  def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs Opc)>;
1009
1010//---
1011// Prefetch
1012//---
// PRFMl uses A64FXWrite_PREF0; every other prefetch form listed here uses
// A64FXWrite_PREF1.
def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>;
def : InstRW<[A64FXWrite_PREF1],
            (instrs PRFUMi, PRFMui, PRFMroW, PRFMroX)>;
1018
1019//--
1020// 3.7 Store Instructions
1021// 3.11 FP Store Instructions
1022//--
1023
// Generic store write classes. Each of them is modelled with Latency = 1, so
// the value is set once for the whole group.
let Latency = 1 in {

// Store register, unscaled immed
// Store register, immed unprivileged
// Store register, unsigned immed
def : WriteRes<WriteST, [A64FXGI56]>;

// Store register, immed post-index
// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase

// Store register, immed pre-index
// NOTE: Handled by WriteAdr, WriteST

// Store register, register offset, basic
// Store register, register offset, scaled by 4/8
// Store register, register offset, scaled by 2
// Store register, register offset, extend
// Store register, register offset, extend, scale by 4/8
// Store register, register offset, extend, scale by 1
def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]>;

// Store pair, immed offset, W-form
// Store pair, immed offset, X-form
def : WriteRes<WriteSTP, [A64FXGI56]>;

} // let Latency = 1
1052
1053// Store pair, immed post-index, W-form
1054// Store pair, immed post-index, X-form
1055// Store pair, immed pre-index, W-form
1056// Store pair, immed pre-index, X-form
1057// NOTE: Handled by WriteAdr, WriteSTP.
1058
// Store register, unscaled immediate.
def : InstRW<[A64FXWrite_STUR],
            (instrs STURBi, STURBBi, STURDi, STURHi, STURHHi, STURQi,
                    STURSi, STURWi, STURXi)>;

// Store register, unprivileged.
def : InstRW<[WriteAdr, A64FXWrite_STUR],
            (instrs STTRBi, STTRHi, STTRWi, STTRXi)>;

// Store pair, immediate offset (non-temporal and regular forms).
def : InstRW<[A64FXWrite_STNP],
            (instrs STNPDi, STNPQi, STNPXi, STNPWi,
                    STPDi, STPQi, STPXi, STPWi)>;

// Store register, unsigned scaled immediate.
// Each of these instructions used to be listed twice with the same write;
// the repeated entries were redundant and have been removed.
def : InstRW<[A64FXWrite_STUR],
            (instrs STRBui, STRDui, STRHui, STRQui, STRXui, STRWui)>;
1096
// Store pair, immediate pre-/post-index.
//
// Each pre/post pair used to be listed four times, alternating between
// [A64FXWrite_STP01] and [A64FXWrite_STP01, ReadAdrBase]. With
// FullInstRWOverlapCheck = 0 the overlap is not diagnosed and the first InstRW
// naming an instruction is the one that is used, so only that first form is
// kept here.
// NOTE(review): if the ReadAdrBase variant was the intended one, switch the
// operand list below rather than re-adding a second entry.
def : InstRW<[A64FXWrite_STP01],
            (instrs STPDpre, STPDpost,
                    STPQpre, STPQpost,
                    STPSpre, STPSpost,
                    STPWpre, STPWpost,
                    STPXpre, STPXpost)>;
1137
// Store register, immediate pre-/post-index.
//
// Each pre/post pair used to be listed four times, alternating between
// [WriteAdr, A64FXWrite_STP01] and [WriteAdr, A64FXWrite_STP01, ReadAdrBase].
// With FullInstRWOverlapCheck = 0 the overlap is not diagnosed and the first
// InstRW naming an instruction is the one that is used, so only that first
// form is kept here.
// NOTE(review): if the ReadAdrBase variant was the intended one, switch the
// operand list below rather than re-adding a second entry.
def : InstRW<[WriteAdr, A64FXWrite_STP01],
            (instrs STRBpre,  STRBpost,
                    STRBBpre, STRBBpost,
                    STRDpre,  STRDpost,
                    STRHpre,  STRHpost,
                    STRHHpre, STRHHpost,
                    STRQpre,  STRQpost,
                    STRSpre,  STRSpost,
                    STRWpre,  STRWpost,
                    STRXpre,  STRXpost)>;
1210
// Store register, register offset (W- and X-register index forms).
// Every W/X pair used to be listed twice with identical operands; the
// repeated entries were redundant and have been folded into this one list.
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
            (instrs STRBroW,  STRBroX,
                    STRBBroW, STRBBroX,
                    STRDroW,  STRDroX,
                    STRHroW,  STRHroX,
                    STRHHroW, STRHHroX,
                    STRQroW,  STRQroX,
                    STRSroW,  STRSroX,
                    STRWroW,  STRWroX,
                    STRXroW,  STRXroX)>;
1247
1248//---
1249// 3.8 FP Data Processing Instructions
1250//---
1251
// WriteF and WriteFCmp use the same resource group, latency and resource
// cycles, so those values are set once for both.
let Latency = 4, ResourceCycles = [2] in {

// FP absolute value
// FP min/max
// FP negate
def : WriteRes<WriteF, [A64FXGI03]>;

// FP compare
def : WriteRes<WriteFCmp, [A64FXGI03]>;

} // let Latency = 4, ResourceCycles = [2]

// FP arithmetic
def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instrs FADDDrr, FADDHrr, FSUBDrr, FSUBHrr)>;
1270
1271// FP Div, Sqrt
1272def : WriteRes<WriteFDiv, [A64FXGI0]> {
1273  let Latency = 43;
1274}
1275
1276def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]> {
1277  let Latency = 38;
1278}
1279
1280def A64FXXWriteFDivSP : SchedWriteRes<[A64FXGI0]> {
1281  let Latency = 29;
1282}
1283
1284def A64FXXWriteFDivDP : SchedWriteRes<[A64FXGI0]> {
1285  let Latency = 43;
1286}
1287
1288def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]> {
1289  let Latency = 29;
1290}
1291
1292def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]> {
1293  let Latency = 43;
1294}
1295
// FP divide, S-form
// FP square root, S-form
// The scalar forms are named explicitly; the regex entries pick up the
// remaining instructions whose names end in "32". The former
// (instregex "^FDIVSrr") / (instregex "^FSQRTSr") entries only repeated the
// two explicit entries and have been removed.
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>;
def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>;
def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;

// FP divide, D-form
// FP square root, D-form
// Same layout as the S-form group; the former (instregex "^FDIVDrr") /
// (instregex "^FSQRTDr") entries only repeated the two explicit entries and
// have been removed.
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>;
def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>;
def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
1313
1314// FP round to integral
1315def : InstRW<[A64FXWrite_9Cyc_GI03],
1316            (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;
1317
1318// FP select
1319def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>;
1320
1321//---
1322// 3.9 FP Miscellaneous Instructions
1323//---
1324
1325// FP convert, from vec to vec reg
1326// FP convert, from gen to vec reg
1327// FP convert, from vec to gen reg
1328def : WriteRes<WriteFCvt, [A64FXGI03]> {
1329  let Latency = 9;
1330  let ResourceCycles = [2];
1331}
1332
1333// FP move, immed
1334// FP move, register
1335def : WriteRes<WriteFImm, [A64FXGI0]> {
1336  let Latency = 4;
1337  let ResourceCycles = [2];
1338}
1339
1340// FP transfer, from gen to vec reg
1341// FP transfer, from vec to gen reg
1342def : WriteRes<WriteFCopy, [A64FXGI0]> {
1343  let Latency = 4;
1344  let ResourceCycles = [2];
1345}
1346
1347def : InstRW<[A64FXWrite_FMOV_GV], (instrs FMOVXDHighr)>;
1348def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;
1349
1350//---
1351// 3.12 ASIMD Integer Instructions
1352//---
1353
1354// ASIMD absolute diff, D-form
1355// ASIMD absolute diff, Q-form
1356// ASIMD absolute diff accum, D-form
1357// ASIMD absolute diff accum, Q-form
1358// ASIMD absolute diff accum long
1359// ASIMD absolute diff long
1360// ASIMD arith, basic
1361// ASIMD arith, complex
1362// ASIMD compare
1363// ASIMD logical (AND, BIC, EOR)
1364// ASIMD max/min, basic
1365// ASIMD max/min, reduce, 4H/4S
1366// ASIMD max/min, reduce, 8B/8H
1367// ASIMD max/min, reduce, 16B
1368// ASIMD multiply, D-form
1369// ASIMD multiply, Q-form
1370// ASIMD multiply accumulate long
1371// ASIMD multiply accumulate saturating long
1372// ASIMD multiply long
1373// ASIMD pairwise add and accumulate
1374// ASIMD shift accumulate
1375// ASIMD shift by immed, basic
1376// ASIMD shift by immed and insert, basic, D-form
1377// ASIMD shift by immed and insert, basic, Q-form
1378// ASIMD shift by immed, complex
1379// ASIMD shift by register, basic, D-form
1380// ASIMD shift by register, basic, Q-form
1381// ASIMD shift by register, complex, D-form
1382// ASIMD shift by register, complex, Q-form
1383def : WriteRes<WriteVd, [A64FXGI03]> {
1384  let Latency = 4;
1385}
1386def : WriteRes<WriteVq, [A64FXGI03]> {
1387  let Latency = 4;
1388}
1389
1390// ASIMD arith, reduce, 4H/4S
1391// ASIMD arith, reduce, 8B/8H
1392// ASIMD arith, reduce, 16B
1393
1394// ASIMD logical (MVN (alias for NOT), ORN, ORR)
1395def : InstRW<[A64FXWrite_4Cyc_GI03],
1396            (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;
1397
1398// ASIMD arith, reduce
1399def : InstRW<[A64FXWrite_ADDLV],
1400            (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;
1401
1402// ASIMD polynomial (8x8) multiply long
1403def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>;
1404def : InstRW<[A64FXWrite_MULLV],
1405            (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
1406def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v8i8|v16i8)")>;
1407def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v1i64|v2i64)")>;
1408
1409// ASIMD absolute diff accum, D-form
1410def : InstRW<[A64FXWrite_ABA],
1411            (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
1412// ASIMD absolute diff accum, Q-form
1413def : InstRW<[A64FXWrite_ABA],
1414            (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
1415// ASIMD absolute diff accum long
1416def : InstRW<[A64FXWrite_ABAL],
1417            (instregex "^[SU]ABAL")>;
1418// ASIMD arith, reduce, 4H/4S
1419def : InstRW<[A64FXWrite_ADDLV1],
1420            (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
1421// ASIMD arith, reduce, 8B
1422def : InstRW<[A64FXWrite_ADDLV1],
1423            (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
1424// ASIMD arith, reduce, 16B/16H
1425def : InstRW<[A64FXWrite_ADDLV1],
1426            (instregex "^[SU]?ADDL?Vv16i8v$")>;
1427// ASIMD max/min, reduce, 4H/4S
1428def : InstRW<[A64FXWrite_MINMAXV],
1429            (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
1430// ASIMD max/min, reduce, 8B/8H
1431def : InstRW<[A64FXWrite_MINMAXV],
1432            (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
1433// ASIMD max/min, reduce, 16B/16H
1434def : InstRW<[A64FXWrite_MINMAXV],
1435            (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
1436// ASIMD multiply, D-form
1437def : InstRW<[A64FXWrite_PMUL],
1438            (instregex "^(P?MUL|SQR?DMUL)" #
1439                       "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
1440                       "(_indexed)?$")>;
1441
1442// ASIMD multiply, Q-form
1443def : InstRW<[A64FXWrite_PMUL],
1444            (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>;
1445
1446// ASIMD multiply, Q-form
1447def : InstRW<[A64FXWrite_SQRDMULH],
1448            (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
1449
1450// ASIMD multiply accumulate, D-form
1451def : InstRW<[A64FXWrite_9Cyc_GI03],
1452            (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
1453// ASIMD multiply accumulate, Q-form
1454def : InstRW<[A64FXWrite_9Cyc_GI03],
1455            (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
1456// ASIMD shift accumulate
1457def : InstRW<[A64FXWrite_SRSRAV],
1458            (instregex "SRSRAv", "URSRAv")>;
1459def : InstRW<[A64FXWrite_SSRAV],
1460            (instregex "SSRAv", "USRAv")>;
1461
1462// ASIMD shift by immed, basic
1463def : InstRW<[A64FXWrite_RSHRN],
1464            (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>;
1465def : InstRW<[A64FXWrite_SHRN],
1466            (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>;
1467
1468def : InstRW<[A64FXWrite_6Cyc_GI3],
1469            (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>;
1470
1471// ASIMD shift by immed, complex
1472def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>;
1473def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^SQSHLU")>;
1474// ASIMD shift by register, basic, Q-form
1475def : InstRW<[A64FXWrite_6Cyc_GI3],
1476            (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
1477// ASIMD shift by register, complex, D-form
1478def : InstRW<[A64FXWrite_6Cyc_GI3],
1479            (instregex "^[SU][QR]{1,2}SHL" #
1480                       "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
1481// ASIMD shift by register, complex, Q-form
1482def : InstRW<[A64FXWrite_6Cyc_GI3],
1483            (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;
1484
1485// ASIMD Arithmetic
1486def : InstRW<[A64FXWrite_4Cyc_GI03],
1487            (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
1488def : InstRW<[A64FXWrite_4Cyc_GI03],
1489            (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
1490def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>;
1491def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>;
1492def : InstRW<[A64FXWrite_4Cyc_GI03],
1493            (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
1494                       "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
1495def : InstRW<[A64FXWrite_ADDP],
1496            (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
1497def : InstRW<[A64FXWrite_4Cyc_GI03],
1498            (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
1499                       "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
1500def : InstRW<[A64FXWrite_4Cyc_GI0],
1501            (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
1502def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>;
1503def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>;
1504def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>;
1505def : InstRW<[A64FXWrite_MINMAXV],
1506             (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv", "^UMINVv")>;
1507def : InstRW<[A64FXWrite_ABA],
1508             (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
// Saturating absolute value.
// The entries that used to follow here for SQADD/SQSUB/UQADD/UQSUB,
// SUQADD/USQADD, SQNEG, SRHADD/URHADD, ADDHN/SUBHN and RADDHN/RSUBHN repeated
// patterns already listed a few entries above with the same writes, so only
// the pattern not covered there (SQABS) remains.
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^SQABS")>;
1520
1521def : InstRW<[A64FXWrite_4Cyc_GI03],
1522            (instregex "^CMEQv", "^CMGEv", "^CMGTv",
1523                       "^CMLEv", "^CMLTv", "^CMHIv", "^CMHSv")>;
1524def : InstRW<[A64FXWrite_MINMAXV],
1525            (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
1526def : InstRW<[A64FXWrite_ADDP],
1527            (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
1528def : InstRW<[A64FXWrite_4Cyc_GI03],
1529            (instregex "^SABDv", "^UABDv")>;
1530def : InstRW<[A64FXWrite_TBX1],
1531            (instregex "^SABDLv", "^UABDLv")>;
1532
1533//---
1534// 3.13 ASIMD Floating-point Instructions
1535//---
1536
1537def : WriteRes<WriteFMul, [A64FXGI03]> {
1538  let Latency = 9;
1539}
1540
1541// ASIMD FP absolute value
1542def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FABSv")>;
1543
1544// ASIMD FP arith, normal, D-form
1545// ASIMD FP arith, normal, Q-form
1546def : InstRW<[A64FXWrite_9Cyc_GI03],
1547            (instregex "^FABDv", "^FADDv", "^FSUBv")>;
1548
1549// ASIMD FP arith, pairwise, D-form
1550// ASIMD FP arith, pairwise, Q-form
1551def : InstRW<[A64FXWrite_FADDPV], (instregex "^FADDPv")>;
1552
1553// ASIMD FP compare, D-form
1554// ASIMD FP compare, Q-form
1555def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FACGEv", "^FACGTv")>;
1556def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCMEQv", "^FCMGEv",
1557                                                 "^FCMGTv", "^FCMLEv",
1558                                                 "^FCMLTv")>;
1559// ASIMD FP round, D-form
1560def : InstRW<[A64FXWrite_9Cyc_GI03],
1561            (instregex "^FRINT[AIMNPXZ](v2f32)")>;
1562// ASIMD FP round, Q-form
1563def : InstRW<[A64FXWrite_9Cyc_GI03],
1564            (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;
1565
1566// ASIMD FP convert, long
1567// ASIMD FP convert, narrow
1568// ASIMD FP convert, other, D-form
1569// ASIMD FP convert, other, Q-form
1570
1571// ASIMD FP convert, long and narrow
1572def : InstRW<[A64FXWrite_FCVTXNV], (instregex "^FCVT(L|N|XN)v")>;
1573// ASIMD FP convert, other, D-form
1574def : InstRW<[A64FXWrite_FCVTXNV],
1575      (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
1576// ASIMD FP convert, other, Q-form
1577def : InstRW<[A64FXWrite_FCVTXNV],
1578      (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;
1579
// Each form used to be listed twice (once via instrs, once via an equivalent
// instregex); only the explicit instrs entry is kept.
// NOTE(review): the scalar FP divide section of this file already contains
// (instregex "^FDIVv.*32$") and (instregex "^FDIVv.*64$"), which also match
// these three instructions and appear earlier, so they presumably take
// precedence. In particular FDIVv4f32 would then be scheduled with
// A64FXXWriteFDivSP rather than A64FXXWriteFDiv -- confirm which is intended.

// ASIMD FP divide, D-form, F32
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>;

// ASIMD FP divide, Q-form, F32
def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>;
1591
1592// ASIMD FP max/min, normal, D-form
1593// ASIMD FP max/min, normal, Q-form
1594def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMAXv", "^FMAXNMv",
1595                                               "^FMINv", "^FMINNMv")>;
1596
1597// ASIMD FP max/min, pairwise, D-form
1598// ASIMD FP max/min, pairwise, Q-form
1599def : InstRW<[A64FXWrite_ADDP], (instregex "^FMAXPv", "^FMAXNMPv",
1600                                           "^FMINPv", "^FMINNMPv")>;
1601
1602// ASIMD FP max/min, reduce
1603def : InstRW<[A64FXWrite_FMAXVVH], (instregex "^FMAXVv", "^FMAXNMVv",
1604                                              "^FMINVv", "^FMINNMVv")>;
1605
1606// ASIMD FP multiply, D-form, FZ
1607// ASIMD FP multiply, D-form, no FZ
1608// ASIMD FP multiply, Q-form, FZ
1609// ASIMD FP multiply, Q-form, no FZ
1610def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>;
1611def : InstRW<[A64FXWrite_FMULXE],
1612            (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
1613def : InstRW<[A64FXWrite_FMULXE],
1614            (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;
1615
1616// ASIMD FP multiply accumulate, Dform, FZ
1617// ASIMD FP multiply accumulate, Dform, no FZ
1618// ASIMD FP multiply accumulate, Qform, FZ
1619// ASIMD FP multiply accumulate, Qform, no FZ
1620def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>;
1621def : InstRW<[A64FXWrite_FMULXE],
1622            (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
1623def : InstRW<[A64FXWrite_FMULXE],
1624            (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;
1625
1626// ASIMD FP negate
1627def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>;
1628
1629//--
1630// 3.14 ASIMD Miscellaneous Instructions
1631//--
1632
1633// ASIMD bit reverse
1634def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>;
1635
1636// ASIMD bitwise insert, D-form
1637// ASIMD bitwise insert, Q-form
1638def : InstRW<[A64FXWrite_BIF],
1639            (instregex "^BIFv", "^BITv", "^BSLv")>;
1640
1641// ASIMD count, D-form
1642// ASIMD count, Q-form
1643def : InstRW<[A64FXWrite_4Cyc_GI0],
1644            (instregex "^CLSv", "^CLZv", "^CNTv")>;
1645
1646// ASIMD duplicate, gen reg
1647// ASIMD duplicate, element
1648def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>;
1649def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUP(i8|i16|i32|i64)$")>;
1650def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>;
1651
1652// ASIMD extract
1653def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>;
1654
1655// ASIMD extract narrow
1656def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>;
1657
1658// ASIMD extract narrow, saturating
1659def : InstRW<[A64FXWrite_6Cyc_GI3],
1660            (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;
1661
1662// ASIMD insert, element to element
1663def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;
1664
1665// ASIMD transfer, element to gen reg
1666def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;
1667
1668// ASIMD move, integer immed
1669def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv")>;
1670
1671// ASIMD move, FP immed
1672def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMOVv")>;
1673
1674// ASIMD table lookup, D-form
1675def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>;
1676def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv8i8Two")>;
1677def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv8i8Three")>;
1678def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv8i8Four")>;
1679def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv8i8One")>;
1680def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv8i8Two")>;
1681def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv8i8Three")>;
1682def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv8i8Four")>;
1683
1684// ASIMD table lookup, Q-form
1685def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>;
1686def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv16i8Two")>;
1687def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv16i8Three")>;
1688def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv16i8Four")>;
1689def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv16i8One")>;
1690def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv16i8Two")>;
1691def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv16i8Three")>;
1692def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv16i8Four")>;
1693
1694// ASIMD unzip/zip
1695def : InstRW<[A64FXWrite_6Cyc_GI0],
1696            (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;
1697
1698// ASIMD reciprocal estimate, D-form
1699// ASIMD reciprocal estimate, Q-form
1700def : InstRW<[A64FXWrite_4Cyc_GI03],
1701            (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
1702                       "^FRSQRTEv", "^URSQRTEv")>;
1703
1704// ASIMD reciprocal step, D-form, FZ
1705// ASIMD reciprocal step, D-form, no FZ
1706// ASIMD reciprocal step, Q-form, FZ
1707// ASIMD reciprocal step, Q-form, no FZ
1708def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex "^FRECPSv", "^FRSQRTSv")>;
1709
1710// ASIMD reverse
1711def : InstRW<[A64FXWrite_4Cyc_GI03],
1712            (instregex "^REV16v", "^REV32v", "^REV64v")>;
1713
// ASIMD table lookup, D-form
// ASIMD table lookup, Q-form
// NOTE(review): the per-table-size TBL/TBX entries earlier in this section
// already match these instructions, so this catch-all is presumably shadowed;
// it is kept unchanged.
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>;

// ASIMD transpose
// The SMOV/UMOV, INS, UZP1/UZP2 and ZIP1/ZIP2 entries that used to surround
// this one repeated patterns already listed earlier in this section with the
// same writes, and have been removed.
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1v", "^TRN2v")>;
1733
1734//--
1735// 3.15 ASIMD Load Instructions
1736//--
1737
// ASIMD load, 1 element, multiple structures, 1 to 4 registers.
// Every register count has four entries: two plain forms followed by the
// two _POST forms, which carry WriteAdr in addition to the load write.
// NOTE(review): 2d is grouped with the 8b/4h/2s/1d arrangements rather than
// with 16b/8h/4s in all of the entries below; confirm this is intentional.

// 1 register
def : InstRW<[A64FXWrite_8Cyc_GI56],
      (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_11Cyc_GI56],
      (instregex "^LD1Onev(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
      (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD109, WriteAdr],
      (instregex "^LD1Onev(16b|8h|4s)_POST$")>;

// 2 registers
def : InstRW<[A64FXWrite_LD102],
      (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD103],
      (instregex "^LD1Twov(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
      (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
      (instregex "^LD1Twov(16b|8h|4s)_POST$")>;

// 3 registers
def : InstRW<[A64FXWrite_LD104],
      (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD105],
      (instregex "^LD1Threev(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
      (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
      (instregex "^LD1Threev(16b|8h|4s)_POST$")>;

// 4 registers
def : InstRW<[A64FXWrite_LD106],
      (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD107],
      (instregex "^LD1Fourv(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
      (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
      (instregex "^LD1Fourv(16b|8h|4s)_POST$")>;
1781
// ASIMD load, 1 element, one lane (B/H/S/D).
// The _POST form adds WriteAdr.
def : InstRW<[A64FXWrite_LD1I0],
      (instregex "^LD1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD1I1, WriteAdr],
      (instregex "^LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes (LD1R), every D-form and Q-form
// arrangement shares one entry.
def : InstRW<[A64FXWrite_8Cyc_GI03],
      (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
      (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1795
// ASIMD load, 2 element, multiple structures (D-form B/H/S and Q-form).
def : InstRW<[A64FXWrite_LD103],
      (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
      (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane (B/H/S/D).
def : InstRW<[A64FXWrite_LD2I0],
      (instregex "^LD2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD2I1, WriteAdr],
      (instregex "^LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes (LD2R), D-form and Q-form.
def : InstRW<[A64FXWrite_LD102],
      (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
      (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1817
// ASIMD load, 3 element, multiple structures.
// Covers D-form B/H/S and Q-form B/H/S/D; the _POST form adds WriteAdr.
def : InstRW<[A64FXWrite_LD105],
      (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
      (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;
1825
// ASIMD load, 3 element, one lane, B/H
1827// ASIMD load, 3 element, one lane, S
1828// ASIMD load, 3 element, one lane, D
// LD3 single-lane forms for all element sizes; the _POST form adds WriteAdr.
def : InstRW<[A64FXWrite_LD3I0],
      (instregex "^LD3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD3I1, WriteAdr],
      (instregex "^LD3i(8|16|32|64)_POST$")>;
1832
// ASIMD load, 3 element, all lanes (LD3R).
// One entry for every D-form and Q-form arrangement; the _POST form adds
// WriteAdr.
def : InstRW<[A64FXWrite_LD104],
      (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
      (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1841
// ASIMD load, 4 element, multiple structures (D-form B/H/S and Q-form).
def : InstRW<[A64FXWrite_LD107],
      (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
      (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane (B/H/S/D).
def : InstRW<[A64FXWrite_LD4I0],
      (instregex "^LD4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD4I1, WriteAdr],
      (instregex "^LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes (LD4R), D-form and Q-form.
def : InstRW<[A64FXWrite_LD106],
      (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
      (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1865
1866//--
1867// 3.16 ASIMD Store Instructions
1868//--
1869
// ASIMD store, 1 element.
// D-form and Q-form arrangements share an entry for each register count.
// _POST forms carry WriteAdr in addition to the store write.

// Multiple structures, 1 register, and single-lane stores (B/H/S/D); both
// use the same writes.
def : InstRW<[A64FXWrite_ST10],
      (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$",
                 "^ST1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST14, WriteAdr],
      (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$",
                 "^ST1i(8|16|32|64)_POST$")>;

// Multiple structures, 2 registers
def : InstRW<[A64FXWrite_ST11],
      (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
      (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// Multiple structures, 3 registers
def : InstRW<[A64FXWrite_ST12],
      (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
      (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// Multiple structures, 4 registers
def : InstRW<[A64FXWrite_ST13],
      (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
      (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1904
// ASIMD store, 2 element.
// The multiple-structure forms (D-form B/H/S, Q-form B/H/S/D) and the
// single-lane forms (B/H/S/D) use the same writes, so they share entries.
def : InstRW<[A64FXWrite_ST11],
      (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$",
                 "^ST2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
      (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$",
                 "^ST2i(8|16|32|64)_POST$")>;
1919
// ASIMD store, 3 element.
// The multiple-structure forms (D-form B/H/S, Q-form B/H/S/D) and the
// single-lane forms (B/H/S/D) use the same writes, so they share entries.
def : InstRW<[A64FXWrite_ST12],
      (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$",
                 "^ST3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
      (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$",
                 "^ST3i(8|16|32|64)_POST$")>;
1934
// ASIMD store, 4 element.
// The multiple-structure forms (D-form B/H/S, Q-form B/H/S/D) and the
// single-lane forms (B/H/S/D) use the same writes, so they share entries.
def : InstRW<[A64FXWrite_ST13],
      (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$",
                 "^ST4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
      (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$",
                 "^ST4i(8|16|32|64)_POST$")>;
1949
// V8.1a Atomics (LSE)

// Compare-and-swap: the plain, A, L and AL variants of every size all use
// the same writes, so they are listed in a single entry.
def : InstRW<[A64FXWrite_CAS, WriteAtomic],
      (instrs CASB,   CASH,   CASW,   CASX,
              CASAB,  CASAH,  CASAW,  CASAX,
              CASLB,  CASLH,  CASLW,  CASLX,
              CASALB, CASALH, CASALW, CASALX)>;

// LDLAR, all sizes.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;
1965
// Atomic LDADD / LDCLR / LDEOR / LDSET.
// One entry per operation, listing the plain, A, L and AL variants for each
// size; every instruction gets [A64FXWrite_5Cyc_GI5, WriteAtomic].
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDADDB,   LDADDH,   LDADDW,   LDADDX,
              LDADDAB,  LDADDAH,  LDADDAW,  LDADDAX,
              LDADDLB,  LDADDLH,  LDADDLW,  LDADDLX,
              LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDCLRB,   LDCLRH,   LDCLRW,   LDCLRX,
              LDCLRAB,  LDCLRAH,  LDCLRAW,  LDCLRAX,
              LDCLRLB,  LDCLRLH,  LDCLRLW,  LDCLRLX,
              LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDEORB,   LDEORH,   LDEORW,   LDEORX,
              LDEORAB,  LDEORAH,  LDEORAW,  LDEORAX,
              LDEORLB,  LDEORLH,  LDEORLW,  LDEORLX,
              LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDSETB,   LDSETH,   LDSETW,   LDSETX,
              LDSETAB,  LDSETAH,  LDSETAW,  LDSETAX,
              LDSETLB,  LDSETLH,  LDSETLW,  LDSETLX,
              LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;
2013
// Atomic signed/unsigned max/min.
// One entry per operation; each row lists the plain, A, L and AL variant
// for one size (B, H, W, X).
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDSMAXB, LDSMAXAB, LDSMAXLB, LDSMAXALB,
              LDSMAXH, LDSMAXAH, LDSMAXLH, LDSMAXALH,
              LDSMAXW, LDSMAXAW, LDSMAXLW, LDSMAXALW,
              LDSMAXX, LDSMAXAX, LDSMAXLX, LDSMAXALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDSMINB, LDSMINAB, LDSMINLB, LDSMINALB,
              LDSMINH, LDSMINAH, LDSMINLH, LDSMINALH,
              LDSMINW, LDSMINAW, LDSMINLW, LDSMINALW,
              LDSMINX, LDSMINAX, LDSMINLX, LDSMINALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDUMAXB, LDUMAXAB, LDUMAXLB, LDUMAXALB,
              LDUMAXH, LDUMAXAH, LDUMAXLH, LDUMAXALH,
              LDUMAXW, LDUMAXAW, LDUMAXLW, LDUMAXALW,
              LDUMAXX, LDUMAXAX, LDUMAXLX, LDUMAXALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDUMINB, LDUMINAB, LDUMINLB, LDUMINALB,
              LDUMINH, LDUMINAH, LDUMINLH, LDUMINALH,
              LDUMINW, LDUMINAW, LDUMINLW, LDUMINALW,
              LDUMINX, LDUMINAX, LDUMINLX, LDUMINALX)>;
2037
// Atomic swap: the plain, A, L and AL variants of every size use the same
// writes, so they are listed in a single entry.
def : InstRW<[A64FXWrite_SWP, WriteAtomic],
      (instrs SWPB,   SWPH,   SWPW,   SWPX,
              SWPAB,  SWPAH,  SWPAW,  SWPAX,
              SWPLB,  SWPLH,  SWPLW,  SWPLX,
              SWPALB, SWPALH, SWPALW, SWPALX)>;

// STLLR, all sizes.
def : InstRW<[A64FXWrite_STUR, WriteAtomic],
      (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;
2052
2053// SVE instructions
2054
2055// The modeling method for SVE instructions is more accurate than others.
2056// TODO: modify the model of other instructions similarly.
2057
// SVE instructions mapped to A64FXWrite_4Cyc_GI0.
def : InstRW<[A64FXWrite_4Cyc_GI0],
      (instregex "^AND_ZI", "^CL[SZ]_Z", "^CPY_ZP[mz]I", "^DUP_ZZ?I",
                 "^DUPM_Z", "^EOR_ZI", "^ORR_ZI",
                 "^FCM(EQ|GT|GE|LT|LE|NE|UO)_P", "^FCPY_Z",
                 "^F(MAX|MIN).*I_", "^NEG_Z", "^[SU](MAX|MIN)_ZI",
                 "^SUBR?_ZI")>;

// SVE instructions mapped to A64FXWrite_6Cyc_GI0.
def : InstRW<[A64FXWrite_6Cyc_GI0],
      (instregex "^CLAST[AB]_[VZ]", "^COMPACT_Z", "^CPY_ZPmV", "^DUP_ZR",
                 "^EXT_Z", "^FDUP_Z", "^INSR_ZV", "^LAST[AB]_V",
                 "^REV_Z", "^SPLICE_Z", "^[SU]UNPK(HI|LO)_Z", "^TBL_Z",
                 "^TRN[12]_Z")>;

// SVE instructions mapped to A64FXWrite_9Cyc_GI0.
def : InstRW<[A64FXWrite_9Cyc_GI0],
      (instregex "^F(ADD|SUBR?)_.*I_", "^FRECPS_Z", "^FRSQRTS_Z",
                 "^INDEX_II_[SD]", "^MUL_ZI")>;
2072
// SVE instructions mapped to A64FXWrite_4Cyc_GI3.
def : InstRW<[A64FXWrite_4Cyc_GI3], (instregex "^CNT_Z")>;

// SVE instructions mapped to A64FXWrite_4Cyc_GI03.
def : InstRW<[A64FXWrite_4Cyc_GI03],
      (instregex "^ABS_Z", "^ADD_Z", "^AND_Z[^I]", "^ASRR?_(WIDE_)?Z",
                 "^BIC_Z", "^ADR_[SU]XTW_Z", "^CNOT_Z", "^DEC[BHWD]_Z",
                 "^EOR_Z[^I]", "^INC[BHWD]_Z", "^ORR_Z[^I]", "^FABS_Z",
                 "^FACG[ET]_P", "^FEXPA_Z", "^F(MAX|MIN)[^V]*Z_",
                 "^FNEG_Z", "^FRECP[EX]_Z", "^FRSQRTE_Z", "^FTSSEL_Z",
                 "^LS[LR]R?(_WIDE)?_Z", "^NOT_Z", "^RBIT_Z",
                 "^REV[BHW]_Z", "^SABD_Z", "^SEL_Z",
                 "^[SU](MAX|MIN)_ZP", "^[SU]Q(INC|DEC)[^P]_Z",
                 "^SUBR?_Z[^I]", "^[SU]XT._Z", "^UABD_Z")>;

// SVE instructions mapped to A64FXWrite_9Cyc_GI03.
def : InstRW<[A64FXWrite_9Cyc_GI03],
      (instregex "^FABD_Z", "^F(ADD|SUBR?)_.*Z_",
                 "^FN?(MAD|MLA|MLS|MSB)_ZP", "^FMUL_(ZP|ZZZ_)",
                 "^FMULX_Z", "^FCVT(ZS|ZU)?_Z", "^FRINT._Z",
                 "^FSCALE_Z", "^FTMAD_Z", "^FTSMUL_Z", "^MAD_Z",
                 "^MLA_Z", "^MLS_Z", "^MSB_Z", "^MUL_ZP",
                 "^[SU]CVTF_Z", "^[SU]DOT_ZZZ_", "^[SU]MULH_Z")>;
2092
// SVE instructions mapped to A64FXWrite_3Cyc_GI1.
def : InstRW<[A64FXWrite_3Cyc_GI1],
      (instregex "^ANDS?_P", "^BICS?_P", "^BRK.*_P", "^EORS?_P",
                 "^ORRS?_P", "^NANDS?_P", "^NORS?_P", "^ORNS?_P",
                 "^PFALSE", "^PNEXT", "^PFIRST", "^PTEST", "^PTRUES?",
                 "^PUNPK(HI|LO)", "^RDFFRS?", "^REV_P", "^SEL_P",
                 "^TRN[12]_P")>;

// SVE instructions mapped to A64FXWrite_1Cyc_GI24.
def : InstRW<[A64FXWrite_1Cyc_GI24],
      (instregex "^ADD[PV]L", "^CNT[BHWD]_X", "^DEC[BHWD]_X",
                 "^INC[BHWD]_X", "^RDVLI")>;
2102
// SVE LDR of a predicate or vector register.
def : InstRW<[A64FXWrite_11Cyc_GI5], (instregex "^LDR_[PZ]XI")>;

// SVE LD1-style loads matched by prefix, including the NF/FF/NT variants
// and the R and S forms.
def : InstRW<[A64FXWrite_11Cyc_GI56],
      (instregex "^LD(NF|FF|NT)?1R?S?[BHSWDQ]")>;
2108
// Write with an empty processor-resource list and all-default fields,
// assigned to SETFFR and MOVPRFX.
def A64FXWrite_None : SchedWriteRes<[]>;
def : InstRW<[A64FXWrite_None], (instregex "^SETFFR", "^MOVPRFX")>;
2112
// Indexed FMLA/FMLS/FMUL (ZZZI forms): latency 15, 2 micro-ops, A64FXGI03
// held for 2 cycles.
let Latency = 15, NumMicroOps = 2, ResourceCycles = [2] in
def A64FXWrite_FMAIndexed : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FMAIndexed], (instregex "^F(MLA|MLS|MUL)_ZZZI")>;
2119
// ADR_LSL_Z: latency 5, 2 micro-ops, A64FXGI0 held for 2 cycles.
let Latency = 5, NumMicroOps = 2, ResourceCycles = [2] in
def A64FXWrite_ADR_LSL_Z : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_ADR_LSL_Z], (instregex "^ADR_LSL_Z")>;
2126
// ASRD_Z: latency 8, 2 micro-ops, using A64FXGI0 and A64FXGI01.
let Latency = 8, NumMicroOps = 2 in
def A64FXWrite_ASRD : SchedWriteRes<[A64FXGI0, A64FXGI01]>;
def : InstRW<[A64FXWrite_ASRD], (instregex "^ASRD_Z")>;
2132
// Integer and logical vector reductions (ANDV, EORV, ORV, SADDV, SMAXV,
// SMINV, UADDV, UMAXV, UMINV), one write per element size. All run on
// A64FXGI03; latency, micro-op count and resource cycles drop as the
// element size grows from B to D.

// B elements
let Latency = 46, NumMicroOps = 10, ResourceCycles = [10] in
def A64FXWrite_Reduction4CycB : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_Reduction4CycB],
      (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_B")>;

// H elements
let Latency = 42, NumMicroOps = 9, ResourceCycles = [9] in
def A64FXWrite_Reduction4CycH : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_Reduction4CycH],
      (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_H")>;

// S elements
let Latency = 38, NumMicroOps = 8, ResourceCycles = [8] in
def A64FXWrite_Reduction4CycS : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_Reduction4CycS],
      (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_S")>;

// D elements
let Latency = 34, NumMicroOps = 7, ResourceCycles = [7] in
def A64FXWrite_Reduction4CycD : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_Reduction4CycD],
      (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_D")>;
2164
// CLASTA/CLASTB, _R forms: latency 29, using A64FXGI0 and A64FXGI56.
let Latency = 29 in
def A64FXWrite_CLAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_CLAST_R], (instregex "^CLAST[AB]_R")>;
2169
// CMP*_P compares: latency 4, using A64FXGI0 and A64FXGI1.
let Latency = 4 in
def A64FXWrite_CMP : SchedWriteRes<[A64FXGI0, A64FXGI1]>;
def : InstRW<[A64FXWrite_CMP], (instregex "^CMP.*_P")>;
2174
// CNTP_X: latency 6, using A64FXGI1 and A64FXGI2.
let Latency = 6 in
def A64FXWrite_CNTP : SchedWriteRes<[A64FXGI1, A64FXGI2]>;
def : InstRW<[A64FXWrite_CNTP], (instregex "^CNTP_X")>;
2179
// CPY_ZPmR: latency 8, using A64FXGI0 and A64FXGI2.
let Latency = 8 in
def A64FXWrite_CPYScalar : SchedWriteRes<[A64FXGI0, A64FXGI2]>;
def : InstRW<[A64FXWrite_CPYScalar], (instregex "^CPY_ZPmR")>;
2184
// CTERM*: latency 2, A64FXGI24 held for 2 cycles.
let Latency = 2, ResourceCycles = [2] in
def A64FXWrite_CTERM : SchedWriteRes<[A64FXGI24]>;
def : InstRW<[A64FXWrite_CTERM], (instregex "^CTERM")>;
2190
// INCP/DECP, _X forms: latency 7, 2 micro-ops, using A64FXGI1, A64FXGI2
// and A64FXGI4.
let Latency = 7, NumMicroOps = 2 in
def A64FXWrite_INCPScalar : SchedWriteRes<[A64FXGI1, A64FXGI2, A64FXGI4]>;
def : InstRW<[A64FXWrite_INCPScalar], (instregex "^DECP_X", "^INCP_X")>;

// INCP/DECP, _Z forms: latency 12, using A64FXGI0 and A64FXGI1.
let Latency = 12 in
def A64FXWrite_INCPVector : SchedWriteRes<[A64FXGI0, A64FXGI1]>;
def : InstRW<[A64FXWrite_INCPVector], (instregex "^DECP_Z", "^INCP_Z")>;
2201
// FADDV reductions on A64FXGI03, one write per element size.

// H elements
let Latency = 75, NumMicroOps = 11, ResourceCycles = [11] in
def A64FXWrite_FADDVH : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDVH], (instrs FADDV_VPZ_H)>;

// S elements
let Latency = 60, NumMicroOps = 9, ResourceCycles = [9] in
def A64FXWrite_FADDVS : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDVS], (instrs FADDV_VPZ_S)>;

// D elements
let Latency = 45, NumMicroOps = 7, ResourceCycles = [7] in
def A64FXWrite_FADDVD : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDVD], (instrs FADDV_VPZ_D)>;
2222
// FADDA reductions on A64FXGI03, one write per element size.

// H elements
let Latency = 468, NumMicroOps = 63, ResourceCycles = [63] in
def A64FXWrite_FADDAH : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDAH], (instrs FADDA_VPZ_H)>;

// S elements
let Latency = 228, NumMicroOps = 31, ResourceCycles = [31] in
def A64FXWrite_FADDAS : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDAS], (instrs FADDA_VPZ_S)>;

// D elements
let Latency = 108, NumMicroOps = 15, ResourceCycles = [15] in
def A64FXWrite_FADDAD : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FADDAD], (instrs FADDA_VPZ_D)>;
2243
// FCADD_Z: latency 15, 2 micro-ops, using A64FXGI0 and A64FXGI3.
let Latency = 15, NumMicroOps = 2 in
def A64FXWrite_FCADDZ : SchedWriteRes<[A64FXGI0, A64FXGI3]>;
def : InstRW<[A64FXWrite_FCADDZ], (instregex "^FCADD_Z")>;

// FCMLA_Z: latency 15, 3 micro-ops, A64FXGI03 held for 3 cycles.
let Latency = 15, NumMicroOps = 3, ResourceCycles = [3] in
def A64FXWrite_FCMLAZ : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FCMLAZ], (instregex "^FCMLA_Z")>;
2256
// FDIV, FDIVR and FSQRT, one write per element size. Each occupies
// A64FXGI0 for as many cycles as its latency.

// H elements
let Latency = 134, ResourceCycles = [134] in
def A64FXWrite_FDIVH : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_FDIVH], (instregex "^F(DIVR?|SQRT)_Z.*_H")>;

// S elements
let Latency = 98, ResourceCycles = [98] in
def A64FXWrite_FDIVS : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_FDIVS], (instregex "^F(DIVR?|SQRT)_Z.*_S")>;

// D elements
let Latency = 154, ResourceCycles = [154] in
def A64FXWrite_FDIVD : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_FDIVD], (instregex "^F(DIVR?|SQRT)_Z.*_D")>;
2274
// FMAXV/FMINV/FMAXNMV/FMINNMV reductions on A64FXGI03.

// H elements
let Latency = 54, NumMicroOps = 11, ResourceCycles = [11] in
def A64FXWrite_FMAXVH : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FMAXVH], (instregex "^F(MAX|MIN)(NM)?V_VPZ_H")>;

// S elements
let Latency = 44, NumMicroOps = 9, ResourceCycles = [9] in
def A64FXWrite_FMAXVS : SchedWriteRes<[A64FXGI03]>;
def : InstRW<[A64FXWrite_FMAXVS], (instregex "^F(MAX|MIN)(NM)?V_VPZ_S")>;
2288
// FMAXV/FMINV/FMAXNMV/FMINNMV reductions, D elements: latency 34,
// 7 micro-ops, A64FXGI03 held for 7 cycles.
def A64FXWrite_FMAXVD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 34;
  let NumMicroOps = 7;
  let ResourceCycles = [7];
}
// The D-element reductions must use the D write defined just above; they
// were previously bound to A64FXWrite_FMAXVH, which left A64FXWrite_FMAXVD
// unused and gave the D forms the H-element latency and micro-op count.
def : InstRW<[A64FXWrite_FMAXVD], (instregex "^F(MAX|MIN)(NM)?V_VPZ_D")>;
2295
// INDEX, register/immediate and immediate/register forms, B/H elements.
let Latency = 17, NumMicroOps = 2, ResourceCycles = [2, 2] in
def A64FXWrite_INDEX_RI_BH : SchedWriteRes<[A64FXGI0, A64FXGI2]>;
def : InstRW<[A64FXWrite_INDEX_RI_BH], (instregex "^INDEX_(RI|IR)_[BH]")>;

// INDEX, register/immediate and immediate/register forms, S/D elements.
let Latency = 13, NumMicroOps = 1 in
def A64FXWrite_INDEX_RI_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]>;
def : InstRW<[A64FXWrite_INDEX_RI_SD], (instregex "^INDEX_(RI|IR)_[SD]")>;

// INDEX, immediate/immediate form, B/H elements.
let Latency = 13, NumMicroOps = 2, ResourceCycles = [2] in
def A64FXWrite_INDEX_II_BH : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_INDEX_II_BH], (instregex "^INDEX_II_[BH]")>;

// INDEX, register/register form, B/H elements.
let Latency = 17, NumMicroOps = 3, ResourceCycles = [2, 2, 1] in
def A64FXWrite_INDEX_RR_BH : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI3]>;
def : InstRW<[A64FXWrite_INDEX_RR_BH], (instregex "^INDEX_RR_[BH]")>;

// INDEX, register/register form, S/D elements.
let Latency = 17, NumMicroOps = 2, ResourceCycles = [2, 1] in
def A64FXWrite_INDEX_RR_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]>;
def : InstRW<[A64FXWrite_INDEX_RR_SD], (instregex "^INDEX_RR_[SD]")>;
2329
// INSR_ZR: latency 10, using A64FXGI0 and A64FXGI2.
let Latency = 10 in
def A64FXWrite_INSR_ZR : SchedWriteRes<[A64FXGI0, A64FXGI2]>;
def : InstRW<[A64FXWrite_INSR_ZR], (instregex "^INSR_ZR")>;
2334
// LASTA/LASTB, _R forms: latency 25, using A64FXGI0 and A64FXGI56.
def A64FXWrite_LAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 25;
}
// Bind LAST[AB]_R to the write defined just above. It was previously bound
// to A64FXWrite_CLAST_R (latency 29), which left A64FXWrite_LAST_R unused.
def : InstRW<[A64FXWrite_LAST_R], (instregex "^LAST[AB]_R")>;
2339
// Gather loads (GLD1* and the GLDFF1* variants).

// S-element gathers, _IMM forms.
let Latency = 19, ResourceCycles = [2, 4, 4] in
def A64FXWrite_GLD_S_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_GLD_S_ZI],
             (instregex "^GLD(FF)?1W_IMM", "^GLD(FF)?1S?[BHW]_S_IMM")>;

// D-element gathers, _IMM forms.
let Latency = 16, ResourceCycles = [1, 2, 2] in
def A64FXWrite_GLD_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_GLD_D_ZI],
             (instregex "^GLD(FF)?1D_IMM", "^GLD(FF)?1S?[BHW]_D_IMM")>;

// S-element gathers, non-_IMM forms; these additionally use A64FXGI2.
let Latency = 23, ResourceCycles = [2, 1, 4, 4] in
def A64FXWrite_GLD_S_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_GLD_S_RZ],
             (instregex "^GLD(FF)?1W_[^DI]", "^GLD(FF)?1S?[BHW]_S_[^I]")>;

// D-element gathers, non-_IMM forms; these additionally use A64FXGI2.
let Latency = 20, ResourceCycles = [1, 1, 2, 2] in
def A64FXWrite_GLD_D_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_GLD_D_RZ],
             (instregex "^GLD(FF)?1D_[^I]", "^GLD(FF)?1D$",
                        "^GLD(FF)?1S?[BHW]_D_[^I]",
                        "^GLD(FF)?1S?[BHW]_D$")>;
2368
// SVE LD2 on A64FXGI56.

// B/H elements, every addressing form.
let Latency = 15, NumMicroOps = 3, ResourceCycles = [9] in
def A64FXWrite_LD2_BH : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD2_BH], (instregex "^LD2[BH]")>;

// W/D elements, _IMM form.
let Latency = 11, NumMicroOps = 2, ResourceCycles = [2] in
def A64FXWrite_LD2_WD_IMM : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD2_WD_IMM], (instregex "^LD2[WD]_IMM")>;

// W/D elements, unsuffixed form.
let Latency = 12, NumMicroOps = 3, ResourceCycles = [3] in
def A64FXWrite_LD2_WD : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD2_WD], (instregex "^LD2[WD]$")>;
2389
// SVE LD3 on A64FXGI56.

// B/H elements, every addressing form.
let Latency = 15, NumMicroOps = 4, ResourceCycles = [13] in
def A64FXWrite_LD3_BH : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD3_BH], (instregex "^LD3[BH]")>;

// W/D elements, _IMM form.
let Latency = 11, NumMicroOps = 3, ResourceCycles = [3] in
def A64FXWrite_LD3_WD_IMM : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD3_WD_IMM], (instregex "^LD3[WD]_IMM")>;

// W/D elements, unsuffixed form.
let Latency = 12, NumMicroOps = 4, ResourceCycles = [4] in
def A64FXWrite_LD3_WD : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD3_WD], (instregex "^LD3[WD]$")>;
2410
// SVE LD4 on A64FXGI56.

// B/H elements, every addressing form.
let Latency = 15, NumMicroOps = 5, ResourceCycles = [17] in
def A64FXWrite_LD4_BH : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD4_BH], (instregex "^LD4[BH]")>;

// W/D elements, _IMM form.
let Latency = 11, NumMicroOps = 4, ResourceCycles = [4] in
def A64FXWrite_LD4_WD_IMM : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD4_WD_IMM], (instregex "^LD4[WD]_IMM")>;

// W/D elements, unsuffixed form.
let Latency = 12, NumMicroOps = 5, ResourceCycles = [5] in
def A64FXWrite_LD4_WD : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_LD4_WD], (instregex "^LD4[WD]$")>;
2431
// SVE prefetches. None of these writes overrides Latency or NumMicroOps;
// only the resource lists and ResourceCycles differ.

// PRF*_PR* forms: A64FXGI56 only, default cycles.
def A64FXWrite_PRF : SchedWriteRes<[A64FXGI56]>;
def : InstRW<[A64FXWrite_PRF], (instregex "^PRF._PR")>;

// _S_ forms other than _S_PZI.
let ResourceCycles = [2, 1, 4] in
def A64FXWrite_PRF_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]>;
def : InstRW<[A64FXWrite_PRF_W_RZ], (instregex "^PRF._S_[^P]")>;

// _S_PZI forms.
let ResourceCycles = [2, 4] in
def A64FXWrite_PRF_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_PRF_W_ZI], (instregex "^PRF._S_PZI")>;

// _D_ forms other than _D_PZI.
let ResourceCycles = [1, 1, 2] in
def A64FXWrite_PRF_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]>;
def : InstRW<[A64FXWrite_PRF_D_RZ], (instregex "^PRF._D_[^P]")>;

// _D_PZI forms.
let ResourceCycles = [1, 2] in
def A64FXWrite_PRF_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_PRF_D_ZI], (instregex "^PRF._D_PZI")>;
2455
// SVE integer divide (SDIV/UDIV and the R variants). Each write occupies
// A64FXGI0 for as many cycles as its latency.

// S elements
let Latency = 114, ResourceCycles = [114] in
def A64FXWrite_SDIV_S : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_SDIV_S], (instregex "^[SU]DIVR?.*_S")>;

// D elements
let Latency = 178, ResourceCycles = [178] in
def A64FXWrite_SDIV_D : SchedWriteRes<[A64FXGI0]>;
def : InstRW<[A64FXWrite_SDIV_D], (instregex "^[SU]DIVR?.*_D")>;
2467
// Indexed SDOT/UDOT (ZZZI forms): latency 15, 2 micro-ops, using A64FXGI0
// and A64FXGI3.
let Latency = 15, NumMicroOps = 2 in
def A64FXWrite_SDOT_I : SchedWriteRes<[A64FXGI0, A64FXGI3]>;
def : InstRW<[A64FXWrite_SDOT_I], (instregex "^[SU]DOT_ZZZI")>;
2473
// Saturating increment/decrement by element count, _W/_X forms.
let Latency = 2, ResourceCycles = [2] in
def A64FXWrite_SQINC_Scalar : SchedWriteRes<[A64FXGI24]>;
def : InstRW<[A64FXWrite_SQINC_Scalar],
             (instregex "^[SU]Q(INC|DEC)[BHWD]_[WX]")>;

// SQINCP/SQDECP/UQINCP/UQDECP, _W/_X forms.
let Latency = 6, NumMicroOps = 2, ResourceCycles = [3, 1] in
def A64FXWrite_SQINCP_X : SchedWriteRes<[A64FXGI24, A64FXGI3]>;
def : InstRW<[A64FXWrite_SQINCP_X], (instregex "^[SU]Q(INC|DEC)P_[WX]")>;

// SQINCP/SQDECP/UQINCP/UQDECP, _Z forms.
let Latency = 12 in
def A64FXWrite_SQINCP_Z : SchedWriteRes<[A64FXGI24, A64FXGI3]>;
def : InstRW<[A64FXWrite_SQINCP_Z], (instregex "^[SU]Q(INC|DEC)P_Z")>;
2491
// SVE ST1 and STNT1 stores, all element sizes: latency 11, using A64FXGI0
// and A64FXGI56.
let Latency = 11 in
def A64FXWrite_ST1 : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_ST1], (instregex "^ST(NT)?1[BHWD]")>;
2496
// Scatter stores (SST1*).

// S-element scatters, non-_I* forms.
let Latency = 20, NumMicroOps = 8, ResourceCycles = [8, 8, 8, 8] in
def A64FXWrite_SST1_W_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_SST1_W_RZ],
             (instregex "^SST1[BH]_S(_[^I]|$)", "^SST1W(_[^ID]|$)")>;

// D-element scatters, non-_I* forms.
let Latency = 20, NumMicroOps = 4, ResourceCycles = [4, 4, 4, 4] in
def A64FXWrite_SST1_D_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_SST1_D_RZ],
             (instregex "^SST1[BHW]_D(_[^I]|$)", "^SST1D(_[^I]|$)")>;

// S-element scatters, _I* forms.
let Latency = 16, NumMicroOps = 8, ResourceCycles = [12, 8, 8] in
def A64FXWrite_SST1_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_SST1_W_ZI],
             (instregex "^SST1[BH]_S_I", "^SST1W_I")>;

// D-element scatters, _I* forms.
let Latency = 16, NumMicroOps = 4, ResourceCycles = [4, 4, 4] in
def A64FXWrite_SST1_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]>;
def : InstRW<[A64FXWrite_SST1_D_ZI],
             (instregex "^SST1[BHW]_D_I", "^SST1D_I")>;
2528
// SVE ST2, B/H elements, every addressing form: latency 12, 3 micro-ops.
let Latency = 12, NumMicroOps = 3, ResourceCycles = [8, 9] in
def A64FXWrite_ST2_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_ST2_BH], (instregex "^ST2[BH]")>;
2535
// SVE ST2, W/D elements.
//
// The _RI write models the register+immediate form (opcodes ST2W_IMM and
// ST2D_IMM) and the _RR write the register+register form (opcodes ST2W and
// ST2D). The two InstRW patterns were previously attached the other way
// round, unlike the LD2[WD] entries where the _IMM opcodes get the cheaper
// write.
def A64FXWrite_ST2_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ResourceCycles = [2, 2];
}
def : InstRW<[A64FXWrite_ST2_WD_RI], (instregex "^ST2[WD]_I")>;

def A64FXWrite_ST2_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 3];
}
def : InstRW<[A64FXWrite_ST2_WD_RR], (instregex "^ST2[WD]$")>;
2549
// SVE ST3, B/H elements, every addressing form: latency 15, 4 micro-ops.
let Latency = 15, NumMicroOps = 4, ResourceCycles = [12, 13] in
def A64FXWrite_ST3_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_ST3_BH], (instregex "^ST3[BH]")>;
2556
// SVE ST3, W/D elements.
//
// The _RI write models the register+immediate form (opcodes ST3W_IMM and
// ST3D_IMM) and the _RR write the register+register form (opcodes ST3W and
// ST3D). The two InstRW patterns were previously attached the other way
// round, unlike the LD3[WD] entries where the _IMM opcodes get the cheaper
// write.
def A64FXWrite_ST3_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ResourceCycles = [3, 3];
}
def : InstRW<[A64FXWrite_ST3_WD_RI], (instregex "^ST3[WD]_I")>;

def A64FXWrite_ST3_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 4;
  let ResourceCycles = [3, 4];
}
def : InstRW<[A64FXWrite_ST3_WD_RR], (instregex "^ST3[WD]$")>;
2570
// SVE ST4, B/H elements, every addressing form: latency 15, 5 micro-ops.
let Latency = 15, NumMicroOps = 5, ResourceCycles = [16, 17] in
def A64FXWrite_ST4_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]>;
def : InstRW<[A64FXWrite_ST4_BH], (instregex "^ST4[BH]")>;
2577
// SVE ST4, W/D elements.
//
// The _RI write models the register+immediate form (opcodes ST4W_IMM and
// ST4D_IMM) and the _RR write the register+register form (opcodes ST4W and
// ST4D). The two InstRW patterns were previously attached the other way
// round, unlike the LD4[WD] entries where the _IMM opcodes get the cheaper
// write.
def A64FXWrite_ST4_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ResourceCycles = [4, 4];
}
def : InstRW<[A64FXWrite_ST4_WD_RI], (instregex "^ST4[WD]_I")>;

def A64FXWrite_ST4_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 5;
  let ResourceCycles = [4, 5];
}
def : InstRW<[A64FXWrite_ST4_WD_RR], (instregex "^ST4[WD]$")>;
2591
// STR_PXI: latency 11, using A64FXGI3 and A64FXGI5.
let Latency = 11 in
def A64FXWrite_STR_P : SchedWriteRes<[A64FXGI3, A64FXGI5]>;
def : InstRW<[A64FXWrite_STR_P], (instrs STR_PXI)>;

// STR_ZXI: latency 11, using A64FXGI0 and A64FXGI5.
let Latency = 11 in
def A64FXWrite_STR_Z : SchedWriteRes<[A64FXGI0, A64FXGI5]>;
def : InstRW<[A64FXWrite_STR_Z], (instrs STR_ZXI)>;
2601
// WHILEL*_P: latency 4, using A64FXGI3 and A64FXGI5.
let Latency = 4 in
def A64FXWrite_WHILE : SchedWriteRes<[A64FXGI3, A64FXGI5]>;
def : InstRW<[A64FXWrite_WHILE], (instregex "^WHILEL._P")>;

// WRFFR: latency 3, 2 micro-ops, using A64FXGI3 and A64FXGI5.
let Latency = 3, NumMicroOps = 2 in
def A64FXWrite_WRFFR : SchedWriteRes<[A64FXGI3, A64FXGI5]>;
def : InstRW<[A64FXWrite_WRFFR], (instrs WRFFR)>;
2612
2613} // SchedModel = A64FXModel
2614