xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedOryon.td (revision 5036d9652a5701d00e9e40ea942c278e9f77d33d)
1//=- AArch64SchedOryon.td - Qualcomm Oryon CPU 001 ---*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the scheduling model for Qualcomm Oryon
10// family of processors.
11//
12//===----------------------------------------------------------------------===//
13
14//===----------------------------------------------------------------------===//
15// Pipeline Description.
16
// Machine-model record for the Oryon scheduler; every resource and
// InstRW below is attached to it through `let SchedModel = OryonModel`.
def OryonModel : SchedMachineModel {
  let IssueWidth = 14;
  let MicroOpBufferSize = 376;
  let LoadLatency = 4;
  // Cycles charged for a mispredicted branch.
  let MispredictPenalty = 13;
  // No loop micro-op buffer is modelled.
  let LoopMicroOpBufferSize = 0;
  // Enable the post-RA scheduler.
  let PostRAScheduler = 1;
  let CompleteModel = 1;

  // Predicates listed as unsupported for this model.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SVEUnsupported.F, SMEUnsupported.F, MTEUnsupported.F,
                  PAUnsupported.F, [HasPAuth, HasCSSC]);
}
32
33let SchedModel = OryonModel in {
34
35// Issue ports.
36// IXU has 6 ports p0 ~ p5
37// LSU has 4 ports p6 ~ p9(ls0 ~ ls3), p10/p11(std0, std1) has to work with ls0~ls3
38// VXU has 4 ports p12 ~ p15
39
40// cross IXU/LSU/VXU resource group for FMOV P41 of VXU
// Integer-to-vector (I2V) transfer resources.
def ORYONI4FP0 : ProcResource<1>;
def ORYONI5FP1 : ProcResource<1>;

// Vector-to-integer (V2I) transfer resources.
def ORYONFP0I4 : ProcResource<1>;
def ORYONFP1I5 : ProcResource<1>;

// Store resources used by normal store instructions. The LSU ports
// below name one of these two as their Super resource.
def ORYONST0 : ProcResource<1>;
def ORYONST1 : ProcResource<1>;
52
// IXU issue ports 0-5.

// Port 0 - ALU, indirect and direct branches.
def ORYONP0 : ProcResource<1>;

// Port 1 - ALU, direct branches.
def ORYONP1 : ProcResource<1>;

// Port 2 - ALU.
def ORYONP2 : ProcResource<1>;

// Port 3 - ALU.
def ORYONP3 : ProcResource<1>;

// Port 4 - ALU.
// NOTE(review): Super is assigned twice. TableGen applies body `let`s in
// order, so presumably only the last one (ORYONFP0I4) takes effect and
// ORYONI4FP0 is not actually a parent of this port - confirm intent.
def ORYONP4 : ProcResource<1> {
  let Super = ORYONI4FP0;
  let Super = ORYONFP0I4;
}

// Port 5 - ALU.
// NOTE(review): same double assignment as port 4; presumably only
// ORYONFP1I5 takes effect - confirm intent.
def ORYONP5 : ProcResource<1> {
  let Super = ORYONI5FP1;
  let Super = ORYONFP1I5;
}
74
// LSU issue ports. LS0/LS1 and STD0 hang off ORYONST0; LS2/LS3 and STD1
// hang off ORYONST1.

// Port 6 - load/store, LS0.
def ORYONP6 : ProcResource<1> {
  let Super = ORYONST0;
}

// Port 7 - load/store, LS1.
def ORYONP7 : ProcResource<1> {
  let Super = ORYONST0;
}

// Port 8 - load/store, LS2.
def ORYONP8 : ProcResource<1> {
  let Super = ORYONST1;
}

// Port 9 - load/store, LS3.
def ORYONP9 : ProcResource<1> {
  let Super = ORYONST1;
}

// Port 10 - STD0.
def ORYONP10SD0 : ProcResource<1> {
  let Super = ORYONST0;
}

// Port 11 - STD1.
def ORYONP11SD1 : ProcResource<1> {
  let Super = ORYONST1;
}
98
// VXU issue ports 12-15: FP / Neon / SIMD / crypto.

// Port 12 - FP0.
// NOTE(review): Super is assigned twice; presumably only the last value
// (ORYONFP0I4) takes effect - confirm intent.
def ORYONP12FP0 : ProcResource<1> {
  let Super = ORYONI4FP0;
  let Super = ORYONFP0I4;
}

// Port 13 - FP1.
// NOTE(review): same double assignment; presumably only ORYONFP1I5
// takes effect - confirm intent.
def ORYONP13FP1 : ProcResource<1> {
  let Super = ORYONI5FP1;
  let Super = ORYONFP1I5;
}

// Port 14 - FP2.
def ORYONP14FP2 : ProcResource<1>;

// Port 15 - FP3.
def ORYONP15FP3 : ProcResource<1>;
114
115// Define groups for the functional units on each issue port.  Each group
116// created will be used by a WriteRes.
117
// IXU resource groups. Each group names the ports an operation class may
// issue on and the buffer size associated with that group.

// Integer add/shift/logical/misc: any of I0-I5.
let BufferSize = 120 in
def ORYONI012345 : ProcResGroup<[ORYONP0, ORYONP1, ORYONP2,
                                 ORYONP3, ORYONP4, ORYONP5]>;

// Direct conditional branches: I0/I1.
let BufferSize = 40 in
def ORYONI01 : ProcResGroup<[ORYONP0, ORYONP1]>;

// Indirect branches: I0 only.
let BufferSize = 20 in
def ORYONI0 : ProcResGroup<[ORYONP0]>;

// Crypto/CRC/PAU: I2 only.
let BufferSize = 20 in
def ORYONI2 : ProcResGroup<[ORYONP2]>;

// Multiply and multiply-add: I4/I5.
let BufferSize = 40 in
def ORYONI45 : ProcResGroup<[ORYONP4, ORYONP5]>;

// Documented as the divide port (I5).
// NOTE(review): the integer-divide write types in this file reserve
// ORYONI2 rather than this group - confirm which port is correct.
let BufferSize = 20 in
def ORYONI5 : ProcResGroup<[ORYONP5]>;

// Comparison instructions: I0-I3.
let BufferSize = 80 in
def ORYONI0123 : ProcResGroup<[ORYONP0, ORYONP1, ORYONP2, ORYONP3]>;
154
// Loads may issue on any of the four LS ports (P6-P9).
let BufferSize = 64 in
def ORYONLD : ProcResGroup<[ORYONP6, ORYONP7, ORYONP8, ORYONP9]>;

// Stores go through the combined STA/STD store resources.
let BufferSize = 64 in
def ORYONST : ProcResGroup<[ORYONST0, ORYONST1]>;
164
// Arithmetic and CRYP-AED ASIMD/FP instructions: any of FP0-FP3.
let BufferSize = 192 in
def ORYONFP0123 : ProcResGroup<[ORYONP12FP0, ORYONP13FP1,
                                ORYONP14FP2, ORYONP15FP3]>;

// FP comparison and FP<->integer moves: FP0/FP1.
let BufferSize = 96 in
def ORYONFP01 : ProcResGroup<[ORYONP12FP0, ORYONP13FP1]>;

// FDIV: FP3 only.
let BufferSize = 48 in
def ORYONFP3 : ProcResGroup<[ORYONP15FP3]>;
180
// CRYP-SHA instructions on port FP1 (issue port 13).
// This group used to list ORYONP14FP2, which contradicted both its name
// and this comment and made it an exact duplicate of ORYONFP2. It now
// reserves the FP1 port so FP1-only writes no longer compete with
// FP2-only writes for the same unit.
def ORYONFP1 : ProcResGroup<[ORYONP13FP1]> {
  let BufferSize = 48;
}
185
// Operations restricted to port FP2.
let BufferSize = 48 in
def ORYONFP2 : ProcResGroup<[ORYONP14FP2]>;

// Reciprocal and square root: FP0 only.
let BufferSize = 48 in
def ORYONFP0 : ProcResGroup<[ORYONP12FP0]>;
194
// Cross IXU/LSU/VXU resource groups used for FMOV (P41 of the VXU doc).

// Integer-to-vector transfers.
let BufferSize = 40 in
def ORYONI2V : ProcResGroup<[ORYONI4FP0, ORYONI5FP1]>;

// Vector-to-integer transfers.
let BufferSize = 96 in
def ORYONV2I : ProcResGroup<[ORYONFP0I4, ORYONFP1I5]>;
205
206// Define commonly used write types for InstRW specializations.
207// All definitions follow the format: ORYONWrite_<NumCycles>Cyc_<Resources>.
208
209// Because of the complexity of Oryon CPU, we skip the following
210// generic definitions and define each instruction specifically
211
// These WriteRes entries are not used in the Oryon sched model.
// Every generic SchedWrite is marked unsupported with an empty resource
// list; instructions get their scheduling data from the InstRW entries.
let Unsupported = 1 in {
  def : WriteRes<WriteImm, []>;
  def : WriteRes<WriteI, []>;
  def : WriteRes<WriteISReg, []>;
  def : WriteRes<WriteIEReg, []>;
  def : WriteRes<WriteExtr, []>;
  def : WriteRes<WriteIS, []>;
  def : WriteRes<WriteID32, []>;
  def : WriteRes<WriteID64, []>;
  def : WriteRes<WriteIM32, []>;
  def : WriteRes<WriteIM64, []>;
  def : WriteRes<WriteBr, []>;
  def : WriteRes<WriteBrReg, []>;
  def : WriteRes<WriteLD, []>;
  def : WriteRes<WriteST, []>;
  def : WriteRes<WriteSTP, []>;
  def : WriteRes<WriteAdr, []>;
  def : WriteRes<WriteLDIdx, []>;
  def : WriteRes<WriteSTIdx, []>;
  def : WriteRes<WriteF, []>;
  def : WriteRes<WriteFCmp, []>;
  def : WriteRes<WriteFCvt, []>;
  def : WriteRes<WriteFCopy, []>;
  def : WriteRes<WriteFImm, []>;
  def : WriteRes<WriteFMul, []>;
  def : WriteRes<WriteFDiv, []>;
  def : WriteRes<WriteVd, []>;
  def : WriteRes<WriteVq, []>;
  def : WriteRes<WriteVLD, []>;
  def : WriteRes<WriteVST, []>;
  def : WriteRes<WriteSys, []>;
  def : WriteRes<WriteBarrier, []>;
  def : WriteRes<WriteHint, []>;
  def : WriteRes<WriteLDHi, []>;
  def : WriteRes<WriteAtomic, []>;
}
247
// Placeholder ReadAdvance entries: every generic read gets an advance of
// zero cycles until real values are provided in a later implementation.
foreach OryonRead = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID,
                     ReadExtrHi, ReadAdrBase, ReadVLD, ReadST] in
  def : ReadAdvance<OryonRead, 0>;
259
260
261//IXU resource definition
// 1 cycle, no pipe reserved.
def ORYONWrite_1Cyc_NONE : SchedWriteRes<[]>;

// 1 cycle on I0/I1.
def ORYONWrite_1Cyc_I01 : SchedWriteRes<[ORYONI01]>;

// 1 cycle on I0/I1, counted as two micro-ops.
let NumMicroOps = 2 in
def ORYONWrite_1Cyc_2Uops_I01 : SchedWriteRes<[ORYONI01]>;

// 1 cycle on I0 only.
def ORYONWrite_1Cyc_I0 : SchedWriteRes<[ORYONI0]>;
273
// PAC*/AUT* instructions: 7 cycles on I2.
let Latency = 7 in {
  def ORYONWrite_7Cyc_I2 : SchedWriteRes<[ORYONI2]>;

  // Same latency and port, counted as three micro-ops.
  let NumMicroOps = 3 in
  def ORYONWrite_7Cyc_3Uops_I2 : SchedWriteRes<[ORYONI2]>;
}

// Authenticated branch instructions: 9 (7+1+1) cycles, broken down into
// three micro-ops:
//   a. PtrAuth on pipe 2, 7 cycles
//   b. link register update on pipes 0 and 1, 1 cycle
//   c. indirect branch on pipe 0, 1 cycle
// NOTE(review): only ORYONI2 and ORYONI01 are reserved; step (c) has no
// separate ORYONI0 entry - confirm this is intended.
let Latency = 9, NumMicroOps = 3 in
def ORYONWrite_9Cyc_I012 : SchedWriteRes<[ORYONI2, ORYONI01]>;

// CRC32 and CRC32C instructions: 3 cycles on I2.
let Latency = 3 in
def ORYONWrite_3Cyc_I2 : SchedWriteRes<[ORYONI2]>;
300
// 1 cycle on any of I0-I5.
def ORYONWrite_1Cyc_I012345 : SchedWriteRes<[ORYONI012345]>;

// 1 cycle on any of I0-I3.
def ORYONWrite_1Cyc_I0123 : SchedWriteRes<[ORYONI0123]>;

// 1 cycle, reserving the I0-I5 group twice.
def ORYONWrite_1Cyc_I012345_I012345
    : SchedWriteRes<[ORYONI012345, ORYONI012345]>;

// 2 cycles, reserving a group twice with ReleaseAtCycles = [2,2].
let Latency = 2, ReleaseAtCycles = [2, 2] in {
  def ORYONWrite_2Cyc_I0123_I0123_RC
      : SchedWriteRes<[ORYONI0123, ORYONI0123]>;
  def ORYONWrite_2Cyc_I012345_I012345_RC
      : SchedWriteRes<[ORYONI012345, ORYONI012345]>;
}

// 3 cycles on the multiply ports I4/I5.
let Latency = 3 in {
  // Reserves I45 twice with ReleaseAtCycles = [2,2].
  let ReleaseAtCycles = [2, 2] in
  def ORYONWrite_3Cyc_I45_I45_RC : SchedWriteRes<[ORYONI45, ORYONI45]>;

  def ORYONWrite_3Cyc_I45 : SchedWriteRes<[ORYONI45]>;
}

// Integer division on I2 with ReleaseAtCycles = [2].
let ReleaseAtCycles = [2] in {
  // 32-bit: 7 cycles.
  let Latency = 7 in
  def ORYONWrite_7Cyc_I2_RC : SchedWriteRes<[ORYONI2]>;

  // 64-bit: 9 cycles.
  let Latency = 9 in
  def ORYONWrite_9Cyc_I2_RC : SchedWriteRes<[ORYONI2]>;
}
348
349// LSU resource definition
350// need to define WriteLDAdr, WriteAdrAdr, WriteLDHi, WriteSTX
// 4-cycle loads.
let Latency = 4 in {
  // Plain load on the LS ports (P6-P9).
  def ORYONWrite_4Cyc_LD : SchedWriteRes<[ORYONLD]>;

  // Post/pre increment/decrement access, also used for pair loads with
  // writeback: additionally reserves an integer port.
  def ORYONWrite_4Cyc_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]>;
}
360
// 5 (4+1) cycle loads for VXU SIMD access; could also include FP.
// The resource list might not be correct, as no VXU resource is included.
// The variants differ only in their micro-op count.
let Latency = 5 in {
  def ORYONWrite_5Cyc_LD : SchedWriteRes<[ORYONLD]>;

  let NumMicroOps = 2 in
  def ORYONWrite_5Cyc_2Uops_LD : SchedWriteRes<[ORYONLD]>;

  let NumMicroOps = 3 in
  def ORYONWrite_5Cyc_3Uops_LD : SchedWriteRes<[ORYONLD]>;

  let NumMicroOps = 4 in
  def ORYONWrite_5Cyc_4Uops_LD : SchedWriteRes<[ORYONLD]>;

  let NumMicroOps = 5 in
  def ORYONWrite_5Cyc_5Uops_LD : SchedWriteRes<[ORYONLD]>;

  let NumMicroOps = 6 in
  def ORYONWrite_5Cyc_6Uops_LD : SchedWriteRes<[ORYONLD]>;

  let NumMicroOps = 8 in
  def ORYONWrite_5Cyc_8Uops_LD : SchedWriteRes<[ORYONLD]>;

  let NumMicroOps = 10 in
  def ORYONWrite_5Cyc_10Uops_LD : SchedWriteRes<[ORYONLD]>;
}
401
// 5-cycle loads with post/pre increment/decrement access: the load port
// plus an integer port. The variants differ only in micro-op count.
let Latency = 5 in {
  def ORYONWrite_5Cyc_LD_I012345 : SchedWriteRes<[ORYONLD, ORYONI012345]>;

  let NumMicroOps = 2 in
  def ORYONWrite_5Cyc_2Uops_LD_I012345
      : SchedWriteRes<[ORYONLD, ORYONI012345]>;

  let NumMicroOps = 3 in
  def ORYONWrite_5Cyc_3Uops_LD_I012345
      : SchedWriteRes<[ORYONLD, ORYONI012345]>;

  let NumMicroOps = 4 in
  def ORYONWrite_5Cyc_4Uops_LD_I012345
      : SchedWriteRes<[ORYONLD, ORYONI012345]>;

  let NumMicroOps = 5 in
  def ORYONWrite_5Cyc_5Uops_LD_I012345
      : SchedWriteRes<[ORYONLD, ORYONI012345]>;

  let NumMicroOps = 6 in
  def ORYONWrite_5Cyc_6Uops_LD_I012345
      : SchedWriteRes<[ORYONLD, ORYONI012345]>;

  let NumMicroOps = 8 in
  def ORYONWrite_5Cyc_8Uops_LD_I012345
      : SchedWriteRes<[ORYONLD, ORYONI012345]>;

  let NumMicroOps = 10 in
  def ORYONWrite_5Cyc_10Uops_LD_I012345
      : SchedWriteRes<[ORYONLD, ORYONI012345]>;
}
441
// 1-cycle generic stores on the store group. The variants differ only in
// micro-op count.
def ORYONWrite_1Cyc_ST : SchedWriteRes<[ORYONST]>;

let NumMicroOps = 2 in
def ORYONWrite_1Cyc_2Uops_ST : SchedWriteRes<[ORYONST]>;

let NumMicroOps = 3 in
def ORYONWrite_1Cyc_3Uops_ST : SchedWriteRes<[ORYONST]>;

let NumMicroOps = 4 in
def ORYONWrite_1Cyc_4Uops_ST : SchedWriteRes<[ORYONST]>;

let NumMicroOps = 5 in
def ORYONWrite_1Cyc_5Uops_ST : SchedWriteRes<[ORYONST]>;

let NumMicroOps = 6 in
def ORYONWrite_1Cyc_6Uops_ST : SchedWriteRes<[ORYONST]>;

let NumMicroOps = 8 in
def ORYONWrite_1Cyc_8Uops_ST : SchedWriteRes<[ORYONST]>;

let NumMicroOps = 10 in
def ORYONWrite_1Cyc_10Uops_ST : SchedWriteRes<[ORYONST]>;
472
// 1-cycle Neon stores (float + ASIMD) with post/pre increment/decrement
// access; also used for pair stores until further informed. These reserve
// the store group plus an integer port.
// NOTE(review): this unlabelled variant carries NumMicroOps = 3, the same
// as ORYONWrite_1Cyc_3Uops_ST_I012345 - confirm this is intended.
let NumMicroOps = 3 in
def ORYONWrite_1Cyc_ST_I012345 : SchedWriteRes<[ORYONST, ORYONI012345]>;

let NumMicroOps = 2 in
def ORYONWrite_1Cyc_2Uops_ST_I012345
    : SchedWriteRes<[ORYONST, ORYONI012345]>;

let NumMicroOps = 3 in
def ORYONWrite_1Cyc_3Uops_ST_I012345
    : SchedWriteRes<[ORYONST, ORYONI012345]>;

let NumMicroOps = 4 in
def ORYONWrite_1Cyc_4Uops_ST_I012345
    : SchedWriteRes<[ORYONST, ORYONI012345]>;

let NumMicroOps = 5 in
def ORYONWrite_1Cyc_5Uops_ST_I012345
    : SchedWriteRes<[ORYONST, ORYONI012345]>;

let NumMicroOps = 6 in
def ORYONWrite_1Cyc_6Uops_ST_I012345
    : SchedWriteRes<[ORYONST, ORYONI012345]>;

let NumMicroOps = 8 in
def ORYONWrite_1Cyc_8Uops_ST_I012345
    : SchedWriteRes<[ORYONST, ORYONI012345]>;

let NumMicroOps = 10 in
def ORYONWrite_1Cyc_10Uops_ST_I012345
    : SchedWriteRes<[ORYONST, ORYONI012345]>;
506
507// VXU resource definition
508
// Integer <-> vector transfers (VXU doc p37: latencies and throughput;
// P41: resource taken; P42: uOps).
// A plain I2V is 1 uOp; I2V with a convert is 2 uOps. All I2V and V2I
// throughputs are 2.

// I2V move: 4 cycles.
let Latency = 4 in
def ORYONWrite_I2V_4Cyc_I45 : SchedWriteRes<[ORYONI2V]>;

// I2V with an inlined FCVT: one more uOp, 7 cycles.
let Latency = 7, NumMicroOps = 2 in
def ORYONWrite_I2V_7Cyc_I45 : SchedWriteRes<[ORYONI2V]>;

// V2I move: 3 cycles (FCVT is also 3 cycles, so move + convert is 6).
let Latency = 3 in
def ORYONWrite_V2I_3Cyc_FP01 : SchedWriteRes<[ORYONV2I]>;

// V2I with an inlined FCVT: one more uOp, 6 (3+3) cycles.
let Latency = 6, NumMicroOps = 2 in
def ORYONWrite_V2I_6Cyc_FP01 : SchedWriteRes<[ORYONV2I]>;
537
// Vector-to-vector and general FP write types.

let Latency = 2 in
def ORYONWrite_V2V_2Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]>;

let Latency = 3 in
def ORYONWrite_V2V_3Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]>;

// NOTE(review): the name says FP01 but the resource reserved is
// ORYONFP0123 - confirm which one is intended.
let Latency = 6, NumMicroOps = 3 in
def ORYONWrite_V2V_6Cyc_FP01 : SchedWriteRes<[ORYONFP0123]>;

let Latency = 4 in
def ORYONWrite_4Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]>;

let Latency = 3 in
def ORYONWrite_3Cyc_FP0 : SchedWriteRes<[ORYONFP0]>;

let Latency = 3 in
def ORYONWrite_3Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]>;

let Latency = 3, NumMicroOps = 2 in
def ORYONWrite_3Cyc_2Uops_FP0123 : SchedWriteRes<[ORYONFP0123]>;

let Latency = 2 in
def ORYONWrite_2Cyc_FP0123 : SchedWriteRes<[ORYONFP0123]>;

let Latency = 2 in
def ORYONWrite_2Cyc_FP01 : SchedWriteRes<[ORYONFP01]>;
575
// Write types bound to the ORYONFP1 group.

// 2 cycles.
let Latency = 2 in
def ORYONWrite_2Cyc_FP1 : SchedWriteRes<[ORYONFP1]>;

// 3 cycles.
let Latency = 3 in
def ORYONWrite_3Cyc_FP1 : SchedWriteRes<[ORYONFP1]>;

// 4 cycles, 0.5 throughput (ReleaseAtCycles = [4]).
let Latency = 4, ReleaseAtCycles = [4] in
def ORYONWrite_4Cyc_FP1_RC4 : SchedWriteRes<[ORYONFP1]>;

// 5 cycles, 1 throughput.
let Latency = 5 in
def ORYONWrite_5Cyc_FP1 : SchedWriteRes<[ORYONFP1]>;
596
// 8 cycles, 2 throughput on FP0123 (ReleaseAtCycles = [2]).
let Latency = 8, ReleaseAtCycles = [2] in
def ORYONWrite_8Cyc_FP0123_RC : SchedWriteRes<[ORYONFP0123]>;

// FP3-only write types with plain latencies of 6 to 10 cycles.
let Latency = 6 in
def ORYONWrite_6Cyc_FP3 : SchedWriteRes<[ORYONFP3]>;

let Latency = 7 in
def ORYONWrite_7Cyc_FP3 : SchedWriteRes<[ORYONFP3]>;

let Latency = 8 in
def ORYONWrite_8Cyc_FP3 : SchedWriteRes<[ORYONFP3]>;

let Latency = 9 in
def ORYONWrite_9Cyc_FP3 : SchedWriteRes<[ORYONFP3]>;

let Latency = 10 in
def ORYONWrite_10Cyc_FP3 : SchedWriteRes<[ORYONFP3]>;

// FP3-only write types with ReleaseAtCycles = [2].
let ReleaseAtCycles = [2] in {
  let Latency = 8 in
  def ORYONWrite_8Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]>;

  let Latency = 10 in
  def ORYONWrite_10Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]>;

  let Latency = 13 in
  def ORYONWrite_13Cyc_FP3_RC : SchedWriteRes<[ORYONFP3]>;
}
637
// Multi-resource FP0123 write types with explicit ReleaseAtCycles; the
// number of reserved groups equals the micro-op count.
let Latency = 4 in {
  let ReleaseAtCycles = [2] in
  def ORYONWrite_4Cyc_FP0123_RC : SchedWriteRes<[ORYONFP0123]>;

  let NumMicroOps = 2, ReleaseAtCycles = [2, 2] in
  def ORYONWrite_4Cyc_FP0123_FP0123_RC
      : SchedWriteRes<[ORYONFP0123, ORYONFP0123]>;

  let NumMicroOps = 3, ReleaseAtCycles = [3, 3, 3] in
  def ORYONWrite_4Cyc_FP0123_FP0123_FP0123_RC
      : SchedWriteRes<[ORYONFP0123, ORYONFP0123, ORYONFP0123]>;
}

let Latency = 6, NumMicroOps = 4, ReleaseAtCycles = [6, 6, 6, 6] in
def ORYONWrite_6Cyc_FP0123_FP0123_FP0123_FP0123_RC
    : SchedWriteRes<[ORYONFP0123, ORYONFP0123, ORYONFP0123, ORYONFP0123]>;
664
665//===----------------------------------------------------------------------===//
666// Instruction Tables in IXU
667//===----------------------------------------------------------------------===//
668
669//---
670// Arithmetic Instructions
671//---
672
// The leading number triples are carried over from the original
// annotations.

// 1,1,6 - add/sub, immediate / register / extended-register forms.
def : InstRW<[ORYONWrite_1Cyc_I012345],
             (instregex "^ADD(W|X)r(i|r|x)",
                        "^SUB(W|X)r(i|r|x)")>;

// 2,2,3 - add/sub, shifted-register forms.
def : InstRW<[ORYONWrite_2Cyc_I012345_I012345_RC],
             (instregex "^ADD(W|X)rs",
                        "^SUB(W|X)rs")>;

// 1,1,4 - flag-setting add/sub (CMP and CMN are aliases, page 75).
def : InstRW<[ORYONWrite_1Cyc_I0123],
             (instregex "^ADDS(W|X)r(i|r|x)(64)?",
                        "^SUBS(W|X)r(i|r|x)")>;

// 2,2,2 - flag-setting add/sub, shifted-register forms (CMP and CMN
// aliases, page 75).
def : InstRW<[ORYONWrite_2Cyc_I0123_I0123_RC],
             (instregex "^ADDS(W|X)rs",
                        "^SUBS(W|X)rs")>;

// 1,1,4 - add/subtract with carry.
def : InstRW<[ORYONWrite_1Cyc_I0123],
             (instregex "^ADC(W|X)r",
                        "^SBC(W|X)r",
                        "^ADCS(W|X)r",
                        "^SBCS(W|X)r")>;

// 1,1,2 - PC-relative address generation.
def : InstRW<[ORYONWrite_1Cyc_2Uops_I01], (instrs ADR, ADRP)>;

// 1,1,4 - conditional select family.
def : InstRW<[ORYONWrite_1Cyc_I0123],
             (instregex "^CSEL(W|X)r",
                        "^CSINV(W|X)r",
                        "^CSNEG(W|X)r",
                        "^CSINC(W|X)r")>;
702
703//---
// Compare Instructions
705//---
706
707// We have CCMP, CCMN as LLVM DAG node
708// CMP is an alias of SUBS as above
709// CMN is an alias of ADDS as above
710// We also have no way to get shift compare node in LLVM
711//2,2,1.5 CMP, CMN
712
// 1,1,4 - conditional compare, immediate and register forms.
def : InstRW<[ORYONWrite_1Cyc_I0123],
             (instregex "^CCMP(W|X)(i|r)",
                        "^CCMN(W|X)(i|r)")>;
716
717//---
718// Branch
719//---
720
// Unconditional direct branch reserves no pipe.
def : InstRW<[ORYONWrite_1Cyc_NONE], (instrs B)>;

// Branch-and-link and conditional / compare / test branches on I0/I1.
def : InstRW<[ORYONWrite_1Cyc_I01], (instrs BL)>;
def : InstRW<[ORYONWrite_1Cyc_I01],
             (instrs Bcc,
                     CBZW, CBZX, CBNZW, CBNZX,
                     TBZW, TBZX, TBNZW, TBNZX)>;

// Indirect branches and returns on I0.
def : InstRW<[ORYONWrite_1Cyc_I0], (instrs BR, BLR)>;
def : InstRW<[ORYONWrite_1Cyc_I0], (instrs RET)>;

// v8.3-A PAC branches: 3 uOps - 1 cycle for the branch, 7 cycles for
// authentication, 1 cycle for updating the link register.
def : InstRW<[ORYONWrite_9Cyc_I012],
             (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ,
                     BRAA, BRAAZ, BRAB, BRABZ)>;
def : InstRW<[ORYONWrite_9Cyc_I012],
             (instrs RETAA, RETAB, ERETAA, ERETAB)>;

// Authenticated loads.
def : InstRW<[ORYONWrite_7Cyc_3Uops_I2],
             (instregex "^LDRAA",
                        "^LDRAB")>;
737
738// Logical Instructions
739//---
740
// 1,1,4 - flag-setting logical ops (TST is an alias of ANDS).
def : InstRW<[ORYONWrite_1Cyc_I0123],
             (instregex "^ANDS(W|X)r(i|r|x)",
                        "^BICS(W|X)r(i|r|x)")>;

// 2,2,2 - flag-setting logical ops, shifted-register forms (TST with
// shift is an alias).
def : InstRW<[ORYONWrite_2Cyc_I0123_I0123_RC],
             (instregex "^ANDS(W|X)rs",
                        "^BICS(W|X)rs")>;

// 1,1,6 - logical ops.
def : InstRW<[ORYONWrite_1Cyc_I012345],
             (instregex "^AND(W|X)r(i|r|x)",
                        "^EOR(W|X)r(i|r|x)",
                        "^ORR(W|X)r(i|r|x)",
                        "^BIC(W|X)r(i|r|x)",
                        "^EON(W|X)r(i|r|x)",
                        "^ORN(W|X)r(i|r|x)")>;

// 2,2,3 - logical ops, shifted-register forms.
def : InstRW<[ORYONWrite_2Cyc_I012345_I012345_RC],
             (instregex "^AND(W|X)rs",
                        "^EOR(W|X)rs",
                        "^ORR(W|X)rs",
                        "^BIC(W|X)rs",
                        "^EON(W|X)rs",
                        "^ORN(W|X)rs")>;
759
760
761//---
762// Shift Instructions
763//---
764
// 1,1,6 - variable shifts/rotates and RMIF.
def : InstRW<[ORYONWrite_1Cyc_I012345],
             (instregex "^ASRV(W|X)r",
                        "^LSLV(W|X)r",
                        "^LSRV(W|X)r",
                        "^RORV(W|X)r",
                        "RMIF")>;
770
771//---
772// Move-Data Bit-field and Sign_Extension Instructions
773//---
774
// 1,1,6 - wide moves, bit-field moves and sign/zero extension.
def : InstRW<[ORYONWrite_1Cyc_I012345],
             (instregex "^MOVK(W|X)i",
                        "^MOVN(W|X)i",
                        "^MOVZ(W|X)i",
                        "^SBFM(W|X)ri",
                        "^UBFM(W|X)ri",
                        "^BFM(W|X)ri",
                        "^SXT(W|B|H|X)",
                        "^UXT(H|B)")>;

// COPY is an LLVM-internal DAG node; its modelling needs further study.
def : InstRW<[ORYONWrite_1Cyc_I012345], (instrs COPY)>;
784
785//---
786// Reverse Instructions
787//---
788
// 1,1,6 - bit and byte reversal.
def : InstRW<[ORYONWrite_1Cyc_I012345],
             (instregex "^RBIT(W|X)r",
                        "^REV(16|32|64)?(W|X)r")>;
792
793
794//---
795// Flag Manipulate Instructions
796//---
797
// 1,1,4 - flag manipulation.
def : InstRW<[ORYONWrite_1Cyc_I0123],
             (instregex "^SETF8",
                        "^SETF16",
                        "^CFINV")>;
801
802//---
803// Miscellaneous Instructions
804//---
805
// 1,1,6 - count leading sign/zero bits and extract.
def : InstRW<[ORYONWrite_1Cyc_I012345],
             (instregex "^CLS(W|X)r$",
                        "^CLZ(W|X)r$",
                        "^EXTR(W|X)rri")>;
809
810
811//---
812// Multiply Instructions
813//---
814
// 1,3,2 - multiply-add/sub, widening multiply-add/sub and multiply-high
// on I4/I5.
def : InstRW<[ORYONWrite_3Cyc_I45],
             (instregex "^MADD(W|X)rrr",
                        "^MSUB(W|X)rrr",
                        "^(S|U)MADDLrrr",
                        "^(S|U)MSUBLrrr",
                        "^(S|U)MULHrr")>;
820
821//---
822// Divide Instructions
823//---
824
// 32-bit signed/unsigned divide: 7-cycle write type.
def : InstRW<[ORYONWrite_7Cyc_I2_RC], (instregex "^(S|U)DIVWr")>;

// 64-bit signed/unsigned divide: 9-cycle write type.
def : InstRW<[ORYONWrite_9Cyc_I2_RC], (instregex "^(S|U)DIVXr")>;
830
831
832//---
// Cryptography Instructions
834//
// 1,3,1 on I2 - CRC32 and CRC32C.
def : InstRW<[ORYONWrite_3Cyc_I2],
             (instregex "^CRC32(B|H|W|X)rr",
                        "^CRC32C(B|H|W|X)rr")>;
838
839//---
840// PAU instructions
841//---
842
843// on p47 of IXU document, we have 7 cycles for all PAU instructions
844// here we just assume all signing and pauth instructions are 7 cycles
845// assume all are 7 cycles here
846
// Signing instructions.
def : InstRW<[ORYONWrite_7Cyc_I2],
             (instrs PACIA, PACIB, PACDA, PACDB,
                     PACIZA, PACIZB, PACDZA, PACDZB,
                     PACGA)>;

// Authentication instructions.
def : InstRW<[ORYONWrite_7Cyc_I2],
             (instrs AUTIA, AUTIB, AUTDA, AUTDB,
                     AUTIZA, AUTIZB, AUTDZA, AUTDZB)>;

// Pointer-authentication-code stripping.
def : InstRW<[ORYONWrite_7Cyc_I2], (instrs XPACI, XPACD)>;
859
860//===----------------------------------------------------------------------===//
861// Instruction Tables in LSU
862//===----------------------------------------------------------------------===//
863
864// 4 cycle Load-to-use from L1D$
865// Neon load with 5 cycle
866// 6 cycle to STA ?
867// STD cycle ?
868// NEON STD + 2
869
870// Load Instructions
871// FP Load Instructions
872
873// Load pair, immed pre-index, normal
874// Load pair, immed pre-index, signed words
875// Load pair, immed post-index, normal
876// Load pair, immed post-index, signed words
877// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.
878
// Loads without base writeback, all using the 4-cycle load write type.

// Non-temporal pair loads.
def : InstRW<[ORYONWrite_4Cyc_LD],
             (instrs LDNPDi, LDNPQi, LDNPSi, LDNPWi, LDNPXi)>;

// Pair loads, signed-offset form.
def : InstRW<[ORYONWrite_4Cyc_LD],
             (instrs LDPDi, LDPQi, LDPSi, LDPSWi, LDPWi, LDPXi)>;

// Register loads, unsigned-immediate form.
def : InstRW<[ORYONWrite_4Cyc_LD],
             (instrs LDRBui, LDRDui, LDRHui, LDRQui, LDRSui)>;

// Literal (PC-relative) loads.
def : InstRW<[ORYONWrite_4Cyc_LD],
             (instrs LDRDl, LDRQl, LDRWl, LDRXl)>;

// Unprivileged loads.
def : InstRW<[ORYONWrite_4Cyc_LD],
             (instrs LDTRBi, LDTRHi, LDTRWi, LDTRXi)>;

// Unprivileged sign-extending loads.
def : InstRW<[ORYONWrite_4Cyc_LD],
             (instrs LDTRSBWi, LDTRSBXi, LDTRSHWi, LDTRSHXi, LDTRSWi)>;
913
// Pre/post-indexed loads: 4-cycle load that also reserves an integer
// port.

// Pair loads, pre-index.
// NOTE(review): LDPXpre is not listed here although LDPXpost is below;
// it may be covered elsewhere in the file - confirm.
def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
             (instrs LDPDpre, LDPQpre, LDPSpre, LDPWpre)>;

// Register loads, pre-index.
def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
             (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre,
                     LDRSpre, LDRWpre, LDRXpre)>;

// Sign-extending byte loads, pre/post-index.
def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
             (instrs LDRSBWpre, LDRSBXpre, LDRSBWpost, LDRSBXpost)>;

// Sign-extending halfword loads, pre/post-index.
def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
             (instrs LDRSHWpre, LDRSHXpre, LDRSHWpost, LDRSHXpost)>;

// Zero-extending byte and halfword loads, pre/post-index.
def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
             (instrs LDRBBpre, LDRBBpost)>;
def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
             (instrs LDRHHpre, LDRHHpost)>;

// Pair loads, post-index.
def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
             (instrs LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost)>;

// Register loads, post-index.
def : InstRW<[ORYONWrite_4Cyc_LD_I012345],
             (instrs LDRBpost, LDRDpost, LDRHpost, LDRQpost,
                     LDRSpost, LDRWpost, LDRXpost)>;
965
966def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRBroW)>;
967def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDroW)>;
968def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHroW)>;
969def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHHroW)>;
970def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQroW)>;
971def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSroW)>;
972def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHWroW)>;
973def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHXroW)>;
974def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRWroW)>;
975def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRXroW)>;
976
977def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRBroX)>;
978def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRDroX)>;
979def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHHroX)>;
980def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRHroX)>;
981def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRQroX)>;
982def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSroX)>;
983def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHWroX)>;
984def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRSHXroX)>;
985def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRWroX)>;
986def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDRXroX)>;
987
// Unscaled-immediate loads (LDUR*): every opcode listed below is mapped to
// ORYONWrite_4Cyc_LD.
// NOTE(review): LDURWi does not appear in this list although LDURXi and the
// other register widths do; confirm it is covered elsewhere in this model.
988def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURBi)>;
989def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURBBi)>;
990def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURDi)>;
991def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURHi)>;
992def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURHHi)>;
993def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURQi)>;
994def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSi)>;
995def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURXi)>;
// Sign-extending variants.
996def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSBWi)>;
997def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSBXi)>;
998def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSHWi)>;
999def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSHXi)>;
1000def : InstRW<[ORYONWrite_4Cyc_LD], (instrs LDURSWi)>;
1001
1002
1003
1004// Store register, immed post-index
1005// NOTE: Handled by WriteST, ReadAdrBase
1006
1007// Store register, immed pre-index
1008// NOTE: Handled by WriteST
1009
1010// Store pair, immed post-index, W-form
1011// Store pair, immed post-index, X-form
1012// Store pair, immed pre-index, W-form
1013// Store pair, immed pre-index, X-form
1014// NOTE: Handled by WriteSTP.
1015
1016def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURBi)>;
1017def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURBBi)>;
1018def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURDi)>;
1019def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURHi)>;
1020def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURHHi)>;
1021def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURQi)>;
1022def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURSi)>;
1023def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURWi)>;
1024def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STURXi)>;
1025
1026def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRBi)>;
1027def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRHi)>;
1028def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRWi)>;
1029def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STTRXi)>;
1030
1031def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPDi)>;
1032def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPQi)>;
1033def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPXi)>;
1034def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STNPWi)>;
1035
1036def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPDi)>;
1037def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPQi)>;
1038def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPXi)>;
1039def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STPWi)>;
1040
1041def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRBui)>;
1042def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRDui)>;
1043def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRHui)>;
1044def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRQui)>;
1045def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRXui)>;
1046def : InstRW<[ORYONWrite_1Cyc_ST], (instrs STRWui)>;
1047
// Store pair with base-register writeback (pre- and post-index forms). These
// use the ORYONWrite_1Cyc_ST_I012345 write rather than the plain
// ORYONWrite_1Cyc_ST used for the non-writeback stores above.
// NOTE(review): the Q-register pair (STPQpre/STPQpost) is not listed in this
// group, although LDPQpost is listed with the loads; confirm that is intended.
1048def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
1049            (instrs STPDpre, STPDpost)>;
1050def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
1051            (instrs STPSpre, STPSpost)>;
1052def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
1053            (instrs STPWpre, STPWpost)>;
1054def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
1055            (instrs STPXpre, STPXpost)>;
1056
1057def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
1058            (instrs STRBpre, STRBpost)>;
1059def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
1060            (instrs STRBBpre, STRBBpost)>;
1061def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
1062            (instrs STRDpre, STRDpost)>;
1063def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
1064            (instrs STRHpre, STRHpost)>;
1065def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
1066            (instrs STRHHpre, STRHHpost)>;
1067def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
1068            (instrs STRQpre, STRQpost)>;
1069def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
1070            (instrs STRSpre, STRSpost)>;
1071def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
1072            (instrs STRWpre, STRWpost)>;
1073def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
1074            (instrs STRXpre, STRXpost)>;
1075
1076def : InstRW<[ORYONWrite_1Cyc_ST],
1077            (instrs STRBroW, STRBroX)>;
1078def : InstRW<[ORYONWrite_1Cyc_ST],
1079            (instrs STRDroW, STRDroX)>;
1080def : InstRW<[ORYONWrite_1Cyc_ST],
1081            (instrs STRHroW, STRHroX)>;
1082def : InstRW<[ORYONWrite_1Cyc_ST],
1083            (instrs STRHHroW, STRHHroX)>;
1084def : InstRW<[ORYONWrite_1Cyc_ST],
1085            (instrs STRQroW, STRQroX)>;
1086def : InstRW<[ORYONWrite_1Cyc_ST],
1087            (instrs STRSroW, STRSroX)>;
1088def : InstRW<[ORYONWrite_1Cyc_ST],
1089            (instrs STRWroW, STRWroX)>;
1090def : InstRW<[ORYONWrite_1Cyc_ST],
1091            (instrs STRXroW, STRXroX)>;
1092
1093// ASIMD Load instructions, 4 cycle access + 2 cycle NEON access
1094// ASIMD load, 1 element, multiple, 1 reg, D-form 1uOps
1095// ASIMD load, 1 element, multiple, 1 reg, Q-form 1uOps
1096def : InstRW<[ORYONWrite_5Cyc_LD],
1097            (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1098
1099def : InstRW<[ORYONWrite_5Cyc_LD_I012345],
1100            (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1101
1102// ASIMD load, 1 element, multiple, 2 reg, D-form 3 uOps
1103// ASIMD load, 1 element, multiple, 2 reg, Q-form 2 uOps
1104def : InstRW<[ORYONWrite_5Cyc_3Uops_LD],
1105            (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
1106
1107def : InstRW<[ORYONWrite_5Cyc_2Uops_LD],
1108            (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
1109
1110def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
1111            (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
1112
1113def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345],
1114            (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
1115
1116// ASIMD load, 1 element, multiple, 3 reg, D-form 4 uOps
1117// ASIMD load, 1 element, multiple, 3 reg, Q-form 3 uOps
1118def : InstRW<[ORYONWrite_5Cyc_4Uops_LD],
1119            (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
1120
1121def : InstRW<[ORYONWrite_5Cyc_3Uops_LD],
1122            (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
1123
1124def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
1125            (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
1126
1127def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
1128            (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
1129
1130// ASIMD load, 1 element, multiple, 4 reg, D-form 6 uOps
1131// ASIMD load, 1 element, multiple, 4 reg, Q-form 4 uOps
1132def : InstRW<[ORYONWrite_5Cyc_6Uops_LD],
1133            (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
1134def : InstRW<[ORYONWrite_5Cyc_4Uops_LD],
1135            (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
1136
1137def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
1138            (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
1139def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
1140            (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
1141
1142// ASIMD load, 1 element, one lane, B/H/S 2uOps
1143// ASIMD load, 1 element, one lane, D     2UOps
1144def : InstRW<[ORYONWrite_5Cyc_2Uops_LD], (instregex "^LD1i(8|16|32|64)$")>;
1145def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345],
1146            (instregex "^LD1i(8|16|32|64)_POST$")>;
1147
1148// ASIMD load, 1 element, all lanes, D-form, B/H/S 2uOps
1149// ASIMD load, 1 element, all lanes, D-form, D     2uOps
1150// ASIMD load, 1 element, all lanes, Q-form        2uOps
1151def : InstRW<[ORYONWrite_5Cyc_2Uops_LD],
1152            (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1153def : InstRW<[ORYONWrite_5Cyc_2Uops_LD_I012345],
1154            (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1155
1156// ASIMD load, 2 element, multiple, D-form, B/H/S 3 uOps
1157// ASIMD load, 2 element, multiple, Q-form, D     4 uOps
1158def : InstRW<[ORYONWrite_5Cyc_3Uops_LD],
1159            (instregex "^LD2Twov(8b|4h|2s)$")>;
1160def : InstRW<[ORYONWrite_5Cyc_4Uops_LD],
1161            (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
1162def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
1163            (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
1164def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
1165            (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
1166
1167// ASIMD load, 2 element, one lane, B/H           3 uOps
1168// ASIMD load, 2 element, one lane, S             3 uOps
1169// ASIMD load, 2 element, one lane, D             3 uOps
1170def : InstRW<[ORYONWrite_5Cyc_3Uops_LD], (instregex "^LD2i(8|16|32|64)$")>;
1171def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
1172            (instregex "^LD2i(8|16|32|64)_POST$")>;
1173
1174// ASIMD load, 2 element, all lanes, D-form, B/H/S 3 uOps
1175// ASIMD load, 2 element, all lanes, D-form, D     3 uOps
1176// ASIMD load, 2 element, all lanes, Q-form        3 uOps
1177def : InstRW<[ORYONWrite_5Cyc_3Uops_LD],
1178            (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1179def : InstRW<[ORYONWrite_5Cyc_3Uops_LD_I012345],
1180            (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1181
1182// ASIMD load, 3 element, multiple, D-form, B/H/S  5 uOps
1183// ASIMD load, 3 element, multiple, Q-form, B/H/S  6 uOps
1184// ASIMD load, 3 element, multiple, Q-form, D      6 uOps
1185def : InstRW<[ORYONWrite_5Cyc_5Uops_LD],
1186            (instregex "^LD3Threev(8b|4h|2s)$")>;
1187def : InstRW<[ORYONWrite_5Cyc_6Uops_LD],
1188            (instregex "^LD3Threev(16b|8h|4s|2d)$")>;
1189def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
1190            (instregex "^LD3Threev(8b|4h|2s)_POST$")>;
1191def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
1192            (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>;
1193
1194// ASIMD load, 3 element, one lane, B/H            4 uOps
1195// ASIMD load, 3 element, one lane, S              4 uOps
1196// ASIMD load, 3 element, one lane, D              5 uOps
1197def : InstRW<[ORYONWrite_5Cyc_4Uops_LD], (instregex "^LD3i(8|16|32)$")>;
1198def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], (instregex "^LD3i(64)$")>;
1199def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
1200            (instregex "^LD3i(8|16|32)_POST$")>;
1201def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
1202            (instregex "^LD3i(64)_POST$")>;
1203
1204// ASIMD load, 3 element, all lanes, D-form, B/H/S 4 uOps
1205// ASIMD load, 3 element, all lanes, D-form, D     5 uOps
1206// ASIMD load, 3 element, all lanes, Q-form, B/H/S 4 uOps
1207// ASIMD load, 3 element, all lanes, Q-form, D     5 uOps
1208def : InstRW<[ORYONWrite_5Cyc_4Uops_LD],
1209            (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s)$")>;
1210def : InstRW<[ORYONWrite_5Cyc_5Uops_LD],
1211            (instregex "^LD3Rv(1d|2d)$")>;
1212def : InstRW<[ORYONWrite_5Cyc_4Uops_LD_I012345],
1213            (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s)_POST$")>;
1214def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
1215            (instregex "^LD3Rv(1d|2d)_POST$")>;
1216
1217// ASIMD load, 4 element, multiple, D-form, B/H/S  6 uOps
1218// ASIMD load, 4 element, multiple, Q-form, B/H/S  10 uOps
1219// ASIMD load, 4 element, multiple, Q-form, D      8 uOps
1220def : InstRW<[ORYONWrite_5Cyc_6Uops_LD],
1221            (instregex "^LD4Fourv(8b|4h|2s)$")>;
1222def : InstRW<[ORYONWrite_5Cyc_10Uops_LD],
1223            (instregex "^LD4Fourv(16b|8h|4s)$")>;
1224def : InstRW<[ORYONWrite_5Cyc_8Uops_LD],
1225            (instregex "^LD4Fourv(2d)$")>;
1226def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
1227            (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;
1228def : InstRW<[ORYONWrite_5Cyc_10Uops_LD_I012345],
1229            (instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
1230def : InstRW<[ORYONWrite_5Cyc_8Uops_LD_I012345],
1231            (instregex "^LD4Fourv(2d)_POST$")>;
1232
1233// ASIMD load, 4 element, one lane, B/H            5 uOps
1234// ASIMD load, 4 element, one lane, S              5 uOps
1235// ASIMD load, 4 element, one lane, D              6 uOps
1236def : InstRW<[ORYONWrite_5Cyc_5Uops_LD], (instregex "^LD4i(8|16|32)$")>;
1237def : InstRW<[ORYONWrite_5Cyc_6Uops_LD], (instregex "^LD4i(64)$")>;
1238def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
1239            (instregex "^LD4i(8|16|32)_POST$")>;
1240def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
1241            (instregex "^LD4i(64)_POST$")>;
1242
1243// ASIMD load, 4 element, all lanes, D-form, B/H/S    5 uOps
1244// ASIMD load, 4 element, all lanes, D-form, D        6 uOps
1245// ASIMD load, 4 element, all lanes, Q-form, B/H/S    5 uOps
1246// ASIMD load, 4 element, all lanes, Q-form, D        6 uOps
1247def : InstRW<[ORYONWrite_5Cyc_5Uops_LD],
1248            (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s)$")>;
1249def : InstRW<[ORYONWrite_5Cyc_6Uops_LD],
1250            (instregex "^LD4Rv(1d|2d)$")>;
1251def : InstRW<[ORYONWrite_5Cyc_5Uops_LD_I012345],
1252            (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s)_POST$")>;
1253def : InstRW<[ORYONWrite_5Cyc_6Uops_LD_I012345],
1254            (instregex "^LD4Rv(1d|2d)_POST$")>;
1255
1256// ASIMD Store Instructions
1257// ASIMD store, 1 element, multiple, 1 reg, D-form    1 uOps
1258// ASIMD store, 1 element, multiple, 1 reg, Q-form    1 uops
1259def : InstRW<[ORYONWrite_1Cyc_ST],
1260            (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1261def : InstRW<[ORYONWrite_1Cyc_ST_I012345],
1262            (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1263
1264// ASIMD store, 1 element, multiple, 2 reg, D-form    2 uOps
1265// ASIMD store, 1 element, multiple, 2 reg, Q-form    2 uOps
1266def : InstRW<[ORYONWrite_1Cyc_2Uops_ST],
1267            (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1268def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
1269            (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1270
1271// ASIMD store, 1 element, multiple, 3 reg, D-form    3 uOps
1272// ASIMD store, 1 element, multiple, 3 reg, Q-form    3 uOps
1273def : InstRW<[ORYONWrite_1Cyc_3Uops_ST],
1274            (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1275def : InstRW<[ORYONWrite_1Cyc_3Uops_ST_I012345],
1276            (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1277
1278// ASIMD store, 1 element, multiple, 4 reg, D-form    4 uOps
1279// ASIMD store, 1 element, multiple, 4 reg, Q-form    4 uOps
1280def : InstRW<[ORYONWrite_1Cyc_4Uops_ST],
1281            (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
1282def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
1283            (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
1284
1285// ASIMD store, 1 element, one lane, B/H/S            2 uOps
1286// ASIMD store, 1 element, one lane, D                2 uOps
1287def : InstRW<[ORYONWrite_1Cyc_2Uops_ST],
1288            (instregex "^ST1i(8|16|32|64)$")>;
1289def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
1290            (instregex "^ST1i(8|16|32|64)_POST$")>;
1291
1292// ASIMD store, 2 element, multiple, D-form, B/H/S    2 uOps
1293// ASIMD store, 2 element, multiple, Q-form, B/H/S    4 uOps
1294// ASIMD store, 2 element, multiple, Q-form, D        4 uOps
1295def : InstRW<[ORYONWrite_1Cyc_2Uops_ST],
1296            (instregex "^ST2Twov(8b|4h|2s)$")>;
1297def : InstRW<[ORYONWrite_1Cyc_4Uops_ST],
1298            (instregex "^ST2Twov(16b|8h|4s|2d)$")>;
1299def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
1300            (instregex "^ST2Twov(8b|4h|2s)_POST$")>;
1301def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
1302            (instregex "^ST2Twov(16b|8h|4s|2d)_POST$")>;
1303
1304// ASIMD store, 2 element, one lane, B/H/S            2 uOps
1305// ASIMD store, 2 element, one lane, D                2 uOps
1306def : InstRW<[ORYONWrite_1Cyc_2Uops_ST],
1307            (instregex "^ST2i(8|16|32|64)$")>;
1308def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
1309            (instregex "^ST2i(8|16|32|64)_POST$")>;
1310
1311// ASIMD store, 3 element, multiple, D-form, B/H/S    4 uOps
1312// ASIMD store, 3 element, multiple, Q-form, B/H/S    6 uOps
1313// ASIMD store, 3 element, multiple, Q-form, D        6 uOps
1314def : InstRW<[ORYONWrite_1Cyc_4Uops_ST],
1315            (instregex "^ST3Threev(8b|4h|2s)$")>;
1316def : InstRW<[ORYONWrite_1Cyc_6Uops_ST],
1317            (instregex "^ST3Threev(16b|8h|4s|2d)$")>;
1318def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
1319            (instregex "^ST3Threev(8b|4h|2s)_POST$")>;
1320def : InstRW<[ORYONWrite_1Cyc_6Uops_ST_I012345],
1321            (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>;
1322
1323// ASIMD store, 3 element, one lane, B/H              2 uOps
1324// ASIMD store, 3 element, one lane, S                2 uOps
1325// ASIMD store, 3 element, one lane, D                4 uOps
1326def : InstRW<[ORYONWrite_1Cyc_2Uops_ST], (instregex "^ST3i(8|16|32)$")>;
1327def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], (instregex "^ST3i(64)$")>;
1328def : InstRW<[ORYONWrite_1Cyc_2Uops_ST_I012345],
1329            (instregex "^ST3i(8|16|32)_POST$")>;
1330def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
1331            (instregex "^ST3i(64)_POST$")>;
1332
1333
1334// ASIMD store, 4 element, multiple, D-form, B/H/S    5 uOps
1335// ASIMD store, 4 element, multiple, Q-form, B/H/S    10 uOps
1336// ASIMD store, 4 element, multiple, Q-form, D        8 uOps
1337def : InstRW<[ORYONWrite_1Cyc_5Uops_ST],
1338            (instregex "^ST4Fourv(8b|4h|2s)$")>;
1339def : InstRW<[ORYONWrite_1Cyc_10Uops_ST],
1340            (instregex "^ST4Fourv(16b|8h|4s)$")>;
1341def : InstRW<[ORYONWrite_1Cyc_8Uops_ST],
1342            (instregex "^ST4Fourv(2d)$")>;
1343def : InstRW<[ORYONWrite_1Cyc_5Uops_ST_I012345],
1344            (instregex "^ST4Fourv(8b|4h|2s)_POST$")>;
1345def : InstRW<[ORYONWrite_1Cyc_10Uops_ST_I012345],
1346            (instregex "^ST4Fourv(16b|8h|4s)_POST$")>;
1347def : InstRW<[ORYONWrite_1Cyc_8Uops_ST_I012345],
1348            (instregex "^ST4Fourv(2d)_POST$")>;
1349
1350// ASIMD store, 4 element, one lane, B/H              3 uOps
1351// ASIMD store, 4 element, one lane, S                3 uOps
1352// ASIMD store, 4 element, one lane, D                4 uOps
1353def : InstRW<[ORYONWrite_1Cyc_3Uops_ST], (instregex "^ST4i(8|16|32)$")>;
1354def : InstRW<[ORYONWrite_1Cyc_4Uops_ST], (instregex "^ST4i(64)$")>;
1355def : InstRW<[ORYONWrite_1Cyc_3Uops_ST_I012345],
1356            (instregex "^ST4i(8|16|32)_POST$")>;
1357def : InstRW<[ORYONWrite_1Cyc_4Uops_ST_I012345],
1358            (instregex "^ST4i(64)_POST$")>;
1359
1360
1361//===----------------------------------------------------------------------===//
1362// Instruction Tables in VXU
1363//===----------------------------------------------------------------------===//
1364// all uOps are not clearly written in the VXU document
1365
1366// I2V: moves from a general-purpose register into an FP/SIMD register.
// LLVM names these opcodes FMOV<src><dst>r, so the GPR-source forms are
// FMOVW[HS]r / FMOVX[HD]r, plus FMOVXDHighr (X register into the upper half
// of a vector register).
1367def : InstRW<[ORYONWrite_I2V_4Cyc_I45], (instregex "^FMOV[WX][HSD]r", "^FMOVXDHighr")>;
1368
1369// I2V with convert (integer/fixed-point in a GPR converted to FP)
1370def : InstRW<[ORYONWrite_I2V_7Cyc_I45], (instregex "^[SU]CVTF[SU][XW][HSD]ri")>;
1371
1372// V2I: moves from an FP/SIMD register into a general-purpose register.
// The FP-source forms are FMOV[HS]Wr / FMOV[HD]Xr, plus FMOVDXHighr (upper
// half of a vector register into an X register).
1373def : InstRW<[ORYONWrite_V2I_3Cyc_FP01], (instregex "^FMOV[HSD][WX]r", "^FMOVDXHighr")>;
1374
1375// V2I with convert 2nd [SU] necessary?
1376def : InstRW<[ORYONWrite_V2I_6Cyc_FP01], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
1377
1378// float to float move immediate, row 7 in big chart
1379def : InstRW<[ORYONWrite_V2V_2Cyc_FP0123], (instregex "^FMOV[HSD]r")>;
1380def : InstRW<[ORYONWrite_V2V_2Cyc_FP0123], (instregex "^FMOV[HSD]i")>;
1381
1382// float to float conversion within VXU, precision conversion
// NOTE(review): FJCVTZS produces its result in a general-purpose register per
// the Arm ISA, yet it is given a V2V write here; confirm this is intended and
// that a V2I write is not what was meant.
1383def : InstRW<[ORYONWrite_V2V_6Cyc_FP01], (instregex "^FJCVTZS")>;
// NOTE(review): the FRINT pattern lists only the Sr/Dr forms, whereas the
// FCVT pattern on the same record accepts H as well; confirm whether the
// half-precision FRINT*Hr opcodes should be included.
1384def : InstRW<[ORYONWrite_V2V_3Cyc_FP0123], (instregex "^FCVT[HSD][HSD]r",
1385                                                       "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;
1386
1387// floating comparison write to NZCV
1388def : InstRW<[ORYONWrite_2Cyc_FP01], (instregex "^FCMP(E)?[HSD]r[ir]")>;
1389def : InstRW<[ORYONWrite_2Cyc_FP01], (instregex "^FCCMP(E)?[HSD]rr")>;
1390
1391// floating point conditional select
1392def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FCSEL")>;
1393
1394// floating multiply-add
1395def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^(F|FN)MADD", "^(F|FN)MSUB")>;
1396
1397// floating unary, cycle/throughput? xls row14
1398def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^F(ABS|NEG)[SD]r")>;
1399
1400//floating division/square root
1401def : InstRW<[ORYONWrite_7Cyc_FP3], (instregex "^FDIVHrr")>;
1402def : InstRW<[ORYONWrite_8Cyc_FP3], (instregex "^FDIVSrr")>;
1403def : InstRW<[ORYONWrite_10Cyc_FP3], (instregex "^FDIVDrr")>;
1404
1405def : InstRW<[ORYONWrite_8Cyc_FP3_RC], (instregex "^FSQRTHr")>;
1406def : InstRW<[ORYONWrite_10Cyc_FP3_RC], (instregex "^FSQRTSr")>;
1407def : InstRW<[ORYONWrite_13Cyc_FP3_RC], (instregex "^FSQRTDr")>;
1408
1409//==========
1410// SIMD move instructions
1411//==========
1412
1413// ASIMD DUP element
1414def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^DUPv.+lane")>;
1415// ASIMD DUP general throughput undecided, 3? FP0123
1416// VXU doc, p42, 2 uOps
1417def : InstRW<[ORYONWrite_3Cyc_2Uops_FP0123], (instregex "^DUPv.+gpr")>;
1418
1419// ASIMD insert, element to element
1420def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^INSv.+lane")>;
1421// ASIMD insert,  gen reg 3? FP0123?
1422def : InstRW<[ORYONWrite_3Cyc_2Uops_FP0123], (instregex "^INSv.+gpr")>;
1423
1424// ASIMD move, FP immed
1425def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^FMOVv")>;
1426
1427// ASIMD transfer, element to gen reg
1428def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^[SU]MOVv")>;
1429
1430//==========
1431// SIMD arithmetic instructions
1432//==========
1433def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDv", "^SUBv",
1434                                         "^BIFv", "^BITv", "^BSLv",
1435                                         "^ANDv", "^BICv", "^EORv",
1436                                         "^ORRv", "^ORNv")>;
1437
1438
1439def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^FABDv", "^FADDv", "^FSUBv")>;
1440
1441// floating division
1442def : InstRW<[ORYONWrite_6Cyc_FP3], (instregex "^FDIVv.*16$")>;
1443def : InstRW<[ORYONWrite_7Cyc_FP3], (instregex "^FDIVv.*32$")>;
1444def : InstRW<[ORYONWrite_9Cyc_FP3], (instregex "^FDIVv.*64$")>;
1445
1446def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMUL(X)?v",
1447                                                   "^FRECPSv", "^FRSQRTSv")>;
1448
// Integer SIMD multiply / multiply-accumulate class: 3-cycle write on FP0123.
// Every pattern carries an explicit ^ anchor, matching the rest of this file.
// NOTE(review): UABAv sits in this 3-cycle list while SABAv is in the 2-cycle
// list below; confirm the signed/unsigned split is intentional.
1449def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^MLAv","^MLSv", "^MULv",
1450                                                   "^PMULv", "^UABAv")>;
1451
// Simple integer SIMD arithmetic (absolute difference, halving/saturating
// add/sub, min/max, register shifts): 2-cycle write on FP0123.
1452def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^SABAv", "^SABDv",
1453                                                   "^(SH|UH)(ADD|SUB)v",
1454                                                   "^S(MAX|MIN)v",
1455                                                   "^(SQ|UQ)(ADD|SUB)v",
1456                                                   "^(SQ|SQR|UQ|UQR)SHLv",
1457                                                   "^(SR|UR)HADDv",
1458                                                   "^(SR|UR)SHLv",
1459                                                   "^UABDv",
1460                                                   "^U(MAX|MIN)v")>;
1461// IMAX or UMAX in the above line
1462//==========
1463// SIMD compare instructions
1464//==========
1465
1466def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^CMEQv","^CMGEv","^CMGTv",
1467                                                   "^CMLEv","^CMLTv", "^CMHIv",
1468                                                   "^CMHSv",
1469                                                   "^FCMEQv", "^FCMGEv",
1470                                                   "^FCMGTv", "^FCMLEv",
1471                                                   "^FCMLTv",
1472                                                   "^FACGEv", "^FACGTv")>;
1473
1474//==========
1475// SIMD widening and narrowing arithmetic instructions
1476//==========
1477// NO need to list ADDHN2, RADDHN2, RSUBHN2 as they are not distinguished
1478// from ADDHN, RADDHN, RSUBHN in td file(v16i8, v8i16, v4i32).
1479def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDHNv",
1480                                                   "^SUBHNv",
1481                                                   "^RADDHNv",
1482                                                   "^RSUBHNv",
1483                                                   "^SABD(L|L2)v", "^UABD(L|L2)v",
1484                                                   "^(S|U)(ADD|SUB)(L|L2|W|W2)v")>;
1485
// Widening multiply and multiply-accumulate/subtract: 3-cycle write on FP0123.
// The last pattern covers SMLAL/SMLSL/SMULL, UMLAL/UMLSL/UMULL and the
// saturating-doubling SQDMLAL/SQDMLSL/SQDMULL opcodes. (The opcode stems are
// "MLS" and "SQD"; "MSL" and a bare "SQ" prefix match no instruction.)
// NOTE(review): SABAL is listed but UABAL is not; confirm whether it belongs here.
1486def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^PMUL(L|L2)v","^SABA(L|L2)v",
1487                                                   "^(S|U|SQD)(MLA|MLS|MUL)(L|L2)v")>;
1488
1489//==========
1490// SIMD unary arithmetic instructions
1491//==========
1492//^MVNv is an alias of ^NOTv
1493def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ABSv", "^CLSv","^CLZv", "^CNTv",
1494                                                   "^NEGv", "^NOTv",
1495                                                   "^RBITv", "^REV(16|32|64)v",
1496                                                   "^SQ(ABS|NEG)v", "^SQ(XT|XTU)(N|N2)v",
1497                                                   "^(SU|US)QADDv",
1498                                                   "^UQXT(N|N2)v", "^XTN2?v")>;
1499
1500def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^FCVT(L|L2|N|N2|XN|XN2)v",
1501                                                   "^FRINT[AIMNPXZ]v",
1502                                                   "^FRSQRTEv",
1503                                                   "^(S|U)ADALPv",
1504                                                   "^(S|U)ADDLPv")>;
1505
1506
1507def : InstRW<[ORYONWrite_3Cyc_FP0], (instregex "^URECPEv", "^URSQRTEv",
1508                                                "^FRECPEv", "^FRECPXv")>;
1509
1510def : InstRW<[ORYONWrite_8Cyc_FP3_RC], (instregex "^FSQRTv.*16$")>;
1511def : InstRW<[ORYONWrite_10Cyc_FP3_RC], (instregex "^FSQRTv.*32$")>;
1512def : InstRW<[ORYONWrite_13Cyc_FP3_RC], (instregex "^FSQRTv.*64$")>;
1513
1514//==========
1515// SIMD binary element arithmetic instructions
1516//==========
1517
1518def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FMLAv", "^FMLSv")>;
1519
1520def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex  "^SQDMULHv",
1521                                                   "^SQRD(MLA|MLS|MUL)Hv")>;
1522
1523//==========
1524// SIMD permute instructions
1525//==========
1526
1527def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^EXTv", "^TRN(1|2)v",
1528                                                   "^UZP(1|2)v", "^ZIP(1|2)v")>;
1529
1530//==========
1531// SIMD immediate instructions
1532//==========
1533
1534def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex  "^MOVIv", "^MVNIv")>;
1535
1536//==========
1537// SIMD shift(immediate) instructions
1538//==========
1539def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^RSHR(N|N2)v", "^SHLv",
1540                                                   "^(SHL|SHR)(N|N2)v",
1541                                                   "^SLIv",
1542                                                   "^(SQ|SQR)SHR(U)?(N|N2)v",
1543                                                   "^(UQ|UQR)SHR(N|N2)v",
1544                                                   "^SQSHLUv",
1545                                                   "^SRIv",
1546                                                   "^(S|SR|U|UR)SHRv",
1547                                                   "^(S|SR|U|UR)SRAv",
1548                                                   "^(S|U)SHL(L|L2)v")>;
1549
1550//==========
1551// SIMD floating-point and integer conversion instructions
1552//==========
1553// same as above conversion
1554
1555//==========
1556// SIMD reduce (across vector lanes) instructions
1557//==========
1558
1559def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDVv",
1560                                                   "^(FMAX|FMIN)(V|NMV)v",
1561                                                   "^(S|U)ADDLVv",
1562                                                   "^(S|U)(MAX|MIN)Vv")>;
1563//==========
1564// SIMD pairwise arithmetic instructions
1565//==========
1566
1567def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^ADDPv", "^FADDPv",
1568                                                   "^(FMAX|FMIN)(NMP|P)v",
1569                                                   "^(S|U)(MIN|MAX)Pv")>;
1570//==========
1571// SIMD dot product instructions
1572//==========
1573
// Covers both the vector form (UDOTv*/SDOTv*) and the by-element form
// (UDOTlanev*/SDOTlanev*), which has "lane" between the mnemonic and "v".
1574def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^(U|S)DOT(lane)?v")>;
1575
1576//==========
1577// SIMD table lookup instructions
1578//==========
1579// TBL 1-reg/2-reg; TBX 1-reg, 1uOp, throughput=4 latency=2
1580def : InstRW<[ORYONWrite_2Cyc_FP0123], (instrs TBLv8i8One, TBLv16i8One,
1581                                                TBXv8i8One, TBXv16i8One,
1582                                                TBLv8i8Two, TBLv16i8Two)>;
1583
1584// TBL 3-reg/4-reg, 3uops, throughput=4/3=1.33 latency=4
1585def : InstRW<[ORYONWrite_4Cyc_FP0123_FP0123_FP0123_RC],
1586            (instrs TBLv8i8Three, TBLv16i8Three,
1587                    TBLv8i8Four, TBLv16i8Four)>;
1588
1589
1590// TBX 2-reg 2 uOps, throughput=2 latency=4
1591def : InstRW<[ORYONWrite_4Cyc_FP0123_FP0123_RC], (instrs TBXv8i8Two, TBXv16i8Two)>;
1592
1593// TBX 3-reg/4-reg, 4uOps, throughput=1, latency=6
1594def : InstRW<[ORYONWrite_6Cyc_FP0123_FP0123_FP0123_FP0123_RC],
1595            (instrs TBXv8i8Three, TBXv16i8Three,
1596                    TBXv8i8Four, TBXv16i8Four)>;
1597
1598
1599//==========
1600// SIMD complex number arithmetic instructions
1601//==========
1602
1603def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^FCADDv", "^FCMLAv")>;
1604
1605//==========
1606// SIMD cryptographic instructions
1607//==========
1608// 3,4 on IMLA, CRYP
1609def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^AES[DE]",
1610                                                   "^SM3(TT1|TT2)(A|B)")>;
1611
1612// 2,4 on CRYP
1613def : InstRW<[ORYONWrite_2Cyc_FP0123], (instregex "^AESI?MC",
1614                                                   "^EOR3",
1615                                                   "^RAX1",
1616                                                   "^XAR",
1617                                                   "^BCAX",
1618                                                   "^SM3SS1",
1619                                                   "^SM3PART(W1|W2)")>;
1620// 5,1 on CRYP
1621def : InstRW<[ORYONWrite_5Cyc_FP1], (instregex "^SM4E",
1622                                                "^SM4EKEY")>;
1623
1624// 2,1 on CRYP
1625def : InstRW<[ORYONWrite_2Cyc_FP1], (instregex "^SHA1(H|SU0|SU1)",
1626                                                "^SHA256SU0",
1627                                                "^SHA512(SU0|SU1)")>;
1628
1629// 3,1 on CRYP
1630def : InstRW<[ORYONWrite_3Cyc_FP1], (instregex "^SHA256SU1",
1631                                                "^SHA512(H|H2)")>;
1632
1633// 4,0.25 on CRYP
1634def : InstRW<[ORYONWrite_4Cyc_FP1_RC4], (instregex "^SHA1(C|P|M)",
1635                                                "^SHA256(H|H2)")>;
1636
1637//==========
1638// SIMD v8.6 instructions
1639//==========
// The "a,b" pairs below are presumably latency,throughput figures taken from
// the hardware documentation - TODO confirm.
1640// 4,2 on IMLA
1641def : InstRW<[ORYONWrite_4Cyc_FP0123_RC], (instregex "^(S|U|US)MMLA$")>;
1642
1643// 4,0.5 on IMLA
// NOTE(review): the comment says 4 but the write used is named 8Cyc; confirm
// which figure is correct.
1644def : InstRW<[ORYONWrite_8Cyc_FP0123_RC], (instregex "^BFMMLA$")>;
1645
1646// 4,0.5 on IMLA
// NOTE(review): same 4-vs-8Cyc mismatch as BFMMLA above; confirm.
1647def : InstRW<[ORYONWrite_8Cyc_FP0123_RC], (instregex "^BFMLAL(B|T)")>;
1648
1649// 3,4
// Mixed-sign dot product. SUDOT exists only in by-element form (SUDOTlanev*),
// and USDOT has both vector and by-element forms, so "lane" must be optional.
1650def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^(US|SU)DOT(lane)?v")>;
1651
1652// 3,1
// BFloat16 dot product: the vector form is BFDOTv*, the by-element form is
// BF16DOTlanev*.
// NOTE(review): the comment above says 3 but the write used is named 4Cyc;
// confirm which figure is correct.
1653def : InstRW<[ORYONWrite_4Cyc_FP0123], (instregex "^BF(16)?DOT(lane)?v")>;
1654
1655// 3,4
1656def : InstRW<[ORYONWrite_3Cyc_FP0123], (instregex "^BFCVT(N|N2)?$")>;
1657
1658
1659} // SchedModel = OryonModel
1660