xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1//==- AArch64SchedFalkorDetails.td - Falkor Scheduling Defs -*- tablegen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the uop and latency details for the machine model for the
10// Qualcomm Falkor subtarget.
11//
12//===----------------------------------------------------------------------===//
13
14// Contains all of the Falkor specific SchedWriteRes types. The approach
15// below is to define a generic SchedWriteRes for every combination of
16// latency and microOps. The naming conventions is to use a prefix, one field
17// for latency, and one or more microOp count/type designators.
18//   Prefix: FalkorWr
19//   MicroOp Count/Types: #(B|X|Y|Z|LD|ST|SD|VX|VY|VSD)
20//   Latency: #cyc
21//
22// e.g. FalkorWr_1Z_6SD_4VX_6cyc means there are 11 micro-ops to be issued
23//      down one Z pipe, six SD pipes, four VX pipes and the total latency is
24//      six cycles.
25//
26// Contains all of the Falkor specific ReadAdvance types for forwarding logic.
27//
28// Contains all of the Falkor specific WriteVariant types for immediate zero
29// and LSLFast.
30//===----------------------------------------------------------------------===//
31
32//===----------------------------------------------------------------------===//
33// Define 0 micro-op types
34def FalkorWr_LdInc_none_2cyc : SchedWriteRes<[]> {
35  let Latency = 2;
36  let NumMicroOps = 0;
37}
38def FalkorWr_StInc_none_2cyc : SchedWriteRes<[]> {
39  let Latency = 2;
40  let NumMicroOps = 0;
41}
42def FalkorWr_none_3cyc : SchedWriteRes<[]> {
43  let Latency = 3;
44  let NumMicroOps = 0;
45}
46def FalkorWr_none_4cyc : SchedWriteRes<[]> {
47  let Latency = 4;
48  let NumMicroOps = 0;
49}
50
51//===----------------------------------------------------------------------===//
52// Define 1 micro-op types
53
54def FalkorWr_1X_2cyc    : SchedWriteRes<[FalkorUnitX]>   { let Latency = 2; }
55def FalkorWr_IMUL32_1X_2cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
56def FalkorWr_IMUL64_1X_4cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 4; }
57def FalkorWr_IMUL64_1X_5cyc : SchedWriteRes<[FalkorUnitX]> { let Latency = 5; }
58def FalkorWr_1Z_0cyc    : SchedWriteRes<[FalkorUnitZ]>   { let Latency = 0; }
59def FalkorWr_1ZB_0cyc   : SchedWriteRes<[FalkorUnitZB]>  { let Latency = 0; }
60def FalkorWr_1LD_3cyc   : SchedWriteRes<[FalkorUnitLD]>  { let Latency = 3; }
61def FalkorWr_1LD_4cyc   : SchedWriteRes<[FalkorUnitLD]>  { let Latency = 4; }
62def FalkorWr_1XYZ_0cyc  : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 0; }
63def FalkorWr_1XYZ_1cyc  : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; }
64def FalkorWr_1XYZ_2cyc  : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; }
65def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; }
66def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; }
67def FalkorWr_1none_0cyc : SchedWriteRes<[]>              { let Latency = 0; }
68
69def FalkorWr_1VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 0; }
70def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; }
71def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; }
72def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; }
73def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
74def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
75def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
76def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
77def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
78def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
79
80def FalkorWr_1LD_0cyc   : SchedWriteRes<[FalkorUnitLD]>  { let Latency = 0; }
81def FalkorWr_1ST_0cyc   : SchedWriteRes<[FalkorUnitST]>  { let Latency = 0; }
82def FalkorWr_1ST_3cyc   : SchedWriteRes<[FalkorUnitST]>  { let Latency = 3; }
83
84def FalkorWr_1GTOV_0cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 0; }
85def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; }
86def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; }
87def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
88
89//===----------------------------------------------------------------------===//
90// Define 2 micro-op types
91
92def FalkorWr_2VXVY_0cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
93  let Latency = 0;
94  let NumMicroOps = 2;
95}
96def FalkorWr_2VXVY_1cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
97  let Latency = 1;
98  let NumMicroOps = 2;
99}
100def FalkorWr_2VXVY_2cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
101  let Latency = 2;
102  let NumMicroOps = 2;
103}
104def FalkorWr_2VXVY_3cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
105  let Latency = 3;
106  let NumMicroOps = 2;
107}
108def FalkorWr_2VXVY_4cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
109  let Latency = 4;
110  let NumMicroOps = 2;
111}
112def FalkorWr_VMUL32_2VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
113  let Latency = 4;
114  let NumMicroOps = 2;
115}
116def FalkorWr_2VXVY_5cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
117  let Latency = 5;
118  let NumMicroOps = 2;
119}
120def FalkorWr_FMUL32_2VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
121  let Latency = 5;
122  let NumMicroOps = 2;
123}
124def FalkorWr_2VXVY_6cyc   : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
125  let Latency = 6;
126  let NumMicroOps = 2;
127}
128def FalkorWr_FMUL64_2VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
129  let Latency = 6;
130  let NumMicroOps = 2;
131}
132
133def FalkorWr_1LD_1VXVY_4cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
134  let Latency = 4;
135  let NumMicroOps = 2;
136}
137def FalkorWr_1XYZ_1LD_4cyc  : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
138  let Latency = 4;
139  let NumMicroOps = 2;
140}
141def FalkorWr_2LD_3cyc   : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
142  let Latency = 3;
143  let NumMicroOps = 2;
144}
145
146def FalkorWr_1VX_1VY_5cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
147  let Latency = 5;
148  let NumMicroOps = 2;
149}
150
151def FalkorWr_1VX_1VY_2cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
152  let Latency = 2;
153  let NumMicroOps = 2;
154}
155
156def FalkorWr_1VX_1VY_4cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
157  let Latency = 4;
158  let NumMicroOps = 2;
159}
160
161def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
162  let Latency = 10;
163  let NumMicroOps = 2;
164}
165
166def FalkorWr_1VX_1VY_12cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
167  let Latency = 12;
168  let NumMicroOps = 2;
169}
170
171def FalkorWr_1VX_1VY_14cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
172  let Latency = 14;
173  let NumMicroOps = 2;
174}
175
176def FalkorWr_1VX_1VY_21cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
177  let Latency = 21;
178  let NumMicroOps = 2;
179}
180
181def FalkorWr_1GTOV_1VXVY_2cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitVXVY]> {
182  let Latency = 2;
183  let NumMicroOps = 2;
184}
185
186def FalkorWr_2GTOV_1cyc    : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> {
187  let Latency = 1;
188  let NumMicroOps = 2;
189}
190
191def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> {
192  let Latency = 4;
193  let NumMicroOps = 2;
194}
195def FalkorWr_1XYZ_1LD_5cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitLD]> {
196  let Latency = 5;
197  let NumMicroOps = 2;
198}
199
200def FalkorWr_2XYZ_2cyc   : SchedWriteRes<[FalkorUnitXYZ, FalkorUnitXYZ]> {
201  let Latency = 2;
202  let NumMicroOps = 2;
203}
204
205def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> {
206  let Latency = 0;
207  let NumMicroOps = 2;
208}
209
210def FalkorWr_1X_1Z_8cyc  : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
211  let Latency = 8;
212  let NumMicroOps = 2;
213  let ReleaseAtCycles = [2, 8];
214}
215
216def FalkorWr_1X_1Z_11cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
217  let Latency = 11;
218  let NumMicroOps = 2;
219  let ReleaseAtCycles = [2, 11];
220}
221
222def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> {
223  let Latency = 3;
224  let NumMicroOps = 2;
225}
226
227def FalkorWr_1LD_1none_3cyc : SchedWriteRes<[FalkorUnitLD]> {
228  let Latency = 3;
229  let NumMicroOps = 2;
230}
231
232def FalkorWr_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitSD, FalkorUnitST]> {
233  let Latency = 0;
234  let NumMicroOps = 2;
235}
236
237def FalkorWr_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitVSD, FalkorUnitST]> {
238  let Latency = 0;
239  let NumMicroOps = 2;
240}
241
242//===----------------------------------------------------------------------===//
243// Define 3 micro-op types
244
245def FalkorWr_1ST_1SD_1LD_0cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
246                                               FalkorUnitLD]> {
247  let Latency = 0;
248  let NumMicroOps = 3;
249}
250
251def FalkorWr_1ST_1SD_1LD_3cyc : SchedWriteRes<[FalkorUnitST, FalkorUnitSD,
252                                               FalkorUnitLD]> {
253  let Latency = 3;
254  let NumMicroOps = 3;
255}
256
257def FalkorWr_3VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
258  let Latency = 3;
259  let NumMicroOps = 3;
260}
261
262def FalkorWr_3VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
263  let Latency = 4;
264  let NumMicroOps = 3;
265}
266
267def FalkorWr_3VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
268  let Latency = 5;
269  let NumMicroOps = 3;
270}
271
272def FalkorWr_3VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
273  let Latency = 6;
274  let NumMicroOps = 3;
275}
276
277def FalkorWr_1LD_2VXVY_4cyc  : SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY]> {
278  let Latency = 4;
279  let NumMicroOps = 3;
280}
281
282def FalkorWr_2LD_1none_3cyc  : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
283  let Latency = 3;
284  let NumMicroOps = 3;
285}
286
287def FalkorWr_3LD_3cyc        : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
288                                              FalkorUnitLD]> {
289  let Latency = 3;
290  let NumMicroOps = 3;
291}
292
293def FalkorWr_2LD_1Z_3cyc     : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
294                                             FalkorUnitZ]> {
295  let Latency = 3;
296  let NumMicroOps = 3;
297}
298
299def FalkorWr_1XYZ_1SD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitSD, FalkorUnitST]> {
300  let Latency = 0;
301  let NumMicroOps = 3;
302}
303def FalkorWr_1XYZ_1VSD_1ST_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitVSD, FalkorUnitST]> {
304  let Latency = 0;
305  let NumMicroOps = 3;
306}
307//===----------------------------------------------------------------------===//
308// Define 4 micro-op types
309
310def FalkorWr_2VX_2VY_14cyc  : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
311                                             FalkorUnitVX, FalkorUnitVY]> {
312  let Latency = 14;
313  let NumMicroOps = 4;
314}
315
316def FalkorWr_2VX_2VY_20cyc  : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
317                                             FalkorUnitVX, FalkorUnitVY]> {
318  let Latency = 20;
319  let NumMicroOps = 4;
320}
321
322def FalkorWr_2VX_2VY_21cyc  : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
323                                             FalkorUnitVX, FalkorUnitVY]> {
324  let Latency = 21;
325  let NumMicroOps = 4;
326}
327
328def FalkorWr_2VX_2VY_24cyc  : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY,
329                                             FalkorUnitVX, FalkorUnitVY]> {
330  let Latency = 24;
331  let NumMicroOps = 4;
332}
333
334def FalkorWr_4VXVY_2cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
335                                            FalkorUnitVXVY, FalkorUnitVXVY]> {
336  let Latency = 2;
337  let NumMicroOps = 4;
338}
339def FalkorWr_4VXVY_3cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
340                                            FalkorUnitVXVY, FalkorUnitVXVY]> {
341  let Latency = 3;
342  let NumMicroOps = 4;
343}
344def FalkorWr_4VXVY_4cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
345                                            FalkorUnitVXVY, FalkorUnitVXVY]> {
346  let Latency = 4;
347  let NumMicroOps = 4;
348}
349def FalkorWr_4VXVY_6cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
350                                            FalkorUnitVXVY, FalkorUnitVXVY]> {
351  let Latency = 6;
352  let NumMicroOps = 4;
353}
354
355def FalkorWr_4LD_3cyc      : SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
356                                            FalkorUnitLD, FalkorUnitLD]> {
357  let Latency = 3;
358  let NumMicroOps = 4;
359}
360
361def FalkorWr_1LD_3VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
362                                            FalkorUnitVXVY, FalkorUnitVXVY]> {
363  let Latency = 4;
364  let NumMicroOps = 4;
365}
366
367def FalkorWr_2LD_2none_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD]> {
368  let Latency = 3;
369  let NumMicroOps = 4;
370}
371
372def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST,
373                                              FalkorUnitSD, FalkorUnitLD]> {
374  let Latency = 3;
375  let NumMicroOps = 4;
376}
377
378def FalkorWr_2VSD_2ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
379                                           FalkorUnitST, FalkorUnitVSD]> {
380  let Latency = 0;
381  let NumMicroOps = 4;
382}
383
384//===----------------------------------------------------------------------===//
385// Define 5 micro-op types
386
387def FalkorWr_1LD_4VXVY_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitVXVY,
388                                            FalkorUnitVXVY, FalkorUnitVXVY,
389                                            FalkorUnitVXVY]> {
390  let Latency = 4;
391  let NumMicroOps = 5;
392}
393def FalkorWr_2LD_2VXVY_1none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
394                                            FalkorUnitVXVY, FalkorUnitVXVY]> {
395  let Latency = 4;
396  let NumMicroOps = 5;
397}
398def FalkorWr_5VXVY_7cyc    : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY,
399                                            FalkorUnitVXVY, FalkorUnitVXVY,
400                                            FalkorUnitVXVY]> {
401  let Latency = 7;
402  let NumMicroOps = 5;
403}
404def FalkorWr_1XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
405                                                FalkorUnitVSD, FalkorUnitST,
406                                                FalkorUnitVSD]> {
407  let Latency = 0;
408  let NumMicroOps = 5;
409}
410def FalkorWr_1VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
411                                                  FalkorUnitVSD, FalkorUnitST,
412                                                  FalkorUnitVSD]> {
413  let Latency = 0;
414  let NumMicroOps = 5;
415}
416//===----------------------------------------------------------------------===//
417// Define 6 micro-op types
418
419def FalkorWr_2LD_2VXVY_2none_4cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
420                                            FalkorUnitVXVY, FalkorUnitVXVY]> {
421  let Latency = 4;
422  let NumMicroOps = 6;
423}
424
425def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
426                                                FalkorUnitVSD, FalkorUnitXYZ,
427                                                FalkorUnitST, FalkorUnitVSD]> {
428  let Latency = 0;
429  let NumMicroOps = 6;
430}
431
432def FalkorWr_2VXVY_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
433                                                 FalkorUnitVSD, FalkorUnitVXVY,
434                                                 FalkorUnitST, FalkorUnitVSD]> {
435  let Latency = 0;
436  let NumMicroOps = 6;
437}
438
439def FalkorWr_3VSD_3ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
440                                           FalkorUnitST, FalkorUnitVSD,
441                                           FalkorUnitST, FalkorUnitVSD]> {
442  let Latency = 0;
443  let NumMicroOps = 6;
444}
445
446//===----------------------------------------------------------------------===//
447// Define 8 micro-op types
448
449def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
450                                             FalkorUnitVXVY, FalkorUnitVXVY,
451                                             FalkorUnitLD, FalkorUnitLD,
452                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
453  let Latency = 4;
454  let NumMicroOps = 8;
455}
456
457def FalkorWr_4VSD_4ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
458                                           FalkorUnitST, FalkorUnitVSD,
459                                           FalkorUnitST, FalkorUnitVSD,
460                                           FalkorUnitST, FalkorUnitVSD]> {
461  let Latency = 0;
462  let NumMicroOps = 8;
463}
464
465//===----------------------------------------------------------------------===//
466// Define 9 micro-op types
467
468def FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
469                                             FalkorUnitLD, FalkorUnitVXVY,
470                                             FalkorUnitVXVY, FalkorUnitLD,
471                                             FalkorUnitLD, FalkorUnitXYZ,
472                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
473  let Latency = 4;
474  let NumMicroOps = 9;
475}
476
477def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
478                                             FalkorUnitLD, FalkorUnitVXVY,
479                                             FalkorUnitVXVY, FalkorUnitXYZ,
480                                             FalkorUnitLD, FalkorUnitLD,
481                                             FalkorUnitVXVY, FalkorUnitVXVY]> {
482  let Latency = 4;
483  let NumMicroOps = 9;
484}
485
486//===----------------------------------------------------------------------===//
487// Define 10 micro-op types
488
489def FalkorWr_2VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
490                                                 FalkorUnitVSD, FalkorUnitVXVY,
491                                                 FalkorUnitST, FalkorUnitVSD,
492                                                 FalkorUnitST, FalkorUnitVSD,
493                                                 FalkorUnitST, FalkorUnitVSD]> {
494  let Latency = 0;
495  let NumMicroOps = 10;
496}
497
498//===----------------------------------------------------------------------===//
499// Define 12 micro-op types
500
501def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
502                                                 FalkorUnitVSD, FalkorUnitVXVY,
503                                                 FalkorUnitST, FalkorUnitVSD,
504                                                 FalkorUnitVXVY, FalkorUnitST,
505                                                 FalkorUnitVSD, FalkorUnitVXVY,
506                                                 FalkorUnitST, FalkorUnitVSD]> {
507  let Latency = 0;
508  let NumMicroOps = 12;
509}
510
511// Forwarding logic is modeled for multiply add/accumulate and
512// load/store base register increment.
513// -----------------------------------------------------------------------------
514def FalkorReadIMA32  : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>;
515def FalkorReadIMA64  : SchedReadAdvance<4, [FalkorWr_IMUL64_1X_4cyc, FalkorWr_IMUL64_1X_5cyc]>;
516def FalkorReadVMA    : SchedReadAdvance<3, [FalkorWr_VMUL32_1VXVY_4cyc, FalkorWr_VMUL32_2VXVY_4cyc]>;
517def FalkorReadFMA32  : SchedReadAdvance<1, [FalkorWr_FMUL32_1VXVY_5cyc, FalkorWr_FMUL32_2VXVY_5cyc]>;
518def FalkorReadFMA64  : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr_FMUL64_2VXVY_6cyc]>;
519
520def FalkorReadIncLd  : SchedReadAdvance<1, [FalkorWr_LdInc_none_2cyc]>;
521def FalkorReadIncSt  : SchedReadAdvance<1, [FalkorWr_StInc_none_2cyc]>;
522
523// SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast
524// -----------------------------------------------------------------------------
525def FalkorImmZPred    : SchedPredicate<[{MI->getOperand(1).isImm() &&
526                                         MI->getOperand(1).getImm() == 0}]>;
527def FalkorOp1ZrReg    : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR ||
528
529                                         MI->getOperand(1).getReg() == AArch64::XZR}]>;
530def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>;
531
532def FalkorWr_FMOV  : SchedWriteVariant<[
533                       SchedVar<FalkorOp1ZrReg,  [FalkorWr_1none_0cyc]>,
534                       SchedVar<NoSchedPred,     [FalkorWr_1GTOV_1cyc]>]>;
535
536def FalkorWr_MOVZ  : SchedWriteVariant<[
537                       SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
538                       SchedVar<NoSchedPred,    [FalkorWr_1XYZB_0cyc]>]>; // imm fwd
539
540
541def FalkorWr_ADDSUBsx : SchedWriteVariant<[
542                          SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_1cyc]>,
543                          SchedVar<NoSchedPred,            [FalkorWr_2XYZ_2cyc]>]>;
544
545def FalkorWr_LDRro : SchedWriteVariant<[
546                       SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_3cyc]>,
547                       SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1LD_4cyc]>]>;
548
549def FalkorWr_LDRSro : SchedWriteVariant<[
550                        SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_4cyc]>,
551                        SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1LD_5cyc]>]>;
552
553def FalkorWr_ORRi : SchedWriteVariant<[
554                      SchedVar<FalkorOp1ZrReg, [FalkorWr_1XYZ_0cyc]>, // imm fwd
555                      SchedVar<NoSchedPred,    [FalkorWr_1XYZ_1cyc]>]>;
556
557def FalkorWr_PRFMro : SchedWriteVariant<[
558                        SchedVar<FalkorShiftExtFastPred, [FalkorWr_1ST_3cyc]>,
559                        SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1ST_4cyc]>]>;
560
561def FalkorWr_STRVro : SchedWriteVariant<[
562                        SchedVar<FalkorShiftExtFastPred, [FalkorWr_1VSD_1ST_0cyc]>,
563                        SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1VSD_1ST_0cyc]>]>;
564
565def FalkorWr_STRQro : SchedWriteVariant<[
566                        SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_2ST_2VSD_0cyc]>,
567                        SchedVar<NoSchedPred,            [FalkorWr_2XYZ_2ST_2VSD_0cyc]>]>;
568
569def FalkorWr_STRro : SchedWriteVariant<[
570                       SchedVar<FalkorShiftExtFastPred, [FalkorWr_1SD_1ST_0cyc]>,
571                       SchedVar<NoSchedPred,            [FalkorWr_1XYZ_1SD_1ST_0cyc]>]>;
572
573//===----------------------------------------------------------------------===//
574// Specialize the coarse model by associating instruction groups with the
575// subtarget-defined types. As the modeled is refined, this will override most
576// of the earlier mappings.
577
578// Miscellaneous
579// -----------------------------------------------------------------------------
580
581// FIXME: This could be better modeled by looking at the regclasses of the operands.
582def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs COPY)>;
583
584// SIMD Floating-point Instructions
585// -----------------------------------------------------------------------------
586def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(FABS|FNEG)v2f32$")>;
587
588def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT))(v2f32|v2i32p)$")>;
589def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FAC(GE|GT)(32|64)$")>;
590def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FCM(EQ|GE|GT)(32|64|v2f32|v2i32)$")>;
591def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FCM(EQ|LE|GE|GT|LT)(v1i32|v1i64|v2i32)rz$")>;
592def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FRINT(A|I|M|N|P|X|Z)v2f32$")>;
593
594def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^F(MAX|MIN)(NM)?Vv4i32v$")>;
595def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(FABD|FADD|FSUB)v2f32$")>;
596def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^FADDP(v2i32p|v2i64p|v2f32)$")>;
597
598def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v2f32)$")>;
599def : InstRW<[FalkorWr_1VXVY_4cyc],   (instrs FCVTXNv1i64)>;
600def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^FCVTZ(S|U)v2i32(_shift)?$")>;
601
602def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
603                                      (instregex "^(FMUL|FMULX)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
604def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
605                                      (instrs FMULX32)>;
606
607def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
608                                      (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
609def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
610                                      (instrs FMULX64)>;
611
612def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(FABS|FNEG)(v2f64|v4f32)$")>;
613
614def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(F(MAX|MIN)(NM)?P?|FAC(GE|GT)|FCM(EQ|GE|GT))(v2f64|v4f32|v2i64p)$")>;
615def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^FCM(EQ|LE|GE|GT|LT)(v2i64|v4i32)rz$")>;
616def : InstRW<[FalkorWr_2VXVY_2cyc],   (instrs FCVTLv4i16, FCVTLv2i32)>;
617def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^FRINT(A|I|M|N|P|X|Z)(v2f64|v4f32)$")>;
618
619def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVv2f32)>;
620def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTv2f32)>;
621
622def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4f32)$")>;
623
624def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32)$")>;
625def : InstRW<[FalkorWr_2VXVY_4cyc],   (instrs FCVTLv8i16, FCVTLv4i32)>;
626def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^FCVTZ(S|U)(v2i64|v4i32)(_shift)?$")>;
627
628def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
629                                      (instregex "^(FMUL|FMULX)(v2f64|v4f32|v4i32_indexed)$")>;
630
631def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
632                                      (instregex "^(FMUL|FMULX)v2i64_indexed$")>;
633
634def : InstRW<[FalkorWr_3VXVY_4cyc],   (instrs FCVTNv4i16, FCVTNv2i32, FCVTXNv2f32)>;
635def : InstRW<[FalkorWr_3VXVY_5cyc],   (instrs FCVTNv8i16, FCVTNv4i32, FCVTXNv4f32)>;
636
637def : InstRW<[FalkorWr_2VX_2VY_14cyc],(instrs FDIVv2f64)>;
638def : InstRW<[FalkorWr_2VX_2VY_20cyc],(instrs FDIVv4f32)>;
639def : InstRW<[FalkorWr_2VX_2VY_21cyc],(instrs FSQRTv2f64)>;
640def : InstRW<[FalkorWr_2VX_2VY_24cyc],(instrs FSQRTv4f32)>;
641
642def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
643                                      (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
644def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
645                                      (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
646
647def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, FalkorReadFMA32],
648                                      (instregex "^FML(A|S)(v2f32|(v1i32_indexed|v2i32_indexed))$")>;
649def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, FalkorReadFMA64],
650                                      (instregex "^FML(A|S)v1i64_indexed$")>;
651def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc, FalkorReadFMA32],
652                                      (instregex "^FML(A|S)(v4f32|v4i32_indexed)$")>;
653def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc, FalkorReadFMA64],
654                                      (instregex "^FML(A|S)(v2f64|v2i64_indexed)$")>;
655
656// SIMD Integer Instructions
657// -----------------------------------------------------------------------------
658def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>;
659def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs ADDPv2i64p)>;
660def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(AND|ORR|ORN|BIC|EOR)v8i8$")>;
661def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(BIC|ORR)(v2i32|v4i16)$")>;
662def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^NEG(v1i64|v2i32|v4i16|v8i8)$")>;
663def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^SUB(v1i64|v2i32|v4i16|v8i8)$")>;
664
665def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v2i32|v4i16|v8i8)(_v.*)?$")>;
666def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)SHLv1i64$")>;
667def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)SHR(v2i32|v4i16|v8i8)_shift$")>;
668def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(S|U)SHRd$")>;
669def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^((S|U)?(MAX|MIN)P?|ABS|ADDP|CM(EQ|GE|HS|GT|HI))(v1i64|v2i32|v4i16|v8i8)$")>;
670def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CM(EQ|GE|HS|GT|HI)(v1i64|v2i32|v4i16|v8i8)$")>;
671def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CM(EQ|LE|GE|GT|LT)(v1i64|v2i32|v4i16|v8i8)rz$")>;
672def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^CMTST(v1i64|v2i32|v4i16|v8i8)$")>;
673def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs PMULv8i8)>;
674def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^SHL(v2i32|v4i16|v8i8)_shift$")>;
675def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^SHLd$")>;
676
677def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQNEG(v2i32|v4i16|v8i8)$")>;
678def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)R?SRA(d|(v2i32|v4i16|v8i8)_shift)$")>;
679def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)(ABD|ADALP)(v8i8|v4i16|v2i32)(_v.*)?$")>;
680def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)ADDLVv4i16v$")>;
681def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
682def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QSHLU?(d|s|h|b|(v8i8|v4i16|v2i32)_shift)$")>;
683def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)(QSHL|RSHL|QRSHL)(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
684def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(s|h|b)$")>;
685def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)QSUB(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
686def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)RHADD(v2i32|v4i16|v8i8)$")>;
687def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)RSHR(v2i32|v4i16|v8i8)_shift$")>;
688def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)RSHRd$")>;
689def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^R?SHRN(v2i32|v4i16|v8i8)_shift$")>;
690def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(SU|US)QADD(v1i8|v1i16|v2i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
691def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(S|U)?(MAX|MIN)V(v4i16v|v4i32v)$")>;
692def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs ADDVv4i16v)>;
693def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^S(L|R)I(d|(v8i8|v4i16|v2i32)_shift)$")>;
694def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQABS(v1i8|v1i16|v1i32|v1i64|v2i32|v4i16|v8i8)$")>;
695def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^SQNEG(v1i8|v1i16|v1i32|v1i64)$")>;
696
697def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)ADDLVv8i8v$")>;
698def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v)$")>;
699def : InstRW<[FalkorWr_1VXVY_4cyc],   (instrs ADDVv8i8v)>;
700def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
701                                      (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
702def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
703                                      (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
704def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc],
705                                      (instregex "^SQDMULL(i16|i32)$")>;
706def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
707                                      (instregex "^SQRDML(A|S)H(i16|i32|v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
708
709def : InstRW<[FalkorWr_1VXVY_5cyc],   (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>;
710
711def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs ADDVv4i32v)>;
712
713def : InstRW<[FalkorWr_2VXVY_4cyc],   (instrs ADDVv8i16v)>;
714def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(ADD|SUB)HNv.*$")>;
715def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(S|U)ABA(v2i32|v4i16|v8i8)$")>;
716
717def : InstRW<[FalkorWr_2VXVY_5cyc],   (instrs ADDVv16i8v)>;
718
719def : InstRW<[FalkorWr_2VXVY_6cyc],   (instregex "^(SQR?SHRN|UQR?SHRN|SQR?SHRUN)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32)_shift?$")>;
720def : InstRW<[FalkorWr_2VXVY_6cyc],   (instregex "^R(ADD|SUB)HNv.*$")>;
721
722def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^ADD(v16i8|v8i16|v4i32|v2i64)$")>;
723def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs ADDPv2i64)>; // sz==11
724def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(AND|ORR|ORN|BIC|EOR)v16i8$")>;
725def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(BIC|ORR)(v8i16|v4i32)$")>;
726def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(NEG|SUB)(v16i8|v8i16|v4i32|v2i64)$")>;
727
728def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)ADDLv.*$")>;
729def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)(ADDLP|HADD|HSUB|SHL)(v16i8|v2i64|v4i32|v8i16)(_v.*)?$")>;
730def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
731def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SHR(v16i8|v8i16|v4i32|v2i64)_shift$")>;
732def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(S|U)SUBLv.*$")>;
733def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^((S|U)?(MAX|MIN)P?|ABS)(v16i8|v2i64|v4i32|v8i16)$")>;
734def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^ADDP(v4i32|v8i16|v16i8)$")>; // sz!=11
735def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^CM(EQ|GE|HS|GT|HI)(v16i8|v2i64|v4i32|v8i16)$")>;
736def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^CM(EQ|LE|GE|GT|LT)(v16i8|v2i64|v4i32|v8i16)rz$")>;
737def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(CMTST|PMUL)(v16i8|v2i64|v4i32|v8i16)$")>;
738def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^PMULL(v8i8|v16i8)$")>;
739def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^SHL(v16i8|v8i16|v4i32|v2i64)_shift$")>;
740def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^SHLL(v16i8|v8i16|v4i32|v8i8|v4i16|v2i32)(_shift)?$")>;
741
742def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)R?SRA(v2i64|v4i32|v8i16|v16i8)_shift$")>;
743def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)ABD(v16i8|v8i16|v4i32|v2i64)$")>;
744def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)ABDLv.*$")>;
745def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)(ADALP|QADD)(v16i8|v8i16|v4i32|v2i64)(_v.*)?$")>;
746def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)QSHLU?(v2i64|v4i32|v8i16|v16i8)_shift$")>;
747def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)(QSHL|RSHL|QRSHL|QSUB|RHADD)(v16i8|v8i16|v4i32|v2i64)$")>;
748def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(S|U)RSHR(v2i64|v4i32|v8i16|v16i8)_shift$")>;
749def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^R?SHRN(v2i64|v4i32|v8i16|v16i8)_shift$")>;
750def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^(SU|US)QADD(v16i8|v8i16|v4i32|v2i64)$")>;
751def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^PMULL(v1i64|v2i64)$")>;
752def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i64)_shift$")>;
753def : InstRW<[FalkorWr_2VXVY_3cyc],   (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>;
754
755def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
756                                      (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
757def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
758                                      (instregex "^SQDMULLv.*$")>;
759def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
760                                      (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
761
762def : InstRW<[FalkorWr_3VXVY_3cyc],   (instregex "^(S|U)ADDLVv4i32v$")>;
763
764def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^(S|U)ADDLVv8i16v$")>;
765
766def : InstRW<[FalkorWr_3VXVY_6cyc],   (instregex "^(S|U)ADDLVv16i8v$")>;
767
768def : InstRW<[FalkorWr_4VXVY_2cyc],   (instregex "^(S|U)(ADD|SUB)Wv.*$")>;
769
770def : InstRW<[FalkorWr_4VXVY_3cyc],   (instregex "^(S|U)ABALv.*$")>;
771
772def : InstRW<[FalkorWr_4VXVY_4cyc],   (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>;
773
774def : InstRW<[FalkorWr_VMUL32_1VXVY_4cyc, FalkorReadVMA],
775                                      (instregex "^SQD(MLAL|MLSL)(i16|i32|v1i32_indexed|v1i64_indexed)$")>;
776def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
777                                      (instregex "^SQD(MLAL|MLSL)v[248].*$")>;
778
779// SIMD Load Instructions
780// -----------------------------------------------------------------------------
781def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],       (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
782def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
783                                                         (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))_POST$")>;
784def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],       (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
785def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
786                                                         (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
787def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],       (instrs LD2i64)>;
788def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
789                                                         (instrs LD2i64_POST)>;
790
791def : InstRW<[FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd], (instregex "^LD1i(8|16|32)$")>;
792def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1VXVY_4cyc, FalkorReadIncLd],
793                                                         (instregex "^LD1i(8|16|32)_POST$")>;
794
795def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
796def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
797                                                         (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;
798def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Twov(8b|4h|2s)$")>;
799def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
800                                                         (instregex "^LD2Twov(8b|4h|2s)_POST$")>;
801def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD2Rv(8b|4h|2s|1d)$")>;
802def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorReadIncLd],
803                                                         (instregex "^LD2Rv(8b|4h|2s|1d)_POST$")>;
804
805def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
806def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
807                                                         (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;
808def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instregex "^LD2Twov(16b|8h|4s|2d)$")>;
809def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
810                                                         (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>;
811def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instregex "^LD2Rv(16b|8h|4s|2d)$")>;
812def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
813                                                         (instregex "^LD2Rv(16b|8h|4s|2d)_POST$")>;
814def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instrs LD3i64)>;
815def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
816                                                         (instrs LD3i64_POST)>;
817def : InstRW<[FalkorWr_2LD_3cyc, FalkorReadIncLd],       (instrs LD4i64)>;
818def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorReadIncLd],
819                                                         (instrs LD4i64_POST)>;
820
821def : InstRW<[FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd], (instregex "^LD2i(8|16|32)$")>;
822def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_2VXVY_4cyc, FalkorReadIncLd],
823                                                         (instregex "^LD2i(8|16|32)_POST$")>;
824
825def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
826def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd],
827                                                         (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;
828def : InstRW<[FalkorWr_2LD_1none_3cyc, FalkorReadIncLd], (instregex "^LD3Rv(8b|4h|2s|1d)$")>;
829def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_1none_3cyc, FalkorReadIncLd],
830                                                         (instregex "^LD3Rv(8b|4h|2s|1d)_POST$")>;
831
832def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd],       (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
833def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
834                                                         (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;
835def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd],       (instrs LD3Threev2d)>;
836def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
837                                                         (instrs LD3Threev2d_POST)>;
838def : InstRW<[FalkorWr_3LD_3cyc, FalkorReadIncLd],       (instregex "^LD3Rv(16b|8h|4s|2d)$")>;
839def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_3LD_3cyc, FalkorReadIncLd],
840                                                         (instregex "^LD3Rv(16b|8h|4s|2d)_POST$")>;
841
842def : InstRW<[FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd], (instregex "^LD3i(8|16|32)$")>;
843def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3VXVY_4cyc, FalkorReadIncLd],
844                                                         (instregex "^LD3i(8|16|32)_POST$")>;
845
846def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
847def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd],
848                                                         (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;
849def : InstRW<[FalkorWr_2LD_2none_3cyc, FalkorReadIncLd], (instregex "^LD4Rv(8b|4h|2s|1d)$")>;
850def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2none_3cyc, FalkorReadIncLd],
851                                                         (instregex "^LD4Rv(8b|4h|2s|1d)_POST$")>;
852
853def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd],       (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
854def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
855                                                         (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;
856def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd],       (instrs LD4Fourv2d)>;
857def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
858                                                         (instrs LD4Fourv2d_POST)>;
859def : InstRW<[FalkorWr_4LD_3cyc, FalkorReadIncLd],       (instregex "^LD4Rv(16b|8h|4s|2d)$")>;
860def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_4LD_3cyc, FalkorReadIncLd],
861                                                         (instregex "^LD4Rv(16b|8h|4s|2d)_POST$")>;
862
863def : InstRW<[FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd], (instregex "^LD4i(8|16|32)$")>;
864def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4VXVY_4cyc, FalkorReadIncLd],
865                                                         (instregex "^LD4i(8|16|32)_POST$")>;
866
867def : InstRW<[FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd],
868                                                         (instregex "^LD3Threev(8b|4h|2s)$")>;
869def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1none_4cyc, FalkorReadIncLd],
870                                                         (instregex "^LD3Threev(8b|4h|2s)_POST$")>;
871
872def : InstRW<[FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd],
873                                                         (instregex "^LD4Fourv(8b|4h|2s)$")>;
874def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2none_4cyc, FalkorReadIncLd],
875                                                         (instregex "^LD4Fourv(8b|4h|2s)_POST$")>;
876
877def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd],
878                                                         (instregex "^LD3Threev(16b|8h|4s)$")>;
879
880def : InstRW<[FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc, FalkorReadIncLd],
881                                                         (instregex "^LD4Fourv(16b|8h|4s)$")>;
882
883def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc, FalkorReadIncLd],
884                                                         (instregex "^LD3Threev(16b|8h|4s)_POST$")>;
885
886def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, FalkorReadIncLd],
887                                                         (instregex "^LD4Fourv(16b|8h|4s)_POST$")>;
888
889// Arithmetic and Logical Instructions
890// -----------------------------------------------------------------------------
891def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(CCMN|CCMP)(W|X)(r|i)$")>;
892def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ADC(S)?(W|X)r$")>;
893def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ADD(S)?(W|X)r(r|i)$")>;
894def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(CSEL|CSINC|CSINV|CSNEG)(W|X)r$")>;
895def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^AND(S)?(W|X)r(i|r|s)$")>;
896def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^BIC(S)?(W|X)r(r|s)$")>;
897def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^EON(W|X)r(r|s)$")>;
898def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^EOR(W|X)r(i|r|s)$")>;
899def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ORN(W|X)r(r|s)$")>;
900def : InstRW<[FalkorWr_ORRi],         (instregex "^ORR(W|X)ri$")>;
901def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^ORR(W|X)r(r|s)$")>;
902def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^SBC(S)?(W|X)r$")>;
903def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^SUB(S)?(W|X)r(r|i)$")>;
904def : InstRW<[FalkorWr_ADDSUBsx],     (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>;
905def : InstRW<[FalkorWr_ADDSUBsx],     (instregex "^SUB(S)?(W|X)r(s|x|x64)$")>;
906
907// SIMD Miscellaneous Instructions
908// -----------------------------------------------------------------------------
909def : InstRW<[FalkorWr_1GTOV_1cyc],   (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
910def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
911def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^DUP(i8|i16|i32|i64)$")>;
912def : InstRW<[FalkorWr_1GTOV_1cyc],   (instregex "^INSv(i8|i16)(gpr|lane)$")>;
913def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^(S|U)MOVv.*$")>;
914def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(BIF|BIT|BSL|BSP)v8i8$")>;
915def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs EXTv8i8)>;
916def : InstRW<[FalkorWr_1VXVY_0cyc],   (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd
917def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs TBLv8i8One)>;
918def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs NOTv8i8)>;
919def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^REV(16|32|64)v.*$")>;
920def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;
921
922def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^(CLS|CLZ|CNT|RBIT)(v2i32|v4i16|v8i8)$")>;
923
924def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "(S|U)QXTU?Nv.*$")>;
925def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>;
926def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs FRECPXv1i32, FRECPXv1i64)>;
927def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs URECPEv2i32, URSQRTEv2i32)>;
928
929def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
930                                      (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;
931
932def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
933                                      (instrs FRECPS64, FRSQRTS64)>;
934
935def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
936                                      (instregex "^INSv(i32|i64)(gpr|lane)$")>;
937def : InstRW<[FalkorWr_2GTOV_1cyc],   (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
938def : InstRW<[FalkorWr_2VXVY_1cyc],   (instregex "^(BIF|BIT|BSL|BSP)v16i8$")>;
939def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs EXTv16i8)>;
940def : InstRW<[FalkorWr_2VXVY_0cyc],   (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd
941def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs NOTv16i8)>;
942def : InstRW<[FalkorWr_2VXVY_1cyc],   (instrs TBLv16i8One)>;
943
944def : InstRW<[FalkorWr_2VXVY_2cyc],   (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;
945def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>;
946def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs URECPEv4i32, URSQRTEv4i32)>;
947
948def : InstRW<[FalkorWr_2VXVY_4cyc],   (instrs TBLv8i8Two)>;
949def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^TBX(v8|v16)i8One$")>;
950
951def : InstRW<[FalkorWr_FMUL32_2VXVY_5cyc],
952                                      (instrs FRECPSv4f32, FRSQRTSv4f32)>;
953
954def : InstRW<[FalkorWr_FMUL64_2VXVY_6cyc],
955                                      (instrs FRECPSv2f64, FRSQRTSv2f64)>;
956
957def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^TBL(v8i8Three|v16i8Two)$")>;
958def : InstRW<[FalkorWr_3VXVY_5cyc],   (instregex "^TBX(v8i8Two|v16i8Two)$")>;
959
960def : InstRW<[FalkorWr_4VXVY_6cyc],   (instregex "^TBL(v8i8Four|v16i8Three)$")>;
961def : InstRW<[FalkorWr_4VXVY_6cyc],   (instregex "^TBX(v8i8Three|v16i8Three)$")>;
962
963def : InstRW<[FalkorWr_5VXVY_7cyc],   (instrs TBLv16i8Four)>;
964def : InstRW<[FalkorWr_5VXVY_7cyc],   (instregex "^TBX(v8i8Four|v16i8Four)$")>;
965
966// SIMD Store Instructions
967// -----------------------------------------------------------------------------
968
969def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
970                                       (instregex "^STR(Q|D|S|H|B)ui$")>;
971def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
972                                       (instregex "^STR(Q|D|S|H|B)(post|pre)$")>;
973def : InstRW<[FalkorWr_STRVro, ReadDefault, FalkorReadIncSt],
974                                       (instregex "^STR(D|S|H|B)ro(W|X)$")>;
975def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
976                                       (instregex "^STPQi$")>;
977def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
978                                       (instregex "^STPQ(post|pre)$")>;
979def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
980                                       (instregex "^STP(D|S)(i)$")>;
981def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
982                                       (instregex "^STP(D|S)(post|pre)$")>;
983def : InstRW<[FalkorWr_STRQro, ReadDefault, FalkorReadIncSt],
984                                       (instregex "^STRQro(W|X)$")>;
985def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
986                                       (instregex "^STUR(Q|D|S|B|H)i$")>;
987def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
988                                       (instrs STNPDi, STNPSi)>;
989def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
990                                       (instrs STNPQi)>;
991
992def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
993                                       (instregex "^ST1(One(v8b|v4h|v2s|v1d)|(i8|i16|i32|i64)|One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))$")>;
994def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
995                                       (instregex "^ST1(One(v8b|v4h|v2s|v1d)_POST|(i8|i16|i32|i64)_POST)$")>;
996def : InstRW<[FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
997                                       (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))$")>;
998def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
999                                       (instregex "^ST1(One(v16b|v8h|v4s|v2d)|Two(v8b|v4h|v2s|v1d))_POST$")>;
1000def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VSD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
1001                                       (instregex "^ST2(Two(v8b|v4h|v2s)|(i8|i16|i32|i64))_POST$")>;
1002
1003def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1004                                       (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))$")>;
1005def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1006                                       (instregex "^ST2Two(v16b|v8h|v4s|v2d)$")>;
1007def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1008                                       (instregex "^ST3(i8|i16|i32|i64)$")>;
1009def : InstRW<[FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1010                                       (instregex "^ST4(i8|i16|i32|i64)$")>;
1011// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1012def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1013                                       (instregex "^ST1(Two(v16b|v8h|v4s|v2d)|(Three|Four)(v8b|v4h|v2s|v1d))_POST$")>;
1014// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1015def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1016                                       (instregex "^ST2Two(v16b|v8h|v4s|v2d)_POST$")>;
1017// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1018def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1019                                       (instregex "^ST3(i8|i16|i32|i64)_POST$")>;
1020// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1021def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VSD_2ST_0cyc, ReadDefault, FalkorReadIncSt],
1022                                       (instregex "^ST4(i8|i16|i32|i64)_POST$")>;
1023
1024def : InstRW<[FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
1025                                       (instregex "^ST3Three(v8b|v4h|v2s)$")>;
1026// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1027def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_1VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
1028                                       (instregex "^ST3Three(v8b|v4h|v2s)_POST$")>;
1029
1030def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
1031                                       (instregex "^ST1Three(v16b|v8h|v4s|v2d)$")>;
1032def : InstRW<[FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
1033                                       (instrs ST3Threev2d)>;
1034// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1035def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
1036                                       (instregex "^ST1Three(v16b|v8h|v4s|v2d)_POST$")>;
1037// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1038def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_3VSD_3ST_0cyc, ReadDefault, FalkorReadIncSt],
1039                                       (instrs ST3Threev2d_POST)>;
1040
1041def : InstRW<[FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
1042                                       (instregex "^ST4Four(v8b|v4h|v2s)$")>;
1043// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1044def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_2ST_2VSD_0cyc, ReadDefault, FalkorReadIncSt],
1045                                       (instregex "^ST4Four(v8b|v4h|v2s)_POST$")>;
1046
1047def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
1048                                       (instregex "^ST1Four(v16b|v8h|v4s|v2d)$")>;
1049def : InstRW<[FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
1050                                       (instrs ST4Fourv2d)>;
1051// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1052def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
1053                                       (instregex "^ST1Four(v16b|v8h|v4s|v2d)_POST$")>;
1054// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1055def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VSD_4ST_0cyc, ReadDefault, FalkorReadIncSt],
1056                                       (instrs ST4Fourv2d_POST)>;
1057
1058def : InstRW<[FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
1059                                       (instregex "^ST3Three(v16b|v8h|v4s)$")>;
1060// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1061def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_2VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
1062                                       (instregex "^ST3Three(v16b|v8h|v4s)_POST$")>;
1063
1064def : InstRW<[FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
1065                                       (instregex "^ST4Four(v16b|v8h|v4s)$")>;
1066// FIXME: This is overly conservative in the imm POST case (no XYZ used in that case).
1067def : InstRW<[FalkorWr_1XYZ_1cyc, FalkorWr_4VXVY_4ST_4VSD_0cyc, ReadDefault, FalkorReadIncSt],
1068                                       (instregex "^ST4Four(v16b|v8h|v4s)_POST$")>;
1069
1070// Branch Instructions
1071// -----------------------------------------------------------------------------
1072def : InstRW<[FalkorWr_1none_0cyc],   (instrs B, TCRETURNdi)>;
1073def : InstRW<[FalkorWr_1Z_0cyc],      (instregex "^(BR|RET|(CBZ|CBNZ|TBZ|TBNZ)(W|X))$")>;
1074def : InstRW<[FalkorWr_1Z_0cyc],      (instrs RET_ReallyLR, TCRETURNri)>;
1075def : InstRW<[FalkorWr_1ZB_0cyc],     (instrs Bcc)>;
1076def : InstRW<[FalkorWr_1XYZB_0cyc],   (instrs BL)>;
1077def : InstRW<[FalkorWr_1Z_1XY_0cyc],  (instrs BLR)>;
1078
1079// Cryptography Extensions
1080// -----------------------------------------------------------------------------
1081def : InstRW<[FalkorWr_1VXVY_1cyc],   (instrs SHA1Hrr)>;
1082def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs AESIMCrr, AESMCrr)>;
1083def : InstRW<[FalkorWr_2VXVY_3cyc],   (instrs AESDrr, AESErr)>;
1084def : InstRW<[FalkorWr_2VXVY_2cyc],   (instrs SHA1SU0rrr, SHA1SU1rr, SHA256SU0rr)>;
1085def : InstRW<[FalkorWr_1VX_1VY_4cyc], (instregex "^SHA1(C|M|P)rrr$")>;
1086def : InstRW<[FalkorWr_1VX_1VY_5cyc], (instrs SHA256H2rrr, SHA256Hrrr)>;
1087def : InstRW<[FalkorWr_4VXVY_3cyc],   (instrs SHA256SU1rrr)>;
1088
1089// FP Load Instructions
1090// -----------------------------------------------------------------------------
1091def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
1092                                      (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
1093def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
1094                                      (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
1095def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
1096                                      (instregex "^LDUR(Q|D|S|H|B)i$")>;
1097def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd],
1098                                      (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
1099def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1100                                      (instrs LDNPQi)>;
1101def : InstRW<[FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1102                                      (instrs LDPQi)>;
1103def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1104                                      (instregex "LDNP(D|S)i$")>;
1105def : InstRW<[FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1106                                      (instregex "LDP(D|S)i$")>;
1107def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_1none_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1108                                      (instregex "LDP(D|S)(pre|post)$")>;
1109def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_2LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1110                                      (instregex "^LDPQ(pre|post)$")>;
1111
1112// FP Data Processing Instructions
1113// -----------------------------------------------------------------------------
1114def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCCMP(E)?(S|D)rr$")>;
1115def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCMP(E)?(S|D)r(r|i)$")>;
1116def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FCVT(A|M|N|P|Z)(S|U)U(W|X)(S|D)r$")>;
1117def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^(FABS|FNEG)(S|D)r$")>;
1118def : InstRW<[FalkorWr_1VXVY_1cyc],   (instregex "^FCSEL(S|D)rrr$")>;
1119
1120def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^F(MAX|MIN)(NM)?(S|D)rr$")>;
1121def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^F(MAX|MIN)(NM)?Pv2i(32|64)p$")>;
1122def : InstRW<[FalkorWr_1VXVY_2cyc],   (instrs FCVTSHr, FCVTDHr)>;
1123def : InstRW<[FalkorWr_1VXVY_2cyc],   (instregex "^FRINT(A|I|M|N|P|X|Z)(S|D)r$")>;
1124
1125def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^FABD(32|64)$")>;
1126def : InstRW<[FalkorWr_1VXVY_3cyc],   (instregex "^(FADD|FSUB)(S|D)rr$")>;
1127def : InstRW<[FalkorWr_1VXVY_3cyc],   (instrs FCVTHSr, FCVTHDr)>;
1128
1129def : InstRW<[FalkorWr_1VXVY_4cyc],   (instrs FCVTSDr, FCVTDSr)>;
1130
1131def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc],
1132                                      (instregex "^F(N)?MULSrr$")>;
1133
1134def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc],
1135                                      (instregex "^F(N)?MULDrr$")>;
1136
1137def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instrs FDIVSrr)>;
1138def : InstRW<[FalkorWr_1VX_1VY_14cyc],(instrs FDIVDrr)>;
1139def : InstRW<[FalkorWr_1VX_1VY_12cyc],(instrs FSQRTSr)>;
1140def : InstRW<[FalkorWr_1VX_1VY_21cyc],(instrs FSQRTDr)>;
1141
1142def : InstRW<[FalkorWr_FMUL32_1VXVY_5cyc, ReadDefault, ReadDefault, FalkorReadFMA32],
1143                                      (instregex "^F(N)?M(ADD|SUB)Srrr$")>;
1144def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFMA64],
1145                                      (instregex "^F(N)?M(ADD|SUB)Drrr$")>;
1146
1147// FP Miscellaneous Instructions
1148// -----------------------------------------------------------------------------
1149def : InstRW<[FalkorWr_FMOV],         (instregex "^FMOV(WS|XD|XDHigh)r$")>;
1150def : InstRW<[FalkorWr_1GTOV_0cyc],   (instregex "^FMOV(S|D)i$")>; // imm fwd
1151def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>;
1152def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FCVTZ(S|U)(d|s)$")>;
1153def : InstRW<[FalkorWr_1VTOG_1cyc],   (instregex "^FMOV(SW|DX|DXHigh)r$")>;
1154def : InstRW<[FalkorWr_1VXVY_0cyc],   (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; // imm fwd
1155// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr
1156def : InstRW<[FalkorWr_2VXVY_0cyc],   (instrs FMOVD0, FMOVS0)>; // imm fwd
1157
1158def : InstRW<[FalkorWr_1GTOV_4cyc],   (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
1159def : InstRW<[FalkorWr_1VXVY_4cyc],   (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>;
1160
1161def : InstRW<[FalkorWr_2VXVY_4cyc],   (instregex "^(S|U)CVTF(v2i64|v4i32|v2f64|v4f32)(_shift)?")>;
1162
1163// Load Instructions
1164// -----------------------------------------------------------------------------
1165def : InstRW<[FalkorWr_1ST_0cyc],     (instrs PRFMui, PRFMl)>;
1166def : InstRW<[FalkorWr_1ST_0cyc],     (instrs PRFUMi)>;
1167def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1168                                      (instregex "^LDNP(W|X)i$")>;
1169def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1170                                      (instregex "^LDP(W|X)i$")>;
1171def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1172                                      (instregex "^LDP(W|X)(post|pre)$")>;
1173def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
1174                                      (instregex "^LDR(BB|HH|W|X)ui$")>;
1175def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_3cyc, FalkorReadIncLd],
1176                                      (instregex "^LDR(BB|HH|W|X)(post|pre)$")>;
1177def : InstRW<[FalkorWr_LDRro, FalkorReadIncLd],
1178                                      (instregex "^LDR(BB|HH|W|X)ro(W|X)$")>;
1179def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
1180                                      (instregex "^LDR(W|X)l$")>;
1181def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
1182                                      (instregex "^LDTR(B|H|W|X)i$")>;
1183def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
1184                                      (instregex "^LDUR(BB|HH|W|X)i$")>;
1185def : InstRW<[FalkorWr_PRFMro],       (instregex "^PRFMro(W|X)$")>;
1186def : InstRW<[FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd],
1187                                      (instrs LDPSWi)>;
1188def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorWr_none_4cyc, FalkorReadIncLd],
1189                                      (instregex "^LDPSW(post|pre)$")>;
1190def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
1191                                      (instregex "^LDRS(BW|BX|HW|HX|W)ui$")>;
1192def : InstRW<[FalkorWr_LdInc_none_2cyc, FalkorWr_1LD_4cyc, FalkorReadIncLd],
1193                                      (instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
1194def : InstRW<[FalkorWr_LDRSro, FalkorReadIncLd],
1195                                      (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
1196def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
1197                                      (instrs LDRSWl)>;
1198def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
1199                                      (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
1200def : InstRW<[FalkorWr_1LD_4cyc, FalkorReadIncLd],
1201                                      (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
1202
1203// Miscellaneous Data-Processing Instructions
1204// -----------------------------------------------------------------------------
1205def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(S|U)?BFM(W|X)ri$")>;
1206def : InstRW<[FalkorWr_1X_2cyc],      (instregex "^CRC32.*$")>;
1207def : InstRW<[FalkorWr_1XYZ_2cyc],    (instregex "^(CLS|CLZ|RBIT|REV|REV16|REV32)(W|X)r$")>;
1208def : InstRW<[FalkorWr_2XYZ_2cyc],    (instregex "^EXTR(W|X)rri$")>;
1209
1210// Divide and Multiply Instructions
1211// -----------------------------------------------------------------------------
1212def : InstRW<[FalkorWr_IMUL64_1X_4cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
1213                                        (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;
1214def : InstRW<[FalkorWr_IMUL32_1X_2cyc, ReadDefault, ReadDefault, FalkorReadIMA32],
1215                                        (instregex "^M(ADD|SUB)Wrrr$")>;
1216
1217def : InstRW<[FalkorWr_IMUL64_1X_5cyc], (instregex "^(S|U)MULHrr$")>;
1218def : InstRW<[FalkorWr_IMUL64_1X_5cyc, ReadDefault, ReadDefault, FalkorReadIMA64],
1219                                        (instregex "^M(ADD|SUB)Xrrr$")>;
1220
1221def : InstRW<[FalkorWr_1X_1Z_8cyc],     (instregex "^(S|U)DIVWr$")>;
1222def : InstRW<[FalkorWr_1X_1Z_11cyc],    (instregex "^(S|U)DIVXr$")>;
1223
1224def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc],
1225                                        (instregex "^(S|U)MULLv.*$")>;
1226def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
1227                                        (instregex "^(S|U)(MLAL|MLSL)v.*$")>;
1228
1229// Move and Shift Instructions
1230// -----------------------------------------------------------------------------
1231def : InstRW<[FalkorWr_1XYZ_1cyc],    (instregex "^(LSLV|LSRV|ASRV|RORV)(W|X)r$")>;
1232def : InstRW<[FalkorWr_1XYZ_0cyc],    (instregex "^MOVK(W|X)i$")>; // imm fwd
1233def : InstRW<[FalkorWr_1XYZB_0cyc],   (instregex "^ADRP?$")>; // imm fwd
1234def : InstRW<[FalkorWr_1XYZB_0cyc],   (instregex "^MOVN(W|X)i$")>; // imm fwd
1235def : InstRW<[FalkorWr_MOVZ],         (instregex "^MOVZ(W|X)i$")>;
1236def : InstRW<[FalkorWr_1XYZ_0cyc],    (instrs MOVi32imm, MOVi64imm)>; // imm fwd (approximation)
1237def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>],
1238                                      (instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>;
1239def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>],
1240                                      (instrs LOADgot)>;
1241
1242// Other Instructions
1243// -----------------------------------------------------------------------------
1244def : InstRW<[FalkorWr_1LD_0cyc],     (instrs CLREX, DMB, DSB)>;
1245def : InstRW<[FalkorWr_1none_0cyc],   (instrs BRK, DCPS1, DCPS2, DCPS3, HINT, HLT, HVC, ISB, SMC, SVC)>;
1246def : InstRW<[FalkorWr_1ST_0cyc],     (instrs SYSxt, SYSLxt)>;
1247def : InstRW<[FalkorWr_1Z_0cyc],      (instrs MSRpstateImm1, MSRpstateImm4)>;
1248
1249def : InstRW<[FalkorWr_1LD_3cyc, FalkorReadIncLd],
1250                                      (instregex "^(LDAR(B|H|W|X)|LDAXR(B|H|W|X)|LDXR(B|H|W|X))$")>;
1251def : InstRW<[FalkorWr_1LD_3cyc, FalkorWr_none_3cyc, FalkorReadIncLd],
1252                                      (instregex "^(LDAXP(W|X)|LDXP(W|X))$")>;
1253def : InstRW<[FalkorWr_1LD_3cyc],     (instrs MRS, MOVbaseTLS)>;
1254
1255def : InstRW<[FalkorWr_1LD_1Z_3cyc],  (instrs DRPS)>;
1256
1257def : InstRW<[FalkorWr_1SD_1ST_0cyc], (instrs MSR)>;
1258def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
1259                                      (instrs STNPWi, STNPXi)>;
1260def : InstRW<[FalkorWr_2LD_1Z_3cyc],  (instrs ERET)>;
1261
1262
1263def : InstRW<[FalkorWr_1ST_1SD_1LD_3cyc], (instregex "^LDCLR(A|AL|L)?(B|H)?$")>;
1264def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, FalkorReadIncSt],
1265                                      (instregex "^STLR(B|H|W|X)$")>;
1266def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
1267                                      (instregex "^STXP(W|X)$")>;
1268def : InstRW<[FalkorWr_1ST_1SD_1LD_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
1269                                      (instregex "^STXR(B|H|W|X)$")>;
1270
1271def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, ReadDefault, FalkorReadIncSt],
1272                                      (instregex "^STLXP(W|X)$")>;
1273def : InstRW<[FalkorWr_2LD_1ST_1SD_3cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
1274                                      (instregex "^STLXR(B|H|W|X)$")>;
1275
1276// Store Instructions
1277// -----------------------------------------------------------------------------
1278def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
1279                                          (instregex "^STP(W|X)i$")>;
1280def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, ReadDefault, FalkorReadIncSt],
1281                                          (instregex "^STP(W|X)(post|pre)$")>;
1282def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
1283                                          (instregex "^STR(BB|HH|W|X)ui$")>;
1284def : InstRW<[FalkorWr_StInc_none_2cyc, FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
1285                                          (instregex "^STR(BB|HH|W|X)(post|pre)$")>;
1286def : InstRW<[FalkorWr_STRro, ReadDefault, FalkorReadIncSt],
1287                                          (instregex "^STR(BB|HH|W|X)ro(W|X)$")>;
1288def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
1289                                          (instregex "^STTR(B|H|W|X)i$")>;
1290def : InstRW<[FalkorWr_1SD_1ST_0cyc, ReadDefault, FalkorReadIncSt],
1291                                          (instregex "^STUR(BB|HH|W|X)i$")>;
1292
1293