xref: /freebsd/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64SchedA55.td (revision a3266ba2697a383d2ede56803320d941866c7e76)
1//==- AArch64SchedCortexA55.td - ARM Cortex-A55 Scheduling Definitions -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the machine model for the ARM Cortex-A55 processors.
10//
11//===----------------------------------------------------------------------===//
12
13// ===---------------------------------------------------------------------===//
14// The following definitions describe the per-operand machine model.
15// This works with MachineScheduler. See MCSchedModel.h for details.
16
17// Cortex-A55 machine model for scheduling and other instruction cost heuristics.
18def CortexA55Model : SchedMachineModel {
19  let MicroOpBufferSize = 0;  // The Cortex-A55 is an in-order processor
20  let IssueWidth = 2;         // It dual-issues under most circumstances
21  let LoadLatency = 4;        // Cycles for loads to access the cache. The
22                              // optimisation guide shows that most loads have
23                              // a latency of 3, but some have a latency of 4
24                              // or 5. Setting it to 4 looked to be a good trade-off.
25  let MispredictPenalty = 8;  // A branch direction mispredict.
26  let PostRAScheduler = 1;    // Enable PostRA scheduler pass.
27  let CompleteModel = 0;      // Covers instructions applicable to Cortex-A55.
28
29  list<Predicate> UnsupportedFeatures = [HasSVE]; // HasSVE is listed as unsupported by this model.
30
31  // FIXME: Remove when all errors have been fixed.
32  let FullInstRWOverlapCheck = 0;
33}
34
35//===----------------------------------------------------------------------===//
36// Define each kind of processor resource and number available.
37
38// Modeling each pipeline as a ProcResource using the BufferSize = 0 since the
39// Cortex-A55 is in-order.
40
41def CortexA55UnitALU    : ProcResource<2> { let BufferSize = 0; } // Int ALU
42def CortexA55UnitMAC    : ProcResource<1> { let BufferSize = 0; } // Int MAC, 64-bit wide
43def CortexA55UnitDiv    : ProcResource<1> { let BufferSize = 0; } // Int Division, not pipelined
44def CortexA55UnitLd     : ProcResource<1> { let BufferSize = 0; } // Load pipe
45def CortexA55UnitSt     : ProcResource<1> { let BufferSize = 0; } // Store pipe
46def CortexA55UnitB      : ProcResource<1> { let BufferSize = 0; } // Branch
47
48// The FP DIV/SQRT instructions execute totally differently from the FP ALU
49// instructions, which can mostly be dual-issued; that's why for now we model
50// them with 2 resources.
51def CortexA55UnitFPALU  : ProcResource<2> { let BufferSize = 0; } // FP ALU
52def CortexA55UnitFPMAC  : ProcResource<2> { let BufferSize = 0; } // FP MAC
53def CortexA55UnitFPDIV  : ProcResource<1> { let BufferSize = 0; } // FP Div/SQRT, 64/128
54
55//===----------------------------------------------------------------------===//
56// Subtarget-specific SchedWrite types
57
58let SchedModel = CortexA55Model in {
59
60// These latencies are modeled without taking into account forwarding paths
61// (the software optimisation guide lists latencies taking into account
62// typical forwarding paths).
63def : WriteRes<WriteImm, [CortexA55UnitALU]> { let Latency = 3; }    // MOVN, MOVZ
64def : WriteRes<WriteI, [CortexA55UnitALU]> { let Latency = 3; }      // ALU
65def : WriteRes<WriteISReg, [CortexA55UnitALU]> { let Latency = 3; }  // ALU of Shifted-Reg
66def : WriteRes<WriteIEReg, [CortexA55UnitALU]> { let Latency = 3; }  // ALU of Extended-Reg
67def : WriteRes<WriteExtr, [CortexA55UnitALU]> { let Latency = 3; }   // EXTR from a reg pair
68def : WriteRes<WriteIS, [CortexA55UnitALU]> { let Latency = 3; }     // Shift/Scale
69
70// MAC
71def : WriteRes<WriteIM32, [CortexA55UnitMAC]> { let Latency = 4; }   // 32-bit Multiply
72def : WriteRes<WriteIM64, [CortexA55UnitMAC]> { let Latency = 4; }   // 64-bit Multiply
73
74// Div
75def : WriteRes<WriteID32, [CortexA55UnitDiv]> {
76  let Latency = 8; let ResourceCycles = [8];
77}
78def : WriteRes<WriteID64, [CortexA55UnitDiv]> {
79  let Latency = 8; let ResourceCycles = [8];
80}
81
82// Load
83def : WriteRes<WriteLD, [CortexA55UnitLd]> { let Latency = 3; }
84def : WriteRes<WriteLDIdx, [CortexA55UnitLd]> { let Latency = 4; }
85def : WriteRes<WriteLDHi, [CortexA55UnitLd]> { let Latency = 5; }
86
87// Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVLD
88//               below, choosing the median of 3 which makes the latency 6.
89// An extra cycle is needed to get the swizzling right.
90def : WriteRes<WriteVLD, [CortexA55UnitLd]> { let Latency = 6;
91                                           let ResourceCycles = [3]; }
92def CortexA55WriteVLD1 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 4; }
93def CortexA55WriteVLD2 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 5;
94                                                  let ResourceCycles = [2]; }
95def CortexA55WriteVLD3 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 6;
96                                                  let ResourceCycles = [3]; }
97def CortexA55WriteVLD4 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 7;
98                                                  let ResourceCycles = [4]; }
99def CortexA55WriteVLD5 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 8;
100                                                  let ResourceCycles = [5]; }
101def CortexA55WriteVLD6 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 9;
102                                                  let ResourceCycles = [6]; }
103def CortexA55WriteVLD7 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 10;
104                                                  let ResourceCycles = [7]; }
105def CortexA55WriteVLD8 : SchedWriteRes<[CortexA55UnitLd]> { let Latency = 11;
106                                                  let ResourceCycles = [8]; }
107
108// Pre/Post Indexing - Performed as part of address generation
109def : WriteRes<WriteAdr, []> { let Latency = 0; }
110
111// Store
112def : WriteRes<WriteST, [CortexA55UnitSt]> { let Latency = 4; }
113def : WriteRes<WriteSTP, [CortexA55UnitSt]> { let Latency = 4; }
114def : WriteRes<WriteSTIdx, [CortexA55UnitSt]> { let Latency = 4; }
115def : WriteRes<WriteSTX, [CortexA55UnitSt]> { let Latency = 4; }
116
117// Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
118def : WriteRes<WriteVST, [CortexA55UnitSt]> { let Latency = 5;
119                                          let ResourceCycles = [2];}
120def CortexA55WriteVST1 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 4; }
121def CortexA55WriteVST2 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5;
122                                                  let ResourceCycles = [2]; }
123def CortexA55WriteVST3 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 6;
124                                                  let ResourceCycles = [3]; }
125def CortexA55WriteVST4 : SchedWriteRes<[CortexA55UnitSt]> { let Latency = 5; // NOTE(review): VST1-3 use 4/5/6; confirm 5 (not 7) is intended
126                                                  let ResourceCycles = [4]; }
127
128def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
129
130// Branch
131def : WriteRes<WriteBr, [CortexA55UnitB]>;
132def : WriteRes<WriteBrReg, [CortexA55UnitB]>;
133def : WriteRes<WriteSys, [CortexA55UnitB]>;
134def : WriteRes<WriteBarrier, [CortexA55UnitB]>;
135def : WriteRes<WriteHint, [CortexA55UnitB]>;
136
137// FP ALU
138//   As the WriteF result is produced in F5 and can mostly be forwarded
139//   to the consumer at F1, the effective latency is set to 4.
140def : WriteRes<WriteF, [CortexA55UnitFPALU]> { let Latency = 4; }
141def : WriteRes<WriteFCmp, [CortexA55UnitFPALU]> { let Latency = 3; }
142def : WriteRes<WriteFCvt, [CortexA55UnitFPALU]> { let Latency = 4; }
143def : WriteRes<WriteFCopy, [CortexA55UnitFPALU]> { let Latency = 3; }
144def : WriteRes<WriteFImm, [CortexA55UnitFPALU]> { let Latency = 3; }
145def : WriteRes<WriteV, [CortexA55UnitFPALU]> { let Latency = 4; }
146
147// FP ALU specific new schedwrite definitions
148def CortexA55WriteFPALU_F3 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 3;}
149def CortexA55WriteFPALU_F4 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 4;}
150def CortexA55WriteFPALU_F5 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 5;}
151
152// FP Mul, Div, Sqrt. Div/Sqrt are not pipelined
153def : WriteRes<WriteFMul, [CortexA55UnitFPMAC]> { let Latency = 4; }
154def : WriteRes<WriteFDiv, [CortexA55UnitFPDIV]> { let Latency = 22;
155                                            let ResourceCycles = [29]; }
156def CortexA55WriteFMAC : SchedWriteRes<[CortexA55UnitFPMAC]> { let Latency = 4; }
157def CortexA55WriteFDivHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
158                                                     let ResourceCycles = [5]; }
159def CortexA55WriteFDivSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 13;
160                                                     let ResourceCycles = [10]; }
161def CortexA55WriteFDivDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
162                                                     let ResourceCycles = [19]; }
163def CortexA55WriteFSqrtHP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 8;
164                                                      let ResourceCycles = [5]; }
165def CortexA55WriteFSqrtSP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 12;
166                                                      let ResourceCycles = [9]; }
167def CortexA55WriteFSqrtDP : SchedWriteRes<[CortexA55UnitFPDIV]> { let Latency = 22;
168                                                      let ResourceCycles = [19]; }
169
170//===----------------------------------------------------------------------===//
171// Subtarget-specific SchedRead types.
172
173def : ReadAdvance<ReadVLD, 0>;
174def : ReadAdvance<ReadExtrHi, 1>;
175def : ReadAdvance<ReadAdrBase, 1>;
176
177// ALU - ALU input operands are generally needed in EX1. An operand produced
178//       in, say, EX2 can be forwarded for consumption to the ALU in EX1, thereby
179//       allowing back-to-back ALU operations such as add. If an operand requires
180//       a shift, it will, however, be required in the ISS stage.
181def : ReadAdvance<ReadI, 2, [WriteImm,WriteI,
182                             WriteISReg, WriteIEReg,WriteIS,
183                             WriteID32,WriteID64,
184                             WriteIM32,WriteIM64]>;
185// Shifted operand
186def CortexA55ReadShifted : SchedReadAdvance<1, [WriteImm,WriteI,
187                                          WriteISReg, WriteIEReg,WriteIS,
188                                          WriteID32,WriteID64,
189                                          WriteIM32,WriteIM64]>;
190def CortexA55ReadNotShifted : SchedReadAdvance<2, [WriteImm,WriteI,
191                                             WriteISReg, WriteIEReg,WriteIS,
192                                             WriteID32,WriteID64,
193                                             WriteIM32,WriteIM64]>;
194def CortexA55ReadISReg : SchedReadVariant<[
195        SchedVar<RegShiftedPred, [CortexA55ReadShifted]>,
196        SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
197def : SchedAlias<ReadISReg, CortexA55ReadISReg>;
198
199def CortexA55ReadIEReg : SchedReadVariant<[
200        SchedVar<RegExtendedPred, [CortexA55ReadShifted]>,
201        SchedVar<NoSchedPred, [CortexA55ReadNotShifted]>]>;
202def : SchedAlias<ReadIEReg, CortexA55ReadIEReg>;
203
204// MUL
205def : ReadAdvance<ReadIM, 1, [WriteImm,WriteI,
206                              WriteISReg, WriteIEReg,WriteIS,
207                              WriteID32,WriteID64,
208                              WriteIM32,WriteIM64]>;
209def : ReadAdvance<ReadIMA, 2, [WriteImm,WriteI,
210                               WriteISReg, WriteIEReg,WriteIS,
211                               WriteID32,WriteID64,
212                               WriteIM32,WriteIM64]>;
213
214// Div
215def : ReadAdvance<ReadID, 1, [WriteImm,WriteI,
216                              WriteISReg, WriteIEReg,WriteIS,
217                              WriteID32,WriteID64,
218                              WriteIM32,WriteIM64]>;
219
220//===----------------------------------------------------------------------===//
221// Subtarget-specific InstRWs.
222
223//---
224// Miscellaneous
225//---
226def : InstRW<[CortexA55WriteVLD2,CortexA55WriteVLD1], (instregex "LDP.*")>;
227def : InstRW<[WriteI], (instrs COPY)>;
228//---
229// Vector Loads - 64-bit per cycle
230//---
231//   1-element structures
232def : InstRW<[CortexA55WriteVLD1], (instregex "LD1i(8|16|32|64)$")>;                // single element
233def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // replicate
234def : InstRW<[CortexA55WriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d)$")>;
235def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Onev(16b|8h|4s|2d)$")>;
236def : InstRW<[CortexA55WriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d)$")>; // multiple structures
237def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Twov(16b|8h|4s|2d)$")>;
238def : InstRW<[CortexA55WriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d)$")>;
239def : InstRW<[CortexA55WriteVLD6], (instregex "LD1Threev(16b|8h|4s|2d)$")>;
240def : InstRW<[CortexA55WriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d)$")>;
241def : InstRW<[CortexA55WriteVLD8], (instregex "LD1Fourv(16b|8h|4s|2d)$")>;
242
243def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
244def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
245def : InstRW<[CortexA55WriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>;
246def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>;
247def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>;
248def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>;
249def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>;
250def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>;
251def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>;
252def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>;
253
254//    2-element structures
255def : InstRW<[CortexA55WriteVLD2], (instregex "LD2i(8|16|32|64)$")>;
256def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
257def : InstRW<[CortexA55WriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
258def : InstRW<[CortexA55WriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
259
260def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; // post-index forms only; base forms are assigned above
261def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
262def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
263def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;
264
265//    3-element structures
266def : InstRW<[CortexA55WriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
267def : InstRW<[CortexA55WriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
268def : InstRW<[CortexA55WriteVLD3], (instregex "LD3Threev(8b|4h|2s|1d)$")>;
269def : InstRW<[CortexA55WriteVLD6], (instregex "LD3Threev(16b|8h|4s|2d)$")>;
270
271def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
272def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
273def : InstRW<[CortexA55WriteVLD3, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d)_POST$")>;
274def : InstRW<[CortexA55WriteVLD6, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;
275
276//    4-element structures
277def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>;                // load single 4-el structure to one lane of 4 regs.
278def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs.
279def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>;           // load multiple 4-el structures to 4 regs.
280def : InstRW<[CortexA55WriteVLD8], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
281
282def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
283def : InstRW<[CortexA55WriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
284def : InstRW<[CortexA55WriteVLD4, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d)_POST$")>;
285def : InstRW<[CortexA55WriteVLD8, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;
286
287//---
288// Vector Stores
289//---
290def : InstRW<[CortexA55WriteVST1], (instregex "ST1i(8|16|32|64)$")>;
291def : InstRW<[CortexA55WriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
292def : InstRW<[CortexA55WriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
293def : InstRW<[CortexA55WriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
294def : InstRW<[CortexA55WriteVST4], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
295def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
296def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
297def : InstRW<[CortexA55WriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
298def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
299def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
300
301def : InstRW<[CortexA55WriteVST2], (instregex "ST2i(8|16|32|64)$")>;
302def : InstRW<[CortexA55WriteVST2], (instregex "ST2Twov(8b|4h|2s)$")>;
303def : InstRW<[CortexA55WriteVST4], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
304def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
305def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
306def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;
307
308def : InstRW<[CortexA55WriteVST2], (instregex "ST3i(8|16|32|64)$")>;
309def : InstRW<[CortexA55WriteVST4], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
310def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
311def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>; // same arrangement list as the base ST3Threev form
312
313def : InstRW<[CortexA55WriteVST2], (instregex "ST4i(8|16|32|64)$")>;
314def : InstRW<[CortexA55WriteVST4], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
315def : InstRW<[CortexA55WriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
316def : InstRW<[CortexA55WriteVST4, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
317
318//---
319// Floating Point Conversions, MAC, DIV, SQRT
320//---
321def : InstRW<[CortexA55WriteFPALU_F3], (instregex "^FCVT[ALMNPZ][SU](S|U)?(W|X)")>;
322def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^FCVT(X)?[ALMNPXZ](S|U|N)?v")>;
323
324def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(S|U)(W|X)(H|S|D)")>;
325def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTF(h|s|d)")>;
326def : InstRW<[CortexA55WriteFPALU_F4], (instregex "^(S|U)CVTFv")>;
327
328def : InstRW<[CortexA55WriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
329def : InstRW<[CortexA55WriteFMAC], (instregex "^FML(A|S).*")>;
330def : InstRW<[CortexA55WriteFDivHP], (instrs FDIVHrr)>;
331def : InstRW<[CortexA55WriteFDivSP], (instrs FDIVSrr)>;
332def : InstRW<[CortexA55WriteFDivDP], (instrs FDIVDrr)>;
333def : InstRW<[CortexA55WriteFDivHP], (instregex "^FDIVv.*16$")>;
334def : InstRW<[CortexA55WriteFDivSP], (instregex "^FDIVv.*32$")>;
335def : InstRW<[CortexA55WriteFDivDP], (instregex "^FDIVv.*64$")>;
336def : InstRW<[CortexA55WriteFSqrtHP], (instregex "^.*SQRT.*16$")>;
337def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
338def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
339}
340