xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM85.td (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1//=- ARMScheduleM85.td - ARM Cortex-M85 Scheduling Definitions -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the machine model for the ARM Cortex-M85 processor.
10//
11// All timing is referred to EX2.  Thus, operands which are needed at EX1 are
12// stated to have a ReadAdvance of -1.  The FP/MVE pipe actually begins at EX3
13// but is described as if it were in EX2 to avoid having unnaturally long latencies
14// with delayed inputs on every instruction.  Instead, whenever an FP instruction
15// must access a GP register or a non-FP instruction (which includes loads/stores)
16// must access an FP register, the operand timing is adjusted:
17//     FP accessing GPR:     read one cycle later, write one cycle later
18//                           NOTE: absolute spec timing already includes this if
19//                                 referenced to EX2
20//     non-FP accessing FPR: read one cycle earlier, write one cycle earlier
21//===----------------------------------------------------------------------===//
22
// Top-level machine model record for the Cortex-M85.  Every scheduling
// definition in this file is attached to it through SchedModel.
def CortexM85Model : SchedMachineModel {
  // Dual issue for most instructions.
  let IssueWidth = 2;

  // M85 is in-order.
  let MicroOpBufferSize = 0;

  // Best case for load-use case.
  let LoadLatency = 2;

  // Mispredict cost for forward branches is 7, but 4 works better.
  let MispredictPenalty = 4;

  // The model is not declared complete.
  let CompleteModel = 0;
}
31
32let SchedModel = CortexM85Model in {
33
34//===--------------------------------------------------------------------===//
35// CortexM85 has two ALU, two LOAD, two STORE, a MAC, a BRANCH and two VFP
36// pipes (with three units).  There are three shifters available: one per
37// stage.
38
// All processor resources of the model share BufferSize = 0, so the setting
// is hoisted into a single enclosing let.
let BufferSize = 0 in {
  // Load pipes: L/H units plus a group containing both.
  def M85UnitLoadL  : ProcResource<1>;
  def M85UnitLoadH  : ProcResource<1>;
  def M85UnitLoad   : ProcResGroup<[M85UnitLoadL, M85UnitLoadH]>;

  // Store pipes: L/H units plus a group containing both.
  def M85UnitStoreL : ProcResource<1>;
  def M85UnitStoreH : ProcResource<1>;
  def M85UnitStore  : ProcResGroup<[M85UnitStoreL, M85UnitStoreH]>;

  // Integer side: ALUs (two units), shifters, MAC and branch.
  def M85UnitALU    : ProcResource<2>;
  def M85UnitShift1 : ProcResource<1>;
  def M85UnitShift2 : ProcResource<1>;
  def M85UnitMAC    : ProcResource<1>;
  def M85UnitBranch : ProcResource<1>;

  // VFP units A, B and C (each an L/H pair with a group) and unit D.
  def M85UnitVFPAL  : ProcResource<1>;
  def M85UnitVFPAH  : ProcResource<1>;
  def M85UnitVFPA   : ProcResGroup<[M85UnitVFPAL, M85UnitVFPAH]>;
  def M85UnitVFPBL  : ProcResource<1>;
  def M85UnitVFPBH  : ProcResource<1>;
  def M85UnitVFPB   : ProcResGroup<[M85UnitVFPBL, M85UnitVFPBH]>;
  def M85UnitVFPCL  : ProcResource<1>;
  def M85UnitVFPCH  : ProcResource<1>;
  def M85UnitVFPC   : ProcResGroup<[M85UnitVFPCL, M85UnitVFPCH]>;
  def M85UnitVFPD   : ProcResource<1>;

  // Vector ports: L/H units plus a group containing both.
  def M85UnitVPortL : ProcResource<1>;
  def M85UnitVPortH : ProcResource<1>;
  def M85UnitVPort  : ProcResGroup<[M85UnitVPortL, M85UnitVPortH]>;

  // Remaining single-instance units.
  def M85UnitSIMD   : ProcResource<1>;
  def M85UnitLShift : ProcResource<1>;
  def M85UnitDiv    : ProcResource<1>;

  // Resource consumed by writes that also list M85UnitSlot0 (for example
  // M85Slot0Only).
  def M85UnitSlot0  : ProcResource<1>;
}
68
69//===---------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types which map ProcResources and set latency.
71
// Plain ALU operations.
def : WriteRes<WriteALU, [M85UnitALU]> {
  let Latency = 1;
}

// Basic ALU with shifts: these also occupy the first shifter.
def : WriteRes<WriteALUsi,  [M85UnitALU, M85UnitShift1]> { let Latency = 1; }
def : WriteRes<WriteALUsr,  [M85UnitALU, M85UnitShift1]> { let Latency = 1; }
def : WriteRes<WriteALUSsr, [M85UnitALU, M85UnitShift1]> { let Latency = 1; }

// Compares.  The shifted forms have one more cycle of latency.
def : WriteRes<WriteCMP, [M85UnitALU]> {
  let Latency = 1;
}
let Latency = 2 in {
  def : WriteRes<WriteCMPsi, [M85UnitALU, M85UnitShift1]>;
  def : WriteRes<WriteCMPsr, [M85UnitALU, M85UnitShift1]>;
}

// Multiplies.  The high half of a 64-bit result uses no resource and no
// micro-op of its own.
def : WriteRes<WriteMUL16,   [M85UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMUL32,   [M85UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMUL64Lo, [M85UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMUL64Hi, []> {
  let Latency = 2;
  let NumMicroOps = 0;
}

// Multiply-accumulates, laid out the same way as the multiplies.
def : WriteRes<WriteMAC16,   [M85UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMAC32,   [M85UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMAC64Lo, [M85UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMAC64Hi, []> {
  let Latency = 2;
  let NumMicroOps = 0;
}

// Divisions.
def : WriteRes<WriteDIV, [M85UnitDiv]> { let Latency = 7; }

// Loads/Stores.  The "Wide" writes occupy both the L and the H unit.
def : WriteRes<WriteLd, [M85UnitLoad]> {
  let Latency = 1;
}
def : WriteRes<WritePreLd, [M85UnitLoad]> {
  let Latency = 2;
}
def : WriteRes<WriteST, [M85UnitStore]> {
  let Latency = 2;
}
def M85WriteLdWide : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH]> {
  let Latency = 1;
}
def M85WriteStWide : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH]> {
  let Latency = 2;
}

// Branches.
let Latency = 2 in {
  def : WriteRes<WriteBr,    [M85UnitBranch]>;
  def : WriteRes<WriteBrL,   [M85UnitBranch]>;
  def : WriteRes<WriteBrTbl, [M85UnitBranch]>;
}

// Noop: no resource, no micro-op, no latency.
def : WriteRes<WriteNoop, []> {
  let Latency = 0;
  let NumMicroOps = 0;
}
121
122//===---------------------------------------------------------------------===//
123// Sched definitions for floating-point instructions
124//
125// Floating point conversions.
// Floating point conversions.
let Latency = 2 in
def : WriteRes<WriteFPCVT, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]>;

// Floating point moves; the 64-bit form occupies both vector port units.
let Latency = 1 in {
  def : WriteRes<WriteFPMOV, [M85UnitVPort, M85UnitSlot0]>;
  def M85WriteFPMOV64 : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH,
                                       M85UnitSlot0]>;
}

// ALU operations (32/64-bit).  These go down the FP pipeline.
let Latency = 2 in
def : WriteRes<WriteFPALU32, [M85UnitVFPA, M85UnitVPort, M85UnitSlot0]>;
let Latency = 6 in
def : WriteRes<WriteFPALU64, [M85UnitVFPAL, M85UnitVFPAH,
                              M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;

// Multiplication
let Latency = 3 in
def : WriteRes<WriteFPMUL32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]>;
let Latency = 8 in
def : WriteRes<WriteFPMUL64, [M85UnitVFPBL, M85UnitVFPBH,
                              M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;

// Multiply-accumulate.  FPMAC goes down the FP Pipeline.
let Latency = 5 in
def : WriteRes<WriteFPMAC32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]>;
let Latency = 14 in
def : WriteRes<WriteFPMAC64, [M85UnitVFPBL, M85UnitVFPBH,
                              M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;

// Division.  These are the long latencies; the effective scheduling latency
// is shorter and is supplied by the VDIV InstRW overrides in this file.
let Latency = 14 in
def : WriteRes<WriteFPDIV32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]>;
let Latency = 29 in
def : WriteRes<WriteFPDIV64, [M85UnitVFPBL, M85UnitVFPBH,
                              M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;

// Square-root.  Same arrangement as division: long latency here, shorter
// effective scheduling latency from the VSQRT InstRW overrides.
let Latency = 14 in
def : WriteRes<WriteFPSQRT32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]>;
let Latency = 29 in
def : WriteRes<WriteFPSQRT64, [M85UnitVFPBL, M85UnitVFPBH,
                               M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;

// Zero-micro-op helper writes appended to InstRW lists: one sets
// SingleIssue, the other occupies M85UnitSlot0.
def M85SingleIssue : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let SingleIssue = 1;
}
def M85Slot0Only : SchedWriteRes<[M85UnitSlot0]> {
  let NumMicroOps = 0;
}
176
177// What pipeline stage operands need to be ready for depending on
178// where they come from.
// Advances for the generic ARM read types.
def : ReadAdvance<ReadALUsr, 0>;
def : ReadAdvance<ReadMUL,   0>;
def : ReadAdvance<ReadMAC,   1>;
def : ReadAdvance<ReadALU,   0>;
def : ReadAdvance<ReadFPMUL, 0>;
def : ReadAdvance<ReadFPMAC, 3>;

// Model-specific read advances, from earliest to latest operand use.
// operands needed at ISS
def M85Read_ISSm1 : SchedReadAdvance<-2>;
// operands needed at EX1
def M85Read_ISS   : SchedReadAdvance<-1>;
// operands needed at EX2
def M85Read_EX1   : SchedReadAdvance<0>;
// operands needed at EX3
def M85Read_EX2   : SchedReadAdvance<1>;
// operands needed at EX4
def M85Read_EX3   : SchedReadAdvance<2>;
// operands needed at EX5
def M85Read_EX4   : SchedReadAdvance<3>;

// Resource-free, zero-micro-op writes that only carry a latency.
let NumMicroOps = 0 in {
  def M85Write1 : SchedWriteRes<[]> { let Latency = 1; }
  def M85Write2 : SchedWriteRes<[]> { let Latency = 2; }
}

// ALU write that occupies the second shifter (M85UnitShift2).
def M85WriteShift2 : SchedWriteRes<[M85UnitALU, M85UnitShift2]>;
200
201// Non general purpose instructions may not be dual issued. These
202// use both issue units.
// Write used for non general purpose instructions; it is marked
// SingleIssue.
def M85NonGeneralPurpose : SchedWriteRes<[]> {
  let SingleIssue = 1;
  // Assume that these will go down the main ALU pipeline.
  // In reality, many look likely to stall the whole pipeline.
  let Latency = 3;
}

// List the non general purpose instructions.
def : InstRW<[M85NonGeneralPurpose],
             (instregex "t2MRS", "tSVC", "tBKPT", "t2MSR",
                        "t2DMB", "t2DSB", "t2ISB",
                        "t2HVC", "t2SMC", "t2UDF", "ERET",
                        "tHINT", "t2HINT",
                        "t2CLREX", "t2CLRM", "BUNDLE")>;
215
216//===---------------------------------------------------------------------===//
217// Sched definitions for load/store
218//
219// Mark whether the loads/stores must be single-issue
220// Address operands are needed earlier
221// Data operands are needed later
222
// Zero-micro-op writes describing base-register write-back and the second
// result of a wide load.
def M85BaseUpdate : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  // Update is bypassable out of EX1
  let Latency = 0;
}
def M85MVERBaseUpdate : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let Latency = 1;
}
// Q register base update is available in EX3 to bypass into EX2/ISS.
//  Latency=2 matches what we want for ISS, Latency=1 for EX2.  Going
//  with 2, as base update into another load/store is most likely.  Could
//  change later in an override.
def M85MVEQBaseUpdate : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let Latency = 2;
}
def M85LoadLatency1 : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let Latency = 1;
}

// Load with one extra cycle of latency compared to WriteLd.
def M85SlowLoad : SchedWriteRes<[M85UnitLoad]> {
  let Latency = 2;
}
237
238// Byte and half-word loads should have greater latency than other loads.
239// So should load exclusive?
240
// Slow loads, one entry per addressing form.  Each address register
// operand is read with M85Read_ISS.
// PC-relative: no register read listed.
def : InstRW<[M85SlowLoad], (instregex "t2LDR(B|H|SB|SH)pc")>;
// Base register plus immediate.
def : InstRW<[M85SlowLoad, M85Read_ISS],
             (instregex "t2LDR(B|H|SB|SH)T",
                        "t2LDR(B|H|SB|SH)i",
                        "tLDRspi",
                        "tLDR(B|H)i")>;
// Base register plus (shifted) index register.
def : InstRW<[M85SlowLoad, M85Read_ISS, M85Read_ISS],
             (instregex "t2LDR(B|H|SB|SH)s")>;
def : InstRW<[M85SlowLoad, M85Read_ISS, M85Read_ISS],
             (instregex "tLDR(B|H)r",
                        "tLDR(SB|SH)")>;
// Pre/post-indexed: the second write is the base update.
def : InstRW<[M85SlowLoad, M85BaseUpdate, M85Read_ISS],
             (instregex "t2LDR(B|H|SB|SH)_(POST|PRE)")>;
252
253// Exclusive/acquire/release loads/stores cannot be dual-issued
// Word-sized exclusive/acquire loads.
def : InstRW<[WriteLd, M85SingleIssue, M85Read_ISS],
             (instregex "t2LDREX$",
                        "t2LDA(EX)?$")>;
// Doubleword acquire-exclusive load: wide write plus a second result.
def : InstRW<[M85WriteLdWide, M85LoadLatency1, M85SingleIssue, M85Read_ISS],
             (instregex "t2LDAEXD$")>;
// Byte/halfword exclusive/acquire loads use the slow-load write.
def : InstRW<[M85SlowLoad, M85SingleIssue, M85Read_ISS],
             (instregex "t2LDREX(B|H)",
                        "t2LDA(EX)?(B|H)$")>;
// Exclusive/release stores: the first read is M85Read_EX2, the last is
// M85Read_ISS.
def : InstRW<[WriteST, M85SingleIssue, M85Read_EX2, M85Read_ISS],
             (instregex "t2STREX(B|H)?$",
                        "t2STL(EX)?(B|H)?$")>;
def : InstRW<[M85WriteStWide, M85SingleIssue,
              M85Read_EX2, M85Read_EX2, M85Read_ISS],
             (instregex "t2STLEXD$")>;
264
265// Load/store multiples end issue groups.
266
// Integer load/store multiple: wide write, single issue, base read with
// M85Read_ISS.
def : InstRW<[M85WriteLdWide, M85SingleIssue, M85Read_ISS],
             (instregex "(t|t2)LDM(DB|IA)$")>;
def : InstRW<[M85WriteStWide, M85SingleIssue, M85Read_ISS],
             (instregex "(t|t2)STM(DB|IA)$")>;
// Updating forms (and push/pop) list the base update as their first write.
def : InstRW<[M85BaseUpdate, M85WriteLdWide, M85SingleIssue, M85Read_ISS],
             (instregex "(t|t2)LDM(DB|IA)_UPD$",
                        "tPOP")>;
def : InstRW<[M85BaseUpdate, M85WriteStWide, M85SingleIssue, M85Read_ISS],
             (instregex "(t|t2)STM(DB|IA)_UPD$",
                        "tPUSH")>;
275
276// Load/store doubles
277
// Doubleword stores: both data operands are read with M85Read_EX2, the
// base with M85Read_ISS.
def : InstRW<[M85BaseUpdate, M85WriteStWide,
              M85Read_EX2, M85Read_EX2, M85Read_ISS],
             (instregex "t2STRD_(PRE|POST)")>;
def : InstRW<[M85WriteStWide,
              M85Read_EX2, M85Read_EX2, M85Read_ISS],
             (instregex "t2STRDi")>;
// Doubleword loads: wide write for the first result, M85LoadLatency1 for
// the second.
def : InstRW<[M85WriteLdWide, M85LoadLatency1, M85BaseUpdate, M85Read_ISS],
             (instregex "t2LDRD_(PRE|POST)")>;
def : InstRW<[M85WriteLdWide, M85LoadLatency1, M85Read_ISS],
             (instregex "t2LDRDi")>;
287
288// Word load / preload
// Word loads and preloads, one entry per addressing form.
// PC-relative / literal.
def : InstRW<[WriteLd],
             (instregex "t2LDRpc",
                        "t2PL[DI]pci",
                        "tLDRpci")>;
// Base register plus immediate.
def : InstRW<[WriteLd, M85Read_ISS],
             (instregex "t2LDR(i|T)",
                        "t2PL[DI](W)?i",
                        "tLDRi")>;
// Base register plus index register.
def : InstRW<[WriteLd, M85Read_ISS, M85Read_ISS],
             (instregex "t2LDRs",
                        "t2PL[DI](w)?s",
                        "tLDRr")>;
// Pre/post-indexed, with base update.
def : InstRW<[WriteLd, M85BaseUpdate, M85Read_ISS],
             (instregex "t2LDR_(POST|PRE)")>;
297
298// Stores
// Integer stores.  The first read uses M85Read_EX2 and the remaining
// reads use M85Read_ISS.
// Pre/post-indexed, with base update listed first.
def : InstRW<[M85BaseUpdate, WriteST, M85Read_EX2, M85Read_ISS],
             (instregex "t2STR(B|H)?_(POST|PRE)")>;
// Base register plus index register.
def : InstRW<[WriteST, M85Read_EX2, M85Read_ISS, M85Read_ISS],
             (instregex "t2STR(B|H)?s$",
                        "tSTR(B|H)?r$")>;
// Base register plus immediate.
def : InstRW<[WriteST, M85Read_EX2, M85Read_ISS],
             (instregex "t2STR(B|H)?(i|T)",
                        "tSTR(B|H)?i$",
                        "tSTRspi")>;
305
306// TBB/TBH - single-issue only
307
// Table-branch load: occupies a load unit and is marked single-issue.
def M85TableLoad : SchedWriteRes<[M85UnitLoad]> {
  let SingleIssue = 1;
}

// Both register operands are read with M85Read_ISS.
def : InstRW<[M85TableLoad, M85Read_ISS, M85Read_ISS], (instregex "t2TB")>;
312
313// VFP/MVE loads and stores
314//   Note: timing for VLDR/VSTR special has not been broken out
315//   Note 2: see notes at top of file for the reason load latency is 1 and
316//           store data is in EX3.
317
// Scalar FP loads: SP uses one load unit and one vector port, DP uses both
// of each.
def M85LoadSP : SchedWriteRes<[M85UnitLoad, M85UnitVPort]>;
def M85LoadDP : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH,
                               M85UnitVPortL, M85UnitVPortH]>;
// System-register load: additionally occupies every VFP unit.
def M85LoadSys : SchedWriteRes<[M85UnitLoad, M85UnitVPort,
                                M85UnitVFPA, M85UnitVFPB,
                                M85UnitVFPC, M85UnitVFPD]> {
  let Latency = 4;
}

// Scalar FP stores, mirroring the loads.
def M85StoreSP : SchedWriteRes<[M85UnitStore, M85UnitVPort]>;
def M85StoreDP : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH,
                                M85UnitVPortL, M85UnitVPortH]>;
def M85StoreSys : SchedWriteRes<[M85UnitStore, M85UnitVPort,
                                 M85UnitVFPA, M85UnitVFPB,
                                 M85UnitVFPC, M85UnitVFPD]>;

// MVE loads/stores: the load/store units are released after 2 cycles, the
// vector ports after 1, and the instruction ends its issue group.
def M85LoadMVE : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH,
                                M85UnitVPortL, M85UnitVPortH]> {
  let ReleaseAtCycles = [2,2,1,1];
  let EndGroup = 1;
}
def M85LoadMVELate : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH,
                                    M85UnitVPortL, M85UnitVPortH]> {
  let ReleaseAtCycles = [2,2,1,1];
  let EndGroup = 1;
  let Latency = 4; // 3 cycles later
}
def M85StoreMVE : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH,
                                 M85UnitVPortL, M85UnitVPortH]> {
  let ReleaseAtCycles = [2,2,1,1];
  let EndGroup = 1;
}
340
// Scalar VLDR: base register read with M85Read_ISS.
def : InstRW<[M85LoadSP,  M85Read_ISS], (instregex "VLDR(S|H)$")>;
def : InstRW<[M85LoadSys, M85Read_ISS], (instregex "VLDR_")>;
def : InstRW<[M85LoadDP,  M85Read_ISS], (instregex "VLDRD$")>;

// Scalar VSTR: data read with M85Read_EX3 (M85Read_EX1 for the system
// register form), base register read with M85Read_ISS.
def : InstRW<[M85StoreSP,  M85Read_EX3, M85Read_ISS],
             (instregex "VSTR(S|H)$")>;
def : InstRW<[M85StoreSys, M85Read_EX1, M85Read_ISS],
             (instregex "VSTR_")>;
def : InstRW<[M85StoreDP,  M85Read_EX3, M85Read_ISS],
             (instregex "VSTRD$")>;
347
// MVE interleaving loads (VLD2/VLD4) use the late-result write; the
// write-back forms add a base update.
def : InstRW<[M85LoadMVELate, M85Read_ISS],
               (instregex "MVE_VLD[24]._[0-9]+$")>;
def : InstRW<[M85LoadMVELate, M85MVERBaseUpdate, M85Read_ISS],
               (instregex "MVE_VLD[24].*wb")>;

// Contiguous VLDR.
def : InstRW<[M85LoadMVE, M85Read_ISS],
               (instregex "MVE_VLDR.*(8|16|32|64)$")>;
// Gather loads with a vector offset: single issue, two reads at ISS.
// The alternation previously listed "_rq" twice; the duplicate is dropped
// and the pattern matches exactly the same instruction names.
def : InstRW<[M85LoadMVE, M85SingleIssue, M85Read_ISS, M85Read_ISS],
               (instregex "MVE_VLDR.*(_rq|_rq_u)$")>;
// Gather loads with a vector base: single issue.
def : InstRW<[M85LoadMVE, M85SingleIssue, M85Read_ISS],
               (instregex "MVE_VLDR.*_qi$")>;
// Post/pre-indexed forms; "[^i]_pre" excludes the "qi_pre" names, which
// are matched by the last entry instead.
def : InstRW<[M85MVERBaseUpdate, M85LoadMVE, M85Read_ISS],
               (instregex "MVE_VLDR.*(_post|[^i]_pre)$")>;
def : InstRW<[M85MVEQBaseUpdate, M85SingleIssue, M85LoadMVE, M85Read_ISS],
               (instregex "MVE_VLDR.*(qi_pre)$")>;
362
// MVE interleaving stores (VST2/VST4): data read with M85Read_EX3, base
// with M85Read_ISS; the write-back forms add a base update.
def : InstRW<[M85StoreMVE, M85Read_EX3, M85Read_ISS],
               (instregex "MVE_VST[24]._[0-9]+$")>;
def : InstRW<[M85StoreMVE, M85Read_EX3, M85MVERBaseUpdate, M85Read_ISS],
               (instregex "MVE_VST[24].*wb")>;

// Contiguous VSTR.
def : InstRW<[M85StoreMVE, M85Read_EX3, M85Read_ISS],
               (instregex "MVE_VSTR.*(8|16|32|64)$")>;
// Scatter stores with a vector offset: single issue, two reads at ISS.
// The alternation previously listed "_rq" twice; the duplicate is dropped
// and the pattern matches exactly the same instruction names.
def : InstRW<[M85StoreMVE, M85SingleIssue, M85Read_EX3, M85Read_ISS, M85Read_ISS],
               (instregex "MVE_VSTR.*(_rq|_rq_u)$")>;
// Scatter stores with a vector base: single issue.
def : InstRW<[M85StoreMVE, M85SingleIssue, M85Read_EX3, M85Read_ISS],
               (instregex "MVE_VSTR.*_qi$")>;
// Post/pre-indexed forms; "[^i]_pre" excludes the "qi_pre" names, which
// are matched by the last entry instead.
def : InstRW<[M85MVERBaseUpdate, M85StoreMVE, M85Read_EX3, M85Read_ISS],
               (instregex "MVE_VSTR.*(_post|[^i]_pre)$")>;
def : InstRW<[M85MVEQBaseUpdate, M85SingleIssue, M85StoreMVE,
              M85Read_EX3, M85Read_ISS],
               (instregex "MVE_VSTR.*(qi_pre)$")>;
378
379// Load/store multiples end issue groups.
380
// FP load/store multiple: wide write, single issue.  Stores list an
// M85Read_EX3 read after the M85Read_ISS read.
def : InstRW<[M85WriteLdWide, M85SingleIssue, M85Read_ISS],
             (instregex "VLDM(S|D|Q)(DB|IA)$")>;
def : InstRW<[M85WriteStWide, M85SingleIssue, M85Read_ISS, M85Read_EX3],
             (instregex "VSTM(S|D|Q)(DB|IA)$")>;
// Updating forms (plus VLLDM/VLSTM) list the base update first.
def : InstRW<[M85BaseUpdate, M85WriteLdWide, M85SingleIssue, M85Read_ISS],
             (instregex "VLDM(S|D|Q)(DB|IA)_UPD$",
                        "VLLDM")>;
def : InstRW<[M85BaseUpdate, M85WriteStWide, M85SingleIssue,
              M85Read_ISS, M85Read_EX3],
             (instregex "VSTM(S|D|Q)(DB|IA)_UPD$",
                        "VLSTM")>;
390
391//===---------------------------------------------------------------------===//
392// Sched definitions for ALU
393//
394
395// Non-small shifted ALU operands are read a cycle early; small LSLs
396// aren't, as they don't require the shifter.
397
// ALU write that also occupies the first shifter.
def M85NonsmallShiftWrite : SchedWriteRes<[M85UnitALU, M85UnitShift1]> {
  let Latency = 1;
}

// Write variant for shifted-operand ALU instructions.  It has a single
// unconditional case, so it always resolves to M85NonsmallShiftWrite.
def M85WriteALUsi : SchedWriteVariant<[
  SchedVar<NoSchedPred, [M85NonsmallShiftWrite]>
]>;

// Read advance of -1 restricted to results produced by the listed load
// writes.
def M85Ex1ReadNoFastBypass
    : SchedReadAdvance<-1, [WriteLd, M85WriteLdWide, M85LoadLatency1]>;

// Read variant for the shifted operand.  It has a single unconditional
// case, so it always resolves to M85Read_ISS.
def M85ReadALUsi : SchedReadVariant<[
  SchedVar<NoSchedPred, [M85Read_ISS]>
]>;

// Shifted-register forms: first source read with M85Read_EX1, shifted
// source read through M85ReadALUsi.
def : InstRW<[M85WriteALUsi, M85Read_EX1, M85ReadALUsi],
             (instregex "t2(ADC|ADDS|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|"
                        "SUBS|CMP|CMNz|TEQ|TST)rs$")>;
def : InstRW<[M85WriteALUsi, M85ReadALUsi], (instregex "t2MVNs")>;

// CortexM85 treats LSL #0 as needing a shifter. In practice the throughput
// seems to reliably be 2 when run on a cyclemodel, so we don't require a
// shift resource.
// NOTE(review): the write used below, M85WriteALUsi, resolves to
// M85NonsmallShiftWrite, which does list M85UnitShift1 - confirm which of
// the two is intended.
def : InstRW<[M85WriteALUsi, M85Read_EX1, M85ReadALUsi],
             (instregex "t2(ADC|ADDS|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|"
                        "SUBS|CMP|CMNz|TEQ|TST)rr$")>;
def : InstRW<[M85WriteALUsi, M85ReadALUsi], (instregex "t2MVNr")>;
425
426// Shift instructions: most pure shifts (i.e. MOV w/ shift) will use whichever
427// shifter is free, thus it is possible to dual-issue them freely with anything
428// else.  As a result, they are not modeled as needing a shifter.
429// RRX is odd because it must use the EX2 shifter, so it cannot dual-issue with
430// itself.
431//
432// Note that pure shifts which use the EX1 shifter would need their operands
433// a cycle earlier.  However, they are only forced to use the EX1 shifter
// when issuing against an RRX instruction, which should be rare.
435
// RRX is the one pure shift modeled with a shifter resource
// (M85UnitShift2, through M85WriteShift2).
def : InstRW<[M85WriteShift2], (instregex "t2RRX$")>;
// The other pure shifts and bit-field extracts are scheduled as plain ALU
// operations.
def : InstRW<[WriteALU],
             (instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)",
                        "t2MOVsr(a|l)")>;

// Instructions that use the shifter, but have normal timing

def : InstRW<[WriteALUsi, M85Slot0Only], (instregex "t2(BFC|BFI)$")>;
444
445// Stack pointer add/sub happens in EX1 with checks in EX2
446
// True when operand 0 of the instruction is the SP register.
def M85WritesToSPPred : MCSchedPredicate<CheckRegOperand<0, SP>>;

// Source read timing: M85Read_ISS when the predicate above holds,
// M85Read_EX1 otherwise.
def M85ReadForSP : SchedReadVariant<[
  SchedVar<M85WritesToSPPred, [M85Read_ISS]>,
  SchedVar<NoSchedPred,       [M85Read_EX1]>
]>;
// Shifted-source read timing: M85Read_ISS in both cases.
def M85ReadForSPShift : SchedReadVariant<[
  SchedVar<M85WritesToSPPred, [M85Read_ISS]>,
  SchedVar<NoSchedPred,       [M85Read_ISS]>
]>;

// SP-relative immediate add/sub always read at M85Read_ISS.
def : InstRW<[WriteALU, M85Read_ISS],
             (instregex "tADDspi",
                        "tSUBspi")>;
// Forms whose read timing depends on the destination register.
def : InstRW<[WriteALU, M85ReadForSP],
             (instregex "t2(ADD|SUB)ri",
                        "t2MOVr",
                        "tMOVr")>;
def : InstRW<[WriteALU, M85ReadForSP, M85ReadForSP],
             (instregex "tADDrSP",
                        "tADDspr",
                        "tADDhirr")>;
def : InstRW<[M85WriteALUsi, M85ReadForSP, M85ReadForSPShift],
             (instregex "t2(ADD|SUB)rs")>;

// CLZ is a plain ALU write that also occupies M85UnitSlot0.
def : InstRW<[WriteALU, M85Slot0Only], (instregex "t2CLZ")>;
468
469// MAC operations that don't have SchedRW set
470
// Dual multiply-accumulate: two multiplicand reads, then the accumulator
// read.
def : InstRW<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC],
             (instregex "t2SML[AS]D")>;

// Divides are special because they stall for their latency, and so look like
// two cycles as far as scheduling opportunities go.  M85Write2 is listed
// first so that the operand latency is 2 while WriteDIV keeps the
// instruction latency at 7.
// Divide operands are read early.

def : InstRW<[M85Write2, WriteDIV, M85Read_ISS, M85Read_ISS, WriteALU],
             (instregex "t2(S|U)DIV")>;
480
481// DSP extension operations
482
// DSP writes that use the SIMD unit, an ALU and M85UnitSlot0; the number in
// the name is the latency.
def M85WriteSIMD1 : SchedWriteRes<[M85UnitSIMD, M85UnitALU, M85UnitSlot0]> {
  let Latency = 1;
}
def M85WriteSIMD2 : SchedWriteRes<[M85UnitSIMD, M85UnitALU, M85UnitSlot0]> {
  let Latency = 2;
}

// The same writes with the first shifter added to the resource list.
def M85WriteShSIMD0
    : SchedWriteRes<[M85UnitSIMD, M85UnitALU, M85UnitShift1, M85UnitSlot0]> {
  let Latency = 0; // Finishes at EX1
}
def M85WriteShSIMD1
    : SchedWriteRes<[M85UnitSIMD, M85UnitALU, M85UnitShift1, M85UnitSlot0]> {
  let Latency = 1;
}
def M85WriteShSIMD2
    : SchedWriteRes<[M85UnitSIMD, M85UnitALU, M85UnitShift1, M85UnitSlot0]> {
  let Latency = 2;
}

// Saturation.
def : InstRW<[M85WriteShSIMD2, M85Read_ISS],
             (instregex "t2(S|U)SAT")>;
// Sign/zero extension.
def : InstRW<[M85WriteSIMD1, ReadALU],
             (instregex "(t|t2)(S|U)XT(B|H)")>;
// Parallel add/subtract and SEL.
def : InstRW<[M85WriteSIMD1, ReadALU, ReadALU],
             (instregex "t2(S|SH|U|UH)(ADD16|ADD8|ASX|SAX|SUB16|SUB8)",
                        "t2SEL")>;
// Saturating parallel add/subtract and USAD8.
def : InstRW<[M85WriteSIMD2, ReadALU, ReadALU],
             (instregex "t2(Q|UQ)(ADD|ASX|SAX|SUB)",
                        "t2USAD8")>;
// Saturating doubling add/subtract.
def : InstRW<[M85WriteShSIMD2, M85Read_ISS, M85Read_ISS],
             (instregex "t2QD(ADD|SUB)")>;
// Bit/byte reversal.
def : InstRW<[M85WriteShSIMD0, M85Read_ISS],
             (instregex "t2(RBIT|REV)",
                        "tREV")>;
// Pack halfword and extend-and-add.
def : InstRW<[M85WriteShSIMD1, ReadALU, M85Read_ISS],
             (instregex "t2PKH(BT|TB)",
                        "t2(S|U)XTA")>;
// USADA8: the third operand is read with M85Read_EX2.
def : InstRW<[M85WriteSIMD2, ReadALU, ReadALU, M85Read_EX2],
             (instregex "t2USADA8")>;
519
520// MSR/MRS
// MSR/MRS use the non general purpose (single issue) write.
def : InstRW<[M85NonGeneralPurpose],
             (instregex "MSR",
                        "MRS")>;

// 64-bit shift operations in EX3

// Long-shift write: occupies the long shifter and an ALU.
def M85WriteLShift : SchedWriteRes<[M85UnitLShift, M85UnitALU]> {
  let Latency = 2;
}
// Resource-free latency-2 write, used for the second result of the 64-bit
// shifts below.
def M85WriteLat2 : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let Latency = 2;
}

// 64-bit shifts with two register reads.
def : InstRW<[M85WriteLShift, M85WriteLat2, M85Read_EX2, M85Read_EX2],
             (instregex "MVE_(ASRLi|LSLLi|LSRL|SQSHLL|SRSHRL|UQSHLL|URSHRL)$")>;
// 64-bit shifts with three register reads.
def : InstRW<[M85WriteLShift, M85WriteLat2,
              M85Read_EX2, M85Read_EX2, M85Read_EX2],
             (instregex "MVE_(ASRLr|LSLLr|SQRSHRL|UQRSHLL)$")>;
// Single-result shifts with two register reads.
def : InstRW<[M85WriteLShift, M85Read_EX2, M85Read_EX2],
             (instregex "MVE_(SQRSHR|UQRSHL)$")>;
// Single-result shifts with one register read.
def : InstRW<[M85WriteLShift, M85Read_EX2],
             (instregex "MVE_(SQSHL|SRSHR|UQSHL|URSHR)$")>;
539
540// Loop control/branch future instructions
541
// Loop-end write: no micro-ops and a latency of -2.
def M85LE : SchedWriteRes<[]> {
  let Latency = -2;
  let NumMicroOps = 0;
}

// Branch-future instructions are scheduled as plain ALU operations.
def : InstRW<[WriteALU], (instregex "t2BF(_|Lr|i|Li|r)")>;

// Loop setup instructions are scheduled as plain ALU operations too.
def : InstRW<[WriteALU], (instregex "MVE_LCTP")>;
def : InstRW<[WriteALU],
             (instregex "t2DLS",
                        "t2WLS",
                        "MVE_DLSTP",
                        "MVE_WLSTP")>;

// Loop-end instructions.
def : InstRW<[M85LE], (instregex "t2LE$")>;
// LE is executed at ISS
def : InstRW<[M85LE, M85Read_ISSm1],
             (instregex "t2LEUpdate",
                        "MVE_LETP")>;

// Conditional selects

def : InstRW<[M85WriteLShift, M85Read_EX2, M85Read_EX2, M85Read_EX2],
             (instregex "t2(CSEL|CSINC|CSINV|CSNEG)")>;
557
558//===---------------------------------------------------------------------===//
559// Sched definitions for FP and MVE operations
560
// Resource-free, zero-micro-op writes that only carry a latency.  They are
// placed ahead of the real write in an InstRW list.
def M85OverrideVFPLat5 : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let Latency = 5;
}
def M85OverrideVFPLat4 : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let Latency = 4;
}
def M85OverrideVFPLat3 : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let Latency = 3;
}
def M85OverrideVFPLat2 : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let Latency = 2;
}

// Latency-1 writes.  The letter names the VFP unit (AB: vector port and
// slot 0 only); S variants take one unit of each group, D variants take
// both the L and H units.
let Latency = 1 in {
  def M85GroupALat1S
      : SchedWriteRes<[M85UnitVFPA, M85UnitVPort, M85UnitSlot0]>;
  def M85GroupBLat1S
      : SchedWriteRes<[M85UnitVFPB, M85UnitVPort, M85UnitSlot0]>;
  def M85GroupCLat1S
      : SchedWriteRes<[M85UnitVFPC, M85UnitVPort, M85UnitSlot0]>;
  def M85GroupALat1D
      : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH,
                       M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
  def M85GroupBLat1D
      : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH,
                       M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
  def M85GroupCLat1D
      : SchedWriteRes<[M85UnitVFPCL, M85UnitVFPCH,
                       M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
  def M85GroupABLat1S
      : SchedWriteRes<[M85UnitVPort, M85UnitSlot0]>;
}

// Latency-2 writes, named the same way.
let Latency = 2 in {
  def M85GroupBLat2S
      : SchedWriteRes<[M85UnitVFPB, M85UnitVPort, M85UnitSlot0]>;
  def M85GroupBLat2D
      : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH,
                       M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
  def M85GroupABLat2S
      : SchedWriteRes<[M85UnitVPort, M85UnitSlot0]>;
  def M85GroupABLat2D
      : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
}
583
584// Instructions which are missing default schedules
// Max/min number.
def : InstRW<[M85GroupALat1S], (instregex "V(FP_VMAXNM|FP_VMINNM)(H|S)$")>;
def : InstRW<[M85GroupALat1D], (instregex "V(FP_VMAXNM|FP_VMINNM)D$")>;
// Compares.
def : InstRW<[M85GroupCLat1S], (instregex "VCMPE?Z?(H|S)$")>;
def : InstRW<[M85GroupCLat1D], (instregex "VCMPE?Z?D$")>;
// Conversions and round-to-integer, half/single precision.
def : InstRW<[M85GroupBLat2S],
             (instregex "VCVT(A|M|N|P|R|X|Z)(S|U)(H|S)",
                        "VRINT(A|M|N|P|R|X|Z)(H|S)")>;
// Conversions and round-to-integer involving double precision.
def : InstRW<[M85GroupBLat2D],
             (instregex "VCVT(B|T)(DH|HD)",
                        "VCVT(A|M|N|P|R|X|Z)(S|U)D",
                        "V.*TOD",
                        "VTO.*D",
                        "VCVTDS",
                        "VCVTSD",
                        "VRINT(A|M|N|P|R|X|Z)D")>;
// Insert half.
def : InstRW<[M85GroupABLat1S], (instregex "VINSH")>;
// Absolute value / negate.
def : InstRW<[M85GroupBLat1S], (instregex "V(ABS|NEG)(H|S)$")>;
def : InstRW<[M85GroupBLat1D], (instregex "V(ABS|NEG)D$")>;
599
600// VMRS/VMSR
// Single-issue system register transfer writes on the vector port; they
// differ only in latency.
def M85VMRSEarly : SchedWriteRes<[M85UnitVPort]> {
  let SingleIssue = 1;
  let Latency = 2;
}
def M85VMRSLate : SchedWriteRes<[M85UnitVPort]> {
  let SingleIssue = 1;
  let Latency = 4;
}
def M85VMSREarly : SchedWriteRes<[M85UnitVPort]> {
  let SingleIssue = 1;
  let Latency = 1;
}
def M85VMSRLate : SchedWriteRes<[M85UnitVPort]> {
  let SingleIssue = 1;
  let Latency = 3;
}

// True when operand 0 is a register operand and that register is PC.
def M85FPSCRFlagPred
    : MCSchedPredicate<CheckAll<[CheckIsRegOperand<0>,
                                 CheckRegOperand<0, PC>]>>;

// VMRS from FPSCR: the early write when the predicate holds, the late
// write otherwise.
def M85VMRSFPSCR : SchedWriteVariant<[
  SchedVar<M85FPSCRFlagPred, [M85VMRSEarly]>,
  SchedVar<NoSchedPred,      [M85VMRSLate]>
]>;

def : InstRW<[M85VMSREarly, M85Read_EX2],
             (instregex "VMSR$",
                        "VMSR_FPSCR_NZCVQC",
                        "VMSR_P0",
                        "VMSR_VPR")>;
def : InstRW<[M85VMRSEarly],
             (instregex "VMRS_P0",
                        "VMRS_VPR",
                        "FMSTAT")>;
def : InstRW<[M85VMRSLate], (instregex "VMRS_FPSCR_NZCVQC")>;
def : InstRW<[M85VMRSFPSCR], (instregex "VMRS$")>;
// Not matching properly
//def : InstRW<[M85VMSRLate, M85Read_EX2], (instregex "VMSR_FPCTX(NS|S)")>;
//def : InstRW<[M85VMRSLate], (instregex "VMRS_FPCTX(NS|S)")>;
625
626// VSEL cannot bypass in its implied $cpsr operand; model as earlier read
// VSEL: the third read is the early one (M85Read_ISS).
def : InstRW<[M85GroupBLat1S, ReadALU, ReadALU, M85Read_ISS],
             (instregex "VSEL.*(S|H)$")>;
def : InstRW<[M85GroupBLat1D, ReadALU, ReadALU, M85Read_ISS],
             (instregex "VSEL.*D$")>;

// VMOV
// FP register to FP register, and FP constants.
def : InstRW<[WriteFPMOV],
             (instregex "VMOV(H|S)$",
                        "FCONST(H|S)")>;
// Core register to FP register: source read with M85Read_EX2.
def : InstRW<[WriteFPMOV, M85Read_EX2],
             (instregex "VMOVHR$",
                        "VMOVSR$")>;
// FP register to core register.
def : InstRW<[M85GroupABLat2S],
             (instregex "VMOVRH$",
                        "VMOVRS$")>;
// 64-bit moves and constants.
def : InstRW<[M85WriteFPMOV64], (instregex "VMOVD$")>;
def : InstRW<[M85WriteFPMOV64], (instregex "FCONSTD")>;
// Two core registers to FP register(s).
def : InstRW<[M85WriteFPMOV64, M85Read_EX2, M85Read_EX2],
             (instregex "VMOVDRR")>;
def : InstRW<[M85WriteFPMOV64, M85Write1, M85Read_EX2, M85Read_EX2],
             (instregex "VMOVSRR")>;
// FP register(s) to two core registers.
def : InstRW<[M85GroupABLat2D, M85Write2],
             (instregex "VMOV(RRD|RRS)")>;

// These shouldn't even exist, but Cortex-M55 defines them, so here they are.
def : InstRW<[WriteFPMOV, M85Read_EX2], (instregex "VGETLNi32$")>;
def : InstRW<[M85GroupABLat2S], (instregex "VSETLNi32")>;
655
656// Larger-latency overrides
657
// Half-precision divide/square-root write (latency 8).
def M85FPDIV16 : SchedWriteRes<[M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
  let Latency = 8;
}

// Each entry lists a short override latency first, followed by the write
// that carries the resources and the long latency.
// Divides.
def : InstRW<[M85OverrideVFPLat2, M85FPDIV16],    (instregex "VDIVH")>;
def : InstRW<[M85OverrideVFPLat2, WriteFPDIV32],  (instregex "VDIVS")>;
def : InstRW<[M85OverrideVFPLat2, WriteFPDIV64],  (instregex "VDIVD")>;
// Square roots.
def : InstRW<[M85OverrideVFPLat2, M85FPDIV16],    (instregex "VSQRTH")>;
def : InstRW<[M85OverrideVFPLat2, WriteFPSQRT32], (instregex "VSQRTS")>;
def : InstRW<[M85OverrideVFPLat2, WriteFPSQRT64], (instregex "VSQRTD")>;
// Double-precision multiply and add/subtract.
def : InstRW<[M85OverrideVFPLat3, WriteFPMUL64],  (instregex "V(MUL|NMUL)D")>;
def : InstRW<[M85OverrideVFPLat2, WriteFPALU64],  (instregex "V(ADD|SUB)D")>;
669
670// Multiply-accumulate.  Chained SP timing is correct; rest need overrides
671// Double-precision chained MAC should also be seen as having latency of 5,
672// as stalls stall everything.
673
// Half-precision chained MAC: accumulator read with ReadFPMAC, multiplicands
// with ReadFPMUL.
def : InstRW<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL],
             (instregex "VN?ML(A|S)H")>;

// Double-precision chained MAC: latency-5 override ahead of WriteFPMAC64;
// all three operands are read with ReadFPMUL.
def : InstRW<[M85OverrideVFPLat5, WriteFPMAC64,
              ReadFPMUL, ReadFPMUL, ReadFPMUL],
             (instregex "VN?ML(A|S)D$")>;

// Single-precision fused MACs look like latency 4 with advance of 2.

// Read advance of 2 for the accumulator of the fused MACs below.
def M85ReadFPMAC2 : SchedReadAdvance<2>;

def : InstRW<[M85OverrideVFPLat4, WriteFPMAC32,
              M85ReadFPMAC2, ReadFPMUL, ReadFPMUL],
             (instregex "VF(N)?M(A|S)(H|S)$")>;

// Double-precision fused MAC looks like latency 4.

def : InstRW<[M85OverrideVFPLat4, WriteFPMAC64,
              ReadFPMUL, ReadFPMUL, ReadFPMUL],
             (instregex "VF(N)?M(A|S)D$")>;
694
// MVE beatwise instructions
// NOTE: Q-register timing for the 2nd beat is off by a cycle and needs
//       DAG overrides to correctly set latencies.
// NOTE2: MVE integer MAC->MAC accumulate latencies are set as if the
//        accumulate value arrives from an unmatching MAC instruction;
//        matching ones are handled via DAG mutation.  These are marked as
//        "limited accumulate bypass"
//
// Naming used below: M85Grp<units>Lat<N>Mve[R].  Writes without the R suffix
// are declared with Latency = N; writes with the R suffix are declared with
// Latency = N + 2.
// NOTE(review): presumably R marks a result delivered to a GPR -- confirm.
//
// The M85Lat<N>Mve[R] writes reserve no resources and issue no micro-ops; they
// are meant to be listed next to a resource-carrying write for instructions
// with more than one result.
//
// Where present, ReleaseAtCycles holds the leading VFP resource(s) for two
// cycles and every remaining resource for one.  Every write sets EndGroup.

// Declared latency 4: "Lat2 ... R" writes plus the plain latency-4 group-B
// write.
let Latency = 4, EndGroup = 1 in {
   def M85GrpALat2MveR : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
     let ReleaseAtCycles = [2,2,1,1,1];
   }
   def M85GrpABLat2MveR : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
   def M85GrpBLat2MveR : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
     let ReleaseAtCycles = [2,2,1,1,1];
   }
   def M85Lat2MveR : SchedWriteRes<[]> { let NumMicroOps = 0; }
   def M85GrpBLat4Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
     let ReleaseAtCycles = [2,2,1,1,1];
   }
}
// Declared latency 3: the plain latency-3 group-B write plus "Lat1 ... R"
// writes.
let Latency = 3, EndGroup = 1 in {
   def M85GrpBLat3Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
     let ReleaseAtCycles = [2,2,1,1,1];
   }
   def M85GrpBLat1MveR : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
     let ReleaseAtCycles = [2,2,1,1,1];
   }
   def M85Lat1MveR : SchedWriteRes<[]> { let NumMicroOps = 0; }
}
// Declared latency 2.
let Latency = 2, EndGroup = 1 in {
   def M85GrpALat2Mve : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
     let ReleaseAtCycles = [2,2,1,1,1];
   }
   def M85GrpABLat2Mve : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
   def M85GrpBLat2Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
     let ReleaseAtCycles = [2,2,1,1,1];
   }
   def M85Lat2Mve : SchedWriteRes<[]> { let NumMicroOps = 0; }
}
// Declared latency 1.  Group D reserves the single M85UnitVFPD resource
// instead of an L/H pair, hence the four-entry ReleaseAtCycles list.
let Latency = 1, EndGroup = 1 in {
   def M85GrpALat1Mve : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
     let ReleaseAtCycles = [2,2,1,1,1];
   }
   def M85GrpABLat1Mve : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]>;
   def M85GrpBLat1Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
     let ReleaseAtCycles = [2,2,1,1,1];
   }
   def M85GrpCLat1Mve : SchedWriteRes<[M85UnitVFPCL, M85UnitVFPCH, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
     let ReleaseAtCycles = [2,2,1,1,1];
   }
   def M85GrpDLat1Mve : SchedWriteRes<[M85UnitVFPD, M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
     let ReleaseAtCycles = [2,1,1,1];
   }
}
750
// MVE moves and bitwise operations.  All of these use the "AB" writes, which
// reserve only the vector ports and slot 0 (no VFP sub-unit).
// NOTE(review): M85Read_EX<n> records are declared earlier in the file; going
// by the file header, EX1 reads are needed one cycle before the EX2 reference
// point -- confirm against their definitions.

def : InstRW<[M85GrpABLat1Mve, M85Read_EX1, M85Read_EX2, M85Read_EX2],
                (instregex "MVE_VMOV_q_rr")>;

def : InstRW<[M85GrpABLat1Mve, M85Read_EX2],
                (instregex "MVE_VMOV_to_lane_(8|16|32)")>;

def : InstRW<[M85GrpABLat1Mve],
                (instregex "MVE_VAND$",
                           "MVE_VBIC$", "MVE_VBICimm",
                           "MVE_VCLSs(8|16|32)",
                           "MVE_VCLZs(8|16|32)",
                           "MVE_VEOR",
                           "MVE_VMOVimmf32", "MVE_VMOVimmi(8|16|32|64)",
                           "MVE_VMVN$", "MVE_VMVNimmi(16|32)",
                           "MVE_VORN$",
                           "MVE_VORR$", "MVE_VORRimm", "MQPRCopy",
                           "MVE_VPSEL",
                           "MVE_VREV(16|32|64)_(8|16|32)"
                           )>;

// Two results: the second takes the resource-free M85Lat2MveR write.
def : InstRW<[M85GrpABLat2MveR, M85Lat2MveR],
                (instregex "MVE_VMOV_rr_q")>;

def : InstRW<[M85GrpABLat2MveR],
                (instregex "MVE_VMOV_from_lane_(32|u8|s8|u16|s16)")>;
776
// MVE instructions scheduled on the group-A writes (M85UnitVFPAL/AH).
// Lists that carry a second write (M85Lat1MveR / M85Lat2MveR) describe
// instructions with two results; the second write reserves no resources.

def : InstRW<[M85GrpALat1Mve, M85Lat1MveR,
              M85Read_EX1, M85Read_EX1, M85Read_EX2],
                (instregex "MVE_VADC$")>;

def : InstRW<[M85GrpALat1Mve, M85Lat1MveR],
                (instregex "MVE_VADCI")>;

// Latency-1 "_qr" and related forms with explicit operand read stages.
def : InstRW<[M85GrpALat1Mve, M85Read_EX1, M85Read_EX2],
                (instregex "MVE_VADD_qr_i(8|16|32)",
                           "MVE_VBRSR(16|32|8)",
                           "MVE_VHADD_qr_[su](8|16|32)",
                           "MVE_VHSUB_qr_[su](8|16|32)",
                           "MVE_VQADD_qr_[su](8|16|32)",
                           "MVE_VQSUB_qr_[su](8|16|32)",
                           "MVE_VSHL_qr[su](8|16|32)",
                           "MVE_VSUB_qr_i(8|16|32)"
                )>;

// Latency-1 forms with default operand reads.
def : InstRW<[M85GrpALat1Mve],
                (instregex "MVE_VABD(s|u)(8|16|32)",
                           "MVE_VABS(s|u)(8|16|32)",
                           "MVE_V(MAX|MIN)A?[us](8|16|32)",
                           "MVE_VADDi(8|16|32)",
                           "MVE_VCADDi(8|16|32)",
                           "MVE_VHCADDs(8|16|32)",
                           "MVE_VHSUB[su](8|16|32)",
                           "MVE_VMOVL[su](8|16)[tb]h",
                           "MVE_VMOVNi(16|32)[tb]h",
                           "MVE_VMULL[BT]?[p](8|16|32)(bh|th)?",
                           "MVE_VNEGs(8|16|32)",
                           "MVE_VQABSs(8|16|32)",
                           "MVE_VQADD[su](8|16|32)",
                           "MVE_VQNEGs(8|16|32)",
                           "MVE_VQSUB[su](8|16|32)",
                           "MVE_VR?HADD[su](8|16|32)",
                           "MVE_VSBC$", "MVE_VSBCI",
                           "MVE_VSHL_by_vec[su](8|16|32)",
                           "MVE_VSHL_immi(8|16|32)",
                           "MVE_VSHLL_imm[su](8|16)[bt]h",
                           "MVE_VSHLL_lw[su](8|16)[bt]h",
                           "MVE_VSHRNi(16|32)[bt]h",
                           "MVE_VSHR_imm[su](8|16|32)",
                           "MVE_VSLIimm[su]?(8|16|32)",
                           "MVE_VSRIimm[su]?(8|16|32)",
                           "MVE_VSUBi(8|16|32)"
                 )>;

def : InstRW<[M85GrpALat2Mve, M85Lat2MveR, M85Read_EX2, M85Read_EX2],
                (instregex "MVE_V(D|I)WDUPu(8|16|32)")>;

def : InstRW<[M85GrpALat2Mve, M85Lat2MveR, M85Read_EX2],
                (instregex "MVE_V(D|I)DUPu(8|16|32)")>;

// Latency-2 "_qr" forms.
def : InstRW<[M85GrpALat2Mve, M85Read_EX1, M85Read_EX2],
                (instregex "MVE_V(Q|R|QR)SHL_qr[su](8|16|32)",
                           "MVE_VADD_qr_f(16|32)",
                           "MVE_VSUB_qr_f(16|32)"
                )>;

def : InstRW<[M85GrpALat1Mve, M85Read_EX2],
                (instregex "MVE_VDUP(8|16|32)")>;
838
// Simple FP operations on group B, latency 1.
def : InstRW<[M85GrpBLat1Mve],
                (instregex "MVE_VABSf(16|32)",
                           "MVE_V(MAX|MIN)NMA?f(16|32)",
                           "MVE_VNEGf(16|32)"
                )>;

// Across-vector operations.  These use the "...MveR" writes (declared with a
// latency two cycles above the number in their name); the "acc" forms also
// read their incoming accumulator operand(s) with M85Read_EX3.
def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR, M85Read_EX3, M85Read_EX3],
                (instregex "MVE_VADDLV[us]32acc")>;

def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR],
                (instregex "MVE_VADDLV[us]32no_acc")>;

def : InstRW<[M85GrpBLat2MveR, M85Read_EX3],
                (instregex "MVE_VADDV[us](8|16|32)acc"
                )>;

def : InstRW<[M85GrpALat2MveR, M85Read_EX3],
                (instregex "MVE_V(MAX|MIN)A?V[us](8|16|32)",
                           "MVE_VABAV(s|u)(8|16|32)"
                )>;

def : InstRW<[M85GrpALat2MveR],
                (instregex "MVE_VADDV[us](8|16|32)no_acc")>;

// Group A, latency 2, default operand reads.
def : InstRW<[M85GrpALat2Mve],
                (instregex "MVE_V(Q|R|QR)SHL_by_vec[su](8|16|32)",
                           "MVE_VABDf(16|32)",
                           "MVE_VADDf(16|32)",
                           "MVE_VCADDf(16|32)",
                           "MVE_VQMOVU?N[su](8|16|32)[tb]h",
                           "MVE_VQR?SHL(U_)?imm[su](8|16|32)",
                           "MVE_VQR?SHRN[bt]h[su](16|32)",
                           "MVE_VQR?SHRUNs(16|32)[bt]h",
                           "MVE_VRSHR_imm[su](8|16|32)",
                           "MVE_VRSHRNi(16|32)[bt]h",
                           "MVE_VSUBf(16|32)"
                 )>;

def : InstRW<[M85GrpBLat2MveR, M85Read_EX2],
                (instregex "MVE_V(MAX|MIN)NMA?Vf(16|32)")>;
879
// Integer multiplies, multiply-accumulates and conversions on group B.
// Entries tagged "limited accumulate bypass" are the MAC->MAC cases described
// in NOTE2 above the MVE write definitions.

def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2],
                (instregex "MVE_VMUL_qr_i(8|16|32)")>;

def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2],
                (instregex "MVE_VQDMULL_qr_s(16|32)[tb]h")>;

def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2],
                (instregex "MVE_VQR?DMULH_qr_s(8|16|32)")>;

def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX1, M85Read_EX3],
                // limited accumulate bypass
                (instregex "MVE_VMLAS?_qr_i(8|16|32)")>;

def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2],
                // limited accumulate bypass
                (instregex "MVE_VQR?DMLAS?H_qrs(8|16|32)")>;

def : InstRW<[M85GrpBLat2Mve],
                // limited accumulate bypass
                (instregex "MVE_VQR?DML[AS]DHX?s(8|16|32)")>;

// Across-vector multiply-accumulates.  The regexes containing an "a" (the
// accumulating forms) additionally list M85Read_EX3 reads.
def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR, M85Read_EX3, M85Read_EX3],
                (instregex "MVE_VR?ML[AS]LDAVH?ax?[su](8|16|32)")>;

def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR],
                (instregex "MVE_VR?ML[AS]LDAVH?x?[su](8|16|32)")>;

def : InstRW<[M85GrpBLat2MveR, M85Read_EX3],
                (instregex "MVE_VML[AS]DAVax?[su](8|16|32)")>;

def : InstRW<[M85GrpBLat2MveR],
                (instregex "MVE_VML[AS]DAVx?[su](8|16|32)")>;

// Group B, latency 2, default operand reads.
def : InstRW<[M85GrpBLat2Mve],
                (instregex "MVE_VCVTf16(u|s)16", "MVE_VCVTf32(u|s)32",
                           "MVE_VCVT(u|s)16f16", "MVE_VCVT(u|s)32f32",
                           "MVE_VCVTf16f32", "MVE_VCVTf32f16",
                           "MVE_VMULL[BT]?[su](8|16|32)(bh|th)?",
                           "MVE_VMUL(t1)*i(8|16|32)",
                           "MVE_VQDMULLs(16|32)[tb]h",
                           "MVE_VQR?DMULHi(8|16|32)",
                           "MVE_VR?MULH[su](8|16|32)",
                           "MVE_VRINTf(16|32)"
                )>;
924
// FP multiplies (latency 3) and fused multiply-accumulates (latency 4) on
// group B.  The FMA-style entries list M85Read_EX3 as their first read.

def : InstRW<[M85GrpBLat3Mve, M85Read_EX1, M85Read_EX2],
                (instregex "MVE_VMUL_qr_f(16|32)")>;

def : InstRW<[M85GrpBLat3Mve],
                (instregex "MVE_VCMULf(16|32)",
                           "MVE_VMULf(16|32)"
                )>;

def : InstRW<[M85GrpBLat4Mve, M85Read_EX3, M85Read_EX1, M85Read_EX2],
                (instregex "MVE_VFMA_qr_Sf(16|32)", // VFMAS
                           "MVE_VFMA_qr_f(16|32)" // VFMA
                )>;

def : InstRW<[M85GrpBLat4Mve, M85Read_EX3],
                (instregex "MVE_VCMLAf(16|32)")>;

def : InstRW<[M85GrpBLat4Mve, M85Read_EX3],
                (instregex "MVE_VFM(A|S)f(16|32)")>;
943
// Compares and predication.  VPT/VCMP use the group-C write; VCTP, VPNOT and
// VPST use the group-D write.  Regexes ending in "r" / "r$" cover the forms
// that list explicit operand read stages; the "$"-anchored ones without the
// trailing "r" use default reads.

def : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2],
                (instregex "MVE_VPTv(4|8)f(16|32)r")>;

def : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2],
                (instregex "MVE_VPTv(4|8|16)(i|s|u)(8|16|32)r")>;

def : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX2],
                (instregex "MVE_VCMP[isu](8|16|32)r$", "MVE_VCMPf(16|32)r$")>;

def : InstRW<[M85GrpDLat1Mve, M85Read_EX2],
                (instregex "MVE_VCTP(8|16|32|64)")>;

def : InstRW<[M85GrpCLat1Mve],
                (instregex "MVE_VCMPf(16|32)$", "MVE_VCMP[isu](8|16|32)$",
                           "MVE_VPTv(4|8)f(16|32)$",
                           "MVE_VPTv(4|8|16)(i|s|u)(8|16|32)$"
                )>;

def : InstRW<[M85GrpDLat1Mve],
                (instregex "MVE_VPNOT",
                           "MVE_VPST"
                )>;

// VSHLC lists the resource-free M85Lat2MveR write first and the group-A
// write second, the reverse of the other two-result entries in this file.
def : InstRW<[M85Lat2MveR, M85GrpALat2Mve, M85Read_EX1, M85Read_EX2],
                (instregex "MVE_VSHLC")>;
969
// VFP instructions

// Bind the generic WriteVLD1-4 / WriteVST1-4 SchedWrites to an empty resource
// list, so they reserve no M85 processor resources in this model.
// NOTE(review): presumably present only so these generic writes have a
// definition for this SchedModel -- confirm.
def : WriteRes<WriteVLD1, []>;
def : WriteRes<WriteVLD2, []>;
def : WriteRes<WriteVLD3, []>;
def : WriteRes<WriteVLD4, []>;
def : WriteRes<WriteVST1, []>;
def : WriteRes<WriteVST2, []>;
def : WriteRes<WriteVST3, []>;
def : WriteRes<WriteVST4, []>;
980
}  // SchedModel = CortexM85Model
982