xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleR52.td (revision c66ec88fed842fbaad62c30d510644ceb7bd2d71)
1//==- ARMScheduleR52.td - Cortex-R52 Scheduling Definitions -*- tablegen -*-=//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the SchedRead/Write data for the ARM Cortex-R52 processor.
10//
11//===----------------------------------------------------------------------===//
12
13// ===---------------------------------------------------------------------===//
// The Cortex-R52 is an in-order pipelined superscalar microprocessor with
// an 8-stage pipeline. It can issue a maximum of two instructions per cycle.
16// There are two ALUs, one LDST, one MUL  and a non-pipelined integer DIV.
17// A number of forwarding paths enable results of computations to be input
18// to subsequent operations before they are written to registers.
19// This scheduler is a MachineScheduler. See TargetSchedule.td for details.
20
// Machine model for the Cortex-R52 (in-order, issue width of two).
def CortexR52Model : SchedMachineModel {
  // The R52 is an in-order processor, so no micro-ops are buffered.
  let MicroOpBufferSize = 0;
  // Two micro-ops are dispatched per cycle.
  let IssueWidth = 2;
  // Optimistic load latency: assumes no misses.
  let LoadLatency = 1;
  // Penalty for a branch direction mispredict, including the PFU.
  let MispredictPenalty = 8;
  // The model covers the instructions applicable to the Cortex-R52 and is
  // not marked as complete.
  let CompleteModel = 0;
}
28
29
30//===----------------------------------------------------------------------===//
31// Define each kind of processor resource and number available.
32
// Every pipeline is modelled as a ProcResource. Because the Cortex-R52 is an
// in-order processor, BufferSize = 0 is applied to all of them at once.
let BufferSize = 0 in {
  def R52UnitALU   : ProcResource<2>; // Int ALU
  def R52UnitMAC   : ProcResource<1>; // Int MAC
  def R52UnitDiv   : ProcResource<1>; // Int Division
  def R52UnitLd    : ProcResource<1>; // Load/Store
  def R52UnitB     : ProcResource<1>; // Branch
  def R52UnitFPALU : ProcResource<2>; // FP ALU
  def R52UnitFPMUL : ProcResource<2>; // FP MUL
  def R52UnitFPDIV : ProcResource<1>; // FP DIV
}
44
// Cortex-R52 specific SchedReads, named after pipeline stages.
// Integer pipe: R52Read_ISS, R52Read_EX1, R52Read_EX2, R52Read_WRI.
foreach Stage = ["ISS", "EX1", "EX2", "WRI"] in
  def R52Read_#Stage : SchedRead;
// Floating-point pipe: R52Read_F0, R52Read_F1, R52Read_F2.
// F0 maps to the ISS stage of the integer pipe.
foreach Stage = ["F0", "F1", "F2"] in
  def R52Read_#Stage : SchedRead;
53
54
55//===----------------------------------------------------------------------===//
56// Subtarget-specific SchedWrite types which map ProcResources and set latency.
57
58let SchedModel = CortexR52Model in {
59
// ALU: the write occurs in late EX2 regardless of whether a shift was
// required, so all ALU flavours share one latency.
let Latency = 3 in {
  def : WriteRes<WriteALU,    [R52UnitALU]>;
  def : WriteRes<WriteALUsi,  [R52UnitALU]>;
  def : WriteRes<WriteALUsr,  [R52UnitALU]>;
  def : WriteRes<WriteALUSsr, [R52UnitALU]>;
}
65
// Compares: issued on an ALU with zero latency.
let Latency = 0 in {
  def : WriteRes<WriteCMP,   [R52UnitALU]>;
  def : WriteRes<WriteCMPsi, [R52UnitALU]>;
  def : WriteRes<WriteCMPsr, [R52UnitALU]>;
}
70
71// Multiply - aliased to sub-target specific later
72
// Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2).
// The divider is non-pipelined, so it is held for the full latency.
def : WriteRes<WriteDIV, [R52UnitDiv]> {
  let Latency = 8;
  let ResourceCycles = [8];
}
77
// Branches - LR written in Late EX2.
// Plain and linking branches use the branch unit; table branches use an ALU.
let Latency = 0 in {
  def : WriteRes<WriteBr,    [R52UnitB]>;
  def : WriteRes<WriteBrL,   [R52UnitB]>;
  def : WriteRes<WriteBrTbl, [R52UnitALU]>;
}
82
// Misc: a no-op uses no resource, has no latency and issues no micro-op.
def : WriteRes<WriteNoop, []> {
  let Latency = 0;
  let NumMicroOps = 0;
}
85
// Integer pipeline by-passes.
// ALU operands are needed in the EX1 stage.
def : ReadAdvance<ReadALU,   1>;
// Shift operands are needed in ISS.
def : ReadAdvance<ReadALUsr, 0>;
// Multiply and multiply-accumulate operands get no advance.
def : ReadAdvance<ReadMUL,   0>;
def : ReadAdvance<ReadMAC,   0>;
91
// Floating-point. Map target-defined SchedReadWrites to subtarget resources.

// FP multiply: the 64-bit form lists the FP multiplier twice.
let Latency = 6 in {
  def : WriteRes<WriteFPMUL32, [R52UnitFPMUL]>;
  def : WriteRes<WriteFPMUL64, [R52UnitFPMUL, R52UnitFPMUL]>;
}

// FP multiply-accumulate: internally two insns (MUL then ADD), hence the
// longer latency and the use of both the FP multiplier and the FP ALU.
let Latency = 11 in {
  def : WriteRes<WriteFPMAC32, [R52UnitFPMUL, R52UnitFPALU]>;
  def : WriteRes<WriteFPMAC64, [R52UnitFPMUL, R52UnitFPMUL,
                                R52UnitFPALU, R52UnitFPALU]>;
}
107
// FP divide takes a fixed number of cycles and is not pipelined, so the
// divider is reserved for the whole latency.
def : WriteRes<WriteFPDIV32, [R52UnitDiv]> {
  let Latency = 7;
  let ResourceCycles = [7];
}
def : WriteRes<WriteFPDIV64, [R52UnitDiv]> {
  let Latency = 17;
  let ResourceCycles = [17];
}

// FP square root uses the same unit and latencies as FP divide.
// NOTE(review): unlike the divides above, no ResourceCycles are given here;
// confirm whether square root should also reserve the unit for its latency.
def : WriteRes<WriteFPSQRT32, [R52UnitDiv]> {
  let Latency = 7;
}
def : WriteRes<WriteFPSQRT64, [R52UnitDiv]> {
  let Latency = 17;
}
120
// Overridden via InstRW for this processor; these entries carry no resources.
foreach VSTWrite = [WriteVST1, WriteVST2, WriteVST3, WriteVST4] in
  def : WriteRes<VSTWrite, []>;
126
// FP multiply and FP multiply-accumulate operands are read in F1.
def : ReadAdvance<ReadFPMUL, 1>;
def : ReadAdvance<ReadFPMAC, 1>;
129
130//===----------------------------------------------------------------------===//
131// Subtarget-specific SchedReadWrites.
132
// Forwarding information - based on the stage in which an operand is read.
// Integer pipe.
def : ReadAdvance<R52Read_ISS, 0>;
def : ReadAdvance<R52Read_EX1, 1>;
def : ReadAdvance<R52Read_EX2, 2>;
// Floating-point pipe.
def : ReadAdvance<R52Read_F0,  0>;
def : ReadAdvance<R52Read_F1,  1>;
def : ReadAdvance<R52Read_F2,  2>;
140
141
// Cortex-R52 specific SchedWrites for use with InstRW.

// Integer multiply(-accumulate). The "Hi" write has the same latency but
// issues no micro-op of its own.
let Latency = 4 in {
  def R52WriteMAC   : SchedWriteRes<[R52UnitMAC]>;
  def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> { let NumMicroOps = 0; }
}

// Integer divide: not pipelined, the divider is held for the full latency.
def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> {
  let Latency = 8;
  let ResourceCycles = [8];
}

// Loads and stores both go through the load/store unit.
let Latency = 4 in {
  def R52WriteLd : SchedWriteRes<[R52UnitLd]>;
  def R52WriteST : SchedWriteRes<[R52UnitLd]>;
}

// Zero-latency writes that use no resource.
let Latency = 0 in {
  def R52WriteAdr : SchedWriteRes<[]>;
  def R52WriteCC  : SchedWriteRes<[]>;
}

// ALU writes, one per result stage (EX1, EX2, WRI).
def R52WriteALU_EX1 : SchedWriteRes<[R52UnitALU]> {
  let Latency = 2;
}
def R52WriteALU_EX2 : SchedWriteRes<[R52UnitALU]> {
  let Latency = 3;
}
def R52WriteALU_WRI : SchedWriteRes<[R52UnitALU]> {
  let Latency = 4;
}

// Same EX2/WRI latencies, but without using any resource.
def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> {
  let Latency = 3;
}
def R52WriteNoRSRC_WRI : SchedWriteRes<[]> {
  let Latency = 4;
}
160
// Alias the generic SchedWrites to the sub-target specific ones.

// Multiplies.
def : SchedAlias<WriteMUL16,   R52WriteMAC>;
def : SchedAlias<WriteMUL32,   R52WriteMAC>;
def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;

// Multiply-accumulates.
def : SchedAlias<WriteMAC16,   R52WriteMAC>;
def : SchedAlias<WriteMAC32,   R52WriteMAC>;
def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;

// Memory operations.
def : SchedAlias<WritePreLd,   R52WriteLd>;
def : SchedAlias<WriteLd,      R52WriteLd>;
def : SchedAlias<WriteST,      R52WriteST>;
173
// FP SchedWrites, grouped by latency. The "2" variants list their unit(s)
// twice.

// FP ALU, latency 4 (F3).
let Latency = 4 in {
  def R52WriteFPALU_F3  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
}

// FP ALU, latency 5 (F4).
let Latency = 5 in {
  def R52WriteFPALU_F4  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F4 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
}

// FP ALU and FP multiply, latency 6 (F5).
let Latency = 6 in {
  def R52WriteFPALU_F5  : SchedWriteRes<[R52UnitFPALU]>;
  def R52Write2FPALU_F5 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]>;
  def R52WriteFPMUL_F5  : SchedWriteRes<[R52UnitFPMUL]>;
  def R52Write2FPMUL_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL]>;
}

// FP multiply-accumulate: internally two insns (MUL then ADD).
let Latency = 11 in {
  def R52WriteFPMAC_F5  : SchedWriteRes<[R52UnitFPMUL, R52UnitFPALU]>;
  def R52Write2FPMAC_F5 : SchedWriteRes<[R52UnitFPMUL, R52UnitFPMUL,
                                         R52UnitFPALU, R52UnitFPALU]>;
}

// FP loads and stores use the load/store unit.
let Latency = 5 in {
  def R52WriteFPLd_F4 : SchedWriteRes<[R52UnitLd]>;
  def R52WriteFPST_F4 : SchedWriteRes<[R52UnitLd]>;
}
200
201//===----------------------------------------------------------------------===//
202// Floating-point. Map target defined SchedReadWrites to processor specific ones
203//
// FP moves use the F3 ALU write; conversions and 32/64-bit FP ALU operations
// all use the F5 ALU write.
def : SchedAlias<WriteFPMOV,   R52WriteFPALU_F3>;
def : SchedAlias<WriteFPCVT,   R52WriteFPALU_F5>;
def : SchedAlias<WriteFPALU32, R52WriteFPALU_F5>;
def : SchedAlias<WriteFPALU64, R52WriteFPALU_F5>;
208
209//===----------------------------------------------------------------------===//
210// Subtarget-specific overrides. Map opcodes to list of SchedReadWrites types.
211//
// COPY is scheduled as a generic ALU write.
def : InstRW<[WriteALU], (instrs COPY)>;

// Sign/zero extends.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex
    "SXTB", "SXTH", "SXTB16", "UXTB", "UXTH", "UXTB16",
    "t2SXTB", "t2SXTH", "t2SXTB16", "t2UXTB", "t2UXTH", "t2UXTB16")>;

// Immediate moves and global-address pseudo moves.
def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex
    "MOVCCi32imm", "MOVi32imm", "t2MOVCCi", "t2MOVi")>;
def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex
    "MOV_ga_pcrel$")>;
def : InstRW<[R52WriteLd, R52Read_ISS], (instregex
    "MOV_ga_pcrel_ldr")>;

// Select.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex
    "SEL", "t2SEL")>;

// Bit-field clear/insert/extract.
def : InstRW<[R52WriteALU_EX2, R52Read_ISS, R52Read_ISS], (instregex
    "BFC", "BFI", "UBFX", "SBFX",
    "(t|t2)BFC", "(t|t2)BFI", "(t|t2)UBFX", "(t|t2)SBFX")>;
230
// Saturating arithmetic: result available at WRI.
def : InstRW<[R52WriteALU_WRI, R52Read_EX1, R52Read_EX1], (instregex
    // Un-prefixed opcodes.
    "QADD", "QSUB", "QDADD", "QDSUB", "SSAT", "SSAT16", "USAT",
    "QADD8", "QADD16", "QSUB8", "QSUB16", "QASX", "QSAX",
    "UQADD8", "UQADD16", "UQSUB8", "UQSUB16", "UQASX", "UQSAX",
    // t2-prefixed opcodes.
    "t2QADD", "t2QSUB", "t2QDADD", "t2QDSUB", "t2SSAT", "t2SSAT16", "t2USAT",
    "t2QADD8", "t2QADD16", "t2QSUB8", "t2QSUB16", "t2QASX", "t2QSAX",
    "t2UQADD8", "t2UQADD16", "t2UQSUB8", "t2UQSUB16", "t2UQASX", "t2UQSAX",
    "t2ABS")>;

// Parallel arithmetic: result available at EX2.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex
    // Un-prefixed opcodes.
    "SADD8", "SADD16", "SSUB8", "SSUB16", "SASX", "SSAX",
    "UADD8", "UADD16", "USUB8", "USUB16", "UASX", "USAX",
    // t2-prefixed opcodes.
    "t2SADD8", "t2SADD16", "t2SSUB8", "t2SSUB16", "t2SASX", "t2SSAX",
    "t2UADD8", "t2UADD16", "t2USUB8", "t2USUB16", "t2UASX", "t2USAX")>;

// Flag setting.
// NOTE(review): the opcodes listed are the SH*/UH* and [SU]XTA* families;
// confirm that "flag setting" is the intended description of this group.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex
    // Un-prefixed opcodes.
    "SHADD8", "SHADD16", "SHSUB8", "SHSUB16", "SHASX", "SHSAX",
    "SXTAB", "SXTAB16", "SXTAH",
    "UHADD8", "UHADD16", "UHSUB8", "UHSUB16", "UHASX", "UHSAX",
    "UXTAB", "UXTAB16", "UXTAH",
    // t2-prefixed opcodes.
    "t2SHADD8", "t2SHADD16", "t2SHSUB8", "t2SHSUB16", "t2SHASX", "t2SHSAX",
    "t2SXTAB", "t2SXTAB16", "t2SXTAH",
    "t2UHADD8", "t2UHADD16", "t2UHSUB8", "t2UHSUB16", "t2UHASX", "t2UHSAX",
    "t2UXTAB", "t2UXTAB16", "t2UXTAH")>;

// Sum of Absolute Difference: all three operands are read at ISS.
def : InstRW<[R52WriteALU_WRI, R52Read_ISS, R52Read_ISS, R52Read_ISS],
    (instregex "USAD8", "t2USAD8", "USADA8", "t2USADA8")>;
259
// Integer Multiply: two source operands, both read at ISS.
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS], (instregex
    "MUL", "SMMUL", "SMMULR", "SMULBB", "SMULBT", "SMULTB", "SMULTT",
    "SMULWB", "SMULWT", "SMUSD", "SMUSDX",
    "t2MUL", "t2SMMUL", "t2SMMULR", "t2SMULBB", "t2SMULBT", "t2SMULTB",
    "t2SMULTT", "t2SMULWB", "t2SMULWT", "t2SMUSD")>;

// Multiply Accumulate: three source operands, all read at ISS.
// Even for 64-bit accumulation (or Long), the single MAC is used (not ALUs).
// The store pipeline is used partly for 64-bit operations.
def : InstRW<[R52WriteMAC, R52Read_ISS, R52Read_ISS, R52Read_ISS], (instregex
    "MLA", "MLS", "SMMLA", "SMMLAR", "SMMLS", "SMMLSR",
    "t2MLA", "t2MLS", "t2SMMLA", "t2SMMLAR", "t2SMMLS", "t2SMMLSR",
    "SMUAD", "SMUADX", "t2SMUAD", "t2SMUADX",
    "SMLABB", "SMLABT", "SMLATB", "SMLATT", "SMLSD", "SMLSDX",
    "SMLAWB", "SMLAWT",
    "t2SMLABB", "t2SMLABT", "t2SMLATB", "t2SMLATT", "t2SMLSD", "t2SMLSDX",
    "t2SMLAWB", "t2SMLAWT",
    "SMLAD", "SMLADX", "t2SMLAD", "t2SMLADX",
    "SMULL$", "UMULL$", "t2SMULL$", "t2UMULL$",
    "SMLAL", "UMLAL", "SMLALBT", "SMLALTB", "SMLALTT", "SMLALD", "SMLALDX",
    "SMLSLD", "SMLSLDX", "UMAAL",
    "t2SMLAL", "t2UMLAL", "t2SMLALBT", "t2SMLALTB", "t2SMLALTT", "t2SMLALD",
    "t2SMLALDX", "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;

// Integer divide (t2-prefixed opcodes).
def : InstRW<[R52WriteDIV, R52Read_ISS, R52Read_ISS], (instregex
    "t2SDIV", "t2UDIV")>;
287
// Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
// However, that's non-trivial to specify, so all loads are treated uniformly.

// Loads that produce only the loaded value.
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_ISS], (instregex
    "LDR(i12|rs)$", "LDRB(i12|rs)$",
    "t2LDR(i8|i12|s|pci)", "t2LDR(H|B)(i8|i12|s|pci)",
    "LDREX", "t2LDREX",
    "tLDR[BH](r|i|spi|pci|pciASM)", "tLDR(r|i|spi|pci|pciASM)",
    "LDRH$", "PICLDR$", "PICLDR(H|B)$", "LDRcp$",
    "PICLDRS(H|B)$", "t2LDRS(H|B)(i|r|p|s)", "LDRS(H|B)$",
    "t2LDRpci_pic", "tLDRS(B|H)", "t2LDRDi8", "LDRD$",
    "LDA", "t2LDA")>;

// Pre/post-indexed and "T" loads: a second write (R52WriteAdr) is listed in
// addition to the loaded value.
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_ISS], (instregex
    "LD(RB|R)(_|T_)(POST|PRE)_(IMM|REG)", "LDRH(_PRE|_POST)",
    "LDRBT_POST$", "LDR(T|BT)_POST_(REG|IMM)", "LDRHT(i|r)",
    "t2LD(R|RB|RH)_(PRE|POST)", "t2LD(R|RB|RH)T",
    "LDR(SH|SB)(_POST|_PRE)", "t2LDR(SH|SB)(_POST|_PRE)",
    "LDRS(B|H)T(i|r)", "t2LDRS(B|H)T(i|r)?",
    "LDRD_(POST|PRE)", "t2LDRD_(POST|PRE)")>;
304
// Register-shifted moves and MOVT.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex
    "MOVS?sr", "t2MOVS?sr")>;
def : InstRW<[R52WriteALU_WRI, R52Read_EX2], (instregex
    "MOVT", "t2MOVT")>;

// ALU operations with one register source (ri forms, CLZ, MVN).
def : InstRW<[R52WriteALU_EX2, R52Read_EX1], (instregex
    "AD(C|D)S?ri", "ANDS?ri", "BICS?ri", "CLZ", "EORri", "MVNS?r", "ORRri",
    "RSBS?ri", "RSCri", "SBCri",
    "t2AD(C|D)S?ri", "t2ANDS?ri", "t2BICS?ri", "t2CLZ", "t2EORri", "t2MVN",
    "t2ORRri", "t2RSBS?ri", "t2SBCri")>;

// ALU operations with two register sources (rr forms, CRC).
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_EX1], (instregex
    "AD(C|D)S?rr", "ANDS?rr", "BICS?rr", "CRC", "EORrr", "ORRrr", "RSBrr",
    "RSCrr", "SBCrr",
    "t2AD(C|D)S?rr", "t2ANDS?rr", "t2BICS?rr", "t2CRC", "t2EORrr",
    "t2SBCrr")>;

// ALU operations with a shifted second operand (rsi / t2 rs forms): the
// second listed read is at ISS.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS], (instregex
    "AD(C|D)S?rsi", "ANDS?rsi", "BICS?rsi", "EORrsi", "ORRrsi", "RSBrsi",
    "RSCrsi", "SBCrsi",
    "t2AD(C|D)S?rs", "t2ANDS?rs", "t2BICS?rs", "t2EORrs", "t2ORRrs",
    "t2RSBrs", "t2SBCrs")>;

// ALU operations with a register-shifted second operand (rsr forms): the
// second and third listed reads are at ISS.
def : InstRW<[R52WriteALU_EX2, R52Read_EX1, R52Read_ISS, R52Read_ISS],
    (instregex
    "AD(C|D)S?rsr", "ANDS?rsr", "BICS?rsr", "EORrsr", "MVNS?sr", "ORRrsr",
    "RSBrsr", "RSCrsr", "SBCrsr")>;

// Results available at EX1 with no modelled reads.
def : InstRW<[R52WriteALU_EX1], (instregex
    "ADR", "MOVsi", "MVNS?s?i", "t2MOVS?si")>;

// Shifts and rotates.
def : InstRW<[R52WriteALU_EX1, R52Read_ISS], (instregex
    "ASRi", "RORS?i")>;
def : InstRW<[R52WriteALU_EX1, R52Read_ISS, R52Read_ISS], (instregex
    "ASRr", "RORS?r", "LSR", "LSL")>;
331
// Compares, by operand form. Shifted operands are read at ISS.
def : InstRW<[R52WriteCC, R52Read_EX1],
    (instregex "CMPri", "CMNri")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_EX1],
    (instregex "CMPrr", "CMNzrr")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS],
    (instregex "CMPrsi", "CMNzrsi")>;
def : InstRW<[R52WriteCC, R52Read_EX1, R52Read_ISS, R52Read_ISS],
    (instregex "CMPrsr", "CMNzrsr")>;

// Bit/byte reversal, RRX and t2LDC.
def : InstRW<[R52WriteALU_EX2, R52Read_ISS], (instregex
    "t2LDC", "RBIT", "REV", "REV16", "REVSH", "RRX")>;

// Test.
def : InstRW<[R52WriteCC, R52Read_ISS], (instregex "TST")>;

// Status register moves are scheduled with the load write.
def : InstRW<[R52WriteLd],
    (instregex "MRS", "MRSbanked")>;
def : InstRW<[R52WriteLd, R52Read_EX1],
    (instregex "MSR", "MSRbanked")>;
344
// Integer Load, Multiple.
// For every latency 3..25 define two writes: R52WriteILDM<Lat>Cy uses the
// load/store unit, R52WriteILDM<Lat>CyNo uses no resource and no micro-op.
foreach Lat = 3-25 in {
  def R52WriteILDM#Lat#CyNo : SchedWriteRes<[]> {
    let NumMicroOps = 0;
    let Latency = Lat;
  }
  def R52WriteILDM#Lat#Cy : SchedWriteRes<[R52UnitLd]> {
    let Latency = Lat;
  }
}

// R52ILDMAddr<N>Pred holds when getNumLDMAddresses() reports exactly N.
foreach NAddr = 1-16 in
  def R52ILDMAddr#NAddr#Pred :
    SchedPredicate<"TII->getNumLDMAddresses(*MI) == "#NAddr>;

// Address writes on the load/store unit: NoWB has zero latency, WB keeps the
// default latency.
def R52WriteILDMAddrWB   : SchedWriteRes<[R52UnitLd]>;
def R52WriteILDMAddrNoWB : SchedWriteRes<[R52UnitLd]> {
  let Latency = 0;
}
// Variadic write for integer load-multiple. The variant is chosen by the
// number of addresses reported by the R52ILDMAddr<N>Pred predicates: a load
// of N addresses uses the N writes R52WriteILDM4Cy .. R52WriteILDM<N+3>Cy.
//
// Fixes relative to the previous revision:
//  * The 16-address entry was guarded by R52ILDMAddr15Pred (a duplicate of
//    the entry before it), so it could never be selected and 16-address
//    loads fell through to the default. It now uses R52ILDMAddr16Pred.
//  * The default entry is documented as using resources for two registers
//    only, but its last two writes were the resource-consuming
//    R52WriteILDM18Cy / R52WriteILDM19Cy. They are now the "CyNo" forms like
//    the rest of the tail.
def R52WriteILDM : SchedWriteVariant<[
    SchedVar<R52ILDMAddr2Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy]>,

    SchedVar<R52ILDMAddr3Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                                 R52WriteILDM6Cy]>,
    SchedVar<R52ILDMAddr4Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                                 R52WriteILDM6Cy,  R52WriteILDM7Cy]>,

    SchedVar<R52ILDMAddr5Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                                 R52WriteILDM6Cy,  R52WriteILDM7Cy,
                                 R52WriteILDM8Cy]>,
    SchedVar<R52ILDMAddr6Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                                 R52WriteILDM6Cy,  R52WriteILDM7Cy,
                                 R52WriteILDM8Cy,  R52WriteILDM9Cy]>,

    SchedVar<R52ILDMAddr7Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                                 R52WriteILDM6Cy,  R52WriteILDM7Cy,
                                 R52WriteILDM8Cy,  R52WriteILDM9Cy,
                                 R52WriteILDM10Cy]>,
    SchedVar<R52ILDMAddr8Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                                 R52WriteILDM6Cy,  R52WriteILDM7Cy,
                                 R52WriteILDM8Cy,  R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy]>,

    SchedVar<R52ILDMAddr9Pred,  [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                                 R52WriteILDM6Cy,  R52WriteILDM7Cy,
                                 R52WriteILDM8Cy,  R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy]>,
    SchedVar<R52ILDMAddr10Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                                 R52WriteILDM6Cy,  R52WriteILDM7Cy,
                                 R52WriteILDM8Cy,  R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy]>,

    SchedVar<R52ILDMAddr11Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                                 R52WriteILDM6Cy,  R52WriteILDM7Cy,
                                 R52WriteILDM8Cy,  R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy]>,
    SchedVar<R52ILDMAddr12Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                                 R52WriteILDM6Cy,  R52WriteILDM7Cy,
                                 R52WriteILDM8Cy,  R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy]>,

    SchedVar<R52ILDMAddr13Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                                 R52WriteILDM6Cy,  R52WriteILDM7Cy,
                                 R52WriteILDM8Cy,  R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy,
                                 R52WriteILDM16Cy]>,
    SchedVar<R52ILDMAddr14Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                                 R52WriteILDM6Cy,  R52WriteILDM7Cy,
                                 R52WriteILDM8Cy,  R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy,
                                 R52WriteILDM16Cy, R52WriteILDM17Cy]>,

    SchedVar<R52ILDMAddr15Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                                 R52WriteILDM6Cy,  R52WriteILDM7Cy,
                                 R52WriteILDM8Cy,  R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy,
                                 R52WriteILDM16Cy, R52WriteILDM17Cy,
                                 R52WriteILDM18Cy]>,
    // 16 addresses: previously guarded by R52ILDMAddr15Pred by mistake.
    SchedVar<R52ILDMAddr16Pred, [R52WriteILDM4Cy,  R52WriteILDM5Cy,
                                 R52WriteILDM6Cy,  R52WriteILDM7Cy,
                                 R52WriteILDM8Cy,  R52WriteILDM9Cy,
                                 R52WriteILDM10Cy, R52WriteILDM11Cy,
                                 R52WriteILDM12Cy, R52WriteILDM13Cy,
                                 R52WriteILDM14Cy, R52WriteILDM15Cy,
                                 R52WriteILDM16Cy, R52WriteILDM17Cy,
                                 R52WriteILDM18Cy, R52WriteILDM19Cy]>,

    // Unknown number of registers, just use resources for two registers.
    // Only the first two writes occupy the load/store unit; the remaining
    // "CyNo" writes carry latency but no resource and no micro-op.
    SchedVar<NoSchedPred,       [R52WriteILDM4Cy,    R52WriteILDM5Cy,
                                 R52WriteILDM6CyNo,  R52WriteILDM7CyNo,
                                 R52WriteILDM8CyNo,  R52WriteILDM9CyNo,
                                 R52WriteILDM10CyNo, R52WriteILDM11CyNo,
                                 R52WriteILDM12CyNo, R52WriteILDM13CyNo,
                                 R52WriteILDM14CyNo, R52WriteILDM15CyNo,
                                 R52WriteILDM16CyNo, R52WriteILDM17CyNo,
                                 R52WriteILDM18CyNo, R52WriteILDM19CyNo]>
]> { let Variadic=1; }
450
// Integer Store, Multiple.
// R52WriteIStIncAddr is the building block; R52WriteISTM<N> repeats it N
// times via WriteSequence.
def R52WriteIStIncAddr : SchedWriteRes<[R52UnitLd]> {
  let NumMicroOps = 2;
  let Latency = 4;
}
foreach NumAddr = 1-16 in
  def R52WriteISTM#NumAddr : WriteSequence<[R52WriteIStIncAddr], NumAddr>;

// Pick the sequence that matches the number of addresses (shares the
// R52ILDMAddr<N>Pred predicates with the load-multiple variant).
def R52WriteISTM : SchedWriteVariant<[
    SchedVar<R52ILDMAddr2Pred,  [R52WriteISTM2]>,
    SchedVar<R52ILDMAddr3Pred,  [R52WriteISTM3]>,
    SchedVar<R52ILDMAddr4Pred,  [R52WriteISTM4]>,
    SchedVar<R52ILDMAddr5Pred,  [R52WriteISTM5]>,
    SchedVar<R52ILDMAddr6Pred,  [R52WriteISTM6]>,
    SchedVar<R52ILDMAddr7Pred,  [R52WriteISTM7]>,
    SchedVar<R52ILDMAddr8Pred,  [R52WriteISTM8]>,
    SchedVar<R52ILDMAddr9Pred,  [R52WriteISTM9]>,
    SchedVar<R52ILDMAddr10Pred, [R52WriteISTM10]>,
    SchedVar<R52ILDMAddr11Pred, [R52WriteISTM11]>,
    SchedVar<R52ILDMAddr12Pred, [R52WriteISTM12]>,
    SchedVar<R52ILDMAddr13Pred, [R52WriteISTM13]>,
    SchedVar<R52ILDMAddr14Pred, [R52WriteISTM14]>,
    SchedVar<R52ILDMAddr15Pred, [R52WriteISTM15]>,
    SchedVar<R52ILDMAddr16Pred, [R52WriteISTM16]>,
    // Unknown number of registers, just use resources for two registers.
    SchedVar<NoSchedPred,       [R52WriteISTM2]>
]>;
478
// Load-multiple forms whose opcode names end at the addressing mode.
def : InstRW<[R52WriteILDM, R52Read_ISS], (instregex
    "LDM(IA|DA|DB|IB)$", "t2LDM(IA|DA|DB|IB)$",
    "(t|sys)LDM(IA|DA|DB|IB)$")>;
// _UPD forms additionally list R52WriteAdr.
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS], (instregex
    "LDM(IA|DA|DB|IB)_UPD", "(t2|sys|t)LDM(IA|DA|DB|IB)_UPD")>;
// Return forms and tPOP are treated like the _UPD forms.
def : InstRW<[R52WriteILDM, R52WriteAdr, R52Read_ISS], (instregex
    "LDMIA_RET", "(t|t2)LDMIA_RET", "tPOP")>;
486
// Integer Store, Single Element.
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2], (instregex
    "PICSTR", "STR(i12|rs)", "STRB(i12|rs)", "STRH$", "STREX",
    "SRS", "t2SRS", "t2SRSDB",
    "t2STREX", "t2STREXB", "t2STREXD", "t2STREXH",
    "t2STR(i12|i8|s)$", "RFE", "t2RFE", "t2STR[BH](i12|i8|s)$",
    "tSTR[BH](i|r)", "tSTR(i|r)", "tSTRspi")>;

// Pre/post-indexed and "T" single stores additionally list R52WriteAdr.
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex
    "STR(B_|_|BT_|T_)(PRE_IMM|PRE_REG|POST_REG|POST_IMM)",
    "STR(i|r)_preidx", "STRB(i|r)_preidx", "STRH_preidx",
    "STR(H_|HT_)(PRE|POST)", "STR(BT|HT|T)",
    "t2STR_(PRE|POST)", "t2STR[BH]_(PRE|POST)",
    "t2STR_preidx", "t2STR[BH]_preidx", "t2ST(RB|RH|R)T")>;

// Integer Store, Dual (also covers STL / t2STL).
def : InstRW<[R52WriteLd, R52Read_ISS, R52Read_EX2], (instregex
    "STRD$", "t2STRDi8", "STL", "t2STL")>;
def : InstRW<[R52WriteLd, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex
    "(t2|t)STRD_(POST|PRE)", "STRD_(POST|PRE)")>;

// Integer Store, Multiple; the _UPD forms and tPUSH additionally list
// R52WriteAdr.
def : InstRW<[R52WriteISTM, R52Read_ISS, R52Read_EX2], (instregex
    "STM(IB|IA|DB|DA)$", "(t2|sys|t)STM(IB|IA|DB|DA)$")>;
def : InstRW<[R52WriteISTM, R52WriteAdr, R52Read_ISS, R52Read_EX2], (instregex
    "STM(IB|IA|DB|DA)_UPD", "(t2|sys|t)STM(IB|IA|DB|DA)_UPD", "tPUSH")>;
510
// LDRLIT pseudo instructions expand to LDR + PICADD and are scheduled as a
// single load.
def : InstRW<[R52WriteLd], (instregex
    "t?LDRLIT_ga_abs", "t?LDRLIT_ga_pcrel$")>;
// LDRLIT_ga_pcrel_ldr expands to LDR + PICLDR; also scheduled as one load.
def : InstRW<[R52WriteLd], (instregex
    "LDRLIT_ga_pcrel_ldr")>;
516
517
518
519//===----------------------------------------------------------------------===//
520// VFP, Floating Point Support
// Absolute difference: the fq/hq forms use the doubled FP ALU write.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
    (instregex "VABD(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1],
    (instregex "VABD(fq|hq)")>;

// Absolute value.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
    (instregex "VABS(D|S|H)")>;
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
    (instregex "VABS(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
    (instregex "VABS(fq|hq)")>;

// Absolute compares: result at F3.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
    (instregex "(VACGE|VACGT)(fd|hd)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
    (instregex "(VACGE|VACGT)(fq|hq)")>;

// Add / subtract.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
    (instregex "(VADD|VSUB)(D|S|H|fd|hd)$")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1],
    (instregex "(VADD|VSUB)(fq|hq)")>;

// FP register load / store.
def : InstRW<[R52WriteFPLd_F4, R52Read_ISS, R52Read_F1],
    (instregex "VLDR")>;
def : InstRW<[R52WriteFPST_F4, R52Read_ISS, R52Read_F1],
    (instregex "VSTR")>;
536
537
538//===----------------------------------------------------------------------===//
539// Neon Support
540
// Vector multiple loads/stores.

// R52LMAddrPred<N> holds when the instruction has exactly N operands.
foreach NumAddr = 1-16 in
  def R52LMAddrPred#NumAddr :
    SchedPredicate<"MI->getNumOperands() == "#NumAddr>;

// R52WriteLM<Lat>Cy: a write of the given latency that uses no resource.
foreach Lat = 1-32 in
  def R52WriteLM#Lat#Cy : SchedWriteRes<[]> { let Latency = Lat; }

// R52ReserveLd<Num>Cy: reserve the LdSt resource for Num cycles (no
// dual-issue); Num micro-ops, zero latency.
foreach Num = 1-32 in
  def R52ReserveLd#Num#Cy : SchedWriteRes<[R52UnitLd]> {
    let ResourceCycles = [Num];
    let NumMicroOps = Num;
    let Latency = 0;
  }
// Variadic write for vector load-multiple, selected by operand count through
// the R52LMAddrPred<N> predicates. Two consecutive operand counts map to the
// same number of D registers. For k D registers the entry lists the writes
// R52WriteLM5Cy .. R52WriteLM<k+4>Cy followed by R52ReserveLd<k+4>Cy.
//
// Fixes relative to the previous revision:
//  * The first "3 D reg" entry reserved R52ReserveLd4Cy; its sibling entry
//    and every other group use <k+4>, i.e. R52ReserveLd7Cy.
//  * The "8 D reg" entries were guarded by R52LMAddrPred14 / 15. Pred14 is
//    already used by "7 D reg", which made one entry unreachable, and
//    Pred16 was never handled. They now use Pred15 / Pred16, matching the
//    mapping in R52WriteSTM.
def R52WriteVLDM : SchedWriteVariant<[
  // 1 D reg
  SchedVar<R52LMAddrPred1,  [R52WriteLM5Cy,
                             R52ReserveLd5Cy]>,
  SchedVar<R52LMAddrPred2,  [R52WriteLM5Cy,
                             R52ReserveLd5Cy]>,

  // 2 D reg
  SchedVar<R52LMAddrPred3,  [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52ReserveLd6Cy]>,
  SchedVar<R52LMAddrPred4,  [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52ReserveLd6Cy]>,

  // 3 D reg
  SchedVar<R52LMAddrPred5,  [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52WriteLM7Cy,
                             R52ReserveLd7Cy]>,
  SchedVar<R52LMAddrPred6,  [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52WriteLM7Cy,
                             R52ReserveLd7Cy]>,

  // 4 D reg
  SchedVar<R52LMAddrPred7,  [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52WriteLM7Cy,  R52WriteLM8Cy,
                             R52ReserveLd8Cy]>,
  SchedVar<R52LMAddrPred8,  [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52WriteLM7Cy,  R52WriteLM8Cy,
                             R52ReserveLd8Cy]>,

  // 5 D reg
  SchedVar<R52LMAddrPred9,  [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52WriteLM7Cy,  R52WriteLM8Cy,
                             R52WriteLM9Cy,
                             R52ReserveLd9Cy]>,
  SchedVar<R52LMAddrPred10, [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52WriteLM7Cy,  R52WriteLM8Cy,
                             R52WriteLM9Cy,
                             R52ReserveLd9Cy]>,

  // 6 D reg
  SchedVar<R52LMAddrPred11, [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52WriteLM7Cy,  R52WriteLM8Cy,
                             R52WriteLM9Cy,  R52WriteLM10Cy,
                             R52ReserveLd10Cy]>,
  SchedVar<R52LMAddrPred12, [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52WriteLM7Cy,  R52WriteLM8Cy,
                             R52WriteLM9Cy,  R52WriteLM10Cy,
                             R52ReserveLd10Cy]>,

  // 7 D reg
  SchedVar<R52LMAddrPred13, [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52WriteLM7Cy,  R52WriteLM8Cy,
                             R52WriteLM9Cy,  R52WriteLM10Cy,
                             R52WriteLM11Cy,
                             R52ReserveLd11Cy]>,
  SchedVar<R52LMAddrPred14, [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52WriteLM7Cy,  R52WriteLM8Cy,
                             R52WriteLM9Cy,  R52WriteLM10Cy,
                             R52WriteLM11Cy,
                             R52ReserveLd11Cy]>,

  // 8 D reg
  SchedVar<R52LMAddrPred15, [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52WriteLM7Cy,  R52WriteLM8Cy,
                             R52WriteLM9Cy,  R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd12Cy]>,
  SchedVar<R52LMAddrPred16, [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52WriteLM7Cy,  R52WriteLM8Cy,
                             R52WriteLM9Cy,  R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd12Cy]>,

  // Unknown number of registers: list all eight writes but reserve the
  // load/store unit only as for a single D register.
  SchedVar<NoSchedPred,     [R52WriteLM5Cy,  R52WriteLM6Cy,
                             R52WriteLM7Cy,  R52WriteLM8Cy,
                             R52WriteLM9Cy,  R52WriteLM10Cy,
                             R52WriteLM11Cy, R52WriteLM12Cy,
                             R52ReserveLd5Cy]>
]> { let Variadic=1;}
637
// Variable stores. Cannot dual-issue.
// For R52WriteSTM<L>: Latency = L, ResourceCycles = [L - 4] on the load/store
// unit, and NumMicroOps = 2 * (L - 4).
def R52WriteSTM5  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 5;  let NumMicroOps = 2;  let ResourceCycles = [1];
}
def R52WriteSTM6  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 6;  let NumMicroOps = 4;  let ResourceCycles = [2];
}
def R52WriteSTM7  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 7;  let NumMicroOps = 6;  let ResourceCycles = [3];
}
def R52WriteSTM8  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 8;  let NumMicroOps = 8;  let ResourceCycles = [4];
}
def R52WriteSTM9  : SchedWriteRes<[R52UnitLd]> {
  let Latency = 9;  let NumMicroOps = 10; let ResourceCycles = [5];
}
def R52WriteSTM10 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 10; let NumMicroOps = 12; let ResourceCycles = [6];
}
def R52WriteSTM11 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 11; let NumMicroOps = 14; let ResourceCycles = [7];
}
def R52WriteSTM12 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 12; let NumMicroOps = 16; let ResourceCycles = [8];
}
def R52WriteSTM13 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 13; let NumMicroOps = 18; let ResourceCycles = [9];
}
def R52WriteSTM14 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 14; let NumMicroOps = 20; let ResourceCycles = [10];
}
def R52WriteSTM15 : SchedWriteRes<[R52UnitLd]> {
  let Latency = 15; let NumMicroOps = 22; let ResourceCycles = [11];
}
694
// Store-multiple write variant: picks one of the R52WriteSTM<N> resources
// according to which R52LMAddrPred<K> predicate holds. Consecutive pairs of
// predicates (1-2, 3-4, ...) share the same resource. The entry order is
// kept exactly as before.
def R52WriteSTM : SchedWriteVariant<[
  SchedVar<R52LMAddrPred1,  [R52WriteSTM5]>,
  SchedVar<R52LMAddrPred2,  [R52WriteSTM5]>,

  SchedVar<R52LMAddrPred3,  [R52WriteSTM6]>,
  SchedVar<R52LMAddrPred4,  [R52WriteSTM6]>,

  SchedVar<R52LMAddrPred5,  [R52WriteSTM7]>,
  SchedVar<R52LMAddrPred6,  [R52WriteSTM7]>,

  SchedVar<R52LMAddrPred7,  [R52WriteSTM8]>,
  SchedVar<R52LMAddrPred8,  [R52WriteSTM8]>,

  SchedVar<R52LMAddrPred9,  [R52WriteSTM9]>,
  SchedVar<R52LMAddrPred10, [R52WriteSTM9]>,

  SchedVar<R52LMAddrPred11, [R52WriteSTM10]>,
  SchedVar<R52LMAddrPred12, [R52WriteSTM10]>,

  SchedVar<R52LMAddrPred13, [R52WriteSTM11]>,
  SchedVar<R52LMAddrPred14, [R52WriteSTM11]>,

  SchedVar<R52LMAddrPred15, [R52WriteSTM12]>,
  SchedVar<R52LMAddrPred16, [R52WriteSTM12]>,

  // Fallback when the register count is unknown: use R52WriteSTM6
  // (ResourceCycles = [2]).
  SchedVar<NoSchedPred,     [R52WriteSTM6]>
]>;
715
716// Vector Load/Stores. Can issue only in slot-0. Can dual-issue with
717// another instruction in slot-1, but only in the last issue.
// WriteVLD1 only overrides the latency; every other field keeps the
// WriteRes default.
def : WriteRes<WriteVLD1, [R52UnitLd]> {
  let Latency = 5;
}

// WriteVLD2..WriteVLD4 are all marked SingleIssue; the flag is applied once
// through the enclosing 'let' instead of being repeated in each record.
let SingleIssue = 1 in {
  def : WriteRes<WriteVLD2, [R52UnitLd]> {
    let Latency = 6;
    let NumMicroOps = 3;
    let ResourceCycles = [2];
  }
  def : WriteRes<WriteVLD3, [R52UnitLd]> {
    let Latency = 7;
    let NumMicroOps = 5;
    let ResourceCycles = [3];
  }
  def : WriteRes<WriteVLD4, [R52UnitLd]> {
    let Latency = 8;
    let NumMicroOps = 7;
    let ResourceCycles = [4];
  }
}
// Vector-store memory write resources. All of them use R52UnitLd; only the
// latency, micro-op count and unit occupancy differ between records.
class R52WriteVSTMemRes<int latency, int uops, int cycles>
    : SchedWriteRes<[R52UnitLd]> {
  let Latency = latency;
  let NumMicroOps = uops;
  let ResourceCycles = [cycles];
}

//                                      Latency  NumMicroOps  ResourceCycles
def R52WriteVST1Mem : R52WriteVSTMemRes<5,       1,           1>;
def R52WriteVST2Mem : R52WriteVSTMemRes<6,       3,           2>;
def R52WriteVST3Mem : R52WriteVSTMemRes<7,       5,           3>;
def R52WriteVST4Mem : R52WriteVSTMemRes<8,       7,           4>;
def R52WriteVST5Mem : R52WriteVSTMemRes<9,       9,           5>;
762
763
// VABA / VABAL: one write and three reads. The first suffix group uses
// R52WriteFPALU_F5; the other two use R52Write2FPALU_F5.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABA(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABA(u|s)(v16i8|v8i16|v4i32)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VABAL(u|s)(v8i16|v4i32|v2i64)")>;
767
// VABD: one write and two reads. The first suffix group uses
// R52WriteFPALU_F4, the second R52Write2FPALU_F4.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABD(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABD(u|s)(v16i8|v8i16|v4i32)")>;

// VABDL: the long form's opcode suffixes are v8i16/v4i32/v2i64, the same set
// the VABAL entry in this file uses. The previous pattern listed
// (v16i8|v8i16|v4i32), so the v16i8 alternative matched no opcode and the
// v2i64 opcodes were left without an R52 mapping.
// NOTE(review): opcode names taken from ARMInstrNEON.td - confirm against the
// in-tree definitions.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VABDL(u|s)(v8i16|v4i32|v2i64)")>;
771
// VABS with the v16i8/v8i16/v4i32 suffixes.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1],
             (instregex "VABS(v16i8|v8i16|v4i32)")>;

// VADD / VSUB, split by type suffix.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VADD|VSUB)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VADD|VSUB)(v16i8|v8i16|v4i32|v2i64)")>;

// Narrowing add/subtract (plain and rounding).
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VADDHN|VRADDHN|VSUBHN|VRSUBHN)(v8i8|v4i16|v2i32)")>;

// Long and wide add/subtract.
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VADDL", "VADDW", "VSUBL", "VSUBW")>;
783
// Bitwise AND / BIC / EOR: 'd'-suffixed opcodes use R52WriteFPALU_F3,
// 'q'-suffixed opcodes use R52Write2FPALU_F3.
def : InstRW<[R52WriteFPALU_F3, R52Read_F2, R52Read_F2],
             (instregex "(VAND|VBIC|VEOR)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2],
             (instregex "(VAND|VBIC|VEOR)q")>;

// VBICi opcodes, split by type suffix.
def : InstRW<[R52WriteFPALU_F3, R52Read_F2],
             (instregex "VBICi(v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F2],
             (instregex "VBICi(v8i16|v4i32)")>;

// Bitwise insert/select: the first read uses R52Read_F1, the remaining two
// use R52Read_F2.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2],
             (instregex "(VBIF|VBIT|VBSL)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2],
             (instregex "(VBIF|VBIT|VBSL)q")>;
792
// Compares, count-leading-zeros and population count.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;

// Conversions.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VCVT", "VSITO", "VUITO", "VTO")>;

// VDUP reads its source with R52Read_ISS; VDUPLN reads it with R52Read_F1.
def : InstRW<[R52WriteFPALU_F3, R52Read_ISS],
             (instregex "VDUP(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_ISS],
             (instregex "VDUP(8|16|32)q")>;
def : InstRW<[R52WriteFPALU_F3, R52Read_F1],
             (instregex "VDUPLN(8|16|32)d")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1],
             (instregex "VDUPLN(8|16|32)q")>;

// VEXT (d and q opcodes) and VSEL.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VEXTd(8|16|32)", "VSEL")>;
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VEXTq(8|16|32|64)")>;
805
// Fused multiply-accumulate / multiply-subtract.
def : InstRW<[R52WriteFPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "(VFMA|VFMS)(f|h)d")>;
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "(VFMA|VFMS)(f|h)q")>;

// Halving add/subtract, split by type suffix.
def : InstRW<[R52WriteFPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VHADD|VHSUB)(u|s)(v8i8|v4i16|v2i32)")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F2, R52Read_F2],
             (instregex "(VHADD|VHSUB)(u|s)(v16i8|v8i16|v4i32)")>;

// VLDM uses the R52WriteVLDM write.
def : InstRW<[R52WriteVLDM],
             (instregex "VLDM[SD](IA|DB)$")>;

// Min/max (including pairwise).
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VMAX", "VMIN", "VPMAX", "VPMIN")>;

// OR / OR-NOT / reverse.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VORR", "VORN", "VREV")>;

// VMRS is mapped to R52WriteNoRSRC_WRI with no read entries.
def : InstRW<[R52WriteNoRSRC_WRI],
             (instregex "VMRS")>;

// Negate.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VNEG")>;

// Pairwise add, and pairwise add long / accumulate long.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VPADDi")>;
def : InstRW<[R52Write2FPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VPADAL", "VPADDL")>;

// Saturating absolute value, split by type suffix.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1],
             (instregex "VQABS(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F1],
             (instregex "VQABS(v16i8|v8i16|v4i32|v2i64)")>;

// Saturating add/subtract, split by type suffix.
def : InstRW<[R52WriteFPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VQADD|VQSUB)(u|s)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[R52Write2FPALU_F5, R52Read_F2, R52Read_F2],
             (instregex "(VQADD|VQSUB)(u|s)(v16i8|v8i16|v4i32|v2i64)")>;

// Saturating doubling multiply-accumulate long and multiply.
def : InstRW<[R52Write2FPMAC_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VQDMLAL", "VQDMLSL")>;
def : InstRW<[R52WriteFPMUL_F5, R52Read_F1, R52Read_F1, R52Read_F1],
             (instregex "VQDMUL","VQRDMUL")>;

// Saturating narrow / negate / shift.
def : InstRW<[R52WriteFPALU_F5, R52Read_F1, R52Read_F1],
             (instregex "VQMOVN", "VQNEG", "VQSHL", "VQSHRN")>;

// Rounding shifts and opcodes starting with VTB.
def : InstRW<[R52WriteFPALU_F4, R52Read_F1, R52Read_F1],
             (instregex "VRSHL", "VRSHR", "VRSHRN", "VTB")>;

// Swap / transpose / unzip / zip.
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
             (instregex "VSWP", "VTRN", "VUZP", "VZIP")>;
831
832//---
833// VSTx. Vector Stores
834//---
835// 1-element structure store
// VST1 opcodes whose names end without an address-update suffix: a single
// memory write, with R52Read_ISS and R52Read_F2 reads.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)T$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Q$")>;
def : InstRW<[R52WriteVST3Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64TPseudo$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64QPseudo$")>;

// VST1LN opcodes, same shape, all using R52WriteVST1Mem.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNq(8|16|32)Pseudo$")>;

// VST1 opcodes with a wb/WB suffix: R52WriteAdr is listed as a second write.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1q(8|16|32|64)wb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Twb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d(8|16|32|64)Qwb")>;
def : InstRW<[R52WriteVST3Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64TPseudoWB")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1d64QPseudoWB")>;

// VST1LN opcodes with an _UPD/WB_ suffix: R52WriteAdr is listed as a second
// write.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST1LNq(8|16|32)Pseudo_UPD")>;
857
858// 2-element structure store
// VST2 opcodes whose names end without an address-update suffix.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)Pseudo$")>;

// VST2LN opcodes, all using R52WriteVST1Mem.
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST1Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)Pseudo$")>;

// VST2 opcodes with a wb/WB suffix: R52WriteAdr is listed as a second write.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2(d|b)(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)wb")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2q(8|16|32)PseudoWB")>;

// VST2LN opcodes with an _UPD/WB_ suffix: R52WriteAdr is listed as a second
// write.
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST1Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST2LNq(16|32)Pseudo_UPD")>;
880
881// 3-element structure store
// VST3 opcodes whose names end without an address-update suffix, all using
// R52WriteVST4Mem.
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3d(8|16|32)(oddP|P)seudo$")>;

// VST3LN opcodes, all using R52WriteVST2Mem.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)Pseudo$")>;

// VST3 opcodes with an _UPD/WB_ suffix: R52WriteAdr is listed as a second
// write.
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)WB_(fixed|register)_Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVST4Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;

// VST3LN opcodes with an _UPD/WB_ suffix: R52WriteAdr is listed as a second
// write.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNd(8|16|32)Pseudo_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)_UPD$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNqWB_(fixed|register)_Asm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST3LNq(16|32)Pseudo_UPD$")>;
903
904// 4-element structure store
// VST4 opcodes whose names end without an address-update suffix, all using
// R52WriteVST5Mem.
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)$")>;
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)Asm_(8|16|32)$")>;
def : InstRW<[R52WriteVST5Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4d(8|16|32)Pseudo$")>;

// VST4LN opcodes, all using R52WriteVST2Mem.
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNdAsm_(8|16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)Pseudo$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNqAsm_(16|32)$")>;
def : InstRW<[R52WriteVST2Mem, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)Pseudo$")>;

// VST4 opcodes with an _UPD/WB_ suffix: R52WriteAdr is listed as a second
// write.
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)WB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST5Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// VST4LN opcodes with an _UPD/WB_ suffix: R52WriteAdr is listed as a second
// write.
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNdWB_(fixed|register)_Asm_(8|16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNd(8|16|32)Pseudo_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)_UPD")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNqWB_(fixed|register)_Asm_(16|32)")>;
def : InstRW<[R52WriteVST2Mem, R52WriteAdr, R52Read_ISS, R52Read_F2],
             (instregex "VST4LNq(16|32)Pseudo_UPD")>;
926
927} // R52 SchedModel
928