xref: /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleM4.td (revision 4b50c451720d8b427757a6da1dd2bb4c52cd9e35)
1//==- ARMScheduleM4.td - Cortex-M4 Scheduling Definitions -*- tablegen -*-====//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the SchedRead/Write data for the ARM Cortex-M4 processor.
10//
11//===----------------------------------------------------------------------===//
12
// Machine model for the Cortex-M4: single-issue, in-order, with
// post-register-allocation scheduling enabled.
def CortexM4Model : SchedMachineModel {
  // Only IT can be dual-issued, so assume single-issue.
  let IssueWidth = 1;

  // In-order.
  let MicroOpBufferSize = 0;

  // Latency when not pipelined, not pc-relative.
  let LoadLatency = 2;

  // Best case branch taken cost.
  let MispredictPenalty = 2;

  let PostRAScheduler = 1;

  // The model is not marked complete.
  let CompleteModel = 0;
}
22
23
// The entire CPU is modelled as one pipeline. Because the Cortex-M4 is
// in-order, that pipeline is given BufferSize = 0.

def M4Unit : ProcResource<1> {
  let BufferSize = 0;
}
28
29
let SchedModel = CortexM4Model in {

// Latency helpers. Each one binds a write to the single M4Unit resource; the
// number after 'L' in the name is the latency assigned to that write.

class M4UnitL1<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 1;
}
class M4UnitL2<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 2;
}
class M4UnitL3<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 3;
}
class M4UnitL14<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 14;
}

// Stand-alone write resources, used by the InstRW helpers below to attach a
// latency to instructions selected by name or by regular expression.
def M4UnitL1_wr : SchedWriteRes<[M4Unit]> {
  let Latency = 1;
}
def M4UnitL2_wr : SchedWriteRes<[M4Unit]> {
  let Latency = 2;
}

class M4UnitL1I<dag instr> : InstRW<[M4UnitL1_wr], instr>;
class M4UnitL2I<dag instr> : InstRW<[M4UnitL2_wr], instr>;


// Loads, MACs and DIV all get a higher latency of 2.
foreach W = [WriteLd,
             WriteMAC32,
             WriteMAC64Hi,
             WriteMAC64Lo,
             WriteMAC16,
             WriteDIV] in
  def : M4UnitL2<W>;

def : M4UnitL2I<(instregex "(t|t2)LDM")>;


// Stores use a latency of 1 as they have no outputs.
def : M4UnitL1<WriteST>;
def : M4UnitL1I<(instregex "(t|t2)STM")>;


// Everything else has a latency of 1.
foreach W = [WriteALU,
             WriteALUsi,
             WriteALUsr,
             WriteALUSsr,
             WriteBr,
             WriteBrL,
             WriteBrTbl,
             WriteCMPsi,
             WriteCMPsr,
             WriteCMP,
             WriteMUL32,
             WriteMUL64Hi,
             WriteMUL64Lo,
             WriteMUL16,
             WriteNoop,
             WritePreLd] in
  def : M4UnitL1<W>;

def : M4UnitL1I<(instregex "(t|t2)MOV")>;
def : M4UnitL1I<(instrs COPY)>;
def : M4UnitL1I<(instregex "t2IT")>;
def : M4UnitL1I<(instregex
    "t2SEL",
    "t2USAD8",
    "t2(S|Q|SH|U|UQ|UH)(ADD16|ASX|SAX|SUB16|ADD8|SUB8)",
    "t2USADA8",
    "(t|t2)REV")>;

// The integer reads are given a read advance of 0 cycles.
foreach R = [ReadALU, ReadALUsr, ReadMUL, ReadMAC] in
  def : ReadAdvance<R, 0>;

// Floating point: most instructions are single-cycle latency. The exceptions
// are MACs (3 cycles) and divides and square roots (14 cycles). Loads still
// take 2 cycles.

foreach W = [WriteFPCVT,
             WriteFPMOV,
             WriteFPALU32,
             WriteFPALU64,
             WriteFPMUL32,
             WriteFPMUL64] in
  def : M4UnitL1<W>;

def : M4UnitL2I<(instregex "VLD")>;
def : M4UnitL1I<(instregex "VST")>;

foreach W = [WriteFPMAC32, WriteFPMAC64] in
  def : M4UnitL3<W>;

foreach W = [WriteFPDIV32, WriteFPDIV64, WriteFPSQRT32, WriteFPSQRT64] in
  def : M4UnitL14<W>;

// The WriteVLD*/WriteVST* write types are each given a latency of 1.
foreach W = [WriteVLD1,
             WriteVLD2,
             WriteVLD3,
             WriteVLD4,
             WriteVST1,
             WriteVST2,
             WriteVST3,
             WriteVST4] in
  def : M4UnitL1<W>;

// The FP multiply and MAC reads are given a read advance of 0 cycles.
foreach R = [ReadFPMUL, ReadFPMAC] in
  def : ReadAdvance<R, 0>;

}
120