//==- ARMScheduleM4.td - Cortex-M4 Scheduling Definitions -*- tablegen -*-====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the SchedRead/Write data for the ARM Cortex-M4 processor.
//
//===----------------------------------------------------------------------===//

// Machine model for the Cortex-M4.
def CortexM4Model : SchedMachineModel {
  // In-order core.
  let MicroOpBufferSize = 0;
  // Only IT can be dual-issued, so the core is treated as single-issue.
  let IssueWidth = 1;
  // Load latency when not pipelined and not pc-relative.
  let LoadLatency = 2;
  // Best-case cost of a taken branch.
  let MispredictPenalty = 2;

  let PostRAScheduler = 1;
  let CompleteModel = 0;
}


// The entire CPU is modelled as a single pipeline with BufferSize = 0, since
// the Cortex-M4 is in-order.
def M4Unit : ProcResource<1> {
  let BufferSize = 0; // Unbuffered, matching the in-order pipeline.
}


let SchedModel = CortexM4Model in {

// Latency helpers applied to the different instruction groups below.

// WriteRes wrappers: bind a SchedWrite to M4Unit with a fixed latency.
class M4UnitL1<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 1;
}
class M4UnitL2<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 2;
}
class M4UnitL3<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 3;
}
class M4UnitL14<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 14;
}

// Standalone write resources on M4Unit, referenced by the InstRW wrappers
// that follow.
def M4UnitL1_wr : SchedWriteRes<[M4Unit]> {
  let Latency = 1;
}
def M4UnitL2_wr : SchedWriteRes<[M4Unit]> {
  let Latency = 2;
}

// InstRW wrappers: give the instructions selected by `instr` a latency of
// 1 or 2 respectively.
class M4UnitL1I<dag instr> : InstRW<[M4UnitL1_wr], instr>;
class M4UnitL2I<dag instr> : InstRW<[M4UnitL2_wr], instr>;


// Loads, MACs and DIV all get the higher latency of 2.
foreach W = [WriteLd, WriteMAC32, WriteMAC64Hi, WriteMAC64Lo, WriteMAC16,
             WriteDIV] in
  def : M4UnitL2<W>;

def : M4UnitL2I<(instregex "(t|t2)LDM")>;


// Stores use a latency of 1 as they have no outputs.
def : M4UnitL1<WriteST>;
def : M4UnitL1I<(instregex "(t|t2)STM")>;


// Everything else has a latency of 1.
foreach W = [WriteALU, WriteALUsi, WriteALUsr, WriteALUSsr,
             WriteBr, WriteBrL, WriteBrTbl,
             WriteCMPsi, WriteCMPsr, WriteCMP,
             WriteMUL32, WriteMUL64Hi, WriteMUL64Lo, WriteMUL16,
             WriteNoop, WritePreLd] in
  def : M4UnitL1<W>;

def : M4UnitL1I<(instregex "(t|t2)MOV")>;
def : M4UnitL1I<(instrs COPY)>;
def : M4UnitL1I<(instregex "t2IT")>;
def : M4UnitL1I<(instregex
    "t2SEL",
    "t2USAD8",
    "t2(S|Q|SH|U|UQ|UH)(ADD16|ASX|SAX|SUB16|ADD8|SUB8)",
    "t2USADA8",
    "(t|t2)REV")>;

// Integer operand reads all use a ReadAdvance of 0 cycles.
foreach R = [ReadALU, ReadALUsr, ReadMUL, ReadMAC] in
  def : ReadAdvance<R, 0>;

// Most FP instructions have single-cycle latency; the exceptions are MACs,
// divides and square roots. Loads still take 2 cycles.
foreach W = [WriteFPCVT, WriteFPMOV, WriteFPALU32, WriteFPALU64,
             WriteFPMUL32, WriteFPMUL64] in
  def : M4UnitL1<W>;

def : M4UnitL2I<(instregex "VLD")>;
def : M4UnitL1I<(instregex "VST")>;

foreach W = [WriteFPMAC32, WriteFPMAC64] in
  def : M4UnitL3<W>;

foreach W = [WriteFPDIV32, WriteFPDIV64, WriteFPSQRT32, WriteFPSQRT64] in
  def : M4UnitL14<W>;

foreach W = [WriteVLD1, WriteVLD2, WriteVLD3, WriteVLD4,
             WriteVST1, WriteVST2, WriteVST3, WriteVST4] in
  def : M4UnitL1<W>;

// FP operand reads likewise use a ReadAdvance of 0 cycles.
foreach R = [ReadFPMUL, ReadFPMAC] in
  def : ReadAdvance<R, 0>;

}