//==- ARMScheduleM4.td - Cortex-M4 Scheduling Definitions -*- tablegen -*-====//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the SchedRead/Write data for the ARM Cortex-M4 processor.
//
//===----------------------------------------------------------------------===//

// Machine model for Cortex-M4. The per-instruction latencies that refer to
// this model are bound to it through `SchedModel = CortexM4Model`.
def CortexM4Model : SchedMachineModel {
  // Pipeline shape: in-order, and treated as single-issue because IT is the
  // only instruction that can be dual-issued.
  let MicroOpBufferSize = 0;
  let IssueWidth = 1;

  // Default cycle costs.
  let LoadLatency = 2;       // Load that is neither pipelined nor pc-relative
  let MispredictPenalty = 2; // Cheapest cost of a taken branch

  let PostRAScheduler = 1;

  // The model is not declared complete; the predicates below are listed as
  // unsupported by it.
  let CompleteModel = 0;
  let UnsupportedFeatures = [
    IsARM, HasNEON, HasDotProd, HasZCZ, HasMVEInt,
    IsNotMClass, HasDPVFP, HasFPARMv8, HasFullFP16, Has8MSecExt, HasV8,
    HasV8_3a, HasTrustZone, HasDFB, IsWindows
  ];
}


// We model the entire cpu as a single pipeline with a BufferSize = 0 since
// Cortex-M4 is in-order.
// The one processor resource that every write below occupies.
def M4Unit : ProcResource<1> {
  let BufferSize = 0;
}


let SchedModel = CortexM4Model in {

// Latency helpers. Each one binds a write to M4Unit; they differ only in the
// latency they assign.
//   M4UnitL<n>    - WriteRes for an existing SchedWrite, latency <n>.
//   M4UnitL<n>_wr - stand-alone SchedWriteRes, latency <n>.
//   M4UnitL<n>I   - InstRW mapping a set of instructions onto M4UnitL<n>_wr.

class M4UnitL1<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 1;
}
class M4UnitL2<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 2;
}
class M4UnitL3<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 3;
}
class M4UnitL14<SchedWrite write> : WriteRes<write, [M4Unit]> {
  let Latency = 14;
}

def M4UnitL1_wr : SchedWriteRes<[M4Unit]> {
  let Latency = 1;
}
def M4UnitL2_wr : SchedWriteRes<[M4Unit]> {
  let Latency = 2;
}

class M4UnitL1I<dag instr> : InstRW<[M4UnitL1_wr], instr>;
class M4UnitL2I<dag instr> : InstRW<[M4UnitL2_wr], instr>;


// Loads, multiply-accumulates and divides: latency 2.

foreach W = [WriteLd, WriteMAC32, WriteMAC64Hi, WriteMAC64Lo, WriteMAC16,
             WriteDIV] in
  def : M4UnitL2<W>;

def : M4UnitL2I<(instregex "(t|t2)LDM")>;
def : M4UnitL2I<(instregex "(t|t2)LDR")>;


// Stores produce no outputs, so they get latency 1.

def : M4UnitL1<WriteST>;
def : M4UnitL1I<(instregex "(t|t2)STM")>;


// All remaining integer writes: latency 1.

foreach W = [WriteALU, WriteALUsi, WriteALUsr, WriteALUSsr,
             WriteBr, WriteBrL, WriteBrTbl,
             WriteCMPsi, WriteCMPsr, WriteCMP,
             WriteMUL32, WriteMUL64Hi, WriteMUL64Lo, WriteMUL16,
             WriteNoop, WritePreLd] in
  def : M4UnitL1<W>;

def : M4UnitL1I<(instregex "(t|t2)MOV")>;
def : M4UnitL1I<(instrs COPY)>;
def : M4UnitL1I<(instregex "t2IT", "t2MSR", "t2MRS")>;
def : M4UnitL1I<(instregex "t2CLREX")>;
def : M4UnitL1I<(instregex "t2SEL", "t2USAD8", "t2SML[AS]",
    "t2(S|Q|SH|U|UQ|UH|QD)(ADD|ASX|SAX|SUB)", "t2USADA8", "(t|t2)REV")>;

// The instructions below matter little for scheduling: either they will not
// be generated or scheduling them is not very useful. They are listed so the
// model is more complete.
def : M4UnitL1I<(instregex "t2CDP", "t2LDC", "t2MCR", "t2MRC", "t2MRRC", "t2STC")>;
def : M4UnitL1I<(instregex "tCPS", "t2ISB", "t2DSB", "t2DMB", "t2?HINT$")>;
def : M4UnitL1I<(instregex "t2?UDF$", "tBKPT", "t2DBG")>;
def : M4UnitL1I<(instregex "t?2?Int_eh_sjlj_", "tADDframe", "t?ADJCALL")>;
def : M4UnitL1I<(instregex "CMP_SWAP", "JUMPTABLE", "MEMCPY")>;
def : M4UnitL1I<(instregex "VSETLNi32", "VGETLNi32")>;

// Integer reads: ReadAdvance of 0 cycles.
foreach R = [ReadALU, ReadALUsr, ReadMUL, ReadMAC] in
  def : ReadAdvance<R, 0>;

// Floating point: latency 1 for most writes; MACs get 3, divides and square
// roots get 14. Instructions matching "VLD" get latency 2.

foreach W = [WriteFPCVT, WriteFPMOV, WriteFPALU32, WriteFPALU64,
             WriteFPMUL32, WriteFPMUL64] in
  def : M4UnitL1<W>;

def : M4UnitL2I<(instregex "VLD")>;
def : M4UnitL1I<(instregex "VST")>;

foreach W = [WriteFPMAC32, WriteFPMAC64] in
  def : M4UnitL3<W>;

foreach W = [WriteFPDIV32, WriteFPDIV64, WriteFPSQRT32, WriteFPSQRT64] in
  def : M4UnitL14<W>;

// NOTE(review): the WriteVLD* writes get latency 1 here, while instructions
// matching "VLD" above get latency 2 - confirm this is intended.
foreach W = [WriteVLD1, WriteVLD2, WriteVLD3, WriteVLD4,
             WriteVST1, WriteVST2, WriteVST3, WriteVST4] in
  def : M4UnitL1<W>;

def : M4UnitL1I<(instregex "VMOVS", "FCONSTS", "VCMP", "VNEG", "VABS")>;
def : M4UnitL2I<(instregex "VMOVD")>;
def : M4UnitL1I<(instregex "VMRS", "VMSR", "FMSTAT")>;

// Floating-point reads: ReadAdvance of 0 cycles.
foreach R = [ReadFPMUL, ReadFPMAC] in
  def : ReadAdvance<R, 0>;

}