//=- ARMScheduleM85.td - ARM Cortex-M85 Scheduling Definitions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for the ARM Cortex-M85 processor.
//
// All timing is referred to EX2. Thus, operands which are needed at EX1 are
// stated to have a ReadAdvance of -1. The FP/MVE pipe actually begins at EX3
// but is described as if it were in EX2 to avoid having unnaturally long
// latencies with delayed inputs on every instruction. Instead, whenever an FP
// instruction must access a GP register or a non-FP instruction (which
// includes loads/stores) must access an FP register, the operand timing is
// adjusted:
//   FP accessing GPR:     read one cycle later, write one cycle later
//                         NOTE: absolute spec timing already includes this if
//                               referenced to EX2
//   non-FP accessing FPR: read one cycle earlier, write one cycle earlier
//===----------------------------------------------------------------------===//

// Top-level machine model record; every definition inside the
// `let SchedModel = CortexM85Model` scope below is attached to it.
def CortexM85Model : SchedMachineModel {
  // Dual issue for most instructions.
  let IssueWidth = 2;
  // M85 is in-order.
  let MicroOpBufferSize = 0;
  // Best case for load-use case.
  let LoadLatency = 2;
  // Mispredict cost for forward branches is 7, but 4 works better.
  let MispredictPenalty = 4;
  let CompleteModel = 0;
}

let SchedModel = CortexM85Model in {

//===--------------------------------------------------------------------===//
// CortexM85 has two ALU, two LOAD, two STORE, a MAC, a BRANCH and two VFP
// pipes (with three units). There are three shifters available: one per
// stage.

// Processor resources. Every unit and every group is declared with
// BufferSize = 0, so the setting is applied once to the whole list.
let BufferSize = 0 in {
  def M85UnitLoadL  : ProcResource<1>;
  def M85UnitLoadH  : ProcResource<1>;
  def M85UnitLoad   : ProcResGroup<[M85UnitLoadL,M85UnitLoadH]>;
  def M85UnitStoreL : ProcResource<1>;
  def M85UnitStoreH : ProcResource<1>;
  def M85UnitStore  : ProcResGroup<[M85UnitStoreL,M85UnitStoreH]>;
  def M85UnitALU    : ProcResource<2>;
  def M85UnitShift1 : ProcResource<1>;
  def M85UnitShift2 : ProcResource<1>;
  def M85UnitMAC    : ProcResource<1>;
  def M85UnitBranch : ProcResource<1>;
  def M85UnitVFPAL  : ProcResource<1>;
  def M85UnitVFPAH  : ProcResource<1>;
  def M85UnitVFPA   : ProcResGroup<[M85UnitVFPAL,M85UnitVFPAH]>;
  def M85UnitVFPBL  : ProcResource<1>;
  def M85UnitVFPBH  : ProcResource<1>;
  def M85UnitVFPB   : ProcResGroup<[M85UnitVFPBL,M85UnitVFPBH]>;
  def M85UnitVFPCL  : ProcResource<1>;
  def M85UnitVFPCH  : ProcResource<1>;
  def M85UnitVFPC   : ProcResGroup<[M85UnitVFPCL,M85UnitVFPCH]>;
  def M85UnitVFPD   : ProcResource<1>;
  def M85UnitVPortL : ProcResource<1>;
  def M85UnitVPortH : ProcResource<1>;
  def M85UnitVPort  : ProcResGroup<[M85UnitVPortL,M85UnitVPortH]>;
  def M85UnitSIMD   : ProcResource<1>;
  def M85UnitLShift : ProcResource<1>;
  def M85UnitDiv    : ProcResource<1>;

  def M85UnitSlot0  : ProcResource<1>;
}

//===---------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types
// which map ProcResources and set latency.

def : WriteRes<WriteALU, [M85UnitALU]> { let Latency = 1; }

// Basic ALU with shifts.
def : WriteRes<WriteALUsi,  [M85UnitALU, M85UnitShift1]> { let Latency = 1; }
def : WriteRes<WriteALUsr,  [M85UnitALU, M85UnitShift1]> { let Latency = 1; }
def : WriteRes<WriteALUSsr, [M85UnitALU, M85UnitShift1]> { let Latency = 1; }

// Compares.
def : WriteRes<WriteCMP,   [M85UnitALU]>                { let Latency = 1; }
def : WriteRes<WriteCMPsi, [M85UnitALU, M85UnitShift1]> { let Latency = 2; }
def : WriteRes<WriteCMPsr, [M85UnitALU, M85UnitShift1]> { let Latency = 2; }

// Multiplies. The high half of a 64-bit result consumes no resources and
// no micro-ops of its own.
def : WriteRes<WriteMUL16,   [M85UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMUL32,   [M85UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMUL64Lo, [M85UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMUL64Hi, []> { let Latency = 2; let NumMicroOps = 0; }

// Multiply-accumulates. Same treatment of the high half as for multiplies.
def : WriteRes<WriteMAC16,   [M85UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMAC32,   [M85UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMAC64Lo, [M85UnitMAC]> { let Latency = 2; }
def : WriteRes<WriteMAC64Hi, []> { let Latency = 2; let NumMicroOps = 0; }

// Divisions.
def : WriteRes<WriteDIV, [M85UnitDiv]> { let Latency = 7; }

// Loads/Stores. The "Wide" variants occupy both the low and the high unit.
def : WriteRes<WriteLd,    [M85UnitLoad]>  { let Latency = 1; }
def : WriteRes<WritePreLd, [M85UnitLoad]>  { let Latency = 2; }
def : WriteRes<WriteST,    [M85UnitStore]> { let Latency = 2; }
def M85WriteLdWide : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH]> {
  let Latency = 1;
}
def M85WriteStWide : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH]> {
  let Latency = 2;
}

// Branches.
def : WriteRes<WriteBr,    [M85UnitBranch]> { let Latency = 2; }
def : WriteRes<WriteBrL,   [M85UnitBranch]> { let Latency = 2; }
def : WriteRes<WriteBrTbl, [M85UnitBranch]> { let Latency = 2; }

// Noop.
def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }

//===---------------------------------------------------------------------===//
// Sched definitions for floating-point instructions
//
// Floating point conversions.
def : WriteRes<WriteFPCVT, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
  let Latency = 2;
}
def : WriteRes<WriteFPMOV, [M85UnitVPort, M85UnitSlot0]> { let Latency = 1; }
def M85WriteFPMOV64 : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH,
                                     M85UnitSlot0]> {
  let Latency = 1;
}

// ALU operations (32/64-bit). These go down the FP pipeline.
def : WriteRes<WriteFPALU32, [M85UnitVFPA, M85UnitVPort, M85UnitSlot0]> {
  let Latency = 2;
}
def : WriteRes<WriteFPALU64, [M85UnitVFPAL, M85UnitVFPAH,
                              M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
  let Latency = 6;
}

// Multiplication
def : WriteRes<WriteFPMUL32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
  let Latency = 3;
}
def : WriteRes<WriteFPMUL64, [M85UnitVFPBL, M85UnitVFPBH,
                              M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
  let Latency = 8;
}

// Multiply-accumulate. FPMAC goes down the FP Pipeline.
def : WriteRes<WriteFPMAC32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
  let Latency = 5;
}
def : WriteRes<WriteFPMAC64, [M85UnitVFPBL, M85UnitVFPBH,
                              M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
  let Latency = 14;
}

// Division. Effective scheduling latency is 3, though real latency is larger
def : WriteRes<WriteFPDIV32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
  let Latency = 14;
}
def : WriteRes<WriteFPDIV64, [M85UnitVFPBL, M85UnitVFPBH,
                              M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
  let Latency = 29;
}

// Square-root.
// Effective scheduling latency is 3, though real latency is larger
def : WriteRes<WriteFPSQRT32, [M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
  let Latency = 14;
}
def : WriteRes<WriteFPSQRT64, [M85UnitVFPBL, M85UnitVFPBH,
                               M85UnitVPortL, M85UnitVPortH, M85UnitSlot0]> {
  let Latency = 29;
}

// Zero-micro-op marker writes that only constrain issue.
def M85SingleIssue : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let SingleIssue = 1;
}
def M85Slot0Only : SchedWriteRes<[M85UnitSlot0]> {
  let NumMicroOps = 0;
}

// What pipeline stage operands need to be ready for depending on
// where they come from.
def : ReadAdvance<ReadALUsr, 0>;
def : ReadAdvance<ReadMUL,   0>;
def : ReadAdvance<ReadMAC,   1>;
def : ReadAdvance<ReadALU,   0>;
def : ReadAdvance<ReadFPMUL, 0>;
def : ReadAdvance<ReadFPMAC, 3>;
def M85Read_ISSm1 : SchedReadAdvance<-2>; // operands needed at ISS
def M85Read_ISS   : SchedReadAdvance<-1>; // operands needed at EX1
def M85Read_EX1   : SchedReadAdvance<0>;  // operands needed at EX2
def M85Read_EX2   : SchedReadAdvance<1>;  // operands needed at EX3
def M85Read_EX3   : SchedReadAdvance<2>;  // operands needed at EX4
def M85Read_EX4   : SchedReadAdvance<3>;  // operands needed at EX5

// Resource-free, zero-micro-op writes carrying only a latency.
def M85Write1 : SchedWriteRes<[]> { let Latency = 1; let NumMicroOps = 0; }
def M85Write2 : SchedWriteRes<[]> { let Latency = 2; let NumMicroOps = 0; }
def M85WriteShift2 : SchedWriteRes<[M85UnitALU, M85UnitShift2]> {}

// Non general purpose instructions may not be dual issued. These
// use both issue units.
def M85NonGeneralPurpose : SchedWriteRes<[]> {
  // Assume that these will go down the main ALU pipeline.
  // In reality, many look likely to stall the whole pipeline.
  let Latency = 3;
  let SingleIssue = 1;
}

// List the non general purpose instructions.
def : InstRW<[M85NonGeneralPurpose],
             (instregex "t2MRS", "tSVC", "tBKPT", "t2MSR", "t2DMB", "t2DSB",
                        "t2ISB", "t2HVC", "t2SMC", "t2UDF", "ERET", "tHINT",
                        "t2HINT", "t2CLREX", "t2CLRM", "BUNDLE")>;

//===---------------------------------------------------------------------===//
// Sched definitions for load/store
//
// Mark whether the loads/stores must be single-issue
// Address operands are needed earlier
// Data operands are needed later

// Zero-micro-op writes used as extra results of loads/stores.
def M85BaseUpdate : SchedWriteRes<[]> {
  // Update is bypassable out of EX1
  let Latency = 0;
  let NumMicroOps = 0;
}
def M85MVERBaseUpdate : SchedWriteRes<[]> {
  let Latency = 1;
  let NumMicroOps = 0;
}
// Q register base update is available in EX3 to bypass into EX2/ISS.
// Latency=2 matches what we want for ISS, Latency=1 for EX2. Going
// with 2, as base update into another load/store is most likely. Could
// change later in an override.
def M85MVEQBaseUpdate : SchedWriteRes<[]> {
  let Latency = 2;
  let NumMicroOps = 0;
}
def M85LoadLatency1 : SchedWriteRes<[]> {
  let Latency = 1;
  let NumMicroOps = 0;
}

def M85SlowLoad : SchedWriteRes<[M85UnitLoad]> { let Latency = 2; }

// Byte and half-word loads should have greater latency than other loads.
// So should load exclusive?

// Byte/half-word loads, by addressing form. Address operands are read with
// M85Read_ISS.
def : InstRW<[M85SlowLoad],
             (instregex "t2LDR(B|H|SB|SH)pc")>;
def : InstRW<[M85SlowLoad, M85Read_ISS],
             (instregex "t2LDR(B|H|SB|SH)T",
                        "t2LDR(B|H|SB|SH)i",
                        "tLDRspi",
                        "tLDR(B|H)i")>;
def : InstRW<[M85SlowLoad, M85Read_ISS, M85Read_ISS],
             (instregex "t2LDR(B|H|SB|SH)s")>;
def : InstRW<[M85SlowLoad, M85Read_ISS, M85Read_ISS],
             (instregex "tLDR(B|H)r",
                        "tLDR(SB|SH)")>;
def : InstRW<[M85SlowLoad, M85BaseUpdate, M85Read_ISS],
             (instregex "t2LDR(B|H|SB|SH)_(POST|PRE)")>;

// Exclusive/acquire/release loads/stores cannot be dual-issued
def : InstRW<[WriteLd, M85SingleIssue, M85Read_ISS],
             (instregex "t2LDREX$",
                        "t2LDA(EX)?$")>;
def : InstRW<[M85WriteLdWide, M85LoadLatency1, M85SingleIssue, M85Read_ISS],
             (instregex "t2LDAEXD$")>;
def : InstRW<[M85SlowLoad, M85SingleIssue, M85Read_ISS],
             (instregex "t2LDREX(B|H)",
                        "t2LDA(EX)?(B|H)$")>;
def : InstRW<[WriteST, M85SingleIssue, M85Read_EX2, M85Read_ISS],
             (instregex "t2STREX(B|H)?$",
                        "t2STL(EX)?(B|H)?$")>;
def : InstRW<[M85WriteStWide, M85SingleIssue,
              M85Read_EX2, M85Read_EX2, M85Read_ISS],
             (instregex "t2STLEXD$")>;

// Load/store multiples end issue groups.

// Integer load/store multiples: single-issue, both load (or store) units.
def : InstRW<[M85WriteLdWide, M85SingleIssue, M85Read_ISS],
             (instregex "(t|t2)LDM(DB|IA)$")>;
def : InstRW<[M85WriteStWide, M85SingleIssue, M85Read_ISS],
             (instregex "(t|t2)STM(DB|IA)$")>;
def : InstRW<[M85BaseUpdate, M85WriteLdWide, M85SingleIssue, M85Read_ISS],
             (instregex "(t|t2)LDM(DB|IA)_UPD$", "tPOP")>;
def : InstRW<[M85BaseUpdate, M85WriteStWide, M85SingleIssue, M85Read_ISS],
             (instregex "(t|t2)STM(DB|IA)_UPD$", "tPUSH")>;

// Load/store doubles

def : InstRW<[M85BaseUpdate, M85WriteStWide,
              M85Read_EX2, M85Read_EX2, M85Read_ISS],
             (instregex "t2STRD_(PRE|POST)")>;
def : InstRW<[M85WriteStWide, M85Read_EX2, M85Read_EX2, M85Read_ISS],
             (instregex "t2STRDi")>;
def : InstRW<[M85WriteLdWide, M85LoadLatency1, M85BaseUpdate, M85Read_ISS],
             (instregex "t2LDRD_(PRE|POST)")>;
def : InstRW<[M85WriteLdWide, M85LoadLatency1, M85Read_ISS],
             (instregex "t2LDRDi")>;

// Word load / preload
def : InstRW<[WriteLd],
             (instregex "t2LDRpc", "t2PL[DI]pci", "tLDRpci")>;
def : InstRW<[WriteLd, M85Read_ISS],
             (instregex "t2LDR(i|T)", "t2PL[DI](W)?i", "tLDRi")>;
// The optional "W" must be upper case, as in the immediate-form pattern
// above; with a lower-case "w" the register-offset PLDW preload was never
// matched by this entry.
def : InstRW<[WriteLd, M85Read_ISS, M85Read_ISS],
             (instregex "t2LDRs", "t2PL[DI](W)?s", "tLDRr")>;
def : InstRW<[WriteLd, M85BaseUpdate, M85Read_ISS],
             (instregex "t2LDR_(POST|PRE)")>;

// Stores. Store data is read with M85Read_EX2, address operands with
// M85Read_ISS.
def : InstRW<[M85BaseUpdate, WriteST, M85Read_EX2, M85Read_ISS],
             (instregex "t2STR(B|H)?_(POST|PRE)")>;
def : InstRW<[WriteST, M85Read_EX2, M85Read_ISS, M85Read_ISS],
             (instregex "t2STR(B|H)?s$", "tSTR(B|H)?r$")>;
def : InstRW<[WriteST, M85Read_EX2, M85Read_ISS],
             (instregex "t2STR(B|H)?(i|T)", "tSTR(B|H)?i$", "tSTRspi")>;

// TBB/TBH - single-issue only

def M85TableLoad : SchedWriteRes<[M85UnitLoad]> { let SingleIssue = 1; }

def : InstRW<[M85TableLoad, M85Read_ISS, M85Read_ISS],
             (instregex "t2TB")>;

// VFP/MVE loads and stores
// Note: timing for VLDR/VSTR special has not been
// broken out
// Note 2: see notes at top of file for the reason load latency is 1 and
//         store data is in EX3.

// VFP load/store writes. The DP variants take both halves of the
// load/store unit and of the vector port; the Sys variants additionally
// take all four VFP units.
def M85LoadSP : SchedWriteRes<[M85UnitLoad, M85UnitVPort]>;
def M85LoadDP : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH,
                               M85UnitVPortL, M85UnitVPortH]>;
def M85LoadSys : SchedWriteRes<[M85UnitLoad, M85UnitVPort,
                                M85UnitVFPA, M85UnitVFPB,
                                M85UnitVFPC, M85UnitVFPD]> {
  let Latency = 4;
}
def M85StoreSP : SchedWriteRes<[M85UnitStore, M85UnitVPort]>;
def M85StoreDP : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH,
                                M85UnitVPortL, M85UnitVPortH]>;
def M85StoreSys : SchedWriteRes<[M85UnitStore, M85UnitVPort,
                                 M85UnitVFPA, M85UnitVFPB,
                                 M85UnitVFPC, M85UnitVFPD]>;

// MVE load/store writes: the load/store halves are held for two cycles, the
// vector port halves for one, and the instruction ends its issue group.
let ReleaseAtCycles = [2,2,1,1], EndGroup = 1 in {
  def M85LoadMVE : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH,
                                  M85UnitVPortL, M85UnitVPortH]>;
  def M85LoadMVELate : SchedWriteRes<[M85UnitLoadL, M85UnitLoadH,
                                      M85UnitVPortL, M85UnitVPortH]> {
    let Latency = 4; // 3 cycles later
  }
  def M85StoreMVE : SchedWriteRes<[M85UnitStoreL, M85UnitStoreH,
                                   M85UnitVPortL, M85UnitVPortH]>;
}

// VFP loads and stores.
def : InstRW<[M85LoadSP, M85Read_ISS], (instregex "VLDR(S|H)$")>;
def : InstRW<[M85LoadSys, M85Read_ISS], (instregex "VLDR_")>;
def : InstRW<[M85LoadDP, M85Read_ISS], (instregex "VLDRD$")>;
def : InstRW<[M85StoreSP, M85Read_EX3, M85Read_ISS], (instregex "VSTR(S|H)$")>;
def : InstRW<[M85StoreSys, M85Read_EX1, M85Read_ISS], (instregex "VSTR_")>;
def : InstRW<[M85StoreDP, M85Read_EX3, M85Read_ISS], (instregex "VSTRD$")>;

// MVE loads.
def : InstRW<[M85LoadMVELate, M85Read_ISS],
             (instregex "MVE_VLD[24]._[0-9]+$")>;
def : InstRW<[M85LoadMVELate, M85MVERBaseUpdate, M85Read_ISS],
             (instregex "MVE_VLD[24].*wb")>;
def : InstRW<[M85LoadMVE, M85Read_ISS],
             (instregex "MVE_VLDR.*(8|16|32|64)$")>;
// The alternation previously listed "_rq" twice; the duplicate is dropped,
// the set of matched names is unchanged.
def : InstRW<[M85LoadMVE, M85SingleIssue, M85Read_ISS, M85Read_ISS],
             (instregex "MVE_VLDR.*(_rq|_rq_u)$")>;
def : InstRW<[M85LoadMVE, M85SingleIssue, M85Read_ISS],
             (instregex "MVE_VLDR.*_qi$")>;
def : InstRW<[M85MVERBaseUpdate, M85LoadMVE, M85Read_ISS],
             (instregex "MVE_VLDR.*(_post|[^i]_pre)$")>;
def : InstRW<[M85MVEQBaseUpdate, M85SingleIssue, M85LoadMVE, M85Read_ISS],
             (instregex "MVE_VLDR.*(qi_pre)$")>;

// MVE stores.
def : InstRW<[M85StoreMVE, M85Read_EX3, M85Read_ISS],
             (instregex "MVE_VST[24]._[0-9]+$")>;
def : InstRW<[M85StoreMVE, M85Read_EX3, M85MVERBaseUpdate, M85Read_ISS],
             (instregex "MVE_VST[24].*wb")>;
def : InstRW<[M85StoreMVE, M85Read_EX3, M85Read_ISS],
             (instregex "MVE_VSTR.*(8|16|32|64)$")>;
// Same duplicate "_rq" alternative removed as for the loads above.
def : InstRW<[M85StoreMVE, M85SingleIssue,
              M85Read_EX3, M85Read_ISS, M85Read_ISS],
             (instregex "MVE_VSTR.*(_rq|_rq_u)$")>;
def : InstRW<[M85StoreMVE, M85SingleIssue, M85Read_EX3, M85Read_ISS],
             (instregex "MVE_VSTR.*_qi$")>;
def : InstRW<[M85MVERBaseUpdate, M85StoreMVE, M85Read_EX3, M85Read_ISS],
             (instregex "MVE_VSTR.*(_post|[^i]_pre)$")>;
def : InstRW<[M85MVEQBaseUpdate, M85SingleIssue, M85StoreMVE,
              M85Read_EX3, M85Read_ISS],
             (instregex "MVE_VSTR.*(qi_pre)$")>;

// Load/store multiples end issue groups.

def : InstRW<[M85WriteLdWide, M85SingleIssue, M85Read_ISS],
             (instregex "VLDM(S|D|Q)(DB|IA)$")>;
def : InstRW<[M85WriteStWide, M85SingleIssue, M85Read_ISS, M85Read_EX3],
             (instregex "VSTM(S|D|Q)(DB|IA)$")>;
def : InstRW<[M85BaseUpdate, M85WriteLdWide, M85SingleIssue, M85Read_ISS],
             (instregex "VLDM(S|D|Q)(DB|IA)_UPD$", "VLLDM")>;
def : InstRW<[M85BaseUpdate, M85WriteStWide, M85SingleIssue,
              M85Read_ISS, M85Read_EX3],
             (instregex "VSTM(S|D|Q)(DB|IA)_UPD$", "VLSTM")>;

//===---------------------------------------------------------------------===//
// Sched definitions for ALU
//

// Non-small shifted ALU operands are read a cycle early; small LSLs
// aren't, as they don't require the shifter.

def M85NonsmallShiftWrite : SchedWriteRes<[M85UnitALU,M85UnitShift1]> {
  let Latency = 1;
}

// Both variants below currently have a single, unconditional alternative.
def M85WriteALUsi : SchedWriteVariant<[
  SchedVar<NoSchedPred, [M85NonsmallShiftWrite]>
]>;
def M85Ex1ReadNoFastBypass
    : SchedReadAdvance<-1, [WriteLd, M85WriteLdWide, M85LoadLatency1]>;
def M85ReadALUsi : SchedReadVariant<[
  SchedVar<NoSchedPred, [M85Read_ISS]>
]>;

// Shifted-register ALU forms.
def : InstRW<[M85WriteALUsi, M85Read_EX1, M85ReadALUsi],
             (instregex "t2(ADC|ADDS|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|"
                        "SUBS|CMP|CMNz|TEQ|TST)rs$")>;
def : InstRW<[M85WriteALUsi, M85ReadALUsi],
             (instregex "t2MVNs")>;

// CortexM85 treats LSL #0 as needing a shifter. In practice the throughput
// seems to reliably be 2 when run on a cyclemodel, so we don't require a
// shift resource.
// NOTE(review): M85WriteALUsi resolves to M85NonsmallShiftWrite, which does
// list M85UnitShift1 - confirm this is what the comment above intends.
def : InstRW<[M85WriteALUsi, M85Read_EX1, M85ReadALUsi],
             (instregex "t2(ADC|ADDS|BIC|EOR|ORN|ORR|RSBS|RSB|SBC|"
                        "SUBS|CMP|CMNz|TEQ|TST)rr$")>;
def : InstRW<[M85WriteALUsi, M85ReadALUsi],
             (instregex "t2MVNr")>;

// Shift instructions: most pure shifts (i.e. MOV w/ shift) will use whichever
// shifter is free, thus it is possible to dual-issue them freely with anything
// else. As a result, they are not modeled as needing a shifter.
// RRX is odd because it must use the EX2 shifter, so it cannot dual-issue with
// itself.
//
// Note that pure shifts which use the EX1 shifter would need their operands
// a cycle earlier. However, they are only forced to use the EX1 shifter
// when issuing against an RRX instruction, which should be rare.

def : InstRW<[M85WriteShift2],
             (instregex "t2RRX$")>;
def : InstRW<[WriteALU],
             (instregex "(t|t2)(LSL|LSR|ASR|ROR|SBFX|UBFX)",
                        "t2MOVsr(a|l)")>;

// Instructions that use the shifter, but have normal timing

def : InstRW<[WriteALUsi,M85Slot0Only],
             (instregex "t2(BFC|BFI)$")>;

// Stack pointer add/sub happens in EX1 with checks in EX2

// True when operand 0 of the instruction is SP.
def M85WritesToSPPred : MCSchedPredicate<CheckRegOperand<0, SP>>;

// Read selected on whether the instruction writes SP.
def M85ReadForSP : SchedReadVariant<[
  SchedVar<M85WritesToSPPred, [M85Read_ISS]>,
  SchedVar<NoSchedPred,       [M85Read_EX1]>
]>;
// Shifted-operand counterpart; both alternatives resolve to M85Read_ISS.
def M85ReadForSPShift : SchedReadVariant<[
  SchedVar<M85WritesToSPPred, [M85Read_ISS]>,
  SchedVar<NoSchedPred,       [M85Read_ISS]>
]>;

def : InstRW<[WriteALU, M85Read_ISS],
             (instregex "tADDspi",
                        "tSUBspi")>;
def : InstRW<[WriteALU, M85ReadForSP],
             (instregex "t2(ADD|SUB)ri",
                        "t2MOVr",
                        "tMOVr")>;
def : InstRW<[WriteALU, M85ReadForSP, M85ReadForSP],
             (instregex "tADDrSP",
                        "tADDspr",
                        "tADDhirr")>;
def : InstRW<[M85WriteALUsi, M85ReadForSP, M85ReadForSPShift],
             (instregex "t2(ADD|SUB)rs")>;

def : InstRW<[WriteALU, M85Slot0Only],
             (instregex "t2CLZ")>;

// MAC operations that don't have SchedRW set

def : InstRW<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC],
             (instregex "t2SML[AS]D")>;

// Divides are special because they stall for their latency, and so look like
// two cycles as far as scheduling opportunities go. By putting M85Write2
// first, we make the operand latency 2, but keep the instruction latency 7.
// Divide operands are read early.

def : InstRW<[M85Write2, WriteDIV, M85Read_ISS, M85Read_ISS, WriteALU],
             (instregex "t2(S|U)DIV")>;

// DSP extension operations

// SIMD writes without the shifter, by latency.
def M85WriteSIMD1 : SchedWriteRes<[M85UnitSIMD, M85UnitALU, M85UnitSlot0]> {
  let Latency = 1;
}
def M85WriteSIMD2 : SchedWriteRes<[M85UnitSIMD, M85UnitALU, M85UnitSlot0]> {
  let Latency = 2;
}

// SIMD writes that also take the M85UnitShift1 shifter, by latency.
def M85WriteShSIMD0 : SchedWriteRes<[M85UnitSIMD, M85UnitALU,
                                     M85UnitShift1, M85UnitSlot0]> {
  let Latency = 0; // Finishes at EX1
}
def M85WriteShSIMD1 : SchedWriteRes<[M85UnitSIMD, M85UnitALU,
                                     M85UnitShift1, M85UnitSlot0]> {
  let Latency = 1;
}
def M85WriteShSIMD2 : SchedWriteRes<[M85UnitSIMD, M85UnitALU,
                                     M85UnitShift1, M85UnitSlot0]> {
  let Latency = 2;
}

def : InstRW<[M85WriteShSIMD2, M85Read_ISS],
             (instregex "t2(S|U)SAT")>;
def : InstRW<[M85WriteSIMD1, ReadALU],
             (instregex "(t|t2)(S|U)XT(B|H)")>;
def : InstRW<[M85WriteSIMD1, ReadALU, ReadALU],
             (instregex "t2(S|SH|U|UH)(ADD16|ADD8|ASX|SAX|SUB16|SUB8)",
                        "t2SEL")>;
def : InstRW<[M85WriteSIMD2, ReadALU, ReadALU],
             (instregex "t2(Q|UQ)(ADD|ASX|SAX|SUB)", "t2USAD8")>;
def : InstRW<[M85WriteShSIMD2, M85Read_ISS, M85Read_ISS],
             (instregex "t2QD(ADD|SUB)")>;
def : InstRW<[M85WriteShSIMD0, M85Read_ISS],
             (instregex "t2(RBIT|REV)", "tREV")>;
def : InstRW<[M85WriteShSIMD1, ReadALU, M85Read_ISS],
             (instregex "t2PKH(BT|TB)", "t2(S|U)XTA")>;
def : InstRW<[M85WriteSIMD2, ReadALU, ReadALU, M85Read_EX2],
             (instregex "t2USADA8")>;

// MSR/MRS
def : InstRW<[M85NonGeneralPurpose], (instregex "MSR", "MRS")>;

// 64-bit shift operations in EX3

def M85WriteLShift : SchedWriteRes<[M85UnitLShift, M85UnitALU]> {
  let Latency = 2;
}
// Resource-free second result with latency 2.
def M85WriteLat2 : SchedWriteRes<[]> {
  let Latency = 2;
  let NumMicroOps = 0;
}

def : InstRW<[M85WriteLShift, M85WriteLat2, M85Read_EX2, M85Read_EX2],
             (instregex "MVE_(ASRLi|LSLLi|LSRL|SQSHLL|SRSHRL|UQSHLL|URSHRL)$")>;
def : InstRW<[M85WriteLShift, M85WriteLat2,
              M85Read_EX2, M85Read_EX2, M85Read_EX2],
             (instregex "MVE_(ASRLr|LSLLr|SQRSHRL|UQRSHLL)$")>;
def : InstRW<[M85WriteLShift, M85Read_EX2, M85Read_EX2],
             (instregex "MVE_(SQRSHR|UQRSHL)$")>;
def : InstRW<[M85WriteLShift, M85Read_EX2],
             (instregex "MVE_(SQSHL|SRSHR|UQSHL|URSHR)$")>;

// Loop control/branch future instructions

def M85LE : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let Latency = -2;
}

def : InstRW<[WriteALU], (instregex "t2BF(_|Lr|i|Li|r)")>;

def : InstRW<[WriteALU], (instregex "MVE_LCTP")>;
def : InstRW<[WriteALU],
             (instregex "t2DLS", "t2WLS", "MVE_DLSTP", "MVE_WLSTP")>;
def : InstRW<[M85LE], (instregex "t2LE$")>;
// LE is executed at ISS
def : InstRW<[M85LE, M85Read_ISSm1],
             (instregex "t2LEUpdate", "MVE_LETP")>;

// Conditional selects

def : InstRW<[M85WriteLShift, M85Read_EX2, M85Read_EX2, M85Read_EX2],
             (instregex "t2(CSEL|CSINC|CSINV|CSNEG)")>;

//===---------------------------------------------------------------------===//
// Sched definitions for FP and MVE operations

// Resource-free, zero-micro-op writes that carry only a latency.
def M85OverrideVFPLat5 : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let Latency = 5;
}
def M85OverrideVFPLat4 : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let Latency = 4;
}
def M85OverrideVFPLat3 : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let Latency = 3;
}
def M85OverrideVFPLat2 : SchedWriteRes<[]> {
  let NumMicroOps = 0;
  let Latency = 2;
}

// Scalar FP writes, grouped by latency. The "S" records use the resource
// groups; the "D" records list both the L and H halves explicitly.
let Latency = 1 in {
  def M85GroupALat1S : SchedWriteRes<[M85UnitVFPA, M85UnitVPort,
                                      M85UnitSlot0]>;
  def M85GroupBLat1S : SchedWriteRes<[M85UnitVFPB, M85UnitVPort,
                                      M85UnitSlot0]>;
  def M85GroupCLat1S : SchedWriteRes<[M85UnitVFPC, M85UnitVPort,
                                      M85UnitSlot0]>;
  def M85GroupALat1D : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH,
                                      M85UnitVPortL, M85UnitVPortH,
                                      M85UnitSlot0]>;
  def M85GroupBLat1D : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH,
                                      M85UnitVPortL, M85UnitVPortH,
                                      M85UnitSlot0]>;
  def M85GroupCLat1D : SchedWriteRes<[M85UnitVFPCL, M85UnitVFPCH,
                                      M85UnitVPortL, M85UnitVPortH,
                                      M85UnitSlot0]>;
  def M85GroupABLat1S : SchedWriteRes<[M85UnitVPort, M85UnitSlot0]>;
}
let Latency = 2 in {
  def M85GroupBLat2S : SchedWriteRes<[M85UnitVFPB, M85UnitVPort,
                                      M85UnitSlot0]>;
  def M85GroupBLat2D : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH,
                                      M85UnitVPortL, M85UnitVPortH,
                                      M85UnitSlot0]>;
  def M85GroupABLat2S : SchedWriteRes<[M85UnitVPort, M85UnitSlot0]>;
  def M85GroupABLat2D : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH,
                                       M85UnitSlot0]>;
}

// Instructions which are missing default schedules
def : InstRW<[M85GroupALat1S], (instregex "V(FP_VMAXNM|FP_VMINNM)(H|S)$")>;
def : InstRW<[M85GroupALat1D], (instregex "V(FP_VMAXNM|FP_VMINNM)D$")>;
def : InstRW<[M85GroupCLat1S], (instregex "VCMPE?Z?(H|S)$")>;
def : InstRW<[M85GroupCLat1D], (instregex "VCMPE?Z?D$")>;
def : InstRW<[M85GroupBLat2S],
             (instregex "VCVT(A|M|N|P|R|X|Z)(S|U)(H|S)",
                        "VRINT(A|M|N|P|R|X|Z)(H|S)")>;
def : InstRW<[M85GroupBLat2D],
             (instregex "VCVT(B|T)(DH|HD)", "VCVT(A|M|N|P|R|X|Z)(S|U)D",
                        "V.*TOD", "VTO.*D", "VCVTDS", "VCVTSD",
                        "VRINT(A|M|N|P|R|X|Z)D")>;
def : InstRW<[M85GroupABLat1S], (instregex "VINSH")>;
def : InstRW<[M85GroupBLat1S], (instregex "V(ABS|NEG)(H|S)$")>;
def : InstRW<[M85GroupBLat1D], (instregex "V(ABS|NEG)D$")>;

// VMRS/VMSR
def M85VMRSEarly : SchedWriteRes<[M85UnitVPort]> {
  let SingleIssue = 1;
  let Latency = 2;
}
def M85VMRSLate : SchedWriteRes<[M85UnitVPort]> {
  let SingleIssue = 1;
  let Latency = 4;
}
def M85VMSREarly : SchedWriteRes<[M85UnitVPort]> {
  let SingleIssue = 1;
  let Latency = 1;
}
def M85VMSRLate : SchedWriteRes<[M85UnitVPort]> {
  let SingleIssue = 1;
  let Latency = 3;
}

// True when operand 0 is a register operand and that register is PC.
def M85FPSCRFlagPred : MCSchedPredicate<
  CheckAll<[CheckIsRegOperand<0>,
            CheckRegOperand<0, PC>]>>;

def M85VMRSFPSCR : SchedWriteVariant<[
  SchedVar<M85FPSCRFlagPred, [M85VMRSEarly]>,
  SchedVar<NoSchedPred,      [M85VMRSLate]>
]>;

def : InstRW<[M85VMSREarly, M85Read_EX2],
             (instregex "VMSR$", "VMSR_FPSCR_NZCVQC", "VMSR_P0", "VMSR_VPR")>;
def : InstRW<[M85VMRSEarly], (instregex "VMRS_P0", "VMRS_VPR", "FMSTAT")>;
def : InstRW<[M85VMRSLate], (instregex "VMRS_FPSCR_NZCVQC")>;
def : InstRW<[M85VMRSFPSCR], (instregex "VMRS$")>;
// Not matching properly
//def : InstRW<[M85VMSRLate, M85Read_EX2], (instregex "VMSR_FPCTX(NS|S)")>;
//def : InstRW<[M85VMRSLate], (instregex "VMRS_FPCTX(NS|S)")>;

// VSEL cannot bypass in its implied $cpsr operand; model as earlier read
def : InstRW<[M85GroupBLat1S, ReadALU, ReadALU, M85Read_ISS],
             (instregex "VSEL.*(S|H)$")>;
def : InstRW<[M85GroupBLat1D, ReadALU, ReadALU, M85Read_ISS],
             (instregex "VSEL.*D$")>;

// VMOV
def : InstRW<[WriteFPMOV],
             (instregex "VMOV(H|S)$", "FCONST(H|S)")>;
def : InstRW<[WriteFPMOV, M85Read_EX2],
             (instregex "VMOVHR$", "VMOVSR$")>;
def : InstRW<[M85GroupABLat2S],
             (instregex "VMOVRH$", "VMOVRS$")>;
def : InstRW<[M85WriteFPMOV64],
             (instregex "VMOVD$")>;
def : InstRW<[M85WriteFPMOV64],
             (instregex "FCONSTD")>;
def : InstRW<[M85WriteFPMOV64, M85Read_EX2, M85Read_EX2],
             (instregex "VMOVDRR")>;
def : InstRW<[M85WriteFPMOV64, M85Write1, M85Read_EX2, M85Read_EX2],
             (instregex "VMOVSRR")>;
def : InstRW<[M85GroupABLat2D, M85Write2],
             (instregex "VMOV(RRD|RRS)")>;

// These shouldn't even exist, but Cortex-m55 defines them, so here they are.
def : InstRW<[WriteFPMOV, M85Read_EX2],
             (instregex "VGETLNi32$")>;
def : InstRW<[M85GroupABLat2S],
             (instregex "VSETLNi32")>;

// Larger-latency overrides
//
// Each entry lists a resource-free M85OverrideVFPLat* write first, followed
// by the write that carries the resources and the full latency.
// NOTE(review): the comments on the WriteFPDIV*/WriteFPSQRT* definitions say
// the effective scheduling latency is 3, while the divide and square-root
// overrides below use M85OverrideVFPLat2 - confirm which is intended.

def M85FPDIV16 : SchedWriteRes<[M85UnitVFPB, M85UnitVPort, M85UnitSlot0]> {
  let Latency = 8;
}
def : InstRW<[M85OverrideVFPLat2, M85FPDIV16],    (instregex "VDIVH")>;
def : InstRW<[M85OverrideVFPLat2, WriteFPDIV32],  (instregex "VDIVS")>;
def : InstRW<[M85OverrideVFPLat2, WriteFPDIV64],  (instregex "VDIVD")>;
def : InstRW<[M85OverrideVFPLat2, M85FPDIV16],    (instregex "VSQRTH")>;
def : InstRW<[M85OverrideVFPLat2, WriteFPSQRT32], (instregex "VSQRTS")>;
def : InstRW<[M85OverrideVFPLat2, WriteFPSQRT64], (instregex "VSQRTD")>;
def : InstRW<[M85OverrideVFPLat3, WriteFPMUL64],  (instregex "V(MUL|NMUL)D")>;
def : InstRW<[M85OverrideVFPLat2, WriteFPALU64],  (instregex "V(ADD|SUB)D")>;

// Multiply-accumulate. Chained SP timing is correct; rest need overrides
// Double-precision chained MAC should also be seen as having latency of 5,
// as stalls stall everything.

def : InstRW<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL],
             (instregex "VN?ML(A|S)H")>;

def : InstRW<[M85OverrideVFPLat5, WriteFPMAC64,
              ReadFPMUL, ReadFPMUL, ReadFPMUL],
             (instregex "VN?ML(A|S)D$")>;

// Single-precision fused MACs look like latency 4 with advance of 2.

def M85ReadFPMAC2 : SchedReadAdvance<2>;

def : InstRW<[M85OverrideVFPLat4, WriteFPMAC32,
              M85ReadFPMAC2, ReadFPMUL, ReadFPMUL],
             (instregex "VF(N)?M(A|S)(H|S)$")>;

// Double-precision fused MAC looks like latency 4.

def : InstRW<[M85OverrideVFPLat4, WriteFPMAC64,
              ReadFPMUL, ReadFPMUL, ReadFPMUL],
             (instregex "VF(N)?M(A|S)D$")>;

// MVE beatwise instructions
// NOTE: Q-register timing for the 2nd beat is off by a cycle and needs
//       DAG overrides to correctly set latencies.
// NOTE2: MVE integer MAC->MAC accumulate latencies are set as if the
//        accumulate value arrives from an unmatching MAC instruction;
//        matching ones are handled via DAG mutation. These are marked as
//        "limited accumulate bypass"

// MVE write classes, grouped by latency. All of them end the issue group.
// Records that use a VFP unit pair hold both halves for two cycles.
// NOTE(review): the *MveR records carry a latency two cycles larger than the
// LatN in their name (e.g. M85GrpALat2MveR is in the Latency = 4 group) -
// presumably deliberate; confirm.
let Latency = 4, EndGroup = 1 in {
  def M85GrpALat2MveR : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH,
                                       M85UnitVPortL, M85UnitVPortH,
                                       M85UnitSlot0]> {
    let ReleaseAtCycles = [2,2,1,1,1];
  }
  def M85GrpABLat2MveR : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH,
                                        M85UnitSlot0]>;
  def M85GrpBLat2MveR : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH,
                                       M85UnitVPortL, M85UnitVPortH,
                                       M85UnitSlot0]> {
    let ReleaseAtCycles = [2,2,1,1,1];
  }
  def M85Lat2MveR : SchedWriteRes<[]> { let NumMicroOps = 0; }
  def M85GrpBLat4Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH,
                                      M85UnitVPortL, M85UnitVPortH,
                                      M85UnitSlot0]> {
    let ReleaseAtCycles = [2,2,1,1,1];
  }
}
let Latency = 3, EndGroup = 1 in {
  def M85GrpBLat3Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH,
                                      M85UnitVPortL, M85UnitVPortH,
                                      M85UnitSlot0]> {
    let ReleaseAtCycles = [2,2,1,1,1];
  }
  def M85GrpBLat1MveR : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH,
                                       M85UnitVPortL, M85UnitVPortH,
                                       M85UnitSlot0]> {
    let ReleaseAtCycles = [2,2,1,1,1];
  }
  def M85Lat1MveR : SchedWriteRes<[]> { let NumMicroOps = 0; }
}
let Latency = 2, EndGroup = 1 in {
  def M85GrpALat2Mve : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH,
                                      M85UnitVPortL, M85UnitVPortH,
                                      M85UnitSlot0]> {
    let ReleaseAtCycles = [2,2,1,1,1];
  }
  def M85GrpABLat2Mve : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH,
                                       M85UnitSlot0]>;
  def M85GrpBLat2Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH,
                                      M85UnitVPortL, M85UnitVPortH,
                                      M85UnitSlot0]> {
    let ReleaseAtCycles = [2,2,1,1,1];
  }
  def M85Lat2Mve : SchedWriteRes<[]> { let NumMicroOps = 0; }
}
let Latency = 1, EndGroup = 1 in {
  def M85GrpALat1Mve : SchedWriteRes<[M85UnitVFPAL, M85UnitVFPAH,
                                      M85UnitVPortL, M85UnitVPortH,
                                      M85UnitSlot0]> {
    let ReleaseAtCycles = [2,2,1,1,1];
  }
  def M85GrpABLat1Mve : SchedWriteRes<[M85UnitVPortL, M85UnitVPortH,
                                       M85UnitSlot0]>;
  def M85GrpBLat1Mve : SchedWriteRes<[M85UnitVFPBL, M85UnitVFPBH,
                                      M85UnitVPortL, M85UnitVPortH,
                                      M85UnitSlot0]> {
    let ReleaseAtCycles = [2,2,1,1,1];
  }
  def M85GrpCLat1Mve : SchedWriteRes<[M85UnitVFPCL, M85UnitVFPCH,
                                      M85UnitVPortL, M85UnitVPortH,
                                      M85UnitSlot0]> {
    let ReleaseAtCycles = [2,2,1,1,1];
  }
  def M85GrpDLat1Mve : SchedWriteRes<[M85UnitVFPD,
                                      M85UnitVPortL, M85UnitVPortH,
                                      M85UnitSlot0]> {
    let ReleaseAtCycles = [2,1,1,1];
  }
}

def : InstRW<[M85GrpABLat1Mve, M85Read_EX1, M85Read_EX2, M85Read_EX2],
             (instregex "MVE_VMOV_q_rr")>;

def : InstRW<[M85GrpABLat1Mve, M85Read_EX2],
             (instregex "MVE_VMOV_to_lane_(8|16|32)")>;

def : InstRW<[M85GrpABLat1Mve],
             (instregex "MVE_VAND$",
                        "MVE_VBIC$", "MVE_VBICimm",
                        "MVE_VCLSs(8|16|32)",
                        "MVE_VCLZs(8|16|32)",
                        "MVE_VEOR",
                        "MVE_VMOVimmf32", "MVE_VMOVimmi(8|16|32|64)",
                        "MVE_VMVN$", "MVE_VMVNimmi(16|32)",
                        "MVE_VORN$",
                        "MVE_VORR$", "MVE_VORRimm", "MQPRCopy",
                        "MVE_VPSEL",
                        "MVE_VREV(16|32|64)_(8|16|32)"
                        )>;

def : InstRW<[M85GrpABLat2MveR, M85Lat2MveR],
             (instregex "MVE_VMOV_rr_q")>;

def : InstRW<[M85GrpABLat2MveR],
             (instregex "MVE_VMOV_from_lane_(32|u8|s8|u16|s16)")>;

def : InstRW<[M85GrpALat1Mve, M85Lat1MveR,
              M85Read_EX1, M85Read_EX1, M85Read_EX2],
             (instregex "MVE_VADC$")>;

def : InstRW<[M85GrpALat1Mve, M85Lat1MveR],
             (instregex "MVE_VADCI")>;

def : InstRW<[M85GrpALat1Mve, M85Read_EX1, M85Read_EX2],
             (instregex "MVE_VADD_qr_i(8|16|32)",
                        "MVE_VBRSR(16|32|8)",
                        "MVE_VHADD_qr_[su](8|16|32)",
                        "MVE_VHSUB_qr_[su](8|16|32)",
                        "MVE_VQADD_qr_[su](8|16|32)",
                        "MVE_VQSUB_qr_[su](8|16|32)",
                        "MVE_VSHL_qr[su](8|16|32)",
                        "MVE_VSUB_qr_i(8|16|32)"
                        )>;

def : InstRW<[M85GrpALat1Mve],
             (instregex
"MVE_VABD(s|u)(8|16|32)", 797 "MVE_VABS(s|u)(8|16|32)", 798 "MVE_V(MAX|MIN)A?[us](8|16|32)", 799 "MVE_VADDi(8|16|32)", 800 "MVE_VCADDi(8|16|32)", 801 "MVE_VHCADDs(8|16|32)", 802 "MVE_VHSUB[su](8|16|32)", 803 "MVE_VMOVL[su](8|16)[tb]h", 804 "MVE_VMOVNi(16|32)[tb]h", 805 "MVE_VMULL[BT]?[p](8|16|32)(bh|th)?", 806 "MVE_VNEGs(8|16|32)", 807 "MVE_VQABSs(8|16|32)", 808 "MVE_VQADD[su](8|16|32)", 809 "MVE_VQNEGs(8|16|32)", 810 "MVE_VQSUB[su](8|16|32)", 811 "MVE_VR?HADD[su](8|16|32)", 812 "MVE_VSBC$", "MVE_VSBCI", 813 "MVE_VSHL_by_vec[su](8|16|32)", 814 "MVE_VSHL_immi(8|16|32)", 815 "MVE_VSHLL_imm[su](8|16)[bt]h", 816 "MVE_VSHLL_lw[su](8|16)[bt]h", 817 "MVE_VSHRNi(16|32)[bt]h", 818 "MVE_VSHR_imm[su](8|16|32)", 819 "MVE_VSLIimm[su]?(8|16|32)", 820 "MVE_VSRIimm[su]?(8|16|32)", 821 "MVE_VSUBi(8|16|32)" 822 )>; 823 824def : InstRW<[M85GrpALat2Mve, M85Lat2MveR, M85Read_EX2, M85Read_EX2], 825 (instregex "MVE_V(D|I)WDUPu(8|16|32)")>; 826 827def : InstRW<[M85GrpALat2Mve, M85Lat2MveR, M85Read_EX2], 828 (instregex "MVE_V(D|I)DUPu(8|16|32)")>; 829 830def : InstRW<[M85GrpALat2Mve, M85Read_EX1, M85Read_EX2], 831 (instregex "MVE_V(Q|R|QR)SHL_qr[su](8|16|32)", 832 "MVE_VADD_qr_f(16|32)", 833 "MVE_VSUB_qr_f(16|32)" 834 )>; 835 836def : InstRW<[M85GrpALat1Mve, M85Read_EX2], 837 (instregex "MVE_VDUP(8|16|32)")>; 838 839def : InstRW<[M85GrpBLat1Mve], 840 (instregex "MVE_VABSf(16|32)", 841 "MVE_V(MAX|MIN)NMA?f(16|32)", 842 "MVE_VNEGf(16|32)" 843 )>; 844 845def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR, M85Read_EX3, M85Read_EX3], 846 (instregex "MVE_VADDLV[us]32acc")>; 847 848def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR], 849 (instregex "MVE_VADDLV[us]32no_acc")>; 850 851def : InstRW<[M85GrpBLat2MveR, M85Read_EX3], 852 (instregex "MVE_VADDV[us](8|16|32)acc" 853 )>; 854 855def : InstRW<[M85GrpALat2MveR, M85Read_EX3], 856 (instregex "MVE_V(MAX|MIN)A?V[us](8|16|32)", 857 "MVE_VABAV(s|u)(8|16|32)" 858 )>; 859 860def : InstRW<[M85GrpALat2MveR], 861 (instregex "MVE_VADDV[us](8|16|32)no_acc")>; 862 863def : 
InstRW<[M85GrpALat2Mve], 864 (instregex "MVE_V(Q|R|QR)SHL_by_vec[su](8|16|32)", 865 "MVE_VABDf(16|32)", 866 "MVE_VADDf(16|32)", 867 "MVE_VCADDf(16|32)", 868 "MVE_VQMOVU?N[su](8|16|32)[tb]h", 869 "MVE_VQR?SHL(U_)?imm[su](8|16|32)", 870 "MVE_VQR?SHRN[bt]h[su](16|32)", 871 "MVE_VQR?SHRUNs(16|32)[bt]h", 872 "MVE_VRSHR_imm[su](8|16|32)", 873 "MVE_VRSHRNi(16|32)[bt]h", 874 "MVE_VSUBf(16|32)" 875 )>; 876 877def : InstRW<[M85GrpBLat2MveR, M85Read_EX2], 878 (instregex "MVE_V(MAX|MIN)NMA?Vf(16|32)")>; 879 880def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2], 881 (instregex "MVE_VMUL_qr_i(8|16|32)")>; 882 883def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2], 884 (instregex "MVE_VQDMULL_qr_s(16|32)[tb]h")>; 885 886def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX2], 887 (instregex "MVE_VQR?DMULH_qr_s(8|16|32)")>; 888 889def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX1, M85Read_EX3], 890 // limited accumulate bypass 891 (instregex "MVE_VMLAS?_qr_i(8|16|32)")>; 892 893def : InstRW<[M85GrpBLat2Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2], 894 // limited accumulate bypass 895 (instregex "MVE_VQR?DMLAS?H_qrs(8|16|32)")>; 896 897def : InstRW<[M85GrpBLat2Mve], 898 // limited accumulate bypass 899 (instregex "MVE_VQR?DML[AS]DHX?s(8|16|32)")>; 900 901def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR, M85Read_EX3, M85Read_EX3], 902 (instregex "MVE_VR?ML[AS]LDAVH?ax?[su](8|16|32)")>; 903 904def : InstRW<[M85GrpBLat2MveR, M85Lat2MveR], 905 (instregex "MVE_VR?ML[AS]LDAVH?x?[su](8|16|32)")>; 906 907def : InstRW<[M85GrpBLat2MveR, M85Read_EX3], 908 (instregex "MVE_VML[AS]DAVax?[su](8|16|32)")>; 909 910def : InstRW<[M85GrpBLat2MveR], 911 (instregex "MVE_VML[AS]DAVx?[su](8|16|32)")>; 912 913def : InstRW<[M85GrpBLat2Mve], 914 (instregex "MVE_VCVTf16(u|s)16", "MVE_VCVTf32(u|s)32", 915 "MVE_VCVT(u|s)16f16", "MVE_VCVT(u|s)32f32", 916 "MVE_VCVTf16f32", "MVE_VCVTf32f16", 917 "MVE_VMULL[BT]?[su](8|16|32)(bh|th)?", 918 "MVE_VMUL(t1)*i(8|16|32)", 919 "MVE_VQDMULLs(16|32)[tb]h", 920 
"MVE_VQR?DMULHi(8|16|32)", 921 "MVE_VR?MULH[su](8|16|32)", 922 "MVE_VRINTf(16|32)" 923 )>; 924 925def : InstRW<[M85GrpBLat3Mve, M85Read_EX1, M85Read_EX2], 926 (instregex "MVE_VMUL_qr_f(16|32)")>; 927 928def : InstRW<[M85GrpBLat3Mve], 929 (instregex "MVE_VCMULf(16|32)", 930 "MVE_VMULf(16|32)" 931 )>; 932 933def : InstRW<[M85GrpBLat4Mve, M85Read_EX3, M85Read_EX1, M85Read_EX2], 934 (instregex "MVE_VFMA_qr_Sf(16|32)", // VFMAS 935 "MVE_VFMA_qr_f(16|32)" // VFMA 936 )>; 937 938def : InstRW<[M85GrpBLat4Mve, M85Read_EX3], 939 (instregex "MVE_VCMLAf(16|32)")>; 940 941def : InstRW<[M85GrpBLat4Mve, M85Read_EX3], 942 (instregex "MVE_VFM(A|S)f(16|32)")>; 943 944def : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2], 945 (instregex "MVE_VPTv(4|8)f(16|32)r")>; 946 947def : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX1, M85Read_EX2], 948 (instregex "MVE_VPTv(4|8|16)(i|s|u)(8|16|32)r")>; 949 950def : InstRW<[M85GrpCLat1Mve, M85Read_EX1, M85Read_EX2], 951 (instregex "MVE_VCMP[isu](8|16|32)r$", "MVE_VCMPf(16|32)r$")>; 952 953def : InstRW<[M85GrpDLat1Mve, M85Read_EX2], 954 (instregex "MVE_VCTP(8|16|32|64)")>; 955 956def : InstRW<[M85GrpCLat1Mve], 957 (instregex "MVE_VCMPf(16|32)$", "MVE_VCMP[isu](8|16|32)$", 958 "MVE_VPTv(4|8)f(16|32)$", 959 "MVE_VPTv(4|8|16)(i|s|u)(8|16|32)$" 960 )>; 961 962def : InstRW<[M85GrpDLat1Mve], 963 (instregex "MVE_VPNOT", 964 "MVE_VPST" 965 )>; 966 967def : InstRW<[M85Lat2MveR, M85GrpALat2Mve, M85Read_EX1, M85Read_EX2], 968 (instregex "MVE_VSHLC")>; 969 970// VFP instructions 971 972def : WriteRes<WriteVLD1, []>; 973def : WriteRes<WriteVLD2, []>; 974def : WriteRes<WriteVLD3, []>; 975def : WriteRes<WriteVLD4, []>; 976def : WriteRes<WriteVST1, []>; 977def : WriteRes<WriteVST2, []>; 978def : WriteRes<WriteVST3, []>; 979def : WriteRes<WriteVST4, []>; 980 981} // SchedModel = CortexCortexM85Model 982