//==- ARMScheduleM55.td - Arm Cortex-M55 Scheduling Definitions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Arm Cortex-M55 processors.
//
//===----------------------------------------------------------------------===//

// ===---------------------------------------------------------------------===//
// Cortex-M55 is a lot like the M4/M33 in terms of scheduling. It technically
// has an extra pipeline stage but that is unimportant for scheduling, just
// starting our model a stage later. The main points of interest over a
// Cortex-M4 are MVE instructions and the ability to dual issue thumb1
// instructions.
//
//
// MVE
//
// The EPU pipelines now include both MVE and FP instructions. It has four
// pipelines across 4 stages (E1-E4). These pipelines are "control",
// "load/store", "integer" and "float/mul". We start the schedule at E2 to line
// up with the rest of the pipeline we model, and take the latency as the time
// between reading registers (almost always in E2) and register write (or
// forward, if it allows it). This means that a lot of instructions (including
// loads) actually take 1 cycle (amazingly).
//
// Each MVE instruction needs to take 2 beats, each performing 64bits of the
// 128bit vector operation. So long as the beats are to different pipelines,
// the execution of the first-beat-of-the-second-instruction can overlap with
// the second-beat-of-the-first. For example a sequence of VLDR;VADD;VMUL;VSTR
// can look like this in a pipeline:
//                 1     2     3     4     5
//  LD/ST  :   VLDR  VLDR        VSTR  VSTR
//  INTEGER:         VADD  VADD
//  FP/MUL :               VMUL  VMUL
//
// But a sequence of VLDR;VLDRB;VADD;VSTR because the loads cannot overlap,
// looks like:
//                 1      2      3      4      5      6
//  LD/ST  :   VLDR   VLDR   VLDRB  VLDRB  VSTR   VSTR
//  INTEGER:                        VADD   VADD
//
// For this schedule, we currently model latencies and pipelines well for each
// instruction. MVE instructions take two beats, modelled using
// ReleaseAtCycles=[2].
//
//
// Dual Issue
//
// Cortex-M55 can dual issue two 16-bit T1 instructions providing one is one of
// NOPs, ITs, Brs, ADDri/SUBri, UXTB/H, SXTB/H and MOVri's. NOPs and IT's are
// not relevant (they will not appear when scheduling), Brs are only at the end
// of the block. The others are more useful, and where the problems arise.
//
// The first problem comes from the fact that we will only be seeing Thumb2
// instructions at the point in the pipeline where we do the scheduling. The
// Thumb2SizeReductionPass has not been run yet. Especially pre-ra scheduling
// (where the scheduler has the most freedom) we can only really guess at which
// instructions will become thumb1 instructions. We are quite optimistic, and
// may get some things wrong as a result.
//
// The other problem is one of telling llvm what to do exactly. The way we
// attempt to model this is:
//   Set IssueWidth to 2 to allow 2 instructions per cycle.
//   All instructions we cannot dual issue are "SingleIssue=1" (MVE/FP and T2
//     instructions)
//   We guess at another set of instructions that will become T1 instructions.
//     These become the primary instruction in a dual issue pair (the normal
//     one). These use normal resources and latencies, but set SingleIssue = 0.
//   We guess at another set of instructions that will be shrunk down into T1 DI
//     instructions (add, sub, mov's, etc), which become the secondary.
//     These don't use a resource, and set SingleIssue = 0.
//
// So our guessing is a bit rough. It may be possible to improve this by moving
// the T2SizeReduction pass earlier in the pipeline, for example, so that at
// least Post-RA scheduling sees what is T1/T2. It may also be possible to write
// a custom instruction matcher to more accurately guess at T1 instructions.


// Top-level machine model for the Cortex-M55.
def CortexM55Model : SchedMachineModel {
  // Explicitly set to zero since M55 is in-order.
  let MicroOpBufferSize = 0;
  // There is some dual-issue support in M55.
  let IssueWidth = 2;
  // Default is 10
  let MispredictPenalty = 3;
  // Default is 4
  let LoadLatency = 4;
  let PostRAScheduler = 1;
  let FullInstRWOverlapCheck = 1;

  let CompleteModel = 0;
  let UnsupportedFeatures = [
    IsARM, HasNEON, HasDotProd, HasMatMulInt8, HasZCZ, IsNotMClass, HasV8,
    HasV8_3a, HasTrustZone, HasDFB, IsWindows
  ];
}


let SchedModel = CortexM55Model in {

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available.

// Each pipeline is modelled as a single-unit ProcResource with BufferSize = 0,
// since M55 is in-order.
let BufferSize = 0 in {
  // Int ALU
  def M55UnitALU       : ProcResource<1>;
  // MVE integer pipe
  def M55UnitVecALU    : ProcResource<1>;
  // MVE float pipe
  def M55UnitVecFPALU  : ProcResource<1>;
  // MVE load/store pipe
  def M55UnitLoadStore : ProcResource<1>;
  // MVE control/sys pipe
  def M55UnitVecSys    : ProcResource<1>;
}

// Some VMOV's can go down either pipeline. FIXME: This M55Write2IntFPE2 is
// intended to model the VMOV taking either Int or FP for 2 cycles. It is not
// clear if the llvm scheduler is using it like we want though.
// Resource group covering both the MVE integer and the MVE float pipe.
def M55UnitVecIntFP : ProcResGroup<[M55UnitVecALU, M55UnitVecFPALU]>;


//===----------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types which both map the ProcResources and
// set the latency.

//=====//
// ALU //
//=====//

// Generic writes for flags, GPRs and other extra operands (e.g. post-inc,
// vadc flags, vaddlv etc). They use no resource and no micro-ops; only the
// latency differs.
let NumMicroOps = 0 in {
  def M55WriteLat0 : SchedWriteRes<[]> { let Latency = 0; }
  def M55WriteLat1 : SchedWriteRes<[]> { let Latency = 1; }
  def M55WriteLat2 : SchedWriteRes<[]> { let Latency = 2; }
}

// DX instructions are ALU instructions that take a single cycle. The
// instructions that may be shrunk to T1 (and can be dual issued) are
// SingleIssue = 0. The others are SingleIssue = 1.
let Latency = 1 in {
  let SingleIssue = 0 in {
    def : WriteRes<WriteALU,   [M55UnitALU]>;
    def : WriteRes<WriteCMP,   [M55UnitALU]>;
    def : WriteRes<WriteBr,    [M55UnitALU]>;
    def : WriteRes<WriteBrL,   [M55UnitALU]>;
    def : WriteRes<WriteBrTbl, [M55UnitALU]>;
    def : WriteRes<WriteST,    [M55UnitALU]>;
    def M55WriteDX_DI : SchedWriteRes<[M55UnitALU]>;
  }
  let SingleIssue = 1 in {
    def : WriteRes<WritePreLd, [M55UnitALU]>;
    def M55WriteDX_SI : SchedWriteRes<[M55UnitALU]>;
  }
}

// Single-cycle instructions that are always single issued.
def : InstRW<[M55WriteDX_SI],
             (instregex "t2BF[CI]", "t2CPS", "t2DBG", "t2MRS", "t2MSR",
                        "t2SEL", "t2SG", "t2TT")>;
def : InstRW<[M55WriteDX_SI],
             (instregex "t2SUBS_PC_LR", "COPY")>;
def : InstRW<[M55WriteDX_SI],
             (instregex "t2CS(EL|INC|INV|NEG)")>;
// Thumb 2 instructions that could be reduced to a thumb 1 instruction and can
// be dual issued with one of the above. This list is optimistic.
// Thumb 2 instructions that may be narrowed to a 16-bit Thumb 1 encoding; they
// are given the single-cycle, dual-issuable M55WriteDX_DI write. This list is
// optimistic.
//
// Add-with-carry is matched as "t2ADCrr$". The former pattern "t2ADDC?rr$"
// could only ever match t2ADDrr (already covered by "t2ADDrr$"), leaving the
// T2 counterpart of tADC / sibling of t2SBCrr out of this list.
def : InstRW<[M55WriteDX_DI], (instregex "t2ADCrr$", "t2ADDrr$",
           "t2ADDSrr$", "t2ANDrr$", "t2ASRr[ir]$", "t2BICrr$", "t2CMNzrr$",
           "t2CMPr[ir]$", "t2EORrr$", "t2LSLr[ir]$", "t2LSRr[ir]$", "t2MVNr$",
           "t2ORRrr$", "t2REV(16|SH)?$", "t2RORrr$", "t2RSBr[ir]$", "t2RSBSri$",
           "t2SBCrr$", "t2SUBS?rr$", "t2TEQrr$", "t2TSTrr$", "t2STRi12$",
           "t2STRs$", "t2STRBi12$", "t2STRBs$", "t2STRHi12$", "t2STRHs$",
           "t2STR_POST$", "t2STMIA$", "t2STMIA_UPD$", "t2STMDB$", "t2STMDB_UPD$")>;
// Thumb 1 instructions (plus t2SETPAN) using the same write.
def : InstRW<[M55WriteDX_DI], (instregex "t2SETPAN$", "tADC$", "tADDhirr$",
           "tADDrSP$", "tADDrSPi$", "tADDrr$", "tADDspi$", "tADDspr$", "tADR$",
           "tAND$", "tASRri$", "tASRrr$", "tBIC$", "tBKPT$", "tCBNZ$", "tCBZ$",
           "tCMNz$", "tCMPhir$", "tCMPi8$", "tCMPr$", "tCPS$", "tEOR$", "tHINT$",
           "tHLT$", "tLSLri$", "tLSLrr$", "tLSRri$", "tLSRrr$", "tMOVSr$",
           "tMUL$", "tMVN$", "tORR$", "tPICADD$", "tPOP$", "tPUSH$", "tREV$",
           "tREV16$", "tREVSH$", "tROR$", "tRSB$", "tSBC$", "tSETEND$",
           "tSTMIA_UPD$", "tSTRBi$", "tSTRBr$", "tSTRHi$", "tSTRHr$", "tSTRi$",
           "tSTRr$", "tSTRspi$", "tSUBrr$", "tSUBspi$", "tSVC$", "tTRAP$",
           "tTST$", "tUDF$")>;
// Thumb 1 branches.
def : InstRW<[M55WriteDX_DI], (instregex "tB$", "tBLXNSr$", "tBLXr$", "tBX$",
           "tBXNS$", "tBcc$")>;


// CX instructions take 2 (or more) cycles.
// Again, T1 instructions may be dual issued (SingleIssue = 0).
let Latency = 2 in {
  let SingleIssue = 0 in {
    def : WriteRes<WriteLd, [M55UnitALU]>;
    def M55WriteCX_DI : SchedWriteRes<[M55UnitALU]>;
  }
  let SingleIssue = 1 in {
    def : WriteRes<WriteALUsi,  [M55UnitALU]>;
    def : WriteRes<WriteALUsr,  [M55UnitALU]>;
    def : WriteRes<WriteALUSsr, [M55UnitALU]>;
    def : WriteRes<WriteCMPsi,  [M55UnitALU]>;
    def : WriteRes<WriteCMPsr,  [M55UnitALU]>;
    def : WriteRes<WriteDIV,    [M55UnitALU]>;
    def M55WriteCX_SI : SchedWriteRes<[M55UnitALU]>;
  }
}

// Multiplies and multiply-accumulates use the single-issue two-cycle write.
// The "Hi" half of the 64-bit forms uses no resource and has latency 2.
def : SchedAlias<WriteMUL16,   M55WriteCX_SI>;
def : SchedAlias<WriteMUL32,   M55WriteCX_SI>;
def : SchedAlias<WriteMUL64Lo, M55WriteCX_SI>;
def : WriteRes<WriteMUL64Hi, []> {
  let Latency = 2;
}
def : SchedAlias<WriteMAC16,   M55WriteCX_SI>;
def : SchedAlias<WriteMAC32,   M55WriteCX_SI>;
def : SchedAlias<WriteMAC64Lo, M55WriteCX_SI>;
def : WriteRes<WriteMAC64Hi, []> {
  let Latency = 2;
}

def : InstRW<[M55WriteCX_SI],
             (instregex "t2CDP", "t2CLREX", "t2[DI][MS]B", "t2MCR",
                        "t2MOVSs[ir]", "t2MRC", "t2MUL", "t2STC")>;
def : InstRW<[M55WriteCX_SI],
             (instregex "t2Q", "t2[SU](ADD|ASX|BFX|DIV)",
                        "t2[SU]H(ADD|ASX|SUB|SAX)", "t2SM[LM]",
                        "t2S(SAT|SUB|SAX)", "t2UQ", "t2USA", "t2USUB",
                        "t2UXTA[BH]")>;
def : InstRW<[M55WriteCX_SI],
             (instregex "t2LD[AC]", "t2STL", "t2STRD")>;
def : InstRW<[M55WriteCX_SI],
             (instregex "MVE_[SU]Q?R?SH[LR]$")>;
def : InstRW<[M55WriteCX_SI, M55WriteLat2],
             (instregex "MVE_ASRL", "MVE_LSLL", "MVE_LSRL",
                        "MVE_[SU]Q?R?SH[LR]L")>;
// This may be higher in practice, but that likely doesn't make a difference
// for scheduling.
def : InstRW<[M55WriteCX_SI],
             (instregex "t2CLRM")>;

def : InstRW<[M55WriteCX_DI],
             (instregex "t2LDR[BH]?i12$", "t2LDRS?[BH]?s$", "t2LDM")>;
def : InstRW<[M55WriteCX_DI],
             (instregex "tLDM", "tLDRBi$", "tLDRBr$", "tLDRHi$", "tLDRHr$",
                        "tLDRSB$", "tLDRSH$", "tLDRi$", "tLDRpci$", "tLDRr$",
                        "tLDRspi$")>;

// Dual Issue instructions
let SingleIssue = 0, Latency = 1 in {
  def : WriteRes<WriteNoop, []>;
  def M55WriteDI : SchedWriteRes<[]>;
}

def : InstRW<[M55WriteDI],
             (instregex "tADDi[38]$", "tSUBi[38]$", "tMOVi8$", "tMOVr$",
                        "tUXT[BH]$", "tSXT[BH]$")>;
// Thumb 2 instructions that could be reduced to a dual issuable Thumb 1
// instruction above.
def : InstRW<[M55WriteDI],
             (instregex "t2ADDS?ri$", "t2MOV[ir]$", "t2MOVi16$", "t2MOVr$",
                        "t2SUBS?ri$", "t2[US]XT[BH]$")>;
def : InstRW<[M55WriteDI],
             (instregex "t2IT", "IT")>;


def : InstRW<[M55WriteLat0],
             (instregex "t2LoopDec")>;

// Forwarding

// No forwarding in the ALU normally
def : ReadAdvance<ReadALU,   0>;
def : ReadAdvance<ReadALUsr, 0>;
def : ReadAdvance<ReadMUL,   0>;
def : ReadAdvance<ReadMAC,   0>;

//=============//
// MVE and VFP //
//=============//

// The Writes that take ReleaseAtCycles=[2] are MVE instructions, the others VFP.
// Every write in this group is SingleIssue = 1. Writes named "Write2..." hold
// their pipe for two cycles (ReleaseAtCycles = [2]).
let SingleIssue = 1 in {

let Latency = 1 in {
  def M55WriteLSE2    : SchedWriteRes<[M55UnitLoadStore]>;
  def M55WriteIntE2   : SchedWriteRes<[M55UnitVecALU]>;
  def M55WriteFloatE2 : SchedWriteRes<[M55UnitVecFPALU]>;
  def M55WriteSysE2   : SchedWriteRes<[M55UnitVecSys]>;

  let ReleaseAtCycles = [2] in {
    def M55Write2LSE2    : SchedWriteRes<[M55UnitLoadStore]>;
    def M55Write2IntE2   : SchedWriteRes<[M55UnitVecALU]>;
    def M55Write2FloatE2 : SchedWriteRes<[M55UnitVecFPALU]>;
    def M55Write2IntFPE2 : SchedWriteRes<[M55UnitVecIntFP]>;
  }
}

let Latency = 2 in {
  def M55WriteLSE3    : SchedWriteRes<[M55UnitLoadStore]>;
  def M55WriteIntE3   : SchedWriteRes<[M55UnitVecALU]>;
  def M55WriteFloatE3 : SchedWriteRes<[M55UnitVecFPALU]>;

  let ReleaseAtCycles = [2] in {
    def M55Write2LSE3    : SchedWriteRes<[M55UnitLoadStore]>;
    def M55Write2IntE3   : SchedWriteRes<[M55UnitVecALU]>;
    def M55Write2FloatE3 : SchedWriteRes<[M55UnitVecFPALU]>;
  }
}

let Latency = 3, ReleaseAtCycles = [2] in {
  def M55Write2IntE3Plus1 : SchedWriteRes<[M55UnitVecALU]>;

  // Same as M55Write2IntE3/M55Write2FloatE3 above, but longer latency and no
  // forwarding into stores
  def M55Write2IntE4NoFwd   : SchedWriteRes<[M55UnitVecALU]>;
  def M55Write2FloatE4NoFwd : SchedWriteRes<[M55UnitVecFPALU]>;
}

let Latency = 4 in {
  def M55Write2IntE3Plus2 : SchedWriteRes<[M55UnitVecALU]> {
    let ReleaseAtCycles = [2];
  }
  def M55WriteFloatE3Plus2 : SchedWriteRes<[M55UnitVecFPALU]>;
}

// Long-latency writes on the float pipe.
def M55WriteFloatE3Plus7  : SchedWriteRes<[M55UnitVecFPALU]> { let Latency = 9; }
def M55WriteFloatE3Plus13 : SchedWriteRes<[M55UnitVecFPALU]> { let Latency = 15; }
def M55WriteFloatE3Plus14 : SchedWriteRes<[M55UnitVecFPALU]> { let Latency = 16; }
def M55WriteFloatE3Plus19 : SchedWriteRes<[M55UnitVecFPALU]> { let Latency = 21; }
// VMUL (Double precision) + VADD (Double precision)
def M55WriteFloatE3Plus22 : SchedWriteRes<[M55UnitVecFPALU]> { let Latency = 24; }
def M55WriteFloatE3Plus28 : SchedWriteRes<[M55UnitVecFPALU]> { let Latency = 30; }
def M55WriteFloatE3Plus34 : SchedWriteRes<[M55UnitVecFPALU]> { let Latency = 36; }

} // SingleIssue = 1

def M55Read0       : SchedReadAdvance<0>;
def M55Read1       : SchedReadAdvance<1, [M55Write2LSE3, M55Write2IntE3,
                                          M55Write2FloatE3]>;
def M55GatherQRead : SchedReadAdvance<-4>;

// MVE instructions

// Loads and Stores of different kinds

// Normal loads
def : InstRW<[M55Write2LSE2],
             (instregex "MVE_VLDR(B|H|W)(S|U)(8|16|32)$")>;
// Pre/post inc loads
def : InstRW<[M55WriteLat1, M55Write2LSE2],
             (instregex "MVE_VLDR(B|H|W)(S|U)(8|16|32)_(post|pre)$")>;
// Gather loads
def : InstRW<[M55Write2LSE3, M55Read0, M55GatherQRead],
             (instregex "MVE_VLDR(B|H|W|D)(S|U)(8|16|32|64)_rq")>;
def : InstRW<[M55Write2LSE3, M55GatherQRead],
             (instregex "MVE_VLDR(B|H|W|D)(S|U)(8|16|32|64)_qi$")>;
def : InstRW<[M55WriteLat1, M55Write2LSE3, M55GatherQRead],
             (instregex "MVE_VLDR(W|D)U(32|64)_qi_pre$")>;
// Interleaving loads
def : InstRW<[M55Write2LSE2],
             (instregex "MVE_VLD[24][0-3]_(8|16|32)$")>;
// Interleaving loads with wb
def : InstRW<[M55Write2LSE2, M55WriteLat1],
             (instregex "MVE_VLD[24][0-3]_(8|16|32)_wb$")>;

// Normal stores
def : InstRW<[M55Write2LSE2, M55Read1],
             (instregex "MVE_VSTR(B|H|W)U?(8|16|32)$")>;
// Pre/post inc stores
def : InstRW<[M55Write2LSE2, M55Read1],
             (instregex "MVE_VSTR(B|H|W)U?(8|16|32)_(post|pre)$")>;
// Scatter stores
def : InstRW<[M55Write2LSE2, M55Read0, M55Read0, M55GatherQRead],
             (instregex "MVE_VSTR(B|H|W|D)(8|16|32|64)_rq")>;
def : InstRW<[M55Write2LSE2, M55Read0, M55GatherQRead],
             (instregex "MVE_VSTR(B|H|W|D)(8|16|32|64)_qi")>;
// Interleaving stores
def : InstRW<[M55Write2LSE2],
             (instregex "MVE_VST(2|4)")>;

// Integer pipe operations

def : InstRW<[M55Write2IntE3Plus1],  (instregex "MVE_VABAV")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VABD(u|s)")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VABS(u|s)")>;
def : InstRW<[M55Write2IntE3],       (instregex "MVE_VADC")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VADD(_qr_)?i")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VAND")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VBIC")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VBRSR")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VCADDi")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VCLS")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VCLZ")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_V(D|I)?W?DUP")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VEOR")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VHADD")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VHCADD")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VHSUB")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_V(MAX|MIN)A?(s|u)")>;
def : InstRW<[M55Write2IntE3Plus2],  (instregex "MVE_V(MAX|MIN)A?V(s|u)8")>;
def : InstRW<[M55Write2IntE3Plus1],  (instregex "MVE_V(MAX|MIN)A?V(s|u)16")>;
def : InstRW<[M55Write2IntE3],       (instregex "MVE_V(MAX|MIN)A?V(s|u)32")>;
def : InstRW<[M55Write2IntE4NoFwd],  (instregex "MVE_VMOVN")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VMOVL")>;
def : InstRW<[M55Write2IntE3],       (instregex "MVE_VMULL[BT]p")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VMVN")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VNEG(u|s)")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VORN")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VORR")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VPSEL")>;
def : InstRW<[M55Write2IntE2],       (instregex "MQPRCopy")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VQABS")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VQADD")>;
def : InstRW<[M55Write2IntE4NoFwd],  (instregex "MVE_VQMOV")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VQNEG")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VSHL")>;
def : InstRW<[M55Write2IntE3],       (instregex "MVE_V[QR]SHL")>;
def : InstRW<[M55Write2IntE3],       (instregex "MVE_VQRSHL")>;
def : InstRW<[M55Write2IntE4NoFwd],  (instregex "MVE_VQ?R?SHRU?N")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VSHR_")>;
def : InstRW<[M55Write2IntE3],       (instregex "MVE_VRSHR_")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VQSUB")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VREV")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VRHADD")>;
def : InstRW<[M55Write2IntE3],       (instregex "MVE_VSBC")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VSLI")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VSRI")>;
def : InstRW<[M55Write2IntE2],       (instregex "MVE_VSUB(_qr_)?i")>;

// FP/Mul pipe operations.
// MVE instructions that go down the float/mul pipe.
def : InstRW<[M55Write2FloatE2],      (instregex "MVE_VABDf")>;
def : InstRW<[M55Write2FloatE2],      (instregex "MVE_VABSf")>;
def : InstRW<[M55Write2FloatE2],      (instregex "MVE_VADDf")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VADD_qr_f")>;
def : InstRW<[M55Write2FloatE3, M55WriteLat1],
                                      (instregex "MVE_VADDLV")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VADDV")>;
def : InstRW<[M55Write2FloatE2],      (instregex "MVE_VCADDf")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VCMLA")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VCMUL")>;
def : InstRW<[M55Write2FloatE2],      (instregex "MVE_VCMP(i|s|u)",
                                                 "MVE_VPTv(4|8|16)(i|s|u)")>;
def : InstRW<[M55Write2FloatE2],      (instregex "MVE_VCMPf",
                                                 "MVE_VPTv(4|8)f")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VCVTf16(u|s)16")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VCVTf32(u|s)32")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VCVT(u|s)16f16")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VCVT(u|s)32f32")>;
def : InstRW<[M55Write2FloatE4NoFwd], (instregex "MVE_VCVTf16f32")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VCVTf32f16")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VFM(A|S)")>;
def : InstRW<[M55Write2FloatE2],      (instregex "MVE_V(MIN|MAX)NM")>;
def : InstRW<[M55Write2FloatE2],      (instregex "MVE_VMOV_from_lane")>;
def : InstRW<[M55Write2FloatE2],      (instregex "MVE_VMOV_rr_q")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VMOVi")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VMUL(_qr_)?[if]")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VQ?R?D?MULH")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VQ?D?MULL[TB]?[su]")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VQDMULL_qr_")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VQ?R?D?ML(A|S)[^L]")>;
def : InstRW<[M55Write2FloatE3, M55WriteLat1],
                                      (instregex "MVE_VR?ML(A|S)L")>;
def : InstRW<[M55Write2FloatE2],      (instregex "MVE_VNEGf")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VRINTf")>;
def : InstRW<[M55Write2FloatE2],      (instregex "MVE_VSUBf")>;
def : InstRW<[M55Write2FloatE3],      (instregex "MVE_VSUB_qr_f")>;

// Some VMOV's can go down either pipeline.
def : InstRW<[M55Write2IntFPE2],
             (instregex "MVE_VMOV_to_lane", "MVE_VMOV_q_rr")>;

// Instructions on the control/sys pipe.
def : InstRW<[M55WriteSysE2], (instregex "MVE_VCTP")>;
def : InstRW<[M55WriteSysE2], (instregex "MVE_VPNOT")>;
def : InstRW<[M55WriteSysE2], (instregex "MVE_VPST")>;


// VFP instructions

def : SchedAlias<WriteFPCVT,    M55WriteFloatE3>;
def : SchedAlias<WriteFPMOV,    M55WriteFloatE3>;
def : SchedAlias<WriteFPALU32,  M55WriteFloatE3>;
def : SchedAlias<WriteFPALU64,  M55WriteFloatE3Plus13>;
def : SchedAlias<WriteFPMUL32,  M55WriteFloatE3>;
def : SchedAlias<WriteFPMUL64,  M55WriteFloatE3Plus19>;
def : SchedAlias<WriteFPMAC32,  M55WriteFloatE3Plus2>;
def : SchedAlias<WriteFPMAC64,  M55WriteFloatE3Plus34>;
def : SchedAlias<WriteFPDIV32,  M55WriteFloatE3Plus14>;
def : SchedAlias<WriteFPDIV64,  M55WriteFloatE3Plus28>;
def : SchedAlias<WriteFPSQRT32, M55WriteFloatE3Plus14>;
def : SchedAlias<WriteFPSQRT64, M55WriteFloatE3Plus28>;
def : ReadAdvance<ReadFPMUL, 0>;
def : ReadAdvance<ReadFPMAC, 0>;

def : InstRW<[M55WriteLSE3], (instregex "VLD")>;
def : InstRW<[M55WriteLSE2], (instregex "VST")>;
def : InstRW<[M55WriteLSE3], (instregex "VLLD", "VLST")>;

def : InstRW<[M55WriteFloatE3],
             (instregex "VABS(H|S|D)")>;
def : InstRW<[M55WriteFloatE3],
             (instregex "VCVT(A|M|N|P|R|X|Z)(S|U)(H|S|D)")>;
def : InstRW<[M55WriteFloatE3],
             (instregex "VCVT(B|T)(DH|HD)")>;
def : InstRW<[M55WriteFloatE2],
             (instregex "VCMPZ?(E|H|S|D)")>;
def : InstRW<[M55WriteFloatE3Plus7],
             (instregex "VDIVH")>;
// VFMA
def : InstRW<[M55WriteFloatE3],
             (instregex "VFN?M(A|S)(H|S)")>;
// VFMA
def : InstRW<[M55WriteFloatE3Plus22],
             (instregex "VFN?M(A|S)D")>;
def : InstRW<[M55WriteFloatE3],
             (instregex "VFP_V(MAX|MIN)NM")>;
// VINS, VMOVX, to-FP reg movs
def : InstRW<[M55WriteFloatE3],
             (instregex "VINSH$", "VMOVH$", "VMOVHR$", "VMOVSR$", "VMOVDRR$")>;
// Other VMOV's
def : InstRW<[M55WriteFloatE2],
             (instregex "VMOVD$", "VMOVS$", "VMOVR")>;
def : InstRW<[M55WriteFloatE2],
             (instregex "FCONSTH", "FCONSTS", "FCONSTD")>;
def : InstRW<[M55WriteFloatE2],
             (instregex "VGETLNi32", "VSETLNi32")>;
def : InstRW<[M55WriteFloatE2],
             (instregex "VMSR", "VMRS")>;
// VMLA
def : InstRW<[M55WriteFloatE3Plus2],
             (instregex "VN?ML(A|S)H")>;
def : InstRW<[M55WriteFloatE3],
             (instregex "VNEG(H|S|D)")>;
def : InstRW<[M55WriteFloatE3],
             (instregex "VRINT(A|M|N|P|R|X|Z)(H|S|D)")>;
def : InstRW<[M55WriteFloatE3],
             (instregex "VSEL..(H|S|D)")>;
def : InstRW<[M55WriteFloatE3Plus7],
             (instregex "VSQRTH")>;

// The WriteVLD*/WriteVST* writes are given empty resource lists.
def : WriteRes<WriteVLD1, []>;
def : WriteRes<WriteVLD2, []>;
def : WriteRes<WriteVLD3, []>;
def : WriteRes<WriteVLD4, []>;
def : WriteRes<WriteVST1, []>;
def : WriteRes<WriteVST2, []>;
def : WriteRes<WriteVST3, []>;
def : WriteRes<WriteVST4, []>;

}