//==- RISCVSchedSyntacoreSCR7.td - Syntacore SCR7 Sched Defs -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

// This file describes the scheduling model for the
// rv64imafdcv_zba_zbb_zbc_zbs configuration of the Syntacore SCR7 processor.
// Overview: https://syntacore.com/products/scr7

// SCR7 is an out-of-order superscalar dual-issue core.
// FIXME: add V and Zkn extensions scheduling model
def SyntacoreSCR7Model : SchedMachineModel {
  let MicroOpBufferSize = 36;
  let IssueWidth = 2; // Dual-issue.
  let MispredictPenalty = 9;
  let LoadLatency = 3;
  // The model is declared incomplete; see the FIXME above.
  let CompleteModel = 0;
  let UnsupportedFeatures = [HasStdExtZksed, HasStdExtZksh, HasStdExtZkr,
                             HasStdExtZcmt, HasVInstructions];
}

// Branching
// Every jump/branch write is bound to the single resource BRU, with no
// WriteRes field overridden.
multiclass SCR7_Branching<ProcResourceKind BRU> {
  foreach JumpWrite = [WriteJmp, WriteJal, WriteJalr] in
    def : WriteRes<JumpWrite, [BRU]>;
}

// Single-cycle integer arithmetic and logic
// Plain ALU operations and immediate/register shifts (full-width and 32-bit
// forms) are all bound to the resource ALU.
multiclass SCR7_IntALU<ProcResourceKind ALU> {
  foreach AluWrite = [WriteIALU, WriteIALU32,
                      WriteShiftImm, WriteShiftImm32,
                      WriteShiftReg, WriteShiftReg32] in
    def : WriteRes<AluWrite, [ALU]>;
}

// Pipelined integer multiplication
// Both multiply writes use the caller-supplied resource list and a latency of
// three cycles.
multiclass SCR7_IntMul<list<ProcResourceKind> Resources> {
  foreach MulWrite = [WriteIMul, WriteIMul32] in
    def : WriteRes<MulWrite, Resources> { let Latency = 3; }
}

// Common implementation for WriteIDiv and WriteIDiv32 sched writes.
// Shared body of SCR7_IntDiv and SCR7_IntDiv32.
//   Resources     - resources consumed by the division/remainder writes.
//   ReleaseCycles - ReleaseAtCycles value, one entry per element of Resources.
//   DivLatency    - latency assigned to both writes.
//   DivWrite      - SchedWrite of the division instruction.
//   RemWrite      - SchedWrite of the remainder instruction.
multiclass SCR7_IntDivImpl<list<ProcResourceKind> Resources,
                           list<int> ReleaseCycles, int DivLatency,
                           SchedWrite DivWrite, SchedWrite RemWrite> {
  let Latency = DivLatency, ReleaseAtCycles = ReleaseCycles in {
    def : WriteRes<DivWrite, Resources>;
    def : WriteRes<RemWrite, Resources>;
  }
}

// Non-pipelined integer division
// Binds WriteIDiv / WriteIRem.
multiclass SCR7_IntDiv<list<ProcResourceKind> Resources,
                       list<int> ReleaseCycles,
                       int DivLatency> {
  defm : SCR7_IntDivImpl<Resources,
                         ReleaseCycles,
                         DivLatency,
                         WriteIDiv,
                         WriteIRem>;
}

// Same as SCR7_IntDiv, but binds the 32-bit writes WriteIDiv32 / WriteIRem32.
multiclass SCR7_IntDiv32<list<ProcResourceKind> Resources,
                         list<int> ReleaseCycles,
                         int DivLatency> {
  defm : SCR7_IntDivImpl<Resources,
                         ReleaseCycles,
                         DivLatency,
                         WriteIDiv32,
                         WriteIRem32>;
}

// Bit-manipulation (Zba, Zbb, Zbs, Zbc)
// Every write is bound to the resource BMU with a latency of one cycle.
multiclass SCR7_Bitmanip<ProcResourceKind BMU> {
  let Latency = 1 in {
    // Zba
    def : WriteRes<WriteSHXADD, [BMU]>;
    def : WriteRes<WriteSHXADD32, [BMU]>;
    // Zbb
    def : WriteRes<WriteRotateImm, [BMU]>;
    def : WriteRes<WriteRotateImm32, [BMU]>;
    def : WriteRes<WriteRotateReg, [BMU]>;
    def : WriteRes<WriteRotateReg32, [BMU]>;
    def : WriteRes<WriteCLZ, [BMU]>;
    def : WriteRes<WriteCLZ32, [BMU]>;
    def : WriteRes<WriteCTZ, [BMU]>;
    def : WriteRes<WriteCTZ32, [BMU]>;
    def : WriteRes<WriteCPOP, [BMU]>;
    def : WriteRes<WriteCPOP32, [BMU]>;
    def : WriteRes<WriteREV8, [BMU]>;
    def : WriteRes<WriteORCB, [BMU]>;
    def : WriteRes<WriteIMinMax, [BMU]>;
    // Zbs (bext/bexti are single-bit instructions and belong here, not to Zbc)
    def : WriteRes<WriteSingleBit, [BMU]>;
    def : WriteRes<WriteSingleBitImm, [BMU]>;
    def : WriteRes<WriteBEXT, [BMU]>;
    def : WriteRes<WriteBEXTI, [BMU]>;
    // Zbc
    def : WriteRes<WriteCLMUL, [BMU]>;
  }
}

// Scalar crypto bit-manipulation (Zbkb, Zbkx)
// Every write is bound to the resource SCU with a latency of one cycle.
multiclass SCR7_ScalarCrypto<ProcResourceKind SCU> {
  let Latency = 1 in {
    // Zbkb
    def : WriteRes<WriteBREV8, [SCU]>;
    def : WriteRes<WritePACK, [SCU]>;
    def : WriteRes<WritePACK32, [SCU]>;
    def : WriteRes<WriteZIP, [SCU]>;
    // Zbkx
    def : WriteRes<WriteXPERM, [SCU]>;
  }
}

// Whole integer pipeline.
//   ALU_Any    - resource used by branches, bitmanip, scalar crypto and ALU ops.
//   ALU_DIV_IS - first resource of the division writes, paired with DIV.
//   DIV        - second resource of the division writes.
//   ALU_MUL_IS - first resource of the multiplication writes, paired with MUL.
//   MUL        - second resource of the multiplication writes.
multiclass SCR7_IntPipeline<ProcResourceKind ALU_Any,
                            ProcResourceKind ALU_DIV_IS,
                            ProcResourceKind DIV,
                            ProcResourceKind ALU_MUL_IS,
                            ProcResourceKind MUL> {
  defm : SCR7_Branching<ALU_Any>;
  defm : SCR7_Bitmanip<ALU_Any>;
  defm : SCR7_ScalarCrypto<ALU_Any>;
  defm : SCR7_IntALU<ALU_Any>;
  defm : SCR7_IntMul<[ALU_MUL_IS, MUL]>;
  defm : SCR7_IntDiv<[ALU_DIV_IS, DIV],
                     /* ReleaseAtCycles */[1, 35],
                     /* Latency */ 35>;
  defm : SCR7_IntDiv32<[ALU_DIV_IS, DIV],
                       /* ReleaseAtCycles */[1, 19],
                       /* Latency */ 19>;
}

// Load/store instructions
// Integer and floating-point loads and stores are bound to the resource LSU
// with a latency of three cycles.
multiclass SCR7_BasicMemory<ProcResourceKind LSU> {
  let Latency = 3 in {
    def : WriteRes<WriteSTB, [LSU]>;
    def : WriteRes<WriteSTH, [LSU]>;
    def : WriteRes<WriteSTW, [LSU]>;
    def : WriteRes<WriteSTD, [LSU]>;
    def : WriteRes<WriteLDB, [LSU]>;
    def : WriteRes<WriteLDH, [LSU]>;
    def : WriteRes<WriteLDW, [LSU]>;
    def : WriteRes<WriteLDD, [LSU]>;
    def : WriteRes<WriteFST32, [LSU]>;
    def : WriteRes<WriteFST64, [LSU]>;
    def : WriteRes<WriteFLD32, [LSU]>;
    def : WriteRes<WriteFLD64, [LSU]>;
  }
}

// Atomic memory
// WriteAtomicLDW/LDD get a latency of 19 cycles; the remaining atomic writes
// get 21 cycles. All are bound to the resource LSU.
multiclass SCR7_AtomicMemory<ProcResourceKind LSU> {
  let Latency = 19 in {
    def : WriteRes<WriteAtomicLDW, [LSU]>;
    def : WriteRes<WriteAtomicLDD, [LSU]>;
  }
  let Latency = 21 in {
    def : WriteRes<WriteAtomicW, [LSU]>;
    def : WriteRes<WriteAtomicD, [LSU]>;
    def : WriteRes<WriteAtomicSTW, [LSU]>;
    def : WriteRes<WriteAtomicSTD, [LSU]>;
  }
}

// Floating point
//   FPU_IS   - first resource of every FP write.
//   FALU     - second resource of add, sign-injection, min/max, conversion,
//              classify, compare and move writes.
//   FMA      - second resource of multiply and fused multiply-add writes.
//   FDIVSQRT - second resource of divide and square-root writes.
multiclass SCR7_FPU<ProcResourceKind FPU_IS, ProcResourceKind FALU,
                    ProcResourceKind FMA, ProcResourceKind FDIVSQRT> {
  // FALU operations
  let Latency = 4 in {
    def : WriteRes<WriteFAdd32, [FPU_IS, FALU]>;
    def : WriteRes<WriteFAdd64, [FPU_IS, FALU]>;
    def : WriteRes<WriteFSGNJ32, [FPU_IS, FALU]>;
    def : WriteRes<WriteFSGNJ64, [FPU_IS, FALU]>;
    def : WriteRes<WriteFMinMax32, [FPU_IS, FALU]>;
    def : WriteRes<WriteFMinMax64, [FPU_IS, FALU]>;

    def : WriteRes<WriteFCvtI32ToF32, [FPU_IS, FALU]>;
    def : WriteRes<WriteFCvtI32ToF64, [FPU_IS, FALU]>;
    def : WriteRes<WriteFCvtI64ToF32, [FPU_IS, FALU]>;
    def : WriteRes<WriteFCvtI64ToF64, [FPU_IS, FALU]>;
    def : WriteRes<WriteFCvtF32ToF64, [FPU_IS, FALU]>;
    def : WriteRes<WriteFCvtF64ToF32, [FPU_IS, FALU]>;
    def : WriteRes<WriteFCvtF32ToI32, [FPU_IS, FALU]>;
    def : WriteRes<WriteFCvtF32ToI64, [FPU_IS, FALU]>;
    def : WriteRes<WriteFCvtF64ToI32, [FPU_IS, FALU]>;
    def : WriteRes<WriteFCvtF64ToI64, [FPU_IS, FALU]>;

    def : WriteRes<WriteFClass32, [FPU_IS, FALU]>;
    def : WriteRes<WriteFClass64, [FPU_IS, FALU]>;

    def : WriteRes<WriteFCmp32, [FPU_IS, FALU]>;
    def : WriteRes<WriteFCmp64, [FPU_IS, FALU]>;

    def : WriteRes<WriteFMovI32ToF32, [FPU_IS, FALU]>;
    def : WriteRes<WriteFMovF32ToI32, [FPU_IS, FALU]>;
    def : WriteRes<WriteFMovI64ToF64, [FPU_IS, FALU]>;
    def : WriteRes<WriteFMovF64ToI64, [FPU_IS, FALU]>;
  }

  // FMA operations
  let Latency = 6 in {
    def : WriteRes<WriteFMul32, [FPU_IS, FMA]>;
    def : WriteRes<WriteFMul64, [FPU_IS, FMA]>;
    def : WriteRes<WriteFMA32, [FPU_IS, FMA]>;
    def : WriteRes<WriteFMA64, [FPU_IS, FMA]>;
  }

  // FDIV operations; ReleaseAtCycles entries pair with [FPU_IS, FDIVSQRT].
  def : WriteRes<WriteFDiv32, [FPU_IS, FDIVSQRT]> {
    let Latency = 16;
    let ReleaseAtCycles = [1, 15];
  }
  def : WriteRes<WriteFDiv64, [FPU_IS, FDIVSQRT]> {
    let Latency = 30;
    let ReleaseAtCycles = [1, 29];
  }

  // FSQRT operations; ReleaseAtCycles entries pair with [FPU_IS, FDIVSQRT].
  def : WriteRes<WriteFSqrt32, [FPU_IS, FDIVSQRT]> {
    let Latency = 18;
    let ReleaseAtCycles = [1, 16];
  }
  def : WriteRes<WriteFSqrt64, [FPU_IS, FDIVSQRT]> {
    let Latency = 32;
    let ReleaseAtCycles = [1, 30];
  }
}

// Others
// CSR accesses and nops are given no processor resources; COPY is scheduled
// as a plain integer ALU operation.
multiclass SCR7_Other {
  def : WriteRes<WriteCSR, []>;
  def : WriteRes<WriteNop, []>;

  def : InstRW<[WriteIALU], (instrs COPY)>;
}

// Unsupported scheduling classes
// for SCR7.
multiclass SCR7_Unsupported {
  defm : UnsupportedSchedQ;
  defm : UnsupportedSchedSFB;
  defm : UnsupportedSchedV;
  defm : UnsupportedSchedZabha;
  defm : UnsupportedSchedZfa;
  defm : UnsupportedSchedZfhmin;
  defm : UnsupportedSchedZvk;
  defm : UnsupportedSchedXsf;
}


// Bypasses (none)
// Every scheduling read listed below gets a ReadAdvance of zero cycles.
multiclass SCR7_NoReadAdvances {
  // Control flow, CSR and integer memory operands.
  foreach Rd = [ReadJmp, ReadJalr, ReadCSR, ReadStoreData, ReadMemBase] in
    def : ReadAdvance<Rd, 0>;

  // Integer ALU, shifts, division/remainder and multiplication.
  foreach Rd = [ReadIALU, ReadIALU32,
                ReadShiftImm, ReadShiftImm32,
                ReadShiftReg, ReadShiftReg32,
                ReadIDiv, ReadIDiv32,
                ReadIRem, ReadIRem32,
                ReadIMul, ReadIMul32] in
    def : ReadAdvance<Rd, 0>;

  // Atomics.
  foreach Rd = [ReadAtomicWA, ReadAtomicWD,
                ReadAtomicDA, ReadAtomicDD,
                ReadAtomicLDW, ReadAtomicLDD,
                ReadAtomicSTW, ReadAtomicSTD] in
    def : ReadAdvance<Rd, 0>;

  // Bit-manipulation and scalar crypto.
  foreach Rd = [ReadSHXADD, ReadSHXADD32,
                ReadRotateImm, ReadRotateImm32,
                ReadRotateReg, ReadRotateReg32,
                ReadCLZ, ReadCLZ32,
                ReadCTZ, ReadCTZ32,
                ReadCPOP, ReadCPOP32,
                ReadREV8, ReadORCB, ReadIMinMax, ReadCLMUL,
                ReadBREV8, ReadPACK, ReadPACK32, ReadZIP, ReadXPERM,
                ReadSingleBit, ReadSingleBitImm] in
    def : ReadAdvance<Rd, 0>;

  // Floating-point memory operands.
  foreach Rd = [ReadFStoreData, ReadFMemBase] in
    def : ReadAdvance<Rd, 0>;

  // Floating-point arithmetic, compare, sign-injection and min/max.
  foreach Rd = [ReadFAdd32, ReadFAdd64,
                ReadFMul32, ReadFMul64,
                ReadFMA32, ReadFMA32Addend,
                ReadFMA64, ReadFMA64Addend,
                ReadFDiv32, ReadFDiv64,
                ReadFSqrt32, ReadFSqrt64,
                ReadFCmp32, ReadFCmp64,
                ReadFSGNJ32, ReadFSGNJ64,
                ReadFMinMax32, ReadFMinMax64] in
    def : ReadAdvance<Rd, 0>;

  // Floating-point conversions, moves and classification.
  foreach Rd = [ReadFCvtF32ToI32, ReadFCvtF32ToI64,
                ReadFCvtF64ToI32, ReadFCvtF64ToI64,
                ReadFCvtI32ToF32, ReadFCvtI32ToF64,
                ReadFCvtI64ToF32, ReadFCvtI64ToF64,
                ReadFCvtF32ToF64, ReadFCvtF64ToF32,
                ReadFMovF32ToI32, ReadFMovI32ToF32,
                ReadFMovF64ToI64, ReadFMovI64ToF64,
                ReadFClass32, ReadFClass64] in
    def : ReadAdvance<Rd, 0>;
}

let SchedModel = SyntacoreSCR7Model in {
  // The integer pipeline consists of two reservation stations, each with a
  // single issue port and eight entries:
  //   First station:
  //     - ALU (+ bitmanip and scalar crypto)
  //     - Pipelined Multiplier (3 stage)
  //   Second station:
  //     - ALU (+ bitmanip and scalar crypto)
  //     - Non-pipelined divider (other units are not blocked)
  let BufferSize = 8 in {
    def SCR7_ALU_MUL_IS : ProcResource<1>;
    def SCR7_ALU_DIV_IS : ProcResource<1>;
  }
  def SCR7_ALU_Any : ProcResGroup<[SCR7_ALU_MUL_IS, SCR7_ALU_DIV_IS]>;
  let BufferSize = 1 in {
    def SCR7_MUL : ProcResource<1>;
    def SCR7_DIV : ProcResource<1>;
  }

  defm : SCR7_IntPipeline<SCR7_ALU_Any,
                          SCR7_ALU_DIV_IS, SCR7_DIV,
                          SCR7_ALU_MUL_IS, SCR7_MUL>;

  // The LSU is single-issue and has sixteen entries.
  let BufferSize = 16 in
    def SCR7_LSU : ProcResource<1>;
  defm : SCR7_BasicMemory<SCR7_LSU>;
  defm : SCR7_AtomicMemory<SCR7_LSU>;

  // The FPU has one issue slot with eight entries, feeding:
  //   - FP ALU
  //   - FMA
  //   - Non-pipelined FDIV/FSQRT
  let BufferSize = 8 in
    def SCR7_FPU_IS : ProcResource<1>;
  let BufferSize = 1 in {
    def SCR7_FALU : ProcResource<1>;
    def SCR7_FMA : ProcResource<1>;
    def SCR7_FDIVSQRT : ProcResource<1>;
  }
  defm : SCR7_FPU<SCR7_FPU_IS, SCR7_FALU, SCR7_FMA, SCR7_FDIVSQRT>;

  defm : SCR7_Other;
  defm : SCR7_Unsupported;
  defm : SCR7_NoReadAdvances;
}