//==- RISCVSchedXiangShanNanHu.td - XS-NanHu Scheduling Defs -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//

// XiangShan is a high-performance open-source RISC-V processor developed by
// the Institute of Computing Technology (ICT), Chinese Academy of Sciences.
// Source: https://github.com/OpenXiangShan/XiangShan
// Documentation: https://github.com/OpenXiangShan/XiangShan-doc

// XiangShan-NanHu is the second generation of the XiangShan processor series.
// Overview: https://xiangshan-doc.readthedocs.io/zh-cn/latest/integration/overview/

// Top-level machine model. CompleteModel is 0, so instructions without
// scheduling information are tolerated.
def XiangShanNanHuModel : SchedMachineModel {
  let MicroOpBufferSize = 256;
  let LoopMicroOpBufferSize = 48; // Instruction queue size
  let IssueWidth = 6;             // 6-way decode and dispatch
  let LoadLatency = 4;
  let MispredictPenalty = 11;     // Based on estimate of pipeline depth.
  let CompleteModel = 0;
  let UnsupportedFeatures = [HasStdExtZcmt, HasStdExtZkr, HasVInstructions,
                             HasVInstructionsI64];
}

let SchedModel = XiangShanNanHuModel in {

//===----------------------------------------------------------------------===//
// Processor resources
//
// The reservation stations are distributed and grouped as 32-entry or
// 16-entry smaller ones; every resource below is modelled with 16 entries.
let BufferSize = 16 in {
  // Integer side
  def XS2ALU   : ProcResource<4>;
  def XS2MDU   : ProcResource<2>;
  def XS2MISC  : ProcResource<1>;

  // Floating-point side
  def XS2FMAC  : ProcResource<4>;
  def XS2FMISC : ProcResource<2>;

  // Memory side. Load/Store queues are ignored.
  def XS2LD    : ProcResource<2>;
  def XS2ST    : ProcResource<2>;
}

//===----------------------------------------------------------------------===//
// Write resources

// Branching
foreach Wr = [WriteJmp, WriteJal, WriteJalr] in
  def : WriteRes<Wr, [XS2MISC]>;

// Integer arithmetic and logic: single cycle on the ALUs.
let Latency = 1 in {
  foreach Wr = [WriteIALU, WriteIALU32,
                WriteShiftImm, WriteShiftImm32,
                WriteShiftReg, WriteShiftReg32] in
    def : WriteRes<Wr, [XS2ALU]>;
}

// Integer multiplication
let Latency = 3 in {
  foreach Wr = [WriteIMul, WriteIMul32] in
    def : WriteRes<Wr, [XS2MDU]>;
}

// Integer division/remainder (SRT16 algorithm). The MDU is held for the
// whole 20 cycles of the operation.
let Latency = 20, ReleaseAtCycles = [20] in {
  foreach Wr = [WriteIDiv32, WriteIDiv, WriteIRem32, WriteIRem] in
    def : WriteRes<Wr, [XS2MDU]>;
}

// Zb*: bit-manipulation operations executed on the ALUs in one cycle.
let Latency = 1 in {
  // Zba
  foreach Wr = [WriteSHXADD, WriteSHXADD32] in
    def : WriteRes<Wr, [XS2ALU]>;

  // Zbb
  foreach Wr = [WriteRotateImm, WriteRotateImm32,
                WriteRotateReg, WriteRotateReg32,
                WriteORCB, WriteIMinMax, WriteREV8] in
    def : WriteRes<Wr, [XS2ALU]>;

  // Zbkb
  foreach Wr = [WriteBREV8, WritePACK, WritePACK32, WriteZIP] in
    def : WriteRes<Wr, [XS2ALU]>;

  // Zbs
  foreach Wr = [WriteSingleBit, WriteSingleBitImm, WriteBEXT, WriteBEXTI] in
    def : WriteRes<Wr, [XS2ALU]>;
}

// Zb*: bit-manipulation operations executed on the MDUs in three cycles.
let Latency = 3 in {
  // Zbb
  foreach Wr = [WriteCLZ, WriteCLZ32, WriteCTZ, WriteCTZ32,
                WriteCPOP, WriteCPOP32] in
    def : WriteRes<Wr, [XS2MDU]>;

  // Zbkc
  def : WriteRes<WriteCLMUL, [XS2MDU]>;

  // Zbkx
  def : WriteRes<WriteXPERM, [XS2MDU]>;
}

// Memory: stores (integer, FP and atomic store halves) use the store units.
foreach Wr = [WriteSTB, WriteSTH, WriteSTW, WriteSTD,
              WriteFST32, WriteFST64,
              WriteAtomicSTW, WriteAtomicSTD] in
  def : WriteRes<Wr, [XS2ST]>;

// Memory: loads (integer, atomic and FP) use the load units, 5-cycle latency.
let Latency = 5 in {
  foreach Wr = [WriteLDB, WriteLDH, WriteLDW, WriteLDD,
                WriteAtomicW, WriteAtomicD, WriteAtomicLDW, WriteAtomicLDD,
                WriteFLD32, WriteFLD64] in
    def : WriteRes<Wr, [XS2LD]>;
}

// XiangShan-NanHu uses FuDian FPU instead of Berkeley HardFloat.
// Documentation: https://github.com/OpenXiangShan/fudian

// Three-cycle FP operations on the FMAC units: add, sign-injection, min/max,
// conversions, classify, compare, FP<->integer moves and multiplication.
let Latency = 3 in {
  foreach Wr = [WriteFAdd32, WriteFSGNJ32, WriteFMinMax32,
                WriteFAdd64, WriteFSGNJ64, WriteFMinMax64,
                WriteFCvtI32ToF32, WriteFCvtI32ToF64,
                WriteFCvtI64ToF32, WriteFCvtI64ToF64,
                WriteFCvtF32ToI32, WriteFCvtF32ToI64,
                WriteFCvtF64ToI32, WriteFCvtF64ToI64,
                WriteFCvtF32ToF64, WriteFCvtF64ToF32,
                WriteFClass32, WriteFClass64,
                WriteFCmp32, WriteFCmp64,
                WriteFMovF32ToI32, WriteFMovI32ToF32,
                WriteFMovF64ToI64, WriteFMovI64ToF64,
                // FP multiplication
                WriteFMul32, WriteFMul64] in
    def : WriteRes<Wr, [XS2FMAC]>;
}

// Fused multiply-add
let Latency = 5 in {
  foreach Wr = [WriteFMA32, WriteFMA64] in
    def : WriteRes<Wr, [XS2FMAC]>;
}

// FP division and square root on the FMISC units.
let Latency = 11 in def : WriteRes<WriteFDiv32,  [XS2FMISC]>;
let Latency = 18 in def : WriteRes<WriteFDiv64,  [XS2FMISC]>;
let Latency = 17 in def : WriteRes<WriteFSqrt32, [XS2FMISC]>;
let Latency = 31 in def : WriteRes<WriteFSqrt64, [XS2FMISC]>;

// Others
def : WriteRes<WriteCSR, [XS2MISC]>;
def : WriteRes<WriteNop, []>;

// COPY is scheduled as a plain integer ALU operation.
def : InstRW<[WriteIALU], (instrs COPY)>;

//===----------------------------------------------------------------------===//
// Bypass and advance

// Operand read that is advanced by one cycle when the producer is one of the
// listed integer or atomic load writes.
class XS2LoadToALUBypass<SchedRead read>
    : ReadAdvance<read, 1, [WriteLDB, WriteLDH, WriteLDW, WriteLDD,
                            WriteAtomicW, WriteAtomicD,
                            WriteAtomicLDW, WriteAtomicLDD]>;

foreach Rd = [ReadJmp, ReadJalr, ReadCSR, ReadStoreData, ReadMemBase] in
  def : ReadAdvance<Rd, 0>;

foreach Rd = [ReadIALU, ReadIALU32,
              ReadShiftImm, ReadShiftImm32,
              ReadShiftReg, ReadShiftReg32] in
  def : XS2LoadToALUBypass<Rd>;

foreach Rd = [ReadIDiv, ReadIDiv32, ReadIRem, ReadIRem32,
              ReadIMul, ReadIMul32,
              ReadAtomicWA, ReadAtomicWD, ReadAtomicDA, ReadAtomicDD,
              ReadAtomicLDW, ReadAtomicLDD, ReadAtomicSTW, ReadAtomicSTD,
              ReadFStoreData, ReadFMemBase,
              ReadFAdd32, ReadFAdd64,
              ReadFMul32, ReadFMul64,
              ReadFMA32] in
  def : ReadAdvance<Rd, 0>;

def : ReadAdvance<ReadFMA32Addend, 2>; // Cascade FMA
def : ReadAdvance<ReadFMA64, 0>;
def : ReadAdvance<ReadFMA64Addend, 2>; // Cascade FMA

foreach Rd = [ReadFDiv32, ReadFDiv64, ReadFSqrt32, ReadFSqrt64,
              ReadFCmp32, ReadFCmp64,
              ReadFSGNJ32, ReadFSGNJ64,
              ReadFMinMax32, ReadFMinMax64,
              ReadFCvtF32ToI32, ReadFCvtF32ToI64,
              ReadFCvtF64ToI32, ReadFCvtF64ToI64,
              ReadFCvtI32ToF32, ReadFCvtI32ToF64,
              ReadFCvtI64ToF32, ReadFCvtI64ToF64,
              ReadFCvtF32ToF64, ReadFCvtF64ToF32,
              ReadFMovF32ToI32, ReadFMovI32ToF32,
              ReadFMovF64ToI64, ReadFMovI64ToF64,
              ReadFClass32, ReadFClass64] in
  def : ReadAdvance<Rd, 0>;

// Zb*
foreach Rd = [// Zba
              ReadSHXADD, ReadSHXADD32,
              // Zbb
              ReadRotateImm, ReadRotateImm32,
              ReadRotateReg, ReadRotateReg32] in
  def : XS2LoadToALUBypass<Rd>;

foreach Rd = [ReadCLZ, ReadCLZ32, ReadCTZ, ReadCTZ32,
              ReadCPOP, ReadCPOP32] in
  def : ReadAdvance<Rd, 0>;

foreach Rd = [ReadORCB, ReadIMinMax, ReadREV8] in
  def : XS2LoadToALUBypass<Rd>;

// Zbkc
def : ReadAdvance<ReadCLMUL, 0>;

foreach Rd = [// Zbs
              ReadSingleBit, ReadSingleBitImm,
              // Zbkb
              ReadBREV8, ReadPACK, ReadPACK32, ReadZIP] in
  def : XS2LoadToALUBypass<Rd>;

// Zbkx
def : ReadAdvance<ReadXPERM, 0>;

//===----------------------------------------------------------------------===//
// Unsupported extensions
defm : UnsupportedSchedV;
defm : UnsupportedSchedZfa;
defm : UnsupportedSchedZfh;
defm : UnsupportedSchedSFB;
defm : UnsupportedSchedZabha;
defm : UnsupportedSchedXsfvcp;
defm : UnsupportedSchedZvk;
}