//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling machine model for the ARM ThunderX T8X
// (T88, T81, T83) processors.
// Loosely based on Cortex-A53 which is somewhat similar.
//
//===----------------------------------------------------------------------===//

// ===---------------------------------------------------------------------===//
// The following definitions describe the simpler per-operand machine model.
// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.

// Cavium ThunderX T8X scheduling machine model.
def ThunderXT8XModel : SchedMachineModel {
  // Pipeline shape.
  let MicroOpBufferSize = 0; // ThunderX T88/T81/T83 are in-order.
  let IssueWidth        = 2; // 2 micro-ops dispatched per cycle.

  // Default penalties / latencies.
  let LoadLatency       = 3; // Optimistic load latency.
  let MispredictPenalty = 8; // Branch mispredict penalty.

  // Scheduler configuration.
  let PostRAScheduler = 1;   // Use PostRA scheduler.
  let CompleteModel   = 1;

  // FIXME: Remove when all errors have been fixed.
  let FullInstRWOverlapCheck = 0;

  // Feature predicates this model declares as unsupported.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SVEUnsupported.F, PAUnsupported.F, SMEUnsupported.F,
                  [HasMTE, HasCSSC]);
}

// Modeling each pipeline with BufferSize == 0 since T8X is in-order.
// Every unit below shares the same BufferSize, so it is set once for the
// whole group instead of per def.
let BufferSize = 0 in {
def THXT8XUnitALU   : ProcResource<2>; // Int ALU
def THXT8XUnitMAC   : ProcResource<1>; // Int MAC
def THXT8XUnitDiv   : ProcResource<1>; // Int Division
def THXT8XUnitLdSt  : ProcResource<1>; // Load/Store
def THXT8XUnitBr    : ProcResource<1>; // Branch
def THXT8XUnitFPALU : ProcResource<1>; // FP ALU
def THXT8XUnitFPMDS : ProcResource<1>; // FP Mul/Div/Sqrt
} // BufferSize = 0

//===----------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types mapping the ProcResources and
// latencies.

let SchedModel = ThunderXT8XModel in {

// ALU
let Latency = 1 in {
def : WriteRes<WriteImm, [THXT8XUnitALU]>;
def : WriteRes<WriteI,   [THXT8XUnitALU]>;
}
let Latency = 2 in {
def : WriteRes<WriteISReg, [THXT8XUnitALU]>;
def : WriteRes<WriteIEReg, [THXT8XUnitALU]>;
def : WriteRes<WriteIS,    [THXT8XUnitALU]>;
def : WriteRes<WriteExtr,  [THXT8XUnitALU]>;
}

// MAC
def : WriteRes<WriteIM32, [THXT8XUnitMAC]> { let Latency = 4; let ReleaseAtCycles = [1]; }
def : WriteRes<WriteIM64, [THXT8XUnitMAC]> { let Latency = 4; let ReleaseAtCycles = [1]; }

// Div
def : WriteRes<WriteID32, [THXT8XUnitDiv]> { let Latency = 12; let ReleaseAtCycles = [6]; }
def : WriteRes<WriteID64, [THXT8XUnitDiv]> { let Latency = 14; let ReleaseAtCycles = [8]; }

// Load
let Latency = 3 in {
def : WriteRes<WriteLD,    [THXT8XUnitLdSt]>;
def : WriteRes<WriteLDIdx, [THXT8XUnitLdSt]>;
def : WriteRes<WriteLDHi,  [THXT8XUnitLdSt]>;
}

// Vector Load
def : WriteRes<WriteVLD, [THXT8XUnitLdSt]> { let Latency = 8; let ReleaseAtCycles = [3]; }

// Named vector-load writes referenced by the InstRW mappings of this model.
def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> { let Latency = 6;  let ReleaseAtCycles = [1]; }
def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> { let Latency = 11; let ReleaseAtCycles = [7]; }
def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> { let Latency = 12; let ReleaseAtCycles = [8]; }
def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> { let Latency = 13; let ReleaseAtCycles = [9]; }
def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> { let Latency = 13; let ReleaseAtCycles = [9]; }

// Pre/Post Indexing
def : WriteRes<WriteAdr, []> { let Latency = 0; }

// Store
let Latency = 1 in {
def : WriteRes<WriteST,    [THXT8XUnitLdSt]>;
def : WriteRes<WriteSTP,   [THXT8XUnitLdSt]>;
def : WriteRes<WriteSTIdx, [THXT8XUnitLdSt]>;
def : WriteRes<WriteSTX,   [THXT8XUnitLdSt]>;
}

// Vector Store
def : WriteRes<WriteVST, [THXT8XUnitLdSt]>;
def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]>;
def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> { let Latency = 10; let ReleaseAtCycles = [9]; }
def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> { let Latency = 11; let ReleaseAtCycles = [10]; }

// Atomics are marked unsupported in this model.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }

// Branch
def : WriteRes<WriteBr, [THXT8XUnitBr]>;
def THXT8XWriteBR : SchedWriteRes<[THXT8XUnitBr]>;
def : WriteRes<WriteBrReg, [THXT8XUnitBr]>;
def THXT8XWriteBRR : SchedWriteRes<[THXT8XUnitBr]>;
def THXT8XWriteRET : SchedWriteRes<[THXT8XUnitALU]>;
def : WriteRes<WriteSys, [THXT8XUnitBr]>;
def : WriteRes<WriteBarrier, [THXT8XUnitBr]>;
def : WriteRes<WriteHint, [THXT8XUnitBr]>;

// FP ALU
let Latency = 6 in {
def : WriteRes<WriteF,     [THXT8XUnitFPALU]>;
def : WriteRes<WriteFCmp,  [THXT8XUnitFPALU]>;
def : WriteRes<WriteFCvt,  [THXT8XUnitFPALU]>;
def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]>;
def : WriteRes<WriteFImm,  [THXT8XUnitFPALU]>;
def : WriteRes<WriteVd,    [THXT8XUnitFPALU]>;
def : WriteRes<WriteVq,    [THXT8XUnitFPALU]>;
}

// FP Mul, Div, Sqrt
def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
def : WriteRes<WriteFDiv, [THXT8XUnitFPMDS]> { let Latency = 22; let ReleaseAtCycles = [19]; }

def THXT8XWriteFMAC : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 10; }

def THXT8XWriteFDivSP  : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 12; let ReleaseAtCycles = [9]; }
def THXT8XWriteFDivDP  : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 22; let ReleaseAtCycles = [19]; }
def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 17; let ReleaseAtCycles = [14]; }
def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 31; let ReleaseAtCycles = [28]; }

//===----------------------------------------------------------------------===//
// Subtarget-specific SchedRead types.

// No forwarding for these reads.
// NOTE(review): none of these entries lists ValidWrites, yet each uses a
// non-zero advance; confirm that this matches the "no forwarding" intent.
def : ReadAdvance<ReadExtrHi, 1>;
def : ReadAdvance<ReadAdrBase, 2>;
def : ReadAdvance<ReadVLD, 2>;
def : ReadAdvance<ReadST, 2>;

// FIXME: This needs more targeted benchmarking.
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
//       operands are needed one cycle later if and only if they are to be
//       shifted. Otherwise, they too are needed two cycles later. This same
//       ReadAdvance applies to Extended registers as well, even though there is
//       a separate SchedPredicate for them.
// The integer producer writes that every ReadAdvance in this section is
// restricted to. Spelled once here and reused below.
defvar THXT8XIntWrites = [WriteImm, WriteI,
                          WriteISReg, WriteIEReg, WriteIS,
                          WriteID32, WriteID64,
                          WriteIM32, WriteIM64];

def : ReadAdvance<ReadI, 2, THXT8XIntWrites>;

// Shifted / extended register operands: advance of 1 when the predicate
// matches, 2 otherwise.
def THXT8XReadShifted    : SchedReadAdvance<1, THXT8XIntWrites>;
def THXT8XReadNotShifted : SchedReadAdvance<2, THXT8XIntWrites>;

def THXT8XReadISReg : SchedReadVariant<[
  SchedVar<RegShiftedPred, [THXT8XReadShifted]>,
  SchedVar<NoSchedPred,    [THXT8XReadNotShifted]>]>;
def : SchedAlias<ReadISReg, THXT8XReadISReg>;

def THXT8XReadIEReg : SchedReadVariant<[
  SchedVar<RegExtendedPred, [THXT8XReadShifted]>,
  SchedVar<NoSchedPred,     [THXT8XReadNotShifted]>]>;
def : SchedAlias<ReadIEReg, THXT8XReadIEReg>;

// MAC - Operands are generally needed one cycle later in the MAC pipe.
//       Accumulator operands are needed two cycles later.
def : ReadAdvance<ReadIM,  1, THXT8XIntWrites>;
def : ReadAdvance<ReadIMA, 2, THXT8XIntWrites>;

// Div
def : ReadAdvance<ReadID, 1, THXT8XIntWrites>;

//===----------------------------------------------------------------------===//
// Subtarget-specific InstRW.

// Each InstRW below overrides the default scheduling writes of the matched
// opcodes for this model. `instregex` selects opcodes by name pattern and
// `instrs` selects them by exact record name.

//---
// Branch
//---
def : InstRW<[THXT8XWriteBR], (instregex "^B$")>;
def : InstRW<[THXT8XWriteBR], (instregex "^BL$")>;
// NOTE(review): "^B..$" matches any three-character name starting with 'B',
// which includes "BLR" (also mapped below). Presumably intended for Bcc;
// confirm before tightening.
def : InstRW<[THXT8XWriteBR], (instregex "^B..$")>;
def : InstRW<[THXT8XWriteBR], (instregex "^CBNZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^CBZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^TBNZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^TBZ")>;
def : InstRW<[THXT8XWriteBRR], (instregex "^BR$")>;
def : InstRW<[THXT8XWriteBRR], (instregex "^BLR$")>;

//---
// Ret
//---
def : InstRW<[THXT8XWriteRET], (instregex "^RET$")>;

//---
// Miscellaneous
//---
def : InstRW<[WriteI], (instrs COPY)>;

//---
// Vector Loads
//---
// Post-index ("_POST") forms carry an extra WriteAdr for the updated base
// register.
def : InstRW<[THXT8XWriteVLD1], (instregex "LD1i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
// The post-index patterns require the "_POST" suffix. An optional suffix
// would also match the non-post-index LD2 opcodes already mapped just above,
// producing overlapping InstRW definitions.
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVLD3], (instregex "LD3Threev(2d)$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD2], (instregex "LD4i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD4Fourv(2d)$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;

//---
// Vector Stores
//---
def : InstRW<[THXT8XWriteVST1], (instregex "ST1i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVST1], (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVST2], (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST3Threev(2d)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;

def : InstRW<[THXT8XWriteVST2], (instregex "ST4i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST4Fourv(2d)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;

//---
// Floating Point MAC, DIV, SQRT
//---
def : InstRW<[THXT8XWriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
def : InstRW<[THXT8XWriteFMAC], (instregex "^FML(A|S).*")>;
def : InstRW<[THXT8XWriteFDivSP], (instrs FDIVSrr)>;
def : InstRW<[THXT8XWriteFDivDP], (instrs FDIVDrr)>;
def : InstRW<[THXT8XWriteFDivSP], (instregex "^FDIVv.*32$")>;
def : InstRW<[THXT8XWriteFDivDP], (instregex "^FDIVv.*64$")>;
def : InstRW<[THXT8XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
def : InstRW<[THXT8XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;

} // SchedModel = ThunderXT8XModel