//==- AArch64SchedThunderX.td - Cavium ThunderX T8X Scheduling Definitions -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the itinerary class data for the ARM ThunderX T8X
// (T88, T81, T83) processors.
// Loosely based on Cortex-A53 which is somewhat similar.
//
//===----------------------------------------------------------------------===//

// ===---------------------------------------------------------------------===//
// The following definitions describe the simpler per-operand machine model.
// This works with MachineScheduler. See llvm/MC/MCSchedule.h for details.

// Top-level machine model record for the Cavium ThunderX T8X family. Every
// resource, write and read definition for this core is attached to it.
def ThunderXT8XModel : SchedMachineModel {
  // ThunderX T88/T81/T83 are in-order, hence no micro-op buffer.
  let MicroOpBufferSize = 0;
  // 2 micro-ops dispatched per cycle.
  let IssueWidth = 2;
  // Branch mispredict penalty.
  let MispredictPenalty = 8;
  // Optimistic load latency.
  let LoadLatency = 3;
  // Every instruction is expected to have scheduling information.
  let CompleteModel = 1;
  // Use PostRA scheduler.
  let PostRAScheduler = 1;

  // Features taken from the SVEUnsupported and PAUnsupported predicate lists
  // are declared unsupported for this model.
  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
                                                    PAUnsupported.F);

  // FIXME: Remove when all errors have been fixed.
  let FullInstRWOverlapCheck = 0;
}

// Modeling each pipeline with BufferSize == 0 since T8X is in-order.
// Processor resources (pipelines). The template argument is the number of
// units of that kind; BufferSize = 0 on every resource.
def THXT8XUnitALU   : ProcResource<2> { let BufferSize = 0; } // Int ALU
def THXT8XUnitMAC   : ProcResource<1> { let BufferSize = 0; } // Int MAC
def THXT8XUnitDiv   : ProcResource<1> { let BufferSize = 0; } // Int Division
def THXT8XUnitLdSt  : ProcResource<1> { let BufferSize = 0; } // Load/Store
def THXT8XUnitBr    : ProcResource<1> { let BufferSize = 0; } // Branch
def THXT8XUnitFPALU : ProcResource<1> { let BufferSize = 0; } // FP ALU
def THXT8XUnitFPMDS : ProcResource<1> { let BufferSize = 0; } // FP Mul/Div/Sqrt

//===----------------------------------------------------------------------===//
// Subtarget-specific SchedWrite types mapping the ProcResources and
// latencies.

let SchedModel = ThunderXT8XModel in {

// ALU
// Immediate and plain register forms have latency 1; shifted-register,
// extended-register, shift and extract forms have latency 2.
def : WriteRes<WriteImm,   [THXT8XUnitALU]> { let Latency = 1; }
def : WriteRes<WriteI,     [THXT8XUnitALU]> { let Latency = 1; }
def : WriteRes<WriteISReg, [THXT8XUnitALU]> { let Latency = 2; }
def : WriteRes<WriteIEReg, [THXT8XUnitALU]> { let Latency = 2; }
def : WriteRes<WriteIS,    [THXT8XUnitALU]> { let Latency = 2; }
def : WriteRes<WriteExtr,  [THXT8XUnitALU]> { let Latency = 2; }

// MAC
// 32- and 64-bit multiplies share the same latency and occupy the MAC unit
// for a single cycle.
def : WriteRes<WriteIM32, [THXT8XUnitMAC]> {
  let Latency = 4;
  let ResourceCycles = [1];
}

def : WriteRes<WriteIM64, [THXT8XUnitMAC]> {
  let Latency = 4;
  let ResourceCycles = [1];
}

// Div
// The divider is held for several cycles per operation (ResourceCycles).
def : WriteRes<WriteID32, [THXT8XUnitDiv]> {
  let Latency = 12;
  let ResourceCycles = [6];
}

def : WriteRes<WriteID64, [THXT8XUnitDiv]> {
  let Latency = 14;
  let ResourceCycles = [8];
}

// Load
def : WriteRes<WriteLD,    [THXT8XUnitLdSt]> { let Latency = 3; }
def : WriteRes<WriteLDIdx, [THXT8XUnitLdSt]> { let Latency = 3; }
def : WriteRes<WriteLDHi,  [THXT8XUnitLdSt]> { let Latency = 3; }

// Vector Load
// WriteVLD is the generic vector-load write. THXT8XWriteVLD1..5 are
// model-local writes that the InstRW entries further down assign to specific
// LD1/LD2/LD3/LD4 opcodes.
def : WriteRes<WriteVLD, [THXT8XUnitLdSt]> {
  let Latency = 8;
  let ResourceCycles = [3];
}

def THXT8XWriteVLD1 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 6;
  let ResourceCycles = [1];
}

def THXT8XWriteVLD2 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 11;
  let ResourceCycles = [7];
}

def THXT8XWriteVLD3 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 12;
  let ResourceCycles = [8];
}

def THXT8XWriteVLD4 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 13;
  let ResourceCycles = [9];
}

def THXT8XWriteVLD5 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 13;
  let ResourceCycles = [9];
}

// Pre/Post Indexing
// The address writeback consumes no processor resource and has zero latency.
def : WriteRes<WriteAdr, []> { let Latency = 0; }

// Store
def : WriteRes<WriteST,    [THXT8XUnitLdSt]> { let Latency = 1; }
def : WriteRes<WriteSTP,   [THXT8XUnitLdSt]> { let Latency = 1; }
def : WriteRes<WriteSTIdx, [THXT8XUnitLdSt]> { let Latency = 1; }
def : WriteRes<WriteSTX,   [THXT8XUnitLdSt]> { let Latency = 1; }

// Vector Store
// WriteVST and THXT8XWriteVST1 set no fields, so they keep the defaults of
// their base classes.
def : WriteRes<WriteVST, [THXT8XUnitLdSt]>;
def THXT8XWriteVST1 : SchedWriteRes<[THXT8XUnitLdSt]>;

def THXT8XWriteVST2 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 10;
  let ResourceCycles = [9];
}

def THXT8XWriteVST3 : SchedWriteRes<[THXT8XUnitLdSt]> {
  let Latency = 11;
  let ResourceCycles = [10];
}

// Atomics are marked unsupported in this model.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }

// Branch
// All of these keep the base-class defaults. Note that THXT8XWriteRET is
// bound to the ALU unit, unlike the other branch writes.
def : WriteRes<WriteBr, [THXT8XUnitBr]>;
def THXT8XWriteBR : SchedWriteRes<[THXT8XUnitBr]>;
def : WriteRes<WriteBrReg, [THXT8XUnitBr]>;
def THXT8XWriteBRR : SchedWriteRes<[THXT8XUnitBr]>;
def THXT8XWriteRET : SchedWriteRes<[THXT8XUnitALU]>;
def : WriteRes<WriteSys, [THXT8XUnitBr]>;
def : WriteRes<WriteBarrier, [THXT8XUnitBr]>;
def : WriteRes<WriteHint, [THXT8XUnitBr]>;

// FP ALU
def : WriteRes<WriteF,     [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCmp,  [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCvt,  [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFCopy, [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteFImm,  [THXT8XUnitFPALU]> { let Latency = 6; }
def : WriteRes<WriteV,     [THXT8XUnitFPALU]> { let Latency = 6; }

// FP Mul, Div, Sqrt
// The generic WriteFDiv uses the same numbers as THXT8XWriteFDivDP below.
def : WriteRes<WriteFMul, [THXT8XUnitFPMDS]> { let Latency = 6; }
def : WriteRes<WriteFDiv, [THXT8XUnitFPMDS]> {
  let Latency = 22;
  let ResourceCycles = [19];
}

def THXT8XWriteFMAC : SchedWriteRes<[THXT8XUnitFPMDS]> { let Latency = 10; }

def THXT8XWriteFDivSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let Latency = 12;
  let ResourceCycles = [9];
}

def THXT8XWriteFDivDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let Latency = 22;
  let ResourceCycles = [19];
}

def THXT8XWriteFSqrtSP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let Latency = 17;
  let ResourceCycles = [14];
}

def THXT8XWriteFSqrtDP : SchedWriteRes<[THXT8XUnitFPMDS]> {
  let Latency = 31;
  let ResourceCycles = [28];
}

//===----------------------------------------------------------------------===//
// Subtarget-specific SchedRead types.

// These reads carry an advance with no list of producing writes, i.e. the
// advance is not restricted to particular producers.
def : ReadAdvance<ReadExtrHi, 1>;
def : ReadAdvance<ReadAdrBase, 2>;
def : ReadAdvance<ReadVLD, 2>;

// FIXME: This needs more targeted benchmarking.
// ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable
//       operands are needed one cycle later if and only if they are to be
//       shifted. Otherwise, they too are needed two cycles later. This same
//       ReadAdvance applies to Extended registers as well, even though there is
//       a separate SchedPredicate for them.
def : ReadAdvance<ReadI, 2, [WriteImm, WriteI,
                             WriteISReg, WriteIEReg, WriteIS,
                             WriteID32, WriteID64,
                             WriteIM32, WriteIM64]>;
def THXT8XReadShifted : SchedReadAdvance<1, [WriteImm, WriteI,
                                             WriteISReg, WriteIEReg, WriteIS,
                                             WriteID32, WriteID64,
                                             WriteIM32, WriteIM64]>;
def THXT8XReadNotShifted : SchedReadAdvance<2, [WriteImm, WriteI,
                                                WriteISReg, WriteIEReg, WriteIS,
                                                WriteID32, WriteID64,
                                                WriteIM32, WriteIM64]>;

// Shifted-register operand: advance 1 when RegShiftedPred holds, otherwise 2.
def THXT8XReadISReg : SchedReadVariant<[
    SchedVar<RegShiftedPred, [THXT8XReadShifted]>,
    SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>;
def : SchedAlias<ReadISReg, THXT8XReadISReg>;

// Extended-register operand: advance 1 when RegExtendedPred holds, otherwise 2.
def THXT8XReadIEReg : SchedReadVariant<[
    SchedVar<RegExtendedPred, [THXT8XReadShifted]>,
    SchedVar<NoSchedPred, [THXT8XReadNotShifted]>]>;
def : SchedAlias<ReadIEReg, THXT8XReadIEReg>;

// MAC - Operands are generally needed one cycle later in the MAC pipe.
//       Accumulator operands are needed two cycles later.
def : ReadAdvance<ReadIM, 1, [WriteImm, WriteI,
                              WriteISReg, WriteIEReg, WriteIS,
                              WriteID32, WriteID64,
                              WriteIM32, WriteIM64]>;
def : ReadAdvance<ReadIMA, 2, [WriteImm, WriteI,
                               WriteISReg, WriteIEReg, WriteIS,
                               WriteID32, WriteID64,
                               WriteIM32, WriteIM64]>;

// Div
def : ReadAdvance<ReadID, 1, [WriteImm, WriteI,
                              WriteISReg, WriteIEReg, WriteIS,
                              WriteID32, WriteID64,
                              WriteIM32, WriteIM64]>;

//===----------------------------------------------------------------------===//
// Subtarget-specific InstRW.

//---
// Branch
//---
def : InstRW<[THXT8XWriteBR], (instregex "^B$")>;
def : InstRW<[THXT8XWriteBR], (instregex "^BL$")>;
// Conditional branch. Spelled out as "Bcc" rather than "^B..$" so that the
// pattern cannot also capture BLR, which has its own entry below, or any
// other three-letter B?? opcode.
def : InstRW<[THXT8XWriteBR], (instregex "^Bcc$")>;
def : InstRW<[THXT8XWriteBR], (instregex "^CBNZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^CBZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^TBNZ")>;
def : InstRW<[THXT8XWriteBR], (instregex "^TBZ")>;
def : InstRW<[THXT8XWriteBRR], (instregex "^BR$")>;
def : InstRW<[THXT8XWriteBRR], (instregex "^BLR$")>;

//---
// Ret
//---
def : InstRW<[THXT8XWriteRET], (instregex "^RET$")>;

//---
// Miscellaneous
//---
def : InstRW<[WriteI], (instrs COPY)>;

//---
// Vector Loads
//---
// In each group the first half covers the plain opcodes and the second half
// the "_POST" (post-indexed) opcodes, which additionally get WriteAdr.
def : InstRW<[THXT8XWriteVLD1], (instregex "LD1i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD1], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD3], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD1], (instregex "LD2i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD1], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
// The writeback patterns require the "_POST" suffix. An optional
// "(_POST)?" would also match the plain LD2 opcodes listed just above,
// producing overlapping InstRW entries and a stray WriteAdr.
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD1, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD2], (instregex "LD3i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVLD3], (instregex "LD3Threev(2d)$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVLD3, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;

def : InstRW<[THXT8XWriteVLD2], (instregex "LD4i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVLD5], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVLD4], (instregex "LD4Fourv(2d)$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVLD2, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVLD5, WriteAdr], (instregex "LD4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVLD4, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>;

//---
// Vector Stores
//---
// Same layout as the vector loads: plain opcodes first, then the "_POST"
// opcodes with an additional WriteAdr.
def : InstRW<[THXT8XWriteVST1], (instregex "ST1i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST1], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVST1], (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST1], (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST1, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;

def : InstRW<[THXT8XWriteVST2], (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST3], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST3Threev(2d)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST3Threev(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST3Threev(2d)_POST$")>;

def : InstRW<[THXT8XWriteVST2], (instregex "ST4i(8|16|32|64)$")>;
def : InstRW<[THXT8XWriteVST3], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)$")>;
def : InstRW<[THXT8XWriteVST2], (instregex "ST4Fourv(2d)$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4i(8|16|32|64)_POST$")>;
def : InstRW<[THXT8XWriteVST3, WriteAdr], (instregex "ST4Fourv(8b|4h|2s|1d|16b|8h|4s)_POST$")>;
def : InstRW<[THXT8XWriteVST2, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;

//---
// Floating Point MAC, DIV, SQRT
//---
// NOTE(review): the SQRT patterns match any opcode containing "SQRT" that
// ends in 32/64, not only FSQRT - confirm that this breadth is intended.
def : InstRW<[THXT8XWriteFMAC], (instregex "^FN?M(ADD|SUB).*")>;
def : InstRW<[THXT8XWriteFMAC], (instregex "^FML(A|S).*")>;
def : InstRW<[THXT8XWriteFDivSP], (instrs FDIVSrr)>;
def : InstRW<[THXT8XWriteFDivDP], (instrs FDIVDrr)>;
def : InstRW<[THXT8XWriteFDivSP], (instregex "^FDIVv.*32$")>;
def : InstRW<[THXT8XWriteFDivDP], (instregex "^FDIVv.*64$")>;
def : InstRW<[THXT8XWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
def : InstRW<[THXT8XWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;

}