//===-- ARMSchedule.td - ARM Scheduling Definitions --------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Instruction scheduling annotations for in-order and out-of-order CPUs.
// These annotations are independent of the itinerary classes defined below.
// Here we define the subtarget independent read/write per-operand resources.
// The subtarget schedule definitions will then map these to the subtarget's
// resource usages.
// For example:
// The instruction cycle timings table might contain an entry for an operation
// like the following:
//     Rd <- ADD Rn, Rm, <shift> Rs
//  Uops | Latency from register | Uops - resource requirements - latency
//  2    | Rn: 1 Rm: 4 Rs: 4     | uop T0, Rm, Rs - P01 - 3
//       |                       | uopc Rd, Rn, T0 - P01 - 1
// This is telling us that the result will be available in destination register
// Rd after a minimum of four cycles after the result in Rm and Rs is available
// and one cycle after the result in Rn is available. The micro-ops can execute
// on resource P01.
// To model this, we need to express that we need to dispatch two micro-ops,
// that the resource P01 is needed and that the latency to Rn is different than
// the latency to Rm and Rs. The scheduler can decrease Rn's producer latency by
// three.
// We will do this by assigning (abstract) resources to register defs/uses.
//   ARMSchedule.td:
//     def WriteALUsr : SchedWrite;
//     def ReadAdvanceALUsr : SchedRead;
//
//   ARMInstrInfo.td:
//     def ADDrs : I<>, Sched<[WriteALUsr, ReadAdvanceALUsr, ReadDefault,
//                             ReadDefault]> { ...}
// ReadAdvance read resources allow us to define "pipeline by-passes" or
// shorter latencies to certain registers as needed in the example above.
// The "ReadDefault" can be omitted.
// Next, the subtarget td file assigns resources to the abstract resources
// defined here.
//   ARMScheduleSubtarget.td:
//     // Resources.
//     def P01 : ProcResource<3>; // ALU unit (3 of them).
//     ...
//     // Resource usages.
//     def : WriteRes<WriteALUsr, [P01, P01]> {
//       Latency = 4;      // Latency of 4.
//       NumMicroOps = 2;  // Dispatch 2 micro-ops.
//       // The two instances of resource P01 are occupied for one cycle. It is
//       // one cycle because these resources happen to be pipelined.
//       ReleaseAtCycles = [1, 1];
//     }
//     def : ReadAdvance<ReadAdvanceALUsr, 3>;

//===----------------------------------------------------------------------===//
// Sched definitions for integer pipeline instructions
//
// Plain ALU operation.
def WriteALU : SchedWrite;
def ReadALU  : SchedRead;

// ALU operation combined with a shift.
def WriteALUsi  : SchedWrite; // Shift by immediate.
def WriteALUsr  : SchedWrite; // Shift by register.
def WriteALUSsr : SchedWrite; // Shift by register, flag-setting form.
def ReadALUsr   : SchedRead;  // Some operands are read later.

// Compares.
def WriteCMP   : SchedWrite;
def WriteCMPsi : SchedWrite;
def WriteCMPsr : SchedWrite;

// Multiplies.
def WriteMUL16   : SchedWrite; // 16-bit multiply.
def WriteMUL32   : SchedWrite; // 32-bit multiply.
def WriteMUL64Lo : SchedWrite; // 64-bit result, low register.
def WriteMUL64Hi : SchedWrite; // 64-bit result, high register.
def ReadMUL      : SchedRead;

// Multiply-accumulates.
def WriteMAC16 : SchedWrite; // 16-bit mac.
def WriteMAC32 : SchedWrite; // 32-bit mac.
def WriteMAC64Lo : SchedWrite; // 64-bit mac, low register.
def WriteMAC64Hi : SchedWrite; // 64-bit mac, high register.
def ReadMAC      : SchedRead;

// Divisions.
def WriteDIV : SchedWrite;

// Loads/Stores.
def WriteLd    : SchedWrite;
def WritePreLd : SchedWrite;
def WriteST    : SchedWrite;

// Branches.
def WriteBr    : SchedWrite;
def WriteBrL   : SchedWrite;
def WriteBrTbl : SchedWrite;

// Noop.
def WriteNoop : SchedWrite;

//===----------------------------------------------------------------------===//
// Sched definitions for floating-point and neon instructions
//
// Floating point conversions
def WriteFPCVT : SchedWrite;
def WriteFPMOV : SchedWrite; // FP -> GPR and vice-versa

// ALU operations (32/64-bit)
def WriteFPALU32 : SchedWrite;
def WriteFPALU64 : SchedWrite;

// Multiplication
def WriteFPMUL32 : SchedWrite;
def WriteFPMUL64 : SchedWrite;
def ReadFPMUL    : SchedRead; // multiplier read
def ReadFPMAC    : SchedRead; // accumulator read

// Multiply-accumulate
def WriteFPMAC32 : SchedWrite;
def WriteFPMAC64 : SchedWrite;

// Division
def WriteFPDIV32 : SchedWrite;
def WriteFPDIV64 : SchedWrite;

// Square-root
def WriteFPSQRT32 : SchedWrite;
def WriteFPSQRT64 : SchedWrite;

// Vector load and stores
def WriteVLD1 : SchedWrite;
def WriteVLD2 : SchedWrite;
def WriteVLD3 : SchedWrite;
def WriteVLD4 : SchedWrite;
def WriteVST1 : SchedWrite;
def WriteVST2 : SchedWrite;
def WriteVST3 : SchedWrite;
def WriteVST4 : SchedWrite;


// Define TII for use in SchedVariant Predicates.
// Besides TII, the prolog binds STI (the ARMSubtarget obtained from
// SchedModel). Both are cast to void, presumably so predicates that use
// neither do not trigger unused-variable warnings.
def : PredicateProlog<[{
  const ARMBaseInstrInfo *TII =
    static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
  (void)TII;
  const ARMSubtarget *STI =
    static_cast<const ARMSubtarget*>(SchedModel->getSubtargetInfo());
  (void)STI;
}]>;

// Predicate backed by ARM_MC::isPredicated / isPredicated (via TII).
def IsPredicated : CheckFunctionPredicateWithTII<"ARM_MC::isPredicated",
                                                 "isPredicated">;
def IsPredicatedPred : MCSchedPredicate<IsPredicated>;

// Predicate backed by ARM_MC::isCPSRDefined /
// ARMBaseInstrInfo::isCPSRDefined (via TII).
def IsCPSRDefined
    : CheckFunctionPredicateWithTII<"ARM_MC::isCPSRDefined",
                                    "ARMBaseInstrInfo::isCPSRDefined">;

def IsCPSRDefinedPred : MCSchedPredicate<IsCPSRDefined>;

// Immediate-operand checks whose operand is mapped through
// ARM_AM::getAM2ShiftOpc before being compared.
let FunctionMapper = "ARM_AM::getAM2ShiftOpc" in {
  class CheckAM2NoShift<int n>  : CheckImmOperand_s<n, "ARM_AM::no_shift">;
  class CheckAM2ShiftLSL<int n> : CheckImmOperand_s<n, "ARM_AM::lsl">;
}

// Immediate-operand checks mapped through ARM_AM::getAM2Op.
let FunctionMapper = "ARM_AM::getAM2Op" in {
  class CheckAM2OpAdd<int n> : CheckImmOperand_s<n, "ARM_AM::add">;
  class CheckAM2OpSub<int n> : CheckImmOperand_s<n, "ARM_AM::sub">;
}

// Compares operand n, mapped through ARM_AM::getAM2Offset, against `of`.
let FunctionMapper = "ARM_AM::getAM2Offset" in
class CheckAM2Offset<int n, int of> : CheckImmOperand<n, of>;

def IsLDMBaseRegInList
    : CheckFunctionPredicate<"ARM_MC::isLDMBaseRegInList",
                             "ARM_MC::isLDMBaseRegInList">;

// Immediate-operand check mapped through ARM_AM::getAM3Op.
let FunctionMapper = "ARM_AM::getAM3Op" in
class CheckAM3OpSub<int n> : CheckImmOperand_s<n, "ARM_AM::sub">;

// LDM, base reg in list
def IsLDMBaseRegInListPred : MCSchedPredicate<IsLDMBaseRegInList>;

// Scheduling predicate built from CheckRegOperand<n, PC>.
class IsRegPCPred<int n> : MCSchedPredicate<CheckRegOperand<n, PC>>;

// A SchedWriteRes derived from an existing one, `wr`:
//   - ProcResources   = wr.ProcResources   followed by resl
//   - ReleaseAtCycles = wr.ReleaseAtCycles followed by rcl
//   - Latency         = wr.Latency     + lat
//   - NumMicroOps     = wr.NumMicroOps + uops
// The unmodified `wr` stays reachable through the BaseWr field.
class BranchWriteRes<int lat, int uops, list<ProcResourceKind> resl,
                     list<int> rcl, SchedWriteRes wr>
    : SchedWriteRes<!listconcat(wr.ProcResources, resl)> {
  SchedWriteRes BaseWr = wr;
  let NumMicroOps = !add(wr.NumMicroOps, uops);
  let Latency = !add(wr.Latency, lat);
  let ReleaseAtCycles = !listconcat(wr.ReleaseAtCycles, rcl);
}

// Write variant: uses `br` when IsRegPCPred<n> holds, and otherwise falls
// back to the base write resource recorded in br.BaseWr.
class CheckBranchForm<int n, BranchWriteRes br>
    : SchedWriteVariant<[SchedVar<IsRegPCPred<n>, [br]>,
                         SchedVar<NoSchedPred, [br.BaseWr]>]>;

//===----------------------------------------------------------------------===//
// Instruction Itinerary classes used for ARM
//
// IIC_i* classes.
def IIC_iALUx          : InstrItinClass;
def IIC_iALUi          : InstrItinClass;
def IIC_iALUr          : InstrItinClass;
def IIC_iALUsi         : InstrItinClass;
def IIC_iALUsir        : InstrItinClass;
def IIC_iALUsr         : InstrItinClass;
def IIC_iBITi          : InstrItinClass;
def IIC_iBITr          : InstrItinClass;
def IIC_iBITsi         : InstrItinClass;
def IIC_iBITsr         : InstrItinClass;
def IIC_iUNAr          : InstrItinClass;
def IIC_iUNAsi         : InstrItinClass;
def IIC_iEXTr          : InstrItinClass;
def IIC_iEXTAr         : InstrItinClass;
def IIC_iEXTAsr        : InstrItinClass;
def IIC_iCMPi          : InstrItinClass;
def IIC_iCMPr          : InstrItinClass;
def IIC_iCMPsi         : InstrItinClass;
def IIC_iCMPsr         : InstrItinClass;
def IIC_iTSTi          : InstrItinClass;
def IIC_iTSTr          : InstrItinClass;
def IIC_iTSTsi         : InstrItinClass;
def IIC_iTSTsr         : InstrItinClass;
def IIC_iMOVi          : InstrItinClass;
def IIC_iMOVr          : InstrItinClass;
def IIC_iMOVsi         : InstrItinClass;
def IIC_iMOVsr         : InstrItinClass;
def IIC_iMOVix2        : InstrItinClass;
def IIC_iMOVix2addpc   : InstrItinClass;
def IIC_iMOVix2ld      : InstrItinClass;
def IIC_iMVNi          : InstrItinClass;
def IIC_iMVNr          : InstrItinClass;
def IIC_iMVNsi         : InstrItinClass;
def IIC_iMVNsr         : InstrItinClass;
def IIC_iCMOVi         : InstrItinClass;
def IIC_iCMOVr         : InstrItinClass;
def IIC_iCMOVsi        : InstrItinClass;
def IIC_iCMOVsr        : InstrItinClass;
def IIC_iCMOVix2       : InstrItinClass;
def IIC_iMUL16         : InstrItinClass;
def IIC_iMAC16         : InstrItinClass;
def IIC_iMUL32         : InstrItinClass;
def IIC_iMAC32         : InstrItinClass;
def IIC_iMUL64         : InstrItinClass;
def IIC_iMAC64         : InstrItinClass;
def IIC_iDIV           : InstrItinClass;
def IIC_iLoad_i        : InstrItinClass;
def IIC_iLoad_r        : InstrItinClass;
def IIC_iLoad_si       : InstrItinClass;
def IIC_iLoad_iu       : InstrItinClass;
def IIC_iLoad_ru       : InstrItinClass;
def IIC_iLoad_siu      : InstrItinClass;
def IIC_iLoad_bh_i     : InstrItinClass;
def IIC_iLoad_bh_r     : InstrItinClass;
def IIC_iLoad_bh_si    : InstrItinClass;
def IIC_iLoad_bh_iu    : InstrItinClass;
def IIC_iLoad_bh_ru    : InstrItinClass;
def IIC_iLoad_bh_siu   : InstrItinClass;
def IIC_iLoad_d_i      : InstrItinClass;
def IIC_iLoad_d_r      : InstrItinClass;
def IIC_iLoad_d_ru     : InstrItinClass;
def IIC_iLoad_m        : InstrItinClass;
def IIC_iLoad_mu       : InstrItinClass;
def IIC_iLoad_mBr      : InstrItinClass;
def IIC_iPop           : InstrItinClass;
def IIC_iPop_Br        : InstrItinClass;
def IIC_iLoadiALU      : InstrItinClass;
def IIC_iStore_i       : InstrItinClass;
def IIC_iStore_r       : InstrItinClass;
def IIC_iStore_si      : InstrItinClass;
def IIC_iStore_iu      : InstrItinClass;
def IIC_iStore_ru      : InstrItinClass;
def IIC_iStore_siu     : InstrItinClass;
def IIC_iStore_bh_i    : InstrItinClass;
def IIC_iStore_bh_r    : InstrItinClass;
def IIC_iStore_bh_si   : InstrItinClass;
def IIC_iStore_bh_iu   : InstrItinClass;
def IIC_iStore_bh_ru   : InstrItinClass;
def IIC_iStore_bh_siu  : InstrItinClass;
def IIC_iStore_d_i     : InstrItinClass;
def IIC_iStore_d_r     : InstrItinClass;
def IIC_iStore_d_ru    : InstrItinClass;
def IIC_iStore_m       : InstrItinClass;
def IIC_iStore_mu      : InstrItinClass;
def IIC_Preload        : InstrItinClass;
def IIC_Br             : InstrItinClass;
// IIC_fp* classes.
def IIC_fpSTAT         : InstrItinClass;
def IIC_fpUNA16        : InstrItinClass;
def IIC_fpUNA32        : InstrItinClass;
def IIC_fpUNA64        : InstrItinClass;
def IIC_fpCMP16        : InstrItinClass;
def IIC_fpCMP32        : InstrItinClass;
def IIC_fpCMP64        : InstrItinClass;
def IIC_fpCVTSD        : InstrItinClass;
def IIC_fpCVTDS        : InstrItinClass;
def IIC_fpCVTSH        : InstrItinClass;
def IIC_fpCVTHS        : InstrItinClass;
def IIC_fpCVTIH        : InstrItinClass;
def IIC_fpCVTIS        : InstrItinClass;
def IIC_fpCVTID        : InstrItinClass;
def IIC_fpCVTHI        : InstrItinClass;
def IIC_fpCVTSI        : InstrItinClass;
def IIC_fpCVTDI        : InstrItinClass;
def IIC_fpMOVIS        : InstrItinClass;
def IIC_fpMOVID        : InstrItinClass;
def IIC_fpMOVSI        : InstrItinClass;
def IIC_fpMOVDI        : InstrItinClass;
def IIC_fpALU16        : InstrItinClass;
def IIC_fpALU32        : InstrItinClass;
def IIC_fpALU64        : InstrItinClass;
def IIC_fpMUL16        : InstrItinClass;
def IIC_fpMUL32        : InstrItinClass;
def IIC_fpMUL64        : InstrItinClass;
def IIC_fpMAC16        : InstrItinClass;
def IIC_fpMAC32        : InstrItinClass;
def IIC_fpMAC64        : InstrItinClass;
def IIC_fpFMAC16       : InstrItinClass;
def IIC_fpFMAC32       : InstrItinClass;
def IIC_fpFMAC64       : InstrItinClass;
def IIC_fpDIV16        : InstrItinClass;
def IIC_fpDIV32        : InstrItinClass;
def IIC_fpDIV64        : InstrItinClass;
def IIC_fpSQRT16       : InstrItinClass;
def IIC_fpSQRT32       : InstrItinClass;
def IIC_fpSQRT64       : InstrItinClass;
def IIC_fpLoad16       : InstrItinClass;
def IIC_fpLoad32       : InstrItinClass;
def IIC_fpLoad64       : InstrItinClass;
def IIC_fpLoad_m       : InstrItinClass;
def IIC_fpLoad_mu      : InstrItinClass;
def IIC_fpStore16      : InstrItinClass;
def IIC_fpStore32      : InstrItinClass;
def IIC_fpStore64      : InstrItinClass;
def IIC_fpStore_m      : InstrItinClass;
def IIC_fpStore_mu     : InstrItinClass;
// IIC_V* classes.
def IIC_VLD1           : InstrItinClass;
def IIC_VLD1x2         : InstrItinClass;
def IIC_VLD1x3         : InstrItinClass;
def IIC_VLD1x4         : InstrItinClass;
def IIC_VLD1u          : InstrItinClass;
def IIC_VLD1x2u        : InstrItinClass;
def IIC_VLD1x3u        : InstrItinClass;
def IIC_VLD1x4u        : InstrItinClass;
def IIC_VLD1ln         : InstrItinClass;
def IIC_VLD1lnu        : InstrItinClass;
def IIC_VLD1dup        : InstrItinClass;
def IIC_VLD1dupu       : InstrItinClass;
def IIC_VLD2           : InstrItinClass;
def IIC_VLD2x2         : InstrItinClass;
def IIC_VLD2u          : InstrItinClass;
def IIC_VLD2x2u        : InstrItinClass;
def IIC_VLD2ln         : InstrItinClass;
def IIC_VLD2lnu        : InstrItinClass;
def IIC_VLD2dup        : InstrItinClass;
def IIC_VLD2dupu       : InstrItinClass;
def IIC_VLD3           : InstrItinClass;
def IIC_VLD3ln         : InstrItinClass;
def IIC_VLD3u          : InstrItinClass;
def IIC_VLD3lnu        : InstrItinClass;
def IIC_VLD3dup        : InstrItinClass;
def IIC_VLD3dupu       : InstrItinClass;
def IIC_VLD4           : InstrItinClass;
def IIC_VLD4ln         : InstrItinClass;
def IIC_VLD4u          : InstrItinClass;
def IIC_VLD4lnu        : InstrItinClass;
def IIC_VLD4dup        : InstrItinClass;
def IIC_VLD4dupu       : InstrItinClass;
def IIC_VST1           : InstrItinClass;
def IIC_VST1x2         : InstrItinClass;
def IIC_VST1x3         : InstrItinClass;
def IIC_VST1x4         : InstrItinClass;
def IIC_VST1u          : InstrItinClass;
def IIC_VST1x2u        : InstrItinClass;
def IIC_VST1x3u        : InstrItinClass;
def IIC_VST1x4u        : InstrItinClass;
def IIC_VST1ln         : InstrItinClass;
def IIC_VST1lnu        : InstrItinClass;
def IIC_VST2           : InstrItinClass;
def IIC_VST2x2         : InstrItinClass;
def IIC_VST2u          : InstrItinClass;
def IIC_VST2x2u        : InstrItinClass;
def IIC_VST2ln         : InstrItinClass;
def IIC_VST2lnu        : InstrItinClass;
def IIC_VST3           : InstrItinClass;
def IIC_VST3u          : InstrItinClass;
def IIC_VST3ln         : InstrItinClass;
def IIC_VST3lnu        : InstrItinClass;
def IIC_VST4           : InstrItinClass;
def IIC_VST4u          : InstrItinClass;
def IIC_VST4ln         : InstrItinClass;
def IIC_VST4lnu        : InstrItinClass;
def IIC_VUNAD          : InstrItinClass;
def IIC_VUNAQ          : InstrItinClass;
def IIC_VBIND          : InstrItinClass;
def IIC_VBINQ          : InstrItinClass;
def IIC_VPBIND         : InstrItinClass;
def IIC_VFMULD         : InstrItinClass;
def IIC_VFMULQ         : InstrItinClass;
def IIC_VMOV           : InstrItinClass;
def IIC_VMOVImm        : InstrItinClass;
def IIC_VMOVD          : InstrItinClass;
def IIC_VMOVQ          : InstrItinClass;
def IIC_VMOVIS         : InstrItinClass;
def IIC_VMOVID         : InstrItinClass;
def IIC_VMOVISL        : InstrItinClass;
def IIC_VMOVSI         : InstrItinClass;
def IIC_VMOVDI         : InstrItinClass;
def IIC_VMOVN          : InstrItinClass;
def IIC_VPERMD         : InstrItinClass;
def IIC_VPERMQ         : InstrItinClass;
def IIC_VPERMQ3        : InstrItinClass;
def IIC_VMACD          : InstrItinClass;
def IIC_VMACQ          : InstrItinClass;
def IIC_VFMACD         : InstrItinClass;
def IIC_VFMACQ         : InstrItinClass;
def IIC_VRECSD         : InstrItinClass;
def IIC_VRECSQ         : InstrItinClass;
def IIC_VCNTiD         : InstrItinClass;
def IIC_VCNTiQ         : InstrItinClass;
def IIC_VUNAiD         : InstrItinClass;
def IIC_VUNAiQ         : InstrItinClass;
def IIC_VQUNAiD        : InstrItinClass;
def IIC_VQUNAiQ        : InstrItinClass;
def IIC_VBINiD         : InstrItinClass;
def IIC_VBINiQ         : InstrItinClass;
def IIC_VSUBiD         : InstrItinClass;
def IIC_VSUBiQ         : InstrItinClass;
def IIC_VBINi4D        : InstrItinClass;
def IIC_VBINi4Q        : InstrItinClass;
def IIC_VSUBi4D        : InstrItinClass;
def IIC_VSUBi4Q        : InstrItinClass;
def IIC_VABAD          : InstrItinClass;
def IIC_VABAQ          : InstrItinClass;
def IIC_VSHLiD         : InstrItinClass;
def IIC_VSHLiQ         : InstrItinClass;
def IIC_VSHLi4D        : InstrItinClass;
def IIC_VSHLi4Q        : InstrItinClass;
def IIC_VPALiD         : InstrItinClass;
def IIC_VPALiQ         : InstrItinClass;
def IIC_VMULi16D       : InstrItinClass;
def IIC_VMULi32D       : InstrItinClass;
def IIC_VMULi16Q       : InstrItinClass;
def IIC_VMULi32Q       : InstrItinClass;
def IIC_VMACi16D       : InstrItinClass;
def IIC_VMACi32D       : InstrItinClass;
def IIC_VMACi16Q       : InstrItinClass;
def IIC_VMACi32Q       : InstrItinClass;
def IIC_VEXTD          : InstrItinClass;
def IIC_VEXTQ          : InstrItinClass;
def IIC_VTB1           : InstrItinClass;
def IIC_VTB2           : InstrItinClass;
def IIC_VTB3           : InstrItinClass;
def IIC_VTB4           : InstrItinClass;
def IIC_VTBX1          : InstrItinClass;
def IIC_VTBX2          : InstrItinClass;
def IIC_VTBX3          : InstrItinClass;
def IIC_VTBX4          : InstrItinClass;
def IIC_VDOTPROD       : InstrItinClass;
