//=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for ARM Cortex-A57 to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// *** Common description and scheduling model parameters taken from AArch64 ***
// The Cortex-A57 is a traditional superscalar microprocessor with a
// conservative 3-wide in-order stage for decode and dispatch. Combined with the
// much wider out-of-order issue stage, this produced a need to carefully
// schedule micro-ops so that all three decoded each cycle are successfully
// issued as the reservation station(s) simply don't stay occupied for long.
// Therefore, IssueWidth is set to the narrower of the two at three, while still
// modeling the machine as out-of-order.

// Scheduling predicates. Each one wraps a C++ expression evaluated on the
// MachineInstr (*MI) through the target instruction info (TII).

// Instruction defines CPSR (i.e. sets flags).
def IsCPSRDefinedPred : SchedPredicate<[{TII->isCPSRDefined(*MI)}]>;
// Instruction defines CPSR and is also predicated.
def IsCPSRDefinedAndPredicatedPred :
  SchedPredicate<[{TII->isCPSRDefined(*MI) && TII->isPredicated(*MI)}]>;

// Cortex A57 rev. r1p0 or later (false = r0px)
// Hard-wired to false, so the r0px timings are always selected.
def IsR1P0AndLaterPred : SchedPredicate<[{false}]>;

// If Addrmode3 contains register offset (not immediate)
def IsLdrAm3RegOffPred :
  SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 1)}]>;
// The same predicate with operand offset 2 and 3:
def IsLdrAm3RegOffPredX2 :
  SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 2)}]>;
def IsLdrAm3RegOffPredX3 :
  SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 3)}]>;

// If Addrmode3 contains "minus register"
def IsLdrAm3NegRegOffPred :
  SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 1)}]>;
// The same predicate with operand offset 2 and 3:
def IsLdrAm3NegRegOffPredX2 :
  SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 2)}]>;
def IsLdrAm3NegRegOffPredX3 :
  SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 3)}]>;

// Load, scaled register offset, not plus LSL2
// (X0 / unsuffixed / X2 pass 0 / 1 / 2 as the second argument.)
def IsLdstsoScaledNotOptimalPredX0 :
  SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 0)}]>;
def IsLdstsoScaledNotOptimalPred :
  SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 1)}]>;
def IsLdstsoScaledNotOptimalPredX2 :
  SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 2)}]>;

// Load, scaled register offset
def IsLdstsoScaledPred :
  SchedPredicate<[{TII->isLdstScaledReg(*MI, 1)}]>;
def IsLdstsoScaledPredX2 :
  SchedPredicate<[{TII->isLdstScaledReg(*MI, 2)}]>;

// TII->isLdstSoMinusReg checks, with 0 / 1 / 2 as the second argument.
// NOTE(review): presumably the "minus register" case of the same addressing
// mode as the two groups above - confirm against the TII helper.
def IsLdstsoMinusRegPredX0 :
  SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 0)}]>;
def IsLdstsoMinusRegPred :
  SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 1)}]>;
def IsLdstsoMinusRegPredX2 :
  SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 2)}]>;

// Load, scaled register offset
def IsLdrAm2ScaledPred :
  SchedPredicate<[{TII->isAm2ScaledReg(*MI, 1)}]>;

// LDM, base reg in list
def IsLdmBaseRegInList :
  SchedPredicate<[{TII->isLDMBaseRegInList(*MI)}]>;

// Holder for an ordered list of SchedWriteRes records, exposed through the
// `Writes` field so that prefixes of the list can be sliced out
// (e.g. `X.Writes[0-3]`) when building variadic load-multiple variants.
// `SchedModel` is declared but left unset (`?`) here.
class A57WriteLMOpsListType<list<SchedWriteRes> writes> {
  list <SchedWriteRes> Writes = writes;
  SchedMachineModel SchedModel = ?;
}

// *** Common description and scheduling model parameters taken from AArch64 ***
// (AArch64SchedA57.td)
def CortexA57Model : SchedMachineModel {
  let IssueWidth        =   3; // 3-way decode and dispatch
  let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
  let LoadLatency       =   4; // Optimistic load latency
  let MispredictPenalty =  16; // Fetch + Decode/Rename/Dispatch + Branch

  // Enable partial & runtime unrolling.
  let LoopMicroOpBufferSize = 16;
  let CompleteModel = 1;

  // FIXME: Remove when all errors have been fixed.
  let FullInstRWOverlapCheck = 0;

  let UnsupportedFeatures = [HasV8_1MMainline, HasMVEInt, HasMVEFloat,
                             HasFPRegsV8_1M, HasFP16FML, HasMatMulInt8, HasBF16];
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Cortex-A57.
// Cortex-A57 has 8 pipelines that each has its own 8-entry queue where
// micro-ops wait for their operands and then issue out-of-order.
// (The unit counts below add up to those eight pipelines; I has two.)

def A57UnitB : ProcResource<1>;  // Type B micro-ops
def A57UnitI : ProcResource<2>;  // Type I micro-ops
def A57UnitM : ProcResource<1>;  // Type M micro-ops
def A57UnitL : ProcResource<1>;  // Type L micro-ops
def A57UnitS : ProcResource<1>;  // Type S micro-ops

def A57UnitX : ProcResource<1>;  // Type X micro-ops (F1)
def A57UnitW : ProcResource<1>;  // Type W micro-ops (F0)

// A V micro-op may issue to either of the X (F1) or W (F0) units.
let SchedModel = CortexA57Model in {
  def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>;    // Type V micro-ops
}

// Everything from here on is bound to CortexA57Model; the closing brace of
// this `let` is further down the file.
let SchedModel = CortexA57Model in {

//===----------------------------------------------------------------------===//
// Define customized scheduler read/write types specific to the Cortex-A57.

// Per-latency/per-unit write resources referenced below as
// A57Write_<N>cyc_<units> come from this include.
include "ARMScheduleA57WriteRes.td"

// To have "CompleteModel = 1", support of pseudos and special instructions
def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$",
  "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$",
  "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$",
  "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$",
  "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE",
  "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "(t2|t)?UDF$", "t2DCPS", "t2SG",
  "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier",
  "t__brkdiv0")>;

def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>;

// Specific memory instrs
def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC",
  "(t2)?STL", "(t2)?LDREX", "(t2)?STREX", "MEMCPY")>;

// coprocessor moves
def : InstRW<[WriteNoop, WriteNoop], (instregex
  "(t2)?MCR(2|R|R2)?$", "(t2)?MRC(2)?$",
  "(t2)?MRRC(2)?$", "(t2)?MRS(banked|sys|_AR|_M|sys_AR)?$",
  "(t2)?MSR(banked|i|_AR|_M)?$")>;

// Deprecated instructions
def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;

// Pseudos
def : InstRW<[WriteNoop], (instregex "(t2)?ABS$",
  "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj",
  "tLDRpci_pic", "(t2)?SUBS_PC_LR",
  "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp",
  "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
  "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
  "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
  "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
  "WIN__CHKSTK", "WIN__DBZCHK")>;

// Miscellaneous
// -----------------------------------------------------------------------------

def : InstRW<[A57Write_1cyc_1I], (instrs COPY)>;

// --- 3.2 Branch Instructions ---
// B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ

def : InstRW<[A57Write_1cyc_1B], (instregex "(t2|t)?B$", "t?BX", "(t2|t)?Bcc$",
  "t?TAILJMP(d|r)", "TCRETURN(d|r)i", "tBfar", "tCBN?Z")>;
def : InstRW<[A57Write_1cyc_1B_1I],
  (instregex "t?BL$", "BL_pred$", "t?BLXi", "t?TPsoft")>;
def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BLX", "tBLX(NS)?r")>;
// Pseudos
def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BCCi64", "BCCZi64")>;
def : InstRW<[A57Write_3cyc_1B_1I], (instregex "BR_JTadd", "t?BR_JTr",
  "t2BR_JT", "t2BXJ", "(t2)?TB(B|H)(_JT)?$", "tBRIND")>;
def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>;

// --- 3.3 Arithmetic and Logical Instructions ---
// ADD{S}, ADC{S}, ADR, AND{S}, BIC{S}, CMN, CMP, EOR{S}, ORN{S}, ORR{S},
// RSB{S}, RSC{S}, SUB{S}, SBC{S}, TEQ, TST

def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;

// shift by register, conditional or unconditional
// TODO: according to the doc, conditional uses I0/I1, unconditional uses M
//       Why more complex instruction uses more simple pipeline?
//       May be an error in doc.
// Shift-by-immediate form: both the predicated and the default variant map
// to the same 2-cycle M write.
def A57WriteALUsi : SchedWriteVariant<[
  // lsl #2, lsl #1, or lsr #1.
  SchedVar<IsPredicatedPred, [A57Write_2cyc_1M]>,
  SchedVar<NoSchedPred,      [A57Write_2cyc_1M]>
]>;
// Shift-by-register forms: predicated -> I, otherwise -> M (see TODO above).
def A57WriteALUsr : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
  SchedVar<NoSchedPred,      [A57Write_2cyc_1M]>
]>;
def A57WriteALUSsr : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
  SchedVar<NoSchedPred,      [A57Write_2cyc_1M]>
]>;
// Read side of the shift-by-register form; both variants are ReadDefault.
def A57ReadALUsr : SchedReadVariant<[
  SchedVar<IsPredicatedPred, [ReadDefault]>,
  SchedVar<NoSchedPred,      [ReadDefault]>
]>;
def : SchedAlias<WriteALUsi,  A57WriteALUsi>;
def : SchedAlias<WriteALUsr,  A57WriteALUsr>;
def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
def : SchedAlias<ReadALUsr,   A57ReadALUsr>;

// Compare with shift-by-register: same unit split as A57WriteALUsr.
def A57WriteCMPsr : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
  SchedVar<NoSchedPred,      [A57Write_2cyc_1M]>
]>;
def : SchedAlias<WriteCMP,   A57Write_1cyc_1I>;
def : SchedAlias<WriteCMPsi, A57Write_2cyc_1M>;
def : SchedAlias<WriteCMPsr, A57WriteCMPsr>;

// --- 3.4 Move and Shift Instructions ---
// Move, basic
// MOV{S}, MOVW, MVN{S}
def : InstRW<[A57Write_1cyc_1I], (instregex "MOV(r|i|i16|r_TC)",
  "(t2)?MVN(CC)?(r|i)", "BMOVPCB_CALL", "BMOVPCRX_CALL",
  "MOVCC(r|i|i16|i32imm)", "tMOV", "tMVN")>;

// Move, shift by immed, setflags/no setflags
// (ASR, LSL, LSR, ROR, RRX)=MOVsi, MVN
// setflags = isCPSRDefined
def A57WriteMOVsi : SchedWriteVariant<[
  SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
  SchedVar<NoSchedPred,       [A57Write_1cyc_1I]>
]>;
def : InstRW<[A57WriteMOVsi], (instregex "MOV(CC)?si", "MVNsi",
  "ASRi", "(t2|t)ASRri", "LSRi", "(t2|t)LSRri", "LSLi", "(t2|t)LSLri", "RORi",
  "(t2|t)RORri", "(t2)?RRX", "t2MOV", "tROR")>;

// shift by register, conditional or unconditional, setflags/no setflags
// The combined "CPSR defined and predicated" case is listed first, ahead of
// the two single-condition cases it overlaps with.
def A57WriteMOVsr : SchedWriteVariant<[
  SchedVar<IsCPSRDefinedAndPredicatedPred, [A57Write_2cyc_1I]>,
  SchedVar<IsCPSRDefinedPred,              [A57Write_2cyc_1M]>,
  SchedVar<IsPredicatedPred,               [A57Write_2cyc_1I]>,
  SchedVar<NoSchedPred,                    [A57Write_1cyc_1I]>
]>;
def : InstRW<[A57WriteMOVsr], (instregex "MOV(CC)?sr", "MVNsr", "t2MVNs",
  "ASRr", "(t2|t)ASRrr", "LSRr", "(t2|t)LSRrr", "LSLr", "(t2|t)?LSLrr", "RORr",
  "(t2|t)RORrr")>;

// Move, top
// MOVT - A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later
def A57WriteMOVT : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_1cyc_1I]>,
  SchedVar<NoSchedPred,        [A57Write_2cyc_1M]>
]>;
def : InstRW<[A57WriteMOVT], (instregex "MOVTi16")>;

// Write sequences for the pc-relative global-address pseudos: three I writes,
// or two I writes followed by a 4-cycle load.
def A57WriteI2pc :
  WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_1cyc_1I]>;
def A57WriteI2ld :
  WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_4cyc_1L]>;
def : InstRW< [A57WriteI2pc], (instregex "MOV_ga_pcrel")>;
def : InstRW< [A57WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;

// +2cyc for branch forms
def : InstRW<[A57Write_3cyc_1I], (instregex "MOVPC(LR|RX)")>;

// --- 3.5 Divide and Multiply Instructions ---
// Divide: SDIV, UDIV
// latency from documentation: 4 - 20, maximum taken
def : SchedAlias<WriteDIV, A57Write_20cyc_1M>;
// Multiply: tMul not bound to common WriteRes types
def : InstRW<[A57Write_3cyc_1M], (instregex "tMUL")>;
def : SchedAlias<WriteMUL16, A57Write_3cyc_1M>;
def : SchedAlias<WriteMUL32, A57Write_3cyc_1M>;
def : ReadAdvance<ReadMUL, 0>;

// Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB,
// SMLAWT, SMLAD{X}, SMLSD{X}, SMMLA{R}, SMMLS{R}
// Multiply-accumulate pipelines support late-forwarding of accumulate operands
// from similar μops, allowing a typical sequence of multiply-accumulate μops
// to issue one every 1 cycle (sched advance = 2).
// Multiply-accumulate writes on the M unit (3 cycles; 4 for the long form)
// and the accumulator read, which is advanced by 2 cycles when the producer
// is one of these two writes.
def A57WriteMLA  : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
def A57ReadMLA   : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;

def : InstRW<[A57WriteMLA],
  (instregex "t2SMLAD", "t2SMLADX", "t2SMLSD", "t2SMLSDX")>;

def : SchedAlias<WriteMAC16, A57WriteMLA>;
def : SchedAlias<WriteMAC32, A57WriteMLA>;
def : SchedAlias<ReadMAC,    A57ReadMLA>;

def : SchedAlias<WriteMAC64Lo, A57WriteMLAL>;
def : SchedAlias<WriteMAC64Hi, A57WriteMLAL>;

// Multiply long: SMULL, UMULL
def : SchedAlias<WriteMUL64Lo, A57Write_4cyc_1M>;
def : SchedAlias<WriteMUL64Hi, A57Write_4cyc_1M>;

// --- 3.6 Saturating and Parallel Arithmetic Instructions ---
// Parallel arith
// SADD16, SADD8, SSUB16, SSUB8, UADD16, UADD8, USUB16, USUB8
// Conditional GE-setting instructions require three extra μops
// and two additional cycles to conditionally update the GE field.
def A57WriteParArith : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_4cyc_1I_1M]>,
  SchedVar<NoSchedPred,      [A57Write_2cyc_1I_1M]>
]>;
def : InstRW< [A57WriteParArith], (instregex
  "(t2)?SADD(16|8)", "(t2)?SSUB(16|8)",
  "(t2)?UADD(16|8)", "(t2)?USUB(16|8)")>;

// Parallel arith with exchange: SASX, SSAX, UASX, USAX
def A57WriteParArithExch : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_5cyc_1I_1M]>,
  SchedVar<NoSchedPred,      [A57Write_3cyc_1I_1M]>
]>;
def : InstRW<[A57WriteParArithExch],
  (instregex "(t2)?SASX", "(t2)?SSAX", "(t2)?UASX", "(t2)?USAX")>;

// Parallel halving arith
// SHADD16, SHADD8, SHSUB16, SHSUB8, UHADD16, UHADD8, UHSUB16, UHSUB8
def : InstRW<[A57Write_2cyc_1M], (instregex
  "(t2)?SHADD(16|8)", "(t2)?SHSUB(16|8)",
  "(t2)?UHADD(16|8)", "(t2)?UHSUB(16|8)")>;

// Parallel halving arith with exchange
// SHASX, SHSAX, UHASX, UHSAX
def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?SHASX", "(t2)?SHSAX",
  "(t2)?UHASX", "(t2)?UHSAX")>;

// Parallel saturating arith
// QADD16, QADD8, QSUB16, QSUB8, UQADD16, UQADD8, UQSUB16, UQSUB8
def : InstRW<[A57Write_2cyc_1M], (instregex "QADD(16|8)", "QSUB(16|8)",
  "UQADD(16|8)", "UQSUB(16|8)", "t2(U?)QADD", "t2(U?)QSUB")>;

// Parallel saturating arith with exchange
// QASX, QSAX, UQASX, UQSAX
def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QASX", "(t2)?QSAX",
  "(t2)?UQASX", "(t2)?UQSAX")>;

// Saturate: SSAT, SSAT16, USAT, USAT16
def : InstRW<[A57Write_2cyc_1M],
  (instregex "(t2)?SSAT(16)?", "(t2)?USAT(16)?")>;

// Saturating arith: QADD, QSUB
def : InstRW<[A57Write_2cyc_1M], (instregex "QADD$", "QSUB$")>;

// Saturating doubling arith: QDADD, QDSUB
def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QDADD", "(t2)?QDSUB")>;

// --- 3.7 Miscellaneous Data-Processing Instructions ---
// Bit field extract: SBFX, UBFX
def : InstRW<[A57Write_1cyc_1I], (instregex "(t2)?SBFX", "(t2)?UBFX")>;

// Bit field insert/clear: BFI, BFC
def : InstRW<[A57Write_2cyc_1M], (instregex "(t2)?BFI", "(t2)?BFC")>;

// Select bytes, conditional/unconditional
def A57WriteSEL : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
  SchedVar<NoSchedPred,      [A57Write_1cyc_1I]>
]>;
def : InstRW<[A57WriteSEL], (instregex "(t2)?SEL")>;

// Sign/zero extend, normal: SXTB, SXTH, UXTB, UXTH
def : InstRW<[A57Write_1cyc_1I],
  (instregex "(t2|t)?SXT(B|H)$", "(t2|t)?UXT(B|H)$")>;

// Sign/zero extend and add, normal: SXTAB, SXTAH, UXTAB, UXTAH
def : InstRW<[A57Write_2cyc_1M],
  (instregex "(t2)?SXTA(B|H)$", "(t2)?UXTA(B|H)$")>;

// Sign/zero extend and add, parallel: SXTAB16, UXTAB16
def : InstRW<[A57Write_4cyc_1M], (instregex "(t2)?SXTAB16", "(t2)?UXTAB16")>;

// Sum of absolute differences: USAD8, USADA8
def : InstRW<[A57Write_3cyc_1M], (instregex "(t2)?USAD8", "(t2)?USADA8")>;

// --- 3.8 Load Instructions ---

// Load, immed offset
// LDR and LDRB have LDRi12 and LDRBi12 forms for immediate
def : InstRW<[A57Write_4cyc_1L], (instregex "LDRi12", "LDRBi12",
  "LDRcp", "(t2|t)?LDRConstPool", "LDRLIT_ga_(pcrel|abs)",
  "PICLDR", "tLDR")>;

def : InstRW<[A57Write_4cyc_1L],
  (instregex "t2LDRS?(B|H)?(pcrel|T|i8|i12|pci|pci_pic|s)?$")>;

// For "Load, register offset, minus" we need +1cyc, +1I
def A57WriteLdrAm3 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPred, [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,           [A57Write_4cyc_1L]>
]>;
def : InstRW<[A57WriteLdrAm3], (instregex "LDR(H|SH|SB)$")>;
// Same as A57WriteLdrAm3, using the X2 form of the predicate.
def A57WriteLdrAm3X2 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,             [A57Write_4cyc_1L]>
]>;
def : InstRW<[A57WriteLdrAm3X2, A57WriteLdrAm3X2], (instregex "LDRD$")>;
def : InstRW<[A57Write_4cyc_1L, A57Write_4cyc_1L], (instregex "t2LDRDi8")>;

// Register-offset LDR/LDRB: 5cyc "I, L" for a not-plus-LSL2 scaled or a minus
// register offset, otherwise 4cyc "L".
def A57WriteLdrAmLDSTSO : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_5cyc_1I_1L]>,
  SchedVar<IsLdstsoMinusRegPred,         [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,                  [A57Write_4cyc_1L]>
]>;
def : InstRW<[A57WriteLdrAmLDSTSO], (instregex "LDRrs", "LDRBrs")>;

// Write-back (base register update) results. They list no processor
// resources and count zero micro-ops, so they contribute latency only
// (1, 2 or 3 cycles).
def A57WrBackOne : SchedWriteRes<[]> {
  let Latency = 1;
  let NumMicroOps = 0;
}
def A57WrBackTwo : SchedWriteRes<[]> {
  let Latency = 2;
  let NumMicroOps = 0;
}
def A57WrBackThree : SchedWriteRes<[]> {
  let Latency = 3;
  let NumMicroOps = 0;
}

// --- LDR pre-indexed ---
// Load, immed pre-indexed (4 cyc for load result, 1 cyc for Base update)
def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR_PRE_IMM",
  "LDRB_PRE_IMM", "t2LDRB_PRE")>;

// Load, register pre-indexed (4 cyc for load result, 2 cyc for Base update)
// (5 cyc load result for not-lsl2 scaled)
def A57WriteLdrAmLDSTSOPre : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,                    [A57Write_4cyc_1L_1I]>
]>;
def : InstRW<[A57WriteLdrAmLDSTSOPre, A57WrBackTwo],
  (instregex "LDR_PRE_REG", "LDRB_PRE_REG")>;

// Addrmode3 pre-indexed write-back: 2 cyc for register offset, else 1 cyc.
def A57WriteLdrAm3PreWrBack : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPredX2, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,          [A57WrBackOne]>
]>;
def : InstRW<[A57Write_4cyc_1L, A57WriteLdrAm3PreWrBack],
  (instregex "LDR(H|SH|SB)_PRE")>;
def : InstRW<[A57Write_4cyc_1L, A57WrBackOne],
  (instregex "t2LDR(H|SH|SB)?_PRE")>;

// LDRD pre-indexed: 5(2) cyc for reg, 4(1) cyc for imm.
def A57WriteLdrDAm3Pre : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPredX3, [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,          [A57Write_4cyc_1L_1I]>
]>;
def A57WriteLdrDAm3PreWrBack : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,          [A57WrBackOne]>
]>;
def : InstRW<[A57WriteLdrDAm3Pre, A57WriteLdrDAm3Pre, A57WriteLdrDAm3PreWrBack],
  (instregex "LDRD_PRE")>;
def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
  (instregex "t2LDRD_PRE")>;

// --- LDR post-indexed ---
def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR(T?)_POST_IMM",
  "LDRB(T?)_POST_IMM", "LDR(SB|H|SH)Ti", "t2LDRB_POST")>;

// Addrmode3 post-indexed write-back: 2 cyc for register offset, else 1 cyc.
def A57WriteLdrAm3PostWrBack : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPred, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,        [A57WrBackOne]>
]>;
def : InstRW<[A57Write_4cyc_1L_1I, A57WriteLdrAm3PostWrBack],
  (instregex "LDR(H|SH|SB)_POST")>;
def : InstRW<[A57Write_4cyc_1L, A57WrBackOne],
  (instregex "t2LDR(H|SH|SB)?_POST")>;

def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG",
  "LDRB_POST_REG", "LDR(B?)T_POST$")>;

// Load result for LDRT/LDRBT register post-indexed: the scaled-register case
// additionally uses the M unit.
def A57WriteLdrTRegPost : SchedWriteVariant<[
  SchedVar<IsLdrAm2ScaledPred, [A57Write_4cyc_1I_1L_1M]>,
  SchedVar<NoSchedPred,        [A57Write_4cyc_1L_1I]>
]>;
// Write-back latency for LDRT/LDRBT register post-indexed: 3 cyc for a scaled
// register, otherwise 2 cyc.
def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[
  SchedVar<IsLdrAm2ScaledPred, [A57WrBackThree]>,
  SchedVar<NoSchedPred,        [A57WrBackTwo]>
]>;
// 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L"
def : InstRW<[A57WriteLdrTRegPost, A57WriteLdrTRegPostWrBack],
  (instregex "LDRT_POST_REG", "LDRBT_POST_REG")>;

def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR(SB|H|SH)Tr")>;

def A57WriteLdrAm3PostWrBackX3 : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,          [A57WrBackOne]>
]>;
// LDRD post-indexed: 4(2) cyc for reg, 4(1) cyc for imm.
def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
  A57WriteLdrAm3PostWrBackX3], (instregex "LDRD_POST")>;
def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
  (instregex "t2LDRD_POST")>;

// --- Preload instructions ---
// Preload, immed offset
def : InstRW<[A57Write_4cyc_1L], (instregex "(t2)?PLDi12", "(t2)?PLDWi12",
  "t2PLDW?(i8|pci|s)", "(t2)?PLI")>;

// Preload, register offset,
// 5cyc "I0/I1,L" for minus reg or scaled not plus lsl2
// otherwise 4cyc "L"
def A57WritePLD : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledNotOptimalPredX0, [A57Write_5cyc_1I_1L]>,
  SchedVar<IsLdstsoMinusRegPredX0,         [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,                    [A57Write_4cyc_1L]>
]>;
def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>;

// --- Load multiple instructions ---
// Defines A57LMAddrPred1 .. A57LMAddrPred8. Predicate N holds when
// (getLDMVariableDefsSize(*MI) + 1) / 2 == N, i.e. it buckets the variable
// register list into pairs.
foreach NumAddr = 1-8 in {
  def A57LMAddrPred#NumAddr :
    SchedPredicate<"(TII->getLDMVariableDefsSize(*MI)+1)/2 == "#NumAddr>;
}

// LDM without the base register in the list: two writes per latency step,
// 3 cyc up to 10 cyc. Each variant below takes a prefix of this list, two
// entries per pair counted by A57LMAddrPred<N>.
def A57LDMOpsListNoregin : A57WriteLMOpsListType<
                [A57Write_3cyc_1L, A57Write_3cyc_1L,
                 A57Write_4cyc_1L, A57Write_4cyc_1L,
                 A57Write_5cyc_1L, A57Write_5cyc_1L,
                 A57Write_6cyc_1L, A57Write_6cyc_1L,
                 A57Write_7cyc_1L, A57Write_7cyc_1L,
                 A57Write_8cyc_1L, A57Write_8cyc_1L,
                 A57Write_9cyc_1L, A57Write_9cyc_1L,
                 A57Write_10cyc_1L, A57Write_10cyc_1L]>;
def A57WriteLDMnoreginlist : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57LDMOpsListNoregin.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57LDMOpsListNoregin.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57LDMOpsListNoregin.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57LDMOpsListNoregin.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57LDMOpsListNoregin.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57LDMOpsListNoregin.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57LDMOpsListNoregin.Writes[0-13]>,
  SchedVar<A57LMAddrPred8, A57LDMOpsListNoregin.Writes[0-15]>,
  // Fallback when none of the predicates match: the full list.
  SchedVar<NoSchedPred,    A57LDMOpsListNoregin.Writes[0-15]>
]> { let Variadic=1; }

// LDM with the base register in the list: latencies are one cycle higher
// (4 cyc up to 11 cyc) and every write also uses an I unit.
def A57LDMOpsListRegin : A57WriteLMOpsListType<
                [A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
                 A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
                 A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
                 A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
                 A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
                 A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
                 A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
                 A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I]>;
def A57WriteLDMreginlist : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57LDMOpsListRegin.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57LDMOpsListRegin.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57LDMOpsListRegin.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57LDMOpsListRegin.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57LDMOpsListRegin.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57LDMOpsListRegin.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57LDMOpsListRegin.Writes[0-13]>,
  SchedVar<A57LMAddrPred8, A57LDMOpsListRegin.Writes[0-15]>,
  SchedVar<NoSchedPred,    A57LDMOpsListRegin.Writes[0-15]>
]> { let Variadic=1; }

// LDM with base update: element 0 is the 1-cycle write-back, followed by the
// per-register load writes. The slices below are therefore one longer than in
// the lists above ([0-2], [0-4], ...).
def A57LDMOpsList_Upd : A57WriteLMOpsListType<
              [A57WrBackOne,
               A57Write_3cyc_1L_1I, A57Write_3cyc_1L_1I,
               A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
               A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
               A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
               A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
               A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
               A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
               A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>;
def A57WriteLDM_Upd : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57LDMOpsList_Upd.Writes[0-2]>,
  SchedVar<A57LMAddrPred2, A57LDMOpsList_Upd.Writes[0-4]>,
  SchedVar<A57LMAddrPred3, A57LDMOpsList_Upd.Writes[0-6]>,
  SchedVar<A57LMAddrPred4, A57LDMOpsList_Upd.Writes[0-8]>,
  SchedVar<A57LMAddrPred5, A57LDMOpsList_Upd.Writes[0-10]>,
  SchedVar<A57LMAddrPred6, A57LDMOpsList_Upd.Writes[0-12]>,
  SchedVar<A57LMAddrPred7, A57LDMOpsList_Upd.Writes[0-14]>,
  SchedVar<A57LMAddrPred8, A57LDMOpsList_Upd.Writes[0-16]>,
  SchedVar<NoSchedPred,    A57LDMOpsList_Upd.Writes[0-16]>
]> { let Variadic=1; }

// Top-level LDM variant: selects the "base register in list" table or the
// plain one; each of those then resolves by register count.
def A57WriteLDM : SchedWriteVariant<[
  SchedVar<IsLdmBaseRegInList, [A57WriteLDMreginlist]>,
  SchedVar<NoSchedPred,        [A57WriteLDMnoreginlist]>
]> { let Variadic=1; }

def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>;

// TODO: no writeback latency defined in documentation (implemented as 1 cyc)
def : InstRW<[A57WriteLDM_Upd],
  (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>;

def : InstRW<[A57Write_5cyc_1L], (instregex "VLLDM")>;

// --- 3.9 Store Instructions ---

// Store, immed offset
def : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR",
  "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>;

// Store, register offset
// For minus or for not plus lsl2 scaled we need 3cyc "I0/I1, S",
// otherwise 1cyc S.
// Register-offset STR/STRB: 3cyc "I, S" for a not-plus-LSL2 scaled or a minus
// register offset, otherwise 1cyc "S".
def A57WriteStrAmLDSTSO : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_3cyc_1I_1S]>,
  SchedVar<IsLdstsoMinusRegPred,         [A57Write_3cyc_1I_1S]>,
  SchedVar<NoSchedPred,                  [A57Write_1cyc_1S]>
]>;
def : InstRW<[A57WriteStrAmLDSTSO], (instregex "STRrs", "STRBrs")>;

// STRH,STRD: 3cyc "I0/I1, S" for minus reg, 1cyc S for imm or for plus reg.
def A57WriteStrAm3 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPred, [A57Write_3cyc_1I_1S]>,
  SchedVar<NoSchedPred,           [A57Write_1cyc_1S]>
]>;
def : InstRW<[A57WriteStrAm3], (instregex "STRH$")>;
def A57WriteStrAm3X2 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
  SchedVar<NoSchedPred,             [A57Write_1cyc_1S]>
]>;
def : InstRW<[A57WriteStrAm3X2], (instregex "STRD$")>;

// Store, immed pre-indexed (1cyc "S, I0/I1", 1cyc writeback)
// For the write-back stores below, the write-back entry is listed first and
// the store write second.
def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR_PRE_IMM",
  "STRB_PRE_IMM", "STR(B)?(r|i)_preidx", "(t2)?STRH_(preidx|PRE)",
  "t2STR(B?)_(PRE|preidx)", "t2STRD_PRE")>;

// Store, register pre-indexed:
// 1(1) "S, I0/I1" for plus reg
// 3(2) "I0/I1, S" for minus reg
// 1(2) "S, M" for scaled plus lsl2
// 3(2) "I0/I1, S" for other scaled
// Order matters: the "not optimal" and "minus" checks come before the generic
// "scaled" check.
def A57WriteStrAmLDSTSOPre : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_3cyc_1I_1S]>,
  SchedVar<IsLdstsoMinusRegPredX2,         [A57Write_3cyc_1I_1S]>,
  SchedVar<IsLdstsoScaledPredX2,           [A57Write_1cyc_1S_1M]>,
  SchedVar<NoSchedPred,                    [A57Write_1cyc_1S_1I]>
]>;
def A57WriteStrAmLDSTSOPreWrBack : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledPredX2,           [A57WrBackTwo]>,
  SchedVar<IsLdstsoMinusRegPredX2,         [A57WrBackTwo]>,
  SchedVar<NoSchedPred,                    [A57WrBackOne]>
]>;
def : InstRW<[A57WriteStrAmLDSTSOPreWrBack, A57WriteStrAmLDSTSOPre],
  (instregex "STR_PRE_REG", "STRB_PRE_REG")>;

// pre-indexed STRH/STRD (STRH_PRE, STRD_PRE)
// 1(1) "S, I0/I1" for imm or reg plus
// 3(2) "I0/I1, S" for reg minus
def A57WriteStrAm3PreX2 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
  SchedVar<NoSchedPred,             [A57Write_1cyc_1S_1I]>
]>;
def A57WriteStrAm3PreWrBackX2 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPredX2, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,             [A57WrBackOne]>
]>;
def : InstRW<[A57WriteStrAm3PreWrBackX2, A57WriteStrAm3PreX2],
  (instregex "STRH_PRE")>;

// Same pair for STRD_PRE, using the X3 form of the predicate.
def A57WriteStrAm3PreX3 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPredX3, [A57Write_3cyc_1I_1S]>,
  SchedVar<NoSchedPred,             [A57Write_1cyc_1S_1I]>
]>;
def A57WriteStrAm3PreWrBackX3 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPredX3, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,             [A57WrBackOne]>
]>;
def : InstRW<[A57WriteStrAm3PreWrBackX3, A57WriteStrAm3PreX3],
  (instregex "STRD_PRE")>;

// Store, immed post-indexed: 1cyc "S, I", 1cyc writeback.
def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR(T?)_POST_IMM",
  "STRB(T?)_POST_IMM", "t2STR(B?)_POST")>;

// 1(2) "S, M" for STR/STRB register post-indexed (both scaled or not)
def : InstRW<[A57WrBackTwo, A57Write_1cyc_1S_1M], (instregex "STR(T?)_POST_REG",
  "STRB(T?)_POST_REG", "STR(B?)T_POST$")>;

// post-indexed STRH/STRD(STRH_POST, STRD_POST), STRHTi, STRHTr
// 1(1) "S, I0/I1" both for reg or imm
def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
  (instregex "(t2)?STR(H|D)_POST", "STRHT(i|r)", "t2STRHT")>;

// --- Store multiple instructions ---
// TODO: no writeback latency defined in documentation
// Latency is N cycles on S when A57LMAddrPred<N> matches; the fallback when no
// predicate matches is 2 cycles.
def A57WriteSTM : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
    SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
    SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
    SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
    SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
    SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
    SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
    SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
    SchedVar<NoSchedPred,    [A57Write_2cyc_1S]>
]>;
// Same table for the base-updating forms; every write also uses an I unit.
def A57WriteSTM_Upd : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
    SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
    SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
    SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
    SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
    SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
    SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
    SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
    SchedVar<NoSchedPred,    [A57Write_2cyc_1S_1I]>
]>;

def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>;
def : InstRW<[A57WrBackOne, A57WriteSTM_Upd],
  (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>;

def : InstRW<[A57Write_5cyc_1S], (instregex "VLSTM")>;

// --- 3.10 FP Data Processing Instructions ---
def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>;
def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>;

def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>;

// fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional
def A57WriteVcmp : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_6cyc_1V_1X]>,
  SchedVar<NoSchedPred,      [A57Write_3cyc_1X]>
]>;
def : InstRW<[A57WriteVcmp],
  (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>;

// fp convert
def : InstRW<[A57Write_5cyc_1V], (instregex
  "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>;
def : InstRW<[A57Write_5cyc_1V], (instregex "VTOSLS", "VTOUHS", "VTOULS")>;
def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>;

def : InstRW<[A57Write_5cyc_1V], (instregex "VJCVT")>;

// FP round to integral
def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>;

// FP divide, FP square root
def : SchedAlias<WriteFPDIV32,  A57Write_17cyc_1W>;
def : SchedAlias<WriteFPDIV64,  A57Write_32cyc_1W>;
def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>;
def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>;

def : InstRW<[A57Write_17cyc_1W], (instregex "VSQRTH")>;

// FP max/min
def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>;

// FP multiply-accumulate pipelines support late forwarding of the result
// from FP multiply μops to the accumulate operands of an
// FP multiply-accumulate μop. The latter can potentially be issued 1 cycle
// after the FP multiply μop has been issued
// FP multiply, FZ
def A57WriteVMUL : SchedWriteRes<[A57UnitV]> { let Latency = 5; }

def : SchedAlias<WriteFPMUL32, A57WriteVMUL>;
def : SchedAlias<WriteFPMUL64, A57WriteVMUL>;
def : ReadAdvance<ReadFPMUL, 0>;

// FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate
// VFMA, VFMS, VFNMA, VFNMS, VMLA, VMLS, VNMLA, VNMLS
def A57WriteVFMA : SchedWriteRes<[A57UnitV]> { let Latency = 9;  }

// VFMA takes 9 cyc for common case and 4 cyc for VFMA->VFMA chain (5 read adv.)
// VMUL takes 5 cyc for common case and 1 cyc for VMUL->VFMA chain (4 read adv.)
// Currently, there is no way to define different read advances for VFMA operand
// from VFMA or from VMUL, so there will be 5 read advance.
// Zero latency (instead of one) for VMUL->VFMA shouldn't break something.
// The same situation with ASIMD VMUL/VFMA instructions
// def A57ReadVFMA : SchedRead;
// def : ReadAdvance<A57ReadVFMA, 5, [A57WriteVFMA]>;
// def : ReadAdvance<A57ReadVFMA, 4, [A57WriteVMUL]>;
def A57ReadVFMA5
    : SchedReadAdvance<5, [A57WriteVFMA, A57WriteVMUL]>;

def : SchedAlias<WriteFPMAC32, A57WriteVFMA>;
def : SchedAlias<WriteFPMAC64, A57WriteVFMA>;
def : SchedAlias<ReadFPMAC,    A57ReadVFMA5>;

// VMLAH/VMLSH are not bound to scheduling classes by default, so they are
// mapped explicitly here:
def : InstRW<[A57WriteVFMA, A57ReadVFMA5, ReadFPMUL, ReadFPMUL],
             (instregex "VMLAH", "VMLSH", "VNMLAH", "VNMLSH")>;

def : InstRW<[A57WriteVMUL],
             (instregex "VUDOTD", "VSDOTD", "VUDOTQ", "VSDOTQ")>;

def : InstRW<[A57Write_3cyc_1V],
             (instregex "VNEG")>;
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VSEL")>;

// --- 3.11 FP Miscellaneous Instructions ---
// VMOV (immediate / register): 3cyc "F0/F1"
def : InstRW<[A57Write_3cyc_1V],
             (instregex "FCONST(D|S|H)")>;
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VMOV(D|S|H)(cc)?$")>;

def : InstRW<[A57Write_3cyc_1V],
             (instregex "VINSH")>;

// FP transfer, vfp to core reg: 5cyc L
// FP transfer, core reg to vfp: 5cyc L
def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>;
// VMOVRRS/VMOVRRD in common code declared with one WriteFPMOV (instead of 2).
def : InstRW<[A57Write_5cyc_1L, A57Write_5cyc_1L],
             (instregex "VMOV(RRS|RRD)")>;

// 8cyc "L,F0/F1" for FP transfer, core reg to upper or lower half of vfp D-reg
// "F0/F1" corresponds to a V write, matching how VDUP and VSETLN
// (also 8cyc "L, F0/F1") are bound elsewhere in this file.
def : InstRW<[A57Write_8cyc_1L_1V],
             (instregex "VMOVDRR")>;

// --- 3.12 FP Load Instructions ---
def : InstRW<[A57Write_5cyc_1L],
             (instregex "VLDR(D|S|H)")>;

def : InstRW<[A57Write_5cyc_1L],
             (instregex "VLDMQIA$")>;

// FP load multiple (VLDM)

// Write list for unconditional VLDM: the latency goes up by one cycle for
// every two entries (5, 5, 6, 6, ..., 12, 12).
def A57VLDMOpsListUncond : A57WriteLMOpsListType<
    [A57Write_5cyc_1L,  A57Write_5cyc_1L,
     A57Write_6cyc_1L,  A57Write_6cyc_1L,
     A57Write_7cyc_1L,  A57Write_7cyc_1L,
     A57Write_8cyc_1L,  A57Write_8cyc_1L,
     A57Write_9cyc_1L,  A57Write_9cyc_1L,
     A57Write_10cyc_1L, A57Write_10cyc_1L,
     A57Write_11cyc_1L, A57Write_11cyc_1L,
     A57Write_12cyc_1L, A57Write_12cyc_1L]>;
// A57LMAddrPred<N> takes the first 2*N writes of the list; the fallback takes
// the whole list.
def A57WriteVLDMuncond : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond.Writes[0-1]>,
    SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond.Writes[0-3]>,
    SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond.Writes[0-5]>,
    SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond.Writes[0-7]>,
    SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond.Writes[0-9]>,
    SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond.Writes[0-11]>,
    SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond.Writes[0-13]>,
    SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond.Writes[0-15]>,
    SchedVar<NoSchedPred,    A57VLDMOpsListUncond.Writes[0-15]>
]> {
  let Variadic = 1;
}

// Write list for conditional VLDM: the latency goes up by one cycle per entry
// (5, 6, ..., 20).
def A57VLDMOpsListCond : A57WriteLMOpsListType<
    [A57Write_5cyc_1L,  A57Write_6cyc_1L,
     A57Write_7cyc_1L,  A57Write_8cyc_1L,
     A57Write_9cyc_1L,  A57Write_10cyc_1L,
     A57Write_11cyc_1L, A57Write_12cyc_1L,
     A57Write_13cyc_1L, A57Write_14cyc_1L,
     A57Write_15cyc_1L, A57Write_16cyc_1L,
     A57Write_17cyc_1L, A57Write_18cyc_1L,
     A57Write_19cyc_1L, A57Write_20cyc_1L]>;
def A57WriteVLDMcond : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, A57VLDMOpsListCond.Writes[0-1]>,
    SchedVar<A57LMAddrPred2, A57VLDMOpsListCond.Writes[0-3]>,
    SchedVar<A57LMAddrPred3, A57VLDMOpsListCond.Writes[0-5]>,
    SchedVar<A57LMAddrPred4, A57VLDMOpsListCond.Writes[0-7]>,
    SchedVar<A57LMAddrPred5, A57VLDMOpsListCond.Writes[0-9]>,
    SchedVar<A57LMAddrPred6, A57VLDMOpsListCond.Writes[0-11]>,
    SchedVar<A57LMAddrPred7, A57VLDMOpsListCond.Writes[0-13]>,
    SchedVar<A57LMAddrPred8, A57VLDMOpsListCond.Writes[0-15]>,
    SchedVar<NoSchedPred,    A57VLDMOpsListCond.Writes[0-15]>
]> {
  let Variadic = 1;
}

// Predicated VLDM uses the conditional variant, otherwise the unconditional.
def A57WriteVLDM : SchedWriteVariant<[
    SchedVar<IsPredicatedPred, [A57WriteVLDMcond]>,
    SchedVar<NoSchedPred,      [A57WriteVLDMuncond]>
]> {
  let Variadic = 1;
}

def : InstRW<[A57WriteVLDM],
             (instregex "VLDM(DIA|SIA)$")>;

// Writeback forms of the two lists above: same latencies, "_1L_1I" writes.
def A57VLDMOpsListUncond_Upd : A57WriteLMOpsListType<
    [A57Write_5cyc_1L_1I,  A57Write_5cyc_1L_1I,
     A57Write_6cyc_1L_1I,  A57Write_6cyc_1L_1I,
     A57Write_7cyc_1L_1I,  A57Write_7cyc_1L_1I,
     A57Write_8cyc_1L_1I,  A57Write_8cyc_1L_1I,
     A57Write_9cyc_1L_1I,  A57Write_9cyc_1L_1I,
     A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
     A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I,
     A57Write_12cyc_1L_1I, A57Write_12cyc_1L_1I]>;
def A57WriteVLDMuncond_UPD : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond_Upd.Writes[0-1]>,
    SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond_Upd.Writes[0-3]>,
    SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond_Upd.Writes[0-5]>,
    SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond_Upd.Writes[0-7]>,
    SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond_Upd.Writes[0-9]>,
    SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond_Upd.Writes[0-11]>,
    SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond_Upd.Writes[0-13]>,
    SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond_Upd.Writes[0-15]>,
    SchedVar<NoSchedPred,    A57VLDMOpsListUncond_Upd.Writes[0-15]>
]> {
  let Variadic = 1;
}

def A57VLDMOpsListCond_Upd : A57WriteLMOpsListType<
    [A57Write_5cyc_1L_1I,  A57Write_6cyc_1L_1I,
     A57Write_7cyc_1L_1I,  A57Write_8cyc_1L_1I,
     A57Write_9cyc_1L_1I,  A57Write_10cyc_1L_1I,
     A57Write_11cyc_1L_1I, A57Write_12cyc_1L_1I,
     A57Write_13cyc_1L_1I, A57Write_14cyc_1L_1I,
     A57Write_15cyc_1L_1I, A57Write_16cyc_1L_1I,
     A57Write_17cyc_1L_1I, A57Write_18cyc_1L_1I,
     A57Write_19cyc_1L_1I, A57Write_20cyc_1L_1I]>;
def A57WriteVLDMcond_UPD : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, A57VLDMOpsListCond_Upd.Writes[0-1]>,
    SchedVar<A57LMAddrPred2, A57VLDMOpsListCond_Upd.Writes[0-3]>,
    SchedVar<A57LMAddrPred3, A57VLDMOpsListCond_Upd.Writes[0-5]>,
    SchedVar<A57LMAddrPred4, A57VLDMOpsListCond_Upd.Writes[0-7]>,
    SchedVar<A57LMAddrPred5, A57VLDMOpsListCond_Upd.Writes[0-9]>,
    SchedVar<A57LMAddrPred6, A57VLDMOpsListCond_Upd.Writes[0-11]>,
    SchedVar<A57LMAddrPred7, A57VLDMOpsListCond_Upd.Writes[0-13]>,
    SchedVar<A57LMAddrPred8, A57VLDMOpsListCond_Upd.Writes[0-15]>,
    SchedVar<NoSchedPred,    A57VLDMOpsListCond_Upd.Writes[0-15]>
]> {
  let Variadic = 1;
}

def A57WriteVLDM_UPD : SchedWriteVariant<[
    SchedVar<IsPredicatedPred, [A57WriteVLDMcond_UPD]>,
    SchedVar<NoSchedPred,      [A57WriteVLDMuncond_UPD]>
]> {
  let Variadic = 1;
}

def : InstRW<[A57WrBackOne, A57WriteVLDM_UPD],
             (instregex "VLDM(DIA_UPD|DDB_UPD|SIA_UPD|SDB_UPD)")>;

// --- 3.13 FP Store Instructions ---
def : InstRW<[A57Write_1cyc_1S],
             (instregex "VSTR(D|S|H)")>;

def : InstRW<[A57Write_2cyc_1S],
             (instregex "VSTMQIA$")>;

// FP store multiple. In each variant below, A57LMAddrPred<N> selects an
// N-cycle (S-register forms) or 2*N-cycle (D-register forms) store write, and
// the NoSchedPred fallback repeats the A57LMAddrPred2 entry.
def A57WriteVSTMs : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
    SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
    SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
    SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
    SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
    SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
    SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
    SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
    SchedVar<NoSchedPred,    [A57Write_2cyc_1S]>
]>;
def A57WriteVSTMd : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S]>,
    SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S]>,
    SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S]>,
    SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S]>,
    SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S]>,
    SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S]>,
    SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S]>,
    SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S]>,
    SchedVar<NoSchedPred,    [A57Write_4cyc_1S]>
]>;
def A57WriteVSTMs_Upd : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
    SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
    SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
    SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
    SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
    SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
    SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
    SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
    SchedVar<NoSchedPred,    [A57Write_2cyc_1S_1I]>
]>;
def A57WriteVSTMd_Upd : SchedWriteVariant<[
    SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S_1I]>,
    SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S_1I]>,
    SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S_1I]>,
    SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S_1I]>,
    SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S_1I]>,
    SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S_1I]>,
    SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S_1I]>,
    SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S_1I]>,
    // Fallback is the 4-cycle A57LMAddrPred2 entry, consistent with
    // A57WriteVSTMd (it previously used the 2-cycle S-register value).
    SchedVar<NoSchedPred,    [A57Write_4cyc_1S_1I]>
]>;

def : InstRW<[A57WriteVSTMs],
             (instregex "VSTMSIA$")>;
def : InstRW<[A57WriteVSTMd],
             (instregex "VSTMDIA$")>;
def : InstRW<[A57WrBackOne, A57WriteVSTMs_Upd],
             (instregex "VSTM(SIA_UPD|SDB_UPD)")>;
def : InstRW<[A57WrBackOne, A57WriteVSTMd_Upd],
             (instregex "VSTM(DIA_UPD|DDB_UPD)")>;

// --- 3.14 ASIMD Integer Instructions ---

// ASIMD absolute diff, 3cyc F0/F1 for integer VABD
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VABD(s|u)")>;

// ASIMD absolute diff accum: 4(1) F1 for D-form, 5(2) F1 for Q-form
def A57WriteVABAD : SchedWriteRes<[A57UnitX]> {
  let Latency = 4;
}
def A57ReadVABAD : SchedReadAdvance<3, [A57WriteVABAD]>;
def : InstRW<[A57WriteVABAD, A57ReadVABAD],
             (instregex "VABA(s|u)(v8i8|v4i16|v2i32)")>;
def A57WriteVABAQ : SchedWriteRes<[A57UnitX]> {
  let Latency = 5;
}
def A57ReadVABAQ : SchedReadAdvance<3, [A57WriteVABAQ]>;
def : InstRW<[A57WriteVABAQ, A57ReadVABAQ],
             (instregex "VABA(s|u)(v16i8|v8i16|v4i32)")>;

// ASIMD absolute diff accum long: 4(1) F1 for VABAL
def A57WriteVABAL : SchedWriteRes<[A57UnitX]> {
  let Latency = 4;
}
def A57ReadVABAL : SchedReadAdvance<3, [A57WriteVABAL]>;
def : InstRW<[A57WriteVABAL, A57ReadVABAL],
             (instregex "VABAL(s|u)")>;

// ASIMD absolute diff long: 3cyc F0/F1 for VABDL
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VABDL(s|u)")>;

// ASIMD arith, basic
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VADDv", "VADDL", "VADDW",
                        "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)",
                        "VPADDi", "VPADDL", "VSUBv", "VSUBL", "VSUBW")>;

// ASIMD arith, complex
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VABS", "VADDHN", "VHADD", "VHSUB",
                        "VQABS", "VQADD", "VQNEG", "VQSUB",
                        "VRADDHN", "VRHADD", "VRSUBHN", "VSUBHN")>;

// ASIMD compare
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VCEQ", "VCGE", "VCGT", "VCLE", "VTST", "VCLT")>;

// ASIMD logical
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VAND", "VBIC", "VMVN", "VORR", "VORN", "VEOR")>;

// ASIMD max/min
def : InstRW<[A57Write_3cyc_1V],
             (instregex "(VMAX|VMIN)(s|u)",
                        "(VPMAX|VPMIN)(s8|s16|s32|u8|u16|u32)")>;

// ASIMD multiply, D-form: 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
// Cortex-A57 r1p0 and later reduce the latency of ASIMD multiply
// and multiply-with-accumulate instructions relative to r0pX.
def A57WriteVMULD_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def : InstRW<[A57WriteVMULD_VecInt],
             (instregex "VMUL(v8i8|v4i16|v2i32|pd)",
                        "VMULsl(v4i16|v2i32)",
                        "VQDMULH(sl)?(v4i16|v2i32)",
                        "VQRDMULH(sl)?(v4i16|v2i32)")>;

// ASIMD multiply, Q-form: 6cyc F0 on r0px, 5cyc F0 on r1p0 and later
def A57WriteVMULQ_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
    SchedVar<NoSchedPred,        [A57Write_6cyc_1W]>
]>;
def : InstRW<[A57WriteVMULQ_VecInt],
             (instregex "VMUL(v16i8|v8i16|v4i32|pq)",
                        "VMULsl(v8i16|v4i32)",
                        "VQDMULH(sl)?(v8i16|v4i32)",
                        "VQRDMULH(sl)?(v8i16|v4i32)")>;

// ASIMD multiply accumulate, D-form
// 5cyc F0 on r0px, 4cyc F0 on r1p0 and later; 1cyc for an accumulate sequence
// (ReadAdvance of 4 or 3 respectively)
def A57WriteVMLAD_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def A57ReadVMLAD_VecInt : SchedReadVariant<[
    SchedVar<IsR1P0AndLaterPred,
             [SchedReadAdvance<3, [A57WriteVMLAD_VecInt]>]>,
    SchedVar<NoSchedPred,
             [SchedReadAdvance<4, [A57WriteVMLAD_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAD_VecInt, A57ReadVMLAD_VecInt],
             (instregex "VMLA(sl)?(v8i8|v4i16|v2i32)",
                        "VMLS(sl)?(v8i8|v4i16|v2i32)")>;

// ASIMD multiply accumulate, Q-form
// 6cyc F0 on r0px, 5cyc F0 on r1p0 and later; 2cyc for an accumulate sequence
// (ReadAdvance of 4 or 3 respectively)
def A57WriteVMLAQ_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
    SchedVar<NoSchedPred,        [A57Write_6cyc_1W]>
]>;
def A57ReadVMLAQ_VecInt : SchedReadVariant<[
    SchedVar<IsR1P0AndLaterPred,
             [SchedReadAdvance<3, [A57WriteVMLAQ_VecInt]>]>,
    SchedVar<NoSchedPred,
             [SchedReadAdvance<4, [A57WriteVMLAQ_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAQ_VecInt, A57ReadVMLAQ_VecInt],
             (instregex "VMLA(sl)?(v16i8|v8i16|v4i32)",
                        "VMLS(sl)?(v16i8|v8i16|v4i32)")>;

// ASIMD multiply accumulate long
// 5cyc F0 on r0px, 4cyc F0 on r1p0 and later; 1cyc for an accumulate sequence
// (ReadAdvance of 4 or 3 respectively)
def A57WriteVMLAL_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def A57ReadVMLAL_VecInt : SchedReadVariant<[
    SchedVar<IsR1P0AndLaterPred,
             [SchedReadAdvance<3, [A57WriteVMLAL_VecInt]>]>,
    SchedVar<NoSchedPred,
             [SchedReadAdvance<4, [A57WriteVMLAL_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAL_VecInt, A57ReadVMLAL_VecInt],
             (instregex "VMLAL(s|u)", "VMLSL(s|u)")>;

// ASIMD multiply accumulate saturating long
// 5cyc F0 on r0px, 4cyc F0 on r1p0 and later; 2cyc for an accumulate sequence
// (ReadAdvance of 3 or 2 respectively)
def A57WriteVQDMLAL_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def A57ReadVQDMLAL_VecInt : SchedReadVariant<[
    SchedVar<IsR1P0AndLaterPred,
             [SchedReadAdvance<2, [A57WriteVQDMLAL_VecInt]>]>,
    SchedVar<NoSchedPred,
             [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]>
]>;
def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
             (instregex "VQDMLAL", "VQDMLSL")>;

// VQRDMLAH / VQRDMLSH (saturating rounding doubling multiply
// accumulate/subtract): reuse the scheduling info of VQDMLAL/VQDMLSL.
def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
             (instregex "VQRDMLAH", "VQRDMLSH")>;

// ASIMD multiply long
// 5cyc F0 on r0px, 4cyc F0 on r1p0 and later
def A57WriteVMULL_VecInt : SchedWriteVariant<[
    SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
    SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def : InstRW<[A57WriteVMULL_VecInt],
             (instregex "VMULL(s|u|p8|sls|slu)", "VQDMULL")>;

// ASIMD pairwise add and accumulate
// 4cyc F1, 1cyc for
// accumulate sequence (3cyc ReadAdvance)
def A57WriteVPADAL : SchedWriteRes<[A57UnitX]> {
  let Latency = 4;
}
def A57ReadVPADAL : SchedReadAdvance<3, [A57WriteVPADAL]>;
def : InstRW<[A57WriteVPADAL, A57ReadVPADAL],
             (instregex "VPADAL(s|u)")>;

// ASIMD shift accumulate
// 4cyc F1; 1cyc for an accumulate sequence (3cyc ReadAdvance)
def A57WriteVSRA : SchedWriteRes<[A57UnitX]> {
  let Latency = 4;
}
def A57ReadVSRA : SchedReadAdvance<3, [A57WriteVSRA]>;
def : InstRW<[A57WriteVSRA, A57ReadVSRA],
             (instregex "VSRA", "VRSRA")>;

// ASIMD shift by immed, basic
def : InstRW<[A57Write_3cyc_1X],
             (instregex "VMOVL", "VSHLi", "VSHLL", "VSHR(s|u)", "VSHRN")>;

// ASIMD shift by immed, complex
def : InstRW<[A57Write_4cyc_1X],
             (instregex "VQRSHRN", "VQRSHRUN", "VQSHL(si|ui|su)", "VQSHRN",
                        "VQSHRUN", "VRSHR(s|u)", "VRSHRN")>;

// ASIMD shift by immed and insert, basic, D-form
def : InstRW<[A57Write_4cyc_1X],
             (instregex "VSLI(v8i8|v4i16|v2i32|v1i64)",
                        "VSRI(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by immed and insert, basic, Q-form
def : InstRW<[A57Write_5cyc_1X],
             (instregex "VSLI(v16i8|v8i16|v4i32|v2i64)",
                        "VSRI(v16i8|v8i16|v4i32|v2i64)")>;

// ASIMD shift by register, basic, D-form
def : InstRW<[A57Write_3cyc_1X],
             (instregex "VSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by register, basic, Q-form
def : InstRW<[A57Write_4cyc_1X],
             (instregex "VSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;

// ASIMD shift by register, complex, D-form
// (VQRSHL, VQSHL, VRSHL)
def : InstRW<[A57Write_4cyc_1X],
             (instregex "VQRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
                        "VQSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
                        "VRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by register, complex, Q-form
def : InstRW<[A57Write_5cyc_1X],
             (instregex "VQRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
                        "VQSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
                        "VRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;

// --- 3.15 ASIMD Floating-Point Instructions ---
// ASIMD FP absolute value
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VABS(fd|fq|hd|hq)")>;

// ASIMD FP arith
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VABD(fd|fq|hd|hq)",
                        "VADD(fd|fq|hd|hq)",
                        "VPADD(f|h)",
                        "VSUB(fd|fq|hd|hq)")>;

def : InstRW<[A57Write_5cyc_1V],
             (instregex "VCADD", "VCMLA")>;

// ASIMD FP compare
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VAC(GE|GT|LE|LT)",
                        "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>;

// ASIMD FP convert, integer
def : InstRW<[A57Write_5cyc_1V],
             (instregex
  "VCVT(f2sd|f2ud|s2fd|u2fd|f2sq|f2uq|s2fq|u2fq|f2xsd|f2xud|xs2fd|xu2fd)",
  "VCVT(f2xsq|f2xuq|xs2fq|xu2fq)",
  "VCVT(AN|MN|NN|PN)(SDf|SQf|UDf|UQf|SDh|SQh|UDh|UQh)")>;

// ASIMD FP convert, half-precision: 8cyc F0/F1
def : InstRW<[A57Write_8cyc_1V],
             (instregex
  "VCVT(h2sd|h2ud|s2hd|u2hd|h2sq|h2uq|s2hq|u2hq|h2xsd|h2xud|xs2hd|xu2hd)",
  "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)",
  "VCVT(f2h|h2f)")>;

// ASIMD FP max/min
def : InstRW<[A57Write_5cyc_1V],
             (instregex "(VMAX|VMIN)(fd|fq|hd|hq)",
                        "(VPMAX|VPMIN)(f|h)",
                        "(NEON|VFP)_VMAXNM",
                        "(NEON|VFP)_VMINNM")>;

// ASIMD FP multiply
def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> {
  let Latency = 5;
}
def : InstRW<[A57WriteVMUL_VecFP],
             (instregex "VMUL(sl)?(fd|fq|hd|hq)")>;

// ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for an accumulate sequence
def A57WriteVMLA_VecFP : SchedWriteRes<[A57UnitV]> {
  let Latency = 9;
}
def A57ReadVMLA_VecFP
    : SchedReadAdvance<5, [A57WriteVMLA_VecFP, A57WriteVMUL_VecFP]>;
def : InstRW<[A57WriteVMLA_VecFP, A57ReadVMLA_VecFP],
             (instregex "(VMLA|VMLS)(sl)?(fd|fq|hd|hq)",
                        "(VFMA|VFMS)(fd|fq|hd|hq)")>;

// ASIMD FP negate
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VNEG(fd|f32q|hd|hq)")>;

// ASIMD FP round to integral
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VRINT(AN|MN|NN|PN|XN|ZN)(Df|Qf|Dh|Qh)")>;

// --- 3.16 ASIMD Miscellaneous Instructions ---

// ASIMD bitwise insert
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VBIF", "VBIT", "VBSL")>;

// ASIMD count
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VCLS", "VCLZ", "VCNT")>;

// ASIMD duplicate, core reg: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
             (instregex "VDUP(8|16|32)(d|q)")>;

// ASIMD duplicate, scalar: 3cyc "F0/F1"
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VDUPLN(8|16|32)(d|q)")>;

// ASIMD extract
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VEXT(d|q)(8|16|32|64)")>;

// ASIMD move, immed
def : InstRW<[A57Write_3cyc_1V],
             (instregex
  "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)",
  "VMOVD0", "VMOVQ0")>;

// ASIMD move, narrowing
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VMOVN")>;

// ASIMD move, saturating
def : InstRW<[A57Write_4cyc_1X],
             (instregex "VQMOVN")>;

// ASIMD reciprocal estimate
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VRECPE", "VRSQRTE")>;

// ASIMD reciprocal step, FZ
def : InstRW<[A57Write_9cyc_1V],
             (instregex "VRECPS", "VRSQRTS")>;

// ASIMD reverse, swap, table lookup (1-2 reg)
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VREV", "VSWP", "VTB(L|X)(1|2)")>;

// ASIMD table lookup (3-4 reg)
def : InstRW<[A57Write_6cyc_1V],
             (instregex "VTBL(3|4)", "VTBX(3|4)")>;

// ASIMD transfer, scalar to core reg: 6cyc "L, I0/I1"
def : InstRW<[A57Write_6cyc_1L_1I],
             (instregex "VGETLN")>;

// ASIMD transfer, core reg to scalar: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
             (instregex "VSETLN")>;

// ASIMD transpose
def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V],
             (instregex "VTRN")>;

// ASIMD unzip/zip, D-form
def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V],
             (instregex "VUZPd", "VZIPd")>;

// ASIMD unzip/zip, Q-form
def : InstRW<[A57Write_6cyc_1V, A57Write_6cyc_1V],
             (instregex "VUZPq", "VZIPq")>;

// --- 3.17 ASIMD Load Instructions ---

// Overridden via InstRW for this processor.
def : WriteRes<WriteVLD1, []>;
def : WriteRes<WriteVLD2, []>;
def : WriteRes<WriteVLD3, []>;
def : WriteRes<WriteVLD4, []>;
def : WriteRes<WriteVST1, []>;
def : WriteRes<WriteVST2, []>;
def : WriteRes<WriteVST3, []>;
def : WriteRes<WriteVST4, []>;

// 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency
def : InstRW<[A57Write_5cyc_1L],
             (instregex "VLD1(d|q)(8|16|32|64)$")>;
def : InstRW<[A57Write_5cyc_1L_1I, A57WrBackOne],
             (instregex "VLD1(d|q)(8|16|32|64)wb")>;

// 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency
def : InstRW<[A57Write_6cyc_1L],
             (instregex "VLD1(d|q)(8|16|32|64)(T|Q)$",
                        "VLD1d64(T|Q)Pseudo")>;

def : InstRW<[A57Write_6cyc_1L_1I, A57WrBackOne],
             (instregex "VLD1(d|q)(8|16|32|64)(T|Q)wb")>;

// ASIMD load, 1 element, one lane and all lanes: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
             (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$",
                        "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
                        "VLD1LNq(8|16|32)Pseudo_UPD")>;

// ASIMD load, 2 element, multiple, 2 reg: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
             (instregex "VLD2(d|q)(8|16|32)$",
                        "VLD2q(8|16|32)Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD2(d|q)(8|16|32)wb",
                        "VLD2q(8|16|32)PseudoWB")>;

// ASIMD load, 2 element, multiple, 4 reg: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V],
             (instregex "VLD2b(8|16|32)$")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD2b(8|16|32)wb")>;

// ASIMD load, 2 element, one lane and all lanes:
// 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
             (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
                        "VLD2LN(d|q)(8|16|32)Pseudo$")>;
// two results plus the writeback result
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V, A57WrBackOne],
             (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
// one result plus the writeback result
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb",
                        "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;

// ASIMD load, 3 element, multiple, 3 reg: 9cyc "L, F0/F1"
// three results
def : InstRW<[A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
             (instregex "VLD3(d|q)(8|16|32)$")>;
// one result
def : InstRW<[A57Write_9cyc_1L_1V],
             (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
// three results plus writeback
def : InstRW<[A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
// one result plus writeback
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// ASIMD load, 3 element, one lane, size 32: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
             (instregex "VLD3LN(d|q)32$",
                        "VLD3LN(d|q)32Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD3LN(d|q)32_UPD")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD3LN(d|q)32Pseudo_UPD")>;

// ASIMD load, 3 element, one lane, size 8/16: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
             (instregex "VLD3LN(d|q)(8|16)$",
                        "VLD3LN(d|q)(8|16)Pseudo$")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD3LN(d|q)(8|16)_UPD")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD3LN(d|q)(8|16)Pseudo_UPD")>;

// ASIMD load, 3 element, all lanes: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
             (instregex "VLD3DUP(d|q)(8|16|32)$",
                        "VLD3DUP(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD3DUP(d|q)(8|16|32)_UPD")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD3DUP(d|q)(8|16|32)Pseudo_UPD")>;

// ASIMD load, 4 element, multiple, 4 reg: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
             (instregex "VLD4(d|q)(8|16|32)$")>;
def : InstRW<[A57Write_9cyc_1L_1V],
             (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD4(d|q)(8|16|32)_UPD")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// ASIMD load, 4 element, one lane, size 32: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
             (instregex "VLD4LN(d|q)32$",
                        "VLD4LN(d|q)32Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD4LN(d|q)32_UPD")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD4LN(d|q)32Pseudo_UPD")>;

// ASIMD load, 4 element, one lane, size 8/16: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
             (instregex "VLD4LN(d|q)(8|16)$",
                        "VLD4LN(d|q)(8|16)Pseudo$")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD4LN(d|q)(8|16)_UPD")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD4LN(d|q)(8|16)Pseudo_UPD")>;

// ASIMD load, 4 element, all lanes: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
             (instregex "VLD4DUP(d|q)(8|16|32)$",
                        "VLD4DUP(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD4DUP(d|q)(8|16|32)_UPD")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD4DUP(d|q)(8|16|32)Pseudo_UPD")>;

// --- 3.18 ASIMD Store Instructions ---

// ASIMD store, 1 element, multiple, 1 reg: 1cyc S
def : InstRW<[A57Write_1cyc_1S],
             (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
             (instregex "VST1d(8|16|32|64)wb")>;
// ASIMD store, 1 element, multiple, 2 reg: 2cyc S
def : InstRW<[A57Write_2cyc_1S],
             (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[A57WrBackOne, A57Write_2cyc_1S_1I],
             (instregex "VST1q(8|16|32|64)wb")>;
// ASIMD store, 1 element, multiple, 3 reg: 3cyc S
def : InstRW<[A57Write_3cyc_1S],
             (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1I],
             (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
// ASIMD store, 1 element, multiple, 4 reg: 4cyc S
def : InstRW<[A57Write_4cyc_1S],
             (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1I],
             (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
// ASIMD store, 1 element, one lane: 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
             (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
             (instregex "VST1LNd(8|16|32)_UPD",
                        "VST1LNq(8|16|32)Pseudo_UPD")>;
// ASIMD store, 2 element, multiple, 2 reg: 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
             (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
             (instregex "VST2(b|d)(8|16|32)wb")>;
// ASIMD store, 2 element, multiple, 4 reg: 4cyc "F0/F1, S"
def : InstRW<[A57Write_4cyc_1S_1V],
             (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
             (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
// ASIMD store, 2 element, one lane: 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
             (instregex "VST2LN(d|q)(8|16|32)$",
                        "VST2LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
             (instregex "VST2LN(d|q)(8|16|32)_UPD",
                        "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
// ASIMD store, 3 element, multiple, 3 reg
def : InstRW<[A57Write_3cyc_1S_1V],
             (instregex "VST3(d|q)(8|16|32)$",
                        "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
             (instregex "VST3(d|q)(8|16|32)_UPD",
                        "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
// ASIMD store, 3 element, one lane
def : InstRW<[A57Write_3cyc_1S_1V],
             (instregex "VST3LN(d|q)(8|16|32)$",
                        "VST3LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
             (instregex "VST3LN(d|q)(8|16|32)_UPD",
                        "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
// ASIMD store, 4 element, multiple, 4 reg
def : InstRW<[A57Write_4cyc_1S_1V],
             (instregex "VST4(d|q)(8|16|32)$",
                        "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
             (instregex "VST4(d|q)(8|16|32)_UPD",
                        "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
// ASIMD store, 4 element, one lane
def : InstRW<[A57Write_3cyc_1S_1V],
             (instregex "VST4LN(d|q)(8|16|32)$",
                        "VST4LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
             (instregex "VST4LN(d|q)(8|16|32)_UPD",
                        "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;

// --- 3.19 Cryptography Extensions ---
// Crypto AES ops (AESD, AESE, AESIMC, AESMC): 3cyc F0
def : InstRW<[A57Write_3cyc_1W],
             (instregex "^AES")>;
// Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0
def : InstRW<[A57Write_3cyc_1W],
             (instregex "^VMULLp64")>;
// Crypto SHA1 xor ops: 6cyc F0/F1
def : InstRW<[A57Write_6cyc_2V],
             (instregex "^SHA1SU0")>;
// Crypto SHA1 fast ops: 3cyc F0
def : InstRW<[A57Write_3cyc_1W],
             (instregex "^SHA1(H|SU1)")>;
// Crypto SHA1 slow ops: 6cyc F0
def : InstRW<[A57Write_6cyc_2W],
             (instregex "^SHA1[CMP]")>;
// Crypto SHA256 fast ops: 3cyc F0
def : InstRW<[A57Write_3cyc_1W],
             (instregex "^SHA256SU0")>;
// Crypto SHA256 slow ops: 6cyc F0
def : InstRW<[A57Write_6cyc_2W],
             (instregex "^SHA256(H|H2|SU1)")>;

// --- 3.20 CRC ---
def : InstRW<[A57Write_3cyc_1W],
             (instregex "^(t2)?CRC32")>;

// -----------------------------------------------------------------------------
// Common definitions
def : WriteRes<WriteNoop, []> {
  let Latency = 0;
  let NumMicroOps = 0;
}
def : SchedAlias<WriteALU, A57Write_1cyc_1I>;

def : SchedAlias<WriteBr,    A57Write_1cyc_1B>;
def : SchedAlias<WriteBrL,   A57Write_1cyc_1B_1I>;
def : SchedAlias<WriteBrTbl, A57Write_1cyc_1B_1I>;
def : SchedAlias<WritePreLd, A57Write_4cyc_1L>;

def : SchedAlias<WriteLd, A57Write_4cyc_1L>;
def : SchedAlias<WriteST, A57Write_1cyc_1S>;
def : ReadAdvance<ReadALU, 0>;

} // SchedModel = CortexA57Model