1//=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the machine model for ARM Cortex-A57 to support 10// instruction scheduling and other instruction cost heuristics. 11// 12//===----------------------------------------------------------------------===// 13 14//===----------------------------------------------------------------------===// 15// *** Common description and scheduling model parameters taken from AArch64 *** 16// The Cortex-A57 is a traditional superscalar microprocessor with a 17// conservative 3-wide in-order stage for decode and dispatch. Combined with the 18// much wider out-of-order issue stage, this produced a need to carefully 19// schedule micro-ops so that all three decoded each cycle are successfully 20// issued as the reservation station(s) simply don't stay occupied for long. 21// Therefore, IssueWidth is set to the narrower of the two at three, while still 22// modeling the machine as out-of-order. 23 24def IsCPSRDefinedAndPredicated : CheckAll<[IsCPSRDefined, IsPredicated]>; 25def IsCPSRDefinedAndPredicatedPred : 26 MCSchedPredicate<IsCPSRDefinedAndPredicated>; 27 28// Cortex A57 rev. r1p0 or later (false = r0px) 29def IsR1P0AndLaterPred : MCSchedPredicate<FalsePred>; 30 31def IsLdrAm3RegOffPred : MCSchedPredicate<CheckInvalidRegOperand<2>>; 32def IsLdrAm3RegOffPredX2 : MCSchedPredicate<CheckInvalidRegOperand<3>>; 33def IsLdrAm3RegOffPredX3 : MCSchedPredicate<CheckInvalidRegOperand<4>>; 34 35// If Addrmode3 contains "minus register" 36class Am3NegativeRegOffset<int n> : MCSchedPredicate<CheckAll<[ 37 CheckValidRegOperand<n>, 38 CheckAM3OpSub<!add(n, 1)>]>>; 39 40def IsLdrAm3NegRegOffPred : Am3NegativeRegOffset<2>; 41def IsLdrAm3NegRegOffPredX2 : Am3NegativeRegOffset<3>; 42def IsLdrAm3NegRegOffPredX3 : Am3NegativeRegOffset<4>; 43 44// Load, scaled register offset, not plus LSL2 45class ScaledRegNotPlusLsl2<int n> : CheckNot< 46 CheckAny<[ 47 CheckAM2NoShift<n>, 48 CheckAll<[ 49 CheckAM2OpAdd<n>, 50 CheckAM2ShiftLSL<n>, 51 CheckAM2Offset<n, 2> 52 ]> 53 ]> 54 >; 55 56def IsLdstsoScaledNotOptimalPredX0 : MCSchedPredicate<ScaledRegNotPlusLsl2<2>>; 57def IsLdstsoScaledNotOptimalPred : MCSchedPredicate<ScaledRegNotPlusLsl2<3>>; 58def IsLdstsoScaledNotOptimalPredX2 : MCSchedPredicate<ScaledRegNotPlusLsl2<4>>; 59 60def IsLdstsoScaledPredX2 : MCSchedPredicate<CheckNot<CheckAM2NoShift<4>>>; 61 62def IsLdstsoMinusRegPredX0 : MCSchedPredicate<CheckAM2OpSub<2>>; 63def IsLdstsoMinusRegPred : MCSchedPredicate<CheckAM2OpSub<3>>; 64def IsLdstsoMinusRegPredX2 : MCSchedPredicate<CheckAM2OpSub<4>>; 65 66class A57WriteLMOpsListType<list<SchedWriteRes> writes> { 67 list <SchedWriteRes> Writes = writes; 68 SchedMachineModel SchedModel = ?; 69} 70 71// *** Common description and scheduling model parameters taken from AArch64 *** 72// (AArch64SchedA57.td) 73def CortexA57Model : SchedMachineModel { 74 let IssueWidth = 3; // 3-way decode and dispatch 75 let MicroOpBufferSize = 128; // 128 micro-op re-order buffer 76 let LoadLatency = 4; // Optimistic load latency 77 let MispredictPenalty = 16; // Fetch + Decode/Rename/Dispatch + Branch 78 79 // Enable partial & runtime unrolling. 80 let LoopMicroOpBufferSize = 16; 81 let CompleteModel = 1; 82 83 // FIXME: Remove when all errors have been fixed. 84 let FullInstRWOverlapCheck = 0; 85 86 let UnsupportedFeatures = [HasV8_1MMainline, HasMVEInt, HasMVEFloat, 87 HasFPRegsV8_1M, HasFP16FML, HasMatMulInt8, HasBF16]; 88} 89 90//===----------------------------------------------------------------------===// 91// Define each kind of processor resource and number available on Cortex-A57. 92// Cortex A-57 has 8 pipelines that each has its own 8-entry queue where 93// micro-ops wait for their operands and then issue out-of-order. 94 95def A57UnitB : ProcResource<1>; // Type B micro-ops 96def A57UnitI : ProcResource<2>; // Type I micro-ops 97def A57UnitM : ProcResource<1>; // Type M micro-ops 98def A57UnitL : ProcResource<1>; // Type L micro-ops 99def A57UnitS : ProcResource<1>; // Type S micro-ops 100 101def A57UnitX : ProcResource<1>; // Type X micro-ops (F1) 102def A57UnitW : ProcResource<1>; // Type W micro-ops (F0) 103 104let SchedModel = CortexA57Model in { 105 def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops 106} 107 108let SchedModel = CortexA57Model in { 109 110//===----------------------------------------------------------------------===// 111// Define customized scheduler read/write types specific to the Cortex-A57. 112 113include "ARMScheduleA57WriteRes.td" 114 115// To have "CompleteModel = 1", support of pseudos and special instructions 116def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$", 117 "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$", 118 "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$", 119 "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$", 120 "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE", 121 "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "(t2|t)?UDF$", "t2DCPS", "t2SG", 122 "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier", 123 "t__brkdiv0")>; 124 125def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>; 126 127// Specific memory instrs 128def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC", 129 "(t2)?STL", "(t2)?LDREX", "(t2)?STREX", "MEMCPY")>; 130 131// coprocessor moves 132def : InstRW<[WriteNoop, WriteNoop], (instregex 133 "(t2)?MCR(2|R|R2)?$", "(t2)?MRC(2)?$", 134 "(t2)?MRRC(2)?$", "(t2)?MRS(banked|sys|_AR|_M|sys_AR)?$", 135 "(t2)?MSR(banked|i|_AR|_M)?$")>; 136 137// Deprecated instructions 138def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>; 139 140// Pseudos 141def : InstRW<[WriteNoop], (instregex "(t2)?ABS$", 142 "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj", 143 "tLDRpci_pic", "(t2)?SUBS_PC_LR", 144 "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp", 145 "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm", 146 "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm", 147 "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm", 148 "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm", 149 "WIN__CHKSTK", "WIN__DBZCHK")>; 150 151// Miscellaneous 152// ----------------------------------------------------------------------------- 153 154def : InstRW<[A57Write_1cyc_1I], (instrs COPY)>; 155 156// --- 3.2 Branch Instructions --- 157// B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ 158 159def : InstRW<[A57Write_1cyc_1B], (instregex "(t2|t)?B$", "t?BX", "(t2|t)?Bcc$", 160 "t?TAILJMP(d|r)", "TCRETURN(d|r)i", "tBfar", "tCBN?Z")>; 161def : InstRW<[A57Write_1cyc_1B_1I], 162 (instregex "t?BL$", "BL_pred$", "t?BLXi", "t?TPsoft")>; 163def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BLX", "tBLX(NS)?r")>; 164// Pseudos 165def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BCCi64", "BCCZi64")>; 166def : InstRW<[A57Write_3cyc_1B_1I], (instregex "BR_JTadd", "t?BR_JTr", 167 "t2BR_JT", "t2BXJ", "(t2)?TB(B|H)(_JT)?$", "tBRIND")>; 168def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>; 169 170// --- 3.3 Arithmetic and Logical Instructions --- 171// ADD{S}, ADC{S}, ADR, AND{S}, BIC{S}, CMN, CMP, EOR{S}, ORN{S}, ORR{S}, 172// RSB{S}, RSC{S}, SUB{S}, SBC{S}, TEQ, TST 173 174def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>; 175 176// Check branch forms of ALU ops: 177// check reg 0 for ARM_AM::PC 178// if so adds 2 cyc to latency, 1 uop, 1 res cycle for A57UnitB 179class A57BranchForm<SchedWriteRes non_br> : 180 BranchWriteRes<2, 1, [A57UnitB], [1], non_br>; 181 182// shift by register, conditional or unconditional 183// TODO: according to the doc, conditional uses I0/I1, unconditional uses M 184// Why more complex instruction uses more simple pipeline? 185// May be an error in doc. 186def A57WriteALUsr : SchedWriteVariant<[ 187 SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>, 188 SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]> 189]>; 190def A57WriteALUSsr : SchedWriteVariant<[ 191 SchedVar<IsPredicatedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>, 192 SchedVar<NoSchedPred, [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]> 193]>; 194def A57ReadALUsr : SchedReadVariant<[ 195 SchedVar<IsPredicatedPred, [ReadDefault]>, 196 SchedVar<NoSchedPred, [ReadDefault]> 197]>; 198def : SchedAlias<WriteALUsi, CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>>; 199def : SchedAlias<WriteALUsr, A57WriteALUsr>; 200def : SchedAlias<WriteALUSsr, A57WriteALUSsr>; 201def : SchedAlias<ReadALUsr, A57ReadALUsr>; 202 203def A57WriteCMPsr : SchedWriteVariant<[ 204 SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>, 205 SchedVar<NoSchedPred, [A57Write_2cyc_1M]> 206]>; 207def : SchedAlias<WriteCMP, A57Write_1cyc_1I>; 208def : SchedAlias<WriteCMPsi, A57Write_2cyc_1M>; 209def : SchedAlias<WriteCMPsr, A57WriteCMPsr>; 210 211// --- 3.4 Move and Shift Instructions --- 212// Move, basic 213// MOV{S}, MOVW, MVN{S} 214def : InstRW<[A57Write_1cyc_1I], (instregex "MOV(r|i|i16|r_TC)", 215 "(t2)?MVN(CC)?(r|i)", "BMOVPCB_CALL", "BMOVPCRX_CALL", 216 "MOVCC(r|i|i16|i32imm)", "tMOV", "tMVN")>; 217 218// Move, shift by immed, setflags/no setflags 219// (ASR, LSL, LSR, ROR, RRX)=MOVsi, MVN 220// setflags = isCPSRDefined 221def A57WriteMOVsi : SchedWriteVariant<[ 222 SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>, 223 SchedVar<NoSchedPred, [A57Write_1cyc_1I]> 224]>; 225def : InstRW<[A57WriteMOVsi], (instregex "MOV(CC)?si", "MVNsi", 226 "ASRi", "(t2|t)ASRri", "LSRi", "(t2|t)LSRri", "LSLi", "(t2|t)LSLri", "RORi", 227 "(t2|t)RORri", "(t2)?RRX", "t2MOV", "tROR")>; 228 229// shift by register, conditional or unconditional, setflags/no setflags 230def A57WriteMOVsr : SchedWriteVariant<[ 231 SchedVar<IsCPSRDefinedAndPredicatedPred, [A57Write_2cyc_1I]>, 232 SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>, 233 SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>, 234 SchedVar<NoSchedPred, [A57Write_1cyc_1I]> 235]>; 236def : InstRW<[A57WriteMOVsr], (instregex "MOV(CC)?sr", "MVNsr", "t2MVNs", 237 "ASRr", "(t2|t)ASRrr", "LSRr", "(t2|t)LSRrr", "LSLr", "(t2|t)?LSLrr", "RORr", 238 "(t2|t)RORrr")>; 239 240// Move, top 241// MOVT - A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later 242def A57WriteMOVT : SchedWriteVariant<[ 243 SchedVar<IsR1P0AndLaterPred, [A57Write_1cyc_1I]>, 244 SchedVar<NoSchedPred, [A57Write_2cyc_1M]> 245]>; 246def : InstRW<[A57WriteMOVT], (instregex "MOVTi16")>; 247 248def A57WriteI2pc : 249 WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_1cyc_1I]>; 250def A57WriteI2ld : 251 WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_4cyc_1L]>; 252def : InstRW< [A57WriteI2pc], (instregex "MOV_ga_pcrel")>; 253def : InstRW< [A57WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>; 254 255// +2cyc for branch forms 256def : InstRW<[A57Write_3cyc_1I], (instregex "MOVPC(LR|RX)")>; 257 258// --- 3.5 Divide and Multiply Instructions --- 259// Divide: SDIV, UDIV 260// latency from documentration: 4 ‐ 20, maximum taken 261def : SchedAlias<WriteDIV, A57Write_20cyc_1M>; 262// Multiply: tMul not bound to common WriteRes types 263def : InstRW<[A57Write_3cyc_1M], (instregex "tMUL")>; 264def : SchedAlias<WriteMUL16, A57Write_3cyc_1M>; 265def : SchedAlias<WriteMUL32, A57Write_3cyc_1M>; 266def : ReadAdvance<ReadMUL, 0>; 267 268// Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB, 269// SMLAWT, SMLAD{X}, SMLSD{X}, SMMLA{R}, SMMLS{R} 270// Multiply-accumulate pipelines support late-forwarding of accumulate operands 271// from similar μops, allowing a typical sequence of multiply-accumulate μops 272// to issue one every 1 cycle (sched advance = 2). 273def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; } 274def A57WriteMLAL : SchedWriteVariant<[ 275 SchedVar<IsCPSRDefinedPred, [A57Write_5cyc_1I_1M]>, 276 SchedVar<NoSchedPred, [A57Write_4cyc_1M]> 277]>; 278 279def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>; 280 281def : InstRW<[A57WriteMLA], 282 (instregex "t2SMLAD", "t2SMLADX", "t2SMLSD", "t2SMLSDX")>; 283 284def : SchedAlias<WriteMAC16, A57WriteMLA>; 285def : SchedAlias<WriteMAC32, A57WriteMLA>; 286def : SchedAlias<ReadMAC, A57ReadMLA>; 287 288def : SchedAlias<WriteMAC64Lo, A57WriteMLAL>; 289def : SchedAlias<WriteMAC64Hi, A57WriteMLAL>; 290 291// Multiply long: SMULL, UMULL 292def : SchedAlias<WriteMUL64Lo, A57Write_4cyc_1M>; 293def : SchedAlias<WriteMUL64Hi, A57Write_4cyc_1M>; 294 295// --- 3.6 Saturating and Parallel Arithmetic Instructions --- 296// Parallel arith 297// SADD16, SADD8, SSUB16, SSUB8, UADD16, UADD8, USUB16, USUB8 298// Conditional GE-setting instructions require three extra μops 299// and two additional cycles to conditionally update the GE field. 300def A57WriteParArith : SchedWriteVariant<[ 301 SchedVar<IsPredicatedPred, [A57Write_4cyc_1I_1M]>, 302 SchedVar<NoSchedPred, [A57Write_2cyc_1I_1M]> 303]>; 304def : InstRW< [A57WriteParArith], (instregex 305 "(t2)?SADD(16|8)", "(t2)?SSUB(16|8)", 306 "(t2)?UADD(16|8)", "(t2)?USUB(16|8)")>; 307 308// Parallel arith with exchange: SASX, SSAX, UASX, USAX 309def A57WriteParArithExch : SchedWriteVariant<[ 310 SchedVar<IsPredicatedPred, [A57Write_5cyc_1I_1M]>, 311 SchedVar<NoSchedPred, [A57Write_3cyc_1I_1M]> 312]>; 313def : InstRW<[A57WriteParArithExch], 314 (instregex "(t2)?SASX", "(t2)?SSAX", "(t2)?UASX", "(t2)?USAX")>; 315 316// Parallel halving arith 317// SHADD16, SHADD8, SHSUB16, SHSUB8, UHADD16, UHADD8, UHSUB16, UHSUB8 318def : InstRW<[A57Write_2cyc_1M], (instregex 319 "(t2)?SHADD(16|8)", "(t2)?SHSUB(16|8)", 320 "(t2)?UHADD(16|8)", "(t2)?UHSUB(16|8)")>; 321 322// Parallel halving arith with exchange 323// SHASX, SHSAX, UHASX, UHSAX 324def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?SHASX", "(t2)?SHSAX", 325 "(t2)?UHASX", "(t2)?UHSAX")>; 326 327// Parallel saturating arith 328// QADD16, QADD8, QSUB16, QSUB8, UQADD16, UQADD8, UQSUB16, UQSUB8 329def : InstRW<[A57Write_2cyc_1M], (instregex "QADD(16|8)", "QSUB(16|8)", 330 "UQADD(16|8)", "UQSUB(16|8)", "t2(U?)QADD", "t2(U?)QSUB")>; 331 332// Parallel saturating arith with exchange 333// QASX, QSAX, UQASX, UQSAX 334def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QASX", "(t2)?QSAX", 335 "(t2)?UQASX", "(t2)?UQSAX")>; 336 337// Saturate: SSAT, SSAT16, USAT, USAT16 338def : InstRW<[A57Write_2cyc_1M], 339 (instregex "(t2)?SSAT(16)?", "(t2)?USAT(16)?")>; 340 341// Saturating arith: QADD, QSUB 342def : InstRW<[A57Write_2cyc_1M], (instregex "QADD$", "QSUB$")>; 343 344// Saturating doubling arith: QDADD, QDSUB 345def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QDADD", "(t2)?QDSUB")>; 346 347// --- 3.7 Miscellaneous Data-Processing Instructions --- 348// Bit field extract: SBFX, UBFX 349def : InstRW<[A57Write_1cyc_1I], (instregex "(t2)?SBFX", "(t2)?UBFX")>; 350 351// Bit field insert/clear: BFI, BFC 352def : InstRW<[A57Write_2cyc_1M], (instregex "(t2)?BFI", "(t2)?BFC")>; 353 354// Select bytes, conditional/unconditional 355def A57WriteSEL : SchedWriteVariant<[ 356 SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>, 357 SchedVar<NoSchedPred, [A57Write_1cyc_1I]> 358]>; 359def : InstRW<[A57WriteSEL], (instregex "(t2)?SEL")>; 360 361// Sign/zero extend, normal: SXTB, SXTH, UXTB, UXTH 362def : InstRW<[A57Write_1cyc_1I], 363 (instregex "(t2|t)?SXT(B|H)$", "(t2|t)?UXT(B|H)$")>; 364 365// Sign/zero extend and add, normal: SXTAB, SXTAH, UXTAB, UXTAH 366def : InstRW<[A57Write_2cyc_1M], 367 (instregex "(t2)?SXTA(B|H)$", "(t2)?UXTA(B|H)$")>; 368 369// Sign/zero extend and add, parallel: SXTAB16, UXTAB16 370def : InstRW<[A57Write_4cyc_1M], (instregex "(t2)?SXTAB16", "(t2)?UXTAB16")>; 371 372// Sum of absolute differences: USAD8, USADA8 373def : InstRW<[A57Write_3cyc_1M], (instregex "(t2)?USAD8", "(t2)?USADA8")>; 374 375// --- 3.8 Load Instructions --- 376 377// Load, immed offset 378// LDR and LDRB have LDRi12 and LDRBi12 forms for immediate 379def : InstRW<[A57Write_4cyc_1L], (instregex "LDRi12", "LDRBi12", 380 "LDRcp", "(t2|t)?LDRConstPool", "LDRLIT_ga_(pcrel|abs)", 381 "PICLDR", "tLDR")>; 382 383def : InstRW<[A57Write_4cyc_1L], 384 (instregex "t2LDRS?(B|H)?(pcrel|T|i8|i12|pci|pci_pic|s)?$")>; 385 386// For "Load, register offset, minus" we need +1cyc, +1I 387def A57WriteLdrAm3 : SchedWriteVariant<[ 388 SchedVar<IsLdrAm3NegRegOffPred, [A57Write_5cyc_1I_1L]>, 389 SchedVar<NoSchedPred, [A57Write_4cyc_1L]> 390]>; 391def : InstRW<[A57WriteLdrAm3], (instregex "LDR(H|SH|SB)$")>; 392def A57WriteLdrAm3X2 : SchedWriteVariant<[ 393 SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_5cyc_1I_1L]>, 394 SchedVar<NoSchedPred, [A57Write_4cyc_1L]> 395]>; 396def : InstRW<[A57WriteLdrAm3X2, A57WriteLdrAm3X2], (instregex "LDRD$")>; 397def : InstRW<[A57Write_4cyc_1L, A57Write_4cyc_1L], (instregex "t2LDRDi8")>; 398 399def A57WriteLdrAmLDSTSO : SchedWriteVariant<[ 400 SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_5cyc_1I_1L]>, 401 SchedVar<IsLdstsoMinusRegPred, [A57Write_5cyc_1I_1L]>, 402 SchedVar<NoSchedPred, [A57Write_4cyc_1L]> 403]>; 404def : InstRW<[A57WriteLdrAmLDSTSO], (instregex "LDRrs", "LDRBrs")>; 405 406def A57WrBackOne : SchedWriteRes<[]> { 407 let Latency = 1; 408 let NumMicroOps = 0; 409} 410def A57WrBackTwo : SchedWriteRes<[]> { 411 let Latency = 2; 412 let NumMicroOps = 0; 413} 414def A57WrBackThree : SchedWriteRes<[]> { 415 let Latency = 3; 416 let NumMicroOps = 0; 417} 418 419// --- LDR pre-indexed --- 420// Load, immed pre-indexed (4 cyc for load result, 1 cyc for Base update) 421def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR_PRE_IMM", 422 "LDRB_PRE_IMM", "t2LDRB_PRE")>; 423 424// Load, register pre-indexed (4 cyc for load result, 2 cyc for Base update) 425// (5 cyc load result for not-lsl2 scaled) 426def A57WriteLdrAmLDSTSOPre : SchedWriteVariant<[ 427 SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_5cyc_1I_1L]>, 428 SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]> 429]>; 430def : InstRW<[A57WriteLdrAmLDSTSOPre, A57WrBackTwo], 431 (instregex "LDR_PRE_REG", "LDRB_PRE_REG")>; 432 433def A57WriteLdrAm3PreWrBack : SchedWriteVariant<[ 434 SchedVar<IsLdrAm3RegOffPredX2, [A57WrBackTwo]>, 435 SchedVar<NoSchedPred, [A57WrBackOne]> 436]>; 437def : InstRW<[A57Write_4cyc_1L, A57WriteLdrAm3PreWrBack], 438 (instregex "LDR(H|SH|SB)_PRE")>; 439def : InstRW<[A57Write_4cyc_1L, A57WrBackOne], 440 (instregex "t2LDR(H|SH|SB)?_PRE")>; 441 442// LDRD pre-indexed: 5(2) cyc for reg, 4(1) cyc for imm. 443def A57WriteLdrDAm3Pre : SchedWriteVariant<[ 444 SchedVar<IsLdrAm3RegOffPredX3, [A57Write_5cyc_1I_1L]>, 445 SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]> 446]>; 447def A57WriteLdrDAm3PreWrBack : SchedWriteVariant<[ 448 SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>, 449 SchedVar<NoSchedPred, [A57WrBackOne]> 450]>; 451def : InstRW<[A57WriteLdrDAm3Pre, A57WriteLdrDAm3Pre, A57WriteLdrDAm3PreWrBack], 452 (instregex "LDRD_PRE")>; 453def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne], 454 (instregex "t2LDRD_PRE")>; 455 456// --- LDR post-indexed --- 457def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR(T?)_POST_IMM", 458 "LDRB(T?)_POST_IMM", "LDR(SB|H|SH)Ti", "t2LDRB_POST")>; 459 460def A57WriteLdrAm3PostWrBack : SchedWriteVariant<[ 461 SchedVar<IsLdrAm3RegOffPred, [A57WrBackTwo]>, 462 SchedVar<NoSchedPred, [A57WrBackOne]> 463]>; 464def : InstRW<[A57Write_4cyc_1L_1I, A57WriteLdrAm3PostWrBack], 465 (instregex "LDR(H|SH|SB)_POST")>; 466def : InstRW<[A57Write_4cyc_1L, A57WrBackOne], 467 (instregex "t2LDR(H|SH|SB)?_POST")>; 468 469def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG", 470 "LDRB_POST_REG", "LDR(B?)T_POST$")>; 471 472def A57WriteLdrTRegPost : SchedWriteVariant<[ 473 SchedVar<IsLdstsoScaledPredX2, [A57Write_4cyc_1I_1L_1M]>, 474 SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]> 475]>; 476def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[ 477 SchedVar<IsLdstsoScaledPredX2, [A57WrBackThree]>, 478 SchedVar<NoSchedPred, [A57WrBackTwo]> 479]>; 480// 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L" 481def : InstRW<[A57WriteLdrTRegPost, A57WriteLdrTRegPostWrBack], 482 (instregex "LDRT_POST_REG", "LDRBT_POST_REG")>; 483 484def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR(SB|H|SH)Tr")>; 485 486def A57WriteLdrAm3PostWrBackX3 : SchedWriteVariant<[ 487 SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>, 488 SchedVar<NoSchedPred, [A57WrBackOne]> 489]>; 490// LDRD post-indexed: 4(2) cyc for reg, 4(1) cyc for imm. 491def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, 492 A57WriteLdrAm3PostWrBackX3], (instregex "LDRD_POST")>; 493def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne], 494 (instregex "t2LDRD_POST")>; 495 496// --- Preload instructions --- 497// Preload, immed offset 498def : InstRW<[A57Write_4cyc_1L], (instregex "(t2)?PLDi12", "(t2)?PLDWi12", 499 "t2PLDW?(i8|pci|s)", "(t2)?PLI")>; 500 501// Preload, register offset, 502// 5cyc "I0/I1,L" for minus reg or scaled not plus lsl2 503// otherwise 4cyc "L" 504def A57WritePLD : SchedWriteVariant<[ 505 SchedVar<IsLdstsoScaledNotOptimalPredX0, [A57Write_5cyc_1I_1L]>, 506 SchedVar<IsLdstsoMinusRegPredX0, [A57Write_5cyc_1I_1L]>, 507 SchedVar<NoSchedPred, [A57Write_4cyc_1L]> 508]>; 509def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>; 510 511// --- Load multiple instructions --- 512foreach NumAddr = 1-8 in { 513 def A57LMAddrPred#NumAddr : MCSchedPredicate<CheckAny<[ 514 CheckNumOperands<!add(!shl(NumAddr, 1), 2)>, 515 CheckNumOperands<!add(!shl(NumAddr, 1), 3)>]>>; 516 def A57LMAddrUpdPred#NumAddr : MCSchedPredicate<CheckAny<[ 517 CheckNumOperands<!add(!shl(NumAddr, 1), 3)>, 518 CheckNumOperands<!add(!shl(NumAddr, 1), 4)>]>>; 519} 520 521def A57LDMOpsListNoregin : A57WriteLMOpsListType< 522 [A57Write_3cyc_1L, A57Write_3cyc_1L, 523 A57Write_4cyc_1L, A57Write_4cyc_1L, 524 A57Write_5cyc_1L, A57Write_5cyc_1L, 525 A57Write_6cyc_1L, A57Write_6cyc_1L, 526 A57Write_7cyc_1L, A57Write_7cyc_1L, 527 A57Write_8cyc_1L, A57Write_8cyc_1L, 528 A57Write_9cyc_1L, A57Write_9cyc_1L, 529 A57Write_10cyc_1L, A57Write_10cyc_1L]>; 530def A57WriteLDMnoreginlist : SchedWriteVariant<[ 531 SchedVar<A57LMAddrPred1, A57LDMOpsListNoregin.Writes[0-1]>, 532 SchedVar<A57LMAddrPred2, A57LDMOpsListNoregin.Writes[0-3]>, 533 SchedVar<A57LMAddrPred3, A57LDMOpsListNoregin.Writes[0-5]>, 534 SchedVar<A57LMAddrPred4, A57LDMOpsListNoregin.Writes[0-7]>, 535 SchedVar<A57LMAddrPred5, A57LDMOpsListNoregin.Writes[0-9]>, 536 SchedVar<A57LMAddrPred6, A57LDMOpsListNoregin.Writes[0-11]>, 537 SchedVar<A57LMAddrPred7, A57LDMOpsListNoregin.Writes[0-13]>, 538 SchedVar<A57LMAddrPred8, A57LDMOpsListNoregin.Writes[0-15]>, 539 SchedVar<NoSchedPred, A57LDMOpsListNoregin.Writes[0-15]> 540]> { let Variadic=1; } 541 542def A57LDMOpsListRegin : A57WriteLMOpsListType< 543 [A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, 544 A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, 545 A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, 546 A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, 547 A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, 548 A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, 549 A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I, 550 A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I]>; 551def A57WriteLDMreginlist : SchedWriteVariant<[ 552 SchedVar<A57LMAddrPred1, A57LDMOpsListRegin.Writes[0-1]>, 553 SchedVar<A57LMAddrPred2, A57LDMOpsListRegin.Writes[0-3]>, 554 SchedVar<A57LMAddrPred3, A57LDMOpsListRegin.Writes[0-5]>, 555 SchedVar<A57LMAddrPred4, A57LDMOpsListRegin.Writes[0-7]>, 556 SchedVar<A57LMAddrPred5, A57LDMOpsListRegin.Writes[0-9]>, 557 SchedVar<A57LMAddrPred6, A57LDMOpsListRegin.Writes[0-11]>, 558 SchedVar<A57LMAddrPred7, A57LDMOpsListRegin.Writes[0-13]>, 559 SchedVar<A57LMAddrPred8, A57LDMOpsListRegin.Writes[0-15]>, 560 SchedVar<NoSchedPred, A57LDMOpsListRegin.Writes[0-15]> 561]> { let Variadic=1; } 562 563def A57LDMOpsList_Upd : A57WriteLMOpsListType< 564 [A57WrBackOne, 565 A57Write_3cyc_1L_1I, A57Write_3cyc_1L_1I, 566 A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, 567 A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, 568 A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, 569 A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, 570 A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, 571 A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, 572 A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>; 573def A57WriteLDM_Upd : SchedWriteVariant<[ 574 SchedVar<A57LMAddrUpdPred1, A57LDMOpsList_Upd.Writes[0-2]>, 575 SchedVar<A57LMAddrUpdPred2, A57LDMOpsList_Upd.Writes[0-4]>, 576 SchedVar<A57LMAddrUpdPred3, A57LDMOpsList_Upd.Writes[0-6]>, 577 SchedVar<A57LMAddrUpdPred4, A57LDMOpsList_Upd.Writes[0-8]>, 578 SchedVar<A57LMAddrUpdPred5, A57LDMOpsList_Upd.Writes[0-10]>, 579 SchedVar<A57LMAddrUpdPred6, A57LDMOpsList_Upd.Writes[0-12]>, 580 SchedVar<A57LMAddrUpdPred7, A57LDMOpsList_Upd.Writes[0-14]>, 581 SchedVar<A57LMAddrUpdPred8, A57LDMOpsList_Upd.Writes[0-16]>, 582 SchedVar<NoSchedPred, A57LDMOpsList_Upd.Writes[0-16]> 583]> { let Variadic=1; } 584 585def A57WriteLDM : SchedWriteVariant<[ 586 SchedVar<IsLDMBaseRegInListPred, [A57WriteLDMreginlist]>, 587 SchedVar<NoSchedPred, [A57WriteLDMnoreginlist]> 588]> { let Variadic=1; } 589 590def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>; 591 592// TODO: no writeback latency defined in documentation (implemented as 1 cyc) 593def : InstRW<[A57WriteLDM_Upd], 594 (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>; 595 596def : InstRW<[A57Write_5cyc_1L], (instregex "VLLDM")>; 597 598// --- 3.9 Store Instructions --- 599 600// Store, immed offset 601def : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR", 602 "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>; 603 604// Store, register offset 605// For minus or for not plus lsl2 scaled we need 3cyc "I0/I1, S", 606// otherwise 1cyc S. 607def A57WriteStrAmLDSTSO : SchedWriteVariant<[ 608 SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_3cyc_1I_1S]>, 609 SchedVar<IsLdstsoMinusRegPred, [A57Write_3cyc_1I_1S]>, 610 SchedVar<NoSchedPred, [A57Write_1cyc_1S]> 611]>; 612def : InstRW<[A57WriteStrAmLDSTSO], (instregex "STRrs", "STRBrs")>; 613 614// STRH,STRD: 3cyc "I0/I1, S" for minus reg, 1cyc S for imm or for plus reg. 615def A57WriteStrAm3 : SchedWriteVariant<[ 616 SchedVar<IsLdrAm3NegRegOffPred, [A57Write_3cyc_1I_1S]>, 617 SchedVar<NoSchedPred, [A57Write_1cyc_1S]> 618]>; 619def : InstRW<[A57WriteStrAm3], (instregex "STRH$")>; 620def A57WriteStrAm3X2 : SchedWriteVariant<[ 621 SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>, 622 SchedVar<NoSchedPred, [A57Write_1cyc_1S]> 623]>; 624def : InstRW<[A57WriteStrAm3X2], (instregex "STRD$")>; 625 626// Store, immed pre-indexed (1cyc "S, I0/I1", 1cyc writeback) 627def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR_PRE_IMM", 628 "STRB_PRE_IMM", "STR(B)?(r|i)_preidx", "(t2)?STRH_(preidx|PRE)", 629 "t2STR(B?)_(PRE|preidx)", "t2STRD_PRE")>; 630 631// Store, register pre-indexed: 632// 1(1) "S, I0/I1" for plus reg 633// 3(2) "I0/I1, S" for minus reg 634// 1(2) "S, M" for scaled plus lsl2 635// 3(2) "I0/I1, S" for other scaled 636def A57WriteStrAmLDSTSOPre : SchedWriteVariant<[ 637 SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_3cyc_1I_1S]>, 638 SchedVar<IsLdstsoMinusRegPredX2, [A57Write_3cyc_1I_1S]>, 639 SchedVar<IsLdstsoScaledPredX2, [A57Write_1cyc_1S_1M]>, 640 SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]> 641]>; 642def A57WriteStrAmLDSTSOPreWrBack : SchedWriteVariant<[ 643 SchedVar<IsLdstsoScaledPredX2, [A57WrBackTwo]>, 644 SchedVar<IsLdstsoMinusRegPredX2, [A57WrBackTwo]>, 645 SchedVar<NoSchedPred, [A57WrBackOne]> 646]>; 647def : InstRW<[A57WriteStrAmLDSTSOPreWrBack, A57WriteStrAmLDSTSOPre], 648 (instregex "STR_PRE_REG", "STRB_PRE_REG")>; 649 650// pre-indexed STRH/STRD (STRH_PRE, STRD_PRE) 651// 1(1) "S, I0/I1" for imm or reg plus 652// 3(2) "I0/I1, S" for reg minus 653def A57WriteStrAm3PreX2 : SchedWriteVariant<[ 654 SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>, 655 SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]> 656]>; 657def A57WriteStrAm3PreWrBackX2 : SchedWriteVariant<[ 658 SchedVar<IsLdrAm3NegRegOffPredX2, [A57WrBackTwo]>, 659 SchedVar<NoSchedPred, [A57WrBackOne]> 660]>; 661def : InstRW<[A57WriteStrAm3PreWrBackX2, A57WriteStrAm3PreX2], 662 (instregex "STRH_PRE")>; 663 664def A57WriteStrAm3PreX3 : SchedWriteVariant<[ 665 SchedVar<IsLdrAm3NegRegOffPredX3, [A57Write_3cyc_1I_1S]>, 666 SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]> 667]>; 668def A57WriteStrAm3PreWrBackX3 : SchedWriteVariant<[ 669 SchedVar<IsLdrAm3NegRegOffPredX3, [A57WrBackTwo]>, 670 SchedVar<NoSchedPred, [A57WrBackOne]> 671]>; 672def : InstRW<[A57WriteStrAm3PreWrBackX3, A57WriteStrAm3PreX3], 673 (instregex "STRD_PRE")>; 674 675def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR(T?)_POST_IMM", 676 "STRB(T?)_POST_IMM", "t2STR(B?)_POST")>; 677 678// 1(2) "S, M" for STR/STRB register post-indexed (both scaled or not) 679def : InstRW<[A57WrBackTwo, A57Write_1cyc_1S_1M], (instregex "STR(T?)_POST_REG", 680 "STRB(T?)_POST_REG", "STR(B?)T_POST$")>; 681 682// post-indexed STRH/STRD(STRH_POST, STRD_POST), STRHTi, STRHTr 683// 1(1) "S, I0/I1" both for reg or imm 684def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], 685 (instregex "(t2)?STR(H|D)_POST", "STRHT(i|r)", "t2STRHT")>; 686 687// --- Store multiple instructions --- 688// TODO: no writeback latency defined in documentation 689def A57WriteSTM : SchedWriteVariant<[ 690 SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>, 691 SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>, 692 SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>, 693 SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>, 694 SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>, 695 SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>, 696 SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>, 697 SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>, 698 SchedVar<NoSchedPred, [A57Write_2cyc_1S]> 699]>; 700def A57WriteSTM_Upd : SchedWriteVariant<[ 701 SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>, 702 SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>, 703 SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>, 704 SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>, 705 SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>, 706 SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>, 707 SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>, 708 SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>, 709 SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]> 710]>; 711 712def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>; 713def : InstRW<[A57WrBackOne, A57WriteSTM_Upd], 714 (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>; 715 716def : InstRW<[A57Write_5cyc_1S], (instregex "VLSTM")>; 717 718// --- 3.10 FP Data Processing Instructions --- 719def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>; 720def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>; 721 722def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>; 723 724// fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional 725def A57WriteVcmp : SchedWriteVariant<[ 726 SchedVar<IsPredicatedPred, [A57Write_6cyc_1V_1X]>, 727 SchedVar<NoSchedPred, [A57Write_3cyc_1X]> 728]>; 729def : InstRW<[A57WriteVcmp], 730 (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>; 731 732// fp convert 733def : InstRW<[A57Write_5cyc_1V], (instregex 734 "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>; 735def : InstRW<[A57Write_5cyc_1V], (instregex "VTOSLS", "VTOUHS", "VTOULS")>; 736def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>; 737 738def : InstRW<[A57Write_5cyc_1V], (instregex "VJCVT")>; 739 740// FP round to integral 741def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>; 742 743// FP divide, FP square root 744def : SchedAlias<WriteFPDIV32, A57Write_17cyc_1W>; 745def : SchedAlias<WriteFPDIV64, A57Write_32cyc_1W>; 746def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>; 747def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>; 748 749def : InstRW<[A57Write_17cyc_1W], (instregex "VSQRTH")>; 750 751// FP max/min 752def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>; 753 754// FP multiply-accumulate pipelines support late forwarding of the result 755// from FP multiply μops to the accumulate operands of an 756// FP multiply-accumulate μop. The latter can potentially be issued 1 cycle 757// after the FP multiply μop has been issued 758// FP multiply, FZ 759def A57WriteVMUL : SchedWriteRes<[A57UnitV]> { let Latency = 5; } 760 761def : SchedAlias<WriteFPMUL32, A57WriteVMUL>; 762def : SchedAlias<WriteFPMUL64, A57WriteVMUL>; 763def : ReadAdvance<ReadFPMUL, 0>; 764 765// FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate 766// VFMA, VFMS, VFNMA, VFNMS, VMLA, VMLS, VNMLA, VNMLS 767def A57WriteVFMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; } 768 769// VFMA takes 9 cyc for common case and 4 cyc for VFMA->VFMA chain (5 read adv.) 770// VMUL takes 5 cyc for common case and 1 cyc for VMUL->VFMA chain (4 read adv.) 771// Currently, there is no way to define different read advances for VFMA operand 772// from VFMA or from VMUL, so there will be 5 read advance. 773// Zero latency (instead of one) for VMUL->VFMA shouldn't break something. 774// The same situation with ASIMD VMUL/VFMA instructions 775// def A57ReadVFMA : SchedRead; 776// def : ReadAdvance<A57ReadVFMA, 5, [A57WriteVFMA]>; 777// def : ReadAdvance<A57ReadVFMA, 4, [A57WriteVMUL]>; 778def A57ReadVFMA5 : SchedReadAdvance<5, [A57WriteVFMA, A57WriteVMUL]>; 779 780def : SchedAlias<WriteFPMAC32, A57WriteVFMA>; 781def : SchedAlias<WriteFPMAC64, A57WriteVFMA>; 782def : SchedAlias<ReadFPMAC, A57ReadVFMA5>; 783 784// VMLAH/VMLSH are not binded to scheduling classes by default, so here custom: 785def : InstRW<[A57WriteVFMA, A57ReadVFMA5, ReadFPMUL, ReadFPMUL], 786 (instregex "VMLAH", "VMLSH", "VNMLAH", "VNMLSH")>; 787 788def : InstRW<[A57WriteVMUL], 789 (instregex "VUDOTD", "VSDOTD", "VUDOTQ", "VSDOTQ")>; 790 791def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>; 792def : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>; 793 794// --- 3.11 FP Miscellaneous Instructions --- 795// VMOV: 3cyc "F0/F1" for imm/reg 796def : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>; 797def : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>; 798 799def : InstRW<[A57Write_3cyc_1V], (instregex "VINSH")>; 800 801// 5cyc L for FP transfer, vfp to core reg, 802// 5cyc L for FP transfer, core reg to vfp 803def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>; 804// VMOVRRS/VMOVRRD in common code declared with one WriteFPMOV (instead of 2). 805def : InstRW<[A57Write_5cyc_1L, A57Write_5cyc_1L], (instregex "VMOV(RRS|RRD)")>; 806 807// 8cyc "L,F0/F1" for FP transfer, core reg to upper or lower half of vfp D-reg 808def : InstRW<[A57Write_8cyc_1L_1I], (instregex "VMOVDRR")>; 809 810// --- 3.12 FP Load Instructions --- 811def : InstRW<[A57Write_5cyc_1L], (instregex "VLDR(D|S|H)")>; 812 813def : InstRW<[A57Write_5cyc_1L], (instregex "VLDMQIA$")>; 814 815// FP load multiple (VLDM) 816 817def A57VLDMOpsListUncond : A57WriteLMOpsListType< 818 [A57Write_5cyc_1L, A57Write_5cyc_1L, 819 A57Write_6cyc_1L, A57Write_6cyc_1L, 820 A57Write_7cyc_1L, A57Write_7cyc_1L, 821 A57Write_8cyc_1L, A57Write_8cyc_1L, 822 A57Write_9cyc_1L, A57Write_9cyc_1L, 823 A57Write_10cyc_1L, A57Write_10cyc_1L, 824 A57Write_11cyc_1L, A57Write_11cyc_1L, 825 A57Write_12cyc_1L, A57Write_12cyc_1L]>; 826def A57WriteVLDMuncond : SchedWriteVariant<[ 827 SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond.Writes[0-1]>, 828 SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond.Writes[0-3]>, 829 SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond.Writes[0-5]>, 830 SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond.Writes[0-7]>, 831 SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond.Writes[0-9]>, 832 SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond.Writes[0-11]>, 833 SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond.Writes[0-13]>, 834 SchedVar<NoSchedPred, A57VLDMOpsListUncond.Writes[0-15]> 835]> { let Variadic=1; } 836 837def A57VLDMOpsListCond : A57WriteLMOpsListType< 838 [A57Write_5cyc_1L, A57Write_6cyc_1L, 839 A57Write_7cyc_1L, A57Write_8cyc_1L, 840 A57Write_9cyc_1L, A57Write_10cyc_1L, 841 A57Write_11cyc_1L, A57Write_12cyc_1L, 842 A57Write_13cyc_1L, A57Write_14cyc_1L, 843 A57Write_15cyc_1L, A57Write_16cyc_1L, 844 A57Write_17cyc_1L, A57Write_18cyc_1L, 845 A57Write_19cyc_1L, A57Write_20cyc_1L]>; 846def A57WriteVLDMcond : SchedWriteVariant<[ 847 SchedVar<A57LMAddrPred1, A57VLDMOpsListCond.Writes[0-1]>, 848 SchedVar<A57LMAddrPred2, A57VLDMOpsListCond.Writes[0-3]>, 849 SchedVar<A57LMAddrPred3, A57VLDMOpsListCond.Writes[0-5]>, 850 SchedVar<A57LMAddrPred4, A57VLDMOpsListCond.Writes[0-7]>, 851 SchedVar<A57LMAddrPred5, A57VLDMOpsListCond.Writes[0-9]>, 852 SchedVar<A57LMAddrPred6, A57VLDMOpsListCond.Writes[0-11]>, 853 SchedVar<A57LMAddrPred7, A57VLDMOpsListCond.Writes[0-13]>, 854 SchedVar<NoSchedPred, A57VLDMOpsListCond.Writes[0-15]> 855]> { let Variadic=1; } 856 857def A57WriteVLDM : SchedWriteVariant<[ 858 SchedVar<IsPredicatedPred, [A57WriteVLDMcond]>, 859 SchedVar<NoSchedPred, [A57WriteVLDMuncond]> 860]> { let Variadic=1; } 861 862def : InstRW<[A57WriteVLDM], (instregex "VLDM(DIA|SIA)$")>; 863 864def A57VLDMOpsListUncond_Upd : A57WriteLMOpsListType< 865 [A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I, 866 A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I, 867 A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I, 868 A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I, 869 A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I, 870 A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I, 871 A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I, 872 A57Write_12cyc_1L_1I, A57Write_12cyc_1L_1I]>; 873def A57WriteVLDMuncond_UPD : SchedWriteVariant<[ 874 SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond_Upd.Writes[0-1]>, 875 SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond_Upd.Writes[0-3]>, 876 SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond_Upd.Writes[0-5]>, 877 SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond_Upd.Writes[0-7]>, 878 SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond_Upd.Writes[0-9]>, 879 SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond_Upd.Writes[0-11]>, 880 SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond_Upd.Writes[0-13]>, 881 SchedVar<NoSchedPred, A57VLDMOpsListUncond_Upd.Writes[0-15]> 882]> { let Variadic=1; } 883 884def A57VLDMOpsListCond_Upd : A57WriteLMOpsListType< 885 [A57Write_5cyc_1L_1I, A57Write_6cyc_1L_1I, 886 A57Write_7cyc_1L_1I, A57Write_8cyc_1L_1I, 887 A57Write_9cyc_1L_1I, A57Write_10cyc_1L_1I, 888 A57Write_11cyc_1L_1I, A57Write_12cyc_1L_1I, 889 A57Write_13cyc_1L_1I, A57Write_14cyc_1L_1I, 890 A57Write_15cyc_1L_1I, A57Write_16cyc_1L_1I, 891 A57Write_17cyc_1L_1I, A57Write_18cyc_1L_1I, 892 A57Write_19cyc_1L_1I, A57Write_20cyc_1L_1I]>; 893def A57WriteVLDMcond_UPD : SchedWriteVariant<[ 894 SchedVar<A57LMAddrPred1, A57VLDMOpsListCond_Upd.Writes[0-1]>, 895 SchedVar<A57LMAddrPred2, A57VLDMOpsListCond_Upd.Writes[0-3]>, 896 SchedVar<A57LMAddrPred3, A57VLDMOpsListCond_Upd.Writes[0-5]>, 897 SchedVar<A57LMAddrPred4, A57VLDMOpsListCond_Upd.Writes[0-7]>, 898 SchedVar<A57LMAddrPred5, A57VLDMOpsListCond_Upd.Writes[0-9]>, 899 SchedVar<A57LMAddrPred6, A57VLDMOpsListCond_Upd.Writes[0-11]>, 900 SchedVar<A57LMAddrPred7, A57VLDMOpsListCond_Upd.Writes[0-13]>, 901 SchedVar<NoSchedPred, A57VLDMOpsListCond_Upd.Writes[0-15]> 902]> { let Variadic=1; } 903 904def A57WriteVLDM_UPD : SchedWriteVariant<[ 905 SchedVar<IsPredicatedPred, [A57WriteVLDMcond_UPD]>, 906 SchedVar<NoSchedPred, [A57WriteVLDMuncond_UPD]> 907]> { let Variadic=1; } 908 909def : InstRW<[A57WrBackOne, A57WriteVLDM_UPD], 910 (instregex "VLDM(DIA_UPD|DDB_UPD|SIA_UPD|SDB_UPD)")>; 911 912// --- 3.13 FP Store Instructions --- 913def : InstRW<[A57Write_1cyc_1S], (instregex "VSTR(D|S|H)")>; 914 915def : InstRW<[A57Write_2cyc_1S], (instregex "VSTMQIA$")>; 916 917def A57WriteVSTMs : SchedWriteVariant<[ 918 SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>, 919 SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>, 920 SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>, 921 SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>, 922 SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>, 923 SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>, 924 SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>, 925 SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>, 926 SchedVar<NoSchedPred, [A57Write_2cyc_1S]> 927]>; 928def A57WriteVSTMd : SchedWriteVariant<[ 929 SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S]>, 930 SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S]>, 931 SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S]>, 932 SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S]>, 933 SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S]>, 934 SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S]>, 935 SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S]>, 936 SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S]>, 937 SchedVar<NoSchedPred, [A57Write_4cyc_1S]> 938]>; 939def A57WriteVSTMs_Upd : SchedWriteVariant<[ 940 SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>, 941 SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>, 942 SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>, 943 SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>, 944 SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>, 945 SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>, 946 SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>, 947 SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>, 948 SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]> 949]>; 950def A57WriteVSTMd_Upd : SchedWriteVariant<[ 951 SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S_1I]>, 952 SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S_1I]>, 953 SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S_1I]>, 954 SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S_1I]>, 955 SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S_1I]>, 956 SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S_1I]>, 957 SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S_1I]>, 958 SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S_1I]>, 959 SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]> 960]>; 961 962def : InstRW<[A57WriteVSTMs], (instregex "VSTMSIA$")>; 963def : InstRW<[A57WriteVSTMd], (instregex "VSTMDIA$")>; 964def : InstRW<[A57WrBackOne, A57WriteVSTMs_Upd], 965 (instregex "VSTM(SIA_UPD|SDB_UPD)")>; 966def : InstRW<[A57WrBackOne, A57WriteVSTMd_Upd], 967 (instregex "VSTM(DIA_UPD|DDB_UPD)")>; 968 969// --- 3.14 ASIMD Integer Instructions --- 970 971// ASIMD absolute diff, 3cyc F0/F1 for integer VABD 972def : InstRW<[A57Write_3cyc_1V], (instregex "VABD(s|u)")>; 973 974// ASIMD absolute diff accum: 4(1) F1 for D-form, 5(2) F1 for Q-form 975def A57WriteVABAD : SchedWriteRes<[A57UnitX]> { let Latency = 4; } 976def A57ReadVABAD : SchedReadAdvance<3, [A57WriteVABAD]>; 977def : InstRW<[A57WriteVABAD, A57ReadVABAD], 978 (instregex "VABA(s|u)(v8i8|v4i16|v2i32)")>; 979def A57WriteVABAQ : SchedWriteRes<[A57UnitX]> { let Latency = 5; } 980def A57ReadVABAQ : SchedReadAdvance<3, [A57WriteVABAQ]>; 981def : InstRW<[A57WriteVABAQ, A57ReadVABAQ], 982 (instregex "VABA(s|u)(v16i8|v8i16|v4i32)")>; 983 984// ASIMD absolute diff accum long: 4(1) F1 for VABAL 985def A57WriteVABAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; } 986def A57ReadVABAL : SchedReadAdvance<3, [A57WriteVABAL]>; 987def : InstRW<[A57WriteVABAL, A57ReadVABAL], (instregex "VABAL(s|u)")>; 988 989// ASIMD absolute diff long: 3cyc F0/F1 for VABDL 990def : InstRW<[A57Write_3cyc_1V], (instregex "VABDL(s|u)")>; 991 992// ASIMD arith, basic 993def : InstRW<[A57Write_3cyc_1V], (instregex "VADDv", "VADDL", "VADDW", 994 "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)", 995 "VPADDi", "VPADDL", "VSUBv", "VSUBL", "VSUBW")>; 996 997// ASIMD arith, complex 998def : InstRW<[A57Write_3cyc_1V], (instregex "VABS", "VADDHN", "VHADD", "VHSUB", 999 "VQABS", "VQADD", "VQNEG", "VQSUB", 1000 "VRADDHN", "VRHADD", "VRSUBHN", "VSUBHN")>; 1001 1002// ASIMD compare 1003def : InstRW<[A57Write_3cyc_1V], 1004 (instregex "VCEQ", "VCGE", "VCGT", "VCLE", "VTST", "VCLT")>; 1005 1006// ASIMD logical 1007def : InstRW<[A57Write_3cyc_1V], 1008 (instregex "VAND", "VBIC", "VMVN", "VORR", "VORN", "VEOR")>; 1009 1010// ASIMD max/min 1011def : InstRW<[A57Write_3cyc_1V], 1012 (instregex "(VMAX|VMIN)(s|u)", "(VPMAX|VPMIN)(s8|s16|s32|u8|u16|u32)")>; 1013 1014// ASIMD multiply, D-form: 5cyc F0 for r0px, 4cyc F0 for r1p0 and later 1015// Cortex-A57 r1p0 and later reduce the latency of ASIMD multiply 1016// and multiply-with-accumulate instructions relative to r0pX. 1017def A57WriteVMULD_VecInt : SchedWriteVariant<[ 1018 SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, 1019 SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; 1020def : InstRW<[A57WriteVMULD_VecInt], (instregex 1021 "VMUL(v8i8|v4i16|v2i32|pd)", "VMULsl(v4i16|v2i32)", 1022 "VQDMULH(sl)?(v4i16|v2i32)", "VQRDMULH(sl)?(v4i16|v2i32)")>; 1023 1024// ASIMD multiply, Q-form: 6cyc F0 for r0px, 5cyc F0 for r1p0 and later 1025def A57WriteVMULQ_VecInt : SchedWriteVariant<[ 1026 SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>, 1027 SchedVar<NoSchedPred, [A57Write_6cyc_1W]>]>; 1028def : InstRW<[A57WriteVMULQ_VecInt], (instregex 1029 "VMUL(v16i8|v8i16|v4i32|pq)", "VMULsl(v8i16|v4i32)", 1030 "VQDMULH(sl)?(v8i16|v4i32)", "VQRDMULH(sl)?(v8i16|v4i32)")>; 1031 1032// ASIMD multiply accumulate, D-form 1033// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence 1034// (4 or 3 ReadAdvance) 1035def A57WriteVMLAD_VecInt : SchedWriteVariant<[ 1036 SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, 1037 SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; 1038def A57ReadVMLAD_VecInt : SchedReadVariant<[ 1039 SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAD_VecInt]>]>, 1040 SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAD_VecInt]>]> 1041]>; 1042def : InstRW<[A57WriteVMLAD_VecInt, A57ReadVMLAD_VecInt], 1043 (instregex "VMLA(sl)?(v8i8|v4i16|v2i32)", "VMLS(sl)?(v8i8|v4i16|v2i32)")>; 1044 1045// ASIMD multiply accumulate, Q-form 1046// 6cyc F0 for r0px, 5cyc F0 for r1p0 and later, 2cyc for accumulate sequence 1047// (4 or 3 ReadAdvance) 1048def A57WriteVMLAQ_VecInt : SchedWriteVariant<[ 1049 SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>, 1050 SchedVar<NoSchedPred, [A57Write_6cyc_1W]>]>; 1051def A57ReadVMLAQ_VecInt : SchedReadVariant<[ 1052 SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAQ_VecInt]>]>, 1053 SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAQ_VecInt]>]> 1054]>; 1055def : InstRW<[A57WriteVMLAQ_VecInt, A57ReadVMLAQ_VecInt], 1056 (instregex "VMLA(sl)?(v16i8|v8i16|v4i32)", "VMLS(sl)?(v16i8|v8i16|v4i32)")>; 1057 1058// ASIMD multiply accumulate long 1059// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence 1060// (4 or 3 ReadAdvance) 1061def A57WriteVMLAL_VecInt : SchedWriteVariant<[ 1062 SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, 1063 SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; 1064def A57ReadVMLAL_VecInt : SchedReadVariant<[ 1065 SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAL_VecInt]>]>, 1066 SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAL_VecInt]>]> 1067]>; 1068def : InstRW<[A57WriteVMLAL_VecInt, A57ReadVMLAL_VecInt], 1069 (instregex "VMLAL(s|u)", "VMLSL(s|u)")>; 1070 1071// ASIMD multiply accumulate saturating long 1072// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 2cyc for accumulate sequence 1073// (3 or 2 ReadAdvance) 1074def A57WriteVQDMLAL_VecInt : SchedWriteVariant<[ 1075 SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, 1076 SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; 1077def A57ReadVQDMLAL_VecInt : SchedReadVariant<[ 1078 SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<2, [A57WriteVQDMLAL_VecInt]>]>, 1079 SchedVar<NoSchedPred, [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]> 1080]>; 1081def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt], 1082 (instregex "VQDMLAL", "VQDMLSL")>; 1083 1084// Vector Saturating Rounding Doubling Multiply Accumulate/Subtract Long 1085// Scheduling info from VQDMLAL/VQDMLSL 1086def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt], 1087 (instregex "VQRDMLAH", "VQRDMLSH")>; 1088 1089// ASIMD multiply long 1090// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later 1091def A57WriteVMULL_VecInt : SchedWriteVariant<[ 1092 SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>, 1093 SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>; 1094def : InstRW<[A57WriteVMULL_VecInt], 1095 (instregex "VMULL(s|u|p8|sls|slu)", "VQDMULL")>; 1096 1097// ASIMD pairwise add and accumulate 1098// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance) 1099def A57WriteVPADAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; } 1100def A57ReadVPADAL : SchedReadAdvance<3, [A57WriteVPADAL]>; 1101def : InstRW<[A57WriteVPADAL, A57ReadVPADAL], (instregex "VPADAL(s|u)")>; 1102 1103// ASIMD shift accumulate 1104// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance) 1105def A57WriteVSRA : SchedWriteRes<[A57UnitX]> { let Latency = 4; } 1106def A57ReadVSRA : SchedReadAdvance<3, [A57WriteVSRA]>; 1107def : InstRW<[A57WriteVSRA, A57ReadVSRA], (instregex "VSRA", "VRSRA")>; 1108 1109// ASIMD shift by immed, basic 1110def : InstRW<[A57Write_3cyc_1X], 1111 (instregex "VMOVL", "VSHLi", "VSHLL", "VSHR(s|u)", "VSHRN")>; 1112 1113// ASIMD shift by immed, complex 1114def : InstRW<[A57Write_4cyc_1X], (instregex 1115 "VQRSHRN", "VQRSHRUN", "VQSHL(si|ui|su)", "VQSHRN", "VQSHRUN", "VRSHR(s|u)", 1116 "VRSHRN")>; 1117 1118// ASIMD shift by immed and insert, basic, D-form 1119def : InstRW<[A57Write_4cyc_1X], (instregex 1120 "VSLI(v8i8|v4i16|v2i32|v1i64)", "VSRI(v8i8|v4i16|v2i32|v1i64)")>; 1121 1122// ASIMD shift by immed and insert, basic, Q-form 1123def : InstRW<[A57Write_5cyc_1X], (instregex 1124 "VSLI(v16i8|v8i16|v4i32|v2i64)", "VSRI(v16i8|v8i16|v4i32|v2i64)")>; 1125 1126// ASIMD shift by register, basic, D-form 1127def : InstRW<[A57Write_3cyc_1X], (instregex 1128 "VSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>; 1129 1130// ASIMD shift by register, basic, Q-form 1131def : InstRW<[A57Write_4cyc_1X], (instregex 1132 "VSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>; 1133 1134// ASIMD shift by register, complex, D-form 1135// VQRSHL, VQSHL, VRSHL 1136def : InstRW<[A57Write_4cyc_1X], (instregex 1137 "VQRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)", "VQSHL(s|u)(v8i8|v4i16|v2i32|v1i64)", 1138 "VRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>; 1139 1140// ASIMD shift by register, complex, Q-form 1141def : InstRW<[A57Write_5cyc_1X], (instregex 1142 "VQRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)", "VQSHL(s|u)(v16i8|v8i16|v4i32|v2i64)", 1143 "VRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>; 1144 1145// --- 3.15 ASIMD Floating-Point Instructions --- 1146// ASIMD FP absolute value 1147def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(fd|fq|hd|hq)")>; 1148 1149// ASIMD FP arith 1150def : InstRW<[A57Write_5cyc_1V], (instregex "VABD(fd|fq|hd|hq)", 1151 "VADD(fd|fq|hd|hq)", "VPADD(f|h)", "VSUB(fd|fq|hd|hq)")>; 1152 1153def : InstRW<[A57Write_5cyc_1V], (instregex "VCADD", "VCMLA")>; 1154 1155// ASIMD FP compare 1156def : InstRW<[A57Write_5cyc_1V], (instregex "VAC(GE|GT|LE|LT)", 1157 "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>; 1158 1159// ASIMD FP convert, integer 1160def : InstRW<[A57Write_5cyc_1V], (instregex 1161 "VCVT(f2sd|f2ud|s2fd|u2fd|f2sq|f2uq|s2fq|u2fq|f2xsd|f2xud|xs2fd|xu2fd)", 1162 "VCVT(f2xsq|f2xuq|xs2fq|xu2fq)", 1163 "VCVT(AN|MN|NN|PN)(SDf|SQf|UDf|UQf|SDh|SQh|UDh|UQh)")>; 1164 1165// ASIMD FP convert, half-precision: 8cyc F0/F1 1166def : InstRW<[A57Write_8cyc_1V], (instregex 1167 "VCVT(h2sd|h2ud|s2hd|u2hd|h2sq|h2uq|s2hq|u2hq|h2xsd|h2xud|xs2hd|xu2hd)", 1168 "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)", 1169 "VCVT(f2h|h2f)")>; 1170 1171// ASIMD FP max/min 1172def : InstRW<[A57Write_5cyc_1V], (instregex 1173 "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "(NEON|VFP)_VMAXNM", 1174 "(NEON|VFP)_VMINNM")>; 1175 1176// ASIMD FP multiply 1177def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 5; } 1178def : InstRW<[A57WriteVMUL_VecFP], (instregex "VMUL(sl)?(fd|fq|hd|hq)")>; 1179 1180// ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for accumulate sequence 1181def A57WriteVMLA_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 9; } 1182def A57ReadVMLA_VecFP : 1183 SchedReadAdvance<5, [A57WriteVMLA_VecFP, A57WriteVMUL_VecFP]>; 1184def : InstRW<[A57WriteVMLA_VecFP, A57ReadVMLA_VecFP], 1185 (instregex "(VMLA|VMLS)(sl)?(fd|fq|hd|hq)", "(VFMA|VFMS)(fd|fq|hd|hq)")>; 1186 1187// ASIMD FP negate 1188def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG(fd|f32q|hd|hq)")>; 1189 1190// ASIMD FP round to integral 1191def : InstRW<[A57Write_5cyc_1V], (instregex 1192 "VRINT(AN|MN|NN|PN|XN|ZN)(Df|Qf|Dh|Qh)")>; 1193 1194// --- 3.16 ASIMD Miscellaneous Instructions --- 1195 1196// ASIMD bitwise insert 1197def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL", "VBSP")>; 1198 1199// ASIMD count 1200def : InstRW<[A57Write_3cyc_1V], (instregex "VCLS", "VCLZ", "VCNT")>; 1201 1202// ASIMD duplicate, core reg: 8cyc "L, F0/F1" 1203def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VDUP(8|16|32)(d|q)")>; 1204 1205// ASIMD duplicate, scalar: 3cyc "F0/F1" 1206def : InstRW<[A57Write_3cyc_1V], (instregex "VDUPLN(8|16|32)(d|q)")>; 1207 1208// ASIMD extract 1209def : InstRW<[A57Write_3cyc_1V], (instregex "VEXT(d|q)(8|16|32|64)")>; 1210 1211// ASIMD move, immed 1212def : InstRW<[A57Write_3cyc_1V], (instregex 1213 "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)", 1214 "VMOVD0", "VMOVQ0")>; 1215 1216// ASIMD move, narrowing 1217def : InstRW<[A57Write_3cyc_1V], (instregex "VMOVN")>; 1218 1219// ASIMD move, saturating 1220def : InstRW<[A57Write_4cyc_1X], (instregex "VQMOVN")>; 1221 1222// ASIMD reciprocal estimate 1223def : InstRW<[A57Write_5cyc_1V], (instregex "VRECPE", "VRSQRTE")>; 1224 1225// ASIMD reciprocal step, FZ 1226def : InstRW<[A57Write_9cyc_1V], (instregex "VRECPS", "VRSQRTS")>; 1227 1228// ASIMD reverse, swap, table lookup (1-2 reg) 1229def : InstRW<[A57Write_3cyc_1V], (instregex "VREV", "VSWP", "VTB(L|X)(1|2)")>; 1230 1231// ASIMD table lookup (3-4 reg) 1232def : InstRW<[A57Write_6cyc_1V], (instregex "VTBL(3|4)", "VTBX(3|4)")>; 1233 1234// ASIMD transfer, scalar to core reg: 6cyc "L, I0/I1" 1235def : InstRW<[A57Write_6cyc_1L_1I], (instregex "VGETLN")>; 1236 1237// ASIMD transfer, core reg to scalar: 8cyc "L, F0/F1" 1238def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VSETLN")>; 1239 1240// ASIMD transpose 1241def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V], (instregex "VTRN")>; 1242 1243// ASIMD unzip/zip, D-form 1244def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V], 1245 (instregex "VUZPd", "VZIPd")>; 1246 1247// ASIMD unzip/zip, Q-form 1248def : InstRW<[A57Write_6cyc_1V, A57Write_6cyc_1V], 1249 (instregex "VUZPq", "VZIPq")>; 1250 1251// --- 3.17 ASIMD Load Instructions --- 1252 1253// Overriden via InstRW for this processor. 1254def : WriteRes<WriteVLD1, []>; 1255def : WriteRes<WriteVLD2, []>; 1256def : WriteRes<WriteVLD3, []>; 1257def : WriteRes<WriteVLD4, []>; 1258def : WriteRes<WriteVST1, []>; 1259def : WriteRes<WriteVST2, []>; 1260def : WriteRes<WriteVST3, []>; 1261def : WriteRes<WriteVST4, []>; 1262 1263// 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency 1264def : InstRW<[A57Write_5cyc_1L], (instregex "VLD1(d|q)(8|16|32|64)$")>; 1265def : InstRW<[A57Write_5cyc_1L_1I, A57WrBackOne], 1266 (instregex "VLD1(d|q)(8|16|32|64)wb")>; 1267 1268// 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency 1269def : InstRW<[A57Write_6cyc_1L], 1270 (instregex "VLD1(d|q)(8|16|32|64)(T|Q)$", "VLD1d64(T|Q)Pseudo")>; 1271 1272def : InstRW<[A57Write_6cyc_1L_1I, A57WrBackOne], 1273 (instregex "VLD1(d|q)(8|16|32|64)(T|Q)wb")>; 1274 1275// ASIMD load, 1 element, one lane and all lanes: 8cyc "L, F0/F1" 1276def : InstRW<[A57Write_8cyc_1L_1V], (instregex 1277 "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>; 1278def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], (instregex 1279 "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)", "VLD1LNq(8|16|32)Pseudo_UPD")>; 1280 1281// ASIMD load, 2 element, multiple, 2 reg: 8cyc "L, F0/F1" 1282def : InstRW<[A57Write_8cyc_1L_1V], 1283 (instregex "VLD2(d|q)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>; 1284def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], 1285 (instregex "VLD2(d|q)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>; 1286 1287// ASIMD load, 2 element, multiple, 4 reg: 9cyc "L, F0/F1" 1288def : InstRW<[A57Write_9cyc_1L_1V], (instregex "VLD2b(8|16|32)$")>; 1289def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], 1290 (instregex "VLD2b(8|16|32)wb")>; 1291 1292// ASIMD load, 2 element, one lane and all lanes: 8cyc "L, F0/F1" 1293def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V], 1294 (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$", 1295 "VLD2LN(d|q)(8|16|32)Pseudo$")>; 1296// 2 results + wb result 1297def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V, A57WrBackOne], 1298 (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>; 1299// 1 result + wb result 1300def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], 1301 (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb", 1302 "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>; 1303 1304// ASIMD load, 3 element, multiple, 3 reg: 9cyc "L, F0/F1" 1305// 3 results 1306def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V], 1307 (instregex "VLD3(d|q)(8|16|32)$")>; 1308// 1 result 1309def : InstRW<[A57Write_9cyc_1L_1V], 1310 (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>; 1311// 3 results + wb 1312def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, 1313 A57Write_9cyc_1L_1V_1I, A57WrBackOne], 1314 (instregex "VLD3(d|q)(8|16|32)_UPD$")>; 1315// 1 result + wb 1316def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], 1317 (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>; 1318 1319// ASIMD load, 3 element, one lane, size 32: 8cyc "L, F0/F1" 1320def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V], 1321 (instregex "VLD3LN(d|q)32$", 1322 "VLD3LN(d|q)32Pseudo$")>; 1323def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, 1324 A57Write_8cyc_1L_1V_1I, A57WrBackOne], 1325 (instregex "VLD3LN(d|q)32_UPD")>; 1326def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], 1327 (instregex "VLD3LN(d|q)32Pseudo_UPD")>; 1328 1329// ASIMD load, 3 element, one lane, size 8/16: 9cyc "L, F0/F1" 1330def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V], 1331 (instregex "VLD3LN(d|q)(8|16)$", 1332 "VLD3LN(d|q)(8|16)Pseudo$")>; 1333def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, 1334 A57Write_9cyc_1L_1V_1I, A57WrBackOne], 1335 (instregex "VLD3LN(d|q)(8|16)_UPD")>; 1336def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], 1337 (instregex "VLD3LN(d|q)(8|16)Pseudo_UPD")>; 1338 1339// ASIMD load, 3 element, all lanes: 8cyc "L, F0/F1" 1340def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V], 1341 (instregex "VLD3DUP(d|q)(8|16|32)$", 1342 "VLD3DUP(d|q)(8|16|32)Pseudo$")>; 1343def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, 1344 A57Write_8cyc_1L_1V_1I, A57WrBackOne], 1345 (instregex "VLD3DUP(d|q)(8|16|32)_UPD")>; 1346def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], 1347 (instregex "VLD3DUP(d|q)(8|16|32)Pseudo_UPD")>; 1348 1349// ASIMD load, 4 element, multiple, 4 reg: 9cyc "L, F0/F1" 1350def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, 1351 A57Write_9cyc_1L_1V], 1352 (instregex "VLD4(d|q)(8|16|32)$")>; 1353def : InstRW<[A57Write_9cyc_1L_1V], 1354 (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>; 1355def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, 1356 A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, A57WrBackOne], 1357 (instregex "VLD4(d|q)(8|16|32)_UPD")>; 1358def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], 1359 (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>; 1360 1361// ASIMD load, 4 element, one lane, size 32: 8cyc "L, F0/F1" 1362def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, 1363 A57Write_8cyc_1L_1V], 1364 (instregex "VLD4LN(d|q)32$", 1365 "VLD4LN(d|q)32Pseudo$")>; 1366def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, 1367 A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, 1368 A57WrBackOne], 1369 (instregex "VLD4LN(d|q)32_UPD")>; 1370def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], 1371 (instregex "VLD4LN(d|q)32Pseudo_UPD")>; 1372 1373// ASIMD load, 4 element, one lane, size 8/16: 9cyc "L, F0/F1" 1374def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, 1375 A57Write_9cyc_1L_1V], 1376 (instregex "VLD4LN(d|q)(8|16)$", 1377 "VLD4LN(d|q)(8|16)Pseudo$")>; 1378def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, 1379 A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, 1380 A57WrBackOne], 1381 (instregex "VLD4LN(d|q)(8|16)_UPD")>; 1382def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne], 1383 (instregex "VLD4LN(d|q)(8|16)Pseudo_UPD")>; 1384 1385// ASIMD load, 4 element, all lanes: 8cyc "L, F0/F1" 1386def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, 1387 A57Write_8cyc_1L_1V], 1388 (instregex "VLD4DUP(d|q)(8|16|32)$", 1389 "VLD4DUP(d|q)(8|16|32)Pseudo$")>; 1390def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, 1391 A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I, 1392 A57WrBackOne], 1393 (instregex "VLD4DUP(d|q)(8|16|32)_UPD")>; 1394def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], 1395 (instregex "VLD4DUP(d|q)(8|16|32)Pseudo_UPD")>; 1396 1397// --- 3.18 ASIMD Store Instructions --- 1398 1399// ASIMD store, 1 element, multiple, 1 reg: 1cyc S 1400def : InstRW<[A57Write_1cyc_1S], (instregex "VST1d(8|16|32|64)$")>; 1401def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], 1402 (instregex "VST1d(8|16|32|64)wb")>; 1403// ASIMD store, 1 element, multiple, 2 reg: 2cyc S 1404def : InstRW<[A57Write_2cyc_1S], (instregex "VST1q(8|16|32|64)$")>; 1405def : InstRW<[A57WrBackOne, A57Write_2cyc_1S_1I], 1406 (instregex "VST1q(8|16|32|64)wb")>; 1407// ASIMD store, 1 element, multiple, 3 reg: 3cyc S 1408def : InstRW<[A57Write_3cyc_1S], 1409 (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>; 1410def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1I], 1411 (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>; 1412// ASIMD store, 1 element, multiple, 4 reg: 4cyc S 1413def : InstRW<[A57Write_4cyc_1S], 1414 (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>; 1415def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1I], 1416 (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>; 1417// ASIMD store, 1 element, one lane: 3cyc "F0/F1, S" 1418def : InstRW<[A57Write_3cyc_1S_1V], 1419 (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>; 1420def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], 1421 (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>; 1422// ASIMD store, 2 element, multiple, 2 reg: 3cyc "F0/F1, S" 1423def : InstRW<[A57Write_3cyc_1S_1V], 1424 (instregex "VST2(d|b)(8|16|32)$")>; 1425def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], 1426 (instregex "VST2(b|d)(8|16|32)wb")>; 1427// ASIMD store, 2 element, multiple, 4 reg: 4cyc "F0/F1, S" 1428def : InstRW<[A57Write_4cyc_1S_1V], 1429 (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>; 1430def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I], 1431 (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>; 1432// ASIMD store, 2 element, one lane: 3cyc "F0/F1, S" 1433def : InstRW<[A57Write_3cyc_1S_1V], 1434 (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>; 1435def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], 1436 (instregex "VST2LN(d|q)(8|16|32)_UPD", 1437 "VST2LN(d|q)(8|16|32)Pseudo_UPD")>; 1438// ASIMD store, 3 element, multiple, 3 reg 1439def : InstRW<[A57Write_3cyc_1S_1V], 1440 (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>; 1441def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], 1442 (instregex "VST3(d|q)(8|16|32)_UPD", 1443 "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; 1444// ASIMD store, 3 element, one lane 1445def : InstRW<[A57Write_3cyc_1S_1V], 1446 (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>; 1447def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], 1448 (instregex "VST3LN(d|q)(8|16|32)_UPD", 1449 "VST3LN(d|q)(8|16|32)Pseudo_UPD")>; 1450// ASIMD store, 4 element, multiple, 4 reg 1451def : InstRW<[A57Write_4cyc_1S_1V], 1452 (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>; 1453def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I], 1454 (instregex "VST4(d|q)(8|16|32)_UPD", 1455 "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>; 1456// ASIMD store, 4 element, one lane 1457def : InstRW<[A57Write_3cyc_1S_1V], 1458 (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>; 1459def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I], 1460 (instregex "VST4LN(d|q)(8|16|32)_UPD", 1461 "VST4LN(d|q)(8|16|32)Pseudo_UPD")>; 1462 1463// --- 3.19 Cryptography Extensions --- 1464// Crypto AES ops 1465// AESD, AESE, AESIMC, AESMC: 3cyc F0 1466def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>; 1467// Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0 1468def : InstRW<[A57Write_3cyc_1W], (instregex "^VMULLp64")>; 1469// Crypto SHA1 xor ops: 6cyc F0/F1 1470def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>; 1471// Crypto SHA1 fast ops: 3cyc F0 1472def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>; 1473// Crypto SHA1 slow ops: 6cyc F0 1474def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>; 1475// Crypto SHA256 fast ops: 3cyc F0 1476def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>; 1477// Crypto SHA256 slow ops: 6cyc F0 1478def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>; 1479 1480// --- 3.20 CRC --- 1481def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>; 1482 1483// ----------------------------------------------------------------------------- 1484// Common definitions 1485def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; } 1486def : SchedAlias<WriteALU, CheckBranchForm<0, A57BranchForm<A57Write_1cyc_1I>>>; 1487 1488def : SchedAlias<WriteBr, A57Write_1cyc_1B>; 1489def : SchedAlias<WriteBrL, A57Write_1cyc_1B_1I>; 1490def : SchedAlias<WriteBrTbl, A57Write_1cyc_1B_1I>; 1491def : SchedAlias<WritePreLd, A57Write_4cyc_1L>; 1492 1493def : SchedAlias<WriteLd, A57Write_4cyc_1L>; 1494def : SchedAlias<WriteST, A57Write_1cyc_1S>; 1495def : ReadAdvance<ReadALU, 0>; 1496 1497} // SchedModel = CortexA57Model 1498 1499