//=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for ARM Cortex-A57 to support
// instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

//===----------------------------------------------------------------------===//
// *** Common description and scheduling model parameters taken from AArch64 ***
// The Cortex-A57 is a traditional superscalar microprocessor with a
// conservative 3-wide in-order stage for decode and dispatch. Combined with the
// much wider out-of-order issue stage, this produced a need to carefully
// schedule micro-ops so that all three decoded each cycle are successfully
// issued as the reservation station(s) simply don't stay occupied for long.
// Therefore, IssueWidth is set to the narrower of the two at three, while still
// modeling the machine as out-of-order.

// True when the instruction both defines CPSR and is predicated.
def IsCPSRDefinedAndPredicated : CheckAll<[IsCPSRDefined, IsPredicated]>;
def IsCPSRDefinedAndPredicatedPred :
  MCSchedPredicate<IsCPSRDefinedAndPredicated>;

// Cortex A57 rev. r1p0 or later (false = r0px).
// Hard-wired to FalsePred, i.e. the model always takes the r0px variants.
def IsR1P0AndLaterPred : MCSchedPredicate<FalsePred>;

// Addrmode3 offset-register checks at operand index 2, 3 and 4.
// NOTE(review): the names read as "has a register offset", yet the check is
// CheckInvalidRegOperand, whereas Am3NegativeRegOffset below relies on
// CheckValidRegOperand for the register-offset case - confirm the intended
// polarity against the users of these predicates.
def IsLdrAm3RegOffPred   : MCSchedPredicate<CheckInvalidRegOperand<2>>;
def IsLdrAm3RegOffPredX2 : MCSchedPredicate<CheckInvalidRegOperand<3>>;
def IsLdrAm3RegOffPredX3 : MCSchedPredicate<CheckInvalidRegOperand<4>>;

// If Addrmode3 contains "minus register": operand n must be a valid register
// and the AM3 opcode held in operand n+1 must be "sub".
class Am3NegativeRegOffset<int n> : MCSchedPredicate<CheckAll<[
  CheckValidRegOperand<n>,
  CheckAM3OpSub<!add(n, 1)>
]>>;

def IsLdrAm3NegRegOffPred   : Am3NegativeRegOffset<2>;
def IsLdrAm3NegRegOffPredX2 : Am3NegativeRegOffset<3>;
def IsLdrAm3NegRegOffPredX3 : Am3NegativeRegOffset<4>;

// Load, scaled register offset, not plus LSL2.
// Holds when the AM2 operand is neither "no shift" nor "add, LSL, offset 2".
class ScaledRegNotPlusLsl2<int n> : CheckNot<
  CheckAny<[
    CheckAM2NoShift<n>,
    CheckAll<[
      CheckAM2OpAdd<n>,
      CheckAM2ShiftLSL<n>,
      CheckAM2Offset<n, 2>
    ]>
  ]>
>;

def IsLdstsoScaledNotOptimalPredX0 : MCSchedPredicate<ScaledRegNotPlusLsl2<2>>;
def IsLdstsoScaledNotOptimalPred   : MCSchedPredicate<ScaledRegNotPlusLsl2<3>>;
def IsLdstsoScaledNotOptimalPredX2 : MCSchedPredicate<ScaledRegNotPlusLsl2<4>>;

// Any shifted (scaled) AM2 operand at index 4.
def IsLdstsoScaledPredX2 : MCSchedPredicate<CheckNot<CheckAM2NoShift<4>>>;

// AM2 "minus register" at operand index 2, 3 and 4.
def IsLdstsoMinusRegPredX0 : MCSchedPredicate<CheckAM2OpSub<2>>;
def IsLdstsoMinusRegPred   : MCSchedPredicate<CheckAM2OpSub<3>>;
def IsLdstsoMinusRegPredX2 : MCSchedPredicate<CheckAM2OpSub<4>>;

// Holder for a list of write resources; the load/store-multiple variants
// slice prefixes out of "Writes".
class A57WriteLMOpsListType<list<SchedWriteRes> writes> {
  list <SchedWriteRes> Writes = writes;
  SchedMachineModel SchedModel = ?;
}

// *** Common description and scheduling model parameters taken from AArch64 ***
// (AArch64SchedA57.td)
def CortexA57Model : SchedMachineModel {
  let IssueWidth        =   3; // 3-way decode and dispatch
  let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
  let LoadLatency       =   4; // Optimistic load latency
  let MispredictPenalty =  16; // Fetch + Decode/Rename/Dispatch + Branch

  // Enable partial & runtime unrolling.
  let LoopMicroOpBufferSize = 16;
  let CompleteModel = 1;

  // FIXME: Remove when all errors have been fixed.
  let FullInstRWOverlapCheck = 0;

  let UnsupportedFeatures = [HasV8_1MMainline, HasMVEInt, HasMVEFloat, IsMClass,
                             HasFPRegsV8_1M, HasFP16FML, HasMatMulInt8, HasBF16];
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Cortex-A57.
// Cortex A-57 has 8 pipelines that each has its own 8-entry queue where
// micro-ops wait for their operands and then issue out-of-order.

def A57UnitB : ProcResource<1>;  // Type B micro-ops
def A57UnitI : ProcResource<2>;  // Type I micro-ops
def A57UnitM : ProcResource<1>;  // Type M micro-ops
def A57UnitL : ProcResource<1>;  // Type L micro-ops
def A57UnitS : ProcResource<1>;  // Type S micro-ops

def A57UnitX : ProcResource<1>;  // Type X micro-ops (F1)
def A57UnitW : ProcResource<1>;  // Type W micro-ops (F0)

let SchedModel = CortexA57Model in {
  // Type V micro-ops may go to either X or W.
  def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>;
}

let SchedModel = CortexA57Model in {

//===----------------------------------------------------------------------===//
// Define customized scheduler read/write types specific to the Cortex-A57.
include "ARMScheduleA57WriteRes.td"

// To have "CompleteModel = 1", support of pseudos and special instructions
def : InstRW<[WriteNoop], (instregex
  "(t)?BKPT$", "(t2)?CDP(2)?$", "(t2)?CLREX$", "CONSTPOOL_ENTRY$",
  "COPY_STRUCT_BYVAL_I32$", "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$",
  "(t2)?DSB$", "ERET$", "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$",
  "ITasm$", "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC",
  "SPACE", "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "(t2|t)?UDF$",
  "t2DCPS", "t2SG", "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT",
  "t__brkdiv0")>;

def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>;

// Specific memory instrs
def : InstRW<[WriteNoop, WriteNoop], (instregex
  "(t2)?LDA", "(t2)?LDC", "(t2)?STC", "(t2)?STL", "(t2)?LDREX", "(t2)?STREX",
  "MEMCPY")>;

// Coprocessor moves
def : InstRW<[WriteNoop, WriteNoop], (instregex
  "(t2)?MCR(2|R|R2)?$",
  "(t2)?MRC(2)?$",
  "(t2)?MRRC(2)?$",
  "(t2)?MRS(banked|sys|_AR|_M|sys_AR)?$",
  "(t2)?MSR(banked|i|_AR|_M)?$")>;

// Deprecated instructions
def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;

// Pseudos
def : InstRW<[WriteNoop], (instregex
  "(t2)?ABS$",
  "(t)?ADJCALLSTACKDOWN$",
  "(t)?ADJCALLSTACKUP$",
  "(t2|t)?Int_eh_sjlj",
  "tLDRpci_pic",
  "(t2)?SUBS_PC_LR",
  "JUMPTABLE",
  "tInt_WIN_eh_sjlj_longjmp",
  "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
  "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
  "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
  "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
  "WIN__CHKSTK",
  "WIN__DBZCHK")>;

// Miscellaneous
// -----------------------------------------------------------------------------

def : InstRW<[A57Write_1cyc_1I], (instrs COPY)>;

// --- 3.2 Branch Instructions ---
// B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ

def : InstRW<[A57Write_1cyc_1B], (instregex
  "(t2|t)?B$", "t?BX", "(t2|t)?Bcc$", "t?TAILJMP(d|r)", "TCRETURN(d|r)i",
  "tBfar", "tCBN?Z")>;
def : InstRW<[A57Write_1cyc_1B_1I], (instregex
  "t?BL$", "BL_pred$", "t?BLXi", "t?TPsoft")>;
def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BLX", "tBLX(NS)?r")>;
// Pseudos
def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BCCi64", "BCCZi64")>;
def : InstRW<[A57Write_3cyc_1B_1I], (instregex
  "BR_JTadd", "t?BR_JTr", "t2BR_JT", "t2BXJ", "(t2)?TB(B|H)(_JT)?$",
  "tBRIND")>;
def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>;

// --- 3.3 Arithmetic and Logical Instructions ---
// ADD{S}, ADC{S}, ADR, AND{S}, BIC{S}, CMN, CMP, EOR{S}, ORN{S}, ORR{S},
// RSB{S}, RSC{S}, SUB{S}, SBC{S}, TEQ, TST

def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;

// Check branch forms of ALU ops:
// check reg 0 for ARM_AM::PC
// if so adds 2 cyc to latency, 1 uop, 1 res cycle for A57UnitB
class A57BranchForm<SchedWriteRes non_br> :
  BranchWriteRes<2, 1, [A57UnitB], [1], non_br>;

// Shift by register, conditional or unconditional.
// TODO: according to the doc, the conditional form uses I0/I1 and the
// unconditional form uses M. Why would the more complex instruction use the
// simpler pipeline? This may be an error in the doc.
// Register-shifted ALU writes; each variant is wrapped in the branch-form
// check on operand 0.
def A57WriteALUsr : SchedWriteVariant<[
  SchedVar<IsPredicatedPred,
           [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
  SchedVar<NoSchedPred,
           [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
]>;
def A57WriteALUSsr : SchedWriteVariant<[
  SchedVar<IsPredicatedPred,
           [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1I>>]>,
  SchedVar<NoSchedPred,
           [CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>]>
]>;
def A57ReadALUsr : SchedReadVariant<[
  SchedVar<IsPredicatedPred, [ReadDefault]>,
  SchedVar<NoSchedPred,      [ReadDefault]>
]>;
def : SchedAlias<WriteALUsi,
                 CheckBranchForm<0, A57BranchForm<A57Write_2cyc_1M>>>;
def : SchedAlias<WriteALUsr,  A57WriteALUsr>;
def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
def : SchedAlias<ReadALUsr,   A57ReadALUsr>;

// Compare with register-shifted operand: predicated vs. unpredicated.
def A57WriteCMPsr : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
  SchedVar<NoSchedPred,      [A57Write_2cyc_1M]>
]>;
def : SchedAlias<WriteCMP,   A57Write_1cyc_1I>;
def : SchedAlias<WriteCMPsi, A57Write_2cyc_1M>;
def : SchedAlias<WriteCMPsr, A57WriteCMPsr>;

// --- 3.4 Move and Shift Instructions ---
// Move, basic
// MOV{S}, MOVW, MVN{S}
def : InstRW<[A57Write_1cyc_1I], (instregex
  "MOV(r|i|i16|r_TC)", "(t2)?MVN(CC)?(r|i)", "BMOVPCB_CALL", "BMOVPCRX_CALL",
  "MOVCC(r|i|i16|i32imm)", "tMOV", "tMVN")>;

// Move, shift by immed, setflags/no setflags
// (ASR, LSL, LSR, ROR, RRX)=MOVsi, MVN
// setflags = isCPSRDefined
def A57WriteMOVsi : SchedWriteVariant<[
  SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
  SchedVar<NoSchedPred,       [A57Write_1cyc_1I]>
]>;
def : InstRW<[A57WriteMOVsi], (instregex
  "MOV(CC)?si", "MVNsi", "ASRi", "(t2|t)ASRri", "LSRi", "(t2|t)LSRri", "LSLi",
  "(t2|t)LSLri", "RORi", "(t2|t)RORri", "(t2)?RRX", "t2MOV", "tROR")>;

// Shift by register, conditional or unconditional, setflags/no setflags.
// Variants are tried in order, so the combined predicate comes first.
def A57WriteMOVsr : SchedWriteVariant<[
  SchedVar<IsCPSRDefinedAndPredicatedPred, [A57Write_2cyc_1I]>,
  SchedVar<IsCPSRDefinedPred,              [A57Write_2cyc_1M]>,
  SchedVar<IsPredicatedPred,               [A57Write_2cyc_1I]>,
  SchedVar<NoSchedPred,                    [A57Write_1cyc_1I]>
]>;
def : InstRW<[A57WriteMOVsr], (instregex
  "MOV(CC)?sr", "MVNsr", "t2MVNs", "ASRr", "(t2|t)ASRrr", "LSRr",
  "(t2|t)LSRrr", "LSLr", "(t2|t)?LSLrr", "RORr", "(t2|t)RORrr")>;

// Move, top
// MOVT - A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later
def A57WriteMOVT : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_1cyc_1I]>,
  SchedVar<NoSchedPred,        [A57Write_2cyc_1M]>
]>;
def : InstRW<[A57WriteMOVT], (instregex "MOVTi16")>;

// Three-write sequences used by the global-address pseudos.
def A57WriteI2pc : WriteSequence<[
  A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_1cyc_1I
]>;
def A57WriteI2ld : WriteSequence<[
  A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_4cyc_1L
]>;
def : InstRW<[A57WriteI2pc], (instregex "MOV_ga_pcrel")>;
def : InstRW<[A57WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;

// +2cyc for branch forms
def : InstRW<[A57Write_3cyc_1I], (instregex "MOVPC(LR|RX)")>;

// --- 3.5 Divide and Multiply Instructions ---
// Divide: SDIV, UDIV
// latency from documentation: 4 - 20, maximum taken
def : SchedAlias<WriteDIV, A57Write_20cyc_1M>;
// Multiply: tMul not bound to common WriteRes types
def : InstRW<[A57Write_3cyc_1M], (instregex "tMUL")>;
def : SchedAlias<WriteMUL16, A57Write_3cyc_1M>;
def : SchedAlias<WriteMUL32, A57Write_3cyc_1M>;
def : ReadAdvance<ReadMUL, 0>;

// Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB,
// SMLAWT, SMLAD{X}, SMLSD{X}, SMMLA{R}, SMMLS{R}
// Multiply-accumulate pipelines support late-forwarding of accumulate operands
// from similar uops, allowing a typical sequence of multiply-accumulate uops
// to issue one every 1 cycle (sched advance = 2).
// Multiply-accumulate write on the M unit, 3-cycle latency.
def A57WriteMLA : SchedWriteRes<[A57UnitM]> {
  let Latency = 3;
}
// Long multiply-accumulate: flag-setting form vs. plain form.
def A57WriteMLAL : SchedWriteVariant<[
  SchedVar<IsCPSRDefinedPred, [A57Write_5cyc_1I_1M]>,
  SchedVar<NoSchedPred,       [A57Write_4cyc_1M]>
]>;

// Accumulator read: advanced by 2 cycles when fed by the writes listed.
def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;

def : InstRW<[A57WriteMLA], (instregex
  "t2SMLAD", "t2SMLADX", "t2SMLSD", "t2SMLSDX")>;

def : SchedAlias<WriteMAC16, A57WriteMLA>;
def : SchedAlias<WriteMAC32, A57WriteMLA>;
def : SchedAlias<ReadMAC,    A57ReadMLA>;

def : SchedAlias<WriteMAC64Lo, A57WriteMLAL>;
def : SchedAlias<WriteMAC64Hi, A57WriteMLAL>;

// Multiply long: SMULL, UMULL
def : SchedAlias<WriteMUL64Lo, A57Write_4cyc_1M>;
def : SchedAlias<WriteMUL64Hi, A57Write_4cyc_1M>;

// --- 3.6 Saturating and Parallel Arithmetic Instructions ---
// Parallel arith
// SADD16, SADD8, SSUB16, SSUB8, UADD16, UADD8, USUB16, USUB8
// Conditional GE-setting instructions require three extra uops
// and two additional cycles to conditionally update the GE field.
// Parallel arithmetic: the predicated form costs two more cycles.
def A57WriteParArith : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_4cyc_1I_1M]>,
  SchedVar<NoSchedPred,      [A57Write_2cyc_1I_1M]>
]>;
def : InstRW<[A57WriteParArith], (instregex
  "(t2)?SADD(16|8)", "(t2)?SSUB(16|8)",
  "(t2)?UADD(16|8)", "(t2)?USUB(16|8)")>;

// Parallel arith with exchange: SASX, SSAX, UASX, USAX
def A57WriteParArithExch : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_5cyc_1I_1M]>,
  SchedVar<NoSchedPred,      [A57Write_3cyc_1I_1M]>
]>;
def : InstRW<[A57WriteParArithExch], (instregex
  "(t2)?SASX", "(t2)?SSAX", "(t2)?UASX", "(t2)?USAX")>;

// Parallel halving arith
// SHADD16, SHADD8, SHSUB16, SHSUB8, UHADD16, UHADD8, UHSUB16, UHSUB8
def : InstRW<[A57Write_2cyc_1M], (instregex
  "(t2)?SHADD(16|8)", "(t2)?SHSUB(16|8)",
  "(t2)?UHADD(16|8)", "(t2)?UHSUB(16|8)")>;

// Parallel halving arith with exchange
// SHASX, SHSAX, UHASX, UHSAX
def : InstRW<[A57Write_3cyc_1I_1M], (instregex
  "(t2)?SHASX", "(t2)?SHSAX", "(t2)?UHASX", "(t2)?UHSAX")>;

// Parallel saturating arith
// QADD16, QADD8, QSUB16, QSUB8, UQADD16, UQADD8, UQSUB16, UQSUB8
def : InstRW<[A57Write_2cyc_1M], (instregex
  "QADD(16|8)", "QSUB(16|8)", "UQADD(16|8)", "UQSUB(16|8)",
  "t2(U?)QADD", "t2(U?)QSUB")>;

// Parallel saturating arith with exchange
// QASX, QSAX, UQASX, UQSAX
def : InstRW<[A57Write_3cyc_1I_1M], (instregex
  "(t2)?QASX", "(t2)?QSAX", "(t2)?UQASX", "(t2)?UQSAX")>;

// Saturate: SSAT, SSAT16, USAT, USAT16
def : InstRW<[A57Write_2cyc_1M], (instregex
  "(t2)?SSAT(16)?", "(t2)?USAT(16)?")>;

// Saturating arith: QADD, QSUB
def : InstRW<[A57Write_2cyc_1M], (instregex "QADD$", "QSUB$")>;

// Saturating doubling arith: QDADD, QDSUB
def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QDADD", "(t2)?QDSUB")>;

// --- 3.7 Miscellaneous Data-Processing Instructions ---
// Bit field extract: SBFX, UBFX
def : InstRW<[A57Write_1cyc_1I], (instregex "(t2)?SBFX", "(t2)?UBFX")>;

// Bit field insert/clear: BFI, BFC
def : InstRW<[A57Write_2cyc_1M], (instregex "(t2)?BFI", "(t2)?BFC")>;

// Select bytes, conditional/unconditional
def A57WriteSEL : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
  SchedVar<NoSchedPred,      [A57Write_1cyc_1I]>
]>;
def : InstRW<[A57WriteSEL], (instregex "(t2)?SEL")>;

// Sign/zero extend, normal: SXTB, SXTH, UXTB, UXTH
def : InstRW<[A57Write_1cyc_1I], (instregex
  "(t2|t)?SXT(B|H)$", "(t2|t)?UXT(B|H)$")>;

// Sign/zero extend and add, normal: SXTAB, SXTAH, UXTAB, UXTAH
def : InstRW<[A57Write_2cyc_1M], (instregex
  "(t2)?SXTA(B|H)$", "(t2)?UXTA(B|H)$")>;

// Sign/zero extend and add, parallel: SXTAB16, UXTAB16
def : InstRW<[A57Write_4cyc_1M], (instregex "(t2)?SXTAB16", "(t2)?UXTAB16")>;

// Sum of absolute differences: USAD8, USADA8
def : InstRW<[A57Write_3cyc_1M], (instregex "(t2)?USAD8", "(t2)?USADA8")>;

// --- 3.8 Load Instructions ---

// Load, immed offset
// LDR and LDRB have LDRi12 and LDRBi12 forms for immediate
def : InstRW<[A57Write_4cyc_1L], (instregex
  "LDRi12", "LDRBi12", "LDRcp", "(t2|t)?LDRConstPool",
  "LDRLIT_ga_(pcrel|abs)", "PICLDR", "tLDR")>;

def : InstRW<[A57Write_4cyc_1L], (instregex
  "t2LDRS?(B|H)?(pcrel|T|i8|i12|pci|pci_pic|s)?$")>;

// For "Load, register offset, minus" we need +1cyc, +1I
def A57WriteLdrAm3 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPred, [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,           [A57Write_4cyc_1L]>
]>;
def : InstRW<[A57WriteLdrAm3], (instregex "LDR(H|SH|SB)$")>;
def A57WriteLdrAm3X2 : SchedWriteVariant<[
  SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,             [A57Write_4cyc_1L]>
]>;
def : InstRW<[A57WriteLdrAm3X2, A57WriteLdrAm3X2], (instregex "LDRD$")>;
def : InstRW<[A57Write_4cyc_1L, A57Write_4cyc_1L], (instregex "t2LDRDi8")>;

// AM2 register-offset loads: +1cyc/+1I for sub-optimal scaling or minus reg.
def A57WriteLdrAmLDSTSO : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_5cyc_1I_1L]>,
  SchedVar<IsLdstsoMinusRegPred,         [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,                  [A57Write_4cyc_1L]>
]>;
def : InstRW<[A57WriteLdrAmLDSTSO], (instregex "LDRrs", "LDRBrs")>;

// Base-register writeback results: no resources, zero micro-ops, and a
// latency of 1, 2 or 3 cycles.
def A57WrBackOne : SchedWriteRes<[]> {
  let Latency     = 1;
  let NumMicroOps = 0;
}
def A57WrBackTwo : SchedWriteRes<[]> {
  let Latency     = 2;
  let NumMicroOps = 0;
}
def A57WrBackThree : SchedWriteRes<[]> {
  let Latency     = 3;
  let NumMicroOps = 0;
}

// --- LDR pre-indexed ---
// Load, immed pre-indexed (4 cyc for load result, 1 cyc for Base update)
def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex
  "LDR_PRE_IMM", "LDRB_PRE_IMM", "t2LDRB_PRE")>;

// Load, register pre-indexed (4 cyc for load result, 2 cyc for Base update)
// (5 cyc load result for not-lsl2 scaled)
def A57WriteLdrAmLDSTSOPre : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,                    [A57Write_4cyc_1L_1I]>
]>;
def : InstRW<[A57WriteLdrAmLDSTSOPre, A57WrBackTwo], (instregex
  "LDR_PRE_REG", "LDRB_PRE_REG")>;

def A57WriteLdrAm3PreWrBack : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPredX2, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,          [A57WrBackOne]>
]>;
def : InstRW<[A57Write_4cyc_1L, A57WriteLdrAm3PreWrBack], (instregex
  "LDR(H|SH|SB)_PRE")>;
def : InstRW<[A57Write_4cyc_1L, A57WrBackOne], (instregex
  "t2LDR(H|SH|SB)?_PRE")>;

// LDRD pre-indexed: 5(2) cyc for reg, 4(1) cyc for imm.
// LDRD pre-indexed result and writeback variants, keyed on operand 4.
def A57WriteLdrDAm3Pre : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPredX3, [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,          [A57Write_4cyc_1L_1I]>
]>;
def A57WriteLdrDAm3PreWrBack : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,          [A57WrBackOne]>
]>;
def : InstRW<[A57WriteLdrDAm3Pre, A57WriteLdrDAm3Pre, A57WriteLdrDAm3PreWrBack],
             (instregex "LDRD_PRE")>;
def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
             (instregex "t2LDRD_PRE")>;

// --- LDR post-indexed ---
def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex
  "LDR(T?)_POST_IMM", "LDRB(T?)_POST_IMM", "LDR(SB|H|SH)Ti", "t2LDRB_POST")>;

def A57WriteLdrAm3PostWrBack : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPred, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,        [A57WrBackOne]>
]>;
def : InstRW<[A57Write_4cyc_1L_1I, A57WriteLdrAm3PostWrBack], (instregex
  "LDR(H|SH|SB)_POST")>;
def : InstRW<[A57Write_4cyc_1L, A57WrBackOne], (instregex
  "t2LDR(H|SH|SB)?_POST")>;

def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex
  "LDR_POST_REG", "LDRB_POST_REG", "LDR(B?)T_POST$")>;

def A57WriteLdrTRegPost : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledPredX2, [A57Write_4cyc_1I_1L_1M]>,
  SchedVar<NoSchedPred,          [A57Write_4cyc_1L_1I]>
]>;
def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledPredX2, [A57WrBackThree]>,
  SchedVar<NoSchedPred,          [A57WrBackTwo]>
]>;
// 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L"
def : InstRW<[A57WriteLdrTRegPost, A57WriteLdrTRegPostWrBack], (instregex
  "LDRT_POST_REG", "LDRBT_POST_REG")>;

def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR(SB|H|SH)Tr")>;

def A57WriteLdrAm3PostWrBackX3 : SchedWriteVariant<[
  SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
  SchedVar<NoSchedPred,          [A57WrBackOne]>
]>;
// LDRD post-indexed: 4(2) cyc for reg, 4(1) cyc for imm.
def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
              A57WriteLdrAm3PostWrBackX3], (instregex "LDRD_POST")>;
def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
             (instregex "t2LDRD_POST")>;

// --- Preload instructions ---
// Preload, immed offset
def : InstRW<[A57Write_4cyc_1L], (instregex
  "(t2)?PLDi12", "(t2)?PLDWi12", "t2PLDW?(i8|pci|s)", "(t2)?PLI")>;

// Preload, register offset,
// 5cyc "I0/I1,L" for minus reg or scaled not plus lsl2
// otherwise 4cyc "L"
def A57WritePLD : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledNotOptimalPredX0, [A57Write_5cyc_1I_1L]>,
  SchedVar<IsLdstsoMinusRegPredX0,         [A57Write_5cyc_1I_1L]>,
  SchedVar<NoSchedPred,                    [A57Write_4cyc_1L]>
]>;
def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>;

// --- Load multiple instructions ---
// One predicate pair per NumAddr in 1..8, each matching two consecutive
// operand counts: 2*NumAddr+2 / +3 for the plain predicate and
// 2*NumAddr+3 / +4 for the "Upd" predicate.
foreach NumAddr = 1-8 in {
  def A57LMAddrPred#NumAddr : MCSchedPredicate<CheckAny<[
    CheckNumOperands<!add(!shl(NumAddr, 1), 2)>,
    CheckNumOperands<!add(!shl(NumAddr, 1), 3)>
  ]>>;
  def A57LMAddrUpdPred#NumAddr : MCSchedPredicate<CheckAny<[
    CheckNumOperands<!add(!shl(NumAddr, 1), 3)>,
    CheckNumOperands<!add(!shl(NumAddr, 1), 4)>
  ]>>;
}

// LDM, base register not in the register list: latencies 3..10 in pairs.
def A57LDMOpsListNoregin : A57WriteLMOpsListType<[
  A57Write_3cyc_1L,  A57Write_3cyc_1L,
  A57Write_4cyc_1L,  A57Write_4cyc_1L,
  A57Write_5cyc_1L,  A57Write_5cyc_1L,
  A57Write_6cyc_1L,  A57Write_6cyc_1L,
  A57Write_7cyc_1L,  A57Write_7cyc_1L,
  A57Write_8cyc_1L,  A57Write_8cyc_1L,
  A57Write_9cyc_1L,  A57Write_9cyc_1L,
  A57Write_10cyc_1L, A57Write_10cyc_1L
]>;
def A57WriteLDMnoreginlist : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57LDMOpsListNoregin.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57LDMOpsListNoregin.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57LDMOpsListNoregin.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57LDMOpsListNoregin.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57LDMOpsListNoregin.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57LDMOpsListNoregin.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57LDMOpsListNoregin.Writes[0-13]>,
  SchedVar<A57LMAddrPred8, A57LDMOpsListNoregin.Writes[0-15]>,
  SchedVar<NoSchedPred,    A57LDMOpsListNoregin.Writes[0-15]>
]> { let Variadic=1; }

// LDM, base register in the register list: latencies 4..11 in pairs.
def A57LDMOpsListRegin : A57WriteLMOpsListType<[
  A57Write_4cyc_1L_1I,  A57Write_4cyc_1L_1I,
  A57Write_5cyc_1L_1I,  A57Write_5cyc_1L_1I,
  A57Write_6cyc_1L_1I,  A57Write_6cyc_1L_1I,
  A57Write_7cyc_1L_1I,  A57Write_7cyc_1L_1I,
  A57Write_8cyc_1L_1I,  A57Write_8cyc_1L_1I,
  A57Write_9cyc_1L_1I,  A57Write_9cyc_1L_1I,
  A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
  A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I
]>;
def A57WriteLDMreginlist : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57LDMOpsListRegin.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57LDMOpsListRegin.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57LDMOpsListRegin.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57LDMOpsListRegin.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57LDMOpsListRegin.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57LDMOpsListRegin.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57LDMOpsListRegin.Writes[0-13]>,
  SchedVar<A57LMAddrPred8, A57LDMOpsListRegin.Writes[0-15]>,
  SchedVar<NoSchedPred,    A57LDMOpsListRegin.Writes[0-15]>
]> { let Variadic=1; }

// LDM with writeback: the writeback write comes first, then the load writes.
def A57LDMOpsList_Upd : A57WriteLMOpsListType<[
  A57WrBackOne,
  A57Write_3cyc_1L_1I,  A57Write_3cyc_1L_1I,
  A57Write_4cyc_1L_1I,  A57Write_4cyc_1L_1I,
  A57Write_5cyc_1L_1I,  A57Write_5cyc_1L_1I,
  A57Write_6cyc_1L_1I,  A57Write_6cyc_1L_1I,
  A57Write_7cyc_1L_1I,  A57Write_7cyc_1L_1I,
  A57Write_8cyc_1L_1I,  A57Write_8cyc_1L_1I,
  A57Write_9cyc_1L_1I,  A57Write_9cyc_1L_1I,
  A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I
]>;
def A57WriteLDM_Upd : SchedWriteVariant<[
  SchedVar<A57LMAddrUpdPred1, A57LDMOpsList_Upd.Writes[0-2]>,
  SchedVar<A57LMAddrUpdPred2, A57LDMOpsList_Upd.Writes[0-4]>,
  SchedVar<A57LMAddrUpdPred3, A57LDMOpsList_Upd.Writes[0-6]>,
  SchedVar<A57LMAddrUpdPred4, A57LDMOpsList_Upd.Writes[0-8]>,
  SchedVar<A57LMAddrUpdPred5, A57LDMOpsList_Upd.Writes[0-10]>,
  SchedVar<A57LMAddrUpdPred6, A57LDMOpsList_Upd.Writes[0-12]>,
  SchedVar<A57LMAddrUpdPred7, A57LDMOpsList_Upd.Writes[0-14]>,
  SchedVar<A57LMAddrUpdPred8, A57LDMOpsList_Upd.Writes[0-16]>,
  SchedVar<NoSchedPred,       A57LDMOpsList_Upd.Writes[0-16]>
]> { let Variadic=1; }

// Top-level LDM write: choose by whether the base register is in the list.
def A57WriteLDM : SchedWriteVariant<[
  SchedVar<IsLDMBaseRegInListPred, [A57WriteLDMreginlist]>,
  SchedVar<NoSchedPred,            [A57WriteLDMnoreginlist]>
]> { let Variadic=1; }

def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>;

// TODO: no writeback latency defined in documentation (implemented as 1 cyc)
def : InstRW<[A57WriteLDM_Upd], (instregex
  "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>;

def : InstRW<[A57Write_5cyc_1L], (instregex "VLLDM")>;

// --- 3.9 Store Instructions ---

// Store, immed offset
def : InstRW<[A57Write_1cyc_1S], (instregex
  "STRi12", "STRBi12", "PICSTR", "t2STR(B?)(T|i12|i8|s)", "t2STRDi8",
  "t2STRH(i12|i8|s)", "tSTR")>;

// Store, register offset
// For minus or for not plus lsl2 scaled we need 3cyc "I0/I1, S",
// otherwise 1cyc S.
def A57WriteStrAmLDSTSO : SchedWriteVariant<[
  SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_3cyc_1I_1S]>,
  SchedVar<IsLdstsoMinusRegPred,         [A57Write_3cyc_1I_1S]>,
  SchedVar<NoSchedPred,                  [A57Write_1cyc_1S]>
]>;
def : InstRW<[A57WriteStrAmLDSTSO], (instregex "STRrs", "STRBrs")>;

// STRH,STRD: 3cyc "I0/I1, S" for minus reg, 1cyc S for imm or for plus reg.
614def A57WriteStrAm3 : SchedWriteVariant<[ 615 SchedVar<IsLdrAm3NegRegOffPred, [A57Write_3cyc_1I_1S]>, 616 SchedVar<NoSchedPred, [A57Write_1cyc_1S]> 617]>; 618def : InstRW<[A57WriteStrAm3], (instregex "STRH$")>; 619def A57WriteStrAm3X2 : SchedWriteVariant<[ 620 SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>, 621 SchedVar<NoSchedPred, [A57Write_1cyc_1S]> 622]>; 623def : InstRW<[A57WriteStrAm3X2], (instregex "STRD$")>; 624 625// Store, immed pre-indexed (1cyc "S, I0/I1", 1cyc writeback) 626def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR_PRE_IMM", 627 "STRB_PRE_IMM", "STR(B)?(r|i)_preidx", "(t2)?STRH_(preidx|PRE)", 628 "t2STR(B?)_(PRE|preidx)", "t2STRD_PRE")>; 629 630// Store, register pre-indexed: 631// 1(1) "S, I0/I1" for plus reg 632// 3(2) "I0/I1, S" for minus reg 633// 1(2) "S, M" for scaled plus lsl2 634// 3(2) "I0/I1, S" for other scaled 635def A57WriteStrAmLDSTSOPre : SchedWriteVariant<[ 636 SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_3cyc_1I_1S]>, 637 SchedVar<IsLdstsoMinusRegPredX2, [A57Write_3cyc_1I_1S]>, 638 SchedVar<IsLdstsoScaledPredX2, [A57Write_1cyc_1S_1M]>, 639 SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]> 640]>; 641def A57WriteStrAmLDSTSOPreWrBack : SchedWriteVariant<[ 642 SchedVar<IsLdstsoScaledPredX2, [A57WrBackTwo]>, 643 SchedVar<IsLdstsoMinusRegPredX2, [A57WrBackTwo]>, 644 SchedVar<NoSchedPred, [A57WrBackOne]> 645]>; 646def : InstRW<[A57WriteStrAmLDSTSOPreWrBack, A57WriteStrAmLDSTSOPre], 647 (instregex "STR_PRE_REG", "STRB_PRE_REG")>; 648 649// pre-indexed STRH/STRD (STRH_PRE, STRD_PRE) 650// 1(1) "S, I0/I1" for imm or reg plus 651// 3(2) "I0/I1, S" for reg minus 652def A57WriteStrAm3PreX2 : SchedWriteVariant<[ 653 SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>, 654 SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]> 655]>; 656def A57WriteStrAm3PreWrBackX2 : SchedWriteVariant<[ 657 SchedVar<IsLdrAm3NegRegOffPredX2, [A57WrBackTwo]>, 658 SchedVar<NoSchedPred, [A57WrBackOne]> 659]>; 660def : 
InstRW<[A57WriteStrAm3PreWrBackX2, A57WriteStrAm3PreX2], 661 (instregex "STRH_PRE")>; 662 663def A57WriteStrAm3PreX3 : SchedWriteVariant<[ 664 SchedVar<IsLdrAm3NegRegOffPredX3, [A57Write_3cyc_1I_1S]>, 665 SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]> 666]>; 667def A57WriteStrAm3PreWrBackX3 : SchedWriteVariant<[ 668 SchedVar<IsLdrAm3NegRegOffPredX3, [A57WrBackTwo]>, 669 SchedVar<NoSchedPred, [A57WrBackOne]> 670]>; 671def : InstRW<[A57WriteStrAm3PreWrBackX3, A57WriteStrAm3PreX3], 672 (instregex "STRD_PRE")>; 673 674def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR(T?)_POST_IMM", 675 "STRB(T?)_POST_IMM", "t2STR(B?)_POST")>; 676 677// 1(2) "S, M" for STR/STRB register post-indexed (both scaled or not) 678def : InstRW<[A57WrBackTwo, A57Write_1cyc_1S_1M], (instregex "STR(T?)_POST_REG", 679 "STRB(T?)_POST_REG", "STR(B?)T_POST$")>; 680 681// post-indexed STRH/STRD(STRH_POST, STRD_POST), STRHTi, STRHTr 682// 1(1) "S, I0/I1" both for reg or imm 683def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], 684 (instregex "(t2)?STR(H|D)_POST", "STRHT(i|r)", "t2STRHT")>; 685 686// --- Store multiple instructions --- 687// TODO: no writeback latency defined in documentation 688def A57WriteSTM : SchedWriteVariant<[ 689 SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>, 690 SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>, 691 SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>, 692 SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>, 693 SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>, 694 SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>, 695 SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>, 696 SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>, 697 SchedVar<NoSchedPred, [A57Write_2cyc_1S]> 698]>; 699def A57WriteSTM_Upd : SchedWriteVariant<[ 700 SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>, 701 SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>, 702 SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>, 703 SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>, 704 SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>, 705 
SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>, 706 SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>, 707 SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>, 708 SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]> 709]>; 710 711def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>; 712def : InstRW<[A57WrBackOne, A57WriteSTM_Upd], 713 (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>; 714 715def : InstRW<[A57Write_5cyc_1S], (instregex "VLSTM")>; 716 717// --- 3.10 FP Data Processing Instructions --- 718def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>; 719def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>; 720 721def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>; 722 723// fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional 724def A57WriteVcmp : SchedWriteVariant<[ 725 SchedVar<IsPredicatedPred, [A57Write_6cyc_1V_1X]>, 726 SchedVar<NoSchedPred, [A57Write_3cyc_1X]> 727]>; 728def : InstRW<[A57WriteVcmp], 729 (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>; 730 731// fp convert 732def : InstRW<[A57Write_5cyc_1V], (instregex 733 "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>; 734def : InstRW<[A57Write_5cyc_1V], (instregex "VTOSLS", "VTOUHS", "VTOULS")>; 735def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>; 736 737def : InstRW<[A57Write_5cyc_1V], (instregex "VJCVT")>; 738 739// FP round to integral 740def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>; 741 742// FP divide, FP square root 743def : SchedAlias<WriteFPDIV32, A57Write_17cyc_1W>; 744def : SchedAlias<WriteFPDIV64, A57Write_32cyc_1W>; 745def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>; 746def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>; 747 748def : InstRW<[A57Write_17cyc_1W], (instregex "VSQRTH")>; 749 750// FP max/min 751def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>; 752 753// FP multiply-accumulate pipelines support late forwarding of the result 754// from FP multiply μops to the 
// accumulate operands of an
// FP multiply-accumulate μop. The latter can potentially be issued 1 cycle
// after the FP multiply μop has been issued.
// FP multiply, FZ
def A57WriteVMUL : SchedWriteRes<[A57UnitV]> {
  let Latency = 5;
}

def : SchedAlias<WriteFPMUL32, A57WriteVMUL>;
def : SchedAlias<WriteFPMUL64, A57WriteVMUL>;
def : ReadAdvance<ReadFPMUL, 0>;

// FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate
// VFMA, VFMS, VFNMA, VFNMS, VMLA, VMLS, VNMLA, VNMLS
def A57WriteVFMA : SchedWriteRes<[A57UnitV]> {
  let Latency = 9;
}

// Latencies of chained operations:
//   VFMA: 9 cyc in the common case, 4 cyc for a VFMA->VFMA chain
//         (read advance of 5)
//   VMUL: 5 cyc in the common case, 1 cyc for a VMUL->VFMA chain
//         (read advance of 4)
// Currently there is no way to give the VFMA operand a different read advance
// depending on whether it comes from a VFMA or from a VMUL, so a read advance
// of 5 is used for both. The resulting zero latency (instead of one) for
// VMUL->VFMA should not break anything.
// The ASIMD VMUL/VFMA instructions are in the same situation.
// The per-producer form would have been:
//   def A57ReadVFMA : SchedRead;
//   def : ReadAdvance<A57ReadVFMA, 5, [A57WriteVFMA]>;
//   def : ReadAdvance<A57ReadVFMA, 4, [A57WriteVMUL]>;
def A57ReadVFMA5 : SchedReadAdvance<5, [A57WriteVFMA, A57WriteVMUL]>;

def : SchedAlias<WriteFPMAC32, A57WriteVFMA>;
def : SchedAlias<WriteFPMAC64, A57WriteVFMA>;
def : SchedAlias<ReadFPMAC,    A57ReadVFMA5>;

// VMLAH/VMLSH are not bound to scheduling classes by default, so they get a
// custom InstRW here:
def : InstRW<[A57WriteVFMA, A57ReadVFMA5, ReadFPMUL, ReadFPMUL],
             (instregex "VMLAH",
                        "VMLSH",
                        "VNMLAH",
                        "VNMLSH")>;

def : InstRW<[A57WriteVMUL],
             (instregex "VUDOTD",
                        "VSDOTD",
                        "VUDOTQ",
                        "VSDOTQ")>;

def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>;
def : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>;

// --- 3.11 FP Miscellaneous Instructions ---
// VMOV: 3cyc "F0/F1" for imm/reg
def : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>;
def : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>;

def : InstRW<[A57Write_3cyc_1V], (instregex "VINSH")>;

// 5cyc L for FP transfer, vfp to core reg,
// 5cyc L for FP transfer, core reg to vfp
def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>;
// VMOVRRS/VMOVRRD are declared in common code with one WriteFPMOV (instead of
// 2), so both results are given a write here.
def : InstRW<[A57Write_5cyc_1L, A57Write_5cyc_1L],
             (instregex "VMOV(RRS|RRD)")>;

// 8cyc "L, F0/F1" for FP transfer, core reg to upper or lower half of vfp D-reg
// Uses the "L" + "V" (F0/F1) write, like the other 8cyc "L, F0/F1" transfers
// in this model (VDUP, VSETLN), rather than the integer-pipe "L" + "I" write.
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VMOVDRR")>;

// --- 3.12 FP Load Instructions ---
def : InstRW<[A57Write_5cyc_1L], (instregex "VLDR(D|S|H)")>;

def : InstRW<[A57Write_5cyc_1L], (instregex "VLDMQIA$")>;

// FP load multiple (VLDM)
// Each variant below is variadic: the matching A57LMAddrPred<N> predicate
// selects a prefix slice of the corresponding write list, and NoSchedPred
// falls back to the whole 16-entry list.

// Unconditional VLDM.
def A57VLDMOpsListUncond : A57WriteLMOpsListType<[
  A57Write_5cyc_1L,  A57Write_5cyc_1L,
  A57Write_6cyc_1L,  A57Write_6cyc_1L,
  A57Write_7cyc_1L,  A57Write_7cyc_1L,
  A57Write_8cyc_1L,  A57Write_8cyc_1L,
  A57Write_9cyc_1L,  A57Write_9cyc_1L,
  A57Write_10cyc_1L, A57Write_10cyc_1L,
  A57Write_11cyc_1L, A57Write_11cyc_1L,
  A57Write_12cyc_1L, A57Write_12cyc_1L
]>;
def A57WriteVLDMuncond : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond.Writes[0-13]>,
  SchedVar<NoSchedPred,    A57VLDMOpsListUncond.Writes[0-15]>
]> {
  let Variadic = 1;
}

// Conditional (predicated) VLDM.
def A57VLDMOpsListCond : A57WriteLMOpsListType<[
  A57Write_5cyc_1L,  A57Write_6cyc_1L,
  A57Write_7cyc_1L,  A57Write_8cyc_1L,
  A57Write_9cyc_1L,  A57Write_10cyc_1L,
  A57Write_11cyc_1L, A57Write_12cyc_1L,
  A57Write_13cyc_1L, A57Write_14cyc_1L,
  A57Write_15cyc_1L, A57Write_16cyc_1L,
  A57Write_17cyc_1L, A57Write_18cyc_1L,
  A57Write_19cyc_1L, A57Write_20cyc_1L
]>;
def A57WriteVLDMcond : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57VLDMOpsListCond.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57VLDMOpsListCond.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57VLDMOpsListCond.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57VLDMOpsListCond.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57VLDMOpsListCond.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57VLDMOpsListCond.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57VLDMOpsListCond.Writes[0-13]>,
  SchedVar<NoSchedPred,    A57VLDMOpsListCond.Writes[0-15]>
]> {
  let Variadic = 1;
}

// Dispatch on predication between the two variants above.
def A57WriteVLDM : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57WriteVLDMcond]>,
  SchedVar<NoSchedPred,      [A57WriteVLDMuncond]>
]> {
  let Variadic = 1;
}

def : InstRW<[A57WriteVLDM], (instregex "VLDM(DIA|SIA)$")>;

// Unconditional VLDM with base register update ("L" plus "I").
def A57VLDMOpsListUncond_Upd : A57WriteLMOpsListType<[
  A57Write_5cyc_1L_1I,  A57Write_5cyc_1L_1I,
  A57Write_6cyc_1L_1I,  A57Write_6cyc_1L_1I,
  A57Write_7cyc_1L_1I,  A57Write_7cyc_1L_1I,
  A57Write_8cyc_1L_1I,  A57Write_8cyc_1L_1I,
  A57Write_9cyc_1L_1I,  A57Write_9cyc_1L_1I,
  A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
  A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I,
  A57Write_12cyc_1L_1I, A57Write_12cyc_1L_1I
]>;
def A57WriteVLDMuncond_UPD : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond_Upd.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond_Upd.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond_Upd.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond_Upd.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond_Upd.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond_Upd.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond_Upd.Writes[0-13]>,
  SchedVar<NoSchedPred,    A57VLDMOpsListUncond_Upd.Writes[0-15]>
]> {
  let Variadic = 1;
}

// Conditional VLDM with base register update.
def A57VLDMOpsListCond_Upd : A57WriteLMOpsListType<[
  A57Write_5cyc_1L_1I,  A57Write_6cyc_1L_1I,
  A57Write_7cyc_1L_1I,  A57Write_8cyc_1L_1I,
  A57Write_9cyc_1L_1I,  A57Write_10cyc_1L_1I,
  A57Write_11cyc_1L_1I, A57Write_12cyc_1L_1I,
  A57Write_13cyc_1L_1I, A57Write_14cyc_1L_1I,
  A57Write_15cyc_1L_1I, A57Write_16cyc_1L_1I,
  A57Write_17cyc_1L_1I, A57Write_18cyc_1L_1I,
  A57Write_19cyc_1L_1I, A57Write_20cyc_1L_1I
]>;
def A57WriteVLDMcond_UPD : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, A57VLDMOpsListCond_Upd.Writes[0-1]>,
  SchedVar<A57LMAddrPred2, A57VLDMOpsListCond_Upd.Writes[0-3]>,
  SchedVar<A57LMAddrPred3, A57VLDMOpsListCond_Upd.Writes[0-5]>,
  SchedVar<A57LMAddrPred4, A57VLDMOpsListCond_Upd.Writes[0-7]>,
  SchedVar<A57LMAddrPred5, A57VLDMOpsListCond_Upd.Writes[0-9]>,
  SchedVar<A57LMAddrPred6, A57VLDMOpsListCond_Upd.Writes[0-11]>,
  SchedVar<A57LMAddrPred7, A57VLDMOpsListCond_Upd.Writes[0-13]>,
  SchedVar<NoSchedPred,    A57VLDMOpsListCond_Upd.Writes[0-15]>
]> {
  let Variadic = 1;
}

def A57WriteVLDM_UPD : SchedWriteVariant<[
  SchedVar<IsPredicatedPred, [A57WriteVLDMcond_UPD]>,
  SchedVar<NoSchedPred,      [A57WriteVLDMuncond_UPD]>
]> {
  let Variadic = 1;
}

def : InstRW<[A57WrBackOne, A57WriteVLDM_UPD],
             (instregex "VLDM(DIA_UPD|DDB_UPD|SIA_UPD|SDB_UPD)")>;

// --- 3.13 FP Store Instructions ---
def : InstRW<[A57Write_1cyc_1S], (instregex "VSTR(D|S|H)")>;

def : InstRW<[A57Write_2cyc_1S], (instregex "VSTMQIA$")>;

// FP store multiple (VSTM)
// The write is selected by the A57LMAddrPred<N> predicates. In every variant
// the NoSchedPred fallback is the same write as the A57LMAddrPred2 entry.

// S-register form: 1..8 cyc "S".
def A57WriteVSTMs : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
  SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
  SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
  SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
  SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
  SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
  SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
  SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
  SchedVar<NoSchedPred,    [A57Write_2cyc_1S]>
]>;
// D-register form: 2..16 cyc "S" (twice the S-register latencies).
def A57WriteVSTMd : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S]>,
  SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S]>,
  SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S]>,
  SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S]>,
  SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S]>,
  SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S]>,
  SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S]>,
  SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S]>,
  SchedVar<NoSchedPred,    [A57Write_4cyc_1S]>
]>;
// S-register form with base register update ("S" plus "I").
def A57WriteVSTMs_Upd : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
  SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
  SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
  SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
  SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
  SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
  SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
  SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
  SchedVar<NoSchedPred,    [A57Write_2cyc_1S_1I]>
]>;
// D-register form with base register update.
// The fallback is the 4cyc write so that it agrees with A57WriteVSTMd (and
// with this variant's own A57LMAddrPred2 entry); it previously used the 2cyc
// write of the S-register variant.
def A57WriteVSTMd_Upd : SchedWriteVariant<[
  SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S_1I]>,
  SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S_1I]>,
  SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S_1I]>,
  SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S_1I]>,
  SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S_1I]>,
  SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S_1I]>,
  SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S_1I]>,
  SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S_1I]>,
  SchedVar<NoSchedPred,    [A57Write_4cyc_1S_1I]>
]>;

def : InstRW<[A57WriteVSTMs], (instregex "VSTMSIA$")>;
def : InstRW<[A57WriteVSTMd], (instregex "VSTMDIA$")>;
def : InstRW<[A57WrBackOne, A57WriteVSTMs_Upd],
             (instregex "VSTM(SIA_UPD|SDB_UPD)")>;
def : InstRW<[A57WrBackOne, A57WriteVSTMd_Upd],
             (instregex "VSTM(DIA_UPD|DDB_UPD)")>;

// --- 3.14 ASIMD Integer
// Instructions ---

// ASIMD absolute diff, 3cyc F0/F1 for integer VABD
def : InstRW<[A57Write_3cyc_1V], (instregex "VABD(s|u)")>;

// ASIMD absolute diff accum: 4(1) F1 for D-form, 5(2) F1 for Q-form
// D-form
def A57WriteVABAD : SchedWriteRes<[A57UnitX]> {
  let Latency = 4;
}
def A57ReadVABAD : SchedReadAdvance<3, [A57WriteVABAD]>;
def : InstRW<[A57WriteVABAD, A57ReadVABAD],
             (instregex "VABA(s|u)(v8i8|v4i16|v2i32)")>;
// Q-form
def A57WriteVABAQ : SchedWriteRes<[A57UnitX]> {
  let Latency = 5;
}
def A57ReadVABAQ : SchedReadAdvance<3, [A57WriteVABAQ]>;
def : InstRW<[A57WriteVABAQ, A57ReadVABAQ],
             (instregex "VABA(s|u)(v16i8|v8i16|v4i32)")>;

// ASIMD absolute diff accum long: 4(1) F1 for VABAL
def A57WriteVABAL : SchedWriteRes<[A57UnitX]> {
  let Latency = 4;
}
def A57ReadVABAL : SchedReadAdvance<3, [A57WriteVABAL]>;
def : InstRW<[A57WriteVABAL, A57ReadVABAL], (instregex "VABAL(s|u)")>;

// ASIMD absolute diff long: 3cyc F0/F1 for VABDL
def : InstRW<[A57Write_3cyc_1V], (instregex "VABDL(s|u)")>;

// ASIMD arith, basic
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VADDv",
                        "VADDL",
                        "VADDW",
                        "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)",
                        "VPADDi",
                        "VPADDL",
                        "VSUBv",
                        "VSUBL",
                        "VSUBW")>;

// ASIMD arith, complex
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VABS",
                        "VADDHN",
                        "VHADD",
                        "VHSUB",
                        "VQABS",
                        "VQADD",
                        "VQNEG",
                        "VQSUB",
                        "VRADDHN",
                        "VRHADD",
                        "VRSUBHN",
                        "VSUBHN")>;

// ASIMD compare
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VCEQ",
                        "VCGE",
                        "VCGT",
                        "VCLE",
                        "VTST",
                        "VCLT")>;

// ASIMD logical
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VAND",
                        "VBIC",
                        "VMVN",
                        "VORR",
                        "VORN",
                        "VEOR")>;

// ASIMD max/min
def : InstRW<[A57Write_3cyc_1V],
             (instregex "(VMAX|VMIN)(s|u)",
                        "(VPMAX|VPMIN)(s8|s16|s32|u8|u16|u32)")>;

// ASIMD multiply, D-form: 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
// Cortex-A57 r1p0 and later reduce the latency of ASIMD
// multiply
// and multiply-with-accumulate instructions relative to r0pX.
// In each variant below, the IsR1P0AndLaterPred entry carries the r1p0-and-
// later numbers and the NoSchedPred entry carries the r0px numbers.
def A57WriteVMULD_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def : InstRW<[A57WriteVMULD_VecInt],
             (instregex "VMUL(v8i8|v4i16|v2i32|pd)",
                        "VMULsl(v4i16|v2i32)",
                        "VQDMULH(sl)?(v4i16|v2i32)",
                        "VQRDMULH(sl)?(v4i16|v2i32)")>;

// ASIMD multiply, Q-form: 6cyc F0 for r0px, 5cyc F0 for r1p0 and later
def A57WriteVMULQ_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_6cyc_1W]>
]>;
def : InstRW<[A57WriteVMULQ_VecInt],
             (instregex "VMUL(v16i8|v8i16|v4i32|pq)",
                        "VMULsl(v8i16|v4i32)",
                        "VQDMULH(sl)?(v8i16|v4i32)",
                        "VQRDMULH(sl)?(v8i16|v4i32)")>;

// ASIMD multiply accumulate, D-form
// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
// (4 or 3 ReadAdvance)
def A57WriteVMLAD_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def A57ReadVMLAD_VecInt : SchedReadVariant<[
  SchedVar<IsR1P0AndLaterPred,
           [SchedReadAdvance<3, [A57WriteVMLAD_VecInt]>]>,
  SchedVar<NoSchedPred,
           [SchedReadAdvance<4, [A57WriteVMLAD_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAD_VecInt, A57ReadVMLAD_VecInt],
             (instregex "VMLA(sl)?(v8i8|v4i16|v2i32)",
                        "VMLS(sl)?(v8i8|v4i16|v2i32)")>;

// ASIMD multiply accumulate, Q-form
// 6cyc F0 for r0px, 5cyc F0 for r1p0 and later, 2cyc for accumulate sequence
// (4 or 3 ReadAdvance)
def A57WriteVMLAQ_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_6cyc_1W]>
]>;
def A57ReadVMLAQ_VecInt : SchedReadVariant<[
  SchedVar<IsR1P0AndLaterPred,
           [SchedReadAdvance<3, [A57WriteVMLAQ_VecInt]>]>,
  SchedVar<NoSchedPred,
           [SchedReadAdvance<4, [A57WriteVMLAQ_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAQ_VecInt, A57ReadVMLAQ_VecInt],
             (instregex "VMLA(sl)?(v16i8|v8i16|v4i32)",
                        "VMLS(sl)?(v16i8|v8i16|v4i32)")>;

// ASIMD multiply accumulate long
// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
// (4 or 3 ReadAdvance)
def A57WriteVMLAL_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def A57ReadVMLAL_VecInt : SchedReadVariant<[
  SchedVar<IsR1P0AndLaterPred,
           [SchedReadAdvance<3, [A57WriteVMLAL_VecInt]>]>,
  SchedVar<NoSchedPred,
           [SchedReadAdvance<4, [A57WriteVMLAL_VecInt]>]>
]>;
def : InstRW<[A57WriteVMLAL_VecInt, A57ReadVMLAL_VecInt],
             (instregex "VMLAL(s|u)",
                        "VMLSL(s|u)")>;

// ASIMD multiply accumulate saturating long
// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 2cyc for accumulate sequence
// (3 or 2 ReadAdvance)
def A57WriteVQDMLAL_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def A57ReadVQDMLAL_VecInt : SchedReadVariant<[
  SchedVar<IsR1P0AndLaterPred,
           [SchedReadAdvance<2, [A57WriteVQDMLAL_VecInt]>]>,
  SchedVar<NoSchedPred,
           [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]>
]>;
def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
             (instregex "VQDMLAL",
                        "VQDMLSL")>;

// VQRDMLAH/VQRDMLSH (vector saturating rounding doubling multiply
// accumulate/subtract) reuse the scheduling info of VQDMLAL/VQDMLSL.
def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
             (instregex "VQRDMLAH",
                        "VQRDMLSH")>;

// ASIMD multiply long
// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
def A57WriteVMULL_VecInt : SchedWriteVariant<[
  SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
  SchedVar<NoSchedPred,        [A57Write_5cyc_1W]>
]>;
def : InstRW<[A57WriteVMULL_VecInt],
             (instregex "VMULL(s|u|p8|sls|slu)",
                        "VQDMULL")>;

// ASIMD pairwise add and accumulate
// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
def A57WriteVPADAL : SchedWriteRes<[A57UnitX]> {
  let Latency = 4;
}
def A57ReadVPADAL : SchedReadAdvance<3, [A57WriteVPADAL]>;
def : InstRW<[A57WriteVPADAL, A57ReadVPADAL], (instregex "VPADAL(s|u)")>;

// ASIMD shift accumulate
// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
def A57WriteVSRA : SchedWriteRes<[A57UnitX]> {
  let Latency = 4;
}
def A57ReadVSRA : SchedReadAdvance<3, [A57WriteVSRA]>;
def : InstRW<[A57WriteVSRA, A57ReadVSRA],
             (instregex "VSRA",
                        "VRSRA")>;

// ASIMD shift by immed, basic
def : InstRW<[A57Write_3cyc_1X],
             (instregex "VMOVL",
                        "VSHLi",
                        "VSHLL",
                        "VSHR(s|u)",
                        "VSHRN")>;

// ASIMD shift by immed, complex
def : InstRW<[A57Write_4cyc_1X],
             (instregex "VQRSHRN",
                        "VQRSHRUN",
                        "VQSHL(si|ui|su)",
                        "VQSHRN",
                        "VQSHRUN",
                        "VRSHR(s|u)",
                        "VRSHRN")>;

// ASIMD shift by immed and insert, basic, D-form
def : InstRW<[A57Write_4cyc_1X],
             (instregex "VSLI(v8i8|v4i16|v2i32|v1i64)",
                        "VSRI(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by immed and insert, basic, Q-form
def : InstRW<[A57Write_5cyc_1X],
             (instregex "VSLI(v16i8|v8i16|v4i32|v2i64)",
                        "VSRI(v16i8|v8i16|v4i32|v2i64)")>;

// ASIMD shift by register, basic, D-form
def : InstRW<[A57Write_3cyc_1X],
             (instregex "VSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by register, basic, Q-form
def : InstRW<[A57Write_4cyc_1X],
             (instregex "VSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;

// ASIMD shift by register, complex, D-form
// VQRSHL, VQSHL, VRSHL
def : InstRW<[A57Write_4cyc_1X],
             (instregex "VQRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
                        "VQSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
                        "VRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;

// ASIMD shift by register, complex, Q-form
def : InstRW<[A57Write_5cyc_1X],
             (instregex "VQRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
                        "VQSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
                        "VRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;

// --- 3.15 ASIMD Floating-Point Instructions ---
// ASIMD FP absolute value
def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(fd|fq|hd|hq)")>;

// ASIMD FP arith
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VABD(fd|fq|hd|hq)",
                        "VADD(fd|fq|hd|hq)",
                        "VPADD(f|h)",
                        "VSUB(fd|fq|hd|hq)")>;

def : InstRW<[A57Write_5cyc_1V],
             (instregex "VCADD",
                        "VCMLA")>;

// ASIMD FP compare
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VAC(GE|GT|LE|LT)",
                        "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>;

// ASIMD FP convert, integer
def : InstRW<[A57Write_5cyc_1V],
  (instregex
    "VCVT(f2sd|f2ud|s2fd|u2fd|f2sq|f2uq|s2fq|u2fq|f2xsd|f2xud|xs2fd|xu2fd)",
    "VCVT(f2xsq|f2xuq|xs2fq|xu2fq)",
    "VCVT(AN|MN|NN|PN)(SDf|SQf|UDf|UQf|SDh|SQh|UDh|UQh)")>;

// ASIMD FP convert, half-precision: 8cyc F0/F1
def : InstRW<[A57Write_8cyc_1V],
  (instregex
    "VCVT(h2sd|h2ud|s2hd|u2hd|h2sq|h2uq|s2hq|u2hq|h2xsd|h2xud|xs2hd|xu2hd)",
    "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)",
    "VCVT(f2h|h2f)")>;

// ASIMD FP max/min
def : InstRW<[A57Write_5cyc_1V],
             (instregex "(VMAX|VMIN)(fd|fq|hd|hq)",
                        "(VPMAX|VPMIN)(f|h)",
                        "(NEON|VFP)_VMAXNM",
                        "(NEON|VFP)_VMINNM")>;

// ASIMD FP multiply
def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> {
  let Latency = 5;
}
def : InstRW<[A57WriteVMUL_VecFP], (instregex "VMUL(sl)?(fd|fq|hd|hq)")>;

// ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for accumulate sequence
def A57WriteVMLA_VecFP : SchedWriteRes<[A57UnitV]> {
  let Latency = 9;
}
def A57ReadVMLA_VecFP :
  SchedReadAdvance<5, [A57WriteVMLA_VecFP, A57WriteVMUL_VecFP]>;
def : InstRW<[A57WriteVMLA_VecFP, A57ReadVMLA_VecFP],
             (instregex "(VMLA|VMLS)(sl)?(fd|fq|hd|hq)",
                        "(VFMA|VFMS)(fd|fq|hd|hq)")>;

// ASIMD FP negate
def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG(fd|f32q|hd|hq)")>;

// ASIMD FP round to integral
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VRINT(AN|MN|NN|PN|XN|ZN)(Df|Qf|Dh|Qh)")>;

// --- 3.16 ASIMD Miscellaneous Instructions ---

// ASIMD bitwise insert
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VBIF",
                        "VBIT",
                        "VBSL",
                        "VBSP")>;

// ASIMD count
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VCLS",
                        "VCLZ",
                        "VCNT")>;

// ASIMD duplicate, core reg: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VDUP(8|16|32)(d|q)")>;

// ASIMD duplicate, scalar: 3cyc "F0/F1"
def : InstRW<[A57Write_3cyc_1V], (instregex "VDUPLN(8|16|32)(d|q)")>;

// ASIMD extract
def : InstRW<[A57Write_3cyc_1V], (instregex "VEXT(d|q)(8|16|32|64)")>;

// ASIMD move, immed
def : InstRW<[A57Write_3cyc_1V],
  (instregex
    "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)",
    "VMOVD0",
    "VMOVQ0")>;

// ASIMD move, narrowing
def : InstRW<[A57Write_3cyc_1V], (instregex "VMOVN")>;

// ASIMD move, saturating
def : InstRW<[A57Write_4cyc_1X], (instregex "VQMOVN")>;

// ASIMD reciprocal estimate
def : InstRW<[A57Write_5cyc_1V],
             (instregex "VRECPE",
                        "VRSQRTE")>;

// ASIMD reciprocal step, FZ
def : InstRW<[A57Write_9cyc_1V],
             (instregex "VRECPS",
                        "VRSQRTS")>;

// ASIMD reverse, swap, table lookup (1-2 reg)
def : InstRW<[A57Write_3cyc_1V],
             (instregex "VREV",
                        "VSWP",
                        "VTB(L|X)(1|2)")>;

// ASIMD table lookup (3-4 reg)
def : InstRW<[A57Write_6cyc_1V],
             (instregex "VTBL(3|4)",
                        "VTBX(3|4)")>;

// ASIMD transfer, scalar to core reg: 6cyc "L, I0/I1"
def : InstRW<[A57Write_6cyc_1L_1I], (instregex "VGETLN")>;

// ASIMD transfer, core reg to scalar: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VSETLN")>;

// ASIMD transpose
def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V], (instregex "VTRN")>;

//
// ASIMD unzip/zip, D-form
def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V],
             (instregex "VUZPd",
                        "VZIPd")>;

// ASIMD unzip/zip, Q-form
def : InstRW<[A57Write_6cyc_1V, A57Write_6cyc_1V],
             (instregex "VUZPq",
                        "VZIPq")>;

// --- 3.17 ASIMD Load Instructions ---

// Overridden via InstRW for this processor.
def : WriteRes<WriteVLD1, []>;
def : WriteRes<WriteVLD2, []>;
def : WriteRes<WriteVLD3, []>;
def : WriteRes<WriteVLD4, []>;
def : WriteRes<WriteVST1, []>;
def : WriteRes<WriteVST2, []>;
def : WriteRes<WriteVST3, []>;
def : WriteRes<WriteVST4, []>;

// 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency
def : InstRW<[A57Write_5cyc_1L], (instregex "VLD1(d|q)(8|16|32|64)$")>;
def : InstRW<[A57Write_5cyc_1L_1I, A57WrBackOne],
             (instregex "VLD1(d|q)(8|16|32|64)wb")>;

// 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency
def : InstRW<[A57Write_6cyc_1L],
             (instregex "VLD1(d|q)(8|16|32|64)(T|Q)$",
                        "VLD1d64(T|Q)Pseudo")>;

def : InstRW<[A57Write_6cyc_1L_1I, A57WrBackOne],
             (instregex "VLD1(d|q)(8|16|32|64)(T|Q)wb")>;

// ASIMD load, 1 element, one lane and all lanes: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
             (instregex "VLD1(LN|DUP)(d|q)(8|16|32)$",
                        "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)",
                        "VLD1LNq(8|16|32)Pseudo_UPD")>;

// ASIMD load, 2 element, multiple, 2 reg: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V],
             (instregex "VLD2(d|q)(8|16|32)$",
                        "VLD2q(8|16|32)Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD2(d|q)(8|16|32)wb",
                        "VLD2q(8|16|32)PseudoWB")>;

// ASIMD load, 2 element, multiple, 4 reg: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V], (instregex "VLD2b(8|16|32)$")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD2b(8|16|32)wb")>;

// ASIMD load, 2 element, one lane and all lanes: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
             (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
                        "VLD2LN(d|q)(8|16|32)Pseudo$")>;
// 2 results + wb result
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V, A57WrBackOne],
             (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
// 1 result + wb result
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb",
                        "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;

// ASIMD load, 3 element, multiple, 3 reg: 9cyc "L, F0/F1"
// 3 results
def : InstRW<[A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
             (instregex "VLD3(d|q)(8|16|32)$")>;
// 1 result
def : InstRW<[A57Write_9cyc_1L_1V],
             (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
// 3 results + wb
def : InstRW<[A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
// 1 result + wb
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// ASIMD load, 3 element, one lane, size 32: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
             (instregex "VLD3LN(d|q)32$",
                        "VLD3LN(d|q)32Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD3LN(d|q)32_UPD")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD3LN(d|q)32Pseudo_UPD")>;

// ASIMD load, 3 element, one lane, size 8/16: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
             (instregex "VLD3LN(d|q)(8|16)$",
                        "VLD3LN(d|q)(8|16)Pseudo$")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD3LN(d|q)(8|16)_UPD")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD3LN(d|q)(8|16)Pseudo_UPD")>;

// ASIMD load, 3 element, all lanes: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
             (instregex "VLD3DUP(d|q)(8|16|32)$",
                        "VLD3DUP(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD3DUP(d|q)(8|16|32)_UPD")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD3DUP(d|q)(8|16|32)Pseudo_UPD")>;

// ASIMD load, 4 element, multiple, 4 reg: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
             (instregex "VLD4(d|q)(8|16|32)$")>;
def : InstRW<[A57Write_9cyc_1L_1V],
             (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD4(d|q)(8|16|32)_UPD")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;

// ASIMD load, 4 element, one lane, size 32: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
             (instregex "VLD4LN(d|q)32$",
                        "VLD4LN(d|q)32Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD4LN(d|q)32_UPD")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD4LN(d|q)32Pseudo_UPD")>;

// ASIMD load, 4 element, one lane, size 8/16: 9cyc "L, F0/F1"
def : InstRW<[A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V,
              A57Write_9cyc_1L_1V],
             (instregex "VLD4LN(d|q)(8|16)$",
                        "VLD4LN(d|q)(8|16)Pseudo$")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57Write_9cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD4LN(d|q)(8|16)_UPD")>;
def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD4LN(d|q)(8|16)Pseudo_UPD")>;

// ASIMD load, 4 element, all lanes: 8cyc "L, F0/F1"
def : InstRW<[A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V,
              A57Write_8cyc_1L_1V],
             (instregex "VLD4DUP(d|q)(8|16|32)$",
                        "VLD4DUP(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57Write_8cyc_1L_1V_1I,
              A57WrBackOne],
             (instregex "VLD4DUP(d|q)(8|16|32)_UPD")>;
def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
             (instregex "VLD4DUP(d|q)(8|16|32)Pseudo_UPD")>;

// --- 3.18 ASIMD Store Instructions ---

// ASIMD store, 1 element, multiple, 1 reg: 1cyc S
def : InstRW<[A57Write_1cyc_1S], (instregex "VST1d(8|16|32|64)$")>;
def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
             (instregex "VST1d(8|16|32|64)wb")>;
// ASIMD store, 1 element, multiple, 2 reg: 2cyc S
def : InstRW<[A57Write_2cyc_1S], (instregex "VST1q(8|16|32|64)$")>;
def : InstRW<[A57WrBackOne, A57Write_2cyc_1S_1I],
             (instregex "VST1q(8|16|32|64)wb")>;
// ASIMD store, 1 element, multiple, 3 reg: 3cyc S
def : InstRW<[A57Write_3cyc_1S],
             (instregex "VST1d(8|16|32|64)T$",
                        "VST1d64TPseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1I],
             (instregex "VST1d(8|16|32|64)Twb",
                        "VST1d64TPseudoWB")>;
// ASIMD store, 1 element, multiple, 4 reg: 4cyc S
def : InstRW<[A57Write_4cyc_1S],
             (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1I],
             (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
// ASIMD store, 1 element, one lane: 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
             (instregex "VST1LNd(8|16|32)$",
                        "VST1LNq(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
             (instregex "VST1LNd(8|16|32)_UPD",
                        "VST1LNq(8|16|32)Pseudo_UPD")>;
// ASIMD store, 2 element, multiple, 2 reg: 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
             (instregex "VST2(d|b)(8|16|32)$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
             (instregex "VST2(b|d)(8|16|32)wb")>;
// ASIMD store, 2 element, multiple, 4 reg: 4cyc "F0/F1, S"
def : InstRW<[A57Write_4cyc_1S_1V],
             (instregex "VST2q(8|16|32)$",
                        "VST2q(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
             (instregex "VST2q(8|16|32)wb",
                        "VST2q(8|16|32)PseudoWB")>;
// ASIMD store, 2 element, one lane: 3cyc "F0/F1, S"
def : InstRW<[A57Write_3cyc_1S_1V],
             (instregex "VST2LN(d|q)(8|16|32)$",
                        "VST2LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
             (instregex "VST2LN(d|q)(8|16|32)_UPD",
                        "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
// ASIMD store, 3 element, multiple, 3 reg
def : InstRW<[A57Write_3cyc_1S_1V],
             (instregex "VST3(d|q)(8|16|32)$",
                        "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
             (instregex "VST3(d|q)(8|16|32)_UPD",
                        "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
// ASIMD store, 3 element, one lane
def : InstRW<[A57Write_3cyc_1S_1V],
             (instregex "VST3LN(d|q)(8|16|32)$",
                        "VST3LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
             (instregex "VST3LN(d|q)(8|16|32)_UPD",
                        "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
// ASIMD store, 4 element, multiple, 4 reg
def : InstRW<[A57Write_4cyc_1S_1V],
             (instregex "VST4(d|q)(8|16|32)$",
                        "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
             (instregex "VST4(d|q)(8|16|32)_UPD",
                        "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
// ASIMD
// store, 4 element, one lane
def : InstRW<[A57Write_3cyc_1S_1V],
             (instregex "VST4LN(d|q)(8|16|32)$",
                        "VST4LN(d|q)(8|16|32)Pseudo$")>;
def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
             (instregex "VST4LN(d|q)(8|16|32)_UPD",
                        "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;

// --- 3.19 Cryptography Extensions ---
// Crypto AES ops
// AESD, AESE, AESIMC, AESMC: 3cyc F0
def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>;
// Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0
def : InstRW<[A57Write_3cyc_1W], (instregex "^VMULLp64")>;
// Crypto SHA1 xor ops: 6cyc F0/F1
def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>;
// Crypto SHA1 fast ops: 3cyc F0
def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
// Crypto SHA1 slow ops: 6cyc F0
def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
// Crypto SHA256 fast ops: 3cyc F0
def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>;
// Crypto SHA256 slow ops: 6cyc F0
def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>;

// --- 3.20 CRC ---
def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>;

// -----------------------------------------------------------------------------
// Common definitions
// WriteNoop: no resources, zero latency, zero micro-ops.
def : WriteRes<WriteNoop, []> {
  let Latency = 0;
  let NumMicroOps = 0;
}
def : SchedAlias<WriteALU,
                 CheckBranchForm<0, A57BranchForm<A57Write_1cyc_1I>>>;

def : SchedAlias<WriteBr,    A57Write_1cyc_1B>;
def : SchedAlias<WriteBrL,   A57Write_1cyc_1B_1I>;
def : SchedAlias<WriteBrTbl, A57Write_1cyc_1B_1I>;
def : SchedAlias<WritePreLd, A57Write_4cyc_1L>;

def : SchedAlias<WriteLd, A57Write_4cyc_1L>;
def : SchedAlias<WriteST, A57Write_1cyc_1S>;
def : ReadAdvance<ReadALU, 0>;

} // SchedModel = CortexA57Model