ARMScheduleA57.td - OpenGrok cross reference for /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA57.td

Lines Matching +full:pre +full:- +full:multiply
1 //=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the machine model for ARM Cortex-A57 to support
12 //===----------------------------------------------------------------------===//
14 //===----------------------------------------------------------------------===//
16 // The Cortex-A57 is a traditional superscalar microprocessor with a
17 // conservative 3-wide in-order stage for decode and dispatch. Combined with the
18 // much wider out-of-order issue stage, this produced a need to carefully
19 // schedule micro-ops so that all three decoded each cycle are successfully
22 // modeling the machine as out-of-order.
74   let IssueWidth        =   3; // 3-way decode and dispatch
75   let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
90 //===----------------------------------------------------------------------===//
91 // Define each kind of processor resource and number available on Cortex-A57.
92 // Cortex-A57 has 8 pipelines, each of which has its own 8-entry queue where
93 // micro-ops wait for their operands and then issue out-of-order.
95 def A57UnitB : ProcResource<1>;  // Type B micro-ops
96 def A57UnitI : ProcResource<2>;  // Type I micro-ops
97 def A57UnitM : ProcResource<1>;  // Type M micro-ops
98 def A57UnitL : ProcResource<1>;  // Type L micro-ops
99 def A57UnitS : ProcResource<1>;  // Type S micro-ops
101 def A57UnitX : ProcResource<1>;  // Type X micro-ops (F1)
102 def A57UnitW : ProcResource<1>;  // Type W micro-ops (F0)
105   def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>;    // Type V micro-ops
110 //===----------------------------------------------------------------------===//
111 // Define customized scheduler read/write types specific to the Cortex-A57.
151 // -----------------------------------------------------------------------------
155 // --- 3.2 Branch Instructions ---
169 // --- 3.3 Arithmetic and Logical Instructions ---
210 // --- 3.4 Move and Shift Instructions ---
240 // MOVT - A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later
257 // --- 3.5 Divide and Multiply Instructions ---
261 // Multiply: tMul not bound to common WriteRes types
267 // Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB,
269 // Multiply-accumulate pipelines support late-forwarding of accumulate operands
270 // from similar μops, allowing a typical sequence of multiply-accumulate μops
290 // Multiply long: SMULL, UMULL
294 // --- 3.6 Saturating and Parallel Arithmetic Instructions ---
297 // Conditional GE-setting instructions require three extra μops
346 // --- 3.7 Miscellaneous Data-Processing Instructions ---
374 // --- 3.8 Load Instructions ---
418 // --- LDR pre-indexed ---
419 // Load, immed pre-indexed (4 cyc for load result, 1 cyc for Base update)
423 // Load, register pre-indexed (4 cyc for load result, 2 cyc for Base update)
424 // (5 cyc load result for not-lsl2 scaled)
441 // LDRD pre-indexed: 5(2) cyc for reg, 4(1) cyc for imm.
455 // --- LDR post-indexed ---
489 // LDRD post-indexed: 4(2) cyc for reg, 4(1) cyc for imm.
495 // --- Preload instructions ---
510 // --- Load multiple instructions ---
511 foreach NumAddr = 1-8 in {
530   SchedVar<A57LMAddrPred1,     A57LDMOpsListNoregin.Writes[0-1]>,
531   SchedVar<A57LMAddrPred2,     A57LDMOpsListNoregin.Writes[0-3]>,
532   SchedVar<A57LMAddrPred3,     A57LDMOpsListNoregin.Writes[0-5]>,
533   SchedVar<A57LMAddrPred4,     A57LDMOpsListNoregin.Writes[0-7]>,
534   SchedVar<A57LMAddrPred5,     A57LDMOpsListNoregin.Writes[0-9]>,
535   SchedVar<A57LMAddrPred6,     A57LDMOpsListNoregin.Writes[0-11]>,
536   SchedVar<A57LMAddrPred7,     A57LDMOpsListNoregin.Writes[0-13]>,
537   SchedVar<A57LMAddrPred8,     A57LDMOpsListNoregin.Writes[0-15]>,
538   SchedVar<NoSchedPred,        A57LDMOpsListNoregin.Writes[0-15]>
551   SchedVar<A57LMAddrPred1,     A57LDMOpsListRegin.Writes[0-1]>,
552   SchedVar<A57LMAddrPred2,     A57LDMOpsListRegin.Writes[0-3]>,
553   SchedVar<A57LMAddrPred3,     A57LDMOpsListRegin.Writes[0-5]>,
554   SchedVar<A57LMAddrPred4,     A57LDMOpsListRegin.Writes[0-7]>,
555   SchedVar<A57LMAddrPred5,     A57LDMOpsListRegin.Writes[0-9]>,
556   SchedVar<A57LMAddrPred6,     A57LDMOpsListRegin.Writes[0-11]>,
557   SchedVar<A57LMAddrPred7,     A57LDMOpsListRegin.Writes[0-13]>,
558   SchedVar<A57LMAddrPred8,     A57LDMOpsListRegin.Writes[0-15]>,
559   SchedVar<NoSchedPred,        A57LDMOpsListRegin.Writes[0-15]>
573   SchedVar<A57LMAddrUpdPred1,     A57LDMOpsList_Upd.Writes[0-2]>,
574   SchedVar<A57LMAddrUpdPred2,     A57LDMOpsList_Upd.Writes[0-4]>,
575   SchedVar<A57LMAddrUpdPred3,     A57LDMOpsList_Upd.Writes[0-6]>,
576   SchedVar<A57LMAddrUpdPred4,     A57LDMOpsList_Upd.Writes[0-8]>,
577   SchedVar<A57LMAddrUpdPred5,     A57LDMOpsList_Upd.Writes[0-10]>,
578   SchedVar<A57LMAddrUpdPred6,     A57LDMOpsList_Upd.Writes[0-12]>,
579   SchedVar<A57LMAddrUpdPred7,     A57LDMOpsList_Upd.Writes[0-14]>,
580   SchedVar<A57LMAddrUpdPred8,     A57LDMOpsList_Upd.Writes[0-16]>,
581   SchedVar<NoSchedPred,           A57LDMOpsList_Upd.Writes[0-16]>
597 // --- 3.9 Store Instructions ---
625 // Store, immed pre-indexed (1cyc "S, I0/I1", 1cyc writeback)
627   "STRB_PRE_IMM", "STR(B)?(r|i)_preidx", "(t2)?STRH_(preidx|PRE)",
628   "t2STR(B?)_(PRE|preidx)", "t2STRD_PRE")>;
630 // Store, register pre-indexed:
649 // pre-indexed STRH/STRD (STRH_PRE, STRD_PRE)
677 // 1(2) "S, M" for STR/STRB register post-indexed (both scaled or not)
681 // post-indexed STRH/STRD(STRH_POST, STRD_POST), STRHTi, STRHTr
686 // --- Store multiple instructions ---
717 // --- 3.10 FP Data Processing Instructions ---
723 // fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional
753 // FP multiply-accumulate pipelines support late forwarding of the result
754 // from FP multiply μops to the accumulate operands of an
755 // FP multiply-accumulate μop. The latter can potentially be issued 1 cycle
756 // after the FP multiply μop has been issued
757 // FP multiply, FZ
764 // FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate
768 // VFMA takes 9 cyc for common case and 4 cyc for VFMA->VFMA chain (5 read adv.)
769 // VMUL takes 5 cyc for common case and 1 cyc for VMUL->VFMA chain (4 read adv.)
772 // Zero latency (instead of one) for VMUL->VFMA shouldn't break anything.
793 // --- 3.11 FP Miscellaneous Instructions ---
806 // 8cyc "L,F0/F1" for FP transfer, core reg to upper or lower half of vfp D-reg
809 // --- 3.12 FP Load Instructions ---
826   SchedVar<A57LMAddrPred1,  A57VLDMOpsListUncond.Writes[0-1]>,
827   SchedVar<A57LMAddrPred2,  A57VLDMOpsListUncond.Writes[0-3]>,
828   SchedVar<A57LMAddrPred3,  A57VLDMOpsListUncond.Writes[0-5]>,
829   SchedVar<A57LMAddrPred4,  A57VLDMOpsListUncond.Writes[0-7]>,
830   SchedVar<A57LMAddrPred5,  A57VLDMOpsListUncond.Writes[0-9]>,
831   SchedVar<A57LMAddrPred6,  A57VLDMOpsListUncond.Writes[0-11]>,
832   SchedVar<A57LMAddrPred7,  A57VLDMOpsListUncond.Writes[0-13]>,
833   SchedVar<NoSchedPred,     A57VLDMOpsListUncond.Writes[0-15]>
846   SchedVar<A57LMAddrPred1,  A57VLDMOpsListCond.Writes[0-1]>,
847   SchedVar<A57LMAddrPred2,  A57VLDMOpsListCond.Writes[0-3]>,
848   SchedVar<A57LMAddrPred3,  A57VLDMOpsListCond.Writes[0-5]>,
849   SchedVar<A57LMAddrPred4,  A57VLDMOpsListCond.Writes[0-7]>,
850   SchedVar<A57LMAddrPred5,  A57VLDMOpsListCond.Writes[0-9]>,
851   SchedVar<A57LMAddrPred6,  A57VLDMOpsListCond.Writes[0-11]>,
852   SchedVar<A57LMAddrPred7,  A57VLDMOpsListCond.Writes[0-13]>,
853   SchedVar<NoSchedPred,     A57VLDMOpsListCond.Writes[0-15]>
873   SchedVar<A57LMAddrPred1,  A57VLDMOpsListUncond_Upd.Writes[0-1]>,
874   SchedVar<A57LMAddrPred2,  A57VLDMOpsListUncond_Upd.Writes[0-3]>,
875   SchedVar<A57LMAddrPred3,  A57VLDMOpsListUncond_Upd.Writes[0-5]>,
876   SchedVar<A57LMAddrPred4,  A57VLDMOpsListUncond_Upd.Writes[0-7]>,
877   SchedVar<A57LMAddrPred5,  A57VLDMOpsListUncond_Upd.Writes[0-9]>,
878   SchedVar<A57LMAddrPred6,  A57VLDMOpsListUncond_Upd.Writes[0-11]>,
879   SchedVar<A57LMAddrPred7,  A57VLDMOpsListUncond_Upd.Writes[0-13]>,
880   SchedVar<NoSchedPred,     A57VLDMOpsListUncond_Upd.Writes[0-15]>
893   SchedVar<A57LMAddrPred1,  A57VLDMOpsListCond_Upd.Writes[0-1]>,
894   SchedVar<A57LMAddrPred2,  A57VLDMOpsListCond_Upd.Writes[0-3]>,
895   SchedVar<A57LMAddrPred3,  A57VLDMOpsListCond_Upd.Writes[0-5]>,
896   SchedVar<A57LMAddrPred4,  A57VLDMOpsListCond_Upd.Writes[0-7]>,
897   SchedVar<A57LMAddrPred5,  A57VLDMOpsListCond_Upd.Writes[0-9]>,
898   SchedVar<A57LMAddrPred6,  A57VLDMOpsListCond_Upd.Writes[0-11]>,
899   SchedVar<A57LMAddrPred7,  A57VLDMOpsListCond_Upd.Writes[0-13]>,
900   SchedVar<NoSchedPred,     A57VLDMOpsListCond_Upd.Writes[0-15]>
911 // --- 3.13 FP Store Instructions ---
968 // --- 3.14 ASIMD Integer Instructions ---
973 // ASIMD absolute diff accum: 4(1) F1 for D-form, 5(2) F1 for Q-form
1013 // ASIMD multiply, D-form: 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
1014 // Cortex-A57 r1p0 and later reduce the latency of ASIMD multiply
1015 // and multiply-with-accumulate instructions relative to r0pX.
1023 // ASIMD multiply, Q-form: 6cyc F0 for r0px, 5cyc F0 for r1p0 and later
1031 // ASIMD multiply accumulate, D-form
1044 // ASIMD multiply accumulate, Q-form
1057 // ASIMD multiply accumulate long
1070 // ASIMD multiply accumulate saturating long
1083 // Vector Saturating Rounding Doubling Multiply Accumulate/Subtract Long
1088 // ASIMD multiply long
1117 // ASIMD shift by immed and insert, basic, D-form
1121 // ASIMD shift by immed and insert, basic, Q-form
1125 // ASIMD shift by register, basic, D-form
1129 // ASIMD shift by register, basic, Q-form
1133 // ASIMD shift by register, complex, D-form
1139 // ASIMD shift by register, complex, Q-form
1144 // --- 3.15 ASIMD Floating-Point Instructions ---
1164 // ASIMD FP convert, half-precision: 8cyc F0/F1
1175 // ASIMD FP multiply
1179 // ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for accumulate sequence
1193 // --- 3.16 ASIMD Miscellaneous Instructions ---
1227 // ASIMD reverse, swap, table lookup (1-2 reg)
1230 // ASIMD table lookup (3-4 reg)
1242 // ASIMD unzip/zip, D-form
1246 // ASIMD unzip/zip, Q-form
1250 // --- 3.17 ASIMD Load Instructions ---
1262 // 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency
1267 // 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency
1396 // --- 3.18 ASIMD Store Instructions ---
1462 // --- 3.19 Cryptography Extensions ---
1466 // Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0
1479 // --- 3.20 CRC ---
1482 // -----------------------------------------------------------------------------