ARMScheduleA9.td - OpenGrok cross reference for /freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ARMScheduleA9.td

Lines Matching +full:quad +full:- +full:precision
1 //=- ARMScheduleA9.td - ARM Cortex-A9 Scheduling Definitions -*- tablegen -*-=//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
13 // ===---------------------------------------------------------------------===//
18 // Ad-hoc scheduling information derived from pretty vague "Cortex-A9 Technical
41   // Two fully-pipelined integer ALU pipelines
202   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
287                          -1>, // dynamic uops
296                          -1>, // dynamic uops
306                          -1>, // dynamic uops
315                                -1>, // dynamic uops
325                                -1>, // dynamic uops
346   // FIXME: If address is 64-bit aligned, AGU cycles is 1.
420                 [], [], -1>, // dynamic uops
427                 [2], [], -1>, // dynamic uops
441   // instruction and vice-versa. We model this behavior with two artificial FUs:
445   //  - Acquires DRegsVFP resource for 1 cycle
446   //  - Reserves DRegsN resource for the whole duration (including time to
451   // "cross-domain" stalls.
464   // Single-precision FP Unary
473   // Double-precision FP Unary
483   // Single-precision FP Compare
492   // Double-precision FP Compare
535   // Single-Precision FP to Integer Convert
543   // Double-Precision FP to Integer Convert
551   // Integer to Single-Precision FP Convert
559   // Integer to Double-Precision FP Convert
567   // Single-precision FP ALU
575   // Double-precision FP ALU
583   // Single-precision FP Multiply
591   // Double-precision FP Multiply
599   // Single-precision FP MAC
607   // Double-precision FP MAC
615   // Single-precision Fused FP MAC
623   // Double-precision Fused FP MAC
631   // Single-precision FP DIV
639   // Double-precision FP DIV
647   // Single-precision FP SQRT
655   // Double-precision FP SQRT
664   // Integer to Single-precision Move
673   // Integer to Double-precision Move
682   // Single-precision to Integer Move
684   // On A9 move-from-VFP is free to issue with no stall if other VFP
685   // operations are in flight. I assume it still can't dual-issue though.
690   // Double-precision to Integer Move
692   // On A9 move-from-VFP is free to issue with no stall if other VFP
693   // operations are in flight. I assume it still can't dual-issue though.
698   // Single-precision FP Load
707   // Double-precision FP Load
708   // FIXME: Result latency is 1 if address is 64-bit aligned.
725                 [1, 1, 1, 1], [], -1>, // dynamic uops
735                 [2, 1, 1, 1], [], -1>, // dynamic uops
737   // Single-precision FP Store
746   // Double-precision FP Store
763                 [1, 1, 1, 1], [], -1>, // dynamic uops
773                 [2, 1, 1, 1], [], -1>, // dynamic uops
1275   // Double-register Integer Unary
1284   // Quad-register Integer Unary
1293   // Double-register Integer Q-Unary
1302   // Quad-register Integer Q-Unary
1311   // Double-register Integer Binary
1320   // Quad-register Integer Binary
1329   // Double-register Integer Subtract
1338   // Quad-register Integer Subtract
1347   // Double-register Integer Shift
1356   // Quad-register Integer Shift
1365   // Double-register Integer Shift (4 cycle)
1374   // Quad-register Integer Shift (4 cycle)
1383   // Double-register Integer Binary (4 cycle)
1392   // Quad-register Integer Binary (4 cycle)
1401   // Double-register Integer Subtract (4 cycle)
1410   // Quad-register Integer Subtract (4 cycle)
1420   // Double-register Integer Count
1429   // Quad-register Integer Count
1440   // Double-register Absolute Difference and Accumulate
1449   // Quad-register Absolute Difference and Accumulate
1458   // Double-register Integer Pair Add Long
1467   // Quad-register Integer Pair Add Long
1477   // Double-register Integer Multiply (.8, .16)
1486   // Quad-register Integer Multiply (.8, .16)
1496   // Double-register Integer Multiply (.32)
1505   // Quad-register Integer Multiply (.32)
1514   // Double-register Integer Multiply-Accumulate (.8, .16)
1523   // Double-register Integer Multiply-Accumulate (.32)
1532   // Quad-register Integer Multiply-Accumulate (.8, .16)
1541   // Quad-register Integer Multiply-Accumulate (.32)
1568   // Double-register Permute Move
1577   // Quad-register Permute Move
1586   // Integer to Single-precision Move
1594   // Integer to Double-precision Move
1602   // Single-precision to Integer Move
1610   // Double-precision to Integer Move
1636   // Double-register FP Unary
1645   // Quad-register FP Unary
1656   // Double-register FP Binary
1677   // Double-register FP VMUL
1686   // Quad-register FP Binary
1699   // Quad-register FP VMUL
1708   // Double-register FP Multiply-Accumulate
1717   // Quad-register FP Multiply-Accumulate
1728   // Double-register Fused FP Multiply-Accumulate
1737   // Quad-register Fused FP Multiply-Accumulate
1748   // Double-register Reciprocal Step
1757   // Quad-register Reciprocal Step
1766   // Double-register Permute
1775   // Quad-register Permute
1786   // Quad-register Permute (3 cycle issue)
1798   // Double-register VEXT
1807   // Quad-register VEXT
1877 // ===---------------------------------------------------------------------===//
1878 // The following definitions describe the simpler per-operand machine model.
1886 // Cortex-A9 machine model for scheduling and other instruction cost heuristics.
1888   let IssueWidth = 2; // 2 micro-ops are dispatched per cycle.
1905 //===----------------------------------------------------------------------===//
1923 //===----------------------------------------------------------------------===//
1932 // Write an integer shifted-by register
1960 // Floating-point
2018 // Load the upper 32-bits using the same micro-op.
2033 //===----------------------------------------------------------------------===//
2038 foreach NumCycles = 2-8 in {
2043 foreach NumAddr = 1-8 in {
2045 // Define A9WriteAdr1-8 as a sequence of A9WriteAdr with additive
2051   SchedPredicate<"(TII->getNumLDMAddresses(*MI)+1)/2 == "#NumAddr>;
2055 // Fall-back for unknown LDMs.
2056 def A9LMUnknownPred : SchedPredicate<"TII->getNumLDMAddresses(*MI) == 0">;
2069   // For unknown LDM/VLDM/VSTM, assume 2 32-bit registers.
2084 foreach NumAddr = 1-8 in {
2091 //===----------------------------------------------------------------------===//
2092 // LDM: Load multiple into 32-bit integer registers.
2104 // A9WriteLM variants expand into a pair of writes for each 64-bit
2110   SchedVar<A9LMAdr1Pred, A9WriteLMOpsList.Writes[0-1]>,
2111   SchedVar<A9LMAdr2Pred, A9WriteLMOpsList.Writes[0-3]>,
2112   SchedVar<A9LMAdr3Pred, A9WriteLMOpsList.Writes[0-5]>,
2113   SchedVar<A9LMAdr4Pred, A9WriteLMOpsList.Writes[0-7]>,
2114   SchedVar<A9LMAdr5Pred, A9WriteLMOpsList.Writes[0-9]>,
2115   SchedVar<A9LMAdr6Pred, A9WriteLMOpsList.Writes[0-11]>,
2116   SchedVar<A9LMAdr7Pred, A9WriteLMOpsList.Writes[0-13]>,
2117   SchedVar<A9LMAdr8Pred, A9WriteLMOpsList.Writes[0-15]>,
2131 //===----------------------------------------------------------------------===//
2135 // so can be used in WriteSequences for single-issue instructions that
2142 foreach NumAddr = 1-8 in {
2144 // Helper for A9WriteLfp1-8: A sequence of fp loads with no micro-ops.
2147 // A9WriteLfp1-8 definitions are statically expanded into a sequence of
2153 // A9WriteLfp1-8Mov adds a cycle of latency and FP resource for
2162 // A9WriteLfp1-8 sequence based on a predicate. This supports the
2163 // preRA VLDM variants in which all 64-bit loads are written to the
2164 // same tuple of either single or double precision registers.
2181 foreach NumAddr = 1-8 in {
2196 // pair of writes for each 64-bit data loaded. When the number of
2201                  [A9WriteLMfp1, A9WriteLMfp2,       // 0-1
2202                   A9WriteLMfp3, A9WriteLMfp4,       // 2-3
2203                   A9WriteLMfp5, A9WriteLMfp6,       // 4-5
2204                   A9WriteLMfp7, A9WriteLMfp8,       // 6-7
2205                   A9WriteLMfp1Hi,                   // 8-8
2206                   A9WriteLMfp2Hi, A9WriteLMfp2Hi,   // 9-10
2207                   A9WriteLMfp3Hi, A9WriteLMfp3Hi,   // 11-12
2208                   A9WriteLMfp4Hi, A9WriteLMfp4Hi,   // 13-14
2209                   A9WriteLMfp5Hi, A9WriteLMfp5Hi,   // 15-16
2210                   A9WriteLMfp6Hi, A9WriteLMfp6Hi,   // 17-18
2211                   A9WriteLMfp7Hi, A9WriteLMfp7Hi,   // 19-20
2212                   A9WriteLMfp8Hi, A9WriteLMfp8Hi]>; // 21-22
2215   SchedVar<A9LMAdr1Pred, A9WriteLMfpPostRAOpsList.Writes[0-0, 8-8]>,
2216   SchedVar<A9LMAdr2Pred, A9WriteLMfpPostRAOpsList.Writes[0-1, 9-10]>,
2217   SchedVar<A9LMAdr3Pred, A9WriteLMfpPostRAOpsList.Writes[0-2, 10-12]>,
2218   SchedVar<A9LMAdr4Pred, A9WriteLMfpPostRAOpsList.Writes[0-3, 11-14]>,
2219   SchedVar<A9LMAdr5Pred, A9WriteLMfpPostRAOpsList.Writes[0-4, 12-16]>,
2220   SchedVar<A9LMAdr6Pred, A9WriteLMfpPostRAOpsList.Writes[0-5, 13-18]>,
2221   SchedVar<A9LMAdr7Pred, A9WriteLMfpPostRAOpsList.Writes[0-6, 14-20]>,
2222   SchedVar<A9LMAdr8Pred, A9WriteLMfpPostRAOpsList.Writes[0-7, 15-22]>,
2239 // Distinguish between our multiple MI-level forms of the same
2242   "MI->getOperand(0).getReg().isVirtual()">;
2244   "MI->getOperand(0).getReg().isPhysical()">;
2252 //===----------------------------------------------------------------------===//
2253 // Resources for other (non-LDM/VLDM) Variants.
2270 // after the instruction issues, decreases producer latency by N-1.
2275 //===----------------------------------------------------------------------===//
2281 // This table follows the ARM Cortex-A9 Technical Reference Manuals,
2309 // TODO: For floating-point ops, we model the pipeline forwarding
2367 // Reuse the load-multiple variants for store-multiple because the
2481 // NEON floating-point
2499 // ===---------------------------------------------------------------------===//
2500 // Floating-point. Map target defined SchedReadWrite to processor specific ones
2522 // ===---------------------------------------------------------------------===//
2523 // Subtarget-specific overrides. Map opcodes to list of SchedReadWrite types.