Lines Matching +full:pre +full:- +full:multiply

1 //==- AArch64SchedCortexA55.td - ARM Cortex-A55 Scheduling Definitions -*- tablegen -*-=//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file defines the machine model for the ARM Cortex-A55 processors. Note
10 // that this schedule is currently used as the default for -mcpu=generic. As a
12 // Cortex-A55, instead aiming to be a good compromise between different cpus.
14 //===----------------------------------------------------------------------===//
16 // ===---------------------------------------------------------------------===//
17 // The following definitions describe the per-operand machine model.
20 // Cortex-A55 machine model for scheduling and other instruction cost heuristics.
22 let MicroOpBufferSize = 0; // The Cortex-A55 is an in-order processor
23 let IssueWidth = 2; // It dual-issues under most circumstances
27 // or 5. Setting it to 4 looked to be a good trade-off.
30 let CompleteModel = 0; // Covers instructions applicable to Cortex-A55.
38 //===----------------------------------------------------------------------===//
42 // Cortex-A55 is in-order.
45 def CortexA55UnitMAC : ProcResource<1> { let BufferSize = 0; } // Int MAC, 64-bit wide
52 // instructions, which can mostly be dual-issued; that's why for now we model
58 //===----------------------------------------------------------------------===//
59 // Subtarget-specific SchedWrite types
68 def : WriteRes<WriteISReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Shifted-Reg
69 def : WriteRes<WriteIEReg, [CortexA55UnitALU]> { let Latency = 3; } // ALU of Extended-Reg
74 def : WriteRes<WriteIM32, [CortexA55UnitMAC]> { let Latency = 4; } // 32-bit Multiply
75 def : WriteRes<WriteIM64, [CortexA55UnitMAC]> { let Latency = 4; } // 64-bit Multiply
90 // Vector Load - Vector loads take 1-5 cycles to issue. For the WriteVecLd
116 // Pre/Post Indexing - Performed as part of address generation
127 // Vector Store - Similar to vector loads, can take 1-3 cycles to issue.
207 //===----------------------------------------------------------------------===//
208 // Subtarget-specific SchedRead types.
215 // ALU - ALU input operands are generally needed in EX1. An operand produced in
217 // allowing back-to-back ALU operations such as add. If an operand requires
258 //===----------------------------------------------------------------------===//
259 // Subtarget-specific InstRWs.
261 //---
263 //---
268 def : InstRW<[WriteAdr, CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?W(pre|post)")>;
269 def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPS(pre|post)")>;
270 def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)(pre|post)")>;
271 def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQ(pre|post)")>;
273 //---
274 // Vector Loads - 64-bit per cycle
275 //---
276 // 1-element structures
299 // 2-element structures
310 // 3-element structures
321 // 4-element structures
322 def : InstRW<[CortexA55WriteVLD2], (instregex "LD4i(8|16|32|64)$")>; // load single 4-el structure to one lane of 4 regs.
323 def : InstRW<[CortexA55WriteVLD2], (instregex "LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>; // load single 4-el structure, replicate to all lanes of 4 regs.
324 def : InstRW<[CortexA55WriteVLD4], (instregex "LD4Fourv(8b|4h|2s|1d)$")>; // load multiple 4-el structures to 4 regs.
332 //---
334 //---
363 //---
365 //---
432 // ASIMD multiply, by element
435 // ASIMD multiply
438 // ASIMD multiply accumulate
442 // ASIMD multiply accumulate half
444 // ASIMD multiply accumulate long
446 // ASIMD multiply accumulate long #2
453 // ASIMD multiply long
455 // ASIMD polynomial (8x8) multiply long