1//=- AArch64SchedNeoverseV1.td - NeoverseV1 Scheduling Model -*- tablegen -*-=// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the scheduling model for the Arm Neoverse V1 processors. 10// 11// References: 12// - "Arm Neoverse V1 Software Optimization Guide" 13// - "Arm Neoverse V1 Platform: Unleashing a new performance tier for Arm-based computing" 14// https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/neoverse-v1-platform-a-new-performance-tier-for-arm 15// - "Neoverse V1" 16// https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_v1 17 18// 19//===----------------------------------------------------------------------===// 20 21def NeoverseV1Model : SchedMachineModel { 22 let IssueWidth = 15; // Maximum micro-ops dispatch rate. 23 let MicroOpBufferSize = 256; // Micro-op re-order buffer. 24 let LoadLatency = 4; // Optimistic load latency. 25 let MispredictPenalty = 11; // Cycles cost of branch mispredicted. 26 let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57. 27 let CompleteModel = 1; 28 29 list<Predicate> UnsupportedFeatures = !listconcat(SVE2Unsupported.F, 30 SMEUnsupported.F, 31 [HasMTE]); 32} 33 34//===----------------------------------------------------------------------===// 35// Define each kind of processor resource and number available on Neoverse V1. 36// Instructions are first fetched and then decoded into internal macro-ops 37// (MOPs). From there, the MOPs proceed through register renaming and dispatch 38// stages. A MOP can be split into one or more micro-ops further down the 39// pipeline, after the decode stage. Once dispatched, micro-ops wait for their 40// operands and issue out-of-order to one of the issue pipelines. Each issue 41// pipeline can accept one micro-op per cycle. 42 43let SchedModel = NeoverseV1Model in { 44 45// Define the issue ports. 46def V1UnitB : ProcResource<2>; // Branch 0/1 47def V1UnitS : ProcResource<2>; // Integer single cycle 0/1 48def V1UnitM0 : ProcResource<1>; // Integer multicycle 0 49def V1UnitM1 : ProcResource<1>; // Integer multicycle 1 50def V1UnitL01 : ProcResource<2>; // Load/Store 0/1 51def V1UnitL2 : ProcResource<1>; // Load 2 52def V1UnitD : ProcResource<2>; // Store data 0/1 53def V1UnitV0 : ProcResource<1>; // FP/ASIMD 0 54def V1UnitV1 : ProcResource<1>; // FP/ASIMD 1 55def V1UnitV2 : ProcResource<1>; // FP/ASIMD 2 56def V1UnitV3 : ProcResource<1>; // FP/ASIMD 3 57 58def V1UnitI : ProcResGroup<[V1UnitS, 59 V1UnitM0, V1UnitM1]>; // Integer units 60def V1UnitJ : ProcResGroup<[V1UnitS, V1UnitM0]>; // Integer 0-2 units 61def V1UnitM : ProcResGroup<[V1UnitM0, V1UnitM1]>; // Integer multicycle units 62def V1UnitL : ProcResGroup<[V1UnitL01, V1UnitL2]>; // Load units 63def V1UnitV : ProcResGroup<[V1UnitV0, V1UnitV1, 64 V1UnitV2, V1UnitV3]>; // FP/ASIMD units 65def V1UnitV01 : ProcResGroup<[V1UnitV0, V1UnitV1]>; // FP/ASIMD 0/1 units 66def V1UnitV02 : ProcResGroup<[V1UnitV0, V1UnitV2]>; // FP/ASIMD 0/2 units 67def V1UnitV13 : ProcResGroup<[V1UnitV1, V1UnitV3]>; // FP/ASIMD 1/3 units 68 69// Define commonly used read types. 70 71// No generic forwarding is provided for these types. 72def : ReadAdvance<ReadI, 0>; 73def : ReadAdvance<ReadISReg, 0>; 74def : ReadAdvance<ReadIEReg, 0>; 75def : ReadAdvance<ReadIM, 0>; 76def : ReadAdvance<ReadIMA, 0>; 77def : ReadAdvance<ReadID, 0>; 78def : ReadAdvance<ReadExtrHi, 0>; 79def : ReadAdvance<ReadAdrBase, 0>; 80def : ReadAdvance<ReadST, 0>; 81def : ReadAdvance<ReadVLD, 0>; 82 83def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } 84def : WriteRes<WriteBarrier, []> { let Latency = 1; } 85def : WriteRes<WriteHint, []> { let Latency = 1; } 86 87 88//===----------------------------------------------------------------------===// 89// Define generic 0 micro-op types 90 91let Latency = 0, NumMicroOps = 0 in 92def V1Write_0c_0Z : SchedWriteRes<[]>; 93 94 95//===----------------------------------------------------------------------===// 96// Define generic 1 micro-op types 97 98def V1Write_1c_1B : SchedWriteRes<[V1UnitB]> { let Latency = 1; } 99def V1Write_1c_1I : SchedWriteRes<[V1UnitI]> { let Latency = 1; } 100def V1Write_1c_1J : SchedWriteRes<[V1UnitJ]> { let Latency = 1; } 101def V1Write_4c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 4; } 102def V1Write_6c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6; } 103def V1Write_1c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 1; } 104def V1Write_4c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 4; } 105def V1Write_6c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 6; } 106def V1Write_2c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 2; } 107def V1Write_3c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 3; } 108def V1Write_4c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 4; } 109def V1Write_1c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 1; } 110def V1Write_2c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 2; } 111def V1Write_3c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 3; } 112def V1Write_5c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 5; } 113def V1Write_12c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 12; 114 let ResourceCycles = [5]; } 115def V1Write_20c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 20; 116 let ResourceCycles = [5]; } 117def V1Write_2c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 2; } 118def V1Write_3c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 3; } 119def V1Write_4c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 4; } 120def V1Write_5c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 5; } 121def V1Write_2c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 2; } 122def V1Write_3c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 3; } 123def V1Write_4c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 4; } 124def V1Write_6c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 6; } 125def V1Write_10c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 10; 126 let ResourceCycles = [7]; } 127def V1Write_12c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 12; 128 let ResourceCycles = [7]; } 129def V1Write_13c10_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 13; 130 let ResourceCycles = [10]; } 131def V1Write_15c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 15; 132 let ResourceCycles = [7]; } 133def V1Write_16c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 16; 134 let ResourceCycles = [7]; } 135def V1Write_20c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 20; 136 let ResourceCycles = [7]; } 137def V1Write_2c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 2; } 138def V1Write_3c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 3; } 139def V1Write_4c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4; } 140def V1Write_5c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 5; } 141def V1Write_3c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 3; } 142def V1Write_4c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 4; } 143def V1Write_7c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 7; 144 let ResourceCycles = [7]; } 145def V1Write_10c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10; 146 let ResourceCycles = [7]; } 147def V1Write_13c5_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13; 148 let ResourceCycles = [5]; } 149def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13; 150 let ResourceCycles = [11]; } 151def V1Write_15c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 15; 152 let ResourceCycles = [7]; } 153def V1Write_16c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16; 154 let ResourceCycles = [7]; } 155def V1Write_2c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 2; } 156def V1Write_3c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 3; } 157def V1Write_4c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 4; } 158def V1Write_2c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 2; } 159def V1Write_4c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 4; } 160 161//===----------------------------------------------------------------------===// 162// Define generic 2 micro-op types 163 164let Latency = 1, NumMicroOps = 2 in 165def V1Write_1c_1B_1S : SchedWriteRes<[V1UnitB, V1UnitS]>; 166let Latency = 6, NumMicroOps = 2 in 167def V1Write_6c_1B_1M0 : SchedWriteRes<[V1UnitB, V1UnitM0]>; 168let Latency = 3, NumMicroOps = 2 in 169def V1Write_3c_1I_1M : SchedWriteRes<[V1UnitI, V1UnitM]>; 170let Latency = 5, NumMicroOps = 2 in 171def V1Write_5c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]>; 172let Latency = 7, NumMicroOps = 2 in 173def V1Write_7c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]>; 174let Latency = 6, NumMicroOps = 2 in 175def V1Write_6c_2L : SchedWriteRes<[V1UnitL, V1UnitL]>; 176let Latency = 6, NumMicroOps = 2 in 177def V1Write_6c_1L_1M : SchedWriteRes<[V1UnitL, V1UnitM]>; 178let Latency = 8, NumMicroOps = 2 in 179def V1Write_8c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>; 180let Latency = 9, NumMicroOps = 2 in 181def V1Write_9c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>; 182let Latency = 11, NumMicroOps = 2 in 183def V1Write_11c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>; 184let Latency = 1, NumMicroOps = 2 in 185def V1Write_1c_1L01_1D : SchedWriteRes<[V1UnitL01, V1UnitD]>; 186let Latency = 6, NumMicroOps = 2 in 187def V1Write_6c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]>; 188let Latency = 7, NumMicroOps = 2 in 189def V1Write_7c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]>; 190let Latency = 2, NumMicroOps = 2 in 191def V1Write_2c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>; 192let Latency = 4, NumMicroOps = 2 in 193def V1Write_4c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>; 194let Latency = 6, NumMicroOps = 2 in 195def V1Write_6c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>; 196let Latency = 2, NumMicroOps = 2 in 197def V1Write_2c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>; 198let Latency = 4, NumMicroOps = 2 in 199def V1Write_4c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>; 200let Latency = 2, NumMicroOps = 2 in 201def V1Write_2c_2M0 : SchedWriteRes<[V1UnitM0, V1UnitM0]>; 202let Latency = 3, NumMicroOps = 2 in 203def V1Write_3c_2M0 : SchedWriteRes<[V1UnitM0, V1UnitM0]>; 204let Latency = 9, NumMicroOps = 2 in 205def V1Write_9c_1M0_1L : SchedWriteRes<[V1UnitM0, V1UnitL]>; 206let Latency = 5, NumMicroOps = 2 in 207def V1Write_5c_1M0_1V : SchedWriteRes<[V1UnitM0, V1UnitV]>; 208let Latency = 4, NumMicroOps = 2 in 209def V1Write_4c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]>; 210let Latency = 7, NumMicroOps = 2 in 211def V1Write_7c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV1]>; 212let Latency = 5, NumMicroOps = 2 in 213def V1Write_5c_1M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitV01]>; 214let Latency = 6, NumMicroOps = 2 in 215def V1Write_6c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]>; 216let Latency = 9, NumMicroOps = 2 in 217def V1Write_9c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]>; 218let Latency = 4, NumMicroOps = 2 in 219def V1Write_4c_2V : SchedWriteRes<[V1UnitV, V1UnitV]>; 220let Latency = 8, NumMicroOps = 2 in 221def V1Write_8c_1V_1V01 : SchedWriteRes<[V1UnitV, V1UnitV01]>; 222let Latency = 4, NumMicroOps = 2 in 223def V1Write_4c_2V0 : SchedWriteRes<[V1UnitV0, V1UnitV0]>; 224let Latency = 5, NumMicroOps = 2 in 225def V1Write_5c_2V0 : SchedWriteRes<[V1UnitV0, V1UnitV0]>; 226let Latency = 2, NumMicroOps = 2 in 227def V1Write_2c_2V01 : SchedWriteRes<[V1UnitV01, V1UnitV01]>; 228let Latency = 4, NumMicroOps = 2 in 229def V1Write_4c_2V01 : SchedWriteRes<[V1UnitV01, V1UnitV01]>; 230let Latency = 4, NumMicroOps = 2 in 231def V1Write_4c_2V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>; 232let Latency = 6, NumMicroOps = 2 in 233def V1Write_6c_2V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>; 234let Latency = 4, NumMicroOps = 2 in 235def V1Write_4c_1V13_1V : SchedWriteRes<[V1UnitV13, V1UnitV]>; 236let Latency = 4, NumMicroOps = 2 in 237def V1Write_4c_2V13 : SchedWriteRes<[V1UnitV13, V1UnitV13]>; 238 239//===----------------------------------------------------------------------===// 240// Define generic 3 micro-op types 241 242let Latency = 2, NumMicroOps = 3 in 243def V1Write_2c_1I_1L01_1V01 : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitV01]>; 244let Latency = 7, NumMicroOps = 3 in 245def V1Write_7c_2M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitM0, V1UnitV01]>; 246let Latency = 8, NumMicroOps = 3 in 247def V1Write_8c_1L_2V : SchedWriteRes<[V1UnitL, V1UnitV, V1UnitV]>; 248let Latency = 6, NumMicroOps = 3 in 249def V1Write_6c_3L : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL]>; 250let Latency = 2, NumMicroOps = 3 in 251def V1Write_2c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>; 252let Latency = 4, NumMicroOps = 3 in 253def V1Write_4c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>; 254let Latency = 2, NumMicroOps = 3 in 255def V1Write_2c_2L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitV01]>; 256let Latency = 6, NumMicroOps = 3 in 257def V1Write_6c_3V : SchedWriteRes<[V1UnitV, V1UnitV, V1UnitV]>; 258let Latency = 4, NumMicroOps = 3 in 259def V1Write_4c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>; 260let Latency = 6, NumMicroOps = 3 in 261def V1Write_6c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>; 262let Latency = 8, NumMicroOps = 3 in 263def V1Write_8c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>; 264 265//===----------------------------------------------------------------------===// 266// Define generic 4 micro-op types 267 268let Latency = 8, NumMicroOps = 4 in 269def V1Write_8c_2M0_2V0 : SchedWriteRes<[V1UnitM0, V1UnitM0, 270 V1UnitV0, V1UnitV0]>; 271let Latency = 7, NumMicroOps = 4 in 272def V1Write_7c_4L : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, V1UnitL]>; 273let Latency = 8, NumMicroOps = 4 in 274def V1Write_8c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL, 275 V1UnitV, V1UnitV]>; 276let Latency = 9, NumMicroOps = 4 in 277def V1Write_9c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL, 278 V1UnitV, V1UnitV]>; 279let Latency = 11, NumMicroOps = 4 in 280def V1Write_11c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL, 281 V1UnitV, V1UnitV]>; 282let Latency = 10, NumMicroOps = 4 in 283def V1Write_10c_2L01_2V : SchedWriteRes<[V1UnitL01, V1UnitL01, 284 V1UnitV, V1UnitV]>; 285let Latency = 2, NumMicroOps = 4 in 286def V1Write_2c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 287 V1UnitV01, V1UnitV01]>; 288let Latency = 4, NumMicroOps = 4 in 289def V1Write_4c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 290 V1UnitV01, V1UnitV01]>; 291let Latency = 8, NumMicroOps = 4 in 292def V1Write_8c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 293 V1UnitV01, V1UnitV01]>; 294let Latency = 9, NumMicroOps = 4 in 295def V1Write_9c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 296 V1UnitV01, V1UnitV01]>; 297let Latency = 10, NumMicroOps = 4 in 298def V1Write_10c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 299 V1UnitV01, V1UnitV01]>; 300let Latency = 10, NumMicroOps = 4 in 301def V1Write_10c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01, 302 V1UnitV1, V1UnitV1]>; 303let Latency = 12, NumMicroOps = 4 in 304def V1Write_12c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01, 305 V1UnitV1, V1UnitV1]>; 306let Latency = 6, NumMicroOps = 4 in 307def V1Write_6c_4V0 : SchedWriteRes<[V1UnitV0, V1UnitV0, 308 V1UnitV0, V1UnitV0]>; 309let Latency = 12, NumMicroOps = 4 in 310def V1Write_12c_4V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, 311 V1UnitV01, V1UnitV01]>; 312let Latency = 6, NumMicroOps = 4 in 313def V1Write_6c_4V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>; 314 315//===----------------------------------------------------------------------===// 316// Define generic 5 micro-op types 317 318let Latency = 8, NumMicroOps = 5 in 319def V1Write_8c_2L_3V : SchedWriteRes<[V1UnitL, V1UnitL, 320 V1UnitV, V1UnitV, V1UnitV]>; 321let Latency = 14, NumMicroOps = 5 in 322def V1Write_14c_1V_1V0_2V1_1V13 : SchedWriteRes<[V1UnitV, 323 V1UnitV0, 324 V1UnitV1, V1UnitV1, 325 V1UnitV13]>; 326let Latency = 9, NumMicroOps = 5 in 327def V1Write_9c_1V_4V01 : SchedWriteRes<[V1UnitV, 328 V1UnitV01, V1UnitV01, 329 V1UnitV01, V1UnitV01]>; 330let Latency = 6, NumMicroOps = 5 in 331def V1Write_6c_5V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, 332 V1UnitV01, V1UnitV01, V1UnitV01]>; 333 334//===----------------------------------------------------------------------===// 335// Define generic 6 micro-op types 336 337let Latency = 6, NumMicroOps = 6 in 338def V1Write_6c_3L_3V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, 339 V1UnitV, V1UnitV, V1UnitV]>; 340let Latency = 8, NumMicroOps = 6 in 341def V1Write_8c_3L_3V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, 342 V1UnitV, V1UnitV, V1UnitV]>; 343let Latency = 2, NumMicroOps = 6 in 344def V1Write_2c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 345 V1UnitV01, V1UnitV01, V1UnitV01]>; 346let Latency = 5, NumMicroOps = 6 in 347def V1Write_5c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 348 V1UnitV01, V1UnitV01, V1UnitV01]>; 349let Latency = 6, NumMicroOps = 6 in 350def V1Write_6c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 351 V1UnitV01, V1UnitV01, V1UnitV01]>; 352let Latency = 11, NumMicroOps = 6 in 353def V1Write_11c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 354 V1UnitV01, V1UnitV01, V1UnitV01]>; 355let Latency = 11, NumMicroOps = 6 in 356def V1Write_11c_1V_5V01 : SchedWriteRes<[V1UnitV, 357 V1UnitV01, V1UnitV01, 358 V1UnitV01, V1UnitV01, V1UnitV01]>; 359let Latency = 13, NumMicroOps = 6 in 360def V1Write_13c_6V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01, 361 V1UnitV01, V1UnitV01, V1UnitV01]>; 362 363//===----------------------------------------------------------------------===// 364// Define generic 7 micro-op types 365 366let Latency = 8, NumMicroOps = 7 in 367def V1Write_8c_3L_4V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, 368 V1UnitV, V1UnitV, V1UnitV, V1UnitV]>; 369let Latency = 8, NumMicroOps = 7 in 370def V1Write_13c_3L01_1S_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 371 V1UnitS, 372 V1UnitV01, V1UnitV01, V1UnitV01]>; 373 374//===----------------------------------------------------------------------===// 375// Define generic 8 micro-op types 376 377let Latency = 9, NumMicroOps = 8 in 378def V1Write_9c_4L_4V : SchedWriteRes<[V1UnitL, V1UnitL, 379 V1UnitL, V1UnitL, 380 V1UnitV, V1UnitV, 381 V1UnitV, V1UnitV]>; 382let Latency = 2, NumMicroOps = 8 in 383def V1Write_2c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 384 V1UnitL01, V1UnitL01, 385 V1UnitV01, V1UnitV01, 386 V1UnitV01, V1UnitV01]>; 387let Latency = 4, NumMicroOps = 8 in 388def V1Write_4c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 389 V1UnitL01, V1UnitL01, 390 V1UnitV01, V1UnitV01, 391 V1UnitV01, V1UnitV01]>; 392let Latency = 12, NumMicroOps = 8 in 393def V1Write_12c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 394 V1UnitL01, V1UnitL01, 395 V1UnitV01, V1UnitV01, 396 V1UnitV01, V1UnitV01]>; 397 398//===----------------------------------------------------------------------===// 399// Define generic 10 micro-op types 400 401let Latency = 13, NumMicroOps = 10 in 402def V1Write_13c_4L01_2S_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 403 V1UnitL01, V1UnitL01, 404 V1UnitS, V1UnitS, 405 V1UnitV01, V1UnitV01, 406 V1UnitV01, V1UnitV01]>; 407let Latency = 7, NumMicroOps = 10 in 408def V1Write_7c_5L01_5V : SchedWriteRes<[V1UnitL01, V1UnitL01, 409 V1UnitL01, V1UnitL01, V1UnitL01, 410 V1UnitV, V1UnitV, 411 V1UnitV, V1UnitV, V1UnitV]>; 412let Latency = 11, NumMicroOps = 10 in 413def V1Write_11c_10V0 : SchedWriteRes<[V1UnitV0, 414 V1UnitV0, V1UnitV0, V1UnitV0, 415 V1UnitV0, V1UnitV0, V1UnitV0, 416 V1UnitV0, V1UnitV0, V1UnitV0]>; 417 418//===----------------------------------------------------------------------===// 419// Define generic 12 micro-op types 420 421let Latency = 7, NumMicroOps = 12 in 422def V1Write_7c_6L01_6V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 423 V1UnitL01, V1UnitL01, V1UnitL01, 424 V1UnitV01, V1UnitV01, V1UnitV01, 425 V1UnitV01, V1UnitV01, V1UnitV01]>; 426 427//===----------------------------------------------------------------------===// 428// Define generic 15 micro-op types 429 430let Latency = 7, NumMicroOps = 15 in 431def V1Write_7c_5L01_5S_5V : SchedWriteRes<[V1UnitL01, V1UnitL01, 432 V1UnitL01, V1UnitL01, V1UnitL01, 433 V1UnitS, V1UnitS, 434 V1UnitS, V1UnitS, V1UnitS, 435 V1UnitV, V1UnitV, 436 V1UnitV, V1UnitV, V1UnitV]>; 437 438 439//===----------------------------------------------------------------------===// 440// Define generic 18 micro-op types 441 442let Latency = 19, NumMicroOps = 18 in 443def V1Write_11c_9L01_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 444 V1UnitL01, V1UnitL01, V1UnitL01, 445 V1UnitL01, V1UnitL01, V1UnitL01, 446 V1UnitV, V1UnitV, V1UnitV, 447 V1UnitV, V1UnitV, V1UnitV, 448 V1UnitV, V1UnitV, V1UnitV]>; 449let Latency = 19, NumMicroOps = 18 in 450def V1Write_19c_18V0 : SchedWriteRes<[V1UnitV0, V1UnitV0, V1UnitV0, 451 V1UnitV0, V1UnitV0, V1UnitV0, 452 V1UnitV0, V1UnitV0, V1UnitV0, 453 V1UnitV0, V1UnitV0, V1UnitV0, 454 V1UnitV0, V1UnitV0, V1UnitV0, 455 V1UnitV0, V1UnitV0, V1UnitV0]>; 456 457//===----------------------------------------------------------------------===// 458// Define generic 27 micro-op types 459 460let Latency = 11, NumMicroOps = 27 in 461def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 462 V1UnitL01, V1UnitL01, V1UnitL01, 463 V1UnitL01, V1UnitL01, V1UnitL01, 464 V1UnitS, V1UnitS, V1UnitS, 465 V1UnitS, V1UnitS, V1UnitS, 466 V1UnitS, V1UnitS, V1UnitS, 467 V1UnitV, V1UnitV, V1UnitV, 468 V1UnitV, V1UnitV, V1UnitV, 469 V1UnitV, V1UnitV, V1UnitV]>; 470 471 472// Miscellaneous Instructions 473// ----------------------------------------------------------------------------- 474 475// COPY 476def : InstRW<[V1Write_1c_1I], (instrs COPY)>; 477 478// MSR 479def : WriteRes<WriteSys, []> { let Latency = 1; } 480 481 482// Branch Instructions 483// ----------------------------------------------------------------------------- 484 485// Branch, immed 486// Compare and branch 487def : SchedAlias<WriteBr, V1Write_1c_1B>; 488 489// Branch, register 490def : SchedAlias<WriteBrReg, V1Write_1c_1B>; 491 492// Branch and link, immed 493// Branch and link, register 494def : InstRW<[V1Write_1c_1B_1S], (instrs BL, BLR)>; 495 496// Compare and branch 497def : InstRW<[V1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>; 498 499 500// Arithmetic and Logical Instructions 501// ----------------------------------------------------------------------------- 502 503// ALU, basic 504// Conditional compare 505// Conditional select 506// Logical, basic 507// Address generation 508// Count leading 509// Reverse bits/bytes 510// Move immediate 511def : SchedAlias<WriteI, V1Write_1c_1I>; 512 513// ALU, basic, flagset 514def : InstRW<[V1Write_1c_1J], 515 (instregex "^(ADD|SUB)S[WX]r[ir]$", 516 "^(ADC|SBC)S[WX]r$", 517 "^ANDS[WX]ri$", 518 "^(AND|BIC)S[WX]rr$")>; 519 520// ALU, extend and shift 521def : SchedAlias<WriteIEReg, V1Write_2c_1M>; 522 523// Arithmetic, LSL shift, shift <= 4 524// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 525def V1WriteISReg : SchedWriteVariant< 526 [SchedVar<IsCheapLSL, [V1Write_1c_1I]>, 527 SchedVar<NoSchedPred, [V1Write_2c_1M]>]>; 528def : SchedAlias<WriteISReg, V1WriteISReg>; 529 530// Arithmetic, flagset, LSL shift, shift <= 4 531// Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 532def V1WriteISRegS : SchedWriteVariant< 533 [SchedVar<IsCheapLSL, [V1Write_1c_1J]>, 534 SchedVar<NoSchedPred, [V1Write_2c_1M]>]>; 535def : InstRW<[V1WriteISRegS], 536 (instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>; 537 538// Logical, shift, no flagset 539def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>; 540 541// Logical, shift, flagset 542def : InstRW<[V1Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>; 543 544// Flag manipulation instructions 545def : InstRW<[V1Write_1c_1J], (instrs SETF8, SETF16, RMIF, CFINV)>; 546 547 548// Divide and multiply instructions 549// ----------------------------------------------------------------------------- 550 551// Divide 552def : SchedAlias<WriteID32, V1Write_12c5_1M0>; 553def : SchedAlias<WriteID64, V1Write_20c5_1M0>; 554 555// Multiply 556// Multiply accumulate 557// Multiply accumulate, long 558// Multiply long 559def V1WriteIM : SchedWriteVariant< 560 [SchedVar<NeoverseMULIdiomPred, [V1Write_2c_1M]>, 561 SchedVar<NoSchedPred, [V1Write_2c_1M0]>]>; 562def : SchedAlias<WriteIM32, V1WriteIM>; 563def : SchedAlias<WriteIM64, V1WriteIM>; 564 565// Multiply high 566def : InstRW<[V1Write_3c_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>; 567 568 569// Pointer Authentication Instructions (v8.3 PAC) 570// ----------------------------------------------------------------------------- 571 572// Authenticate data address 573// Authenticate instruction address 574// Compute pointer authentication code for data address 575// Compute pointer authentication code, using generic key 576// Compute pointer authentication code for instruction address 577def : InstRW<[V1Write_5c_1M0], (instregex "^AUT", 578 "^PAC")>; 579 580// Branch and link, register, with pointer authentication 581// Branch, register, with pointer authentication 582// Branch, return, with pointer authentication 583def : InstRW<[V1Write_6c_1B_1M0], (instregex "^BL?RA[AB]Z?$", 584 "^E?RETA[AB]$")>; 585 586// Load register, with pointer authentication 587def : InstRW<[V1Write_9c_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>; 588 589// Strip pointer authentication code 590def : InstRW<[V1Write_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>; 591 592 593// Miscellaneous data-processing instructions 594// ----------------------------------------------------------------------------- 595 596// Bitfield extract, one reg 597// Bitfield extract, two regs 598def V1WriteExtr : SchedWriteVariant< 599 [SchedVar<IsRORImmIdiomPred, [V1Write_1c_1I]>, 600 SchedVar<NoSchedPred, [V1Write_3c_1I_1M]>]>; 601def : SchedAlias<WriteExtr, V1WriteExtr>; 602 603// Bitfield move, basic 604// Variable shift 605def : SchedAlias<WriteIS, V1Write_1c_1I>; 606 607// Bitfield move, insert 608def : InstRW<[V1Write_2c_1M], (instregex "^BFM[WX]ri$")>; 609 610// Move immediate 611def : SchedAlias<WriteImm, V1Write_1c_1I>; 612 613 614// Load instructions 615// ----------------------------------------------------------------------------- 616 617// Load register, immed offset 618def : SchedAlias<WriteLD, V1Write_4c_1L>; 619 620// Load register, immed offset, index 621def : SchedAlias<WriteLDIdx, V1Write_4c_1L>; 622def : SchedAlias<WriteAdr, V1Write_1c_1I>; 623 624// Load pair, immed offset 625def : SchedAlias<WriteLDHi, V1Write_4c_1L>; 626def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z], (instrs LDPWi, LDNPWi)>; 627def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z, WriteAdr], 628 (instrs LDPWpost, LDPWpre)>; 629 630// Load pair, signed immed offset, signed words 631def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z], (instrs LDPSWi)>; 632 633// Load pair, immed post or pre-index, signed words 634def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z, WriteAdr], 635 (instrs LDPSWpost, LDPSWpre)>; 636 637 638// Store instructions 639// ----------------------------------------------------------------------------- 640 641// Store register, immed offset 642def : SchedAlias<WriteST, V1Write_1c_1L01_1D>; 643 644// Store register, immed offset, index 645def : SchedAlias<WriteSTIdx, V1Write_1c_1L01_1D>; 646 647// Store pair, immed offset 648def : SchedAlias<WriteSTP, V1Write_1c_1L01_1D>; 649 650 651// FP data processing instructions 652// ----------------------------------------------------------------------------- 653 654// FP absolute value 655// FP arithmetic 656// FP min/max 657// FP negate 658def : SchedAlias<WriteF, V1Write_2c_1V>; 659 660// FP compare 661def : SchedAlias<WriteFCmp, V1Write_2c_1V0>; 662 663// FP divide 664// FP square root 665def : SchedAlias<WriteFDiv, V1Write_10c7_1V02>; 666 667// FP divide, H-form 668// FP square root, H-form 669def : InstRW<[V1Write_7c7_1V02], (instrs FDIVHrr, FSQRTHr)>; 670 671// FP divide, S-form 672// FP square root, S-form 673def : InstRW<[V1Write_10c7_1V02], (instrs FDIVSrr, FSQRTSr)>; 674 675// FP divide, D-form 676def : InstRW<[V1Write_15c7_1V02], (instrs FDIVDrr)>; 677 678// FP square root, D-form 679def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTDr)>; 680 681// FP multiply 682def : SchedAlias<WriteFMul, V1Write_3c_1V>; 683 684// FP multiply accumulate 685def : InstRW<[V1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>; 686 687// FP round to integral 688def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$", 689 "^FRINT(32|64)[XZ][SD]r$")>; 690 691// FP select 692def : InstRW<[V1Write_2c_1V01], (instregex "^FCSEL[HSD]rrr$")>; 693 694 695// FP miscellaneous instructions 696// ----------------------------------------------------------------------------- 697 698// FP convert, from gen to vec reg 699def : InstRW<[V1Write_3c_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>; 700 701// FP convert, from vec to gen reg 702def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>; 703 704// FP convert, Javascript from vec to gen reg 705def : InstRW<[V1Write_3c_1V0], (instrs FJCVTZS)>; 706 707// FP convert, from vec to vec reg 708def : SchedAlias<WriteFCvt, V1Write_3c_1V02>; 709 710// FP move, immed 711def : SchedAlias<WriteFImm, V1Write_2c_1V>; 712 713// FP move, register 714def : InstRW<[V1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>; 715 716// FP transfer, from gen to low half of vec reg 717def : InstRW<[V1Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>; 718 719// FP transfer, from gen to high half of vec reg 720def : InstRW<[V1Write_5c_1M0_1V], (instrs FMOVXDHighr)>; 721 722// FP transfer, from vec to gen reg 723def : SchedAlias<WriteFCopy, V1Write_2c_1V1>; 724 725 726// FP load instructions 727// ----------------------------------------------------------------------------- 728 729// Load vector reg, literal, S/D/Q forms 730// Load vector reg, unscaled immed 731// Load vector reg, unsigned immed 732def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$", 733 "^LDUR[BHSDQ]i$", 734 "^LDR[BHSDQ]ui$")>; 735 736// Load vector reg, immed post-index 737// Load vector reg, immed pre-index 738def : InstRW<[V1Write_6c_1L, WriteAdr], 739 (instregex "^LDR[BHSDQ](post|pre)$")>; 740 741// Load vector reg, register offset, basic 742// Load vector reg, register offset, scale, S/D-form 743// Load vector reg, register offset, extend 744// Load vector reg, register offset, extend, scale, S/D-form 745def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>; 746 747// Load vector reg, register offset, scale, H/Q-form 748// Load vector reg, register offset, extend, scale, H/Q-form 749def : InstRW<[V1Write_7c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>; 750 751// Load vector pair, immed offset, S/D-form 752def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z], (instregex "^LDN?P[SD]i$")>; 753 754// Load vector pair, immed offset, Q-form 755def : InstRW<[V1Write_6c_1L, WriteLDHi], (instrs LDPQi, LDNPQi)>; 756 757// Load vector pair, immed post-index, S/D-form 758// Load vector pair, immed pre-index, S/D-form 759def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z, WriteAdr], 760 (instregex "^LDP[SD](pre|post)$")>; 761 762// Load vector pair, immed post-index, Q-form 763// Load vector pair, immed pre-index, Q-form 764def : InstRW<[V1Write_6c_1L, WriteLDHi, WriteAdr], 765 (instrs LDPQpost, LDPQpre)>; 766 767 768// FP store instructions 769// ----------------------------------------------------------------------------- 770 771// Store vector reg, unscaled immed, B/H/S/D/Q-form 772def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>; 773 774// Store vector reg, immed post-index, B/H/S/D/Q-form 775// Store vector reg, immed pre-index, B/H/S/D/Q-form 776def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr], 777 (instregex "^STR[BHSDQ](pre|post)$")>; 778 779// Store vector reg, unsigned immed, B/H/S/D/Q-form 780def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>; 781 782// Store vector reg, register offset, basic, B/S/D-form 783// Store vector reg, register offset, scale, B/S/D-form 784// Store vector reg, register offset, extend, B/S/D-form 785// Store vector reg, register offset, extend, scale, B/S/D-form 786def : InstRW<[V1Write_2c_1L01_1V01, ReadAdrBase], 787 (instregex "^STR[BSD]ro[WX]$")>; 788 789// Store vector reg, register offset, basic, H/Q-form 790// Store vector reg, register offset, scale, H/Q-form 791// Store vector reg, register offset, extend, H/Q-form 792// Store vector reg, register offset, extend, scale, H/Q-form 793def : InstRW<[V1Write_2c_1I_1L01_1V01, ReadAdrBase], 794 (instregex "^STR[HQ]ro[WX]$")>; 795 796// Store vector pair, immed offset, S/D/Q-form 797def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STN?P[SDQ]i$")>; 798 799// Store vector pair, immed post-index, S/D-form 800// Store vector pair, immed pre-index, S/D-form 801def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr], 802 (instregex "^STP[SD](pre|post)$")>; 803 804// Store vector pair, immed post-index, Q-form 805// Store vector pair, immed pre-index, Q-form 806def : InstRW<[V1Write_2c_2L01_1V01, WriteAdr], (instrs STPQpre, STPQpost)>; 807 808 809// ASIMD integer instructions 810// ----------------------------------------------------------------------------- 811 812// ASIMD absolute diff 813// ASIMD absolute diff long 814// ASIMD arith, basic 815// ASIMD arith, complex 816// ASIMD arith, pair-wise 817// ASIMD compare 818// ASIMD logical 819// ASIMD max/min, basic and pair-wise 820def : SchedAlias<WriteVd, V1Write_2c_1V>; 821def : SchedAlias<WriteVq, V1Write_2c_1V>; 822 823// ASIMD absolute diff accum 824// ASIMD absolute diff accum long 825// ASIMD pairwise add and accumulate long 826def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]ABAL?v", "^[SU]ADALPv")>; 827 828// ASIMD arith, reduce, 4H/4S 829// ASIMD max/min, reduce, 4H/4S 830def : InstRW<[V1Write_2c_1V13], (instregex "^(ADD|[SU]ADDL)Vv4(i16|i32)v$", 831 "^[SU](MAX|MIN)Vv4(i16|i32)v$")>; 832 833// ASIMD arith, reduce, 8B/8H 834// ASIMD max/min, reduce, 8B/8H 835def : InstRW<[V1Write_4c_1V13_1V], (instregex "^(ADD|[SU]ADDL)Vv8(i8|i16)v$", 836 "^[SU](MAX|MIN)Vv8(i8|i16)v$")>; 837 838// ASIMD arith, reduce, 16B 839// ASIMD max/min, reduce, 16B 840def : InstRW<[V1Write_4c_2V13], (instregex "^(ADD|[SU]ADDL)Vv16i8v$", 841 "[SU](MAX|MIN)Vv16i8v$")>; 842 843// ASIMD dot product 844// ASIMD dot product using signed and unsigned integers 845def : InstRW<[V1Write_2c_1V], (instregex "^([SU]|SU|US)DOT(lane)?v(8|16)i8$")>; 846 847// ASIMD matrix multiply- accumulate 848def : InstRW<[V1Write_3c_1V], (instrs SMMLA, UMMLA, USMMLA)>; 849 850// ASIMD multiply 851// ASIMD multiply accumulate 852// ASIMD multiply accumulate long 853// ASIMD multiply accumulate high 854// ASIMD multiply accumulate saturating long 855def : InstRW<[V1Write_4c_1V02], 856 (instregex "^MUL(v[148]i16|v[124]i32)$", 857 "^SQR?DMULH(v[48]i16|v[24]i32)$", 858 "^ML[AS](v[148]i16|v[124]i32)$", 859 "^[SU]ML[AS]Lv", 860 "^SQRDML[AS]H(v[148]i16|v[124]i32)$", 861 "^SQDML[AS]Lv")>; 862 863// ASIMD multiply/multiply long (8x8) polynomial 864def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>; 865 866// ASIMD multiply long 867def : InstRW<[V1Write_3c_1V02], (instregex "^([SU]|SQD)MULLv")>; 868 869// ASIMD shift accumulate 870// ASIMD shift by immed, complex 871// ASIMD shift by register, complex 872def : InstRW<[V1Write_4c_1V13], 873 (instregex "^[SU]R?SRAv", 874 "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$", 875 "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$", 876 "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv", 877 "^[SU]Q?RSHLv", "^[SU]QSHLv")>; 878 879// ASIMD shift by immed, basic 880// ASIMD shift by immed and insert, basic 881// ASIMD shift by register, basic 882def : InstRW<[V1Write_2c_1V13], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv", 883 "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>; 884 885 886// ASIMD FP instructions 887// ----------------------------------------------------------------------------- 888 889// ASIMD FP absolute value/difference 890// ASIMD FP arith, normal 891// ASIMD FP compare 892// ASIMD FP complex add 893// ASIMD FP max/min, normal 894// ASIMD FP max/min, pairwise 895// ASIMD FP negate 896// Covered by "SchedAlias (WriteV[dq]...)" above 897 898// ASIMD FP complex multiply add 899// ASIMD FP multiply accumulate 900def : InstRW<[V1Write_4c_1V], (instregex "^FCADD(v[48]f16|v[24]f32|v2f64)$", 901 "^FML[AS]v")>; 902 903// ASIMD FP convert, long (F16 to F32) 904def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTLv[48]i16$")>; 905 906// ASIMD FP convert, long (F32 to F64) 907def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTLv[24]i32$")>; 908 909// ASIMD FP convert, narrow (F32 to F16) 910def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTNv[48]i16$")>; 911 912// ASIMD FP convert, narrow (F64 to F32) 913def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTNv[24]i32$", 914 "^FCVTXN(v[24]f32|v1i64)$")>; 915 916// ASIMD FP convert, other, D-form F32 and Q-form F64 917def : InstRW<[V1Write_3c_1V02], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$", 918 "^[SU]CVTFv2f(32|64)$")>; 919 920// ASIMD FP convert, other, D-form F16 and Q-form F32 921def : InstRW<[V1Write_4c_2V02], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$", 922 "^[SU]CVTFv4f(16|32)$")>; 923 924// ASIMD FP convert, other, Q-form F16 925def : InstRW<[V1Write_6c_4V02], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$", 926 "^[SU]CVTFv8f16$")>; 927 928// ASIMD FP divide, D-form, F16 929// ASIMD FP square root, D-form, F16 930def : InstRW<[V1Write_7c7_1V02], (instrs FDIVv4f16, FSQRTv4f16)>; 931 932// ASIMD FP divide, F32 933// ASIMD FP square root, F32 934def : InstRW<[V1Write_10c7_1V02], (instrs FDIVv2f32, FDIVv4f32, 935 FSQRTv2f32, FSQRTv4f32)>; 936 937// ASIMD FP divide, Q-form, F16 938def : InstRW<[V1Write_13c5_1V02], (instrs FDIVv8f16)>; 939 940// ASIMD FP divide, Q-form, F64 941def : InstRW<[V1Write_15c7_1V02], (instrs FDIVv2f64)>; 942 943// ASIMD FP square root, Q-form, F16 944def : InstRW<[V1Write_13c11_1V02], (instrs FSQRTv8f16)>; 945 946// ASIMD FP square root, Q-form, F64 947def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTv2f64)>; 948 949// ASIMD FP max/min, reduce, F32 and D-form F16 950def : InstRW<[V1Write_4c_2V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>; 951 952// ASIMD FP max/min, reduce, Q-form F16 953def : InstRW<[V1Write_6c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>; 954 955// ASIMD FP multiply 956def : InstRW<[V1Write_3c_1V], (instregex "^FMULX?v")>; 957 958// ASIMD FP multiply accumulate long 959def : InstRW<[V1Write_5c_1V], (instregex "^FML[AS]L2?v")>; 960 961// ASIMD FP round, D-form F32 and Q-form F64 962def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>; 963 964// ASIMD FP round, D-form F16 and Q-form F32 965def : InstRW<[V1Write_4c_2V02], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>; 966 967// ASIMD FP round, Q-form F16 968def : InstRW<[V1Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>; 969 970 971// ASIMD BF instructions 972// ----------------------------------------------------------------------------- 973 974// ASIMD convert, F32 to BF16 975def : InstRW<[V1Write_4c_1V02], (instrs BFCVTN, BFCVTN2)>; 976 977// ASIMD dot product 978def : InstRW<[V1Write_4c_1V], (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>; 979 980// ASIMD matrix multiply accumulate 981def : InstRW<[V1Write_5c_1V], (instrs BFMMLA)>; 982 983// ASIMD multiply accumulate long 984def : InstRW<[V1Write_4c_1V], (instregex "^BFMLAL[BT](Idx)?$")>; 985 986// Scalar convert, F32 to BF16 987def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>; 988 989 990// ASIMD miscellaneous instructions 991// ----------------------------------------------------------------------------- 992 993// ASIMD bit reverse 994// ASIMD bitwise insert 995// ASIMD count 996// ASIMD duplicate, element 997// ASIMD extract 998// ASIMD extract narrow 999// ASIMD insert, element to element 1000// ASIMD move, FP immed 1001// ASIMD move, integer immed 1002// ASIMD reverse 1003// ASIMD table lookup, 1 or 2 table regs 1004// ASIMD table lookup extension, 1 table reg 1005// ASIMD transfer, element to gen reg 1006// ASIMD transpose 1007// ASIMD unzip/zip 1008// Covered by "SchedAlias (WriteV[dq]...)" above 1009 1010// ASIMD duplicate, gen reg 1011def : InstRW<[V1Write_3c_1M0], 1012 (instregex "^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>; 1013 1014// ASIMD extract narrow, saturating 1015def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>; 1016 1017// ASIMD reciprocal and square root estimate, D-form U32 1018// ASIMD reciprocal and square root estimate, D-form F32 and F64 1019def : InstRW<[V1Write_3c_1V02], (instrs URECPEv2i32, 1020 URSQRTEv2i32, 1021 FRECPEv1i32, FRECPEv2f32, FRECPEv1i64, 1022 FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64)>; 1023 1024// ASIMD reciprocal and square root estimate, Q-form U32 1025// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 and F64 1026def : InstRW<[V1Write_4c_1V02], (instrs URECPEv4i32, 1027 URSQRTEv4i32, 1028 FRECPEv1f16, FRECPEv4f16, 1029 FRECPEv4f32, FRECPEv2f64, 1030 FRSQRTEv1f16, FRSQRTEv4f16, 1031 FRSQRTEv4f32, FRSQRTEv2f64)>; 1032 1033// ASIMD reciprocal and square root estimate, Q-form F16 1034def : InstRW<[V1Write_6c_2V02], (instrs FRECPEv8f16, 1035 FRSQRTEv8f16)>; 1036 1037// ASIMD reciprocal exponent 1038def : InstRW<[V1Write_3c_1V02], (instrs FRECPXv1f16, FRECPXv1i32, FRECPXv1i64)>; 1039 1040// ASIMD reciprocal step 1041def : InstRW<[V1Write_4c_1V], (instregex "^FRECPS(16|32|64)$", "^FRECPSv", 1042 "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>; 1043 1044// ASIMD table lookup, 1 or 2 table regs 1045// ASIMD table lookup extension, 1 table reg 1046def : InstRW<[V1Write_2c_2V01], (instregex "^TBLv(8|16)i8(One|Two)$", 1047 "^TBXv(8|16)i8One$")>; 1048 1049// ASIMD table lookup, 3 table regs 1050// ASIMD table lookup extension, 2 table reg 1051def : InstRW<[V1Write_4c_2V01], (instrs TBLv8i8Three, TBLv16i8Three, 1052 TBXv8i8Two, TBXv16i8Two)>; 1053 1054// ASIMD table lookup, 4 table regs 1055def : InstRW<[V1Write_4c_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>; 1056 1057// ASIMD table lookup extension, 3 table reg 1058def : InstRW<[V1Write_6c_3V01], (instrs TBXv8i8Three, TBXv16i8Three)>; 1059 1060// ASIMD table lookup extension, 4 table reg 1061def : InstRW<[V1Write_6c_5V01], (instrs TBXv8i8Four, TBXv16i8Four)>; 1062 1063// ASIMD transfer, element to gen reg 1064def : InstRW<[V1Write_2c_1V], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$", 1065 "^UMOVvi(8|16|32|64)$")>; 1066 1067// ASIMD transfer, gen reg to element 1068def : InstRW<[V1Write_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>; 1069 1070 1071// ASIMD load instructions 1072// ----------------------------------------------------------------------------- 1073 1074// ASIMD load, 1 element, multiple, 1 reg 1075def : InstRW<[V1Write_6c_1L], 1076 (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1077def : InstRW<[V1Write_6c_1L, WriteAdr], 1078 (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1079 1080// ASIMD load, 1 element, multiple, 2 reg 1081def : InstRW<[V1Write_6c_2L], 1082 (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1083def : InstRW<[V1Write_6c_2L, WriteAdr], 1084 (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1085 1086// ASIMD load, 1 element, multiple, 3 reg 1087def : InstRW<[V1Write_6c_3L], 1088 (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1089def : InstRW<[V1Write_6c_3L, WriteAdr], 1090 (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1091 1092// ASIMD load, 1 element, multiple, 4 reg, D-form 1093def : InstRW<[V1Write_6c_2L], 1094 (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; 1095def : InstRW<[V1Write_6c_2L, WriteAdr], 1096 (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; 1097 1098// ASIMD load, 1 element, multiple, 4 reg, Q-form 1099def : InstRW<[V1Write_7c_4L], 1100 (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; 1101def : InstRW<[V1Write_7c_4L, WriteAdr], 1102 (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; 1103 1104// ASIMD load, 1 element, one lane 1105// ASIMD load, 1 element, all lanes 1106def : InstRW<[V1Write_8c_1L_1V], 1107 (instregex "^LD1(i|Rv)(8|16|32|64)$", 1108 "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1109def : InstRW<[V1Write_8c_1L_1V, WriteAdr], 1110 (instregex "^LD1i(8|16|32|64)_POST$", 1111 "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1112 1113// ASIMD load, 2 element, multiple, D-form 1114def : InstRW<[V1Write_8c_1L_2V], 1115 (instregex "^LD2Twov(8b|4h|2s)$")>; 1116def : InstRW<[V1Write_8c_1L_2V, WriteAdr], 1117 (instregex "^LD2Twov(8b|4h|2s)_POST$")>; 1118 1119// ASIMD load, 2 element, multiple, Q-form 1120def : InstRW<[V1Write_8c_2L_2V], 1121 (instregex "^LD2Twov(16b|8h|4s|2d)$")>; 1122def : InstRW<[V1Write_8c_2L_2V, WriteAdr], 1123 (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; 1124 1125// ASIMD load, 2 element, one lane 1126// ASIMD load, 2 element, all lanes 1127def : InstRW<[V1Write_8c_1L_2V], 1128 (instregex "^LD2i(8|16|32|64)$", 1129 "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1130def : InstRW<[V1Write_8c_1L_2V, WriteAdr], 1131 (instregex "^LD2i(8|16|32|64)_POST$", 1132 "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1133 1134// ASIMD load, 3 element, multiple, D-form 1135// ASIMD load, 3 element, one lane 1136// ASIMD load, 3 element, all lanes 1137def : InstRW<[V1Write_8c_2L_3V], 1138 (instregex "^LD3Threev(8b|4h|2s)$", 1139 "^LD3i(8|16|32|64)$", 1140 "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1141def : InstRW<[V1Write_8c_2L_3V, WriteAdr], 1142 (instregex "^LD3Threev(8b|4h|2s)_POST$", 1143 "^LD3i(8|16|32|64)_POST$", 1144 "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1145 1146// ASIMD load, 3 element, multiple, Q-form 1147def : InstRW<[V1Write_8c_3L_3V], 1148 (instregex "^LD3Threev(16b|8h|4s|2d)$")>; 1149def : InstRW<[V1Write_8c_3L_3V, WriteAdr], 1150 (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>; 1151 1152// ASIMD load, 4 element, multiple, D-form 1153// ASIMD load, 4 element, one lane 1154// ASIMD load, 4 element, all lanes 1155def : InstRW<[V1Write_8c_3L_4V], 1156 (instregex "^LD4Fourv(8b|4h|2s)$", 1157 "^LD4i(8|16|32|64)$", 1158 "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1159def : InstRW<[V1Write_8c_3L_4V, WriteAdr], 1160 (instregex "^LD4Fourv(8b|4h|2s)_POST$", 1161 "^LD4i(8|16|32|64)_POST$", 1162 "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1163 1164// ASIMD load, 4 element, multiple, Q-form 1165def : InstRW<[V1Write_9c_4L_4V], 1166 (instregex "^LD4Fourv(16b|8h|4s|2d)$")>; 1167def : InstRW<[V1Write_9c_4L_4V, WriteAdr], 1168 (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>; 1169 1170 1171// ASIMD store instructions 1172// ----------------------------------------------------------------------------- 1173 1174// ASIMD store, 1 element, multiple, 1 reg 1175// ASIMD store, 1 element, multiple, 2 reg, D-form 1176def : InstRW<[V1Write_2c_1L01_1V01], 1177 (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$", 1178 "^ST1Twov(8b|4h|2s|1d)$")>; 1179def : InstRW<[V1Write_2c_1L01_1V01, WriteAdr], 1180 (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$", 1181 "^ST1Twov(8b|4h|2s|1d)_POST$")>; 1182 1183// ASIMD store, 1 element, multiple, 2 reg, Q-form 1184// ASIMD store, 1 element, multiple, 3 reg, D-form 1185// ASIMD store, 1 element, multiple, 4 reg, D-form 1186def : InstRW<[V1Write_2c_2L01_2V01], 1187 (instregex "^ST1Twov(16b|8h|4s|2d)$", 1188 "^ST1Threev(8b|4h|2s|1d)$", 1189 "^ST1Fourv(8b|4h|2s|1d)$")>; 1190def : InstRW<[V1Write_2c_2L01_2V01, WriteAdr], 1191 (instregex "^ST1Twov(16b|8h|4s|2d)_POST$", 1192 "^ST1Threev(8b|4h|2s|1d)_POST$", 1193 "^ST1Fourv(8b|4h|2s|1d)_POST$")>; 1194 1195// ASIMD store, 1 element, multiple, 3 reg, Q-form 1196def : InstRW<[V1Write_2c_3L01_3V01], 1197 (instregex "^ST1Threev(16b|8h|4s|2d)$")>; 1198def : InstRW<[V1Write_2c_3L01_3V01, WriteAdr], 1199 (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>; 1200 1201// ASIMD store, 1 element, multiple, 4 reg, Q-form 1202def : InstRW<[V1Write_2c_4L01_4V01], 1203 (instregex "^ST1Fourv(16b|8h|4s|2d)$")>; 1204def : InstRW<[V1Write_2c_4L01_4V01, WriteAdr], 1205 (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>; 1206 1207// ASIMD store, 1 element, one lane 1208// ASIMD store, 2 element, multiple, D-form 1209// ASIMD store, 2 element, one lane 1210def : InstRW<[V1Write_4c_1L01_1V01], 1211 (instregex "^ST1i(8|16|32|64)$", 1212 "^ST2Twov(8b|4h|2s)$", 1213 "^ST2i(8|16|32|64)$")>; 1214def : InstRW<[V1Write_4c_1L01_1V01, WriteAdr], 1215 (instregex "^ST1i(8|16|32|64)_POST$", 1216 "^ST2Twov(8b|4h|2s)_POST$", 1217 "^ST2i(8|16|32|64)_POST$")>; 1218 1219// ASIMD store, 2 element, multiple, Q-form 1220// ASIMD store, 3 element, multiple, D-form 1221// ASIMD store, 3 element, one lane 1222// ASIMD store, 4 element, one lane, D 1223def : InstRW<[V1Write_4c_2L01_2V01], 1224 (instregex "^ST2Twov(16b|8h|4s|2d)$", 1225 "^ST3Threev(8b|4h|2s)$", 1226 "^ST3i(8|16|32|64)$", 1227 "^ST4i64$")>; 1228def : InstRW<[V1Write_4c_2L01_2V01, WriteAdr], 1229 (instregex "^ST2Twov(16b|8h|4s|2d)_POST$", 1230 "^ST3Threev(8b|4h|2s)_POST$", 1231 "^ST3i(8|16|32|64)_POST$", 1232 "^ST4i64_POST$")>; 1233 1234// ASIMD store, 3 element, multiple, Q-form 1235def : InstRW<[V1Write_5c_3L01_3V01], 1236 (instregex "^ST3Threev(16b|8h|4s|2d)$")>; 1237def : InstRW<[V1Write_5c_3L01_3V01, WriteAdr], 1238 (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>; 1239 1240// ASIMD store, 4 element, multiple, D-form 1241def : InstRW<[V1Write_6c_3L01_3V01], 1242 (instregex "^ST4Fourv(8b|4h|2s)$")>; 1243def : InstRW<[V1Write_6c_3L01_3V01, WriteAdr], 1244 (instregex "^ST4Fourv(8b|4h|2s)_POST$")>; 1245 1246// ASIMD store, 4 element, multiple, Q-form, B/H/S 1247def : InstRW<[V1Write_7c_6L01_6V01], 1248 (instregex "^ST4Fourv(16b|8h|4s)$")>; 1249def : InstRW<[V1Write_7c_6L01_6V01, WriteAdr], 1250 (instregex "^ST4Fourv(16b|8h|4s)_POST$")>; 1251 1252// ASIMD store, 4 element, multiple, Q-form, D 1253def : InstRW<[V1Write_4c_4L01_4V01], 1254 (instrs ST4Fourv2d)>; 1255def : InstRW<[V1Write_4c_4L01_4V01, WriteAdr], 1256 (instrs ST4Fourv2d_POST)>; 1257 1258// ASIMD store, 4 element, one lane, B/H/S 1259def : InstRW<[V1Write_6c_3L_3V], 1260 (instregex "^ST4i(8|16|32)$")>; 1261def : InstRW<[V1Write_6c_3L_3V, WriteAdr], 1262 (instregex "^ST4i(8|16|32)_POST$")>; 1263 1264 1265// Cryptography extensions 1266// ----------------------------------------------------------------------------- 1267 1268// Crypto polynomial (64x64) multiply long 1269// Covered by "SchedAlias (WriteV[dq]...)" above 1270 1271// Crypto AES ops 1272def V1WriteVC : WriteSequence<[V1Write_2c_1V]>; 1273def V1ReadVC : SchedReadAdvance<2, [V1WriteVC]>; 1274def : InstRW<[V1WriteVC], (instrs AESDrr, AESErr)>; 1275def : InstRW<[V1Write_2c_1V, V1ReadVC], (instrs AESMCrr, AESIMCrr)>; 1276 1277// Crypto SHA1 hash acceleration op 1278// Crypto SHA1 schedule acceleration ops 1279// Crypto SHA256 schedule acceleration ops 1280// Crypto SHA512 hash acceleration ops 1281// Crypto SM3 ops 1282def : InstRW<[V1Write_2c_1V0], (instregex "^SHA1(H|SU[01])rr$", 1283 "^SHA256SU[01]rr$", 1284 "^SHA512(H2?|SU[01])$", 1285 "^SM3(PARTW(1|2SM3SS1)|TT[12][AB])$")>; 1286 1287// Crypto SHA1 hash acceleration ops 1288// Crypto SHA256 hash acceleration ops 1289// Crypto SM4 ops 1290def : InstRW<[V1Write_4c_1V0], (instregex "^SHA1[CMP]rrr$", 1291 "^SHA256H2?rrr$", 1292 "^SM4E(KEY)?$")>; 1293 1294// Crypto SHA3 ops 1295def : InstRW<[V1Write_2c_1V0], (instrs BCAX, EOR3, RAX1, XAR)>; 1296 1297 1298// CRC instruction 1299// ----------------------------------------------------------------------------- 1300 1301// CRC checksum ops 1302def : InstRW<[V1Write_2c_1M0], (instregex "^CRC32C?[BHWX]rr$")>; 1303 1304 1305// SVE Predicate instructions 1306// ----------------------------------------------------------------------------- 1307 1308// Loop control, based on predicate 1309def : InstRW<[V1Write_2c_1M0], (instregex "^BRK[AB]_PP[mz]P$")>; 1310def : InstRW<[V1Write_2c_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>; 1311 1312// Loop control, based on predicate and flag setting 1313def : InstRW<[V1Write_3c_2M0], (instrs BRKAS_PPzP, BRKBS_PPzP, BRKNS_PPzP, 1314 BRKPAS_PPzPP, BRKPBS_PPzPP)>; 1315 1316// Loop control, based on GPR 1317def : InstRW<[V1Write_3c_2M0], (instregex "^WHILE(LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>; 1318 1319// Loop terminate 1320def : InstRW<[V1Write_1c_1M0], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>; 1321 1322// Predicate counting scalar 1323// Predicate counting scalar, active predicate 1324def : InstRW<[V1Write_2c_1M0], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>; 1325def : InstRW<[V1Write_2c_1M0], (instregex "^(CNT|([SU]Q)?(DEC|INC))[BHWD]_XPiI$", 1326 "^SQ(DEC|INC)[BHWD]_XPiWdI$", 1327 "^UQ(DEC|INC)[BHWD]_WPiI$", 1328 "^CNTP_XPP_[BHSD]$", 1329 "^([SU]Q)?(DEC|INC)P_XP_[BHSD]$", 1330 "^UQ(DEC|INC)P_WP_[BHSD]$", 1331 "^[SU]Q(DEC|INC)P_XPWd_[BHSD]$")>; 1332 1333// Predicate counting vector, active predicate 1334def : InstRW<[V1Write_7c_2M0_1V01], (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]$")>; 1335 1336// Predicate logical 1337def : InstRW<[V1Write_1c_1M0], 1338 (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>; 1339 1340// Predicate logical, flag setting 1341def : InstRW<[V1Write_2c_2M0], 1342 (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)S_PPzPP$")>; 1343 1344// Predicate reverse 1345// Predicate set/initialize/find next 1346// Predicate transpose 1347// Predicate unpack and widen 1348// Predicate zip/unzip 1349def : InstRW<[V1Write_2c_1M0], (instregex "^REV_PP_[BHSD]$", 1350 "^PFALSE$", "^PFIRST_B$", 1351 "^PNEXT_[BHSD]$", "^PTRUE_[BHSD]$", 1352 "^TRN[12]_PPP_[BHSDQ]$", 1353 "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>; 1354 1355// Predicate set/initialize/find next 1356// Predicate unpack and widen 1357def : InstRW<[V1Write_2c_1M0], (instrs PTEST_PP, 1358 PUNPKHI_PP, PUNPKLO_PP)>; 1359 1360// Predicate select 1361def : InstRW<[V1Write_1c_1M0], (instrs SEL_PPPP)>; 1362 1363// Predicate set/initialize, set flags 1364def : InstRW<[V1Write_3c_2M0], (instregex "^PTRUES_[BHSD]$")>; 1365 1366 1367 1368// SVE integer instructions 1369// ----------------------------------------------------------------------------- 1370 1371// Arithmetic, basic 1372// Logical 1373def : InstRW<[V1Write_2c_1V01], 1374 (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]$", 1375 "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]$", 1376 "^ADR_[SU]XTW_ZZZ_D_[0123]$", 1377 "^ADR_LSL_ZZZ_[SD]_[0123]$", 1378 "^[SU]ABD_ZP[mZ]Z_[BHSD]$", 1379 "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]$", 1380 "^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]$", 1381 "^SUBR_Z(I|P[mZ]Z)_[BHSD]$", 1382 "^(AND|EOR|ORR)_ZI$", 1383 "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$", 1384 "^EOR(BT|TB)_ZZZ_[BHSD]$", 1385 "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>; 1386 1387// Arithmetic, shift 1388def : InstRW<[V1Write_2c_1V1], 1389 (instregex "^(ASR|LSL|LSR)_WIDE_Z(Pm|Z)Z_[BHS]", 1390 "^(ASR|LSL|LSR)_ZPm[IZ]_[BHSD]", 1391 "^(ASR|LSL|LSR)_ZZI_[BHSD]", 1392 "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]", 1393 "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>; 1394 1395// Arithmetic, shift right for divide 1396def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_ZP[mZ]I_[BHSD]$")>; 1397 1398// Count/reverse bits 1399def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>; 1400 1401// Broadcast logical bitmask immediate to vector 1402def : InstRW<[V1Write_2c_1V01], (instrs DUPM_ZI)>; 1403 1404// Compare and set flags 1405def : InstRW<[V1Write_4c_1M0_1V0], 1406 (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$", 1407 "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>; 1408 1409// Conditional extract operations, scalar form 1410def : InstRW<[V1Write_9c_1M0_1V1], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>; 1411 1412// Conditional extract operations, SIMD&FP scalar and vector forms 1413def : InstRW<[V1Write_3c_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$", 1414 "^COMPACT_ZPZ_[SD]$", 1415 "^SPLICE_ZPZZ?_[BHSD]$")>; 1416 1417// Convert to floating point, 64b to float or convert to double 1418def : InstRW<[V1Write_3c_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]", 1419 "^[SU]CVTF_ZPmZ_StoD")>; 1420 1421// Convert to floating point, 32b to single or half 1422def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>; 1423 1424// Convert to floating point, 16b to half 1425def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>; 1426 1427// Copy, scalar 1428def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>; 1429 1430// Copy, scalar SIMD&FP or imm 1431def : InstRW<[V1Write_2c_1V01], (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>; 1432 1433// Divides, 32 bit 1434def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>; 1435 1436// Divides, 64 bit 1437def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>; 1438 1439// Dot product, 8 bit 1440def : InstRW<[V1Write_3c_1V01], (instregex "^[SU]DOT_ZZZI?_S$")>; 1441 1442// Dot product, 8 bit, using signed and unsigned integers 1443def : InstRW<[V1Write_3c_1V], (instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>; 1444 1445// Dot product, 16 bit 1446def : InstRW<[V1Write_4c_1V01], (instregex "^[SU]DOT_ZZZI?_D$")>; 1447 1448// Duplicate, immediate and indexed form 1449def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$", 1450 "^DUP_ZZI_[BHSDQ]$")>; 1451 1452// Duplicate, scalar form 1453def : InstRW<[V1Write_3c_1M0], (instregex "^DUP_ZR_[BHSD]$")>; 1454 1455// Extend, sign or zero 1456def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$", 1457 "^[SU]XTH_ZPmZ_[SD]$", 1458 "^[SU]XTW_ZPmZ_[D]$")>; 1459 1460// Extract 1461def : InstRW<[V1Write_2c_1V01], (instrs EXT_ZZI)>; 1462 1463// Extract/insert operation, SIMD and FP scalar form 1464def : InstRW<[V1Write_3c_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$", 1465 "^INSR_ZV_[BHSD]$")>; 1466 1467// Extract/insert operation, scalar 1468def : InstRW<[V1Write_6c_1M0_1V1], (instregex "^LAST[AB]_RPZ_[BHSD]$", 1469 "^INSR_ZR_[BHSD]$")>; 1470 1471// Horizontal operations, B, H, S form, imm, imm 1472def : InstRW<[V1Write_4c_1V0], (instregex "^INDEX_II_[BHS]$")>; 1473 1474// Horizontal operations, B, H, S form, scalar, imm / scalar / imm, scalar 1475def : InstRW<[V1Write_7c_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>; 1476 1477// Horizontal operations, D form, imm, imm 1478def : InstRW<[V1Write_5c_2V0], (instrs INDEX_II_D)>; 1479 1480// Horizontal operations, D form, scalar, imm / scalar / imm, scalar 1481def : InstRW<[V1Write_8c_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>; 1482 1483// Move prefix 1484def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$", 1485 "^MOVPRFX_ZZ$")>; 1486 1487// Matrix multiply-accumulate 1488def : InstRW<[V1Write_3c_1V01], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; 1489 1490// Multiply, B, H, S element size 1491def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ)_[BHS]$", 1492 "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>; 1493 1494// Multiply, D element size 1495// Multiply accumulate, D element size 1496def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ)_D$", 1497 "^[SU]MULH_ZPmZ_D$", 1498 "^(MLA|MLS|MAD|MSB)_ZPmZZ_D$")>; 1499 1500// Multiply accumulate, B, H, S element size 1501// NOTE: This is not specified in the SOG. 1502def : InstRW<[V1Write_4c_1V0], (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>; 1503 1504// Predicate counting vector 1505def : InstRW<[V1Write_2c_1V0], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>; 1506 1507// Reduction, arithmetic, B form 1508def : InstRW<[V1Write_14c_1V_1V0_2V1_1V13], 1509 (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>; 1510 1511// Reduction, arithmetic, H form 1512def : InstRW<[V1Write_12c_1V_1V01_2V1], 1513 (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>; 1514 1515// Reduction, arithmetic, S form 1516def : InstRW<[V1Write_10c_1V_1V01_2V1], 1517 (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>; 1518 1519// Reduction, arithmetic, D form 1520def : InstRW<[V1Write_8c_1V_1V01], 1521 (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>; 1522 1523// Reduction, logical 1524def : InstRW<[V1Write_12c_4V01], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>; 1525 1526// Reverse, vector 1527def : InstRW<[V1Write_2c_1V01], (instregex "^REV_ZZ_[BHSD]$", 1528 "^REVB_ZPmZ_[HSD]$", 1529 "^REVH_ZPmZ_[SD]$", 1530 "^REVW_ZPmZ_D$")>; 1531 1532// Select, vector form 1533// Table lookup 1534// Table lookup extension 1535// Transpose, vector form 1536// Unpack and extend 1537// Zip/unzip 1538def : InstRW<[V1Write_2c_1V01], (instregex "^SEL_ZPZZ_[BHSD]$", 1539 "^TB[LX]_ZZZ_[BHSD]$", 1540 "^TRN[12]_ZZZ_[BHSDQ]$", 1541 "^[SU]UNPK(HI|LO)_ZZ_[HSD]$", 1542 "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>; 1543 1544 1545// SVE floating-point instructions 1546// ----------------------------------------------------------------------------- 1547 1548// Floating point absolute value/difference 1549// Floating point arithmetic 1550def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]$", 1551 "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$", 1552 "^FADDP_ZPmZZ_[HSD]$", 1553 "^FNEG_ZPmZ_[HSD]$", 1554 "^FSUBR_ZPm[IZ]_[HSD]$")>; 1555 1556// Floating point associative add, F16 1557def : InstRW<[V1Write_19c_18V0], (instrs FADDA_VPZ_H)>; 1558 1559// Floating point associative add, F32 1560def : InstRW<[V1Write_11c_10V0], (instrs FADDA_VPZ_S)>; 1561 1562// Floating point associative add, F64 1563def : InstRW<[V1Write_8c_3V01], (instrs FADDA_VPZ_D)>; 1564 1565// Floating point compare 1566def : InstRW<[V1Write_2c_1V0], (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$", 1567 "^FCM(EQ|GE|GT|NE|UO)_PPzZZ_[HSD]$", 1568 "^FCM(EQ|GE|GT|LE|LT|NE)_PPzZ0_[HSD]$")>; 1569 1570// Floating point complex add 1571def : InstRW<[V1Write_3c_1V01], (instregex "^FCADD_ZPmZ_[HSD]$")>; 1572 1573// Floating point complex multiply add 1574def : InstRW<[V1Write_5c_1V01], (instregex "^FCMLA_ZPmZZ_[HSD]$", 1575 "^FCMLA_ZZZI_[HS]$")>; 1576 1577// Floating point convert, long or narrow (F16 to F32 or F32 to F16) 1578// Floating point convert to integer, F32 1579def : InstRW<[V1Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$", 1580 "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>; 1581 1582// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) 1583// Floating point convert to integer, F64 1584def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$", 1585 "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>; 1586 1587// Floating point convert to integer, F16 1588def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>; 1589 1590// Floating point copy 1591def : InstRW<[V1Write_2c_1V01], (instregex "^FCPY_ZPmI_[HSD]$", 1592 "^FDUP_ZI_[HSD]$")>; 1593 1594// Floating point divide, F16 1595def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_ZPmZ_H$")>; 1596 1597// Floating point divide, F32 1598def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_ZPmZ_S$")>; 1599 1600// Floating point divide, F64 1601def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_ZPmZ_D$")>; 1602 1603// Floating point min/max 1604def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>; 1605 1606// Floating point multiply 1607def : InstRW<[V1Write_3c_1V01], (instregex "^F(SCALE|MULX)_ZPmZ_[HSD]$", 1608 "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>; 1609 1610// Floating point multiply accumulate 1611// Floating point reciprocal step 1612def : InstRW<[V1Write_4c_1V01], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$", 1613 "^FML[AS]_ZZZI_[HSD]$", 1614 "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>; 1615 1616// Floating point reciprocal estimate, F16 1617def : InstRW<[V1Write_6c_4V0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>; 1618 1619// Floating point reciprocal estimate, F32 1620def : InstRW<[V1Write_4c_2V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>; 1621 1622// Floating point reciprocal estimate, F64 1623def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>; 1624 1625// Floating point reciprocal exponent 1626def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]$")>; 1627 1628// Floating point reduction, F16 1629def : InstRW<[V1Write_13c_6V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>; 1630 1631// Floating point reduction, F32 1632def : InstRW<[V1Write_11c_1V_5V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$")>; 1633 1634// Floating point reduction, F64 1635def : InstRW<[V1Write_9c_1V_4V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>; 1636 1637// Floating point round to integral, F16 1638def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>; 1639 1640// Floating point round to integral, F32 1641def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>; 1642 1643// Floating point round to integral, F64 1644def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>; 1645 1646// Floating point square root, F16 1647def : InstRW<[V1Write_13c10_1V0], (instrs FSQRT_ZPmZ_H)>; 1648 1649// Floating point square root, F32 1650def : InstRW<[V1Write_10c7_1V0], (instrs FSQRT_ZPmZ_S)>; 1651 1652// Floating point square root, F64 1653def : InstRW<[V1Write_16c7_1V0], (instrs FSQRT_ZPmZ_D)>; 1654 1655// Floating point trigonometric 1656def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$", 1657 "^FTMAD_ZZI_[HSD]$", 1658 "^FTS(MUL|SEL)_ZZZ_[HSD]$")>; 1659 1660 1661// SVE BFloat16 (BF16) instructions 1662// ----------------------------------------------------------------------------- 1663 1664// Convert, F32 to BF16 1665def : InstRW<[V1Write_4c_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>; 1666 1667// Dot product 1668def : InstRW<[V1Write_4c_1V01], (instrs BFDOT_ZZI, BFDOT_ZZZ)>; 1669 1670// Matrix multiply accumulate 1671def : InstRW<[V1Write_5c_1V01], (instrs BFMMLA_ZZZ)>; 1672 1673// Multiply accumulate long 1674def : InstRW<[V1Write_5c_1V01], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>; 1675 1676 1677// SVE Load instructions 1678// ----------------------------------------------------------------------------- 1679 1680// Load vector 1681def : InstRW<[V1Write_6c_1L01], (instrs LDR_ZXI)>; 1682 1683// Load predicate 1684def : InstRW<[V1Write_6c_1L_1M], (instrs LDR_PXI)>; 1685 1686// Contiguous load, scalar + imm 1687// Contiguous load, scalar + scalar 1688// Contiguous load broadcast, scalar + imm 1689// Contiguous load broadcast, scalar + scalar 1690def : InstRW<[V1Write_6c_1L01], (instregex "^LD1[BHWD]_IMM_REAL$", 1691 "^LD1S?B_[HSD]_IMM_REAL$", 1692 "^LD1S?H_[SD]_IMM_REAL$", 1693 "^LD1S?W_D_IMM_REAL$", 1694 "^LD1[BWD]$", 1695 "^LD1S?B_[HSD]$", 1696 "^LD1S?W_D$", 1697 "^LD1R[BHWD]_IMM$", 1698 "^LD1RSW_IMM$", 1699 "^LD1RS?B_[HSD]_IMM$", 1700 "^LD1RS?H_[SD]_IMM$", 1701 "^LD1RS?W_D_IMM$", 1702 "^LD1RQ_[BHWD]_IMM$", 1703 "^LD1RQ_[BWD]$")>; 1704def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LD1H$", 1705 "^LD1S?H_[SD]$", 1706 "^LD1RQ_H$")>; 1707 1708// Non temporal load, scalar + imm 1709def : InstRW<[V1Write_6c_1L01], (instregex "^LDNT1[BHWD]_ZRI$")>; 1710 1711// Non temporal load, scalar + scalar 1712def : InstRW<[V1Write_7c_1L01_1S], (instrs LDNT1H_ZRR)>; 1713def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDNT1[BWD]_ZRR$")>; 1714 1715// Contiguous first faulting load, scalar + scalar 1716def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H_REAL$", 1717 "^LDFF1S?H_[SD]_REAL$")>; 1718def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]_REAL$", 1719 "^LDFF1S?B_[HSD]_REAL$", 1720 "^LDFF1S?W_D_REAL$")>; 1721 1722// Contiguous non faulting load, scalar + imm 1723def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM_REAL$", 1724 "^LDNF1S?B_[HSD]_IMM_REAL$", 1725 "^LDNF1S?H_[SD]_IMM_REAL$", 1726 "^LDNF1S?W_D_IMM_REAL$")>; 1727 1728// Contiguous Load two structures to two vectors, scalar + imm 1729def : InstRW<[V1Write_8c_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>; 1730 1731// Contiguous Load two structures to two vectors, scalar + scalar 1732def : InstRW<[V1Write_10c_2L01_2V01], (instrs LD2H)>; 1733def : InstRW<[V1Write_9c_2L01_2V01], (instregex "^LD2[BWD]$")>; 1734 1735// Contiguous Load three structures to three vectors, scalar + imm 1736def : InstRW<[V1Write_11c_3L01_3V01], (instregex "^LD3[BHWD]_IMM$")>; 1737 1738// Contiguous Load three structures to three vectors, scalar + scalar 1739def : InstRW<[V1Write_13c_3L01_1S_3V01], (instregex "^LD3[BHWD]$")>; 1740 1741// Contiguous Load four structures to four vectors, scalar + imm 1742def : InstRW<[V1Write_12c_4L01_4V01], (instregex "^LD4[BHWD]_IMM$")>; 1743 1744// Contiguous Load four structures to four vectors, scalar + scalar 1745def : InstRW<[V1Write_13c_4L01_2S_4V01], (instregex "^LD4[BHWD]$")>; 1746 1747// Gather load, vector + imm, 32-bit element size 1748def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$", 1749 "^GLD(FF)?1W_IMM_REAL$")>; 1750 1751// Gather load, vector + imm, 64-bit element size 1752def : InstRW<[V1Write_9c_2L_2V], 1753 (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$", 1754 "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?(SCALED_)?REAL$", 1755 "^GLD(FF)?1D_IMM_REAL$", 1756 "^GLD(FF)?1D_([SU]XTW_)?(SCALED_)?REAL$")>; 1757 1758// Gather load, 32-bit scaled offset 1759def : InstRW<[V1Write_11c_2L_2V], 1760 (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$", 1761 "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>; 1762 1763// Gather load, 32-bit unpacked unscaled offset 1764def : InstRW<[V1Write_9c_1L_1V], 1765 (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$", 1766 "^GLD(FF)?1W_[SU]XTW_REAL$")>; 1767 1768// Prefetch 1769// NOTE: This is not specified in the SOG. 1770def : InstRW<[V1Write_4c_1L01], (instregex "^PRF[BHWD]")>; 1771 1772 1773// SVE Store instructions 1774// ----------------------------------------------------------------------------- 1775 1776// Store from predicate reg 1777def : InstRW<[V1Write_1c_1L01], (instrs STR_PXI)>; 1778 1779// Store from vector reg 1780def : InstRW<[V1Write_2c_1L01_1V], (instrs STR_ZXI)>; 1781 1782// Contiguous store, scalar + imm 1783// Contiguous store, scalar + scalar 1784def : InstRW<[V1Write_2c_1L01_1V], (instregex "^ST1[BHWD]_IMM$", 1785 "^ST1B_[HSD]_IMM$", 1786 "^ST1H_[SD]_IMM$", 1787 "^ST1W_D_IMM$", 1788 "^ST1[BWD]$", 1789 "^ST1B_[HSD]$", 1790 "^ST1W_D$")>; 1791def : InstRW<[V1Write_2c_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>; 1792 1793// Contiguous store two structures from two vectors, scalar + imm 1794// Contiguous store two structures from two vectors, scalar + scalar 1795def : InstRW<[V1Write_4c_1L01_1V], (instregex "^ST2[BHWD]_IMM$", 1796 "^ST2[BWD]$")>; 1797def : InstRW<[V1Write_4c_1L01_1S_1V], (instrs ST2H)>; 1798 1799// Contiguous store three structures from three vectors, scalar + imm 1800def : InstRW<[V1Write_7c_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>; 1801 1802// Contiguous store three structures from three vectors, scalar + scalar 1803def : InstRW<[V1Write_7c_5L01_5S_5V], (instregex "^ST3[BHWD]$")>; 1804 1805// Contiguous store four structures from four vectors, scalar + imm 1806def : InstRW<[V1Write_11c_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>; 1807 1808// Contiguous store four structures from four vectors, scalar + scalar 1809def : InstRW<[V1Write_11c_9L01_9S_9V], (instregex "^ST4[BHWD]$")>; 1810 1811// Non temporal store, scalar + imm 1812// Non temporal store, scalar + scalar 1813def : InstRW<[V1Write_2c_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$", 1814 "^STNT1[BWD]_ZRR$")>; 1815def : InstRW<[V1Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>; 1816 1817// Scatter store vector + imm 32-bit element size 1818// Scatter store, 32-bit scaled offset 1819// Scatter store, 32-bit unscaled offset 1820def : InstRW<[V1Write_10c_2L01_2V], (instregex "^SST1[BH]_S_IMM$", 1821 "^SST1W_IMM$", 1822 "^SST1(H_S|W)_[SU]XTW_SCALED$", 1823 "^SST1[BH]_S_[SU]XTW$", 1824 "^SST1W_[SU]XTW$")>; 1825 1826// Scatter store, 32-bit unpacked unscaled offset 1827// Scatter store, 32-bit unpacked scaled offset 1828def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$", 1829 "^SST1D_[SU]XTW$", 1830 "^SST1[HW]_D_[SU]XTW_SCALED$", 1831 "^SST1D_[SU]XTW_SCALED$")>; 1832 1833// Scatter store vector + imm 64-bit element size 1834// Scatter store, 64-bit scaled offset 1835// Scatter store, 64-bit unscaled offset 1836def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_IMM$", 1837 "^SST1D_IMM$", 1838 "^SST1[HW]_D_SCALED$", 1839 "^SST1D_SCALED$", 1840 "^SST1[BHW]_D$", 1841 "^SST1D$")>; 1842 1843 1844// SVE Miscellaneous instructions 1845// ----------------------------------------------------------------------------- 1846 1847// Read first fault register, unpredicated 1848// Set first fault register 1849// Write to first fault register 1850def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P_REAL, 1851 SETFFR, 1852 WRFFR)>; 1853 1854// Read first fault register, predicated 1855def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz_REAL)>; 1856 1857// Read first fault register and set flags 1858def : InstRW<[V1Write_4c_1M], (instrs RDFFRS_PPz)>; 1859 1860 1861} 1862