1//=- AArch64SchedNeoverseV1.td - NeoverseV1 Scheduling Model -*- tablegen -*-=// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the scheduling model for the Arm Neoverse V1 processors. 10// 11// References: 12// - "Arm Neoverse V1 Software Optimization Guide" 13// - "Arm Neoverse V1 Platform: Unleashing a new performance tier for Arm-based computing" 14// https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/neoverse-v1-platform-a-new-performance-tier-for-arm 15// - "Neoverse V1" 16// https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_v1 17 18// 19//===----------------------------------------------------------------------===// 20 21def NeoverseV1Model : SchedMachineModel { 22 let IssueWidth = 15; // Maximum micro-ops dispatch rate. 23 let MicroOpBufferSize = 256; // Micro-op re-order buffer. 24 let LoadLatency = 4; // Optimistic load latency. 25 let MispredictPenalty = 11; // Cycles cost of branch mispredicted. 26 let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57. 27 let CompleteModel = 1; 28 29 list<Predicate> UnsupportedFeatures = !listconcat(SVE2Unsupported.F, 30 SMEUnsupported.F, 31 [HasMTE, HasCPA, 32 HasCSSC]); 33} 34 35//===----------------------------------------------------------------------===// 36// Define each kind of processor resource and number available on Neoverse V1. 37// Instructions are first fetched and then decoded into internal macro-ops 38// (MOPs). From there, the MOPs proceed through register renaming and dispatch 39// stages. A MOP can be split into one or more micro-ops further down the 40// pipeline, after the decode stage. Once dispatched, micro-ops wait for their 41// operands and issue out-of-order to one of the issue pipelines. Each issue 42// pipeline can accept one micro-op per cycle. 43 44let SchedModel = NeoverseV1Model in { 45 46// Define the issue ports. 47def V1UnitB : ProcResource<2>; // Branch 0/1 48def V1UnitS : ProcResource<2>; // Integer single cycle 0/1 49def V1UnitM0 : ProcResource<1>; // Integer multicycle 0 50def V1UnitM1 : ProcResource<1>; // Integer multicycle 1 51def V1UnitL01 : ProcResource<2>; // Load/Store 0/1 52def V1UnitL2 : ProcResource<1>; // Load 2 53def V1UnitD : ProcResource<2>; // Store data 0/1 54def V1UnitV0 : ProcResource<1>; // FP/ASIMD 0 55def V1UnitV1 : ProcResource<1>; // FP/ASIMD 1 56def V1UnitV2 : ProcResource<1>; // FP/ASIMD 2 57def V1UnitV3 : ProcResource<1>; // FP/ASIMD 3 58 59def V1UnitI : ProcResGroup<[V1UnitS, 60 V1UnitM0, V1UnitM1]>; // Integer units 61def V1UnitJ : ProcResGroup<[V1UnitS, V1UnitM0]>; // Integer 0-2 units 62def V1UnitM : ProcResGroup<[V1UnitM0, V1UnitM1]>; // Integer multicycle units 63def V1UnitL : ProcResGroup<[V1UnitL01, V1UnitL2]>; // Load units 64def V1UnitV : ProcResGroup<[V1UnitV0, V1UnitV1, 65 V1UnitV2, V1UnitV3]>; // FP/ASIMD units 66def V1UnitV01 : ProcResGroup<[V1UnitV0, V1UnitV1]>; // FP/ASIMD 0/1 units 67def V1UnitV02 : ProcResGroup<[V1UnitV0, V1UnitV2]>; // FP/ASIMD 0/2 units 68def V1UnitV13 : ProcResGroup<[V1UnitV1, V1UnitV3]>; // FP/ASIMD 1/3 units 69 70// Define commonly used read types. 71 72// No generic forwarding is provided for these types. 73def : ReadAdvance<ReadI, 0>; 74def : ReadAdvance<ReadISReg, 0>; 75def : ReadAdvance<ReadIEReg, 0>; 76def : ReadAdvance<ReadIM, 0>; 77def : ReadAdvance<ReadIMA, 0>; 78def : ReadAdvance<ReadID, 0>; 79def : ReadAdvance<ReadExtrHi, 0>; 80def : ReadAdvance<ReadAdrBase, 0>; 81def : ReadAdvance<ReadST, 0>; 82def : ReadAdvance<ReadVLD, 0>; 83 84def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } 85def : WriteRes<WriteBarrier, []> { let Latency = 1; } 86def : WriteRes<WriteHint, []> { let Latency = 1; } 87 88 89//===----------------------------------------------------------------------===// 90// Define generic 0 micro-op types 91 92let Latency = 0, NumMicroOps = 0 in 93def V1Write_0c_0Z : SchedWriteRes<[]>; 94 95 96//===----------------------------------------------------------------------===// 97// Define generic 1 micro-op types 98 99def V1Write_1c_1B : SchedWriteRes<[V1UnitB]> { let Latency = 1; } 100def V1Write_1c_1I : SchedWriteRes<[V1UnitI]> { let Latency = 1; } 101def V1Write_1c_1J : SchedWriteRes<[V1UnitJ]> { let Latency = 1; } 102def V1Write_4c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 4; } 103def V1Write_6c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6; } 104def V1Write_1c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 1; } 105def V1Write_4c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 4; } 106def V1Write_6c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 6; } 107def V1Write_2c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 2; } 108def V1Write_3c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 3; } 109def V1Write_4c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 4; } 110def V1Write_1c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 1; } 111def V1Write_2c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 2; } 112def V1Write_3c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 3; } 113def V1Write_5c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 5; } 114def V1Write_12c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 12; 115 let ReleaseAtCycles = [5]; } 116def V1Write_20c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 20; 117 let ReleaseAtCycles = [5]; } 118def V1Write_2c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 2; } 119def V1Write_3c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 3; } 120def V1Write_4c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 4; } 121def V1Write_5c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 5; } 122def V1Write_2c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 2; } 123def V1Write_3c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 3; } 124def V1Write_4c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 4; } 125def V1Write_6c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 6; } 126def V1Write_10c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 10; 127 let ReleaseAtCycles = [7]; } 128def V1Write_12c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 12; 129 let ReleaseAtCycles = [7]; } 130def V1Write_13c10_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 13; 131 let ReleaseAtCycles = [10]; } 132def V1Write_15c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 15; 133 let ReleaseAtCycles = [7]; } 134def V1Write_16c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 16; 135 let ReleaseAtCycles = [7]; } 136def V1Write_20c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 20; 137 let ReleaseAtCycles = [7]; } 138def V1Write_2c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 2; } 139def V1Write_3c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 3; } 140def V1Write_4c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4; } 141def V1Write_5c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 5; } 142def V1Write_3c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 3; } 143def V1Write_4c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 4; } 144def V1Write_7c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 7; 145 let ReleaseAtCycles = [7]; } 146def V1Write_10c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10; 147 let ReleaseAtCycles = [7]; } 148def V1Write_13c5_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13; 149 let ReleaseAtCycles = [5]; } 150def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13; 151 let ReleaseAtCycles = [11]; } 152def V1Write_15c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 15; 153 let ReleaseAtCycles = [7]; } 154def V1Write_16c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16; 155 let ReleaseAtCycles = [7]; } 156def V1Write_2c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 2; } 157def V1Write_3c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 3; } 158def V1Write_4c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 4; } 159def V1Write_2c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 2; } 160def V1Write_4c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 4; } 161 162//===----------------------------------------------------------------------===// 163// Define generic 2 micro-op types 164 165let Latency = 1, NumMicroOps = 2 in 166def V1Write_1c_1B_1S : SchedWriteRes<[V1UnitB, V1UnitS]>; 167let Latency = 6, NumMicroOps = 2 in 168def V1Write_6c_1B_1M0 : SchedWriteRes<[V1UnitB, V1UnitM0]>; 169let Latency = 3, NumMicroOps = 2 in 170def V1Write_3c_1I_1M : SchedWriteRes<[V1UnitI, V1UnitM]>; 171let Latency = 5, NumMicroOps = 2 in 172def V1Write_5c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]>; 173let Latency = 7, NumMicroOps = 2 in 174def V1Write_7c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]>; 175let Latency = 6, NumMicroOps = 2 in 176def V1Write_6c_2L : SchedWriteRes<[V1UnitL, V1UnitL]>; 177let Latency = 6, NumMicroOps = 2 in 178def V1Write_6c_1L_1M : SchedWriteRes<[V1UnitL, V1UnitM]>; 179let Latency = 8, NumMicroOps = 2 in 180def V1Write_8c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>; 181let Latency = 9, NumMicroOps = 2 in 182def V1Write_9c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>; 183let Latency = 11, NumMicroOps = 2 in 184def V1Write_11c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>; 185let Latency = 1, NumMicroOps = 2 in 186def V1Write_1c_1L01_1D : SchedWriteRes<[V1UnitL01, V1UnitD]>; 187let Latency = 6, NumMicroOps = 2 in 188def V1Write_6c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]>; 189let Latency = 7, NumMicroOps = 2 in 190def V1Write_7c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]>; 191let Latency = 2, NumMicroOps = 2 in 192def V1Write_2c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>; 193let Latency = 4, NumMicroOps = 2 in 194def V1Write_4c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>; 195let Latency = 6, NumMicroOps = 2 in 196def V1Write_6c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>; 197let Latency = 2, NumMicroOps = 2 in 198def V1Write_2c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>; 199let Latency = 4, NumMicroOps = 2 in 200def V1Write_4c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>; 201let Latency = 2, NumMicroOps = 2 in 202def V1Write_2c_2M0 : SchedWriteRes<[V1UnitM0, V1UnitM0]>; 203let Latency = 3, NumMicroOps = 2 in 204def V1Write_3c_2M0 : SchedWriteRes<[V1UnitM0, V1UnitM0]>; 205let Latency = 9, NumMicroOps = 2 in 206def V1Write_9c_1M0_1L : SchedWriteRes<[V1UnitM0, V1UnitL]>; 207let Latency = 5, NumMicroOps = 2 in 208def V1Write_5c_1M0_1V : SchedWriteRes<[V1UnitM0, V1UnitV]>; 209let Latency = 4, NumMicroOps = 2 in 210def V1Write_4c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]>; 211let Latency = 7, NumMicroOps = 2 in 212def V1Write_7c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV1]>; 213let Latency = 5, NumMicroOps = 2 in 214def V1Write_5c_1M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitV01]>; 215let Latency = 6, NumMicroOps = 2 in 216def V1Write_6c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]>; 217let Latency = 9, NumMicroOps = 2 in 218def V1Write_9c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]>; 219let Latency = 4, NumMicroOps = 2 in 220def V1Write_4c_2V : SchedWriteRes<[V1UnitV, V1UnitV]>; 221let Latency = 8, NumMicroOps = 2 in 222def V1Write_8c_1V_1V01 : SchedWriteRes<[V1UnitV, V1UnitV01]>; 223let Latency = 4, NumMicroOps = 2 in 224def V1Write_4c_2V0 : SchedWriteRes<[V1UnitV0, V1UnitV0]>; 225let Latency = 5, NumMicroOps = 2 in 226def V1Write_5c_2V0 : SchedWriteRes<[V1UnitV0, V1UnitV0]>; 227let Latency = 2, NumMicroOps = 2 in 228def V1Write_2c_2V01 : SchedWriteRes<[V1UnitV01, V1UnitV01]>; 229let Latency = 4, NumMicroOps = 2 in 230def V1Write_4c_2V01 : SchedWriteRes<[V1UnitV01, V1UnitV01]>; 231let Latency = 4, NumMicroOps = 2 in 232def V1Write_4c_2V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>; 233let Latency = 6, NumMicroOps = 2 in 234def V1Write_6c_2V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>; 235let Latency = 4, NumMicroOps = 2 in 236def V1Write_4c_1V13_1V : SchedWriteRes<[V1UnitV13, V1UnitV]>; 237let Latency = 4, NumMicroOps = 2 in 238def V1Write_4c_2V13 : SchedWriteRes<[V1UnitV13, V1UnitV13]>; 239 240//===----------------------------------------------------------------------===// 241// Define generic 3 micro-op types 242 243let Latency = 2, NumMicroOps = 3 in 244def V1Write_2c_1I_1L01_1V01 : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitV01]>; 245let Latency = 7, NumMicroOps = 3 in 246def V1Write_7c_2M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitM0, V1UnitV01]>; 247let Latency = 8, NumMicroOps = 3 in 248def V1Write_8c_1L_2V : SchedWriteRes<[V1UnitL, V1UnitV, V1UnitV]>; 249let Latency = 6, NumMicroOps = 3 in 250def V1Write_6c_3L : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL]>; 251let Latency = 2, NumMicroOps = 3 in 252def V1Write_2c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>; 253let Latency = 4, NumMicroOps = 3 in 254def V1Write_4c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>; 255let Latency = 2, NumMicroOps = 3 in 256def V1Write_2c_2L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitV01]>; 257let Latency = 6, NumMicroOps = 3 in 258def V1Write_6c_3V : SchedWriteRes<[V1UnitV, V1UnitV, V1UnitV]>; 259let Latency = 4, NumMicroOps = 3 in 260def V1Write_4c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>; 261let Latency = 6, NumMicroOps = 3 in 262def V1Write_6c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>; 263let Latency = 8, NumMicroOps = 3 in 264def V1Write_8c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>; 265 266//===----------------------------------------------------------------------===// 267// Define generic 4 micro-op types 268 269let Latency = 8, NumMicroOps = 4 in 270def V1Write_8c_2M0_2V0 : SchedWriteRes<[V1UnitM0, V1UnitM0, 271 V1UnitV0, V1UnitV0]>; 272let Latency = 7, NumMicroOps = 4 in 273def V1Write_7c_4L : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, V1UnitL]>; 274let Latency = 8, NumMicroOps = 4 in 275def V1Write_8c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL, 276 V1UnitV, V1UnitV]>; 277let Latency = 9, NumMicroOps = 4 in 278def V1Write_9c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL, 279 V1UnitV, V1UnitV]>; 280let Latency = 11, NumMicroOps = 4 in 281def V1Write_11c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL, 282 V1UnitV, V1UnitV]>; 283let Latency = 10, NumMicroOps = 4 in 284def V1Write_10c_2L01_2V : SchedWriteRes<[V1UnitL01, V1UnitL01, 285 V1UnitV, V1UnitV]>; 286let Latency = 2, NumMicroOps = 4 in 287def V1Write_2c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 288 V1UnitV01, V1UnitV01]>; 289let Latency = 4, NumMicroOps = 4 in 290def V1Write_4c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 291 V1UnitV01, V1UnitV01]>; 292let Latency = 8, NumMicroOps = 4 in 293def V1Write_8c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 294 V1UnitV01, V1UnitV01]>; 295let Latency = 9, NumMicroOps = 4 in 296def V1Write_9c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 297 V1UnitV01, V1UnitV01]>; 298let Latency = 10, NumMicroOps = 4 in 299def V1Write_10c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 300 V1UnitV01, V1UnitV01]>; 301let Latency = 10, NumMicroOps = 4 in 302def V1Write_10c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01, 303 V1UnitV1, V1UnitV1]>; 304let Latency = 12, NumMicroOps = 4 in 305def V1Write_12c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01, 306 V1UnitV1, V1UnitV1]>; 307let Latency = 6, NumMicroOps = 4 in 308def V1Write_6c_4V0 : SchedWriteRes<[V1UnitV0, V1UnitV0, 309 V1UnitV0, V1UnitV0]>; 310let Latency = 12, NumMicroOps = 4 in 311def V1Write_12c_4V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, 312 V1UnitV01, V1UnitV01]>; 313let Latency = 6, NumMicroOps = 4 in 314def V1Write_6c_4V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>; 315 316//===----------------------------------------------------------------------===// 317// Define generic 5 micro-op types 318 319let Latency = 8, NumMicroOps = 5 in 320def V1Write_8c_2L_3V : SchedWriteRes<[V1UnitL, V1UnitL, 321 V1UnitV, V1UnitV, V1UnitV]>; 322let Latency = 14, NumMicroOps = 5 in 323def V1Write_14c_1V_1V0_2V1_1V13 : SchedWriteRes<[V1UnitV, 324 V1UnitV0, 325 V1UnitV1, V1UnitV1, 326 V1UnitV13]>; 327let Latency = 9, NumMicroOps = 5 in 328def V1Write_9c_1V_4V01 : SchedWriteRes<[V1UnitV, 329 V1UnitV01, V1UnitV01, 330 V1UnitV01, V1UnitV01]>; 331let Latency = 6, NumMicroOps = 5 in 332def V1Write_6c_5V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, 333 V1UnitV01, V1UnitV01, V1UnitV01]>; 334 335//===----------------------------------------------------------------------===// 336// Define generic 6 micro-op types 337 338let Latency = 6, NumMicroOps = 6 in 339def V1Write_6c_3L_3V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, 340 V1UnitV, V1UnitV, V1UnitV]>; 341let Latency = 8, NumMicroOps = 6 in 342def V1Write_8c_3L_3V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, 343 V1UnitV, V1UnitV, V1UnitV]>; 344let Latency = 2, NumMicroOps = 6 in 345def V1Write_2c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 346 V1UnitV01, V1UnitV01, V1UnitV01]>; 347let Latency = 5, NumMicroOps = 6 in 348def V1Write_5c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 349 V1UnitV01, V1UnitV01, V1UnitV01]>; 350let Latency = 6, NumMicroOps = 6 in 351def V1Write_6c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 352 V1UnitV01, V1UnitV01, V1UnitV01]>; 353let Latency = 11, NumMicroOps = 6 in 354def V1Write_11c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 355 V1UnitV01, V1UnitV01, V1UnitV01]>; 356let Latency = 11, NumMicroOps = 6 in 357def V1Write_11c_1V_5V01 : SchedWriteRes<[V1UnitV, 358 V1UnitV01, V1UnitV01, 359 V1UnitV01, V1UnitV01, V1UnitV01]>; 360let Latency = 13, NumMicroOps = 6 in 361def V1Write_13c_6V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01, 362 V1UnitV01, V1UnitV01, V1UnitV01]>; 363 364//===----------------------------------------------------------------------===// 365// Define generic 7 micro-op types 366 367let Latency = 8, NumMicroOps = 7 in 368def V1Write_8c_3L_4V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, 369 V1UnitV, V1UnitV, V1UnitV, V1UnitV]>; 370let Latency = 8, NumMicroOps = 7 in 371def V1Write_13c_3L01_1S_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 372 V1UnitS, 373 V1UnitV01, V1UnitV01, V1UnitV01]>; 374 375//===----------------------------------------------------------------------===// 376// Define generic 8 micro-op types 377 378let Latency = 9, NumMicroOps = 8 in 379def V1Write_9c_4L_4V : SchedWriteRes<[V1UnitL, V1UnitL, 380 V1UnitL, V1UnitL, 381 V1UnitV, V1UnitV, 382 V1UnitV, V1UnitV]>; 383let Latency = 2, NumMicroOps = 8 in 384def V1Write_2c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 385 V1UnitL01, V1UnitL01, 386 V1UnitV01, V1UnitV01, 387 V1UnitV01, V1UnitV01]>; 388let Latency = 4, NumMicroOps = 8 in 389def V1Write_4c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 390 V1UnitL01, V1UnitL01, 391 V1UnitV01, V1UnitV01, 392 V1UnitV01, V1UnitV01]>; 393let Latency = 12, NumMicroOps = 8 in 394def V1Write_12c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 395 V1UnitL01, V1UnitL01, 396 V1UnitV01, V1UnitV01, 397 V1UnitV01, V1UnitV01]>; 398 399//===----------------------------------------------------------------------===// 400// Define generic 10 micro-op types 401 402let Latency = 13, NumMicroOps = 10 in 403def V1Write_13c_4L01_2S_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 404 V1UnitL01, V1UnitL01, 405 V1UnitS, V1UnitS, 406 V1UnitV01, V1UnitV01, 407 V1UnitV01, V1UnitV01]>; 408let Latency = 7, NumMicroOps = 10 in 409def V1Write_7c_5L01_5V : SchedWriteRes<[V1UnitL01, V1UnitL01, 410 V1UnitL01, V1UnitL01, V1UnitL01, 411 V1UnitV, V1UnitV, 412 V1UnitV, V1UnitV, V1UnitV]>; 413let Latency = 11, NumMicroOps = 10 in 414def V1Write_11c_10V0 : SchedWriteRes<[V1UnitV0, 415 V1UnitV0, V1UnitV0, V1UnitV0, 416 V1UnitV0, V1UnitV0, V1UnitV0, 417 V1UnitV0, V1UnitV0, V1UnitV0]>; 418 419//===----------------------------------------------------------------------===// 420// Define generic 12 micro-op types 421 422let Latency = 7, NumMicroOps = 12 in 423def V1Write_7c_6L01_6V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 424 V1UnitL01, V1UnitL01, V1UnitL01, 425 V1UnitV01, V1UnitV01, V1UnitV01, 426 V1UnitV01, V1UnitV01, V1UnitV01]>; 427 428//===----------------------------------------------------------------------===// 429// Define generic 15 micro-op types 430 431let Latency = 7, NumMicroOps = 15 in 432def V1Write_7c_5L01_5S_5V : SchedWriteRes<[V1UnitL01, V1UnitL01, 433 V1UnitL01, V1UnitL01, V1UnitL01, 434 V1UnitS, V1UnitS, 435 V1UnitS, V1UnitS, V1UnitS, 436 V1UnitV, V1UnitV, 437 V1UnitV, V1UnitV, V1UnitV]>; 438 439 440//===----------------------------------------------------------------------===// 441// Define generic 18 micro-op types 442 443let Latency = 19, NumMicroOps = 18 in 444def V1Write_11c_9L01_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 445 V1UnitL01, V1UnitL01, V1UnitL01, 446 V1UnitL01, V1UnitL01, V1UnitL01, 447 V1UnitV, V1UnitV, V1UnitV, 448 V1UnitV, V1UnitV, V1UnitV, 449 V1UnitV, V1UnitV, V1UnitV]>; 450let Latency = 19, NumMicroOps = 18 in 451def V1Write_19c_18V0 : SchedWriteRes<[V1UnitV0, V1UnitV0, V1UnitV0, 452 V1UnitV0, V1UnitV0, V1UnitV0, 453 V1UnitV0, V1UnitV0, V1UnitV0, 454 V1UnitV0, V1UnitV0, V1UnitV0, 455 V1UnitV0, V1UnitV0, V1UnitV0, 456 V1UnitV0, V1UnitV0, V1UnitV0]>; 457 458//===----------------------------------------------------------------------===// 459// Define generic 27 micro-op types 460 461let Latency = 11, NumMicroOps = 27 in 462def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 463 V1UnitL01, V1UnitL01, V1UnitL01, 464 V1UnitL01, V1UnitL01, V1UnitL01, 465 V1UnitS, V1UnitS, V1UnitS, 466 V1UnitS, V1UnitS, V1UnitS, 467 V1UnitS, V1UnitS, V1UnitS, 468 V1UnitV, V1UnitV, V1UnitV, 469 V1UnitV, V1UnitV, V1UnitV, 470 V1UnitV, V1UnitV, V1UnitV]>; 471 472 473// Miscellaneous Instructions 474// ----------------------------------------------------------------------------- 475 476// COPY 477def : InstRW<[V1Write_1c_1I], (instrs COPY)>; 478 479// MSR 480def : WriteRes<WriteSys, []> { let Latency = 1; } 481 482 483// Branch Instructions 484// ----------------------------------------------------------------------------- 485 486// Branch, immed 487// Compare and branch 488def : SchedAlias<WriteBr, V1Write_1c_1B>; 489 490// Branch, register 491def : SchedAlias<WriteBrReg, V1Write_1c_1B>; 492 493// Branch and link, immed 494// Branch and link, register 495def : InstRW<[V1Write_1c_1B_1S], (instrs BL, BLR)>; 496 497// Compare and branch 498def : InstRW<[V1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>; 499 500 501// Arithmetic and Logical Instructions 502// ----------------------------------------------------------------------------- 503 504// ALU, basic 505// Conditional compare 506// Conditional select 507// Logical, basic 508// Address generation 509// Count leading 510// Reverse bits/bytes 511// Move immediate 512def : SchedAlias<WriteI, V1Write_1c_1I>; 513 514// ALU, basic, flagset 515def : InstRW<[V1Write_1c_1J], 516 (instregex "^(ADD|SUB)S[WX]r[ir]$", 517 "^(ADC|SBC)S[WX]r$", 518 "^ANDS[WX]ri$", 519 "^(AND|BIC)S[WX]rr$")>; 520 521// ALU, extend and shift 522def : SchedAlias<WriteIEReg, V1Write_2c_1M>; 523 524// Arithmetic, LSL shift, shift <= 4 525// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 526def V1WriteISReg : SchedWriteVariant< 527 [SchedVar<IsCheapLSL, [V1Write_1c_1I]>, 528 SchedVar<NoSchedPred, [V1Write_2c_1M]>]>; 529def : SchedAlias<WriteISReg, V1WriteISReg>; 530 531// Arithmetic, flagset, LSL shift, shift <= 4 532// Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 533def V1WriteISRegS : SchedWriteVariant< 534 [SchedVar<IsCheapLSL, [V1Write_1c_1J]>, 535 SchedVar<NoSchedPred, [V1Write_2c_1M]>]>; 536def : InstRW<[V1WriteISRegS], 537 (instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>; 538 539// Logical, shift, no flagset 540def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>; 541 542// Logical, shift, flagset 543def : InstRW<[V1Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>; 544 545// Flag manipulation instructions 546def : InstRW<[V1Write_1c_1J], (instrs SETF8, SETF16, RMIF, CFINV)>; 547 548 549// Divide and multiply instructions 550// ----------------------------------------------------------------------------- 551 552// Divide 553def : SchedAlias<WriteID32, V1Write_12c5_1M0>; 554def : SchedAlias<WriteID64, V1Write_20c5_1M0>; 555 556// Multiply 557// Multiply accumulate 558// Multiply accumulate, long 559// Multiply long 560def V1WriteIM : SchedWriteVariant< 561 [SchedVar<NeoverseMULIdiomPred, [V1Write_2c_1M]>, 562 SchedVar<NoSchedPred, [V1Write_2c_1M0]>]>; 563def : SchedAlias<WriteIM32, V1WriteIM>; 564def : SchedAlias<WriteIM64, V1WriteIM>; 565 566// Multiply high 567def : InstRW<[V1Write_3c_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>; 568 569 570// Pointer Authentication Instructions (v8.3 PAC) 571// ----------------------------------------------------------------------------- 572 573// Authenticate data address 574// Authenticate instruction address 575// Compute pointer authentication code for data address 576// Compute pointer authentication code, using generic key 577// Compute pointer authentication code for instruction address 578def : InstRW<[V1Write_5c_1M0], (instregex "^AUT", 579 "^PAC")>; 580 581// Branch and link, register, with pointer authentication 582// Branch, register, with pointer authentication 583// Branch, return, with pointer authentication 584def : InstRW<[V1Write_6c_1B_1M0], (instregex "^BL?RA[AB]Z?$", 585 "^E?RETA[AB]$")>; 586 587// Load register, with pointer authentication 588def : InstRW<[V1Write_9c_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>; 589 590// Strip pointer authentication code 591def : InstRW<[V1Write_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>; 592 593 594// Miscellaneous data-processing instructions 595// ----------------------------------------------------------------------------- 596 597// Bitfield extract, one reg 598// Bitfield extract, two regs 599def V1WriteExtr : SchedWriteVariant< 600 [SchedVar<IsRORImmIdiomPred, [V1Write_1c_1I]>, 601 SchedVar<NoSchedPred, [V1Write_3c_1I_1M]>]>; 602def : SchedAlias<WriteExtr, V1WriteExtr>; 603 604// Bitfield move, basic 605// Variable shift 606def : SchedAlias<WriteIS, V1Write_1c_1I>; 607 608// Bitfield move, insert 609def : InstRW<[V1Write_2c_1M], (instregex "^BFM[WX]ri$")>; 610 611// Move immediate 612def : SchedAlias<WriteImm, V1Write_1c_1I>; 613 614 615// Load instructions 616// ----------------------------------------------------------------------------- 617 618// Load register, immed offset 619def : SchedAlias<WriteLD, V1Write_4c_1L>; 620 621// Load register, immed offset, index 622def : SchedAlias<WriteLDIdx, V1Write_4c_1L>; 623def : SchedAlias<WriteAdr, V1Write_1c_1I>; 624 625// Load pair, immed offset 626def : SchedAlias<WriteLDHi, V1Write_4c_1L>; 627def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z], (instrs LDPWi, LDNPWi)>; 628def : InstRW<[WriteAdr, V1Write_4c_1L, V1Write_0c_0Z], 629 (instrs LDPWpost, LDPWpre)>; 630 631// Load pair, signed immed offset, signed words 632def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z], (instrs LDPSWi)>; 633 634// Load pair, immed post or pre-index, signed words 635def : InstRW<[WriteAdr, V1Write_5c_1I_1L, V1Write_0c_0Z], 636 (instrs LDPSWpost, LDPSWpre)>; 637 638 639// Store instructions 640// ----------------------------------------------------------------------------- 641 642// Store register, immed offset 643def : SchedAlias<WriteST, V1Write_1c_1L01_1D>; 644 645// Store register, immed offset, index 646def : SchedAlias<WriteSTIdx, V1Write_1c_1L01_1D>; 647 648// Store pair, immed offset 649def : SchedAlias<WriteSTP, V1Write_1c_1L01_1D>; 650 651 652// FP data processing instructions 653// ----------------------------------------------------------------------------- 654 655// FP absolute value 656// FP arithmetic 657// FP min/max 658// FP negate 659def : SchedAlias<WriteF, V1Write_2c_1V>; 660 661// FP compare 662def : SchedAlias<WriteFCmp, V1Write_2c_1V0>; 663 664// FP divide 665// FP square root 666def : SchedAlias<WriteFDiv, V1Write_10c7_1V02>; 667 668// FP divide, H-form 669// FP square root, H-form 670def : InstRW<[V1Write_7c7_1V02], (instrs FDIVHrr, FSQRTHr)>; 671 672// FP divide, S-form 673// FP square root, S-form 674def : InstRW<[V1Write_10c7_1V02], (instrs FDIVSrr, FSQRTSr)>; 675 676// FP divide, D-form 677def : InstRW<[V1Write_15c7_1V02], (instrs FDIVDrr)>; 678 679// FP square root, D-form 680def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTDr)>; 681 682// FP multiply 683def : SchedAlias<WriteFMul, V1Write_3c_1V>; 684 685// FP multiply accumulate 686def : InstRW<[V1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>; 687 688// FP round to integral 689def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$", 690 "^FRINT(32|64)[XZ][SD]r$")>; 691 692// FP select 693def : InstRW<[V1Write_2c_1V01], (instregex "^FCSEL[HSD]rrr$")>; 694 695 696// FP miscellaneous instructions 697// ----------------------------------------------------------------------------- 698 699// FP convert, from gen to vec reg 700def : InstRW<[V1Write_3c_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>; 701 702// FP convert, from vec to gen reg 703def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>; 704 705// FP convert, Javascript from vec to gen reg 706def : InstRW<[V1Write_3c_1V0], (instrs FJCVTZS)>; 707 708// FP convert, from vec to vec reg 709def : SchedAlias<WriteFCvt, V1Write_3c_1V02>; 710 711// FP move, immed 712def : SchedAlias<WriteFImm, V1Write_2c_1V>; 713 714// FP move, register 715def : InstRW<[V1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>; 716 717// FP transfer, from gen to low half of vec reg 718def : InstRW<[V1Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>; 719 720// FP transfer, from gen to high half of vec reg 721def : InstRW<[V1Write_5c_1M0_1V], (instrs FMOVXDHighr)>; 722 723// FP transfer, from vec to gen reg 724def : SchedAlias<WriteFCopy, V1Write_2c_1V1>; 725 726 727// FP load instructions 728// ----------------------------------------------------------------------------- 729 730// Load vector reg, literal, S/D/Q forms 731// Load vector reg, unscaled immed 732// Load vector reg, unsigned immed 733def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$", 734 "^LDUR[BHSDQ]i$", 735 "^LDR[BHSDQ]ui$")>; 736 737// Load vector reg, immed post-index 738// Load vector reg, immed pre-index 739def : InstRW<[WriteAdr, V1Write_6c_1L], 740 (instregex "^LDR[BHSDQ](post|pre)$")>; 741 742// Load vector reg, register offset, basic 743// Load vector reg, register offset, scale, S/D-form 744// Load vector reg, register offset, extend 745// Load vector reg, register offset, extend, scale, S/D-form 746def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>; 747 748// Load vector reg, register offset, scale, H/Q-form 749// Load vector reg, register offset, extend, scale, H/Q-form 750def : InstRW<[V1Write_7c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>; 751 752// Load vector pair, immed offset, S/D-form 753def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z], (instregex "^LDN?P[SD]i$")>; 754 755// Load vector pair, immed offset, Q-form 756def : InstRW<[V1Write_6c_1L, WriteLDHi], (instrs LDPQi, LDNPQi)>; 757 758// Load vector pair, immed post-index, S/D-form 759// Load vector pair, immed pre-index, S/D-form 760def : InstRW<[WriteAdr, V1Write_6c_1L, V1Write_0c_0Z], 761 (instregex "^LDP[SD](pre|post)$")>; 762 763// Load vector pair, immed post-index, Q-form 764// Load vector pair, immed pre-index, Q-form 765def : InstRW<[WriteAdr, V1Write_6c_1L, WriteLDHi], 766 (instrs LDPQpost, LDPQpre)>; 767 768 769// FP store instructions 770// ----------------------------------------------------------------------------- 771 772// Store vector reg, unscaled immed, B/H/S/D/Q-form 773def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>; 774 775// Store vector reg, immed post-index, B/H/S/D/Q-form 776// Store vector reg, immed pre-index, B/H/S/D/Q-form 777def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01], 778 (instregex "^STR[BHSDQ](pre|post)$")>; 779 780// Store vector reg, unsigned immed, B/H/S/D/Q-form 781def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>; 782 783// Store vector reg, register offset, basic, B/S/D-form 784// Store vector reg, register offset, scale, B/S/D-form 785// Store vector reg, register offset, extend, B/S/D-form 786// Store vector reg, register offset, extend, scale, B/S/D-form 787def : InstRW<[V1Write_2c_1L01_1V01, ReadAdrBase], 788 (instregex "^STR[BSD]ro[WX]$")>; 789 790// Store vector reg, register offset, basic, H/Q-form 791// Store vector reg, register offset, scale, H/Q-form 792// Store vector reg, register offset, extend, H/Q-form 793// Store vector reg, register offset, extend, scale, H/Q-form 794def : InstRW<[V1Write_2c_1I_1L01_1V01, ReadAdrBase], 795 (instregex "^STR[HQ]ro[WX]$")>; 796 797// Store vector pair, immed offset, S/D/Q-form 798def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STN?P[SDQ]i$")>; 799 800// Store vector pair, immed post-index, S/D-form 801// Store vector pair, immed pre-index, S/D-form 802def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01], 803 (instregex "^STP[SD](pre|post)$")>; 804 805// Store vector pair, immed post-index, Q-form 806// Store vector pair, immed pre-index, Q-form 807def : InstRW<[WriteAdr, V1Write_2c_2L01_1V01], (instrs STPQpre, STPQpost)>; 808 809 810// ASIMD integer instructions 811// ----------------------------------------------------------------------------- 812 813// ASIMD absolute diff 814// ASIMD absolute diff long 815// ASIMD arith, basic 816// ASIMD arith, complex 817// ASIMD arith, pair-wise 818// ASIMD compare 819// ASIMD logical 820// ASIMD max/min, basic and pair-wise 821def : SchedAlias<WriteVd, V1Write_2c_1V>; 822def : SchedAlias<WriteVq, V1Write_2c_1V>; 823 824// ASIMD absolute diff accum 825// ASIMD absolute diff accum long 826// ASIMD pairwise add and accumulate long 827def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]ABAL?v", "^[SU]ADALPv")>; 828 829// ASIMD arith, reduce, 4H/4S 830// ASIMD max/min, reduce, 4H/4S 831def : InstRW<[V1Write_2c_1V13], (instregex "^(ADD|[SU]ADDL)Vv4(i16|i32)v$", 832 "^[SU](MAX|MIN)Vv4(i16|i32)v$")>; 833 834// ASIMD arith, reduce, 8B/8H 835// ASIMD max/min, reduce, 8B/8H 836def : InstRW<[V1Write_4c_1V13_1V], (instregex "^(ADD|[SU]ADDL)Vv8(i8|i16)v$", 837 "^[SU](MAX|MIN)Vv8(i8|i16)v$")>; 838 839// ASIMD arith, reduce, 16B 840// ASIMD max/min, reduce, 16B 841def : InstRW<[V1Write_4c_2V13], (instregex "^(ADD|[SU]ADDL)Vv16i8v$", 842 "[SU](MAX|MIN)Vv16i8v$")>; 843 844// ASIMD dot product 845// ASIMD dot product using signed and unsigned integers 846def : InstRW<[V1Write_2c_1V], (instregex "^([SU]|SU|US)DOT(lane)?v(8|16)i8$")>; 847 848// ASIMD matrix multiply- accumulate 849def : InstRW<[V1Write_3c_1V], (instrs SMMLA, UMMLA, USMMLA)>; 850 851// ASIMD multiply 852// ASIMD multiply accumulate 853// ASIMD multiply accumulate long 854// ASIMD multiply accumulate high 855// ASIMD multiply accumulate saturating long 856def : InstRW<[V1Write_4c_1V02], 857 (instregex "^MUL(v[148]i16|v[124]i32)$", 858 "^SQR?DMULH(v[48]i16|v[24]i32)$", 859 "^ML[AS](v[148]i16|v[124]i32)$", 860 "^[SU]ML[AS]Lv", 861 "^SQRDML[AS]H(v[148]i16|v[124]i32)$", 862 "^SQDML[AS]Lv")>; 863 864// ASIMD multiply/multiply long (8x8) polynomial 865def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>; 866 867// ASIMD multiply long 868def : InstRW<[V1Write_3c_1V02], (instregex "^([SU]|SQD)MULLv")>; 869 870// ASIMD shift accumulate 871// ASIMD shift by immed, complex 872// ASIMD shift by register, complex 873def : InstRW<[V1Write_4c_1V13], 874 (instregex "^[SU]R?SRAv", 875 "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$", 876 "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$", 877 "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv", 878 "^[SU]Q?RSHLv", "^[SU]QSHLv")>; 879 880// ASIMD shift by immed, basic 881// ASIMD shift by immed and insert, basic 882// ASIMD shift by register, basic 883def : InstRW<[V1Write_2c_1V13], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv", 884 "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>; 885 886 887// ASIMD FP instructions 888// ----------------------------------------------------------------------------- 889 890// ASIMD FP absolute value/difference 891// ASIMD FP arith, normal 892// ASIMD FP compare 893// ASIMD FP complex add 894// ASIMD FP max/min, normal 895// ASIMD FP max/min, pairwise 896// ASIMD FP negate 897// Covered by "SchedAlias (WriteV[dq]...)" above 898 899// ASIMD FP complex multiply add 900// ASIMD FP multiply accumulate 901def : InstRW<[V1Write_4c_1V], (instregex "^FCADD(v[48]f16|v[24]f32|v2f64)$", 902 "^FML[AS]v")>; 903 904// ASIMD FP convert, long (F16 to F32) 905def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTLv[48]i16$")>; 906 907// ASIMD FP convert, long (F32 to F64) 908def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTLv[24]i32$")>; 909 910// ASIMD FP convert, narrow (F32 to F16) 911def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTNv[48]i16$")>; 912 913// ASIMD FP convert, narrow (F64 to F32) 914def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTNv[24]i32$", 915 "^FCVTXN(v[24]f32|v1i64)$")>; 916 917// ASIMD FP convert, other, D-form F32 and Q-form F64 918def : InstRW<[V1Write_3c_1V02], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$", 919 "^[SU]CVTFv2f(32|64)$")>; 920 921// ASIMD FP convert, other, D-form F16 and Q-form F32 922def : InstRW<[V1Write_4c_2V02], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$", 923 "^[SU]CVTFv4f(16|32)$")>; 924 925// ASIMD FP convert, other, Q-form F16 926def : InstRW<[V1Write_6c_4V02], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$", 927 "^[SU]CVTFv8f16$")>; 928 929// ASIMD FP divide, D-form, F16 930// ASIMD FP square root, D-form, F16 931def : InstRW<[V1Write_7c7_1V02], (instrs FDIVv4f16, FSQRTv4f16)>; 932 933// ASIMD FP divide, F32 934// ASIMD FP square root, F32 935def : InstRW<[V1Write_10c7_1V02], (instrs FDIVv2f32, FDIVv4f32, 936 FSQRTv2f32, FSQRTv4f32)>; 937 938// ASIMD FP divide, Q-form, F16 939def : InstRW<[V1Write_13c5_1V02], (instrs FDIVv8f16)>; 940 941// ASIMD FP divide, Q-form, F64 942def : InstRW<[V1Write_15c7_1V02], (instrs FDIVv2f64)>; 943 944// ASIMD FP square root, Q-form, F16 945def : InstRW<[V1Write_13c11_1V02], (instrs FSQRTv8f16)>; 946 947// ASIMD FP square root, Q-form, F64 948def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTv2f64)>; 949 950// ASIMD FP max/min, reduce, F32 and D-form F16 951def : InstRW<[V1Write_4c_2V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>; 952 953// ASIMD FP max/min, reduce, Q-form F16 954def : InstRW<[V1Write_6c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>; 955 956// ASIMD FP multiply 957def : InstRW<[V1Write_3c_1V], (instregex "^FMULX?v")>; 958 959// ASIMD FP multiply accumulate long 960def : InstRW<[V1Write_5c_1V], (instregex "^FML[AS]L2?v")>; 961 962// ASIMD FP round, D-form F32 and Q-form F64 963def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>; 964 965// ASIMD FP round, D-form F16 and Q-form F32 966def : InstRW<[V1Write_4c_2V02], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>; 967 968// ASIMD FP round, Q-form F16 969def : InstRW<[V1Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>; 970 971 972// ASIMD BF instructions 973// ----------------------------------------------------------------------------- 974 975// ASIMD convert, F32 to BF16 976def : InstRW<[V1Write_4c_1V02], (instrs BFCVTN, BFCVTN2)>; 977 978// ASIMD dot product 979def : InstRW<[V1Write_4c_1V], (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>; 980 981// ASIMD matrix multiply accumulate 982def : InstRW<[V1Write_5c_1V], (instrs BFMMLA)>; 983 984// ASIMD multiply accumulate long 985def : InstRW<[V1Write_4c_1V], (instregex "^BFMLAL[BT](Idx)?$")>; 986 987// Scalar convert, F32 to BF16 988def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>; 989 990 991// ASIMD miscellaneous instructions 992// ----------------------------------------------------------------------------- 993 994// ASIMD bit reverse 995// ASIMD bitwise insert 996// ASIMD count 997// ASIMD duplicate, element 998// ASIMD extract 999// ASIMD extract narrow 1000// ASIMD insert, element to element 1001// ASIMD move, FP immed 1002// ASIMD move, integer immed 1003// ASIMD reverse 1004// ASIMD table lookup, 1 or 2 table regs 1005// ASIMD table lookup extension, 1 table reg 1006// ASIMD transfer, element to gen reg 1007// ASIMD transpose 1008// ASIMD unzip/zip 1009// Covered by "SchedAlias (WriteV[dq]...)" above 1010 1011// ASIMD duplicate, gen reg 1012def : InstRW<[V1Write_3c_1M0], 1013 (instregex "^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>; 1014 1015// ASIMD extract narrow, saturating 1016def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>; 1017 1018// ASIMD reciprocal and square root estimate, D-form U32 1019// ASIMD reciprocal and square root estimate, D-form F32 and F64 1020def : InstRW<[V1Write_3c_1V02], (instrs URECPEv2i32, 1021 URSQRTEv2i32, 1022 FRECPEv1i32, FRECPEv2f32, FRECPEv1i64, 1023 FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64)>; 1024 1025// ASIMD reciprocal and square root estimate, Q-form U32 1026// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 and F64 1027def : InstRW<[V1Write_4c_1V02], (instrs URECPEv4i32, 1028 URSQRTEv4i32, 1029 FRECPEv1f16, FRECPEv4f16, 1030 FRECPEv4f32, FRECPEv2f64, 1031 FRSQRTEv1f16, FRSQRTEv4f16, 1032 FRSQRTEv4f32, FRSQRTEv2f64)>; 1033 1034// ASIMD reciprocal and square root estimate, Q-form F16 1035def : InstRW<[V1Write_6c_2V02], (instrs FRECPEv8f16, 1036 FRSQRTEv8f16)>; 1037 1038// ASIMD reciprocal exponent 1039def : InstRW<[V1Write_3c_1V02], (instrs FRECPXv1f16, FRECPXv1i32, FRECPXv1i64)>; 1040 1041// ASIMD reciprocal step 1042def : InstRW<[V1Write_4c_1V], (instregex "^FRECPS(16|32|64)$", "^FRECPSv", 1043 "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>; 1044 1045// ASIMD table lookup, 1 or 2 table regs 1046// ASIMD table lookup extension, 1 table reg 1047def : InstRW<[V1Write_2c_2V01], (instregex "^TBLv(8|16)i8(One|Two)$", 1048 "^TBXv(8|16)i8One$")>; 1049 1050// ASIMD table lookup, 3 table regs 1051// ASIMD table lookup extension, 2 table reg 1052def : InstRW<[V1Write_4c_2V01], (instrs TBLv8i8Three, TBLv16i8Three, 1053 TBXv8i8Two, TBXv16i8Two)>; 1054 1055// ASIMD table lookup, 4 table regs 1056def : InstRW<[V1Write_4c_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>; 1057 1058// ASIMD table lookup extension, 3 table reg 1059def : InstRW<[V1Write_6c_3V01], (instrs TBXv8i8Three, TBXv16i8Three)>; 1060 1061// ASIMD table lookup extension, 4 table reg 1062def : InstRW<[V1Write_6c_5V01], (instrs TBXv8i8Four, TBXv16i8Four)>; 1063 1064// ASIMD transfer, element to gen reg 1065def : InstRW<[V1Write_2c_1V], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$", 1066 "^UMOVvi(8|16|32|64)$")>; 1067 1068// ASIMD transfer, gen reg to element 1069def : InstRW<[V1Write_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>; 1070 1071 1072// ASIMD load instructions 1073// ----------------------------------------------------------------------------- 1074 1075// ASIMD load, 1 element, multiple, 1 reg 1076def : InstRW<[V1Write_6c_1L], 1077 (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1078def : InstRW<[WriteAdr, V1Write_6c_1L], 1079 (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1080 1081// ASIMD load, 1 element, multiple, 2 reg 1082def : InstRW<[V1Write_6c_2L], 1083 (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1084def : InstRW<[WriteAdr, V1Write_6c_2L], 1085 (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1086 1087// ASIMD load, 1 element, multiple, 3 reg 1088def : InstRW<[V1Write_6c_3L], 1089 (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1090def : InstRW<[WriteAdr, V1Write_6c_3L], 1091 (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1092 1093// ASIMD load, 1 element, multiple, 4 reg, D-form 1094def : InstRW<[V1Write_6c_2L], 1095 (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; 1096def : InstRW<[WriteAdr, V1Write_6c_2L], 1097 (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; 1098 1099// ASIMD load, 1 element, multiple, 4 reg, Q-form 1100def : InstRW<[V1Write_7c_4L], 1101 (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; 1102def : InstRW<[WriteAdr, V1Write_7c_4L], 1103 (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; 1104 1105// ASIMD load, 1 element, one lane 1106// ASIMD load, 1 element, all lanes 1107def : InstRW<[V1Write_8c_1L_1V], 1108 (instregex "^LD1(i|Rv)(8|16|32|64)$", 1109 "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1110def : InstRW<[WriteAdr, V1Write_8c_1L_1V], 1111 (instregex "^LD1i(8|16|32|64)_POST$", 1112 "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1113 1114// ASIMD load, 2 element, multiple, D-form 1115def : InstRW<[V1Write_8c_1L_2V], 1116 (instregex "^LD2Twov(8b|4h|2s)$")>; 1117def : InstRW<[WriteAdr, V1Write_8c_1L_2V], 1118 (instregex "^LD2Twov(8b|4h|2s)_POST$")>; 1119 1120// ASIMD load, 2 element, multiple, Q-form 1121def : InstRW<[V1Write_8c_2L_2V], 1122 (instregex "^LD2Twov(16b|8h|4s|2d)$")>; 1123def : InstRW<[WriteAdr, V1Write_8c_2L_2V], 1124 (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; 1125 1126// ASIMD load, 2 element, one lane 1127// ASIMD load, 2 element, all lanes 1128def : InstRW<[V1Write_8c_1L_2V], 1129 (instregex "^LD2i(8|16|32|64)$", 1130 "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1131def : InstRW<[WriteAdr, V1Write_8c_1L_2V], 1132 (instregex "^LD2i(8|16|32|64)_POST$", 1133 "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1134 1135// ASIMD load, 3 element, multiple, D-form 1136// ASIMD load, 3 element, one lane 1137// ASIMD load, 3 element, all lanes 1138def : InstRW<[V1Write_8c_2L_3V], 1139 (instregex "^LD3Threev(8b|4h|2s)$", 1140 "^LD3i(8|16|32|64)$", 1141 "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1142def : InstRW<[WriteAdr, V1Write_8c_2L_3V], 1143 (instregex "^LD3Threev(8b|4h|2s)_POST$", 1144 "^LD3i(8|16|32|64)_POST$", 1145 "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1146 1147// ASIMD load, 3 element, multiple, Q-form 1148def : InstRW<[V1Write_8c_3L_3V], 1149 (instregex "^LD3Threev(16b|8h|4s|2d)$")>; 1150def : InstRW<[WriteAdr, V1Write_8c_3L_3V], 1151 (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>; 1152 1153// ASIMD load, 4 element, multiple, D-form 1154// ASIMD load, 4 element, one lane 1155// ASIMD load, 4 element, all lanes 1156def : InstRW<[V1Write_8c_3L_4V], 1157 (instregex "^LD4Fourv(8b|4h|2s)$", 1158 "^LD4i(8|16|32|64)$", 1159 "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1160def : InstRW<[WriteAdr, V1Write_8c_3L_4V], 1161 (instregex "^LD4Fourv(8b|4h|2s)_POST$", 1162 "^LD4i(8|16|32|64)_POST$", 1163 "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1164 1165// ASIMD load, 4 element, multiple, Q-form 1166def : InstRW<[V1Write_9c_4L_4V], 1167 (instregex "^LD4Fourv(16b|8h|4s|2d)$")>; 1168def : InstRW<[WriteAdr, V1Write_9c_4L_4V], 1169 (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>; 1170 1171 1172// ASIMD store instructions 1173// ----------------------------------------------------------------------------- 1174 1175// ASIMD store, 1 element, multiple, 1 reg 1176// ASIMD store, 1 element, multiple, 2 reg, D-form 1177def : InstRW<[V1Write_2c_1L01_1V01], 1178 (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$", 1179 "^ST1Twov(8b|4h|2s|1d)$")>; 1180def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01], 1181 (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$", 1182 "^ST1Twov(8b|4h|2s|1d)_POST$")>; 1183 1184// ASIMD store, 1 element, multiple, 2 reg, Q-form 1185// ASIMD store, 1 element, multiple, 3 reg, D-form 1186// ASIMD store, 1 element, multiple, 4 reg, D-form 1187def : InstRW<[V1Write_2c_2L01_2V01], 1188 (instregex "^ST1Twov(16b|8h|4s|2d)$", 1189 "^ST1Threev(8b|4h|2s|1d)$", 1190 "^ST1Fourv(8b|4h|2s|1d)$")>; 1191def : InstRW<[WriteAdr, V1Write_2c_2L01_2V01], 1192 (instregex "^ST1Twov(16b|8h|4s|2d)_POST$", 1193 "^ST1Threev(8b|4h|2s|1d)_POST$", 1194 "^ST1Fourv(8b|4h|2s|1d)_POST$")>; 1195 1196// ASIMD store, 1 element, multiple, 3 reg, Q-form 1197def : InstRW<[V1Write_2c_3L01_3V01], 1198 (instregex "^ST1Threev(16b|8h|4s|2d)$")>; 1199def : InstRW<[WriteAdr, V1Write_2c_3L01_3V01], 1200 (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>; 1201 1202// ASIMD store, 1 element, multiple, 4 reg, Q-form 1203def : InstRW<[V1Write_2c_4L01_4V01], 1204 (instregex "^ST1Fourv(16b|8h|4s|2d)$")>; 1205def : InstRW<[WriteAdr, V1Write_2c_4L01_4V01], 1206 (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>; 1207 1208// ASIMD store, 1 element, one lane 1209// ASIMD store, 2 element, multiple, D-form 1210// ASIMD store, 2 element, one lane 1211def : InstRW<[V1Write_4c_1L01_1V01], 1212 (instregex "^ST1i(8|16|32|64)$", 1213 "^ST2Twov(8b|4h|2s)$", 1214 "^ST2i(8|16|32|64)$")>; 1215def : InstRW<[WriteAdr, V1Write_4c_1L01_1V01], 1216 (instregex "^ST1i(8|16|32|64)_POST$", 1217 "^ST2Twov(8b|4h|2s)_POST$", 1218 "^ST2i(8|16|32|64)_POST$")>; 1219 1220// ASIMD store, 2 element, multiple, Q-form 1221// ASIMD store, 3 element, multiple, D-form 1222// ASIMD store, 3 element, one lane 1223// ASIMD store, 4 element, one lane, D 1224def : InstRW<[V1Write_4c_2L01_2V01], 1225 (instregex "^ST2Twov(16b|8h|4s|2d)$", 1226 "^ST3Threev(8b|4h|2s)$", 1227 "^ST3i(8|16|32|64)$", 1228 "^ST4i64$")>; 1229def : InstRW<[WriteAdr, V1Write_4c_2L01_2V01], 1230 (instregex "^ST2Twov(16b|8h|4s|2d)_POST$", 1231 "^ST3Threev(8b|4h|2s)_POST$", 1232 "^ST3i(8|16|32|64)_POST$", 1233 "^ST4i64_POST$")>; 1234 1235// ASIMD store, 3 element, multiple, Q-form 1236def : InstRW<[V1Write_5c_3L01_3V01], 1237 (instregex "^ST3Threev(16b|8h|4s|2d)$")>; 1238def : InstRW<[WriteAdr, V1Write_5c_3L01_3V01], 1239 (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>; 1240 1241// ASIMD store, 4 element, multiple, D-form 1242def : InstRW<[V1Write_6c_3L01_3V01], 1243 (instregex "^ST4Fourv(8b|4h|2s)$")>; 1244def : InstRW<[WriteAdr, V1Write_6c_3L01_3V01], 1245 (instregex "^ST4Fourv(8b|4h|2s)_POST$")>; 1246 1247// ASIMD store, 4 element, multiple, Q-form, B/H/S 1248def : InstRW<[V1Write_7c_6L01_6V01], 1249 (instregex "^ST4Fourv(16b|8h|4s)$")>; 1250def : InstRW<[WriteAdr, V1Write_7c_6L01_6V01], 1251 (instregex "^ST4Fourv(16b|8h|4s)_POST$")>; 1252 1253// ASIMD store, 4 element, multiple, Q-form, D 1254def : InstRW<[V1Write_4c_4L01_4V01], 1255 (instrs ST4Fourv2d)>; 1256def : InstRW<[WriteAdr, V1Write_4c_4L01_4V01], 1257 (instrs ST4Fourv2d_POST)>; 1258 1259// ASIMD store, 4 element, one lane, B/H/S 1260def : InstRW<[V1Write_6c_3L_3V], 1261 (instregex "^ST4i(8|16|32)$")>; 1262def : InstRW<[WriteAdr, V1Write_6c_3L_3V], 1263 (instregex "^ST4i(8|16|32)_POST$")>; 1264 1265 1266// Cryptography extensions 1267// ----------------------------------------------------------------------------- 1268 1269// Crypto polynomial (64x64) multiply long 1270// Covered by "SchedAlias (WriteV[dq]...)" above 1271 1272// Crypto AES ops 1273def V1WriteVC : WriteSequence<[V1Write_2c_1V]>; 1274def V1ReadVC : SchedReadAdvance<2, [V1WriteVC]>; 1275def : InstRW<[V1WriteVC], (instrs AESDrr, AESErr)>; 1276def : InstRW<[V1Write_2c_1V, V1ReadVC], (instrs AESMCrr, AESIMCrr)>; 1277 1278// Crypto SHA1 hash acceleration op 1279// Crypto SHA1 schedule acceleration ops 1280// Crypto SHA256 schedule acceleration ops 1281// Crypto SHA512 hash acceleration ops 1282// Crypto SM3 ops 1283def : InstRW<[V1Write_2c_1V0], (instregex "^SHA1(H|SU[01])rr$", 1284 "^SHA256SU[01]rr$", 1285 "^SHA512(H2?|SU[01])$", 1286 "^SM3(PARTW(1|2SM3SS1)|TT[12][AB])$")>; 1287 1288// Crypto SHA1 hash acceleration ops 1289// Crypto SHA256 hash acceleration ops 1290// Crypto SM4 ops 1291def : InstRW<[V1Write_4c_1V0], (instregex "^SHA1[CMP]rrr$", 1292 "^SHA256H2?rrr$", 1293 "^SM4E(KEY)?$")>; 1294 1295// Crypto SHA3 ops 1296def : InstRW<[V1Write_2c_1V0], (instrs BCAX, EOR3, RAX1, XAR)>; 1297 1298 1299// CRC instruction 1300// ----------------------------------------------------------------------------- 1301 1302// CRC checksum ops 1303def : InstRW<[V1Write_2c_1M0], (instregex "^CRC32C?[BHWX]rr$")>; 1304 1305 1306// SVE Predicate instructions 1307// ----------------------------------------------------------------------------- 1308 1309// Loop control, based on predicate 1310def : InstRW<[V1Write_2c_1M0], (instregex "^BRK[AB]_PP[mz]P$")>; 1311def : InstRW<[V1Write_2c_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>; 1312 1313// Loop control, based on predicate and flag setting 1314def : InstRW<[V1Write_3c_2M0], (instrs BRKAS_PPzP, BRKBS_PPzP, BRKNS_PPzP, 1315 BRKPAS_PPzPP, BRKPBS_PPzPP)>; 1316 1317// Loop control, based on GPR 1318def : InstRW<[V1Write_3c_2M0], (instregex "^WHILE(LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>; 1319 1320// Loop terminate 1321def : InstRW<[V1Write_1c_1M0], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>; 1322 1323// Predicate counting scalar 1324// Predicate counting scalar, active predicate 1325def : InstRW<[V1Write_2c_1M0], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>; 1326def : InstRW<[V1Write_2c_1M0], (instregex "^(CNT|([SU]Q)?(DEC|INC))[BHWD]_XPiI$", 1327 "^SQ(DEC|INC)[BHWD]_XPiWdI$", 1328 "^UQ(DEC|INC)[BHWD]_WPiI$", 1329 "^CNTP_XPP_[BHSD]$", 1330 "^([SU]Q)?(DEC|INC)P_XP_[BHSD]$", 1331 "^UQ(DEC|INC)P_WP_[BHSD]$", 1332 "^[SU]Q(DEC|INC)P_XPWd_[BHSD]$")>; 1333 1334// Predicate counting vector, active predicate 1335def : InstRW<[V1Write_7c_2M0_1V01], (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]$")>; 1336 1337// Predicate logical 1338def : InstRW<[V1Write_1c_1M0], 1339 (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>; 1340 1341// Predicate logical, flag setting 1342def : InstRW<[V1Write_2c_2M0], 1343 (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)S_PPzPP$")>; 1344 1345// Predicate reverse 1346// Predicate set/initialize/find next 1347// Predicate transpose 1348// Predicate unpack and widen 1349// Predicate zip/unzip 1350def : InstRW<[V1Write_2c_1M0], (instregex "^REV_PP_[BHSD]$", 1351 "^PFALSE$", "^PFIRST_B$", 1352 "^PNEXT_[BHSD]$", "^PTRUE_[BHSD]$", 1353 "^TRN[12]_PPP_[BHSDQ]$", 1354 "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>; 1355 1356// Predicate set/initialize/find next 1357// Predicate unpack and widen 1358def : InstRW<[V1Write_2c_1M0], (instrs PTEST_PP, 1359 PUNPKHI_PP, PUNPKLO_PP)>; 1360 1361// Predicate select 1362def : InstRW<[V1Write_1c_1M0], (instrs SEL_PPPP)>; 1363 1364// Predicate set/initialize, set flags 1365def : InstRW<[V1Write_3c_2M0], (instregex "^PTRUES_[BHSD]$")>; 1366 1367 1368 1369// SVE integer instructions 1370// ----------------------------------------------------------------------------- 1371 1372// Arithmetic, basic 1373// Logical 1374def : InstRW<[V1Write_2c_1V01], 1375 (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]$", 1376 "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]$", 1377 "^ADR_[SU]XTW_ZZZ_D_[0123]$", 1378 "^ADR_LSL_ZZZ_[SD]_[0123]$", 1379 "^[SU]ABD_ZP[mZ]Z_[BHSD]$", 1380 "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]$", 1381 "^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]$", 1382 "^SUBR_Z(I|P[mZ]Z)_[BHSD]$", 1383 "^(AND|EOR|ORR)_ZI$", 1384 "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$", 1385 "^EOR(BT|TB)_ZZZ_[BHSD]$", 1386 "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>; 1387 1388// Arithmetic, shift 1389def : InstRW<[V1Write_2c_1V1], 1390 (instregex "^(ASR|LSL|LSR)_WIDE_Z(Pm|Z)Z_[BHS]", 1391 "^(ASR|LSL|LSR)_ZPm[IZ]_[BHSD]", 1392 "^(ASR|LSL|LSR)_ZZI_[BHSD]", 1393 "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]", 1394 "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>; 1395 1396// Arithmetic, shift right for divide 1397def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_ZP[mZ]I_[BHSD]$")>; 1398 1399// Count/reverse bits 1400def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>; 1401 1402// Broadcast logical bitmask immediate to vector 1403def : InstRW<[V1Write_2c_1V01], (instrs DUPM_ZI)>; 1404 1405// Compare and set flags 1406def : InstRW<[V1Write_4c_1M0_1V0], 1407 (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$", 1408 "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>; 1409 1410// Conditional extract operations, scalar form 1411def : InstRW<[V1Write_9c_1M0_1V1], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>; 1412 1413// Conditional extract operations, SIMD&FP scalar and vector forms 1414def : InstRW<[V1Write_3c_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$", 1415 "^COMPACT_ZPZ_[SD]$", 1416 "^SPLICE_ZPZZ?_[BHSD]$")>; 1417 1418// Convert to floating point, 64b to float or convert to double 1419def : InstRW<[V1Write_3c_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]", 1420 "^[SU]CVTF_ZPmZ_StoD")>; 1421 1422// Convert to floating point, 32b to single or half 1423def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>; 1424 1425// Convert to floating point, 16b to half 1426def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>; 1427 1428// Copy, scalar 1429def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>; 1430 1431// Copy, scalar SIMD&FP or imm 1432def : InstRW<[V1Write_2c_1V01], (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>; 1433 1434// Divides, 32 bit 1435def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>; 1436 1437// Divides, 64 bit 1438def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>; 1439 1440// Dot product, 8 bit 1441def : InstRW<[V1Write_3c_1V01], (instregex "^[SU]DOT_ZZZI?_S$")>; 1442 1443// Dot product, 8 bit, using signed and unsigned integers 1444def : InstRW<[V1Write_3c_1V], (instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>; 1445 1446// Dot product, 16 bit 1447def : InstRW<[V1Write_4c_1V01], (instregex "^[SU]DOT_ZZZI?_D$")>; 1448 1449// Duplicate, immediate and indexed form 1450def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$", 1451 "^DUP_ZZI_[BHSDQ]$")>; 1452 1453// Duplicate, scalar form 1454def : InstRW<[V1Write_3c_1M0], (instregex "^DUP_ZR_[BHSD]$")>; 1455 1456// Extend, sign or zero 1457def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$", 1458 "^[SU]XTH_ZPmZ_[SD]$", 1459 "^[SU]XTW_ZPmZ_[D]$")>; 1460 1461// Extract 1462def : InstRW<[V1Write_2c_1V01], (instrs EXT_ZZI)>; 1463 1464// Extract/insert operation, SIMD and FP scalar form 1465def : InstRW<[V1Write_3c_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$", 1466 "^INSR_ZV_[BHSD]$")>; 1467 1468// Extract/insert operation, scalar 1469def : InstRW<[V1Write_6c_1M0_1V1], (instregex "^LAST[AB]_RPZ_[BHSD]$", 1470 "^INSR_ZR_[BHSD]$")>; 1471 1472// Horizontal operations, B, H, S form, imm, imm 1473def : InstRW<[V1Write_4c_1V0], (instregex "^INDEX_II_[BHS]$")>; 1474 1475// Horizontal operations, B, H, S form, scalar, imm / scalar / imm, scalar 1476def : InstRW<[V1Write_7c_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>; 1477 1478// Horizontal operations, D form, imm, imm 1479def : InstRW<[V1Write_5c_2V0], (instrs INDEX_II_D)>; 1480 1481// Horizontal operations, D form, scalar, imm / scalar / imm, scalar 1482def : InstRW<[V1Write_8c_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>; 1483 1484// Move prefix 1485def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$", 1486 "^MOVPRFX_ZZ$")>; 1487 1488// Matrix multiply-accumulate 1489def : InstRW<[V1Write_3c_1V01], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; 1490 1491// Multiply, B, H, S element size 1492def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ)_[BHS]$", 1493 "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>; 1494 1495// Multiply, D element size 1496// Multiply accumulate, D element size 1497def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ)_D$", 1498 "^[SU]MULH_ZPmZ_D$", 1499 "^(MLA|MLS|MAD|MSB)_ZPmZZ_D$")>; 1500 1501// Multiply accumulate, B, H, S element size 1502// NOTE: This is not specified in the SOG. 1503def : InstRW<[V1Write_4c_1V0], (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>; 1504 1505// Predicate counting vector 1506def : InstRW<[V1Write_2c_1V0], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>; 1507 1508// Reduction, arithmetic, B form 1509def : InstRW<[V1Write_14c_1V_1V0_2V1_1V13], 1510 (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>; 1511 1512// Reduction, arithmetic, H form 1513def : InstRW<[V1Write_12c_1V_1V01_2V1], 1514 (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>; 1515 1516// Reduction, arithmetic, S form 1517def : InstRW<[V1Write_10c_1V_1V01_2V1], 1518 (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>; 1519 1520// Reduction, arithmetic, D form 1521def : InstRW<[V1Write_8c_1V_1V01], 1522 (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>; 1523 1524// Reduction, logical 1525def : InstRW<[V1Write_12c_4V01], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>; 1526 1527// Reverse, vector 1528def : InstRW<[V1Write_2c_1V01], (instregex "^REV_ZZ_[BHSD]$", 1529 "^REVB_ZPmZ_[HSD]$", 1530 "^REVH_ZPmZ_[SD]$", 1531 "^REVW_ZPmZ_D$")>; 1532 1533// Select, vector form 1534// Table lookup 1535// Table lookup extension 1536// Transpose, vector form 1537// Unpack and extend 1538// Zip/unzip 1539def : InstRW<[V1Write_2c_1V01], (instregex "^SEL_ZPZZ_[BHSD]$", 1540 "^TB[LX]_ZZZ_[BHSD]$", 1541 "^TRN[12]_ZZZ_[BHSDQ]$", 1542 "^[SU]UNPK(HI|LO)_ZZ_[HSD]$", 1543 "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>; 1544 1545 1546// SVE floating-point instructions 1547// ----------------------------------------------------------------------------- 1548 1549// Floating point absolute value/difference 1550// Floating point arithmetic 1551def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]$", 1552 "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$", 1553 "^FADDP_ZPmZZ_[HSD]$", 1554 "^FNEG_ZPmZ_[HSD]$", 1555 "^FSUBR_ZPm[IZ]_[HSD]$")>; 1556 1557// Floating point associative add, F16 1558def : InstRW<[V1Write_19c_18V0], (instrs FADDA_VPZ_H)>; 1559 1560// Floating point associative add, F32 1561def : InstRW<[V1Write_11c_10V0], (instrs FADDA_VPZ_S)>; 1562 1563// Floating point associative add, F64 1564def : InstRW<[V1Write_8c_3V01], (instrs FADDA_VPZ_D)>; 1565 1566// Floating point compare 1567def : InstRW<[V1Write_2c_1V0], (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$", 1568 "^FCM(EQ|GE|GT|NE|UO)_PPzZZ_[HSD]$", 1569 "^FCM(EQ|GE|GT|LE|LT|NE)_PPzZ0_[HSD]$")>; 1570 1571// Floating point complex add 1572def : InstRW<[V1Write_3c_1V01], (instregex "^FCADD_ZPmZ_[HSD]$")>; 1573 1574// Floating point complex multiply add 1575def : InstRW<[V1Write_5c_1V01], (instregex "^FCMLA_ZPmZZ_[HSD]$", 1576 "^FCMLA_ZZZI_[HS]$")>; 1577 1578// Floating point convert, long or narrow (F16 to F32 or F32 to F16) 1579// Floating point convert to integer, F32 1580def : InstRW<[V1Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$", 1581 "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>; 1582 1583// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) 1584// Floating point convert to integer, F64 1585def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$", 1586 "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>; 1587 1588// Floating point convert to integer, F16 1589def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>; 1590 1591// Floating point copy 1592def : InstRW<[V1Write_2c_1V01], (instregex "^FCPY_ZPmI_[HSD]$", 1593 "^FDUP_ZI_[HSD]$")>; 1594 1595// Floating point divide, F16 1596def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_ZPmZ_H$")>; 1597 1598// Floating point divide, F32 1599def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_ZPmZ_S$")>; 1600 1601// Floating point divide, F64 1602def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_ZPmZ_D$")>; 1603 1604// Floating point min/max 1605def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>; 1606 1607// Floating point multiply 1608def : InstRW<[V1Write_3c_1V01], (instregex "^F(SCALE|MULX)_ZPmZ_[HSD]$", 1609 "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>; 1610 1611// Floating point multiply accumulate 1612// Floating point reciprocal step 1613def : InstRW<[V1Write_4c_1V01], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$", 1614 "^FML[AS]_ZZZI_[HSD]$", 1615 "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>; 1616 1617// Floating point reciprocal estimate, F16 1618def : InstRW<[V1Write_6c_4V0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>; 1619 1620// Floating point reciprocal estimate, F32 1621def : InstRW<[V1Write_4c_2V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>; 1622 1623// Floating point reciprocal estimate, F64 1624def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>; 1625 1626// Floating point reciprocal exponent 1627def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]$")>; 1628 1629// Floating point reduction, F16 1630def : InstRW<[V1Write_13c_6V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>; 1631 1632// Floating point reduction, F32 1633def : InstRW<[V1Write_11c_1V_5V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$")>; 1634 1635// Floating point reduction, F64 1636def : InstRW<[V1Write_9c_1V_4V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>; 1637 1638// Floating point round to integral, F16 1639def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>; 1640 1641// Floating point round to integral, F32 1642def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>; 1643 1644// Floating point round to integral, F64 1645def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>; 1646 1647// Floating point square root, F16 1648def : InstRW<[V1Write_13c10_1V0], (instrs FSQRT_ZPmZ_H)>; 1649 1650// Floating point square root, F32 1651def : InstRW<[V1Write_10c7_1V0], (instrs FSQRT_ZPmZ_S)>; 1652 1653// Floating point square root, F64 1654def : InstRW<[V1Write_16c7_1V0], (instrs FSQRT_ZPmZ_D)>; 1655 1656// Floating point trigonometric 1657def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$", 1658 "^FTMAD_ZZI_[HSD]$", 1659 "^FTS(MUL|SEL)_ZZZ_[HSD]$")>; 1660 1661 1662// SVE BFloat16 (BF16) instructions 1663// ----------------------------------------------------------------------------- 1664 1665// Convert, F32 to BF16 1666def : InstRW<[V1Write_4c_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>; 1667 1668// Dot product 1669def : InstRW<[V1Write_4c_1V01], (instrs BFDOT_ZZI, BFDOT_ZZZ)>; 1670 1671// Matrix multiply accumulate 1672def : InstRW<[V1Write_5c_1V01], (instrs BFMMLA_ZZZ)>; 1673 1674// Multiply accumulate long 1675def : InstRW<[V1Write_5c_1V01], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>; 1676 1677 1678// SVE Load instructions 1679// ----------------------------------------------------------------------------- 1680 1681// Load vector 1682def : InstRW<[V1Write_6c_1L01], (instrs LDR_ZXI)>; 1683 1684// Load predicate 1685def : InstRW<[V1Write_6c_1L_1M], (instrs LDR_PXI)>; 1686 1687// Contiguous load, scalar + imm 1688// Contiguous load, scalar + scalar 1689// Contiguous load broadcast, scalar + imm 1690// Contiguous load broadcast, scalar + scalar 1691def : InstRW<[V1Write_6c_1L01], (instregex "^LD1[BHWD]_IMM$", 1692 "^LD1S?B_[HSD]_IMM$", 1693 "^LD1S?H_[SD]_IMM$", 1694 "^LD1S?W_D_IMM$", 1695 "^LD1[BWD]$", 1696 "^LD1S?B_[HSD]$", 1697 "^LD1S?W_D$", 1698 "^LD1R[BHWD]_IMM$", 1699 "^LD1RSW_IMM$", 1700 "^LD1RS?B_[HSD]_IMM$", 1701 "^LD1RS?H_[SD]_IMM$", 1702 "^LD1RS?W_D_IMM$", 1703 "^LD1RQ_[BHWD]_IMM$", 1704 "^LD1RQ_[BWD]$")>; 1705def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LD1H$", 1706 "^LD1S?H_[SD]$", 1707 "^LD1RQ_H$")>; 1708 1709// Non temporal load, scalar + imm 1710def : InstRW<[V1Write_6c_1L01], (instregex "^LDNT1[BHWD]_ZRI$")>; 1711 1712// Non temporal load, scalar + scalar 1713def : InstRW<[V1Write_7c_1L01_1S], (instrs LDNT1H_ZRR)>; 1714def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDNT1[BWD]_ZRR$")>; 1715 1716// Contiguous first faulting load, scalar + scalar 1717def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H_REAL$", 1718 "^LDFF1S?H_[SD]_REAL$")>; 1719def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]_REAL$", 1720 "^LDFF1S?B_[HSD]_REAL$", 1721 "^LDFF1S?W_D_REAL$")>; 1722 1723// Contiguous non faulting load, scalar + imm 1724def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM_REAL$", 1725 "^LDNF1S?B_[HSD]_IMM_REAL$", 1726 "^LDNF1S?H_[SD]_IMM_REAL$", 1727 "^LDNF1S?W_D_IMM_REAL$")>; 1728 1729// Contiguous Load two structures to two vectors, scalar + imm 1730def : InstRW<[V1Write_8c_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>; 1731 1732// Contiguous Load two structures to two vectors, scalar + scalar 1733def : InstRW<[V1Write_10c_2L01_2V01], (instrs LD2H)>; 1734def : InstRW<[V1Write_9c_2L01_2V01], (instregex "^LD2[BWD]$")>; 1735 1736// Contiguous Load three structures to three vectors, scalar + imm 1737def : InstRW<[V1Write_11c_3L01_3V01], (instregex "^LD3[BHWD]_IMM$")>; 1738 1739// Contiguous Load three structures to three vectors, scalar + scalar 1740def : InstRW<[V1Write_13c_3L01_1S_3V01], (instregex "^LD3[BHWD]$")>; 1741 1742// Contiguous Load four structures to four vectors, scalar + imm 1743def : InstRW<[V1Write_12c_4L01_4V01], (instregex "^LD4[BHWD]_IMM$")>; 1744 1745// Contiguous Load four structures to four vectors, scalar + scalar 1746def : InstRW<[V1Write_13c_4L01_2S_4V01], (instregex "^LD4[BHWD]$")>; 1747 1748// Gather load, vector + imm, 32-bit element size 1749def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$", 1750 "^GLD(FF)?1W_IMM_REAL$")>; 1751 1752// Gather load, vector + imm, 64-bit element size 1753def : InstRW<[V1Write_9c_2L_2V], 1754 (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$", 1755 "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?(SCALED_)?REAL$", 1756 "^GLD(FF)?1D_IMM_REAL$", 1757 "^GLD(FF)?1D_([SU]XTW_)?(SCALED_)?REAL$")>; 1758 1759// Gather load, 32-bit scaled offset 1760def : InstRW<[V1Write_11c_2L_2V], 1761 (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$", 1762 "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>; 1763 1764// Gather load, 32-bit unpacked unscaled offset 1765def : InstRW<[V1Write_9c_1L_1V], 1766 (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$", 1767 "^GLD(FF)?1W_[SU]XTW_REAL$")>; 1768 1769// Prefetch 1770// NOTE: This is not specified in the SOG. 1771def : InstRW<[V1Write_4c_1L01], (instregex "^PRF[BHWD]")>; 1772 1773 1774// SVE Store instructions 1775// ----------------------------------------------------------------------------- 1776 1777// Store from predicate reg 1778def : InstRW<[V1Write_1c_1L01], (instrs STR_PXI)>; 1779 1780// Store from vector reg 1781def : InstRW<[V1Write_2c_1L01_1V], (instrs STR_ZXI)>; 1782 1783// Contiguous store, scalar + imm 1784// Contiguous store, scalar + scalar 1785def : InstRW<[V1Write_2c_1L01_1V], (instregex "^ST1[BHWD]_IMM$", 1786 "^ST1B_[HSD]_IMM$", 1787 "^ST1H_[SD]_IMM$", 1788 "^ST1W_D_IMM$", 1789 "^ST1[BWD]$", 1790 "^ST1B_[HSD]$", 1791 "^ST1W_D$")>; 1792def : InstRW<[V1Write_2c_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>; 1793 1794// Contiguous store two structures from two vectors, scalar + imm 1795// Contiguous store two structures from two vectors, scalar + scalar 1796def : InstRW<[V1Write_4c_1L01_1V], (instregex "^ST2[BHWD]_IMM$", 1797 "^ST2[BWD]$")>; 1798def : InstRW<[V1Write_4c_1L01_1S_1V], (instrs ST2H)>; 1799 1800// Contiguous store three structures from three vectors, scalar + imm 1801def : InstRW<[V1Write_7c_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>; 1802 1803// Contiguous store three structures from three vectors, scalar + scalar 1804def : InstRW<[V1Write_7c_5L01_5S_5V], (instregex "^ST3[BHWD]$")>; 1805 1806// Contiguous store four structures from four vectors, scalar + imm 1807def : InstRW<[V1Write_11c_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>; 1808 1809// Contiguous store four structures from four vectors, scalar + scalar 1810def : InstRW<[V1Write_11c_9L01_9S_9V], (instregex "^ST4[BHWD]$")>; 1811 1812// Non temporal store, scalar + imm 1813// Non temporal store, scalar + scalar 1814def : InstRW<[V1Write_2c_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$", 1815 "^STNT1[BWD]_ZRR$")>; 1816def : InstRW<[V1Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>; 1817 1818// Scatter store vector + imm 32-bit element size 1819// Scatter store, 32-bit scaled offset 1820// Scatter store, 32-bit unscaled offset 1821def : InstRW<[V1Write_10c_2L01_2V], (instregex "^SST1[BH]_S_IMM$", 1822 "^SST1W_IMM$", 1823 "^SST1(H_S|W)_[SU]XTW_SCALED$", 1824 "^SST1[BH]_S_[SU]XTW$", 1825 "^SST1W_[SU]XTW$")>; 1826 1827// Scatter store, 32-bit unpacked unscaled offset 1828// Scatter store, 32-bit unpacked scaled offset 1829def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$", 1830 "^SST1D_[SU]XTW$", 1831 "^SST1[HW]_D_[SU]XTW_SCALED$", 1832 "^SST1D_[SU]XTW_SCALED$")>; 1833 1834// Scatter store vector + imm 64-bit element size 1835// Scatter store, 64-bit scaled offset 1836// Scatter store, 64-bit unscaled offset 1837def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_IMM$", 1838 "^SST1D_IMM$", 1839 "^SST1[HW]_D_SCALED$", 1840 "^SST1D_SCALED$", 1841 "^SST1[BHW]_D$", 1842 "^SST1D$")>; 1843 1844 1845// SVE Miscellaneous instructions 1846// ----------------------------------------------------------------------------- 1847 1848// Read first fault register, unpredicated 1849// Set first fault register 1850// Write to first fault register 1851def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P_REAL, 1852 SETFFR, 1853 WRFFR)>; 1854 1855// Read first fault register, predicated 1856def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz_REAL)>; 1857 1858// Read first fault register and set flags 1859def : InstRW<[V1Write_4c_1M], (instrs RDFFRS_PPz)>; 1860 1861 1862} 1863