1//=- AArch64SchedNeoverseV1.td - NeoverseV1 Scheduling Model -*- tablegen -*-=// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the scheduling model for the Arm Neoverse V1 processors. 10// 11// References: 12// - "Arm Neoverse V1 Software Optimization Guide" 13// - "Arm Neoverse V1 Platform: Unleashing a new performance tier for Arm-based computing" 14// https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/neoverse-v1-platform-a-new-performance-tier-for-arm 15// - "Neoverse V1" 16// https://en.wikichip.org/wiki/arm_holdings/microarchitectures/neoverse_v1 17 18// 19//===----------------------------------------------------------------------===// 20 21def NeoverseV1Model : SchedMachineModel { 22 let IssueWidth = 15; // Maximum micro-ops dispatch rate. 23 let MicroOpBufferSize = 256; // Micro-op re-order buffer. 24 let LoadLatency = 4; // Optimistic load latency. 25 let MispredictPenalty = 11; // Cycles cost of branch mispredicted. 26 let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57. 27 let CompleteModel = 1; 28 29 list<Predicate> UnsupportedFeatures = !listconcat(SVE2Unsupported.F, 30 SMEUnsupported.F, 31 [HasMTE, HasCPA, 32 HasCSSC]); 33} 34 35//===----------------------------------------------------------------------===// 36// Define each kind of processor resource and number available on Neoverse V1. 37// Instructions are first fetched and then decoded into internal macro-ops 38// (MOPs). From there, the MOPs proceed through register renaming and dispatch 39// stages. A MOP can be split into one or more micro-ops further down the 40// pipeline, after the decode stage. Once dispatched, micro-ops wait for their 41// operands and issue out-of-order to one of the issue pipelines. Each issue 42// pipeline can accept one micro-op per cycle. 43 44let SchedModel = NeoverseV1Model in { 45 46// Define the issue ports. 47def V1UnitB : ProcResource<2>; // Branch 0/1 48def V1UnitS : ProcResource<2>; // Integer single cycle 0/1 49def V1UnitM0 : ProcResource<1>; // Integer multicycle 0 50def V1UnitM1 : ProcResource<1>; // Integer multicycle 1 51def V1UnitL01 : ProcResource<2>; // Load/Store 0/1 52def V1UnitL2 : ProcResource<1>; // Load 2 53def V1UnitD : ProcResource<2>; // Store data 0/1 54def V1UnitV0 : ProcResource<1>; // FP/ASIMD 0 55def V1UnitV1 : ProcResource<1>; // FP/ASIMD 1 56def V1UnitV2 : ProcResource<1>; // FP/ASIMD 2 57def V1UnitV3 : ProcResource<1>; // FP/ASIMD 3 58 59def V1UnitI : ProcResGroup<[V1UnitS, 60 V1UnitM0, V1UnitM1]>; // Integer units 61def V1UnitJ : ProcResGroup<[V1UnitS, V1UnitM0]>; // Integer 0-2 units 62def V1UnitM : ProcResGroup<[V1UnitM0, V1UnitM1]>; // Integer multicycle units 63def V1UnitL : ProcResGroup<[V1UnitL01, V1UnitL2]>; // Load units 64def V1UnitV : ProcResGroup<[V1UnitV0, V1UnitV1, 65 V1UnitV2, V1UnitV3]>; // FP/ASIMD units 66def V1UnitV01 : ProcResGroup<[V1UnitV0, V1UnitV1]>; // FP/ASIMD 0/1 units 67def V1UnitV02 : ProcResGroup<[V1UnitV0, V1UnitV2]>; // FP/ASIMD 0/2 units 68def V1UnitV13 : ProcResGroup<[V1UnitV1, V1UnitV3]>; // FP/ASIMD 1/3 units 69 70// Define commonly used read types. 71 72// No generic forwarding is provided for these types. 73def : ReadAdvance<ReadI, 0>; 74def : ReadAdvance<ReadISReg, 0>; 75def : ReadAdvance<ReadIEReg, 0>; 76def : ReadAdvance<ReadIM, 0>; 77def : ReadAdvance<ReadIMA, 0>; 78def : ReadAdvance<ReadID, 0>; 79def : ReadAdvance<ReadExtrHi, 0>; 80def : ReadAdvance<ReadAdrBase, 0>; 81def : ReadAdvance<ReadST, 0>; 82def : ReadAdvance<ReadVLD, 0>; 83 84def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } 85def : WriteRes<WriteBarrier, []> { let Latency = 1; } 86def : WriteRes<WriteHint, []> { let Latency = 1; } 87 88 89//===----------------------------------------------------------------------===// 90// Define generic 0 micro-op types 91 92let Latency = 0, NumMicroOps = 0 in 93def V1Write_0c_0Z : SchedWriteRes<[]>; 94 95 96//===----------------------------------------------------------------------===// 97// Define generic 1 micro-op types 98 99def V1Write_1c_1B : SchedWriteRes<[V1UnitB]> { let Latency = 1; } 100def V1Write_1c_1I : SchedWriteRes<[V1UnitI]> { let Latency = 1; } 101def V1Write_1c_1J : SchedWriteRes<[V1UnitJ]> { let Latency = 1; } 102def V1Write_4c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 4; } 103def V1Write_6c_1L : SchedWriteRes<[V1UnitL]> { let Latency = 6; } 104def V1Write_1c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 1; } 105def V1Write_4c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 4; } 106def V1Write_6c_1L01 : SchedWriteRes<[V1UnitL01]> { let Latency = 6; } 107def V1Write_2c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 2; } 108def V1Write_3c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 3; } 109def V1Write_4c_1M : SchedWriteRes<[V1UnitM]> { let Latency = 4; } 110def V1Write_1c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 1; } 111def V1Write_2c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 2; } 112def V1Write_3c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 3; } 113def V1Write_5c_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 5; } 114def V1Write_12c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 12; 115 let ReleaseAtCycles = [5]; } 116def V1Write_20c5_1M0 : SchedWriteRes<[V1UnitM0]> { let Latency = 20; 117 let ReleaseAtCycles = [5]; } 118def V1Write_2c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 2; } 119def V1Write_3c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 3; } 120def V1Write_4c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 4; } 121def V1Write_5c_1V : SchedWriteRes<[V1UnitV]> { let Latency = 5; } 122def V1Write_2c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 2; } 123def V1Write_3c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 3; } 124def V1Write_4c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 4; } 125def V1Write_6c_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 6; } 126def V1Write_10c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 10; 127 let ReleaseAtCycles = [7]; } 128def V1Write_12c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 12; 129 let ReleaseAtCycles = [7]; } 130def V1Write_13c10_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 13; 131 let ReleaseAtCycles = [10]; } 132def V1Write_15c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 15; 133 let ReleaseAtCycles = [7]; } 134def V1Write_16c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 16; 135 let ReleaseAtCycles = [7]; } 136def V1Write_20c7_1V0 : SchedWriteRes<[V1UnitV0]> { let Latency = 20; 137 let ReleaseAtCycles = [7]; } 138def V1Write_2c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 2; } 139def V1Write_3c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 3; } 140def V1Write_4c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 4; } 141def V1Write_5c_1V01 : SchedWriteRes<[V1UnitV01]> { let Latency = 5; } 142def V1Write_3c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 3; } 143def V1Write_4c_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 4; } 144def V1Write_7c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 7; 145 let ReleaseAtCycles = [7]; } 146def V1Write_10c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 10; 147 let ReleaseAtCycles = [7]; } 148def V1Write_13c5_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13; 149 let ReleaseAtCycles = [5]; } 150def V1Write_13c11_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 13; 151 let ReleaseAtCycles = [11]; } 152def V1Write_15c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 15; 153 let ReleaseAtCycles = [7]; } 154def V1Write_16c7_1V02 : SchedWriteRes<[V1UnitV02]> { let Latency = 16; 155 let ReleaseAtCycles = [7]; } 156def V1Write_2c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 2; } 157def V1Write_3c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 3; } 158def V1Write_4c_1V1 : SchedWriteRes<[V1UnitV1]> { let Latency = 4; } 159def V1Write_2c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 2; } 160def V1Write_4c_1V13 : SchedWriteRes<[V1UnitV13]> { let Latency = 4; } 161 162//===----------------------------------------------------------------------===// 163// Define generic 2 micro-op types 164 165let Latency = 1, NumMicroOps = 2 in 166def V1Write_1c_1B_1S : SchedWriteRes<[V1UnitB, V1UnitS]>; 167let Latency = 6, NumMicroOps = 2 in 168def V1Write_6c_1B_1M0 : SchedWriteRes<[V1UnitB, V1UnitM0]>; 169let Latency = 3, NumMicroOps = 2 in 170def V1Write_3c_1I_1M : SchedWriteRes<[V1UnitI, V1UnitM]>; 171let Latency = 5, NumMicroOps = 2 in 172def V1Write_5c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]>; 173let Latency = 7, NumMicroOps = 2 in 174def V1Write_7c_1I_1L : SchedWriteRes<[V1UnitI, V1UnitL]>; 175let Latency = 6, NumMicroOps = 2 in 176def V1Write_6c_2L : SchedWriteRes<[V1UnitL, V1UnitL]>; 177let Latency = 6, NumMicroOps = 2 in 178def V1Write_6c_1L_1M : SchedWriteRes<[V1UnitL, V1UnitM]>; 179let Latency = 8, NumMicroOps = 2 in 180def V1Write_8c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>; 181let Latency = 9, NumMicroOps = 2 in 182def V1Write_9c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>; 183let Latency = 11, NumMicroOps = 2 in 184def V1Write_11c_1L_1V : SchedWriteRes<[V1UnitL, V1UnitV]>; 185let Latency = 1, NumMicroOps = 2 in 186def V1Write_1c_1L01_1D : SchedWriteRes<[V1UnitL01, V1UnitD]>; 187let Latency = 6, NumMicroOps = 2 in 188def V1Write_6c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]>; 189let Latency = 7, NumMicroOps = 2 in 190def V1Write_7c_1L01_1S : SchedWriteRes<[V1UnitL01, V1UnitS]>; 191let Latency = 2, NumMicroOps = 2 in 192def V1Write_2c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>; 193let Latency = 4, NumMicroOps = 2 in 194def V1Write_4c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>; 195let Latency = 6, NumMicroOps = 2 in 196def V1Write_6c_1L01_1V : SchedWriteRes<[V1UnitL01, V1UnitV]>; 197let Latency = 2, NumMicroOps = 2 in 198def V1Write_2c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>; 199let Latency = 4, NumMicroOps = 2 in 200def V1Write_4c_1L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitV01]>; 201let Latency = 2, NumMicroOps = 2 in 202def V1Write_2c_2M0 : SchedWriteRes<[V1UnitM0, V1UnitM0]>; 203let Latency = 3, NumMicroOps = 2 in 204def V1Write_3c_2M0 : SchedWriteRes<[V1UnitM0, V1UnitM0]>; 205let Latency = 9, NumMicroOps = 2 in 206def V1Write_9c_1M0_1L : SchedWriteRes<[V1UnitM0, V1UnitL]>; 207let Latency = 5, NumMicroOps = 2 in 208def V1Write_5c_1M0_1V : SchedWriteRes<[V1UnitM0, V1UnitV]>; 209let Latency = 4, NumMicroOps = 2 in 210def V1Write_4c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV0]>; 211let Latency = 7, NumMicroOps = 2 in 212def V1Write_7c_1M0_1V0 : SchedWriteRes<[V1UnitM0, V1UnitV1]>; 213let Latency = 5, NumMicroOps = 2 in 214def V1Write_5c_1M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitV01]>; 215let Latency = 6, NumMicroOps = 2 in 216def V1Write_6c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]>; 217let Latency = 9, NumMicroOps = 2 in 218def V1Write_9c_1M0_1V1 : SchedWriteRes<[V1UnitM0, V1UnitV1]>; 219let Latency = 4, NumMicroOps = 2 in 220def V1Write_4c_2V : SchedWriteRes<[V1UnitV, V1UnitV]>; 221let Latency = 8, NumMicroOps = 2 in 222def V1Write_8c_1V_1V01 : SchedWriteRes<[V1UnitV, V1UnitV01]>; 223let Latency = 4, NumMicroOps = 2 in 224def V1Write_4c_2V0 : SchedWriteRes<[V1UnitV0, V1UnitV0]>; 225let Latency = 5, NumMicroOps = 2 in 226def V1Write_5c_2V0 : SchedWriteRes<[V1UnitV0, V1UnitV0]>; 227let Latency = 2, NumMicroOps = 2 in 228def V1Write_2c_2V01 : SchedWriteRes<[V1UnitV01, V1UnitV01]>; 229let Latency = 4, NumMicroOps = 2 in 230def V1Write_4c_2V01 : SchedWriteRes<[V1UnitV01, V1UnitV01]>; 231let Latency = 4, NumMicroOps = 2 in 232def V1Write_4c_2V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>; 233let Latency = 6, NumMicroOps = 2 in 234def V1Write_6c_2V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>; 235let Latency = 4, NumMicroOps = 2 in 236def V1Write_4c_1V13_1V : SchedWriteRes<[V1UnitV13, V1UnitV]>; 237let Latency = 4, NumMicroOps = 2 in 238def V1Write_4c_2V13 : SchedWriteRes<[V1UnitV13, V1UnitV13]>; 239 240//===----------------------------------------------------------------------===// 241// Define generic 3 micro-op types 242 243let Latency = 2, NumMicroOps = 3 in 244def V1Write_2c_1I_1L01_1V01 : SchedWriteRes<[V1UnitI, V1UnitL01, V1UnitV01]>; 245let Latency = 7, NumMicroOps = 3 in 246def V1Write_7c_2M0_1V01 : SchedWriteRes<[V1UnitM0, V1UnitM0, V1UnitV01]>; 247let Latency = 8, NumMicroOps = 3 in 248def V1Write_8c_1L_2V : SchedWriteRes<[V1UnitL, V1UnitV, V1UnitV]>; 249let Latency = 6, NumMicroOps = 3 in 250def V1Write_6c_3L : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL]>; 251let Latency = 2, NumMicroOps = 3 in 252def V1Write_2c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>; 253let Latency = 4, NumMicroOps = 3 in 254def V1Write_4c_1L01_1S_1V : SchedWriteRes<[V1UnitL01, V1UnitS, V1UnitV]>; 255let Latency = 2, NumMicroOps = 3 in 256def V1Write_2c_2L01_1V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitV01]>; 257let Latency = 6, NumMicroOps = 3 in 258def V1Write_6c_3V : SchedWriteRes<[V1UnitV, V1UnitV, V1UnitV]>; 259let Latency = 4, NumMicroOps = 3 in 260def V1Write_4c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>; 261let Latency = 6, NumMicroOps = 3 in 262def V1Write_6c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>; 263let Latency = 8, NumMicroOps = 3 in 264def V1Write_8c_3V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01]>; 265 266//===----------------------------------------------------------------------===// 267// Define generic 4 micro-op types 268 269let Latency = 8, NumMicroOps = 4 in 270def V1Write_8c_2M0_2V0 : SchedWriteRes<[V1UnitM0, V1UnitM0, 271 V1UnitV0, V1UnitV0]>; 272let Latency = 7, NumMicroOps = 4 in 273def V1Write_7c_4L : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, V1UnitL]>; 274let Latency = 8, NumMicroOps = 4 in 275def V1Write_8c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL, 276 V1UnitV, V1UnitV]>; 277let Latency = 9, NumMicroOps = 4 in 278def V1Write_9c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL, 279 V1UnitV, V1UnitV]>; 280let Latency = 11, NumMicroOps = 4 in 281def V1Write_11c_2L_2V : SchedWriteRes<[V1UnitL, V1UnitL, 282 V1UnitV, V1UnitV]>; 283let Latency = 10, NumMicroOps = 4 in 284def V1Write_10c_2L01_2V : SchedWriteRes<[V1UnitL01, V1UnitL01, 285 V1UnitV, V1UnitV]>; 286let Latency = 2, NumMicroOps = 4 in 287def V1Write_2c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 288 V1UnitV01, V1UnitV01]>; 289let Latency = 4, NumMicroOps = 4 in 290def V1Write_4c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 291 V1UnitV01, V1UnitV01]>; 292let Latency = 8, NumMicroOps = 4 in 293def V1Write_8c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 294 V1UnitV01, V1UnitV01]>; 295let Latency = 9, NumMicroOps = 4 in 296def V1Write_9c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 297 V1UnitV01, V1UnitV01]>; 298let Latency = 10, NumMicroOps = 4 in 299def V1Write_10c_2L01_2V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 300 V1UnitV01, V1UnitV01]>; 301let Latency = 10, NumMicroOps = 4 in 302def V1Write_10c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01, 303 V1UnitV1, V1UnitV1]>; 304let Latency = 12, NumMicroOps = 4 in 305def V1Write_12c_1V_1V01_2V1 : SchedWriteRes<[V1UnitV, V1UnitV01, 306 V1UnitV1, V1UnitV1]>; 307let Latency = 6, NumMicroOps = 4 in 308def V1Write_6c_4V0 : SchedWriteRes<[V1UnitV0, V1UnitV0, 309 V1UnitV0, V1UnitV0]>; 310let Latency = 12, NumMicroOps = 4 in 311def V1Write_12c_4V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, 312 V1UnitV01, V1UnitV01]>; 313let Latency = 6, NumMicroOps = 4 in 314def V1Write_6c_4V02 : SchedWriteRes<[V1UnitV02, V1UnitV02]>; 315 316//===----------------------------------------------------------------------===// 317// Define generic 5 micro-op types 318 319let Latency = 8, NumMicroOps = 5 in 320def V1Write_8c_2L_3V : SchedWriteRes<[V1UnitL, V1UnitL, 321 V1UnitV, V1UnitV, V1UnitV]>; 322let Latency = 14, NumMicroOps = 5 in 323def V1Write_14c_1V_1V0_2V1_1V13 : SchedWriteRes<[V1UnitV, 324 V1UnitV0, 325 V1UnitV1, V1UnitV1, 326 V1UnitV13]>; 327let Latency = 9, NumMicroOps = 5 in 328def V1Write_9c_1V_4V01 : SchedWriteRes<[V1UnitV, 329 V1UnitV01, V1UnitV01, 330 V1UnitV01, V1UnitV01]>; 331let Latency = 6, NumMicroOps = 5 in 332def V1Write_6c_5V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, 333 V1UnitV01, V1UnitV01, V1UnitV01]>; 334 335//===----------------------------------------------------------------------===// 336// Define generic 6 micro-op types 337 338let Latency = 6, NumMicroOps = 6 in 339def V1Write_6c_3L_3V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, 340 V1UnitV, V1UnitV, V1UnitV]>; 341let Latency = 8, NumMicroOps = 6 in 342def V1Write_8c_3L_3V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, 343 V1UnitV, V1UnitV, V1UnitV]>; 344let Latency = 2, NumMicroOps = 6 in 345def V1Write_2c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 346 V1UnitV01, V1UnitV01, V1UnitV01]>; 347let Latency = 5, NumMicroOps = 6 in 348def V1Write_5c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 349 V1UnitV01, V1UnitV01, V1UnitV01]>; 350let Latency = 6, NumMicroOps = 6 in 351def V1Write_6c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 352 V1UnitV01, V1UnitV01, V1UnitV01]>; 353let Latency = 11, NumMicroOps = 6 in 354def V1Write_11c_3L01_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 355 V1UnitV01, V1UnitV01, V1UnitV01]>; 356let Latency = 11, NumMicroOps = 6 in 357def V1Write_11c_1V_5V01 : SchedWriteRes<[V1UnitV, 358 V1UnitV01, V1UnitV01, 359 V1UnitV01, V1UnitV01, V1UnitV01]>; 360let Latency = 13, NumMicroOps = 6 in 361def V1Write_13c_6V01 : SchedWriteRes<[V1UnitV01, V1UnitV01, V1UnitV01, 362 V1UnitV01, V1UnitV01, V1UnitV01]>; 363 364//===----------------------------------------------------------------------===// 365// Define generic 7 micro-op types 366 367let Latency = 8, NumMicroOps = 7 in 368def V1Write_8c_3L_4V : SchedWriteRes<[V1UnitL, V1UnitL, V1UnitL, 369 V1UnitV, V1UnitV, V1UnitV, V1UnitV]>; 370let Latency = 8, NumMicroOps = 7 in 371def V1Write_13c_3L01_1S_3V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 372 V1UnitS, 373 V1UnitV01, V1UnitV01, V1UnitV01]>; 374 375//===----------------------------------------------------------------------===// 376// Define generic 8 micro-op types 377 378let Latency = 9, NumMicroOps = 8 in 379def V1Write_9c_4L_4V : SchedWriteRes<[V1UnitL, V1UnitL, 380 V1UnitL, V1UnitL, 381 V1UnitV, V1UnitV, 382 V1UnitV, V1UnitV]>; 383let Latency = 2, NumMicroOps = 8 in 384def V1Write_2c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 385 V1UnitL01, V1UnitL01, 386 V1UnitV01, V1UnitV01, 387 V1UnitV01, V1UnitV01]>; 388let Latency = 4, NumMicroOps = 8 in 389def V1Write_4c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 390 V1UnitL01, V1UnitL01, 391 V1UnitV01, V1UnitV01, 392 V1UnitV01, V1UnitV01]>; 393let Latency = 12, NumMicroOps = 8 in 394def V1Write_12c_4L01_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 395 V1UnitL01, V1UnitL01, 396 V1UnitV01, V1UnitV01, 397 V1UnitV01, V1UnitV01]>; 398 399//===----------------------------------------------------------------------===// 400// Define generic 10 micro-op types 401 402let Latency = 13, NumMicroOps = 10 in 403def V1Write_13c_4L01_2S_4V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, 404 V1UnitL01, V1UnitL01, 405 V1UnitS, V1UnitS, 406 V1UnitV01, V1UnitV01, 407 V1UnitV01, V1UnitV01]>; 408let Latency = 7, NumMicroOps = 10 in 409def V1Write_7c_5L01_5V : SchedWriteRes<[V1UnitL01, V1UnitL01, 410 V1UnitL01, V1UnitL01, V1UnitL01, 411 V1UnitV, V1UnitV, 412 V1UnitV, V1UnitV, V1UnitV]>; 413let Latency = 11, NumMicroOps = 10 in 414def V1Write_11c_10V0 : SchedWriteRes<[V1UnitV0, 415 V1UnitV0, V1UnitV0, V1UnitV0, 416 V1UnitV0, V1UnitV0, V1UnitV0, 417 V1UnitV0, V1UnitV0, V1UnitV0]>; 418 419//===----------------------------------------------------------------------===// 420// Define generic 12 micro-op types 421 422let Latency = 7, NumMicroOps = 12 in 423def V1Write_7c_6L01_6V01 : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 424 V1UnitL01, V1UnitL01, V1UnitL01, 425 V1UnitV01, V1UnitV01, V1UnitV01, 426 V1UnitV01, V1UnitV01, V1UnitV01]>; 427 428//===----------------------------------------------------------------------===// 429// Define generic 15 micro-op types 430 431let Latency = 7, NumMicroOps = 15 in 432def V1Write_7c_5L01_5S_5V : SchedWriteRes<[V1UnitL01, V1UnitL01, 433 V1UnitL01, V1UnitL01, V1UnitL01, 434 V1UnitS, V1UnitS, 435 V1UnitS, V1UnitS, V1UnitS, 436 V1UnitV, V1UnitV, 437 V1UnitV, V1UnitV, V1UnitV]>; 438 439 440//===----------------------------------------------------------------------===// 441// Define generic 18 micro-op types 442 443let Latency = 19, NumMicroOps = 18 in 444def V1Write_11c_9L01_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 445 V1UnitL01, V1UnitL01, V1UnitL01, 446 V1UnitL01, V1UnitL01, V1UnitL01, 447 V1UnitV, V1UnitV, V1UnitV, 448 V1UnitV, V1UnitV, V1UnitV, 449 V1UnitV, V1UnitV, V1UnitV]>; 450let Latency = 19, NumMicroOps = 18 in 451def V1Write_19c_18V0 : SchedWriteRes<[V1UnitV0, V1UnitV0, V1UnitV0, 452 V1UnitV0, V1UnitV0, V1UnitV0, 453 V1UnitV0, V1UnitV0, V1UnitV0, 454 V1UnitV0, V1UnitV0, V1UnitV0, 455 V1UnitV0, V1UnitV0, V1UnitV0, 456 V1UnitV0, V1UnitV0, V1UnitV0]>; 457 458//===----------------------------------------------------------------------===// 459// Define generic 27 micro-op types 460 461let Latency = 11, NumMicroOps = 27 in 462def V1Write_11c_9L01_9S_9V : SchedWriteRes<[V1UnitL01, V1UnitL01, V1UnitL01, 463 V1UnitL01, V1UnitL01, V1UnitL01, 464 V1UnitL01, V1UnitL01, V1UnitL01, 465 V1UnitS, V1UnitS, V1UnitS, 466 V1UnitS, V1UnitS, V1UnitS, 467 V1UnitS, V1UnitS, V1UnitS, 468 V1UnitV, V1UnitV, V1UnitV, 469 V1UnitV, V1UnitV, V1UnitV, 470 V1UnitV, V1UnitV, V1UnitV]>; 471 472 473// Miscellaneous Instructions 474// ----------------------------------------------------------------------------- 475 476// COPY 477def : InstRW<[V1Write_1c_1I], (instrs COPY)>; 478 479// MSR 480def : WriteRes<WriteSys, []> { let Latency = 1; } 481 482 483// Branch Instructions 484// ----------------------------------------------------------------------------- 485 486// Branch, immed 487// Compare and branch 488def : SchedAlias<WriteBr, V1Write_1c_1B>; 489 490// Branch, register 491def : SchedAlias<WriteBrReg, V1Write_1c_1B>; 492 493// Branch and link, immed 494// Branch and link, register 495def : InstRW<[V1Write_1c_1B_1S], (instrs BL, BLR)>; 496 497// Compare and branch 498def : InstRW<[V1Write_1c_1B], (instregex "^[CT]BN?Z[XW]$")>; 499 500 501// Arithmetic and Logical Instructions 502// ----------------------------------------------------------------------------- 503 504// ALU, basic 505// Conditional compare 506// Conditional select 507// Logical, basic 508// Address generation 509// Count leading 510// Reverse bits/bytes 511// Move immediate 512def : SchedAlias<WriteI, V1Write_1c_1I>; 513 514// ALU, basic, flagset 515def : InstRW<[V1Write_1c_1J], 516 (instregex "^(ADD|SUB)S[WX]r[ir]$", 517 "^(ADC|SBC)S[WX]r$", 518 "^ANDS[WX]ri$", 519 "^(AND|BIC)S[WX]rr$")>; 520 521// ALU, extend and shift 522def : SchedAlias<WriteIEReg, V1Write_2c_1M>; 523 524// Arithmetic, LSL shift, shift <= 4 525// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 526def V1WriteISReg : SchedWriteVariant< 527 [SchedVar<IsCheapLSL, [V1Write_1c_1I]>, 528 SchedVar<NoSchedPred, [V1Write_2c_1M]>]>; 529def : SchedAlias<WriteISReg, V1WriteISReg>; 530 531// Arithmetic, flagset, LSL shift, shift <= 4 532// Arithmetic, flagset, LSR/ASR/ROR shift or LSL shift > 4 533def V1WriteISRegS : SchedWriteVariant< 534 [SchedVar<IsCheapLSL, [V1Write_1c_1J]>, 535 SchedVar<NoSchedPred, [V1Write_2c_1M]>]>; 536def : InstRW<[V1WriteISRegS], 537 (instregex "^(ADD|SUB)S(([WX]r[sx])|Xrx64)$")>; 538 539// Logical, shift, no flagset 540def : InstRW<[V1Write_1c_1I], (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>; 541 542// Logical, shift, flagset 543def : InstRW<[V1Write_2c_1M], (instregex "^(AND|BIC)S[WX]rs$")>; 544 545// Flag manipulation instructions 546def : InstRW<[V1Write_1c_1J], (instrs SETF8, SETF16, RMIF, CFINV)>; 547 548 549// Divide and multiply instructions 550// ----------------------------------------------------------------------------- 551 552// Divide 553def : SchedAlias<WriteID32, V1Write_12c5_1M0>; 554def : SchedAlias<WriteID64, V1Write_20c5_1M0>; 555 556// Multiply 557// Multiply accumulate 558// Multiply accumulate, long 559// Multiply long 560def V1WriteIM : SchedWriteVariant< 561 [SchedVar<NeoverseMULIdiomPred, [V1Write_2c_1M]>, 562 SchedVar<NoSchedPred, [V1Write_2c_1M0]>]>; 563def : SchedAlias<WriteIM32, V1WriteIM>; 564def : SchedAlias<WriteIM64, V1WriteIM>; 565 566// Multiply high 567def : InstRW<[V1Write_3c_1M, ReadIM, ReadIM], (instrs SMULHrr, UMULHrr)>; 568 569 570// Pointer Authentication Instructions (v8.3 PAC) 571// ----------------------------------------------------------------------------- 572 573// Authenticate data address 574// Authenticate instruction address 575// Compute pointer authentication code for data address 576// Compute pointer authentication code, using generic key 577// Compute pointer authentication code for instruction address 578def : InstRW<[V1Write_5c_1M0], (instregex "^AUT", 579 "^PAC")>; 580 581// Branch and link, register, with pointer authentication 582// Branch, register, with pointer authentication 583// Branch, return, with pointer authentication 584def : InstRW<[V1Write_6c_1B_1M0], (instregex "^BL?RA[AB]Z?$", 585 "^E?RETA[AB]$")>; 586 587// Load register, with pointer authentication 588def : InstRW<[V1Write_9c_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>; 589 590// Strip pointer authentication code 591def : InstRW<[V1Write_2c_1M0], (instrs XPACD, XPACI, XPACLRI)>; 592 593 594// Miscellaneous data-processing instructions 595// ----------------------------------------------------------------------------- 596 597// Bitfield extract, one reg 598// Bitfield extract, two regs 599def V1WriteExtr : SchedWriteVariant< 600 [SchedVar<IsRORImmIdiomPred, [V1Write_1c_1I]>, 601 SchedVar<NoSchedPred, [V1Write_3c_1I_1M]>]>; 602def : SchedAlias<WriteExtr, V1WriteExtr>; 603 604// Bitfield move, basic 605// Variable shift 606def : SchedAlias<WriteIS, V1Write_1c_1I>; 607 608// Bitfield move, insert 609def : InstRW<[V1Write_2c_1M], (instregex "^BFM[WX]ri$")>; 610 611// Move immediate 612def : SchedAlias<WriteImm, V1Write_1c_1I>; 613 614 615// Load instructions 616// ----------------------------------------------------------------------------- 617 618// Load register, immed offset 619def : SchedAlias<WriteLD, V1Write_4c_1L>; 620 621// Load register, immed offset, index 622def : SchedAlias<WriteLDIdx, V1Write_4c_1L>; 623def : SchedAlias<WriteAdr, V1Write_1c_1I>; 624 625// Load pair, immed offset 626def : SchedAlias<WriteLDHi, V1Write_4c_1L>; 627def : InstRW<[V1Write_4c_1L, V1Write_0c_0Z], (instrs LDPWi, LDNPWi)>; 628def : InstRW<[WriteAdr, V1Write_4c_1L, V1Write_0c_0Z], 629 (instrs LDPWpost, LDPWpre)>; 630 631// Load pair, signed immed offset, signed words 632def : InstRW<[V1Write_5c_1I_1L, V1Write_0c_0Z], (instrs LDPSWi)>; 633 634// Load pair, immed post or pre-index, signed words 635def : InstRW<[WriteAdr, V1Write_5c_1I_1L, V1Write_0c_0Z], 636 (instrs LDPSWpost, LDPSWpre)>; 637 638 639// Store instructions 640// ----------------------------------------------------------------------------- 641 642// Store register, immed offset 643def : SchedAlias<WriteST, V1Write_1c_1L01_1D>; 644 645// Store register, immed offset, index 646def : SchedAlias<WriteSTIdx, V1Write_1c_1L01_1D>; 647 648// Store pair, immed offset 649def : SchedAlias<WriteSTP, V1Write_1c_1L01_1D>; 650 651 652// FP data processing instructions 653// ----------------------------------------------------------------------------- 654 655// FP absolute value 656// FP arithmetic 657// FP min/max 658// FP negate 659def : SchedAlias<WriteF, V1Write_2c_1V>; 660 661// FP compare 662def : SchedAlias<WriteFCmp, V1Write_2c_1V0>; 663 664// FP divide 665// FP square root 666def : SchedAlias<WriteFDiv, V1Write_10c7_1V02>; 667 668// FP divide, H-form 669// FP square root, H-form 670def : InstRW<[V1Write_7c7_1V02], (instrs FDIVHrr, FSQRTHr)>; 671 672// FP divide, S-form 673// FP square root, S-form 674def : InstRW<[V1Write_10c7_1V02], (instrs FDIVSrr, FSQRTSr)>; 675 676// FP divide, D-form 677def : InstRW<[V1Write_15c7_1V02], (instrs FDIVDrr)>; 678 679// FP square root, D-form 680def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTDr)>; 681 682// FP multiply 683def : SchedAlias<WriteFMul, V1Write_3c_1V>; 684 685// FP multiply accumulate 686def : InstRW<[V1Write_4c_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>; 687 688// FP round to integral 689def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$", 690 "^FRINT(32|64)[XZ][SD]r$")>; 691 692// FP select 693def : InstRW<[V1Write_2c_1V01], (instregex "^FCSEL[HSD]rrr$")>; 694 695 696// FP miscellaneous instructions 697// ----------------------------------------------------------------------------- 698 699// FP convert, from gen to vec reg 700def : InstRW<[V1Write_3c_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>; 701 702// FP convert, from vec to gen reg 703def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>; 704 705// FP convert, Javascript from vec to gen reg 706def : InstRW<[V1Write_3c_1V0], (instrs FJCVTZS)>; 707 708// FP convert, from vec to vec reg 709def : SchedAlias<WriteFCvt, V1Write_3c_1V02>; 710 711// FP move, immed 712def : SchedAlias<WriteFImm, V1Write_2c_1V>; 713 714// FP move, register 715def : InstRW<[V1Write_2c_1V], (instrs FMOVHr, FMOVSr, FMOVDr)>; 716 717// FP transfer, from gen to low half of vec reg 718def : InstRW<[V1Write_3c_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>; 719 720// FP transfer, from gen to high half of vec reg 721def : InstRW<[V1Write_5c_1M0_1V], (instrs FMOVXDHighr)>; 722 723// FP transfer, from vec to gen reg 724def : SchedAlias<WriteFCopy, V1Write_2c_1V1>; 725 726 727// FP load instructions 728// ----------------------------------------------------------------------------- 729 730// Load vector reg, literal, S/D/Q forms 731// Load vector reg, unscaled immed 732// Load vector reg, unsigned immed 733def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[SDQ]l$", 734 "^LDUR[BHSDQ]i$", 735 "^LDR[BHSDQ]ui$")>; 736 737// Load vector reg, immed post-index 738// Load vector reg, immed pre-index 739def : InstRW<[WriteAdr, V1Write_6c_1L], 740 (instregex "^LDR[BHSDQ](post|pre)$")>; 741 742// Load vector reg, register offset, basic 743// Load vector reg, register offset, scale, S/D-form 744// Load vector reg, register offset, extend 745// Load vector reg, register offset, extend, scale, S/D-form 746def : InstRW<[V1Write_6c_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>; 747 748// Load vector reg, register offset, scale, H/Q-form 749// Load vector reg, register offset, extend, scale, H/Q-form 750def : InstRW<[V1Write_7c_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>; 751 752// Load vector pair, immed offset, S/D-form 753def : InstRW<[V1Write_6c_1L, V1Write_0c_0Z], (instregex "^LDN?P[SD]i$")>; 754 755// Load vector pair, immed offset, Q-form 756def : InstRW<[V1Write_6c_1L, WriteLDHi], (instrs LDPQi, LDNPQi)>; 757 758// Load vector pair, immed post-index, S/D-form 759// Load vector pair, immed pre-index, S/D-form 760def : InstRW<[WriteAdr, V1Write_6c_1L, V1Write_0c_0Z], 761 (instregex "^LDP[SD](pre|post)$")>; 762 763// Load vector pair, immed post-index, Q-form 764// Load vector pair, immed pre-index, Q-form 765def : InstRW<[WriteAdr, V1Write_6c_1L, WriteLDHi], 766 (instrs LDPQpost, LDPQpre)>; 767 768 769// FP store instructions 770// ----------------------------------------------------------------------------- 771 772// Store vector reg, unscaled immed, B/H/S/D/Q-form 773def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>; 774 775// Store vector reg, immed post-index, B/H/S/D/Q-form 776// Store vector reg, immed pre-index, B/H/S/D/Q-form 777def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01], 778 (instregex "^STR[BHSDQ](pre|post)$")>; 779 780// Store vector reg, unsigned immed, B/H/S/D/Q-form 781def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>; 782 783// Store vector reg, register offset, basic, B/S/D-form 784// Store vector reg, register offset, scale, B/S/D-form 785// Store vector reg, register offset, extend, B/S/D-form 786// Store vector reg, register offset, extend, scale, B/S/D-form 787def : InstRW<[V1Write_2c_1L01_1V01, ReadAdrBase], 788 (instregex "^STR[BSD]ro[WX]$")>; 789 790// Store vector reg, register offset, basic, H/Q-form 791// Store vector reg, register offset, scale, H/Q-form 792// Store vector reg, register offset, extend, H/Q-form 793// Store vector reg, register offset, extend, scale, H/Q-form 794def : InstRW<[V1Write_2c_1I_1L01_1V01, ReadAdrBase], 795 (instregex "^STR[HQ]ro[WX]$")>; 796 797// Store vector pair, immed offset, S/D/Q-form 798def : InstRW<[V1Write_2c_1L01_1V01], (instregex "^STN?P[SDQ]i$")>; 799 800// Store vector pair, immed post-index, S/D-form 801// Store vector pair, immed pre-index, S/D-form 802def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01], 803 (instregex "^STP[SD](pre|post)$")>; 804 805// Store vector pair, immed post-index, Q-form 806// Store vector pair, immed pre-index, Q-form 807def : InstRW<[WriteAdr, V1Write_2c_2L01_1V01], (instrs STPQpre, STPQpost)>; 808 809 810// ASIMD integer instructions 811// ----------------------------------------------------------------------------- 812 813// ASIMD absolute diff 814// ASIMD absolute diff long 815// ASIMD arith, basic 816// ASIMD arith, complex 817// ASIMD arith, pair-wise 818// ASIMD compare 819// ASIMD logical 820// ASIMD max/min, basic and pair-wise 821def : SchedAlias<WriteVd, V1Write_2c_1V>; 822def : SchedAlias<WriteVq, V1Write_2c_1V>; 823 824// ASIMD absolute diff accum 825// ASIMD absolute diff accum long 826// ASIMD pairwise add and accumulate long 827def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]ABAL?v", "^[SU]ADALPv")>; 828 829// ASIMD arith, reduce, 4H/4S 830// ASIMD max/min, reduce, 4H/4S 831def : InstRW<[V1Write_2c_1V13], (instregex "^(ADD|[SU]ADDL)Vv4(i16|i32)v$", 832 "^[SU](MAX|MIN)Vv4(i16|i32)v$")>; 833 834// ASIMD arith, reduce, 8B/8H 835// ASIMD max/min, reduce, 8B/8H 836def : InstRW<[V1Write_4c_1V13_1V], (instregex "^(ADD|[SU]ADDL)Vv8(i8|i16)v$", 837 "^[SU](MAX|MIN)Vv8(i8|i16)v$")>; 838 839// ASIMD arith, reduce, 16B 840// ASIMD max/min, reduce, 16B 841def : InstRW<[V1Write_4c_2V13], (instregex "^(ADD|[SU]ADDL)Vv16i8v$", 842 "[SU](MAX|MIN)Vv16i8v$")>; 843 844// ASIMD dot product 845// ASIMD dot product using signed and unsigned integers 846def : InstRW<[V1Write_2c_1V], (instregex "^([SU]|SU|US)DOT(lane)?v(8|16)i8$")>; 847 848// ASIMD matrix multiply- accumulate 849def : InstRW<[V1Write_3c_1V], (instrs SMMLA, UMMLA, USMMLA)>; 850 851// ASIMD multiply 852// ASIMD multiply accumulate 853// ASIMD multiply accumulate long 854// ASIMD multiply accumulate high 855// ASIMD multiply accumulate saturating long 856def : InstRW<[V1Write_4c_1V02], 857 (instregex "^MUL(v[148]i16|v[124]i32)$", 858 "^SQR?DMULH(v[48]i16|v[24]i32)$", 859 "^ML[AS](v[148]i16|v[124]i32)$", 860 "^[SU]ML[AS]Lv", 861 "^SQRDML[AS]H(v[148]i16|v[124]i32)$", 862 "^SQDML[AS]Lv")>; 863 864// ASIMD multiply/multiply long (8x8) polynomial 865def : InstRW<[V1Write_3c_1V01], (instregex "^PMULL?v(8|16)i8$")>; 866 867// ASIMD multiply long 868def : InstRW<[V1Write_3c_1V02], (instregex "^([SU]|SQD)MULLv")>; 869 870// ASIMD shift accumulate 871// ASIMD shift by immed, complex 872// ASIMD shift by register, complex 873def : InstRW<[V1Write_4c_1V13], 874 (instregex "^[SU]R?SRAv", 875 "^RSHRNv", "^SQRSHRU?Nv", "^(SQSHLU?|UQSHL)[bhsd]$", 876 "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$", 877 "^SQSHU?RNv", "^[SU]RSHRv", "^UQR?SHRNv", 878 "^[SU]Q?RSHLv", "^[SU]QSHLv")>; 879 880// ASIMD shift by immed, basic 881// ASIMD shift by immed and insert, basic 882// ASIMD shift by register, basic 883def : InstRW<[V1Write_2c_1V13], (instregex "^SHLL?v", "^SHRNv", "^[SU]SHLLv", 884 "^[SU]SHRv", "^S[LR]Iv", "^[SU]SHLv")>; 885 886 887// ASIMD FP instructions 888// ----------------------------------------------------------------------------- 889 890// ASIMD FP absolute value/difference 891// ASIMD FP arith, normal 892// ASIMD FP compare 893// ASIMD FP complex add 894// ASIMD FP max/min, normal 895// ASIMD FP max/min, pairwise 896// ASIMD FP negate 897// Covered by "SchedAlias (WriteV[dq]...)" above 898 899// ASIMD FP complex multiply add 900// ASIMD FP multiply accumulate 901def : InstRW<[V1Write_4c_1V], (instregex "^FCADD(v[48]f16|v[24]f32|v2f64)$", 902 "^FML[AS]v")>; 903 904// ASIMD FP convert, long (F16 to F32) 905def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTLv[48]i16$")>; 906 907// ASIMD FP convert, long (F32 to F64) 908def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTLv[24]i32$")>; 909 910// ASIMD FP convert, narrow (F32 to F16) 911def : InstRW<[V1Write_4c_2V02], (instregex "^FCVTNv[48]i16$")>; 912 913// ASIMD FP convert, narrow (F64 to F32) 914def : InstRW<[V1Write_3c_1V02], (instregex "^FCVTNv[24]i32$", 915 "^FCVTXN(v[24]f32|v1i64)$")>; 916 917// ASIMD FP convert, other, D-form F32 and Q-form F64 918def : InstRW<[V1Write_3c_1V02], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$", 919 "^[SU]CVTFv2f(32|64)$")>; 920 921// ASIMD FP convert, other, D-form F16 and Q-form F32 922def : InstRW<[V1Write_4c_2V02], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$", 923 "^[SU]CVTFv4f(16|32)$")>; 924 925// ASIMD FP convert, other, Q-form F16 926def : InstRW<[V1Write_6c_4V02], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$", 927 "^[SU]CVTFv8f16$")>; 928 929// ASIMD FP divide, D-form, F16 930// ASIMD FP square root, D-form, F16 931def : InstRW<[V1Write_7c7_1V02], (instrs FDIVv4f16, FSQRTv4f16)>; 932 933// ASIMD FP divide, F32 934// ASIMD FP square root, F32 935def : InstRW<[V1Write_10c7_1V02], (instrs FDIVv2f32, FDIVv4f32, 936 FSQRTv2f32, FSQRTv4f32)>; 937 938// ASIMD FP divide, Q-form, F16 939def : InstRW<[V1Write_13c5_1V02], (instrs FDIVv8f16)>; 940 941// ASIMD FP divide, Q-form, F64 942def : InstRW<[V1Write_15c7_1V02], (instrs FDIVv2f64)>; 943 944// ASIMD FP square root, Q-form, F16 945def : InstRW<[V1Write_13c11_1V02], (instrs FSQRTv8f16)>; 946 947// ASIMD FP square root, Q-form, F64 948def : InstRW<[V1Write_16c7_1V02], (instrs FSQRTv2f64)>; 949 950// ASIMD FP max/min, reduce, F32 and D-form F16 951def : InstRW<[V1Write_4c_2V], (instregex "^F(MAX|MIN)(NM)?Vv4(i16|i32)v$")>; 952 953// ASIMD FP max/min, reduce, Q-form F16 954def : InstRW<[V1Write_6c_3V], (instregex "^F(MAX|MIN)(NM)?Vv8i16v$")>; 955 956// ASIMD FP multiply 957def : InstRW<[V1Write_3c_1V], (instregex "^FMULX?v")>; 958 959// ASIMD FP multiply accumulate long 960def : InstRW<[V1Write_5c_1V], (instregex "^FML[AS]L2?v")>; 961 962// ASIMD FP round, D-form F32 and Q-form F64 963def : InstRW<[V1Write_3c_1V02], (instregex "^FRINT[AIMNPXZ]v2f(32|64)$")>; 964 965// ASIMD FP round, D-form F16 and Q-form F32 966def : InstRW<[V1Write_4c_2V02], (instregex "^FRINT[AIMNPXZ]v4f(16|32)$")>; 967 968// ASIMD FP round, Q-form F16 969def : InstRW<[V1Write_6c_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>; 970 971 972// ASIMD BF instructions 973// ----------------------------------------------------------------------------- 974 975// ASIMD convert, F32 to BF16 976def : InstRW<[V1Write_4c_1V02], (instrs BFCVTN, BFCVTN2)>; 977 978// ASIMD dot product 979def : InstRW<[V1Write_4c_1V], (instregex "^BF(DOT|16DOTlane)v[48]bf16$")>; 980 981// ASIMD matrix multiply accumulate 982def : InstRW<[V1Write_5c_1V], (instrs BFMMLA)>; 983 984// ASIMD multiply accumulate long 985def : InstRW<[V1Write_4c_1V], (instregex "^BFMLAL[BT](Idx)?$")>; 986 987// Scalar convert, F32 to BF16 988def : InstRW<[V1Write_3c_1V02], (instrs BFCVT)>; 989 990 991// ASIMD miscellaneous instructions 992// ----------------------------------------------------------------------------- 993 994// ASIMD bit reverse 995// ASIMD bitwise insert 996// ASIMD count 997// ASIMD duplicate, element 998// ASIMD extract 999// ASIMD extract narrow 1000// ASIMD insert, element to element 1001// ASIMD move, FP immed 1002// ASIMD move, integer immed 1003// ASIMD reverse 1004// ASIMD table lookup, 1 or 2 table regs 1005// ASIMD table lookup extension, 1 table reg 1006// ASIMD transfer, element to gen reg 1007// ASIMD transpose 1008// ASIMD unzip/zip 1009// Covered by "SchedAlias (WriteV[dq]...)" above 1010 1011// ASIMD duplicate, gen reg 1012def : InstRW<[V1Write_3c_1M0], 1013 (instregex "^DUP((v16|v8)i8|(v8|v4)i16|(v4|v2)i32|v2i64)gpr$")>; 1014 1015// ASIMD extract narrow, saturating 1016def : InstRW<[V1Write_4c_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>; 1017 1018// ASIMD reciprocal and square root estimate, D-form U32 1019// ASIMD reciprocal and square root estimate, D-form F32 and F64 1020def : InstRW<[V1Write_3c_1V02], (instrs URECPEv2i32, 1021 URSQRTEv2i32, 1022 FRECPEv1i32, FRECPEv2f32, FRECPEv1i64, 1023 FRSQRTEv1i32, FRSQRTEv2f32, FRSQRTEv1i64)>; 1024 1025// ASIMD reciprocal and square root estimate, Q-form U32 1026// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 and F64 1027def : InstRW<[V1Write_4c_1V02], (instrs URECPEv4i32, 1028 URSQRTEv4i32, 1029 FRECPEv1f16, FRECPEv4f16, 1030 FRECPEv4f32, FRECPEv2f64, 1031 FRSQRTEv1f16, FRSQRTEv4f16, 1032 FRSQRTEv4f32, FRSQRTEv2f64)>; 1033 1034// ASIMD reciprocal and square root estimate, Q-form F16 1035def : InstRW<[V1Write_6c_2V02], (instrs FRECPEv8f16, 1036 FRSQRTEv8f16)>; 1037 1038// ASIMD reciprocal exponent 1039def : InstRW<[V1Write_3c_1V02], (instrs FRECPXv1f16, FRECPXv1i32, FRECPXv1i64)>; 1040 1041// ASIMD reciprocal step 1042def : InstRW<[V1Write_4c_1V], (instregex "^FRECPS(16|32|64)$", "^FRECPSv", 1043 "^FRSQRTS(16|32|64)$", "^FRSQRTSv")>; 1044 1045// ASIMD table lookup, 1 or 2 table regs 1046// ASIMD table lookup extension, 1 table reg 1047def : InstRW<[V1Write_2c_2V01], (instregex "^TBLv(8|16)i8(One|Two)$", 1048 "^TBXv(8|16)i8One$")>; 1049 1050// ASIMD table lookup, 3 table regs 1051// ASIMD table lookup extension, 2 table reg 1052def : InstRW<[V1Write_4c_2V01], (instrs TBLv8i8Three, TBLv16i8Three, 1053 TBXv8i8Two, TBXv16i8Two)>; 1054 1055// ASIMD table lookup, 4 table regs 1056def : InstRW<[V1Write_4c_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>; 1057 1058// ASIMD table lookup extension, 3 table reg 1059def : InstRW<[V1Write_6c_3V01], (instrs TBXv8i8Three, TBXv16i8Three)>; 1060 1061// ASIMD table lookup extension, 4 table reg 1062def : InstRW<[V1Write_6c_5V01], (instrs TBXv8i8Four, TBXv16i8Four)>; 1063 1064// ASIMD transfer, element to gen reg 1065def : InstRW<[V1Write_2c_1V], (instregex "^SMOVvi(((8|16)to(32|64))|32to64)$", 1066 "^UMOVvi(8|16|32|64)$")>; 1067 1068// ASIMD transfer, gen reg to element 1069def : InstRW<[V1Write_5c_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>; 1070 1071 1072// ASIMD load instructions 1073// ----------------------------------------------------------------------------- 1074 1075// ASIMD load, 1 element, multiple, 1 reg 1076def : InstRW<[V1Write_6c_1L], 1077 (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1078def : InstRW<[WriteAdr, V1Write_6c_1L], 1079 (instregex "^LD1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1080 1081// ASIMD load, 1 element, multiple, 2 reg 1082def : InstRW<[V1Write_6c_2L], 1083 (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1084def : InstRW<[WriteAdr, V1Write_6c_2L], 1085 (instregex "^LD1Twov(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1086 1087// ASIMD load, 1 element, multiple, 3 reg 1088def : InstRW<[V1Write_6c_3L], 1089 (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1090def : InstRW<[WriteAdr, V1Write_6c_3L], 1091 (instregex "^LD1Threev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1092 1093// ASIMD load, 1 element, multiple, 4 reg, D-form 1094def : InstRW<[V1Write_6c_2L], 1095 (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; 1096def : InstRW<[WriteAdr, V1Write_6c_2L], 1097 (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; 1098 1099// ASIMD load, 1 element, multiple, 4 reg, Q-form 1100def : InstRW<[V1Write_7c_4L], 1101 (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; 1102def : InstRW<[WriteAdr, V1Write_7c_4L], 1103 (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; 1104 1105// ASIMD load, 1 element, one lane 1106// ASIMD load, 1 element, all lanes 1107def : InstRW<[V1Write_8c_1L_1V], 1108 (instregex "^LD1(i|Rv)(8|16|32|64)$", 1109 "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1110def : InstRW<[WriteAdr, V1Write_8c_1L_1V], 1111 (instregex "^LD1i(8|16|32|64)_POST$", 1112 "^LD1Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1113 1114// ASIMD load, 2 element, multiple, D-form 1115def : InstRW<[V1Write_8c_1L_2V], 1116 (instregex "^LD2Twov(8b|4h|2s)$")>; 1117def : InstRW<[WriteAdr, V1Write_8c_1L_2V], 1118 (instregex "^LD2Twov(8b|4h|2s)_POST$")>; 1119 1120// ASIMD load, 2 element, multiple, Q-form 1121def : InstRW<[V1Write_8c_2L_2V], 1122 (instregex "^LD2Twov(16b|8h|4s|2d)$")>; 1123def : InstRW<[WriteAdr, V1Write_8c_2L_2V], 1124 (instregex "^LD2Twov(16b|8h|4s|2d)_POST$")>; 1125 1126// ASIMD load, 2 element, one lane 1127// ASIMD load, 2 element, all lanes 1128def : InstRW<[V1Write_8c_1L_2V], 1129 (instregex "^LD2i(8|16|32|64)$", 1130 "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1131def : InstRW<[WriteAdr, V1Write_8c_1L_2V], 1132 (instregex "^LD2i(8|16|32|64)_POST$", 1133 "^LD2Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1134 1135// ASIMD load, 3 element, multiple, D-form 1136// ASIMD load, 3 element, one lane 1137// ASIMD load, 3 element, all lanes 1138def : InstRW<[V1Write_8c_2L_3V], 1139 (instregex "^LD3Threev(8b|4h|2s)$", 1140 "^LD3i(8|16|32|64)$", 1141 "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1142def : InstRW<[WriteAdr, V1Write_8c_2L_3V], 1143 (instregex "^LD3Threev(8b|4h|2s)_POST$", 1144 "^LD3i(8|16|32|64)_POST$", 1145 "^LD3Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1146 1147// ASIMD load, 3 element, multiple, Q-form 1148def : InstRW<[V1Write_8c_3L_3V], 1149 (instregex "^LD3Threev(16b|8h|4s|2d)$")>; 1150def : InstRW<[WriteAdr, V1Write_8c_3L_3V], 1151 (instregex "^LD3Threev(16b|8h|4s|2d)_POST$")>; 1152 1153// ASIMD load, 4 element, multiple, D-form 1154// ASIMD load, 4 element, one lane 1155// ASIMD load, 4 element, all lanes 1156def : InstRW<[V1Write_8c_3L_4V], 1157 (instregex "^LD4Fourv(8b|4h|2s)$", 1158 "^LD4i(8|16|32|64)$", 1159 "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)$")>; 1160def : InstRW<[WriteAdr, V1Write_8c_3L_4V], 1161 (instregex "^LD4Fourv(8b|4h|2s)_POST$", 1162 "^LD4i(8|16|32|64)_POST$", 1163 "^LD4Rv(8b|16b|4h|8h|2s|4s|1d|2d)_POST$")>; 1164 1165// ASIMD load, 4 element, multiple, Q-form 1166def : InstRW<[V1Write_9c_4L_4V], 1167 (instregex "^LD4Fourv(16b|8h|4s|2d)$")>; 1168def : InstRW<[WriteAdr, V1Write_9c_4L_4V], 1169 (instregex "^LD4Fourv(16b|8h|4s|2d)_POST$")>; 1170 1171 1172// ASIMD store instructions 1173// ----------------------------------------------------------------------------- 1174 1175// ASIMD store, 1 element, multiple, 1 reg 1176// ASIMD store, 1 element, multiple, 2 reg, D-form 1177def : InstRW<[V1Write_2c_1L01_1V01], 1178 (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)$", 1179 "^ST1Twov(8b|4h|2s|1d)$")>; 1180def : InstRW<[WriteAdr, V1Write_2c_1L01_1V01], 1181 (instregex "^ST1Onev(8b|16b|4h|8h|2s|4s|1d|2d)_POST$", 1182 "^ST1Twov(8b|4h|2s|1d)_POST$")>; 1183 1184// ASIMD store, 1 element, multiple, 2 reg, Q-form 1185// ASIMD store, 1 element, multiple, 3 reg, D-form 1186// ASIMD store, 1 element, multiple, 4 reg, D-form 1187def : InstRW<[V1Write_2c_2L01_2V01], 1188 (instregex "^ST1Twov(16b|8h|4s|2d)$", 1189 "^ST1Threev(8b|4h|2s|1d)$", 1190 "^ST1Fourv(8b|4h|2s|1d)$")>; 1191def : InstRW<[WriteAdr, V1Write_2c_2L01_2V01], 1192 (instregex "^ST1Twov(16b|8h|4s|2d)_POST$", 1193 "^ST1Threev(8b|4h|2s|1d)_POST$", 1194 "^ST1Fourv(8b|4h|2s|1d)_POST$")>; 1195 1196// ASIMD store, 1 element, multiple, 3 reg, Q-form 1197def : InstRW<[V1Write_2c_3L01_3V01], 1198 (instregex "^ST1Threev(16b|8h|4s|2d)$")>; 1199def : InstRW<[WriteAdr, V1Write_2c_3L01_3V01], 1200 (instregex "^ST1Threev(16b|8h|4s|2d)_POST$")>; 1201 1202// ASIMD store, 1 element, multiple, 4 reg, Q-form 1203def : InstRW<[V1Write_2c_4L01_4V01], 1204 (instregex "^ST1Fourv(16b|8h|4s|2d)$")>; 1205def : InstRW<[WriteAdr, V1Write_2c_4L01_4V01], 1206 (instregex "^ST1Fourv(16b|8h|4s|2d)_POST$")>; 1207 1208// ASIMD store, 1 element, one lane 1209// ASIMD store, 2 element, multiple, D-form 1210// ASIMD store, 2 element, one lane 1211def : InstRW<[V1Write_4c_1L01_1V01], 1212 (instregex "^ST1i(8|16|32|64)$", 1213 "^ST2Twov(8b|4h|2s)$", 1214 "^ST2i(8|16|32|64)$")>; 1215def : InstRW<[WriteAdr, V1Write_4c_1L01_1V01], 1216 (instregex "^ST1i(8|16|32|64)_POST$", 1217 "^ST2Twov(8b|4h|2s)_POST$", 1218 "^ST2i(8|16|32|64)_POST$")>; 1219 1220// ASIMD store, 2 element, multiple, Q-form 1221// ASIMD store, 3 element, multiple, D-form 1222// ASIMD store, 3 element, one lane 1223// ASIMD store, 4 element, one lane, D 1224def : InstRW<[V1Write_4c_2L01_2V01], 1225 (instregex "^ST2Twov(16b|8h|4s|2d)$", 1226 "^ST3Threev(8b|4h|2s)$", 1227 "^ST3i(8|16|32|64)$", 1228 "^ST4i64$")>; 1229def : InstRW<[WriteAdr, V1Write_4c_2L01_2V01], 1230 (instregex "^ST2Twov(16b|8h|4s|2d)_POST$", 1231 "^ST3Threev(8b|4h|2s)_POST$", 1232 "^ST3i(8|16|32|64)_POST$", 1233 "^ST4i64_POST$")>; 1234 1235// ASIMD store, 3 element, multiple, Q-form 1236def : InstRW<[V1Write_5c_3L01_3V01], 1237 (instregex "^ST3Threev(16b|8h|4s|2d)$")>; 1238def : InstRW<[WriteAdr, V1Write_5c_3L01_3V01], 1239 (instregex "^ST3Threev(16b|8h|4s|2d)_POST$")>; 1240 1241// ASIMD store, 4 element, multiple, D-form 1242def : InstRW<[V1Write_6c_3L01_3V01], 1243 (instregex "^ST4Fourv(8b|4h|2s)$")>; 1244def : InstRW<[WriteAdr, V1Write_6c_3L01_3V01], 1245 (instregex "^ST4Fourv(8b|4h|2s)_POST$")>; 1246 1247// ASIMD store, 4 element, multiple, Q-form, B/H/S 1248def : InstRW<[V1Write_7c_6L01_6V01], 1249 (instregex "^ST4Fourv(16b|8h|4s)$")>; 1250def : InstRW<[WriteAdr, V1Write_7c_6L01_6V01], 1251 (instregex "^ST4Fourv(16b|8h|4s)_POST$")>; 1252 1253// ASIMD store, 4 element, multiple, Q-form, D 1254def : InstRW<[V1Write_4c_4L01_4V01], 1255 (instrs ST4Fourv2d)>; 1256def : InstRW<[WriteAdr, V1Write_4c_4L01_4V01], 1257 (instrs ST4Fourv2d_POST)>; 1258 1259// ASIMD store, 4 element, one lane, B/H/S 1260def : InstRW<[V1Write_6c_3L_3V], 1261 (instregex "^ST4i(8|16|32)$")>; 1262def : InstRW<[WriteAdr, V1Write_6c_3L_3V], 1263 (instregex "^ST4i(8|16|32)_POST$")>; 1264 1265 1266// Cryptography extensions 1267// ----------------------------------------------------------------------------- 1268 1269// Crypto polynomial (64x64) multiply long 1270// Covered by "SchedAlias (WriteV[dq]...)" above 1271 1272// Crypto AES ops 1273def V1WriteVC : WriteSequence<[V1Write_2c_1V]>; 1274def V1ReadVC : SchedReadAdvance<2, [V1WriteVC]>; 1275def : InstRW<[V1WriteVC], (instrs AESDrr, AESErr)>; 1276def : InstRW<[V1Write_2c_1V, V1ReadVC], (instrs AESMCrr, AESIMCrr)>; 1277 1278// Crypto SHA1 hash acceleration op 1279// Crypto SHA1 schedule acceleration ops 1280// Crypto SHA256 schedule acceleration ops 1281// Crypto SHA512 hash acceleration ops 1282// Crypto SM3 ops 1283def : InstRW<[V1Write_2c_1V0], (instregex "^SHA1(H|SU[01])rr$", 1284 "^SHA256SU[01]rr$", 1285 "^SHA512(H2?|SU[01])$", 1286 "^SM3(PARTW(1|2SM3SS1)|TT[12][AB])$")>; 1287 1288// Crypto SHA1 hash acceleration ops 1289// Crypto SHA256 hash acceleration ops 1290// Crypto SM4 ops 1291def : InstRW<[V1Write_4c_1V0], (instregex "^SHA1[CMP]rrr$", 1292 "^SHA256H2?rrr$", 1293 "^SM4E(KEY)?$")>; 1294 1295// Crypto SHA3 ops 1296def : InstRW<[V1Write_2c_1V0], (instrs BCAX, EOR3, RAX1, XAR)>; 1297 1298 1299// CRC instruction 1300// ----------------------------------------------------------------------------- 1301 1302// CRC checksum ops 1303def : InstRW<[V1Write_2c_1M0], (instregex "^CRC32C?[BHWX]rr$")>; 1304 1305 1306// SVE Predicate instructions 1307// ----------------------------------------------------------------------------- 1308 1309// Loop control, based on predicate 1310def : InstRW<[V1Write_2c_1M0], (instregex "^BRK[AB]_PP[mz]P$")>; 1311def : InstRW<[V1Write_2c_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>; 1312 1313// Loop control, based on predicate and flag setting 1314def : InstRW<[V1Write_3c_2M0], (instrs BRKAS_PPzP, BRKBS_PPzP, BRKNS_PPzP, 1315 BRKPAS_PPzPP, BRKPBS_PPzPP)>; 1316 1317// Loop control, based on GPR 1318def : InstRW<[V1Write_3c_2M0], (instregex "^WHILE(LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>; 1319 1320// Loop terminate 1321def : InstRW<[V1Write_1c_1M0], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>; 1322 1323// Predicate counting scalar 1324// Predicate counting scalar, active predicate 1325def : InstRW<[V1Write_2c_1M0], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>; 1326def : InstRW<[V1Write_2c_1M0], (instregex "^(CNT|([SU]Q)?(DEC|INC))[BHWD]_XPiI$", 1327 "^SQ(DEC|INC)[BHWD]_XPiWdI$", 1328 "^UQ(DEC|INC)[BHWD]_WPiI$", 1329 "^CNTP_XPP_[BHSD]$", 1330 "^([SU]Q)?(DEC|INC)P_XP_[BHSD]$", 1331 "^UQ(DEC|INC)P_WP_[BHSD]$", 1332 "^[SU]Q(DEC|INC)P_XPWd_[BHSD]$")>; 1333 1334// Predicate counting vector, active predicate 1335def : InstRW<[V1Write_7c_2M0_1V01], (instregex "^([SU]Q)?(DEC|INC)P_ZP_[HSD]$")>; 1336 1337// Predicate logical 1338def : InstRW<[V1Write_1c_1M0], 1339 (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>; 1340 1341// Predicate logical, flag setting 1342def : InstRW<[V1Write_2c_2M0], 1343 (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)S_PPzPP$")>; 1344 1345// Predicate reverse 1346// Predicate set/initialize/find next 1347// Predicate transpose 1348// Predicate unpack and widen 1349// Predicate zip/unzip 1350def : InstRW<[V1Write_2c_1M0], (instregex "^REV_PP_[BHSD]$", 1351 "^PFALSE$", "^PFIRST_B$", 1352 "^PNEXT_[BHSD]$", "^PTRUE_[BHSD]$", 1353 "^TRN[12]_PPP_[BHSDQ]$", 1354 "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>; 1355 1356// Predicate set/initialize/find next 1357// Predicate unpack and widen 1358def : InstRW<[V1Write_2c_1M0], (instrs PTEST_PP, 1359 PUNPKHI_PP, PUNPKLO_PP)>; 1360 1361// Predicate select 1362def : InstRW<[V1Write_1c_1M0], (instrs SEL_PPPP)>; 1363 1364// Predicate set/initialize, set flags 1365def : InstRW<[V1Write_3c_2M0], (instregex "^PTRUES_[BHSD]$")>; 1366 1367 1368 1369// SVE integer instructions 1370// ----------------------------------------------------------------------------- 1371 1372// Arithmetic, basic 1373// Logical 1374def : InstRW<[V1Write_2c_1V01], 1375 (instregex "^(ABS|CNOT|NEG)_ZPmZ_[BHSD]", 1376 "^(ADD|SUB)_Z(I|P[mZ]Z|ZZ)_[BHSD]", 1377 "^ADR_[SU]XTW_ZZZ_D_[0123]$", 1378 "^ADR_LSL_ZZZ_[SD]_[0123]$", 1379 "^[SU]ABD_ZP[mZ]Z_[BHSD]", 1380 "^[SU](MAX|MIN)_Z(I|P[mZ]Z)_[BHSD]", 1381 "^[SU]Q(ADD|SUB)_Z(I|ZZ)_[BHSD]$", 1382 "^SUBR_Z(I|P[mZ]Z)_[BHSD]", 1383 "^(AND|EOR|ORR)_ZI$", 1384 "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZP?ZZ", 1385 "^EOR(BT|TB)_ZZZ_[BHSD]$", 1386 "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]")>; 1387 1388// Arithmetic, shift 1389def : InstRW<[V1Write_2c_1V1], 1390 (instregex "^(ASR|LSL|LSR)_WIDE_Z(Pm|Z)Z_[BHS]", 1391 "^(ASR|LSL|LSR)_ZPm[IZ]_[BHSD]", 1392 "^(ASR|LSL|LSR)_ZZI_[BHSD]", 1393 "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]", 1394 "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>; 1395 1396// Arithmetic, shift right for divide 1397def : InstRW<[V1Write_4c_1V1], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>; 1398 1399// Count/reverse bits 1400def : InstRW<[V1Write_2c_1V01], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>; 1401 1402// Broadcast logical bitmask immediate to vector 1403def : InstRW<[V1Write_2c_1V01], (instrs DUPM_ZI)>; 1404 1405// Compare and set flags 1406def : InstRW<[V1Write_4c_1M0_1V0], 1407 (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$", 1408 "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>; 1409 1410// Conditional extract operations, scalar form 1411def : InstRW<[V1Write_9c_1M0_1V1], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>; 1412 1413// Conditional extract operations, SIMD&FP scalar and vector forms 1414def : InstRW<[V1Write_3c_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$", 1415 "^COMPACT_ZPZ_[SD]$", 1416 "^SPLICE_ZPZZ?_[BHSD]$")>; 1417 1418// Convert to floating point, 64b to float or convert to double 1419def : InstRW<[V1Write_3c_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]", 1420 "^[SU]CVTF_ZPmZ_StoD")>; 1421 1422// Convert to floating point, 32b to single or half 1423def : InstRW<[V1Write_4c_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>; 1424 1425// Convert to floating point, 16b to half 1426def : InstRW<[V1Write_6c_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH")>; 1427 1428// Copy, scalar 1429def : InstRW<[V1Write_5c_1M0_1V01], (instregex "^CPY_ZPmR_[BHSD]$")>; 1430 1431// Copy, scalar SIMD&FP or imm 1432def : InstRW<[V1Write_2c_1V01], (instregex "^CPY_ZP([mz]I|mV)_[BHSD]$")>; 1433 1434// Divides, 32 bit 1435def : InstRW<[V1Write_12c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_S", 1436 "^[SU]DIV_ZPZZ_S")>; 1437 1438// Divides, 64 bit 1439def : InstRW<[V1Write_20c7_1V0], (instregex "^[SU]DIVR?_ZPmZ_D", 1440 "^[SU]DIV_ZPZZ_D")>; 1441 1442// Dot product, 8 bit 1443def : InstRW<[V1Write_3c_1V01], (instregex "^[SU]DOT_ZZZI?_S$")>; 1444 1445// Dot product, 8 bit, using signed and unsigned integers 1446def : InstRW<[V1Write_3c_1V], (instrs SUDOT_ZZZI, USDOT_ZZZ, USDOT_ZZZI)>; 1447 1448// Dot product, 16 bit 1449def : InstRW<[V1Write_4c_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>; 1450 1451// Duplicate, immediate and indexed form 1452def : InstRW<[V1Write_2c_1V01], (instregex "^DUP_ZI_[BHSD]$", 1453 "^DUP_ZZI_[BHSDQ]$")>; 1454 1455// Duplicate, scalar form 1456def : InstRW<[V1Write_3c_1M0], (instregex "^DUP_ZR_[BHSD]$")>; 1457 1458// Extend, sign or zero 1459def : InstRW<[V1Write_2c_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]", 1460 "^[SU]XTH_ZPmZ_[SD]", 1461 "^[SU]XTW_ZPmZ_[D]")>; 1462 1463// Extract 1464def : InstRW<[V1Write_2c_1V01], (instrs EXT_ZZI)>; 1465 1466// Extract/insert operation, SIMD and FP scalar form 1467def : InstRW<[V1Write_3c_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$", 1468 "^INSR_ZV_[BHSD]$")>; 1469 1470// Extract/insert operation, scalar 1471def : InstRW<[V1Write_6c_1M0_1V1], (instregex "^LAST[AB]_RPZ_[BHSD]$", 1472 "^INSR_ZR_[BHSD]$")>; 1473 1474// Horizontal operations, B, H, S form, imm, imm 1475def : InstRW<[V1Write_4c_1V0], (instregex "^INDEX_II_[BHS]$")>; 1476 1477// Horizontal operations, B, H, S form, scalar, imm / scalar / imm, scalar 1478def : InstRW<[V1Write_7c_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>; 1479 1480// Horizontal operations, D form, imm, imm 1481def : InstRW<[V1Write_5c_2V0], (instrs INDEX_II_D)>; 1482 1483// Horizontal operations, D form, scalar, imm / scalar / imm, scalar 1484def : InstRW<[V1Write_8c_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>; 1485 1486// Move prefix 1487def : InstRW<[V1Write_2c_1V01], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$", 1488 "^MOVPRFX_ZZ$")>; 1489 1490// Matrix multiply-accumulate 1491def : InstRW<[V1Write_3c_1V01], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; 1492 1493// Multiply, B, H, S element size 1494def : InstRW<[V1Write_4c_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]", 1495 "^MUL_ZPZZ_[BHS]", 1496 "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]", 1497 "^[SU]MULH_ZPZZ_[BHS]")>; 1498 1499// Multiply, D element size 1500// Multiply accumulate, D element size 1501def : InstRW<[V1Write_5c_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D", 1502 "^MUL_ZPZZ_D", 1503 "^[SU]MULH_(ZPmZ|ZZZ)_D", 1504 "^[SU]MULH_ZPZZ_D", 1505 "^(MLA|MLS|MAD|MSB)_(ZPmZZ|ZPZZZ)_D")>; 1506 1507// Multiply accumulate, B, H, S element size 1508// NOTE: This is not specified in the SOG. 1509def : InstRW<[V1Write_4c_1V0], (instregex "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>; 1510 1511// Predicate counting vector 1512def : InstRW<[V1Write_2c_1V0], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI$")>; 1513 1514// Reduction, arithmetic, B form 1515def : InstRW<[V1Write_14c_1V_1V0_2V1_1V13], 1516 (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>; 1517 1518// Reduction, arithmetic, H form 1519def : InstRW<[V1Write_12c_1V_1V01_2V1], 1520 (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>; 1521 1522// Reduction, arithmetic, S form 1523def : InstRW<[V1Write_10c_1V_1V01_2V1], 1524 (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>; 1525 1526// Reduction, arithmetic, D form 1527def : InstRW<[V1Write_8c_1V_1V01], 1528 (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>; 1529 1530// Reduction, logical 1531def : InstRW<[V1Write_12c_4V01], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]$")>; 1532 1533// Reverse, vector 1534def : InstRW<[V1Write_2c_1V01], (instregex "^REV_ZZ_[BHSD]$", 1535 "^REVB_ZPmZ_[HSD]$", 1536 "^REVH_ZPmZ_[SD]$", 1537 "^REVW_ZPmZ_D$")>; 1538 1539// Select, vector form 1540// Table lookup 1541// Table lookup extension 1542// Transpose, vector form 1543// Unpack and extend 1544// Zip/unzip 1545def : InstRW<[V1Write_2c_1V01], (instregex "^SEL_ZPZZ_[BHSD]$", 1546 "^TB[LX]_ZZZ_[BHSD]$", 1547 "^TRN[12]_ZZZ_[BHSDQ]$", 1548 "^[SU]UNPK(HI|LO)_ZZ_[HSD]$", 1549 "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>; 1550 1551 1552// SVE floating-point instructions 1553// ----------------------------------------------------------------------------- 1554 1555// Floating point absolute value/difference 1556def : InstRW<[V1Write_2c_1V01], (instregex "^FAB[SD]_ZPmZ_[HSD]", 1557 "^FABD_ZPZZ_[HSD]", 1558 "^FABS_ZPmZ_[HSD]")>; 1559 1560// Floating point arithmetic 1561def : InstRW<[V1Write_2c_1V01], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]", 1562 "^F(ADD|SUB)_ZPZ[IZ]_[HSD]", 1563 "^FADDP_ZPmZZ_[HSD]", 1564 "^FNEG_ZPmZ_[HSD]", 1565 "^FSUBR_ZPm[IZ]_[HSD]", 1566 "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>; 1567 1568// Floating point associative add, F16 1569def : InstRW<[V1Write_19c_18V0], (instrs FADDA_VPZ_H)>; 1570 1571// Floating point associative add, F32 1572def : InstRW<[V1Write_11c_10V0], (instrs FADDA_VPZ_S)>; 1573 1574// Floating point associative add, F64 1575def : InstRW<[V1Write_8c_3V01], (instrs FADDA_VPZ_D)>; 1576 1577// Floating point compare 1578def : InstRW<[V1Write_2c_1V0], (instregex "^FAC(GE|GT)_PPzZZ_[HSD]$", 1579 "^FCM(EQ|GE|GT|NE|UO)_PPzZZ_[HSD]$", 1580 "^FCM(EQ|GE|GT|LE|LT|NE)_PPzZ0_[HSD]$")>; 1581 1582// Floating point complex add 1583def : InstRW<[V1Write_3c_1V01], (instregex "^FCADD_ZPmZ_[HSD]$")>; 1584 1585// Floating point complex multiply add 1586def : InstRW<[V1Write_5c_1V01], (instregex "^FCMLA_ZPmZZ_[HSD]$", 1587 "^FCMLA_ZZZI_[HS]$")>; 1588 1589// Floating point convert, long or narrow (F16 to F32 or F32 to F16) 1590// Floating point convert to integer, F32 1591def : InstRW<[V1Write_4c_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)", 1592 "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>; 1593 1594// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 or F64 to F16) 1595// Floating point convert to integer, F64 1596def : InstRW<[V1Write_3c_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)", 1597 "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>; 1598 1599// Floating point convert to integer, F16 1600def : InstRW<[V1Write_6c_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>; 1601 1602// Floating point copy 1603def : InstRW<[V1Write_2c_1V01], (instregex "^FCPY_ZPmI_[HSD]$", 1604 "^FDUP_ZI_[HSD]$")>; 1605 1606// Floating point divide, F16 1607def : InstRW<[V1Write_13c10_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>; 1608 1609// Floating point divide, F32 1610def : InstRW<[V1Write_10c7_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>; 1611 1612// Floating point divide, F64 1613def : InstRW<[V1Write_15c7_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>; 1614 1615// Floating point min/max 1616def : InstRW<[V1Write_2c_1V01], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]", 1617 "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>; 1618 1619// Floating point multiply 1620def : InstRW<[V1Write_3c_1V01], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]", 1621 "^FMULX_ZPZZ_[HSD]", 1622 "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]", 1623 "^FMUL_ZPZ[IZ]_[HSD]")>; 1624 1625// Floating point multiply accumulate 1626// Floating point reciprocal step 1627def : InstRW<[V1Write_4c_1V01], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$", 1628 "^FN?ML[AS]_ZPZZZ_[HSD]", 1629 "^FML[AS]_ZZZI_[HSD]$", 1630 "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>; 1631 1632// Floating point reciprocal estimate, F16 1633def : InstRW<[V1Write_6c_4V0], (instrs FRECPE_ZZ_H, FRSQRTE_ZZ_H)>; 1634 1635// Floating point reciprocal estimate, F32 1636def : InstRW<[V1Write_4c_2V0], (instrs FRECPE_ZZ_S, FRSQRTE_ZZ_S)>; 1637 1638// Floating point reciprocal estimate, F64 1639def : InstRW<[V1Write_3c_1V0], (instrs FRECPE_ZZ_D, FRSQRTE_ZZ_D)>; 1640 1641// Floating point reciprocal exponent 1642def : InstRW<[V1Write_3c_1V0], (instregex "^FRECPX_ZPmZ_[HSD]")>; 1643 1644// Floating point reduction, F16 1645def : InstRW<[V1Write_13c_6V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_H$")>; 1646 1647// Floating point reduction, F32 1648def : InstRW<[V1Write_11c_1V_5V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_S$")>; 1649 1650// Floating point reduction, F64 1651def : InstRW<[V1Write_9c_1V_4V01], (instregex "^F(ADD|((MAX|MIN)(NM)?))V_VPZ_D$")>; 1652 1653// Floating point round to integral, F16 1654def : InstRW<[V1Write_6c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>; 1655 1656// Floating point round to integral, F32 1657def : InstRW<[V1Write_4c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>; 1658 1659// Floating point round to integral, F64 1660def : InstRW<[V1Write_3c_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>; 1661 1662// Floating point square root, F16 1663def : InstRW<[V1Write_13c10_1V0], (instregex "^FSQRT_ZPmZ_H")>; 1664 1665// Floating point square root, F32 1666def : InstRW<[V1Write_10c7_1V0], (instregex "^FSQRT_ZPmZ_S")>; 1667 1668// Floating point square root, F64 1669def : InstRW<[V1Write_16c7_1V0], (instregex "^FSQRT_ZPmZ_D")>; 1670 1671// Floating point trigonometric 1672def : InstRW<[V1Write_3c_1V01], (instregex "^FEXPA_ZZ_[HSD]$", 1673 "^FTMAD_ZZI_[HSD]$", 1674 "^FTS(MUL|SEL)_ZZZ_[HSD]$")>; 1675 1676 1677// SVE BFloat16 (BF16) instructions 1678// ----------------------------------------------------------------------------- 1679 1680// Convert, F32 to BF16 1681def : InstRW<[V1Write_4c_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>; 1682 1683// Dot product 1684def : InstRW<[V1Write_4c_1V01], (instrs BFDOT_ZZI, BFDOT_ZZZ)>; 1685 1686// Matrix multiply accumulate 1687def : InstRW<[V1Write_5c_1V01], (instrs BFMMLA_ZZZ)>; 1688 1689// Multiply accumulate long 1690def : InstRW<[V1Write_5c_1V01], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>; 1691 1692 1693// SVE Load instructions 1694// ----------------------------------------------------------------------------- 1695 1696// Load vector 1697def : InstRW<[V1Write_6c_1L01], (instrs LDR_ZXI)>; 1698 1699// Load predicate 1700def : InstRW<[V1Write_6c_1L_1M], (instrs LDR_PXI)>; 1701 1702// Contiguous load, scalar + imm 1703// Contiguous load, scalar + scalar 1704// Contiguous load broadcast, scalar + imm 1705// Contiguous load broadcast, scalar + scalar 1706def : InstRW<[V1Write_6c_1L01], (instregex "^LD1[BHWD]_IMM$", 1707 "^LD1S?B_[HSD]_IMM$", 1708 "^LD1S?H_[SD]_IMM$", 1709 "^LD1S?W_D_IMM$", 1710 "^LD1[BWD]$", 1711 "^LD1S?B_[HSD]$", 1712 "^LD1S?W_D$", 1713 "^LD1R[BHWD]_IMM$", 1714 "^LD1RSW_IMM$", 1715 "^LD1RS?B_[HSD]_IMM$", 1716 "^LD1RS?H_[SD]_IMM$", 1717 "^LD1RS?W_D_IMM$", 1718 "^LD1RQ_[BHWD]_IMM$", 1719 "^LD1RQ_[BWD]$")>; 1720def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LD1H$", 1721 "^LD1S?H_[SD]$", 1722 "^LD1RQ_H$")>; 1723 1724// Non temporal load, scalar + imm 1725def : InstRW<[V1Write_6c_1L01], (instregex "^LDNT1[BHWD]_ZRI$")>; 1726 1727// Non temporal load, scalar + scalar 1728def : InstRW<[V1Write_7c_1L01_1S], (instrs LDNT1H_ZRR)>; 1729def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDNT1[BWD]_ZRR$")>; 1730 1731// Contiguous first faulting load, scalar + scalar 1732def : InstRW<[V1Write_7c_1L01_1S], (instregex "^LDFF1H$", 1733 "^LDFF1S?H_[SD]$")>; 1734def : InstRW<[V1Write_6c_1L01_1S], (instregex "^LDFF1[BWD]$", 1735 "^LDFF1S?B_[HSD]$", 1736 "^LDFF1S?W_D$")>; 1737 1738// Contiguous non faulting load, scalar + imm 1739def : InstRW<[V1Write_6c_1L01], (instregex "^LDNF1[BHWD]_IMM$", 1740 "^LDNF1S?B_[HSD]_IMM$", 1741 "^LDNF1S?H_[SD]_IMM$", 1742 "^LDNF1S?W_D_IMM$")>; 1743 1744// Contiguous Load two structures to two vectors, scalar + imm 1745def : InstRW<[V1Write_8c_2L01_2V01], (instregex "^LD2[BHWD]_IMM$")>; 1746 1747// Contiguous Load two structures to two vectors, scalar + scalar 1748def : InstRW<[V1Write_10c_2L01_2V01], (instrs LD2H)>; 1749def : InstRW<[V1Write_9c_2L01_2V01], (instregex "^LD2[BWD]$")>; 1750 1751// Contiguous Load three structures to three vectors, scalar + imm 1752def : InstRW<[V1Write_11c_3L01_3V01], (instregex "^LD3[BHWD]_IMM$")>; 1753 1754// Contiguous Load three structures to three vectors, scalar + scalar 1755def : InstRW<[V1Write_13c_3L01_1S_3V01], (instregex "^LD3[BHWD]$")>; 1756 1757// Contiguous Load four structures to four vectors, scalar + imm 1758def : InstRW<[V1Write_12c_4L01_4V01], (instregex "^LD4[BHWD]_IMM$")>; 1759 1760// Contiguous Load four structures to four vectors, scalar + scalar 1761def : InstRW<[V1Write_13c_4L01_2S_4V01], (instregex "^LD4[BHWD]$")>; 1762 1763// Gather load, vector + imm, 32-bit element size 1764def : InstRW<[V1Write_11c_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$", 1765 "^GLD(FF)?1W_IMM$")>; 1766 1767// Gather load, vector + imm, 64-bit element size 1768def : InstRW<[V1Write_9c_2L_2V], 1769 (instregex "^GLD(FF)?1S?[BHW]_D_IMM$", 1770 "^GLD(FF)?1S?[BHW]_D(_[SU]XTW)?(_SCALED)?$", 1771 "^GLD(FF)?1D_IMM$", 1772 "^GLD(FF)?1D(_[SU]XTW)?(_SCALED)?$")>; 1773 1774// Gather load, 32-bit scaled offset 1775def : InstRW<[V1Write_11c_2L_2V], 1776 (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$", 1777 "^GLD(FF)?1W_[SU]XTW_SCALED")>; 1778 1779// Gather load, 32-bit unpacked unscaled offset 1780def : InstRW<[V1Write_9c_1L_1V], 1781 (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$", 1782 "^GLD(FF)?1W_[SU]XTW$")>; 1783 1784// Prefetch 1785// NOTE: This is not specified in the SOG. 1786def : InstRW<[V1Write_4c_1L01], (instregex "^PRF[BHWD]")>; 1787 1788 1789// SVE Store instructions 1790// ----------------------------------------------------------------------------- 1791 1792// Store from predicate reg 1793def : InstRW<[V1Write_1c_1L01], (instrs STR_PXI)>; 1794 1795// Store from vector reg 1796def : InstRW<[V1Write_2c_1L01_1V], (instrs STR_ZXI)>; 1797 1798// Contiguous store, scalar + imm 1799// Contiguous store, scalar + scalar 1800def : InstRW<[V1Write_2c_1L01_1V], (instregex "^ST1[BHWD]_IMM$", 1801 "^ST1B_[HSD]_IMM$", 1802 "^ST1H_[SD]_IMM$", 1803 "^ST1W_D_IMM$", 1804 "^ST1[BWD]$", 1805 "^ST1B_[HSD]$", 1806 "^ST1W_D$")>; 1807def : InstRW<[V1Write_2c_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>; 1808 1809// Contiguous store two structures from two vectors, scalar + imm 1810// Contiguous store two structures from two vectors, scalar + scalar 1811def : InstRW<[V1Write_4c_1L01_1V], (instregex "^ST2[BHWD]_IMM$", 1812 "^ST2[BWD]$")>; 1813def : InstRW<[V1Write_4c_1L01_1S_1V], (instrs ST2H)>; 1814 1815// Contiguous store three structures from three vectors, scalar + imm 1816def : InstRW<[V1Write_7c_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>; 1817 1818// Contiguous store three structures from three vectors, scalar + scalar 1819def : InstRW<[V1Write_7c_5L01_5S_5V], (instregex "^ST3[BHWD]$")>; 1820 1821// Contiguous store four structures from four vectors, scalar + imm 1822def : InstRW<[V1Write_11c_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>; 1823 1824// Contiguous store four structures from four vectors, scalar + scalar 1825def : InstRW<[V1Write_11c_9L01_9S_9V], (instregex "^ST4[BHWD]$")>; 1826 1827// Non temporal store, scalar + imm 1828// Non temporal store, scalar + scalar 1829def : InstRW<[V1Write_2c_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$", 1830 "^STNT1[BWD]_ZRR$")>; 1831def : InstRW<[V1Write_2c_1L01_1S_1V], (instrs STNT1H_ZRR)>; 1832 1833// Scatter store vector + imm 32-bit element size 1834// Scatter store, 32-bit scaled offset 1835// Scatter store, 32-bit unscaled offset 1836def : InstRW<[V1Write_10c_2L01_2V], (instregex "^SST1[BH]_S_IMM$", 1837 "^SST1W_IMM$", 1838 "^SST1(H_S|W)_[SU]XTW_SCALED$", 1839 "^SST1[BH]_S_[SU]XTW$", 1840 "^SST1W_[SU]XTW$")>; 1841 1842// Scatter store, 32-bit unpacked unscaled offset 1843// Scatter store, 32-bit unpacked scaled offset 1844def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$", 1845 "^SST1D_[SU]XTW$", 1846 "^SST1[HW]_D_[SU]XTW_SCALED$", 1847 "^SST1D_[SU]XTW_SCALED$")>; 1848 1849// Scatter store vector + imm 64-bit element size 1850// Scatter store, 64-bit scaled offset 1851// Scatter store, 64-bit unscaled offset 1852def : InstRW<[V1Write_6c_1L01_1V], (instregex "^SST1[BHW]_D_IMM$", 1853 "^SST1D_IMM$", 1854 "^SST1[HW]_D_SCALED$", 1855 "^SST1D_SCALED$", 1856 "^SST1[BHW]_D$", 1857 "^SST1D$")>; 1858 1859 1860// SVE Miscellaneous instructions 1861// ----------------------------------------------------------------------------- 1862 1863// Read first fault register, unpredicated 1864// Set first fault register 1865// Write to first fault register 1866def : InstRW<[V1Write_2c_1M0], (instrs RDFFR_P, 1867 SETFFR, 1868 WRFFR)>; 1869 1870// Read first fault register, predicated 1871def : InstRW<[V1Write_3c_2M0], (instrs RDFFR_PPz)>; 1872 1873// Read first fault register and set flags 1874def : InstRW<[V1Write_4c_1M], (instrs RDFFRS_PPz)>; 1875 1876 1877} 1878