1//=- AArch64SchedNeoverseV2.td - NeoverseV2 Scheduling Defs --*- tablegen -*-=// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the scheduling model for the Arm Neoverse V2 processors. 10// All information is taken from the V2 Software Optimisation guide: 11// 12// https://developer.arm.com/documentation/PJDOC-466751330-593177/r0p2 13// 14//===----------------------------------------------------------------------===// 15 16def NeoverseV2Model : SchedMachineModel { 17 let IssueWidth = 16; // Micro-ops dispatched at a time. 18 let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer. NOTE: Copied from N2. 19 let LoadLatency = 4; // Optimistic load latency. 20 let MispredictPenalty = 10; // Extra cycles for mispredicted branch. NOTE: Copied from N2. 21 let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57. 22 let CompleteModel = 1; 23 24 list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, 25 [HasSVE2p1]); 26} 27 28//===----------------------------------------------------------------------===// 29// Define each kind of processor resource and number available on Neoverse V2. 30// Instructions are first fetched and then decoded into internal macro-ops 31// (MOPs). From there, the MOPs proceed through register renaming and dispatch 32// stages. A MOP can be split into two micro-ops further down the pipeline 33// after the decode stage. Once dispatched, micro-ops wait for their operands 34// and issue out-of-order to one of seventeen issue pipelines. Each issue 35// pipeline can accept one micro-op per cycle. 36 37let SchedModel = NeoverseV2Model in { 38 39// Define the (17) issue ports. 40def V2UnitB : ProcResource<2>; // Branch 0/1 41def V2UnitS0 : ProcResource<1>; // Integer single-cycle 0 42def V2UnitS1 : ProcResource<1>; // Integer single-cycle 1 43def V2UnitS2 : ProcResource<1>; // Integer single-cycle 2 44def V2UnitS3 : ProcResource<1>; // Integer single-cycle 3 45def V2UnitM0 : ProcResource<1>; // Integer single/multicycle 0 46def V2UnitM1 : ProcResource<1>; // Integer single/multicycle 1 47def V2UnitV0 : ProcResource<1>; // FP/ASIMD 0 48def V2UnitV1 : ProcResource<1>; // FP/ASIMD 1 49def V2UnitV2 : ProcResource<1>; // FP/ASIMD 2 50def V2UnitV3 : ProcResource<1>; // FP/ASIMD 3 51def V2UnitL01 : ProcResource<2>; // Load/Store 0/1 52def V2UnitL2 : ProcResource<1>; // Load 2 53def V2UnitD : ProcResource<2>; // Store data 0/1 54 55def V2UnitR : ProcResGroup<[V2UnitS0, V2UnitS1]>; // Integer single-cycle 0/1 56def V2UnitS : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3]>; // Integer single-cycle 0/1/2/3 57def V2UnitF : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitM0, V2UnitM1]>; // Integer single-cycle 0/1 and single/multicycle 0/1 58def V2UnitI : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3, V2UnitM0, V2UnitM1]>; // Integer single-cycle 0/1/2/3 and single/multicycle 0/1 59def V2UnitM : ProcResGroup<[V2UnitM0, V2UnitM1]>; // Integer single/multicycle 0/1 60def V2UnitL : ProcResGroup<[V2UnitL01, V2UnitL2]>; // Load/Store 0/1 and Load 2 61def V2UnitV : ProcResGroup<[V2UnitV0, V2UnitV1, V2UnitV2, V2UnitV3]>; // FP/ASIMD 0/1/2/3 62def V2UnitV01 : ProcResGroup<[V2UnitV0, V2UnitV1]>; // FP/ASIMD 0/1 63def V2UnitV02 : ProcResGroup<[V2UnitV0, V2UnitV2]>; // FP/ASIMD 0/2 64def V2UnitV13 : ProcResGroup<[V2UnitV1, V2UnitV3]>; // FP/ASIMD 1/3 65def V2UnitV23 : ProcResGroup<[V2UnitV2, V2UnitV3]>; // FP/ASIMD 2/3 66 67// Define commonly used read types. 68 69// No forwarding is provided for these types. 70def : ReadAdvance<ReadI, 0>; 71def : ReadAdvance<ReadISReg, 0>; 72def : ReadAdvance<ReadIEReg, 0>; 73def : ReadAdvance<ReadIM, 0>; 74def : ReadAdvance<ReadIMA, 0>; 75def : ReadAdvance<ReadID, 0>; 76def : ReadAdvance<ReadExtrHi, 0>; 77def : ReadAdvance<ReadAdrBase, 0>; 78def : ReadAdvance<ReadST, 0>; 79def : ReadAdvance<ReadVLD, 0>; 80 81// NOTE: Copied from N2. 82def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } 83def : WriteRes<WriteBarrier, []> { let Latency = 1; } 84def : WriteRes<WriteHint, []> { let Latency = 1; } 85def : WriteRes<WriteLDHi, []> { let Latency = 4; } 86 87//===----------------------------------------------------------------------===// 88// Define customized scheduler read/write types specific to the Neoverse V2. 89 90//===----------------------------------------------------------------------===// 91// Define generic 1 micro-op types 92 93def V2Write_1cyc_1B : SchedWriteRes<[V2UnitB]> { let Latency = 1; } 94def V2Write_1cyc_1F : SchedWriteRes<[V2UnitF]> { let Latency = 1; } 95def V2Write_1cyc_1I : SchedWriteRes<[V2UnitI]> { let Latency = 1; } 96def V2Write_1cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 1; } 97def V2Write_1cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 1; } 98def V2Write_1cyc_1L01 : SchedWriteRes<[V2UnitL01]> { let Latency = 1; } 99def V2Write_2cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 2; } 100def V2Write_3cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 3; } 101def V2Write_2cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 2; } 102def V2Write_3cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 3; } 103def V2Write_5cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 5; } 104def V2Write_12cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 12; 105 let ResourceCycles = [12]; } 106def V2Write_20cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 20; 107 let ResourceCycles = [20]; } 108def V2Write_4cyc_1L : SchedWriteRes<[V2UnitL]> { let Latency = 4; } 109def V2Write_6cyc_1L : SchedWriteRes<[V2UnitL]> { let Latency = 6; } 110def V2Write_2cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 2; } 111def V2Write_2cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 2; } 112def V2Write_2cyc_1V01 : SchedWriteRes<[V2UnitV01]> { let Latency = 2; } 113def V2Write_2cyc_1V23 : SchedWriteRes<[V2UnitV23]> { let Latency = 2; } 114def V2Write_3cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 3; } 115def V2Write_3cyc_1V01 : SchedWriteRes<[V2UnitV01]> { let Latency = 3; 116 let ResourceCycles = [2]; } 117def V2Write_3cyc_1V23 : SchedWriteRes<[V2UnitV23]> { let Latency = 3; } 118def V2Write_4cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 4; } 119def V2Write_5cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 5; } 120def V2Write_6cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 6; } 121def V2Write_12cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 12; } 122def V2Write_3cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 3; } 123def V2Write_3cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 3; } 124def V2Write_4cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 4; } 125def V2Write_4cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 126def V2Write_7cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 7; 127 let ResourceCycles = [7]; } 128def V2Write_7cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 7; 129 let ResourceCycles = [2]; } 130def V2Write_9cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 9; } 131def V2Write_9cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 9; 132 let ResourceCycles = [2]; } 133def V2Write_10cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 10; } 134def V2Write_10cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 10; 135 let ResourceCycles = [2]; } 136def V2Write_12cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 12; 137 let ResourceCycles = [11]; } 138def V2Write_13cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 13; } 139def V2Write_15cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 15; } 140def V2Write_15cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 15; 141 let ResourceCycles = [8]; } 142def V2Write_16cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 16; } 143def V2Write_16cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 16; 144 let ResourceCycles = [8]; } 145def V2Write_20cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 20; 146 let ResourceCycles = [20]; } 147def V2Write_2cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 2; } 148def V2Write_2cyc_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 2; } 149def V2Write_3cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 3; } 150def V2Write_4cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 4; } 151def V2Write_4cyc_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } 152def V2Write_6cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 6; } 153def V2Write_10cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 10; } 154def V2Write_6cyc_1L01 : SchedWriteRes<[V2UnitL01]> { let Latency = 6; } 155 156//===----------------------------------------------------------------------===// 157// Define generic 2 micro-op types 158 159def V2Write_1cyc_1B_1R : SchedWriteRes<[V2UnitB, V2UnitR]> { 160 let Latency = 1; 161 let NumMicroOps = 2; 162} 163 164def V2Write_6cyc_1M0_1B : SchedWriteRes<[V2UnitM0, V2UnitB]> { 165 let Latency = 6; 166 let NumMicroOps = 2; 167} 168 169def V2Write_9cyc_1M0_1L : SchedWriteRes<[V2UnitM0, V2UnitL]> { 170 let Latency = 9; 171 let NumMicroOps = 2; 172} 173 174def V2Write_3cyc_1I_1M : SchedWriteRes<[V2UnitI, V2UnitM]> { 175 let Latency = 3; 176 let NumMicroOps = 2; 177} 178 179def V2Write_1cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { 180 let Latency = 1; 181 let NumMicroOps = 2; 182} 183 184def V2Write_3cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { 185 let Latency = 3; 186 let NumMicroOps = 2; 187} 188 189def V2Write_4cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { 190 let Latency = 4; 191 let NumMicroOps = 2; 192} 193 194def V2Write_5cyc_1L_1F : SchedWriteRes<[V2UnitL, V2UnitF]> { 195 let Latency = 5; 196 let NumMicroOps = 2; 197} 198 199def V2Write_6cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> { 200 let Latency = 6; 201 let NumMicroOps = 2; 202} 203 204def V2Write_7cyc_1F_1L : SchedWriteRes<[V2UnitF, V2UnitL]> { 205 let Latency = 7; 206 let NumMicroOps = 2; 207} 208 209def V2Write_7cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> { 210 let Latency = 7; 211 let NumMicroOps = 2; 212} 213 214def V2Write_1cyc_1L01_1D : SchedWriteRes<[V2UnitL01, V2UnitD]> { 215 let Latency = 1; 216 let NumMicroOps = 2; 217} 218 219def V2Write_5cyc_1M0_1V : SchedWriteRes<[V2UnitM0, V2UnitV]> { 220 let Latency = 5; 221 let NumMicroOps = 2; 222} 223 224def V2Write_2cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> { 225 let Latency = 2; 226 let NumMicroOps = 2; 227} 228 229def V2Write_2cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> { 230 let Latency = 2; 231 let NumMicroOps = 2; 232} 233 234def V2Write_2cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> { 235 let Latency = 2; 236 let NumMicroOps = 2; 237} 238 239def V2Write_4cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> { 240 let Latency = 4; 241 let NumMicroOps = 2; 242} 243 244def V2Write_4cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> { 245 let Latency = 4; 246 let NumMicroOps = 2; 247} 248 249def V2Write_4cyc_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> { 250 let Latency = 4; 251 let NumMicroOps = 2; 252} 253 254def V2Write_4cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> { 255 let Latency = 4; 256 let NumMicroOps = 2; 257} 258 259def V2Write_4cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> { 260 let Latency = 4; 261 let NumMicroOps = 2; 262} 263 264def V2Write_4cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> { 265 let Latency = 4; 266 let NumMicroOps = 2; 267} 268 269def V2Write_6cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> { 270 let Latency = 6; 271 let NumMicroOps = 2; 272} 273 274def V2Write_6cyc_2L : SchedWriteRes<[V2UnitL, V2UnitL]> { 275 let Latency = 6; 276 let NumMicroOps = 2; 277} 278 279def V2Write_8cyc_1L_1V : SchedWriteRes<[V2UnitL, V2UnitV]> { 280 let Latency = 8; 281 let NumMicroOps = 2; 282} 283 284def V2Write_4cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> { 285 let Latency = 4; 286 let NumMicroOps = 2; 287} 288 289def V2Write_3cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { 290 let Latency = 3; 291 let NumMicroOps = 2; 292} 293 294def V2Write_4cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { 295 let Latency = 4; 296 let NumMicroOps = 2; 297} 298 299def V2Write_1cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { 300 let Latency = 1; 301 let NumMicroOps = 2; 302} 303 304def V2Write_2cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { 305 let Latency = 2; 306 let NumMicroOps = 2; 307} 308 309def V2Write_6cyc_2V1 : SchedWriteRes<[V2UnitV1, V2UnitV1]> { 310 let Latency = 6; 311 let NumMicroOps = 2; 312} 313 314def V2Write_4cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> { 315 let Latency = 4; 316 let NumMicroOps = 2; 317} 318 319def V2Write_5cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> { 320 let Latency = 5; 321 let NumMicroOps = 2; 322} 323 324def V2Write_5cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> { 325 let Latency = 5; 326 let NumMicroOps = 2; 327} 328 329def V2Write_5cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> { 330 let Latency = 5; 331 let NumMicroOps = 2; 332} 333 334def V2Write_6cyc_1V1_1M0 : SchedWriteRes<[V2UnitV1, V2UnitM0]> { 335 let Latency = 6; 336 let NumMicroOps = 2; 337} 338 339def V2Write_7cyc_1M0_1V02 : SchedWriteRes<[V2UnitM0, V2UnitV02]> { 340 let Latency = 7; 341 let NumMicroOps = 2; 342} 343 344def V2Write_2cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> { 345 let Latency = 2; 346 let NumMicroOps = 2; 347} 348 349def V2Write_3cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> { 350 let Latency = 3; 351 let NumMicroOps = 2; 352} 353 354def V2Write_6cyc_1V_1V13 : SchedWriteRes<[V2UnitV, V2UnitV13]> { 355 let Latency = 6; 356 let NumMicroOps = 2; 357} 358 359def V2Write_6cyc_1L_1M : SchedWriteRes<[V2UnitL, V2UnitM]> { 360 let Latency = 6; 361 let NumMicroOps = 2; 362} 363 364def V2Write_6cyc_1L_1S : SchedWriteRes<[V2UnitL, V2UnitS]> { 365 let Latency = 6; 366 let NumMicroOps = 2; 367} 368 369def V2Write_4cyc_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> { 370 let Latency = 4; 371 let NumMicroOps = 2; 372} 373 374def V2Write_8cyc_1M0_1V01 : SchedWriteRes<[V2UnitM0, V2UnitV01]> { 375 let Latency = 8; 376 let NumMicroOps = 2; 377} 378 379//===----------------------------------------------------------------------===// 380// Define generic 3 micro-op types 381 382def V2Write_1cyc_1L01_1D_1I : SchedWriteRes<[V2UnitL01, V2UnitD, V2UnitI]> { 383 let Latency = 1; 384 let NumMicroOps = 3; 385} 386 387def V2Write_2cyc_1L01_1V01_1I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitI]> { 388 let Latency = 2; 389 let NumMicroOps = 3; 390} 391 392def V2Write_2cyc_1L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> { 393 let Latency = 2; 394 let NumMicroOps = 3; 395} 396 397def V2Write_4cyc_1L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> { 398 let Latency = 4; 399 let NumMicroOps = 3; 400} 401 402def V2Write_9cyc_1L_2V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> { 403 let Latency = 9; 404 let NumMicroOps = 3; 405} 406 407def V2Write_4cyc_3V01 : SchedWriteRes<[V2UnitV01, V2UnitV01, V2UnitV01]> { 408 let Latency = 4; 409 let NumMicroOps = 3; 410} 411 412def V2Write_7cyc_1M_1M0_1V : SchedWriteRes<[V2UnitM, V2UnitM0, V2UnitV]> { 413 let Latency = 7; 414 let NumMicroOps = 3; 415} 416 417def V2Write_2cyc_1L01_1S_1V : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV]> { 418 let Latency = 2; 419 let NumMicroOps = 3; 420} 421 422def V2Write_2cyc_1L01_1S_1V01 : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV01]> { 423 let Latency = 2; 424 let NumMicroOps = 3; 425} 426 427def V2Write_6cyc_3L : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL]> { 428 let Latency = 6; 429 let NumMicroOps = 3; 430} 431 432def V2Write_6cyc_3V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV]> { 433 let Latency = 6; 434 let NumMicroOps = 3; 435} 436 437def V2Write_8cyc_1L_2V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> { 438 let Latency = 8; 439 let NumMicroOps = 3; 440} 441 442//===----------------------------------------------------------------------===// 443// Define generic 4 micro-op types 444 445def V2Write_2cyc_1L01_2V01_1I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01, 446 V2UnitI]> { 447 let Latency = 2; 448 let NumMicroOps = 4; 449} 450 451def V2Write_2cyc_2L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, 452 V2UnitV01, V2UnitV01]> { 453 let Latency = 2; 454 let NumMicroOps = 4; 455} 456 457def V2Write_4cyc_2L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, 458 V2UnitV01, V2UnitV01]> { 459 let Latency = 4; 460 let NumMicroOps = 4; 461} 462 463def V2Write_5cyc_1I_3L : SchedWriteRes<[V2UnitI, V2UnitL, V2UnitL, V2UnitL]> { 464 let Latency = 5; 465 let NumMicroOps = 4; 466} 467 468def V2Write_9cyc_2L_2V1 : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV1, 469 V2UnitV1]> { 470 let Latency = 9; 471 let NumMicroOps = 4; 472} 473 474def V2Write_6cyc_4V0 : SchedWriteRes<[V2UnitV0, V2UnitV0, V2UnitV0, V2UnitV0]> { 475 let Latency = 6; 476 let NumMicroOps = 4; 477} 478 479def V2Write_8cyc_4V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { 480 let Latency = 8; 481 let NumMicroOps = 4; 482} 483 484def V2Write_6cyc_2V_2V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13, 485 V2UnitV13]> { 486 let Latency = 6; 487 let NumMicroOps = 4; 488} 489 490def V2Write_8cyc_2V_2V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13, 491 V2UnitV13]> { 492 let Latency = 8; 493 let NumMicroOps = 4; 494} 495 496def V2Write_6cyc_4V02 : SchedWriteRes<[V2UnitV02, V2UnitV02, V2UnitV02, 497 V2UnitV02]> { 498 let Latency = 6; 499 let NumMicroOps = 4; 500} 501 502def V2Write_6cyc_4V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { 503 let Latency = 6; 504 let NumMicroOps = 4; 505} 506 507def V2Write_8cyc_2L_2V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> { 508 let Latency = 8; 509 let NumMicroOps = 4; 510} 511 512def V2Write_9cyc_2L_2V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> { 513 let Latency = 9; 514 let NumMicroOps = 4; 515} 516 517def V2Write_2cyc_2L01_2V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV, 518 V2UnitV]> { 519 let Latency = 2; 520 let NumMicroOps = 4; 521} 522 523def V2Write_4cyc_2L01_2V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV, 524 V2UnitV]> { 525 let Latency = 4; 526 let NumMicroOps = 4; 527} 528 529def V2Write_8cyc_2M0_2V02 : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitV02, 530 V2UnitV02]> { 531 let Latency = 8; 532 let NumMicroOps = 4; 533} 534 535def V2Write_8cyc_2V_2V1 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV1, 536 V2UnitV1]> { 537 let Latency = 8; 538 let NumMicroOps = 4; 539} 540 541def V2Write_4cyc_2M0_2M : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM, 542 V2UnitM]> { 543 let Latency = 4; 544 let NumMicroOps = 4; 545} 546 547def V2Write_5cyc_2M0_2M : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM, 548 V2UnitM]> { 549 let Latency = 5; 550 let NumMicroOps = 4; 551} 552 553def V2Write_6cyc_2I_2L : SchedWriteRes<[V2UnitI, V2UnitI, V2UnitL, V2UnitL]> { 554 let Latency = 6; 555 let NumMicroOps = 4; 556} 557 558def V2Write_7cyc_4L : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL]> { 559 let Latency = 7; 560 let NumMicroOps = 4; 561} 562 563def V2Write_6cyc_1L01_3V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01, 564 V2UnitV01]> { 565 let Latency = 6; 566 let NumMicroOps = 4; 567} 568 569//===----------------------------------------------------------------------===// 570// Define generic 5 micro-op types 571 572def V2Write_2cyc_1L01_2V01_2I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01, 573 V2UnitI, V2UnitI]> { 574 let Latency = 2; 575 let NumMicroOps = 5; 576} 577 578def V2Write_8cyc_2L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV, 579 V2UnitV]> { 580 let Latency = 8; 581 let NumMicroOps = 5; 582} 583 584def V2Write_9cyc_1L_4V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV, 585 V2UnitV]> { 586 let Latency = 9; 587 let NumMicroOps = 5; 588} 589 590def V2Write_10cyc_1L_4V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV, 591 V2UnitV]> { 592 let Latency = 10; 593 let NumMicroOps = 5; 594} 595 596def V2Write_6cyc_5V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV, 597 V2UnitV]> { 598 let Latency = 6; 599 let NumMicroOps = 5; 600} 601 602//===----------------------------------------------------------------------===// 603// Define generic 6 micro-op types 604 605def V2Write_8cyc_3L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, 606 V2UnitV, V2UnitV, V2UnitV]> { 607 let Latency = 8; 608 let NumMicroOps = 6; 609} 610 611def V2Write_9cyc_3L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, 612 V2UnitV, V2UnitV, V2UnitV]> { 613 let Latency = 9; 614 let NumMicroOps = 6; 615} 616 617def V2Write_9cyc_2L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, 618 V2UnitV, V2UnitV, V2UnitV]> { 619 let Latency = 9; 620 let NumMicroOps = 6; 621} 622 623def V2Write_9cyc_2L_2V_2S : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, 624 V2UnitV, V2UnitS, V2UnitS]> { 625 let Latency = 9; 626 let NumMicroOps = 6; 627} 628 629def V2Write_9cyc_2V_4V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13, 630 V2UnitV13, V2UnitV13, V2UnitV13]> { 631 let Latency = 9; 632 let NumMicroOps = 6; 633} 634 635def V2Write_2cyc_3L01_3V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 636 V2UnitV, V2UnitV, V2UnitV]> { 637 let Latency = 2; 638 let NumMicroOps = 6; 639} 640 641def V2Write_4cyc_2L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01, 642 V2UnitV01, V2UnitV01, V2UnitV01]> { 643 let Latency = 4; 644 let NumMicroOps = 6; 645} 646 647def V2Write_5cyc_2L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01, 648 V2UnitV01, V2UnitV01, V2UnitV01]> { 649 let Latency = 5; 650 let NumMicroOps = 6; 651} 652 653def V2Write_2cyc_3L01_3V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 654 V2UnitV01, V2UnitV01, V2UnitV01]> { 655 let Latency = 2; 656 let NumMicroOps = 6; 657} 658 659def V2Write_4cyc_2L01_2S_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitS, 660 V2UnitS, V2UnitV01, V2UnitV01]> { 661 let Latency = 4; 662 let NumMicroOps = 6; 663} 664 665//===----------------------------------------------------------------------===// 666// Define generic 7 micro-op types 667 668def V2Write_8cyc_3L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, 669 V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { 670 let Latency = 8; 671 let NumMicroOps = 7; 672} 673 674//===----------------------------------------------------------------------===// 675// Define generic 8 micro-op types 676 677def V2Write_2cyc_4L01_4V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 678 V2UnitL01, V2UnitV, V2UnitV, V2UnitV, 679 V2UnitV]> { 680 let Latency = 2; 681 let NumMicroOps = 8; 682} 683 684def V2Write_2cyc_4L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 685 V2UnitL01, V2UnitV01, V2UnitV01, 686 V2UnitV01, V2UnitV01]> { 687 let Latency = 2; 688 let NumMicroOps = 8; 689} 690 691def V2Write_4cyc_4L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 692 V2UnitL01, V2UnitV01, V2UnitV01, 693 V2UnitV01, V2UnitV01]> { 694 let Latency = 4; 695 let NumMicroOps = 8; 696} 697 698def V2Write_6cyc_2L01_6V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01, 699 V2UnitV01, V2UnitV01, V2UnitV01, 700 V2UnitV01, V2UnitV01]> { 701 let Latency = 6; 702 let NumMicroOps = 8; 703} 704 705def V2Write_8cyc_4L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL, 706 V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { 707 let Latency = 8; 708 let NumMicroOps = 8; 709} 710 711//===----------------------------------------------------------------------===// 712// Define generic 9 micro-op types 713 714def V2Write_6cyc_3L01_6V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 715 V2UnitV01, V2UnitV01, V2UnitV01, 716 V2UnitV01, V2UnitV01, V2UnitV01]> { 717 let Latency = 6; 718 let NumMicroOps = 9; 719} 720 721def V2Write_10cyc_1L_8V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV, 722 V2UnitV, V2UnitV, V2UnitV, V2UnitV, 723 V2UnitV]> { 724 let Latency = 10; 725 let NumMicroOps = 9; 726} 727 728def V2Write_10cyc_3V_3L_3S : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, 729 V2UnitL, V2UnitL, V2UnitL, 730 V2UnitS, V2UnitS, V2UnitS]> { 731 let Latency = 10; 732 let NumMicroOps = 9; 733} 734 735//===----------------------------------------------------------------------===// 736// Define generic 10 micro-op types 737 738def V2Write_9cyc_6L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL, 739 V2UnitL, V2UnitL, V2UnitV, V2UnitV, 740 V2UnitV, V2UnitV]> { 741 let Latency = 9; 742 let NumMicroOps = 10; 743} 744 745//===----------------------------------------------------------------------===// 746// Define generic 12 micro-op types 747 748def V2Write_5cyc_4L01_8V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 749 V2UnitL01, V2UnitV01, V2UnitV01, 750 V2UnitV01, V2UnitV01, V2UnitV01, 751 V2UnitV01, V2UnitV01, V2UnitV01]> { 752 let Latency = 5; 753 let NumMicroOps = 12; 754} 755 756def V2Write_9cyc_4L_8V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, 757 V2UnitL, V2UnitV, V2UnitV, 758 V2UnitV, V2UnitV, V2UnitV, 759 V2UnitV, V2UnitV, V2UnitV]> { 760 let Latency = 9; 761 let NumMicroOps = 12; 762} 763 764def V2Write_10cyc_4L_8V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, 765 V2UnitL, V2UnitV, V2UnitV, 766 V2UnitV, V2UnitV, V2UnitV, 767 V2UnitV, V2UnitV, V2UnitV]> { 768 let Latency = 10; 769 let NumMicroOps = 12; 770} 771 772//===----------------------------------------------------------------------===// 773// Define generic 16 micro-op types 774 775def V2Write_7cyc_4L01_12V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 776 V2UnitL01, V2UnitV01, V2UnitV01, 777 V2UnitV01, V2UnitV01, V2UnitV01, 778 V2UnitV01, V2UnitV01, V2UnitV01, 779 V2UnitV01, V2UnitV01, V2UnitV01, 780 V2UnitV01]> { 781 let Latency = 7; 782 let NumMicroOps = 16; 783} 784 785def V2Write_10cyc_4L_8V_4S : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, 786 V2UnitL, V2UnitV, V2UnitV, 787 V2UnitV, V2UnitV, V2UnitV, 788 V2UnitV, V2UnitV, V2UnitV, 789 V2UnitS, V2UnitS, V2UnitS, 790 V2UnitS]> { 791 let Latency = 10; 792 let NumMicroOps = 16; 793} 794 795//===----------------------------------------------------------------------===// 796// Define generic 18 micro-op types 797 798def V2Write_7cyc_9L01_9V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 799 V2UnitL01, V2UnitL01, V2UnitL01, 800 V2UnitL01, V2UnitL01, V2UnitL01, 801 V2UnitV01, V2UnitV01, V2UnitV01, 802 V2UnitV01, V2UnitV01, V2UnitV01, 803 V2UnitV01, V2UnitV01, V2UnitV01]> { 804 let Latency = 7; 805 let NumMicroOps = 18; 806} 807 808//===----------------------------------------------------------------------===// 809// Define generic 27 micro-op types 810 811def V2Write_7cyc_9L01_9S_9V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 812 V2UnitL01, V2UnitL01, V2UnitL01, 813 V2UnitL01, V2UnitL01, V2UnitL01, 814 V2UnitS, V2UnitS, V2UnitS, 815 V2UnitS, V2UnitS, V2UnitS, 816 V2UnitS, V2UnitS, V2UnitS, 817 V2UnitV01, V2UnitV01, V2UnitV01, 818 V2UnitV01, V2UnitV01, V2UnitV01, 819 V2UnitV01, V2UnitV01, 820 V2UnitV01]> { 821 let Latency = 7; 822 let NumMicroOps = 27; 823} 824 825//===----------------------------------------------------------------------===// 826// Define generic 36 micro-op types 827 828def V2Write_11cyc_18L01_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 829 V2UnitL01, V2UnitL01, V2UnitL01, 830 V2UnitL01, V2UnitL01, V2UnitL01, 831 V2UnitL01, V2UnitL01, V2UnitL01, 832 V2UnitL01, V2UnitL01, V2UnitL01, 833 V2UnitL01, V2UnitL01, V2UnitL01, 834 V2UnitV01, V2UnitV01, V2UnitV01, 835 V2UnitV01, V2UnitV01, V2UnitV01, 836 V2UnitV01, V2UnitV01, V2UnitV01, 837 V2UnitV01, V2UnitV01, V2UnitV01, 838 V2UnitV01, V2UnitV01, V2UnitV01, 839 V2UnitV01, V2UnitV01, 840 V2UnitV01]> { 841 let Latency = 11; 842 let NumMicroOps = 36; 843} 844 845//===----------------------------------------------------------------------===// 846// Define generic 54 micro-op types 847 848def V2Write_11cyc_18L01_18S_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, 849 V2UnitL01, V2UnitL01, 850 V2UnitL01, V2UnitL01, 851 V2UnitL01, V2UnitL01, 852 V2UnitL01, V2UnitL01, 853 V2UnitL01, V2UnitL01, 854 V2UnitL01, V2UnitL01, 855 V2UnitL01, V2UnitL01, 856 V2UnitL01, V2UnitL01, 857 V2UnitS, V2UnitS, V2UnitS, 858 V2UnitS, V2UnitS, V2UnitS, 859 V2UnitS, V2UnitS, V2UnitS, 860 V2UnitS, V2UnitS, V2UnitS, 861 V2UnitS, V2UnitS, V2UnitS, 862 V2UnitS, V2UnitS, V2UnitS, 863 V2UnitV01, V2UnitV01, 864 V2UnitV01, V2UnitV01, 865 V2UnitV01, V2UnitV01, 866 V2UnitV01, V2UnitV01, 867 V2UnitV01, V2UnitV01, 868 V2UnitV01, V2UnitV01, 869 V2UnitV01, V2UnitV01, 870 V2UnitV01, V2UnitV01, 871 V2UnitV01, V2UnitV01]> { 872 let Latency = 11; 873 let NumMicroOps = 54; 874} 875 876//===----------------------------------------------------------------------===// 877// Define predicate-controlled types 878 879def V2Write_ArithI : SchedWriteVariant<[ 880 SchedVar<IsCheapLSL, [V2Write_1cyc_1I]>, 881 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>; 882 883def V2Write_ArithF : SchedWriteVariant<[ 884 SchedVar<IsCheapLSL, [V2Write_1cyc_1F]>, 885 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>; 886 887def V2Write_Logical : SchedWriteVariant<[ 888 SchedVar<NeoverseNoLSL, [V2Write_1cyc_1F]>, 889 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>; 890 891def V2Write_Extr : SchedWriteVariant<[ 892 SchedVar<IsRORImmIdiomPred, [V2Write_1cyc_1I]>, 893 SchedVar<NoSchedPred, [V2Write_3cyc_1I_1M]>]>; 894 895def V2Write_LdrHQ : SchedWriteVariant<[ 896 SchedVar<NeoverseHQForm, [V2Write_7cyc_1I_1L]>, 897 SchedVar<NoSchedPred, [V2Write_6cyc_1L]>]>; 898 899def V2Write_StrHQ : SchedWriteVariant<[ 900 SchedVar<NeoverseHQForm, [V2Write_2cyc_1L01_1V01_1I]>, 901 SchedVar<NoSchedPred, [V2Write_2cyc_1L01_1V01]>]>; 902 903def V2Write_2or3cyc_1M : SchedWriteVariant<[ 904 SchedVar<NeoversePdIsPg, [V2Write_3cyc_1M]>, 905 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>; 906 907def V2Write_3or4cyc_2M : SchedWriteVariant<[ 908 SchedVar<NeoversePdIsPg, [V2Write_4cyc_2M]>, 909 SchedVar<NoSchedPred, [V2Write_3cyc_2M]>]>; 910 911def V2Write_1or2cyc_1M0 : SchedWriteVariant<[ 912 SchedVar<NeoversePdIsPg, [V2Write_2cyc_1M0]>, 913 SchedVar<NoSchedPred, [V2Write_1cyc_1M0]>]>; 914 915def V2Write_2or3cyc_1M0 : SchedWriteVariant<[ 916 SchedVar<NeoversePdIsPg, [V2Write_3cyc_1M0]>, 917 SchedVar<NoSchedPred, [V2Write_2cyc_1M0]>]>; 918 919def V2Write_1or2cyc_1M0_1M : SchedWriteVariant<[ 920 SchedVar<NeoversePdIsPg, [V2Write_2cyc_1M0_1M]>, 921 SchedVar<NoSchedPred, [V2Write_1cyc_1M0_1M]>]>; 922 923def V2Write_3or4cyc_1M0_1M : SchedWriteVariant<[ 924 SchedVar<NeoversePdIsPg, [V2Write_4cyc_1M0_1M]>, 925 SchedVar<NoSchedPred, [V2Write_3cyc_1M0_1M]>]>; 926 927def V2Write_4or5cyc_2M0_2M : SchedWriteVariant<[ 928 SchedVar<NeoversePdIsPg, [V2Write_5cyc_2M0_2M]>, 929 SchedVar<NoSchedPred, [V2Write_4cyc_2M0_2M]>]>; 930 931def V2Write_4or5cyc_1V0_1M0 : SchedWriteVariant<[ 932 SchedVar<NeoversePdIsPg, [V2Write_5cyc_1V0_1M0]>, 933 SchedVar<NoSchedPred, [V2Write_4cyc_1V0_1M0]>]>; 934 935def V2Write_2or3cyc_1V0_1M : SchedWriteVariant<[ 936 SchedVar<NeoversePdIsPg, [V2Write_3cyc_1V0_1M]>, 937 SchedVar<NoSchedPred, [V2Write_2cyc_1V0_1M]>]>; 938 939def V2Write_IncDec : SchedWriteVariant<[ 940 SchedVar<NeoverseCheapIncDec, [V2Write_1cyc_1F]>, 941 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>; 942 943//===----------------------------------------------------------------------===// 944// Define forwarded types 945 946// NOTE: SOG, p. 16, n. 2: Accumulator forwarding is not supported for 947// consumers of 64 bit multiply high operations? 948def V2Wr_IM : SchedWriteRes<[V2UnitM]> { let Latency = 2; } 949def V2Wr_IMA : SchedWriteRes<[V2UnitM0]> { let Latency = 2; } 950def V2Wr_IMUL : SchedWriteVariant<[ 951 SchedVar<IsReg3ZeroPred, [V2Wr_IM]>, 952 SchedVar<NoSchedPred, [V2Wr_IMA]>]>; 953def V2Rd_IMA : SchedReadAdvance<1, [V2Wr_IMA]>; 954 955def V2Wr_FMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; } 956def V2Rd_FMA : SchedReadAdvance<2, [WriteFMul, V2Wr_FMA]>; 957 958def V2Wr_VA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } 959def V2Rd_VA : SchedReadAdvance<3, [V2Wr_VA]>; 960 961def V2Wr_VDOT : SchedWriteRes<[V2UnitV]> { let Latency = 3; } 962def V2Rd_VDOT : SchedReadAdvance<2, [V2Wr_VDOT]>; 963 964def V2Wr_VMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; } 965def V2Rd_VMMA : SchedReadAdvance<2, [V2Wr_VMMA]>; 966 967def V2Wr_VMA : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 968def V2Rd_VMA : SchedReadAdvance<3, [V2Wr_VMA]>; 969 970def V2Wr_VMAH : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; } 971def V2Rd_VMAH : SchedReadAdvance<2, [V2Wr_VMAH]>; 972 973def V2Wr_VMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 974def V2Rd_VMAL : SchedReadAdvance<3, [V2Wr_VMAL]>; 975 976def V2Wr_VPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } 977def V2Rd_VPA : SchedReadAdvance<3, [V2Wr_VPA]>; 978 979def V2Wr_VSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } 980def V2Rd_VSA : SchedReadAdvance<3, [V2Wr_VSA]>; 981 982def V2Wr_VFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; } 983def V2Rd_VFCMA : SchedReadAdvance<2, [V2Wr_VFCMA]>; 984 985def V2Wr_VFM : SchedWriteRes<[V2UnitV]> { let Latency = 3; } 986def V2Wr_VFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; } 987def V2Rd_VFMA : SchedReadAdvance<2, [V2Wr_VFM, V2Wr_VFMA]>; 988 989def V2Wr_VFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; } 990def V2Rd_VFMAL : SchedReadAdvance<2, [V2Wr_VFMAL]>; 991 992def V2Wr_VBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; } 993def V2Rd_VBFDOT : SchedReadAdvance<2, [V2Wr_VBFDOT]>; 994def V2Wr_VBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; } 995def V2Rd_VBFMMA : SchedReadAdvance<2, [V2Wr_VBFMMA]>; 996def V2Wr_VBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; } 997def V2Rd_VBFMAL : SchedReadAdvance<3, [V2Wr_VBFMAL]>; 998 999def V2Wr_CRC : SchedWriteRes<[V2UnitM0]> { let Latency = 2; } 1000def V2Rd_CRC : SchedReadAdvance<1, [V2Wr_CRC]>; 1001 1002def V2Wr_ZA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } 1003def V2Rd_ZA : SchedReadAdvance<3, [V2Wr_ZA]>; 1004def V2Wr_ZPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } 1005def V2Rd_ZPA : SchedReadAdvance<3, [V2Wr_ZPA]>; 1006def V2Wr_ZSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } 1007def V2Rd_ZSA : SchedReadAdvance<3, [V2Wr_ZSA]>; 1008 1009def V2Wr_ZDOTB : SchedWriteRes<[V2UnitV]> { let Latency = 3; } 1010def V2Rd_ZDOTB : SchedReadAdvance<2, [V2Wr_ZDOTB]>; 1011def V2Wr_ZDOTH : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 1012def V2Rd_ZDOTH : SchedReadAdvance<3, [V2Wr_ZDOTH]>; 1013 1014// NOTE: SOG p. 43: Complex multiply-add B, H, S element size: How to reduce 1015// throughput to 1 in case of forwarding? 1016def V2Wr_ZCMABHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 1017def V2Rd_ZCMABHS : SchedReadAdvance<3, [V2Wr_ZCMABHS]>; 1018def V2Wr_ZCMAD : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; } 1019def V2Rd_ZCMAD : SchedReadAdvance<2, [V2Wr_ZCMAD]>; 1020 1021def V2Wr_ZMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; } 1022def V2Rd_ZMMA : SchedReadAdvance<2, [V2Wr_ZMMA]>; 1023 1024def V2Wr_ZMABHS : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; } 1025def V2Rd_ZMABHS : SchedReadAdvance<3, [V2Wr_ZMABHS]>; 1026def V2Wr_ZMAD : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; } 1027def V2Rd_ZMAD : SchedReadAdvance<2, [V2Wr_ZMAD]>; 1028 1029def V2Wr_ZMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 1030def V2Rd_ZMAL : SchedReadAdvance<3, [V2Wr_ZMAL]>; 1031 1032def V2Wr_ZMASQL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 1033def V2Wr_ZMASQBHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 1034def V2Wr_ZMASQD : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; } 1035def V2Rd_ZMASQ : SchedReadAdvance<2, [V2Wr_ZMASQL, V2Wr_ZMASQBHS, 1036 V2Wr_ZMASQD]>; 1037 1038def V2Wr_ZFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 5; } 1039def V2Rd_ZFCMA : SchedReadAdvance<3, [V2Wr_ZFCMA]>; 1040 1041def V2Wr_ZFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; } 1042def V2Rd_ZFMA : SchedReadAdvance<2, [V2Wr_ZFMA]>; 1043 1044def V2Wr_ZFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; } 1045def V2Rd_ZFMAL : SchedReadAdvance<2, [V2Wr_ZFMAL]>; 1046 1047def V2Wr_ZBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; } 1048def V2Rd_ZBFDOT : SchedReadAdvance<2, [V2Wr_ZBFDOT]>; 1049def V2Wr_ZBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; } 1050def V2Rd_ZBFMMA : SchedReadAdvance<2, [V2Wr_ZBFMMA]>; 1051def V2Wr_ZBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; } 1052def V2Rd_ZBFMAL : SchedReadAdvance<3, [V2Wr_ZBFMAL]>; 1053 1054//===----------------------------------------------------------------------===// 1055// Define types with long resource cycles (rc) 1056 1057def V2Write_6cyc_1V1_5rc : SchedWriteRes<[V2UnitV1]> { let Latency = 6; let ResourceCycles = [ 5]; } 1058def V2Write_7cyc_1V02_7rc : SchedWriteRes<[V2UnitV02]> { let Latency = 7; let ResourceCycles = [ 7]; } 1059def V2Write_10cyc_1V02_5rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ResourceCycles = [ 5]; } 1060def V2Write_10cyc_1V02_9rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ResourceCycles = [ 9]; } 1061def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ResourceCycles = [10]; } 1062def V2Write_10cyc_1V0_9rc : SchedWriteRes<[V2UnitV0]> { let Latency = 10; let ResourceCycles = [ 9]; } 1063def V2Write_10cyc_1V1_9rc : SchedWriteRes<[V2UnitV1]> { let Latency = 10; let ResourceCycles = [ 9]; } 1064def V2Write_13cyc_1V0_12rc : SchedWriteRes<[V2UnitV0]> { let Latency = 13; let ResourceCycles = [12]; } 1065def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ResourceCycles = [12]; } 1066def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ResourceCycles = [13]; } 1067def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ResourceCycles = [14]; } 1068def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ResourceCycles = [15]; } 1069def V2Write_16cyc_1V0_14rc : SchedWriteRes<[V2UnitV0]> { let Latency = 16; let ResourceCycles = [14]; } 1070 1071// Miscellaneous 1072// ----------------------------------------------------------------------------- 1073 1074def : InstRW<[WriteI], (instrs COPY)>; 1075 1076// §3.3 Branch instructions 1077// ----------------------------------------------------------------------------- 1078 1079// Branch, immed 1080// Compare and branch 1081def : SchedAlias<WriteBr, V2Write_1cyc_1B>; 1082 1083// Branch, register 1084def : SchedAlias<WriteBrReg, V2Write_1cyc_1B>; 1085 1086// Branch and link, immed 1087// Branch and link, register 1088def : InstRW<[V2Write_1cyc_1B_1R], (instrs BL, BLR)>; 1089 1090// §3.4 Arithmetic and Logical Instructions 1091// ----------------------------------------------------------------------------- 1092 1093// ALU, basic 1094// ALU, basic, flagset 1095def : SchedAlias<WriteI, V2Write_1cyc_1I>; 1096def : InstRW<[V2Write_1cyc_1F], 1097 (instregex "^(ADC|SBC)S[WX]r$")>; 1098 1099// ALU, extend and shift 1100def : SchedAlias<WriteIEReg, V2Write_2cyc_1M>; 1101 1102// Arithmetic, LSL shift, shift <= 4 1103// Arithmetic, flagset, LSL shift, shift <= 4 1104// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 1105def : SchedAlias<WriteISReg, V2Write_ArithI>; 1106def : InstRW<[V2Write_ArithF], 1107 (instregex "^(ADD|SUB)S[WX]rs$")>; 1108 1109// Arithmetic, immediate to logical address tag 1110def : InstRW<[V2Write_2cyc_1M], (instrs ADDG, SUBG)>; 1111 1112// Convert floating-point condition flags 1113// Flag manipulation instructions 1114def : WriteRes<WriteSys, []> { let Latency = 1; } 1115 1116// Insert Random Tags 1117def : InstRW<[V2Write_2cyc_1M], (instrs IRG, IRGstack)>; 1118 1119// Insert Tag Mask 1120// Subtract Pointer 1121// Subtract Pointer, flagset 1122def : InstRW<[V2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>; 1123 1124// Logical, shift, no flagset 1125def : InstRW<[V2Write_1cyc_1I], 1126 (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>; 1127 1128// Logical, shift, flagset 1129def : InstRW<[V2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>; 1130 1131// Move and shift instructions 1132// ----------------------------------------------------------------------------- 1133 1134def : SchedAlias<WriteImm, V2Write_1cyc_1I>; 1135 1136// §3.5 Divide and multiply instructions 1137// ----------------------------------------------------------------------------- 1138 1139// SDIV, UDIV 1140def : SchedAlias<WriteID32, V2Write_12cyc_1M0>; 1141def : SchedAlias<WriteID64, V2Write_20cyc_1M0>; 1142 1143def : SchedAlias<WriteIM32, V2Write_2cyc_1M>; 1144def : SchedAlias<WriteIM64, V2Write_2cyc_1M>; 1145 1146// Multiply 1147// Multiply accumulate, W-form 1148// Multiply accumulate, X-form 1149def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA], 1150 (instregex "^M(ADD|SUB)[WX]rrr$")>; 1151 1152// Multiply accumulate long 1153// Multiply long 1154def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA], 1155 (instregex "^(S|U)M(ADD|SUB)Lrrr$")>; 1156 1157// Multiply high 1158def : InstRW<[V2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>; 1159 1160// Pointer Authentication Instructions (v8.3 PAC) 1161// ----------------------------------------------------------------------------- 1162 1163// Authenticate data address 1164// Authenticate instruction address 1165// Compute pointer authentication code for data address 1166// Compute pointer authentication code, using generic key 1167// Compute pointer authentication code for instruction address 1168def : InstRW<[V2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>; 1169 1170// Branch and link, register, with pointer authentication 1171// Branch, register, with pointer authentication 1172// Branch, return, with pointer authentication 1173def : InstRW<[V2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA, 1174 BRAAZ, BRAB, BRABZ, RETAA, RETAB, 1175 ERETAA, ERETAB)>; 1176 1177 1178// Load register, with pointer authentication 1179def : InstRW<[V2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>; 1180 1181// Strip pointer authentication code 1182def : InstRW<[V2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>; 1183 1184// Miscellaneous data-processing instructions 1185// ----------------------------------------------------------------------------- 1186 1187// Address generation 1188def : InstRW<[V2Write_1cyc_1F], (instrs ADR, ADRP)>; 1189 1190// Bitfield extract, one reg 1191// Bitfield extract, two regs 1192def : SchedAlias<WriteExtr, V2Write_Extr>; 1193def : InstRW<[V2Write_Extr], (instrs EXTRWrri, EXTRXrri)>; 1194 1195// Bitfield move, basic 1196def : SchedAlias<WriteIS, V2Write_1cyc_1I>; 1197 1198// Bitfield move, insert 1199def : InstRW<[V2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>; 1200 1201// Load instructions 1202// ----------------------------------------------------------------------------- 1203 1204// NOTE: SOG p. 19: Throughput of LDN?P X-form should be 2, but reported as 3. 1205 1206def : SchedAlias<WriteLD, V2Write_4cyc_1L>; 1207def : SchedAlias<WriteLDIdx, V2Write_4cyc_1L>; 1208 1209// Load register, literal 1210def : InstRW<[V2Write_5cyc_1L_1F], (instrs LDRWl, LDRXl, LDRSWl, PRFMl)>; 1211 1212// Load pair, signed immed offset, signed words 1213def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi], (instrs LDPSWi)>; 1214 1215// Load pair, immed post-index or immed pre-index, signed words 1216def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi, WriteAdr], 1217 (instregex "^LDPSW(post|pre)$")>; 1218 1219// Store instructions 1220// ----------------------------------------------------------------------------- 1221 1222// NOTE: SOG, p. 20: Unsure if STRH uses pipeline I. 1223 1224def : SchedAlias<WriteST, V2Write_1cyc_1L01_1D>; 1225def : SchedAlias<WriteSTIdx, V2Write_1cyc_1L01_1D>; 1226def : SchedAlias<WriteSTP, V2Write_1cyc_1L01_1D>; 1227def : SchedAlias<WriteAdr, V2Write_1cyc_1I>; // copied from A57. 1228 1229// Tag load instructions 1230// ----------------------------------------------------------------------------- 1231 1232// Load allocation tag 1233// Load multiple allocation tags 1234def : InstRW<[V2Write_4cyc_1L], (instrs LDG, LDGM)>; 1235 1236// Tag store instructions 1237// ----------------------------------------------------------------------------- 1238 1239// Store allocation tags to one or two granules, post-index 1240// Store allocation tags to one or two granules, pre-index 1241// Store allocation tag to one or two granules, zeroing, post-index 1242// Store Allocation Tag to one or two granules, zeroing, pre-index 1243// Store allocation tag and reg pair to memory, post-Index 1244// Store allocation tag and reg pair to memory, pre-Index 1245def : InstRW<[V2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex, 1246 ST2GPreIndex, ST2GPostIndex, 1247 STZGPreIndex, STZGPostIndex, 1248 STZ2GPreIndex, STZ2GPostIndex, 1249 STGPpre, STGPpost)>; 1250 1251// Store allocation tags to one or two granules, signed offset 1252// Store allocation tag to two granules, zeroing, signed offset 1253// Store allocation tag and reg pair to memory, signed offset 1254// Store multiple allocation tags 1255def : InstRW<[V2Write_1cyc_1L01_1D], (instrs STGi, ST2Gi, STZGi, 1256 STZ2Gi, STGPi, STGM, STZGM)>; 1257 1258// FP data processing instructions 1259// ----------------------------------------------------------------------------- 1260 1261// FP absolute value 1262// FP arithmetic 1263// FP min/max 1264// FP negate 1265// FP select 1266def : SchedAlias<WriteF, V2Write_2cyc_1V>; 1267 1268// FP compare 1269def : SchedAlias<WriteFCmp, V2Write_2cyc_1V0>; 1270 1271// FP divide, square root 1272def : SchedAlias<WriteFDiv, V2Write_7cyc_1V02>; 1273 1274// FP divide, H-form 1275def : InstRW<[V2Write_7cyc_1V02], (instrs FDIVHrr)>; 1276// FP divide, S-form 1277def : InstRW<[V2Write_10cyc_1V02], (instrs FDIVSrr)>; 1278// FP divide, D-form 1279def : InstRW<[V2Write_15cyc_1V02], (instrs FDIVDrr)>; 1280 1281// FP square root, H-form 1282def : InstRW<[V2Write_7cyc_1V02], (instrs FSQRTHr)>; 1283// FP square root, S-form 1284def : InstRW<[V2Write_9cyc_1V02], (instrs FSQRTSr)>; 1285// FP square root, D-form 1286def : InstRW<[V2Write_16cyc_1V02], (instrs FSQRTDr)>; 1287 1288// FP multiply 1289def : WriteRes<WriteFMul, [V2UnitV]> { let Latency = 3; } 1290 1291// FP multiply accumulate 1292def : InstRW<[V2Wr_FMA, ReadDefault, ReadDefault, V2Rd_FMA], 1293 (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>; 1294 1295// FP round to integral 1296def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$", 1297 "^FRINT(32|64)[XZ][SD]r$")>; 1298 1299// FP miscellaneous instructions 1300// ----------------------------------------------------------------------------- 1301 1302// FP convert, from gen to vec reg 1303def : InstRW<[V2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>; 1304 1305// FP convert, from vec to gen reg 1306def : InstRW<[V2Write_3cyc_1V01], 1307 (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]ri?$")>; 1308 1309// FP convert, Javascript from vec to gen reg 1310def : SchedAlias<WriteFCvt, V2Write_3cyc_1V0>; 1311 1312// FP convert, from vec to vec reg 1313def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr, 1314 FCVTHDr, FCVTSDr, FCVTXNv1i64)>; 1315 1316// FP move, immed 1317// FP move, register 1318def : SchedAlias<WriteFImm, V2Write_2cyc_1V>; 1319 1320// FP transfer, from gen to low half of vec reg 1321def : InstRW<[V2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>; 1322 1323// FP transfer, from gen to high half of vec reg 1324def : InstRW<[V2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>; 1325 1326// FP transfer, from vec to gen reg 1327def : SchedAlias<WriteFCopy, V2Write_2cyc_2V01>; 1328 1329// FP load instructions 1330// ----------------------------------------------------------------------------- 1331 1332// Load vector reg, literal, S/D/Q forms 1333def : InstRW<[V2Write_7cyc_1F_1L], (instregex "^LDR[SDQ]l$")>; 1334 1335// Load vector reg, unscaled immed 1336def : InstRW<[V2Write_6cyc_1L], (instregex "^LDUR[BHSDQ]i$")>; 1337 1338// Load vector reg, immed post-index 1339// Load vector reg, immed pre-index 1340def : InstRW<[V2Write_6cyc_1I_1L, WriteAdr], 1341 (instregex "^LDR[BHSDQ](pre|post)$")>; 1342 1343// Load vector reg, unsigned immed 1344def : InstRW<[V2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>; 1345 1346// Load vector reg, register offset, basic 1347// Load vector reg, register offset, scale, S/D-form 1348// Load vector reg, register offset, scale, H/Q-form 1349// Load vector reg, register offset, extend 1350// Load vector reg, register offset, extend, scale, S/D-form 1351// Load vector reg, register offset, extend, scale, H/Q-form 1352def : InstRW<[V2Write_LdrHQ, ReadAdrBase], (instregex "^LDR[BHSDQ]ro[WX]$")>; 1353 1354// Load vector pair, immed offset, S/D-form 1355def : InstRW<[V2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>; 1356 1357// Load vector pair, immed offset, Q-form 1358def : InstRW<[V2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>; 1359 1360// Load vector pair, immed post-index, S/D-form 1361// Load vector pair, immed pre-index, S/D-form 1362def : InstRW<[V2Write_6cyc_1I_1L, WriteLDHi, WriteAdr], 1363 (instregex "^LDP[SD](pre|post)$")>; 1364 1365// Load vector pair, immed post-index, Q-form 1366// Load vector pair, immed pre-index, Q-form 1367def : InstRW<[V2Write_6cyc_2I_2L, WriteLDHi, WriteAdr], (instrs LDPQpost, 1368 LDPQpre)>; 1369 1370// FP store instructions 1371// ----------------------------------------------------------------------------- 1372 1373// Store vector reg, unscaled immed, B/H/S/D-form 1374// Store vector reg, unscaled immed, Q-form 1375def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>; 1376 1377// Store vector reg, immed post-index, B/H/S/D-form 1378// Store vector reg, immed post-index, Q-form 1379// Store vector reg, immed pre-index, B/H/S/D-form 1380// Store vector reg, immed pre-index, Q-form 1381def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I], 1382 (instregex "^STR[BHSDQ](pre|post)$")>; 1383 1384// Store vector reg, unsigned immed, B/H/S/D-form 1385// Store vector reg, unsigned immed, Q-form 1386def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>; 1387 1388// Store vector reg, register offset, basic, B/H/S/D-form 1389// Store vector reg, register offset, basic, Q-form 1390// Store vector reg, register offset, scale, H-form 1391// Store vector reg, register offset, scale, S/D-form 1392// Store vector reg, register offset, scale, Q-form 1393// Store vector reg, register offset, extend, B/H/S/D-form 1394// Store vector reg, register offset, extend, Q-form 1395// Store vector reg, register offset, extend, scale, H-form 1396// Store vector reg, register offset, extend, scale, S/D-form 1397// Store vector reg, register offset, extend, scale, Q-form 1398def : InstRW<[V2Write_StrHQ, ReadAdrBase], 1399 (instregex "^STR[BHSDQ]ro[WX]$")>; 1400 1401// Store vector pair, immed offset, S-form 1402// Store vector pair, immed offset, D-form 1403def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STN?P[SD]i$")>; 1404 1405// Store vector pair, immed offset, Q-form 1406def : InstRW<[V2Write_2cyc_1L01_2V01], (instrs STPQi, STNPQi)>; 1407 1408// Store vector pair, immed post-index, S-form 1409// Store vector pair, immed post-index, D-form 1410// Store vector pair, immed pre-index, S-form 1411// Store vector pair, immed pre-index, D-form 1412def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I], 1413 (instregex "^STP[SD](pre|post)$")>; 1414 1415// Store vector pair, immed post-index, Q-form 1416def : InstRW<[V2Write_2cyc_1L01_2V01_1I], (instrs STPQpost)>; 1417 1418// Store vector pair, immed pre-index, Q-form 1419def : InstRW<[V2Write_2cyc_1L01_2V01_2I], (instrs STPQpre)>; 1420 1421// ASIMD integer instructions 1422// ----------------------------------------------------------------------------- 1423 1424// ASIMD absolute diff 1425// ASIMD absolute diff long 1426// ASIMD arith, basic 1427// ASIMD arith, complex 1428// ASIMD arith, pair-wise 1429// ASIMD compare 1430// ASIMD logical 1431// ASIMD max/min, basic and pair-wise 1432def : SchedAlias<WriteVd, V2Write_2cyc_1V>; 1433def : SchedAlias<WriteVq, V2Write_2cyc_1V>; 1434 1435// ASIMD absolute diff accum 1436// ASIMD absolute diff accum long 1437def : InstRW<[V2Wr_VA, V2Rd_VA], (instregex "^[SU]ABAL?v")>; 1438 1439// ASIMD arith, reduce, 4H/4S 1440def : InstRW<[V2Write_2cyc_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>; 1441 1442// ASIMD arith, reduce, 8B/8H 1443def : InstRW<[V2Write_4cyc_1V13_1V], 1444 (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>; 1445 1446// ASIMD arith, reduce, 16B 1447def : InstRW<[V2Write_4cyc_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>; 1448 1449// ASIMD dot product 1450// ASIMD dot product using signed and unsigned integers 1451def : InstRW<[V2Wr_VDOT, V2Rd_VDOT], 1452 (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>; 1453 1454// ASIMD matrix multiply-accumulate 1455def : InstRW<[V2Wr_VMMA, V2Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>; 1456 1457// ASIMD max/min, reduce, 4H/4S 1458def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$", 1459 "^[SU](MAX|MIN)Vv4i32v$")>; 1460 1461// ASIMD max/min, reduce, 8B/8H 1462def : InstRW<[V2Write_4cyc_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$", 1463 "^[SU](MAX|MIN)Vv8i16v$")>; 1464 1465// ASIMD max/min, reduce, 16B 1466def : InstRW<[V2Write_4cyc_2V13], (instregex "[SU](MAX|MIN)Vv16i8v$")>; 1467 1468// ASIMD multiply 1469def : InstRW<[V2Write_4cyc_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>; 1470 1471// ASIMD multiply accumulate 1472def : InstRW<[V2Wr_VMA, V2Rd_VMA], (instregex "^MLAv", "^MLSv")>; 1473 1474// ASIMD multiply accumulate high 1475def : InstRW<[V2Wr_VMAH, V2Rd_VMAH], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>; 1476 1477// ASIMD multiply accumulate long 1478def : InstRW<[V2Wr_VMAL, V2Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>; 1479 1480// ASIMD multiply accumulate saturating long 1481def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDML[AS]L[iv]")>; 1482 1483// ASIMD multiply/multiply long (8x8) polynomial, D-form 1484// ASIMD multiply/multiply long (8x8) polynomial, Q-form 1485def : InstRW<[V2Write_3cyc_1V23], (instregex "^PMULL?(v8i8|v16i8)$")>; 1486 1487// ASIMD multiply long 1488def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]MULLv", "^SQDMULL[iv]")>; 1489 1490// ASIMD pairwise add and accumulate long 1491def : InstRW<[V2Wr_VPA, V2Rd_VPA], (instregex "^[SU]ADALPv")>; 1492 1493// ASIMD shift accumulate 1494def : InstRW<[V2Wr_VSA, V2Rd_VSA], (instregex "^[SU]SRA[dv]", "^[SU]RSRA[dv]")>; 1495 1496// ASIMD shift by immed, basic 1497def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHL[dv]", "^SHLLv", "^SHRNv", 1498 "^SSHLLv", "^SSHR[dv]", "^USHLLv", 1499 "^USHR[dv]")>; 1500 1501// ASIMD shift by immed and insert, basic 1502def : InstRW<[V2Write_2cyc_1V13], (instregex "^SLI[dv]", "^SRI[dv]")>; 1503 1504// ASIMD shift by immed, complex 1505def : InstRW<[V2Write_4cyc_1V13], 1506 (instregex "^RSHRNv", "^SQRSHRU?N[bhsv]", "^(SQSHLU?|UQSHL)[bhsd]$", 1507 "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$", 1508 "^SQSHRU?N[bhsv]", "^SRSHR[dv]", "^UQRSHRN[bhsv]", 1509 "^UQSHRN[bhsv]", "^URSHR[dv]")>; 1510 1511// ASIMD shift by register, basic 1512def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]SHLv")>; 1513 1514// ASIMD shift by register, complex 1515def : InstRW<[V2Write_4cyc_1V13], 1516 (instregex "^[SU]RSHLv", "^[SU]QRSHLv", 1517 "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>; 1518 1519// ASIMD floating-point instructions 1520// ----------------------------------------------------------------------------- 1521 1522// ASIMD FP absolute value/difference 1523// ASIMD FP arith, normal 1524// ASIMD FP compare 1525// ASIMD FP complex add 1526// ASIMD FP max/min, normal 1527// ASIMD FP max/min, pairwise 1528// ASIMD FP negate 1529// Handled by SchedAlias<WriteV[dq], ...> 1530 1531// ASIMD FP complex multiply add 1532def : InstRW<[V2Wr_VFCMA, V2Rd_VFCMA], (instregex "^FCMLAv")>; 1533 1534// ASIMD FP convert, long (F16 to F32) 1535def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTL(v4|v8)i16")>; 1536 1537// ASIMD FP convert, long (F32 to F64) 1538def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTL(v2|v4)i32")>; 1539 1540// ASIMD FP convert, narrow (F32 to F16) 1541def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTN(v4|v8)i16")>; 1542 1543// ASIMD FP convert, narrow (F64 to F32) 1544def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTN(v2|v4)i32", 1545 "^FCVTXN(v2|v4)f32")>; 1546 1547// ASIMD FP convert, other, D-form F32 and Q-form F64 1548def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$", 1549 "^FCVT[AMNPZ][SU]v1i64$", 1550 "^FCVTZ[SU]d$", 1551 "^[SU]CVTFv2f(32|64)$", 1552 "^[SU]CVTFv1i64$", 1553 "^[SU]CVTFd$")>; 1554 1555// ASIMD FP convert, other, D-form F16 and Q-form F32 1556def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$", 1557 "^FCVT[AMNPZ][SU]v1i32$", 1558 "^FCVTZ[SU]s$", 1559 "^[SU]CVTFv4f(16|32)$", 1560 "^[SU]CVTFv1i32$", 1561 "^[SU]CVTFs$")>; 1562 1563// ASIMD FP convert, other, Q-form F16 1564def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$", 1565 "^FCVT[AMNPZ][SU]v1f16$", 1566 "^FCVTZ[SU]h$", 1567 "^[SU]CVTFv8f16$", 1568 "^[SU]CVTFv1i16$", 1569 "^[SU]CVTFh$")>; 1570 1571// ASIMD FP divide, D-form, F16 1572def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FDIVv4f16)>; 1573 1574// ASIMD FP divide, D-form, F32 1575def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FDIVv2f32)>; 1576 1577// ASIMD FP divide, Q-form, F16 1578def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FDIVv8f16)>; 1579 1580// ASIMD FP divide, Q-form, F32 1581def : InstRW<[V2Write_10cyc_1V02_10rc], (instrs FDIVv4f32)>; 1582 1583// ASIMD FP divide, Q-form, F64 1584def : InstRW<[V2Write_15cyc_1V02_14rc], (instrs FDIVv2f64)>; 1585 1586// ASIMD FP max/min, reduce, F32 and D-form F16 1587def : InstRW<[V2Write_4cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>; 1588 1589// ASIMD FP max/min, reduce, Q-form F16 1590def : InstRW<[V2Write_6cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>; 1591 1592// ASIMD FP multiply 1593def : InstRW<[V2Wr_VFM], (instregex "^FMULv", "^FMULXv")>; 1594 1595// ASIMD FP multiply accumulate 1596def : InstRW<[V2Wr_VFMA, V2Rd_VFMA], (instregex "^FMLAv", "^FMLSv")>; 1597 1598// ASIMD FP multiply accumulate long 1599def : InstRW<[V2Wr_VFMAL, V2Rd_VFMAL], (instregex "^FML[AS]L2?(lane)?v")>; 1600 1601// ASIMD FP round, D-form F32 and Q-form F64 1602def : InstRW<[V2Write_3cyc_1V02], 1603 (instregex "^FRINT[AIMNPXZ]v2f(32|64)$", 1604 "^FRINT(32|64)[XZ]v2f(32|64)$")>; 1605 1606// ASIMD FP round, D-form F16 and Q-form F32 1607def : InstRW<[V2Write_4cyc_2V02], 1608 (instregex "^FRINT[AIMNPXZ]v4f(16|32)$", 1609 "^FRINT(32|64)[XZ]v4f32$")>; 1610 1611// ASIMD FP round, Q-form F16 1612def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>; 1613 1614// ASIMD FP square root, D-form, F16 1615def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FSQRTv4f16)>; 1616 1617// ASIMD FP square root, D-form, F32 1618def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FSQRTv2f32)>; 1619 1620// ASIMD FP square root, Q-form, F16 1621def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FSQRTv8f16)>; 1622 1623// ASIMD FP square root, Q-form, F32 1624def : InstRW<[V2Write_10cyc_1V02_9rc], (instrs FSQRTv4f32)>; 1625 1626// ASIMD FP square root, Q-form, F64 1627def : InstRW<[V2Write_16cyc_1V02_15rc], (instrs FSQRTv2f64)>; 1628 1629// ASIMD BFloat16 (BF16) instructions 1630// ----------------------------------------------------------------------------- 1631 1632// ASIMD convert, F32 to BF16 1633def : InstRW<[V2Write_4cyc_2V02], (instrs BFCVTN, BFCVTN2)>; 1634 1635// ASIMD dot product 1636def : InstRW<[V2Wr_VBFDOT, V2Rd_VBFDOT], (instrs BFDOTv4bf16, BFDOTv8bf16)>; 1637 1638// ASIMD matrix multiply accumulate 1639def : InstRW<[V2Wr_VBFMMA, V2Rd_VBFMMA], (instrs BFMMLA)>; 1640 1641// ASIMD multiply accumulate long 1642def : InstRW<[V2Wr_VBFMAL, V2Rd_VBFMAL], (instrs BFMLALB, BFMLALBIdx, BFMLALT, 1643 BFMLALTIdx)>; 1644 1645// Scalar convert, F32 to BF16 1646def : InstRW<[V2Write_3cyc_1V02], (instrs BFCVT)>; 1647 1648// ASIMD miscellaneous instructions 1649// ----------------------------------------------------------------------------- 1650 1651// ASIMD bit reverse 1652// ASIMD bitwise insert 1653// ASIMD count 1654// ASIMD duplicate, element 1655// ASIMD extract 1656// ASIMD extract narrow 1657// ASIMD insert, element to element 1658// ASIMD move, FP immed 1659// ASIMD move, integer immed 1660// ASIMD reverse 1661// ASIMD table lookup extension, 1 table reg 1662// ASIMD transpose 1663// ASIMD unzip/zip 1664// Handled by SchedAlias<WriteV[dq], ...> 1665 1666// ASIMD duplicate, gen reg 1667def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>; 1668 1669// ASIMD extract narrow, saturating 1670def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>; 1671 1672// ASIMD reciprocal and square root estimate, D-form U32 1673def : InstRW<[V2Write_3cyc_1V02], (instrs URECPEv2i32, URSQRTEv2i32)>; 1674 1675// ASIMD reciprocal and square root estimate, Q-form U32 1676def : InstRW<[V2Write_4cyc_2V02], (instrs URECPEv4i32, URSQRTEv4i32)>; 1677 1678// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms 1679def : InstRW<[V2Write_3cyc_1V02], (instrs FRECPEv1f16, FRECPEv1i32, 1680 FRECPEv1i64, FRECPEv2f32, 1681 FRSQRTEv1f16, FRSQRTEv1i32, 1682 FRSQRTEv1i64, FRSQRTEv2f32)>; 1683 1684// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 1685def : InstRW<[V2Write_4cyc_2V02], (instrs FRECPEv4f16, FRECPEv4f32, 1686 FRSQRTEv4f16, FRSQRTEv4f32)>; 1687 1688// ASIMD reciprocal and square root estimate, Q-form F16 1689def : InstRW<[V2Write_6cyc_4V02], (instrs FRECPEv8f16, FRSQRTEv8f16)>; 1690 1691// ASIMD reciprocal exponent 1692def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRECPXv")>; 1693 1694// ASIMD reciprocal step 1695def : InstRW<[V2Write_4cyc_1V], (instregex "^FRECPS(32|64|v)", 1696 "^FRSQRTS(32|64|v)")>; 1697 1698// ASIMD table lookup, 1 or 2 table regs 1699def : InstRW<[V2Write_2cyc_1V01], (instrs TBLv8i8One, TBLv16i8One, 1700 TBLv8i8Two, TBLv16i8Two)>; 1701 1702// ASIMD table lookup, 3 table regs 1703def : InstRW<[V2Write_4cyc_2V01], (instrs TBLv8i8Three, TBLv16i8Three)>; 1704 1705// ASIMD table lookup, 4 table regs 1706def : InstRW<[V2Write_4cyc_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>; 1707 1708// ASIMD table lookup extension, 2 table reg 1709def : InstRW<[V2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>; 1710 1711// ASIMD table lookup extension, 3 table reg 1712def : InstRW<[V2Write_6cyc_3V], (instrs TBXv8i8Three, TBXv16i8Three)>; 1713 1714// ASIMD table lookup extension, 4 table reg 1715def : InstRW<[V2Write_6cyc_5V], (instrs TBXv8i8Four, TBXv16i8Four)>; 1716 1717// ASIMD transfer, element to gen reg 1718def : InstRW<[V2Write_2cyc_2V01], (instregex "^[SU]MOVv")>; 1719 1720// ASIMD transfer, gen reg to element 1721def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>; 1722 1723// ASIMD load instructions 1724// ----------------------------------------------------------------------------- 1725 1726// ASIMD load, 1 element, multiple, 1 reg, D-form 1727def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>; 1728def : InstRW<[V2Write_6cyc_1L, WriteAdr], 1729 (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>; 1730 1731// ASIMD load, 1 element, multiple, 1 reg, Q-form 1732def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>; 1733def : InstRW<[V2Write_6cyc_1L, WriteAdr], 1734 (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>; 1735 1736// ASIMD load, 1 element, multiple, 2 reg, D-form 1737def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; 1738def : InstRW<[V2Write_6cyc_2L, WriteAdr], 1739 (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; 1740 1741// ASIMD load, 1 element, multiple, 2 reg, Q-form 1742def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; 1743def : InstRW<[V2Write_6cyc_2L, WriteAdr], 1744 (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; 1745 1746// ASIMD load, 1 element, multiple, 3 reg, D-form 1747def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; 1748def : InstRW<[V2Write_6cyc_3L, WriteAdr], 1749 (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; 1750 1751// ASIMD load, 1 element, multiple, 3 reg, Q-form 1752def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; 1753def : InstRW<[V2Write_6cyc_3L, WriteAdr], 1754 (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; 1755 1756// ASIMD load, 1 element, multiple, 4 reg, D-form 1757def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; 1758def : InstRW<[V2Write_7cyc_4L, WriteAdr], 1759 (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; 1760 1761// ASIMD load, 1 element, multiple, 4 reg, Q-form 1762def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; 1763def : InstRW<[V2Write_7cyc_4L, WriteAdr], 1764 (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; 1765 1766// ASIMD load, 1 element, one lane, B/H/S 1767// ASIMD load, 1 element, one lane, D 1768def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)$")>; 1769def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; 1770 1771// ASIMD load, 1 element, all lanes, D-form, B/H/S 1772// ASIMD load, 1 element, all lanes, D-form, D 1773def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)$")>; 1774def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>; 1775 1776// ASIMD load, 1 element, all lanes, Q-form 1777def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>; 1778def : InstRW<[V2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; 1779 1780// ASIMD load, 2 element, multiple, D-form, B/H/S 1781def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)$")>; 1782def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; 1783 1784// ASIMD load, 2 element, multiple, Q-form, B/H/S 1785// ASIMD load, 2 element, multiple, Q-form, D 1786def : InstRW<[V2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)$")>; 1787def : InstRW<[V2Write_8cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; 1788 1789// ASIMD load, 2 element, one lane, B/H 1790// ASIMD load, 2 element, one lane, S 1791// ASIMD load, 2 element, one lane, D 1792def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)$")>; 1793def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; 1794 1795// ASIMD load, 2 element, all lanes, D-form, B/H/S 1796// ASIMD load, 2 element, all lanes, D-form, D 1797def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)$")>; 1798def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>; 1799 1800// ASIMD load, 2 element, all lanes, Q-form 1801def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>; 1802def : InstRW<[V2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; 1803 1804// ASIMD load, 3 element, multiple, D-form, B/H/S 1805def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)$")>; 1806def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>; 1807 1808// ASIMD load, 3 element, multiple, Q-form, B/H/S 1809// ASIMD load, 3 element, multiple, Q-form, D 1810def : InstRW<[V2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s|2d)$")>; 1811def : InstRW<[V2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>; 1812 1813// ASIMD load, 3 element, one lane, B/H 1814// ASIMD load, 3 element, one lane, S 1815// ASIMD load, 3 element, one lane, D 1816def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)$")>; 1817def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; 1818 1819// ASIMD load, 3 element, all lanes, D-form, B/H/S 1820// ASIMD load, 3 element, all lanes, D-form, D 1821def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)$")>; 1822def : InstRW<[V2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>; 1823 1824// ASIMD load, 3 element, all lanes, Q-form, B/H/S 1825// ASIMD load, 3 element, all lanes, Q-form, D 1826def : InstRW<[V2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)$")>; 1827def : InstRW<[V2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>; 1828 1829// ASIMD load, 4 element, multiple, D-form, B/H/S 1830def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)$")>; 1831def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; 1832 1833// ASIMD load, 4 element, multiple, Q-form, B/H/S 1834// ASIMD load, 4 element, multiple, Q-form, D 1835def : InstRW<[V2Write_9cyc_6L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; 1836def : InstRW<[V2Write_9cyc_6L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; 1837 1838// ASIMD load, 4 element, one lane, B/H 1839// ASIMD load, 4 element, one lane, S 1840// ASIMD load, 4 element, one lane, D 1841def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)$")>; 1842def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; 1843 1844// ASIMD load, 4 element, all lanes, D-form, B/H/S 1845// ASIMD load, 4 element, all lanes, D-form, D 1846def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>; 1847def : InstRW<[V2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>; 1848 1849// ASIMD load, 4 element, all lanes, Q-form, B/H/S 1850// ASIMD load, 4 element, all lanes, Q-form, D 1851def : InstRW<[V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>; 1852def : InstRW<[V2Write_8cyc_4L_4V, WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>; 1853 1854// ASIMD store instructions 1855// ----------------------------------------------------------------------------- 1856 1857// ASIMD store, 1 element, multiple, 1 reg, D-form 1858def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(8b|4h|2s|1d)$")>; 1859def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; 1860 1861// ASIMD store, 1 element, multiple, 1 reg, Q-form 1862def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(16b|8h|4s|2d)$")>; 1863def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; 1864 1865// ASIMD store, 1 element, multiple, 2 reg, D-form 1866def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Twov(8b|4h|2s|1d)$")>; 1867def : InstRW<[V2Write_2cyc_1L01_1V01, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; 1868 1869// ASIMD store, 1 element, multiple, 2 reg, Q-form 1870def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Twov(16b|8h|4s|2d)$")>; 1871def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; 1872 1873// ASIMD store, 1 element, multiple, 3 reg, D-form 1874def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Threev(8b|4h|2s|1d)$")>; 1875def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; 1876 1877// ASIMD store, 1 element, multiple, 3 reg, Q-form 1878def : InstRW<[V2Write_2cyc_3L01_3V01], (instregex "ST1Threev(16b|8h|4s|2d)$")>; 1879def : InstRW<[V2Write_2cyc_3L01_3V01, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; 1880 1881// ASIMD store, 1 element, multiple, 4 reg, D-form 1882def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; 1883def : InstRW<[V2Write_2cyc_2L01_2V01, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; 1884 1885// ASIMD store, 1 element, multiple, 4 reg, Q-form 1886def : InstRW<[V2Write_2cyc_4L01_4V01], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; 1887def : InstRW<[V2Write_2cyc_4L01_4V01, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; 1888 1889// ASIMD store, 1 element, one lane, B/H/S 1890// ASIMD store, 1 element, one lane, D 1891def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST1i(8|16|32|64)$")>; 1892def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; 1893 1894// ASIMD store, 2 element, multiple, D-form, B/H/S 1895def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2Twov(8b|4h|2s)$")>; 1896def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; 1897 1898// ASIMD store, 2 element, multiple, Q-form, B/H/S 1899// ASIMD store, 2 element, multiple, Q-form, D 1900def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST2Twov(16b|8h|4s|2d)$")>; 1901def : InstRW<[V2Write_4cyc_2L01_4V01, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; 1902 1903// ASIMD store, 2 element, one lane, B/H/S 1904// ASIMD store, 2 element, one lane, D 1905def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2i(8|16|32|64)$")>; 1906def : InstRW<[V2Write_4cyc_1L01_2V01, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; 1907 1908// ASIMD store, 3 element, multiple, D-form, B/H/S 1909def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3Threev(8b|4h|2s)$")>; 1910def : InstRW<[V2Write_5cyc_2L01_4V01, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; 1911 1912// ASIMD store, 3 element, multiple, Q-form, B/H/S 1913// ASIMD store, 3 element, multiple, Q-form, D 1914def : InstRW<[V2Write_6cyc_3L01_6V01], (instregex "ST3Threev(16b|8h|4s|2d)$")>; 1915def : InstRW<[V2Write_6cyc_3L01_6V01, WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>; 1916 1917// ASIMD store, 3 element, one lane, B/H 1918// ASIMD store, 3 element, one lane, S 1919// ASIMD store, 3 element, one lane, D 1920def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3i(8|16|32|64)$")>; 1921def : InstRW<[V2Write_5cyc_2L01_4V01, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; 1922 1923// ASIMD store, 4 element, multiple, D-form, B/H/S 1924def : InstRW<[V2Write_6cyc_2L01_6V01], (instregex "ST4Fourv(8b|4h|2s)$")>; 1925def : InstRW<[V2Write_6cyc_2L01_6V01, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; 1926 1927// ASIMD store, 4 element, multiple, Q-form, B/H/S 1928def : InstRW<[V2Write_7cyc_4L01_12V01], (instregex "ST4Fourv(16b|8h|4s)$")>; 1929def : InstRW<[V2Write_7cyc_4L01_12V01, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; 1930 1931// ASIMD store, 4 element, multiple, Q-form, D 1932def : InstRW<[V2Write_5cyc_4L01_8V01], (instregex "ST4Fourv(2d)$")>; 1933def : InstRW<[V2Write_5cyc_4L01_8V01, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; 1934 1935// ASIMD store, 4 element, one lane, B/H/S 1936def : InstRW<[V2Write_6cyc_1L01_3V01], (instregex "ST4i(8|16|32)$")>; 1937def : InstRW<[V2Write_6cyc_1L01_3V01, WriteAdr], (instregex "ST4i(8|16|32)_POST$")>; 1938 1939// ASIMD store, 4 element, one lane, D 1940def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST4i(64)$")>; 1941def : InstRW<[V2Write_4cyc_2L01_4V01, WriteAdr], (instregex "ST4i(64)_POST$")>; 1942 1943// Cryptography extensions 1944// ----------------------------------------------------------------------------- 1945 1946// Crypto AES ops 1947def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>; 1948 1949// Crypto polynomial (64x64) multiply long 1950def : InstRW<[V2Write_2cyc_1V], (instrs PMULLv1i64, PMULLv2i64)>; 1951 1952// Crypto SHA1 hash acceleration op 1953// Crypto SHA1 schedule acceleration ops 1954def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>; 1955 1956// Crypto SHA1 hash acceleration ops 1957// Crypto SHA256 hash acceleration ops 1958def : InstRW<[V2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>; 1959 1960// Crypto SHA256 schedule acceleration ops 1961def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>; 1962 1963// Crypto SHA512 hash acceleration ops 1964def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>; 1965 1966// Crypto SHA3 ops 1967def : InstRW<[V2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>; 1968 1969// Crypto SM3 ops 1970def : InstRW<[V2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$", 1971 "^SM3TT[12][AB]$")>; 1972 1973// Crypto SM4 ops 1974def : InstRW<[V2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>; 1975 1976// CRC 1977// ----------------------------------------------------------------------------- 1978 1979def : InstRW<[V2Wr_CRC, V2Rd_CRC], (instregex "^CRC32")>; 1980 1981// SVE Predicate instructions 1982// ----------------------------------------------------------------------------- 1983 1984// Loop control, based on predicate 1985def : InstRW<[V2Write_2or3cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP, 1986 BRKB_PPmP, BRKB_PPzP)>; 1987 1988// Loop control, based on predicate and flag setting 1989def : InstRW<[V2Write_3or4cyc_2M], (instrs BRKAS_PPzP, BRKBS_PPzP)>; 1990 1991// Loop control, propagating 1992def : InstRW<[V2Write_2or3cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, 1993 BRKPB_PPzPP)>; 1994 1995// Loop control, propagating and flag setting 1996def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP, 1997 BRKPBS_PPzPP)>; 1998 1999// Loop control, based on GPR 2000def : InstRW<[V2Write_3cyc_2M], 2001 (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>; 2002def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>; 2003 2004// Loop terminate 2005def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>; 2006 2007// Predicate counting scalar 2008def : InstRW<[V2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>; 2009def : InstRW<[V2Write_2cyc_1M], 2010 (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI", 2011 "^SQ(DEC|INC)[BHWD]_XPiWdI", 2012 "^UQ(DEC|INC)[BHWD]_WPiI")>; 2013 2014// Predicate counting scalar, ALL, {1,2,4} 2015def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>; 2016 2017// Predicate counting scalar, active predicate 2018def : InstRW<[V2Write_2cyc_1M], 2019 (instregex "^CNTP_XPP_[BHSD]", 2020 "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]", 2021 "^(UQDEC|UQINC)P_WP_[BHSD]", 2022 "^(SQDEC|SQINC)P_XPWd_[BHSD]")>; 2023 2024// Predicate counting vector, active predicate 2025def : InstRW<[V2Write_7cyc_1M_1M0_1V], 2026 (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>; 2027 2028// Predicate logical 2029def : InstRW<[V2Write_1or2cyc_1M0], 2030 (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>; 2031 2032// Predicate logical, flag setting 2033def : InstRW<[V2Write_1or2cyc_1M0_1M], 2034 (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>; 2035 2036// Predicate reverse 2037def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]")>; 2038 2039// Predicate select 2040def : InstRW<[V2Write_1cyc_1M0], (instrs SEL_PPPP)>; 2041 2042// Predicate set 2043def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>; 2044 2045// Predicate set/initialize, set flags 2046def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]")>; 2047 2048// Predicate find first/next 2049def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>; 2050 2051// Predicate test 2052def : InstRW<[V2Write_1cyc_1M], (instrs PTEST_PP)>; 2053 2054// Predicate transpose 2055def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]")>; 2056 2057// Predicate unpack and widen 2058def : InstRW<[V2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>; 2059 2060// Predicate zip/unzip 2061def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>; 2062 2063// SVE integer instructions 2064// ----------------------------------------------------------------------------- 2065 2066// Arithmetic, absolute diff 2067def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]", 2068 "^[SU]ABD_ZPZZ_[BHSD]")>; 2069 2070// Arithmetic, absolute diff accum 2071def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>; 2072 2073// Arithmetic, absolute diff accum long 2074def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>; 2075 2076// Arithmetic, absolute diff long 2077def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>; 2078 2079// Arithmetic, basic 2080def : InstRW<[V2Write_2cyc_1V], 2081 (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]", 2082 "^(ADD|SUB)_ZZZ_[BHSD]", 2083 "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]", 2084 "^(ADD|SUB|SUBR)_ZI_[BHSD]", 2085 "^ADR_[SU]XTW_ZZZ_D_[0123]", 2086 "^ADR_LSL_ZZZ_[SD]_[0123]", 2087 "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]", 2088 "^SADDLBT_ZZZ_[HSD]", 2089 "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]", 2090 "^SSUBL(BT|TB)_ZZZ_[HSD]")>; 2091 2092// Arithmetic, complex 2093def : InstRW<[V2Write_2cyc_1V], 2094 (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]", 2095 "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]", 2096 "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]", 2097 "^[SU]Q(ADD|SUB)_ZI_[BHSD]", 2098 "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]", 2099 "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>; 2100 2101// Arithmetic, large integer 2102def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>; 2103 2104// Arithmetic, pairwise add 2105def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>; 2106 2107// Arithmetic, pairwise add and accum long 2108def : InstRW<[V2Wr_ZPA, ReadDefault, V2Rd_ZPA], 2109 (instregex "^[SU]ADALP_ZPmZ_[HSD]")>; 2110 2111// Arithmetic, shift 2112def : InstRW<[V2Write_2cyc_1V13], 2113 (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]", 2114 "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]", 2115 "^(ASR|LSL|LSR)_ZPmI_[BHSD]", 2116 "^(ASR|LSL|LSR)_ZPmZ_[BHSD]", 2117 "^(ASR|LSL|LSR)_ZZI_[BHSD]", 2118 "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]", 2119 "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>; 2120 2121// Arithmetic, shift and accumulate 2122def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>; 2123 2124// Arithmetic, shift by immediate 2125def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]", 2126 "^[SU]SHLL[BT]_ZZI_[HSD]")>; 2127 2128// Arithmetic, shift by immediate and insert 2129def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>; 2130 2131// Arithmetic, shift complex 2132def : InstRW<[V2Write_4cyc_1V13], 2133 (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]", 2134 "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]", 2135 "^[SU]QR?SHL_ZPZZ_[BHSD]", 2136 "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]", 2137 "^SQSHRU?N[BT]_ZZI_[BHS]", 2138 "^UQR?SHRN[BT]_ZZI_[BHS]")>; 2139 2140// Arithmetic, shift right for divide 2141def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>; 2142 2143// Arithmetic, shift rounding 2144def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]", 2145 "^[SU]RSHL_ZPZZ_[BHSD]", 2146 "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>; 2147 2148// Bit manipulation 2149def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>; 2150 2151// Bitwise select 2152def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>; 2153 2154// Count/reverse bits 2155def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>; 2156 2157// Broadcast logical bitmask immediate to vector 2158def : InstRW<[V2Write_2cyc_1V], (instrs DUPM_ZI)>; 2159 2160// Compare and set flags 2161def : InstRW<[V2Write_4or5cyc_1V0_1M0], 2162 (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]", 2163 "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>; 2164 2165// Complex add 2166def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>; 2167 2168// Complex dot product 8-bit element 2169def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>; 2170 2171// Complex dot product 16-bit element 2172def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>; 2173 2174// Complex multiply-add B, H, S element size 2175def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]", 2176 "^CMLA_ZZZI_[HS]")>; 2177 2178// Complex multiply-add D element size 2179def : InstRW<[V2Wr_ZCMAD, V2Rd_ZCMAD], (instrs CMLA_ZZZ_D)>; 2180 2181// Conditional extract operations, scalar form 2182def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]")>; 2183 2184// Conditional extract operations, SIMD&FP scalar and vector forms 2185def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]", 2186 "^COMPACT_ZPZ_[SD]", 2187 "^SPLICE_ZPZZ?_[BHSD]")>; 2188 2189// Convert to floating point, 64b to float or convert to double 2190def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]", 2191 "^[SU]CVTF_ZPmZ_StoD")>; 2192 2193// Convert to floating point, 32b to single or half 2194def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>; 2195 2196// Convert to floating point, 16b to half 2197def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>; 2198 2199// Copy, scalar 2200def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>; 2201 2202// Copy, scalar SIMD&FP or imm 2203def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]", 2204 "^CPY_ZPzI_[BHSD]")>; 2205 2206// Divides, 32 bit 2207def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S", 2208 "^[SU]DIV_ZPZZ_S")>; 2209 2210// Divides, 64 bit 2211def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D", 2212 "^[SU]DIV_ZPZZ_D")>; 2213 2214// Dot product, 8 bit 2215def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>; 2216 2217// Dot product, 8 bit, using signed and unsigned integers 2218def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>; 2219 2220// Dot product, 16 bit 2221def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>; 2222 2223// Duplicate, immediate and indexed form 2224def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]", 2225 "^DUP_ZZI_[BHSDQ]")>; 2226 2227// Duplicate, scalar form 2228def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]")>; 2229 2230// Extend, sign or zero 2231def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ_[HSD]", 2232 "^[SU]XTH_ZPmZ_[SD]", 2233 "^[SU]XTW_ZPmZ_[D]")>; 2234 2235// Extract 2236def : InstRW<[V2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>; 2237 2238// Extract narrow saturating 2239def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]", 2240 "^SQXTUN[BT]_ZZ_[BHS]")>; 2241 2242// Extract/insert operation, SIMD and FP scalar form 2243def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]", 2244 "^INSR_ZV_[BHSD]")>; 2245 2246// Extract/insert operation, scalar 2247def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]", 2248 "^INSR_ZR_[BHSD]")>; 2249 2250// Histogram operations 2251def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]", 2252 "^HISTSEG_ZZZ")>; 2253 2254// Horizontal operations, B, H, S form, immediate operands only 2255def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]")>; 2256 2257// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar 2258// operands only / immediate, scalar operands 2259def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>; 2260 2261// Horizontal operations, D form, immediate operands only 2262def : InstRW<[V2Write_5cyc_2V02], (instrs INDEX_II_D)>; 2263 2264// Horizontal operations, D form, scalar, immediate operands)/ scalar operands 2265// only / immediate, scalar operands 2266def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D")>; 2267 2268// Logical 2269def : InstRW<[V2Write_2cyc_1V], 2270 (instregex "^(AND|EOR|ORR)_ZI", 2271 "^(AND|BIC|EOR|ORR)_ZZZ", 2272 "^EOR(BT|TB)_ZZZ_[BHSD]", 2273 "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]", 2274 "^NOT_ZPmZ_[BHSD]")>; 2275 2276// Max/min, basic and pairwise 2277def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]", 2278 "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]", 2279 "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>; 2280 2281// Matching operations 2282// FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the 2283// latency for this instruction is 4 cycles. 2284def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>; 2285 2286// Matrix multiply-accumulate 2287def : InstRW<[V2Wr_ZMMA, V2Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; 2288 2289// Move prefix 2290def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]", 2291 "^MOVPRFX_ZZ")>; 2292 2293// Multiply, B, H, S element size 2294def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]", 2295 "^MUL_ZPZZ_[BHS]", 2296 "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]", 2297 "^[SU]MULH_ZPZZ_[BHS]")>; 2298 2299// Multiply, D element size 2300def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D", 2301 "^MUL_ZPZZ_D", 2302 "^[SU]MULH_(ZPmZ|ZZZ)_D", 2303 "^[SU]MULH_ZPZZ_D")>; 2304 2305// Multiply long 2306def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]", 2307 "^[SU]MULL[BT]_ZZZ_[HSD]")>; 2308 2309// Multiply accumulate, B, H, S element size 2310def : InstRW<[V2Wr_ZMABHS, V2Rd_ZMABHS], 2311 (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>; 2312def : InstRW<[V2Wr_ZMABHS, ReadDefault, V2Rd_ZMABHS], 2313 (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>; 2314 2315// Multiply accumulate, D element size 2316def : InstRW<[V2Wr_ZMAD, V2Rd_ZMAD], 2317 (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>; 2318def : InstRW<[V2Wr_ZMAD, ReadDefault, V2Rd_ZMAD], 2319 (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>; 2320 2321// Multiply accumulate long 2322def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]", 2323 "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>; 2324 2325// Multiply accumulate saturating doubling long regular 2326def : InstRW<[V2Wr_ZMASQL, V2Rd_ZMASQ], 2327 (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]", 2328 "^SQDML[AS]L[BT]_ZZZI_[SD]")>; 2329 2330// Multiply saturating doubling high, B, H, S element size 2331def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]", 2332 "^SQDMULH_ZZZI_[HS]")>; 2333 2334// Multiply saturating doubling high, D element size 2335def : InstRW<[V2Write_5cyc_2V02], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>; 2336 2337// Multiply saturating doubling long 2338def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]", 2339 "^SQDMULL[BT]_ZZZI_[SD]")>; 2340 2341// Multiply saturating rounding doubling regular/complex accumulate, B, H, S 2342// element size 2343def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]", 2344 "^SQRDCMLAH_ZZZ_[BHS]", 2345 "^SQRDML[AS]H_ZZZI_[HS]", 2346 "^SQRDCMLAH_ZZZI_[HS]")>; 2347 2348// Multiply saturating rounding doubling regular/complex accumulate, D element 2349// size 2350def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D", 2351 "^SQRDCMLAH_ZZZ_D")>; 2352 2353// Multiply saturating rounding doubling regular/complex, B, H, S element size 2354def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]", 2355 "^SQRDMULH_ZZZI_[HS]")>; 2356 2357// Multiply saturating rounding doubling regular/complex, D element size 2358def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D")>; 2359 2360// Multiply/multiply long, (8x8) polynomial 2361def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B", 2362 "^PMULL[BT]_ZZZ_[HDQ]")>; 2363 2364// Predicate counting vector 2365def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>; 2366 2367// Reciprocal estimate 2368def : InstRW<[V2Write_4cyc_2V02], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>; 2369 2370// Reduction, arithmetic, B form 2371def : InstRW<[V2Write_9cyc_2V_4V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>; 2372 2373// Reduction, arithmetic, H form 2374def : InstRW<[V2Write_8cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>; 2375 2376// Reduction, arithmetic, S form 2377def : InstRW<[V2Write_6cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>; 2378 2379// Reduction, arithmetic, D form 2380def : InstRW<[V2Write_4cyc_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>; 2381 2382// Reduction, logical 2383def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>; 2384 2385// Reverse, vector 2386def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]", 2387 "^REVB_ZPmZ_[HSD]", 2388 "^REVH_ZPmZ_[SD]", 2389 "^REVW_ZPmZ_D")>; 2390 2391// Select, vector form 2392def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]")>; 2393 2394// Table lookup 2395def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>; 2396 2397// Table lookup extension 2398def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]")>; 2399 2400// Transpose, vector form 2401def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>; 2402 2403// Unpack and extend 2404def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>; 2405 2406// Zip/unzip 2407def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>; 2408 2409// SVE floating-point instructions 2410// ----------------------------------------------------------------------------- 2411 2412// Floating point absolute value/difference 2413def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]", 2414 "^FABD_ZPZZ_[HSD]", 2415 "^FABS_ZPmZ_[HSD]")>; 2416 2417// Floating point arithmetic 2418def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]", 2419 "^F(ADD|SUB)_ZPZ[IZ]_[HSD]", 2420 "^FADDP_ZPmZZ_[HSD]", 2421 "^FNEG_ZPmZ_[HSD]", 2422 "^FSUBR_ZPm[IZ]_[HSD]", 2423 "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>; 2424 2425// Floating point associative add, F16 2426def : InstRW<[V2Write_10cyc_1V1_9rc], (instrs FADDA_VPZ_H)>; 2427 2428// Floating point associative add, F32 2429def : InstRW<[V2Write_6cyc_1V1_5rc], (instrs FADDA_VPZ_S)>; 2430 2431// Floating point associative add, F64 2432def : InstRW<[V2Write_4cyc_1V], (instrs FADDA_VPZ_D)>; 2433 2434// Floating point compare 2435def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]", 2436 "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]", 2437 "^FCM(LE|LT)_PPzZ0_[HSD]", 2438 "^FCMUO_PPzZZ_[HSD]")>; 2439 2440// Floating point complex add 2441def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]")>; 2442 2443// Floating point complex multiply add 2444def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>; 2445def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]")>; 2446 2447// Floating point convert, long or narrow (F16 to F32 or F32 to F16) 2448def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)", 2449 "^FCVTLT_ZPmZ_HtoS", 2450 "^FCVTNT_ZPmZ_StoH")>; 2451 2452// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 2453// or F64 to F16) 2454def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)", 2455 "^FCVTLT_ZPmZ_StoD", 2456 "^FCVTNT_ZPmZ_DtoS")>; 2457 2458// Floating point convert, round to odd 2459def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>; 2460 2461// Floating point base2 log, F16 2462def : InstRW<[V2Write_6cyc_4V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>; 2463 2464// Floating point base2 log, F32 2465def : InstRW<[V2Write_4cyc_2V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>; 2466 2467// Floating point base2 log, F64 2468def : InstRW<[V2Write_3cyc_1V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>; 2469 2470// Floating point convert to integer, F16 2471def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>; 2472 2473// Floating point convert to integer, F32 2474def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>; 2475 2476// Floating point convert to integer, F64 2477def : InstRW<[V2Write_3cyc_1V02], 2478 (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>; 2479 2480// Floating point copy 2481def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]", 2482 "^FDUP_ZI_[HSD]")>; 2483 2484// Floating point divide, F16 2485def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>; 2486 2487// Floating point divide, F32 2488def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>; 2489 2490// Floating point divide, F64 2491def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>; 2492 2493// Floating point min/max pairwise 2494def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>; 2495 2496// Floating point min/max 2497def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]", 2498 "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>; 2499 2500// Floating point multiply 2501def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]", 2502 "^FMULX_ZPZZ_[HSD]", 2503 "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]", 2504 "^FMUL_ZPZ[IZ]_[HSD]")>; 2505 2506// Floating point multiply accumulate 2507def : InstRW<[V2Wr_ZFMA, ReadDefault, V2Rd_ZFMA], 2508 (instregex "^FN?ML[AS]_ZPmZZ_[HSD]", 2509 "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>; 2510def : InstRW<[V2Wr_ZFMA, V2Rd_ZFMA], 2511 (instregex "^FML[AS]_ZZZI_[HSD]", 2512 "^FN?ML[AS]_ZPZZZ_[HSD]")>; 2513 2514// Floating point multiply add/sub accumulate long 2515def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>; 2516 2517// Floating point reciprocal estimate, F16 2518def : InstRW<[V2Write_6cyc_4V02], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>; 2519 2520// Floating point reciprocal estimate, F32 2521def : InstRW<[V2Write_4cyc_2V02], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>; 2522 2523// Floating point reciprocal estimate, F64 2524def : InstRW<[V2Write_3cyc_1V02], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>; 2525 2526// Floating point reciprocal step 2527def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>; 2528 2529// Floating point reduction, F16 2530def : InstRW<[V2Write_8cyc_4V], 2531 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>; 2532 2533// Floating point reduction, F32 2534def : InstRW<[V2Write_6cyc_3V], 2535 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>; 2536 2537// Floating point reduction, F64 2538def : InstRW<[V2Write_4cyc_2V], 2539 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>; 2540 2541// Floating point round to integral, F16 2542def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>; 2543 2544// Floating point round to integral, F32 2545def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>; 2546 2547// Floating point round to integral, F64 2548def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>; 2549 2550// Floating point square root, F16 2551def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H", "^FSQRT_ZPmZ_H")>; 2552 2553// Floating point square root, F32 2554def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S", "^FSQRT_ZPmZ_S")>; 2555 2556// Floating point square root, F64 2557def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D", "^FSQRT_ZPmZ_D")>; 2558 2559// Floating point trigonometric exponentiation 2560def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>; 2561 2562// Floating point trigonometric multiply add 2563def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]")>; 2564 2565// Floating point trigonometric, miscellaneous 2566def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>; 2567 2568// SVE BFloat16 (BF16) instructions 2569// ----------------------------------------------------------------------------- 2570 2571// Convert, F32 to BF16 2572def : InstRW<[V2Write_4cyc_1V02], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>; 2573 2574// Dot product 2575def : InstRW<[V2Wr_ZBFDOT, V2Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>; 2576 2577// Matrix multiply accumulate 2578def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>; 2579 2580// Multiply accumulate long 2581def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>; 2582 2583// SVE Load instructions 2584// ----------------------------------------------------------------------------- 2585 2586// Load vector 2587def : InstRW<[V2Write_6cyc_1L], (instrs LDR_ZXI)>; 2588 2589// Load predicate 2590def : InstRW<[V2Write_6cyc_1L_1M], (instrs LDR_PXI)>; 2591 2592// Contiguous load, scalar + imm 2593def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM_REAL$", 2594 "^LD1S?B_[HSD]_IMM_REAL$", 2595 "^LD1S?H_[SD]_IMM_REAL$", 2596 "^LD1S?W_D_IMM_REAL$" )>; 2597// Contiguous load, scalar + scalar 2598def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]$", 2599 "^LD1S?B_[HSD]$", 2600 "^LD1S?H_[SD]$", 2601 "^LD1S?W_D$" )>; 2602 2603// Contiguous load broadcast, scalar + imm 2604def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$", 2605 "^LD1RS?B_[HSD]_IMM$", 2606 "^LD1RS?H_[SD]_IMM$", 2607 "^LD1RW_D_IMM$", 2608 "^LD1RSW_IMM$", 2609 "^LD1RQ_[BHWD]_IMM$")>; 2610 2611// Contiguous load broadcast, scalar + scalar 2612def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>; 2613 2614// Non temporal load, scalar + imm 2615// Non temporal load, scalar + scalar 2616def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZR[IR]$")>; 2617 2618// Non temporal gather load, vector + scalar 32-bit element size 2619def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$", 2620 "^LDNT1S[BH]_ZZR_S_REAL$")>; 2621 2622// Non temporal gather load, vector + scalar 64-bit element size 2623def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>; 2624def : InstRW<[V2Write_9cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>; 2625 2626// Contiguous first faulting load, scalar + scalar 2627def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$", 2628 "^LDFF1S?B_[HSD]_REAL$", 2629 "^LDFF1S?H_[SD]_REAL$", 2630 "^LDFF1S?W_D_REAL$")>; 2631 2632// Contiguous non faulting load, scalar + imm 2633def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$", 2634 "^LDNF1S?B_[HSD]_IMM_REAL$", 2635 "^LDNF1S?H_[SD]_IMM_REAL$", 2636 "^LDNF1S?W_D_IMM_REAL$")>; 2637 2638// Contiguous Load two structures to two vectors, scalar + imm 2639def : InstRW<[V2Write_8cyc_2L_2V], (instregex "^LD2[BHWD]_IMM$")>; 2640 2641// Contiguous Load two structures to two vectors, scalar + scalar 2642def : InstRW<[V2Write_9cyc_2L_2V_2S], (instregex "^LD2[BHWD]$")>; 2643 2644// Contiguous Load three structures to three vectors, scalar + imm 2645def : InstRW<[V2Write_9cyc_3L_3V], (instregex "^LD3[BHWD]_IMM$")>; 2646 2647// Contiguous Load three structures to three vectors, scalar + scalar 2648def : InstRW<[V2Write_10cyc_3V_3L_3S], (instregex "^LD3[BHWD]$")>; 2649 2650// Contiguous Load four structures to four vectors, scalar + imm 2651def : InstRW<[V2Write_9cyc_4L_8V], (instregex "^LD4[BHWD]_IMM$")>; 2652 2653// Contiguous Load four structures to four vectors, scalar + scalar 2654def : InstRW<[V2Write_10cyc_4L_8V_4S], (instregex "^LD4[BHWD]$")>; 2655 2656// Gather load, vector + imm, 32-bit element size 2657def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$", 2658 "^GLD(FF)?1W_IMM_REAL$")>; 2659 2660// Gather load, vector + imm, 64-bit element size 2661def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$", 2662 "^GLD(FF)?1D_IMM_REAL$")>; 2663 2664// Gather load, 32-bit scaled offset 2665def : InstRW<[V2Write_10cyc_1L_8V], 2666 (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED_REAL$", 2667 "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>; 2668 2669// Gather load, 64-bit scaled offset 2670// NOTE: These instructions are not specified in the SOG. 2671def : InstRW<[V2Write_10cyc_1L_4V], 2672 (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED_REAL$", 2673 "^GLD(FF)?1D_([SU]XTW_)?SCALED_REAL$")>; 2674 2675// Gather load, 32-bit unpacked unscaled offset 2676def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$", 2677 "^GLD(FF)?1W_[SU]XTW_REAL$")>; 2678 2679// Gather load, 64-bit unpacked unscaled offset 2680// NOTE: These instructions are not specified in the SOG. 2681def : InstRW<[V2Write_9cyc_1L_2V], 2682 (instregex "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?REAL$", 2683 "^GLD(FF)?1D_([SU]XTW_)?REAL$")>; 2684 2685// SVE Store instructions 2686// ----------------------------------------------------------------------------- 2687 2688// Store from predicate reg 2689def : InstRW<[V2Write_1cyc_1L01], (instrs STR_PXI)>; 2690 2691// Store from vector reg 2692def : InstRW<[V2Write_2cyc_1L01_1V01], (instrs STR_ZXI)>; 2693 2694// Contiguous store, scalar + imm 2695def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BHWD]_IMM$", 2696 "^ST1B_[HSD]_IMM$", 2697 "^ST1H_[SD]_IMM$", 2698 "^ST1W_D_IMM$")>; 2699 2700// Contiguous store, scalar + scalar 2701def : InstRW<[V2Write_2cyc_1L01_1S_1V01], (instregex "^ST1H(_[SD])?$")>; 2702def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BWD]$", 2703 "^ST1B_[HSD]$", 2704 "^ST1W_D$")>; 2705 2706// Contiguous store two structures from two vectors, scalar + imm 2707def : InstRW<[V2Write_4cyc_1L01_1V01], (instregex "^ST2[BHWD]_IMM$")>; 2708 2709// Contiguous store two structures from two vectors, scalar + scalar 2710def : InstRW<[V2Write_4cyc_2L01_2S_2V01], (instrs ST2H)>; 2711def : InstRW<[V2Write_4cyc_2L01_2V01], (instregex "^ST2[BWD]$")>; 2712 2713// Contiguous store three structures from three vectors, scalar + imm 2714def : InstRW<[V2Write_7cyc_9L01_9V01], (instregex "^ST3[BHWD]_IMM$")>; 2715 2716// Contiguous store three structures from three vectors, scalar + scalar 2717def : InstRW<[V2Write_7cyc_9L01_9S_9V01], (instregex "^ST3[BHWD]$")>; 2718 2719// Contiguous store four structures from four vectors, scalar + imm 2720def : InstRW<[V2Write_11cyc_18L01_18V01], (instregex "^ST4[BHWD]_IMM$")>; 2721 2722// Contiguous store four structures from four vectors, scalar + scalar 2723def : InstRW<[V2Write_11cyc_18L01_18S_18V01], (instregex "^ST4[BHWD]$")>; 2724 2725// Non temporal store, scalar + imm 2726def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>; 2727 2728// Non temporal store, scalar + scalar 2729def : InstRW<[V2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>; 2730def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>; 2731 2732// Scatter non temporal store, vector + scalar 32-bit element size 2733def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^STNT1[BHW]_ZZR_S")>; 2734 2735// Scatter non temporal store, vector + scalar 64-bit element size 2736def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^STNT1[BHWD]_ZZR_D")>; 2737 2738// Scatter store vector + imm 32-bit element size 2739def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_IMM$", 2740 "^SST1W_IMM$")>; 2741 2742// Scatter store vector + imm 64-bit element size 2743def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_IMM$", 2744 "^SST1D_IMM$")>; 2745 2746// Scatter store, 32-bit scaled offset 2747def : InstRW<[V2Write_4cyc_4L01_4V01], 2748 (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>; 2749 2750// Scatter store, 32-bit unpacked unscaled offset 2751def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_[SU]XTW$", 2752 "^SST1D_[SU]XTW$")>; 2753 2754// Scatter store, 32-bit unpacked scaled offset 2755def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$", 2756 "^SST1D_[SU]XTW_SCALED$")>; 2757 2758// Scatter store, 32-bit unscaled offset 2759def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_[SU]XTW$", 2760 "^SST1W_[SU]XTW$")>; 2761 2762// Scatter store, 64-bit scaled offset 2763def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_SCALED$", 2764 "^SST1D_SCALED$")>; 2765 2766// Scatter store, 64-bit unscaled offset 2767def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D$", 2768 "^SST1D$")>; 2769 2770// SVE Miscellaneous instructions 2771// ----------------------------------------------------------------------------- 2772 2773// Read first fault register, unpredicated 2774def : InstRW<[V2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>; 2775 2776// Read first fault register, predicated 2777def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>; 2778 2779// Read first fault register and set flags 2780def : InstRW<[V2Write_4or5cyc_2M0_2M], (instrs RDFFRS_PPz)>; 2781 2782// Set first fault register 2783// Write to first fault register 2784def : InstRW<[V2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>; 2785 2786// Prefetch 2787// NOTE: This is not specified in the SOG. 2788def : InstRW<[V2Write_4cyc_1L], (instregex "^PRF[BHWD]")>; 2789 2790// SVE Cryptographic instructions 2791// ----------------------------------------------------------------------------- 2792 2793// Crypto AES ops 2794def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$", 2795 "^AESI?MC_ZZ_B$")>; 2796 2797// Crypto SHA3 ops 2798def : InstRW<[V2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$", 2799 "^RAX1_ZZZ_D$", 2800 "^XAR_ZZZI_[BHSD]$")>; 2801 2802// Crypto SM4 ops 2803def : InstRW<[V2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>; 2804 2805} 2806