1//=- AArch64SchedNeoverseV2.td - NeoverseV2 Scheduling Defs --*- tablegen -*-=// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the scheduling model for the Arm Neoverse V2 processors. 10// All information is taken from the V2 Software Optimisation guide: 11// 12// https://developer.arm.com/documentation/PJDOC-466751330-593177/r0p2 13// 14//===----------------------------------------------------------------------===// 15 16def NeoverseV2Model : SchedMachineModel { 17 let IssueWidth = 16; // Micro-ops dispatched at a time. 18 let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer. NOTE: Copied from N2. 19 let LoadLatency = 4; // Optimistic load latency. 20 let MispredictPenalty = 10; // Extra cycles for mispredicted branch. NOTE: Copied from N2. 21 let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57. 22 let CompleteModel = 1; 23 24 list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, 25 [HasSVE2p1, HasCPA]); 26} 27 28//===----------------------------------------------------------------------===// 29// Define each kind of processor resource and number available on Neoverse V2. 30// Instructions are first fetched and then decoded into internal macro-ops 31// (MOPs). From there, the MOPs proceed through register renaming and dispatch 32// stages. A MOP can be split into two micro-ops further down the pipeline 33// after the decode stage. Once dispatched, micro-ops wait for their operands 34// and issue out-of-order to one of seventeen issue pipelines. Each issue 35// pipeline can accept one micro-op per cycle. 36 37let SchedModel = NeoverseV2Model in { 38 39// Define the (17) issue ports. 40def V2UnitB : ProcResource<2>; // Branch 0/1 41def V2UnitS0 : ProcResource<1>; // Integer single-cycle 0 42def V2UnitS1 : ProcResource<1>; // Integer single-cycle 1 43def V2UnitS2 : ProcResource<1>; // Integer single-cycle 2 44def V2UnitS3 : ProcResource<1>; // Integer single-cycle 3 45def V2UnitM0 : ProcResource<1>; // Integer single/multicycle 0 46def V2UnitM1 : ProcResource<1>; // Integer single/multicycle 1 47def V2UnitV0 : ProcResource<1>; // FP/ASIMD 0 48def V2UnitV1 : ProcResource<1>; // FP/ASIMD 1 49def V2UnitV2 : ProcResource<1>; // FP/ASIMD 2 50def V2UnitV3 : ProcResource<1>; // FP/ASIMD 3 51def V2UnitL01 : ProcResource<2>; // Load/Store 0/1 52def V2UnitL2 : ProcResource<1>; // Load 2 53def V2UnitD : ProcResource<2>; // Store data 0/1 54 55def V2UnitR : ProcResGroup<[V2UnitS0, V2UnitS1]>; // Integer single-cycle 0/1 56def V2UnitS : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3]>; // Integer single-cycle 0/1/2/3 57def V2UnitF : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitM0, V2UnitM1]>; // Integer single-cycle 0/1 and single/multicycle 0/1 58def V2UnitI : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3, V2UnitM0, V2UnitM1]>; // Integer single-cycle 0/1/2/3 and single/multicycle 0/1 59def V2UnitM : ProcResGroup<[V2UnitM0, V2UnitM1]>; // Integer single/multicycle 0/1 60def V2UnitL : ProcResGroup<[V2UnitL01, V2UnitL2]>; // Load/Store 0/1 and Load 2 61def V2UnitV : ProcResGroup<[V2UnitV0, V2UnitV1, V2UnitV2, V2UnitV3]>; // FP/ASIMD 0/1/2/3 62def V2UnitV01 : ProcResGroup<[V2UnitV0, V2UnitV1]>; // FP/ASIMD 0/1 63def V2UnitV02 : ProcResGroup<[V2UnitV0, V2UnitV2]>; // FP/ASIMD 0/2 64def V2UnitV13 : ProcResGroup<[V2UnitV1, V2UnitV3]>; // FP/ASIMD 1/3 65def V2UnitV23 : ProcResGroup<[V2UnitV2, V2UnitV3]>; // FP/ASIMD 2/3 66 67// Define commonly used read types. 68 69// No forwarding is provided for these types. 70def : ReadAdvance<ReadI, 0>; 71def : ReadAdvance<ReadISReg, 0>; 72def : ReadAdvance<ReadIEReg, 0>; 73def : ReadAdvance<ReadIM, 0>; 74def : ReadAdvance<ReadIMA, 0>; 75def : ReadAdvance<ReadID, 0>; 76def : ReadAdvance<ReadExtrHi, 0>; 77def : ReadAdvance<ReadAdrBase, 0>; 78def : ReadAdvance<ReadST, 0>; 79def : ReadAdvance<ReadVLD, 0>; 80 81// NOTE: Copied from N2. 82def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } 83def : WriteRes<WriteBarrier, []> { let Latency = 1; } 84def : WriteRes<WriteHint, []> { let Latency = 1; } 85def : WriteRes<WriteLDHi, []> { let Latency = 4; } 86 87//===----------------------------------------------------------------------===// 88// Define customized scheduler read/write types specific to the Neoverse V2. 89 90//===----------------------------------------------------------------------===// 91 92// Define generic 0 micro-op types 93def V2Write_0cyc : SchedWriteRes<[]> { let Latency = 0; } 94 95// Define generic 1 micro-op types 96 97def V2Write_1cyc_1B : SchedWriteRes<[V2UnitB]> { let Latency = 1; } 98def V2Write_1cyc_1F : SchedWriteRes<[V2UnitF]> { let Latency = 1; } 99def V2Write_1cyc_1I : SchedWriteRes<[V2UnitI]> { let Latency = 1; } 100def V2Write_1cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 1; } 101def V2Write_1cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 1; } 102def V2Write_1cyc_1L01 : SchedWriteRes<[V2UnitL01]> { let Latency = 1; } 103def V2Write_2cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 2; } 104def V2Write_3cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 3; } 105def V2Write_2cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 2; } 106def V2Write_3cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 3; } 107def V2Write_5cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 5; } 108def V2Write_12cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 12; 109 let ReleaseAtCycles = [12]; } 110def V2Write_20cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 20; 111 let ReleaseAtCycles = [20]; } 112def V2Write_4cyc_1L : SchedWriteRes<[V2UnitL]> { let Latency = 4; } 113def V2Write_6cyc_1L : SchedWriteRes<[V2UnitL]> { let Latency = 6; } 114def V2Write_2cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 2; } 115def V2Write_2cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 2; } 116def V2Write_2cyc_1V01 : SchedWriteRes<[V2UnitV01]> { let Latency = 2; } 117def V2Write_2cyc_1V23 : SchedWriteRes<[V2UnitV23]> { let Latency = 2; } 118def V2Write_3cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 3; } 119def V2Write_3cyc_1V01 : SchedWriteRes<[V2UnitV01]> { let Latency = 3; 120 let ReleaseAtCycles = [2]; } 121def V2Write_3cyc_1V23 : SchedWriteRes<[V2UnitV23]> { let Latency = 3; } 122def V2Write_4cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 4; } 123def V2Write_5cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 5; } 124def V2Write_6cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 6; } 125def V2Write_12cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 12; } 126def V2Write_3cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 3; } 127def V2Write_3cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 3; } 128def V2Write_4cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 4; } 129def V2Write_4cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 130def V2Write_7cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 7; 131 let ReleaseAtCycles = [7]; } 132def V2Write_7cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 7; 133 let ReleaseAtCycles = [2]; } 134def V2Write_9cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 9; } 135def V2Write_9cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 9; 136 let ReleaseAtCycles = [2]; } 137def V2Write_10cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 10; } 138def V2Write_10cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 10; 139 let ReleaseAtCycles = [2]; } 140def V2Write_12cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 12; 141 let ReleaseAtCycles = [11]; } 142def V2Write_13cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 13; } 143def V2Write_15cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 15; } 144def V2Write_15cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 15; 145 let ReleaseAtCycles = [8]; } 146def V2Write_16cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 16; } 147def V2Write_16cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 16; 148 let ReleaseAtCycles = [8]; } 149def V2Write_20cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 20; 150 let ReleaseAtCycles = [20]; } 151def V2Write_2cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 2; } 152def V2Write_2cyc_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 2; } 153def V2Write_3cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 3; } 154def V2Write_4cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 4; } 155def V2Write_4cyc_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } 156def V2Write_6cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 6; } 157def V2Write_10cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 10; } 158def V2Write_6cyc_1L01 : SchedWriteRes<[V2UnitL01]> { let Latency = 6; } 159 160//===----------------------------------------------------------------------===// 161// Define generic 2 micro-op types 162 163def V2Write_1cyc_1B_1R : SchedWriteRes<[V2UnitB, V2UnitR]> { 164 let Latency = 1; 165 let NumMicroOps = 2; 166} 167 168def V2Write_6cyc_1M0_1B : SchedWriteRes<[V2UnitM0, V2UnitB]> { 169 let Latency = 6; 170 let NumMicroOps = 2; 171} 172 173def V2Write_9cyc_1M0_1L : SchedWriteRes<[V2UnitM0, V2UnitL]> { 174 let Latency = 9; 175 let NumMicroOps = 2; 176} 177 178def V2Write_3cyc_1I_1M : SchedWriteRes<[V2UnitI, V2UnitM]> { 179 let Latency = 3; 180 let NumMicroOps = 2; 181} 182 183def V2Write_1cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { 184 let Latency = 1; 185 let NumMicroOps = 2; 186} 187 188def V2Write_3cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { 189 let Latency = 3; 190 let NumMicroOps = 2; 191} 192 193def V2Write_4cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { 194 let Latency = 4; 195 let NumMicroOps = 2; 196} 197 198def V2Write_5cyc_1L_1F : SchedWriteRes<[V2UnitL, V2UnitF]> { 199 let Latency = 5; 200 let NumMicroOps = 2; 201} 202 203def V2Write_6cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> { 204 let Latency = 6; 205 let NumMicroOps = 2; 206} 207 208def V2Write_7cyc_1F_1L : SchedWriteRes<[V2UnitF, V2UnitL]> { 209 let Latency = 7; 210 let NumMicroOps = 2; 211} 212 213def V2Write_7cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> { 214 let Latency = 7; 215 let NumMicroOps = 2; 216} 217 218def V2Write_1cyc_1L01_1D : SchedWriteRes<[V2UnitL01, V2UnitD]> { 219 let Latency = 1; 220 let NumMicroOps = 2; 221} 222 223def V2Write_5cyc_1M0_1V : SchedWriteRes<[V2UnitM0, V2UnitV]> { 224 let Latency = 5; 225 let NumMicroOps = 2; 226} 227 228def V2Write_2cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> { 229 let Latency = 2; 230 let NumMicroOps = 2; 231} 232 233def V2Write_2cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> { 234 let Latency = 2; 235 let NumMicroOps = 2; 236} 237 238def V2Write_2cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> { 239 let Latency = 2; 240 let NumMicroOps = 2; 241} 242 243def V2Write_4cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> { 244 let Latency = 4; 245 let NumMicroOps = 2; 246} 247 248def V2Write_4cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> { 249 let Latency = 4; 250 let NumMicroOps = 2; 251} 252 253def V2Write_4cyc_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> { 254 let Latency = 4; 255 let NumMicroOps = 2; 256} 257 258def V2Write_4cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> { 259 let Latency = 4; 260 let NumMicroOps = 2; 261} 262 263def V2Write_4cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> { 264 let Latency = 4; 265 let NumMicroOps = 2; 266} 267 268def V2Write_4cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> { 269 let Latency = 4; 270 let NumMicroOps = 2; 271} 272 273def V2Write_6cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> { 274 let Latency = 6; 275 let NumMicroOps = 2; 276} 277 278def V2Write_6cyc_2L : SchedWriteRes<[V2UnitL, V2UnitL]> { 279 let Latency = 6; 280 let NumMicroOps = 2; 281} 282 283def V2Write_8cyc_1L_1V : SchedWriteRes<[V2UnitL, V2UnitV]> { 284 let Latency = 8; 285 let NumMicroOps = 2; 286} 287 288def V2Write_4cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> { 289 let Latency = 4; 290 let NumMicroOps = 2; 291} 292 293def V2Write_3cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { 294 let Latency = 3; 295 let NumMicroOps = 2; 296} 297 298def V2Write_4cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { 299 let Latency = 4; 300 let NumMicroOps = 2; 301} 302 303def V2Write_1cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { 304 let Latency = 1; 305 let NumMicroOps = 2; 306} 307 308def V2Write_2cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { 309 let Latency = 2; 310 let NumMicroOps = 2; 311} 312 313def V2Write_6cyc_2V1 : SchedWriteRes<[V2UnitV1, V2UnitV1]> { 314 let Latency = 6; 315 let NumMicroOps = 2; 316} 317 318def V2Write_4cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> { 319 let Latency = 4; 320 let NumMicroOps = 2; 321} 322 323def V2Write_5cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> { 324 let Latency = 5; 325 let NumMicroOps = 2; 326} 327 328def V2Write_5cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> { 329 let Latency = 5; 330 let NumMicroOps = 2; 331} 332 333def V2Write_5cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> { 334 let Latency = 5; 335 let NumMicroOps = 2; 336} 337 338def V2Write_6cyc_1V1_1M0 : SchedWriteRes<[V2UnitV1, V2UnitM0]> { 339 let Latency = 6; 340 let NumMicroOps = 2; 341} 342 343def V2Write_7cyc_1M0_1V02 : SchedWriteRes<[V2UnitM0, V2UnitV02]> { 344 let Latency = 7; 345 let NumMicroOps = 2; 346} 347 348def V2Write_2cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> { 349 let Latency = 2; 350 let NumMicroOps = 2; 351} 352 353def V2Write_3cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> { 354 let Latency = 3; 355 let NumMicroOps = 2; 356} 357 358def V2Write_6cyc_1V_1V13 : SchedWriteRes<[V2UnitV, V2UnitV13]> { 359 let Latency = 6; 360 let NumMicroOps = 2; 361} 362 363def V2Write_6cyc_1L_1M : SchedWriteRes<[V2UnitL, V2UnitM]> { 364 let Latency = 6; 365 let NumMicroOps = 2; 366} 367 368def V2Write_6cyc_1L_1S : SchedWriteRes<[V2UnitL, V2UnitS]> { 369 let Latency = 6; 370 let NumMicroOps = 2; 371} 372 373def V2Write_4cyc_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> { 374 let Latency = 4; 375 let NumMicroOps = 2; 376} 377 378def V2Write_8cyc_1M0_1V01 : SchedWriteRes<[V2UnitM0, V2UnitV01]> { 379 let Latency = 8; 380 let NumMicroOps = 2; 381} 382 383//===----------------------------------------------------------------------===// 384// Define generic 3 micro-op types 385 386def V2Write_1cyc_1L01_1D_1I : SchedWriteRes<[V2UnitL01, V2UnitD, V2UnitI]> { 387 let Latency = 1; 388 let NumMicroOps = 3; 389} 390 391def V2Write_2cyc_1L01_1V01_1I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitI]> { 392 let Latency = 2; 393 let NumMicroOps = 3; 394} 395 396def V2Write_2cyc_1L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> { 397 let Latency = 2; 398 let NumMicroOps = 3; 399} 400 401def V2Write_4cyc_1L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> { 402 let Latency = 4; 403 let NumMicroOps = 3; 404} 405 406def V2Write_9cyc_1L_2V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> { 407 let Latency = 9; 408 let NumMicroOps = 3; 409} 410 411def V2Write_4cyc_3V01 : SchedWriteRes<[V2UnitV01, V2UnitV01, V2UnitV01]> { 412 let Latency = 4; 413 let NumMicroOps = 3; 414} 415 416def V2Write_7cyc_1M_1M0_1V : SchedWriteRes<[V2UnitM, V2UnitM0, V2UnitV]> { 417 let Latency = 7; 418 let NumMicroOps = 3; 419} 420 421def V2Write_2cyc_1L01_1S_1V : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV]> { 422 let Latency = 2; 423 let NumMicroOps = 3; 424} 425 426def V2Write_2cyc_1L01_1S_1V01 : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV01]> { 427 let Latency = 2; 428 let NumMicroOps = 3; 429} 430 431def V2Write_6cyc_3L : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL]> { 432 let Latency = 6; 433 let NumMicroOps = 3; 434} 435 436def V2Write_6cyc_3V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV]> { 437 let Latency = 6; 438 let NumMicroOps = 3; 439} 440 441def V2Write_8cyc_1L_2V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> { 442 let Latency = 8; 443 let NumMicroOps = 3; 444} 445 446//===----------------------------------------------------------------------===// 447// Define generic 4 micro-op types 448 449def V2Write_2cyc_1L01_2V01_1I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01, 450 V2UnitI]> { 451 let Latency = 2; 452 let NumMicroOps = 4; 453} 454 455def V2Write_2cyc_2L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, 456 V2UnitV01, V2UnitV01]> { 457 let Latency = 2; 458 let NumMicroOps = 4; 459} 460 461def V2Write_4cyc_2L01_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, 462 V2UnitV01, V2UnitV01]> { 463 let Latency = 4; 464 let NumMicroOps = 4; 465} 466 467def V2Write_5cyc_1I_3L : SchedWriteRes<[V2UnitI, V2UnitL, V2UnitL, V2UnitL]> { 468 let Latency = 5; 469 let NumMicroOps = 4; 470} 471 472def V2Write_9cyc_2L_2V1 : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV1, 473 V2UnitV1]> { 474 let Latency = 9; 475 let NumMicroOps = 4; 476} 477 478def V2Write_6cyc_4V0 : SchedWriteRes<[V2UnitV0, V2UnitV0, V2UnitV0, V2UnitV0]> { 479 let Latency = 6; 480 let NumMicroOps = 4; 481} 482 483def V2Write_8cyc_4V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { 484 let Latency = 8; 485 let NumMicroOps = 4; 486} 487 488def V2Write_6cyc_2V_2V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13, 489 V2UnitV13]> { 490 let Latency = 6; 491 let NumMicroOps = 4; 492} 493 494def V2Write_8cyc_2V_2V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13, 495 V2UnitV13]> { 496 let Latency = 8; 497 let NumMicroOps = 4; 498} 499 500def V2Write_6cyc_4V02 : SchedWriteRes<[V2UnitV02, V2UnitV02, V2UnitV02, 501 V2UnitV02]> { 502 let Latency = 6; 503 let NumMicroOps = 4; 504} 505 506def V2Write_6cyc_4V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { 507 let Latency = 6; 508 let NumMicroOps = 4; 509} 510 511def V2Write_8cyc_2L_2V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> { 512 let Latency = 8; 513 let NumMicroOps = 4; 514} 515 516def V2Write_9cyc_2L_2V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> { 517 let Latency = 9; 518 let NumMicroOps = 4; 519} 520 521def V2Write_2cyc_2L01_2V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV, 522 V2UnitV]> { 523 let Latency = 2; 524 let NumMicroOps = 4; 525} 526 527def V2Write_4cyc_2L01_2V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV, 528 V2UnitV]> { 529 let Latency = 4; 530 let NumMicroOps = 4; 531} 532 533def V2Write_8cyc_2M0_2V02 : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitV02, 534 V2UnitV02]> { 535 let Latency = 8; 536 let NumMicroOps = 4; 537} 538 539def V2Write_8cyc_2V_2V1 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV1, 540 V2UnitV1]> { 541 let Latency = 8; 542 let NumMicroOps = 4; 543} 544 545def V2Write_4cyc_2M0_2M : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM, 546 V2UnitM]> { 547 let Latency = 4; 548 let NumMicroOps = 4; 549} 550 551def V2Write_5cyc_2M0_2M : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM, 552 V2UnitM]> { 553 let Latency = 5; 554 let NumMicroOps = 4; 555} 556 557def V2Write_6cyc_2I_2L : SchedWriteRes<[V2UnitI, V2UnitI, V2UnitL, V2UnitL]> { 558 let Latency = 6; 559 let NumMicroOps = 4; 560} 561 562def V2Write_7cyc_4L : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL]> { 563 let Latency = 7; 564 let NumMicroOps = 4; 565} 566 567def V2Write_6cyc_1L01_3V01 : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01, 568 V2UnitV01]> { 569 let Latency = 6; 570 let NumMicroOps = 4; 571} 572 573//===----------------------------------------------------------------------===// 574// Define generic 5 micro-op types 575 576def V2Write_2cyc_1L01_2V01_2I : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01, 577 V2UnitI, V2UnitI]> { 578 let Latency = 2; 579 let NumMicroOps = 5; 580} 581 582def V2Write_8cyc_2L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV, 583 V2UnitV]> { 584 let Latency = 8; 585 let NumMicroOps = 5; 586} 587 588def V2Write_9cyc_1L_4V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV, 589 V2UnitV]> { 590 let Latency = 9; 591 let NumMicroOps = 5; 592} 593 594def V2Write_10cyc_1L_4V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV, 595 V2UnitV]> { 596 let Latency = 10; 597 let NumMicroOps = 5; 598} 599 600def V2Write_6cyc_5V : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV, 601 V2UnitV]> { 602 let Latency = 6; 603 let NumMicroOps = 5; 604} 605 606//===----------------------------------------------------------------------===// 607// Define generic 6 micro-op types 608 609def V2Write_8cyc_3L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, 610 V2UnitV, V2UnitV, V2UnitV]> { 611 let Latency = 8; 612 let NumMicroOps = 6; 613} 614 615def V2Write_9cyc_3L_3V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, 616 V2UnitV, V2UnitV, V2UnitV]> { 617 let Latency = 9; 618 let NumMicroOps = 6; 619} 620 621def V2Write_9cyc_2L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, 622 V2UnitV, V2UnitV, V2UnitV]> { 623 let Latency = 9; 624 let NumMicroOps = 6; 625} 626 627def V2Write_9cyc_2L_2V_2S : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, 628 V2UnitV, V2UnitS, V2UnitS]> { 629 let Latency = 9; 630 let NumMicroOps = 6; 631} 632 633def V2Write_9cyc_2V_4V13 : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13, 634 V2UnitV13, V2UnitV13, V2UnitV13]> { 635 let Latency = 9; 636 let NumMicroOps = 6; 637} 638 639def V2Write_2cyc_3L01_3V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 640 V2UnitV, V2UnitV, V2UnitV]> { 641 let Latency = 2; 642 let NumMicroOps = 6; 643} 644 645def V2Write_4cyc_2L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01, 646 V2UnitV01, V2UnitV01, V2UnitV01]> { 647 let Latency = 4; 648 let NumMicroOps = 6; 649} 650 651def V2Write_5cyc_2L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01, 652 V2UnitV01, V2UnitV01, V2UnitV01]> { 653 let Latency = 5; 654 let NumMicroOps = 6; 655} 656 657def V2Write_2cyc_3L01_3V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 658 V2UnitV01, V2UnitV01, V2UnitV01]> { 659 let Latency = 2; 660 let NumMicroOps = 6; 661} 662 663def V2Write_4cyc_2L01_2S_2V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitS, 664 V2UnitS, V2UnitV01, V2UnitV01]> { 665 let Latency = 4; 666 let NumMicroOps = 6; 667} 668 669//===----------------------------------------------------------------------===// 670// Define generic 7 micro-op types 671 672def V2Write_8cyc_3L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, 673 V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { 674 let Latency = 8; 675 let NumMicroOps = 7; 676} 677 678//===----------------------------------------------------------------------===// 679// Define generic 8 micro-op types 680 681def V2Write_2cyc_4L01_4V : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 682 V2UnitL01, V2UnitV, V2UnitV, V2UnitV, 683 V2UnitV]> { 684 let Latency = 2; 685 let NumMicroOps = 8; 686} 687 688def V2Write_2cyc_4L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 689 V2UnitL01, V2UnitV01, V2UnitV01, 690 V2UnitV01, V2UnitV01]> { 691 let Latency = 2; 692 let NumMicroOps = 8; 693} 694 695def V2Write_4cyc_4L01_4V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 696 V2UnitL01, V2UnitV01, V2UnitV01, 697 V2UnitV01, V2UnitV01]> { 698 let Latency = 4; 699 let NumMicroOps = 8; 700} 701 702def V2Write_6cyc_2L01_6V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01, 703 V2UnitV01, V2UnitV01, V2UnitV01, 704 V2UnitV01, V2UnitV01]> { 705 let Latency = 6; 706 let NumMicroOps = 8; 707} 708 709def V2Write_8cyc_4L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL, 710 V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { 711 let Latency = 8; 712 let NumMicroOps = 8; 713} 714 715//===----------------------------------------------------------------------===// 716// Define generic 9 micro-op types 717 718def V2Write_6cyc_3L01_6V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 719 V2UnitV01, V2UnitV01, V2UnitV01, 720 V2UnitV01, V2UnitV01, V2UnitV01]> { 721 let Latency = 6; 722 let NumMicroOps = 9; 723} 724 725def V2Write_10cyc_1L_8V : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV, V2UnitV, 726 V2UnitV, V2UnitV, V2UnitV, V2UnitV, 727 V2UnitV]> { 728 let Latency = 10; 729 let NumMicroOps = 9; 730} 731 732def V2Write_10cyc_3V_3L_3S : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, 733 V2UnitL, V2UnitL, V2UnitL, 734 V2UnitS, V2UnitS, V2UnitS]> { 735 let Latency = 10; 736 let NumMicroOps = 9; 737} 738 739//===----------------------------------------------------------------------===// 740// Define generic 10 micro-op types 741 742def V2Write_9cyc_6L_4V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL, 743 V2UnitL, V2UnitL, V2UnitV, V2UnitV, 744 V2UnitV, V2UnitV]> { 745 let Latency = 9; 746 let NumMicroOps = 10; 747} 748 749//===----------------------------------------------------------------------===// 750// Define generic 12 micro-op types 751 752def V2Write_5cyc_4L01_8V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 753 V2UnitL01, V2UnitV01, V2UnitV01, 754 V2UnitV01, V2UnitV01, V2UnitV01, 755 V2UnitV01, V2UnitV01, V2UnitV01]> { 756 let Latency = 5; 757 let NumMicroOps = 12; 758} 759 760def V2Write_9cyc_4L_8V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, 761 V2UnitL, V2UnitV, V2UnitV, 762 V2UnitV, V2UnitV, V2UnitV, 763 V2UnitV, V2UnitV, V2UnitV]> { 764 let Latency = 9; 765 let NumMicroOps = 12; 766} 767 768def V2Write_10cyc_4L_8V : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, 769 V2UnitL, V2UnitV, V2UnitV, 770 V2UnitV, V2UnitV, V2UnitV, 771 V2UnitV, V2UnitV, V2UnitV]> { 772 let Latency = 10; 773 let NumMicroOps = 12; 774} 775 776//===----------------------------------------------------------------------===// 777// Define generic 16 micro-op types 778 779def V2Write_7cyc_4L01_12V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 780 V2UnitL01, V2UnitV01, V2UnitV01, 781 V2UnitV01, V2UnitV01, V2UnitV01, 782 V2UnitV01, V2UnitV01, V2UnitV01, 783 V2UnitV01, V2UnitV01, V2UnitV01, 784 V2UnitV01]> { 785 let Latency = 7; 786 let NumMicroOps = 16; 787} 788 789def V2Write_10cyc_4L_8V_4S : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, 790 V2UnitL, V2UnitV, V2UnitV, 791 V2UnitV, V2UnitV, V2UnitV, 792 V2UnitV, V2UnitV, V2UnitV, 793 V2UnitS, V2UnitS, V2UnitS, 794 V2UnitS]> { 795 let Latency = 10; 796 let NumMicroOps = 16; 797} 798 799//===----------------------------------------------------------------------===// 800// Define generic 18 micro-op types 801 802def V2Write_7cyc_9L01_9V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 803 V2UnitL01, V2UnitL01, V2UnitL01, 804 V2UnitL01, V2UnitL01, V2UnitL01, 805 V2UnitV01, V2UnitV01, V2UnitV01, 806 V2UnitV01, V2UnitV01, V2UnitV01, 807 V2UnitV01, V2UnitV01, V2UnitV01]> { 808 let Latency = 7; 809 let NumMicroOps = 18; 810} 811 812//===----------------------------------------------------------------------===// 813// Define generic 27 micro-op types 814 815def V2Write_7cyc_9L01_9S_9V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 816 V2UnitL01, V2UnitL01, V2UnitL01, 817 V2UnitL01, V2UnitL01, V2UnitL01, 818 V2UnitS, V2UnitS, V2UnitS, 819 V2UnitS, V2UnitS, V2UnitS, 820 V2UnitS, V2UnitS, V2UnitS, 821 V2UnitV01, V2UnitV01, V2UnitV01, 822 V2UnitV01, V2UnitV01, V2UnitV01, 823 V2UnitV01, V2UnitV01, 824 V2UnitV01]> { 825 let Latency = 7; 826 let NumMicroOps = 27; 827} 828 829//===----------------------------------------------------------------------===// 830// Define generic 36 micro-op types 831 832def V2Write_11cyc_18L01_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, 833 V2UnitL01, V2UnitL01, V2UnitL01, 834 V2UnitL01, V2UnitL01, V2UnitL01, 835 V2UnitL01, V2UnitL01, V2UnitL01, 836 V2UnitL01, V2UnitL01, V2UnitL01, 837 V2UnitL01, V2UnitL01, V2UnitL01, 838 V2UnitV01, V2UnitV01, V2UnitV01, 839 V2UnitV01, V2UnitV01, V2UnitV01, 840 V2UnitV01, V2UnitV01, V2UnitV01, 841 V2UnitV01, V2UnitV01, V2UnitV01, 842 V2UnitV01, V2UnitV01, V2UnitV01, 843 V2UnitV01, V2UnitV01, 844 V2UnitV01]> { 845 let Latency = 11; 846 let NumMicroOps = 36; 847} 848 849//===----------------------------------------------------------------------===// 850// Define generic 54 micro-op types 851 852def V2Write_11cyc_18L01_18S_18V01 : SchedWriteRes<[V2UnitL01, V2UnitL01, 853 V2UnitL01, V2UnitL01, 854 V2UnitL01, V2UnitL01, 855 V2UnitL01, V2UnitL01, 856 V2UnitL01, V2UnitL01, 857 V2UnitL01, V2UnitL01, 858 V2UnitL01, V2UnitL01, 859 V2UnitL01, V2UnitL01, 860 V2UnitL01, V2UnitL01, 861 V2UnitS, V2UnitS, V2UnitS, 862 V2UnitS, V2UnitS, V2UnitS, 863 V2UnitS, V2UnitS, V2UnitS, 864 V2UnitS, V2UnitS, V2UnitS, 865 V2UnitS, V2UnitS, V2UnitS, 866 V2UnitS, V2UnitS, V2UnitS, 867 V2UnitV01, V2UnitV01, 868 V2UnitV01, V2UnitV01, 869 V2UnitV01, V2UnitV01, 870 V2UnitV01, V2UnitV01, 871 V2UnitV01, V2UnitV01, 872 V2UnitV01, V2UnitV01, 873 V2UnitV01, V2UnitV01, 874 V2UnitV01, V2UnitV01, 875 V2UnitV01, V2UnitV01]> { 876 let Latency = 11; 877 let NumMicroOps = 54; 878} 879 880//===----------------------------------------------------------------------===// 881// Define predicate-controlled types 882 883def V2Write_ArithI : SchedWriteVariant<[ 884 SchedVar<IsCheapLSL, [V2Write_1cyc_1I]>, 885 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>; 886 887def V2Write_ArithF : SchedWriteVariant<[ 888 SchedVar<IsCheapLSL, [V2Write_1cyc_1F]>, 889 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>; 890 891def V2Write_Logical : SchedWriteVariant<[ 892 SchedVar<NeoverseNoLSL, [V2Write_1cyc_1F]>, 893 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>; 894 895def V2Write_Extr : SchedWriteVariant<[ 896 SchedVar<IsRORImmIdiomPred, [V2Write_1cyc_1I]>, 897 SchedVar<NoSchedPred, [V2Write_3cyc_1I_1M]>]>; 898 899def V2Write_LdrHQ : SchedWriteVariant<[ 900 SchedVar<NeoverseHQForm, [V2Write_7cyc_1I_1L]>, 901 SchedVar<NoSchedPred, [V2Write_6cyc_1L]>]>; 902 903def V2Write_StrHQ : SchedWriteVariant<[ 904 SchedVar<NeoverseHQForm, [V2Write_2cyc_1L01_1V01_1I]>, 905 SchedVar<NoSchedPred, [V2Write_2cyc_1L01_1V01]>]>; 906 907def V2Write_0or1cyc_1I : SchedWriteVariant<[ 908 SchedVar<NeoverseZeroMove, [V2Write_0cyc]>, 909 SchedVar<NoSchedPred, [V2Write_1cyc_1I]>]>; 910 911def V2Write_0or2cyc_1V : SchedWriteVariant<[ 912 SchedVar<NeoverseZeroMove, [V2Write_0cyc]>, 913 SchedVar<NoSchedPred, [V2Write_2cyc_1V]>]>; 914 915def V2Write_0or3cyc_1M0 : SchedWriteVariant<[ 916 SchedVar<NeoverseZeroMove, [V2Write_0cyc]>, 917 SchedVar<NoSchedPred, [V2Write_3cyc_1M0]>]>; 918 919def V2Write_2or3cyc_1M : SchedWriteVariant<[ 920 SchedVar<NeoversePdIsPg, [V2Write_3cyc_1M]>, 921 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>; 922 923def V2Write_3or4cyc_2M : SchedWriteVariant<[ 924 SchedVar<NeoversePdIsPg, [V2Write_4cyc_2M]>, 925 SchedVar<NoSchedPred, [V2Write_3cyc_2M]>]>; 926 927def V2Write_1or2cyc_1M0 : SchedWriteVariant<[ 928 SchedVar<NeoversePdIsPg, [V2Write_2cyc_1M0]>, 929 SchedVar<NoSchedPred, [V2Write_1cyc_1M0]>]>; 930 931def V2Write_2or3cyc_1M0 : SchedWriteVariant<[ 932 SchedVar<NeoversePdIsPg, [V2Write_3cyc_1M0]>, 933 SchedVar<NoSchedPred, [V2Write_2cyc_1M0]>]>; 934 935def V2Write_1or2cyc_1M0_1M : SchedWriteVariant<[ 936 SchedVar<NeoversePdIsPg, [V2Write_2cyc_1M0_1M]>, 937 SchedVar<NoSchedPred, [V2Write_1cyc_1M0_1M]>]>; 938 939def V2Write_3or4cyc_1M0_1M : SchedWriteVariant<[ 940 SchedVar<NeoversePdIsPg, [V2Write_4cyc_1M0_1M]>, 941 SchedVar<NoSchedPred, [V2Write_3cyc_1M0_1M]>]>; 942 943def V2Write_4or5cyc_2M0_2M : SchedWriteVariant<[ 944 SchedVar<NeoversePdIsPg, [V2Write_5cyc_2M0_2M]>, 945 SchedVar<NoSchedPred, [V2Write_4cyc_2M0_2M]>]>; 946 947def V2Write_4or5cyc_1V0_1M0 : SchedWriteVariant<[ 948 SchedVar<NeoversePdIsPg, [V2Write_5cyc_1V0_1M0]>, 949 SchedVar<NoSchedPred, [V2Write_4cyc_1V0_1M0]>]>; 950 951def V2Write_2or3cyc_1V0_1M : SchedWriteVariant<[ 952 SchedVar<NeoversePdIsPg, [V2Write_3cyc_1V0_1M]>, 953 SchedVar<NoSchedPred, [V2Write_2cyc_1V0_1M]>]>; 954 955def V2Write_IncDec : SchedWriteVariant<[ 956 SchedVar<NeoverseCheapIncDec, [V2Write_1cyc_1F]>, 957 SchedVar<NoSchedPred, [V2Write_2cyc_1M]>]>; 958 959//===----------------------------------------------------------------------===// 960// Define forwarded types 961 962// NOTE: SOG, p. 16, n. 2: Accumulator forwarding is not supported for 963// consumers of 64 bit multiply high operations? 964def V2Wr_IM : SchedWriteRes<[V2UnitM]> { let Latency = 2; } 965def V2Wr_IMA : SchedWriteRes<[V2UnitM0]> { let Latency = 2; } 966def V2Wr_IMUL : SchedWriteVariant<[ 967 SchedVar<IsReg3ZeroPred, [V2Wr_IM]>, 968 SchedVar<NoSchedPred, [V2Wr_IMA]>]>; 969def V2Rd_IMA : SchedReadAdvance<1, [V2Wr_IMA]>; 970 971def V2Wr_FMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; } 972def V2Rd_FMA : SchedReadAdvance<2, [WriteFMul, V2Wr_FMA]>; 973 974def V2Wr_VA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } 975def V2Rd_VA : SchedReadAdvance<3, [V2Wr_VA]>; 976 977def V2Wr_VDOT : SchedWriteRes<[V2UnitV]> { let Latency = 3; } 978def V2Rd_VDOT : SchedReadAdvance<2, [V2Wr_VDOT]>; 979 980def V2Wr_VMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; } 981def V2Rd_VMMA : SchedReadAdvance<2, [V2Wr_VMMA]>; 982 983def V2Wr_VMA : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 984def V2Rd_VMA : SchedReadAdvance<3, [V2Wr_VMA]>; 985 986def V2Wr_VMAH : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; } 987def V2Rd_VMAH : SchedReadAdvance<2, [V2Wr_VMAH]>; 988 989def V2Wr_VMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 990def V2Rd_VMAL : SchedReadAdvance<3, [V2Wr_VMAL]>; 991 992def V2Wr_VPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } 993def V2Rd_VPA : SchedReadAdvance<3, [V2Wr_VPA]>; 994 995def V2Wr_VSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } 996def V2Rd_VSA : SchedReadAdvance<3, [V2Wr_VSA]>; 997 998def V2Wr_VFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; } 999def V2Rd_VFCMA : SchedReadAdvance<2, [V2Wr_VFCMA]>; 1000 1001def V2Wr_VFM : SchedWriteRes<[V2UnitV]> { let Latency = 3; } 1002def V2Wr_VFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; } 1003def V2Rd_VFMA : SchedReadAdvance<2, [V2Wr_VFM, V2Wr_VFMA]>; 1004 1005def V2Wr_VFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; } 1006def V2Rd_VFMAL : SchedReadAdvance<2, [V2Wr_VFMAL]>; 1007 1008def V2Wr_VBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; } 1009def V2Rd_VBFDOT : SchedReadAdvance<2, [V2Wr_VBFDOT]>; 1010def V2Wr_VBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; } 1011def V2Rd_VBFMMA : SchedReadAdvance<2, [V2Wr_VBFMMA]>; 1012def V2Wr_VBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; } 1013def V2Rd_VBFMAL : SchedReadAdvance<3, [V2Wr_VBFMAL]>; 1014 1015def V2Wr_CRC : SchedWriteRes<[V2UnitM0]> { let Latency = 2; } 1016def V2Rd_CRC : SchedReadAdvance<1, [V2Wr_CRC]>; 1017 1018def V2Wr_ZA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } 1019def V2Rd_ZA : SchedReadAdvance<3, [V2Wr_ZA]>; 1020def V2Wr_ZPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } 1021def V2Rd_ZPA : SchedReadAdvance<3, [V2Wr_ZPA]>; 1022def V2Wr_ZSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; } 1023def V2Rd_ZSA : SchedReadAdvance<3, [V2Wr_ZSA]>; 1024 1025def V2Wr_ZDOTB : SchedWriteRes<[V2UnitV]> { let Latency = 3; } 1026def V2Rd_ZDOTB : SchedReadAdvance<2, [V2Wr_ZDOTB]>; 1027def V2Wr_ZDOTH : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 1028def V2Rd_ZDOTH : SchedReadAdvance<3, [V2Wr_ZDOTH]>; 1029 1030// NOTE: SOG p. 43: Complex multiply-add B, H, S element size: How to reduce 1031// throughput to 1 in case of forwarding? 1032def V2Wr_ZCMABHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 1033def V2Rd_ZCMABHS : SchedReadAdvance<3, [V2Wr_ZCMABHS]>; 1034def V2Wr_ZCMAD : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; } 1035def V2Rd_ZCMAD : SchedReadAdvance<2, [V2Wr_ZCMAD]>; 1036 1037def V2Wr_ZMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; } 1038def V2Rd_ZMMA : SchedReadAdvance<2, [V2Wr_ZMMA]>; 1039 1040def V2Wr_ZMABHS : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; } 1041def V2Rd_ZMABHS : SchedReadAdvance<3, [V2Wr_ZMABHS]>; 1042def V2Wr_ZMAD : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; } 1043def V2Rd_ZMAD : SchedReadAdvance<2, [V2Wr_ZMAD]>; 1044 1045def V2Wr_ZMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 1046def V2Rd_ZMAL : SchedReadAdvance<3, [V2Wr_ZMAL]>; 1047 1048def V2Wr_ZMASQL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 1049def V2Wr_ZMASQBHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; } 1050def V2Wr_ZMASQD : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; } 1051def V2Rd_ZMASQ : SchedReadAdvance<2, [V2Wr_ZMASQL, V2Wr_ZMASQBHS, 1052 V2Wr_ZMASQD]>; 1053 1054def V2Wr_ZFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 5; } 1055def V2Rd_ZFCMA : SchedReadAdvance<3, [V2Wr_ZFCMA]>; 1056 1057def V2Wr_ZFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; } 1058def V2Rd_ZFMA : SchedReadAdvance<2, [V2Wr_ZFMA]>; 1059 1060def V2Wr_ZFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; } 1061def V2Rd_ZFMAL : SchedReadAdvance<2, [V2Wr_ZFMAL]>; 1062 1063def V2Wr_ZBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; } 1064def V2Rd_ZBFDOT : SchedReadAdvance<2, [V2Wr_ZBFDOT]>; 1065def V2Wr_ZBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; } 1066def V2Rd_ZBFMMA : SchedReadAdvance<2, [V2Wr_ZBFMMA]>; 1067def V2Wr_ZBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; } 1068def V2Rd_ZBFMAL : SchedReadAdvance<3, [V2Wr_ZBFMAL]>; 1069 1070//===----------------------------------------------------------------------===// 1071// Define types with long resource cycles (rc) 1072 1073def V2Write_6cyc_1V1_5rc : SchedWriteRes<[V2UnitV1]> { let Latency = 6; let ReleaseAtCycles = [ 5]; } 1074def V2Write_7cyc_1V02_7rc : SchedWriteRes<[V2UnitV02]> { let Latency = 7; let ReleaseAtCycles = [ 7]; } 1075def V2Write_10cyc_1V02_5rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 5]; } 1076def V2Write_10cyc_1V02_9rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 9]; } 1077def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [10]; } 1078def V2Write_10cyc_1V0_9rc : SchedWriteRes<[V2UnitV0]> { let Latency = 10; let ReleaseAtCycles = [ 9]; } 1079def V2Write_10cyc_1V1_9rc : SchedWriteRes<[V2UnitV1]> { let Latency = 10; let ReleaseAtCycles = [ 9]; } 1080def V2Write_13cyc_1V0_12rc : SchedWriteRes<[V2UnitV0]> { let Latency = 13; let ReleaseAtCycles = [12]; } 1081def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [12]; } 1082def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [13]; } 1083def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ReleaseAtCycles = [14]; } 1084def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [15]; } 1085def V2Write_16cyc_1V0_14rc : SchedWriteRes<[V2UnitV0]> { let Latency = 16; let ReleaseAtCycles = [14]; } 1086 1087// Miscellaneous 1088// ----------------------------------------------------------------------------- 1089 1090def : InstRW<[WriteI], (instrs COPY)>; 1091 1092// §3.3 Branch instructions 1093// ----------------------------------------------------------------------------- 1094 1095// Branch, immed 1096// Compare and branch 1097def : SchedAlias<WriteBr, V2Write_1cyc_1B>; 1098 1099// Branch, register 1100def : SchedAlias<WriteBrReg, V2Write_1cyc_1B>; 1101 1102// Branch and link, immed 1103// Branch and link, register 1104def : InstRW<[V2Write_1cyc_1B_1R], (instrs BL, BLR)>; 1105 1106// §3.4 Arithmetic and Logical Instructions 1107// ----------------------------------------------------------------------------- 1108 1109// ALU, basic 1110// ALU, basic, flagset 1111def : SchedAlias<WriteI, V2Write_1cyc_1I>; 1112def : InstRW<[V2Write_1cyc_1F], (instregex "^(ADC|SBC)S[WX]r$")>; 1113def : InstRW<[V2Write_0or1cyc_1I], (instregex "^MOVZ[WX]i$")>; 1114 1115// ALU, extend and shift 1116def : SchedAlias<WriteIEReg, V2Write_2cyc_1M>; 1117 1118// Arithmetic, LSL shift, shift <= 4 1119// Arithmetic, flagset, LSL shift, shift <= 4 1120// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 1121def : SchedAlias<WriteISReg, V2Write_ArithI>; 1122def : InstRW<[V2Write_ArithF], 1123 (instregex "^(ADD|SUB)S[WX]rs$")>; 1124 1125// Arithmetic, immediate to logical address tag 1126def : InstRW<[V2Write_2cyc_1M], (instrs ADDG, SUBG)>; 1127 1128// Convert floating-point condition flags 1129// Flag manipulation instructions 1130def : WriteRes<WriteSys, []> { let Latency = 1; } 1131 1132// Insert Random Tags 1133def : InstRW<[V2Write_2cyc_1M], (instrs IRG, IRGstack)>; 1134 1135// Insert Tag Mask 1136// Subtract Pointer 1137// Subtract Pointer, flagset 1138def : InstRW<[V2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>; 1139 1140// Logical, shift, no flagset 1141def : InstRW<[V2Write_1cyc_1I], (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>; 1142def : InstRW<[V2Write_0or1cyc_1I], (instregex "^ORR[WX]rs$")>; 1143 1144// Logical, shift, flagset 1145def : InstRW<[V2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>; 1146 1147// Move and shift instructions 1148// ----------------------------------------------------------------------------- 1149 1150def : SchedAlias<WriteImm, V2Write_1cyc_1I>; 1151 1152// §3.5 Divide and multiply instructions 1153// ----------------------------------------------------------------------------- 1154 1155// SDIV, UDIV 1156def : SchedAlias<WriteID32, V2Write_12cyc_1M0>; 1157def : SchedAlias<WriteID64, V2Write_20cyc_1M0>; 1158 1159def : SchedAlias<WriteIM32, V2Write_2cyc_1M>; 1160def : SchedAlias<WriteIM64, V2Write_2cyc_1M>; 1161 1162// Multiply 1163// Multiply accumulate, W-form 1164// Multiply accumulate, X-form 1165def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA], 1166 (instregex "^M(ADD|SUB)[WX]rrr$")>; 1167 1168// Multiply accumulate long 1169// Multiply long 1170def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA], 1171 (instregex "^(S|U)M(ADD|SUB)Lrrr$")>; 1172 1173// Multiply high 1174def : InstRW<[V2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>; 1175 1176// Pointer Authentication Instructions (v8.3 PAC) 1177// ----------------------------------------------------------------------------- 1178 1179// Authenticate data address 1180// Authenticate instruction address 1181// Compute pointer authentication code for data address 1182// Compute pointer authentication code, using generic key 1183// Compute pointer authentication code for instruction address 1184def : InstRW<[V2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>; 1185 1186// Branch and link, register, with pointer authentication 1187// Branch, register, with pointer authentication 1188// Branch, return, with pointer authentication 1189def : InstRW<[V2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA, 1190 BRAAZ, BRAB, BRABZ, RETAA, RETAB, 1191 ERETAA, ERETAB)>; 1192 1193 1194// Load register, with pointer authentication 1195def : InstRW<[V2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>; 1196 1197// Strip pointer authentication code 1198def : InstRW<[V2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>; 1199 1200// Miscellaneous data-processing instructions 1201// ----------------------------------------------------------------------------- 1202 1203// Address generation 1204def : InstRW<[V2Write_1cyc_1F], (instrs ADR, ADRP)>; 1205 1206// Bitfield extract, one reg 1207// Bitfield extract, two regs 1208def : SchedAlias<WriteExtr, V2Write_Extr>; 1209def : InstRW<[V2Write_Extr], (instrs EXTRWrri, EXTRXrri)>; 1210 1211// Bitfield move, basic 1212def : SchedAlias<WriteIS, V2Write_1cyc_1I>; 1213 1214// Bitfield move, insert 1215def : InstRW<[V2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>; 1216 1217// Load instructions 1218// ----------------------------------------------------------------------------- 1219 1220// NOTE: SOG p. 19: Throughput of LDN?P X-form should be 2, but reported as 3. 1221 1222def : SchedAlias<WriteLD, V2Write_4cyc_1L>; 1223def : SchedAlias<WriteLDIdx, V2Write_4cyc_1L>; 1224 1225// Load register, literal 1226def : InstRW<[V2Write_5cyc_1L_1F], (instrs LDRWl, LDRXl, LDRSWl, PRFMl)>; 1227 1228// Load pair, signed immed offset, signed words 1229def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi], (instrs LDPSWi)>; 1230 1231// Load pair, immed post-index or immed pre-index, signed words 1232def : InstRW<[WriteAdr, V2Write_5cyc_1I_3L, WriteLDHi], 1233 (instregex "^LDPSW(post|pre)$")>; 1234 1235// Store instructions 1236// ----------------------------------------------------------------------------- 1237 1238// NOTE: SOG, p. 20: Unsure if STRH uses pipeline I. 1239 1240def : SchedAlias<WriteST, V2Write_1cyc_1L01_1D>; 1241def : SchedAlias<WriteSTIdx, V2Write_1cyc_1L01_1D>; 1242def : SchedAlias<WriteSTP, V2Write_1cyc_1L01_1D>; 1243def : SchedAlias<WriteAdr, V2Write_1cyc_1I>; 1244 1245// Tag load instructions 1246// ----------------------------------------------------------------------------- 1247 1248// Load allocation tag 1249// Load multiple allocation tags 1250def : InstRW<[V2Write_4cyc_1L], (instrs LDG, LDGM)>; 1251 1252// Tag store instructions 1253// ----------------------------------------------------------------------------- 1254 1255// Store allocation tags to one or two granules, post-index 1256// Store allocation tags to one or two granules, pre-index 1257// Store allocation tag to one or two granules, zeroing, post-index 1258// Store Allocation Tag to one or two granules, zeroing, pre-index 1259// Store allocation tag and reg pair to memory, post-Index 1260// Store allocation tag and reg pair to memory, pre-Index 1261def : InstRW<[V2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex, 1262 ST2GPreIndex, ST2GPostIndex, 1263 STZGPreIndex, STZGPostIndex, 1264 STZ2GPreIndex, STZ2GPostIndex, 1265 STGPpre, STGPpost)>; 1266 1267// Store allocation tags to one or two granules, signed offset 1268// Store allocation tag to two granules, zeroing, signed offset 1269// Store allocation tag and reg pair to memory, signed offset 1270// Store multiple allocation tags 1271def : InstRW<[V2Write_1cyc_1L01_1D], (instrs STGi, ST2Gi, STZGi, 1272 STZ2Gi, STGPi, STGM, STZGM)>; 1273 1274// FP data processing instructions 1275// ----------------------------------------------------------------------------- 1276 1277// FP absolute value 1278// FP arithmetic 1279// FP min/max 1280// FP negate 1281// FP select 1282def : SchedAlias<WriteF, V2Write_2cyc_1V>; 1283 1284// FP compare 1285def : SchedAlias<WriteFCmp, V2Write_2cyc_1V0>; 1286 1287// FP divide, square root 1288def : SchedAlias<WriteFDiv, V2Write_7cyc_1V02>; 1289 1290// FP divide, H-form 1291def : InstRW<[V2Write_7cyc_1V02], (instrs FDIVHrr)>; 1292// FP divide, S-form 1293def : InstRW<[V2Write_10cyc_1V02], (instrs FDIVSrr)>; 1294// FP divide, D-form 1295def : InstRW<[V2Write_15cyc_1V02], (instrs FDIVDrr)>; 1296 1297// FP square root, H-form 1298def : InstRW<[V2Write_7cyc_1V02], (instrs FSQRTHr)>; 1299// FP square root, S-form 1300def : InstRW<[V2Write_9cyc_1V02], (instrs FSQRTSr)>; 1301// FP square root, D-form 1302def : InstRW<[V2Write_16cyc_1V02], (instrs FSQRTDr)>; 1303 1304// FP multiply 1305def : WriteRes<WriteFMul, [V2UnitV]> { let Latency = 3; } 1306 1307// FP multiply accumulate 1308def : InstRW<[V2Wr_FMA, ReadDefault, ReadDefault, V2Rd_FMA], 1309 (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>; 1310 1311// FP round to integral 1312def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$", 1313 "^FRINT(32|64)[XZ][SD]r$")>; 1314 1315// FP miscellaneous instructions 1316// ----------------------------------------------------------------------------- 1317 1318// FP convert, from gen to vec reg 1319def : InstRW<[V2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>; 1320 1321// FP convert, from vec to gen reg 1322def : InstRW<[V2Write_3cyc_1V01], 1323 (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]ri?$")>; 1324 1325// FP convert, Javascript from vec to gen reg 1326def : SchedAlias<WriteFCvt, V2Write_3cyc_1V0>; 1327 1328// FP convert, from vec to vec reg 1329def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr, 1330 FCVTHDr, FCVTSDr, FCVTXNv1i64)>; 1331 1332// FP move, immed 1333// FP move, register 1334def : SchedAlias<WriteFImm, V2Write_2cyc_1V>; 1335 1336// FP transfer, from gen to low half of vec reg 1337def : InstRW<[V2Write_0or3cyc_1M0], 1338 (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>; 1339 1340// FP transfer, from gen to high half of vec reg 1341def : InstRW<[V2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>; 1342 1343// FP transfer, from vec to gen reg 1344def : SchedAlias<WriteFCopy, V2Write_2cyc_2V01>; 1345 1346// FP load instructions 1347// ----------------------------------------------------------------------------- 1348 1349// Load vector reg, literal, S/D/Q forms 1350def : InstRW<[V2Write_7cyc_1F_1L], (instregex "^LDR[SDQ]l$")>; 1351 1352// Load vector reg, unscaled immed 1353def : InstRW<[V2Write_6cyc_1L], (instregex "^LDUR[BHSDQ]i$")>; 1354 1355// Load vector reg, immed post-index 1356// Load vector reg, immed pre-index 1357def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L], 1358 (instregex "^LDR[BHSDQ](pre|post)$")>; 1359 1360// Load vector reg, unsigned immed 1361def : InstRW<[V2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>; 1362 1363// Load vector reg, register offset, basic 1364// Load vector reg, register offset, scale, S/D-form 1365// Load vector reg, register offset, scale, H/Q-form 1366// Load vector reg, register offset, extend 1367// Load vector reg, register offset, extend, scale, S/D-form 1368// Load vector reg, register offset, extend, scale, H/Q-form 1369def : InstRW<[V2Write_LdrHQ, ReadAdrBase], (instregex "^LDR[BHSDQ]ro[WX]$")>; 1370 1371// Load vector pair, immed offset, S/D-form 1372def : InstRW<[V2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>; 1373 1374// Load vector pair, immed offset, Q-form 1375def : InstRW<[V2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>; 1376 1377// Load vector pair, immed post-index, S/D-form 1378// Load vector pair, immed pre-index, S/D-form 1379def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L, WriteLDHi], 1380 (instregex "^LDP[SD](pre|post)$")>; 1381 1382// Load vector pair, immed post-index, Q-form 1383// Load vector pair, immed pre-index, Q-form 1384def : InstRW<[WriteAdr, V2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost, 1385 LDPQpre)>; 1386 1387// FP store instructions 1388// ----------------------------------------------------------------------------- 1389 1390// Store vector reg, unscaled immed, B/H/S/D-form 1391// Store vector reg, unscaled immed, Q-form 1392def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>; 1393 1394// Store vector reg, immed post-index, B/H/S/D-form 1395// Store vector reg, immed post-index, Q-form 1396// Store vector reg, immed pre-index, B/H/S/D-form 1397// Store vector reg, immed pre-index, Q-form 1398def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I], 1399 (instregex "^STR[BHSDQ](pre|post)$")>; 1400 1401// Store vector reg, unsigned immed, B/H/S/D-form 1402// Store vector reg, unsigned immed, Q-form 1403def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>; 1404 1405// Store vector reg, register offset, basic, B/H/S/D-form 1406// Store vector reg, register offset, basic, Q-form 1407// Store vector reg, register offset, scale, H-form 1408// Store vector reg, register offset, scale, S/D-form 1409// Store vector reg, register offset, scale, Q-form 1410// Store vector reg, register offset, extend, B/H/S/D-form 1411// Store vector reg, register offset, extend, Q-form 1412// Store vector reg, register offset, extend, scale, H-form 1413// Store vector reg, register offset, extend, scale, S/D-form 1414// Store vector reg, register offset, extend, scale, Q-form 1415def : InstRW<[V2Write_StrHQ, ReadAdrBase], 1416 (instregex "^STR[BHSDQ]ro[WX]$")>; 1417 1418// Store vector pair, immed offset, S-form 1419// Store vector pair, immed offset, D-form 1420def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STN?P[SD]i$")>; 1421 1422// Store vector pair, immed offset, Q-form 1423def : InstRW<[V2Write_2cyc_1L01_2V01], (instrs STPQi, STNPQi)>; 1424 1425// Store vector pair, immed post-index, S-form 1426// Store vector pair, immed post-index, D-form 1427// Store vector pair, immed pre-index, S-form 1428// Store vector pair, immed pre-index, D-form 1429def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I], 1430 (instregex "^STP[SD](pre|post)$")>; 1431 1432// Store vector pair, immed post-index, Q-form 1433def : InstRW<[V2Write_2cyc_1L01_2V01_1I], (instrs STPQpost)>; 1434 1435// Store vector pair, immed pre-index, Q-form 1436def : InstRW<[V2Write_2cyc_1L01_2V01_2I], (instrs STPQpre)>; 1437 1438// ASIMD integer instructions 1439// ----------------------------------------------------------------------------- 1440 1441// ASIMD absolute diff 1442// ASIMD absolute diff long 1443// ASIMD arith, basic 1444// ASIMD arith, complex 1445// ASIMD arith, pair-wise 1446// ASIMD compare 1447// ASIMD logical 1448// ASIMD max/min, basic and pair-wise 1449def : SchedAlias<WriteVd, V2Write_2cyc_1V>; 1450def : SchedAlias<WriteVq, V2Write_2cyc_1V>; 1451 1452// ASIMD absolute diff accum 1453// ASIMD absolute diff accum long 1454def : InstRW<[V2Wr_VA, V2Rd_VA], (instregex "^[SU]ABAL?v")>; 1455 1456// ASIMD arith, reduce, 4H/4S 1457def : InstRW<[V2Write_2cyc_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>; 1458 1459// ASIMD arith, reduce, 8B/8H 1460def : InstRW<[V2Write_4cyc_1V13_1V], 1461 (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>; 1462 1463// ASIMD arith, reduce, 16B 1464def : InstRW<[V2Write_4cyc_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>; 1465 1466// ASIMD dot product 1467// ASIMD dot product using signed and unsigned integers 1468def : InstRW<[V2Wr_VDOT, V2Rd_VDOT], 1469 (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>; 1470 1471// ASIMD matrix multiply-accumulate 1472def : InstRW<[V2Wr_VMMA, V2Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>; 1473 1474// ASIMD max/min, reduce, 4H/4S 1475def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$", 1476 "^[SU](MAX|MIN)Vv4i32v$")>; 1477 1478// ASIMD max/min, reduce, 8B/8H 1479def : InstRW<[V2Write_4cyc_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$", 1480 "^[SU](MAX|MIN)Vv8i16v$")>; 1481 1482// ASIMD max/min, reduce, 16B 1483def : InstRW<[V2Write_4cyc_2V13], (instregex "[SU](MAX|MIN)Vv16i8v$")>; 1484 1485// ASIMD multiply 1486def : InstRW<[V2Write_4cyc_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>; 1487 1488// ASIMD multiply accumulate 1489def : InstRW<[V2Wr_VMA, V2Rd_VMA], (instregex "^MLAv", "^MLSv")>; 1490 1491// ASIMD multiply accumulate high 1492def : InstRW<[V2Wr_VMAH, V2Rd_VMAH], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>; 1493 1494// ASIMD multiply accumulate long 1495def : InstRW<[V2Wr_VMAL, V2Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>; 1496 1497// ASIMD multiply accumulate saturating long 1498def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDML[AS]L[iv]")>; 1499 1500// ASIMD multiply/multiply long (8x8) polynomial, D-form 1501// ASIMD multiply/multiply long (8x8) polynomial, Q-form 1502def : InstRW<[V2Write_3cyc_1V23], (instregex "^PMULL?(v8i8|v16i8)$")>; 1503 1504// ASIMD multiply long 1505def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]MULLv", "^SQDMULL[iv]")>; 1506 1507// ASIMD pairwise add and accumulate long 1508def : InstRW<[V2Wr_VPA, V2Rd_VPA], (instregex "^[SU]ADALPv")>; 1509 1510// ASIMD shift accumulate 1511def : InstRW<[V2Wr_VSA, V2Rd_VSA], (instregex "^[SU]SRA[dv]", "^[SU]RSRA[dv]")>; 1512 1513// ASIMD shift by immed, basic 1514def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHL[dv]", "^SHLLv", "^SHRNv", 1515 "^SSHLLv", "^SSHR[dv]", "^USHLLv", 1516 "^USHR[dv]")>; 1517 1518// ASIMD shift by immed and insert, basic 1519def : InstRW<[V2Write_2cyc_1V13], (instregex "^SLI[dv]", "^SRI[dv]")>; 1520 1521// ASIMD shift by immed, complex 1522def : InstRW<[V2Write_4cyc_1V13], 1523 (instregex "^RSHRNv", "^SQRSHRU?N[bhsv]", "^(SQSHLU?|UQSHL)[bhsd]$", 1524 "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$", 1525 "^SQSHRU?N[bhsv]", "^SRSHR[dv]", "^UQRSHRN[bhsv]", 1526 "^UQSHRN[bhsv]", "^URSHR[dv]")>; 1527 1528// ASIMD shift by register, basic 1529def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]SHLv")>; 1530 1531// ASIMD shift by register, complex 1532def : InstRW<[V2Write_4cyc_1V13], 1533 (instregex "^[SU]RSHLv", "^[SU]QRSHLv", 1534 "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>; 1535 1536// ASIMD floating-point instructions 1537// ----------------------------------------------------------------------------- 1538 1539// ASIMD FP absolute value/difference 1540// ASIMD FP arith, normal 1541// ASIMD FP compare 1542// ASIMD FP complex add 1543// ASIMD FP max/min, normal 1544// ASIMD FP max/min, pairwise 1545// ASIMD FP negate 1546// Handled by SchedAlias<WriteV[dq], ...> 1547 1548// ASIMD FP complex multiply add 1549def : InstRW<[V2Wr_VFCMA, V2Rd_VFCMA], (instregex "^FCMLAv")>; 1550 1551// ASIMD FP convert, long (F16 to F32) 1552def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTL(v4|v8)i16")>; 1553 1554// ASIMD FP convert, long (F32 to F64) 1555def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTL(v2|v4)i32")>; 1556 1557// ASIMD FP convert, narrow (F32 to F16) 1558def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTN(v4|v8)i16")>; 1559 1560// ASIMD FP convert, narrow (F64 to F32) 1561def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTN(v2|v4)i32", 1562 "^FCVTXN(v2|v4)f32")>; 1563 1564// ASIMD FP convert, other, D-form F32 and Q-form F64 1565def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$", 1566 "^FCVT[AMNPZ][SU]v1i64$", 1567 "^FCVTZ[SU]d$", 1568 "^[SU]CVTFv2f(32|64)$", 1569 "^[SU]CVTFv1i64$", 1570 "^[SU]CVTFd$")>; 1571 1572// ASIMD FP convert, other, D-form F16 and Q-form F32 1573def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$", 1574 "^FCVT[AMNPZ][SU]v1i32$", 1575 "^FCVTZ[SU]s$", 1576 "^[SU]CVTFv4f(16|32)$", 1577 "^[SU]CVTFv1i32$", 1578 "^[SU]CVTFs$")>; 1579 1580// ASIMD FP convert, other, Q-form F16 1581def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$", 1582 "^FCVT[AMNPZ][SU]v1f16$", 1583 "^FCVTZ[SU]h$", 1584 "^[SU]CVTFv8f16$", 1585 "^[SU]CVTFv1i16$", 1586 "^[SU]CVTFh$")>; 1587 1588// ASIMD FP divide, D-form, F16 1589def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FDIVv4f16)>; 1590 1591// ASIMD FP divide, D-form, F32 1592def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FDIVv2f32)>; 1593 1594// ASIMD FP divide, Q-form, F16 1595def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FDIVv8f16)>; 1596 1597// ASIMD FP divide, Q-form, F32 1598def : InstRW<[V2Write_10cyc_1V02_10rc], (instrs FDIVv4f32)>; 1599 1600// ASIMD FP divide, Q-form, F64 1601def : InstRW<[V2Write_15cyc_1V02_14rc], (instrs FDIVv2f64)>; 1602 1603// ASIMD FP max/min, reduce, F32 and D-form F16 1604def : InstRW<[V2Write_4cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>; 1605 1606// ASIMD FP max/min, reduce, Q-form F16 1607def : InstRW<[V2Write_6cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>; 1608 1609// ASIMD FP multiply 1610def : InstRW<[V2Wr_VFM], (instregex "^FMULv", "^FMULXv")>; 1611 1612// ASIMD FP multiply accumulate 1613def : InstRW<[V2Wr_VFMA, V2Rd_VFMA], (instregex "^FMLAv", "^FMLSv")>; 1614 1615// ASIMD FP multiply accumulate long 1616def : InstRW<[V2Wr_VFMAL, V2Rd_VFMAL], (instregex "^FML[AS]L2?(lane)?v")>; 1617 1618// ASIMD FP round, D-form F32 and Q-form F64 1619def : InstRW<[V2Write_3cyc_1V02], 1620 (instregex "^FRINT[AIMNPXZ]v2f(32|64)$", 1621 "^FRINT(32|64)[XZ]v2f(32|64)$")>; 1622 1623// ASIMD FP round, D-form F16 and Q-form F32 1624def : InstRW<[V2Write_4cyc_2V02], 1625 (instregex "^FRINT[AIMNPXZ]v4f(16|32)$", 1626 "^FRINT(32|64)[XZ]v4f32$")>; 1627 1628// ASIMD FP round, Q-form F16 1629def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>; 1630 1631// ASIMD FP square root, D-form, F16 1632def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FSQRTv4f16)>; 1633 1634// ASIMD FP square root, D-form, F32 1635def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FSQRTv2f32)>; 1636 1637// ASIMD FP square root, Q-form, F16 1638def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FSQRTv8f16)>; 1639 1640// ASIMD FP square root, Q-form, F32 1641def : InstRW<[V2Write_10cyc_1V02_9rc], (instrs FSQRTv4f32)>; 1642 1643// ASIMD FP square root, Q-form, F64 1644def : InstRW<[V2Write_16cyc_1V02_15rc], (instrs FSQRTv2f64)>; 1645 1646// ASIMD BFloat16 (BF16) instructions 1647// ----------------------------------------------------------------------------- 1648 1649// ASIMD convert, F32 to BF16 1650def : InstRW<[V2Write_4cyc_2V02], (instrs BFCVTN, BFCVTN2)>; 1651 1652// ASIMD dot product 1653def : InstRW<[V2Wr_VBFDOT, V2Rd_VBFDOT], (instrs BFDOTv4bf16, BFDOTv8bf16)>; 1654 1655// ASIMD matrix multiply accumulate 1656def : InstRW<[V2Wr_VBFMMA, V2Rd_VBFMMA], (instrs BFMMLA)>; 1657 1658// ASIMD multiply accumulate long 1659def : InstRW<[V2Wr_VBFMAL, V2Rd_VBFMAL], (instrs BFMLALB, BFMLALBIdx, BFMLALT, 1660 BFMLALTIdx)>; 1661 1662// Scalar convert, F32 to BF16 1663def : InstRW<[V2Write_3cyc_1V02], (instrs BFCVT)>; 1664 1665// ASIMD miscellaneous instructions 1666// ----------------------------------------------------------------------------- 1667 1668// ASIMD bit reverse 1669// ASIMD bitwise insert 1670// ASIMD count 1671// ASIMD duplicate, element 1672// ASIMD extract 1673// ASIMD extract narrow 1674// ASIMD insert, element to element 1675// ASIMD move, FP immed 1676// ASIMD move, integer immed 1677// ASIMD reverse 1678// ASIMD table lookup extension, 1 table reg 1679// ASIMD transpose 1680// ASIMD unzip/zip 1681// Handled by SchedAlias<WriteV[dq], ...> 1682def : InstRW<[V2Write_0or2cyc_1V], (instrs MOVID, MOVIv2d_ns)>; 1683 1684// ASIMD duplicate, gen reg 1685def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>; 1686 1687// ASIMD extract narrow, saturating 1688def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>; 1689 1690// ASIMD reciprocal and square root estimate, D-form U32 1691def : InstRW<[V2Write_3cyc_1V02], (instrs URECPEv2i32, URSQRTEv2i32)>; 1692 1693// ASIMD reciprocal and square root estimate, Q-form U32 1694def : InstRW<[V2Write_4cyc_2V02], (instrs URECPEv4i32, URSQRTEv4i32)>; 1695 1696// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms 1697def : InstRW<[V2Write_3cyc_1V02], (instrs FRECPEv1f16, FRECPEv1i32, 1698 FRECPEv1i64, FRECPEv2f32, 1699 FRSQRTEv1f16, FRSQRTEv1i32, 1700 FRSQRTEv1i64, FRSQRTEv2f32)>; 1701 1702// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 1703def : InstRW<[V2Write_4cyc_2V02], (instrs FRECPEv4f16, FRECPEv4f32, 1704 FRSQRTEv4f16, FRSQRTEv4f32)>; 1705 1706// ASIMD reciprocal and square root estimate, Q-form F16 1707def : InstRW<[V2Write_6cyc_4V02], (instrs FRECPEv8f16, FRSQRTEv8f16)>; 1708 1709// ASIMD reciprocal exponent 1710def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRECPXv")>; 1711 1712// ASIMD reciprocal step 1713def : InstRW<[V2Write_4cyc_1V], (instregex "^FRECPS(32|64|v)", 1714 "^FRSQRTS(32|64|v)")>; 1715 1716// ASIMD table lookup, 1 or 2 table regs 1717def : InstRW<[V2Write_2cyc_1V01], (instrs TBLv8i8One, TBLv16i8One, 1718 TBLv8i8Two, TBLv16i8Two)>; 1719 1720// ASIMD table lookup, 3 table regs 1721def : InstRW<[V2Write_4cyc_2V01], (instrs TBLv8i8Three, TBLv16i8Three)>; 1722 1723// ASIMD table lookup, 4 table regs 1724def : InstRW<[V2Write_4cyc_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>; 1725 1726// ASIMD table lookup extension, 2 table reg 1727def : InstRW<[V2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>; 1728 1729// ASIMD table lookup extension, 3 table reg 1730def : InstRW<[V2Write_6cyc_3V], (instrs TBXv8i8Three, TBXv16i8Three)>; 1731 1732// ASIMD table lookup extension, 4 table reg 1733def : InstRW<[V2Write_6cyc_5V], (instrs TBXv8i8Four, TBXv16i8Four)>; 1734 1735// ASIMD transfer, element to gen reg 1736def : InstRW<[V2Write_2cyc_2V01], (instregex "^[SU]MOVv")>; 1737 1738// ASIMD transfer, gen reg to element 1739def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>; 1740 1741// ASIMD load instructions 1742// ----------------------------------------------------------------------------- 1743 1744// ASIMD load, 1 element, multiple, 1 reg, D-form 1745def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>; 1746def : InstRW<[WriteAdr, V2Write_6cyc_1L], 1747 (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>; 1748 1749// ASIMD load, 1 element, multiple, 1 reg, Q-form 1750def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>; 1751def : InstRW<[WriteAdr, V2Write_6cyc_1L], 1752 (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>; 1753 1754// ASIMD load, 1 element, multiple, 2 reg, D-form 1755def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; 1756def : InstRW<[WriteAdr, V2Write_6cyc_2L], 1757 (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; 1758 1759// ASIMD load, 1 element, multiple, 2 reg, Q-form 1760def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; 1761def : InstRW<[WriteAdr, V2Write_6cyc_2L], 1762 (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; 1763 1764// ASIMD load, 1 element, multiple, 3 reg, D-form 1765def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; 1766def : InstRW<[WriteAdr, V2Write_6cyc_3L], 1767 (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; 1768 1769// ASIMD load, 1 element, multiple, 3 reg, Q-form 1770def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; 1771def : InstRW<[WriteAdr, V2Write_6cyc_3L], 1772 (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; 1773 1774// ASIMD load, 1 element, multiple, 4 reg, D-form 1775def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; 1776def : InstRW<[WriteAdr, V2Write_7cyc_4L], 1777 (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; 1778 1779// ASIMD load, 1 element, multiple, 4 reg, Q-form 1780def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; 1781def : InstRW<[WriteAdr, V2Write_7cyc_4L], 1782 (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; 1783 1784// ASIMD load, 1 element, one lane, B/H/S 1785// ASIMD load, 1 element, one lane, D 1786def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)$")>; 1787def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)_POST$")>; 1788 1789// ASIMD load, 1 element, all lanes, D-form, B/H/S 1790// ASIMD load, 1 element, all lanes, D-form, D 1791def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)$")>; 1792def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>; 1793 1794// ASIMD load, 1 element, all lanes, Q-form 1795def : InstRW<[V2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>; 1796def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; 1797 1798// ASIMD load, 2 element, multiple, D-form, B/H/S 1799def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)$")>; 1800def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)_POST$")>; 1801 1802// ASIMD load, 2 element, multiple, Q-form, B/H/S 1803// ASIMD load, 2 element, multiple, Q-form, D 1804def : InstRW<[V2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)$")>; 1805def : InstRW<[WriteAdr, V2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; 1806 1807// ASIMD load, 2 element, one lane, B/H 1808// ASIMD load, 2 element, one lane, S 1809// ASIMD load, 2 element, one lane, D 1810def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)$")>; 1811def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)_POST$")>; 1812 1813// ASIMD load, 2 element, all lanes, D-form, B/H/S 1814// ASIMD load, 2 element, all lanes, D-form, D 1815def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)$")>; 1816def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>; 1817 1818// ASIMD load, 2 element, all lanes, Q-form 1819def : InstRW<[V2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>; 1820def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; 1821 1822// ASIMD load, 3 element, multiple, D-form, B/H/S 1823def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)$")>; 1824def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)_POST$")>; 1825 1826// ASIMD load, 3 element, multiple, Q-form, B/H/S 1827// ASIMD load, 3 element, multiple, Q-form, D 1828def : InstRW<[V2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s|2d)$")>; 1829def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>; 1830 1831// ASIMD load, 3 element, one lane, B/H 1832// ASIMD load, 3 element, one lane, S 1833// ASIMD load, 3 element, one lane, D 1834def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)$")>; 1835def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)_POST$")>; 1836 1837// ASIMD load, 3 element, all lanes, D-form, B/H/S 1838// ASIMD load, 3 element, all lanes, D-form, D 1839def : InstRW<[V2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)$")>; 1840def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>; 1841 1842// ASIMD load, 3 element, all lanes, Q-form, B/H/S 1843// ASIMD load, 3 element, all lanes, Q-form, D 1844def : InstRW<[V2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)$")>; 1845def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>; 1846 1847// ASIMD load, 4 element, multiple, D-form, B/H/S 1848def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)$")>; 1849def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; 1850 1851// ASIMD load, 4 element, multiple, Q-form, B/H/S 1852// ASIMD load, 4 element, multiple, Q-form, D 1853def : InstRW<[V2Write_9cyc_6L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; 1854def : InstRW<[WriteAdr, V2Write_9cyc_6L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; 1855 1856// ASIMD load, 4 element, one lane, B/H 1857// ASIMD load, 4 element, one lane, S 1858// ASIMD load, 4 element, one lane, D 1859def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)$")>; 1860def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)_POST$")>; 1861 1862// ASIMD load, 4 element, all lanes, D-form, B/H/S 1863// ASIMD load, 4 element, all lanes, D-form, D 1864def : InstRW<[V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>; 1865def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>; 1866 1867// ASIMD load, 4 element, all lanes, Q-form, B/H/S 1868// ASIMD load, 4 element, all lanes, Q-form, D 1869def : InstRW<[V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>; 1870def : InstRW<[WriteAdr, V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>; 1871 1872// ASIMD store instructions 1873// ----------------------------------------------------------------------------- 1874 1875// ASIMD store, 1 element, multiple, 1 reg, D-form 1876def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(8b|4h|2s|1d)$")>; 1877def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; 1878 1879// ASIMD store, 1 element, multiple, 1 reg, Q-form 1880def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(16b|8h|4s|2d)$")>; 1881def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; 1882 1883// ASIMD store, 1 element, multiple, 2 reg, D-form 1884def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Twov(8b|4h|2s|1d)$")>; 1885def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; 1886 1887// ASIMD store, 1 element, multiple, 2 reg, Q-form 1888def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Twov(16b|8h|4s|2d)$")>; 1889def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; 1890 1891// ASIMD store, 1 element, multiple, 3 reg, D-form 1892def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Threev(8b|4h|2s|1d)$")>; 1893def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; 1894 1895// ASIMD store, 1 element, multiple, 3 reg, Q-form 1896def : InstRW<[V2Write_2cyc_3L01_3V01], (instregex "ST1Threev(16b|8h|4s|2d)$")>; 1897def : InstRW<[WriteAdr, V2Write_2cyc_3L01_3V01], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; 1898 1899// ASIMD store, 1 element, multiple, 4 reg, D-form 1900def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; 1901def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; 1902 1903// ASIMD store, 1 element, multiple, 4 reg, Q-form 1904def : InstRW<[V2Write_2cyc_4L01_4V01], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; 1905def : InstRW<[WriteAdr, V2Write_2cyc_4L01_4V01], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; 1906 1907// ASIMD store, 1 element, one lane, B/H/S 1908// ASIMD store, 1 element, one lane, D 1909def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST1i(8|16|32|64)$")>; 1910def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST1i(8|16|32|64)_POST$")>; 1911 1912// ASIMD store, 2 element, multiple, D-form, B/H/S 1913def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2Twov(8b|4h|2s)$")>; 1914def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST2Twov(8b|4h|2s)_POST$")>; 1915 1916// ASIMD store, 2 element, multiple, Q-form, B/H/S 1917// ASIMD store, 2 element, multiple, Q-form, D 1918def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST2Twov(16b|8h|4s|2d)$")>; 1919def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; 1920 1921// ASIMD store, 2 element, one lane, B/H/S 1922// ASIMD store, 2 element, one lane, D 1923def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2i(8|16|32|64)$")>; 1924def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01], (instregex "ST2i(8|16|32|64)_POST$")>; 1925 1926// ASIMD store, 3 element, multiple, D-form, B/H/S 1927def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3Threev(8b|4h|2s)$")>; 1928def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "ST3Threev(8b|4h|2s)_POST$")>; 1929 1930// ASIMD store, 3 element, multiple, Q-form, B/H/S 1931// ASIMD store, 3 element, multiple, Q-form, D 1932def : InstRW<[V2Write_6cyc_3L01_6V01], (instregex "ST3Threev(16b|8h|4s|2d)$")>; 1933def : InstRW<[WriteAdr, V2Write_6cyc_3L01_6V01], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>; 1934 1935// ASIMD store, 3 element, one lane, B/H 1936// ASIMD store, 3 element, one lane, S 1937// ASIMD store, 3 element, one lane, D 1938def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3i(8|16|32|64)$")>; 1939def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01], (instregex "ST3i(8|16|32|64)_POST$")>; 1940 1941// ASIMD store, 4 element, multiple, D-form, B/H/S 1942def : InstRW<[V2Write_6cyc_2L01_6V01], (instregex "ST4Fourv(8b|4h|2s)$")>; 1943def : InstRW<[WriteAdr, V2Write_6cyc_2L01_6V01], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; 1944 1945// ASIMD store, 4 element, multiple, Q-form, B/H/S 1946def : InstRW<[V2Write_7cyc_4L01_12V01], (instregex "ST4Fourv(16b|8h|4s)$")>; 1947def : InstRW<[WriteAdr, V2Write_7cyc_4L01_12V01], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; 1948 1949// ASIMD store, 4 element, multiple, Q-form, D 1950def : InstRW<[V2Write_5cyc_4L01_8V01], (instregex "ST4Fourv(2d)$")>; 1951def : InstRW<[WriteAdr, V2Write_5cyc_4L01_8V01], (instregex "ST4Fourv(2d)_POST$")>; 1952 1953// ASIMD store, 4 element, one lane, B/H/S 1954def : InstRW<[V2Write_6cyc_1L01_3V01], (instregex "ST4i(8|16|32)$")>; 1955def : InstRW<[WriteAdr, V2Write_6cyc_1L01_3V01], (instregex "ST4i(8|16|32)_POST$")>; 1956 1957// ASIMD store, 4 element, one lane, D 1958def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST4i(64)$")>; 1959def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01], (instregex "ST4i(64)_POST$")>; 1960 1961// Cryptography extensions 1962// ----------------------------------------------------------------------------- 1963 1964// Crypto AES ops 1965def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>; 1966 1967// Crypto polynomial (64x64) multiply long 1968def : InstRW<[V2Write_2cyc_1V], (instrs PMULLv1i64, PMULLv2i64)>; 1969 1970// Crypto SHA1 hash acceleration op 1971// Crypto SHA1 schedule acceleration ops 1972def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>; 1973 1974// Crypto SHA1 hash acceleration ops 1975// Crypto SHA256 hash acceleration ops 1976def : InstRW<[V2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>; 1977 1978// Crypto SHA256 schedule acceleration ops 1979def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>; 1980 1981// Crypto SHA512 hash acceleration ops 1982def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>; 1983 1984// Crypto SHA3 ops 1985def : InstRW<[V2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>; 1986 1987// Crypto SM3 ops 1988def : InstRW<[V2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$", 1989 "^SM3TT[12][AB]$")>; 1990 1991// Crypto SM4 ops 1992def : InstRW<[V2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>; 1993 1994// CRC 1995// ----------------------------------------------------------------------------- 1996 1997def : InstRW<[V2Wr_CRC, V2Rd_CRC], (instregex "^CRC32")>; 1998 1999// SVE Predicate instructions 2000// ----------------------------------------------------------------------------- 2001 2002// Loop control, based on predicate 2003def : InstRW<[V2Write_2or3cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP, 2004 BRKB_PPmP, BRKB_PPzP)>; 2005 2006// Loop control, based on predicate and flag setting 2007def : InstRW<[V2Write_3or4cyc_2M], (instrs BRKAS_PPzP, BRKBS_PPzP)>; 2008 2009// Loop control, propagating 2010def : InstRW<[V2Write_2or3cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, 2011 BRKPB_PPzPP)>; 2012 2013// Loop control, propagating and flag setting 2014def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP, 2015 BRKPBS_PPzPP)>; 2016 2017// Loop control, based on GPR 2018def : InstRW<[V2Write_3cyc_2M], 2019 (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>; 2020def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>; 2021 2022// Loop terminate 2023def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>; 2024 2025// Predicate counting scalar 2026def : InstRW<[V2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>; 2027def : InstRW<[V2Write_2cyc_1M], 2028 (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI", 2029 "^SQ(DEC|INC)[BHWD]_XPiWdI", 2030 "^UQ(DEC|INC)[BHWD]_WPiI")>; 2031 2032// Predicate counting scalar, ALL, {1,2,4} 2033def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>; 2034 2035// Predicate counting scalar, active predicate 2036def : InstRW<[V2Write_2cyc_1M], 2037 (instregex "^CNTP_XPP_[BHSD]", 2038 "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]", 2039 "^(UQDEC|UQINC)P_WP_[BHSD]", 2040 "^(SQDEC|SQINC)P_XPWd_[BHSD]")>; 2041 2042// Predicate counting vector, active predicate 2043def : InstRW<[V2Write_7cyc_1M_1M0_1V], 2044 (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>; 2045 2046// Predicate logical 2047def : InstRW<[V2Write_1or2cyc_1M0], 2048 (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>; 2049 2050// Predicate logical, flag setting 2051def : InstRW<[V2Write_1or2cyc_1M0_1M], 2052 (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>; 2053 2054// Predicate reverse 2055def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]")>; 2056 2057// Predicate select 2058def : InstRW<[V2Write_1cyc_1M0], (instrs SEL_PPPP)>; 2059 2060// Predicate set 2061def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>; 2062 2063// Predicate set/initialize, set flags 2064def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]")>; 2065 2066// Predicate find first/next 2067def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>; 2068 2069// Predicate test 2070def : InstRW<[V2Write_1cyc_1M], (instrs PTEST_PP)>; 2071 2072// Predicate transpose 2073def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]")>; 2074 2075// Predicate unpack and widen 2076def : InstRW<[V2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>; 2077 2078// Predicate zip/unzip 2079def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>; 2080 2081// SVE integer instructions 2082// ----------------------------------------------------------------------------- 2083 2084// Arithmetic, absolute diff 2085def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]", 2086 "^[SU]ABD_ZPZZ_[BHSD]")>; 2087 2088// Arithmetic, absolute diff accum 2089def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>; 2090 2091// Arithmetic, absolute diff accum long 2092def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>; 2093 2094// Arithmetic, absolute diff long 2095def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>; 2096 2097// Arithmetic, basic 2098def : InstRW<[V2Write_2cyc_1V], 2099 (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]", 2100 "^(ADD|SUB)_ZZZ_[BHSD]", 2101 "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]", 2102 "^(ADD|SUB|SUBR)_ZI_[BHSD]", 2103 "^ADR_[SU]XTW_ZZZ_D_[0123]", 2104 "^ADR_LSL_ZZZ_[SD]_[0123]", 2105 "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]", 2106 "^SADDLBT_ZZZ_[HSD]", 2107 "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]", 2108 "^SSUBL(BT|TB)_ZZZ_[HSD]")>; 2109 2110// Arithmetic, complex 2111def : InstRW<[V2Write_2cyc_1V], 2112 (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]", 2113 "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]", 2114 "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]", 2115 "^[SU]Q(ADD|SUB)_ZI_[BHSD]", 2116 "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]", 2117 "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>; 2118 2119// Arithmetic, large integer 2120def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>; 2121 2122// Arithmetic, pairwise add 2123def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>; 2124 2125// Arithmetic, pairwise add and accum long 2126def : InstRW<[V2Wr_ZPA, ReadDefault, V2Rd_ZPA], 2127 (instregex "^[SU]ADALP_ZPmZ_[HSD]")>; 2128 2129// Arithmetic, shift 2130def : InstRW<[V2Write_2cyc_1V13], 2131 (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]", 2132 "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]", 2133 "^(ASR|LSL|LSR)_ZPmI_[BHSD]", 2134 "^(ASR|LSL|LSR)_ZPmZ_[BHSD]", 2135 "^(ASR|LSL|LSR)_ZZI_[BHSD]", 2136 "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]", 2137 "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>; 2138 2139// Arithmetic, shift and accumulate 2140def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>; 2141 2142// Arithmetic, shift by immediate 2143def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]", 2144 "^[SU]SHLL[BT]_ZZI_[HSD]")>; 2145 2146// Arithmetic, shift by immediate and insert 2147def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>; 2148 2149// Arithmetic, shift complex 2150def : InstRW<[V2Write_4cyc_1V13], 2151 (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]", 2152 "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]", 2153 "^[SU]QR?SHL_ZPZZ_[BHSD]", 2154 "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]", 2155 "^SQSHRU?N[BT]_ZZI_[BHS]", 2156 "^UQR?SHRN[BT]_ZZI_[BHS]")>; 2157 2158// Arithmetic, shift right for divide 2159def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>; 2160 2161// Arithmetic, shift rounding 2162def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]", 2163 "^[SU]RSHL_ZPZZ_[BHSD]", 2164 "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>; 2165 2166// Bit manipulation 2167def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>; 2168 2169// Bitwise select 2170def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>; 2171 2172// Count/reverse bits 2173def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>; 2174 2175// Broadcast logical bitmask immediate to vector 2176def : InstRW<[V2Write_2cyc_1V], (instrs DUPM_ZI)>; 2177 2178// Compare and set flags 2179def : InstRW<[V2Write_4or5cyc_1V0_1M0], 2180 (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]", 2181 "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>; 2182 2183// Complex add 2184def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>; 2185 2186// Complex dot product 8-bit element 2187def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>; 2188 2189// Complex dot product 16-bit element 2190def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>; 2191 2192// Complex multiply-add B, H, S element size 2193def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]", 2194 "^CMLA_ZZZI_[HS]")>; 2195 2196// Complex multiply-add D element size 2197def : InstRW<[V2Wr_ZCMAD, V2Rd_ZCMAD], (instrs CMLA_ZZZ_D)>; 2198 2199// Conditional extract operations, scalar form 2200def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]")>; 2201 2202// Conditional extract operations, SIMD&FP scalar and vector forms 2203def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]", 2204 "^COMPACT_ZPZ_[SD]", 2205 "^SPLICE_ZPZZ?_[BHSD]")>; 2206 2207// Convert to floating point, 64b to float or convert to double 2208def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]", 2209 "^[SU]CVTF_ZPmZ_StoD")>; 2210 2211// Convert to floating point, 32b to single or half 2212def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>; 2213 2214// Convert to floating point, 16b to half 2215def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>; 2216 2217// Copy, scalar 2218def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>; 2219 2220// Copy, scalar SIMD&FP or imm 2221def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]", 2222 "^CPY_ZPzI_[BHSD]")>; 2223 2224// Divides, 32 bit 2225def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S", 2226 "^[SU]DIV_ZPZZ_S")>; 2227 2228// Divides, 64 bit 2229def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D", 2230 "^[SU]DIV_ZPZZ_D")>; 2231 2232// Dot product, 8 bit 2233def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>; 2234 2235// Dot product, 8 bit, using signed and unsigned integers 2236def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>; 2237 2238// Dot product, 16 bit 2239def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>; 2240 2241// Duplicate, immediate and indexed form 2242def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]", 2243 "^DUP_ZZI_[BHSDQ]")>; 2244 2245// Duplicate, scalar form 2246def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]")>; 2247 2248// Extend, sign or zero 2249def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ_[HSD]", 2250 "^[SU]XTH_ZPmZ_[SD]", 2251 "^[SU]XTW_ZPmZ_[D]")>; 2252 2253// Extract 2254def : InstRW<[V2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>; 2255 2256// Extract narrow saturating 2257def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]", 2258 "^SQXTUN[BT]_ZZ_[BHS]")>; 2259 2260// Extract/insert operation, SIMD and FP scalar form 2261def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]", 2262 "^INSR_ZV_[BHSD]")>; 2263 2264// Extract/insert operation, scalar 2265def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]", 2266 "^INSR_ZR_[BHSD]")>; 2267 2268// Histogram operations 2269def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]", 2270 "^HISTSEG_ZZZ")>; 2271 2272// Horizontal operations, B, H, S form, immediate operands only 2273def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]")>; 2274 2275// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar 2276// operands only / immediate, scalar operands 2277def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>; 2278 2279// Horizontal operations, D form, immediate operands only 2280def : InstRW<[V2Write_5cyc_2V02], (instrs INDEX_II_D)>; 2281 2282// Horizontal operations, D form, scalar, immediate operands)/ scalar operands 2283// only / immediate, scalar operands 2284def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D")>; 2285 2286// Logical 2287def : InstRW<[V2Write_2cyc_1V], 2288 (instregex "^(AND|EOR|ORR)_ZI", 2289 "^(AND|BIC|EOR|ORR)_ZZZ", 2290 "^EOR(BT|TB)_ZZZ_[BHSD]", 2291 "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]", 2292 "^NOT_ZPmZ_[BHSD]")>; 2293 2294// Max/min, basic and pairwise 2295def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]", 2296 "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]", 2297 "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>; 2298 2299// Matching operations 2300// FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the 2301// latency for this instruction is 4 cycles. 2302def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>; 2303 2304// Matrix multiply-accumulate 2305def : InstRW<[V2Wr_ZMMA, V2Rd_ZMMA], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; 2306 2307// Move prefix 2308def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]", 2309 "^MOVPRFX_ZZ")>; 2310 2311// Multiply, B, H, S element size 2312def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]", 2313 "^MUL_ZPZZ_[BHS]", 2314 "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]", 2315 "^[SU]MULH_ZPZZ_[BHS]")>; 2316 2317// Multiply, D element size 2318def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D", 2319 "^MUL_ZPZZ_D", 2320 "^[SU]MULH_(ZPmZ|ZZZ)_D", 2321 "^[SU]MULH_ZPZZ_D")>; 2322 2323// Multiply long 2324def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]", 2325 "^[SU]MULL[BT]_ZZZ_[HSD]")>; 2326 2327// Multiply accumulate, B, H, S element size 2328def : InstRW<[V2Wr_ZMABHS, V2Rd_ZMABHS], 2329 (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>; 2330def : InstRW<[V2Wr_ZMABHS, ReadDefault, V2Rd_ZMABHS], 2331 (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>; 2332 2333// Multiply accumulate, D element size 2334def : InstRW<[V2Wr_ZMAD, V2Rd_ZMAD], 2335 (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>; 2336def : InstRW<[V2Wr_ZMAD, ReadDefault, V2Rd_ZMAD], 2337 (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>; 2338 2339// Multiply accumulate long 2340def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]", 2341 "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>; 2342 2343// Multiply accumulate saturating doubling long regular 2344def : InstRW<[V2Wr_ZMASQL, V2Rd_ZMASQ], 2345 (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]", 2346 "^SQDML[AS]L[BT]_ZZZI_[SD]")>; 2347 2348// Multiply saturating doubling high, B, H, S element size 2349def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]", 2350 "^SQDMULH_ZZZI_[HS]")>; 2351 2352// Multiply saturating doubling high, D element size 2353def : InstRW<[V2Write_5cyc_2V02], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>; 2354 2355// Multiply saturating doubling long 2356def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]", 2357 "^SQDMULL[BT]_ZZZI_[SD]")>; 2358 2359// Multiply saturating rounding doubling regular/complex accumulate, B, H, S 2360// element size 2361def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZ_[BHS]", 2362 "^SQRDCMLAH_ZZZ_[BHS]", 2363 "^SQRDML[AS]H_ZZZI_[HS]", 2364 "^SQRDCMLAH_ZZZI_[HS]")>; 2365 2366// Multiply saturating rounding doubling regular/complex accumulate, D element 2367// size 2368def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D", 2369 "^SQRDCMLAH_ZZZ_D")>; 2370 2371// Multiply saturating rounding doubling regular/complex, B, H, S element size 2372def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]", 2373 "^SQRDMULH_ZZZI_[HS]")>; 2374 2375// Multiply saturating rounding doubling regular/complex, D element size 2376def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D")>; 2377 2378// Multiply/multiply long, (8x8) polynomial 2379def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B", 2380 "^PMULL[BT]_ZZZ_[HDQ]")>; 2381 2382// Predicate counting vector 2383def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>; 2384 2385// Reciprocal estimate 2386def : InstRW<[V2Write_4cyc_2V02], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>; 2387 2388// Reduction, arithmetic, B form 2389def : InstRW<[V2Write_9cyc_2V_4V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>; 2390 2391// Reduction, arithmetic, H form 2392def : InstRW<[V2Write_8cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>; 2393 2394// Reduction, arithmetic, S form 2395def : InstRW<[V2Write_6cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>; 2396 2397// Reduction, arithmetic, D form 2398def : InstRW<[V2Write_4cyc_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>; 2399 2400// Reduction, logical 2401def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>; 2402 2403// Reverse, vector 2404def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]", 2405 "^REVB_ZPmZ_[HSD]", 2406 "^REVH_ZPmZ_[SD]", 2407 "^REVW_ZPmZ_D")>; 2408 2409// Select, vector form 2410def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]")>; 2411 2412// Table lookup 2413def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>; 2414 2415// Table lookup extension 2416def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]")>; 2417 2418// Transpose, vector form 2419def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>; 2420 2421// Unpack and extend 2422def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>; 2423 2424// Zip/unzip 2425def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>; 2426 2427// SVE floating-point instructions 2428// ----------------------------------------------------------------------------- 2429 2430// Floating point absolute value/difference 2431def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]", 2432 "^FABD_ZPZZ_[HSD]", 2433 "^FABS_ZPmZ_[HSD]")>; 2434 2435// Floating point arithmetic 2436def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]", 2437 "^F(ADD|SUB)_ZPZ[IZ]_[HSD]", 2438 "^FADDP_ZPmZZ_[HSD]", 2439 "^FNEG_ZPmZ_[HSD]", 2440 "^FSUBR_ZPm[IZ]_[HSD]", 2441 "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>; 2442 2443// Floating point associative add, F16 2444def : InstRW<[V2Write_10cyc_1V1_9rc], (instrs FADDA_VPZ_H)>; 2445 2446// Floating point associative add, F32 2447def : InstRW<[V2Write_6cyc_1V1_5rc], (instrs FADDA_VPZ_S)>; 2448 2449// Floating point associative add, F64 2450def : InstRW<[V2Write_4cyc_1V], (instrs FADDA_VPZ_D)>; 2451 2452// Floating point compare 2453def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]", 2454 "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]", 2455 "^FCM(LE|LT)_PPzZ0_[HSD]", 2456 "^FCMUO_PPzZZ_[HSD]")>; 2457 2458// Floating point complex add 2459def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]")>; 2460 2461// Floating point complex multiply add 2462def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA], (instregex "^FCMLA_ZPmZZ_[HSD]")>; 2463def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]")>; 2464 2465// Floating point convert, long or narrow (F16 to F32 or F32 to F16) 2466def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)", 2467 "^FCVTLT_ZPmZ_HtoS", 2468 "^FCVTNT_ZPmZ_StoH")>; 2469 2470// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 2471// or F64 to F16) 2472def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)", 2473 "^FCVTLT_ZPmZ_StoD", 2474 "^FCVTNT_ZPmZ_DtoS")>; 2475 2476// Floating point convert, round to odd 2477def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>; 2478 2479// Floating point base2 log, F16 2480def : InstRW<[V2Write_6cyc_4V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>; 2481 2482// Floating point base2 log, F32 2483def : InstRW<[V2Write_4cyc_2V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>; 2484 2485// Floating point base2 log, F64 2486def : InstRW<[V2Write_3cyc_1V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>; 2487 2488// Floating point convert to integer, F16 2489def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>; 2490 2491// Floating point convert to integer, F32 2492def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>; 2493 2494// Floating point convert to integer, F64 2495def : InstRW<[V2Write_3cyc_1V02], 2496 (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>; 2497 2498// Floating point copy 2499def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]", 2500 "^FDUP_ZI_[HSD]")>; 2501 2502// Floating point divide, F16 2503def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>; 2504 2505// Floating point divide, F32 2506def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>; 2507 2508// Floating point divide, F64 2509def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>; 2510 2511// Floating point min/max pairwise 2512def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>; 2513 2514// Floating point min/max 2515def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]", 2516 "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>; 2517 2518// Floating point multiply 2519def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]", 2520 "^FMULX_ZPZZ_[HSD]", 2521 "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]", 2522 "^FMUL_ZPZ[IZ]_[HSD]")>; 2523 2524// Floating point multiply accumulate 2525def : InstRW<[V2Wr_ZFMA, ReadDefault, V2Rd_ZFMA], 2526 (instregex "^FN?ML[AS]_ZPmZZ_[HSD]", 2527 "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>; 2528def : InstRW<[V2Wr_ZFMA, V2Rd_ZFMA], 2529 (instregex "^FML[AS]_ZZZI_[HSD]", 2530 "^FN?ML[AS]_ZPZZZ_[HSD]")>; 2531 2532// Floating point multiply add/sub accumulate long 2533def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>; 2534 2535// Floating point reciprocal estimate, F16 2536def : InstRW<[V2Write_6cyc_4V02], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>; 2537 2538// Floating point reciprocal estimate, F32 2539def : InstRW<[V2Write_4cyc_2V02], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>; 2540 2541// Floating point reciprocal estimate, F64 2542def : InstRW<[V2Write_3cyc_1V02], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>; 2543 2544// Floating point reciprocal step 2545def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>; 2546 2547// Floating point reduction, F16 2548def : InstRW<[V2Write_8cyc_4V], 2549 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>; 2550 2551// Floating point reduction, F32 2552def : InstRW<[V2Write_6cyc_3V], 2553 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>; 2554 2555// Floating point reduction, F64 2556def : InstRW<[V2Write_4cyc_2V], 2557 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>; 2558 2559// Floating point round to integral, F16 2560def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>; 2561 2562// Floating point round to integral, F32 2563def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>; 2564 2565// Floating point round to integral, F64 2566def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>; 2567 2568// Floating point square root, F16 2569def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H", "^FSQRT_ZPmZ_H")>; 2570 2571// Floating point square root, F32 2572def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S", "^FSQRT_ZPmZ_S")>; 2573 2574// Floating point square root, F64 2575def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D", "^FSQRT_ZPmZ_D")>; 2576 2577// Floating point trigonometric exponentiation 2578def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>; 2579 2580// Floating point trigonometric multiply add 2581def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]")>; 2582 2583// Floating point trigonometric, miscellaneous 2584def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>; 2585 2586// SVE BFloat16 (BF16) instructions 2587// ----------------------------------------------------------------------------- 2588 2589// Convert, F32 to BF16 2590def : InstRW<[V2Write_4cyc_1V02], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>; 2591 2592// Dot product 2593def : InstRW<[V2Wr_ZBFDOT, V2Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>; 2594 2595// Matrix multiply accumulate 2596def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>; 2597 2598// Multiply accumulate long 2599def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>; 2600 2601// SVE Load instructions 2602// ----------------------------------------------------------------------------- 2603 2604// Load vector 2605def : InstRW<[V2Write_6cyc_1L], (instrs LDR_ZXI)>; 2606 2607// Load predicate 2608def : InstRW<[V2Write_6cyc_1L_1M], (instrs LDR_PXI)>; 2609 2610// Contiguous load, scalar + imm 2611def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM$", 2612 "^LD1S?B_[HSD]_IMM$", 2613 "^LD1S?H_[SD]_IMM$", 2614 "^LD1S?W_D_IMM$" )>; 2615// Contiguous load, scalar + scalar 2616def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]$", 2617 "^LD1S?B_[HSD]$", 2618 "^LD1S?H_[SD]$", 2619 "^LD1S?W_D$" )>; 2620 2621// Contiguous load broadcast, scalar + imm 2622def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$", 2623 "^LD1RS?B_[HSD]_IMM$", 2624 "^LD1RS?H_[SD]_IMM$", 2625 "^LD1RW_D_IMM$", 2626 "^LD1RSW_IMM$", 2627 "^LD1RQ_[BHWD]_IMM$")>; 2628 2629// Contiguous load broadcast, scalar + scalar 2630def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>; 2631 2632// Non temporal load, scalar + imm 2633// Non temporal load, scalar + scalar 2634def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZR[IR]$")>; 2635 2636// Non temporal gather load, vector + scalar 32-bit element size 2637def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$", 2638 "^LDNT1S[BH]_ZZR_S_REAL$")>; 2639 2640// Non temporal gather load, vector + scalar 64-bit element size 2641def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>; 2642def : InstRW<[V2Write_9cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>; 2643 2644// Contiguous first faulting load, scalar + scalar 2645def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$", 2646 "^LDFF1S?B_[HSD]_REAL$", 2647 "^LDFF1S?H_[SD]_REAL$", 2648 "^LDFF1S?W_D_REAL$")>; 2649 2650// Contiguous non faulting load, scalar + imm 2651def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$", 2652 "^LDNF1S?B_[HSD]_IMM_REAL$", 2653 "^LDNF1S?H_[SD]_IMM_REAL$", 2654 "^LDNF1S?W_D_IMM_REAL$")>; 2655 2656// Contiguous Load two structures to two vectors, scalar + imm 2657def : InstRW<[V2Write_8cyc_2L_2V], (instregex "^LD2[BHWD]_IMM$")>; 2658 2659// Contiguous Load two structures to two vectors, scalar + scalar 2660def : InstRW<[V2Write_9cyc_2L_2V_2S], (instregex "^LD2[BHWD]$")>; 2661 2662// Contiguous Load three structures to three vectors, scalar + imm 2663def : InstRW<[V2Write_9cyc_3L_3V], (instregex "^LD3[BHWD]_IMM$")>; 2664 2665// Contiguous Load three structures to three vectors, scalar + scalar 2666def : InstRW<[V2Write_10cyc_3V_3L_3S], (instregex "^LD3[BHWD]$")>; 2667 2668// Contiguous Load four structures to four vectors, scalar + imm 2669def : InstRW<[V2Write_9cyc_4L_8V], (instregex "^LD4[BHWD]_IMM$")>; 2670 2671// Contiguous Load four structures to four vectors, scalar + scalar 2672def : InstRW<[V2Write_10cyc_4L_8V_4S], (instregex "^LD4[BHWD]$")>; 2673 2674// Gather load, vector + imm, 32-bit element size 2675def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$", 2676 "^GLD(FF)?1W_IMM_REAL$")>; 2677 2678// Gather load, vector + imm, 64-bit element size 2679def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$", 2680 "^GLD(FF)?1D_IMM_REAL$")>; 2681 2682// Gather load, 32-bit scaled offset 2683def : InstRW<[V2Write_10cyc_1L_8V], 2684 (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED_REAL$", 2685 "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>; 2686 2687// Gather load, 64-bit scaled offset 2688// NOTE: These instructions are not specified in the SOG. 2689def : InstRW<[V2Write_10cyc_1L_4V], 2690 (instregex "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED_REAL$", 2691 "^GLD(FF)?1D_([SU]XTW_)?SCALED_REAL$")>; 2692 2693// Gather load, 32-bit unpacked unscaled offset 2694def : InstRW<[V2Write_9cyc_1L_4V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$", 2695 "^GLD(FF)?1W_[SU]XTW_REAL$")>; 2696 2697// Gather load, 64-bit unpacked unscaled offset 2698// NOTE: These instructions are not specified in the SOG. 2699def : InstRW<[V2Write_9cyc_1L_2V], 2700 (instregex "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?REAL$", 2701 "^GLD(FF)?1D_([SU]XTW_)?REAL$")>; 2702 2703// SVE Store instructions 2704// ----------------------------------------------------------------------------- 2705 2706// Store from predicate reg 2707def : InstRW<[V2Write_1cyc_1L01], (instrs STR_PXI)>; 2708 2709// Store from vector reg 2710def : InstRW<[V2Write_2cyc_1L01_1V01], (instrs STR_ZXI)>; 2711 2712// Contiguous store, scalar + imm 2713def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BHWD]_IMM$", 2714 "^ST1B_[HSD]_IMM$", 2715 "^ST1H_[SD]_IMM$", 2716 "^ST1W_D_IMM$")>; 2717 2718// Contiguous store, scalar + scalar 2719def : InstRW<[V2Write_2cyc_1L01_1S_1V01], (instregex "^ST1H(_[SD])?$")>; 2720def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^ST1[BWD]$", 2721 "^ST1B_[HSD]$", 2722 "^ST1W_D$")>; 2723 2724// Contiguous store two structures from two vectors, scalar + imm 2725def : InstRW<[V2Write_4cyc_1L01_1V01], (instregex "^ST2[BHWD]_IMM$")>; 2726 2727// Contiguous store two structures from two vectors, scalar + scalar 2728def : InstRW<[V2Write_4cyc_2L01_2S_2V01], (instrs ST2H)>; 2729def : InstRW<[V2Write_4cyc_2L01_2V01], (instregex "^ST2[BWD]$")>; 2730 2731// Contiguous store three structures from three vectors, scalar + imm 2732def : InstRW<[V2Write_7cyc_9L01_9V01], (instregex "^ST3[BHWD]_IMM$")>; 2733 2734// Contiguous store three structures from three vectors, scalar + scalar 2735def : InstRW<[V2Write_7cyc_9L01_9S_9V01], (instregex "^ST3[BHWD]$")>; 2736 2737// Contiguous store four structures from four vectors, scalar + imm 2738def : InstRW<[V2Write_11cyc_18L01_18V01], (instregex "^ST4[BHWD]_IMM$")>; 2739 2740// Contiguous store four structures from four vectors, scalar + scalar 2741def : InstRW<[V2Write_11cyc_18L01_18S_18V01], (instregex "^ST4[BHWD]$")>; 2742 2743// Non temporal store, scalar + imm 2744def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>; 2745 2746// Non temporal store, scalar + scalar 2747def : InstRW<[V2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>; 2748def : InstRW<[V2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>; 2749 2750// Scatter non temporal store, vector + scalar 32-bit element size 2751def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^STNT1[BHW]_ZZR_S")>; 2752 2753// Scatter non temporal store, vector + scalar 64-bit element size 2754def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^STNT1[BHWD]_ZZR_D")>; 2755 2756// Scatter store vector + imm 32-bit element size 2757def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_IMM$", 2758 "^SST1W_IMM$")>; 2759 2760// Scatter store vector + imm 64-bit element size 2761def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_IMM$", 2762 "^SST1D_IMM$")>; 2763 2764// Scatter store, 32-bit scaled offset 2765def : InstRW<[V2Write_4cyc_4L01_4V01], 2766 (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>; 2767 2768// Scatter store, 32-bit unpacked unscaled offset 2769def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D_[SU]XTW$", 2770 "^SST1D_[SU]XTW$")>; 2771 2772// Scatter store, 32-bit unpacked scaled offset 2773def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$", 2774 "^SST1D_[SU]XTW_SCALED$")>; 2775 2776// Scatter store, 32-bit unscaled offset 2777def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex "^SST1[BH]_S_[SU]XTW$", 2778 "^SST1W_[SU]XTW$")>; 2779 2780// Scatter store, 64-bit scaled offset 2781def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[HW]_D_SCALED$", 2782 "^SST1D_SCALED$")>; 2783 2784// Scatter store, 64-bit unscaled offset 2785def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "^SST1[BHW]_D$", 2786 "^SST1D$")>; 2787 2788// SVE Miscellaneous instructions 2789// ----------------------------------------------------------------------------- 2790 2791// Read first fault register, unpredicated 2792def : InstRW<[V2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>; 2793 2794// Read first fault register, predicated 2795def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>; 2796 2797// Read first fault register and set flags 2798def : InstRW<[V2Write_4or5cyc_2M0_2M], (instrs RDFFRS_PPz)>; 2799 2800// Set first fault register 2801// Write to first fault register 2802def : InstRW<[V2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>; 2803 2804// Prefetch 2805// NOTE: This is not specified in the SOG. 2806def : InstRW<[V2Write_4cyc_1L], (instregex "^PRF[BHWD]")>; 2807 2808// SVE Cryptographic instructions 2809// ----------------------------------------------------------------------------- 2810 2811// Crypto AES ops 2812def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$", 2813 "^AESI?MC_ZZ_B$")>; 2814 2815// Crypto SHA3 ops 2816def : InstRW<[V2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$", 2817 "^RAX1_ZZZ_D$", 2818 "^XAR_ZZZI_[BHSD]$")>; 2819 2820// Crypto SM4 ops 2821def : InstRW<[V2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>; 2822 2823} 2824