//=- AArch64SchedNeoverseV2.td - NeoverseV2 Scheduling Defs --*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Arm Neoverse V2 processors.
// All information is taken from the V2 Software Optimisation guide:
//
// https://developer.arm.com/documentation/PJDOC-466751330-593177/r0p2
//
//===----------------------------------------------------------------------===//

// Machine-level parameters of the Neoverse V2 scheduling model. Several values
// are placeholders borrowed from other cores, as noted on each field.
def NeoverseV2Model : SchedMachineModel {
  // Micro-ops dispatched at a time.
  let IssueWidth = 16;
  // Entries in micro-op re-order buffer. NOTE: Copied from N2.
  let MicroOpBufferSize = 160;
  // Optimistic load latency.
  let LoadLatency = 4;
  // Extra cycles for mispredicted branch. NOTE: Copied from N2.
  let MispredictPenalty = 10;
  // NOTE: Copied from Cortex-A57.
  let LoopMicroOpBufferSize = 16;
  let CompleteModel = 1;

  // Features this model does not describe: the SMEUnsupported.F list plus
  // SVE2p1, CPA and CSSC.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SMEUnsupported.F, [HasSVE2p1, HasCPA, HasCSSC]);
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Neoverse V2.
// Instructions are first fetched and then decoded into internal macro-ops
// (MOPs). From there, the MOPs proceed through register renaming and dispatch
// stages. A MOP can be split into two micro-ops further down the pipeline
// after the decode stage. Once dispatched, micro-ops wait for their operands
// and issue out-of-order to one of seventeen issue pipelines. Each issue
// pipeline can accept one micro-op per cycle.

let SchedModel = NeoverseV2Model in {

// Define the (17) issue ports.
// Branch 0/1
def V2UnitB : ProcResource<2>;

// Integer single-cycle 0, 1, 2 and 3
def V2UnitS0 : ProcResource<1>;
def V2UnitS1 : ProcResource<1>;
def V2UnitS2 : ProcResource<1>;
def V2UnitS3 : ProcResource<1>;

// Integer single/multicycle 0 and 1
def V2UnitM0 : ProcResource<1>;
def V2UnitM1 : ProcResource<1>;

// FP/ASIMD 0, 1, 2 and 3
def V2UnitV0 : ProcResource<1>;
def V2UnitV1 : ProcResource<1>;
def V2UnitV2 : ProcResource<1>;
def V2UnitV3 : ProcResource<1>;

// Load/Store 0/1
def V2UnitL01 : ProcResource<2>;
// Load 2
def V2UnitL2 : ProcResource<1>;
// Store data 0/1
def V2UnitD : ProcResource<2>;

// Resource groups: a micro-op bound to a group may issue on any member.

// Integer single-cycle 0/1
def V2UnitR : ProcResGroup<[V2UnitS0, V2UnitS1]>;
// Integer single-cycle 0/1/2/3
def V2UnitS : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3]>;
// Integer single-cycle 0/1 and single/multicycle 0/1
def V2UnitF : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitM0, V2UnitM1]>;
// Integer single-cycle 0/1/2/3 and single/multicycle 0/1
def V2UnitI : ProcResGroup<[V2UnitS0, V2UnitS1, V2UnitS2, V2UnitS3,
                            V2UnitM0, V2UnitM1]>;
// Integer single/multicycle 0/1
def V2UnitM : ProcResGroup<[V2UnitM0, V2UnitM1]>;
// Load/Store 0/1 and Load 2
def V2UnitL : ProcResGroup<[V2UnitL01, V2UnitL2]>;
// FP/ASIMD 0/1/2/3
def V2UnitV : ProcResGroup<[V2UnitV0, V2UnitV1, V2UnitV2, V2UnitV3]>;
// FP/ASIMD 0/1
def V2UnitV01 : ProcResGroup<[V2UnitV0, V2UnitV1]>;
// FP/ASIMD 0/2
def V2UnitV02 : ProcResGroup<[V2UnitV0, V2UnitV2]>;
// FP/ASIMD 1/3
def V2UnitV13 : ProcResGroup<[V2UnitV1, V2UnitV3]>;
// FP/ASIMD 2/3
def V2UnitV23 : ProcResGroup<[V2UnitV2, V2UnitV3]>;

// Define commonly used read types.

// No forwarding is provided for these types.
// Every generic read below gets a ReadAdvance of 0 cycles.
foreach Rd = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID,
              ReadExtrHi, ReadAdrBase, ReadST, ReadVLD] in
def : ReadAdvance<Rd, 0>;

// NOTE: Copied from N2.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
let Latency = 1 in {
def : WriteRes<WriteBarrier, []>;
def : WriteRes<WriteHint, []>;
}
def : WriteRes<WriteLDHi, []> { let Latency = 4; }

//===----------------------------------------------------------------------===//
// Define customized scheduler read/write types specific to the Neoverse V2.

//===----------------------------------------------------------------------===//

// Naming scheme used by the definitions below:
//   V2Write_<latency>cyc_<count><unit>[_<count><unit>...]
// where <unit> is the V2Unit* resource name without its "V2Unit" prefix.

// Define generic 0 micro-op types
def V2Write_0cyc : SchedWriteRes<[]> { let Latency = 0; }

// Define generic 1 micro-op types

// Branch, integer and load/store address pipes.
def V2Write_1cyc_1B : SchedWriteRes<[V2UnitB]> { let Latency = 1; }
def V2Write_1cyc_1F : SchedWriteRes<[V2UnitF]> { let Latency = 1; }
def V2Write_1cyc_1I : SchedWriteRes<[V2UnitI]> { let Latency = 1; }
def V2Write_1cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 1; }
def V2Write_1cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 1; }
def V2Write_1cyc_1L01 : SchedWriteRes<[V2UnitL01]> { let Latency = 1; }
def V2Write_2cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 2; }
def V2Write_3cyc_1M : SchedWriteRes<[V2UnitM]> { let Latency = 3; }
def V2Write_2cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
def V2Write_3cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 3; }
def V2Write_5cyc_1M0 : SchedWriteRes<[V2UnitM0]> { let Latency = 5; }
// ReleaseAtCycles equals the latency here, so M0 stays occupied for the whole
// operation.
def V2Write_12cyc_1M0 : SchedWriteRes<[V2UnitM0]> {
  let Latency = 12;
  let ReleaseAtCycles = [12];
}
def V2Write_20cyc_1M0 : SchedWriteRes<[V2UnitM0]> {
  let Latency = 20;
  let ReleaseAtCycles = [20];
}

// Load pipes.
def V2Write_4cyc_1L : SchedWriteRes<[V2UnitL]> { let Latency = 4; }
def V2Write_6cyc_1L : SchedWriteRes<[V2UnitL]> { let Latency = 6; }

// FP/ASIMD pipes. Where ReleaseAtCycles is given, the pipe is held for that
// many cycles rather than the default.
def V2Write_2cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 2; }
def V2Write_2cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 2; }
def V2Write_2cyc_1V01 : SchedWriteRes<[V2UnitV01]> { let Latency = 2; }
def V2Write_2cyc_1V23 : SchedWriteRes<[V2UnitV23]> { let Latency = 2; }
def V2Write_3cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
def V2Write_3cyc_1V01 : SchedWriteRes<[V2UnitV01]> {
  let Latency = 3;
  let ReleaseAtCycles = [2];
}
def V2Write_3cyc_1V23 : SchedWriteRes<[V2UnitV23]> { let Latency = 3; }
def V2Write_4cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
def V2Write_5cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
def V2Write_6cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
def V2Write_12cyc_1V : SchedWriteRes<[V2UnitV]> { let Latency = 12; }
def V2Write_3cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 3; }
def V2Write_3cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 3; }
def V2Write_4cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 4; }
def V2Write_4cyc_1V02 : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Write_7cyc_1V0 : SchedWriteRes<[V2UnitV0]> {
  let Latency = 7;
  let ReleaseAtCycles = [7];
}
def V2Write_7cyc_1V02 : SchedWriteRes<[V2UnitV02]> {
  let Latency = 7;
  let ReleaseAtCycles = [2];
}
def V2Write_9cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 9; }
def V2Write_9cyc_1V02 : SchedWriteRes<[V2UnitV02]> {
  let Latency = 9;
  let ReleaseAtCycles = [2];
}
def V2Write_10cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 10; }
def V2Write_10cyc_1V02 : SchedWriteRes<[V2UnitV02]> {
  let Latency = 10;
  let ReleaseAtCycles = [2];
}
def V2Write_12cyc_1V0 : SchedWriteRes<[V2UnitV0]> {
  let Latency = 12;
  let ReleaseAtCycles = [11];
}
def V2Write_13cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 13; }
def V2Write_15cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 15; }
def V2Write_15cyc_1V02 : SchedWriteRes<[V2UnitV02]> {
  let Latency = 15;
  let ReleaseAtCycles = [8];
}
def V2Write_16cyc_1V0 : SchedWriteRes<[V2UnitV0]> { let Latency = 16; }
def V2Write_16cyc_1V02 : SchedWriteRes<[V2UnitV02]> {
  let Latency = 16;
  let ReleaseAtCycles = [8];
}
def V2Write_20cyc_1V0 : SchedWriteRes<[V2UnitV0]> {
  let Latency = 20;
  let ReleaseAtCycles = [20];
}
def V2Write_2cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 2; }
def V2Write_2cyc_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 2; }
def V2Write_3cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 3; }
def V2Write_4cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 4; }
def V2Write_4cyc_1V13 : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Write_6cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 6; }
def V2Write_10cyc_1V1 : SchedWriteRes<[V2UnitV1]> { let Latency = 10; }
def V2Write_6cyc_1L01 : SchedWriteRes<[V2UnitL01]> { let Latency = 6; }

//===----------------------------------------------------------------------===//
// Define generic 2 micro-op types

let NumMicroOps = 2 in {
def V2Write_1cyc_1B_1R : SchedWriteRes<[V2UnitB, V2UnitR]> { let Latency = 1; }
def V2Write_6cyc_1M0_1B : SchedWriteRes<[V2UnitM0, V2UnitB]> { let Latency = 6; }
def V2Write_9cyc_1M0_1L : SchedWriteRes<[V2UnitM0, V2UnitL]> { let Latency = 9; }
def V2Write_3cyc_1I_1M : SchedWriteRes<[V2UnitI, V2UnitM]> { let Latency = 3; }
def V2Write_1cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { let Latency = 1; }
def V2Write_3cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { let Latency = 3; }
def V2Write_4cyc_2M : SchedWriteRes<[V2UnitM, V2UnitM]> { let Latency = 4; }
def V2Write_5cyc_1L_1F : SchedWriteRes<[V2UnitL, V2UnitF]> { let Latency = 5; }
def V2Write_6cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> { let Latency = 6; }
def V2Write_7cyc_1F_1L : SchedWriteRes<[V2UnitF, V2UnitL]> { let Latency = 7; }
def V2Write_7cyc_1I_1L : SchedWriteRes<[V2UnitI, V2UnitL]> { let Latency = 7; }
def V2Write_1cyc_1L01_1D : SchedWriteRes<[V2UnitL01, V2UnitD]> { let Latency = 1; }
def V2Write_5cyc_1M0_1V : SchedWriteRes<[V2UnitM0, V2UnitV]> { let Latency = 5; }
def V2Write_2cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> { let Latency = 2; }
def V2Write_2cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> { let Latency = 2; }
def V2Write_2cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> { let Latency = 2; }
def V2Write_4cyc_2V01 : SchedWriteRes<[V2UnitV01, V2UnitV01]> { let Latency = 4; }
def V2Write_4cyc_1L01_1V01 : SchedWriteRes<[V2UnitL01, V2UnitV01]> { let Latency = 4; }
def V2Write_4cyc_1V13_1V : SchedWriteRes<[V2UnitV13, V2UnitV]> { let Latency = 4; }
def V2Write_4cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> { let Latency = 4; }
def V2Write_4cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
def V2Write_4cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> { let Latency = 4; }
def V2Write_6cyc_2V : SchedWriteRes<[V2UnitV, V2UnitV]> { let Latency = 6; }
def V2Write_6cyc_2L : SchedWriteRes<[V2UnitL, V2UnitL]> { let Latency = 6; }
def V2Write_8cyc_1L_1V : SchedWriteRes<[V2UnitL, V2UnitV]> { let Latency = 8; }
def V2Write_4cyc_1L01_1V : SchedWriteRes<[V2UnitL01, V2UnitV]> { let Latency = 4; }
def V2Write_3cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { let Latency = 3; }
def V2Write_4cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { let Latency = 4; }
def V2Write_1cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { let Latency = 1; }
def V2Write_2cyc_1M0_1M : SchedWriteRes<[V2UnitM0, V2UnitM]> { let Latency = 2; }
def V2Write_6cyc_2V1 : SchedWriteRes<[V2UnitV1, V2UnitV1]> { let Latency = 6; }
def V2Write_4cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> { let Latency = 4; }
def V2Write_5cyc_1V0_1M0 : SchedWriteRes<[V2UnitV0, V2UnitM0]> { let Latency = 5; }
def V2Write_5cyc_2V0 : SchedWriteRes<[V2UnitV0, V2UnitV0]> { let Latency = 5; }
def V2Write_5cyc_2V02 : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
def V2Write_6cyc_1V1_1M0 : SchedWriteRes<[V2UnitV1, V2UnitM0]> { let Latency = 6; }
def V2Write_7cyc_1M0_1V02 : SchedWriteRes<[V2UnitM0, V2UnitV02]> { let Latency = 7; }
def V2Write_2cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> { let Latency = 2; }
def V2Write_3cyc_1V0_1M : SchedWriteRes<[V2UnitV0, V2UnitM]> { let Latency = 3; }
def V2Write_6cyc_1V_1V13 : SchedWriteRes<[V2UnitV, V2UnitV13]> { let Latency = 6; }
def V2Write_6cyc_1L_1M : SchedWriteRes<[V2UnitL, V2UnitM]> { let Latency = 6; }
def V2Write_6cyc_1L_1S : SchedWriteRes<[V2UnitL, V2UnitS]> { let Latency = 6; }
def V2Write_4cyc_2V13 : SchedWriteRes<[V2UnitV13, V2UnitV13]> { let Latency = 4; }
def V2Write_8cyc_1M0_1V01 : SchedWriteRes<[V2UnitM0, V2UnitV01]> { let Latency = 8; }
} // NumMicroOps = 2

//===----------------------------------------------------------------------===//
// Define generic 3 micro-op types

let NumMicroOps = 3 in {
def V2Write_1cyc_1L01_1D_1I
    : SchedWriteRes<[V2UnitL01, V2UnitD, V2UnitI]> { let Latency = 1; }
def V2Write_2cyc_1L01_1V01_1I
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitI]> { let Latency = 2; }
def V2Write_2cyc_1L01_2V01
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> { let Latency = 2; }
def V2Write_4cyc_1L01_2V01
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01]> { let Latency = 4; }
def V2Write_9cyc_1L_2V
    : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> { let Latency = 9; }
def V2Write_4cyc_3V01
    : SchedWriteRes<[V2UnitV01, V2UnitV01, V2UnitV01]> { let Latency = 4; }
def V2Write_7cyc_1M_1M0_1V
    : SchedWriteRes<[V2UnitM, V2UnitM0, V2UnitV]> { let Latency = 7; }
def V2Write_2cyc_1L01_1S_1V
    : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV]> { let Latency = 2; }
def V2Write_2cyc_1L01_1S_1V01
    : SchedWriteRes<[V2UnitL01, V2UnitS, V2UnitV01]> { let Latency = 2; }
def V2Write_6cyc_3L
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL]> { let Latency = 6; }
def V2Write_6cyc_3V
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV]> { let Latency = 6; }
def V2Write_8cyc_1L_2V
    : SchedWriteRes<[V2UnitL, V2UnitV, V2UnitV]> { let Latency = 8; }
} // NumMicroOps = 3

//===----------------------------------------------------------------------===//
// Define generic 4 micro-op types

let NumMicroOps = 4 in {
def V2Write_2cyc_1L01_2V01_1I
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01, V2UnitI]> { let Latency = 2; }
def V2Write_2cyc_2L01_2V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01, V2UnitV01]> { let Latency = 2; }
def V2Write_4cyc_2L01_2V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV01, V2UnitV01]> { let Latency = 4; }
def V2Write_5cyc_1I_3L
    : SchedWriteRes<[V2UnitI, V2UnitL, V2UnitL, V2UnitL]> { let Latency = 5; }
def V2Write_9cyc_2L_2V1
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV1, V2UnitV1]> { let Latency = 9; }
def V2Write_6cyc_4V0
    : SchedWriteRes<[V2UnitV0, V2UnitV0, V2UnitV0, V2UnitV0]> { let Latency = 6; }
def V2Write_8cyc_4V
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { let Latency = 8; }
def V2Write_6cyc_2V_2V13
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13, V2UnitV13]> { let Latency = 6; }
def V2Write_8cyc_2V_2V13
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV13, V2UnitV13]> { let Latency = 8; }
def V2Write_6cyc_4V02
    : SchedWriteRes<[V2UnitV02, V2UnitV02, V2UnitV02, V2UnitV02]> { let Latency = 6; }
def V2Write_6cyc_4V
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { let Latency = 6; }
def V2Write_8cyc_2L_2V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> { let Latency = 8; }
def V2Write_9cyc_2L_2V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitV, V2UnitV]> { let Latency = 9; }
def V2Write_2cyc_2L01_2V
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV, V2UnitV]> { let Latency = 2; }
def V2Write_4cyc_2L01_2V
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitV, V2UnitV]> { let Latency = 4; }
def V2Write_8cyc_2M0_2V02
    : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitV02, V2UnitV02]> { let Latency = 8; }
def V2Write_8cyc_2V_2V1
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV1, V2UnitV1]> { let Latency = 8; }
def V2Write_4cyc_2M0_2M
    : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM, V2UnitM]> { let Latency = 4; }
def V2Write_5cyc_2M0_2M
    : SchedWriteRes<[V2UnitM0, V2UnitM0, V2UnitM, V2UnitM]> { let Latency = 5; }
def V2Write_6cyc_2I_2L
    : SchedWriteRes<[V2UnitI, V2UnitI, V2UnitL, V2UnitL]> { let Latency = 6; }
def V2Write_7cyc_4L
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL]> { let Latency = 7; }
def V2Write_6cyc_1L01_3V01
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01, V2UnitV01]> { let Latency = 6; }
} // NumMicroOps = 4

//===----------------------------------------------------------------------===//
// Define generic 5 micro-op types

let NumMicroOps = 5 in {
def V2Write_2cyc_1L01_2V01_2I
    : SchedWriteRes<[V2UnitL01, V2UnitV01, V2UnitV01,
                     V2UnitI, V2UnitI]> { let Latency = 2; }
def V2Write_8cyc_2L_3V
    : SchedWriteRes<[V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV]> { let Latency = 8; }
def V2Write_9cyc_1L_4V
    : SchedWriteRes<[V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { let Latency = 9; }
def V2Write_10cyc_1L_4V
    : SchedWriteRes<[V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { let Latency = 10; }
def V2Write_6cyc_5V
    : SchedWriteRes<[V2UnitV, V2UnitV, V2UnitV, V2UnitV,
                     V2UnitV]> { let Latency = 6; }
} // NumMicroOps = 5

//===----------------------------------------------------------------------===//
// Define generic 6 micro-op types

let NumMicroOps = 6 in {
def V2Write_8cyc_3L_3V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV]> { let Latency = 8; }
def V2Write_9cyc_3L_3V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV]> { let Latency = 9; }
def V2Write_9cyc_2L_4V
    : SchedWriteRes<[V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { let Latency = 9; }
def V2Write_9cyc_2L_2V_2S
    : SchedWriteRes<[V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV,
                     V2UnitS, V2UnitS]> { let Latency = 9; }
def V2Write_9cyc_2V_4V13
    : SchedWriteRes<[V2UnitV, V2UnitV,
                     V2UnitV13, V2UnitV13, V2UnitV13, V2UnitV13]> { let Latency = 9; }
def V2Write_2cyc_3L01_3V
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV, V2UnitV, V2UnitV]> { let Latency = 2; }
def V2Write_4cyc_2L01_4V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> { let Latency = 4; }
def V2Write_5cyc_2L01_4V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> { let Latency = 5; }
def V2Write_2cyc_3L01_3V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01]> { let Latency = 2; }
def V2Write_4cyc_2L01_2S_2V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01,
                     V2UnitS, V2UnitS,
                     V2UnitV01, V2UnitV01]> { let Latency = 4; }
} // NumMicroOps = 6

//===----------------------------------------------------------------------===//
// Define generic 7 micro-op types

let NumMicroOps = 7 in {
def V2Write_8cyc_3L_4V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { let Latency = 8; }
} // NumMicroOps = 7

//===----------------------------------------------------------------------===//
// Define generic 8 micro-op types

let NumMicroOps = 8 in {
def V2Write_2cyc_4L01_4V
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { let Latency = 2; }
def V2Write_2cyc_4L01_4V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> { let Latency = 2; }
def V2Write_4cyc_4L01_4V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> { let Latency = 4; }
def V2Write_6cyc_2L01_6V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01,
                     V2UnitV01, V2UnitV01, V2UnitV01]> { let Latency = 6; }
def V2Write_8cyc_4L_4V
    : SchedWriteRes<[V2UnitL, V2UnitL, V2UnitL, V2UnitL,
                     V2UnitV, V2UnitV, V2UnitV, V2UnitV]> { let Latency = 8; }
} // NumMicroOps = 8

//===----------------------------------------------------------------------===//
// Define generic 9 micro-op types

let NumMicroOps = 9 in {
def V2Write_6cyc_3L01_6V01
    : SchedWriteRes<[V2UnitL01, V2UnitL01, V2UnitL01,
                     V2UnitV01, V2UnitV01, V2UnitV01,
                     V2UnitV01, V2UnitV01, V2UnitV01]> { let Latency = 6; }
} // NumMicroOps = 9

// Two further 9 micro-op types.
let NumMicroOps = 9 in {
def V2Write_10cyc_1L_8V : SchedWriteRes<[
    V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 10;
}

def V2Write_10cyc_3V_3L_3S : SchedWriteRes<[
    V2UnitV, V2UnitV, V2UnitV,
    V2UnitL, V2UnitL, V2UnitL,
    V2UnitS, V2UnitS, V2UnitS]> {
  let Latency = 10;
}
} // NumMicroOps = 9

//===----------------------------------------------------------------------===//
// Define generic 10 micro-op types

let NumMicroOps = 10 in {
def V2Write_9cyc_6L_4V : SchedWriteRes<[
    V2UnitL, V2UnitL, V2UnitL, V2UnitL, V2UnitL, V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 9;
}
} // NumMicroOps = 10

//===----------------------------------------------------------------------===//
// Define generic 12 micro-op types

let NumMicroOps = 12 in {
def V2Write_5cyc_4L01_8V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 5;
}

def V2Write_9cyc_4L_8V : SchedWriteRes<[
    V2UnitL, V2UnitL, V2UnitL, V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 9;
}

def V2Write_10cyc_4L_8V : SchedWriteRes<[
    V2UnitL, V2UnitL, V2UnitL, V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV]> {
  let Latency = 10;
}
} // NumMicroOps = 12

//===----------------------------------------------------------------------===//
// Define generic 16 micro-op types

let NumMicroOps = 16 in {
def V2Write_7cyc_4L01_12V01 : SchedWriteRes<[
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 7;
}

def V2Write_10cyc_4L_8V_4S : SchedWriteRes<[
    V2UnitL, V2UnitL, V2UnitL, V2UnitL,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV,
    V2UnitV, V2UnitV, V2UnitV, V2UnitV,
    V2UnitS, V2UnitS, V2UnitS, V2UnitS]> {
  let Latency = 10;
}
} // NumMicroOps = 16

//===----------------------------------------------------------------------===//
// Define generic 18 micro-op types

let NumMicroOps = 18 in {
def V2Write_7cyc_9L01_9V01 : SchedWriteRes<[
    // 9 x V2UnitL01
    V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitL01, V2UnitL01, V2UnitL01,
    // 9 x V2UnitV01
    V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 7;
}
} // NumMicroOps = 18

//===----------------------------------------------------------------------===//
// Define generic 27 micro-op types

let NumMicroOps = 27 in {
def V2Write_7cyc_9L01_9S_9V01 : SchedWriteRes<[
    // 9 x V2UnitL01
    V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitL01, V2UnitL01, V2UnitL01,
    // 9 x V2UnitS
    V2UnitS, V2UnitS, V2UnitS,
    V2UnitS, V2UnitS, V2UnitS,
    V2UnitS, V2UnitS, V2UnitS,
    // 9 x V2UnitV01
    V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 7;
}
} // NumMicroOps = 27

//===----------------------------------------------------------------------===//
// Define generic 36 micro-op types

let NumMicroOps = 36 in {
def V2Write_11cyc_18L01_18V01 : SchedWriteRes<[
    // 18 x V2UnitL01
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    // 18 x V2UnitV01
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 11;
}
} // NumMicroOps = 36

//===----------------------------------------------------------------------===//
// Define generic 54 micro-op types

def V2Write_11cyc_18L01_18S_18V01 : SchedWriteRes<[
    // 18 x V2UnitL01
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01, V2UnitL01,
    // 18 x V2UnitS
    V2UnitS, V2UnitS, V2UnitS, V2UnitS, V2UnitS, V2UnitS,
    V2UnitS, V2UnitS, V2UnitS, V2UnitS, V2UnitS, V2UnitS,
    V2UnitS, V2UnitS, V2UnitS, V2UnitS, V2UnitS, V2UnitS,
    // 18 x V2UnitV01
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01,
    V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01, V2UnitV01]> {
  let Latency = 11;
  let NumMicroOps = 54;
}

//===----------------------------------------------------------------------===//
// Define predicate-controlled types
//
// Each variant lists its special case first; the NoSchedPred entry is the
// fallback used when that predicate does not hold.

// 1 cycle on I when IsCheapLSL holds, otherwise 2 cycles on M.
def V2Write_ArithI : SchedWriteVariant<[
  SchedVar<IsCheapLSL,  [V2Write_1cyc_1I]>,
  SchedVar<NoSchedPred, [V2Write_2cyc_1M]>
]>;

// 1 cycle on F when IsCheapLSL holds, otherwise 2 cycles on M.
def V2Write_ArithF : SchedWriteVariant<[
  SchedVar<IsCheapLSL,  [V2Write_1cyc_1F]>,
  SchedVar<NoSchedPred, [V2Write_2cyc_1M]>
]>;

// 1 cycle on F when NeoverseNoLSL holds, otherwise 2 cycles on M.
def V2Write_Logical : SchedWriteVariant<[
  SchedVar<NeoverseNoLSL, [V2Write_1cyc_1F]>,
  SchedVar<NoSchedPred,   [V2Write_2cyc_1M]>
]>;

// 1 cycle on I for the ROR-immediate idiom, otherwise 3 cycles on I + M.
def V2Write_Extr : SchedWriteVariant<[
  SchedVar<IsRORImmIdiomPred, [V2Write_1cyc_1I]>,
  SchedVar<NoSchedPred,       [V2Write_3cyc_1I_1M]>
]>;

// The NeoverseHQForm case costs one extra cycle and an extra I micro-op.
def V2Write_LdrHQ : SchedWriteVariant<[
  SchedVar<NeoverseHQForm, [V2Write_7cyc_1I_1L]>,
  SchedVar<NoSchedPred,    [V2Write_6cyc_1L]>
]>;

// The NeoverseHQForm case adds an I micro-op at the same latency.
def V2Write_StrHQ : SchedWriteVariant<[
  SchedVar<NeoverseHQForm, [V2Write_2cyc_1L01_1V01_1I]>,
  SchedVar<NoSchedPred,    [V2Write_2cyc_1L01_1V01]>
]>;

// Zero latency and no resources when NeoverseZeroMove holds.
def V2Write_0or1cyc_1I : SchedWriteVariant<[
  SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
  SchedVar<NoSchedPred,      [V2Write_1cyc_1I]>
]>;

def V2Write_0or2cyc_1V : SchedWriteVariant<[
  SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
  SchedVar<NoSchedPred,      [V2Write_2cyc_1V]>
]>;

def V2Write_0or3cyc_1M0 : SchedWriteVariant<[
  SchedVar<NeoverseZeroMove, [V2Write_0cyc]>,
  SchedVar<NoSchedPred,      [V2Write_3cyc_1M0]>
]>;

// One extra cycle when NeoversePdIsPg holds.
def V2Write_2or3cyc_1M : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_3cyc_1M]>,
  SchedVar<NoSchedPred,    [V2Write_2cyc_1M]>
]>;

def V2Write_3or4cyc_2M : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_4cyc_2M]>,
  SchedVar<NoSchedPred,    [V2Write_3cyc_2M]>
]>;

def V2Write_1or2cyc_1M0 : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_2cyc_1M0]>,
  SchedVar<NoSchedPred,    [V2Write_1cyc_1M0]>
]>;

def V2Write_2or3cyc_1M0 : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_3cyc_1M0]>,
  SchedVar<NoSchedPred,    [V2Write_2cyc_1M0]>
]>;

def V2Write_1or2cyc_1M0_1M : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_2cyc_1M0_1M]>,
  SchedVar<NoSchedPred,    [V2Write_1cyc_1M0_1M]>
]>;

def V2Write_3or4cyc_1M0_1M : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_4cyc_1M0_1M]>,
  SchedVar<NoSchedPred,    [V2Write_3cyc_1M0_1M]>
]>;

def V2Write_4or5cyc_2M0_2M : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_5cyc_2M0_2M]>,
  SchedVar<NoSchedPred,    [V2Write_4cyc_2M0_2M]>
]>;

def V2Write_4or5cyc_1V0_1M0 : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_5cyc_1V0_1M0]>,
  SchedVar<NoSchedPred,    [V2Write_4cyc_1V0_1M0]>
]>;

def V2Write_2or3cyc_1V0_1M : SchedWriteVariant<[
  SchedVar<NeoversePdIsPg, [V2Write_3cyc_1V0_1M]>,
  SchedVar<NoSchedPred,    [V2Write_2cyc_1V0_1M]>
]>;

// 1 cycle on F when NeoverseCheapIncDec holds, otherwise 2 cycles on M.
def V2Write_IncDec : SchedWriteVariant<[
  SchedVar<NeoverseCheapIncDec, [V2Write_1cyc_1F]>,
  SchedVar<NoSchedPred,         [V2Write_2cyc_1M]>
]>;

//===----------------------------------------------------------------------===//
// Define forwarded types
//
// Each V2Wr_X write is paired with a V2Rd_X read. The SchedReadAdvance on the
// read applies only to the writes named in its list and shortens the latency
// seen from those producers by the given number of cycles.

// NOTE: SOG, p. 16, n. 2: Accumulator forwarding is not supported for
// consumers of 64 bit multiply high operations?
def V2Wr_IM   : SchedWriteRes<[V2UnitM]>  { let Latency = 2; }
def V2Wr_IMA  : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
// Uses V2Wr_IM when IsReg3ZeroPred holds, V2Wr_IMA otherwise.
def V2Wr_IMUL : SchedWriteVariant<[
  SchedVar<IsReg3ZeroPred, [V2Wr_IM]>,
  SchedVar<NoSchedPred,    [V2Wr_IMA]>
]>;
def V2Rd_IMA  : SchedReadAdvance<1, [V2Wr_IMA]>;

def V2Wr_FMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
def V2Rd_FMA : SchedReadAdvance<2, [WriteFMul, V2Wr_FMA]>;

def V2Wr_VA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Rd_VA : SchedReadAdvance<3, [V2Wr_VA]>;

def V2Wr_VDOT : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
def V2Rd_VDOT : SchedReadAdvance<2, [V2Wr_VDOT]>;

def V2Wr_VMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
def V2Rd_VMMA : SchedReadAdvance<2, [V2Wr_VMMA]>;

def V2Wr_VMA : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Rd_VMA : SchedReadAdvance<3, [V2Wr_VMA]>;

// NOTE(review): two resources are listed but NumMicroOps is left at its
// default; confirm this is intended.
def V2Wr_VMAH : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
def V2Rd_VMAH : SchedReadAdvance<2, [V2Wr_VMAH]>;

def V2Wr_VMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Rd_VMAL : SchedReadAdvance<3, [V2Wr_VMAL]>;

def V2Wr_VPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Rd_VPA : SchedReadAdvance<3, [V2Wr_VPA]>;

def V2Wr_VSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Rd_VSA : SchedReadAdvance<3, [V2Wr_VSA]>;

def V2Wr_VFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
def V2Rd_VFCMA : SchedReadAdvance<2, [V2Wr_VFCMA]>;

def V2Wr_VFM  : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
def V2Wr_VFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
def V2Rd_VFMA : SchedReadAdvance<2, [V2Wr_VFM, V2Wr_VFMA]>;

def V2Wr_VFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
def V2Rd_VFMAL : SchedReadAdvance<2, [V2Wr_VFMAL]>;

def V2Wr_VBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
def V2Rd_VBFDOT : SchedReadAdvance<2, [V2Wr_VBFDOT]>;
def V2Wr_VBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
def V2Rd_VBFMMA : SchedReadAdvance<2, [V2Wr_VBFMMA]>;
def V2Wr_VBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
def V2Rd_VBFMAL : SchedReadAdvance<3, [V2Wr_VBFMAL]>;

def V2Wr_CRC : SchedWriteRes<[V2UnitM0]> { let Latency = 2; }
def V2Rd_CRC : SchedReadAdvance<1, [V2Wr_CRC]>;

def V2Wr_ZA  : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Rd_ZA  : SchedReadAdvance<3, [V2Wr_ZA]>;
def V2Wr_ZPA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Rd_ZPA : SchedReadAdvance<3, [V2Wr_ZPA]>;
def V2Wr_ZSA : SchedWriteRes<[V2UnitV13]> { let Latency = 4; }
def V2Rd_ZSA : SchedReadAdvance<3, [V2Wr_ZSA]>;

def V2Wr_ZDOTB : SchedWriteRes<[V2UnitV]>   { let Latency = 3; }
def V2Rd_ZDOTB : SchedReadAdvance<2, [V2Wr_ZDOTB]>;
def V2Wr_ZDOTH : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Rd_ZDOTH : SchedReadAdvance<3, [V2Wr_ZDOTH]>;

// NOTE: SOG p. 43: Complex multiply-add B, H, S element size: How to reduce
// throughput to 1 in case of forwarding?
// Forwarded write/read pairs, continued. Each V2Rd_* is a
// SchedReadAdvance<N, [...]> whose list names the producer writes it applies
// to.
def V2Wr_ZCMABHS : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Rd_ZCMABHS : SchedReadAdvance<3, [V2Wr_ZCMABHS]>;
def V2Wr_ZCMAD   : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
def V2Rd_ZCMAD   : SchedReadAdvance<2, [V2Wr_ZCMAD]>;

def V2Wr_ZMMA : SchedWriteRes<[V2UnitV]> { let Latency = 3; }
def V2Rd_ZMMA : SchedReadAdvance<2, [V2Wr_ZMMA]>;

// NOTE(review): V2Wr_ZMABHS lists V2UnitV02 twice, like the D-element
// V2Wr_ZMAD but unlike V2Wr_ZCMABHS above - confirm against the SOG.
def V2Wr_ZMABHS : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 4; }
def V2Rd_ZMABHS : SchedReadAdvance<3, [V2Wr_ZMABHS]>;
def V2Wr_ZMAD   : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
def V2Rd_ZMAD   : SchedReadAdvance<2, [V2Wr_ZMAD]>;

def V2Wr_ZMAL : SchedWriteRes<[V2UnitV02]> { let Latency = 4; }
def V2Rd_ZMAL : SchedReadAdvance<3, [V2Wr_ZMAL]>;

// One read advance (V2Rd_ZMASQ) is shared by all three V2Wr_ZMASQ* writes.
def V2Wr_ZMASQL   : SchedWriteRes<[V2UnitV02]>            { let Latency = 4; }
def V2Wr_ZMASQBHS : SchedWriteRes<[V2UnitV02]>            { let Latency = 4; }
def V2Wr_ZMASQD   : SchedWriteRes<[V2UnitV02, V2UnitV02]> { let Latency = 5; }
def V2Rd_ZMASQ    : SchedReadAdvance<2, [V2Wr_ZMASQL, V2Wr_ZMASQBHS,
                                         V2Wr_ZMASQD]>;

def V2Wr_ZFCMA : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
def V2Rd_ZFCMA : SchedReadAdvance<3, [V2Wr_ZFCMA]>;

def V2Wr_ZFMA : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
def V2Rd_ZFMA : SchedReadAdvance<2, [V2Wr_ZFMA]>;

def V2Wr_ZFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 4; }
def V2Rd_ZFMAL : SchedReadAdvance<2, [V2Wr_ZFMAL]>;

def V2Wr_ZBFDOT : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
def V2Rd_ZBFDOT : SchedReadAdvance<2, [V2Wr_ZBFDOT]>;
def V2Wr_ZBFMMA : SchedWriteRes<[V2UnitV]> { let Latency = 6; }
def V2Rd_ZBFMMA : SchedReadAdvance<2, [V2Wr_ZBFMMA]>;
def V2Wr_ZBFMAL : SchedWriteRes<[V2UnitV]> { let Latency = 5; }
def V2Rd_ZBFMAL : SchedReadAdvance<3, [V2Wr_ZBFMAL]>;

//===----------------------------------------------------------------------===//
// Define types with long resource cycles (rc)
//
// Naming: V2Write_<Latency>cyc_1<unit>_<N>rc, where <N> is the single
// ReleaseAtCycles entry for the one listed resource.

def V2Write_6cyc_1V1_5rc    : SchedWriteRes<[V2UnitV1]>  { let Latency =  6; let ReleaseAtCycles = [ 5]; }
def V2Write_7cyc_1V02_7rc   : SchedWriteRes<[V2UnitV02]> { let Latency =  7; let ReleaseAtCycles = [ 7]; }
def V2Write_10cyc_1V02_5rc  : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 5]; }
def V2Write_10cyc_1V02_9rc  : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [ 9]; }
def V2Write_10cyc_1V02_10rc : SchedWriteRes<[V2UnitV02]> { let Latency = 10; let ReleaseAtCycles = [10]; }
def V2Write_10cyc_1V0_9rc   : SchedWriteRes<[V2UnitV0]>  { let Latency = 10; let ReleaseAtCycles = [ 9]; }
def V2Write_10cyc_1V1_9rc   : SchedWriteRes<[V2UnitV1]>  { let Latency = 10; let ReleaseAtCycles = [ 9]; }
def V2Write_13cyc_1V0_12rc  : SchedWriteRes<[V2UnitV0]>  { let Latency = 13; let ReleaseAtCycles = [12]; }
def V2Write_13cyc_1V02_12rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [12]; }
def V2Write_13cyc_1V02_13rc : SchedWriteRes<[V2UnitV02]> { let Latency = 13; let ReleaseAtCycles = [13]; }
def V2Write_15cyc_1V02_14rc : SchedWriteRes<[V2UnitV02]> { let Latency = 15; let ReleaseAtCycles = [14]; }
def V2Write_16cyc_1V02_15rc : SchedWriteRes<[V2UnitV02]> { let Latency = 16; let ReleaseAtCycles = [15]; }
def V2Write_16cyc_1V0_14rc  : SchedWriteRes<[V2UnitV0]>  { let Latency = 16; let ReleaseAtCycles = [14]; }

// Miscellaneous
// -----------------------------------------------------------------------------

// COPY is given the generic integer ALU write, which is aliased to
// V2Write_1cyc_1I in §3.4 below.
def : InstRW<[WriteI], (instrs COPY)>;

// §3.3 Branch instructions
// -----------------------------------------------------------------------------

// Branch, immed
// Compare and branch
def : SchedAlias<WriteBr,    V2Write_1cyc_1B>;

// Branch, register
def : SchedAlias<WriteBrReg, V2Write_1cyc_1B>;

// Branch and link, immed
// Branch and link, register
def : InstRW<[V2Write_1cyc_1B_1R], (instrs BL, BLR)>;

// §3.4 Arithmetic and Logical Instructions
// -----------------------------------------------------------------------------

// ALU, basic
// ALU, basic, flagset
def : SchedAlias<WriteI, V2Write_1cyc_1I>;
def : InstRW<[V2Write_1cyc_1F],    (instregex "^(ADC|SBC)S[WX]r$")>;
def : InstRW<[V2Write_0or1cyc_1I], (instregex "^MOVZ[WX]i$")>;

// ALU, extend and shift
def : SchedAlias<WriteIEReg, V2Write_2cyc_1M>;

// Arithmetic, LSL shift, shift <= 4
// Arithmetic, flagset, LSL shift, shift <= 4
// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
def : SchedAlias<WriteISReg, V2Write_ArithI>;
def : InstRW<[V2Write_ArithF],
             (instregex "^(ADD|SUB)S[WX]rs$")>;

// Arithmetic, immediate to logical address tag
def : InstRW<[V2Write_2cyc_1M], (instrs ADDG, SUBG)>;

// Convert floating-point condition flags
// Flag manipulation instructions
// WriteSys lists no processor resources; only a latency of 1 is modelled.
def : WriteRes<WriteSys, []> { let Latency = 1; }

// Insert Random Tags
def : InstRW<[V2Write_2cyc_1M], (instrs IRG, IRGstack)>;

// Insert Tag Mask
// Subtract Pointer
// Subtract Pointer, flagset
def : InstRW<[V2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;

// Logical, shift, no flagset
// ORR is kept separate so that it can use the 0-or-1 cycle variant write.
def : InstRW<[V2Write_1cyc_1I],    (instregex "^(AND|BIC|EON|EOR|ORN)[WX]rs$")>;
def : InstRW<[V2Write_0or1cyc_1I], (instregex "^ORR[WX]rs$")>;

// Logical, shift, flagset
def : InstRW<[V2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;

// Move and shift instructions
// -----------------------------------------------------------------------------

def : SchedAlias<WriteImm, V2Write_1cyc_1I>;

// §3.5 Divide and multiply instructions
// -----------------------------------------------------------------------------

// SDIV, UDIV
def : SchedAlias<WriteID32, V2Write_12cyc_1M0>;
def : SchedAlias<WriteID64, V2Write_20cyc_1M0>;

// Default multiply writes; the InstRWs below override them for the opcodes
// they match.
def : SchedAlias<WriteIM32, V2Write_2cyc_1M>;
def : SchedAlias<WriteIM64, V2Write_2cyc_1M>;

// Multiply
// Multiply accumulate, W-form
// Multiply accumulate, X-form
// The last listed read (V2Rd_IMA) is the one carrying the forwarding advance.
def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA],
             (instregex "^M(ADD|SUB)[WX]rrr$")>;

// Multiply accumulate long
// Multiply long
def : InstRW<[V2Wr_IMUL, ReadIM, ReadIM, V2Rd_IMA],
             (instregex "^(S|U)M(ADD|SUB)Lrrr$")>;

// Multiply high
def : InstRW<[V2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>;

// Pointer Authentication Instructions (v8.3 PAC)
// -----------------------------------------------------------------------------

// Authenticate data address
// Authenticate instruction address
// Compute pointer authentication code for data address
// Compute pointer authentication code, using generic key
// Compute pointer authentication code for instruction address
def : InstRW<[V2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>;

// Branch and link, register, with pointer authentication
// Branch, register, with pointer authentication
// Branch, return, with pointer authentication
def : InstRW<[V2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA,
                                            BRAAZ, BRAB, BRABZ, RETAA, RETAB,
                                            ERETAA, ERETAB)>;


// Load register, with pointer authentication
def : InstRW<[V2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;

// Strip pointer authentication code
def : InstRW<[V2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>;

// Miscellaneous data-processing instructions
// -----------------------------------------------------------------------------

// Address generation
def : InstRW<[V2Write_1cyc_1F], (instrs ADR, ADRP)>;

// Bitfield extract, one reg
// Bitfield extract, two regs
def : SchedAlias<WriteExtr, V2Write_Extr>;
def : InstRW<[V2Write_Extr], (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move, basic
def : SchedAlias<WriteIS, V2Write_1cyc_1I>;

// Bitfield move, insert
def : InstRW<[V2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>;

// Load instructions
// -----------------------------------------------------------------------------

// NOTE: SOG p. 19: Throughput of LDN?P X-form should be 2, but reported as 3.

def : SchedAlias<WriteLD,    V2Write_4cyc_1L>;
def : SchedAlias<WriteLDIdx, V2Write_4cyc_1L>;

// Load register, literal
def : InstRW<[V2Write_5cyc_1L_1F], (instrs LDRWl, LDRXl, LDRSWl, PRFMl)>;

// Load pair, signed immed offset, signed words
def : InstRW<[V2Write_5cyc_1I_3L, WriteLDHi], (instrs LDPSWi)>;

// Load pair, immed post-index or immed pre-index, signed words
// WriteAdr is listed first in the pre/post-index forms.
def : InstRW<[WriteAdr, V2Write_5cyc_1I_3L, WriteLDHi],
             (instregex "^LDPSW(post|pre)$")>;

// Store instructions
// -----------------------------------------------------------------------------

// NOTE: SOG, p. 20: Unsure if STRH uses pipeline I.

// Default store writes: WriteST, WriteSTIdx and WriteSTP share one write.
// WriteAdr is the write that the pre/post-index InstRWs in this file list
// first.
def : SchedAlias<WriteST,    V2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteSTIdx, V2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteSTP,   V2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteAdr,   V2Write_1cyc_1I>;

// Tag load instructions
// -----------------------------------------------------------------------------

// Load allocation tag
// Load multiple allocation tags
def : InstRW<[V2Write_4cyc_1L], (instrs LDG, LDGM)>;

// Tag store instructions
// -----------------------------------------------------------------------------

// Store allocation tags to one or two granules, post-index
// Store allocation tags to one or two granules, pre-index
// Store allocation tag to one or two granules, zeroing, post-index
// Store Allocation Tag to one or two granules, zeroing, pre-index
// Store allocation tag and reg pair to memory, post-Index
// Store allocation tag and reg pair to memory, pre-Index
def : InstRW<[V2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex,
                                                ST2GPreIndex, ST2GPostIndex,
                                                STZGPreIndex, STZGPostIndex,
                                                STZ2GPreIndex, STZ2GPostIndex,
                                                STGPpre, STGPpost)>;

// Store allocation tags to one or two granules, signed offset
// Store allocation tag to two granules, zeroing, signed offset
// Store allocation tag and reg pair to memory, signed offset
// Store multiple allocation tags
def : InstRW<[V2Write_1cyc_1L01_1D], (instrs STGi, ST2Gi, STZGi,
                                             STZ2Gi, STGPi, STGM, STZGM)>;

// FP data processing instructions
// -----------------------------------------------------------------------------

// FP absolute value
// FP arithmetic
// FP min/max
// FP negate
// FP select
def : SchedAlias<WriteF, V2Write_2cyc_1V>;

// FP compare
def : SchedAlias<WriteFCmp, V2Write_2cyc_1V0>;

// FP divide, square root
// Default for WriteFDiv; the per-size InstRWs below override it for the
// opcodes they name.
def : SchedAlias<WriteFDiv, V2Write_7cyc_1V02>;

// FP divide, H-form
def : InstRW<[V2Write_7cyc_1V02],  (instrs FDIVHrr)>;
// FP divide, S-form
def : InstRW<[V2Write_10cyc_1V02], (instrs FDIVSrr)>;
// FP divide, D-form
def : InstRW<[V2Write_15cyc_1V02], (instrs FDIVDrr)>;

// FP square root, H-form
def : InstRW<[V2Write_7cyc_1V02],  (instrs FSQRTHr)>;
// FP square root, S-form
def : InstRW<[V2Write_9cyc_1V02],  (instrs FSQRTSr)>;
// FP square root, D-form
def : InstRW<[V2Write_16cyc_1V02], (instrs FSQRTDr)>;

// FP multiply
def : WriteRes<WriteFMul, [V2UnitV]> { let Latency = 3; }

// FP multiply accumulate
// The last listed read (V2Rd_FMA) is the one carrying the forwarding advance.
def : InstRW<[V2Wr_FMA, ReadDefault, ReadDefault, V2Rd_FMA],
             (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;

// FP round to integral
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ][HSD]r$",
                                             "^FRINT(32|64)[XZ][SD]r$")>;

// FP miscellaneous instructions
// -----------------------------------------------------------------------------

// FP convert, from gen to vec reg
def : InstRW<[V2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;

// FP convert, from vec to gen reg
def : InstRW<[V2Write_3cyc_1V01],
             (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]ri?$")>;

// FP convert, Javascript from vec to gen reg
def : SchedAlias<WriteFCvt, V2Write_3cyc_1V0>;

// FP convert, from vec to vec reg
def : InstRW<[V2Write_3cyc_1V02], (instrs FCVTSHr, FCVTDHr, FCVTHSr, FCVTDSr,
                                          FCVTHDr, FCVTSDr, FCVTXNv1i64)>;

// FP move, immed
// FP move, register
def : SchedAlias<WriteFImm, V2Write_2cyc_1V>;

// FP transfer, from gen to low half of vec reg
def : InstRW<[V2Write_0or3cyc_1M0],
             (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr)>;

// FP transfer, from gen to high half of vec reg
def : InstRW<[V2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;

// FP transfer, from vec to gen reg
def : SchedAlias<WriteFCopy, V2Write_2cyc_2V01>;

// FP load instructions
// -----------------------------------------------------------------------------

// Load vector reg, literal, S/D/Q forms
def : InstRW<[V2Write_7cyc_1F_1L], (instregex "^LDR[SDQ]l$")>;

// Load vector reg, unscaled immed
def : InstRW<[V2Write_6cyc_1L], (instregex "^LDUR[BHSDQ]i$")>;

// Load vector reg, immed post-index
// Load vector reg, immed pre-index
def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L],
             (instregex "^LDR[BHSDQ](pre|post)$")>;

// Load vector reg, unsigned immed
def : InstRW<[V2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;

// Load vector reg, register offset, basic
// Load vector reg, register offset, scale, S/D-form
// Load vector reg, register offset, scale, H/Q-form
// Load vector reg, register offset, extend
// Load vector reg, register offset, extend, scale, S/D-form
// Load vector reg, register offset, extend, scale, H/Q-form
// All of the forms above share the single V2Write_LdrHQ write.
def : InstRW<[V2Write_LdrHQ, ReadAdrBase], (instregex "^LDR[BHSDQ]ro[WX]$")>;

// Load vector pair, immed offset, S/D-form
def : InstRW<[V2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;

// Load vector pair, immed offset, Q-form
def : InstRW<[V2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;

// Load vector pair, immed post-index, S/D-form
// Load vector pair, immed pre-index, S/D-form
def : InstRW<[WriteAdr, V2Write_6cyc_1I_1L, WriteLDHi],
             (instregex "^LDP[SD](pre|post)$")>;

// Load vector pair, immed post-index, Q-form
// Load vector pair, immed pre-index, Q-form
def : InstRW<[WriteAdr, V2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost,
                                                                LDPQpre)>;

// FP store instructions
// -----------------------------------------------------------------------------

// Store vector reg, unscaled immed, B/H/S/D-form
// Store vector reg, unscaled immed, Q-form
def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STUR[BHSDQ]i$")>;

// Store vector reg, immed post-index, B/H/S/D-form
// Store vector reg, immed post-index, Q-form
// Store vector reg, immed pre-index, B/H/S/D-form
// Store vector reg, immed pre-index, Q-form
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I],
             (instregex "^STR[BHSDQ](pre|post)$")>;

// Store vector reg, unsigned immed, B/H/S/D-form
// Store vector reg, unsigned immed, Q-form
def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STR[BHSDQ]ui$")>;

// Store vector reg, register offset, basic, B/H/S/D-form
// Store vector reg, register offset, basic, Q-form
// Store vector reg, register offset, scale, H-form
// Store vector reg, register offset, scale, S/D-form
// Store vector reg, register offset, scale, Q-form
// Store vector reg, register offset, extend, B/H/S/D-form
// Store vector reg, register offset, extend, Q-form
// Store vector reg, register offset, extend, scale, H-form
// Store vector reg, register offset, extend, scale, S/D-form
// Store vector reg, register offset, extend, scale, Q-form
// All of the forms above share the single V2Write_StrHQ write.
def : InstRW<[V2Write_StrHQ, ReadAdrBase],
             (instregex "^STR[BHSDQ]ro[WX]$")>;

// Store vector pair, immed offset, S-form
// Store vector pair, immed offset, D-form
def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "^STN?P[SD]i$")>;

// Store vector pair, immed offset, Q-form
def : InstRW<[V2Write_2cyc_1L01_2V01], (instrs STPQi, STNPQi)>;

// Store vector pair, immed post-index, S-form
// Store vector pair, immed post-index, D-form
// Store vector pair, immed pre-index, S-form
// Store vector pair, immed pre-index, D-form
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01_1I],
             (instregex "^STP[SD](pre|post)$")>;

// NOTE(review): unlike the S/D-form pre/post-index pair stores just above,
// the two Q-form entries below do not list WriteAdr - confirm this is
// intentional.

// Store vector pair, immed post-index, Q-form
def : InstRW<[V2Write_2cyc_1L01_2V01_1I], (instrs STPQpost)>;

// Store vector pair, immed pre-index, Q-form
def : InstRW<[V2Write_2cyc_1L01_2V01_2I], (instrs STPQpre)>;

// ASIMD integer instructions
// -----------------------------------------------------------------------------

// ASIMD absolute diff
// ASIMD absolute diff long
// ASIMD arith, basic
// ASIMD arith, complex
// ASIMD arith, pair-wise
// ASIMD compare
// ASIMD logical
// ASIMD max/min, basic and pair-wise
// D-form (WriteVd) and Q-form (WriteVq) defaults use the same write.
def : SchedAlias<WriteVd, V2Write_2cyc_1V>;
def : SchedAlias<WriteVq, V2Write_2cyc_1V>;

// ASIMD absolute diff accum
// ASIMD absolute diff accum long
def : InstRW<[V2Wr_VA, V2Rd_VA], (instregex "^[SU]ABAL?v")>;

// ASIMD arith, reduce, 4H/4S
def : InstRW<[V2Write_2cyc_1V13], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;

// ASIMD arith, reduce, 8B/8H
def : InstRW<[V2Write_4cyc_1V13_1V],
             (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;

// ASIMD arith, reduce, 16B
def : InstRW<[V2Write_4cyc_2V13], (instregex "^(ADDV|[SU]ADDLV)v16i8v$")>;

// ASIMD dot product
// ASIMD dot product using signed and unsigned integers
def : InstRW<[V2Wr_VDOT, V2Rd_VDOT],
             (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;

// ASIMD matrix multiply-accumulate
def : InstRW<[V2Wr_VMMA, V2Rd_VMMA], (instrs SMMLA, UMMLA, USMMLA)>;

// ASIMD max/min, reduce, 4H/4S
def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU](MAX|MIN)Vv4i16v$",
                                             "^[SU](MAX|MIN)Vv4i32v$")>;

// ASIMD max/min, reduce, 8B/8H
def : InstRW<[V2Write_4cyc_1V13_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
                                                "^[SU](MAX|MIN)Vv8i16v$")>;

// ASIMD max/min, reduce, 16B
// NOTE(review): this pattern omits the leading '^' that the 4H/4S and 8B/8H
// patterns above carry; instregex is understood to match from the start of
// the opcode name either way - confirm.
def : InstRW<[V2Write_4cyc_2V13], (instregex "[SU](MAX|MIN)Vv16i8v$")>;

// ASIMD multiply
def : InstRW<[V2Write_4cyc_1V02], (instregex "^MULv", "^SQ(R)?DMULHv")>;

// ASIMD multiply accumulate
def : InstRW<[V2Wr_VMA, V2Rd_VMA], (instregex "^MLAv", "^MLSv")>;

// ASIMD multiply accumulate high
def : InstRW<[V2Wr_VMAH, V2Rd_VMAH], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;

// ASIMD multiply accumulate long
def : InstRW<[V2Wr_VMAL, V2Rd_VMAL], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;

// ASIMD multiply accumulate saturating long
// No forwarding read is attached to this entry.
def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDML[AS]L[iv]")>;

// ASIMD multiply/multiply long (8x8) polynomial, D-form
// ASIMD multiply/multiply long (8x8) polynomial, Q-form
def : InstRW<[V2Write_3cyc_1V23], (instregex "^PMULL?(v8i8|v16i8)$")>;

// ASIMD multiply long
def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]MULLv", "^SQDMULL[iv]")>;

// ASIMD pairwise add and accumulate long
def : InstRW<[V2Wr_VPA, V2Rd_VPA], (instregex "^[SU]ADALPv")>;

// ASIMD shift accumulate
def : InstRW<[V2Wr_VSA, V2Rd_VSA], (instregex "^[SU]SRA[dv]", "^[SU]RSRA[dv]")>;

// ASIMD shift by immed, basic
def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHL[dv]", "^SHLLv", "^SHRNv",
                                             "^SSHLLv", "^SSHR[dv]", "^USHLLv",
                                             "^USHR[dv]")>;

// ASIMD shift by immed and insert, basic
def : InstRW<[V2Write_2cyc_1V13], (instregex "^SLI[dv]", "^SRI[dv]")>;

// ASIMD shift by immed, complex
def : InstRW<[V2Write_4cyc_1V13],
             (instregex "^RSHRNv", "^SQRSHRU?N[bhsv]", "^(SQSHLU?|UQSHL)[bhsd]$",
                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
                        "^SQSHRU?N[bhsv]", "^SRSHR[dv]", "^UQRSHRN[bhsv]",
                        "^UQSHRN[bhsv]", "^URSHR[dv]")>;

// ASIMD shift by register, basic
def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]SHLv")>;

// ASIMD shift by register, complex
def : InstRW<[V2Write_4cyc_1V13],
             (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
                        "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;

// ASIMD floating-point instructions
// -----------------------------------------------------------------------------

// ASIMD FP absolute value/difference
// ASIMD FP arith, normal
// ASIMD FP compare
// ASIMD FP complex add
// ASIMD FP max/min, normal
// ASIMD FP max/min, pairwise
// ASIMD FP negate
// Handled by SchedAlias<WriteV[dq], ...>

// ASIMD FP complex multiply add
def : InstRW<[V2Wr_VFCMA, V2Rd_VFCMA], (instregex "^FCMLAv")>;

// ASIMD FP convert, long (F16 to F32)
def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTL(v4|v8)i16")>;

// ASIMD FP convert, long (F32 to F64)
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTL(v2|v4)i32")>;

// ASIMD FP convert, narrow (F32 to F16)
def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTN(v4|v8)i16")>;

// ASIMD FP convert, narrow (F64 to F32)
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVTN(v2|v4)i32",
                                             "^FCVTXN(v2|v4)f32")>;

// ASIMD FP convert, other, D-form F32 and Q-form F64
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FCVT[AMNPZ][SU]v2f(32|64)$",
                                             "^FCVT[AMNPZ][SU]v1i64$",
                                             "^FCVTZ[SU]d$",
                                             "^[SU]CVTFv2f(32|64)$",
                                             "^[SU]CVTFv1i64$",
                                             "^[SU]CVTFd$")>;

// ASIMD FP convert, other, D-form F16 and Q-form F32
def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT[AMNPZ][SU]v4f(16|32)$",
                                             "^FCVT[AMNPZ][SU]v1i32$",
                                             "^FCVTZ[SU]s$",
                                             "^[SU]CVTFv4f(16|32)$",
                                             "^[SU]CVTFv1i32$",
                                             "^[SU]CVTFs$")>;

// ASIMD FP convert, other, Q-form F16
def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVT[AMNPZ][SU]v8f16$",
                                             "^FCVT[AMNPZ][SU]v1f16$",
                                             "^FCVTZ[SU]h$",
                                             "^[SU]CVTFv8f16$",
                                             "^[SU]CVTFv1i16$",
                                             "^[SU]CVTFh$")>;

// The ASIMD FP divide and square root entries below use writes whose names
// end in _<N>rc; the name encodes a ReleaseAtCycles value.

// ASIMD FP divide, D-form, F16
def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FDIVv4f16)>;

// ASIMD FP divide, D-form, F32
def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FDIVv2f32)>;

// ASIMD FP divide, Q-form, F16
def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FDIVv8f16)>;

// ASIMD FP divide, Q-form, F32
def : InstRW<[V2Write_10cyc_1V02_10rc], (instrs FDIVv4f32)>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[V2Write_15cyc_1V02_14rc], (instrs FDIVv2f64)>;

// ASIMD FP max/min, reduce, F32 and D-form F16
def : InstRW<[V2Write_4cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;

// ASIMD FP max/min, reduce, Q-form F16
def : InstRW<[V2Write_6cyc_3V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;

// ASIMD FP multiply
def : InstRW<[V2Wr_VFM], (instregex "^FMULv", "^FMULXv")>;

// ASIMD FP multiply accumulate
def : InstRW<[V2Wr_VFMA, V2Rd_VFMA], (instregex "^FMLAv", "^FMLSv")>;

// ASIMD FP multiply accumulate long
def : InstRW<[V2Wr_VFMAL, V2Rd_VFMAL], (instregex "^FML[AS]L2?(lane)?v")>;

// ASIMD FP round, D-form F32 and Q-form F64
def : InstRW<[V2Write_3cyc_1V02],
             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
                        "^FRINT(32|64)[XZ]v2f(32|64)$")>;

// ASIMD FP round, D-form F16 and Q-form F32
def : InstRW<[V2Write_4cyc_2V02],
             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
                        "^FRINT(32|64)[XZ]v4f32$")>;

// ASIMD FP round, Q-form F16
def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]v8f16$")>;

// ASIMD FP square root, D-form, F16
def : InstRW<[V2Write_7cyc_1V02_7rc], (instrs FSQRTv4f16)>;

// ASIMD FP square root, D-form, F32
def : InstRW<[V2Write_10cyc_1V02_5rc], (instrs FSQRTv2f32)>;

// ASIMD FP square root, Q-form, F16
def : InstRW<[V2Write_13cyc_1V02_13rc], (instrs FSQRTv8f16)>;

// ASIMD FP square root, Q-form, F32
def : InstRW<[V2Write_10cyc_1V02_9rc], (instrs FSQRTv4f32)>;

// ASIMD FP square root, Q-form, F64
def : InstRW<[V2Write_16cyc_1V02_15rc], (instrs FSQRTv2f64)>;

// ASIMD BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------

// ASIMD convert, F32 to BF16
def : InstRW<[V2Write_4cyc_2V02], (instrs BFCVTN, BFCVTN2)>;

// ASIMD dot product
def : InstRW<[V2Wr_VBFDOT, V2Rd_VBFDOT], (instrs BFDOTv4bf16, BFDOTv8bf16)>;

// ASIMD matrix multiply accumulate
def : InstRW<[V2Wr_VBFMMA, V2Rd_VBFMMA], (instrs BFMMLA)>;

// ASIMD multiply accumulate long
def : InstRW<[V2Wr_VBFMAL, V2Rd_VBFMAL], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
                                                 BFMLALTIdx)>;

// Scalar convert, F32 to BF16
def : InstRW<[V2Write_3cyc_1V02], (instrs BFCVT)>;

// ASIMD miscellaneous instructions
// -----------------------------------------------------------------------------

// ASIMD bit reverse
// ASIMD bitwise insert
// ASIMD count
// ASIMD duplicate, element
// ASIMD extract
// ASIMD extract narrow
// ASIMD insert, element to element
// ASIMD move, FP immed
// ASIMD move, integer immed
// ASIMD reverse
// ASIMD table lookup extension, 1 table reg
// ASIMD transpose
// ASIMD unzip/zip
// Handled by SchedAlias<WriteV[dq], ...>
// MOVID and MOVIv2d_ns are the exception: they use the 0-or-2 cycle variant
// write.
def : InstRW<[V2Write_0or2cyc_1V], (instrs MOVID, MOVIv2d_ns)>;

// ASIMD duplicate, gen reg
def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;

// ASIMD extract narrow, saturating
def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTNv", "^SQXTUNv")>;

// ASIMD reciprocal and square root estimate, D-form U32
def : InstRW<[V2Write_3cyc_1V02], (instrs URECPEv2i32, URSQRTEv2i32)>;

// ASIMD reciprocal and square root estimate, Q-form U32
def : InstRW<[V2Write_4cyc_2V02], (instrs URECPEv4i32, URSQRTEv4i32)>;

// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
def : InstRW<[V2Write_3cyc_1V02], (instrs FRECPEv1f16, FRECPEv1i32,
                                          FRECPEv1i64, FRECPEv2f32,
                                          FRSQRTEv1f16, FRSQRTEv1i32,
                                          FRSQRTEv1i64, FRSQRTEv2f32)>;

// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
def : InstRW<[V2Write_4cyc_2V02], (instrs FRECPEv4f16, FRECPEv4f32,
                                          FRSQRTEv4f16, FRSQRTEv4f32)>;

// ASIMD reciprocal and square root estimate, Q-form F16
def : InstRW<[V2Write_6cyc_4V02], (instrs FRECPEv8f16, FRSQRTEv8f16)>;

// ASIMD reciprocal exponent
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRECPXv")>;

// ASIMD reciprocal step
def : InstRW<[V2Write_4cyc_1V], (instregex "^FRECPS(32|64|v)",
                                           "^FRSQRTS(32|64|v)")>;

// ASIMD table lookup, 1 or 2 table regs
def : InstRW<[V2Write_2cyc_1V01], (instrs TBLv8i8One, TBLv16i8One,
                                          TBLv8i8Two, TBLv16i8Two)>;

// ASIMD table lookup, 3 table regs
def : InstRW<[V2Write_4cyc_2V01], (instrs TBLv8i8Three, TBLv16i8Three)>;

// ASIMD table lookup, 4 table regs
def : InstRW<[V2Write_4cyc_3V01], (instrs TBLv8i8Four, TBLv16i8Four)>;

// ASIMD table lookup extension, 2 table reg
def : InstRW<[V2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;

// ASIMD table lookup extension, 3 table reg
def : InstRW<[V2Write_6cyc_3V], (instrs TBXv8i8Three, TBXv16i8Three)>;

// ASIMD table lookup extension, 4 table reg
def : InstRW<[V2Write_6cyc_5V], (instrs TBXv8i8Four, TBXv16i8Four)>;

// ASIMD transfer, element to gen reg
def : InstRW<[V2Write_2cyc_2V01], (instregex "^[SU]MOVv")>;

// ASIMD transfer, gen reg to element
def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>;

// ASIMD load instructions
// -----------------------------------------------------------------------------
//
// Each load form below has two entries: the base opcode, and the _POST
// (post-index) opcode, which lists WriteAdr first.

// ASIMD load, 1 element, multiple, 1 reg, D-form
def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_1L],
             (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_1L],
             (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, D-form
def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_2L],
             (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[V2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_2L],
             (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, D-form
def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_3L],
             (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[V2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_3L],
             (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, D-form
def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_7cyc_4L],
             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_7cyc_4L],
             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;

// NOTE(review): from here on the patterns omit the leading '^' used by the
// LD1 multiple-structure patterns above; instregex is understood to match
// from the start of the opcode name either way - confirm.

// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "LD1i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "LD1Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, all lanes, Q-form
def : InstRW<[V2Write_8cyc_1L_1V],           (instregex "LD1Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)_POST$")>;

// ASIMD load, 2 element, multiple, Q-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
def : InstRW<[V2Write_8cyc_2L_2V],           (instregex "LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "LD2i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "LD2Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 2 element, all lanes, Q-form
def : InstRW<[V2Write_8cyc_1L_2V],           (instregex "LD2Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "LD3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)_POST$")>;

// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
def : InstRW<[V2Write_8cyc_3L_3V],           (instregex "LD3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "LD3i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)_POST$")>;

// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_2L_3V],           (instregex "LD3Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
def : InstRW<[V2Write_8cyc_3L_3V],           (instregex "LD3Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_8cyc_3L_4V],           (instregex "LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;

// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
def : InstRW<[V2Write_9cyc_6L_4V],           (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_9cyc_6L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[V2Write_8cyc_3L_4V],           (instregex "LD4i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
def : InstRW<[V2Write_8cyc_3L_4V],           (instregex "LD4Rv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_3L_4V],
             (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
def : InstRW<[V2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_8cyc_4L_4V],
             (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD store instructions
// -----------------------------------------------------------------------------
//
// As with the loads, each store has one record for the plain opcode and one
// for its _POST opcode; the _POST record lists WriteAdr ahead of the store
// write.

// ASIMD store, 1 element, multiple, 1 reg, D-form
def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01],
             (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01],
             (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, D-form
def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_1L01_1V01],
             (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01],
             (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, D-form
def : InstRW<[V2Write_2cyc_2L01_2V01],
             (instregex "ST1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01],
             (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[V2Write_2cyc_3L01_3V01],
             (instregex "ST1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_3L01_3V01],
             (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, D-form
def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_2L01_2V01],
             (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[V2Write_2cyc_4L01_4V01],
             (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_2cyc_4L01_4V01],
             (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST1i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01],
             (instregex "ST1i(8|16|32|64)_POST$")>;

// ASIMD store, 2 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01],
             (instregex "ST2Twov(8b|4h|2s)_POST$")>;

// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01],
             (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
def : InstRW<[V2Write_4cyc_1L01_2V01], (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_1L01_2V01],
             (instregex "ST2i(8|16|32|64)_POST$")>;

// ASIMD store, 3 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3Threev(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01],
             (instregex "ST3Threev(8b|4h|2s)_POST$")>;

// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
def : InstRW<[V2Write_6cyc_3L01_6V01],
             (instregex "ST3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_3L01_6V01],
             (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
def : InstRW<[V2Write_5cyc_2L01_4V01], (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[WriteAdr, V2Write_5cyc_2L01_4V01],
             (instregex "ST3i(8|16|32|64)_POST$")>;

// ASIMD store, 4 element, multiple, D-form, B/H/S
def : InstRW<[V2Write_6cyc_2L01_6V01], (instregex "ST4Fourv(8b|4h|2s)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_2L01_6V01],
             (instregex "ST4Fourv(8b|4h|2s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, B/H/S
def : InstRW<[V2Write_7cyc_4L01_12V01], (instregex "ST4Fourv(16b|8h|4s)$")>;
def : InstRW<[WriteAdr, V2Write_7cyc_4L01_12V01],
             (instregex "ST4Fourv(16b|8h|4s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[V2Write_5cyc_4L01_8V01], (instregex "ST4Fourv(2d)$")>;
def : InstRW<[WriteAdr, V2Write_5cyc_4L01_8V01],
             (instregex "ST4Fourv(2d)_POST$")>;

// ASIMD store, 4 element, one lane, B/H/S
def : InstRW<[V2Write_6cyc_1L01_3V01], (instregex "ST4i(8|16|32)$")>;
def : InstRW<[WriteAdr, V2Write_6cyc_1L01_3V01],
             (instregex "ST4i(8|16|32)_POST$")>;

// ASIMD store, 4 element, one lane, D
def : InstRW<[V2Write_4cyc_2L01_4V01], (instregex "ST4i(64)$")>;
def : InstRW<[WriteAdr, V2Write_4cyc_2L01_4V01],
             (instregex "ST4i(64)_POST$")>;

// Cryptography extensions
// -----------------------------------------------------------------------------

// Crypto AES ops
def : InstRW<[V2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;

// Crypto polynomial (64x64) multiply long
def : InstRW<[V2Write_2cyc_1V], (instrs PMULLv1i64, PMULLv2i64)>;

// Crypto SHA1 hash acceleration op
// Crypto SHA1 schedule acceleration ops
def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>;

// Crypto SHA1 hash acceleration ops
// Crypto SHA256 hash acceleration ops
def : InstRW<[V2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;

// Crypto SHA256 schedule acceleration ops
def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>;

// Crypto SHA512 hash acceleration ops
def : InstRW<[V2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;

// Crypto SHA3 ops
def : InstRW<[V2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;

// Crypto SM3 ops
def : InstRW<[V2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
                                            "^SM3TT[12][AB]$")>;

// Crypto SM4 ops
def : InstRW<[V2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>;

// CRC
// -----------------------------------------------------------------------------

def : InstRW<[V2Wr_CRC, V2Rd_CRC], (instregex "^CRC32")>;

// SVE Predicate instructions
// -----------------------------------------------------------------------------

// Loop control, based on predicate
def : InstRW<[V2Write_2or3cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP,
                                           BRKB_PPmP, BRKB_PPzP)>;

// Loop control, based on predicate and flag setting
def : InstRW<[V2Write_3or4cyc_2M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;

// Loop control, propagating
def : InstRW<[V2Write_2or3cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP,
                                            BRKPB_PPzPP)>;

// Loop control, propagating and flag setting
def : InstRW<[V2Write_3or4cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
                                               BRKPBS_PPzPP)>;

// Loop control, based on GPR
def : InstRW<[V2Write_3cyc_2M],
             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]")>;
def : InstRW<[V2Write_3cyc_2M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]")>;

// Loop terminate
def : InstRW<[V2Write_1cyc_2M], (instregex "^CTERM(EQ|NE)_(WW|XX)")>;

// Predicate counting scalar
def : InstRW<[V2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
def : InstRW<[V2Write_2cyc_1M],
             (instregex "^(CNT|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI",
                        "^SQ(DEC|INC)[BHWD]_XPiWdI",
                        "^UQ(DEC|INC)[BHWD]_WPiI")>;

// Predicate counting scalar, ALL, {1,2,4}
def : InstRW<[V2Write_IncDec], (instregex "^(DEC|INC)[BHWD]_XPiI")>;

// Predicate counting scalar, active predicate
def : InstRW<[V2Write_2cyc_1M],
             (instregex "^CNTP_XPP_[BHSD]",
                        "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]",
                        "^(UQDEC|UQINC)P_WP_[BHSD]",
                        "^(SQDEC|SQINC)P_XPWd_[BHSD]")>;

// Predicate counting vector, active predicate
def : InstRW<[V2Write_7cyc_1M_1M0_1V],
             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]")>;

// Predicate logical
def : InstRW<[V2Write_1or2cyc_1M0],
             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP")>;

// Predicate logical, flag setting
def : InstRW<[V2Write_1or2cyc_1M0_1M],
             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP")>;

// Predicate reverse
def : InstRW<[V2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]")>;

// Predicate select
def : InstRW<[V2Write_1cyc_1M0], (instrs SEL_PPPP)>;

// Predicate set
def : InstRW<[V2Write_2cyc_1M], (instregex "^PFALSE", "^PTRUE_[BHSD]")>;

// Predicate set/initialize, set flags
def : InstRW<[V2Write_3cyc_2M], (instregex "^PTRUES_[BHSD]")>;

// Predicate find first/next
def : InstRW<[V2Write_2cyc_1M], (instregex "^PFIRST_B", "^PNEXT_[BHSD]")>;

// Predicate test
def : InstRW<[V2Write_1cyc_1M], (instrs PTEST_PP)>;

// Predicate transpose
def : InstRW<[V2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSD]")>;

// Predicate unpack and widen
def : InstRW<[V2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;

// Predicate zip/unzip
def : InstRW<[V2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSD]")>;

// SVE integer instructions
// -----------------------------------------------------------------------------

// Arithmetic, absolute diff
def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]",
                                           "^[SU]ABD_ZPZZ_[BHSD]")>;

// Arithmetic, absolute diff accum
def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABA_ZZZ_[BHSD]")>;

// Arithmetic, absolute diff accum long
def : InstRW<[V2Wr_ZA, V2Rd_ZA], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]")>;

// Arithmetic, absolute diff long
def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]")>;

// Arithmetic, basic
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
                        "^(ADD|SUB)_ZZZ_[BHSD]",
                        "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]",
                        "^(ADD|SUB|SUBR)_ZI_[BHSD]",
                        "^ADR_[SU]XTW_ZZZ_D_[0123]",
                        "^ADR_LSL_ZZZ_[SD]_[0123]",
                        "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]",
                        "^SADDLBT_ZZZ_[HSD]",
                        "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]",
                        "^SSUBL(BT|TB)_ZZZ_[HSD]")>;

// Arithmetic, complex
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]",
                        "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]",
                        "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]",
                        "^[SU]Q(ADD|SUB)_ZI_[BHSD]",
                        "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]",
                        "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>;

// Arithmetic, large integer
def : InstRW<[V2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]")>;

// Arithmetic, pairwise add
def : InstRW<[V2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]")>;

// Arithmetic, pairwise add and accum long
def : InstRW<[V2Wr_ZPA, ReadDefault, V2Rd_ZPA],
             (instregex "^[SU]ADALP_ZPmZ_[HSD]")>;

// Arithmetic, shift
def : InstRW<[V2Write_2cyc_1V13],
             (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]",
                        "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]",
                        "^(ASR|LSL|LSR)_ZPmI_[BHSD]",
                        "^(ASR|LSL|LSR)_ZPmZ_[BHSD]",
                        "^(ASR|LSL|LSR)_ZZI_[BHSD]",
                        "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]",
                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>;

// Arithmetic, shift and accumulate
def : InstRW<[V2Wr_ZSA, V2Rd_ZSA], (instregex "^[SU]R?SRA_ZZI_[BHSD]")>;

// Arithmetic, shift by immediate
def : InstRW<[V2Write_2cyc_1V13], (instregex "^SHRN[BT]_ZZI_[BHS]",
                                             "^[SU]SHLL[BT]_ZZI_[HSD]")>;

// Arithmetic, shift by immediate and insert
def : InstRW<[V2Write_2cyc_1V13], (instregex "^(SLI|SRI)_ZZI_[BHSD]")>;

// Arithmetic, shift complex
def : InstRW<[V2Write_4cyc_1V13],
             (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]",
                        "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]",
                        "^[SU]QR?SHL_ZPZZ_[BHSD]",
                        "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]",
                        "^SQSHRU?N[BT]_ZZI_[BHS]",
                        "^UQR?SHRN[BT]_ZZI_[BHS]")>;

// Arithmetic, shift right for divide
def : InstRW<[V2Write_4cyc_1V13], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>;

// Arithmetic, shift rounding
def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]",
                                             "^[SU]RSHL_ZPZZ_[BHSD]",
                                             "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>;

// Bit manipulation
def : InstRW<[V2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>;

// Bitwise select
def : InstRW<[V2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ")>;

// Count/reverse bits
def : InstRW<[V2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>;

// Broadcast logical bitmask immediate to vector
def : InstRW<[V2Write_2cyc_1V], (instrs DUPM_ZI)>;

// Compare and set flags
def : InstRW<[V2Write_4or5cyc_1V0_1M0],
             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]",
                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]")>;

// Complex add
def : InstRW<[V2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]")>;

// Complex dot product 8-bit element
def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;

// Complex dot product 16-bit element
def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;

// Complex multiply-add B, H, S element size
def : InstRW<[V2Wr_ZCMABHS, V2Rd_ZCMABHS], (instregex "^CMLA_ZZZ_[BHS]",
                                                      "^CMLA_ZZZI_[HS]")>;

// Complex multiply-add D element size
def : InstRW<[V2Wr_ZCMAD, V2Rd_ZCMAD], (instrs CMLA_ZZZ_D)>;

// Conditional extract operations, scalar form
def : InstRW<[V2Write_8cyc_1M0_1V01], (instregex "^CLAST[AB]_RPZ_[BHSD]")>;

// Conditional extract operations, SIMD&FP scalar and vector forms
def : InstRW<[V2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]",
                                            "^COMPACT_ZPZ_[SD]",
                                            "^SPLICE_ZPZZ?_[BHSD]")>;

// Convert to floating point, 64b to float or convert to double
def : InstRW<[V2Write_3cyc_1V02], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]",
                                             "^[SU]CVTF_ZPmZ_StoD")>;

// Convert to floating point, 32b to single or half
def : InstRW<[V2Write_4cyc_2V02], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>;

// Convert to floating point, 16b to half
def : InstRW<[V2Write_6cyc_4V02], (instregex "^[SU]CVTF_ZPmZ_HtoH")>;

// Copy, scalar
def : InstRW<[V2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]")>;

// Copy, scalar SIMD&FP or imm
def : InstRW<[V2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]",
                                           "^CPY_ZPzI_[BHSD]")>;

// Divides, 32 bit
def : InstRW<[V2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S",
                                             "^[SU]DIV_ZPZZ_S")>;

// Divides, 64 bit
def : InstRW<[V2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D",
                                             "^[SU]DIV_ZPZZ_D")>;

// Dot product, 8 bit
def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB], (instregex "^[SU]DOT_ZZZI?_S")>;

// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[V2Wr_ZDOTB, V2Rd_ZDOTB],
             (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;

// Dot product, 16 bit
def : InstRW<[V2Wr_ZDOTH, V2Rd_ZDOTH], (instregex "^[SU]DOT_ZZZI?_D")>;

// Duplicate, immediate and indexed form
def : InstRW<[V2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]",
                                           "^DUP_ZZI_[BHSDQ]")>;

// Duplicate, scalar form
def : InstRW<[V2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]")>;

// Extend, sign or zero
def : InstRW<[V2Write_2cyc_1V13], (instregex "^[SU]XTB_ZPmZ_[HSD]",
                                             "^[SU]XTH_ZPmZ_[SD]",
                                             "^[SU]XTW_ZPmZ_[D]")>;

// Extract
def : InstRW<[V2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;

// Extract narrow saturating
def : InstRW<[V2Write_4cyc_1V13], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]",
                                             "^SQXTUN[BT]_ZZ_[BHS]")>;

// Extract/insert operation, SIMD and FP scalar form
def : InstRW<[V2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]",
                                            "^INSR_ZV_[BHSD]")>;

// Extract/insert operation, scalar
def : InstRW<[V2Write_6cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]",
                                                "^INSR_ZR_[BHSD]")>;

// Histogram operations
def : InstRW<[V2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]",
                                           "^HISTSEG_ZZZ")>;

// Horizontal operations, B, H, S form, immediate operands only
def : InstRW<[V2Write_4cyc_1V02], (instregex "^INDEX_II_[BHS]")>;

// Horizontal operations, B, H, S form, scalar, immediate operands / scalar
// operands only / immediate, scalar operands
def : InstRW<[V2Write_7cyc_1M0_1V02], (instregex "^INDEX_(IR|RI|RR)_[BHS]")>;

// Horizontal operations, D form, immediate operands only
def : InstRW<[V2Write_5cyc_2V02], (instrs INDEX_II_D)>;

// Horizontal operations, D form, scalar, immediate operands / scalar operands
// only / immediate, scalar operands
def : InstRW<[V2Write_8cyc_2M0_2V02], (instregex "^INDEX_(IR|RI|RR)_D")>;

// Logical
// The predicated pattern below already includes NOT_ZPmZ, so NOT needs no
// separate entry.
def : InstRW<[V2Write_2cyc_1V],
             (instregex "^(AND|EOR|ORR)_ZI",
                        "^(AND|BIC|EOR|ORR)_ZZZ",
                        "^EOR(BT|TB)_ZZZ_[BHSD]",
                        "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]")>;

// Max/min, basic and pairwise
def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]",
                                           "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]",
                                           "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>;

// Matching operations
// FIXME: SOG p. 44, n. 5: If the consuming instruction has a flag source, the
// latency for this instruction is 4 cycles.
def : InstRW<[V2Write_2or3cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]")>;

// Matrix multiply-accumulate
def : InstRW<[V2Wr_ZMMA, V2Rd_ZMMA],
             (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;

// Move prefix
def : InstRW<[V2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]",
                                           "^MOVPRFX_ZZ")>;

// Multiply, B, H, S element size
def : InstRW<[V2Write_4cyc_1V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]",
                                             "^MUL_ZPZZ_[BHS]",
                                             "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]",
                                             "^[SU]MULH_ZPZZ_[BHS]")>;

// Multiply, D element size
def : InstRW<[V2Write_5cyc_2V02], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D",
                                             "^MUL_ZPZZ_D",
                                             "^[SU]MULH_(ZPmZ|ZZZ)_D",
                                             "^[SU]MULH_ZPZZ_D")>;

// Multiply long
def : InstRW<[V2Write_4cyc_1V02], (instregex "^[SU]MULL[BT]_ZZZI_[SD]",
                                             "^[SU]MULL[BT]_ZZZ_[HSD]")>;

// Multiply accumulate, B, H, S element size
def : InstRW<[V2Wr_ZMABHS, V2Rd_ZMABHS],
             (instregex "^ML[AS]_ZZZI_[HS]", "^ML[AS]_ZPZZZ_[BHS]")>;
def : InstRW<[V2Wr_ZMABHS, ReadDefault, V2Rd_ZMABHS],
             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]")>;

// Multiply accumulate, D element size
def : InstRW<[V2Wr_ZMAD, V2Rd_ZMAD],
             (instregex "^ML[AS]_ZZZI_D", "^ML[AS]_ZPZZZ_D")>;
def : InstRW<[V2Wr_ZMAD, ReadDefault, V2Rd_ZMAD],
             (instregex "^(ML[AS]|MAD|MSB)_ZPmZZ_D")>;

// Multiply accumulate long
def : InstRW<[V2Wr_ZMAL, V2Rd_ZMAL],
             (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]",
                        "^[SU]ML[AS]L[BT]_ZZZI_[SD]")>;

// Multiply accumulate saturating doubling long regular
def : InstRW<[V2Wr_ZMASQL, V2Rd_ZMASQ],
             (instregex "^SQDML[AS]L(B|T|BT)_ZZZ_[HSD]",
                        "^SQDML[AS]L[BT]_ZZZI_[SD]")>;

// Multiply saturating doubling high, B, H, S element size
def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULH_ZZZ_[BHS]",
                                             "^SQDMULH_ZZZI_[HS]")>;

// Multiply saturating doubling high, D element size
def : InstRW<[V2Write_5cyc_2V02], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;

// Multiply saturating doubling long
def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQDMULL[BT]_ZZZ_[HSD]",
                                             "^SQDMULL[BT]_ZZZI_[SD]")>;

// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
// element size
def : InstRW<[V2Wr_ZMASQBHS, V2Rd_ZMASQ],
             (instregex "^SQRDML[AS]H_ZZZ_[BHS]",
                        "^SQRDCMLAH_ZZZ_[BHS]",
                        "^SQRDML[AS]H_ZZZI_[HS]",
                        "^SQRDCMLAH_ZZZI_[HS]")>;

// Multiply saturating rounding doubling regular/complex accumulate, D element
// size
def : InstRW<[V2Wr_ZMASQD, V2Rd_ZMASQ], (instregex "^SQRDML[AS]H_ZZZI?_D",
                                                   "^SQRDCMLAH_ZZZ_D")>;

// Multiply saturating rounding doubling regular/complex, B, H, S element size
def : InstRW<[V2Write_4cyc_1V02], (instregex "^SQRDMULH_ZZZ_[BHS]",
                                             "^SQRDMULH_ZZZI_[HS]")>;

// Multiply saturating rounding doubling regular/complex, D element size
def : InstRW<[V2Write_5cyc_2V02], (instregex "^SQRDMULH_ZZZI?_D")>;

// Multiply/multiply long, (8x8) polynomial
def : InstRW<[V2Write_2cyc_1V23], (instregex "^PMUL_ZZZ_B",
                                             "^PMULL[BT]_ZZZ_[HDQ]")>;

// Predicate counting vector
def : InstRW<[V2Write_2cyc_1V], (instregex "^([SU]Q)?(DEC|INC)[HWD]_ZPiI")>;

// Reciprocal estimate
def : InstRW<[V2Write_4cyc_2V02],
             (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>;

// Reduction, arithmetic, B form
def : InstRW<[V2Write_9cyc_2V_4V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;

// Reduction, arithmetic, H form
def : InstRW<[V2Write_8cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;

// Reduction, arithmetic, S form
def : InstRW<[V2Write_6cyc_2V_2V13], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;

// Reduction, arithmetic, D form
def : InstRW<[V2Write_4cyc_2V], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;

// Reduction, logical
def : InstRW<[V2Write_6cyc_1V_1V13], (instregex "^(AND|EOR|OR)V_VPZ_[BHSD]")>;

// Reverse, vector
def : InstRW<[V2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]",
                                           "^REVB_ZPmZ_[HSD]",
                                           "^REVH_ZPmZ_[SD]",
                                           "^REVW_ZPmZ_D")>;

// Select, vector form
def : InstRW<[V2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]")>;

// Table lookup
def : InstRW<[V2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]")>;

// Table lookup extension
def : InstRW<[V2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]")>;

// Transpose, vector form
def : InstRW<[V2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]")>;

// Unpack and extend
def : InstRW<[V2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]")>;

// Zip/unzip
def : InstRW<[V2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]")>;

// SVE floating-point instructions
// -----------------------------------------------------------------------------

// Floating point absolute value/difference
// "^FAB[SD]_ZPmZ" matches both FABS_ZPmZ and FABD_ZPmZ.
def : InstRW<[V2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]",
                                           "^FABD_ZPZZ_[HSD]")>;

// Floating point arithmetic
def : InstRW<[V2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]",
                                           "^F(ADD|SUB)_ZPZ[IZ]_[HSD]",
                                           "^FADDP_ZPmZZ_[HSD]",
                                           "^FNEG_ZPmZ_[HSD]",
                                           "^FSUBR_ZPm[IZ]_[HSD]",
                                           "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>;

// Floating point associative add, F16
def : InstRW<[V2Write_10cyc_1V1_9rc], (instrs FADDA_VPZ_H)>;

// Floating point associative add, F32
def : InstRW<[V2Write_6cyc_1V1_5rc], (instrs FADDA_VPZ_S)>;

// Floating point associative add, F64
def : InstRW<[V2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;

// Floating point compare
def : InstRW<[V2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]",
                                            "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]",
                                            "^FCM(LE|LT)_PPzZ0_[HSD]",
                                            "^FCMUO_PPzZZ_[HSD]")>;

// Floating point complex add
def : InstRW<[V2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]")>;

// Floating point complex multiply add
def : InstRW<[V2Wr_ZFCMA, ReadDefault, V2Rd_ZFCMA],
             (instregex "^FCMLA_ZPmZZ_[HSD]")>;
def : InstRW<[V2Wr_ZFCMA, V2Rd_ZFCMA], (instregex "^FCMLA_ZZZI_[HS]")>;

// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVT_ZPmZ_(HtoS|StoH)",
                                             "^FCVTLT_ZPmZ_HtoS",
                                             "^FCVTNT_ZPmZ_StoH")>;

// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
// or F64 to F16)
def : InstRW<[V2Write_3cyc_1V02],
             (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)",
                        "^FCVTLT_ZPmZ_StoD",
                        "^FCVTNT_ZPmZ_DtoS")>;

// Floating point convert, round to odd
def : InstRW<[V2Write_3cyc_1V02],
             (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;

// Floating point base2 log, F16
def : InstRW<[V2Write_6cyc_4V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>;

// Floating point base2 log, F32
def : InstRW<[V2Write_4cyc_2V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>;

// Floating point base2 log, F64
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>;

// Floating point convert to integer, F16
def : InstRW<[V2Write_6cyc_4V02], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>;

// Floating point convert to integer, F32
def : InstRW<[V2Write_4cyc_2V02], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>;

// Floating point convert to integer, F64
def : InstRW<[V2Write_3cyc_1V02],
             (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>;

// Floating point copy
def : InstRW<[V2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]",
                                           "^FDUP_ZI_[HSD]")>;

// Floating point divide, F16
def : InstRW<[V2Write_13cyc_1V02_12rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>;

// Floating point divide, F32
def : InstRW<[V2Write_10cyc_1V02_9rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>;

// Floating point divide, F64
def : InstRW<[V2Write_15cyc_1V02_14rc], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>;

// Floating point min/max pairwise
def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>;

// Floating point min/max
def : InstRW<[V2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]",
                                           "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>;

// Floating point multiply
def : InstRW<[V2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]",
                                           "^FMULX_ZPZZ_[HSD]",
                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]",
                                           "^FMUL_ZPZ[IZ]_[HSD]")>;

// Floating point multiply accumulate
def : InstRW<[V2Wr_ZFMA, ReadDefault, V2Rd_ZFMA],
             (instregex "^FN?ML[AS]_ZPmZZ_[HSD]",
                        "^FN?(MAD|MSB)_ZPmZZ_[HSD]")>;
def : InstRW<[V2Wr_ZFMA, V2Rd_ZFMA],
             (instregex "^FML[AS]_ZZZI_[HSD]",
                        "^FN?ML[AS]_ZPZZZ_[HSD]")>;

// Floating point multiply add/sub accumulate long
def : InstRW<[V2Wr_ZFMAL, V2Rd_ZFMAL], (instregex "^FML[AS]L[BT]_ZZZI?_SHH")>;

// Floating point reciprocal estimate, F16
def : InstRW<[V2Write_6cyc_4V02],
             (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>;

// Floating point reciprocal estimate, F32
def : InstRW<[V2Write_4cyc_2V02],
             (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>;

// Floating point reciprocal estimate, F64
def : InstRW<[V2Write_3cyc_1V02],
             (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>;

// Floating point reciprocal step
def : InstRW<[V2Write_4cyc_1V], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]")>;

// Floating point reduction, F16
def : InstRW<[V2Write_8cyc_4V],
             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H")>;

// Floating point reduction, F32
def : InstRW<[V2Write_6cyc_3V],
             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S")>;

// Floating point reduction, F64
def : InstRW<[V2Write_4cyc_2V],
             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D")>;

// Floating point round to integral, F16
def : InstRW<[V2Write_6cyc_4V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>;

// Floating point round to integral, F32
def : InstRW<[V2Write_4cyc_2V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>;

// Floating point round to integral, F64
def : InstRW<[V2Write_3cyc_1V02], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>;

// Floating point square root, F16
def : InstRW<[V2Write_13cyc_1V0_12rc], (instregex "^FSQRT_ZPmZ_H")>;

// Floating point square root, F32
def : InstRW<[V2Write_10cyc_1V0_9rc], (instregex "^FSQRT_ZPmZ_S")>;

// Floating point square root, F64
def : InstRW<[V2Write_16cyc_1V0_14rc], (instregex "^FSQRT_ZPmZ_D")>;

// Floating point trigonometric exponentiation
def : InstRW<[V2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]")>;

// Floating point trigonometric multiply add
def : InstRW<[V2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]")>;

// Floating point trigonometric, miscellaneous
def : InstRW<[V2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]")>;

// SVE BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------

// Convert, F32 to BF16
def : InstRW<[V2Write_4cyc_1V02], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;

// Dot product
def : InstRW<[V2Wr_ZBFDOT, V2Rd_ZBFDOT], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;

// Matrix multiply accumulate
def : InstRW<[V2Wr_ZBFMMA, V2Rd_ZBFMMA], (instrs BFMMLA_ZZZ)>;

// Multiply accumulate long
def : InstRW<[V2Wr_ZBFMAL, V2Rd_ZBFMAL], (instregex "^BFMLAL[BT]_ZZZI?")>;

// SVE Load instructions
// -----------------------------------------------------------------------------

// Load vector
def : InstRW<[V2Write_6cyc_1L], (instrs LDR_ZXI)>;

// Load predicate
def : InstRW<[V2Write_6cyc_1L_1M], (instrs LDR_PXI)>;

// Contiguous load, scalar + imm
def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM$",
                                           "^LD1S?B_[HSD]_IMM$",
                                           "^LD1S?H_[SD]_IMM$",
                                           "^LD1S?W_D_IMM$")>;

// Contiguous load, scalar + scalar
def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1[BHWD]$",
                                           "^LD1S?B_[HSD]$",
                                           "^LD1S?H_[SD]$",
                                           "^LD1S?W_D$")>;

// Contiguous load broadcast, scalar + imm
def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
                                           "^LD1RS?B_[HSD]_IMM$",
                                           "^LD1RS?H_[SD]_IMM$",
                                           "^LD1RW_D_IMM$",
                                           "^LD1RSW_IMM$",
                                           "^LD1RQ_[BHWD]_IMM$")>;

// Contiguous load broadcast, scalar + scalar
def : InstRW<[V2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;

// Non temporal load, scalar + imm
// Non temporal load, scalar + scalar
def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZR[IR]$")>;

// Non temporal gather load, vector + scalar 32-bit element size
def : InstRW<[V2Write_9cyc_2L_4V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
                                              "^LDNT1S[BH]_ZZR_S_REAL$")>;

// Non temporal gather load, vector + scalar 64-bit element size
def : InstRW<[V2Write_9cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
def : InstRW<[V2Write_9cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;

// Contiguous first faulting load, scalar + scalar
def : InstRW<[V2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
                                              "^LDFF1S?B_[HSD]_REAL$",
                                              "^LDFF1S?H_[SD]_REAL$",
                                              "^LDFF1S?W_D_REAL$")>;

// Contiguous non faulting load, scalar + imm
def : InstRW<[V2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
                                           "^LDNF1S?B_[HSD]_IMM_REAL$",
                                           "^LDNF1S?H_[SD]_IMM_REAL$",
                                           "^LDNF1S?W_D_IMM_REAL$")>;

// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[V2Write_8cyc_2L_2V], (instregex "^LD2[BHWD]_IMM$")>;

// Contiguous Load two structures to two vectors, scalar + scalar
def : InstRW<[V2Write_9cyc_2L_2V_2S], (instregex "^LD2[BHWD]$")>;

// Contiguous Load three structures to three vectors, scalar + imm
def : InstRW<[V2Write_9cyc_3L_3V], (instregex "^LD3[BHWD]_IMM$")>;

// Contiguous Load three structures to three vectors, scalar + scalar
def : InstRW<[V2Write_10cyc_3V_3L_3S], (instregex "^LD3[BHWD]$")>;

// Contiguous Load four structures to four vectors, scalar + imm
def : InstRW<[V2Write_9cyc_4L_8V], (instregex "^LD4[BHWD]_IMM$")>;

// Contiguous Load four structures to four vectors, scalar + scalar
def : InstRW<[V2Write_10cyc_4L_8V_4S], (instregex "^LD4[BHWD]$")>;

// Gather load, vector + imm, 32-bit element size
def : InstRW<[V2Write_9cyc_1L_4V],
             (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
                        "^GLD(FF)?1W_IMM_REAL$")>;

// Gather load, vector + imm, 64-bit element size
def : InstRW<[V2Write_9cyc_1L_4V],
             (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
                        "^GLD(FF)?1D_IMM_REAL$")>;

// Gather load, 32-bit scaled offset
def : InstRW<[V2Write_10cyc_1L_8V],
             (instregex "^GLD(FF)?1S?H_S_[SU]XTW_SCALED_REAL$",
                        "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;

// Gather load, 64-bit scaled offset
// NOTE: These instructions are not specified in the SOG.
def : InstRW<[V2Write_10cyc_1L_4V], (instregex
    "^GLD(FF)?1S?[HW]_D_([SU]XTW_)?SCALED_REAL$",
    "^GLD(FF)?1D_([SU]XTW_)?SCALED_REAL$")>;

// Gather load, 32-bit unpacked unscaled offset
def : InstRW<[V2Write_9cyc_1L_4V], (instregex
    "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
    "^GLD(FF)?1W_[SU]XTW_REAL$")>;

// Gather load, 64-bit unpacked unscaled offset
// NOTE: The SOG does not list these instructions.
def : InstRW<[V2Write_9cyc_1L_2V], (instregex
    "^GLD(FF)?1S?[BHW]_D_([SU]XTW_)?REAL$",
    "^GLD(FF)?1D_([SU]XTW_)?REAL$")>;

// SVE Store instructions
// -----------------------------------------------------------------------------

// Store from predicate reg
def : InstRW<[V2Write_1cyc_1L01],
             (instrs STR_PXI)>;

// Store from vector reg
def : InstRW<[V2Write_2cyc_1L01_1V01],
             (instrs STR_ZXI)>;

// Contiguous store, scalar + imm
def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex
    "^ST1[BHWD]_IMM$",
    "^ST1B_[HSD]_IMM$",
    "^ST1H_[SD]_IMM$",
    "^ST1W_D_IMM$")>;

// Contiguous store, scalar + scalar
// (the halfword forms get their own write; all other element sizes share one)
def : InstRW<[V2Write_2cyc_1L01_1S_1V01],
             (instregex "^ST1H(_[SD])?$")>;
def : InstRW<[V2Write_2cyc_1L01_1V01], (instregex
    "^ST1[BWD]$",
    "^ST1B_[HSD]$",
    "^ST1W_D$")>;

// Contiguous store two structures from two vectors, scalar + imm
def : InstRW<[V2Write_4cyc_1L01_1V01],
             (instregex "^ST2[BHWD]_IMM$")>;

// Contiguous store two structures from two vectors, scalar + scalar
// (ST2H is mapped separately from the byte/word/doubleword forms)
def : InstRW<[V2Write_4cyc_2L01_2S_2V01],
             (instrs ST2H)>;
def : InstRW<[V2Write_4cyc_2L01_2V01],
             (instregex "^ST2[BWD]$")>;

// Contiguous store three structures from three vectors, scalar + imm
def : InstRW<[V2Write_7cyc_9L01_9V01],
             (instregex "^ST3[BHWD]_IMM$")>;

// Contiguous store three structures from three vectors, scalar + scalar
def : InstRW<[V2Write_7cyc_9L01_9S_9V01],
             (instregex "^ST3[BHWD]$")>;

// Contiguous store four structures from four vectors, scalar + imm
def : InstRW<[V2Write_11cyc_18L01_18V01],
             (instregex "^ST4[BHWD]_IMM$")>;

// Contiguous store four structures from four vectors, scalar + scalar
def : InstRW<[V2Write_11cyc_18L01_18S_18V01],
             (instregex "^ST4[BHWD]$")>;

// Non temporal store, scalar + imm
def : InstRW<[V2Write_2cyc_1L01_1V],
             (instregex "^STNT1[BHWD]_ZRI$")>;

// Non temporal store, scalar + scalar
// (STNT1H_ZRR is mapped separately from the byte/word/doubleword forms)
def : InstRW<[V2Write_2cyc_1L01_1S_1V],
             (instrs STNT1H_ZRR)>;
def : InstRW<[V2Write_2cyc_1L01_1V],
             (instregex "^STNT1[BWD]_ZRR$")>;

// Scatter non temporal store, vector + scalar 32-bit element size
def : InstRW<[V2Write_4cyc_4L01_4V01],
             (instregex "^STNT1[BHW]_ZZR_S")>;

// Scatter non temporal store, vector + scalar 64-bit element size
def : InstRW<[V2Write_2cyc_2L01_2V01],
             (instregex "^STNT1[BHWD]_ZZR_D")>;

// Scatter store vector + imm 32-bit element size
def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex
    "^SST1[BH]_S_IMM$",
    "^SST1W_IMM$")>;

// Scatter store vector + imm 64-bit element size
def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex
    "^SST1[BHW]_D_IMM$",
    "^SST1D_IMM$")>;

// Scatter store, 32-bit scaled offset
def : InstRW<[V2Write_4cyc_4L01_4V01],
             (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;

// Scatter store, 32-bit unpacked unscaled offset
def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex
    "^SST1[BHW]_D_[SU]XTW$",
    "^SST1D_[SU]XTW$")>;

// Scatter store, 32-bit unpacked scaled offset
def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex
    "^SST1[HW]_D_[SU]XTW_SCALED$",
    "^SST1D_[SU]XTW_SCALED$")>;

// Scatter store, 32-bit unscaled offset
def : InstRW<[V2Write_4cyc_4L01_4V01], (instregex
    "^SST1[BH]_S_[SU]XTW$",
    "^SST1W_[SU]XTW$")>;

// Scatter store, 64-bit scaled offset
def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex
    "^SST1[HW]_D_SCALED$",
    "^SST1D_SCALED$")>;

// Scatter store, 64-bit unscaled offset
def : InstRW<[V2Write_2cyc_2L01_2V01], (instregex
    "^SST1[BHW]_D$",
    "^SST1D$")>;

// SVE Miscellaneous instructions
// -----------------------------------------------------------------------------

// Read first fault register, unpredicated
def : InstRW<[V2Write_2cyc_1M0],
             (instrs RDFFR_P_REAL)>;

// Read first fault register, predicated
def : InstRW<[V2Write_3or4cyc_1M0_1M],
             (instrs RDFFR_PPz_REAL)>;

// Read first fault register and set flags
def : InstRW<[V2Write_4or5cyc_2M0_2M],
             (instrs RDFFRS_PPz)>;

// Set first fault register
// Write to first fault register
def : InstRW<[V2Write_2cyc_1M0],
             (instrs SETFFR, WRFFR)>;

// Prefetch
// NOTE: The SOG does not specify this.
def : InstRW<[V2Write_4cyc_1L],
             (instregex "^PRF[BHWD]")>;

// SVE Cryptographic instructions
// -----------------------------------------------------------------------------

// Crypto AES ops
def : InstRW<[V2Write_2cyc_1V], (instregex
    "^AES[DE]_ZZZ_B$",
    "^AESI?MC_ZZ_B$")>;

// Crypto SHA3 ops
def : InstRW<[V2Write_2cyc_1V0], (instregex
    "^(BCAX|EOR3)_ZZZZ$",
    "^RAX1_ZZZ_D$",
    "^XAR_ZZZI_[BHSD]$")>;

// Crypto SM4 ops
def : InstRW<[V2Write_4cyc_1V0],
             (instregex "^SM4E(KEY)?_ZZZ_S$")>;

}