//=- AArch64SchedNeoverseN2.td - NeoverseN2 Scheduling Defs --*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Arm Neoverse N2 processors.
//
//===----------------------------------------------------------------------===//

// Top-level machine model record. Every definition placed inside the
// `let SchedModel = NeoverseN2Model in { ... }` scope opened further down is
// attached to this model.
def NeoverseN2Model : SchedMachineModel {
  // Front-end / dispatch parameters.
  let IssueWidth            =  10; // Micro-ops dispatched at a time.
  let MispredictPenalty     =  10; // Extra cycles for mispredicted branch.

  // Buffering parameters.
  let MicroOpBufferSize     = 160; // Entries in micro-op re-order buffer.
  let LoopMicroOpBufferSize =  16; // NOTE: Copied from Cortex-A57.

  // Memory parameters.
  let LoadLatency           =   4; // Optimistic load latency.

  // The model claims to describe every instruction of the subtarget, except
  // those guarded by the predicates in UnsupportedFeatures below.
  let CompleteModel         =   1;

  list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F,
                                                    [HasSVE2p1, HasPAuthLR,
                                                     HasCPA]);
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Neoverse N2.
//
// Pipeline overview:
//   * Instructions are fetched, then decoded into internal macro-ops (MOPs).
//   * MOPs go through register renaming and dispatch. After the decode stage
//     a MOP can be split into two micro-ops further down the pipeline.
//   * Once dispatched, micro-ops wait for their operands and issue
//     out-of-order to one of thirteen issue pipelines.
//   * Each issue pipeline can accept one micro-op per cycle.

let SchedModel = NeoverseN2Model in {

// Define the (13) issue ports.
// Physical issue ports. The ProcResource<N> argument is the number of
// identical pipelines of that kind; the counts below add up to thirteen.
def N2UnitB   : ProcResource<2>; // Branch 0/1
def N2UnitS   : ProcResource<2>; // Integer single Cycle 0/1
def N2UnitM0  : ProcResource<1>; // Integer multicycle 0
def N2UnitM1  : ProcResource<1>; // Integer multicycle 1
def N2UnitL01 : ProcResource<2>; // Load/Store 0/1
def N2UnitL2  : ProcResource<1>; // Load 2
def N2UnitD   : ProcResource<2>; // Store data 0/1
def N2UnitV0  : ProcResource<1>; // FP/ASIMD 0
def N2UnitV1  : ProcResource<1>; // FP/ASIMD 1

// Groups: a micro-op bound to a group may issue on any member of the group.
def N2UnitV : ProcResGroup<[N2UnitV0, N2UnitV1]>;  // FP/ASIMD 0/1
def N2UnitM : ProcResGroup<[N2UnitM0, N2UnitM1]>;  // Integer single/multicycle 0/1
def N2UnitL : ProcResGroup<[N2UnitL01, N2UnitL2]>; // Load/Store 0/1 and Load 2
def N2UnitI : ProcResGroup<[N2UnitS, N2UnitM0, N2UnitM1]>; // Integer single cycle 0/1 and single/multicycle 0/1

// Define commonly used read types.

// No forwarding is provided for these types: each one gets a ReadAdvance of
// zero cycles.
foreach Rd = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID,
              ReadExtrHi, ReadAdrBase, ReadST, ReadVLD] in
  def : ReadAdvance<Rd, 0>;

// Generic write types that consume no issue port in this model.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
let Latency = 1 in {
  def : WriteRes<WriteBarrier, []>;
  def : WriteRes<WriteHint,    []>;
}
def : WriteRes<WriteLDHi, []> { let Latency = 4; }

//===----------------------------------------------------------------------===//
// Define customized scheduler read/write types specific to the Neoverse N2.

// Naming convention used by every write type below:
//
//   N2Write_<latency>cyc_<count><unit>[_<count><unit>...]
//
// <latency> is the value assigned to Latency, and each <count><unit> pair
// names how many entries of N2Unit<unit> appear in the resource list. The
// number of resource entries equals NumMicroOps. Keep the name and the
// resource list in sync when adding or editing a definition.
//
// Where ReleaseAtCycles is given, it lists one cycle count per resource entry
// (see SchedWriteRes in TargetSchedule.td for the exact semantics).

//===----------------------------------------------------------------------===//
// Define generic 1 micro-op types

def N2Write_1cyc_1B   : SchedWriteRes<[N2UnitB]>   { let Latency = 1; }
def N2Write_1cyc_1I   : SchedWriteRes<[N2UnitI]>   { let Latency = 1; }
def N2Write_1cyc_1M   : SchedWriteRes<[N2UnitM]>   { let Latency = 1; }
def N2Write_1cyc_1M0  : SchedWriteRes<[N2UnitM0]>  { let Latency = 1; }
def N2Write_1cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 1; }
def N2Write_2cyc_1M   : SchedWriteRes<[N2UnitM]>   { let Latency = 2; }
def N2Write_3cyc_1M   : SchedWriteRes<[N2UnitM]>   { let Latency = 3; }
def N2Write_2cyc_1M0  : SchedWriteRes<[N2UnitM0]>  { let Latency = 2;
                                                     let ReleaseAtCycles = [2]; }
def N2Write_3cyc_1M0  : SchedWriteRes<[N2UnitM0]>  { let Latency = 3;
                                                     let ReleaseAtCycles = [3]; }
def N2Write_5cyc_1M0  : SchedWriteRes<[N2UnitM0]>  { let Latency = 5;
                                                     let ReleaseAtCycles = [5]; }
def N2Write_12cyc_1M0 : SchedWriteRes<[N2UnitM0]>  { let Latency = 12;
                                                     let ReleaseAtCycles = [12]; }
def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]>  { let Latency = 20;
                                                     let ReleaseAtCycles = [20]; }
def N2Write_4cyc_1L   : SchedWriteRes<[N2UnitL]>   { let Latency = 4; }
def N2Write_6cyc_1L   : SchedWriteRes<[N2UnitL]>   { let Latency = 6; }
def N2Write_2cyc_1V   : SchedWriteRes<[N2UnitV]>   { let Latency = 2; }
def N2Write_3cyc_1V   : SchedWriteRes<[N2UnitV]>   { let Latency = 3; }
def N2Write_4cyc_1V   : SchedWriteRes<[N2UnitV]>   { let Latency = 4; }
def N2Write_5cyc_1V   : SchedWriteRes<[N2UnitV]>   { let Latency = 5; }
def N2Write_12cyc_1V  : SchedWriteRes<[N2UnitV]>   { let Latency = 12; }
def N2Write_2cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 2; }
def N2Write_3cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 3; }
def N2Write_4cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 4; }
def N2Write_7cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 7;
                                                     let ReleaseAtCycles = [7]; }
def N2Write_9cyc_1V0  : SchedWriteRes<[N2UnitV0]>  { let Latency = 9; }
def N2Write_10cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 10; }
def N2Write_12cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 12; }
def N2Write_13cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 13; }
def N2Write_15cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 15; }
def N2Write_16cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 16; }
def N2Write_20cyc_1V0 : SchedWriteRes<[N2UnitV0]>  { let Latency = 20; }
def N2Write_2cyc_1V1  : SchedWriteRes<[N2UnitV1]>  { let Latency = 2; }
def N2Write_3cyc_1V1  : SchedWriteRes<[N2UnitV1]>  { let Latency = 3; }
def N2Write_4cyc_1V1  : SchedWriteRes<[N2UnitV1]>  { let Latency = 4; }
def N2Write_6cyc_1V1  : SchedWriteRes<[N2UnitV1]>  { let Latency = 6; }
def N2Write_10cyc_1V1 : SchedWriteRes<[N2UnitV1]>  { let Latency = 10; }
def N2Write_6cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 6; }

//===----------------------------------------------------------------------===//
// Define generic 2 micro-op types

def N2Write_1cyc_1B_1S : SchedWriteRes<[N2UnitB, N2UnitS]> {
  let Latency     = 1;
  let NumMicroOps = 2;
}

def N2Write_6cyc_1M0_1B : SchedWriteRes<[N2UnitM0, N2UnitB]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def N2Write_9cyc_1M0_1L : SchedWriteRes<[N2UnitM0, N2UnitL]> {
  let Latency     = 9;
  let NumMicroOps = 2;
}

def N2Write_3cyc_1I_1M : SchedWriteRes<[N2UnitI, N2UnitM]> {
  let Latency     = 3;
  let NumMicroOps = 2;
}

def N2Write_4cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def N2Write_5cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
  let Latency     = 5;
  let NumMicroOps = 2;
}

def N2Write_6cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def N2Write_7cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> {
  let Latency     = 7;
  let NumMicroOps = 2;
}

def N2Write_1cyc_1L01_1D : SchedWriteRes<[N2UnitL01, N2UnitD]> {
  let Latency     = 1;
  let NumMicroOps = 2;
}

def N2Write_5cyc_1M0_1V : SchedWriteRes<[N2UnitM0, N2UnitV]> {
  let Latency     = 5;
  let NumMicroOps = 2;
}

def N2Write_2cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> {
  let Latency     = 2;
  let NumMicroOps = 2;
}

def N2Write_4cyc_1V1_1V : SchedWriteRes<[N2UnitV1, N2UnitV]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def N2Write_4cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def N2Write_10cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let Latency     = 10;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [5, 5];
}

def N2Write_13cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let Latency     = 13;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [6, 7];
}

def N2Write_15cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let Latency     = 15;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [7, 8];
}

def N2Write_16cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let Latency     = 16;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [8, 8];
}

def N2Write_4cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def N2Write_6cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def N2Write_6cyc_2L : SchedWriteRes<[N2UnitL, N2UnitL]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def N2Write_8cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 2;
}

def N2Write_4cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def N2Write_3cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> {
  let Latency     = 3;
  let NumMicroOps = 2;
}

def N2Write_2cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> {
  let Latency     = 2;
  let NumMicroOps = 2;
}

def N2Write_6cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def N2Write_4cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

def N2Write_5cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let Latency     = 5;
  let NumMicroOps = 2;
}

def N2Write_5cyc_1V1_1M0 : SchedWriteRes<[N2UnitV1, N2UnitM0]> {
  let Latency     = 5;
  let NumMicroOps = 2;
}

def N2Write_7cyc_1M0_1V0 : SchedWriteRes<[N2UnitM0, N2UnitV0]> {
  let Latency     = 7;
  let NumMicroOps = 2;
}

def N2Write_2cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> {
  let Latency     = 2;
  let NumMicroOps = 2;
}

def N2Write_6cyc_1V_1V1 : SchedWriteRes<[N2UnitV, N2UnitV1]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def N2Write_6cyc_1L_1M : SchedWriteRes<[N2UnitL, N2UnitM]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def N2Write_6cyc_1L_1S : SchedWriteRes<[N2UnitL, N2UnitS]> {
  let Latency     = 6;
  let NumMicroOps = 2;
}

def N2Write_9cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
  let Latency     = 9;
  let NumMicroOps = 2;
}

def N2Write_4cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> {
  let Latency     = 4;
  let NumMicroOps = 2;
}

//===----------------------------------------------------------------------===//
// Define generic 3 micro-op types

def N2Write_1cyc_1L01_1D_1I : SchedWriteRes<[N2UnitL01, N2UnitD, N2UnitI]> {
  let Latency     = 1;
  let NumMicroOps = 3;
}

def N2Write_2cyc_1L01_1V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitI]> {
  let Latency     = 2;
  let NumMicroOps = 3;
}

def N2Write_2cyc_1L01_2V : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV]> {
  let Latency     = 2;
  let NumMicroOps = 3;
}

def N2Write_7cyc_1M_1M0_1V : SchedWriteRes<[N2UnitM, N2UnitM0, N2UnitV]> {
  let Latency     = 7;
  let NumMicroOps = 3;
}

def N2Write_8cyc_1M0_1V1_1V : SchedWriteRes<[N2UnitM0, N2UnitV1, N2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 3;
}

// One V, one L and one S micro-op, as the name states. The third resource is
// N2UnitS (not a second N2UnitL), matching the larger 2L_2V_2S sibling type.
def N2Write_10cyc_1V_1L_1S : SchedWriteRes<[N2UnitV, N2UnitL, N2UnitS]> {
  let Latency     = 10;
  let NumMicroOps = 3;
}

def N2Write_2cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
  let Latency     = 2;
  let NumMicroOps = 3;
}

def N2Write_4cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
  let Latency     = 4;
  let NumMicroOps = 3;
}

def N2Write_6cyc_3L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL]> {
  let Latency     = 6;
  let NumMicroOps = 3;
}

def N2Write_8cyc_1L_2V : SchedWriteRes<[N2UnitL, N2UnitV, N2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 3;
}

//===----------------------------------------------------------------------===//
// Define generic 4 micro-op types

def N2Write_2cyc_1L01_2V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV,
                                             N2UnitI]> {
  let Latency     = 2;
  let NumMicroOps = 4;
}

def N2Write_6cyc_4V0 : SchedWriteRes<[N2UnitV0, N2UnitV0, N2UnitV0, N2UnitV0]> {
  let Latency     = 6;
  let NumMicroOps = 4;
}

def N2Write_4cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency     = 4;
  let NumMicroOps = 4;
}

def N2Write_6cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency     = 6;
  let NumMicroOps = 4;
}

def N2Write_8cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 4;
}

def N2Write_9cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  let Latency     = 9;
  let NumMicroOps = 4;
}

def N2Write_2cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
                                          N2UnitV]> {
  let Latency     = 2;
  let NumMicroOps = 4;
}

def N2Write_4cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
                                          N2UnitV]> {
  let Latency     = 4;
  let NumMicroOps = 4;
}

def N2Write_5cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
                                          N2UnitV]> {
  let Latency     = 5;
  let NumMicroOps = 4;
}

def N2Write_8cyc_2M0_2V0 : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitV0,
                                          N2UnitV0]> {
  let Latency     = 8;
  let NumMicroOps = 4;
}

def N2Write_11cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
                                          N2UnitV1]> {
  let Latency     = 11;
  let NumMicroOps = 4;
}

def N2Write_9cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
                                         N2UnitV1]> {
  let Latency     = 9;
  let NumMicroOps = 4;
}

def N2Write_8cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
                                         N2UnitV1]> {
  let Latency     = 8;
  let NumMicroOps = 4;
}

// Two L and two V1 micro-ops, as the name states. This must not share the
// resource list of the *_2V_2V1 types above, which contain no load unit.
def N2Write_10cyc_2L_2V1 : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV1,
                                          N2UnitV1]> {
  let Latency     = 10;
  let NumMicroOps = 4;
}

def N2Write_10cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  let Latency     = 10;
  let NumMicroOps = 4;
}

def N2Write_4cyc_2M0_2M : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitM,
                                         N2UnitM]> {
  let Latency     = 4;
  let NumMicroOps = 4;
}

def N2Write_6cyc_2I_2L : SchedWriteRes<[N2UnitI, N2UnitI, N2UnitL, N2UnitL]> {
  let Latency     = 6;
  let NumMicroOps = 4;
}

def N2Write_7cyc_4L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL]> {
  let Latency     = 7;
  let NumMicroOps = 4;
}

//===----------------------------------------------------------------------===//
// Define generic 5 micro-op types

def N2Write_2cyc_1L01_2V_2I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV,
                                             N2UnitI, N2UnitI]> {
  let Latency     = 2;
  let NumMicroOps = 5;
}

def N2Write_8cyc_2L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV,
                                        N2UnitV]> {
  let Latency     = 8;
  let NumMicroOps = 5;
}

//===----------------------------------------------------------------------===//
// Define generic 6 micro-op types

let NumMicroOps = 6 in {
  def N2Write_8cyc_3L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
                                          N2UnitV, N2UnitV, N2UnitV]> {
    let Latency = 8;
  }

  def N2Write_2cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                            N2UnitV, N2UnitV, N2UnitV]> {
    let Latency = 2;
  }

  def N2Write_6cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                            N2UnitV, N2UnitV, N2UnitV]> {
    let Latency = 6;
  }

  def N2Write_4cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                            N2UnitV, N2UnitV, N2UnitV]> {
    let Latency = 4;
  }

  def N2Write_10cyc_2L_2V_2S : SchedWriteRes<[N2UnitL, N2UnitL,
                                              N2UnitV, N2UnitV,
                                              N2UnitS, N2UnitS]> {
    let Latency = 10;
  }
}

//===----------------------------------------------------------------------===//
// Define generic 7 micro-op types

def N2Write_8cyc_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
                                        N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let NumMicroOps = 7;
  let Latency     = 8;
}

//===----------------------------------------------------------------------===//
// Define generic 8 micro-op types

let NumMicroOps = 8 in {
  def N2Write_6cyc_8V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV,
                                       N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
    let Latency = 6;
  }

  def N2Write_2cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01,
                                            N2UnitL01, N2UnitL01,
                                            N2UnitV, N2UnitV,
                                            N2UnitV, N2UnitV]> {
    let Latency = 2;
  }

  def N2Write_5cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01,
                                            N2UnitL01, N2UnitL01,
                                            N2UnitV, N2UnitV,
                                            N2UnitV, N2UnitV]> {
    let Latency = 5;
  }

  def N2Write_8cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
                                          N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
    let Latency = 8;
  }

  def N2Write_9cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
                                          N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
    let Latency = 9;
  }
}

//===----------------------------------------------------------------------===//
// Define generic 10 micro-op types

def N2Write_7cyc_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                          N2UnitL01, N2UnitL01,
                                          N2UnitV, N2UnitV, N2UnitV,
                                          N2UnitV, N2UnitV]> {
  let NumMicroOps = 10;
  let Latency     = 7;
}

//===----------------------------------------------------------------------===//
// Define generic 12 micro-op types

def N2Write_7cyc_6L01_6V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                          N2UnitL01, N2UnitL01, N2UnitL01,
                                          N2UnitV, N2UnitV, N2UnitV,
                                          N2UnitV, N2UnitV, N2UnitV]> {
  let NumMicroOps = 12;
  let Latency     = 7;
}

//===----------------------------------------------------------------------===//
// Define generic 15 micro-op types

def N2Write_7cyc_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                             N2UnitL01, N2UnitL01,
                                             N2UnitS, N2UnitS, N2UnitS,
                                             N2UnitS, N2UnitS,
                                             N2UnitV, N2UnitV, N2UnitV,
                                             N2UnitV, N2UnitV]> {
  let NumMicroOps = 15;
  let Latency     = 7;
}

//===----------------------------------------------------------------------===//
// Define generic 18 micro-op types

def N2Write_11cyc_9L01_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                           N2UnitL01, N2UnitL01, N2UnitL01,
                                           N2UnitL01, N2UnitL01, N2UnitL01,
                                           N2UnitV, N2UnitV, N2UnitV,
                                           N2UnitV, N2UnitV, N2UnitV,
                                           N2UnitV, N2UnitV, N2UnitV]> {
  let NumMicroOps = 18;
  let Latency     = 11;
}

//===----------------------------------------------------------------------===//
// Define generic 27 micro-op types

def N2Write_11cyc_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                              N2UnitL01, N2UnitL01, N2UnitL01,
                                              N2UnitL01, N2UnitL01, N2UnitL01,
                                              N2UnitS, N2UnitS, N2UnitS,
                                              N2UnitS, N2UnitS, N2UnitS,
                                              N2UnitS, N2UnitS, N2UnitS,
                                              N2UnitV, N2UnitV, N2UnitV,
                                              N2UnitV, N2UnitV, N2UnitV,
                                              N2UnitV, N2UnitV, N2UnitV]> {
  let NumMicroOps = 27;
  let Latency     = 11;
}

//===----------------------------------------------------------------------===//
// Define types for arithmetic and logical ops with short shifts
//
// Each variant picks the 1-cycle I-pipe write when its predicate holds and
// falls back to the 2-cycle M-pipe write otherwise.
def N2Write_Arith : SchedWriteVariant<[
                      SchedVar<IsCheapLSL,  [N2Write_1cyc_1I]>,
                      SchedVar<NoSchedPred, [N2Write_2cyc_1M]>]>;

def N2Write_Logical: SchedWriteVariant<[
                       SchedVar<NeoverseNoLSL, [N2Write_1cyc_1I]>,
                       SchedVar<NoSchedPred,   [N2Write_2cyc_1M]>]>;

// Miscellaneous
// -----------------------------------------------------------------------------

def : InstRW<[WriteI], (instrs COPY)>;

// Branch Instructions
// -----------------------------------------------------------------------------

// Branch, immed
// Compare and branch
def : SchedAlias<WriteBr,    N2Write_1cyc_1B>;

// Branch, register
def : SchedAlias<WriteBrReg, N2Write_1cyc_1B>;

// Branch and link, immed
// Branch and link, register
def : InstRW<[N2Write_1cyc_1B_1S], (instrs BL, BLR)>;

// Arithmetic and Logical Instructions
// -----------------------------------------------------------------------------

// ALU, basic
// ALU, basic, flagset
def : SchedAlias<WriteI, N2Write_1cyc_1I>;

// ALU, extend and shift
def : SchedAlias<WriteIEReg, N2Write_2cyc_1M>;

// Arithmetic, LSL shift, shift <= 4
// Arithmetic, flagset, LSL shift, shift <= 4
// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4
def : SchedAlias<WriteISReg, N2Write_Arith>;

// Logical, shift, no flagset
def : InstRW<[N2Write_1cyc_1I],
             (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>;

// Logical, shift, flagset
def : InstRW<[N2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>;

// Arithmetic, immediate to logical address tag
def : InstRW<[N2Write_2cyc_1M], (instrs ADDG, SUBG)>;

// Convert floating-point condition flags
// Flag manipulation instructions
def : WriteRes<WriteSys, []> { let Latency = 1; }

// Insert Random Tags
def : InstRW<[N2Write_2cyc_1M], (instrs IRG, IRGstack)>;

// Insert Tag Mask
// Subtract Pointer
// Subtract Pointer, flagset
def : InstRW<[N2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;

// Move and shift instructions
// -----------------------------------------------------------------------------

def : SchedAlias<WriteImm, N2Write_1cyc_1I>;

// Divide and Multiply Instructions
// -----------------------------------------------------------------------------

// SDIV, UDIV
def : SchedAlias<WriteID32, N2Write_12cyc_1M0>;
def : SchedAlias<WriteID64, N2Write_20cyc_1M0>;

// 32- and 64-bit integer multiply: one M-pipe micro-op, 2 cycles.
let Latency = 2 in {
  def : WriteRes<WriteIM32, [N2UnitM]>;
  def : WriteRes<WriteIM64, [N2UnitM]>;
}

// Multiply high
def : InstRW<[N2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>;

// Pointer Authentication Instructions (v8.3 PAC)
// -----------------------------------------------------------------------------

// Authenticate data address
// Authenticate instruction address
// Compute pointer authentication code for data address
// Compute pointer authentication code, using generic key
// Compute pointer authentication code for instruction address
def : InstRW<[N2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>;

// Branch and link, register, with pointer authentication
// Branch, register, with pointer authentication
// Branch, return, with pointer authentication
def : InstRW<[N2Write_6cyc_1M0_1B],
             (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ,
                     BRAA, BRAAZ, BRAB, BRABZ,
                     RETAA, RETAB,
                     ERETAA, ERETAB)>;


// Load register, with pointer authentication
def : InstRW<[N2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;

// Strip pointer authentication code
def : InstRW<[N2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>;

// Miscellaneous data-processing instructions
// -----------------------------------------------------------------------------

// Bitfield extract, one reg
// Bitfield extract, two regs
// NOTE: We don't model the difference between EXTR where both operands are the
// same (one reg).
def : SchedAlias<WriteExtr, N2Write_3cyc_1I_1M>;
def : InstRW<[N2Write_3cyc_1I_1M], (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move, basic
def : SchedAlias<WriteIS, N2Write_1cyc_1I>;

// Bitfield move, insert
def : InstRW<[N2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>;

// Load instructions
// -----------------------------------------------------------------------------

def : SchedAlias<WriteLD,    N2Write_4cyc_1L>;
def : SchedAlias<WriteLDIdx, N2Write_4cyc_1I_1L>;

// LDPSW is a load, so it must occupy a load pipeline. It is modelled as one
// integer micro-op plus one load micro-op with 5-cycle latency
// (N2Write_5cyc_1I_1L). N2Write_5cyc_1M0 is not suitable here: it uses no
// load unit and holds the single M0 pipe for 5 cycles.
// NOTE(review): pipeline assignment (I, L) should be confirmed against the
// Neoverse N2 Software Optimization Guide load table.

// Load pair, signed immed offset, signed words
def : InstRW<[N2Write_5cyc_1I_1L, WriteLDHi], (instrs LDPSWi)>;
// Load pair, immed post-index or immed pre-index, signed words
def : InstRW<[WriteAdr, N2Write_5cyc_1I_1L, WriteLDHi],
             (instregex "^LDPSW(post|pre)$")>;

// Store instructions
// -----------------------------------------------------------------------------

def : SchedAlias<WriteST,    N2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteSTIdx, N2Write_1cyc_1L01_1D_1I>;
def : SchedAlias<WriteSTP,   N2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteAdr,   N2Write_1cyc_1I>; // copied from A57.

// Tag load instructions
// -----------------------------------------------------------------------------

// Load allocation tag
// Load multiple allocation tags
def : InstRW<[N2Write_4cyc_1L], (instrs LDG, LDGM)>;

// Tag store instructions
// -----------------------------------------------------------------------------

// Store allocation tags to one or two granules, post-index
// Store allocation tags to one or two granules, pre-index
// Store allocation tag to one or two granules, zeroing, post-index
// Store Allocation Tag to one or two granules, zeroing, pre-index
// Store allocation tag and reg pair to memory, post-Index
// Store allocation tag and reg pair to memory, pre-Index
def : InstRW<[N2Write_1cyc_1L01_1D_1I],
             (instrs STGPreIndex,   STGPostIndex,
                     ST2GPreIndex,  ST2GPostIndex,
                     STZGPreIndex,  STZGPostIndex,
                     STZ2GPreIndex, STZ2GPostIndex,
                     STGPpre,       STGPpost)>;

// Store allocation tags to one or two granules, signed offset
// Store allocation tag to two granules, zeroing, signed offset
// Store allocation tag and reg pair to memory, signed offset
// Store multiple allocation tags
def : InstRW<[N2Write_1cyc_1L01_1D],
             (instrs STGi, ST2Gi, STZGi, STZ2Gi, STGPi, STGM, STZGM)>;

// FP data processing instructions
// -----------------------------------------------------------------------------

// FP absolute value
// FP arithmetic
// FP min/max
// FP negate
// FP select
def : SchedAlias<WriteF, N2Write_2cyc_1V>;

// FP compare
def : SchedAlias<WriteFCmp, N2Write_2cyc_1V0>;

// FP divide, square root
// Default for WriteFDiv; the per-form InstRW entries below give explicit
// latencies for the listed FDIV/FSQRT instructions.
def : SchedAlias<WriteFDiv, N2Write_7cyc_1V0>;

// FP divide, H-form
def : InstRW<[N2Write_7cyc_1V0],  (instrs FDIVHrr)>;
// FP divide, S-form
def : InstRW<[N2Write_10cyc_1V0], (instrs FDIVSrr)>;
// FP divide, D-form
def : InstRW<[N2Write_15cyc_1V0], (instrs FDIVDrr)>;

// FP square root, H-form
def : InstRW<[N2Write_7cyc_1V0],  (instrs FSQRTHr)>;
// FP square root, S-form
def : InstRW<[N2Write_9cyc_1V0],  (instrs FSQRTSr)>;
// FP square root, D-form
def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRTDr)>;

// FP multiply
def : WriteRes<WriteFMul, [N2UnitV]> { let Latency = 3; }

// FP multiply accumulate
def : InstRW<[N2Write_4cyc_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;

// FP round to integral
def : InstRW<[N2Write_3cyc_1V0],
             (instregex "^FRINT[AIMNPXZ][HSD]r$",
                        "^FRINT(32|64)[XZ][SD]r$")>;

// FP miscellaneous instructions
// -----------------------------------------------------------------------------

// FP convert, from gen to vec reg
def : InstRW<[N2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;

// FP convert, from vec to gen reg
def : InstRW<[N2Write_3cyc_1V], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;

// FP convert, Javascript from vec to gen reg
// FP convert, from vec to vec reg
def : SchedAlias<WriteFCvt, N2Write_3cyc_1V0>;

// FP move, immed
// FP move, register
def : SchedAlias<WriteFImm, N2Write_2cyc_1V>;

// FP transfer, from gen to low half of vec reg
def : InstRW<[N2Write_3cyc_1M0],
             (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
                     FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;

// FP transfer, from gen to high half of vec reg
def : InstRW<[N2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;

// FP transfer, from vec to gen reg
def : SchedAlias<WriteFCopy, N2Write_2cyc_1V>;

// FP load instructions
// -----------------------------------------------------------------------------

// Load vector reg, literal, S/D/Q forms
// Load vector reg, unscaled immed
def : InstRW<[N2Write_6cyc_1L],
             (instregex "^LDR[SDQ]l$",
                        "^LDUR[BHSDQ]i$")>;

// Load vector reg, immed post-index
def : InstRW<[N2Write_6cyc_1I_1L, WriteI], (instregex "^LDR[BHSDQ]post$")>;
// Load vector reg, immed pre-index
def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L], (instregex "^LDR[BHSDQ]pre$")>;

// Load vector reg, unsigned immed
def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;

// Load vector reg, register offset, basic
// Load vector reg, register offset, scale, S/D-form
// Load vector reg, register offset, extend
// Load vector reg, register offset, extend, scale, S/D-form
def : InstRW<[N2Write_6cyc_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;

// Load vector reg, register offset, scale, H/Q-form
// Load vector reg, register offset, extend, scale, H/Q-form
def : InstRW<[N2Write_7cyc_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;

// Load vector pair, immed offset, S/D-form
def : InstRW<[N2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;

// Load vector pair, immed offset, Q-form
def : InstRW<[N2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;

// Load vector pair, immed post-index, S/D-form
// Load vector pair, immed pre-index, S/D-form
def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L, WriteLDHi],
             (instregex "^LDP[SD](pre|post)$")>;

// Load vector pair, immed post-index, Q-form
// Load vector pair, immed pre-index, Q-form
def : InstRW<[WriteAdr, N2Write_6cyc_2I_2L, WriteLDHi],
             (instrs LDPQpost, LDPQpre)>;

// FP store instructions
// -----------------------------------------------------------------------------

// Store vector reg, unscaled immed, B/H/S/D-form
// Store vector reg, unscaled immed, Q-form
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STUR[BHSDQ]i$")>;

// Store vector reg, immed post-index, B/H/S/D-form
// Store vector reg, immed post-index, Q-form
// Store vector reg, immed pre-index, B/H/S/D-form
// Store vector reg, immed pre-index, Q-form
def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I, ReadAdrBase],
             (instregex "^STR[BHSDQ](pre|post)$")>;

// Store vector reg, unsigned immed, B/H/S/D-form
// Store vector reg, unsigned immed, Q-form
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STR[BHSDQ]ui$")>;

// Store vector reg, register offset, basic, B/H/S/D-form
// Store vector reg, register offset, basic, Q-form
// Store vector reg, register offset, scale, S/D-form
// Store vector reg, register offset, extend, B/H/S/D-form
// Store vector reg, register offset, extend, Q-form
// Store vector reg, register offset, extend, scale, S/D-form
def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
             (instregex "^STR[BSD]ro[WX]$")>;

// Store vector reg, register offset, scale, H-form
// Store vector reg, register offset, scale, Q-form
// Store vector reg, register offset, extend, scale, H-form
// Store vector reg, register offset, extend, scale, Q-form
def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
             (instregex "^STR[HQ]ro[WX]$")>;

// Store vector pair, immed offset, S-form
// Store vector pair, immed offset, D-form
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STN?P[SD]i$")>;

// Store vector pair, immed offset, Q-form
def : InstRW<[N2Write_2cyc_1L01_2V], (instrs STPQi, STNPQi)>;

// Store vector pair, immed post-index, S-form
// Store vector pair, immed post-index, D-form
// Store vector pair, immed pre-index, S-form
// Store vector pair, immed pre-index, D-form
def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I],
             (instregex "^STP[SD](pre|post)$")>;

// Store vector pair, immed post-index, Q-form
def : InstRW<[N2Write_2cyc_1L01_2V_1I], (instrs STPQpost)>;

// Store vector pair, immed pre-index, Q-form
def : InstRW<[N2Write_2cyc_1L01_2V_2I], (instrs STPQpre)>;

// ASIMD integer instructions
// -----------------------------------------------------------------------------

// ASIMD absolute diff
// ASIMD absolute diff long
// ASIMD arith, basic
// ASIMD arith, complex
// ASIMD arith, pair-wise
// ASIMD compare
// ASIMD logical
// ASIMD max/min, basic and pair-wise
def : SchedAlias<WriteVd, N2Write_2cyc_1V>;
def : SchedAlias<WriteVq, N2Write_2cyc_1V>;

// ASIMD absolute diff accum
// ASIMD absolute diff accum long
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;

// ASIMD arith, reduce, 4H/4S
def : InstRW<[N2Write_2cyc_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;

// ASIMD arith, reduce, 8B/8H
def : InstRW<[N2Write_4cyc_1V1_1V],
             (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;

// ASIMD arith, reduce, 16B
def : InstRW<[N2Write_4cyc_1V1],
             (instrs ADDVv16i8v, SADDLVv16i8v, UADDLVv16i8v)>;

// ASIMD dot product
// ASIMD dot product using signed and unsigned integers
def : InstRW<[N2Write_3cyc_1V],
             (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;

// ASIMD matrix multiply-accumulate
def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA, UMMLA, USMMLA)>;

// ASIMD max/min, reduce, 4H/4S
def : InstRW<[N2Write_2cyc_1V1],
             (instregex "^[SU](MAX|MIN)Vv4i16v$",
                        "^[SU](MAX|MIN)Vv4i32v$")>;

// ASIMD max/min, reduce, 8B/8H
def : InstRW<[N2Write_4cyc_1V1_1V],
             (instregex "^[SU](MAX|MIN)Vv8i8v$",
                        "^[SU](MAX|MIN)Vv8i16v$")>;

// ASIMD max/min, reduce, 16B
def : InstRW<[N2Write_4cyc_2V1], (instregex "[SU](MAX|MIN)Vv16i8v$")>;

// ASIMD multiply
def : InstRW<[N2Write_4cyc_1V0], (instregex "^MULv", "^SQ(R)?DMULHv")>;

// ASIMD multiply accumulate
def : InstRW<[N2Write_4cyc_1V0], (instregex "^MLAv", "^MLSv")>;

// ASIMD multiply accumulate high
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;

// ASIMD multiply accumulate long
def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;

// ASIMD multiply accumulate saturating long
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMLALv", "^SQDMLSLv")>;

// ASIMD multiply/multiply long (8x8) polynomial, D-form
// ASIMD multiply/multiply long (8x8) polynomial, Q-form
def : InstRW<[N2Write_3cyc_1V0], (instregex "^PMULL?(v8i8|v16i8)$")>;

// ASIMD multiply long
def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]MULLv", "^SQDMULLv")>;

// ASIMD pairwise add and accumulate long
def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALPv")>;

// ASIMD shift accumulate
def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;

// ASIMD shift by immed, basic
def : InstRW<[N2Write_2cyc_1V1],
             (instregex "^SHLv", "^SHLLv", "^SHRNv",
                        "^SSHLLv", "^SSHRv", "^USHLLv",
                        "^USHRv")>;

// ASIMD shift by immed and insert, basic
def : InstRW<[N2Write_2cyc_1V1], (instregex "^SLIv", "^SRIv")>;

// ASIMD shift by immed, complex
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^RSHRNv", "^SQRSHRNv", "^SQRSHRUNv",
                        "^(SQSHLU?|UQSHL)[bhsd]$",
                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
                        "^SQSHRNv", "^SQSHRUNv", "^SRSHRv", "^UQRSHRNv",
                        "^UQSHRNv", "^URSHRv")>;

// ASIMD shift by register, basic
def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]SHLv")>;

// ASIMD shift by register, complex
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
                        "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;

// ASIMD floating-point instructions
// -----------------------------------------------------------------------------

// ASIMD FP absolute value/difference
// ASIMD FP arith, normal
// ASIMD FP compare
// ASIMD FP complex add
// ASIMD FP max/min, normal
// ASIMD FP max/min, pairwise
// ASIMD FP negate
// Handled by SchedAlias<WriteV[dq], ...>

// ASIMD FP complex multiply add
def : InstRW<[N2Write_4cyc_1V], (instregex "^FCMLAv")>;

// ASIMD FP convert, long (F16 to F32)
def :
InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTL(v4|v8)i16")>; 1066 1067// ASIMD FP convert, long (F32 to F64) 1068def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTL(v2|v4)i32")>; 1069 1070// ASIMD FP convert, narrow (F32 to F16) 1071def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTN(v4|v8)i16")>; 1072 1073// ASIMD FP convert, narrow (F64 to F32) 1074def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTN(v2|v4)i32", 1075 "^FCVTXN(v2|v4)f32")>; 1076 1077// ASIMD FP convert, other, D-form F32 and Q-form F64 1078def : InstRW<[N2Write_3cyc_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$", 1079 "^[SU]CVTFv2f(32|64)$")>; 1080 1081// ASIMD FP convert, other, D-form F16 and Q-form F32 1082def : InstRW<[N2Write_4cyc_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$", 1083 "^[SU]CVTFv4f(16|32)$")>; 1084 1085// ASIMD FP convert, other, Q-form F16 1086def : InstRW<[N2Write_6cyc_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$", 1087 "^[SU]CVTFv8f16$")>; 1088 1089// ASIMD FP divide, D-form, F16 1090def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVv4f16)>; 1091 1092// ASIMD FP divide, D-form, F32 1093def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv2f32)>; 1094 1095// ASIMD FP divide, Q-form, F16 1096def : InstRW<[N2Write_13cyc_2V0], (instrs FDIVv8f16)>; 1097 1098// ASIMD FP divide, Q-form, F32 1099def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv4f32)>; 1100 1101// ASIMD FP divide, Q-form, F64 1102def : InstRW<[N2Write_15cyc_2V0], (instrs FDIVv2f64)>; 1103 1104// ASIMD FP max/min, reduce, F32 and D-form F16 1105def : InstRW<[N2Write_4cyc_1V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>; 1106 1107// ASIMD FP max/min, reduce, Q-form F16 1108def : InstRW<[N2Write_6cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>; 1109 1110// ASIMD FP multiply 1111def : InstRW<[N2Write_3cyc_1V], (instregex "^FMULv", "^FMULXv")>; 1112 1113// ASIMD FP multiply accumulate 1114def : InstRW<[N2Write_4cyc_1V], (instregex "^FMLAv", "^FMLSv")>; 1115 1116// ASIMD FP multiply accumulate long 1117def : 
InstRW<[N2Write_5cyc_1V], (instregex "^FMLALv", "^FMLSLv")>;

// ASIMD FP round, D-form F32 and Q-form F64
// The second pattern selects the FRINT32[XZ]/FRINT64[XZ] v2f32/v2f64 forms.
// It must be a parenthesised alternation "(32|64)": written with an opening
// '[' the text up to the next ']' is parsed as a single-character bracket
// expression, so the FRINT32*/FRINT64* opcodes would never be matched here.
def : InstRW<[N2Write_3cyc_1V0],
             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
                        "^FRINT(32|64)[XZ]v2f(32|64)$")>;

// ASIMD FP round, D-form F16 and Q-form F32
def : InstRW<[N2Write_4cyc_2V0],
             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
                        "^FRINT(32|64)[XZ]v4f32$")>;


// ASIMD FP round, Q-form F16
def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>;

// ASIMD FP square root, D-form, F16
def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTv4f16)>;

// ASIMD FP square root, D-form, F32
def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv2f32)>;

// ASIMD FP square root, Q-form, F16
def : InstRW<[N2Write_13cyc_2V0], (instrs FSQRTv8f16)>;

// ASIMD FP square root, Q-form, F32
def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv4f32)>;

// ASIMD FP square root, Q-form, F64
def : InstRW<[N2Write_16cyc_2V0], (instrs FSQRTv2f64)>;

// ASIMD BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------

// ASIMD convert, F32 to BF16
def : InstRW<[N2Write_4cyc_1V0], (instrs BFCVTN, BFCVTN2)>;

// ASIMD dot product
def : InstRW<[N2Write_4cyc_1V], (instrs BFDOTv4bf16, BFDOTv8bf16)>;

// ASIMD matrix multiply accumulate
def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA)>;

// ASIMD multiply accumulate long
def : InstRW<[N2Write_4cyc_1V], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
                                        BFMLALTIdx)>;

// Scalar convert, F32 to BF16
def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT)>;

// ASIMD miscellaneous instructions
// -----------------------------------------------------------------------------

// ASIMD bit reverse
// ASIMD bitwise insert
// ASIMD count
// ASIMD duplicate, element
// ASIMD extract
// ASIMD 
extract narrow 1176// ASIMD insert, element to element 1177// ASIMD move, FP immed 1178// ASIMD move, integer immed 1179// ASIMD reverse 1180// ASIMD table lookup, 1 or 2 table regs 1181// ASIMD table lookup extension, 1 table reg 1182// ASIMD transfer, element to gen reg 1183// ASIMD transpose 1184// ASIMD unzip/zip 1185// Handled by SchedAlias<WriteV[dq], ...> 1186 1187// ASIMD duplicate, gen reg 1188def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>; 1189 1190// ASIMD extract narrow, saturating 1191def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>; 1192 1193// ASIMD reciprocal and square root estimate, D-form U32 1194def : InstRW<[N2Write_3cyc_1V0], (instrs URECPEv2i32, URSQRTEv2i32)>; 1195 1196// ASIMD reciprocal and square root estimate, Q-form U32 1197def : InstRW<[N2Write_4cyc_2V0], (instrs URECPEv4i32, URSQRTEv4i32)>; 1198 1199// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms 1200def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPEv1f16, FRECPEv1i32, 1201 FRECPEv1i64, FRECPEv2f32, 1202 FRSQRTEv1f16, FRSQRTEv1i32, 1203 FRSQRTEv1i64, FRSQRTEv2f32)>; 1204 1205// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 1206def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPEv4f16, FRECPEv4f32, 1207 FRSQRTEv4f16, FRSQRTEv4f32)>; 1208 1209// ASIMD reciprocal and square root estimate, Q-form F16 1210def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPEv8f16, FRSQRTEv8f16)>; 1211 1212// ASIMD reciprocal exponent 1213def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRECPXv")>; 1214 1215// ASIMD reciprocal step 1216def : InstRW<[N2Write_4cyc_1V], (instregex "^FRECPSv", "^FRSQRTSv")>; 1217 1218// ASIMD table lookup, 3 table regs 1219def : InstRW<[N2Write_4cyc_2V], (instrs TBLv8i8Three, TBLv16i8Three)>; 1220 1221// ASIMD table lookup, 4 table regs 1222def : InstRW<[N2Write_4cyc_4V], (instrs TBLv8i8Four, TBLv16i8Four)>; 1223 1224// ASIMD table lookup extension, 2 table reg 1225def : InstRW<[N2Write_4cyc_2V], (instrs 
TBXv8i8Two, TBXv16i8Two)>; 1226 1227// ASIMD table lookup extension, 3 table reg 1228def : InstRW<[N2Write_6cyc_4V], (instrs TBXv8i8Three, TBXv16i8Three)>; 1229 1230// ASIMD table lookup extension, 4 table reg 1231def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>; 1232 1233// ASIMD transfer, gen reg to element 1234def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>; 1235 1236// ASIMD load instructions 1237// ----------------------------------------------------------------------------- 1238 1239// ASIMD load, 1 element, multiple, 1 reg, D-form 1240def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>; 1241def : InstRW<[WriteAdr, N2Write_6cyc_1L], 1242 (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>; 1243 1244// ASIMD load, 1 element, multiple, 1 reg, Q-form 1245def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>; 1246def : InstRW<[WriteAdr, N2Write_6cyc_1L], 1247 (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>; 1248 1249// ASIMD load, 1 element, multiple, 2 reg, D-form 1250def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; 1251def : InstRW<[WriteAdr, N2Write_6cyc_2L], 1252 (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; 1253 1254// ASIMD load, 1 element, multiple, 2 reg, Q-form 1255def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; 1256def : InstRW<[WriteAdr, N2Write_6cyc_2L], 1257 (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; 1258 1259// ASIMD load, 1 element, multiple, 3 reg, D-form 1260def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; 1261def : InstRW<[WriteAdr, N2Write_6cyc_3L], 1262 (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; 1263 1264// ASIMD load, 1 element, multiple, 3 reg, Q-form 1265def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; 1266def : InstRW<[WriteAdr, N2Write_6cyc_3L], 1267 (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; 1268 1269// ASIMD load, 1 element, multiple, 4 reg, D-form 1270def : 
InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; 1271def : InstRW<[WriteAdr, N2Write_7cyc_4L], 1272 (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; 1273 1274// ASIMD load, 1 element, multiple, 4 reg, Q-form 1275def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; 1276def : InstRW<[WriteAdr, N2Write_7cyc_4L], 1277 (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; 1278 1279// ASIMD load, 1 element, one lane, B/H/S 1280// ASIMD load, 1 element, one lane, D 1281def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)$")>; 1282def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)_POST$")>; 1283 1284// ASIMD load, 1 element, all lanes, D-form, B/H/S 1285// ASIMD load, 1 element, all lanes, D-form, D 1286def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)$")>; 1287def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>; 1288 1289// ASIMD load, 1 element, all lanes, Q-form 1290def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>; 1291def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; 1292 1293// ASIMD load, 2 element, multiple, D-form, B/H/S 1294def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)$")>; 1295def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)_POST$")>; 1296 1297// ASIMD load, 2 element, multiple, Q-form, B/H/S 1298// ASIMD load, 2 element, multiple, Q-form, D 1299def : InstRW<[N2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)$")>; 1300def : InstRW<[WriteAdr, N2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; 1301 1302// ASIMD load, 2 element, one lane, B/H 1303// ASIMD load, 2 element, one lane, S 1304// ASIMD load, 2 element, one lane, D 1305def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)$")>; 1306def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)_POST$")>; 1307 1308// ASIMD load, 2 element, all lanes, D-form, 
B/H/S 1309// ASIMD load, 2 element, all lanes, D-form, D 1310def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)$")>; 1311def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>; 1312 1313// ASIMD load, 2 element, all lanes, Q-form 1314def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>; 1315def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; 1316 1317// ASIMD load, 3 element, multiple, D-form, B/H/S 1318def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)$")>; 1319def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)_POST$")>; 1320 1321// ASIMD load, 3 element, multiple, Q-form, B/H/S 1322def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s)$")>; 1323def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s)_POST$")>; 1324 1325// ASIMD load, 3 element, multiple, Q-form, D 1326def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Threev(2d)$")>; 1327def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Threev(2d)_POST$")>; 1328 1329// ASIMD load, 3 element, one lane, B/H 1330// ASIMD load, 3 element, one lane, S 1331// ASIMD load, 3 element, one lane, D 1332def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)$")>; 1333def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)_POST$")>; 1334 1335// ASIMD load, 3 element, all lanes, D-form, B/H/S 1336// ASIMD load, 3 element, all lanes, D-form, D 1337def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)$")>; 1338def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>; 1339 1340// ASIMD load, 3 element, all lanes, Q-form, B/H/S 1341// ASIMD load, 3 element, all lanes, Q-form, D 1342def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)$")>; 1343def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>; 1344 1345// ASIMD load, 4 element, multiple, 
D-form, B/H/S 1346def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)$")>; 1347def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; 1348 1349// ASIMD load, 4 element, multiple, Q-form, B/H/S 1350// ASIMD load, 4 element, multiple, Q-form, D 1351def : InstRW<[N2Write_9cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; 1352def : InstRW<[WriteAdr, N2Write_9cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; 1353 1354// ASIMD load, 4 element, one lane, B/H 1355// ASIMD load, 4 element, one lane, S 1356// ASIMD load, 4 element, one lane, D 1357def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)$")>; 1358def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)_POST$")>; 1359 1360// ASIMD load, 4 element, all lanes, D-form, B/H/S 1361// ASIMD load, 4 element, all lanes, D-form, D 1362def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>; 1363def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>; 1364 1365// ASIMD load, 4 element, all lanes, Q-form, B/H/S 1366// ASIMD load, 4 element, all lanes, Q-form, D 1367def : InstRW<[N2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>; 1368def : InstRW<[WriteAdr, N2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>; 1369 1370// ASIMD store instructions 1371// ----------------------------------------------------------------------------- 1372 1373// ASIMD store, 1 element, multiple, 1 reg, D-form 1374def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(8b|4h|2s|1d)$")>; 1375def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; 1376 1377// ASIMD store, 1 element, multiple, 1 reg, Q-form 1378def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(16b|8h|4s|2d)$")>; 1379def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; 1380 1381// ASIMD store, 1 element, multiple, 2 reg, D-form 1382def : InstRW<[N2Write_2cyc_1L01_1V], 
(instregex "ST1Twov(8b|4h|2s|1d)$")>; 1383def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; 1384 1385// ASIMD store, 1 element, multiple, 2 reg, Q-form 1386def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Twov(16b|8h|4s|2d)$")>; 1387def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; 1388 1389// ASIMD store, 1 element, multiple, 3 reg, D-form 1390def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Threev(8b|4h|2s|1d)$")>; 1391def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; 1392 1393// ASIMD store, 1 element, multiple, 3 reg, Q-form 1394def : InstRW<[N2Write_2cyc_3L01_3V], (instregex "ST1Threev(16b|8h|4s|2d)$")>; 1395def : InstRW<[WriteAdr, N2Write_2cyc_3L01_3V], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; 1396 1397// ASIMD store, 1 element, multiple, 4 reg, D-form 1398def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; 1399def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; 1400 1401// ASIMD store, 1 element, multiple, 4 reg, Q-form 1402def : InstRW<[N2Write_2cyc_4L01_4V], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; 1403def : InstRW<[WriteAdr, N2Write_2cyc_4L01_4V], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; 1404 1405// ASIMD store, 1 element, one lane, B/H/S 1406// ASIMD store, 1 element, one lane, D 1407def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST1i(8|16|32|64)$")>; 1408def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST1i(8|16|32|64)_POST$")>; 1409 1410// ASIMD store, 2 element, multiple, D-form, B/H/S 1411def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST2Twov(8b|4h|2s)$")>; 1412def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST2Twov(8b|4h|2s)_POST$")>; 1413 1414// ASIMD store, 2 element, multiple, Q-form, B/H/S 1415// ASIMD store, 2 element, multiple, Q-form, D 1416def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "ST2Twov(16b|8h|4s|2d)$")>; 
1417def : InstRW<[WriteAdr, N2Write_4cyc_2L01_2V], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; 1418 1419// ASIMD store, 2 element, one lane, B/H/S 1420// ASIMD store, 2 element, one lane, D 1421def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST2i(8|16|32|64)$")>; 1422def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST2i(8|16|32|64)_POST$")>; 1423 1424// ASIMD store, 3 element, multiple, D-form, B/H/S 1425def : InstRW<[N2Write_5cyc_2L01_2V], (instregex "ST3Threev(8b|4h|2s)$")>; 1426def : InstRW<[WriteAdr, N2Write_5cyc_2L01_2V], (instregex "ST3Threev(8b|4h|2s)_POST$")>; 1427 1428// ASIMD store, 3 element, multiple, Q-form, B/H/S 1429// ASIMD store, 3 element, multiple, Q-form, D 1430def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST3Threev(16b|8h|4s|2d)$")>; 1431def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>; 1432 1433// ASIMD store, 3 element, one lane, B/H 1434// ASIMD store, 3 element, one lane, S 1435// ASIMD store, 3 element, one lane, D 1436def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST3i(8|16|32|64)$")>; 1437def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST3i(8|16|32|64)_POST$")>; 1438 1439// ASIMD store, 4 element, multiple, D-form, B/H/S 1440def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST4Fourv(8b|4h|2s)$")>; 1441def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; 1442 1443// ASIMD store, 4 element, multiple, Q-form, B/H/S 1444def : InstRW<[N2Write_7cyc_6L01_6V], (instregex "ST4Fourv(16b|8h|4s)$")>; 1445def : InstRW<[WriteAdr, N2Write_7cyc_6L01_6V], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; 1446 1447// ASIMD store, 4 element, multiple, Q-form, D 1448def : InstRW<[N2Write_5cyc_4L01_4V], (instregex "ST4Fourv(2d)$")>; 1449def : InstRW<[WriteAdr, N2Write_5cyc_4L01_4V], (instregex "ST4Fourv(2d)_POST$")>; 1450 1451// ASIMD store, 4 element, one lane, B/H/S 1452def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST4i(8|16|32)$")>; 1453def : 
InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST4i(8|16|32)_POST$")>; 1454 1455// ASIMD store, 4 element, one lane, D 1456def : InstRW<[N2Write_4cyc_3L01_3V], (instregex "ST4i(64)$")>; 1457def : InstRW<[WriteAdr, N2Write_4cyc_3L01_3V], (instregex "ST4i(64)_POST$")>; 1458 1459// Cryptography extensions 1460// ----------------------------------------------------------------------------- 1461 1462// Crypto AES ops 1463def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>; 1464 1465// Crypto polynomial (64x64) multiply long 1466def : InstRW<[N2Write_2cyc_1V0], (instrs PMULLv1i64, PMULLv2i64)>; 1467 1468// Crypto SHA1 hash acceleration op 1469// Crypto SHA1 schedule acceleration ops 1470def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>; 1471 1472// Crypto SHA1 hash acceleration ops 1473// Crypto SHA256 hash acceleration ops 1474def : InstRW<[N2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>; 1475 1476// Crypto SHA256 schedule acceleration ops 1477def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>; 1478 1479// Crypto SHA512 hash acceleration ops 1480def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>; 1481 1482// Crypto SHA3 ops 1483def : InstRW<[N2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>; 1484 1485// Crypto SM3 ops 1486def : InstRW<[N2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$", 1487 "^SM3TT[12][AB]$")>; 1488 1489// Crypto SM4 ops 1490def : InstRW<[N2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>; 1491 1492// CRC 1493// ----------------------------------------------------------------------------- 1494 1495def : InstRW<[N2Write_2cyc_1M0], (instregex "^CRC32")>; 1496 1497// SVE Predicate instructions 1498// ----------------------------------------------------------------------------- 1499 1500// Loop control, based on predicate 1501def : InstRW<[N2Write_2cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP, 1502 BRKB_PPmP, BRKB_PPzP)>; 1503 1504// Loop control, based on predicate and flag 
setting 1505def : InstRW<[N2Write_3cyc_1M], (instrs BRKAS_PPzP, BRKBS_PPzP)>; 1506 1507// Loop control, propagating 1508def : InstRW<[N2Write_2cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>; 1509 1510// Loop control, propagating and flag setting 1511def : InstRW<[N2Write_3cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP, 1512 BRKPBS_PPzPP)>; 1513 1514// Loop control, based on GPR 1515def : InstRW<[N2Write_3cyc_1M], 1516 (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>; 1517 1518def : InstRW<[N2Write_3cyc_1M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>; 1519 1520// Loop terminate 1521def : InstRW<[N2Write_1cyc_1M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>; 1522 1523// Predicate counting scalar 1524def : InstRW<[N2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>; 1525def : InstRW<[N2Write_2cyc_1M], 1526 (instregex "^(CNT|DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$", 1527 "^SQ(DEC|INC)[BHWD]_XPiWdI$", 1528 "^(UQDEC|UQINC)[BHWD]_WPiI$")>; 1529 1530// Predicate counting scalar, active predicate 1531def : InstRW<[N2Write_2cyc_1M], 1532 (instregex "^CNTP_XPP_[BHSD]$", 1533 "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$", 1534 "^(UQDEC|UQINC)P_WP_[BHSD]$", 1535 "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>; 1536 1537// Predicate counting vector, active predicate 1538def : InstRW<[N2Write_7cyc_1M_1M0_1V], 1539 (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>; 1540 1541// Predicate logical 1542def : InstRW<[N2Write_1cyc_1M0], 1543 (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>; 1544 1545// Predicate logical, flag setting 1546def : InstRW<[N2Write_2cyc_1M0_1M], 1547 (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>; 1548 1549// Predicate reverse 1550def : InstRW<[N2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>; 1551 1552// Predicate select 1553def : InstRW<[N2Write_1cyc_1M0], (instrs SEL_PPPP)>; 1554 1555// Predicate set 1556def : InstRW<[N2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>; 1557 1558// Predicate 
set/initialize, set flags 1559def : InstRW<[N2Write_3cyc_1M], (instregex "^PTRUES_[BHSD]$")>; 1560 1561// Predicate find first/next 1562def : InstRW<[N2Write_3cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>; 1563 1564// Predicate test 1565def : InstRW<[N2Write_1cyc_1M], (instrs PTEST_PP)>; 1566 1567// Predicate transpose 1568def : InstRW<[N2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSDQ]$")>; 1569 1570// Predicate unpack and widen 1571def : InstRW<[N2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>; 1572 1573// Predicate zip/unzip 1574def : InstRW<[N2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>; 1575 1576// SVE integer instructions 1577// ----------------------------------------------------------------------------- 1578 1579// Arithmetic, absolute diff 1580def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]$")>; 1581 1582// Arithmetic, absolute diff accum 1583def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>; 1584 1585// Arithmetic, absolute diff accum long 1586def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>; 1587 1588// Arithmetic, absolute diff long 1589def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>; 1590 1591// Arithmetic, basic 1592def : InstRW<[N2Write_2cyc_1V], 1593 (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]$", 1594 "^(ADD|SUB)_ZZZ_[BHSD]$", 1595 "^(ADD|SUB|SUBR)_ZI_[BHSD]$", 1596 "^ADR_[SU]XTW_ZZZ_D_[0123]$", 1597 "^ADR_LSL_ZZZ_[SD]_[0123]$", 1598 "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$", 1599 "^SADDLBT_ZZZ_[HSD]$", 1600 "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$", 1601 "^SSUBL(BT|TB)_ZZZ_[HSD]$")>; 1602 1603// Arithmetic, complex 1604def : InstRW<[N2Write_2cyc_1V], 1605 (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$", 1606 "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]$", 1607 "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$", 1608 "^[SU]Q(ADD|SUB)_ZI_[BHSD]$", 1609 "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$", 1610 "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>; 1611 1612// Arithmetic, large integer 1613def : 
InstRW<[N2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>; 1614 1615// Arithmetic, pairwise add 1616def : InstRW<[N2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>; 1617 1618// Arithmetic, pairwise add and accum long 1619def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>; 1620 1621// Arithmetic, shift 1622def : InstRW<[N2Write_2cyc_1V1], 1623 (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$", 1624 "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$", 1625 "^(ASR|LSL|LSR)_ZPmI_[BHSD]$", 1626 "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$", 1627 "^(ASR|LSL|LSR)_ZZI_[BHSD]$", 1628 "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>; 1629 1630// Arithmetic, shift and accumulate 1631def : InstRW<[N2Write_4cyc_1V1], 1632 (instregex "^(SRSRA|SSRA|URSRA|USRA)_ZZI_[BHSD]$")>; 1633 1634// Arithmetic, shift by immediate 1635// Arithmetic, shift by immediate and insert 1636def : InstRW<[N2Write_2cyc_1V1], 1637 (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>; 1638 1639// Arithmetic, shift complex 1640def : InstRW<[N2Write_4cyc_1V1], 1641 (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$", 1642 "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$", 1643 "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$", 1644 "^SQSHRU?N[BT]_ZZI_[BHS]$", 1645 "^UQR?SHRN[BT]_ZZI_[BHS]$")>; 1646 1647// Arithmetic, shift right for divide 1648def : InstRW<[N2Write_4cyc_1V1], (instregex "^ASRD_ZPmI_[BHSD]$")>; 1649 1650// Arithmetic, shift rounding 1651def : InstRW<[N2Write_4cyc_1V1], 1652 (instregex "^(SRSHL|SRSHLR|URSHL|URSHLR)_ZPmZ_[BHSD]$", 1653 "^[SU]RSHR_ZPmI_[BHSD]$")>; 1654 1655// Bit manipulation 1656def : InstRW<[N2Write_6cyc_2V1], 1657 (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]$")>; 1658 1659// Bitwise select 1660def : InstRW<[N2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>; 1661 1662// Count/reverse bits 1663def : InstRW<[N2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>; 1664 1665// Broadcast logical bitmask immediate to vector 1666def : InstRW<[N2Write_2cyc_1V], 
(instrs DUPM_ZI)>; 1667 1668// Compare and set flags 1669def : InstRW<[N2Write_4cyc_1V0_1M], 1670 (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$", 1671 "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>; 1672 1673// Complex add 1674def : InstRW<[N2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>; 1675 1676// Complex dot product 8-bit element 1677def : InstRW<[N2Write_3cyc_1V], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>; 1678 1679// Complex dot product 16-bit element 1680def : InstRW<[N2Write_4cyc_1V0], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>; 1681 1682// Complex multiply-add B, H, S element size 1683def : InstRW<[N2Write_4cyc_1V0], (instregex "^CMLA_ZZZ_[BHS]$", 1684 "^CMLA_ZZZI_[HS]$")>; 1685 1686// Complex multiply-add D element size 1687def : InstRW<[N2Write_5cyc_2V0], (instrs CMLA_ZZZ_D)>; 1688 1689// Conditional extract operations, scalar form 1690def : InstRW<[N2Write_8cyc_1M0_1V1_1V], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>; 1691 1692// Conditional extract operations, SIMD&FP scalar and vector forms 1693def : InstRW<[N2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$", 1694 "^COMPACT_ZPZ_[SD]$", 1695 "^SPLICE_ZPZZ?_[BHSD]$")>; 1696 1697// Convert to floating point, 64b to float or convert to double 1698def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]$")>; 1699 1700// Convert to floating point, 64b to half 1701def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_DtoH$")>; 1702 1703// Convert to floating point, 32b to single or half 1704def : InstRW<[N2Write_4cyc_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>; 1705 1706// Convert to floating point, 32b to double 1707def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_StoD$")>; 1708 1709// Convert to floating point, 16b to half 1710def : InstRW<[N2Write_6cyc_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>; 1711 1712// Copy, scalar 1713def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>; 1714 1715// Copy, scalar SIMD&FP or imm 1716def : 
InstRW<[N2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$", 1717 "^CPY_ZPzI_[BHSD]$")>; 1718 1719// Divides, 32 bit 1720def : InstRW<[N2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>; 1721 1722// Divides, 64 bit 1723def : InstRW<[N2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>; 1724 1725// Dot product, 8 bit 1726def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]DOT_ZZZI?_S$")>; 1727 1728// Dot product, 8 bit, using signed and unsigned integers 1729def : InstRW<[N2Write_3cyc_1V], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>; 1730 1731// Dot product, 16 bit 1732def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>; 1733 1734// Duplicate, immediate and indexed form 1735def : InstRW<[N2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$", 1736 "^DUP_ZZI_[BHSDQ]$")>; 1737 1738// Duplicate, scalar form 1739def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>; 1740 1741// Extend, sign or zero 1742def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$", 1743 "^[SU]XTH_ZPmZ_[SD]$", 1744 "^[SU]XTW_ZPmZ_[D]$")>; 1745 1746// Extract 1747def : InstRW<[N2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>; 1748 1749// Extract narrow saturating 1750def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$", 1751 "^SQXTUN[BT]_ZZ_[BHS]$")>; 1752 1753// Extract/insert operation, SIMD and FP scalar form 1754def : InstRW<[N2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$", 1755 "^INSR_ZV_[BHSD]$")>; 1756 1757// Extract/insert operation, scalar 1758def : InstRW<[N2Write_5cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]$", 1759 "^INSR_ZR_[BHSD]$")>; 1760 1761// Histogram operations 1762def : InstRW<[N2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$", 1763 "^HISTSEG_ZZZ$")>; 1764 1765// Horizontal operations, B, H, S form, immediate operands only 1766def : InstRW<[N2Write_4cyc_1V0], (instregex "^INDEX_II_[BHS]$")>; 1767 1768// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar 1769// operands only / immediate, scalar operands 
def : InstRW<[N2Write_7cyc_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;

// Horizontal operations, D form, immediate operands only
def : InstRW<[N2Write_5cyc_2V0], (instrs INDEX_II_D)>;

// Horizontal operations, D form, scalar, immediate operands / scalar operands
// only / immediate, scalar operands
def : InstRW<[N2Write_8cyc_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;

// Logical
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^(AND|EOR|ORR)_ZI$",
                        "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$",
                        "^EOR(BT|TB)_ZZZ_[BHSD]$",
                        "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;

// Max/min, basic and pairwise
def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$",
                                           "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$")>;

// Matching operations
def : InstRW<[N2Write_2cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>;

// Matrix multiply-accumulate
def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;

// Move prefix
def : InstRW<[N2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
                                           "^MOVPRFX_ZZ$")>;

// Multiply, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$",
                                            "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;

// Multiply, D element size
def : InstRW<[N2Write_5cyc_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$",
                                            "^[SU]MULH_(ZPmZ|ZZZ)_D$")>;

// Multiply long
def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
                                            "^[SU]MULL[BT]_ZZZ_[HSD]$")>;

// Multiply accumulate, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0], (instregex "^ML[AS]_ZZZI_[BHS]$",
                                            "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>;

// Multiply accumulate, D element size
def : InstRW<[N2Write_5cyc_2V0], (instregex "^ML[AS]_ZZZI_D$",
                                            "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>;

// Multiply accumulate long
def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
                                            "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;

// Multiply accumulate saturating doubling long regular
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
                                            "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;

// Multiply saturating doubling high, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULH_ZZZ_[BHS]$",
                                            "^SQDMULH_ZZZI_[HS]$")>;

// Multiply saturating doubling high, D element size
def : InstRW<[N2Write_5cyc_2V0], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;

// Multiply saturating doubling long
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
                                            "^SQDMULL[BT]_ZZZI_[SD]$")>;

// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
// element size
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
                                            "^SQRDCMLAH_ZZZ_[BHS]$",
                                            "^SQRDML[AS]H_ZZZI_[HS]$",
                                            "^SQRDCMLAH_ZZZI_[HS]$")>;

// Multiply saturating rounding doubling regular/complex accumulate, D element
// size
def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDML[AS]H_ZZZI?_D$",
                                            "^SQRDCMLAH_ZZZ_D$")>;

// Multiply saturating rounding doubling regular/complex, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMULH_ZZZ_[BHS]$",
                                            "^SQRDMULH_ZZZI_[HS]$")>;

// Multiply saturating rounding doubling regular/complex, D element size
def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDMULH_ZZZI?_D$")>;

// Multiply/multiply long, (8x8) polynomial
def : InstRW<[N2Write_2cyc_1V0], (instregex "^PMUL_ZZZ_B$",
                                            "^PMULL[BT]_ZZZ_[HDQ]$")>;

// Predicate counting vector
def : InstRW<[N2Write_2cyc_1V0],
             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>;

// Reciprocal estimate
def : InstRW<[N2Write_4cyc_2V0], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S)>;

// Reduction, arithmetic, B form
def : InstRW<[N2Write_11cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>;

// Reduction, arithmetic, H form
def : InstRW<[N2Write_9cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>;

// Reduction, arithmetic, S form
def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>;

// Reduction, arithmetic, D form
def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>;

// Reduction, logical
def : InstRW<[N2Write_6cyc_1V_1V1], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>;

// Reverse, vector
def : InstRW<[N2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$",
                                           "^REVB_ZPmZ_[HSD]$",
                                           "^REVH_ZPmZ_[SD]$",
                                           "^REVW_ZPmZ_D$")>;

// Select, vector form
def : InstRW<[N2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>;

// Table lookup
def : InstRW<[N2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>;

// Table lookup extension
def : InstRW<[N2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>;

// Transpose, vector form
def : InstRW<[N2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>;

// Unpack and extend
def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>;

// Zip/unzip
def : InstRW<[N2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>;

// SVE floating-point instructions
// -----------------------------------------------------------------------------

// Floating point absolute value/difference
def : InstRW<[N2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]$")>;

// Floating point arithmetic
def : InstRW<[N2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$",
                                           "^FADDP_ZPmZZ_[HSD]$",
                                           "^FNEG_ZPmZ_[HSD]$",
                                           "^FSUBR_ZPm[IZ]_[HSD]$")>;

// Floating point associative add, F16
def : InstRW<[N2Write_10cyc_1V1], (instrs FADDA_VPZ_H)>;

// Floating point associative add, F32
def : InstRW<[N2Write_6cyc_1V1], (instrs FADDA_VPZ_S)>;

// Floating point associative add, F64
def : InstRW<[N2Write_4cyc_1V], (instrs FADDA_VPZ_D)>;

// Floating point compare
def : InstRW<[N2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]$",
                                            "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$",
                                            "^FCM(LE|LT)_PPzZ0_[HSD]$",
                                            "^FCMUO_PPzZZ_[HSD]$")>;

// Floating point complex add
def : InstRW<[N2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>;

// Floating point complex multiply add
def : InstRW<[N2Write_5cyc_1V], (instregex "^FCMLA_ZPmZZ_[HSD]$",
                                           "^FCMLA_ZZZI_[HS]$")>;

// Floating point convert, long or narrow (F16 to F32 or F32 to F16)
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$",
                                            "^FCVTLT_ZPmZ_HtoS$",
                                            "^FCVTNT_ZPmZ_StoH$")>;

// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32
// or F64 to F16)
def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$",
                                            "^FCVTLT_ZPmZ_StoD$",
                                            "^FCVTNT_ZPmZ_DtoS$")>;

// Floating point convert, round to odd
def : InstRW<[N2Write_3cyc_1V0], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>;

// Floating point base2 log, F16
def : InstRW<[N2Write_6cyc_4V0], (instrs FLOGB_ZPmZ_H)>;

// Floating point base2 log, F32
def : InstRW<[N2Write_4cyc_2V0], (instrs FLOGB_ZPmZ_S)>;

// Floating point base2 log, F64
def : InstRW<[N2Write_3cyc_1V0], (instrs FLOGB_ZPmZ_D)>;

// Floating point convert to integer, F16
def : InstRW<[N2Write_6cyc_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>;

// Floating point convert to integer, F32
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>;

// Floating point convert to integer, F64
def : InstRW<[N2Write_3cyc_1V0],
             (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>;

// Floating point copy
def : InstRW<[N2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$",
                                           "^FDUP_ZI_[HSD]$")>;

// Floating point divide, F16
def : InstRW<[N2Write_13cyc_1V0], (instregex "^FDIVR?_ZPmZ_H$")>;

// Floating point divide, F32
def : InstRW<[N2Write_10cyc_1V0], (instregex "^FDIVR?_ZPmZ_S$")>;

// Floating point divide, F64
def : InstRW<[N2Write_15cyc_1V0], (instregex "^FDIVR?_ZPmZ_D$")>;

// Floating point min/max pairwise
def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>;

// Floating point min/max
def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>;

// Floating point multiply
def : InstRW<[N2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$",
                                           "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>;

// Floating point multiply accumulate
def : InstRW<[N2Write_4cyc_1V],
             (instregex "^FML[AS]_(ZPmZZ|ZZZI)_[HSD]$",
                        "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_ZPmZZ_[HSD]$")>;

// Floating point multiply add/sub accumulate long
def : InstRW<[N2Write_4cyc_1V], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>;

// Floating point reciprocal estimate, F16
def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H,
                                         FRSQRTE_ZZ_H)>;

// Floating point reciprocal estimate, F32
def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S,
                                         FRSQRTE_ZZ_S)>;

// Floating point reciprocal estimate, F64
def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D,
                                         FRSQRTE_ZZ_D)>;

// Floating point reciprocal step
def : InstRW<[N2Write_4cyc_1V0], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>;

// Floating point reduction, F16
def : InstRW<[N2Write_6cyc_2V],
             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>;

// Floating point reduction, F32
def : InstRW<[N2Write_4cyc_1V],
             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>;

// Floating point reduction, F64
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;

// Floating point round to integral, F16
def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;

// Floating point round to integral, F32
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;

// Floating point round to integral, F64
def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;

// Floating point square root, F16
def : InstRW<[N2Write_13cyc_1V0], (instrs FSQRT_ZPmZ_H)>;

// Floating point square root, F32
def : InstRW<[N2Write_10cyc_1V0], (instrs FSQRT_ZPmZ_S)>;

// Floating point square root, F64
def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRT_ZPmZ_D)>;

// Floating point trigonometric exponentiation
def : InstRW<[N2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>;

// Floating point trigonometric multiply add
def : InstRW<[N2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>;

// Floating point trigonometric, miscellaneous
def : InstRW<[N2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;

// SVE BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------

// Convert, F32 to BF16
def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;

// Dot product
def : InstRW<[N2Write_4cyc_1V], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;

// Matrix multiply accumulate
def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA_ZZZ)>;

// Multiply accumulate long
def : InstRW<[N2Write_4cyc_1V], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>;

// SVE Load instructions
// -----------------------------------------------------------------------------

// Load vector
def : InstRW<[N2Write_6cyc_1L], (instrs LDR_ZXI)>;

// Load predicate
def : InstRW<[N2Write_6cyc_1L_1M], (instrs LDR_PXI)>;

// Contiguous load, scalar + imm
def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM$",
                                           "^LD1S?B_[HSD]_IMM$",
                                           "^LD1S?H_[SD]_IMM$",
                                           "^LD1S?W_D_IMM$")>;

// Contiguous load, scalar + scalar
def : InstRW<[N2Write_6cyc_1L01], (instregex "^LD1[BHWD]$",
                                             "^LD1S?B_[HSD]$",
                                             "^LD1S?H_[SD]$",
                                             "^LD1S?W_D$")>;

// Contiguous load broadcast, scalar + imm
def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$",
                                           "^LD1RSW_IMM$",
                                           "^LD1RS?B_[HSD]_IMM$",
                                           "^LD1RS?H_[SD]_IMM$",
                                           "^LD1RS?W_D_IMM$",
                                           "^LD1RQ_[BHWD]_IMM$")>;

// Contiguous load broadcast, scalar + scalar
def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;

// Non temporal load, scalar + imm
def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>;

// Non temporal load, scalar + scalar
def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>;

// Non temporal gather load, vector + scalar 32-bit element size
def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
                                              "^LDNT1S[BH]_ZZR_S_REAL$")>;

// Non temporal gather load, vector + scalar 64-bit element size
def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;

// Contiguous first faulting load, scalar + scalar
def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$",
                                              "^LDFF1S?B_[HSD]_REAL$",
                                              "^LDFF1S?H_[SD]_REAL$",
                                              "^LDFF1S?W_D_REAL$")>;

// Contiguous non faulting load, scalar + imm
def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$",
                                           "^LDNF1S?B_[HSD]_IMM_REAL$",
                                           "^LDNF1S?H_[SD]_IMM_REAL$",
                                           "^LDNF1S?W_D_IMM_REAL$")>;

// Contiguous Load two structures to two vectors, scalar + imm
def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>;

// Contiguous Load two structures to two vectors, scalar + scalar
def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD2[BHWD]$")>;

// Contiguous Load three structures to three vectors, scalar + imm
def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD3[BHWD]_IMM$")>;

// Contiguous Load three structures to three vectors, scalar + scalar
def : InstRW<[N2Write_10cyc_1V_1L_1S], (instregex "^LD3[BHWD]$")>;

// Contiguous Load four structures to four vectors, scalar + imm
def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>;

// Contiguous Load four structures to four vectors, scalar + scalar
def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>;

// Gather load, vector + imm, 32-bit element size
def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
                                              "^GLD(FF)?1W_IMM_REAL$")>;

// Gather load, vector + imm, 64-bit element size
def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
                                              "^GLD(FF)?1D_IMM_REAL$")>;

// Gather load, 64-bit element size
def : InstRW<[N2Write_9cyc_2L_2V],
             (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
                        "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
                        "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
                        "^GLD(FF)?1D_(SCALED_)?REAL$")>;

// Gather load, 32-bit scaled offset
def : InstRW<[N2Write_10cyc_2L_2V],
             (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
                        "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;

// Gather load, 32-bit unpacked unscaled offset
def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
                                              "^GLD(FF)?1W_[SU]XTW_REAL$")>;

// SVE Store instructions
// -----------------------------------------------------------------------------

// Store from predicate reg
def : InstRW<[N2Write_1cyc_1L01], (instrs STR_PXI)>;

// Store from vector reg
def : InstRW<[N2Write_2cyc_1L01_1V], (instrs STR_ZXI)>;

// Contiguous store, scalar + imm
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BHWD]_IMM$",
                                                "^ST1B_[HSD]_IMM$",
                                                "^ST1H_[SD]_IMM$",
                                                "^ST1W_D_IMM$")>;

// Contiguous store, scalar + scalar
def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BWD]$",
                                                "^ST1B_[HSD]$",
                                                "^ST1W_D$")>;

// Contiguous store two structures from two vectors, scalar + imm
def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BHWD]_IMM$")>;

// Contiguous store two structures from two vectors, scalar + scalar
// (H element size)
def : InstRW<[N2Write_4cyc_1L01_1S_1V], (instrs ST2H)>;

// Contiguous store two structures from two vectors, scalar + scalar
// (B, W, D element sizes)
def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BWD]$")>;

// Contiguous store three structures from three vectors, scalar + imm
def : InstRW<[N2Write_7cyc_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;

// Contiguous store three structures from three vectors, scalar + scalar
// (H element size)
def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instrs ST3H)>;

// Contiguous store three structures from three vectors, scalar + scalar
// (B, W, D element sizes)
def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instregex "^ST3[BWD]$")>;

// Contiguous store four structures from four vectors, scalar + imm
def : InstRW<[N2Write_11cyc_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;

// Contiguous store four structures from four vectors, scalar + scalar
// (H element size)
def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instrs ST4H)>;

// Contiguous store four structures from four vectors, scalar + scalar
// (B, W, D element sizes)
def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instregex "^ST4[BWD]$")>;

// Non temporal store, scalar + imm
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;

// Non temporal store, scalar + scalar
def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;

// Scatter non temporal store, vector + scalar 32-bit element size
def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^STNT1[BHW]_ZZR_S")>;

// Scatter non temporal store, vector + scalar 64-bit element size
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZZR_D")>;

// Scatter store vector + imm 32-bit element size
def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_IMM$",
                                                "^SST1W_IMM$")>;

// Scatter store vector + imm 64-bit element size
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_IMM$",
                                                "^SST1D_IMM$")>;

// Scatter store, 32-bit scaled offset
def : InstRW<[N2Write_4cyc_2L01_2V],
             (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;

// Scatter store, 32-bit unpacked unscaled offset
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$",
                                                "^SST1D_[SU]XTW$")>;

// Scatter store, 32-bit unpacked scaled offset
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
                                                "^SST1D_[SU]XTW_SCALED$")>;

// Scatter store, 32-bit unscaled offset
def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_[SU]XTW$",
                                                "^SST1W_[SU]XTW$")>;

// Scatter store, 64-bit scaled offset
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_SCALED$",
                                                "^SST1D_SCALED$")>;

// Scatter store, 64-bit unscaled offset
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D$",
                                                "^SST1D$")>;

// SVE Miscellaneous instructions
// -----------------------------------------------------------------------------

// Read first fault register, unpredicated
def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;

// Read first fault register, predicated
def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;

// Read first fault register and set flags
def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>;

// Set first fault register
// Write to first fault register
def : InstRW<[N2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;

// Prefetch
def : InstRW<[N2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;

// SVE Cryptographic instructions
// -----------------------------------------------------------------------------

// Crypto AES ops
def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$",
                                           "^AESI?MC_ZZ_B$")>;

// Crypto SHA3 ops
def : InstRW<[N2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$",
                                            "^RAX1_ZZZ_D$",
                                            "^XAR_ZZZI_[BHSD]$")>;

// Crypto SM4 ops
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;

}