1//=- AArch64SchedNeoverseN2.td - NeoverseN2 Scheduling Defs --*- tablegen -*-=// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the scheduling model for the Arm Neoverse N2 processors. 10// 11//===----------------------------------------------------------------------===// 12 13def NeoverseN2Model : SchedMachineModel { 14 let IssueWidth = 10; // Micro-ops dispatched at a time. 15 let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer. 16 let LoadLatency = 4; // Optimistic load latency. 17 let MispredictPenalty = 10; // Extra cycles for mispredicted branch. 18 let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57. 19 let CompleteModel = 1; 20 21 list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, 22 [HasSVE2p1]); 23} 24 25//===----------------------------------------------------------------------===// 26// Define each kind of processor resource and number available on Neoverse N2. 27// Instructions are first fetched and then decoded into internal macro-ops 28// (MOPs). From there, the MOPs proceed through register renaming and dispatch 29// stages. A MOP can be split into two micro-ops further down the pipeline 30// after the decode stage. Once dispatched, micro-ops wait for their operands 31// and issue out-of-order to one of thirteen issue pipelines. Each issue 32// pipeline can accept one micro-op per cycle. 33 34let SchedModel = NeoverseN2Model in { 35 36// Define the (13) issue ports. 37def N2UnitB : ProcResource<2>; // Branch 0/1 38def N2UnitS : ProcResource<2>; // Integer single Cycle 0/1 39def N2UnitM0 : ProcResource<1>; // Integer multicycle 0 40def N2UnitM1 : ProcResource<1>; // Integer multicycle 1 41def N2UnitL01 : ProcResource<2>; // Load/Store 0/1 42def N2UnitL2 : ProcResource<1>; // Load 2 43def N2UnitD : ProcResource<2>; // Store data 0/1 44def N2UnitV0 : ProcResource<1>; // FP/ASIMD 0 45def N2UnitV1 : ProcResource<1>; // FP/ASIMD 1 46 47def N2UnitV : ProcResGroup<[N2UnitV0, N2UnitV1]>; // FP/ASIMD 0/1 48def N2UnitM : ProcResGroup<[N2UnitM0, N2UnitM1]>; // Integer single/multicycle 0/1 49def N2UnitL : ProcResGroup<[N2UnitL01, N2UnitL2]>; // Load/Store 0/1 and Load 2 50def N2UnitI : ProcResGroup<[N2UnitS, N2UnitM0, N2UnitM1]>; // Integer single cycle 0/1 and single/multicycle 0/1 51 52// Define commonly used read types. 53 54// No forwarding is provided for these types. 55def : ReadAdvance<ReadI, 0>; 56def : ReadAdvance<ReadISReg, 0>; 57def : ReadAdvance<ReadIEReg, 0>; 58def : ReadAdvance<ReadIM, 0>; 59def : ReadAdvance<ReadIMA, 0>; 60def : ReadAdvance<ReadID, 0>; 61def : ReadAdvance<ReadExtrHi, 0>; 62def : ReadAdvance<ReadAdrBase, 0>; 63def : ReadAdvance<ReadST, 0>; 64def : ReadAdvance<ReadVLD, 0>; 65 66def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } 67def : WriteRes<WriteBarrier, []> { let Latency = 1; } 68def : WriteRes<WriteHint, []> { let Latency = 1; } 69def : WriteRes<WriteLDHi, []> { let Latency = 4; } 70 71//===----------------------------------------------------------------------===// 72// Define customized scheduler read/write types specific to the Neoverse N2. 73 74//===----------------------------------------------------------------------===// 75// Define generic 1 micro-op types 76 77def N2Write_1cyc_1B : SchedWriteRes<[N2UnitB]> { let Latency = 1; } 78def N2Write_1cyc_1I : SchedWriteRes<[N2UnitI]> { let Latency = 1; } 79def N2Write_1cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 1; } 80def N2Write_1cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 1; } 81def N2Write_1cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 1; } 82def N2Write_2cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 2; } 83def N2Write_3cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 3; } 84def N2Write_2cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 2; 85 let ResourceCycles = [2]; } 86def N2Write_3cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 3; 87 let ResourceCycles = [3]; } 88def N2Write_5cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 5; 89 let ResourceCycles = [5]; } 90def N2Write_12cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 12; 91 let ResourceCycles = [12]; } 92def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 20; 93 let ResourceCycles = [20]; } 94def N2Write_4cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 4; } 95def N2Write_6cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 6; } 96def N2Write_2cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 2; } 97def N2Write_3cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 3; } 98def N2Write_4cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 4; } 99def N2Write_5cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 5; } 100def N2Write_12cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 12; } 101def N2Write_2cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 2; } 102def N2Write_3cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 3; } 103def N2Write_4cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 4; } 104def N2Write_7cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 7; 105 let ResourceCycles = [7]; } 106def N2Write_9cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 9; } 107def N2Write_10cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 10; } 108def N2Write_12cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 12; } 109def N2Write_13cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 13; } 110def N2Write_15cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 15; } 111def N2Write_16cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 16; } 112def N2Write_20cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 20; } 113def N2Write_2cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 2; } 114def N2Write_3cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 3; } 115def N2Write_4cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 4; } 116def N2Write_6cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 6; } 117def N2Write_10cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 10; } 118def N2Write_6cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 6; } 119 120//===----------------------------------------------------------------------===// 121// Define generic 2 micro-op types 122 123def N2Write_1cyc_1B_1S : SchedWriteRes<[N2UnitB, N2UnitS]> { 124 let Latency = 1; 125 let NumMicroOps = 2; 126} 127 128def N2Write_6cyc_1M0_1B : SchedWriteRes<[N2UnitM0, N2UnitB]> { 129 let Latency = 6; 130 let NumMicroOps = 2; 131} 132 133def N2Write_9cyc_1M0_1L : SchedWriteRes<[N2UnitM0, N2UnitL]> { 134 let Latency = 9; 135 let NumMicroOps = 2; 136} 137 138def N2Write_3cyc_1I_1M : SchedWriteRes<[N2UnitI, N2UnitM]> { 139 let Latency = 3; 140 let NumMicroOps = 2; 141} 142 143def N2Write_4cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { 144 let Latency = 4; 145 let NumMicroOps = 2; 146} 147 148def N2Write_5cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { 149 let Latency = 5; 150 let NumMicroOps = 2; 151} 152 153def N2Write_6cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { 154 let Latency = 6; 155 let NumMicroOps = 2; 156} 157 158def N2Write_7cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { 159 let Latency = 7; 160 let NumMicroOps = 2; 161} 162 163def N2Write_1cyc_1L01_1D : SchedWriteRes<[N2UnitL01, N2UnitD]> { 164 let Latency = 1; 165 let NumMicroOps = 2; 166} 167 168def N2Write_5cyc_1M0_1V : SchedWriteRes<[N2UnitM0, N2UnitV]> { 169 let Latency = 5; 170 let NumMicroOps = 2; 171} 172 173def N2Write_2cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> { 174 let Latency = 2; 175 let NumMicroOps = 2; 176} 177 178def N2Write_4cyc_1V1_1V : SchedWriteRes<[N2UnitV1, N2UnitV]> { 179 let Latency = 4; 180 let NumMicroOps = 2; 181} 182 183def N2Write_4cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { 184 let Latency = 4; 185 let NumMicroOps = 2; 186} 187 188def N2Write_10cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { 189 let Latency = 10; 190 let NumMicroOps = 2; 191 let ResourceCycles = [5, 5]; 192} 193 194def N2Write_13cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { 195 let Latency = 13; 196 let NumMicroOps = 2; 197 let ResourceCycles = [6, 7]; 198} 199 200def N2Write_15cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { 201 let Latency = 15; 202 let NumMicroOps = 2; 203 let ResourceCycles = [7, 8]; 204} 205 206def N2Write_16cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { 207 let Latency = 16; 208 let NumMicroOps = 2; 209 let ResourceCycles = [8, 8]; 210} 211 212def N2Write_4cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> { 213 let Latency = 4; 214 let NumMicroOps = 2; 215} 216 217def N2Write_6cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> { 218 let Latency = 6; 219 let NumMicroOps = 2; 220} 221 222def N2Write_6cyc_2L : SchedWriteRes<[N2UnitL, N2UnitL]> { 223 let Latency = 6; 224 let NumMicroOps = 2; 225} 226 227def N2Write_8cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> { 228 let Latency = 8; 229 let NumMicroOps = 2; 230} 231 232def N2Write_4cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> { 233 let Latency = 4; 234 let NumMicroOps = 2; 235} 236 237def N2Write_3cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> { 238 let Latency = 3; 239 let NumMicroOps = 2; 240} 241 242def N2Write_2cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> { 243 let Latency = 2; 244 let NumMicroOps = 2; 245} 246 247def N2Write_6cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> { 248 let Latency = 6; 249 let NumMicroOps = 2; 250} 251 252def N2Write_4cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> { 253 let Latency = 4; 254 let NumMicroOps = 2; 255} 256 257def N2Write_5cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { 258 let Latency = 5; 259 let NumMicroOps = 2; 260} 261 262def N2Write_5cyc_1V1_1M0 : SchedWriteRes<[N2UnitV1, N2UnitM0]> { 263 let Latency = 5; 264 let NumMicroOps = 2; 265} 266 267def N2Write_7cyc_1M0_1V0 : SchedWriteRes<[N2UnitM0, N2UnitV0]> { 268 let Latency = 7; 269 let NumMicroOps = 2; 270} 271 272def N2Write_2cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> { 273 let Latency = 2; 274 let NumMicroOps = 2; 275} 276 277def N2Write_6cyc_1V_1V1 : SchedWriteRes<[N2UnitV, N2UnitV1]> { 278 let Latency = 6; 279 let NumMicroOps = 2; 280} 281 282def N2Write_6cyc_1L_1M : SchedWriteRes<[N2UnitL, N2UnitM]> { 283 let Latency = 6; 284 let NumMicroOps = 2; 285} 286 287def N2Write_6cyc_1L_1S : SchedWriteRes<[N2UnitL, N2UnitS]> { 288 let Latency = 6; 289 let NumMicroOps = 2; 290} 291 292def N2Write_9cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> { 293 let Latency = 9; 294 let NumMicroOps = 2; 295} 296 297def N2Write_4cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> { 298 let Latency = 4; 299 let NumMicroOps = 2; 300} 301 302//===----------------------------------------------------------------------===// 303// Define generic 3 micro-op types 304 305def N2Write_1cyc_1L01_1D_1I : SchedWriteRes<[N2UnitL01, N2UnitD, N2UnitI]> { 306 let Latency = 1; 307 let NumMicroOps = 3; 308} 309 310def N2Write_2cyc_1L01_1V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitI]> { 311 let Latency = 2; 312 let NumMicroOps = 3; 313} 314 315def N2Write_2cyc_1L01_2V : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV]> { 316 let Latency = 2; 317 let NumMicroOps = 3; 318} 319 320def N2Write_7cyc_1M_1M0_1V : SchedWriteRes<[N2UnitM, N2UnitM0, N2UnitV]> { 321 let Latency = 7; 322 let NumMicroOps = 3; 323} 324 325def N2Write_8cyc_1M0_1V1_1V : SchedWriteRes<[N2UnitM0, N2UnitV1, N2UnitV]> { 326 let Latency = 8; 327 let NumMicroOps = 3; 328} 329 330def N2Write_10cyc_1V_1L_1S : SchedWriteRes<[N2UnitV, N2UnitL, N2UnitL]> { 331 let Latency = 10; 332 let NumMicroOps = 3; 333} 334 335def N2Write_2cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> { 336 let Latency = 2; 337 let NumMicroOps = 3; 338} 339 340def N2Write_4cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> { 341 let Latency = 4; 342 let NumMicroOps = 3; 343} 344 345def N2Write_6cyc_3L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL]> { 346 let Latency = 6; 347 let NumMicroOps = 3; 348} 349 350def N2Write_8cyc_1L_2V : SchedWriteRes<[N2UnitL, N2UnitV, N2UnitV]> { 351 let Latency = 8; 352 let NumMicroOps = 3; 353} 354 355//===----------------------------------------------------------------------===// 356// Define generic 4 micro-op types 357 358def N2Write_2cyc_1L01_2V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV, 359 N2UnitI]> { 360 let Latency = 2; 361 let NumMicroOps = 4; 362} 363 364def N2Write_6cyc_4V0 : SchedWriteRes<[N2UnitV0, N2UnitV0, N2UnitV0, N2UnitV0]> { 365 let Latency = 6; 366 let NumMicroOps = 4; 367} 368 369def N2Write_4cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { 370 let Latency = 4; 371 let NumMicroOps = 4; 372} 373 374def N2Write_6cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { 375 let Latency = 6; 376 let NumMicroOps = 4; 377} 378 379def N2Write_8cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> { 380 let Latency = 8; 381 let NumMicroOps = 4; 382} 383 384def N2Write_9cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> { 385 let Latency = 9; 386 let NumMicroOps = 4; 387} 388 389def N2Write_2cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, 390 N2UnitV]> { 391 let Latency = 2; 392 let NumMicroOps = 4; 393} 394 395def N2Write_4cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, 396 N2UnitV]> { 397 let Latency = 4; 398 let NumMicroOps = 4; 399} 400 401def N2Write_5cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, 402 N2UnitV]> { 403 let Latency = 5; 404 let NumMicroOps = 4; 405} 406 407def N2Write_8cyc_2M0_2V0 : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitV0, 408 N2UnitV0]> { 409 let Latency = 8; 410 let NumMicroOps = 4; 411} 412 413def N2Write_11cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, 414 N2UnitV1]> { 415 let Latency = 11; 416 let NumMicroOps = 4; 417} 418 419def N2Write_9cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, 420 N2UnitV1]> { 421 let Latency = 9; 422 let NumMicroOps = 4; 423} 424 425def N2Write_8cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, 426 N2UnitV1]> { 427 let Latency = 8; 428 let NumMicroOps = 4; 429} 430 431def N2Write_10cyc_2L_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, 432 N2UnitV1]> { 433 let Latency = 10; 434 let NumMicroOps = 4; 435} 436 437def N2Write_10cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> { 438 let Latency = 10; 439 let NumMicroOps = 4; 440} 441 442def N2Write_4cyc_2M0_2M : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitM, 443 N2UnitM]> { 444 let Latency = 4; 445 let NumMicroOps = 4; 446} 447 448def N2Write_6cyc_2I_2L : SchedWriteRes<[N2UnitI, N2UnitI, N2UnitL, N2UnitL]> { 449 let Latency = 6; 450 let NumMicroOps = 4; 451} 452 453def N2Write_7cyc_4L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL]> { 454 let Latency = 7; 455 let NumMicroOps = 4; 456} 457 458//===----------------------------------------------------------------------===// 459// Define generic 5 micro-op types 460 461def N2Write_2cyc_1L01_2V_2I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV, 462 N2UnitI, N2UnitI]> { 463 let Latency = 2; 464 let NumMicroOps = 5; 465} 466 467def N2Write_8cyc_2L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV, 468 N2UnitV]> { 469 let Latency = 8; 470 let NumMicroOps = 5; 471} 472 473//===----------------------------------------------------------------------===// 474// Define generic 6 micro-op types 475 476def N2Write_8cyc_3L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, 477 N2UnitV, N2UnitV, N2UnitV]> { 478 let Latency = 8; 479 let NumMicroOps = 6; 480} 481 482def N2Write_2cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 483 N2UnitV, N2UnitV, N2UnitV]> { 484 let Latency = 2; 485 let NumMicroOps = 6; 486} 487 488def N2Write_6cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 489 N2UnitV, N2UnitV, N2UnitV]> { 490 let Latency = 6; 491 let NumMicroOps = 6; 492} 493 494def N2Write_4cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 495 N2UnitV, N2UnitV, N2UnitV]> { 496 let Latency = 4; 497 let NumMicroOps = 6; 498} 499 500def N2Write_10cyc_2L_2V_2S : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV, 501 N2UnitS, N2UnitS]> { 502 let Latency = 10; 503 let NumMicroOps = 6; 504} 505 506//===----------------------------------------------------------------------===// 507// Define generic 7 micro-op types 508 509def N2Write_8cyc_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, 510 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { 511 let Latency = 8; 512 let NumMicroOps = 7; 513} 514 515//===----------------------------------------------------------------------===// 516// Define generic 8 micro-op types 517 518def N2Write_6cyc_8V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV, 519 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { 520 let Latency = 6; 521 let NumMicroOps = 8; 522} 523 524def N2Write_2cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 525 N2UnitL01, N2UnitV, N2UnitV, N2UnitV, 526 N2UnitV]> { 527 let Latency = 2; 528 let NumMicroOps = 8; 529} 530 531def N2Write_5cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 532 N2UnitL01, N2UnitV, N2UnitV, N2UnitV, 533 N2UnitV]> { 534 let Latency = 5; 535 let NumMicroOps = 8; 536} 537 538def N2Write_8cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL, 539 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { 540 let Latency = 8; 541 let NumMicroOps = 8; 542} 543 544def N2Write_9cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL, 545 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { 546 let Latency = 9; 547 let NumMicroOps = 8; 548} 549 550//===----------------------------------------------------------------------===// 551// Define generic 10 micro-op types 552 553def N2Write_7cyc_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 554 N2UnitL01, N2UnitL01, N2UnitV, 555 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { 556 let Latency = 7; 557 let NumMicroOps = 10; 558} 559 560//===----------------------------------------------------------------------===// 561// Define generic 12 micro-op types 562 563def N2Write_7cyc_6L01_6V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 564 N2UnitL01, N2UnitL01, N2UnitL01, 565 N2UnitV, N2UnitV, N2UnitV, N2UnitV, 566 N2UnitV, N2UnitV]> { 567 let Latency = 7; 568 let NumMicroOps = 12; 569} 570 571//===----------------------------------------------------------------------===// 572// Define generic 15 micro-op types 573 574def N2Write_7cyc_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 575 N2UnitL01, N2UnitL01, N2UnitS, 576 N2UnitS, N2UnitS, N2UnitS, 577 N2UnitS, N2UnitV, N2UnitV, 578 N2UnitV, N2UnitV, N2UnitV]> { 579 let Latency = 7; 580 let NumMicroOps = 15; 581} 582 583//===----------------------------------------------------------------------===// 584// Define generic 18 micro-op types 585 586def N2Write_11cyc_9L01_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 587 N2UnitL01, N2UnitL01, N2UnitL01, 588 N2UnitL01, N2UnitL01, N2UnitL01, 589 N2UnitV, N2UnitV, N2UnitV, 590 N2UnitV, N2UnitV, N2UnitV, 591 N2UnitV, N2UnitV, N2UnitV]> { 592 let Latency = 11; 593 let NumMicroOps = 18; 594} 595 596//===----------------------------------------------------------------------===// 597// Define generic 27 micro-op types 598 599def N2Write_11cyc_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 600 N2UnitL01, N2UnitL01, N2UnitL01, 601 N2UnitL01, N2UnitL01, N2UnitL01, 602 N2UnitS, N2UnitS, N2UnitS, 603 N2UnitS, N2UnitS, N2UnitS, 604 N2UnitS, N2UnitS, N2UnitS, 605 N2UnitV, N2UnitV, N2UnitV, 606 N2UnitV, N2UnitV, N2UnitV, 607 N2UnitV, N2UnitV, N2UnitV]> { 608 let Latency = 11; 609 let NumMicroOps = 27; 610} 611 612// Miscellaneous 613// ----------------------------------------------------------------------------- 614 615def : InstRW<[WriteI], (instrs COPY)>; 616 617// Branch Instructions 618// ----------------------------------------------------------------------------- 619 620// Branch, immed 621// Compare and branch 622def : SchedAlias<WriteBr, N2Write_1cyc_1B>; 623 624// Branch, register 625def : SchedAlias<WriteBrReg, N2Write_1cyc_1B>; 626 627// Branch and link, immed 628// Branch and link, register 629def : InstRW<[N2Write_1cyc_1B_1S], (instrs BL, BLR)>; 630 631// Arithmetic and Logical Instructions 632// ----------------------------------------------------------------------------- 633 634// ALU, basic 635// ALU, basic, flagset 636def : SchedAlias<WriteI, N2Write_1cyc_1I>; 637 638// ALU, extend and shift 639def : SchedAlias<WriteISReg, N2Write_2cyc_1M>; 640def : SchedAlias<WriteIEReg, N2Write_2cyc_1M>; 641 642// Arithmetic, immediate to logical address tag 643def : InstRW<[N2Write_2cyc_1M], (instrs ADDG, SUBG)>; 644 645// Convert floating-point condition flags 646// Flag manipulation instructions 647def : WriteRes<WriteSys, []> { let Latency = 1; } 648 649// Insert Random Tags 650def : InstRW<[N2Write_2cyc_1M], (instrs IRG, IRGstack)>; 651 652// Insert Tag Mask 653// Subtract Pointer 654// Subtract Pointer, flagset 655def : InstRW<[N2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>; 656 657// Move and shift instructions 658// ----------------------------------------------------------------------------- 659 660def : SchedAlias<WriteImm, N2Write_1cyc_1I>; 661 662// Divide and Multiply Instructions 663// ----------------------------------------------------------------------------- 664 665// SDIV, UDIV 666def : SchedAlias<WriteID32, N2Write_12cyc_1M0>; 667def : SchedAlias<WriteID64, N2Write_20cyc_1M0>; 668 669def : WriteRes<WriteIM32, [N2UnitM]> { let Latency = 2; } 670def : WriteRes<WriteIM64, [N2UnitM]> { let Latency = 2; } 671 672// Multiply high 673def : InstRW<[N2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>; 674 675// Pointer Authentication Instructions (v8.3 PAC) 676// ----------------------------------------------------------------------------- 677 678// Authenticate data address 679// Authenticate instruction address 680// Compute pointer authentication code for data address 681// Compute pointer authentication code, using generic key 682// Compute pointer authentication code for instruction address 683def : InstRW<[N2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>; 684 685// Branch and link, register, with pointer authentication 686// Branch, register, with pointer authentication 687// Branch, return, with pointer authentication 688def : InstRW<[N2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA, 689 BRAAZ, BRAB, BRABZ, RETAA, RETAB, 690 ERETAA, ERETAB)>; 691 692 693// Load register, with pointer authentication 694def : InstRW<[N2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>; 695 696// Strip pointer authentication code 697def : InstRW<[N2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>; 698 699// Miscellaneous data-processing instructions 700// ----------------------------------------------------------------------------- 701 702// Bitfield extract, one reg 703// Bitfield extract, two regs 704// NOTE: We don't model the difference between EXTR where both operands are the 705// same (one reg). 706def : SchedAlias<WriteExtr, N2Write_3cyc_1I_1M>; 707def : InstRW<[N2Write_3cyc_1I_1M], (instrs EXTRWrri, EXTRXrri)>; 708 709// Bitfield move, basic 710def : SchedAlias<WriteIS, N2Write_1cyc_1I>; 711 712// Bitfield move, insert 713def : InstRW<[N2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>; 714 715// Load instructions 716// ----------------------------------------------------------------------------- 717 718def : SchedAlias<WriteLD, N2Write_4cyc_1L>; 719def : SchedAlias<WriteLDIdx, N2Write_4cyc_1I_1L>; 720 721// Load pair, signed immed offset, signed words 722def : InstRW<[N2Write_5cyc_1M0, WriteLDHi], (instrs LDPSWi)>; 723// Load pair, immed post-index or immed pre-index, signed words 724def : InstRW<[N2Write_5cyc_1M0, WriteLDHi, WriteAdr], 725 (instregex "^LDPSW(post|pre)$")>; 726 727// Store instructions 728// ----------------------------------------------------------------------------- 729 730def : SchedAlias<WriteST, N2Write_1cyc_1L01_1D>; 731def : SchedAlias<WriteSTIdx, N2Write_1cyc_1L01_1D_1I>; 732def : SchedAlias<WriteSTP, N2Write_1cyc_1L01_1D>; 733def : SchedAlias<WriteAdr, N2Write_1cyc_1I>; // copied from A57. 734 735// Tag load instructions 736// ----------------------------------------------------------------------------- 737 738// Load allocation tag 739// Load multiple allocation tags 740def : InstRW<[N2Write_4cyc_1L], (instrs LDG, LDGM)>; 741 742// Tag store instructions 743// ----------------------------------------------------------------------------- 744 745// Store allocation tags to one or two granules, post-index 746// Store allocation tags to one or two granules, pre-index 747// Store allocation tag to one or two granules, zeroing, post-index 748// Store Allocation Tag to one or two granules, zeroing, pre-index 749// Store allocation tag and reg pair to memory, post-Index 750// Store allocation tag and reg pair to memory, pre-Index 751def : InstRW<[N2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex, 752 ST2GPreIndex, ST2GPostIndex, 753 STZGPreIndex, STZGPostIndex, 754 STZ2GPreIndex, STZ2GPostIndex, 755 STGPpre, STGPpost)>; 756 757// Store allocation tags to one or two granules, signed offset 758// Store allocation tag to two granules, zeroing, signed offset 759// Store allocation tag and reg pair to memory, signed offset 760// Store multiple allocation tags 761def : InstRW<[N2Write_1cyc_1L01_1D], (instrs STGOffset, ST2GOffset, STZGOffset, 762 STZ2GOffset, STGPi, STGM, STZGM)>; 763 764// FP data processing instructions 765// ----------------------------------------------------------------------------- 766 767// FP absolute value 768// FP arithmetic 769// FP min/max 770// FP negate 771// FP select 772def : SchedAlias<WriteF, N2Write_2cyc_1V>; 773 774// FP compare 775def : SchedAlias<WriteFCmp, N2Write_2cyc_1V0>; 776 777// FP divide, square root 778def : SchedAlias<WriteFDiv, N2Write_7cyc_1V0>; 779 780// FP divide, H-form 781def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVHrr)>; 782// FP divide, S-form 783def : InstRW<[N2Write_10cyc_1V0], (instrs FDIVSrr)>; 784// FP divide, D-form 785def : InstRW<[N2Write_15cyc_1V0], (instrs FDIVDrr)>; 786 787// FP square root, H-form 788def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTHr)>; 789// FP square root, S-form 790def : InstRW<[N2Write_9cyc_1V0], (instrs FSQRTSr)>; 791// FP square root, D-form 792def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRTDr)>; 793 794// FP multiply 795def : WriteRes<WriteFMul, [N2UnitV]> { let Latency = 3; } 796 797// FP multiply accumulate 798def : InstRW<[N2Write_4cyc_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>; 799 800// FP round to integral 801def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ][HSD]r$", 802 "^FRINT(32|64)[XZ][SD]r$")>; 803 804// FP miscellaneous instructions 805// ----------------------------------------------------------------------------- 806 807// FP convert, from gen to vec reg 808def : InstRW<[N2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>; 809 810// FP convert, from vec to gen reg 811def : InstRW<[N2Write_3cyc_1V], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>; 812 813// FP convert, Javascript from vec to gen reg 814// FP convert, from vec to vec reg 815def : SchedAlias<WriteFCvt, N2Write_3cyc_1V0>; 816 817// FP move, immed 818// FP move, register 819def : SchedAlias<WriteFImm, N2Write_2cyc_1V>; 820 821// FP transfer, from gen to low half of vec reg 822def : InstRW<[N2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr, 823 FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>; 824 825// FP transfer, from gen to high half of vec reg 826def : InstRW<[N2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>; 827 828// FP transfer, from vec to gen reg 829def : SchedAlias<WriteFCopy, N2Write_2cyc_1V>; 830 831// FP load instructions 832// ----------------------------------------------------------------------------- 833 834// Load vector reg, literal, S/D/Q forms 835// Load vector reg, unscaled immed 836def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[SDQ]l$", 837 "^LDUR[BHSDQ]i$")>; 838 839// Load vector reg, immed post-index 840def : InstRW<[N2Write_6cyc_1I_1L, WriteI], (instregex "^LDR[BHSDQ]post$")>; 841// Load vector reg, immed pre-index 842def : InstRW<[N2Write_6cyc_1I_1L, WriteAdr], (instregex "^LDR[BHSDQ]pre$")>; 843 844// Load vector reg, unsigned immed 845def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>; 846 847// Load vector reg, register offset, basic 848// Load vector reg, register offset, scale, S/D-form 849// Load vector reg, register offset, extend 850// Load vector reg, register offset, extend, scale, S/D-form 851def : InstRW<[N2Write_6cyc_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>; 852 853// Load vector reg, register offset, scale, H/Q-form 854// Load vector reg, register offset, extend, scale, H/Q-form 855def : InstRW<[N2Write_7cyc_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>; 856 857// Load vector pair, immed offset, S/D-form 858def : InstRW<[N2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>; 859 860// Load vector pair, immed offset, Q-form 861def : InstRW<[N2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>; 862 863// Load vector pair, immed post-index, S/D-form 864// Load vector pair, immed pre-index, S/D-form 865def : InstRW<[N2Write_6cyc_1I_1L, WriteLDHi, WriteAdr], 866 (instregex "^LDP[SD](pre|post)$")>; 867 868// Load vector pair, immed post-index, Q-form 869// Load vector pair, immed pre-index, Q-form 870def : InstRW<[N2Write_6cyc_2I_2L, WriteLDHi, WriteAdr], (instrs LDPQpost, 871 LDPQpre)>; 872 873// FP store instructions 874// ----------------------------------------------------------------------------- 875 876// Store vector reg, unscaled immed, B/H/S/D-form 877// Store vector reg, unscaled immed, Q-form 878def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STUR[BHSDQ]i$")>; 879 880// Store vector reg, immed post-index, B/H/S/D-form 881// Store vector reg, immed post-index, Q-form 882// Store vector reg, immed pre-index, B/H/S/D-form 883// Store vector reg, immed pre-index, Q-form 884def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I, ReadAdrBase], 885 (instregex "^STR[BHSDQ](pre|post)$")>; 886 887// Store vector reg, unsigned immed, B/H/S/D-form 888// Store vector reg, unsigned immed, Q-form 889def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STR[BHSDQ]ui$")>; 890 891// Store vector reg, register offset, basic, B/H/S/D-form 892// Store vector reg, register offset, basic, Q-form 893// Store vector reg, register offset, scale, S/D-form 894// Store vector reg, register offset, extend, B/H/S/D-form 895// Store vector reg, register offset, extend, Q-form 896// Store vector reg, register offset, extend, scale, S/D-form 897def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase], 898 (instregex "^STR[BSD]ro[WX]$")>; 899 900// Store vector reg, register offset, scale, H-form 901// Store vector reg, register offset, scale, Q-form 902// Store vector reg, register offset, extend, scale, H-form 903// Store vector reg, register offset, extend, scale, Q-form 904def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase], 905 (instregex "^STR[HQ]ro[WX]$")>; 906 907// Store vector pair, immed offset, S-form 908// Store vector pair, immed offset, D-form 909def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STN?P[SD]i$")>; 910 911// Store vector pair, immed offset, Q-form 912def : InstRW<[N2Write_2cyc_1L01_2V], (instrs STPQi, STNPQi)>; 913 914// Store vector pair, immed post-index, S-form 915// Store vector pair, immed post-index, D-form 916// Store vector pair, immed pre-index, S-form 917// Store vector pair, immed pre-index, D-form 918def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I], 919 (instregex "^STP[SD](pre|post)$")>; 920 921// Store vector pair, immed post-index, Q-form 922def : InstRW<[N2Write_2cyc_1L01_2V_1I], (instrs STPQpost)>; 923 924// Store vector pair, immed pre-index, Q-form 925def : InstRW<[N2Write_2cyc_1L01_2V_2I], (instrs STPQpre)>; 926 927// ASIMD integer instructions 928// ----------------------------------------------------------------------------- 929 930// ASIMD absolute diff 931// ASIMD absolute diff long 932// ASIMD arith, basic 933// ASIMD arith, complex 934// ASIMD arith, pair-wise 935// ASIMD compare 936// ASIMD logical 937// ASIMD max/min, basic and pair-wise 938def : SchedAlias<WriteVd, N2Write_2cyc_1V>; 939def : SchedAlias<WriteVq, N2Write_2cyc_1V>; 940 941// ASIMD absolute diff accum 942// ASIMD absolute diff accum long 943def : InstRW<[N2Write_4cyc_1V1], 944 (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>; 945 946// ASIMD arith, reduce, 4H/4S 947def : InstRW<[N2Write_2cyc_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>; 948 949// ASIMD arith, reduce, 8B/8H 950def : InstRW<[N2Write_4cyc_1V1_1V], 951 (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>; 952 953// ASIMD arith, reduce, 16B 954def : InstRW<[N2Write_4cyc_1V1], (instrs ADDVv16i8v, SADDLVv16i8v, 955 UADDLVv16i8v)>; 956 957// ASIMD dot product 958// ASIMD dot product using signed and unsigned integers 959def : InstRW<[N2Write_3cyc_1V], 960 (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>; 961 962// ASIMD matrix multiply-accumulate 963def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA, UMMLA, USMMLA)>; 964 965// ASIMD max/min, reduce, 4H/4S 966def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU](MAX|MIN)Vv4i16v$", 967 "^[SU](MAX|MIN)Vv4i32v$")>; 968 969// ASIMD max/min, reduce, 8B/8H 970def : InstRW<[N2Write_4cyc_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$", 971 "^[SU](MAX|MIN)Vv8i16v$")>; 972 973// ASIMD max/min, reduce, 16B 974def : InstRW<[N2Write_4cyc_2V1], (instregex "[SU](MAX|MIN)Vv16i8v$")>; 975 976// ASIMD multiply 977def : InstRW<[N2Write_4cyc_1V0], (instregex "^MULv", "^SQ(R)?DMULHv")>; 978 979// ASIMD multiply accumulate 980def : InstRW<[N2Write_4cyc_1V0], (instregex "^MLAv", "^MLSv")>; 981 982// ASIMD multiply accumulate high 983def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>; 984 985// ASIMD multiply accumulate long 986def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MLALv", "^[SU]MLSLv")>; 987 988// ASIMD multiply accumulate saturating long 989def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMLALv", "^SQDMLSLv")>; 990 991// ASIMD multiply/multiply long (8x8) polynomial, D-form 992// ASIMD multiply/multiply long (8x8) polynomial, Q-form 993def : InstRW<[N2Write_3cyc_1V0], (instregex "^PMULL?(v8i8|v16i8)$")>; 994 995// ASIMD multiply long 996def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]MULLv", "^SQDMULLv")>; 997 998// ASIMD pairwise add and accumulate long 999def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALPv")>; 1000 1001// ASIMD shift accumulate 1002def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]SRAv", "^[SU]RSRAv")>; 1003 1004// ASIMD shift by immed, basic 1005def : InstRW<[N2Write_2cyc_1V1], (instregex "^SHLv", "^SHLLv", "^SHRNv", 1006 "^SSHLLv", "^SSHRv", "^USHLLv", 1007 "^USHRv")>; 1008 1009// ASIMD shift by immed and insert, basic 1010def : InstRW<[N2Write_2cyc_1V1], (instregex "^SLIv", "^SRIv")>; 1011 1012// ASIMD shift by immed, complex 1013def : InstRW<[N2Write_4cyc_1V1], 1014 (instregex "^RSHRNv", "^SQRSHRNv", "^SQRSHRUNv", 1015 "^(SQSHLU?|UQSHL)[bhsd]$", 1016 "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$", 1017 "^SQSHRNv", "^SQSHRUNv", "^SRSHRv", "^UQRSHRNv", 1018 "^UQSHRNv", "^URSHRv")>; 1019 1020// ASIMD shift by register, basic 1021def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]SHLv")>; 1022 1023// ASIMD shift by register, complex 1024def : InstRW<[N2Write_4cyc_1V1], 1025 (instregex "^[SU]RSHLv", "^[SU]QRSHLv", 1026 "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>; 1027 1028// ASIMD floating-point instructions 1029// ----------------------------------------------------------------------------- 1030 1031// ASIMD FP absolute value/difference 1032// ASIMD FP arith, normal 1033// ASIMD FP compare 1034// ASIMD FP complex add 1035// ASIMD FP max/min, normal 1036// ASIMD FP max/min, pairwise 1037// ASIMD FP negate 1038// Handled by SchedAlias<WriteV[dq], ...> 1039 1040// ASIMD FP complex multiply add 1041def : InstRW<[N2Write_4cyc_1V], (instregex "^FCMLAv")>; 1042 1043// ASIMD FP convert, long (F16 to F32) 1044def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTL(v4|v8)i16")>; 1045 1046// ASIMD FP convert, long (F32 to F64) 1047def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTL(v2|v4)i32")>; 1048 1049// ASIMD FP convert, narrow (F32 to F16) 1050def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTN(v4|v8)i16")>; 1051 1052// ASIMD FP convert, narrow (F64 to F32) 1053def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTN(v2|v4)i32", 1054 "^FCVTXN(v2|v4)f32")>; 1055 1056// ASIMD FP convert, other, D-form F32 and Q-form F64 1057def : InstRW<[N2Write_3cyc_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$", 1058 "^[SU]CVTFv2f(32|64)$")>; 1059 1060// ASIMD FP convert, other, D-form F16 and Q-form F32 1061def : InstRW<[N2Write_4cyc_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$", 1062 "^[SU]CVTFv4f(16|32)$")>; 1063 1064// ASIMD FP convert, other, Q-form F16 1065def : InstRW<[N2Write_6cyc_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$", 1066 "^[SU]CVTFv8f16$")>; 1067 1068// ASIMD FP divide, D-form, F16 1069def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVv4f16)>; 1070 1071// ASIMD FP divide, D-form, F32 1072def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv2f32)>; 1073 1074// ASIMD FP divide, Q-form, F16 1075def : InstRW<[N2Write_13cyc_2V0], (instrs FDIVv8f16)>; 1076 1077// ASIMD FP divide, Q-form, F32 1078def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv4f32)>; 1079 1080// ASIMD FP divide, Q-form, F64 1081def : InstRW<[N2Write_15cyc_2V0], (instrs FDIVv2f64)>; 1082 1083// ASIMD FP max/min, reduce, F32 and D-form F16 1084def : InstRW<[N2Write_4cyc_1V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>; 1085 1086// ASIMD FP max/min, reduce, Q-form F16 1087def : InstRW<[N2Write_6cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>; 1088 1089// ASIMD FP multiply 1090def : InstRW<[N2Write_3cyc_1V], (instregex "^FMULv", "^FMULXv")>; 1091 1092// ASIMD FP multiply accumulate 1093def : InstRW<[N2Write_4cyc_1V], (instregex "^FMLAv", "^FMLSv")>; 1094 1095// ASIMD FP multiply accumulate long 1096def : InstRW<[N2Write_5cyc_1V], (instregex "^FMLALv", "^FMLSLv")>; 1097 1098// ASIMD FP round, D-form F32 and Q-form F64 1099def : InstRW<[N2Write_3cyc_1V0], 1100 (instregex "^FRINT[AIMNPXZ]v2f(32|64)$", 1101 "^FRINT[32|64)[XZ]v2f(32|64)$")>; 1102 1103// ASIMD FP round, D-form F16 and Q-form F32 1104def : InstRW<[N2Write_4cyc_2V0], 1105 (instregex "^FRINT[AIMNPXZ]v4f(16|32)$", 1106 "^FRINT(32|64)[XZ]v4f32$")>; 1107 1108 1109// ASIMD FP round, Q-form F16 1110def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>; 1111 1112// ASIMD FP square root, D-form, F16 1113def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTv4f16)>; 1114 1115// ASIMD FP square root, D-form, F32 1116def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv2f32)>; 1117 1118// ASIMD FP square root, Q-form, F16 1119def : InstRW<[N2Write_13cyc_2V0], (instrs FSQRTv8f16)>; 1120 1121// ASIMD FP square root, Q-form, F32 1122def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv4f32)>; 1123 1124// ASIMD FP square root, Q-form, F64 1125def : InstRW<[N2Write_16cyc_2V0], (instrs FSQRTv2f64)>; 1126 1127// ASIMD BFloat16 (BF16) instructions 1128// ----------------------------------------------------------------------------- 1129 1130// ASIMD convert, F32 to BF16 1131def : InstRW<[N2Write_4cyc_1V0], (instrs BFCVTN, BFCVTN2)>; 1132 1133// ASIMD dot product 1134def : InstRW<[N2Write_4cyc_1V], (instrs BFDOTv4bf16, BFDOTv8bf16)>; 1135 1136// ASIMD matrix multiply accumulate 1137def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA)>; 1138 1139// ASIMD multiply accumulate long 1140def : InstRW<[N2Write_4cyc_1V], (instrs BFMLALB, BFMLALBIdx, BFMLALT, 1141 BFMLALTIdx)>; 1142 1143// Scalar convert, F32 to BF16 1144def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT)>; 1145 1146// ASIMD miscellaneous instructions 1147// ----------------------------------------------------------------------------- 1148 1149// ASIMD bit reverse 1150// ASIMD bitwise insert 1151// ASIMD count 1152// ASIMD duplicate, element 1153// ASIMD extract 1154// ASIMD extract narrow 1155// ASIMD insert, element to element 1156// ASIMD move, FP immed 1157// ASIMD move, integer immed 1158// ASIMD reverse 1159// ASIMD table lookup, 1 or 2 table regs 1160// ASIMD table lookup extension, 1 table reg 1161// ASIMD transfer, element to gen reg 1162// ASIMD transpose 1163// ASIMD unzip/zip 1164// Handled by SchedAlias<WriteV[dq], ...> 1165 1166// ASIMD duplicate, gen reg 1167def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>; 1168 1169// ASIMD extract narrow, saturating 1170def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>; 1171 1172// ASIMD reciprocal and square root estimate, D-form U32 1173def : InstRW<[N2Write_3cyc_1V0], (instrs URECPEv2i32, URSQRTEv2i32)>; 1174 1175// ASIMD reciprocal and square root estimate, Q-form U32 1176def : InstRW<[N2Write_4cyc_2V0], (instrs URECPEv4i32, URSQRTEv4i32)>; 1177 1178// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms 1179def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPEv1f16, FRECPEv1i32, 1180 FRECPEv1i64, FRECPEv2f32, 1181 FRSQRTEv1f16, FRSQRTEv1i32, 1182 FRSQRTEv1i64, FRSQRTEv2f32)>; 1183 1184// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 1185def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPEv4f16, FRECPEv4f32, 1186 FRSQRTEv4f16, FRSQRTEv4f32)>; 1187 1188// ASIMD reciprocal and square root estimate, Q-form F16 1189def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPEv8f16, FRSQRTEv8f16)>; 1190 1191// ASIMD reciprocal exponent 1192def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRECPXv")>; 1193 1194// ASIMD reciprocal step 1195def : InstRW<[N2Write_4cyc_1V], (instregex "^FRECPSv", "^FRSQRTSv")>; 1196 1197// ASIMD table lookup, 3 table regs 1198def : InstRW<[N2Write_4cyc_2V], (instrs TBLv8i8Three, TBLv16i8Three)>; 1199 1200// ASIMD table lookup, 4 table regs 1201def : InstRW<[N2Write_4cyc_4V], (instrs TBLv8i8Four, TBLv16i8Four)>; 1202 1203// ASIMD table lookup extension, 2 table reg 1204def : InstRW<[N2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>; 1205 1206// ASIMD table lookup extension, 3 table reg 1207def : InstRW<[N2Write_6cyc_4V], (instrs TBXv8i8Three, TBXv16i8Three)>; 1208 1209// ASIMD table lookup extension, 4 table reg 1210def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>; 1211 1212// ASIMD transfer, gen reg to element 1213def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSv")>; 1214 1215// ASIMD load instructions 1216// ----------------------------------------------------------------------------- 1217 1218// ASIMD load, 1 element, multiple, 1 reg, D-form 1219def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>; 1220def : InstRW<[N2Write_6cyc_1L, WriteAdr], 1221 (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>; 1222 1223// ASIMD load, 1 element, multiple, 1 reg, Q-form 1224def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>; 1225def : InstRW<[N2Write_6cyc_1L, WriteAdr], 1226 (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>; 1227 1228// ASIMD load, 1 element, multiple, 2 reg, D-form 1229def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; 1230def : InstRW<[N2Write_6cyc_2L, WriteAdr], 1231 (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; 1232 1233// ASIMD load, 1 element, multiple, 2 reg, Q-form 1234def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; 1235def : InstRW<[N2Write_6cyc_2L, WriteAdr], 1236 (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; 1237 1238// ASIMD load, 1 element, multiple, 3 reg, D-form 1239def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; 1240def : InstRW<[N2Write_6cyc_3L, WriteAdr], 1241 (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; 1242 1243// ASIMD load, 1 element, multiple, 3 reg, Q-form 1244def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; 1245def : InstRW<[N2Write_6cyc_3L, WriteAdr], 1246 (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; 1247 1248// ASIMD load, 1 element, multiple, 4 reg, D-form 1249def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; 1250def : InstRW<[N2Write_7cyc_4L, WriteAdr], 1251 (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; 1252 1253// ASIMD load, 1 element, multiple, 4 reg, Q-form 1254def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; 1255def : InstRW<[N2Write_7cyc_4L, WriteAdr], 1256 (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; 1257 1258// ASIMD load, 1 element, one lane, B/H/S 1259// ASIMD load, 1 element, one lane, D 1260def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)$")>; 1261def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>; 1262 1263// ASIMD load, 1 element, all lanes, D-form, B/H/S 1264// ASIMD load, 1 element, all lanes, D-form, D 1265def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)$")>; 1266def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>; 1267 1268// ASIMD load, 1 element, all lanes, Q-form 1269def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>; 1270def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; 1271 1272// ASIMD load, 2 element, multiple, D-form, B/H/S 1273def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)$")>; 1274def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; 1275 1276// ASIMD load, 2 element, multiple, Q-form, B/H/S 1277// ASIMD load, 2 element, multiple, Q-form, D 1278def : InstRW<[N2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)$")>; 1279def : InstRW<[N2Write_8cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; 1280 1281// ASIMD load, 2 element, one lane, B/H 1282// ASIMD load, 2 element, one lane, S 1283// ASIMD load, 2 element, one lane, D 1284def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)$")>; 1285def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>; 1286 1287// ASIMD load, 2 element, all lanes, D-form, B/H/S 1288// ASIMD load, 2 element, all lanes, D-form, D 1289def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)$")>; 1290def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>; 1291 1292// ASIMD load, 2 element, all lanes, Q-form 1293def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>; 1294def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; 1295 1296// ASIMD load, 3 element, multiple, D-form, B/H/S 1297def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)$")>; 1298def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>; 1299 1300// ASIMD load, 3 element, multiple, Q-form, B/H/S 1301def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s)$")>; 1302def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>; 1303 1304// ASIMD load, 3 element, multiple, Q-form, D 1305def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Threev(2d)$")>; 1306def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(2d)_POST$")>; 1307 1308// ASIMD load, 3 element, one lane, B/H 1309// ASIMD load, 3 element, one lane, S 1310// ASIMD load, 3 element, one lane, D 1311def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)$")>; 1312def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>; 1313 1314// ASIMD load, 3 element, all lanes, D-form, B/H/S 1315// ASIMD load, 3 element, all lanes, D-form, D 1316def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)$")>; 1317def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>; 1318 1319// ASIMD load, 3 element, all lanes, Q-form, B/H/S 1320// ASIMD load, 3 element, all lanes, Q-form, D 1321def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)$")>; 1322def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>; 1323 1324// ASIMD load, 4 element, multiple, D-form, B/H/S 1325def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)$")>; 1326def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; 1327 1328// ASIMD load, 4 element, multiple, Q-form, B/H/S 1329// ASIMD load, 4 element, multiple, Q-form, D 1330def : InstRW<[N2Write_9cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; 1331def : InstRW<[N2Write_9cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; 1332 1333// ASIMD load, 4 element, one lane, B/H 1334// ASIMD load, 4 element, one lane, S 1335// ASIMD load, 4 element, one lane, D 1336def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)$")>; 1337def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>; 1338 1339// ASIMD load, 4 element, all lanes, D-form, B/H/S 1340// ASIMD load, 4 element, all lanes, D-form, D 1341def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>; 1342def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>; 1343 1344// ASIMD load, 4 element, all lanes, Q-form, B/H/S 1345// ASIMD load, 4 element, all lanes, Q-form, D 1346def : InstRW<[N2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>; 1347def : InstRW<[N2Write_8cyc_4L_4V, WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>; 1348 1349// ASIMD store instructions 1350// ----------------------------------------------------------------------------- 1351 1352// ASIMD store, 1 element, multiple, 1 reg, D-form 1353def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(8b|4h|2s|1d)$")>; 1354def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; 1355 1356// ASIMD store, 1 element, multiple, 1 reg, Q-form 1357def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(16b|8h|4s|2d)$")>; 1358def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; 1359 1360// ASIMD store, 1 element, multiple, 2 reg, D-form 1361def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Twov(8b|4h|2s|1d)$")>; 1362def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; 1363 1364// ASIMD store, 1 element, multiple, 2 reg, Q-form 1365def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Twov(16b|8h|4s|2d)$")>; 1366def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; 1367 1368// ASIMD store, 1 element, multiple, 3 reg, D-form 1369def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Threev(8b|4h|2s|1d)$")>; 1370def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; 1371 1372// ASIMD store, 1 element, multiple, 3 reg, Q-form 1373def : InstRW<[N2Write_2cyc_3L01_3V], (instregex "ST1Threev(16b|8h|4s|2d)$")>; 1374def : InstRW<[N2Write_2cyc_3L01_3V, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; 1375 1376// ASIMD store, 1 element, multiple, 4 reg, D-form 1377def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; 1378def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; 1379 1380// ASIMD store, 1 element, multiple, 4 reg, Q-form 1381def : InstRW<[N2Write_2cyc_4L01_4V], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; 1382def : InstRW<[N2Write_2cyc_4L01_4V, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; 1383 1384// ASIMD store, 1 element, one lane, B/H/S 1385// ASIMD store, 1 element, one lane, D 1386def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST1i(8|16|32|64)$")>; 1387def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>; 1388 1389// ASIMD store, 2 element, multiple, D-form, B/H/S 1390def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST2Twov(8b|4h|2s)$")>; 1391def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; 1392 1393// ASIMD store, 2 element, multiple, Q-form, B/H/S 1394// ASIMD store, 2 element, multiple, Q-form, D 1395def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "ST2Twov(16b|8h|4s|2d)$")>; 1396def : InstRW<[N2Write_4cyc_2L01_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; 1397 1398// ASIMD store, 2 element, one lane, B/H/S 1399// ASIMD store, 2 element, one lane, D 1400def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST2i(8|16|32|64)$")>; 1401def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>; 1402 1403// ASIMD store, 3 element, multiple, D-form, B/H/S 1404def : InstRW<[N2Write_5cyc_2L01_2V], (instregex "ST3Threev(8b|4h|2s)$")>; 1405def : InstRW<[N2Write_5cyc_2L01_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; 1406 1407// ASIMD store, 3 element, multiple, Q-form, B/H/S 1408// ASIMD store, 3 element, multiple, Q-form, D 1409def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST3Threev(16b|8h|4s|2d)$")>; 1410def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>; 1411 1412// ASIMD store, 3 element, one lane, B/H 1413// ASIMD store, 3 element, one lane, S 1414// ASIMD store, 3 element, one lane, D 1415def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST3i(8|16|32|64)$")>; 1416def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>; 1417 1418// ASIMD store, 4 element, multiple, D-form, B/H/S 1419def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST4Fourv(8b|4h|2s)$")>; 1420def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; 1421 1422// ASIMD store, 4 element, multiple, Q-form, B/H/S 1423def : InstRW<[N2Write_7cyc_6L01_6V], (instregex "ST4Fourv(16b|8h|4s)$")>; 1424def : InstRW<[N2Write_7cyc_6L01_6V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; 1425 1426// ASIMD store, 4 element, multiple, Q-form, D 1427def : InstRW<[N2Write_5cyc_4L01_4V], (instregex "ST4Fourv(2d)$")>; 1428def : InstRW<[N2Write_5cyc_4L01_4V, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; 1429 1430// ASIMD store, 4 element, one lane, B/H/S 1431def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST4i(8|16|32)$")>; 1432def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST4i(8|16|32)_POST$")>; 1433 1434// ASIMD store, 4 element, one lane, D 1435def : InstRW<[N2Write_4cyc_3L01_3V], (instregex "ST4i(64)$")>; 1436def : InstRW<[N2Write_4cyc_3L01_3V, WriteAdr], (instregex "ST4i(64)_POST$")>; 1437 1438// Cryptography extensions 1439// ----------------------------------------------------------------------------- 1440 1441// Crypto AES ops 1442def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>; 1443 1444// Crypto polynomial (64x64) multiply long 1445def : InstRW<[N2Write_2cyc_1V0], (instrs PMULLv1i64, PMULLv2i64)>; 1446 1447// Crypto SHA1 hash acceleration op 1448// Crypto SHA1 schedule acceleration ops 1449def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>; 1450 1451// Crypto SHA1 hash acceleration ops 1452// Crypto SHA256 hash acceleration ops 1453def : InstRW<[N2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>; 1454 1455// Crypto SHA256 schedule acceleration ops 1456def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>; 1457 1458// Crypto SHA512 hash acceleration ops 1459def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>; 1460 1461// Crypto SHA3 ops 1462def : InstRW<[N2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>; 1463 1464// Crypto SM3 ops 1465def : InstRW<[N2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$", 1466 "^SM3TT[12][AB]$")>; 1467 1468// Crypto SM4 ops 1469def : InstRW<[N2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>; 1470 1471// CRC 1472// ----------------------------------------------------------------------------- 1473 1474def : InstRW<[N2Write_2cyc_1M0], (instregex "^CRC32")>; 1475 1476// SVE Predicate instructions 1477// ----------------------------------------------------------------------------- 1478 1479// Loop control, based on predicate 1480def : InstRW<[N2Write_2cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP, 1481 BRKB_PPmP, BRKB_PPzP)>; 1482 1483// Loop control, based on predicate and flag setting 1484def : InstRW<[N2Write_3cyc_1M], (instrs BRKAS_PPzP, BRKBS_PPzP)>; 1485 1486// Loop control, propagating 1487def : InstRW<[N2Write_2cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>; 1488 1489// Loop control, propagating and flag setting 1490def : InstRW<[N2Write_3cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP, 1491 BRKPBS_PPzPP)>; 1492 1493// Loop control, based on GPR 1494def : InstRW<[N2Write_3cyc_1M], 1495 (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>; 1496 1497def : InstRW<[N2Write_3cyc_1M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>; 1498 1499// Loop terminate 1500def : InstRW<[N2Write_1cyc_1M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>; 1501 1502// Predicate counting scalar 1503def : InstRW<[N2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>; 1504def : InstRW<[N2Write_2cyc_1M], 1505 (instregex "^(CNT|DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$", 1506 "^SQ(DEC|INC)[BHWD]_XPiWdI$", 1507 "^(UQDEC|UQINC)[BHWD]_WPiI$")>; 1508 1509// Predicate counting scalar, active predicate 1510def : InstRW<[N2Write_2cyc_1M], 1511 (instregex "^CNTP_XPP_[BHSD]$", 1512 "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$", 1513 "^(UQDEC|UQINC)P_WP_[BHSD]$", 1514 "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>; 1515 1516// Predicate counting vector, active predicate 1517def : InstRW<[N2Write_7cyc_1M_1M0_1V], 1518 (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>; 1519 1520// Predicate logical 1521def : InstRW<[N2Write_1cyc_1M0], 1522 (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>; 1523 1524// Predicate logical, flag setting 1525def : InstRW<[N2Write_2cyc_1M0_1M], 1526 (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>; 1527 1528// Predicate reverse 1529def : InstRW<[N2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>; 1530 1531// Predicate select 1532def : InstRW<[N2Write_1cyc_1M0], (instrs SEL_PPPP)>; 1533 1534// Predicate set 1535def : InstRW<[N2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>; 1536 1537// Predicate set/initialize, set flags 1538def : InstRW<[N2Write_3cyc_1M], (instregex "^PTRUES_[BHSD]$")>; 1539 1540// Predicate find first/next 1541def : InstRW<[N2Write_3cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>; 1542 1543// Predicate test 1544def : InstRW<[N2Write_1cyc_1M], (instrs PTEST_PP)>; 1545 1546// Predicate transpose 1547def : InstRW<[N2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSDQ]$")>; 1548 1549// Predicate unpack and widen 1550def : InstRW<[N2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>; 1551 1552// Predicate zip/unzip 1553def : InstRW<[N2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>; 1554 1555// SVE integer instructions 1556// ----------------------------------------------------------------------------- 1557 1558// Arithmetic, absolute diff 1559def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]$")>; 1560 1561// Arithmetic, absolute diff accum 1562def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>; 1563 1564// Arithmetic, absolute diff accum long 1565def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>; 1566 1567// Arithmetic, absolute diff long 1568def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>; 1569 1570// Arithmetic, basic 1571def : InstRW<[N2Write_2cyc_1V], 1572 (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]$", 1573 "^(ADD|SUB)_ZZZ_[BHSD]$", 1574 "^(ADD|SUB|SUBR)_ZI_[BHSD]$", 1575 "^ADR_[SU]XTW_ZZZ_D_[0123]$", 1576 "^ADR_LSL_ZZZ_[SD]_[0123]$", 1577 "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$", 1578 "^SADDLBT_ZZZ_[HSD]$", 1579 "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$", 1580 "^SSUBL(BT|TB)_ZZZ_[HSD]$")>; 1581 1582// Arithmetic, complex 1583def : InstRW<[N2Write_2cyc_1V], 1584 (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$", 1585 "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]$", 1586 "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$", 1587 "^[SU]Q(ADD|SUB)_ZI_[BHSD]$", 1588 "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$", 1589 "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>; 1590 1591// Arithmetic, large integer 1592def : InstRW<[N2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>; 1593 1594// Arithmetic, pairwise add 1595def : InstRW<[N2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>; 1596 1597// Arithmetic, pairwise add and accum long 1598def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>; 1599 1600// Arithmetic, shift 1601def : InstRW<[N2Write_2cyc_1V1], 1602 (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$", 1603 "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$", 1604 "^(ASR|LSL|LSR)_ZPmI_[BHSD]$", 1605 "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$", 1606 "^(ASR|LSL|LSR)_ZZI_[BHSD]$", 1607 "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>; 1608 1609// Arithmetic, shift and accumulate 1610def : InstRW<[N2Write_4cyc_1V1], 1611 (instregex "^(SRSRA|SSRA|URSRA|USRA)_ZZI_[BHSD]$")>; 1612 1613// Arithmetic, shift by immediate 1614// Arithmetic, shift by immediate and insert 1615def : InstRW<[N2Write_2cyc_1V1], 1616 (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>; 1617 1618// Arithmetic, shift complex 1619def : InstRW<[N2Write_4cyc_1V1], 1620 (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$", 1621 "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$", 1622 "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$", 1623 "^SQSHRU?N[BT]_ZZI_[BHS]$", 1624 "^UQR?SHRN[BT]_ZZI_[BHS]$")>; 1625 1626// Arithmetic, shift right for divide 1627def : InstRW<[N2Write_4cyc_1V1], (instregex "^ASRD_ZPmI_[BHSD]$")>; 1628 1629// Arithmetic, shift rounding 1630def : InstRW<[N2Write_4cyc_1V1], 1631 (instregex "^(SRSHL|SRSHLR|URSHL|URSHLR)_ZPmZ_[BHSD]$", 1632 "^[SU]RSHR_ZPmI_[BHSD]$")>; 1633 1634// Bit manipulation 1635def : InstRW<[N2Write_6cyc_2V1], 1636 (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]$")>; 1637 1638// Bitwise select 1639def : InstRW<[N2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>; 1640 1641// Count/reverse bits 1642def : InstRW<[N2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>; 1643 1644// Broadcast logical bitmask immediate to vector 1645def : InstRW<[N2Write_2cyc_1V], (instrs DUPM_ZI)>; 1646 1647// Compare and set flags 1648def : InstRW<[N2Write_4cyc_1V0_1M], 1649 (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$", 1650 "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>; 1651 1652// Complex add 1653def : InstRW<[N2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>; 1654 1655// Complex dot product 8-bit element 1656def : InstRW<[N2Write_3cyc_1V], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>; 1657 1658// Complex dot product 16-bit element 1659def : InstRW<[N2Write_4cyc_1V0], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>; 1660 1661// Complex multiply-add B, H, S element size 1662def : InstRW<[N2Write_4cyc_1V0], (instregex "^CMLA_ZZZ_[BHS]$", 1663 "^CMLA_ZZZI_[HS]$")>; 1664 1665// Complex multiply-add D element size 1666def : InstRW<[N2Write_5cyc_2V0], (instrs CMLA_ZZZ_D)>; 1667 1668// Conditional extract operations, scalar form 1669def : InstRW<[N2Write_8cyc_1M0_1V1_1V], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>; 1670 1671// Conditional extract operations, SIMD&FP scalar and vector forms 1672def : InstRW<[N2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$", 1673 "^COMPACT_ZPZ_[SD]$", 1674 "^SPLICE_ZPZZ?_[BHSD]$")>; 1675 1676// Convert to floating point, 64b to float or convert to double 1677def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]$")>; 1678 1679// Convert to floating point, 64b to half 1680def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_DtoH$")>; 1681 1682// Convert to floating point, 32b to single or half 1683def : InstRW<[N2Write_4cyc_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>; 1684 1685// Convert to floating point, 32b to double 1686def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_StoD$")>; 1687 1688// Convert to floating point, 16b to half 1689def : InstRW<[N2Write_6cyc_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>; 1690 1691// Copy, scalar 1692def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>; 1693 1694// Copy, scalar SIMD&FP or imm 1695def : InstRW<[N2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$", 1696 "^CPY_ZPzI_[BHSD]$")>; 1697 1698// Divides, 32 bit 1699def : InstRW<[N2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>; 1700 1701// Divides, 64 bit 1702def : InstRW<[N2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>; 1703 1704// Dot product, 8 bit 1705def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]DOT_ZZZI?_S$")>; 1706 1707// Dot product, 8 bit, using signed and unsigned integers 1708def : InstRW<[N2Write_3cyc_1V], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>; 1709 1710// Dot product, 16 bit 1711def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>; 1712 1713// Duplicate, immediate and indexed form 1714def : InstRW<[N2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$", 1715 "^DUP_ZZI_[BHSDQ]$")>; 1716 1717// Duplicate, scalar form 1718def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>; 1719 1720// Extend, sign or zero 1721def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$", 1722 "^[SU]XTH_ZPmZ_[SD]$", 1723 "^[SU]XTW_ZPmZ_[D]$")>; 1724 1725// Extract 1726def : InstRW<[N2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>; 1727 1728// Extract narrow saturating 1729def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$", 1730 "^SQXTUN[BT]_ZZ_[BHS]$")>; 1731 1732// Extract/insert operation, SIMD and FP scalar form 1733def : InstRW<[N2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$", 1734 "^INSR_ZV_[BHSD]$")>; 1735 1736// Extract/insert operation, scalar 1737def : InstRW<[N2Write_5cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]$", 1738 "^INSR_ZR_[BHSD]$")>; 1739 1740// Histogram operations 1741def : InstRW<[N2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$", 1742 "^HISTSEG_ZZZ$")>; 1743 1744// Horizontal operations, B, H, S form, immediate operands only 1745def : InstRW<[N2Write_4cyc_1V0], (instregex "^INDEX_II_[BHS]$")>; 1746 1747// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar 1748// operands only / immediate, scalar operands 1749def : InstRW<[N2Write_7cyc_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>; 1750 1751// Horizontal operations, D form, immediate operands only 1752def : InstRW<[N2Write_5cyc_2V0], (instrs INDEX_II_D)>; 1753 1754// Horizontal operations, D form, scalar, immediate operands)/ scalar operands 1755// only / immediate, scalar operands 1756def : InstRW<[N2Write_8cyc_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>; 1757 1758// Logical 1759def : InstRW<[N2Write_2cyc_1V], 1760 (instregex "^(AND|EOR|ORR)_ZI$", 1761 "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$", 1762 "^EOR(BT|TB)_ZZZ_[BHSD]$", 1763 "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>; 1764 1765// Max/min, basic and pairwise 1766def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$", 1767 "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$")>; 1768 1769// Matching operations 1770def : InstRW<[N2Write_2cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>; 1771 1772// Matrix multiply-accumulate 1773def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; 1774 1775// Move prefix 1776def : InstRW<[N2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$", 1777 "^MOVPRFX_ZZ$")>; 1778 1779// Multiply, B, H, S element size 1780def : InstRW<[N2Write_4cyc_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$", 1781 "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>; 1782 1783// Multiply, D element size 1784def : InstRW<[N2Write_5cyc_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$", 1785 "^[SU]MULH_(ZPmZ|ZZZ)_D$")>; 1786 1787// Multiply long 1788def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$", 1789 "^[SU]MULL[BT]_ZZZ_[HSD]$")>; 1790 1791// Multiply accumulate, B, H, S element size 1792def : InstRW<[N2Write_4cyc_1V0], (instregex "^ML[AS]_ZZZI_[BHS]$", 1793 "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>; 1794 1795// Multiply accumulate, D element size 1796def : InstRW<[N2Write_5cyc_2V0], (instregex "^ML[AS]_ZZZI_D$", 1797 "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>; 1798 1799// Multiply accumulate long 1800def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$", 1801 "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>; 1802 1803// Multiply accumulate saturating doubling long regular 1804def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$", 1805 "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>; 1806 1807// Multiply saturating doubling high, B, H, S element size 1808def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULH_ZZZ_[BHS]$", 1809 "^SQDMULH_ZZZI_[HS]$")>; 1810 1811// Multiply saturating doubling high, D element size 1812def : InstRW<[N2Write_5cyc_2V0], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>; 1813 1814// Multiply saturating doubling long 1815def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$", 1816 "^SQDMULL[BT]_ZZZI_[SD]$")>; 1817 1818// Multiply saturating rounding doubling regular/complex accumulate, B, H, S 1819// element size 1820def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$", 1821 "^SQRDCMLAH_ZZZ_[BHS]$", 1822 "^SQRDML[AS]H_ZZZI_[HS]$", 1823 "^SQRDCMLAH_ZZZI_[HS]$")>; 1824 1825// Multiply saturating rounding doubling regular/complex accumulate, D element 1826// size 1827def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDML[AS]H_ZZZI?_D$", 1828 "^SQRDCMLAH_ZZZ_D$")>; 1829 1830// Multiply saturating rounding doubling regular/complex, B, H, S element size 1831def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMULH_ZZZ_[BHS]$", 1832 "^SQRDMULH_ZZZI_[HS]$")>; 1833 1834// Multiply saturating rounding doubling regular/complex, D element size 1835def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDMULH_ZZZI?_D$")>; 1836 1837// Multiply/multiply long, (8x8) polynomial 1838def : InstRW<[N2Write_2cyc_1V0], (instregex "^PMUL_ZZZ_B$", 1839 "^PMULL[BT]_ZZZ_[HDQ]$")>; 1840 1841// Predicate counting vector 1842def : InstRW<[N2Write_2cyc_1V0], 1843 (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>; 1844 1845// Reciprocal estimate 1846def : InstRW<[N2Write_4cyc_2V0], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S)>; 1847 1848// Reduction, arithmetic, B form 1849def : InstRW<[N2Write_11cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>; 1850 1851// Reduction, arithmetic, H form 1852def : InstRW<[N2Write_9cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>; 1853 1854// Reduction, arithmetic, S form 1855def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>; 1856 1857// Reduction, arithmetic, D form 1858def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>; 1859 1860// Reduction, logical 1861def : InstRW<[N2Write_6cyc_1V_1V1], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>; 1862 1863// Reverse, vector 1864def : InstRW<[N2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$", 1865 "^REVB_ZPmZ_[HSD]$", 1866 "^REVH_ZPmZ_[SD]$", 1867 "^REVW_ZPmZ_D$")>; 1868 1869// Select, vector form 1870def : InstRW<[N2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>; 1871 1872// Table lookup 1873def : InstRW<[N2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>; 1874 1875// Table lookup extension 1876def : InstRW<[N2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>; 1877 1878// Transpose, vector form 1879def : InstRW<[N2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>; 1880 1881// Unpack and extend 1882def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>; 1883 1884// Zip/unzip 1885def : InstRW<[N2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>; 1886 1887// SVE floating-point instructions 1888// ----------------------------------------------------------------------------- 1889 1890// Floating point absolute value/difference 1891def : InstRW<[N2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]$")>; 1892 1893// Floating point arithmetic 1894def : InstRW<[N2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$", 1895 "^FADDP_ZPmZZ_[HSD]$", 1896 "^FNEG_ZPmZ_[HSD]$", 1897 "^FSUBR_ZPm[IZ]_[HSD]$")>; 1898 1899// Floating point associative add, F16 1900def : InstRW<[N2Write_10cyc_1V1], (instrs FADDA_VPZ_H)>; 1901 1902// Floating point associative add, F32 1903def : InstRW<[N2Write_6cyc_1V1], (instrs FADDA_VPZ_S)>; 1904 1905// Floating point associative add, F64 1906def : InstRW<[N2Write_4cyc_1V], (instrs FADDA_VPZ_D)>; 1907 1908// Floating point compare 1909def : InstRW<[N2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]$", 1910 "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$", 1911 "^FCM(LE|LT)_PPzZ0_[HSD]$", 1912 "^FCMUO_PPzZZ_[HSD]$")>; 1913 1914// Floating point complex add 1915def : InstRW<[N2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>; 1916 1917// Floating point complex multiply add 1918def : InstRW<[N2Write_5cyc_1V], (instregex "^FCMLA_ZPmZZ_[HSD]$", 1919 "^FCMLA_ZZZI_[HS]$")>; 1920 1921// Floating point convert, long or narrow (F16 to F32 or F32 to F16) 1922def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$", 1923 "^FCVTLT_ZPmZ_HtoS$", 1924 "^FCVTNT_ZPmZ_StoH$")>; 1925 1926// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 1927// or F64 to F16) 1928def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$", 1929 "^FCVTLT_ZPmZ_StoD$", 1930 "^FCVTNT_ZPmZ_DtoS$")>; 1931 1932// Floating point convert, round to odd 1933def : InstRW<[N2Write_3cyc_1V0], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>; 1934 1935// Floating point base2 log, F16 1936def : InstRW<[N2Write_6cyc_4V0], (instrs FLOGB_ZPmZ_H)>; 1937 1938// Floating point base2 log, F32 1939def : InstRW<[N2Write_4cyc_2V0], (instrs FLOGB_ZPmZ_S)>; 1940 1941// Floating point base2 log, F64 1942def : InstRW<[N2Write_3cyc_1V0], (instrs FLOGB_ZPmZ_D)>; 1943 1944// Floating point convert to integer, F16 1945def : InstRW<[N2Write_6cyc_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>; 1946 1947// Floating point convert to integer, F32 1948def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>; 1949 1950// Floating point convert to integer, F64 1951def : InstRW<[N2Write_3cyc_1V0], 1952 (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>; 1953 1954// Floating point copy 1955def : InstRW<[N2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$", 1956 "^FDUP_ZI_[HSD]$")>; 1957 1958// Floating point divide, F16 1959def : InstRW<[N2Write_13cyc_1V0], (instregex "^FDIVR?_ZPmZ_H$")>; 1960 1961// Floating point divide, F32 1962def : InstRW<[N2Write_10cyc_1V0], (instregex "^FDIVR?_ZPmZ_S$")>; 1963 1964// Floating point divide, F64 1965def : InstRW<[N2Write_15cyc_1V0], (instregex "^FDIVR?_ZPmZ_D$")>; 1966 1967// Floating point min/max pairwise 1968def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>; 1969 1970// Floating point min/max 1971def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>; 1972 1973// Floating point multiply 1974def : InstRW<[N2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$", 1975 "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>; 1976 1977// Floating point multiply accumulate 1978def : InstRW<[N2Write_4cyc_1V], 1979 (instregex "^FML[AS]_(ZPmZZ|ZZZI)_[HSD]$", 1980 "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_ZPmZZ_[HSD]$")>; 1981 1982// Floating point multiply add/sub accumulate long 1983def : InstRW<[N2Write_4cyc_1V], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>; 1984 1985// Floating point reciprocal estimate, F16 1986def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H, 1987 FRSQRTE_ZZ_H)>; 1988 1989// Floating point reciprocal estimate, F32 1990def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S, 1991 FRSQRTE_ZZ_S)>; 1992 1993// Floating point reciprocal estimate, F64 1994def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D, 1995 FRSQRTE_ZZ_D)>; 1996 1997// Floating point reciprocal step 1998def : InstRW<[N2Write_4cyc_1V0], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>; 1999 2000// Floating point reduction, F16 2001def : InstRW<[N2Write_6cyc_2V], 2002 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>; 2003 2004// Floating point reduction, F32 2005def : InstRW<[N2Write_4cyc_1V], 2006 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>; 2007 2008// Floating point reduction, F64 2009def : InstRW<[N2Write_2cyc_1V], 2010 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>; 2011 2012// Floating point round to integral, F16 2013def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>; 2014 2015// Floating point round to integral, F32 2016def : InstRW<[N2Write_4cyc_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>; 2017 2018// Floating point round to integral, F64 2019def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>; 2020 2021// Floating point square root, F16 2022def : InstRW<[N2Write_13cyc_1V0], (instrs FSQRT_ZPmZ_H)>; 2023 2024// Floating point square root, F32 2025def : InstRW<[N2Write_10cyc_1V0], (instrs FSQRT_ZPmZ_S)>; 2026 2027// Floating point square root, F64 2028def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRT_ZPmZ_D)>; 2029 2030// Floating point trigonometric exponentiation 2031def : InstRW<[N2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>; 2032 2033// Floating point trigonometric multiply add 2034def : InstRW<[N2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>; 2035 2036// Floating point trigonometric, miscellaneous 2037def : InstRW<[N2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>; 2038 2039// SVE BFloat16 (BF16) instructions 2040// ----------------------------------------------------------------------------- 2041 2042// Convert, F32 to BF16 2043def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>; 2044 2045// Dot product 2046def : InstRW<[N2Write_4cyc_1V], (instrs BFDOT_ZZI, BFDOT_ZZZ)>; 2047 2048// Matrix multiply accumulate 2049def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA_ZZZ)>; 2050 2051// Multiply accumulate long 2052def : InstRW<[N2Write_4cyc_1V], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>; 2053 2054// SVE Load instructions 2055// ----------------------------------------------------------------------------- 2056 2057// Load vector 2058def : InstRW<[N2Write_6cyc_1L], (instrs LDR_ZXI)>; 2059 2060// Load predicate 2061def : InstRW<[N2Write_6cyc_1L_1M], (instrs LDR_PXI)>; 2062 2063// Contiguous load, scalar + imm 2064def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM_REAL$", 2065 "^LD1S?B_[HSD]_IMM_REAL$", 2066 "^LD1S?H_[SD]_IMM_REAL$", 2067 "^LD1S?W_D_IMM_REAL$" )>; 2068// Contiguous load, scalar + scalar 2069def : InstRW<[N2Write_6cyc_1L01], (instregex "^LD1[BHWD]$", 2070 "^LD1S?B_[HSD]$", 2071 "^LD1S?H_[SD]$", 2072 "^LD1S?W_D$" )>; 2073 2074// Contiguous load broadcast, scalar + imm 2075def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$", 2076 "^LD1RSW_IMM$", 2077 "^LD1RS?B_[HSD]_IMM$", 2078 "^LD1RS?H_[SD]_IMM$", 2079 "^LD1RS?W_D_IMM$", 2080 "^LD1RQ_[BHWD]_IMM$")>; 2081 2082// Contiguous load broadcast, scalar + scalar 2083def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>; 2084 2085// Non temporal load, scalar + imm 2086def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>; 2087 2088// Non temporal load, scalar + scalar 2089def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>; 2090 2091// Non temporal gather load, vector + scalar 32-bit element size 2092def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S_REAL$", 2093 "^LDNT1S[BH]_ZZR_S_REAL$")>; 2094 2095// Non temporal gather load, vector + scalar 64-bit element size 2096def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>; 2097def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>; 2098 2099// Contiguous first faulting load, scalar + scalar 2100def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]_REAL$", 2101 "^LDFF1S?B_[HSD]_REAL$", 2102 "^LDFF1S?H_[SD]_REAL$", 2103 "^LDFF1S?W_D_REAL$")>; 2104 2105// Contiguous non faulting load, scalar + imm 2106def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM_REAL$", 2107 "^LDNF1S?B_[HSD]_IMM_REAL$", 2108 "^LDNF1S?H_[SD]_IMM_REAL$", 2109 "^LDNF1S?W_D_IMM_REAL$")>; 2110 2111// Contiguous Load two structures to two vectors, scalar + imm 2112def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>; 2113 2114// Contiguous Load two structures to two vectors, scalar + scalar 2115def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD2[BHWD]$")>; 2116 2117// Contiguous Load three structures to three vectors, scalar + imm 2118def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD3[BHWD]_IMM$")>; 2119 2120// Contiguous Load three structures to three vectors, scalar + scalar 2121def : InstRW<[N2Write_10cyc_1V_1L_1S], (instregex "^LD3[BHWD]$")>; 2122 2123// Contiguous Load four structures to four vectors, scalar + imm 2124def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>; 2125 2126// Contiguous Load four structures to four vectors, scalar + scalar 2127def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>; 2128 2129// Gather load, vector + imm, 32-bit element size 2130def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$", 2131 "^GLD(FF)?1W_IMM_REAL$")>; 2132 2133// Gather load, vector + imm, 64-bit element size 2134def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$", 2135 "^GLD(FF)?1D_IMM_REAL$")>; 2136 2137// Gather load, 64-bit element size 2138def : InstRW<[N2Write_9cyc_2L_2V], 2139 (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$", 2140 "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$", 2141 "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$", 2142 "^GLD(FF)?1D_(SCALED_)?REAL$")>; 2143 2144// Gather load, 32-bit scaled offset 2145def : InstRW<[N2Write_10cyc_2L_2V], 2146 (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$", 2147 "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>; 2148 2149// Gather load, 32-bit unpacked unscaled offset 2150def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$", 2151 "^GLD(FF)?1W_[SU]XTW_REAL$")>; 2152 2153// SVE Store instructions 2154// ----------------------------------------------------------------------------- 2155 2156// Store from predicate reg 2157def : InstRW<[N2Write_1cyc_1L01], (instrs STR_PXI)>; 2158 2159// Store from vector reg 2160def : InstRW<[N2Write_2cyc_1L01_1V], (instrs STR_ZXI)>; 2161 2162// Contiguous store, scalar + imm 2163def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BHWD]_IMM$", 2164 "^ST1B_[HSD]_IMM$", 2165 "^ST1H_[SD]_IMM$", 2166 "^ST1W_D_IMM$")>; 2167 2168// Contiguous store, scalar + scalar 2169def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>; 2170def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BWD]$", 2171 "^ST1B_[HSD]$", 2172 "^ST1W_D$")>; 2173 2174// Contiguous store two structures from two vectors, scalar + imm 2175def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BHWD]_IMM$")>; 2176 2177// Contiguous store two structures from two vectors, scalar + scalar 2178def : InstRW<[N2Write_4cyc_1L01_1S_1V], (instrs ST2H)>; 2179 2180// Contiguous store two structures from two vectors, scalar + scalar 2181def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BWD]$")>; 2182 2183// Contiguous store three structures from three vectors, scalar + imm 2184def : InstRW<[N2Write_7cyc_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>; 2185 2186// Contiguous store three structures from three vectors, scalar + scalar 2187def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instrs ST3H)>; 2188 2189// Contiguous store three structures from three vectors, scalar + scalar 2190def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instregex "^ST3[BWD]$")>; 2191 2192// Contiguous store four structures from four vectors, scalar + imm 2193def : InstRW<[N2Write_11cyc_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>; 2194 2195// Contiguous store four structures from four vectors, scalar + scalar 2196def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instrs ST4H)>; 2197 2198// Contiguous store four structures from four vectors, scalar + scalar 2199def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instregex "^ST4[BWD]$")>; 2200 2201// Non temporal store, scalar + imm 2202def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>; 2203 2204// Non temporal store, scalar + scalar 2205def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>; 2206def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>; 2207 2208// Scatter non temporal store, vector + scalar 32-bit element size 2209def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^STNT1[BHW]_ZZR_S")>; 2210 2211// Scatter non temporal store, vector + scalar 64-bit element size 2212def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZZR_D")>; 2213 2214// Scatter store vector + imm 32-bit element size 2215def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_IMM$", 2216 "^SST1W_IMM$")>; 2217 2218// Scatter store vector + imm 64-bit element size 2219def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_IMM$", 2220 "^SST1D_IMM$")>; 2221 2222// Scatter store, 32-bit scaled offset 2223def : InstRW<[N2Write_4cyc_2L01_2V], 2224 (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>; 2225 2226// Scatter store, 32-bit unpacked unscaled offset 2227def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$", 2228 "^SST1D_[SU]XTW$")>; 2229 2230// Scatter store, 32-bit unpacked scaled offset 2231def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$", 2232 "^SST1D_[SU]XTW_SCALED$")>; 2233 2234// Scatter store, 32-bit unscaled offset 2235def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_[SU]XTW$", 2236 "^SST1W_[SU]XTW$")>; 2237 2238// Scatter store, 64-bit scaled offset 2239def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_SCALED$", 2240 "^SST1D_SCALED$")>; 2241 2242// Scatter store, 64-bit unscaled offset 2243def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D$", 2244 "^SST1D$")>; 2245 2246// SVE Miscellaneous instructions 2247// ----------------------------------------------------------------------------- 2248 2249// Read first fault register, unpredicated 2250def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>; 2251 2252// Read first fault register, predicated 2253def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>; 2254 2255// Read first fault register and set flags 2256def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>; 2257 2258// Set first fault register 2259// Write to first fault register 2260def : InstRW<[N2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>; 2261 2262// Prefetch 2263def : InstRW<[N2Write_4cyc_1L], (instregex "^PRF[BHWD]")>; 2264 2265// SVE Cryptographic instructions 2266// ----------------------------------------------------------------------------- 2267 2268// Crypto AES ops 2269def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$", 2270 "^AESI?MC_ZZ_B$")>; 2271 2272// Crypto SHA3 ops 2273def : InstRW<[N2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$", 2274 "^RAX1_ZZZ_D$", 2275 "^XAR_ZZZI_[BHSD]$")>; 2276 2277// Crypto SM4 ops 2278def : InstRW<[N2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>; 2279 2280} 2281