1//=- AArch64SchedNeoverseN2.td - NeoverseN2 Scheduling Defs --*- tablegen -*-=// 2// 3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4// See https://llvm.org/LICENSE.txt for license information. 5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6// 7//===----------------------------------------------------------------------===// 8// 9// This file defines the scheduling model for the Arm Neoverse N2 processors. 10// 11//===----------------------------------------------------------------------===// 12 13def NeoverseN2Model : SchedMachineModel { 14 let IssueWidth = 10; // Micro-ops dispatched at a time. 15 let MicroOpBufferSize = 160; // Entries in micro-op re-order buffer. 16 let LoadLatency = 4; // Optimistic load latency. 17 let MispredictPenalty = 10; // Extra cycles for mispredicted branch. 18 let LoopMicroOpBufferSize = 16; // NOTE: Copied from Cortex-A57. 19 let CompleteModel = 1; 20 21 list<Predicate> UnsupportedFeatures = !listconcat(SMEUnsupported.F, 22 [HasSVE2p1, HasPAuthLR, HasCPA, HasCSSC]); 23} 24 25//===----------------------------------------------------------------------===// 26// Define each kind of processor resource and number available on Neoverse N2. 27// Instructions are first fetched and then decoded into internal macro-ops 28// (MOPs). From there, the MOPs proceed through register renaming and dispatch 29// stages. A MOP can be split into two micro-ops further down the pipeline 30// after the decode stage. Once dispatched, micro-ops wait for their operands 31// and issue out-of-order to one of thirteen issue pipelines. Each issue 32// pipeline can accept one micro-op per cycle. 33 34let SchedModel = NeoverseN2Model in { 35 36// Define the (13) issue ports. 37def N2UnitB : ProcResource<2>; // Branch 0/1 38def N2UnitS : ProcResource<2>; // Integer single Cycle 0/1 39def N2UnitM0 : ProcResource<1>; // Integer multicycle 0 40def N2UnitM1 : ProcResource<1>; // Integer multicycle 1 41def N2UnitL01 : ProcResource<2>; // Load/Store 0/1 42def N2UnitL2 : ProcResource<1>; // Load 2 43def N2UnitD : ProcResource<2>; // Store data 0/1 44def N2UnitV0 : ProcResource<1>; // FP/ASIMD 0 45def N2UnitV1 : ProcResource<1>; // FP/ASIMD 1 46 47def N2UnitV : ProcResGroup<[N2UnitV0, N2UnitV1]>; // FP/ASIMD 0/1 48def N2UnitM : ProcResGroup<[N2UnitM0, N2UnitM1]>; // Integer single/multicycle 0/1 49def N2UnitL : ProcResGroup<[N2UnitL01, N2UnitL2]>; // Load/Store 0/1 and Load 2 50def N2UnitI : ProcResGroup<[N2UnitS, N2UnitM0, N2UnitM1]>; // Integer single cycle 0/1 and single/multicycle 0/1 51 52// Define commonly used read types. 53 54// No forwarding is provided for these types. 55def : ReadAdvance<ReadI, 0>; 56def : ReadAdvance<ReadISReg, 0>; 57def : ReadAdvance<ReadIEReg, 0>; 58def : ReadAdvance<ReadIM, 0>; 59def : ReadAdvance<ReadIMA, 0>; 60def : ReadAdvance<ReadID, 0>; 61def : ReadAdvance<ReadExtrHi, 0>; 62def : ReadAdvance<ReadAdrBase, 0>; 63def : ReadAdvance<ReadST, 0>; 64def : ReadAdvance<ReadVLD, 0>; 65 66def : WriteRes<WriteAtomic, []> { let Unsupported = 1; } 67def : WriteRes<WriteBarrier, []> { let Latency = 1; } 68def : WriteRes<WriteHint, []> { let Latency = 1; } 69def : WriteRes<WriteLDHi, []> { let Latency = 4; } 70 71//===----------------------------------------------------------------------===// 72// Define customized scheduler read/write types specific to the Neoverse N2. 73 74//===----------------------------------------------------------------------===// 75// Define generic 1 micro-op types 76 77def N2Write_1cyc_1B : SchedWriteRes<[N2UnitB]> { let Latency = 1; } 78def N2Write_1cyc_1I : SchedWriteRes<[N2UnitI]> { let Latency = 1; } 79def N2Write_1cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 1; } 80def N2Write_1cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 1; } 81def N2Write_1cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 1; } 82def N2Write_2cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 2; } 83def N2Write_3cyc_1M : SchedWriteRes<[N2UnitM]> { let Latency = 3; } 84def N2Write_2cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 2; 85 let ReleaseAtCycles = [2]; } 86def N2Write_3cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 3; 87 let ReleaseAtCycles = [3]; } 88def N2Write_5cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 5; 89 let ReleaseAtCycles = [5]; } 90def N2Write_12cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 12; 91 let ReleaseAtCycles = [12]; } 92def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]> { let Latency = 20; 93 let ReleaseAtCycles = [20]; } 94def N2Write_4cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 4; } 95def N2Write_6cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 6; } 96def N2Write_2cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 2; } 97def N2Write_3cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 3; } 98def N2Write_4cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 4; } 99def N2Write_5cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 5; } 100def N2Write_12cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 12; } 101def N2Write_2cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 2; } 102def N2Write_3cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 3; } 103def N2Write_4cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 4; } 104def N2Write_7cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 7; 105 let ReleaseAtCycles = [7]; } 106def N2Write_9cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 9; } 107def N2Write_10cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 10; } 108def N2Write_12cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 12; } 109def N2Write_13cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 13; } 110def N2Write_15cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 15; } 111def N2Write_16cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 16; } 112def N2Write_20cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 20; } 113def N2Write_2cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 2; } 114def N2Write_3cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 3; } 115def N2Write_4cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 4; } 116def N2Write_6cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 6; } 117def N2Write_10cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 10; } 118def N2Write_6cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 6; } 119 120//===----------------------------------------------------------------------===// 121// Define generic 2 micro-op types 122 123def N2Write_1cyc_1B_1S : SchedWriteRes<[N2UnitB, N2UnitS]> { 124 let Latency = 1; 125 let NumMicroOps = 2; 126} 127 128def N2Write_6cyc_1M0_1B : SchedWriteRes<[N2UnitM0, N2UnitB]> { 129 let Latency = 6; 130 let NumMicroOps = 2; 131} 132 133def N2Write_9cyc_1M0_1L : SchedWriteRes<[N2UnitM0, N2UnitL]> { 134 let Latency = 9; 135 let NumMicroOps = 2; 136} 137 138def N2Write_3cyc_1I_1M : SchedWriteRes<[N2UnitI, N2UnitM]> { 139 let Latency = 3; 140 let NumMicroOps = 2; 141} 142 143def N2Write_4cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { 144 let Latency = 4; 145 let NumMicroOps = 2; 146} 147 148def N2Write_5cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { 149 let Latency = 5; 150 let NumMicroOps = 2; 151} 152 153def N2Write_6cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { 154 let Latency = 6; 155 let NumMicroOps = 2; 156} 157 158def N2Write_7cyc_1I_1L : SchedWriteRes<[N2UnitI, N2UnitL]> { 159 let Latency = 7; 160 let NumMicroOps = 2; 161} 162 163def N2Write_1cyc_1L01_1D : SchedWriteRes<[N2UnitL01, N2UnitD]> { 164 let Latency = 1; 165 let NumMicroOps = 2; 166} 167 168def N2Write_5cyc_1M0_1V : SchedWriteRes<[N2UnitM0, N2UnitV]> { 169 let Latency = 5; 170 let NumMicroOps = 2; 171} 172 173def N2Write_2cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> { 174 let Latency = 2; 175 let NumMicroOps = 2; 176} 177 178def N2Write_4cyc_1V1_1V : SchedWriteRes<[N2UnitV1, N2UnitV]> { 179 let Latency = 4; 180 let NumMicroOps = 2; 181} 182 183def N2Write_4cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { 184 let Latency = 4; 185 let NumMicroOps = 2; 186} 187 188def N2Write_10cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { 189 let Latency = 10; 190 let NumMicroOps = 2; 191 let ReleaseAtCycles = [5, 5]; 192} 193 194def N2Write_13cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { 195 let Latency = 13; 196 let NumMicroOps = 2; 197 let ReleaseAtCycles = [6, 7]; 198} 199 200def N2Write_15cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { 201 let Latency = 15; 202 let NumMicroOps = 2; 203 let ReleaseAtCycles = [7, 8]; 204} 205 206def N2Write_16cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { 207 let Latency = 16; 208 let NumMicroOps = 2; 209 let ReleaseAtCycles = [8, 8]; 210} 211 212def N2Write_4cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> { 213 let Latency = 4; 214 let NumMicroOps = 2; 215} 216 217def N2Write_6cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> { 218 let Latency = 6; 219 let NumMicroOps = 2; 220} 221 222def N2Write_6cyc_2L : SchedWriteRes<[N2UnitL, N2UnitL]> { 223 let Latency = 6; 224 let NumMicroOps = 2; 225} 226 227def N2Write_8cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> { 228 let Latency = 8; 229 let NumMicroOps = 2; 230} 231 232def N2Write_4cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> { 233 let Latency = 4; 234 let NumMicroOps = 2; 235} 236 237def N2Write_3cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> { 238 let Latency = 3; 239 let NumMicroOps = 2; 240} 241 242def N2Write_2cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> { 243 let Latency = 2; 244 let NumMicroOps = 2; 245} 246 247def N2Write_6cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> { 248 let Latency = 6; 249 let NumMicroOps = 2; 250} 251 252def N2Write_4cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> { 253 let Latency = 4; 254 let NumMicroOps = 2; 255} 256 257def N2Write_5cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> { 258 let Latency = 5; 259 let NumMicroOps = 2; 260} 261 262def N2Write_5cyc_1V1_1M0 : SchedWriteRes<[N2UnitV1, N2UnitM0]> { 263 let Latency = 5; 264 let NumMicroOps = 2; 265} 266 267def N2Write_7cyc_1M0_1V0 : SchedWriteRes<[N2UnitM0, N2UnitV0]> { 268 let Latency = 7; 269 let NumMicroOps = 2; 270} 271 272def N2Write_2cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> { 273 let Latency = 2; 274 let NumMicroOps = 2; 275} 276 277def N2Write_6cyc_1V_1V1 : SchedWriteRes<[N2UnitV, N2UnitV1]> { 278 let Latency = 6; 279 let NumMicroOps = 2; 280} 281 282def N2Write_6cyc_1L_1M : SchedWriteRes<[N2UnitL, N2UnitM]> { 283 let Latency = 6; 284 let NumMicroOps = 2; 285} 286 287def N2Write_6cyc_1L_1S : SchedWriteRes<[N2UnitL, N2UnitS]> { 288 let Latency = 6; 289 let NumMicroOps = 2; 290} 291 292def N2Write_9cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> { 293 let Latency = 9; 294 let NumMicroOps = 2; 295} 296 297def N2Write_4cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> { 298 let Latency = 4; 299 let NumMicroOps = 2; 300} 301 302//===----------------------------------------------------------------------===// 303// Define generic 3 micro-op types 304 305def N2Write_1cyc_1L01_1D_1I : SchedWriteRes<[N2UnitL01, N2UnitD, N2UnitI]> { 306 let Latency = 1; 307 let NumMicroOps = 3; 308} 309 310def N2Write_2cyc_1L01_1V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitI]> { 311 let Latency = 2; 312 let NumMicroOps = 3; 313} 314 315def N2Write_2cyc_1L01_2V : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV]> { 316 let Latency = 2; 317 let NumMicroOps = 3; 318} 319 320def N2Write_7cyc_1M_1M0_1V : SchedWriteRes<[N2UnitM, N2UnitM0, N2UnitV]> { 321 let Latency = 7; 322 let NumMicroOps = 3; 323} 324 325def N2Write_8cyc_1M0_1V1_1V : SchedWriteRes<[N2UnitM0, N2UnitV1, N2UnitV]> { 326 let Latency = 8; 327 let NumMicroOps = 3; 328} 329 330def N2Write_10cyc_1V_1L_1S : SchedWriteRes<[N2UnitV, N2UnitL, N2UnitL]> { 331 let Latency = 10; 332 let NumMicroOps = 3; 333} 334 335def N2Write_2cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> { 336 let Latency = 2; 337 let NumMicroOps = 3; 338} 339 340def N2Write_4cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> { 341 let Latency = 4; 342 let NumMicroOps = 3; 343} 344 345def N2Write_6cyc_3L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL]> { 346 let Latency = 6; 347 let NumMicroOps = 3; 348} 349 350def N2Write_8cyc_1L_2V : SchedWriteRes<[N2UnitL, N2UnitV, N2UnitV]> { 351 let Latency = 8; 352 let NumMicroOps = 3; 353} 354 355//===----------------------------------------------------------------------===// 356// Define generic 4 micro-op types 357 358def N2Write_2cyc_1L01_2V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV, 359 N2UnitI]> { 360 let Latency = 2; 361 let NumMicroOps = 4; 362} 363 364def N2Write_6cyc_4V0 : SchedWriteRes<[N2UnitV0, N2UnitV0, N2UnitV0, N2UnitV0]> { 365 let Latency = 6; 366 let NumMicroOps = 4; 367} 368 369def N2Write_4cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { 370 let Latency = 4; 371 let NumMicroOps = 4; 372} 373 374def N2Write_6cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { 375 let Latency = 6; 376 let NumMicroOps = 4; 377} 378 379def N2Write_8cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> { 380 let Latency = 8; 381 let NumMicroOps = 4; 382} 383 384def N2Write_9cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> { 385 let Latency = 9; 386 let NumMicroOps = 4; 387} 388 389def N2Write_2cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, 390 N2UnitV]> { 391 let Latency = 2; 392 let NumMicroOps = 4; 393} 394 395def N2Write_4cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, 396 N2UnitV]> { 397 let Latency = 4; 398 let NumMicroOps = 4; 399} 400 401def N2Write_5cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV, 402 N2UnitV]> { 403 let Latency = 5; 404 let NumMicroOps = 4; 405} 406 407def N2Write_8cyc_2M0_2V0 : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitV0, 408 N2UnitV0]> { 409 let Latency = 8; 410 let NumMicroOps = 4; 411} 412 413def N2Write_11cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, 414 N2UnitV1]> { 415 let Latency = 11; 416 let NumMicroOps = 4; 417} 418 419def N2Write_9cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, 420 N2UnitV1]> { 421 let Latency = 9; 422 let NumMicroOps = 4; 423} 424 425def N2Write_8cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, 426 N2UnitV1]> { 427 let Latency = 8; 428 let NumMicroOps = 4; 429} 430 431def N2Write_10cyc_2L_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1, 432 N2UnitV1]> { 433 let Latency = 10; 434 let NumMicroOps = 4; 435} 436 437def N2Write_10cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> { 438 let Latency = 10; 439 let NumMicroOps = 4; 440} 441 442def N2Write_4cyc_2M0_2M : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitM, 443 N2UnitM]> { 444 let Latency = 4; 445 let NumMicroOps = 4; 446} 447 448def N2Write_6cyc_2I_2L : SchedWriteRes<[N2UnitI, N2UnitI, N2UnitL, N2UnitL]> { 449 let Latency = 6; 450 let NumMicroOps = 4; 451} 452 453def N2Write_7cyc_4L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL]> { 454 let Latency = 7; 455 let NumMicroOps = 4; 456} 457 458//===----------------------------------------------------------------------===// 459// Define generic 5 micro-op types 460 461def N2Write_2cyc_1L01_2V_2I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV, 462 N2UnitI, N2UnitI]> { 463 let Latency = 2; 464 let NumMicroOps = 5; 465} 466 467def N2Write_8cyc_2L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV, 468 N2UnitV]> { 469 let Latency = 8; 470 let NumMicroOps = 5; 471} 472 473//===----------------------------------------------------------------------===// 474// Define generic 6 micro-op types 475 476def N2Write_8cyc_3L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, 477 N2UnitV, N2UnitV, N2UnitV]> { 478 let Latency = 8; 479 let NumMicroOps = 6; 480} 481 482def N2Write_2cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 483 N2UnitV, N2UnitV, N2UnitV]> { 484 let Latency = 2; 485 let NumMicroOps = 6; 486} 487 488def N2Write_6cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 489 N2UnitV, N2UnitV, N2UnitV]> { 490 let Latency = 6; 491 let NumMicroOps = 6; 492} 493 494def N2Write_4cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 495 N2UnitV, N2UnitV, N2UnitV]> { 496 let Latency = 4; 497 let NumMicroOps = 6; 498} 499 500def N2Write_10cyc_2L_2V_2S : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV, 501 N2UnitS, N2UnitS]> { 502 let Latency = 10; 503 let NumMicroOps = 6; 504} 505 506//===----------------------------------------------------------------------===// 507// Define generic 7 micro-op types 508 509def N2Write_8cyc_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, 510 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { 511 let Latency = 8; 512 let NumMicroOps = 7; 513} 514 515//===----------------------------------------------------------------------===// 516// Define generic 8 micro-op types 517 518def N2Write_6cyc_8V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV, 519 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { 520 let Latency = 6; 521 let NumMicroOps = 8; 522} 523 524def N2Write_2cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 525 N2UnitL01, N2UnitV, N2UnitV, N2UnitV, 526 N2UnitV]> { 527 let Latency = 2; 528 let NumMicroOps = 8; 529} 530 531def N2Write_5cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 532 N2UnitL01, N2UnitV, N2UnitV, N2UnitV, 533 N2UnitV]> { 534 let Latency = 5; 535 let NumMicroOps = 8; 536} 537 538def N2Write_8cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL, 539 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { 540 let Latency = 8; 541 let NumMicroOps = 8; 542} 543 544def N2Write_9cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL, 545 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { 546 let Latency = 9; 547 let NumMicroOps = 8; 548} 549 550//===----------------------------------------------------------------------===// 551// Define generic 10 micro-op types 552 553def N2Write_7cyc_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 554 N2UnitL01, N2UnitL01, N2UnitV, 555 N2UnitV, N2UnitV, N2UnitV, N2UnitV]> { 556 let Latency = 7; 557 let NumMicroOps = 10; 558} 559 560//===----------------------------------------------------------------------===// 561// Define generic 12 micro-op types 562 563def N2Write_7cyc_6L01_6V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 564 N2UnitL01, N2UnitL01, N2UnitL01, 565 N2UnitV, N2UnitV, N2UnitV, N2UnitV, 566 N2UnitV, N2UnitV]> { 567 let Latency = 7; 568 let NumMicroOps = 12; 569} 570 571//===----------------------------------------------------------------------===// 572// Define generic 15 micro-op types 573 574def N2Write_7cyc_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 575 N2UnitL01, N2UnitL01, N2UnitS, 576 N2UnitS, N2UnitS, N2UnitS, 577 N2UnitS, N2UnitV, N2UnitV, 578 N2UnitV, N2UnitV, N2UnitV]> { 579 let Latency = 7; 580 let NumMicroOps = 15; 581} 582 583//===----------------------------------------------------------------------===// 584// Define generic 18 micro-op types 585 586def N2Write_11cyc_9L01_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 587 N2UnitL01, N2UnitL01, N2UnitL01, 588 N2UnitL01, N2UnitL01, N2UnitL01, 589 N2UnitV, N2UnitV, N2UnitV, 590 N2UnitV, N2UnitV, N2UnitV, 591 N2UnitV, N2UnitV, N2UnitV]> { 592 let Latency = 11; 593 let NumMicroOps = 18; 594} 595 596//===----------------------------------------------------------------------===// 597// Define generic 27 micro-op types 598 599def N2Write_11cyc_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01, 600 N2UnitL01, N2UnitL01, N2UnitL01, 601 N2UnitL01, N2UnitL01, N2UnitL01, 602 N2UnitS, N2UnitS, N2UnitS, 603 N2UnitS, N2UnitS, N2UnitS, 604 N2UnitS, N2UnitS, N2UnitS, 605 N2UnitV, N2UnitV, N2UnitV, 606 N2UnitV, N2UnitV, N2UnitV, 607 N2UnitV, N2UnitV, N2UnitV]> { 608 let Latency = 11; 609 let NumMicroOps = 27; 610} 611 612//===----------------------------------------------------------------------===// 613// Define types for arithmetic and logical ops with short shifts 614def N2Write_Arith : SchedWriteVariant<[ 615 SchedVar<IsCheapLSL, [N2Write_1cyc_1I]>, 616 SchedVar<NoSchedPred, [N2Write_2cyc_1M]>]>; 617 618def N2Write_Logical: SchedWriteVariant<[ 619 SchedVar<NeoverseNoLSL, [N2Write_1cyc_1I]>, 620 SchedVar<NoSchedPred, [N2Write_2cyc_1M]>]>; 621 622// Miscellaneous 623// ----------------------------------------------------------------------------- 624 625def : InstRW<[WriteI], (instrs COPY)>; 626 627// Branch Instructions 628// ----------------------------------------------------------------------------- 629 630// Branch, immed 631// Compare and branch 632def : SchedAlias<WriteBr, N2Write_1cyc_1B>; 633 634// Branch, register 635def : SchedAlias<WriteBrReg, N2Write_1cyc_1B>; 636 637// Branch and link, immed 638// Branch and link, register 639def : InstRW<[N2Write_1cyc_1B_1S], (instrs BL, BLR)>; 640 641// Arithmetic and Logical Instructions 642// ----------------------------------------------------------------------------- 643 644// ALU, basic 645// ALU, basic, flagset 646def : SchedAlias<WriteI, N2Write_1cyc_1I>; 647 648// ALU, extend and shift 649def : SchedAlias<WriteIEReg, N2Write_2cyc_1M>; 650 651// Arithmetic, LSL shift, shift <= 4 652// Arithmetic, flagset, LSL shift, shift <= 4 653// Arithmetic, LSR/ASR/ROR shift or LSL shift > 4 654def : SchedAlias<WriteISReg, N2Write_Arith>; 655 656// Logical, shift, no flagset 657def : InstRW<[N2Write_1cyc_1I], 658 (instregex "^(AND|BIC|EON|EOR|ORN|ORR)[WX]rs$")>; 659 660// Logical, shift, flagset 661def : InstRW<[N2Write_Logical], (instregex "^(AND|BIC)S[WX]rs$")>; 662 663// Arithmetic, immediate to logical address tag 664def : InstRW<[N2Write_2cyc_1M], (instrs ADDG, SUBG)>; 665 666// Convert floating-point condition flags 667// Flag manipulation instructions 668def : WriteRes<WriteSys, []> { let Latency = 1; } 669 670// Insert Random Tags 671def : InstRW<[N2Write_2cyc_1M], (instrs IRG, IRGstack)>; 672 673// Insert Tag Mask 674// Subtract Pointer 675// Subtract Pointer, flagset 676def : InstRW<[N2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>; 677 678// Move and shift instructions 679// ----------------------------------------------------------------------------- 680 681def : SchedAlias<WriteImm, N2Write_1cyc_1I>; 682 683// Divide and Multiply Instructions 684// ----------------------------------------------------------------------------- 685 686// SDIV, UDIV 687def : SchedAlias<WriteID32, N2Write_12cyc_1M0>; 688def : SchedAlias<WriteID64, N2Write_20cyc_1M0>; 689 690def : WriteRes<WriteIM32, [N2UnitM]> { let Latency = 2; } 691def : WriteRes<WriteIM64, [N2UnitM]> { let Latency = 2; } 692 693// Multiply high 694def : InstRW<[N2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>; 695 696// Pointer Authentication Instructions (v8.3 PAC) 697// ----------------------------------------------------------------------------- 698 699// Authenticate data address 700// Authenticate instruction address 701// Compute pointer authentication code for data address 702// Compute pointer authentication code, using generic key 703// Compute pointer authentication code for instruction address 704def : InstRW<[N2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>; 705 706// Branch and link, register, with pointer authentication 707// Branch, register, with pointer authentication 708// Branch, return, with pointer authentication 709def : InstRW<[N2Write_6cyc_1M0_1B], (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ, BRAA, 710 BRAAZ, BRAB, BRABZ, RETAA, RETAB, 711 ERETAA, ERETAB)>; 712 713 714// Load register, with pointer authentication 715def : InstRW<[N2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>; 716 717// Strip pointer authentication code 718def : InstRW<[N2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>; 719 720// Miscellaneous data-processing instructions 721// ----------------------------------------------------------------------------- 722 723// Bitfield extract, one reg 724// Bitfield extract, two regs 725// NOTE: We don't model the difference between EXTR where both operands are the 726// same (one reg). 727def : SchedAlias<WriteExtr, N2Write_3cyc_1I_1M>; 728def : InstRW<[N2Write_3cyc_1I_1M], (instrs EXTRWrri, EXTRXrri)>; 729 730// Bitfield move, basic 731def : SchedAlias<WriteIS, N2Write_1cyc_1I>; 732 733// Bitfield move, insert 734def : InstRW<[N2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>; 735 736// Load instructions 737// ----------------------------------------------------------------------------- 738 739def : SchedAlias<WriteLD, N2Write_4cyc_1L>; 740def : SchedAlias<WriteLDIdx, N2Write_4cyc_1I_1L>; 741 742// Load pair, signed immed offset, signed words 743def : InstRW<[N2Write_5cyc_1M0, WriteLDHi], (instrs LDPSWi)>; 744// Load pair, immed post-index or immed pre-index, signed words 745def : InstRW<[WriteAdr, N2Write_5cyc_1M0, WriteLDHi], 746 (instregex "^LDPSW(post|pre)$")>; 747 748// Store instructions 749// ----------------------------------------------------------------------------- 750 751def : SchedAlias<WriteST, N2Write_1cyc_1L01_1D>; 752def : SchedAlias<WriteSTIdx, N2Write_1cyc_1L01_1D_1I>; 753def : SchedAlias<WriteSTP, N2Write_1cyc_1L01_1D>; 754def : SchedAlias<WriteAdr, N2Write_1cyc_1I>; // copied from A57. 755 756// Tag load instructions 757// ----------------------------------------------------------------------------- 758 759// Load allocation tag 760// Load multiple allocation tags 761def : InstRW<[N2Write_4cyc_1L], (instrs LDG, LDGM)>; 762 763// Tag store instructions 764// ----------------------------------------------------------------------------- 765 766// Store allocation tags to one or two granules, post-index 767// Store allocation tags to one or two granules, pre-index 768// Store allocation tag to one or two granules, zeroing, post-index 769// Store Allocation Tag to one or two granules, zeroing, pre-index 770// Store allocation tag and reg pair to memory, post-Index 771// Store allocation tag and reg pair to memory, pre-Index 772def : InstRW<[N2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex, 773 ST2GPreIndex, ST2GPostIndex, 774 STZGPreIndex, STZGPostIndex, 775 STZ2GPreIndex, STZ2GPostIndex, 776 STGPpre, STGPpost)>; 777 778// Store allocation tags to one or two granules, signed offset 779// Store allocation tag to two granules, zeroing, signed offset 780// Store allocation tag and reg pair to memory, signed offset 781// Store multiple allocation tags 782def : InstRW<[N2Write_1cyc_1L01_1D], (instrs STGi, ST2Gi, STZGi, 783 STZ2Gi, STGPi, STGM, STZGM)>; 784 785// FP data processing instructions 786// ----------------------------------------------------------------------------- 787 788// FP absolute value 789// FP arithmetic 790// FP min/max 791// FP negate 792// FP select 793def : SchedAlias<WriteF, N2Write_2cyc_1V>; 794 795// FP compare 796def : SchedAlias<WriteFCmp, N2Write_2cyc_1V0>; 797 798// FP divide, square root 799def : SchedAlias<WriteFDiv, N2Write_7cyc_1V0>; 800 801// FP divide, H-form 802def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVHrr)>; 803// FP divide, S-form 804def : InstRW<[N2Write_10cyc_1V0], (instrs FDIVSrr)>; 805// FP divide, D-form 806def : InstRW<[N2Write_15cyc_1V0], (instrs FDIVDrr)>; 807 808// FP square root, H-form 809def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTHr)>; 810// FP square root, S-form 811def : InstRW<[N2Write_9cyc_1V0], (instrs FSQRTSr)>; 812// FP square root, D-form 813def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRTDr)>; 814 815// FP multiply 816def : WriteRes<WriteFMul, [N2UnitV]> { let Latency = 3; } 817 818// FP multiply accumulate 819def : InstRW<[N2Write_4cyc_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>; 820 821// FP round to integral 822def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ][HSD]r$", 823 "^FRINT(32|64)[XZ][SD]r$")>; 824 825// FP miscellaneous instructions 826// ----------------------------------------------------------------------------- 827 828// FP convert, from gen to vec reg 829def : InstRW<[N2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>; 830 831// FP convert, from vec to gen reg 832def : InstRW<[N2Write_3cyc_1V], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>; 833 834// FP convert, Javascript from vec to gen reg 835// FP convert, from vec to vec reg 836def : SchedAlias<WriteFCvt, N2Write_3cyc_1V0>; 837 838// FP move, immed 839// FP move, register 840def : SchedAlias<WriteFImm, N2Write_2cyc_1V>; 841 842// FP transfer, from gen to low half of vec reg 843def : InstRW<[N2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr, 844 FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>; 845 846// FP transfer, from gen to high half of vec reg 847def : InstRW<[N2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>; 848 849// FP transfer, from vec to gen reg 850def : SchedAlias<WriteFCopy, N2Write_2cyc_1V>; 851 852// FP load instructions 853// ----------------------------------------------------------------------------- 854 855// Load vector reg, literal, S/D/Q forms 856// Load vector reg, unscaled immed 857def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[SDQ]l$", 858 "^LDUR[BHSDQ]i$")>; 859 860// Load vector reg, immed post-index 861def : InstRW<[N2Write_6cyc_1I_1L, WriteI], (instregex "^LDR[BHSDQ]post$")>; 862// Load vector reg, immed pre-index 863def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L], (instregex "^LDR[BHSDQ]pre$")>; 864 865// Load vector reg, unsigned immed 866def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>; 867 868// Load vector reg, register offset, basic 869// Load vector reg, register offset, scale, S/D-form 870// Load vector reg, register offset, extend 871// Load vector reg, register offset, extend, scale, S/D-form 872def : InstRW<[N2Write_6cyc_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>; 873 874// Load vector reg, register offset, scale, H/Q-form 875// Load vector reg, register offset, extend, scale, H/Q-form 876def : InstRW<[N2Write_7cyc_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>; 877 878// Load vector pair, immed offset, S/D-form 879def : InstRW<[N2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>; 880 881// Load vector pair, immed offset, Q-form 882def : InstRW<[N2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>; 883 884// Load vector pair, immed post-index, S/D-form 885// Load vector pair, immed pre-index, S/D-form 886def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L, WriteLDHi], 887 (instregex "^LDP[SD](pre|post)$")>; 888 889// Load vector pair, immed post-index, Q-form 890// Load vector pair, immed pre-index, Q-form 891def : InstRW<[WriteAdr, N2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost, 892 LDPQpre)>; 893 894// FP store instructions 895// ----------------------------------------------------------------------------- 896 897// Store vector reg, unscaled immed, B/H/S/D-form 898// Store vector reg, unscaled immed, Q-form 899def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STUR[BHSDQ]i$")>; 900 901// Store vector reg, immed post-index, B/H/S/D-form 902// Store vector reg, immed post-index, Q-form 903// Store vector reg, immed pre-index, B/H/S/D-form 904// Store vector reg, immed pre-index, Q-form 905def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I, ReadAdrBase], 906 (instregex "^STR[BHSDQ](pre|post)$")>; 907 908// Store vector reg, unsigned immed, B/H/S/D-form 909// Store vector reg, unsigned immed, Q-form 910def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STR[BHSDQ]ui$")>; 911 912// Store vector reg, register offset, basic, B/H/S/D-form 913// Store vector reg, register offset, basic, Q-form 914// Store vector reg, register offset, scale, S/D-form 915// Store vector reg, register offset, extend, B/H/S/D-form 916// Store vector reg, register offset, extend, Q-form 917// Store vector reg, register offset, extend, scale, S/D-form 918def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase], 919 (instregex "^STR[BSD]ro[WX]$")>; 920 921// Store vector reg, register offset, scale, H-form 922// Store vector reg, register offset, scale, Q-form 923// Store vector reg, register offset, extend, scale, H-form 924// Store vector reg, register offset, extend, scale, Q-form 925def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase], 926 (instregex "^STR[HQ]ro[WX]$")>; 927 928// Store vector pair, immed offset, S-form 929// Store vector pair, immed offset, D-form 930def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STN?P[SD]i$")>; 931 932// Store vector pair, immed offset, Q-form 933def : InstRW<[N2Write_2cyc_1L01_2V], (instrs STPQi, STNPQi)>; 934 935// Store vector pair, immed post-index, S-form 936// Store vector pair, immed post-index, D-form 937// Store vector pair, immed pre-index, S-form 938// Store vector pair, immed pre-index, D-form 939def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I], 940 (instregex "^STP[SD](pre|post)$")>; 941 942// Store vector pair, immed post-index, Q-form 943def : InstRW<[N2Write_2cyc_1L01_2V_1I], (instrs STPQpost)>; 944 945// Store vector pair, immed pre-index, Q-form 946def : InstRW<[N2Write_2cyc_1L01_2V_2I], (instrs STPQpre)>; 947 948// ASIMD integer instructions 949// ----------------------------------------------------------------------------- 950 951// ASIMD absolute diff 952// ASIMD absolute diff long 953// ASIMD arith, basic 954// ASIMD arith, complex 955// ASIMD arith, pair-wise 956// ASIMD compare 957// ASIMD logical 958// ASIMD max/min, basic and pair-wise 959def : SchedAlias<WriteVd, N2Write_2cyc_1V>; 960def : SchedAlias<WriteVq, N2Write_2cyc_1V>; 961 962// ASIMD absolute diff accum 963// ASIMD absolute diff accum long 964def : InstRW<[N2Write_4cyc_1V1], 965 (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>; 966 967// ASIMD arith, reduce, 4H/4S 968def : InstRW<[N2Write_2cyc_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>; 969 970// ASIMD arith, reduce, 8B/8H 971def : InstRW<[N2Write_4cyc_1V1_1V], 972 (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>; 973 974// ASIMD arith, reduce, 16B 975def : InstRW<[N2Write_4cyc_1V1], (instrs ADDVv16i8v, SADDLVv16i8v, 976 UADDLVv16i8v)>; 977 978// ASIMD dot product 979// ASIMD dot product using signed and unsigned integers 980def : InstRW<[N2Write_3cyc_1V], 981 (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>; 982 983// ASIMD matrix multiply-accumulate 984def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA, UMMLA, USMMLA)>; 985 986// ASIMD max/min, reduce, 4H/4S 987def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU](MAX|MIN)Vv4i16v$", 988 "^[SU](MAX|MIN)Vv4i32v$")>; 989 990// ASIMD max/min, reduce, 8B/8H 991def : InstRW<[N2Write_4cyc_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$", 992 "^[SU](MAX|MIN)Vv8i16v$")>; 993 994// ASIMD max/min, reduce, 16B 995def : InstRW<[N2Write_4cyc_2V1], (instregex "[SU](MAX|MIN)Vv16i8v$")>; 996 997// ASIMD multiply 998def : InstRW<[N2Write_4cyc_1V0], (instregex "^MULv", "^SQ(R)?DMULHv")>; 999 1000// ASIMD multiply accumulate 1001def : InstRW<[N2Write_4cyc_1V0], (instregex "^MLAv", "^MLSv")>; 1002 1003// ASIMD multiply accumulate high 1004def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>; 1005 1006// ASIMD multiply accumulate long 1007def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MLALv", "^[SU]MLSLv")>; 1008 1009// ASIMD multiply accumulate saturating long 1010def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMLALv", "^SQDMLSLv")>; 1011 1012// ASIMD multiply/multiply long (8x8) polynomial, D-form 1013// ASIMD multiply/multiply long (8x8) polynomial, Q-form 1014def : InstRW<[N2Write_3cyc_1V0], (instregex "^PMULL?(v8i8|v16i8)$")>; 1015 1016// ASIMD multiply long 1017def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]MULLv", "^SQDMULLv")>; 1018 1019// ASIMD pairwise add and accumulate long 1020def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALPv")>; 1021 1022// ASIMD shift accumulate 1023def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]SRAv", "^[SU]RSRAv")>; 1024 1025// ASIMD shift by immed, basic 1026def : InstRW<[N2Write_2cyc_1V1], (instregex "^SHLv", "^SHLLv", "^SHRNv", 1027 "^SSHLLv", "^SSHRv", "^USHLLv", 1028 "^USHRv")>; 1029 1030// ASIMD shift by immed and insert, basic 1031def : InstRW<[N2Write_2cyc_1V1], (instregex "^SLIv", "^SRIv")>; 1032 1033// ASIMD shift by immed, complex 1034def : InstRW<[N2Write_4cyc_1V1], 1035 (instregex "^RSHRNv", "^SQRSHRNv", "^SQRSHRUNv", 1036 "^(SQSHLU?|UQSHL)[bhsd]$", 1037 "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$", 1038 "^SQSHRNv", "^SQSHRUNv", "^SRSHRv", "^UQRSHRNv", 1039 "^UQSHRNv", "^URSHRv")>; 1040 1041// ASIMD shift by register, basic 1042def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]SHLv")>; 1043 1044// ASIMD shift by register, complex 1045def : InstRW<[N2Write_4cyc_1V1], 1046 (instregex "^[SU]RSHLv", "^[SU]QRSHLv", 1047 "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>; 1048 1049// ASIMD floating-point instructions 1050// ----------------------------------------------------------------------------- 1051 1052// ASIMD FP absolute value/difference 1053// ASIMD FP arith, normal 1054// ASIMD FP compare 1055// ASIMD FP complex add 1056// ASIMD FP max/min, normal 1057// ASIMD FP max/min, pairwise 1058// ASIMD FP negate 1059// Handled by SchedAlias<WriteV[dq], ...> 1060 1061// ASIMD FP complex multiply add 1062def : InstRW<[N2Write_4cyc_1V], (instregex "^FCMLAv")>; 1063 1064// ASIMD FP convert, long (F16 to F32) 1065def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTL(v4|v8)i16")>; 1066 1067// ASIMD FP convert, long (F32 to F64) 1068def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTL(v2|v4)i32")>; 1069 1070// ASIMD FP convert, narrow (F32 to F16) 1071def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTN(v4|v8)i16")>; 1072 1073// ASIMD FP convert, narrow (F64 to F32) 1074def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTN(v2|v4)i32", 1075 "^FCVTXN(v2|v4)f32")>; 1076 1077// ASIMD FP convert, other, D-form F32 and Q-form F64 1078def : InstRW<[N2Write_3cyc_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$", 1079 "^[SU]CVTFv2f(32|64)$")>; 1080 1081// ASIMD FP convert, other, D-form F16 and Q-form F32 1082def : InstRW<[N2Write_4cyc_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$", 1083 "^[SU]CVTFv4f(16|32)$")>; 1084 1085// ASIMD FP convert, other, Q-form F16 1086def : InstRW<[N2Write_6cyc_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$", 1087 "^[SU]CVTFv8f16$")>; 1088 1089// ASIMD FP divide, D-form, F16 1090def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVv4f16)>; 1091 1092// ASIMD FP divide, D-form, F32 1093def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv2f32)>; 1094 1095// ASIMD FP divide, Q-form, F16 1096def : InstRW<[N2Write_13cyc_2V0], (instrs FDIVv8f16)>; 1097 1098// ASIMD FP divide, Q-form, F32 1099def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv4f32)>; 1100 1101// ASIMD FP divide, Q-form, F64 1102def : InstRW<[N2Write_15cyc_2V0], (instrs FDIVv2f64)>; 1103 1104// ASIMD FP max/min, reduce, F32 and D-form F16 1105def : InstRW<[N2Write_4cyc_1V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>; 1106 1107// ASIMD FP max/min, reduce, Q-form F16 1108def : InstRW<[N2Write_6cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>; 1109 1110// ASIMD FP multiply 1111def : InstRW<[N2Write_3cyc_1V], (instregex "^FMULv", "^FMULXv")>; 1112 1113// ASIMD FP multiply accumulate 1114def : InstRW<[N2Write_4cyc_1V], (instregex "^FMLAv", "^FMLSv")>; 1115 1116// ASIMD FP multiply accumulate long 1117def : InstRW<[N2Write_5cyc_1V], (instregex "^FMLALv", "^FMLSLv")>; 1118 1119// ASIMD FP round, D-form F32 and Q-form F64 1120def : InstRW<[N2Write_3cyc_1V0], 1121 (instregex "^FRINT[AIMNPXZ]v2f(32|64)$", 1122 "^FRINT[32|64)[XZ]v2f(32|64)$")>; 1123 1124// ASIMD FP round, D-form F16 and Q-form F32 1125def : InstRW<[N2Write_4cyc_2V0], 1126 (instregex "^FRINT[AIMNPXZ]v4f(16|32)$", 1127 "^FRINT(32|64)[XZ]v4f32$")>; 1128 1129 1130// ASIMD FP round, Q-form F16 1131def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>; 1132 1133// ASIMD FP square root, D-form, F16 1134def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTv4f16)>; 1135 1136// ASIMD FP square root, D-form, F32 1137def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv2f32)>; 1138 1139// ASIMD FP square root, Q-form, F16 1140def : InstRW<[N2Write_13cyc_2V0], (instrs FSQRTv8f16)>; 1141 1142// ASIMD FP square root, Q-form, F32 1143def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv4f32)>; 1144 1145// ASIMD FP square root, Q-form, F64 1146def : InstRW<[N2Write_16cyc_2V0], (instrs FSQRTv2f64)>; 1147 1148// ASIMD BFloat16 (BF16) instructions 1149// ----------------------------------------------------------------------------- 1150 1151// ASIMD convert, F32 to BF16 1152def : InstRW<[N2Write_4cyc_1V0], (instrs BFCVTN, BFCVTN2)>; 1153 1154// ASIMD dot product 1155def : InstRW<[N2Write_4cyc_1V], (instrs BFDOTv4bf16, BFDOTv8bf16)>; 1156 1157// ASIMD matrix multiply accumulate 1158def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA)>; 1159 1160// ASIMD multiply accumulate long 1161def : InstRW<[N2Write_4cyc_1V], (instrs BFMLALB, BFMLALBIdx, BFMLALT, 1162 BFMLALTIdx)>; 1163 1164// Scalar convert, F32 to BF16 1165def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT)>; 1166 1167// ASIMD miscellaneous instructions 1168// ----------------------------------------------------------------------------- 1169 1170// ASIMD bit reverse 1171// ASIMD bitwise insert 1172// ASIMD count 1173// ASIMD duplicate, element 1174// ASIMD extract 1175// ASIMD extract narrow 1176// ASIMD insert, element to element 1177// ASIMD move, FP immed 1178// ASIMD move, integer immed 1179// ASIMD reverse 1180// ASIMD table lookup, 1 or 2 table regs 1181// ASIMD table lookup extension, 1 table reg 1182// ASIMD transfer, element to gen reg 1183// ASIMD transpose 1184// ASIMD unzip/zip 1185// Handled by SchedAlias<WriteV[dq], ...> 1186 1187// ASIMD duplicate, gen reg 1188def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>; 1189 1190// ASIMD extract narrow, saturating 1191def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>; 1192 1193// ASIMD reciprocal and square root estimate, D-form U32 1194def : InstRW<[N2Write_3cyc_1V0], (instrs URECPEv2i32, URSQRTEv2i32)>; 1195 1196// ASIMD reciprocal and square root estimate, Q-form U32 1197def : InstRW<[N2Write_4cyc_2V0], (instrs URECPEv4i32, URSQRTEv4i32)>; 1198 1199// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms 1200def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPEv1f16, FRECPEv1i32, 1201 FRECPEv1i64, FRECPEv2f32, 1202 FRSQRTEv1f16, FRSQRTEv1i32, 1203 FRSQRTEv1i64, FRSQRTEv2f32)>; 1204 1205// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32 1206def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPEv4f16, FRECPEv4f32, 1207 FRSQRTEv4f16, FRSQRTEv4f32)>; 1208 1209// ASIMD reciprocal and square root estimate, Q-form F16 1210def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPEv8f16, FRSQRTEv8f16)>; 1211 1212// ASIMD reciprocal exponent 1213def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRECPXv")>; 1214 1215// ASIMD reciprocal step 1216def : InstRW<[N2Write_4cyc_1V], (instregex "^FRECPSv", "^FRSQRTSv")>; 1217 1218// ASIMD table lookup, 3 table regs 1219def : InstRW<[N2Write_4cyc_2V], (instrs TBLv8i8Three, TBLv16i8Three)>; 1220 1221// ASIMD table lookup, 4 table regs 1222def : InstRW<[N2Write_4cyc_4V], (instrs TBLv8i8Four, TBLv16i8Four)>; 1223 1224// ASIMD table lookup extension, 2 table reg 1225def : InstRW<[N2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>; 1226 1227// ASIMD table lookup extension, 3 table reg 1228def : InstRW<[N2Write_6cyc_4V], (instrs TBXv8i8Three, TBXv16i8Three)>; 1229 1230// ASIMD table lookup extension, 4 table reg 1231def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>; 1232 1233// ASIMD transfer, gen reg to element 1234def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSvi(8|16|32|64)gpr$")>; 1235 1236// ASIMD load instructions 1237// ----------------------------------------------------------------------------- 1238 1239// ASIMD load, 1 element, multiple, 1 reg, D-form 1240def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>; 1241def : InstRW<[WriteAdr, N2Write_6cyc_1L], 1242 (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>; 1243 1244// ASIMD load, 1 element, multiple, 1 reg, Q-form 1245def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>; 1246def : InstRW<[WriteAdr, N2Write_6cyc_1L], 1247 (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>; 1248 1249// ASIMD load, 1 element, multiple, 2 reg, D-form 1250def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>; 1251def : InstRW<[WriteAdr, N2Write_6cyc_2L], 1252 (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>; 1253 1254// ASIMD load, 1 element, multiple, 2 reg, Q-form 1255def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>; 1256def : InstRW<[WriteAdr, N2Write_6cyc_2L], 1257 (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>; 1258 1259// ASIMD load, 1 element, multiple, 3 reg, D-form 1260def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>; 1261def : InstRW<[WriteAdr, N2Write_6cyc_3L], 1262 (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>; 1263 1264// ASIMD load, 1 element, multiple, 3 reg, Q-form 1265def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>; 1266def : InstRW<[WriteAdr, N2Write_6cyc_3L], 1267 (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>; 1268 1269// ASIMD load, 1 element, multiple, 4 reg, D-form 1270def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>; 1271def : InstRW<[WriteAdr, N2Write_7cyc_4L], 1272 (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>; 1273 1274// ASIMD load, 1 element, multiple, 4 reg, Q-form 1275def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>; 1276def : InstRW<[WriteAdr, N2Write_7cyc_4L], 1277 (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>; 1278 1279// ASIMD load, 1 element, one lane, B/H/S 1280// ASIMD load, 1 element, one lane, D 1281def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)$")>; 1282def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)_POST$")>; 1283 1284// ASIMD load, 1 element, all lanes, D-form, B/H/S 1285// ASIMD load, 1 element, all lanes, D-form, D 1286def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)$")>; 1287def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>; 1288 1289// ASIMD load, 1 element, all lanes, Q-form 1290def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>; 1291def : InstRW<[WriteAdr, N2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; 1292 1293// ASIMD load, 2 element, multiple, D-form, B/H/S 1294def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)$")>; 1295def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)_POST$")>; 1296 1297// ASIMD load, 2 element, multiple, Q-form, B/H/S 1298// ASIMD load, 2 element, multiple, Q-form, D 1299def : InstRW<[N2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)$")>; 1300def : InstRW<[WriteAdr, N2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>; 1301 1302// ASIMD load, 2 element, one lane, B/H 1303// ASIMD load, 2 element, one lane, S 1304// ASIMD load, 2 element, one lane, D 1305def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)$")>; 1306def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)_POST$")>; 1307 1308// ASIMD load, 2 element, all lanes, D-form, B/H/S 1309// ASIMD load, 2 element, all lanes, D-form, D 1310def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)$")>; 1311def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>; 1312 1313// ASIMD load, 2 element, all lanes, Q-form 1314def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>; 1315def : InstRW<[WriteAdr, N2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; 1316 1317// ASIMD load, 3 element, multiple, D-form, B/H/S 1318def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)$")>; 1319def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)_POST$")>; 1320 1321// ASIMD load, 3 element, multiple, Q-form, B/H/S 1322def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s)$")>; 1323def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s)_POST$")>; 1324 1325// ASIMD load, 3 element, multiple, Q-form, D 1326def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Threev(2d)$")>; 1327def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Threev(2d)_POST$")>; 1328 1329// ASIMD load, 3 element, one lane, B/H 1330// ASIMD load, 3 element, one lane, S 1331// ASIMD load, 3 element, one lane, D 1332def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)$")>; 1333def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)_POST$")>; 1334 1335// ASIMD load, 3 element, all lanes, D-form, B/H/S 1336// ASIMD load, 3 element, all lanes, D-form, D 1337def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)$")>; 1338def : InstRW<[WriteAdr, N2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>; 1339 1340// ASIMD load, 3 element, all lanes, Q-form, B/H/S 1341// ASIMD load, 3 element, all lanes, Q-form, D 1342def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)$")>; 1343def : InstRW<[WriteAdr, N2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>; 1344 1345// ASIMD load, 4 element, multiple, D-form, B/H/S 1346def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)$")>; 1347def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; 1348 1349// ASIMD load, 4 element, multiple, Q-form, B/H/S 1350// ASIMD load, 4 element, multiple, Q-form, D 1351def : InstRW<[N2Write_9cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)$")>; 1352def : InstRW<[WriteAdr, N2Write_9cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>; 1353 1354// ASIMD load, 4 element, one lane, B/H 1355// ASIMD load, 4 element, one lane, S 1356// ASIMD load, 4 element, one lane, D 1357def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)$")>; 1358def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)_POST$")>; 1359 1360// ASIMD load, 4 element, all lanes, D-form, B/H/S 1361// ASIMD load, 4 element, all lanes, D-form, D 1362def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>; 1363def : InstRW<[WriteAdr, N2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>; 1364 1365// ASIMD load, 4 element, all lanes, Q-form, B/H/S 1366// ASIMD load, 4 element, all lanes, Q-form, D 1367def : InstRW<[N2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>; 1368def : InstRW<[WriteAdr, N2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>; 1369 1370// ASIMD store instructions 1371// ----------------------------------------------------------------------------- 1372 1373// ASIMD store, 1 element, multiple, 1 reg, D-form 1374def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(8b|4h|2s|1d)$")>; 1375def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; 1376 1377// ASIMD store, 1 element, multiple, 1 reg, Q-form 1378def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(16b|8h|4s|2d)$")>; 1379def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; 1380 1381// ASIMD store, 1 element, multiple, 2 reg, D-form 1382def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Twov(8b|4h|2s|1d)$")>; 1383def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; 1384 1385// ASIMD store, 1 element, multiple, 2 reg, Q-form 1386def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Twov(16b|8h|4s|2d)$")>; 1387def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; 1388 1389// ASIMD store, 1 element, multiple, 3 reg, D-form 1390def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Threev(8b|4h|2s|1d)$")>; 1391def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; 1392 1393// ASIMD store, 1 element, multiple, 3 reg, Q-form 1394def : InstRW<[N2Write_2cyc_3L01_3V], (instregex "ST1Threev(16b|8h|4s|2d)$")>; 1395def : InstRW<[WriteAdr, N2Write_2cyc_3L01_3V], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; 1396 1397// ASIMD store, 1 element, multiple, 4 reg, D-form 1398def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; 1399def : InstRW<[WriteAdr, N2Write_2cyc_2L01_2V], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; 1400 1401// ASIMD store, 1 element, multiple, 4 reg, Q-form 1402def : InstRW<[N2Write_2cyc_4L01_4V], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; 1403def : InstRW<[WriteAdr, N2Write_2cyc_4L01_4V], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; 1404 1405// ASIMD store, 1 element, one lane, B/H/S 1406// ASIMD store, 1 element, one lane, D 1407def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST1i(8|16|32|64)$")>; 1408def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST1i(8|16|32|64)_POST$")>; 1409 1410// ASIMD store, 2 element, multiple, D-form, B/H/S 1411def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST2Twov(8b|4h|2s)$")>; 1412def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST2Twov(8b|4h|2s)_POST$")>; 1413 1414// ASIMD store, 2 element, multiple, Q-form, B/H/S 1415// ASIMD store, 2 element, multiple, Q-form, D 1416def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "ST2Twov(16b|8h|4s|2d)$")>; 1417def : InstRW<[WriteAdr, N2Write_4cyc_2L01_2V], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>; 1418 1419// ASIMD store, 2 element, one lane, B/H/S 1420// ASIMD store, 2 element, one lane, D 1421def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST2i(8|16|32|64)$")>; 1422def : InstRW<[WriteAdr, N2Write_4cyc_1L01_1V], (instregex "ST2i(8|16|32|64)_POST$")>; 1423 1424// ASIMD store, 3 element, multiple, D-form, B/H/S 1425def : InstRW<[N2Write_5cyc_2L01_2V], (instregex "ST3Threev(8b|4h|2s)$")>; 1426def : InstRW<[WriteAdr, N2Write_5cyc_2L01_2V], (instregex "ST3Threev(8b|4h|2s)_POST$")>; 1427 1428// ASIMD store, 3 element, multiple, Q-form, B/H/S 1429// ASIMD store, 3 element, multiple, Q-form, D 1430def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST3Threev(16b|8h|4s|2d)$")>; 1431def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>; 1432 1433// ASIMD store, 3 element, one lane, B/H 1434// ASIMD store, 3 element, one lane, S 1435// ASIMD store, 3 element, one lane, D 1436def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST3i(8|16|32|64)$")>; 1437def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST3i(8|16|32|64)_POST$")>; 1438 1439// ASIMD store, 4 element, multiple, D-form, B/H/S 1440def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST4Fourv(8b|4h|2s)$")>; 1441def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; 1442 1443// ASIMD store, 4 element, multiple, Q-form, B/H/S 1444def : InstRW<[N2Write_7cyc_6L01_6V], (instregex "ST4Fourv(16b|8h|4s)$")>; 1445def : InstRW<[WriteAdr, N2Write_7cyc_6L01_6V], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; 1446 1447// ASIMD store, 4 element, multiple, Q-form, D 1448def : InstRW<[N2Write_5cyc_4L01_4V], (instregex "ST4Fourv(2d)$")>; 1449def : InstRW<[WriteAdr, N2Write_5cyc_4L01_4V], (instregex "ST4Fourv(2d)_POST$")>; 1450 1451// ASIMD store, 4 element, one lane, B/H/S 1452def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST4i(8|16|32)$")>; 1453def : InstRW<[WriteAdr, N2Write_6cyc_3L01_3V], (instregex "ST4i(8|16|32)_POST$")>; 1454 1455// ASIMD store, 4 element, one lane, D 1456def : InstRW<[N2Write_4cyc_3L01_3V], (instregex "ST4i(64)$")>; 1457def : InstRW<[WriteAdr, N2Write_4cyc_3L01_3V], (instregex "ST4i(64)_POST$")>; 1458 1459// Cryptography extensions 1460// ----------------------------------------------------------------------------- 1461 1462// Crypto AES ops 1463def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>; 1464 1465// Crypto polynomial (64x64) multiply long 1466def : InstRW<[N2Write_2cyc_1V0], (instrs PMULLv1i64, PMULLv2i64)>; 1467 1468// Crypto SHA1 hash acceleration op 1469// Crypto SHA1 schedule acceleration ops 1470def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>; 1471 1472// Crypto SHA1 hash acceleration ops 1473// Crypto SHA256 hash acceleration ops 1474def : InstRW<[N2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>; 1475 1476// Crypto SHA256 schedule acceleration ops 1477def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>; 1478 1479// Crypto SHA512 hash acceleration ops 1480def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>; 1481 1482// Crypto SHA3 ops 1483def : InstRW<[N2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>; 1484 1485// Crypto SM3 ops 1486def : InstRW<[N2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$", 1487 "^SM3TT[12][AB]$")>; 1488 1489// Crypto SM4 ops 1490def : InstRW<[N2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>; 1491 1492// CRC 1493// ----------------------------------------------------------------------------- 1494 1495def : InstRW<[N2Write_2cyc_1M0], (instregex "^CRC32")>; 1496 1497// SVE Predicate instructions 1498// ----------------------------------------------------------------------------- 1499 1500// Loop control, based on predicate 1501def : InstRW<[N2Write_2cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP, 1502 BRKB_PPmP, BRKB_PPzP)>; 1503 1504// Loop control, based on predicate and flag setting 1505def : InstRW<[N2Write_3cyc_1M], (instrs BRKAS_PPzP, BRKBS_PPzP)>; 1506 1507// Loop control, propagating 1508def : InstRW<[N2Write_2cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>; 1509 1510// Loop control, propagating and flag setting 1511def : InstRW<[N2Write_3cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP, 1512 BRKPBS_PPzPP)>; 1513 1514// Loop control, based on GPR 1515def : InstRW<[N2Write_3cyc_1M], 1516 (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>; 1517 1518def : InstRW<[N2Write_3cyc_1M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>; 1519 1520// Loop terminate 1521def : InstRW<[N2Write_1cyc_1M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>; 1522 1523// Predicate counting scalar 1524def : InstRW<[N2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>; 1525def : InstRW<[N2Write_2cyc_1M], 1526 (instregex "^(CNT|DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$", 1527 "^SQ(DEC|INC)[BHWD]_XPiWdI$", 1528 "^(UQDEC|UQINC)[BHWD]_WPiI$")>; 1529 1530// Predicate counting scalar, active predicate 1531def : InstRW<[N2Write_2cyc_1M], 1532 (instregex "^CNTP_XPP_[BHSD]$", 1533 "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$", 1534 "^(UQDEC|UQINC)P_WP_[BHSD]$", 1535 "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>; 1536 1537// Predicate counting vector, active predicate 1538def : InstRW<[N2Write_7cyc_1M_1M0_1V], 1539 (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>; 1540 1541// Predicate logical 1542def : InstRW<[N2Write_1cyc_1M0], 1543 (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>; 1544 1545// Predicate logical, flag setting 1546def : InstRW<[N2Write_2cyc_1M0_1M], 1547 (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>; 1548 1549// Predicate reverse 1550def : InstRW<[N2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>; 1551 1552// Predicate select 1553def : InstRW<[N2Write_1cyc_1M0], (instrs SEL_PPPP)>; 1554 1555// Predicate set 1556def : InstRW<[N2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>; 1557 1558// Predicate set/initialize, set flags 1559def : InstRW<[N2Write_3cyc_1M], (instregex "^PTRUES_[BHSD]$")>; 1560 1561// Predicate find first/next 1562def : InstRW<[N2Write_3cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>; 1563 1564// Predicate test 1565def : InstRW<[N2Write_1cyc_1M], (instrs PTEST_PP)>; 1566 1567// Predicate transpose 1568def : InstRW<[N2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSDQ]$")>; 1569 1570// Predicate unpack and widen 1571def : InstRW<[N2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>; 1572 1573// Predicate zip/unzip 1574def : InstRW<[N2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>; 1575 1576// SVE integer instructions 1577// ----------------------------------------------------------------------------- 1578 1579// Arithmetic, absolute diff 1580def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]", 1581 "^[SU]ABD_ZPZZ_[BHSD]")>; 1582 1583// Arithmetic, absolute diff accum 1584def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>; 1585 1586// Arithmetic, absolute diff accum long 1587def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>; 1588 1589// Arithmetic, absolute diff long 1590def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>; 1591 1592// Arithmetic, basic 1593def : InstRW<[N2Write_2cyc_1V], 1594 (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]", 1595 "^(ADD|SUB)_ZZZ_[BHSD]", 1596 "^(ADD|SUB|SUBR)_ZPZZ_[BHSD]", 1597 "^(ADD|SUB|SUBR)_ZI_[BHSD]", 1598 "^ADR_[SU]XTW_ZZZ_D_[0123]", 1599 "^ADR_LSL_ZZZ_[SD]_[0123]", 1600 "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]", 1601 "^SADDLBT_ZZZ_[HSD]", 1602 "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]", 1603 "^SSUBL(BT|TB)_ZZZ_[HSD]")>; 1604 1605// Arithmetic, complex 1606def : InstRW<[N2Write_2cyc_1V], 1607 (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]", 1608 "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]", 1609 "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]", 1610 "^[SU]Q(ADD|SUB)_ZI_[BHSD]", 1611 "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]", 1612 "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]")>; 1613 1614// Arithmetic, large integer 1615def : InstRW<[N2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>; 1616 1617// Arithmetic, pairwise add 1618def : InstRW<[N2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>; 1619 1620// Arithmetic, pairwise add and accum long 1621def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>; 1622 1623// Arithmetic, shift 1624def : InstRW<[N2Write_2cyc_1V1], 1625 (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]", 1626 "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]", 1627 "^(ASR|LSL|LSR)_ZPmI_[BHSD]", 1628 "^(ASR|LSL|LSR)_ZPmZ_[BHSD]", 1629 "^(ASR|LSL|LSR)_ZZI_[BHSD]", 1630 "^(ASR|LSL|LSR)_ZPZ[IZ]_[BHSD]", 1631 "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]")>; 1632 1633// Arithmetic, shift and accumulate 1634def : InstRW<[N2Write_4cyc_1V1], 1635 (instregex "^(SRSRA|SSRA|URSRA|USRA)_ZZI_[BHSD]$")>; 1636 1637// Arithmetic, shift by immediate 1638// Arithmetic, shift by immediate and insert 1639def : InstRW<[N2Write_2cyc_1V1], 1640 (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>; 1641 1642// Arithmetic, shift complex 1643def : InstRW<[N2Write_4cyc_1V1], 1644 (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]", 1645 "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]", 1646 "^[SU]QR?SHL_ZPZZ_[BHSD]", 1647 "^(SQSHL|SQSHLU|UQSHL)_(ZPmI|ZPZI)_[BHSD]", 1648 "^SQSHRU?N[BT]_ZZI_[BHS]", 1649 "^UQR?SHRN[BT]_ZZI_[BHS]")>; 1650 1651// Arithmetic, shift right for divide 1652def : InstRW<[N2Write_4cyc_1V1], (instregex "^ASRD_(ZPmI|ZPZI)_[BHSD]")>; 1653 1654// Arithmetic, shift rounding 1655def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]RSHLR?_ZPmZ_[BHSD]", 1656 "^[SU]RSHL_ZPZZ_[BHSD]", 1657 "^[SU]RSHR_(ZPmI|ZPZI)_[BHSD]")>; 1658 1659// Bit manipulation 1660def : InstRW<[N2Write_6cyc_2V1], (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]")>; 1661 1662// Bitwise select 1663def : InstRW<[N2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>; 1664 1665// Count/reverse bits 1666def : InstRW<[N2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]")>; 1667 1668// Broadcast logical bitmask immediate to vector 1669def : InstRW<[N2Write_2cyc_1V], (instrs DUPM_ZI)>; 1670 1671// Compare and set flags 1672def : InstRW<[N2Write_4cyc_1V0_1M], 1673 (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$", 1674 "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>; 1675 1676// Complex add 1677def : InstRW<[N2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>; 1678 1679// Complex dot product 8-bit element 1680def : InstRW<[N2Write_3cyc_1V], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>; 1681 1682// Complex dot product 16-bit element 1683def : InstRW<[N2Write_4cyc_1V0], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>; 1684 1685// Complex multiply-add B, H, S element size 1686def : InstRW<[N2Write_4cyc_1V0], (instregex "^CMLA_ZZZ_[BHS]$", 1687 "^CMLA_ZZZI_[HS]$")>; 1688 1689// Complex multiply-add D element size 1690def : InstRW<[N2Write_5cyc_2V0], (instrs CMLA_ZZZ_D)>; 1691 1692// Conditional extract operations, scalar form 1693def : InstRW<[N2Write_8cyc_1M0_1V1_1V], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>; 1694 1695// Conditional extract operations, SIMD&FP scalar and vector forms 1696def : InstRW<[N2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$", 1697 "^COMPACT_ZPZ_[SD]$", 1698 "^SPLICE_ZPZZ?_[BHSD]$")>; 1699 1700// Convert to floating point, 64b to float or convert to double 1701def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[HSD]", 1702 "^[SU]CVTF_ZPmZ_StoD")>; 1703 1704// Convert to floating point, 32b to single or half 1705def : InstRW<[N2Write_4cyc_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]")>; 1706 1707// Convert to floating point, 16b to half 1708def : InstRW<[N2Write_6cyc_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH")>; 1709 1710// Copy, scalar 1711def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>; 1712 1713// Copy, scalar SIMD&FP or imm 1714def : InstRW<[N2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$", 1715 "^CPY_ZPzI_[BHSD]$")>; 1716 1717// Divides, 32 bit 1718def : InstRW<[N2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S", 1719 "^[SU]DIV_ZPZZ_S")>; 1720 1721// Divides, 64 bit 1722def : InstRW<[N2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D", 1723 "^[SU]DIV_ZPZZ_D")>; 1724 1725// Dot product, 8 bit 1726def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]DOT_ZZZI?_S$")>; 1727 1728// Dot product, 8 bit, using signed and unsigned integers 1729def : InstRW<[N2Write_3cyc_1V], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>; 1730 1731// Dot product, 16 bit 1732def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>; 1733 1734// Duplicate, immediate and indexed form 1735def : InstRW<[N2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$", 1736 "^DUP_ZZI_[BHSDQ]$")>; 1737 1738// Duplicate, scalar form 1739def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>; 1740 1741// Extend, sign or zero 1742def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]", 1743 "^[SU]XTH_ZPmZ_[SD]", 1744 "^[SU]XTW_ZPmZ_[D]")>; 1745 1746// Extract 1747def : InstRW<[N2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>; 1748 1749// Extract narrow saturating 1750def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$", 1751 "^SQXTUN[BT]_ZZ_[BHS]$")>; 1752 1753// Extract/insert operation, SIMD and FP scalar form 1754def : InstRW<[N2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$", 1755 "^INSR_ZV_[BHSD]$")>; 1756 1757// Extract/insert operation, scalar 1758def : InstRW<[N2Write_5cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]$", 1759 "^INSR_ZR_[BHSD]$")>; 1760 1761// Histogram operations 1762def : InstRW<[N2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$", 1763 "^HISTSEG_ZZZ$")>; 1764 1765// Horizontal operations, B, H, S form, immediate operands only 1766def : InstRW<[N2Write_4cyc_1V0], (instregex "^INDEX_II_[BHS]$")>; 1767 1768// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar 1769// operands only / immediate, scalar operands 1770def : InstRW<[N2Write_7cyc_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>; 1771 1772// Horizontal operations, D form, immediate operands only 1773def : InstRW<[N2Write_5cyc_2V0], (instrs INDEX_II_D)>; 1774 1775// Horizontal operations, D form, scalar, immediate operands)/ scalar operands 1776// only / immediate, scalar operands 1777def : InstRW<[N2Write_8cyc_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>; 1778 1779// Logical 1780def : InstRW<[N2Write_2cyc_1V], 1781 (instregex "^(AND|EOR|ORR)_ZI", 1782 "^(AND|BIC|EOR|ORR)_ZZZ", 1783 "^EOR(BT|TB)_ZZZ_[BHSD]", 1784 "^(AND|BIC|EOR|NOT|ORR)_(ZPmZ|ZPZZ)_[BHSD]", 1785 "^NOT_ZPmZ_[BHSD]")>; 1786 1787// Max/min, basic and pairwise 1788def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]", 1789 "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]", 1790 "^[SU](MAX|MIN)_ZPZZ_[BHSD]")>; 1791 1792// Matching operations 1793def : InstRW<[N2Write_2cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>; 1794 1795// Matrix multiply-accumulate 1796def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>; 1797 1798// Move prefix 1799def : InstRW<[N2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$", 1800 "^MOVPRFX_ZZ$")>; 1801 1802// Multiply, B, H, S element size 1803def : InstRW<[N2Write_4cyc_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]", 1804 "^MUL_ZPZZ_[BHS]", 1805 "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]", 1806 "^[SU]MULH_ZPZZ_[BHS]")>; 1807 1808// Multiply, D element size 1809def : InstRW<[N2Write_5cyc_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D", 1810 "^MUL_ZPZZ_D", 1811 "^[SU]MULH_(ZPmZ|ZZZ)_D", 1812 "^[SU]MULH_ZPZZ_D")>; 1813 1814// Multiply long 1815def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$", 1816 "^[SU]MULL[BT]_ZZZ_[HSD]$")>; 1817 1818// Multiply accumulate, B, H, S element size 1819def : InstRW<[N2Write_4cyc_1V0], (instregex "^ML[AS]_ZZZI_[BHS]$", 1820 "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_[BHS]")>; 1821 1822// Multiply accumulate, D element size 1823def : InstRW<[N2Write_5cyc_2V0], (instregex "^ML[AS]_ZZZI_D$", 1824 "^(ML[AS]|MAD|MSB)_(ZPmZZ|ZPZZZ)_D")>; 1825 1826// Multiply accumulate long 1827def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$", 1828 "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>; 1829 1830// Multiply accumulate saturating doubling long regular 1831def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$", 1832 "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>; 1833 1834// Multiply saturating doubling high, B, H, S element size 1835def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULH_ZZZ_[BHS]$", 1836 "^SQDMULH_ZZZI_[HS]$")>; 1837 1838// Multiply saturating doubling high, D element size 1839def : InstRW<[N2Write_5cyc_2V0], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>; 1840 1841// Multiply saturating doubling long 1842def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$", 1843 "^SQDMULL[BT]_ZZZI_[SD]$")>; 1844 1845// Multiply saturating rounding doubling regular/complex accumulate, B, H, S 1846// element size 1847def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$", 1848 "^SQRDCMLAH_ZZZ_[BHS]$", 1849 "^SQRDML[AS]H_ZZZI_[HS]$", 1850 "^SQRDCMLAH_ZZZI_[HS]$")>; 1851 1852// Multiply saturating rounding doubling regular/complex accumulate, D element 1853// size 1854def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDML[AS]H_ZZZI?_D$", 1855 "^SQRDCMLAH_ZZZ_D$")>; 1856 1857// Multiply saturating rounding doubling regular/complex, B, H, S element size 1858def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMULH_ZZZ_[BHS]$", 1859 "^SQRDMULH_ZZZI_[HS]$")>; 1860 1861// Multiply saturating rounding doubling regular/complex, D element size 1862def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDMULH_ZZZI?_D$")>; 1863 1864// Multiply/multiply long, (8x8) polynomial 1865def : InstRW<[N2Write_2cyc_1V0], (instregex "^PMUL_ZZZ_B$", 1866 "^PMULL[BT]_ZZZ_[HDQ]$")>; 1867 1868// Predicate counting vector 1869def : InstRW<[N2Write_2cyc_1V0], 1870 (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>; 1871 1872// Reciprocal estimate 1873def : InstRW<[N2Write_4cyc_2V0], (instregex "^URECPE_ZPmZ_S", "^URSQRTE_ZPmZ_S")>; 1874 1875// Reduction, arithmetic, B form 1876def : InstRW<[N2Write_11cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>; 1877 1878// Reduction, arithmetic, H form 1879def : InstRW<[N2Write_9cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>; 1880 1881// Reduction, arithmetic, S form 1882def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>; 1883 1884// Reduction, arithmetic, D form 1885def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>; 1886 1887// Reduction, logical 1888def : InstRW<[N2Write_6cyc_1V_1V1], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>; 1889 1890// Reverse, vector 1891def : InstRW<[N2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$", 1892 "^REVB_ZPmZ_[HSD]$", 1893 "^REVH_ZPmZ_[SD]$", 1894 "^REVW_ZPmZ_D$")>; 1895 1896// Select, vector form 1897def : InstRW<[N2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>; 1898 1899// Table lookup 1900def : InstRW<[N2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>; 1901 1902// Table lookup extension 1903def : InstRW<[N2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>; 1904 1905// Transpose, vector form 1906def : InstRW<[N2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>; 1907 1908// Unpack and extend 1909def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>; 1910 1911// Zip/unzip 1912def : InstRW<[N2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>; 1913 1914// SVE floating-point instructions 1915// ----------------------------------------------------------------------------- 1916 1917// Floating point absolute value/difference 1918def : InstRW<[N2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]", 1919 "^FABD_ZPZZ_[HSD]", 1920 "^FABS_ZPmZ_[HSD]")>; 1921 1922// Floating point arithmetic 1923def : InstRW<[N2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]", 1924 "^F(ADD|SUB)_ZPZ[IZ]_[HSD]", 1925 "^FADDP_ZPmZZ_[HSD]", 1926 "^FNEG_ZPmZ_[HSD]", 1927 "^FSUBR_ZPm[IZ]_[HSD]", 1928 "^FSUBR_(ZPZI|ZPZZ)_[HSD]")>; 1929 1930// Floating point associative add, F16 1931def : InstRW<[N2Write_10cyc_1V1], (instrs FADDA_VPZ_H)>; 1932 1933// Floating point associative add, F32 1934def : InstRW<[N2Write_6cyc_1V1], (instrs FADDA_VPZ_S)>; 1935 1936// Floating point associative add, F64 1937def : InstRW<[N2Write_4cyc_1V], (instrs FADDA_VPZ_D)>; 1938 1939// Floating point compare 1940def : InstRW<[N2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]$", 1941 "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$", 1942 "^FCM(LE|LT)_PPzZ0_[HSD]$", 1943 "^FCMUO_PPzZZ_[HSD]$")>; 1944 1945// Floating point complex add 1946def : InstRW<[N2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>; 1947 1948// Floating point complex multiply add 1949def : InstRW<[N2Write_5cyc_1V], (instregex "^FCMLA_ZPmZZ_[HSD]$", 1950 "^FCMLA_ZZZI_[HS]$")>; 1951 1952// Floating point convert, long or narrow (F16 to F32 or F32 to F16) 1953def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)", 1954 "^FCVTLT_ZPmZ_HtoS", 1955 "^FCVTNT_ZPmZ_StoH")>; 1956 1957// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 1958// or F64 to F16) 1959def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)", 1960 "^FCVTLT_ZPmZ_StoD", 1961 "^FCVTNT_ZPmZ_DtoS")>; 1962 1963// Floating point convert, round to odd 1964def : InstRW<[N2Write_3cyc_1V0], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>; 1965 1966// Floating point base2 log, F16 1967def : InstRW<[N2Write_6cyc_4V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_H")>; 1968 1969// Floating point base2 log, F32 1970def : InstRW<[N2Write_4cyc_2V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_S")>; 1971 1972// Floating point base2 log, F64 1973def : InstRW<[N2Write_3cyc_1V0], (instregex "^FLOGB_(ZPmZ|ZPZZ)_D")>; 1974 1975// Floating point convert to integer, F16 1976def : InstRW<[N2Write_6cyc_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH")>; 1977 1978// Floating point convert to integer, F32 1979def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)")>; 1980 1981// Floating point convert to integer, F64 1982def : InstRW<[N2Write_3cyc_1V0], 1983 (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)")>; 1984 1985// Floating point copy 1986def : InstRW<[N2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$", 1987 "^FDUP_ZI_[HSD]$")>; 1988 1989// Floating point divide, F16 1990def : InstRW<[N2Write_13cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_H")>; 1991 1992// Floating point divide, F32 1993def : InstRW<[N2Write_10cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_S")>; 1994 1995// Floating point divide, F64 1996def : InstRW<[N2Write_15cyc_1V0], (instregex "^FDIVR?_(ZPmZ|ZPZZ)_D")>; 1997 1998// Floating point min/max pairwise 1999def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]")>; 2000 2001// Floating point min/max 2002def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]", 2003 "^F(MAX|MIN)(NM)?_ZPZ[IZ]_[HSD]")>; 2004 2005// Floating point multiply 2006def : InstRW<[N2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]", 2007 "^FMULX_ZPZZ_[HSD]", 2008 "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]", 2009 "^FMUL_ZPZ[IZ]_[HSD]")>; 2010 2011// Floating point multiply accumulate 2012def : InstRW<[N2Write_4cyc_1V], (instregex "^F(N?M(AD|SB)|N?ML[AS])_ZPmZZ_[HSD]$", 2013 "^FN?ML[AS]_ZPZZZ_[HSD]", 2014 "^FML[AS]_ZZZI_[HSD]$")>; 2015 2016// Floating point multiply add/sub accumulate long 2017def : InstRW<[N2Write_4cyc_1V], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>; 2018 2019// Floating point reciprocal estimate, F16 2020def : InstRW<[N2Write_6cyc_4V0], (instregex "^FR(ECP|SQRT)E_ZZ_H", "^FRECPX_ZPmZ_H")>; 2021 2022// Floating point reciprocal estimate, F32 2023def : InstRW<[N2Write_4cyc_2V0], (instregex "^FR(ECP|SQRT)E_ZZ_S", "^FRECPX_ZPmZ_S")>; 2024 2025// Floating point reciprocal estimate, F64 2026def : InstRW<[N2Write_3cyc_1V0], (instregex "^FR(ECP|SQRT)E_ZZ_D", "^FRECPX_ZPmZ_D")>; 2027 2028// Floating point reciprocal step 2029def : InstRW<[N2Write_4cyc_1V0], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>; 2030 2031// Floating point reduction, F16 2032def : InstRW<[N2Write_6cyc_2V], 2033 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>; 2034 2035// Floating point reduction, F32 2036def : InstRW<[N2Write_4cyc_1V], 2037 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>; 2038 2039// Floating point reduction, F64 2040def : InstRW<[N2Write_2cyc_1V], 2041 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>; 2042 2043// Floating point round to integral, F16 2044def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H")>; 2045 2046// Floating point round to integral, F32 2047def : InstRW<[N2Write_4cyc_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S")>; 2048 2049// Floating point round to integral, F64 2050def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D")>; 2051 2052// Floating point square root, F16 2053def : InstRW<[N2Write_13cyc_1V0], (instregex "^FSQRT_ZPmZ_H")>; 2054 2055// Floating point square root, F32 2056def : InstRW<[N2Write_10cyc_1V0], (instregex "^FSQRT_ZPmZ_S")>; 2057 2058// Floating point square root, F64 2059def : InstRW<[N2Write_16cyc_1V0], (instregex "^FSQRT_ZPmZ_D")>; 2060 2061// Floating point trigonometric exponentiation 2062def : InstRW<[N2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>; 2063 2064// Floating point trigonometric multiply add 2065def : InstRW<[N2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>; 2066 2067// Floating point trigonometric, miscellaneous 2068def : InstRW<[N2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>; 2069 2070// SVE BFloat16 (BF16) instructions 2071// ----------------------------------------------------------------------------- 2072 2073// Convert, F32 to BF16 2074def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>; 2075 2076// Dot product 2077def : InstRW<[N2Write_4cyc_1V], (instrs BFDOT_ZZI, BFDOT_ZZZ)>; 2078 2079// Matrix multiply accumulate 2080def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA_ZZZ)>; 2081 2082// Multiply accumulate long 2083def : InstRW<[N2Write_4cyc_1V], (instregex "^BFMLAL[BT]_ZZZ(I)?$")>; 2084 2085// SVE Load instructions 2086// ----------------------------------------------------------------------------- 2087 2088// Load vector 2089def : InstRW<[N2Write_6cyc_1L], (instrs LDR_ZXI)>; 2090 2091// Load predicate 2092def : InstRW<[N2Write_6cyc_1L_1M], (instrs LDR_PXI)>; 2093 2094// Contiguous load, scalar + imm 2095def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1[BHWD]_IMM$", 2096 "^LD1S?B_[HSD]_IMM$", 2097 "^LD1S?H_[SD]_IMM$", 2098 "^LD1S?W_D_IMM$" )>; 2099// Contiguous load, scalar + scalar 2100def : InstRW<[N2Write_6cyc_1L01], (instregex "^LD1[BHWD]$", 2101 "^LD1S?B_[HSD]$", 2102 "^LD1S?H_[SD]$", 2103 "^LD1S?W_D$" )>; 2104 2105// Contiguous load broadcast, scalar + imm 2106def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1R[BHWD]_IMM$", 2107 "^LD1RSW_IMM$", 2108 "^LD1RS?B_[HSD]_IMM$", 2109 "^LD1RS?H_[SD]_IMM$", 2110 "^LD1RS?W_D_IMM$", 2111 "^LD1RQ_[BHWD]_IMM$")>; 2112 2113// Contiguous load broadcast, scalar + scalar 2114def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>; 2115 2116// Non temporal load, scalar + imm 2117def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>; 2118 2119// Non temporal load, scalar + scalar 2120def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>; 2121 2122// Non temporal gather load, vector + scalar 32-bit element size 2123def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LDNT1[BHW]_ZZR_S$", 2124 "^LDNT1S[BH]_ZZR_S$")>; 2125 2126// Non temporal gather load, vector + scalar 64-bit element size 2127def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D$")>; 2128def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D)>; 2129 2130// Contiguous first faulting load, scalar + scalar 2131def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDFF1[BHWD]$", 2132 "^LDFF1S?B_[HSD]$", 2133 "^LDFF1S?H_[SD]$", 2134 "^LDFF1S?W_D$")>; 2135 2136// Contiguous non faulting load, scalar + imm 2137def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNF1[BHWD]_IMM$", 2138 "^LDNF1S?B_[HSD]_IMM$", 2139 "^LDNF1S?H_[SD]_IMM$", 2140 "^LDNF1S?W_D_IMM$")>; 2141 2142// Contiguous Load two structures to two vectors, scalar + imm 2143def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>; 2144 2145// Contiguous Load two structures to two vectors, scalar + scalar 2146def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD2[BHWD]$")>; 2147 2148// Contiguous Load three structures to three vectors, scalar + imm 2149def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD3[BHWD]_IMM$")>; 2150 2151// Contiguous Load three structures to three vectors, scalar + scalar 2152def : InstRW<[N2Write_10cyc_1V_1L_1S], (instregex "^LD3[BHWD]$")>; 2153 2154// Contiguous Load four structures to four vectors, scalar + imm 2155def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>; 2156 2157// Contiguous Load four structures to four vectors, scalar + scalar 2158def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>; 2159 2160// Gather load, vector + imm, 32-bit element size 2161def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_IMM$", 2162 "^GLD(FF)?1W_IMM$")>; 2163 2164// Gather load, vector + imm, 64-bit element size 2165def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^GLD(FF)?1S?[BHW]_D_IMM$", 2166 "^GLD(FF)?1D_IMM$")>; 2167 2168// Gather load, 64-bit element size 2169def : InstRW<[N2Write_9cyc_2L_2V], 2170 (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW(_SCALED)?$", 2171 "^GLD(FF)?1S?[BHW]_D(_SCALED)?$", 2172 "^GLD(FF)?1D_[SU]XTW(_SCALED)?$", 2173 "^GLD(FF)?1D(_SCALED)?$")>; 2174 2175// Gather load, 32-bit scaled offset 2176def : InstRW<[N2Write_10cyc_2L_2V], 2177 (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED$", 2178 "^GLD(FF)?1W_[SU]XTW_SCALED")>; 2179 2180// Gather load, 32-bit unpacked unscaled offset 2181def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW$", 2182 "^GLD(FF)?1W_[SU]XTW$")>; 2183 2184// SVE Store instructions 2185// ----------------------------------------------------------------------------- 2186 2187// Store from predicate reg 2188def : InstRW<[N2Write_1cyc_1L01], (instrs STR_PXI)>; 2189 2190// Store from vector reg 2191def : InstRW<[N2Write_2cyc_1L01_1V], (instrs STR_ZXI)>; 2192 2193// Contiguous store, scalar + imm 2194def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BHWD]_IMM$", 2195 "^ST1B_[HSD]_IMM$", 2196 "^ST1H_[SD]_IMM$", 2197 "^ST1W_D_IMM$")>; 2198 2199// Contiguous store, scalar + scalar 2200def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>; 2201def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^ST1[BWD]$", 2202 "^ST1B_[HSD]$", 2203 "^ST1W_D$")>; 2204 2205// Contiguous store two structures from two vectors, scalar + imm 2206def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BHWD]_IMM$")>; 2207 2208// Contiguous store two structures from two vectors, scalar + scalar 2209def : InstRW<[N2Write_4cyc_1L01_1S_1V], (instrs ST2H)>; 2210 2211// Contiguous store two structures from two vectors, scalar + scalar 2212def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BWD]$")>; 2213 2214// Contiguous store three structures from three vectors, scalar + imm 2215def : InstRW<[N2Write_7cyc_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>; 2216 2217// Contiguous store three structures from three vectors, scalar + scalar 2218def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instrs ST3H)>; 2219 2220// Contiguous store three structures from three vectors, scalar + scalar 2221def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instregex "^ST3[BWD]$")>; 2222 2223// Contiguous store four structures from four vectors, scalar + imm 2224def : InstRW<[N2Write_11cyc_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>; 2225 2226// Contiguous store four structures from four vectors, scalar + scalar 2227def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instrs ST4H)>; 2228 2229// Contiguous store four structures from four vectors, scalar + scalar 2230def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instregex "^ST4[BWD]$")>; 2231 2232// Non temporal store, scalar + imm 2233def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>; 2234 2235// Non temporal store, scalar + scalar 2236def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>; 2237def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>; 2238 2239// Scatter non temporal store, vector + scalar 32-bit element size 2240def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^STNT1[BHW]_ZZR_S")>; 2241 2242// Scatter non temporal store, vector + scalar 64-bit element size 2243def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZZR_D")>; 2244 2245// Scatter store vector + imm 32-bit element size 2246def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_IMM$", 2247 "^SST1W_IMM$")>; 2248 2249// Scatter store vector + imm 64-bit element size 2250def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_IMM$", 2251 "^SST1D_IMM$")>; 2252 2253// Scatter store, 32-bit scaled offset 2254def : InstRW<[N2Write_4cyc_2L01_2V], 2255 (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>; 2256 2257// Scatter store, 32-bit unpacked unscaled offset 2258def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D_[SU]XTW$", 2259 "^SST1D_[SU]XTW$")>; 2260 2261// Scatter store, 32-bit unpacked scaled offset 2262def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_[SU]XTW_SCALED$", 2263 "^SST1D_[SU]XTW_SCALED$")>; 2264 2265// Scatter store, 32-bit unscaled offset 2266def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^SST1[BH]_S_[SU]XTW$", 2267 "^SST1W_[SU]XTW$")>; 2268 2269// Scatter store, 64-bit scaled offset 2270def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[HW]_D_SCALED$", 2271 "^SST1D_SCALED$")>; 2272 2273// Scatter store, 64-bit unscaled offset 2274def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^SST1[BHW]_D$", 2275 "^SST1D$")>; 2276 2277// SVE Miscellaneous instructions 2278// ----------------------------------------------------------------------------- 2279 2280// Read first fault register, unpredicated 2281def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P)>; 2282 2283// Read first fault register, predicated 2284def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz)>; 2285 2286// Read first fault register and set flags 2287def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>; 2288 2289// Set first fault register 2290// Write to first fault register 2291def : InstRW<[N2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>; 2292 2293// Prefetch 2294def : InstRW<[N2Write_4cyc_1L], (instregex "^PRF[BHWD]")>; 2295 2296// SVE Cryptographic instructions 2297// ----------------------------------------------------------------------------- 2298 2299// Crypto AES ops 2300def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]_ZZZ_B$", 2301 "^AESI?MC_ZZ_B$")>; 2302 2303// Crypto SHA3 ops 2304def : InstRW<[N2Write_2cyc_1V0], (instregex "^(BCAX|EOR3)_ZZZZ$", 2305 "^RAX1_ZZZ_D$", 2306 "^XAR_ZZZI_[BHSD]$")>; 2307 2308// Crypto SM4 ops 2309def : InstRW<[N2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>; 2310 2311} 2312