//=- AArch64SchedNeoverseN2.td - NeoverseN2 Scheduling Defs --*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Arm Neoverse N2 processors.
//
//===----------------------------------------------------------------------===//

// Machine-level parameters of the Neoverse N2 scheduling model.
def NeoverseN2Model : SchedMachineModel {
  // Dispatch width and buffer sizes.
  let IssueWidth            = 10;  // Micro-ops dispatched at a time.
  let MicroOpBufferSize     = 160; // Entries in micro-op re-order buffer.
  let LoopMicroOpBufferSize = 16;  // NOTE: Copied from Cortex-A57.

  // Latencies and penalties.
  let LoadLatency           = 4;   // Optimistic load latency.
  let MispredictPenalty     = 10;  // Extra cycles for mispredicted branch.

  let CompleteModel = 1;

  list<Predicate> UnsupportedFeatures = SMEUnsupported.F;
}

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Neoverse N2.
// Instructions are first fetched and then decoded into internal macro-ops
// (MOPs). From there, the MOPs proceed through register renaming and dispatch
// stages. A MOP can be split into two micro-ops further down the pipeline
// after the decode stage. Once dispatched, micro-ops wait for their operands
// and issue out-of-order to one of thirteen issue pipelines. Each issue
// pipeline can accept one micro-op per cycle.

let SchedModel = NeoverseN2Model in {

// Define the (13) issue ports.
def N2UnitB   : ProcResource<2>; // Branch 0/1
def N2UnitS   : ProcResource<2>; // Integer single Cycle 0/1
def N2UnitM0  : ProcResource<1>; // Integer multicycle 0
def N2UnitM1  : ProcResource<1>; // Integer multicycle 1
def N2UnitL01 : ProcResource<2>; // Load/Store 0/1
def N2UnitL2  : ProcResource<1>; // Load 2
def N2UnitD   : ProcResource<2>; // Store data 0/1
def N2UnitV0  : ProcResource<1>; // FP/ASIMD 0
def N2UnitV1  : ProcResource<1>; // FP/ASIMD 1

// Resource groups: a micro-op bound to a group may issue to any member.
// FP/ASIMD 0/1
def N2UnitV : ProcResGroup<[N2UnitV0, N2UnitV1]>;
// Integer single/multicycle 0/1
def N2UnitM : ProcResGroup<[N2UnitM0, N2UnitM1]>;
// Load/Store 0/1 and Load 2
def N2UnitL : ProcResGroup<[N2UnitL01, N2UnitL2]>;
// Integer single cycle 0/1 and single/multicycle 0/1
def N2UnitI : ProcResGroup<[N2UnitS, N2UnitM0, N2UnitM1]>;

// Define commonly used read types.

// No forwarding is provided for these types.
def : ReadAdvance<ReadI,       0>;
def : ReadAdvance<ReadISReg,   0>;
def : ReadAdvance<ReadIEReg,   0>;
def : ReadAdvance<ReadIM,      0>;
def : ReadAdvance<ReadIMA,     0>;
def : ReadAdvance<ReadID,      0>;
def : ReadAdvance<ReadExtrHi,  0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadST,      0>;
def : ReadAdvance<ReadVLD,     0>;

// Writes that consume no issue-port resources in this model.
def : WriteRes<WriteAtomic,  []> { let Unsupported = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint,    []> { let Latency = 1; }
def : WriteRes<WriteLDHi,    []> { let Latency = 4; }

//===----------------------------------------------------------------------===//
// Define customized scheduler read/write types specific to the Neoverse N2.

//===----------------------------------------------------------------------===//
// Define generic 1 micro-op types
//
// Naming scheme: N2Write_<latency>cyc_<count><unit>[_<count><unit>...].

// Branch and integer pipelines.
def N2Write_1cyc_1B   : SchedWriteRes<[N2UnitB]>   { let Latency = 1; }
def N2Write_1cyc_1I   : SchedWriteRes<[N2UnitI]>   { let Latency = 1; }
def N2Write_1cyc_1M   : SchedWriteRes<[N2UnitM]>   { let Latency = 1; }
def N2Write_1cyc_1M0  : SchedWriteRes<[N2UnitM0]>  { let Latency = 1; }
def N2Write_1cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 1; }
def N2Write_2cyc_1M   : SchedWriteRes<[N2UnitM]>   { let Latency = 2; }
def N2Write_3cyc_1M   : SchedWriteRes<[N2UnitM]>   { let Latency = 3; }

// M0 writes that keep the pipe busy for their whole latency.
def N2Write_2cyc_1M0 : SchedWriteRes<[N2UnitM0]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def N2Write_3cyc_1M0 : SchedWriteRes<[N2UnitM0]> {
  let Latency = 3;
  let ResourceCycles = [3];
}
def N2Write_5cyc_1M0 : SchedWriteRes<[N2UnitM0]> {
  let Latency = 5;
  let ResourceCycles = [5];
}
def N2Write_12cyc_1M0 : SchedWriteRes<[N2UnitM0]> {
  let Latency = 12;
  let ResourceCycles = [12];
}
def N2Write_20cyc_1M0 : SchedWriteRes<[N2UnitM0]> {
  let Latency = 20;
  let ResourceCycles = [20];
}

// Load pipelines.
def N2Write_4cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 4; }
def N2Write_6cyc_1L : SchedWriteRes<[N2UnitL]> { let Latency = 6; }

// Either FP/ASIMD pipeline.
def N2Write_2cyc_1V  : SchedWriteRes<[N2UnitV]> { let Latency = 2; }
def N2Write_3cyc_1V  : SchedWriteRes<[N2UnitV]> { let Latency = 3; }
def N2Write_4cyc_1V  : SchedWriteRes<[N2UnitV]> { let Latency = 4; }
def N2Write_5cyc_1V  : SchedWriteRes<[N2UnitV]> { let Latency = 5; }
def N2Write_12cyc_1V : SchedWriteRes<[N2UnitV]> { let Latency = 12; }

// FP/ASIMD pipeline 0 only.
def N2Write_2cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 2; }
def N2Write_3cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 3; }
def N2Write_4cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 4; }
def N2Write_7cyc_1V0 : SchedWriteRes<[N2UnitV0]> {
  let Latency = 7;
  let ResourceCycles = [7];
}
def N2Write_9cyc_1V0  : SchedWriteRes<[N2UnitV0]> { let Latency = 9; }
def N2Write_10cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 10; }
def N2Write_12cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 12; }
def N2Write_13cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 13; }
def N2Write_15cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 15; }
def N2Write_16cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 16; }
def N2Write_20cyc_1V0 : SchedWriteRes<[N2UnitV0]> { let Latency = 20; }

// FP/ASIMD pipeline 1 only.
def N2Write_2cyc_1V1  : SchedWriteRes<[N2UnitV1]> { let Latency = 2; }
def N2Write_3cyc_1V1  : SchedWriteRes<[N2UnitV1]> { let Latency = 3; }
def N2Write_4cyc_1V1  : SchedWriteRes<[N2UnitV1]> { let Latency = 4; }
def N2Write_6cyc_1V1  : SchedWriteRes<[N2UnitV1]> { let Latency = 6; }
def N2Write_10cyc_1V1 : SchedWriteRes<[N2UnitV1]> { let Latency = 10; }

def N2Write_6cyc_1L01 : SchedWriteRes<[N2UnitL01]> { let Latency = 6; }

//===----------------------------------------------------------------------===//
// Define generic 2 micro-op types

let NumMicroOps = 2 in {
def N2Write_1cyc_1B_1S  : SchedWriteRes<[N2UnitB, N2UnitS]>  { let Latency = 1; }
def N2Write_6cyc_1M0_1B : SchedWriteRes<[N2UnitM0, N2UnitB]> { let Latency = 6; }
def N2Write_9cyc_1M0_1L : SchedWriteRes<[N2UnitM0, N2UnitL]> { let Latency = 9; }
def N2Write_3cyc_1I_1M  : SchedWriteRes<[N2UnitI, N2UnitM]>  { let Latency = 3; }
def N2Write_4cyc_1I_1L  : SchedWriteRes<[N2UnitI, N2UnitL]>  { let Latency = 4; }
def N2Write_5cyc_1I_1L  : SchedWriteRes<[N2UnitI, N2UnitL]>  { let Latency = 5; }
def N2Write_6cyc_1I_1L  : SchedWriteRes<[N2UnitI, N2UnitL]>  { let Latency = 6; }
def N2Write_7cyc_1I_1L  : SchedWriteRes<[N2UnitI, N2UnitL]>  { let Latency = 7; }
} // NumMicroOps = 2

// Generic 2 micro-op types (continued).
//
// Every record name encodes its latency and the resources it consumes:
// N2Write_<latency>cyc_<count><unit>[_<count><unit>...]. The resource list of
// each record below is kept in agreement with its name.

def N2Write_1cyc_1L01_1D : SchedWriteRes<[N2UnitL01, N2UnitD]> {
  let Latency = 1; let NumMicroOps = 2;
}

def N2Write_5cyc_1M0_1V : SchedWriteRes<[N2UnitM0, N2UnitV]> {
  let Latency = 5; let NumMicroOps = 2;
}

def N2Write_2cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> {
  let Latency = 2; let NumMicroOps = 2;
}

def N2Write_4cyc_1V1_1V : SchedWriteRes<[N2UnitV1, N2UnitV]> {
  let Latency = 4; let NumMicroOps = 2;
}

def N2Write_4cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let Latency = 4; let NumMicroOps = 2;
}

// The following V0 pairs split the pipe occupancy between the two micro-ops.
def N2Write_10cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let Latency = 10; let NumMicroOps = 2;
  let ResourceCycles = [5, 5];
}

def N2Write_13cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let Latency = 13; let NumMicroOps = 2;
  let ResourceCycles = [6, 7];
}

def N2Write_15cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let Latency = 15; let NumMicroOps = 2;
  let ResourceCycles = [7, 8];
}

def N2Write_16cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let Latency = 16; let NumMicroOps = 2;
  let ResourceCycles = [8, 8];
}

def N2Write_4cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
  let Latency = 4; let NumMicroOps = 2;
}

def N2Write_6cyc_2V : SchedWriteRes<[N2UnitV, N2UnitV]> {
  let Latency = 6; let NumMicroOps = 2;
}

def N2Write_6cyc_2L : SchedWriteRes<[N2UnitL, N2UnitL]> {
  let Latency = 6; let NumMicroOps = 2;
}

def N2Write_8cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
  let Latency = 8; let NumMicroOps = 2;
}

def N2Write_4cyc_1L01_1V : SchedWriteRes<[N2UnitL01, N2UnitV]> {
  let Latency = 4; let NumMicroOps = 2;
}

def N2Write_3cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> {
  let Latency = 3; let NumMicroOps = 2;
}

def N2Write_2cyc_1M0_1M : SchedWriteRes<[N2UnitM0, N2UnitM]> {
  let Latency = 2; let NumMicroOps = 2;
}

def N2Write_6cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> {
  let Latency = 6; let NumMicroOps = 2;
}

def N2Write_4cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> {
  let Latency = 4; let NumMicroOps = 2;
}

def N2Write_5cyc_2V0 : SchedWriteRes<[N2UnitV0, N2UnitV0]> {
  let Latency = 5; let NumMicroOps = 2;
}

def N2Write_5cyc_1V1_1M0 : SchedWriteRes<[N2UnitV1, N2UnitM0]> {
  let Latency = 5; let NumMicroOps = 2;
}

def N2Write_7cyc_1M0_1V0 : SchedWriteRes<[N2UnitM0, N2UnitV0]> {
  let Latency = 7; let NumMicroOps = 2;
}

def N2Write_2cyc_1V0_1M : SchedWriteRes<[N2UnitV0, N2UnitM]> {
  let Latency = 2; let NumMicroOps = 2;
}

def N2Write_6cyc_1V_1V1 : SchedWriteRes<[N2UnitV, N2UnitV1]> {
  let Latency = 6; let NumMicroOps = 2;
}

def N2Write_6cyc_1L_1M : SchedWriteRes<[N2UnitL, N2UnitM]> {
  let Latency = 6; let NumMicroOps = 2;
}

def N2Write_6cyc_1L_1S : SchedWriteRes<[N2UnitL, N2UnitS]> {
  let Latency = 6; let NumMicroOps = 2;
}

def N2Write_9cyc_1L_1V : SchedWriteRes<[N2UnitL, N2UnitV]> {
  let Latency = 9; let NumMicroOps = 2;
}

def N2Write_4cyc_2V1 : SchedWriteRes<[N2UnitV1, N2UnitV1]> {
  let Latency = 4; let NumMicroOps = 2;
}

//===----------------------------------------------------------------------===//
// Define generic 3 micro-op types

def N2Write_1cyc_1L01_1D_1I : SchedWriteRes<[N2UnitL01, N2UnitD, N2UnitI]> {
  let Latency = 1; let NumMicroOps = 3;
}

def N2Write_2cyc_1L01_1V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitI]> {
  let Latency = 2; let NumMicroOps = 3;
}

def N2Write_2cyc_1L01_2V : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV]> {
  let Latency = 2; let NumMicroOps = 3;
}

def N2Write_7cyc_1M_1M0_1V : SchedWriteRes<[N2UnitM, N2UnitM0, N2UnitV]> {
  let Latency = 7; let NumMicroOps = 3;
}

def N2Write_8cyc_1M0_1V1_1V : SchedWriteRes<[N2UnitM0, N2UnitV1, N2UnitV]> {
  let Latency = 8; let NumMicroOps = 3;
}

// One V, one L and one S micro-op, as the name states. The third resource was
// previously a second N2UnitL, which over-subscribed the load pipes and left
// the S pipe unmodelled.
def N2Write_10cyc_1V_1L_1S : SchedWriteRes<[N2UnitV, N2UnitL, N2UnitS]> {
  let Latency = 10; let NumMicroOps = 3;
}

def N2Write_2cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
  let Latency = 2; let NumMicroOps = 3;
}

def N2Write_4cyc_1L01_1S_1V : SchedWriteRes<[N2UnitL01, N2UnitS, N2UnitV]> {
  let Latency = 4; let NumMicroOps = 3;
}

def N2Write_6cyc_3L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL]> {
  let Latency = 6; let NumMicroOps = 3;
}

def N2Write_8cyc_1L_2V : SchedWriteRes<[N2UnitL, N2UnitV, N2UnitV]> {
  let Latency = 8; let NumMicroOps = 3;
}

//===----------------------------------------------------------------------===//
// Define generic 4 micro-op types

def N2Write_2cyc_1L01_2V_1I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV,
                                             N2UnitI]> {
  let Latency = 2; let NumMicroOps = 4;
}

def N2Write_6cyc_4V0 : SchedWriteRes<[N2UnitV0, N2UnitV0, N2UnitV0, N2UnitV0]> {
  let Latency = 6; let NumMicroOps = 4;
}

def N2Write_4cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 4; let NumMicroOps = 4;
}

def N2Write_6cyc_4V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 6; let NumMicroOps = 4;
}

def N2Write_8cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  let Latency = 8; let NumMicroOps = 4;
}

def N2Write_9cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  let Latency = 9; let NumMicroOps = 4;
}

def N2Write_2cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
                                          N2UnitV]> {
  let Latency = 2; let NumMicroOps = 4;
}

def N2Write_4cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
                                          N2UnitV]> {
  let Latency = 4; let NumMicroOps = 4;
}

def N2Write_5cyc_2L01_2V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitV,
                                          N2UnitV]> {
  let Latency = 5; let NumMicroOps = 4;
}

def N2Write_8cyc_2M0_2V0 : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitV0,
                                          N2UnitV0]> {
  let Latency = 8; let NumMicroOps = 4;
}

def N2Write_11cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
                                          N2UnitV1]> {
  let Latency = 11; let NumMicroOps = 4;
}

def N2Write_9cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
                                         N2UnitV1]> {
  let Latency = 9; let NumMicroOps = 4;
}

def N2Write_8cyc_2V_2V1 : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV1,
                                         N2UnitV1]> {
  let Latency = 8; let NumMicroOps = 4;
}

// Two L and two V1 micro-ops, as the name states. The list was previously a
// copy of the 2V_2V1 records above, so the load pipes were never reserved.
def N2Write_10cyc_2L_2V1 : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV1,
                                          N2UnitV1]> {
  let Latency = 10; let NumMicroOps = 4;
}

def N2Write_10cyc_2L_2V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV]> {
  let Latency = 10; let NumMicroOps = 4;
}

def N2Write_4cyc_2M0_2M : SchedWriteRes<[N2UnitM0, N2UnitM0, N2UnitM,
                                         N2UnitM]> {
  let Latency = 4; let NumMicroOps = 4;
}

def N2Write_6cyc_2I_2L : SchedWriteRes<[N2UnitI, N2UnitI, N2UnitL, N2UnitL]> {
  let Latency = 6; let NumMicroOps = 4;
}

def N2Write_7cyc_4L : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL]> {
  let Latency = 7; let NumMicroOps = 4;
}

//===----------------------------------------------------------------------===//
// Define generic 5 micro-op types

def N2Write_2cyc_1L01_2V_2I : SchedWriteRes<[N2UnitL01, N2UnitV, N2UnitV,
                                             N2UnitI, N2UnitI]> {
  let Latency = 2; let NumMicroOps = 5;
}

def N2Write_8cyc_2L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitV, N2UnitV,
                                        N2UnitV]> {
  let Latency = 8; let NumMicroOps = 5;
}

//===----------------------------------------------------------------------===//
// Define generic 6 micro-op types

let NumMicroOps = 6 in {
def N2Write_8cyc_3L_3V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
                                        N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 8;
}
def N2Write_2cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                          N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 2;
}
def N2Write_6cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                          N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 6;
}
def N2Write_4cyc_3L01_3V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                          N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 4;
}
def N2Write_10cyc_2L_2V_2S : SchedWriteRes<[N2UnitL, N2UnitL,
                                            N2UnitV, N2UnitV,
                                            N2UnitS, N2UnitS]> {
  let Latency = 10;
}
} // NumMicroOps = 6

//===----------------------------------------------------------------------===//
// Define generic 7 micro-op types

let NumMicroOps = 7 in
def N2Write_8cyc_3L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL,
                                        N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 8;
}

//===----------------------------------------------------------------------===//
// Define generic 8 micro-op types

let NumMicroOps = 8 in {
def N2Write_6cyc_8V : SchedWriteRes<[N2UnitV, N2UnitV, N2UnitV, N2UnitV,
                                     N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 6;
}
def N2Write_2cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01,
                                          N2UnitL01, N2UnitL01,
                                          N2UnitV, N2UnitV,
                                          N2UnitV, N2UnitV]> {
  let Latency = 2;
}
def N2Write_5cyc_4L01_4V : SchedWriteRes<[N2UnitL01, N2UnitL01,
                                          N2UnitL01, N2UnitL01,
                                          N2UnitV, N2UnitV,
                                          N2UnitV, N2UnitV]> {
  let Latency = 5;
}
def N2Write_8cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
                                        N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 8;
}
def N2Write_9cyc_4L_4V : SchedWriteRes<[N2UnitL, N2UnitL, N2UnitL, N2UnitL,
                                        N2UnitV, N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 9;
}
} // NumMicroOps = 8

//===----------------------------------------------------------------------===//
// Define generic 10 micro-op types

let NumMicroOps = 10 in
def N2Write_7cyc_5L01_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                          N2UnitL01, N2UnitL01,
                                          N2UnitV, N2UnitV, N2UnitV,
                                          N2UnitV, N2UnitV]> {
  let Latency = 7;
}

//===----------------------------------------------------------------------===//
// Define generic 12 micro-op types

let NumMicroOps = 12 in
def N2Write_7cyc_6L01_6V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                          N2UnitL01, N2UnitL01, N2UnitL01,
                                          N2UnitV, N2UnitV, N2UnitV,
                                          N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 7;
}

//===----------------------------------------------------------------------===//
// Define generic 15 micro-op types

let NumMicroOps = 15 in
def N2Write_7cyc_5L01_5S_5V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                             N2UnitL01, N2UnitL01,
                                             N2UnitS, N2UnitS, N2UnitS,
                                             N2UnitS, N2UnitS,
                                             N2UnitV, N2UnitV, N2UnitV,
                                             N2UnitV, N2UnitV]> {
  let Latency = 7;
}

//===----------------------------------------------------------------------===//
// Define generic 18 micro-op types

let NumMicroOps = 18 in
def N2Write_11cyc_9L01_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                           N2UnitL01, N2UnitL01, N2UnitL01,
                                           N2UnitL01, N2UnitL01, N2UnitL01,
                                           N2UnitV, N2UnitV, N2UnitV,
                                           N2UnitV, N2UnitV, N2UnitV,
                                           N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 11;
}

//===----------------------------------------------------------------------===//
// Define generic 27 micro-op types

let NumMicroOps = 27 in
def N2Write_11cyc_9L01_9S_9V : SchedWriteRes<[N2UnitL01, N2UnitL01, N2UnitL01,
                                              N2UnitL01, N2UnitL01, N2UnitL01,
                                              N2UnitL01, N2UnitL01, N2UnitL01,
                                              N2UnitS, N2UnitS, N2UnitS,
                                              N2UnitS, N2UnitS, N2UnitS,
                                              N2UnitS, N2UnitS, N2UnitS,
                                              N2UnitV, N2UnitV, N2UnitV,
                                              N2UnitV, N2UnitV, N2UnitV,
                                              N2UnitV, N2UnitV, N2UnitV]> {
  let Latency = 11;
}

// Miscellaneous
// -----------------------------------------------------------------------------

def : InstRW<[WriteI], (instrs COPY)>;

// Branch Instructions
// -----------------------------------------------------------------------------

// Branch, immed
// Compare and branch
def : SchedAlias<WriteBr, N2Write_1cyc_1B>;

// Branch, register
def : SchedAlias<WriteBrReg, N2Write_1cyc_1B>;

// Branch and link, immed
// Branch and link, register
def : InstRW<[N2Write_1cyc_1B_1S], (instrs BL, BLR)>;

// Arithmetic and Logical Instructions
// -----------------------------------------------------------------------------

// ALU, basic
// ALU, basic, flagset
def : SchedAlias<WriteI, N2Write_1cyc_1I>;

// ALU, extend and shift
def : SchedAlias<WriteISReg, N2Write_2cyc_1M>;
def : SchedAlias<WriteIEReg, N2Write_2cyc_1M>;

// Arithmetic, immediate to logical address tag
def : InstRW<[N2Write_2cyc_1M], (instrs ADDG, SUBG)>;

// Convert floating-point condition flags
// Flag manipulation instructions
def : WriteRes<WriteSys, []> { let Latency = 1; }

// Insert Random Tags
def : InstRW<[N2Write_2cyc_1M], (instrs IRG, IRGstack)>;

// Insert Tag Mask
// Subtract Pointer
// Subtract Pointer, flagset
def : InstRW<[N2Write_1cyc_1I], (instrs GMI, SUBP, SUBPS)>;

// Move and shift instructions
// -----------------------------------------------------------------------------

def : SchedAlias<WriteImm, N2Write_1cyc_1I>;

// Divide and Multiply Instructions
// -----------------------------------------------------------------------------

// SDIV, UDIV
def : SchedAlias<WriteID32, N2Write_12cyc_1M0>;
def : SchedAlias<WriteID64, N2Write_20cyc_1M0>;

def : WriteRes<WriteIM32, [N2UnitM]> { let Latency = 2; }
def : WriteRes<WriteIM64, [N2UnitM]> { let Latency = 2; }

// Multiply high
def : InstRW<[N2Write_3cyc_1M], (instrs SMULHrr, UMULHrr)>;

// Pointer Authentication Instructions (v8.3 PAC)
// -----------------------------------------------------------------------------

// Authenticate data address
// Authenticate instruction address
// Compute pointer authentication code for data address
// Compute pointer authentication code, using generic key
// Compute pointer authentication code for instruction address
def : InstRW<[N2Write_5cyc_1M0], (instregex "^AUT", "^PAC")>;

// Branch and link, register, with pointer authentication
// Branch, register, with pointer authentication
// Branch, return, with pointer authentication
def : InstRW<[N2Write_6cyc_1M0_1B],
             (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ,
                     BRAA, BRAAZ, BRAB, BRABZ,
                     RETAA, RETAB, ERETAA, ERETAB)>;


// Load register, with pointer authentication
def : InstRW<[N2Write_9cyc_1M0_1L], (instregex "^LDRA[AB](indexed|writeback)")>;

// Strip pointer authentication code
def : InstRW<[N2Write_2cyc_1M0], (instrs XPACD, XPACI, XPACLRI)>;

// Miscellaneous data-processing instructions
// -----------------------------------------------------------------------------

// Bitfield extract, one reg
// Bitfield extract, two regs
// NOTE: We don't model the difference between EXTR where both operands are the
// same (one reg).
def : SchedAlias<WriteExtr, N2Write_3cyc_1I_1M>;
def : InstRW<[N2Write_3cyc_1I_1M], (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move, basic
def : SchedAlias<WriteIS, N2Write_1cyc_1I>;

// Bitfield move, insert
def : InstRW<[N2Write_2cyc_1M], (instregex "^BFM[WX]ri$")>;

// Load instructions
// -----------------------------------------------------------------------------

def : SchedAlias<WriteLD, N2Write_4cyc_1L>;
def : SchedAlias<WriteLDIdx, N2Write_4cyc_1I_1L>;

// Load pair, signed immed offset, signed words
def : InstRW<[N2Write_5cyc_1M0, WriteLDHi], (instrs LDPSWi)>;
// Load pair, immed post-index or immed pre-index, signed words
// The writes in an InstRW list are assigned to the instruction's defs in
// order. WriteAdr is listed first here, as in the store pre/post-index entries
// of this file, so that the write-back base register gets the address-update
// latency and the two loaded registers get the load latencies.
def : InstRW<[WriteAdr, N2Write_5cyc_1M0, WriteLDHi],
             (instregex "^LDPSW(post|pre)$")>;

// Store instructions
// -----------------------------------------------------------------------------

def : SchedAlias<WriteST, N2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteSTIdx, N2Write_1cyc_1L01_1D_1I>;
def : SchedAlias<WriteSTP, N2Write_1cyc_1L01_1D>;
def : SchedAlias<WriteAdr, N2Write_1cyc_1I>; // copied from A57.

// Tag load instructions
// -----------------------------------------------------------------------------

// Load allocation tag
// Load multiple allocation tags
def : InstRW<[N2Write_4cyc_1L], (instrs LDG, LDGM)>;

// Tag store instructions
// -----------------------------------------------------------------------------

// Store allocation tags to one or two granules, post-index
// Store allocation tags to one or two granules, pre-index
// Store allocation tag to one or two granules, zeroing, post-index
// Store Allocation Tag to one or two granules, zeroing, pre-index
// Store allocation tag and reg pair to memory, post-Index
// Store allocation tag and reg pair to memory, pre-Index
def : InstRW<[N2Write_1cyc_1L01_1D_1I], (instrs STGPreIndex, STGPostIndex,
                                                ST2GPreIndex, ST2GPostIndex,
                                                STZGPreIndex, STZGPostIndex,
                                                STZ2GPreIndex, STZ2GPostIndex,
                                                STGPpre, STGPpost)>;

// Store allocation tags to one or two granules, signed offset
// Store allocation tag to two granules, zeroing, signed offset
// Store allocation tag and reg pair to memory, signed offset
// Store multiple allocation tags
def : InstRW<[N2Write_1cyc_1L01_1D], (instrs STGOffset, ST2GOffset, STZGOffset,
                                             STZ2GOffset, STGPi, STGM, STZGM)>;

// FP data processing instructions
// -----------------------------------------------------------------------------

// FP absolute value
// FP arithmetic
// FP min/max
// FP negate
// FP select
def : SchedAlias<WriteF, N2Write_2cyc_1V>;

// FP compare
def : SchedAlias<WriteFCmp, N2Write_2cyc_1V0>;

// FP divide, square root
def : SchedAlias<WriteFDiv, N2Write_7cyc_1V0>;

// FP divide, H-form
def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVHrr)>;
// FP divide, S-form
def : InstRW<[N2Write_10cyc_1V0], (instrs FDIVSrr)>;
// FP divide, D-form
def : InstRW<[N2Write_15cyc_1V0], (instrs FDIVDrr)>;

// FP square root, H-form
def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTHr)>;
// FP square root, S-form
def : InstRW<[N2Write_9cyc_1V0], (instrs FSQRTSr)>;
// FP square root, D-form
def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRTDr)>;

// FP multiply
def : WriteRes<WriteFMul, [N2UnitV]> { let Latency = 3; }

// FP multiply accumulate
def : InstRW<[N2Write_4cyc_1V], (instregex "^FN?M(ADD|SUB)[HSD]rrr$")>;

// FP round to integral
def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ][HSD]r$",
                                            "^FRINT(32|64)[XZ][SD]r$")>;

// FP miscellaneous instructions
// -----------------------------------------------------------------------------

// FP convert, from gen to vec reg
def : InstRW<[N2Write_3cyc_1M0], (instregex "^[SU]CVTF[SU][WX][HSD]ri$")>;

// FP convert, from vec to gen reg
def : InstRW<[N2Write_3cyc_1V], (instregex "^FCVT[AMNPZ][SU][SU][WX][HSD]r$")>;

// FP convert, Javascript from vec to gen reg
// FP convert, from vec to vec reg
def : SchedAlias<WriteFCvt, N2Write_3cyc_1V0>;

// FP move, immed
// FP move, register
def : SchedAlias<WriteFImm, N2Write_2cyc_1V>;

// FP transfer, from gen to low half of vec reg
def : InstRW<[N2Write_3cyc_1M0], (instrs FMOVWHr, FMOVXHr, FMOVWSr, FMOVXDr,
                                         FMOVHWr, FMOVHXr, FMOVSWr, FMOVDXr)>;

// FP transfer, from gen to high half of vec reg
def : InstRW<[N2Write_5cyc_1M0_1V], (instrs FMOVXDHighr)>;

// FP transfer, from vec to gen reg
def : SchedAlias<WriteFCopy, N2Write_2cyc_1V>;

// FP load instructions
// -----------------------------------------------------------------------------
//
// NOTE on pre/post-index forms: the writes in an InstRW list are assigned to
// the instruction's defs in order, and the write-back base register is the
// first def of the pre/post-indexed loads and stores. WriteAdr is therefore
// listed first for every such entry below, as the STR/STP (pre|post) entries
// already did.

// Load vector reg, literal, S/D/Q forms
// Load vector reg, unscaled immed
def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[SDQ]l$",
                                           "^LDUR[BHSDQ]i$")>;

// Load vector reg, immed post-index
// Load vector reg, immed pre-index
def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L],
             (instregex "^LDR[BHSDQ](pre|post)$")>;

// Load vector reg, unsigned immed
def : InstRW<[N2Write_6cyc_1L], (instregex "^LDR[BHSDQ]ui$")>;

// Load vector reg, register offset, basic
// Load vector reg, register offset, scale, S/D-form
// Load vector reg, register offset, extend
// Load vector reg, register offset, extend, scale, S/D-form
def : InstRW<[N2Write_6cyc_1L, ReadAdrBase], (instregex "^LDR[BSD]ro[WX]$")>;

// Load vector reg, register offset, scale, H/Q-form
// Load vector reg, register offset, extend, scale, H/Q-form
def : InstRW<[N2Write_7cyc_1I_1L, ReadAdrBase], (instregex "^LDR[HQ]ro[WX]$")>;

// Load vector pair, immed offset, S/D-form
def : InstRW<[N2Write_6cyc_1L, WriteLDHi], (instregex "^LDN?P[SD]i$")>;

// Load vector pair, immed offset, Q-form
def : InstRW<[N2Write_6cyc_2L, WriteLDHi], (instrs LDPQi, LDNPQi)>;

// Load vector pair, immed post-index, S/D-form
// Load vector pair, immed pre-index, S/D-form
def : InstRW<[WriteAdr, N2Write_6cyc_1I_1L, WriteLDHi],
             (instregex "^LDP[SD](pre|post)$")>;

// Load vector pair, immed post-index, Q-form
// Load vector pair, immed pre-index, Q-form
def : InstRW<[WriteAdr, N2Write_6cyc_2I_2L, WriteLDHi], (instrs LDPQpost,
                                                                LDPQpre)>;

// FP store instructions
// -----------------------------------------------------------------------------

// Store vector reg, unscaled immed, B/H/S/D-form
// Store vector reg, unscaled immed, Q-form
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STUR[BHSDQ]i$")>;

// Store vector reg, immed post-index, B/H/S/D-form
// Store vector reg, immed post-index, Q-form
// Store vector reg, immed pre-index, B/H/S/D-form
// Store vector reg, immed pre-index, Q-form
def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I, ReadAdrBase],
             (instregex "^STR[BHSDQ](pre|post)$")>;

// Store vector reg, unsigned immed, B/H/S/D-form
// Store vector reg, unsigned immed, Q-form
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STR[BHSDQ]ui$")>;

// Store vector reg, register offset, basic, B/H/S/D-form
// Store vector reg, register offset, basic, Q-form
// Store vector reg, register offset, scale, S/D-form
// Store vector reg, register offset, extend, B/H/S/D-form
// Store vector reg, register offset, extend, Q-form
// Store vector reg, register offset, extend, scale, S/D-form
def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
             (instregex "^STR[BSD]ro[WX]$")>;

// Store vector reg, register offset, scale, H-form
// Store vector reg, register offset, scale, Q-form
// Store vector reg, register offset, extend, scale, H-form
// Store vector reg, register offset, extend, scale, Q-form
def : InstRW<[N2Write_2cyc_1L01_1V, ReadAdrBase],
             (instregex "^STR[HQ]ro[WX]$")>;

// Store vector pair, immed offset, S-form
// Store vector pair, immed offset, D-form
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STN?P[SD]i$")>;

// Store vector pair, immed offset, Q-form
def : InstRW<[N2Write_2cyc_1L01_2V], (instrs STPQi, STNPQi)>;

// Store vector pair, immed post-index, S-form
// Store vector pair, immed post-index, D-form
// Store vector pair, immed pre-index, S-form
// Store vector pair, immed pre-index, D-form
def : InstRW<[WriteAdr, N2Write_2cyc_1L01_1V_1I],
             (instregex "^STP[SD](pre|post)$")>;

// Store vector pair, immed post-index, Q-form
def : InstRW<[WriteAdr, N2Write_2cyc_1L01_2V_1I], (instrs STPQpost)>;

// Store vector pair, immed pre-index, Q-form
def : InstRW<[WriteAdr, N2Write_2cyc_1L01_2V_2I], (instrs STPQpre)>;

// ASIMD integer instructions
// -----------------------------------------------------------------------------

// ASIMD absolute diff
// ASIMD absolute diff long
// ASIMD arith, basic
// ASIMD arith, complex
// ASIMD arith, pair-wise
// ASIMD compare
// ASIMD logical
// ASIMD max/min, basic and pair-wise
def : SchedAlias<WriteVd, N2Write_2cyc_1V>;
def : SchedAlias<WriteVq, N2Write_2cyc_1V>;

// ASIMD absolute diff accum
// ASIMD absolute diff accum long
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;

// ASIMD arith, reduce, 4H/4S
def : InstRW<[N2Write_2cyc_1V1], (instregex "^(ADDV|[SU]ADDLV)v4(i16|i32)v$")>;

// ASIMD arith, reduce, 8B/8H
def : InstRW<[N2Write_4cyc_1V1_1V],
             (instregex "^(ADDV|[SU]ADDLV)v8(i8|i16)v$")>;

// ASIMD arith, reduce, 16B
def : InstRW<[N2Write_4cyc_1V1], (instrs ADDVv16i8v, SADDLVv16i8v,
                                         UADDLVv16i8v)>;

// ASIMD dot product
// ASIMD dot product using signed and unsigned integers
def : InstRW<[N2Write_3cyc_1V],
             (instregex "^([SU]|SU|US)DOT(lane)?(v8|v16)i8$")>;

// ASIMD matrix multiply-accumulate
def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA, UMMLA, USMMLA)>;

// ASIMD max/min, reduce, 4H/4S
def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU](MAX|MIN)Vv4i16v$",
                                            "^[SU](MAX|MIN)Vv4i32v$")>;

// ASIMD max/min, reduce, 8B/8H
def : InstRW<[N2Write_4cyc_1V1_1V], (instregex "^[SU](MAX|MIN)Vv8i8v$",
                                               "^[SU](MAX|MIN)Vv8i16v$")>;

// ASIMD max/min, reduce, 16B
def : InstRW<[N2Write_4cyc_2V1], (instregex "[SU](MAX|MIN)Vv16i8v$")>;

// ASIMD multiply
def : InstRW<[N2Write_4cyc_1V0], (instregex "^MULv", "^SQ(R)?DMULHv")>;

// ASIMD multiply accumulate
def : InstRW<[N2Write_4cyc_1V0], (instregex "^MLAv", "^MLSv")>;

// ASIMD multiply accumulate high
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMLAHv", "^SQRDMLSHv")>;

// ASIMD multiply accumulate long
def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MLALv", "^[SU]MLSLv")>;

// ASIMD multiply accumulate saturating long
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMLALv", "^SQDMLSLv")>;

// ASIMD multiply/multiply long (8x8) polynomial, D-form
// ASIMD multiply/multiply long (8x8) polynomial, Q-form
def : InstRW<[N2Write_3cyc_1V0], (instregex "^PMULL?(v8i8|v16i8)$")>;

// ASIMD multiply long
def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]MULLv", "^SQDMULLv")>;

// ASIMD pairwise add and accumulate long
def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALPv")>;

// ASIMD shift accumulate
def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]SRAv", "^[SU]RSRAv")>;

// ASIMD shift by immed, basic
def : InstRW<[N2Write_2cyc_1V1], (instregex "^SHLv", "^SHLLv", "^SHRNv",
                                            "^SSHLLv", "^SSHRv", "^USHLLv",
                                            "^USHRv")>;

// ASIMD shift by immed and insert, basic
def : InstRW<[N2Write_2cyc_1V1], (instregex "^SLIv", "^SRIv")>;

// ASIMD shift by immed, complex
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^RSHRNv", "^SQRSHRNv", "^SQRSHRUNv",
                        "^(SQSHLU?|UQSHL)[bhsd]$",
                        "^(SQSHLU?|UQSHL)(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)_shift$",
                        "^SQSHRNv", "^SQSHRUNv", "^SRSHRv", "^UQRSHRNv",
                        "^UQSHRNv", "^URSHRv")>;

// ASIMD shift by register, basic
def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]SHLv")>;

// ASIMD shift by register, complex
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^[SU]RSHLv", "^[SU]QRSHLv",
                        "^[SU]QSHL(v1i8|v1i16|v1i32|v1i64|v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v2i64)$")>;

// ASIMD floating-point instructions
// -----------------------------------------------------------------------------

// ASIMD FP absolute value/difference
// ASIMD FP arith, normal
// ASIMD FP compare
// ASIMD FP complex add
// ASIMD FP max/min, normal
// ASIMD FP max/min, pairwise
// ASIMD FP negate
// Handled by SchedAlias<WriteV[dq], ...>

// ASIMD FP complex multiply add
def : InstRW<[N2Write_4cyc_1V], (instregex "^FCMLAv")>;

// ASIMD FP convert, long (F16 to F32)
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTL(v4|v8)i16")>;

// ASIMD FP convert, long (F32 to F64)
def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTL(v2|v4)i32")>;

// ASIMD FP convert, narrow (F32 to F16)
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTN(v4|v8)i16")>;

// ASIMD FP convert, narrow (F64 to F32)
def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVTN(v2|v4)i32",
                                            "^FCVTXN(v2|v4)f32")>;

// ASIMD FP convert, other, D-form F32 and Q-form F64
def : InstRW<[N2Write_3cyc_1V0], (instregex "^[FSU]CVT[AMNPZ][SU]v2f(32|64)$",
                                            "^[SU]CVTFv2f(32|64)$")>;

// ASIMD FP convert, other, D-form F16 and Q-form F32
def : InstRW<[N2Write_4cyc_2V0], (instregex "^[FSU]CVT[AMNPZ][SU]v4f(16|32)$",
                                            "^[SU]CVTFv4f(16|32)$")>;

// ASIMD FP convert, other, Q-form F16
def : InstRW<[N2Write_6cyc_4V0], (instregex "^[FSU]CVT[AMNPZ][SU]v8f16$",
                                            "^[SU]CVTFv8f16$")>;

// ASIMD FP divide, D-form, F16
def : InstRW<[N2Write_7cyc_1V0], (instrs FDIVv4f16)>;

// ASIMD FP divide, D-form, F32
def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv2f32)>;

// ASIMD FP divide, Q-form, F16
def : InstRW<[N2Write_13cyc_2V0], (instrs FDIVv8f16)>;

// ASIMD FP divide, Q-form, F32
def : InstRW<[N2Write_10cyc_2V0], (instrs FDIVv4f32)>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[N2Write_15cyc_2V0], (instrs FDIVv2f64)>;

// ASIMD FP max/min, reduce, F32 and D-form F16
def : InstRW<[N2Write_4cyc_1V], (instregex "^(FMAX|FMIN)(NM)?Vv4(i16|i32)v$")>;

// ASIMD FP max/min, reduce, Q-form F16
def : InstRW<[N2Write_6cyc_2V], (instregex "^(FMAX|FMIN)(NM)?Vv8i16v$")>;

// ASIMD FP multiply
def : InstRW<[N2Write_3cyc_1V], (instregex "^FMULv", "^FMULXv")>;

// ASIMD FP multiply accumulate
def : InstRW<[N2Write_4cyc_1V], (instregex "^FMLAv", "^FMLSv")>;

// ASIMD FP multiply accumulate long
def : InstRW<[N2Write_5cyc_1V], (instregex "^FMLALv", "^FMLSLv")>;

// ASIMD FP round, D-form F32 and Q-form F64
// The second pattern covers the FRINT32X/FRINT32Z/FRINT64X/FRINT64Z forms; the
// "(32|64)" group must be parenthesised (a "[" here would start a character
// class and the pattern would never match those opcodes).
def : InstRW<[N2Write_3cyc_1V0],
             (instregex "^FRINT[AIMNPXZ]v2f(32|64)$",
                        "^FRINT(32|64)[XZ]v2f(32|64)$")>;

// ASIMD FP round, D-form F16 and Q-form F32
def : InstRW<[N2Write_4cyc_2V0],
             (instregex "^FRINT[AIMNPXZ]v4f(16|32)$",
                        "^FRINT(32|64)[XZ]v4f32$")>;


// ASIMD FP round, Q-form F16
def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]v8f16$")>;

// ASIMD FP square root, D-form, F16
def : InstRW<[N2Write_7cyc_1V0], (instrs FSQRTv4f16)>;

// ASIMD FP square root, D-form, F32
def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv2f32)>;

// ASIMD FP square root, Q-form, F16
def : InstRW<[N2Write_13cyc_2V0], (instrs FSQRTv8f16)>;

// ASIMD FP square root, Q-form, F32
def : InstRW<[N2Write_10cyc_2V0], (instrs FSQRTv4f32)>;

// ASIMD FP square root, Q-form, F64
def : InstRW<[N2Write_16cyc_2V0], (instrs FSQRTv2f64)>;

// ASIMD BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------

// ASIMD convert, F32 to BF16
def : InstRW<[N2Write_4cyc_1V0], (instrs BFCVTN, BFCVTN2)>;

// ASIMD dot product
def : InstRW<[N2Write_4cyc_1V], (instrs BFDOTv4bf16, BFDOTv8bf16)>;

// ASIMD matrix multiply accumulate
def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA)>;

// ASIMD multiply accumulate long
def : InstRW<[N2Write_4cyc_1V], (instrs BFMLALB, BFMLALBIdx, BFMLALT,
                                        BFMLALTIdx)>;

// Scalar convert, F32 to BF16
def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT)>;

// ASIMD miscellaneous instructions
// -----------------------------------------------------------------------------

// ASIMD bit reverse
// ASIMD bitwise insert
// ASIMD count
// ASIMD duplicate, element
// ASIMD extract
// ASIMD extract narrow
// ASIMD insert, element to element
// ASIMD move, FP immed
// ASIMD move, integer immed
// ASIMD reverse
// ASIMD table lookup, 1 or 2 table regs
// ASIMD table lookup extension, 1 table reg
// ASIMD transfer, element to gen reg
// ASIMD transpose
// ASIMD unzip/zip
// Handled by SchedAlias<WriteV[dq], ...>

// ASIMD duplicate, gen reg
def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUPv.+gpr")>;

// ASIMD extract narrow, saturating
def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTNv", "^SQXTUNv")>;

// ASIMD reciprocal and square root estimate, D-form U32
def : InstRW<[N2Write_3cyc_1V0], (instrs URECPEv2i32, URSQRTEv2i32)>;

// ASIMD reciprocal and square root estimate, Q-form U32
def : InstRW<[N2Write_4cyc_2V0], (instrs URECPEv4i32, URSQRTEv4i32)>;

// ASIMD reciprocal and square root estimate, D-form F32 and scalar forms
def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPEv1f16, FRECPEv1i32,
                                         FRECPEv1i64, FRECPEv2f32,
                                         FRSQRTEv1f16, FRSQRTEv1i32,
                                         FRSQRTEv1i64, FRSQRTEv2f32)>;

// ASIMD reciprocal and square root estimate, D-form F16 and Q-form F32
def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPEv4f16, FRECPEv4f32,
                                         FRSQRTEv4f16, FRSQRTEv4f32)>;

// ASIMD reciprocal and square root estimate, Q-form F16
def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPEv8f16, FRSQRTEv8f16)>;

// ASIMD reciprocal exponent
def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRECPXv")>;

// ASIMD reciprocal step
def : InstRW<[N2Write_4cyc_1V], (instregex "^FRECPSv", "^FRSQRTSv")>;

// ASIMD table lookup, 3 table regs
def : InstRW<[N2Write_4cyc_2V], (instrs TBLv8i8Three, TBLv16i8Three)>;

// ASIMD table lookup, 4 table regs
def : InstRW<[N2Write_4cyc_4V], (instrs TBLv8i8Four, TBLv16i8Four)>;

// ASIMD table lookup extension, 2 table reg
def : InstRW<[N2Write_4cyc_2V], (instrs TBXv8i8Two, TBXv16i8Two)>;

// ASIMD table lookup extension, 3 table reg
def : InstRW<[N2Write_6cyc_4V], (instrs TBXv8i8Three, TBXv16i8Three)>;

// ASIMD table lookup extension, 4 table reg
def : InstRW<[N2Write_6cyc_8V], (instrs TBXv8i8Four, TBXv16i8Four)>;

// ASIMD transfer, gen reg to element
def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^INSv")>;

// ASIMD load instructions
// -----------------------------------------------------------------------------

// ASIMD load, 1 element, multiple, 1 reg, D-form
def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[N2Write_6cyc_1L, WriteAdr],
             (instregex "^LD1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_6cyc_1L, WriteAdr],
             (instregex "^LD1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, D-form
def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[N2Write_6cyc_2L, WriteAdr],
             (instregex "^LD1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[N2Write_6cyc_2L], (instregex "^LD1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_6cyc_2L, WriteAdr],
             (instregex "^LD1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, D-form
def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[N2Write_6cyc_3L, WriteAdr],
             (instregex "^LD1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[N2Write_6cyc_3L], (instregex "^LD1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_6cyc_3L, WriteAdr],
             (instregex "^LD1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, D-form
def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[N2Write_7cyc_4L, WriteAdr],
             (instregex "^LD1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[N2Write_7cyc_4L], (instregex "^LD1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_7cyc_4L, WriteAdr],
             (instregex "^LD1Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1i(8|16|32|64)$")>;
def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s|1d)$")>;
def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 1 element, all lanes, Q-form
def : InstRW<[N2Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Twov(8b|4h|2s)$")>;
def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>;

// ASIMD load, 2 element, multiple, Q-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
def : InstRW<[N2Write_8cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_8cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2i(8|16|32|64)$")>;
def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Rv(8b|4h|2s|1d)$")>;
def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 2 element, all lanes, Q-form
def : InstRW<[N2Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3Threev(8b|4h|2s)$")>;
def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>;

// ASIMD load, 3 element, multiple, Q-form, B/H/S
def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Threev(16b|8h|4s)$")>;
def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>;

// ASIMD load, 3 element, multiple, Q-form, D
def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Threev(2d)$")>;
def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Threev(2d)_POST$")>;

// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3i(8|16|32|64)$")>;
def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3i(8|16|32|64)_POST$")>;

// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
def : InstRW<[N2Write_8cyc_2L_3V], (instregex "LD3Rv(8b|4h|2s|1d)$")>;
def : InstRW<[N2Write_8cyc_2L_3V, WriteAdr], (instregex "LD3Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
def : InstRW<[N2Write_8cyc_3L_3V], (instregex "LD3Rv(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_8cyc_3L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4Fourv(8b|4h|2s)$")>;
def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>;

// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
def : InstRW<[N2Write_9cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_9cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4i(8|16|32|64)$")>;
def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
def : InstRW<[N2Write_8cyc_3L_4V], (instregex "LD4Rv(8b|4h|2s|1d)$")>;
def : InstRW<[N2Write_8cyc_3L_4V, WriteAdr], (instregex "LD4Rv(8b|4h|2s|1d)_POST$")>;

// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
def : InstRW<[N2Write_8cyc_4L_4V], (instregex "LD4Rv(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_8cyc_4L_4V, WriteAdr], (instregex "LD4Rv(16b|8h|4s|2d)_POST$")>;

// ASIMD store instructions
// -----------------------------------------------------------------------------

// ASIMD store, 1 element, multiple, 1 reg, D-form
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(8b|4h|2s|1d)$")>;
def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Onev(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, D-form
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "ST1Twov(8b|4h|2s|1d)$")>;
def : InstRW<[N2Write_2cyc_1L01_1V, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Twov(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, D-form
def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Threev(8b|4h|2s|1d)$")>;
def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[N2Write_2cyc_3L01_3V], (instregex "ST1Threev(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_2cyc_3L01_3V, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, D-form
def : InstRW<[N2Write_2cyc_2L01_2V], (instregex "ST1Fourv(8b|4h|2s|1d)$")>;
def : InstRW<[N2Write_2cyc_2L01_2V, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[N2Write_2cyc_4L01_4V], (instregex "ST1Fourv(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_2cyc_4L01_4V, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST1i(8|16|32|64)$")>;
def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST1i(8|16|32|64)_POST$")>;

// ASIMD store, 2 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST2Twov(8b|4h|2s)$")>;
def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>;

// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "ST2Twov(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_4cyc_2L01_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "ST2i(8|16|32|64)$")>;
def : InstRW<[N2Write_4cyc_1L01_1V, WriteAdr], (instregex "ST2i(8|16|32|64)_POST$")>;

// ASIMD store, 3 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_5cyc_2L01_2V], (instregex "ST3Threev(8b|4h|2s)$")>;
def : InstRW<[N2Write_5cyc_2L01_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>;

// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST3Threev(16b|8h|4s|2d)$")>;
def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST3Threev(16b|8h|4s|2d)_POST$")>;

// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST3i(8|16|32|64)$")>;
def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST3i(8|16|32|64)_POST$")>;

// ASIMD store, 4 element, multiple, D-form, B/H/S
def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST4Fourv(8b|4h|2s)$")>;
def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, B/H/S
def : InstRW<[N2Write_7cyc_6L01_6V], (instregex "ST4Fourv(16b|8h|4s)$")>;
def : InstRW<[N2Write_7cyc_6L01_6V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>;

// ASIMD store, 4 element, multiple, Q-form, D
def : InstRW<[N2Write_5cyc_4L01_4V], (instregex "ST4Fourv(2d)$")>;
def : InstRW<[N2Write_5cyc_4L01_4V, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>;

// ASIMD store, 4 element, one lane, B/H/S
def : InstRW<[N2Write_6cyc_3L01_3V], (instregex "ST4i(8|16|32)$")>;
def : InstRW<[N2Write_6cyc_3L01_3V, WriteAdr], (instregex "ST4i(8|16|32)_POST$")>;

// ASIMD store, 4 element, one lane, D
def : InstRW<[N2Write_4cyc_3L01_3V], (instregex "ST4i(64)$")>;
def : InstRW<[N2Write_4cyc_3L01_3V, WriteAdr], (instregex "ST4i(64)_POST$")>;

// Cryptography extensions
// -----------------------------------------------------------------------------

// Crypto AES ops
def : InstRW<[N2Write_2cyc_1V], (instregex "^AES[DE]rr$", "^AESI?MCrr")>;

// Crypto polynomial (64x64) multiply long
def : InstRW<[N2Write_2cyc_1V0], (instrs PMULLv1i64, PMULLv2i64)>;

// Crypto SHA1 hash acceleration op
// Crypto SHA1 schedule acceleration ops
def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA1(H|SU0|SU1)")>;

// Crypto SHA1 hash acceleration ops
// Crypto SHA256 hash acceleration ops
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SHA1[CMP]", "^SHA256H2?")>;

// Crypto SHA256 schedule acceleration ops
def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA256SU[01]")>;

// Crypto SHA512 hash acceleration ops
def : InstRW<[N2Write_2cyc_1V0], (instregex "^SHA512(H|H2|SU0|SU1)")>;

// Crypto SHA3 ops
def : InstRW<[N2Write_2cyc_1V0], (instrs BCAX, EOR3, RAX1, XAR)>;

// Crypto SM3 ops
def : InstRW<[N2Write_2cyc_1V0], (instregex "^SM3PARTW[12]$", "^SM3SS1$",
                                            "^SM3TT[12][AB]$")>;

// Crypto SM4 ops
def : InstRW<[N2Write_4cyc_1V0], (instrs SM4E, SM4ENCKEY)>;

// CRC
// -----------------------------------------------------------------------------

def : InstRW<[N2Write_2cyc_1M0], (instregex "^CRC32")>;

// SVE Predicate instructions
// -----------------------------------------------------------------------------

// Loop control, based on predicate
def : InstRW<[N2Write_2cyc_1M], (instrs BRKA_PPmP, BRKA_PPzP,
                                        BRKB_PPmP, BRKB_PPzP)>;

// Loop control, based on predicate and flag setting
def : InstRW<[N2Write_3cyc_1M], (instrs BRKAS_PPzP, BRKBS_PPzP)>;

// Loop control, propagating
def : InstRW<[N2Write_2cyc_1M0], (instrs BRKN_PPzP, BRKPA_PPzPP, BRKPB_PPzPP)>;

// Loop control, propagating and flag setting
def : InstRW<[N2Write_3cyc_1M0_1M], (instrs BRKNS_PPzP, BRKPAS_PPzPP,
                                            BRKPBS_PPzPP)>;

// Loop control, based on GPR
def : InstRW<[N2Write_3cyc_1M],
             (instregex "^WHILE(GE|GT|HI|HS|LE|LO|LS|LT)_P(WW|XX)_[BHSD]$")>;

def : InstRW<[N2Write_3cyc_1M], (instregex "^WHILE(RW|WR)_PXX_[BHSD]$")>;

// Loop terminate
def : InstRW<[N2Write_1cyc_1M], (instregex "^CTERM(EQ|NE)_(WW|XX)$")>;

// Predicate counting scalar
def : InstRW<[N2Write_2cyc_1M], (instrs ADDPL_XXI, ADDVL_XXI, RDVLI_XI)>;
def : InstRW<[N2Write_2cyc_1M],
             (instregex "^(CNT|DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[BHWD]_XPiI$",
                        "^SQ(DEC|INC)[BHWD]_XPiWdI$",
                        "^(UQDEC|UQINC)[BHWD]_WPiI$")>;

// Predicate counting scalar, active predicate
def : InstRW<[N2Write_2cyc_1M],
             (instregex "^CNTP_XPP_[BHSD]$",
                        "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_XP_[BHSD]$",
                        "^(UQDEC|UQINC)P_WP_[BHSD]$",
                        "^(SQDEC|SQINC|UQDEC|UQINC)P_XPWd_[BHSD]$")>;

// Predicate counting vector, active predicate
def : InstRW<[N2Write_7cyc_1M_1M0_1V],
             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)P_ZP_[HSD]$")>;

// Predicate logical
def : InstRW<[N2Write_1cyc_1M0],
             (instregex "^(AND|BIC|EOR|NAND|NOR|ORN|ORR)_PPzPP$")>;

// Predicate logical, flag setting
def : InstRW<[N2Write_2cyc_1M0_1M],
             (instregex "^(ANDS|BICS|EORS|NANDS|NORS|ORNS|ORRS)_PPzPP$")>;

// Predicate reverse
def : InstRW<[N2Write_2cyc_1M], (instregex "^REV_PP_[BHSD]$")>;

// Predicate select
def : InstRW<[N2Write_1cyc_1M0], (instrs SEL_PPPP)>;

// Predicate set
def : InstRW<[N2Write_2cyc_1M], (instregex "^PFALSE$", "^PTRUE_[BHSD]$")>;

// Predicate set/initialize, set flags
def : InstRW<[N2Write_3cyc_1M], (instregex "^PTRUES_[BHSD]$")>;

// Predicate find first/next
def : InstRW<[N2Write_3cyc_1M], (instregex "^PFIRST_B$", "^PNEXT_[BHSD]$")>;

// Predicate test
def : InstRW<[N2Write_1cyc_1M], (instrs PTEST_PP)>;

// Predicate transpose
def : InstRW<[N2Write_2cyc_1M], (instregex "^TRN[12]_PPP_[BHSDQ]$")>;

// Predicate unpack and widen
def : InstRW<[N2Write_2cyc_1M], (instrs PUNPKHI_PP, PUNPKLO_PP)>;

// Predicate zip/unzip
def : InstRW<[N2Write_2cyc_1M], (instregex "^(ZIP|UZP)[12]_PPP_[BHSDQ]$")>;

// SVE integer instructions
// -----------------------------------------------------------------------------

// Arithmetic, absolute diff
def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABD_ZPmZ_[BHSD]$")>;

// Arithmetic, absolute diff accum
def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABA_ZZZ_[BHSD]$")>;

// Arithmetic, absolute diff accum long
def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ABAL[TB]_ZZZ_[HSD]$")>;

// Arithmetic, absolute diff long
def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]ABDL[TB]_ZZZ_[HSD]$")>;

// Arithmetic, basic
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^(ABS|ADD|CNOT|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
                        "^(ADD|SUB)_ZZZ_[BHSD]$",
                        "^(ADD|SUB|SUBR)_ZI_[BHSD]$",
                        "^ADR_[SU]XTW_ZZZ_D_[0123]$",
                        "^ADR_LSL_ZZZ_[SD]_[0123]$",
                        "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]$",
                        "^SADDLBT_ZZZ_[HSD]$",
                        "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]$",
                        "^SSUBL(BT|TB)_ZZZ_[HSD]$")>;

// Arithmetic, complex
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^R?(ADD|SUB)HN[BT]_ZZZ_[BHS]$",
                        "^SQ(ABS|ADD|NEG|SUB|SUBR)_ZPmZ_[BHSD]$",
                        "^[SU]Q(ADD|SUB)_ZZZ_[BHSD]$",
                        "^[SU]Q(ADD|SUB)_ZI_[BHSD]$",
                        "^(SRH|SUQ|UQ|USQ|URH)ADD_ZPmZ_[BHSD]$",
                        "^(UQSUB|UQSUBR)_ZPmZ_[BHSD]$")>;

// Arithmetic, large integer
def : InstRW<[N2Write_2cyc_1V], (instregex "^(AD|SB)CL[BT]_ZZZ_[SD]$")>;

// Arithmetic, pairwise add
def : InstRW<[N2Write_2cyc_1V], (instregex "^ADDP_ZPmZ_[BHSD]$")>;

// Arithmetic, pairwise add and accum long
def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]ADALP_ZPmZ_[HSD]$")>;

// Arithmetic, shift
def : InstRW<[N2Write_2cyc_1V1],
             (instregex "^(ASR|LSL|LSR)_WIDE_ZPmZ_[BHS]$",
                        "^(ASR|LSL|LSR)_WIDE_ZZZ_[BHS]$",
                        "^(ASR|LSL|LSR)_ZPmI_[BHSD]$",
                        "^(ASR|LSL|LSR)_ZPmZ_[BHSD]$",
                        "^(ASR|LSL|LSR)_ZZI_[BHSD]$",
                        "^(ASRR|LSLR|LSRR)_ZPmZ_[BHSD]$")>;

// Arithmetic, shift and accumulate
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^(SRSRA|SSRA|URSRA|USRA)_ZZI_[BHSD]$")>;

// Arithmetic, shift by immediate
// Arithmetic, shift by immediate and insert
def : InstRW<[N2Write_2cyc_1V1],
             (instregex "^(SHRNB|SHRNT|SSHLLB|SSHLLT|USHLLB|USHLLT|SLI|SRI)_ZZI_[BHSD]$")>;

// Arithmetic, shift complex
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^(SQ)?RSHRU?N[BT]_ZZI_[BHS]$",
                        "^(SQRSHL|SQRSHLR|SQSHL|SQSHLR|UQRSHL|UQRSHLR|UQSHL|UQSHLR)_ZPmZ_[BHSD]$",
                        "^(SQSHL|SQSHLU|UQSHL)_ZPmI_[BHSD]$",
                        "^SQSHRU?N[BT]_ZZI_[BHS]$",
                        "^UQR?SHRN[BT]_ZZI_[BHS]$")>;

// Arithmetic, shift right for divide
def : InstRW<[N2Write_4cyc_1V1], (instregex "^ASRD_ZPmI_[BHSD]$")>;

// Arithmetic, shift rounding
def : InstRW<[N2Write_4cyc_1V1],
             (instregex "^(SRSHL|SRSHLR|URSHL|URSHLR)_ZPmZ_[BHSD]$",
                        "^[SU]RSHR_ZPmI_[BHSD]$")>;

// Bit manipulation
def : InstRW<[N2Write_6cyc_2V1],
             (instregex "^(BDEP|BEXT|BGRP)_ZZZ_[BHSD]$")>;

// Bitwise select
def : InstRW<[N2Write_2cyc_1V], (instregex "^(BSL|BSL1N|BSL2N|NBSL)_ZZZZ$")>;

// Count/reverse bits
def : InstRW<[N2Write_2cyc_1V], (instregex "^(CLS|CLZ|CNT|RBIT)_ZPmZ_[BHSD]$")>;

// Broadcast logical bitmask immediate to vector
def : InstRW<[N2Write_2cyc_1V], (instrs DUPM_ZI)>;

// Compare and set flags
def : InstRW<[N2Write_4cyc_1V0_1M],
             (instregex "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_PPzZ[IZ]_[BHSD]$",
                        "^CMP(EQ|GE|GT|HI|HS|LE|LO|LS|LT|NE)_WIDE_PPzZZ_[BHS]$")>;

// Complex add
def : InstRW<[N2Write_2cyc_1V], (instregex "^(SQ)?CADD_ZZI_[BHSD]$")>;

// Complex dot product 8-bit element
def : InstRW<[N2Write_3cyc_1V], (instrs CDOT_ZZZ_S, CDOT_ZZZI_S)>;

// Complex dot product 16-bit element
def : InstRW<[N2Write_4cyc_1V0], (instrs CDOT_ZZZ_D, CDOT_ZZZI_D)>;

// Complex multiply-add B, H, S element size
def : InstRW<[N2Write_4cyc_1V0], (instregex "^CMLA_ZZZ_[BHS]$",
                                            "^CMLA_ZZZI_[HS]$")>;

// Complex multiply-add D element size
def : InstRW<[N2Write_5cyc_2V0], (instrs CMLA_ZZZ_D)>;

// Conditional extract operations, scalar form
def : InstRW<[N2Write_8cyc_1M0_1V1_1V], (instregex "^CLAST[AB]_RPZ_[BHSD]$")>;

// Conditional extract operations, SIMD&FP scalar and vector forms
def : InstRW<[N2Write_3cyc_1V1], (instregex "^CLAST[AB]_[VZ]PZ_[BHSD]$",
                                            "^COMPACT_ZPZ_[SD]$",
                                            "^SPLICE_ZPZZ?_[BHSD]$")>;

// Convert to floating point, 64b to float or convert to double
def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_Dto[SD]$")>;

// Convert to floating point, 64b to half
def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_DtoH$")>;

// Convert to floating point, 32b to single or half
def : InstRW<[N2Write_4cyc_2V0], (instregex "^[SU]CVTF_ZPmZ_Sto[HS]$")>;

// Convert to floating point, 32b to double
def : InstRW<[N2Write_3cyc_1V0], (instregex "^[SU]CVTF_ZPmZ_StoD$")>;

// Convert to floating point, 16b to half
def : InstRW<[N2Write_6cyc_4V0], (instregex "^[SU]CVTF_ZPmZ_HtoH$")>;

// Copy, scalar
def : InstRW<[N2Write_5cyc_1M0_1V], (instregex "^CPY_ZPmR_[BHSD]$")>;

// Copy, scalar SIMD&FP or imm
def : InstRW<[N2Write_2cyc_1V], (instregex "^CPY_ZPm[IV]_[BHSD]$",
                                           "^CPY_ZPzI_[BHSD]$")>;

// Divides, 32 bit
def : InstRW<[N2Write_12cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_S$")>;

// Divides, 64 bit
def : InstRW<[N2Write_20cyc_1V0], (instregex "^[SU]DIVR?_ZPmZ_D$")>;

// Dot product, 8 bit
def : InstRW<[N2Write_3cyc_1V], (instregex "^[SU]DOT_ZZZI?_S$")>;

// Dot product, 8 bit, using signed and unsigned integers
def : InstRW<[N2Write_3cyc_1V], (instrs SUDOT_ZZZI, USDOT_ZZZI, USDOT_ZZZ)>;

// Dot product, 16 bit
def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]DOT_ZZZI?_D$")>;

// Duplicate, immediate and indexed form
def : InstRW<[N2Write_2cyc_1V], (instregex "^DUP_ZI_[BHSD]$",
                                           "^DUP_ZZI_[BHSDQ]$")>;

// Duplicate, scalar form
def : InstRW<[N2Write_3cyc_1M0], (instregex "^DUP_ZR_[BHSD]$")>;

// Extend, sign or zero
def : InstRW<[N2Write_2cyc_1V1], (instregex "^[SU]XTB_ZPmZ_[HSD]$",
                                            "^[SU]XTH_ZPmZ_[SD]$",
                                            "^[SU]XTW_ZPmZ_[D]$")>;

// Extract
def : InstRW<[N2Write_2cyc_1V], (instrs EXT_ZZI, EXT_ZZI_B)>;

// Extract narrow saturating
def : InstRW<[N2Write_4cyc_1V1], (instregex "^[SU]QXTN[BT]_ZZ_[BHS]$",
                                            "^SQXTUN[BT]_ZZ_[BHS]$")>;

// Extract/insert operation, SIMD and FP scalar form
def : InstRW<[N2Write_3cyc_1V1], (instregex "^LAST[AB]_VPZ_[BHSD]$",
                                            "^INSR_ZV_[BHSD]$")>;

// Extract/insert operation, scalar
def : InstRW<[N2Write_5cyc_1V1_1M0], (instregex "^LAST[AB]_RPZ_[BHSD]$",
                                                "^INSR_ZR_[BHSD]$")>;

// Histogram operations
def : InstRW<[N2Write_2cyc_1V], (instregex "^HISTCNT_ZPzZZ_[SD]$",
                                           "^HISTSEG_ZZZ$")>;

// Horizontal operations, B, H, S form, immediate operands only
def : InstRW<[N2Write_4cyc_1V0], (instregex "^INDEX_II_[BHS]$")>;

// Horizontal operations, B, H, S form, scalar, immediate operands/ scalar
// operands only / immediate, scalar operands
def : InstRW<[N2Write_7cyc_1M0_1V0], (instregex "^INDEX_(IR|RI|RR)_[BHS]$")>;

// Horizontal operations, D form, immediate operands only
def : InstRW<[N2Write_5cyc_2V0], (instrs INDEX_II_D)>;

// Horizontal operations, D form, scalar, immediate operands / scalar operands
// only / immediate, scalar operands
def : InstRW<[N2Write_8cyc_2M0_2V0], (instregex "^INDEX_(IR|RI|RR)_D$")>;

// Logical
def : InstRW<[N2Write_2cyc_1V],
             (instregex "^(AND|EOR|ORR)_ZI$",
                        "^(AND|BIC|EOR|EOR(BT|TB)?|ORR)_ZZZ$",
                        "^EOR(BT|TB)_ZZZ_[BHSD]$",
                        "^(AND|BIC|EOR|NOT|ORR)_ZPmZ_[BHSD]$")>;

// Max/min, basic and pairwise
def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU](MAX|MIN)_ZI_[BHSD]$",
                                           "^[SU](MAX|MIN)P?_ZPmZ_[BHSD]$")>;

// Matching operations
def : InstRW<[N2Write_2cyc_1V0_1M], (instregex "^N?MATCH_PPzZZ_[BH]$")>;

// Matrix multiply-accumulate
def : InstRW<[N2Write_3cyc_1V], (instrs SMMLA_ZZZ, UMMLA_ZZZ, USMMLA_ZZZ)>;

// Move prefix
def : InstRW<[N2Write_2cyc_1V], (instregex "^MOVPRFX_ZP[mz]Z_[BHSD]$",
                                           "^MOVPRFX_ZZ$")>;

// Multiply, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_[BHS]$",
                                            "^[SU]MULH_(ZPmZ|ZZZ)_[BHS]$")>;

// Multiply, D element size
def : InstRW<[N2Write_5cyc_2V0], (instregex "^MUL_(ZI|ZPmZ|ZZZI|ZZZ)_D$",
                                            "^[SU]MULH_(ZPmZ|ZZZ)_D$")>;

// Multiply long
def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]MULL[BT]_ZZZI_[SD]$",
                                            "^[SU]MULL[BT]_ZZZ_[HSD]$")>;

// Multiply accumulate, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0], (instregex "^ML[AS]_ZZZI_[BHS]$",
                                            "^(ML[AS]|MAD|MSB)_ZPmZZ_[BHS]$")>;

// Multiply accumulate, D element size
def : InstRW<[N2Write_5cyc_2V0], (instregex "^ML[AS]_ZZZI_D$",
                                            "^(ML[AS]|MAD|MSB)_ZPmZZ_D$")>;

// Multiply accumulate long
def : InstRW<[N2Write_4cyc_1V0], (instregex "^[SU]ML[AS]L[BT]_ZZZ_[HSD]$",
                                            "^[SU]ML[AS]L[BT]_ZZZI_[SD]$")>;

// Multiply accumulate saturating doubling long regular
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDML[AS](LB|LT|LBT)_ZZZ_[HSD]$",
                                            "^SQDML[AS](LB|LT)_ZZZI_[SD]$")>;

// Multiply saturating doubling high, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULH_ZZZ_[BHS]$",
                                            "^SQDMULH_ZZZI_[HS]$")>;

// Multiply saturating doubling high, D element size
def : InstRW<[N2Write_5cyc_2V0], (instrs SQDMULH_ZZZ_D, SQDMULH_ZZZI_D)>;

// Multiply saturating doubling long
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQDMULL[BT]_ZZZ_[HSD]$",
                                            "^SQDMULL[BT]_ZZZI_[SD]$")>;

// Multiply saturating rounding doubling regular/complex accumulate, B, H, S
// element size
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDML[AS]H_ZZZ_[BHS]$",
                                            "^SQRDCMLAH_ZZZ_[BHS]$",
                                            "^SQRDML[AS]H_ZZZI_[HS]$",
                                            "^SQRDCMLAH_ZZZI_[HS]$")>;

// Multiply saturating rounding doubling regular/complex accumulate, D element
// size
def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDML[AS]H_ZZZI?_D$",
                                            "^SQRDCMLAH_ZZZ_D$")>;

// Multiply saturating rounding doubling regular/complex, B, H, S element size
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SQRDMULH_ZZZ_[BHS]$",
                                            "^SQRDMULH_ZZZI_[HS]$")>;

// Multiply saturating rounding doubling regular/complex, D element size
def : InstRW<[N2Write_5cyc_2V0], (instregex "^SQRDMULH_ZZZI?_D$")>;

// Multiply/multiply long, (8x8) polynomial
def : InstRW<[N2Write_2cyc_1V0], (instregex "^PMUL_ZZZ_B$",
                                            "^PMULL[BT]_ZZZ_[HDQ]$")>;

// Predicate counting vector
def : InstRW<[N2Write_2cyc_1V0],
             (instregex "^(DEC|INC|SQDEC|SQINC|UQDEC|UQINC)[HWD]_ZPiI$")>;

// Reciprocal estimate
def : InstRW<[N2Write_4cyc_2V0], (instrs URECPE_ZPmZ_S, URSQRTE_ZPmZ_S)>;

// Reduction, arithmetic, B form
def :
InstRW<[N2Write_11cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_B")>; 1849 1850// Reduction, arithmetic, H form 1851def : InstRW<[N2Write_9cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_H")>; 1852 1853// Reduction, arithmetic, S form 1854def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_S")>; 1855 1856// Reduction, arithmetic, D form 1857def : InstRW<[N2Write_8cyc_2V_2V1], (instregex "^[SU](ADD|MAX|MIN)V_VPZ_D")>; 1858 1859// Reduction, logical 1860def : InstRW<[N2Write_6cyc_1V_1V1], (instregex "^(ANDV|EORV|ORV)_VPZ_[BHSD]$")>; 1861 1862// Reverse, vector 1863def : InstRW<[N2Write_2cyc_1V], (instregex "^REV_ZZ_[BHSD]$", 1864 "^REVB_ZPmZ_[HSD]$", 1865 "^REVH_ZPmZ_[SD]$", 1866 "^REVW_ZPmZ_D$")>; 1867 1868// Select, vector form 1869def : InstRW<[N2Write_2cyc_1V], (instregex "^SEL_ZPZZ_[BHSD]$")>; 1870 1871// Table lookup 1872def : InstRW<[N2Write_2cyc_1V], (instregex "^TBL_ZZZZ?_[BHSD]$")>; 1873 1874// Table lookup extension 1875def : InstRW<[N2Write_2cyc_1V], (instregex "^TBX_ZZZ_[BHSD]$")>; 1876 1877// Transpose, vector form 1878def : InstRW<[N2Write_2cyc_1V], (instregex "^TRN[12]_ZZZ_[BHSDQ]$")>; 1879 1880// Unpack and extend 1881def : InstRW<[N2Write_2cyc_1V], (instregex "^[SU]UNPK(HI|LO)_ZZ_[HSD]$")>; 1882 1883// Zip/unzip 1884def : InstRW<[N2Write_2cyc_1V], (instregex "^(UZP|ZIP)[12]_ZZZ_[BHSDQ]$")>; 1885 1886// SVE floating-point instructions 1887// ----------------------------------------------------------------------------- 1888 1889// Floating point absolute value/difference 1890def : InstRW<[N2Write_2cyc_1V], (instregex "^FAB[SD]_ZPmZ_[HSD]$")>; 1891 1892// Floating point arithmetic 1893def : InstRW<[N2Write_2cyc_1V], (instregex "^F(ADD|SUB)_(ZPm[IZ]|ZZZ)_[HSD]$", 1894 "^FADDP_ZPmZZ_[HSD]$", 1895 "^FNEG_ZPmZ_[HSD]$", 1896 "^FSUBR_ZPm[IZ]_[HSD]$")>; 1897 1898// Floating point associative add, F16 1899def : InstRW<[N2Write_10cyc_1V1], (instrs FADDA_VPZ_H)>; 1900 1901// Floating point associative add, F32 1902def : 
InstRW<[N2Write_6cyc_1V1], (instrs FADDA_VPZ_S)>; 1903 1904// Floating point associative add, F64 1905def : InstRW<[N2Write_4cyc_1V], (instrs FADDA_VPZ_D)>; 1906 1907// Floating point compare 1908def : InstRW<[N2Write_2cyc_1V0], (instregex "^FACG[ET]_PPzZZ_[HSD]$", 1909 "^FCM(EQ|GE|GT|NE)_PPzZ[0Z]_[HSD]$", 1910 "^FCM(LE|LT)_PPzZ0_[HSD]$", 1911 "^FCMUO_PPzZZ_[HSD]$")>; 1912 1913// Floating point complex add 1914def : InstRW<[N2Write_3cyc_1V], (instregex "^FCADD_ZPmZ_[HSD]$")>; 1915 1916// Floating point complex multiply add 1917def : InstRW<[N2Write_5cyc_1V], (instregex "^FCMLA_ZPmZZ_[HSD]$", 1918 "^FCMLA_ZZZI_[HS]$")>; 1919 1920// Floating point convert, long or narrow (F16 to F32 or F32 to F16) 1921def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVT_ZPmZ_(HtoS|StoH)$", 1922 "^FCVTLT_ZPmZ_HtoS$", 1923 "^FCVTNT_ZPmZ_StoH$")>; 1924 1925// Floating point convert, long or narrow (F16 to F64, F32 to F64, F64 to F32 1926// or F64 to F16) 1927def : InstRW<[N2Write_3cyc_1V0], (instregex "^FCVT_ZPmZ_(HtoD|StoD|DtoS|DtoH)$", 1928 "^FCVTLT_ZPmZ_StoD$", 1929 "^FCVTNT_ZPmZ_DtoS$")>; 1930 1931// Floating point convert, round to odd 1932def : InstRW<[N2Write_3cyc_1V0], (instrs FCVTX_ZPmZ_DtoS, FCVTXNT_ZPmZ_DtoS)>; 1933 1934// Floating point base2 log, F16 1935def : InstRW<[N2Write_6cyc_4V0], (instrs FLOGB_ZPmZ_H)>; 1936 1937// Floating point base2 log, F32 1938def : InstRW<[N2Write_4cyc_2V0], (instrs FLOGB_ZPmZ_S)>; 1939 1940// Floating point base2 log, F64 1941def : InstRW<[N2Write_3cyc_1V0], (instrs FLOGB_ZPmZ_D)>; 1942 1943// Floating point convert to integer, F16 1944def : InstRW<[N2Write_6cyc_4V0], (instregex "^FCVTZ[SU]_ZPmZ_HtoH$")>; 1945 1946// Floating point convert to integer, F32 1947def : InstRW<[N2Write_4cyc_2V0], (instregex "^FCVTZ[SU]_ZPmZ_(HtoS|StoS)$")>; 1948 1949// Floating point convert to integer, F64 1950def : InstRW<[N2Write_3cyc_1V0], 1951 (instregex "^FCVTZ[SU]_ZPmZ_(HtoD|StoD|DtoS|DtoD)$")>; 1952 1953// Floating point copy 1954def : 
InstRW<[N2Write_2cyc_1V], (instregex "^FCPY_ZPmI_[HSD]$", 1955 "^FDUP_ZI_[HSD]$")>; 1956 1957// Floating point divide, F16 1958def : InstRW<[N2Write_13cyc_1V0], (instregex "^FDIVR?_ZPmZ_H$")>; 1959 1960// Floating point divide, F32 1961def : InstRW<[N2Write_10cyc_1V0], (instregex "^FDIVR?_ZPmZ_S$")>; 1962 1963// Floating point divide, F64 1964def : InstRW<[N2Write_15cyc_1V0], (instregex "^FDIVR?_ZPmZ_D$")>; 1965 1966// Floating point min/max pairwise 1967def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?P_ZPmZZ_[HSD]$")>; 1968 1969// Floating point min/max 1970def : InstRW<[N2Write_2cyc_1V], (instregex "^F(MAX|MIN)(NM)?_ZPm[IZ]_[HSD]$")>; 1971 1972// Floating point multiply 1973def : InstRW<[N2Write_3cyc_1V], (instregex "^(FSCALE|FMULX)_ZPmZ_[HSD]$", 1974 "^FMUL_(ZPm[IZ]|ZZZI?)_[HSD]$")>; 1975 1976// Floating point multiply accumulate 1977def : InstRW<[N2Write_4cyc_1V], 1978 (instregex "^FML[AS]_(ZPmZZ|ZZZI)_[HSD]$", 1979 "^(FMAD|FNMAD|FNML[AS]|FN?MSB)_ZPmZZ_[HSD]$")>; 1980 1981// Floating point multiply add/sub accumulate long 1982def : InstRW<[N2Write_4cyc_1V], (instregex "^FML[AS]L[BT]_ZZZI?_SHH$")>; 1983 1984// Floating point reciprocal estimate, F16 1985def : InstRW<[N2Write_6cyc_4V0], (instrs FRECPE_ZZ_H, FRECPX_ZPmZ_H, 1986 FRSQRTE_ZZ_H)>; 1987 1988// Floating point reciprocal estimate, F32 1989def : InstRW<[N2Write_4cyc_2V0], (instrs FRECPE_ZZ_S, FRECPX_ZPmZ_S, 1990 FRSQRTE_ZZ_S)>; 1991 1992// Floating point reciprocal estimate, F64 1993def : InstRW<[N2Write_3cyc_1V0], (instrs FRECPE_ZZ_D, FRECPX_ZPmZ_D, 1994 FRSQRTE_ZZ_D)>; 1995 1996// Floating point reciprocal step 1997def : InstRW<[N2Write_4cyc_1V0], (instregex "^F(RECPS|RSQRTS)_ZZZ_[HSD]$")>; 1998 1999// Floating point reduction, F16 2000def : InstRW<[N2Write_6cyc_2V], 2001 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_H$")>; 2002 2003// Floating point reduction, F32 2004def : InstRW<[N2Write_4cyc_1V], 2005 (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_S$")>; 2006 2007// Floating 
// point reduction, F64
def : InstRW<[N2Write_2cyc_1V],
    (instregex "^(FADDV|FMAXNMV|FMAXV|FMINNMV|FMINV)_VPZ_D$")>;

// Floating point round to integral, F16
def : InstRW<[N2Write_6cyc_4V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_H$")>;

// Floating point round to integral, F32
def : InstRW<[N2Write_4cyc_2V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_S$")>;

// Floating point round to integral, F64
def : InstRW<[N2Write_3cyc_1V0], (instregex "^FRINT[AIMNPXZ]_ZPmZ_D$")>;

// Floating point square root, F16
def : InstRW<[N2Write_13cyc_1V0], (instrs FSQRT_ZPmZ_H)>;

// Floating point square root, F32
def : InstRW<[N2Write_10cyc_1V0], (instrs FSQRT_ZPmZ_S)>;

// Floating point square root, F64
def : InstRW<[N2Write_16cyc_1V0], (instrs FSQRT_ZPmZ_D)>;

// Floating point trigonometric exponentiation
def : InstRW<[N2Write_3cyc_1V1], (instregex "^FEXPA_ZZ_[HSD]$")>;

// Floating point trigonometric multiply add
def : InstRW<[N2Write_4cyc_1V], (instregex "^FTMAD_ZZI_[HSD]$")>;

// Floating point trigonometric, miscellaneous
def : InstRW<[N2Write_3cyc_1V], (instregex "^FTS(MUL|SEL)_ZZZ_[HSD]$")>;

// SVE BFloat16 (BF16) instructions
// -----------------------------------------------------------------------------

// Convert, F32 to BF16
def : InstRW<[N2Write_3cyc_1V0], (instrs BFCVT_ZPmZ, BFCVTNT_ZPmZ)>;

// Dot product
def : InstRW<[N2Write_4cyc_1V], (instrs BFDOT_ZZI, BFDOT_ZZZ)>;

// Matrix multiply accumulate
def : InstRW<[N2Write_5cyc_1V], (instrs BFMMLA_ZZZ)>;

// Multiply accumulate long
def : InstRW<[N2Write_4cyc_1V], (instregex "^BFMLAL[BT]_ZZ[ZI]$")>;

// SVE Load instructions
// -----------------------------------------------------------------------------

// Load vector
def : InstRW<[N2Write_6cyc_1L], (instrs LDR_ZXI)>;

// Load predicate
def : InstRW<[N2Write_6cyc_1L_1M], (instrs LDR_PXI)>;

// Contiguous load, scalar + imm
def : InstRW<[N2Write_6cyc_1L],
    (instregex "^LD1[BHWD]_IMM_REAL$",
               "^LD1S?B_[HSD]_IMM_REAL$",
               "^LD1S?H_[SD]_IMM_REAL$",
               "^LD1S?W_D_IMM_REAL$")>;

// Contiguous load, scalar + scalar
def : InstRW<[N2Write_6cyc_1L01],
    (instregex "^LD1[BHWD]$",
               "^LD1S?B_[HSD]$",
               "^LD1S?H_[SD]$",
               "^LD1S?W_D$")>;

// Contiguous load broadcast, scalar + imm
def : InstRW<[N2Write_6cyc_1L],
    (instregex "^LD1R[BHWD]_IMM$",
               "^LD1RSW_IMM$",
               "^LD1RS?B_[HSD]_IMM$",
               "^LD1RS?H_[SD]_IMM$",
               "^LD1RS?W_D_IMM$",
               "^LD1RQ_[BHWD]_IMM$")>;

// Contiguous load broadcast, scalar + scalar
def : InstRW<[N2Write_6cyc_1L], (instregex "^LD1RQ_[BHWD]$")>;

// Non temporal load, scalar + imm
def : InstRW<[N2Write_6cyc_1L], (instregex "^LDNT1[BHWD]_ZRI$")>;

// Non temporal load, scalar + scalar
def : InstRW<[N2Write_6cyc_1L_1S], (instregex "^LDNT1[BHWD]_ZRR$")>;

// Non temporal gather load, vector + scalar, 32-bit element size
def : InstRW<[N2Write_9cyc_1L_1V],
    (instregex "^LDNT1[BHW]_ZZR_S_REAL$",
               "^LDNT1S[BH]_ZZR_S_REAL$")>;

// Non temporal gather load, vector + scalar, 64-bit element size
def : InstRW<[N2Write_10cyc_2L_2V1], (instregex "^LDNT1S?[BHW]_ZZR_D_REAL$")>;
def : InstRW<[N2Write_10cyc_2L_2V1], (instrs LDNT1D_ZZR_D_REAL)>;

// Contiguous first faulting load, scalar + scalar
def : InstRW<[N2Write_6cyc_1L_1S],
    (instregex "^LDFF1[BHWD]_REAL$",
               "^LDFF1S?B_[HSD]_REAL$",
               "^LDFF1S?H_[SD]_REAL$",
               "^LDFF1S?W_D_REAL$")>;

// Contiguous non faulting load, scalar + imm
def : InstRW<[N2Write_6cyc_1L],
    (instregex "^LDNF1[BHWD]_IMM_REAL$",
               "^LDNF1S?B_[HSD]_IMM_REAL$",
               "^LDNF1S?H_[SD]_IMM_REAL$",
               "^LDNF1S?W_D_IMM_REAL$")>;

// Contiguous load two structures to two vectors, scalar + imm
def : InstRW<[N2Write_8cyc_1L_1V], (instregex "^LD2[BHWD]_IMM$")>;

// Contiguous load two structures to two vectors, scalar + scalar
def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD2[BHWD]$")>;

// Contiguous load three structures to three vectors, scalar + imm
def : InstRW<[N2Write_9cyc_1L_1V], (instregex "^LD3[BHWD]_IMM$")>;

// Contiguous load three structures to three vectors, scalar + scalar
def : InstRW<[N2Write_10cyc_1V_1L_1S], (instregex "^LD3[BHWD]$")>;

// Contiguous load four structures to four vectors, scalar + imm
def : InstRW<[N2Write_9cyc_2L_2V], (instregex "^LD4[BHWD]_IMM$")>;

// Contiguous load four structures to four vectors, scalar + scalar
def : InstRW<[N2Write_10cyc_2L_2V_2S], (instregex "^LD4[BHWD]$")>;

// Gather load, vector + imm, 32-bit element size
def : InstRW<[N2Write_9cyc_1L_1V],
    (instregex "^GLD(FF)?1S?[BH]_S_IMM_REAL$",
               "^GLD(FF)?1W_IMM_REAL$")>;

// Gather load, vector + imm, 64-bit element size
def : InstRW<[N2Write_9cyc_2L_2V],
    (instregex "^GLD(FF)?1S?[BHW]_D_IMM_REAL$",
               "^GLD(FF)?1D_IMM_REAL$")>;

// Gather load, 64-bit element size
def : InstRW<[N2Write_9cyc_2L_2V],
    (instregex "^GLD(FF)?1S?[BHW]_D_[SU]XTW_(SCALED_)?REAL$",
               "^GLD(FF)?1S?[BHW]_D_(SCALED_)?REAL$",
               "^GLD(FF)?1D_[SU]XTW_(SCALED_)?REAL$",
               "^GLD(FF)?1D_(SCALED_)?REAL$")>;

// Gather load, 32-bit scaled offset
def : InstRW<[N2Write_10cyc_2L_2V],
    (instregex "^GLD(FF)?1S?[HW]_S_[SU]XTW_SCALED_REAL$",
               "^GLD(FF)?1W_[SU]XTW_SCALED_REAL")>;

// Gather load, 32-bit unpacked unscaled offset
def : InstRW<[N2Write_9cyc_1L_1V],
    (instregex "^GLD(FF)?1S?[BH]_S_[SU]XTW_REAL$",
               "^GLD(FF)?1W_[SU]XTW_REAL$")>;

// SVE Store instructions
// -----------------------------------------------------------------------------

// Store from predicate reg
def : InstRW<[N2Write_1cyc_1L01], (instrs STR_PXI)>;

// Store from vector reg
def : InstRW<[N2Write_2cyc_1L01_1V], (instrs STR_ZXI)>;

// Contiguous store, scalar + imm
def : InstRW<[N2Write_2cyc_1L01_1V],
    (instregex "^ST1[BHWD]_IMM$",
               "^ST1B_[HSD]_IMM$",
               "^ST1H_[SD]_IMM$",
               "^ST1W_D_IMM$")>;

// Contiguous store, scalar + scalar
// (the ST1H forms get a separate write type from the B/W/D forms)
def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instregex "^ST1H(_[SD])?$")>;
def : InstRW<[N2Write_2cyc_1L01_1V],
    (instregex "^ST1[BWD]$",
               "^ST1B_[HSD]$",
               "^ST1W_D$")>;

// Contiguous store two structures from two vectors, scalar + imm
def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BHWD]_IMM$")>;

// Contiguous store two structures from two vectors, scalar + scalar (ST2H)
def : InstRW<[N2Write_4cyc_1L01_1S_1V], (instrs ST2H)>;

// Contiguous store two structures from two vectors, scalar + scalar
// (ST2B, ST2W, ST2D)
def : InstRW<[N2Write_4cyc_1L01_1V], (instregex "^ST2[BWD]$")>;

// Contiguous store three structures from three vectors, scalar + imm
def : InstRW<[N2Write_7cyc_5L01_5V], (instregex "^ST3[BHWD]_IMM$")>;

// Contiguous store three structures from three vectors, scalar + scalar (ST3H)
def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instrs ST3H)>;

// Contiguous store three structures from three vectors, scalar + scalar
// (ST3B, ST3W, ST3D)
def : InstRW<[N2Write_7cyc_5L01_5S_5V], (instregex "^ST3[BWD]$")>;

// Contiguous store four structures from four vectors, scalar + imm
def : InstRW<[N2Write_11cyc_9L01_9V], (instregex "^ST4[BHWD]_IMM$")>;

// Contiguous store four structures from four vectors, scalar + scalar (ST4H)
def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instrs ST4H)>;

// Contiguous store four structures from four vectors, scalar + scalar
// (ST4B, ST4W, ST4D)
def : InstRW<[N2Write_11cyc_9L01_9S_9V], (instregex "^ST4[BWD]$")>;

// Non temporal store, scalar + imm
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZRI$")>;

// Non temporal store, scalar + scalar
// (STNT1H_ZRR gets a separate write type from the B/W/D forms)
def : InstRW<[N2Write_2cyc_1L01_1S_1V], (instrs STNT1H_ZRR)>;
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BWD]_ZRR$")>;

// Scatter non temporal store, vector + scalar, 32-bit element size
def : InstRW<[N2Write_4cyc_2L01_2V], (instregex "^STNT1[BHW]_ZZR_S")>;

// Scatter non temporal store, vector + scalar, 64-bit element size
def : InstRW<[N2Write_2cyc_1L01_1V], (instregex "^STNT1[BHWD]_ZZR_D")>;

// Scatter store, vector + imm, 32-bit element size
def : InstRW<[N2Write_4cyc_2L01_2V],
    (instregex "^SST1[BH]_S_IMM$",
               "^SST1W_IMM$")>;

// Scatter store, vector + imm, 64-bit element size
def : InstRW<[N2Write_2cyc_1L01_1V],
    (instregex "^SST1[BHW]_D_IMM$",
               "^SST1D_IMM$")>;

// Scatter store, 32-bit scaled offset
def : InstRW<[N2Write_4cyc_2L01_2V],
    (instregex "^SST1(H_S|W)_[SU]XTW_SCALED$")>;

// Scatter store, 32-bit unpacked unscaled offset
def : InstRW<[N2Write_2cyc_1L01_1V],
    (instregex "^SST1[BHW]_D_[SU]XTW$",
               "^SST1D_[SU]XTW$")>;

// Scatter store, 32-bit unpacked scaled offset
def : InstRW<[N2Write_2cyc_1L01_1V],
    (instregex "^SST1[HW]_D_[SU]XTW_SCALED$",
               "^SST1D_[SU]XTW_SCALED$")>;

// Scatter store, 32-bit unscaled offset
def : InstRW<[N2Write_4cyc_2L01_2V],
    (instregex "^SST1[BH]_S_[SU]XTW$",
               "^SST1W_[SU]XTW$")>;

// Scatter store, 64-bit scaled offset
def : InstRW<[N2Write_2cyc_1L01_1V],
    (instregex "^SST1[HW]_D_SCALED$",
               "^SST1D_SCALED$")>;

// Scatter store, 64-bit unscaled offset
def : InstRW<[N2Write_2cyc_1L01_1V],
    (instregex "^SST1[BHW]_D$",
               "^SST1D$")>;

// SVE Miscellaneous instructions
// -----------------------------------------------------------------------------

// Read first fault register, unpredicated
def : InstRW<[N2Write_2cyc_1M0], (instrs RDFFR_P_REAL)>;

// Read first fault register, predicated
def : InstRW<[N2Write_3cyc_1M0_1M], (instrs RDFFR_PPz_REAL)>;

// Read first fault register and set flags
def : InstRW<[N2Write_4cyc_2M0_2M], (instrs RDFFRS_PPz)>;

// Set first fault register / write to first fault register
def : InstRW<[N2Write_2cyc_1M0], (instrs SETFFR, WRFFR)>;

// Prefetch
// NOTE(review): the pattern has no trailing '$', so it matches every opcode
// whose name starts with PRFB/PRFH/PRFW/PRFD - presumably intentional; confirm.
def : InstRW<[N2Write_4cyc_1L], (instregex "^PRF[BHWD]")>;

// SVE Cryptographic instructions
// -----------------------------------------------------------------------------

// Crypto AES ops
def : InstRW<[N2Write_2cyc_1V],
    (instregex "^AES[DE]_ZZZ_B$",
               "^AESI?MC_ZZ_B$")>;

// Crypto SHA3 ops
def : InstRW<[N2Write_2cyc_1V0],
    (instregex "^(BCAX|EOR3)_ZZZZ$",
               "^RAX1_ZZZ_D$",
               "^XAR_ZZZI_[BHSD]$")>;

// Crypto SM4 ops
def : InstRW<[N2Write_4cyc_1V0], (instregex "^SM4E(KEY)?_ZZZ_S$")>;

}