//=- AArch64SchedAmpere1.td - Ampere-1 scheduling def -----*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for the Ampere Computing Ampere-1 to
// support instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// The Ampere-1 core is an out-of-order micro-architecture.  Its front end
// performs branch prediction and needs 10 cycles to recover from a
// mispredicted branch.  Instructions leaving the front end are decoded into
// internal micro-ops (uops).

def Ampere1Model : SchedMachineModel {
  // Front end: decode and dispatch are both 4 wide.
  let IssueWidth = 4;
  // Capacity of the micro-op re-order buffer.
  let MicroOpBufferSize = 174;
  // Optimistic load latency.
  let LoadLatency = 4;
  // Branch mispredict penalty (the 10-cycle recovery described above).
  let MispredictPenalty = 10;
  // Instruction queue size.
  let LoopMicroOpBufferSize = 32;
  let CompleteModel = 1;

  // The SVE and SME predicate lists are concatenated and marked unsupported.
  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
                                                    SMEUnsupported.F);
}

let SchedModel = Ampere1Model in {

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and the number available on Ampere-1.
// Ampere-1 has 12 pipelines fed by 8 independent schedulers (4 integer, 2 FP,
// and 2 memory).  The integer and FP schedulers can each issue one uop per
// cycle, while the memory schedulers can each issue one load and one store
// address calculation per cycle.

def Ampere1UnitA  : ProcResource<2>; // integer single-cycle, branch, and flags r/w
def Ampere1UnitB  : ProcResource<2>; // integer single-cycle, and complex shifts
def Ampere1UnitBS : ProcResource<1>; // integer multi-cycle
def Ampere1UnitL  : ProcResource<2>; // load
def Ampere1UnitS  : ProcResource<2>; // store address calculation
def Ampere1UnitX  : ProcResource<1>; // FP and vector operations, and flag write
def Ampere1UnitY  : ProcResource<1>; // FP and vector operations, and crypto
def Ampere1UnitZ  : ProcResource<1>; // FP store data and FP-to-integer moves

// Groups for uops that may issue to either member unit.
def Ampere1UnitAB : ProcResGroup<[Ampere1UnitA, Ampere1UnitB]>;
def Ampere1UnitXY : ProcResGroup<[Ampere1UnitX, Ampere1UnitY]>;

//===----------------------------------------------------------------------===//
// Define customized scheduler read/write types specific to the Ampere-1.
//
// Naming convention: Ampere1Write_<N>cyc_<count><unit>[_<count><unit>...]
//   - Latency is N;
//   - each unit is listed <count> times in the resource list;
//   - NumMicroOps is the total number of listed units.

def Ampere1Write_1cyc_1A : SchedWriteRes<[Ampere1UnitA]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1Write_1cyc_2A : SchedWriteRes<[Ampere1UnitA, Ampere1UnitA]> {
  let Latency = 1;
  let NumMicroOps = 2;
}

def Ampere1Write_1cyc_1B : SchedWriteRes<[Ampere1UnitB]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1Write_1cyc_1AB : SchedWriteRes<[Ampere1UnitAB]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1Write_1cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1Write_1cyc_1S : SchedWriteRes<[Ampere1UnitS]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1Write_1cyc_2S : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS]> {
  let Latency = 1;
  let NumMicroOps = 2;
}

def Ampere1Write_2cyc_1Y : SchedWriteRes<[Ampere1UnitY]> {
  let Latency = 2;
  let NumMicroOps = 1;
}

def Ampere1Write_2cyc_2AB : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitAB]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1Write_2cyc_1B_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitAB]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1Write_2cyc_1B_1A : SchedWriteRes<[Ampere1UnitB, Ampere1UnitA]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1Write_2cyc_1AB_1A : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitA]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1Write_2cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1Write_2cyc_1AB_2S : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS,
                                              Ampere1UnitS]> {
  let Latency = 2;
  let NumMicroOps = 3;
}

def Ampere1Write_2cyc_1AB_1S_1Z : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS,
                                                 Ampere1UnitZ]> {
  let Latency = 2;
  let NumMicroOps = 3;
}

def Ampere1Write_2cyc_1B_1S : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1Write_2cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 2;
  let NumMicroOps = 1;
}

def Ampere1Write_2cyc_1S_1Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1Write_3cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
  let Latency = 3;
  let NumMicroOps = 1;
}

def Ampere1Write_3cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 3;
  let NumMicroOps = 1;
}

// Latency follows the _3cyc_ name, like every other write in this list.
def Ampere1Write_3cyc_1B_1S_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS,
                                                 Ampere1UnitAB]> {
  let Latency = 3;
  let NumMicroOps = 3;
}

// Latency follows the _3cyc_ name, like every other write in this list.
def Ampere1Write_3cyc_1S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ,
                                             Ampere1UnitZ]> {
  let Latency = 3;
  let NumMicroOps = 3;
}

// Latency follows the _3cyc_ name; this keeps the ST1 one/two/three/four
// register stores on a 2/3/4/5-cycle progression.
def Ampere1Write_3cyc_2S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
                                             Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 3;
  let NumMicroOps = 4;
}

def Ampere1Write_4cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1Write_4cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1Write_4cyc_1X : SchedWriteRes<[Ampere1UnitX]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1Write_4cyc_1Y : SchedWriteRes<[Ampere1UnitY]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1Write_4cyc_1Z : SchedWriteRes<[Ampere1UnitZ]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1Write_4cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> {
  let Latency = 4;
  let NumMicroOps = 2;
}

def Ampere1Write_4cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1Write_4cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 4;
  let NumMicroOps = 2;
}

def Ampere1Write_4cyc_1XY_1S_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitS,
                                                 Ampere1UnitZ]> {
  let Latency = 4;
  let NumMicroOps = 3;
}

def Ampere1Write_4cyc_3S_3Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
                                             Ampere1UnitS,
                                             Ampere1UnitZ, Ampere1UnitZ,
                                             Ampere1UnitZ]> {
  let Latency = 4;
  let NumMicroOps = 6;
}

def Ampere1Write_5cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
  let Latency = 5;
  let NumMicroOps = 2;
}

def Ampere1Write_5cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
  let Latency = 5;
  let NumMicroOps = 1;
}

def Ampere1Write_5cyc_1X : SchedWriteRes<[Ampere1UnitX]> {
  let Latency = 5;
  let NumMicroOps = 1;
}

def Ampere1Write_5cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
  let Latency = 5;
  let NumMicroOps = 1;
}

def Ampere1Write_5cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> {
  let Latency = 5;
  let NumMicroOps = 2;
}

def Ampere1Write_5cyc_1L_1BS : SchedWriteRes<[Ampere1UnitL, Ampere1UnitBS]> {
  let Latency = 5;
  let NumMicroOps = 2;
}

def Ampere1Write_5cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 5;
  let NumMicroOps = 1;
}

def Ampere1Write_5cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 5;
  let NumMicroOps = 2;
}

def Ampere1Write_5cyc_4S_4Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
                                             Ampere1UnitS, Ampere1UnitS,
                                             Ampere1UnitZ, Ampere1UnitZ,
                                             Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 5;
  let NumMicroOps = 8;
}

def Ampere1Write_5cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 5;
  let NumMicroOps = 6;
}

def Ampere1Write_6cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 6;
  let NumMicroOps = 6;
}

def Ampere1Write_6cyc_3XY_3S_3Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitXY,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitS,
                                                 Ampere1UnitZ, Ampere1UnitZ,
                                                 Ampere1UnitZ]> {
  let Latency = 6;
  let NumMicroOps = 9;
}

def Ampere1Write_6cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
  let Latency = 6;
  let NumMicroOps = 2;
}

def Ampere1Write_6cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 6;
  let NumMicroOps = 1;
}

def Ampere1Write_6cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 6;
  let NumMicroOps = 2;
}

def Ampere1Write_6cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                           Ampere1UnitXY]> {
  let Latency = 6;
  let NumMicroOps = 3;
}

def Ampere1Write_6cyc_3L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                          Ampere1UnitL]> {
  let Latency = 6;
  let NumMicroOps = 3;
}

def Ampere1Write_6cyc_4L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                          Ampere1UnitL, Ampere1UnitL]> {
  let Latency = 6;
  let NumMicroOps = 4;
}

def Ampere1Write_6cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> {
  let Latency = 6;
  let NumMicroOps = 2;
}

def Ampere1Write_7cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
  let Latency = 7;
  let NumMicroOps = 1;
}

def Ampere1Write_7cyc_1BS_1XY : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitXY]> {
  let Latency = 7;
  let NumMicroOps = 2;
}

def Ampere1Write_7cyc_1L_1XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitXY]> {
  let Latency = 7;
  let NumMicroOps = 2;
}

def Ampere1Write_7cyc_2L_2XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 7;
  let NumMicroOps = 4;
}

def Ampere1Write_7cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 7;
  let NumMicroOps = 2;
}

def Ampere1Write_7cyc_4XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitZ, Ampere1UnitZ,
                                                 Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 7;
  let NumMicroOps = 12;
}

def Ampere1Write_8cyc_1BS_1A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA]> {
  let Latency = 8;
  let NumMicroOps = 2;
}

def Ampere1Write_8cyc_1BS_2A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA,
                                              Ampere1UnitA]> {
  let Latency = 8;
  let NumMicroOps = 3;
}

def Ampere1Write_8cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 8;
  let NumMicroOps = 2;
}

def Ampere1Write_8cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                           Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 8;
  let NumMicroOps = 4;
}

def Ampere1Write_8cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitL,
                                              Ampere1UnitXY, Ampere1UnitXY,
                                              Ampere1UnitXY]> {
  let Latency = 8;
  let NumMicroOps = 6;
}

def Ampere1Write_8cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitXY, Ampere1UnitXY,
                                              Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 8;
  let NumMicroOps = 8;
}

def Ampere1Write_9cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitL,
                                              Ampere1UnitXY, Ampere1UnitXY,
                                              Ampere1UnitXY]> {
  let Latency = 9;
  let NumMicroOps = 6;
}

def Ampere1Write_9cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitXY, Ampere1UnitXY,
                                              Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 9;
  let NumMicroOps = 8;
}

def Ampere1Write_9cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                           Ampere1UnitXY]> {
  let Latency = 9;
  let NumMicroOps = 3;
}

def Ampere1Write_9cyc_2L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitXY, Ampere1UnitXY,
                                              Ampere1UnitXY]> {
  let Latency = 9;
  let NumMicroOps = 5;
}

def Ampere1Write_9cyc_6XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitZ, Ampere1UnitZ,
                                                 Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 9;
  let NumMicroOps = 14;
}

def Ampere1Write_9cyc_8XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitZ, Ampere1UnitZ,
                                                 Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 9;
  let NumMicroOps = 16;
}

def Ampere1Write_10cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 10;
  let NumMicroOps = 2;
}

def Ampere1Write_10cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> {
  let Latency = 10;
  let NumMicroOps = 2;
}

def Ampere1Write_10cyc_1X_1Z : SchedWriteRes<[Ampere1UnitX, Ampere1UnitZ]> {
  let Latency = 10;
  let NumMicroOps = 2;
}

def Ampere1Write_10cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                               Ampere1UnitL,
                                               Ampere1UnitXY, Ampere1UnitXY,
                                               Ampere1UnitXY]> {
  let Latency = 10;
  let NumMicroOps = 6;
}

def Ampere1Write_10cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS,
                                                  Ampere1UnitX]> {
  let Latency = 10;
  let NumMicroOps = 3;
}

def Ampere1Write_10cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS,
                                                   Ampere1UnitXY]> {
  let Latency = 10;
  let NumMicroOps = 3;
}

def Ampere1Write_11cyc_1BS_1L : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitL]> {
  let Latency = 11;
  let NumMicroOps = 2;
}

def Ampere1Write_11cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS,
                                                  Ampere1UnitX]> {
  let Latency = 11;
  let NumMicroOps = 3;
}

def Ampere1Write_11cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS,
                                                   Ampere1UnitXY]> {
  let Latency = 11;
  let NumMicroOps = 3;
}

def Ampere1Write_11cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                               Ampere1UnitL, Ampere1UnitL,
                                               Ampere1UnitXY, Ampere1UnitXY,
                                               Ampere1UnitXY, Ampere1UnitXY,
                                               Ampere1UnitXY, Ampere1UnitXY,
                                               Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 11;
  let NumMicroOps = 12;
}

def Ampere1Write_12cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                               Ampere1UnitL, Ampere1UnitL,
                                               Ampere1UnitXY, Ampere1UnitXY,
                                               Ampere1UnitXY, Ampere1UnitXY,
                                               Ampere1UnitXY, Ampere1UnitXY,
                                               Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 12;
  let NumMicroOps = 12;
}

def Ampere1Write_12cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                            Ampere1UnitXY]> {
  let Latency = 12;
  let NumMicroOps = 3;
}

def Ampere1Write_12cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                            Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 12;
  let NumMicroOps = 4;
}

def Ampere1Write_18cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
  let Latency = 18;
  let NumMicroOps = 1;
}

def Ampere1Write_19cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 19;
  let NumMicroOps = 1;
}

def Ampere1Write_25cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 25;
  let NumMicroOps = 1;
}

def Ampere1Write_32cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 32;
  let NumMicroOps = 1;
}

def Ampere1Write_34cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
  let Latency = 34;
  let NumMicroOps = 1;
}

def Ampere1Write_34cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 34;
  let NumMicroOps = 1;
}

def Ampere1Write_39cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 39;
  let NumMicroOps = 1;
}

def Ampere1Write_62cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 62;
  let NumMicroOps = 1;
}

// For basic arithmetic, we have more flexibility for short shifts (LSL shift <= 4),
// which are a single uop, and for extended registers, which have full flexibility
// across Unit A or B for both uops.
def Ampere1Write_Arith : SchedWriteVariant<[
                                SchedVar<RegExtendedPred, [Ampere1Write_2cyc_2AB]>,
                                SchedVar<AmpereCheapLSL,  [Ampere1Write_1cyc_1AB]>,
                                SchedVar<NoSchedPred,     [Ampere1Write_2cyc_1B_1AB]>]>;

// Flag-setting counterpart of Ampere1Write_Arith: same three predicates, but
// each variant books Unit A where the non-flag-setting form books group AB.
def Ampere1Write_ArithFlagsetting : SchedWriteVariant<[
                                SchedVar<RegExtendedPred, [Ampere1Write_2cyc_1AB_1A]>,
                                SchedVar<AmpereCheapLSL,  [Ampere1Write_1cyc_1A]>,
                                SchedVar<NoSchedPred,     [Ampere1Write_2cyc_1B_1A]>]>;

//===----------------------------------------------------------------------===//
// Map the target-defined scheduler read/write resources and latencies for Ampere-1.
// This provides a coarse model, which is then specialised below.

// Integer ALU, shift, divide and multiply classes.
def : WriteRes<WriteImm, [Ampere1UnitAB]>;  // MOVN, MOVZ
def : WriteRes<WriteI, [Ampere1UnitAB]>;  // ALU
def : WriteRes<WriteISReg, [Ampere1UnitB, Ampere1UnitA]> {
  let Latency = 2;
  let NumMicroOps = 2;
}  // ALU of Shifted-Reg
def : WriteRes<WriteIEReg, [Ampere1UnitAB, Ampere1UnitA]> {
  let Latency = 2;
  let NumMicroOps = 2;
}  // ALU of Extended-Reg
def : WriteRes<WriteExtr, [Ampere1UnitB]>;  // EXTR shifts a reg pair
def : WriteRes<WriteIS, [Ampere1UnitB]>;  // Shift/Scale
def : WriteRes<WriteID32, [Ampere1UnitBS]> {
  let Latency = 18;
}  // 32-bit Divide
def : WriteRes<WriteID64, [Ampere1UnitBS]> {
  let Latency = 34;
}  // 64-bit Divide
def : WriteRes<WriteIM32, [Ampere1UnitBS]> {
  let Latency = 3;
}  // 32-bit Multiply
def : WriteRes<WriteIM64, [Ampere1UnitBS]> {
  let Latency = 3;
}  // 64-bit Multiply

// Branches.
def : WriteRes<WriteBr, [Ampere1UnitA]>;
// NOTE(review): books two A units but leaves NumMicroOps unset, unlike
// Ampere1Write_1cyc_2A which sets NumMicroOps = 2 -- confirm that is intended.
def : WriteRes<WriteBrReg, [Ampere1UnitA, Ampere1UnitA]>;

// Loads, stores and address generation.
def : WriteRes<WriteLD, [Ampere1UnitL]> {
  let Latency = 4;
}  // Load from base addr plus immediate offset
def : WriteRes<WriteST, [Ampere1UnitS]> {
  let Latency = 1;
}  // Store to base addr plus immediate offset
def : WriteRes<WriteSTP, [Ampere1UnitS, Ampere1UnitS]> {
  let Latency = 1;
  let NumMicroOps = 2;
}  // Store a register pair.
def : WriteRes<WriteAdr, [Ampere1UnitAB]>;
// An indexed load is one address-computation uop (A or B) plus one uop on the
// load pipe; Unit S only performs store address calculation.  This matches
// Ampere1Write_5cyc_1AB_1L, which the specialised register-offset loads use.
def : WriteRes<WriteLDIdx, [Ampere1UnitAB, Ampere1UnitL]> {
  let Latency = 5;
  let NumMicroOps = 2;
}  // Load from a register index (maybe scaled).
def : WriteRes<WriteSTIdx, [Ampere1UnitS, Ampere1UnitS]> {
  let Latency = 1;
  let NumMicroOps = 2;
}  // Store to a register index (maybe scaled).

// Floating point.
def : WriteRes<WriteF, [Ampere1UnitXY]> {
  let Latency = 2;
}  // General floating-point ops.
def : WriteRes<WriteFCmp, [Ampere1UnitX]> {
  let Latency = 5;
}  // Floating-point compare.
// Scalar floating-point classes; all issue to the X/Y group.
def : WriteRes<WriteFCvt,  [Ampere1UnitXY]> { let Latency = 6; }   // Float conversion.
def : WriteRes<WriteFCopy, [Ampere1UnitXY]>;                       // Float-int register copy (no overrides).
def : WriteRes<WriteFImm,  [Ampere1UnitXY]> { let Latency = 2; }   // Floating-point immediate.
def : WriteRes<WriteFMul,  [Ampere1UnitXY]> { let Latency = 5; }   // Floating-point multiply.
def : WriteRes<WriteFDiv,  [Ampere1UnitXY]> { let Latency = 34; }  // Floating-point division.

// Vector classes.
def : WriteRes<WriteVd,  [Ampere1UnitXY]> { let Latency = 3; }     // 64bit Vector D ops.
def : WriteRes<WriteVq,  [Ampere1UnitXY]> { let Latency = 3; }     // 128bit Vector Q ops.
def : WriteRes<WriteVLD, [Ampere1UnitL, Ampere1UnitL]> { let Latency = 5; }  // Vector loads.
def : WriteRes<WriteVST, [Ampere1UnitS, Ampere1UnitZ]> { let Latency = 2; }  // Vector stores.

// Atomics are flagged as unsupported in this model.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }

// System, barrier and hint writes consume no modelled resource.
def : WriteRes<WriteSys,     []> { let Latency = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint,    []> { let Latency = 1; }

// The second register of a load-pair: LDP,LDPSW,LDNP,LDXP,LDAXP
def : WriteRes<WriteLDHi, []> { let Latency = 4; }

// Forwarding logic.  Only the multiply-accumulate read (ReadIMA) gets a
// non-zero advance, and only when fed by WriteIM32/WriteIM64.
def : ReadAdvance<ReadI,       0>;
def : ReadAdvance<ReadISReg,   0>;
def : ReadAdvance<ReadIEReg,   0>;
def : ReadAdvance<ReadIM,      0>;
def : ReadAdvance<ReadIMA,     1, [WriteIM32, WriteIM64]>;
def : ReadAdvance<ReadID,      0>;
def : ReadAdvance<ReadExtrHi,  0>;
def : ReadAdvance<ReadST,      0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD,     0>;

//===----------------------------------------------------------------------===//
// Specialising the scheduling model further for Ampere-1.

// Each InstRW below overrides the coarse WriteRes mapping for the listed
// instructions (exact names via `instrs`, name patterns via `instregex`).

def : InstRW<[Ampere1Write_1cyc_1AB], (instrs COPY)>;

// Branch instructions
def : InstRW<[Ampere1Write_1cyc_1A], (instrs Bcc, BL, RET)>;
def : InstRW<[Ampere1Write_1cyc_1A],
        (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>;
def : InstRW<[Ampere1Write_1cyc_2A], (instrs BLR)>;

// Cryptography instructions
// -- AES encryption/decryption
def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AES[DE]")>;
def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AESI?MC")>;
// -- Polynomial multiplication
def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^PMUL", "^PMULL")>;
// -- SHA-256 hash
def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA256(H|H2)")>;
// -- SHA-256 schedule update
def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA256SU[01]")>;
// -- SHA-3 instructions
def : InstRW<[Ampere1Write_2cyc_1XY],
        (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>;
// -- SHA-512 hash
def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA512(H|H2)")>;
// -- SHA-512 schedule update
def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA512SU[01]")>;
// -- SHA1 choose/majority/parity
def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA1[CMP]")>;
// -- SHA1 hash/schedule update
def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1SU[01]")>;
def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1H")>;

// FP and vector load instructions
// -- Load 1-element structure to one/all lanes
// ---- all lanes
def : InstRW<[Ampere1Write_7cyc_1L_1XY],
        (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>;
// ---- one lane
def : InstRW<[Ampere1Write_7cyc_1L_1XY],
        (instregex "^LD1i(8|16|32|64)")>;
// -- Load 1-element structure to one/all lanes, 1D size
def : InstRW<[Ampere1Write_5cyc_1L],
        (instregex "^LD1Rv1d")>;
// -- Load 1-element structures to 1 register
def : InstRW<[Ampere1Write_5cyc_1L],
        (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Load 1-element structures to 2 registers
def : InstRW<[Ampere1Write_5cyc_2L],
        (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Load 1-element structures to 3 registers
def : InstRW<[Ampere1Write_6cyc_3L],
        (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Load 1-element structures to 4 registers
def : InstRW<[Ampere1Write_6cyc_4L],
        (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Load 2-element structure to all lanes of 2 registers, 1D size
def : InstRW<[Ampere1Write_5cyc_2L],
        (instregex "^LD2Rv1d")>;
// -- Load 2-element structure to all lanes of 2 registers, other sizes
def : InstRW<[Ampere1Write_7cyc_2L_2XY],
        (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>;
// -- Load 2-element structure to one lane of 2 registers
def : InstRW<[Ampere1Write_7cyc_2L_2XY],
        (instregex "^LD2i(8|16|32|64)")>;
// -- Load 2-element structures to 2 registers, 16B/8H/4S/2D size
def : InstRW<[Ampere1Write_7cyc_2L_2XY],
        (instregex "^LD2Twov(16b|8h|4s|2d)")>;
// -- Load 2-element structures to 2 registers, 8B/4H/2S size
def : InstRW<[Ampere1Write_9cyc_2L_3XY],
        (instregex "^LD2Twov(8b|4h|2s)")>;
// -- Load 3-element structure to all lanes of 3 registers, 1D size
def : InstRW<[Ampere1Write_6cyc_3L],
        (instregex "^LD3Rv1d")>;
// -- Load 3-element structure to all lanes of 3 registers, other sizes
def : InstRW<[Ampere1Write_8cyc_3L_3XY],
        (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>;
// -- Load 3-element structure to one lane of 3 registers
def : InstRW<[Ampere1Write_8cyc_3L_3XY],
        (instregex "^LD3i(8|16|32|64)")>;
// -- Load 3-element structures to 3 registers, 16B/8H/4S sizes
def : InstRW<[Ampere1Write_9cyc_3L_3XY],
        (instregex "^LD3Threev(16b|8h|4s)")>;
// -- Load 3-element structures to 3 registers, 2D size
def : InstRW<[Ampere1Write_8cyc_3L_3XY],
        (instregex "^LD3Threev2d")>;
// -- Load 3-element structures to 3 registers, 8B/4H/2S sizes
def : InstRW<[Ampere1Write_10cyc_3L_3XY],
        (instregex "^LD3Threev(8b|4h|2s)")>;
// -- Load 4-element structure to all lanes of 4 registers, 1D size
def : InstRW<[Ampere1Write_6cyc_4L],
        (instregex "^LD4Rv1d")>;
// -- Load 4-element structure to all lanes of 4 registers, other sizes
def : InstRW<[Ampere1Write_8cyc_4L_4XY],
        (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>;
// -- Load 4-element structure to one lane of 4 registers
// NOTE(review): the LD2i/LD3i lane loads above also book XY uops; this one
// books loads only -- confirm against the optimisation guide.
def : InstRW<[Ampere1Write_6cyc_4L],
        (instregex "^LD4i(8|16|32|64)")>;
// -- Load 4-element structures to 4 registers, 2D size
def : InstRW<[Ampere1Write_9cyc_4L_4XY],
        (instregex "^LD4Fourv2d")>;
// -- Load 4-element structures to 4 registers, 2S size
def : InstRW<[Ampere1Write_12cyc_4L_8XY],
        (instregex "^LD4Fourv2s")>;
// -- Load 4-element structures to 4 registers, other sizes
def : InstRW<[Ampere1Write_11cyc_4L_8XY],
        (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>;
// -- Load pair, Q-form
def : InstRW<[Ampere1Write_5cyc_2L], (instregex "LDN?PQ")>;
// -- Load pair, S/D-form
def : InstRW<[Ampere1Write_5cyc_1L_1BS], (instregex "LDN?P(S|D)")>;
// -- Load register
def : InstRW<[Ampere1Write_5cyc_1L], (instregex "LDU?R[BHSDQ]i")>;
// -- Load register, sign-extended register
def : InstRW<[Ampere1Write_6cyc_1AB_1L], (instregex "LDR[BHSDQ]ro(W|X)")>;

// FP and vector store instructions
// -- Store 1-element structure from one lane of 1 register
def : InstRW<[Ampere1Write_4cyc_1XY_1S_1Z],
        (instregex "^ST1i(8|16|32|64)")>;
// -- Store 1-element structures from 1 register
def : InstRW<[Ampere1Write_2cyc_1S_1Z],
        (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 1-element structures from 2 registers
def : InstRW<[Ampere1Write_3cyc_2S_2Z],
        (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 1-element structures from 3 registers
def : InstRW<[Ampere1Write_4cyc_3S_3Z],
        (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 1-element structures from 4 registers
def : InstRW<[Ampere1Write_5cyc_4S_4Z],
        (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 2-element structure from one lane of 2 registers
def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z],
        (instregex "^ST2i(8|16|32|64)")>;
// -- Store 2-element structures from 2 registers, 16B/8H/4S/2D sizes
def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z],
        (instregex "^ST2Twov(16b|8h|4s|2d)")>;
// -- Store 2-element structures from 2 registers, 8B/4H/2S sizes
def : InstRW<[Ampere1Write_6cyc_2XY_2S_2Z],
        (instregex "^ST2Twov(8b|4h|2s)")>;
// -- Store 3-element structure from one lane of 3 registers
def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z],
        (instregex "^ST3i(8|16|32|64)")>;
// -- Store 3-element structures from 3 registers
def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z],
        (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 4-element structure from one lane of 4 registers
def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z],
        (instregex "^ST4i(8|16|32|64)")>;
// -- Store 4-element structures from 4 registers, 16B/8H/4S sizes
def : InstRW<[Ampere1Write_9cyc_8XY_4S_4Z],
        (instregex "^ST4Fourv(16b|8h|4s)")>;
// -- Store 4-element structures from 4 registers, 2D sizes
def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z],
        (instregex "^ST4Fourv2d")>;
// -- Store 4-element structures from 4 registers, 8B/4H/2S sizes
def : InstRW<[Ampere1Write_9cyc_6XY_4S_4Z],
        (instregex "^ST4Fourv(8b|4h|2s)")>;
// -- Store pair, Q-form
def : InstRW<[Ampere1Write_3cyc_2S_2Z], (instregex "^STN?PQ")>;
// -- Store pair, S/D-form
def : InstRW<[Ampere1Write_3cyc_1S_2Z], (instregex "^STN?P[SD]")>;
// -- Store register
def : InstRW<[Ampere1Write_2cyc_1S_1Z], (instregex "^STU?R[BHSDQ](ui|i)")>;
// -- Store register, sign-extended register offset
def : InstRW<[Ampere1Write_2cyc_1AB_1S_1Z], (instregex "^STR[BHSDQ]ro[XW]")>;

// FP data processing, bfloat16 format
def : InstRW<[Ampere1Write_5cyc_1XY], (instrs BFCVT)>;
def : InstRW<[Ampere1Write_7cyc_2XY], (instrs BFCVTN, BFCVTN2)>;
def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^BFDOTv", "^BF16DOT")>;
def : InstRW<[Ampere1Write_4cyc_2XY], (instrs BFMMLA)>;
def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^BFMLAL")>;

// FP data processing, scalar/vector, half precision
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(ABD|ABS)v.[fi]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY],
        (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY],
        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY],
        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>;
def : InstRW<[Ampere1Write_4cyc_1X],
        (instregex "^FCMPE?H")>;
def : InstRW<[Ampere1Write_10cyc_1A_1BS_1X],
        (instregex "^FCCMPE?H")>;
def : InstRW<[Ampere1Write_10cyc_1A_1BS_1XY],
        (instregex "^FCSELH")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^[SU]CVTFv.[fi]16")>;
def : InstRW<[Ampere1Write_25cyc_1XY], (instregex "^FDIVv.[if]16", "FDIVH")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>;
def : InstRW<[Ampere1Write_8cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>;
def : InstRW<[Ampere1Write_12cyc_3XY], (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FMULX?v.[if]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instrs FMULX16)>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[H]rrr")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FML[AS]v.[if]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FRECPXv.[if]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(RECP|RSQRT)S16")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if]16")>;
def : InstRW<[Ampere1Write_39cyc_1XY], (instregex "^FSQRTv.f16", "^FSQRTHr")>;

// FP data processing, scalar/vector, single/double precision
def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^F(ABD|ABS)v.[fi](32|64)")>;
def : InstRW<[Ampere1Write_5cyc_1XY],
        (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>;
def : InstRW<[Ampere1Write_5cyc_1XY],
        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>;
def : InstRW<[Ampere1Write_5cyc_1XY],
        (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>;
def : InstRW<[Ampere1Write_5cyc_1X],
        (instregex "^FCMPE?(S|D)")>;
def : InstRW<[Ampere1Write_11cyc_1A_1BS_1X],
        (instregex "^FCCMPE?(S|D)")>;
def : InstRW<[Ampere1Write_11cyc_1A_1BS_1XY],
        (instregex "^FCSEL(S|D)")>;
def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^[SU]CVTFv.[fi](32|64)")>;
def : InstRW<[Ampere1Write_34cyc_1XY], (instregex "^FDIVv.[if](64)", "FDIVD")>;
def : InstRW<[Ampere1Write_19cyc_1XY], (instregex "^FDIVv.[if](32)", "FDIVS")>;
def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>;
def : InstRW<[Ampere1Write_10cyc_2XY], (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FMULX?v.[if](32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY], (instrs FMULX32, FMULX64)>;
def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FN?M(ADD|SUB)[SD]rrr")>;
def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FML[AS]v.[if](32|64)")>;
def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FRECPXv.[if](32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^F(RECP|RSQRT)S(32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FRINT[AIMNPXZ]v.[if](32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FRINT(32|64)")>;
def : InstRW<[Ampere1Write_62cyc_1XY], (instregex "^FSQRTv.f64", "^FSQRTDr")>;
def : InstRW<[Ampere1Write_32cyc_1XY], (instregex "^FSQRTv.f32", "^FSQRTSr")>;

// FP miscellaneous instructions
def : InstRW<[Ampere1Write_10cyc_1XY_1Z], (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FCVT[HSD]Hr")>;
def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVT[HSD][SD]r")>;
def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVTLv")>;
def : InstRW<[Ampere1Write_8cyc_2XY], (instregex "^FCVT(N|XN)v")>;
def : InstRW<[Ampere1Write_10cyc_1X_1Z], (instrs FJCVTZS)>;
def : InstRW<[Ampere1Write_5cyc_1BS], (instregex "^FMOV[HSD][WX]r")>;
def : InstRW<[Ampere1Write_7cyc_1BS_1XY], (instregex "^FMOVDXHighr")>;
def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^FMOV[HSD][ri]")>;
def : InstRW<[Ampere1Write_6cyc_1XY_1Z], (instregex "^FMOVXDHighr")>;
def : InstRW<[Ampere1Write_4cyc_1Z], (instregex "^FMOV[WX][HSD]r")>;

// Integer arithmetic and logical instructions
def : InstRW<[Ampere1Write_1cyc_1A],
        (instregex "ADC(W|X)r", "SBC(W|X)r")>;
def : InstRW<[Ampere1Write_Arith],
        (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r")>;
def : InstRW<[Ampere1Write_ArithFlagsetting],
        (instregex "(ADD|AND|BIC|SUB)S(W|X)r")>;
def : InstRW<[Ampere1Write_1cyc_1A],
        (instregex "(ADC|SBC)S(W|X)r")>;
def : InstRW<[Ampere1Write_1cyc_1A], (instrs RMIF)>;
def : InstRW<[Ampere1Write_1cyc_1A],
        (instregex "(CCMN|CCMP)(X|W)")>;
def : InstRW<[Ampere1Write_1cyc_1A],
        (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>;
def : InstRW<[Ampere1Write_18cyc_1BS], (instrs SDIVWr, UDIVWr)>;
def : InstRW<[Ampere1Write_34cyc_1BS], (instrs SDIVXr, UDIVXr)>;
def : InstRW<[Ampere1Write_3cyc_1BS],
        (instregex "(S|U)MULHr")>;
def : InstRW<[Ampere1Write_4cyc_1BS],
        (instregex "(S|U)?M(ADD|SUB)L?r")>;

// Integer load instructions
def : InstRW<[Ampere1Write_4cyc_2L],
        (instregex "(LDNP|LDP|LDPSW)(X|W)")>;
// NOTE(review): B/D/H/Q/S look like the FP/SIMD register forms rather than
// the integer LDR(BB|HH|W|X)ui -- confirm which set was intended here.
def : InstRW<[Ampere1Write_4cyc_1L],
        (instregex "LDR(B|D|H|Q|S)ui")>;
def : InstRW<[Ampere1Write_4cyc_1L],
        (instregex "LDR(D|Q|W|X)l")>;
def : InstRW<[Ampere1Write_4cyc_1L],
        (instregex "LDTR(B|H|W|X)i")>;
def : InstRW<[Ampere1Write_4cyc_1L],
        (instregex "LDTRS(BW|BX|HW|HX|W)i")>;
def : InstRW<[Ampere1Write_4cyc_1L],
        (instregex "LDUR(BB|HH|X|W)i")>;
def : InstRW<[Ampere1Write_4cyc_1L],
        (instregex "LDURS(BW|BX|HW|HX|W)i")>;
def : InstRW<[Ampere1Write_5cyc_1AB_1L],
        (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>;
// Prefetches: literal, unsigned-offset and unscaled-offset forms.
def : InstRW<[Ampere1Write_1cyc_1L],
        (instrs PRFMl, PRFMui, PRFUMi)>;
def : InstRW<[Ampere1Write_2cyc_1AB_1L],
        (instrs PRFMroW, PRFMroX)>;

// Integer miscellaneous instructions
def : InstRW<[Ampere1Write_1cyc_1A], (instrs ADR, ADRP)>;
def : InstRW<[Ampere1Write_1cyc_1B], (instregex "EXTR(W|X)")>;
def : InstRW<[Ampere1Write_1cyc_1B], (instregex "(S|U)?BFM(W|X)")>;
def : InstRW<[Ampere1Write_3cyc_1BS], (instregex "^CRC32C?[BHWX]")>;
def : InstRW<[Ampere1Write_1cyc_1B], (instregex "CLS(W|X)")>;
def : InstRW<[Ampere1Write_1cyc_1A], (instrs SETF8, SETF16)>;
def : InstRW<[Ampere1Write_1cyc_1AB],
        (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;
def : InstRW<[Ampere1Write_1cyc_1B],
        (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>;
def : InstRW<[Ampere1Write_1cyc_1B],
        (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>;

// Integer store instructions
def : InstRW<[Ampere1Write_1cyc_2S], (instregex "STNP(X|W)i")>;
def : InstRW<[Ampere1Write_2cyc_1B_1S],
        (instrs STPWi, STPXi)>;
def : InstRW<[Ampere1Write_3cyc_1B_1S_1AB],
        (instregex "STP(W|X)(pre|post)")>;
def : InstRW<[Ampere1Write_1cyc_1S],
        (instrs STTRBi, STTRHi, STTRWi, STTRXi)>;
// NOTE(review): the STUR pattern used to be listed twice; the repeat may have
// been meant as "STR(BB|HH)ui" -- confirm before adding it.
def : InstRW<[Ampere1Write_1cyc_1S],
        (instregex "STUR(BB|HH|X|W)i",
                   "STR(X|W)ui")>;
def : InstRW<[Ampere1Write_1cyc_2S], (instrs
STRWroX, STRXroX)>; 1003def : InstRW<[Ampere1Write_2cyc_1AB_2S], (instrs STRWroW, STRXroW)>; 1004 1005// Pointer authentication 1006//def : InstRW<[Ampere1Write_7cyc_1BS], 1007// (instrs AUTIAZ, AUTIBZ, AUTIASP, AUTIBSP, AUTIA1716, AUTIB1716)>; 1008def : InstRW<[Ampere1Write_8cyc_1BS_1A], 1009 (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)", "ERETA(A|B)")>; 1010def : InstRW<[Ampere1Write_8cyc_1BS_2A], 1011 (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>; 1012//def : InstRW<[Ampere1Write_7cyc_1BS], 1013// (instrs PACIAZ, PACIBZ, PACIASP, PACIBSP, PACIA1716, PACIB1716)>; 1014def : InstRW<[Ampere1Write_11cyc_1BS_1L], (instregex "^LDRA(A|B)")>; 1015def : InstRW<[Ampere1Write_7cyc_1BS], (instrs XPACD, XPACI)>; 1016 1017// Vector integer instructions 1018// -- absolute difference 1019def : InstRW<[Ampere1Write_3cyc_1XY], 1020 (instregex "^SABAv", "^SABALv", "^SABDv", "^SABDLv", 1021 "^UABAv", "^UABALv", "^UABDv", "^UABDLv")>; 1022// -- arithmetic 1023def : InstRW<[Ampere1Write_3cyc_1XY], 1024 (instregex "^ABSv", "^(ADD|SUB)v", "^SADDLv", "^SADDW", "SHADD", 1025 "SHSUB", "^SRHADD", "^URHADD", "SSUBL", "SSUBW", 1026 "^UADDLv", "^UADDW", "UHADD", "UHSUB", "USUBL", "USUBW")>; 1027// -- arithmetic, horizontal, 16B 1028def : InstRW<[Ampere1Write_12cyc_4XY], 1029 (instregex "^ADDVv16i8v", "^SADDLVv16i8v", "^UADDLVv16i8v")>; 1030def : InstRW<[Ampere1Write_12cyc_4XY], 1031 (instregex "^[SU](MIN|MAX)Vv16i8v")>; 1032// -- arithmetic, horizontal, 4H/4S 1033def : InstRW<[Ampere1Write_6cyc_2XY], 1034 (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>; 1035def : InstRW<[Ampere1Write_6cyc_2XY], 1036 (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>; 1037// -- arithmetic, horizontal, 8B/8H 1038def : InstRW<[Ampere1Write_9cyc_3XY], 1039 (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>; 1040def : InstRW<[Ampere1Write_9cyc_3XY], 1041 (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>; 1042// -- arithmetic, narrowing 1043def : InstRW<[Ampere1Write_5cyc_2XY], (instregex "(ADD|SUB)HNv.*")>; 1044def : InstRW<[Ampere1Write_5cyc_2XY], 
(instregex "(RADD|RSUB)HNv.*")>; 1045// -- arithmetic, pairwise 1046def : InstRW<[Ampere1Write_3cyc_1XY], 1047 (instregex "^ADDPv", "^SADALP", "^UADALP", "^SADDLPv", "^UADDLPv")>; 1048// -- arithmetic, saturating 1049def : InstRW<[Ampere1Write_3cyc_1XY], 1050 (instregex "^SQADD", "^SQSUB", "^SUQADD", "^UQADD", "^UQSUB", "^USQADD")>; 1051// -- bit count 1052def : InstRW<[Ampere1Write_2cyc_1XY], 1053 (instregex "^(CLS|CLZ|CNT)v")>; 1054// -- compare 1055def : InstRW<[Ampere1Write_3cyc_1XY], 1056 (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv", 1057 "^CMHIv", "^CMHSv")>; 1058// -- compare non-zero 1059def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^CMTSTv")>; 1060// -- dot product 1061def : InstRW<[Ampere1Write_3cyc_1XY], (instregex "^(S|SU|U|US)DOTv")>; 1062// -- fp reciprocal estimate 1063def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FRECPEv", "^FRSQRTEv")>; 1064// -- integer reciprocal estimate 1065def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^URECPEv", "^URSQRTEv")>; 1066// -- logical 1067def : InstRW<[Ampere1Write_2cyc_1XY], 1068 (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>; 1069// -- logical, narrowing 1070def : InstRW<[Ampere1Write_5cyc_2XY], 1071 (instregex "RSHRNv", 1072 "SHRNv", "SQSHRNv", "SQSHRUNv", 1073 "UQXTNv")>; 1074// -- matrix multiply 1075def : InstRW<[Ampere1Write_6cyc_2XY], 1076 (instrs SMMLA, UMMLA, USMMLA)>; 1077// -- max/min 1078def : InstRW<[Ampere1Write_3cyc_1XY], 1079 (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>; 1080def : InstRW<[Ampere1Write_3cyc_1XY], 1081 (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>; 1082// -- move immediate 1083def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^MOVIv", "^MVNIv")>; 1084// -- multiply 1085def : InstRW<[Ampere1Write_3cyc_1XY], 1086 (instregex "MULv", "SMULLv", "UMULLv", "SQDMUL(H|L)v", "SQRDMULHv")>; 1087// -- multiply accumulate 1088def : InstRW<[Ampere1Write_3cyc_1XY], 1089 (instregex "MLAv", "MLSv", "(S|U|SQD)(MLAL|MLSL)v", 
"SQRDML(A|S)Hv")>; 1090// -- negation, saturating 1091def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^SQABS", "^SQNEG")>; 1092// -- reverse bits/bytes 1093def : InstRW<[Ampere1Write_2cyc_1XY], 1094 (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>; 1095// -- shift 1096def : InstRW<[Ampere1Write_3cyc_1XY], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; 1097// -- shift and accumulate 1098def : InstRW<[Ampere1Write_3cyc_1XY], 1099 (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>; 1100// -- shift, saturating 1101def : InstRW<[Ampere1Write_3cyc_1XY], 1102 (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv", "^SQSHL", "^SQSHLU", 1103 "^SQXTNv", "^SQXTUNv", "^UQSHRNv", "UQRSHRNv", "^UQRSHL", 1104 "^UQSHL")>; 1105 1106// Vector miscellaneous instructions 1107// -- duplicate element 1108def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^DUPv.+lane")>; 1109// -- duplicate from GPR 1110def : InstRW<[Ampere1Write_5cyc_1BS], (instregex "^DUPv.+gpr")>; 1111// -- extract narrow 1112def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^XTNv")>; 1113// -- insert/extract element 1114def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^EXTv", "^INSv.+lane")>; 1115// -- move FP immediate 1116def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^FMOVv")>; 1117// -- move element to GPR 1118def : InstRW<[Ampere1Write_6cyc_1XY_1Z], (instregex "(S|U)MOVv")>; 1119// -- move from GPR to any element 1120def : InstRW<[Ampere1Write_7cyc_1BS_1XY], (instregex "^INSv.+gpr")>; 1121// -- table lookup 1122def : InstRW<[Ampere1Write_2cyc_1XY], 1123 (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>; 1124def : InstRW<[Ampere1Write_4cyc_2XY], 1125 (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>; 1126def : InstRW<[Ampere1Write_6cyc_3XY], 1127 (instrs TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>; 1128def : InstRW<[Ampere1Write_8cyc_4XY], 1129 (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>; 1130// -- transpose 1131def : InstRW<[Ampere1Write_2cyc_1XY], 1132 
(instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>; 1133// -- zip/unzip 1134def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^ZIP1v", "^ZIP2v")>; 1135 1136} // SchedModel = Ampere1Model 1137