//=- AArch64SchedAmpere1.td - Ampere-1 scheduling def -----*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the machine model for the Ampere Computing Ampere-1 to
// support instruction scheduling and other instruction cost heuristics.
//
//===----------------------------------------------------------------------===//

// The Ampere-1 core is an out-of-order micro-architecture.  Its front end
// performs branch prediction and needs 10 cycles to recover from a
// mispredicted branch.  Instructions leaving the front end are decoded into
// internal micro-ops (uops).

// Top-level machine model: global pipeline parameters for Ampere-1.
def Ampere1Model : SchedMachineModel {
  // 4-way decode and dispatch.
  let IssueWidth = 4;
  // Re-order buffer size.
  let MicroOpBufferSize = 192;
  // Instruction queue size.
  let LoopMicroOpBufferSize = 32;
  // Optimistic load latency.
  let LoadLatency = 4;
  // Branch mispredict penalty.
  let MispredictPenalty = 10;
  let CompleteModel = 0;

  // Features this model does not describe: the SVE, SME and PA "unsupported"
  // predicate lists, plus MTE.
  list<Predicate> UnsupportedFeatures = !listconcat(SVEUnsupported.F,
                                                    SMEUnsupported.F,
                                                    PAUnsupported.F,
                                                    [HasMTE]);
}

let SchedModel = Ampere1Model in {

//===----------------------------------------------------------------------===//
// Define each kind of processor resource and number available on Ampere-1.
// Ampere-1 has 12 pipelines that 8 independent schedulers (4 integer, 2 FP,
// and 2 memory) issue into.  The integer and FP schedulers can each issue
// one uop per cycle, while the memory schedulers can each issue one load
// and one store address calculation per cycle.
def Ampere1UnitA  : ProcResource<2>; // integer single-cycle, branch, and flags r/w
def Ampere1UnitB  : ProcResource<2>; // integer single-cycle, and complex shifts
def Ampere1UnitBS : ProcResource<1>; // integer multi-cycle
def Ampere1UnitL  : ProcResource<2>; // load
def Ampere1UnitS  : ProcResource<2>; // store address calculation
def Ampere1UnitX  : ProcResource<1>; // FP and vector operations, and flag write
def Ampere1UnitY  : ProcResource<1>; // FP and vector operations, and crypto
def Ampere1UnitZ  : ProcResource<1>; // FP store data and FP-to-integer moves

// Groups for uops that may issue to either member unit.
def Ampere1UnitAB : ProcResGroup<[Ampere1UnitA, Ampere1UnitB]>;
def Ampere1UnitXY : ProcResGroup<[Ampere1UnitX, Ampere1UnitY]>;

//===----------------------------------------------------------------------===//
// Define customized scheduler read/write types specific to the Ampere-1.
//
// Naming convention used below: Ampere1Write_<N>cyc_<count><unit>[_...],
// where N is the Latency and the suffix lists the resources consumed.
// In every definition NumMicroOps equals the number of resource entries.

def Ampere1Write_1cyc_1A : SchedWriteRes<[Ampere1UnitA]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1Write_1cyc_2A : SchedWriteRes<[Ampere1UnitA, Ampere1UnitA]> {
  let Latency = 1;
  let NumMicroOps = 2;
}

def Ampere1Write_1cyc_1B : SchedWriteRes<[Ampere1UnitB]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1Write_1cyc_1AB : SchedWriteRes<[Ampere1UnitAB]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1Write_1cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1Write_1cyc_1S : SchedWriteRes<[Ampere1UnitS]> {
  let Latency = 1;
  let NumMicroOps = 1;
}

def Ampere1Write_1cyc_2S : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS]> {
  let Latency = 1;
  let NumMicroOps = 2;
}

def Ampere1Write_2cyc_1Y : SchedWriteRes<[Ampere1UnitY]> {
  let Latency = 2;
  let NumMicroOps = 1;
}

def Ampere1Write_2cyc_2AB : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitAB]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1Write_2cyc_1B_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitAB]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1Write_2cyc_1B_1A : SchedWriteRes<[Ampere1UnitB, Ampere1UnitA]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1Write_2cyc_1AB_1A : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitA]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1Write_2cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1Write_2cyc_1AB_2S : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS,
                                              Ampere1UnitS]> {
  let Latency = 2;
  let NumMicroOps = 3;
}

def Ampere1Write_2cyc_1AB_1S_1Z : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitS,
                                                 Ampere1UnitZ]> {
  let Latency = 2;
  let NumMicroOps = 3;
}

def Ampere1Write_2cyc_1B_1S : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1Write_2cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 2;
  let NumMicroOps = 1;
}

def Ampere1Write_2cyc_1S_1Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

def Ampere1Write_3cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
  let Latency = 3;
  let NumMicroOps = 1;
}

def Ampere1Write_3cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 3;
  let NumMicroOps = 1;
}

// The next three write types previously declared Latency = 2 despite their
// "3cyc" names.  The latency now matches the cycle count in the name, as it
// does for the other write types defined in this section.
def Ampere1Write_3cyc_1B_1S_1AB : SchedWriteRes<[Ampere1UnitB, Ampere1UnitS,
                                                 Ampere1UnitAB]> {
  let Latency = 3;
  let NumMicroOps = 3;
}

def Ampere1Write_3cyc_1S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitZ,
                                             Ampere1UnitZ]> {
  let Latency = 3;
  let NumMicroOps = 3;
}

def Ampere1Write_3cyc_2S_2Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
                                             Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 3;
  let NumMicroOps = 4;
}

def Ampere1Write_4cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1Write_4cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1Write_4cyc_1X : SchedWriteRes<[Ampere1UnitX]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1Write_4cyc_1Y : SchedWriteRes<[Ampere1UnitY]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1Write_4cyc_1Z : SchedWriteRes<[Ampere1UnitZ]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1Write_4cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> {
  let Latency = 4;
  let NumMicroOps = 2;
}

def Ampere1Write_4cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 4;
  let NumMicroOps = 1;
}

def Ampere1Write_4cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 4;
  let NumMicroOps = 2;
}

def Ampere1Write_4cyc_1XY_1S_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitS,
                                                 Ampere1UnitZ]> {
  let Latency = 4;
  let NumMicroOps = 3;
}

def Ampere1Write_4cyc_3S_3Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
                                             Ampere1UnitS, Ampere1UnitZ,
                                             Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 4;
  let NumMicroOps = 6;
}

def Ampere1Write_5cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
  let Latency = 5;
  let NumMicroOps = 2;
}

def Ampere1Write_5cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
  let Latency = 5;
  let NumMicroOps = 1;
}

def Ampere1Write_5cyc_1X : SchedWriteRes<[Ampere1UnitX]> {
  let Latency = 5;
  let NumMicroOps = 1;
}

def Ampere1Write_5cyc_1L : SchedWriteRes<[Ampere1UnitL]> {
  let Latency = 5;
  let NumMicroOps = 1;
}

def Ampere1Write_5cyc_2L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL]> {
  let Latency = 5;
  let NumMicroOps = 2;
}

def Ampere1Write_5cyc_1L_1BS : SchedWriteRes<[Ampere1UnitL, Ampere1UnitBS]> {
  let Latency = 5;
  let NumMicroOps = 2;
}

def Ampere1Write_5cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 5;
  let NumMicroOps = 1;
}

def Ampere1Write_5cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 5;
  let NumMicroOps = 2;
}

def Ampere1Write_5cyc_4S_4Z : SchedWriteRes<[Ampere1UnitS, Ampere1UnitS,
                                             Ampere1UnitS, Ampere1UnitS,
                                             Ampere1UnitZ, Ampere1UnitZ,
                                             Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 5;
  let NumMicroOps = 8;
}

def Ampere1Write_5cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 5;
  let NumMicroOps = 6;
}

def Ampere1Write_6cyc_2XY_2S_2Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 6;
  let NumMicroOps = 6;
}

def Ampere1Write_6cyc_3XY_3S_3Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitXY, Ampere1UnitS,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitZ, Ampere1UnitZ,
                                                 Ampere1UnitZ]> {
  let Latency = 6;
  let NumMicroOps = 9;
}

def Ampere1Write_6cyc_1AB_1L : SchedWriteRes<[Ampere1UnitAB, Ampere1UnitL]> {
  let Latency = 6;
  let NumMicroOps = 2;
}

def Ampere1Write_6cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 6;
  let NumMicroOps = 1;
}

def Ampere1Write_6cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 6;
  let NumMicroOps = 2;
}

def Ampere1Write_6cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                           Ampere1UnitXY]> {
  let Latency = 6;
  let NumMicroOps = 3;
}

def Ampere1Write_6cyc_3L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                          Ampere1UnitL]> {
  let Latency = 6;
  let NumMicroOps = 3;
}

def Ampere1Write_6cyc_4L : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                          Ampere1UnitL, Ampere1UnitL]> {
  let Latency = 6;
  let NumMicroOps = 4;
}

def Ampere1Write_6cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> {
  let Latency = 6;
  let NumMicroOps = 2;
}

def Ampere1Write_7cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
  let Latency = 7;
  let NumMicroOps = 1;
}

def Ampere1Write_7cyc_1BS_1XY : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitXY]> {
  let Latency = 7;
  let NumMicroOps = 2;
}

def Ampere1Write_7cyc_1L_1XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitXY]> {
  let Latency = 7;
  let NumMicroOps = 2;
}

def Ampere1Write_7cyc_2L_2XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 7;
  let NumMicroOps = 4;
}

def Ampere1Write_7cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 7;
  let NumMicroOps = 2;
}

def Ampere1Write_7cyc_4XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitZ, Ampere1UnitZ,
                                                 Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 7;
  let NumMicroOps = 12;
}

def Ampere1Write_8cyc_1BS_1A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA]> {
  let Latency = 8;
  let NumMicroOps = 2;
}

def Ampere1Write_8cyc_1BS_2A : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitA,
                                              Ampere1UnitA]> {
  let Latency = 8;
  let NumMicroOps = 3;
}

def Ampere1Write_8cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 8;
  let NumMicroOps = 2;
}

def Ampere1Write_8cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                           Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 8;
  let NumMicroOps = 4;
}

def Ampere1Write_8cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitL, Ampere1UnitXY,
                                              Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 8;
  let NumMicroOps = 6;
}

def Ampere1Write_8cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitXY, Ampere1UnitXY,
                                              Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 8;
  let NumMicroOps = 8;
}

def Ampere1Write_9cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitL, Ampere1UnitXY,
                                              Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 9;
  let NumMicroOps = 6;
}

def Ampere1Write_9cyc_4L_4XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitXY, Ampere1UnitXY,
                                              Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 9;
  let NumMicroOps = 8;
}

def Ampere1Write_9cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                           Ampere1UnitXY]> {
  let Latency = 9;
  let NumMicroOps = 3;
}

def Ampere1Write_9cyc_2L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                              Ampere1UnitXY, Ampere1UnitXY,
                                              Ampere1UnitXY]> {
  let Latency = 9;
  let NumMicroOps = 5;
}

def Ampere1Write_9cyc_6XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitZ, Ampere1UnitZ,
                                                 Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 9;
  let NumMicroOps = 14;
}

def Ampere1Write_9cyc_8XY_4S_4Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitXY, Ampere1UnitXY,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitS, Ampere1UnitS,
                                                 Ampere1UnitZ, Ampere1UnitZ,
                                                 Ampere1UnitZ, Ampere1UnitZ]> {
  let Latency = 9;
  let NumMicroOps = 16;
}

def Ampere1Write_10cyc_2XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 10;
  let NumMicroOps = 2;
}

def Ampere1Write_10cyc_1XY_1Z : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitZ]> {
  let Latency = 10;
  let NumMicroOps = 2;
}

def Ampere1Write_10cyc_1X_1Z : SchedWriteRes<[Ampere1UnitX, Ampere1UnitZ]> {
  let Latency = 10;
  let NumMicroOps = 2;
}

def Ampere1Write_10cyc_3L_3XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                               Ampere1UnitL, Ampere1UnitXY,
                                               Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 10;
  let NumMicroOps = 6;
}

def Ampere1Write_10cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS,
                                                  Ampere1UnitX]> {
  let Latency = 10;
  let NumMicroOps = 3;
}

def Ampere1Write_10cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS,
                                                   Ampere1UnitXY]> {
  let Latency = 10;
  let NumMicroOps = 3;
}

def Ampere1Write_11cyc_1BS_1L : SchedWriteRes<[Ampere1UnitBS, Ampere1UnitL]> {
  let Latency = 11;
  let NumMicroOps = 2;
}

def Ampere1Write_11cyc_1A_1BS_1X : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS,
                                                  Ampere1UnitX]> {
  let Latency = 11;
  let NumMicroOps = 3;
}

def Ampere1Write_11cyc_1A_1BS_1XY : SchedWriteRes<[Ampere1UnitA, Ampere1UnitBS,
                                                   Ampere1UnitXY]> {
  let Latency = 11;
  let NumMicroOps = 3;
}

def Ampere1Write_11cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                               Ampere1UnitL, Ampere1UnitL,
                                               Ampere1UnitXY, Ampere1UnitXY,
                                               Ampere1UnitXY, Ampere1UnitXY,
                                               Ampere1UnitXY, Ampere1UnitXY,
                                               Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 11;
  let NumMicroOps = 12;
}

def Ampere1Write_12cyc_4L_8XY : SchedWriteRes<[Ampere1UnitL, Ampere1UnitL,
                                               Ampere1UnitL, Ampere1UnitL,
                                               Ampere1UnitXY, Ampere1UnitXY,
                                               Ampere1UnitXY, Ampere1UnitXY,
                                               Ampere1UnitXY, Ampere1UnitXY,
                                               Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 12;
  let NumMicroOps = 12;
}

def Ampere1Write_12cyc_3XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                            Ampere1UnitXY]> {
  let Latency = 12;
  let NumMicroOps = 3;
}

def Ampere1Write_12cyc_4XY : SchedWriteRes<[Ampere1UnitXY, Ampere1UnitXY,
                                            Ampere1UnitXY, Ampere1UnitXY]> {
  let Latency = 12;
  let NumMicroOps = 4;
}

def Ampere1Write_18cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
  let Latency = 18;
  let NumMicroOps = 1;
}

def Ampere1Write_19cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 19;
  let NumMicroOps = 1;
}

def Ampere1Write_25cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 25;
  let NumMicroOps = 1;
}

def Ampere1Write_32cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 32;
  let NumMicroOps = 1;
}

def Ampere1Write_34cyc_1BS : SchedWriteRes<[Ampere1UnitBS]> {
  let Latency = 34;
  let NumMicroOps = 1;
}

def Ampere1Write_34cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 34;
  let NumMicroOps = 1;
}

def Ampere1Write_39cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 39;
  let NumMicroOps = 1;
}

def Ampere1Write_62cyc_1XY : SchedWriteRes<[Ampere1UnitXY]> {
  let Latency = 62;
  let NumMicroOps = 1;
}

// For basic arithmetic, we have more flexibility for short shifts (LSL shift <= 4),
// which are a single uop, and for extended registers, which have full flexibility
// across Unit A or B for both uops.
def Ampere1Write_Arith : SchedWriteVariant<[
  SchedVar<RegExtendedPred, [Ampere1Write_2cyc_2AB]>,
  SchedVar<IsCheapLSL,      [Ampere1Write_1cyc_1AB]>,
  SchedVar<NoSchedPred,     [Ampere1Write_2cyc_1B_1AB]>]>;

// Flag-setting counterpart of Ampere1Write_Arith: same predicates, but each
// variant selects a write type that includes Unit A.
def Ampere1Write_ArithFlagsetting : SchedWriteVariant<[
  SchedVar<RegExtendedPred, [Ampere1Write_2cyc_1AB_1A]>,
  SchedVar<IsCheapLSL,      [Ampere1Write_1cyc_1A]>,
  SchedVar<NoSchedPred,     [Ampere1Write_2cyc_1B_1A]>]>;

//===----------------------------------------------------------------------===//
// Map the target-defined scheduler read/write resources and latencies for Ampere-1.
// This provides a coarse model, which is then specialised below.

// MOVN, MOVZ
def : WriteRes<WriteImm, [Ampere1UnitAB]>;

// ALU
def : WriteRes<WriteI, [Ampere1UnitAB]>;

// ALU of Shifted-Reg
def : WriteRes<WriteISReg, [Ampere1UnitB, Ampere1UnitA]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

// ALU of Extended-Reg
def : WriteRes<WriteIEReg, [Ampere1UnitAB, Ampere1UnitA]> {
  let Latency = 2;
  let NumMicroOps = 2;
}

// EXTR shifts a reg pair
def : WriteRes<WriteExtr, [Ampere1UnitB]>;

// Shift/Scale
def : WriteRes<WriteIS, [Ampere1UnitB]>;

// 32-bit Divide
def : WriteRes<WriteID32, [Ampere1UnitBS]> {
  let Latency = 18;
}

// 64-bit Divide
def : WriteRes<WriteID64, [Ampere1UnitBS]> {
  let Latency = 34;
}

// 32-bit Multiply
def : WriteRes<WriteIM32, [Ampere1UnitBS]> {
  let Latency = 3;
}

// 64-bit Multiply
def : WriteRes<WriteIM64, [Ampere1UnitBS]> {
  let Latency = 3;
}

// Branches.
def : WriteRes<WriteBr, [Ampere1UnitA]>;
// NOTE(review): two Unit A entries are listed but NumMicroOps is not set
// here, unlike the two-resource entries elsewhere in this list -- confirm.
def : WriteRes<WriteBrReg, [Ampere1UnitA, Ampere1UnitA]>;

// Load from base addr plus immediate offset
def : WriteRes<WriteLD, [Ampere1UnitL]> {
  let Latency = 4;
}

// Store to base addr plus immediate offset
def : WriteRes<WriteST, [Ampere1UnitS]> {
  let Latency = 1;
}

// Store a register pair.
def : WriteRes<WriteSTP, [Ampere1UnitS, Ampere1UnitS]> {
  let Latency = 1;
  let NumMicroOps = 2;
}

def : WriteRes<WriteAdr, [Ampere1UnitAB]>;

// Load from a register index (maybe scaled).
// NOTE(review): this books Ampere1UnitS (store address calculation) rather
// than Ampere1UnitL (load) for a load -- confirm this is intended.
def : WriteRes<WriteLDIdx, [Ampere1UnitAB, Ampere1UnitS]> {
  let Latency = 5;
  let NumMicroOps = 2;
}

// Store to a register index (maybe scaled).
def : WriteRes<WriteSTIdx, [Ampere1UnitS, Ampere1UnitS]> {
  let Latency = 1;
  let NumMicroOps = 2;
}

// General floating-point ops.
def : WriteRes<WriteF, [Ampere1UnitXY]> {
  let Latency = 2;
}

// Floating-point compare.
def : WriteRes<WriteFCmp, [Ampere1UnitX]> {
  let Latency = 5;
}
// Float conversion.
def : WriteRes<WriteFCvt, [Ampere1UnitXY]> {
  let Latency = 6;
}

// Float-int register copy.  No Latency or NumMicroOps is set here.
def : WriteRes<WriteFCopy, [Ampere1UnitXY]>;

// Floating-point immediate.
def : WriteRes<WriteFImm, [Ampere1UnitXY]> {
  let Latency = 2;
}

// Floating-point multiply.
def : WriteRes<WriteFMul, [Ampere1UnitXY]> {
  let Latency = 5;
}

// Floating-point division.
def : WriteRes<WriteFDiv, [Ampere1UnitXY]> {
  let Latency = 34;
}

// 64bit Vector D ops.
def : WriteRes<WriteVd, [Ampere1UnitXY]> {
  let Latency = 3;
}

// 128bit Vector Q ops.
def : WriteRes<WriteVq, [Ampere1UnitXY]> {
  let Latency = 3;
}

// Vector loads.
// NOTE(review): two resources are listed but NumMicroOps is not set -- confirm.
def : WriteRes<WriteVLD, [Ampere1UnitL, Ampere1UnitL]> {
  let Latency = 5;
}

// Vector stores.
// NOTE(review): two resources are listed but NumMicroOps is not set -- confirm.
def : WriteRes<WriteVST, [Ampere1UnitS, Ampere1UnitZ]> {
  let Latency = 2;
}

// Atomics are marked as unsupported in this model.
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }

// These write types consume no modelled resource; latency is 1.
def : WriteRes<WriteSys,     []> { let Latency = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint,    []> { let Latency = 1; }

// The second register of a load-pair: LDP,LDPSW,LDNP,LDXP,LDAXP
def : WriteRes<WriteLDHi, []> {
  let Latency = 4;
}

// Forwarding logic.
// Every read type below has an advance of 0 except ReadIMA, which has an
// advance of 1 that is limited to WriteIM32/WriteIM64.
def : ReadAdvance<ReadI,       0>;
def : ReadAdvance<ReadISReg,   0>;
def : ReadAdvance<ReadIEReg,   0>;
def : ReadAdvance<ReadIM,      0>;
def : ReadAdvance<ReadIMA,     1, [WriteIM32, WriteIM64]>;
def : ReadAdvance<ReadID,      0>;
def : ReadAdvance<ReadExtrHi,  0>;
def : ReadAdvance<ReadST,      0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadVLD,     0>;

//===----------------------------------------------------------------------===//
// Specialising the scheduling model further for Ampere-1.

def : InstRW<[Ampere1Write_1cyc_1AB], (instrs COPY)>;

// Branch instructions
def : InstRW<[Ampere1Write_1cyc_1A], (instrs Bcc, BL, RET)>;
def : InstRW<[Ampere1Write_1cyc_1A],
    (instrs CBZW, CBZX, CBNZW, CBNZX, TBZW, TBZX, TBNZW, TBNZX)>;
def : InstRW<[Ampere1Write_1cyc_2A], (instrs BLR)>;

// Cryptography instructions
// -- AES encryption/decryption
def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AES[DE]")>;
def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^AESI?MC")>;
// -- Polynomial multiplication
def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^PMUL", "^PMULL")>;
// -- SHA-256 hash
def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA256(H|H2)")>;
// -- SHA-256 schedule update
def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA256SU[01]")>;
// -- SHA-3 instructions
def : InstRW<[Ampere1Write_2cyc_1XY],
    (instregex "^BCAX", "^EOR3", "^RAX1", "^XAR")>;
// -- SHA-512 hash
def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA512(H|H2)")>;
// -- SHA-512 schedule update
def : InstRW<[Ampere1Write_4cyc_1Y], (instregex "^SHA512SU[01]")>;
// -- SHA1 choose/majority/parity
def : InstRW<[Ampere1Write_4cyc_1X], (instregex "^SHA1[CMP]")>;
// -- SHA1 hash/schedule update
def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1SU[01]")>;
def : InstRW<[Ampere1Write_2cyc_1Y], (instregex "^SHA1H")>;

// FP and vector load instructions
// -- Load 1-element structure to one/all lanes
// ---- all lanes
def : InstRW<[Ampere1Write_7cyc_1L_1XY],
    (instregex "^LD1Rv(8b|4h|2s|16b|8h|4s|2d)")>;
// ---- one lane
def : InstRW<[Ampere1Write_7cyc_1L_1XY],
    (instregex "^LD1i(8|16|32|64)")>;
// -- Load 1-element structure to one/all lanes, 1D size
def : InstRW<[Ampere1Write_5cyc_1L],
    (instregex "^LD1Rv1d")>;
// -- Load 1-element structures to 1 register
def : InstRW<[Ampere1Write_5cyc_1L],
    (instregex "^LD1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Load 1-element structures to 2 registers
def : InstRW<[Ampere1Write_5cyc_2L],
    (instregex "^LD1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Load 1-element structures to 3 registers
def : InstRW<[Ampere1Write_6cyc_3L],
    (instregex "^LD1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Load 1-element structures to 4 registers
def : InstRW<[Ampere1Write_6cyc_4L],
    (instregex "^LD1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Load 2-element structure to all lanes of 2 registers, 1D size
def : InstRW<[Ampere1Write_5cyc_2L],
    (instregex "^LD2Rv1d")>;
// -- Load 2-element structure to all lanes of 2 registers, other sizes
def : InstRW<[Ampere1Write_7cyc_2L_2XY],
    (instregex "^LD2Rv(8b|4h|2s|16b|8h|4s|2d)")>;
// -- Load 2-element structure to one lane of 2 registers
def : InstRW<[Ampere1Write_7cyc_2L_2XY],
    (instregex "^LD2i(8|16|32|64)")>;
// -- Load 2-element structures to 2 registers, 16B/8H/4S/2D size
def : InstRW<[Ampere1Write_7cyc_2L_2XY],
    (instregex "^LD2Twov(16b|8h|4s|2d)")>;
// -- Load 2-element structures to 2 registers, 8B/4H/2S size
def : InstRW<[Ampere1Write_9cyc_2L_3XY],
    (instregex "^LD2Twov(8b|4h|2s)")>;
// -- Load 3-element structure to all lanes of 3 registers, 1D size
def : InstRW<[Ampere1Write_6cyc_3L],
    (instregex "^LD3Rv1d")>;
// -- Load 3-element structure to all lanes of 3 registers, other sizes
def : InstRW<[Ampere1Write_8cyc_3L_3XY],
    (instregex "^LD3Rv(8b|4h|2s|16b|8h|4s|2d)")>;
// -- Load 3-element structure to one lane of 3 registers
def : InstRW<[Ampere1Write_8cyc_3L_3XY],
    (instregex "^LD3i(8|16|32|64)")>;
// -- Load 3-element structures to 3 registers, 16B/8H/4S sizes
def : InstRW<[Ampere1Write_9cyc_3L_3XY],
    (instregex "^LD3Threev(16b|8h|4s)")>;
// -- Load 3-element structures to 3 registers, 2D size
def : InstRW<[Ampere1Write_8cyc_3L_3XY],
    (instregex "^LD3Threev2d")>;
// -- Load 3-element structures to 3 registers, 8B/4H/2S sizes
def : InstRW<[Ampere1Write_10cyc_3L_3XY],
    (instregex "^LD3Threev(8b|4h|2s)")>;
// -- Load 4-element structure to all lanes of 4 registers, 1D size
def : InstRW<[Ampere1Write_6cyc_4L],
    (instregex "^LD4Rv1d")>;
// -- Load 4-element structure to all lanes of 4 registers, other sizes
def : InstRW<[Ampere1Write_8cyc_4L_4XY],
    (instregex "^LD4Rv(8b|4h|2s|16b|8h|4s|2d)")>;
// -- Load 4-element structure to one lane of 4 registers
def : InstRW<[Ampere1Write_6cyc_4L],
    (instregex "^LD4i(8|16|32|64)")>;
// -- Load 4-element structures to 4 registers, 2D size
def : InstRW<[Ampere1Write_9cyc_4L_4XY],
    (instregex "^LD4Fourv2d")>;
// -- Load 4-element structures to 4 registers, 2S size
def : InstRW<[Ampere1Write_12cyc_4L_8XY],
    (instregex "^LD4Fourv2s")>;
// -- Load 4-element structures to 4 registers, other sizes
def : InstRW<[Ampere1Write_11cyc_4L_8XY],
    (instregex "^LD4Fourv(8b|4h|16b|8h|4s)")>;
// -- Load pair, Q-form
def : InstRW<[Ampere1Write_5cyc_2L], (instregex "LDN?PQ")>;
// -- Load pair, S/D-form
def : InstRW<[Ampere1Write_5cyc_1L_1BS], (instregex "LDN?P(S|D)")>;
// -- Load register
def : InstRW<[Ampere1Write_5cyc_1L], (instregex "LDU?R[BHSDQ]i")>;
// -- Load register, sign-extended register
def : InstRW<[Ampere1Write_6cyc_1AB_1L], (instregex "LDR[BHSDQ]ro(W|X)")>;

// FP and vector store instructions
// -- Store 1-element structure from one lane of 1 register
def : InstRW<[Ampere1Write_4cyc_1XY_1S_1Z],
    (instregex "^ST1i(8|16|32|64)")>;
// -- Store 1-element structures from 1 register
def : InstRW<[Ampere1Write_2cyc_1S_1Z],
    (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 1-element structures from 2 registers
def : InstRW<[Ampere1Write_3cyc_2S_2Z],
    (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 1-element structures from 3 registers
def : InstRW<[Ampere1Write_4cyc_3S_3Z],
    (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 1-element structures from 4 registers
def : InstRW<[Ampere1Write_5cyc_4S_4Z],
    (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 2-element structure from one lane of 2 registers
def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z],
    (instregex "^ST2i(8|16|32|64)")>;
// -- Store 2-element structures from 2 registers, 16B/8H/4S/2D sizes
def : InstRW<[Ampere1Write_5cyc_2XY_2S_2Z],
    (instregex "^ST2Twov(16b|8h|4s|2d)")>;
// -- Store 2-element structures from 2 registers, 8B/4H/2S sizes
def : InstRW<[Ampere1Write_6cyc_2XY_2S_2Z],
    (instregex "^ST2Twov(8b|4h|2s)")>;
// -- Store 3-element structure from one lane of 3 registers
def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z],
    (instregex "^ST3i(8|16|32|64)")>;
// -- Store 3-element structures from 3 registers
def : InstRW<[Ampere1Write_6cyc_3XY_3S_3Z],
    (instregex "^ST3Threev(8b|4h|2s|1d|16b|8h|4s|2d)")>;
// -- Store 4-element structure from one lane of 4 registers
def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z],
    (instregex "^ST4i(8|16|32|64)")>;
// -- Store 4-element structures from 4 registers, 16B/8H/4S sizes
def : InstRW<[Ampere1Write_9cyc_8XY_4S_4Z],
    (instregex "^ST4Fourv(16b|8h|4s)")>;
// -- Store 4-element structures from 4 registers, 2D sizes
def : InstRW<[Ampere1Write_7cyc_4XY_4S_4Z],
    (instregex "^ST4Fourv2d")>;
// -- Store 4-element structures from 4 registers, 8B/4H/2S sizes
def : InstRW<[Ampere1Write_9cyc_6XY_4S_4Z],
    (instregex "^ST4Fourv(8b|4h|2s)")>;
// -- Store pair, Q-form
def : InstRW<[Ampere1Write_3cyc_2S_2Z], (instregex "^STN?PQ")>;
// -- Store pair, S/D-form
def : InstRW<[Ampere1Write_3cyc_1S_2Z], (instregex "^STN?P[SD]")>;
// -- Store register
def : InstRW<[Ampere1Write_2cyc_1S_1Z], (instregex "^STU?R[BHSDQ](ui|i)")>;
// -- Store register, sign-extended register offset
def : InstRW<[Ampere1Write_2cyc_1AB_1S_1Z], (instregex "^STR[BHSDQ]ro[XW]")>;

// FP data processing, bfloat16 format
def : InstRW<[Ampere1Write_5cyc_1XY], (instrs BFCVT)>;
def : InstRW<[Ampere1Write_7cyc_2XY], (instrs BFCVTN, BFCVTN2)>;
def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^BFDOTv", "^BF16DOT")>;
def : InstRW<[Ampere1Write_4cyc_2XY], (instrs BFMMLA)>;
def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^BFMLAL")>;

// FP data processing, scalar/vector, half precision
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(ABD|ABS)v.[fi]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY],
    (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY],
    (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY],
    (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)16")>;
def : InstRW<[Ampere1Write_4cyc_1X],
    (instregex "^FCMPE?H")>;
def : InstRW<[Ampere1Write_10cyc_1A_1BS_1X],
    (instregex "^FCCMPE?H")>;
def : InstRW<[Ampere1Write_10cyc_1A_1BS_1XY],
    (instregex "^FCSELH")>;
def : InstRW<[Ampere1Write_4cyc_1XY],
    (instregex "^FCVT[AMNPZ][SU]v.[if]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^[SU]CVTFv.[fi]16")>;
def : InstRW<[Ampere1Write_25cyc_1XY],
    (instregex "^FDIVv.[if]16", "FDIVH")>;
def : InstRW<[Ampere1Write_4cyc_1XY],
    (instregex "^F(MAX|MIN)(NM)?P?v.[if]16")>;
def : InstRW<[Ampere1Write_8cyc_2XY],
    (instregex "^F(MAX|MIN)(NM)?Vv4[if]16")>;
def : InstRW<[Ampere1Write_12cyc_3XY],
    (instregex "^F(MAX|MIN)(NM)?Vv8[if]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FMULX?v.[if]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instrs FMULX16)>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FN?M(ADD|SUB)[H]rrr")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FML[AS]v.[if]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^FRECPXv.[if]16")>;
def : InstRW<[Ampere1Write_4cyc_1XY], (instregex "^F(RECP|RSQRT)S16")>;
def : InstRW<[Ampere1Write_4cyc_1XY],
    (instregex "^FRINT[AIMNPXZ]v.[if]16")>;
def : InstRW<[Ampere1Write_39cyc_1XY],
    (instregex "^FSQRTv.f16", "^FSQRTHr")>;

// FP data processing, scalar/vector, single/double precision
def : InstRW<[Ampere1Write_5cyc_1XY],
    (instregex "^F(ABD|ABS)v.[fi](32|64)")>;
def : InstRW<[Ampere1Write_5cyc_1XY],
    (instregex "^F(ADD|ADDP|CADD|NEG|NMUL|SUB)v.[fi](32|64)")>;
def : InstRW<[Ampere1Write_5cyc_1XY],
    (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v.[fi](32|64)")>;
def : InstRW<[Ampere1Write_5cyc_1XY],
    (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)(32|64)")>;
def : InstRW<[Ampere1Write_5cyc_1X],
    (instregex "^FCMPE?(S|D)")>;
def : InstRW<[Ampere1Write_11cyc_1A_1BS_1X],
    (instregex "^FCCMPE?(S|D)")>;
def : InstRW<[Ampere1Write_11cyc_1A_1BS_1XY],
    (instregex "^FCSEL(S|D)")>;
def : InstRW<[Ampere1Write_6cyc_1XY],
    (instregex "^FCVT[AMNPZ][SU]v.[if](32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY],
    (instregex "^[SU]CVTFv.[fi](32|64)")>;
def : InstRW<[Ampere1Write_34cyc_1XY],
    (instregex "^FDIVv.[if](64)", "FDIVD")>;
def : InstRW<[Ampere1Write_19cyc_1XY],
    (instregex "^FDIVv.[if](32)", "FDIVS")>;
def : InstRW<[Ampere1Write_5cyc_1XY],
    (instregex "^F(MAX|MIN)(NM)?P?v.[if](32|64)")>;
def : InstRW<[Ampere1Write_10cyc_2XY],
    (instregex "^F(MAX|MIN)(NM)?Vv.[if](32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY],
    (instregex "^FMULX?v.[if](32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY], (instrs FMULX32, FMULX64)>;
def : InstRW<[Ampere1Write_5cyc_1XY],
    (instregex "^FN?M(ADD|SUB)[SD]rrr")>;
def : InstRW<[Ampere1Write_5cyc_1XY],
    (instregex "^FML[AS]v.[if](32|64)")>;
def : InstRW<[Ampere1Write_5cyc_1XY],
    (instregex "^FRECPXv.[if](32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY],
    (instregex "^F(RECP|RSQRT)S(32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY],
    (instregex "^FRINT[AIMNPXZ]v.[if](32|64)")>;
def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FRINT(32|64)")>;
def : InstRW<[Ampere1Write_62cyc_1XY],
    (instregex "^FSQRTv.f64", "^FSQRTDr")>;
def : InstRW<[Ampere1Write_32cyc_1XY],
    (instregex "^FSQRTv.f32", "^FSQRTSr")>;

// FP miscellaneous instructions
def : InstRW<[Ampere1Write_10cyc_1XY_1Z],
    (instregex "^FCVT[AMNPZ][SU][SU][XW][HSD]r")>;
def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FCVT[HSD]Hr")>;
def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVT[HSD][SD]r")>;
def : InstRW<[Ampere1Write_6cyc_1XY], (instregex "^FCVTLv")>;
def : InstRW<[Ampere1Write_8cyc_2XY], (instregex "^FCVT(N|XN)v")>;
def : InstRW<[Ampere1Write_10cyc_1X_1Z], (instrs FJCVTZS)>;
def : InstRW<[Ampere1Write_5cyc_1BS], (instregex "^FMOV[HSD][WX]r")>;
def : InstRW<[Ampere1Write_7cyc_1BS_1XY], (instregex "^FMOVDXHighr")>;
def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^FMOV[HSD][ri]")>;
def : InstRW<[Ampere1Write_6cyc_1XY_1Z], (instregex "^FMOVXDHighr")>;
def : InstRW<[Ampere1Write_4cyc_1Z], (instregex "^FMOV[WX][HSD]r")>;

// Integer arithmetic and logical instructions
def : InstRW<[Ampere1Write_1cyc_1A],
    (instregex "ADC(W|X)r", "SBC(W|X)r")>;
// Shifted/extended-register forms resolve through the Ampere1Write_Arith
// variant; plain register/immediate forms use a fixed write type.
def : InstRW<[Ampere1Write_Arith],
    (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r[sx]")>;
def : InstRW<[Ampere1Write_1cyc_1AB],
    (instregex "(ADD|AND|BIC|EON|EOR|ORN|ORR|SUB)(W|X)r[ri]")>;
def : InstRW<[Ampere1Write_ArithFlagsetting],
    (instregex "(ADD|AND|BIC|SUB)S(W|X)r[sx]")>;
def : InstRW<[Ampere1Write_1cyc_1A],
    (instregex "(ADD|AND|BIC|SUB)S(W|X)r[ri]")>;
def : InstRW<[Ampere1Write_1cyc_1A],
    (instregex "(ADC|SBC)S(W|X)r")>;
def : InstRW<[Ampere1Write_1cyc_1A], (instrs RMIF)>;
def : InstRW<[Ampere1Write_1cyc_1A],
    (instregex "(CCMN|CCMP)(X|W)")>;
def : InstRW<[Ampere1Write_1cyc_1A],
    (instregex "(CSEL|CSINC|CSINV|CSNEG)(X|W)")>;
def : InstRW<[Ampere1Write_18cyc_1BS], (instrs SDIVWr, UDIVWr)>;
def : InstRW<[Ampere1Write_34cyc_1BS], (instrs SDIVXr, UDIVXr)>;
def : InstRW<[Ampere1Write_3cyc_1BS],
956 (instregex "(S|U)MULHr")>; 957def : InstRW<[Ampere1Write_4cyc_1BS], 958 (instregex "(S|U)?M(ADD|SUB)L?r")>; 959 960// Integer load instructions 961def : InstRW<[Ampere1Write_4cyc_2L], 962 (instregex "(LDNP|LDP|LDPSW)(X|W)")>; 963def : InstRW<[Ampere1Write_4cyc_1L], 964 (instregex "LDR(B|D|H|Q|S)ui")>; 965def : InstRW<[Ampere1Write_4cyc_1L], 966 (instregex "LDR(D|Q|W|X)l")>; 967def : InstRW<[Ampere1Write_4cyc_1L], 968 (instregex "LDTR(B|H|W|X)i")>; 969def : InstRW<[Ampere1Write_4cyc_1L], 970 (instregex "LDTRS(BW|BX|HW|HX|W)i")>; 971def : InstRW<[Ampere1Write_4cyc_1L], 972 (instregex "LDUR(BB|HH|X|W)i")>; 973def : InstRW<[Ampere1Write_4cyc_1L], 974 (instregex "LDURS(BW|BX|HW|HX|W)i")>; 975def : InstRW<[Ampere1Write_5cyc_1AB_1L], 976 (instregex "LDR(HH|SHW|SHX|W|X)ro(W|X)")>; 977def : InstRW<[Ampere1Write_1cyc_1L], 978 (instrs PRFMl, PRFUMi, PRFUMi)>; 979def : InstRW<[Ampere1Write_2cyc_1AB_1L], 980 (instrs PRFMroW, PRFMroX)>; 981 982// Integer miscellaneous instructions 983def : InstRW<[Ampere1Write_1cyc_1A], (instrs ADR, ADRP)>; 984def : InstRW<[Ampere1Write_1cyc_1B], (instregex "EXTR(W|X)")>; 985def : InstRW<[Ampere1Write_1cyc_1B], (instregex "(S|U)?BFM(W|X)")>; 986def : InstRW<[Ampere1Write_3cyc_1BS], (instregex "^CRC32C?[BHWX]")>; 987def : InstRW<[Ampere1Write_1cyc_1B], (instregex "CLS(W|X)")>; 988def : InstRW<[Ampere1Write_1cyc_1A], (instrs SETF8, SETF16)>; 989def : InstRW<[Ampere1Write_1cyc_1AB], 990 (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>; 991def : InstRW<[Ampere1Write_1cyc_1B], 992 (instregex "(RBIT|REV|REV16)(W|X)r", "REV32Xr")>; 993def : InstRW<[Ampere1Write_1cyc_1B], 994 (instregex "(ASR|LSL|LSR|ROR)V(W|X)r")>; 995 996// Integer store instructions 997def : InstRW<[Ampere1Write_1cyc_2S], (instregex "STNP(X|W)i")>; 998def : InstRW<[Ampere1Write_2cyc_1B_1S], 999 (instrs STPWi, STPXi)>; 1000def : InstRW<[Ampere1Write_3cyc_1B_1S_1AB], 1001 (instregex "STP(W|X)(pre|post)")>; 1002def : InstRW<[Ampere1Write_1cyc_1S], 1003 (instrs STTRBi, 
STTRHi, STTRWi, STTRXi)>; 1004def : InstRW<[Ampere1Write_1cyc_1S], 1005 (instregex "STUR(BB|HH|X|W)i", 1006 "STR(X|W)ui", 1007 "STUR(BB|HH|X|W)i")>; 1008def : InstRW<[Ampere1Write_1cyc_2S], (instrs STRWroX, STRXroX)>; 1009def : InstRW<[Ampere1Write_2cyc_1AB_2S], (instrs STRWroW, STRXroW)>; 1010 1011// Pointer authentication 1012//def : InstRW<[Ampere1Write_7cyc_1BS], 1013// (instrs AUTIAZ, AUTIBZ, AUTIASP, AUTIBSP, AUTIA1716, AUTIB1716)>; 1014def : InstRW<[Ampere1Write_8cyc_1BS_1A], 1015 (instregex "BRA(A|AZ|B|BZ)", "RETA(A|B)", "ERETA(A|B)")>; 1016def : InstRW<[Ampere1Write_8cyc_1BS_2A], 1017 (instrs BLRAA, BLRAAZ, BLRAB, BLRABZ)>; 1018//def : InstRW<[Ampere1Write_7cyc_1BS], 1019// (instrs PACIAZ, PACIBZ, PACIASP, PACIBSP, PACIA1716, PACIB1716)>; 1020def : InstRW<[Ampere1Write_11cyc_1BS_1L], (instregex "^LDRA(A|B)")>; 1021def : InstRW<[Ampere1Write_7cyc_1BS], (instrs XPACD, XPACI)>; 1022 1023// Vector integer instructions 1024// -- absolute difference 1025def : InstRW<[Ampere1Write_3cyc_1XY], 1026 (instregex "^SABAv", "^SABALv", "^SABDv", "^SABDLv", 1027 "^UABAv", "^UABALv", "^UABDv", "^UABDLv")>; 1028// -- arithmetic 1029def : InstRW<[Ampere1Write_3cyc_1XY], 1030 (instregex "^ABSv", "^(ADD|SUB)v", "^SADDLv", "^SADDW", "SHADD", 1031 "SHSUB", "^SRHADD", "^URHADD", "SSUBL", "SSUBW", 1032 "^UADDLv", "^UADDW", "UHADD", "UHSUB", "USUBL", "USUBW")>; 1033// -- arithmetic, horizontal, 16B 1034def : InstRW<[Ampere1Write_12cyc_4XY], 1035 (instregex "^ADDVv16i8v", "^SADDLVv16i8v", "^UADDLVv16i8v")>; 1036def : InstRW<[Ampere1Write_12cyc_4XY], 1037 (instregex "^[SU](MIN|MAX)Vv16i8v")>; 1038// -- arithmetic, horizontal, 4H/4S 1039def : InstRW<[Ampere1Write_6cyc_2XY], 1040 (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v")>; 1041def : InstRW<[Ampere1Write_6cyc_2XY], 1042 (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v")>; 1043// -- arithmetic, horizontal, 8B/8H 1044def : InstRW<[Ampere1Write_9cyc_3XY], 1045 (instregex "^[SU]?ADDL?V(v8i16|v4i32)v")>; 1046def : 
InstRW<[Ampere1Write_9cyc_3XY], 1047 (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v")>; 1048// -- arithmetic, narrowing 1049def : InstRW<[Ampere1Write_5cyc_2XY], (instregex "(ADD|SUB)HNv.*")>; 1050def : InstRW<[Ampere1Write_5cyc_2XY], (instregex "(RADD|RSUB)HNv.*")>; 1051// -- arithmetic, pairwise 1052def : InstRW<[Ampere1Write_3cyc_1XY], 1053 (instregex "^ADDPv", "^SADALP", "^UADALP", "^SADDLPv", "^UADDLPv")>; 1054// -- arithmetic, saturating 1055def : InstRW<[Ampere1Write_3cyc_1XY], 1056 (instregex "^SQADD", "^SQSUB", "^SUQADD", "^UQADD", "^UQSUB", "^USQADD")>; 1057// -- bit count 1058def : InstRW<[Ampere1Write_2cyc_1XY], 1059 (instregex "^(CLS|CLZ|CNT)v")>; 1060// -- compare 1061def : InstRW<[Ampere1Write_3cyc_1XY], 1062 (instregex "^CMEQv", "^CMGEv", "^CMGTv", "^CMLEv", "^CMLTv", 1063 "^CMHIv", "^CMHSv")>; 1064// -- compare non-zero 1065def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^CMTSTv")>; 1066// -- dot product 1067def : InstRW<[Ampere1Write_3cyc_1XY], (instregex "^(S|SU|U|US)DOTv")>; 1068// -- fp reciprocal estimate 1069def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^FRECPEv", "^FRSQRTEv")>; 1070// -- integer reciprocal estimate 1071def : InstRW<[Ampere1Write_5cyc_1XY], (instregex "^URECPEv", "^URSQRTEv")>; 1072// -- logical 1073def : InstRW<[Ampere1Write_2cyc_1XY], 1074 (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>; 1075// -- logical, narrowing 1076def : InstRW<[Ampere1Write_5cyc_2XY], 1077 (instregex "RSHRNv", 1078 "SHRNv", "SQSHRNv", "SQSHRUNv", 1079 "UQXTNv")>; 1080// -- matrix multiply 1081def : InstRW<[Ampere1Write_6cyc_2XY], 1082 (instrs SMMLA, UMMLA, USMMLA)>; 1083// -- max/min 1084def : InstRW<[Ampere1Write_3cyc_1XY], 1085 (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>; 1086def : InstRW<[Ampere1Write_3cyc_1XY], 1087 (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>; 1088// -- move immediate 1089def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^MOVIv", "^MVNIv")>; 1090// -- multiply 1091def : 
InstRW<[Ampere1Write_3cyc_1XY], 1092 (instregex "MULv", "SMULLv", "UMULLv", "SQDMUL(H|L)v", "SQRDMULHv")>; 1093// -- multiply accumulate 1094def : InstRW<[Ampere1Write_3cyc_1XY], 1095 (instregex "MLAv", "MLSv", "(S|U|SQD)(MLAL|MLSL)v", "SQRDML(A|S)Hv")>; 1096// -- negation, saturating 1097def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^SQABS", "^SQNEG")>; 1098// -- reverse bits/bytes 1099def : InstRW<[Ampere1Write_2cyc_1XY], 1100 (instregex "^RBITv", "^REV16v", "^REV32v", "^REV64v")>; 1101// -- shift 1102def : InstRW<[Ampere1Write_3cyc_1XY], (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>; 1103// -- shift and accumulate 1104def : InstRW<[Ampere1Write_3cyc_1XY], 1105 (instregex "SRSRAv", "SSRAv", "URSRAv", "USRAv")>; 1106// -- shift, saturating 1107def : InstRW<[Ampere1Write_3cyc_1XY], 1108 (instregex "^SQRSHLv", "^SQRSHRNv", "^SQRSHRUNv", "^SQSHL", "^SQSHLU", 1109 "^SQXTNv", "^SQXTUNv", "^UQSHRNv", "UQRSHRNv", "^UQRSHL", 1110 "^UQSHL")>; 1111 1112// Vector miscellaneous instructions 1113// -- duplicate element 1114def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^DUPv.+lane")>; 1115// -- duplicate from GPR 1116def : InstRW<[Ampere1Write_5cyc_1BS], (instregex "^DUPv.+gpr")>; 1117// -- extract narrow 1118def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^XTNv")>; 1119// -- insert/extract element 1120def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^EXTv", "^INSv.+lane")>; 1121// -- move FP immediate 1122def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^FMOVv")>; 1123// -- move element to GPR 1124def : InstRW<[Ampere1Write_6cyc_1XY_1Z], (instregex "(S|U)MOVv")>; 1125// -- move from GPR to any element 1126def : InstRW<[Ampere1Write_7cyc_1BS_1XY], (instregex "^INSv.+gpr")>; 1127// -- table lookup 1128def : InstRW<[Ampere1Write_2cyc_1XY], 1129 (instrs TBLv8i8One, TBLv16i8One, TBXv8i8One, TBXv16i8One)>; 1130def : InstRW<[Ampere1Write_4cyc_2XY], 1131 (instrs TBLv8i8Two, TBLv16i8Two, TBXv8i8Two, TBXv16i8Two)>; 1132def : InstRW<[Ampere1Write_6cyc_3XY], 1133 (instrs 
TBLv8i8Three, TBLv16i8Three, TBXv8i8Three, TBXv16i8Three)>; 1134def : InstRW<[Ampere1Write_8cyc_4XY], 1135 (instrs TBLv8i8Four, TBLv16i8Four, TBXv8i8Four, TBXv16i8Four)>; 1136// -- transpose 1137def : InstRW<[Ampere1Write_2cyc_1XY], 1138 (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>; 1139// -- zip/unzip 1140def : InstRW<[Ampere1Write_2cyc_1XY], (instregex "^ZIP1v", "^ZIP2v")>; 1141 1142} // SchedModel = Ampere1Model 1143