//=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Fujitsu A64FX processors.
//
//===----------------------------------------------------------------------===//

// Top-level machine model. Everything inside the `let SchedModel = A64FXModel`
// scope opened below is attached to this record.
def A64FXModel : SchedMachineModel {
  let IssueWidth        =   6; // 6 micro-ops dispatched at a time.
  let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer.
  let LoadLatency       =   5; // Optimistic load latency.
  let MispredictPenalty =  12; // Extra cycles for mispredicted branch.

  // Determined via a mix of micro-arch details and experimentation.
  let LoopMicroOpBufferSize = 128;

  let PostRAScheduler = 1; // Using PostRA sched.
  let CompleteModel   = 1;

  // Subtarget features this model provides no scheduling information for.
  list<Predicate> UnsupportedFeatures =
    [HasSVE2, HasSVE2AES, HasSVE2SM4, HasSVE2SHA3, HasSVE2BitPerm, HasPAuth,
     HasSVE2orSME, HasMTE, HasMatMulInt8, HasBF16, HasSME2, HasSME2p1, HasSVE2p1,
     HasSVE2p1_or_HasSME2p1, HasSMEF16F16];

  // The full InstRW overlap check is switched off for this model; several
  // InstRW records further down match the same opcodes.
  let FullInstRWOverlapCheck = 0;
}

let SchedModel = A64FXModel in {

// Define the issue ports.

// A64FXIP*

def A64FXIPFLA  : ProcResource<1>; // Port 0
def A64FXIPPR   : ProcResource<1>; // Port 1
def A64FXIPEXA  : ProcResource<1>; // Port 2
def A64FXIPFLB  : ProcResource<1>; // Port 3
def A64FXIPEXB  : ProcResource<1>; // Port 4
def A64FXIPEAGA : ProcResource<1>; // Port 5
def A64FXIPEAGB : ProcResource<1>; // Port 6
def A64FXIPBR   : ProcResource<1>; // Port 7

// Define groups for the functional units on each issue port. Each group
// created will be used by a WriteRes later on.

// Single-port groups. The digit in the name is the issue port number.
def A64FXGI7 : ProcResGroup<[A64FXIPBR]>;
def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>;
def A64FXGI1 : ProcResGroup<[A64FXIPPR]>;
def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>;
def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>;
def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>;
def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>;
def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>;

// Multi-port groups. Each digit in the name is one member port.
def A64FXGI03   : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>;
def A64FXGI01   : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>;
def A64FXGI24   : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>;
def A64FXGI56   : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>;
def A64FXGI056  : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>;
def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB,
                                A64FXIPEAGA, A64FXIPEAGB]>;

// All eight ports.
def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB,
                             A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB, A64FXIPBR]>;

// Generic write types named A64FXWrite_<latency>Cyc_<group>.
def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]> { let Latency = 1; }

def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 2; }
def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 4; }
def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 6; }
def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 8; }
def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 9; }

def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]> { let Latency = 3; }

def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { let Latency = 5; }

def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { let Latency = 4; }
def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { let Latency = 6; }

def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 4; }
def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }
def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 9; }

def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 10; }
def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 12; }
def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 20; }

def A64FXWrite_5Cyc_GI5  : SchedWriteRes<[A64FXGI5]> { let Latency = 5; }
def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]> { let Latency = 11; }

def A64FXWrite_5Cyc_GI6 : SchedWriteRes<[A64FXGI6]> { let Latency = 5; }

def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { let Latency = 1; }
def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { let Latency = 2; }
def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]> {
  let Latency = 4; let NumMicroOps = 4;
}

def A64FXWrite_1Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_5Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 5; }
def A64FXWrite_8Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 8; }
def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { let Latency = 11; }

// Load write types.
def A64FXWrite_LDNP : SchedWriteRes<[A64FXGI56]> {
  let Latency = 5; let NumMicroOps = 2;
}
def A64FXWrite_LDP01 : SchedWriteRes<[A64FXGI2456]> {
  let Latency = 5; let NumMicroOps = 3;
}
def A64FXWrite_LDR01 : SchedWriteRes<[A64FXGI2456]> {
  let Latency = 5; let NumMicroOps = 2;
}

def A64FXWrite_LD102 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD103 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 2;
}
def A64FXWrite_LD104 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD105 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 3;
}
def A64FXWrite_LD106 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD107 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 4;
}
def A64FXWrite_LD108 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD109 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 2;
}
def A64FXWrite_LD110 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD111 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 3;
}
def A64FXWrite_LD112 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD113 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 4;
}
def A64FXWrite_LD114 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 5;
}
def A64FXWrite_LD115 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 5;
}

def A64FXWrite_LD1I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD1I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD2I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD2I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 5;
}
def A64FXWrite_LD3I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 6;
}
def A64FXWrite_LD3I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 7;
}
def A64FXWrite_LD4I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 8;
}
def A64FXWrite_LD4I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 9;
}

def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]> { let Latency = 1; }

// Write types named after the instruction(s) they are meant for.
def A64FXWrite_FMOV_GV   : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }
def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_ADDLV     : SchedWriteRes<[A64FXGI03]> { let Latency = 12; }
def A64FXWrite_MULLE     : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_MULLV     : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_MADDL     : SchedWriteRes<[A64FXGI03]> { let Latency = 6; }
def A64FXWrite_ABA       : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }
def A64FXWrite_ABAL      : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }

def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 12; let NumMicroOps = 6;
}
def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 14; let NumMicroOps = 6;
}

def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]> { let Latency = 9; }
def A64FXWrite_PMUL     : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }

def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 2;
}

def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 2;
}
def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 3;
}
def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 2;
}
def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 2;
}
def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 14; let NumMicroOps = 7;
}

def A64FXWrite_BIF        : SchedWriteRes<[A64FXGI03]> { let Latency = 5; }
def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }
def A64FXWrite_SHA00      : SchedWriteRes<[A64FXGI0]> { let Latency = 9; }
def A64FXWrite_SHA01      : SchedWriteRes<[A64FXGI0]> { let Latency = 12; }
def A64FXWrite_SMOV       : SchedWriteRes<[A64FXGI03]> { let Latency = 25; }

def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 5;
}
def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 7;
}
def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 9;
}

// Prefetch, swap and store write types (all issue to the GI56 group).
def A64FXWrite_PREF0 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_PREF1 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_SWP   : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STUR  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STNP  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STP01 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST10  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST11  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST12  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST13  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST14  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST15  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST16  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST17  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_CAS   : SchedWriteRes<[A64FXGI56]> { let Latency = 7; }

// Define commonly used read types.

// No forwarding is provided for these types: every read advance is 0.
foreach Rd = [ReadI, ReadISReg, ReadIEReg, ReadIM, ReadIMA, ReadID,
              ReadExtrHi, ReadAdrBase, ReadST, ReadVLD] in
  def : ReadAdvance<Rd, 0>;

//===----------------------------------------------------------------------===//
// 3. Instruction Tables.

//---
// 3.1 Branch Instructions
//---

// Branch, immed
// Branch and link, immed
// Compare and branch
def : WriteRes<WriteBr, [A64FXGI7]> { let Latency = 1; }

// Branch, register
// Branch and link, register != LR
// Branch and link, register = LR
def : WriteRes<WriteBrReg, [A64FXGI7]> { let Latency = 1; }

// System, barrier, hint and atomic writes consume no modeled resource.
def : WriteRes<WriteSys, []>     { let Latency = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint, []>    { let Latency = 1; }
def : WriteRes<WriteAtomic, []>  { let Latency = 4; }

//---
// Branch
//---
def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR)>;
def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs RET)>;
def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^B..$")>;
def : InstRW<[A64FXWrite_1Cyc_GI7],
             (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;

//---
// 3.2 Arithmetic and Logical Instructions
// 3.3 Move and Shift Instructions
//---

// Opcode patterns shared by the WriteI, WriteISReg and WriteIEReg InstRW
// records below.
// NOTE(review): all three records match exactly the same opcodes. This is
// accepted because the model sets FullInstRWOverlapCheck = 0, but presumably
// only one of the three is effective for any given opcode -- confirm which
// write type is intended for the shifted/extended register forms.
defvar A64FXBasicALUInstrs =
  (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
             "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
             "ADC(W|X)r",
             "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
             "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
             "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
             "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
             "SBCS(W|X)r", "CCMN(W|X)(i|r)",
             "CCMP(W|X)(i|r)", "CSEL(W|X)r",
             "CSINC(W|X)r", "CSINV(W|X)r",
             "CSNEG(W|X)r");

// ALU, basic
// Conditional compare
// Conditional select
// Address generation
def : WriteRes<WriteI, [A64FXGI2456]> { let Latency = 1; }

def : InstRW<[WriteI], A64FXBasicALUInstrs>;

def : InstRW<[WriteI], (instrs COPY)>;

// ALU, extend and/or shift
def : WriteRes<WriteISReg, [A64FXGI2456]> { let Latency = 2; }

def : InstRW<[WriteISReg], A64FXBasicALUInstrs>;

def : WriteRes<WriteIEReg, [A64FXGI2456]> { let Latency = 1; }

def : InstRW<[WriteIEReg], A64FXBasicALUInstrs>;

// Move immed
def : WriteRes<WriteImm, [A64FXGI2456]> { let Latency = 1; }

def : InstRW<[A64FXWrite_1Cyc_GI2456],
             (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;

def : InstRW<[A64FXWrite_2Cyc_GI24],
             (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;

// Variable shift
def : WriteRes<WriteIS, [A64FXGI2456]> { let Latency = 1; }

//---
// 3.4 Divide and Multiply Instructions
//---

// The divider is modeled as occupied for the whole latency of the operation.
// NOTE(review): the W-form is modeled as slower (39) than the X-form (23);
// confirm these are not swapped against the A64FX microarchitecture manual.

// Divide, W-form
def : WriteRes<WriteID32, [A64FXGI4]> {
  let Latency = 39;
  let ResourceCycles = [39];
}

// Divide, X-form
def : WriteRes<WriteID64, [A64FXGI4]> {
  let Latency = 23;
  let ResourceCycles = [23];
}

// Multiply accumulate, W-form
def : WriteRes<WriteIM32, [A64FXGI2456]> { let Latency = 5; }

// Multiply accumulate, X-form
def : WriteRes<WriteIM64, [A64FXGI2456]> { let Latency = 5; }

// W-form multiply-accumulate uses WriteIM32, X-form uses WriteIM64 (both are
// defined identically just above).
def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
def : InstRW<[WriteIM64], (instrs MADDXrrr, MSUBXrrr)>;
def : InstRW<[A64FXWrite_MADDL],
             (instregex "(S|U)(MADDL|MSUBL)rrr")>;

def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;

// Bitfield extract, two reg
def : WriteRes<WriteExtr, [A64FXGI2456]> { let Latency = 1; }

// Multiply high
def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>;

// Miscellaneous Data-Processing Instructions
// Bitfield extract
def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move - basic
def : InstRW<[A64FXWrite_1Cyc_GI24],
             (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;

// Bitfield move, insert
// NOTE(review): the second pattern also matches every opcode covered by
// "^BFM" and by the SBFM/UBFM record above -- confirm the overlap is intended.
def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>;
def : InstRW<[A64FXWrite_1Cyc_GI24], (instregex "(S|U)?BFM.*")>;

// Count leading
def : InstRW<[A64FXWrite_2Cyc_GI0], (instregex "^CLS(W|X)r$",
                                               "^CLZ(W|X)r$")>;

// Reverse bits
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>;

// Cryptography Extensions
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AES[DE]")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AESI?MC")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^PMULL")>;
def : InstRW<[A64FXWrite_SHA00],    (instregex "^SHA1SU0")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA1(H|SU1)")>;
def : InstRW<[A64FXWrite_SHA01],    (instregex "^SHA1[CMP]")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU0")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU1")>;
def : InstRW<[A64FXWrite_SHA01],    (instregex "^SHA256(H|H2)")>;

// CRC Instructions
def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32Brr, CRC32Hrr)>;
def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr)>;
def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr)>;

def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32CBrr, CRC32CHrr)>;
def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32CWrr)>;
def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32CXrr)>;

// Reverse bits/bytes
// NOTE: Handled by WriteI.

//---
// 3.6 Load Instructions
// 3.10 FP Load Instructions
//---

// Load register, literal
// Load register, unscaled immed
// Load register, immed unprivileged
// Load register, unsigned immed
def : WriteRes<WriteLD, [A64FXGI56]> { let Latency = 4; }

// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
def : WriteRes<WriteAdr, [A64FXGI2456]> { let Latency = 1; }

// Load pair, immed offset, normal
// Load pair, immed offset, signed words, base != SP
// Load pair, immed offset signed words, base = SP
// LDP only breaks into *one* LS micro-op. Thus
// the resources are handled by WriteLD.
def : WriteRes<WriteLDHi, []> { let Latency = 5; }

// Load register offset, basic
// Load register, register offset, scale by 4/8
// Load register, register offset, scale by 2
// Load register offset, extend
// Load register, register offset, extend, scale by 4/8
// Load register, register offset, extend, scale by 2
//
// Both arms of the variant select the same write type, so scaled and
// unscaled index forms are modeled identically.
def A64FXWriteLDIdx : SchedWriteVariant<[
  SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>,
  SchedVar<NoSchedPred,   [A64FXWrite_1Cyc_GI56]>]>;
def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>;

// Likewise, both arms of the base-address read variant resolve to ReadDefault.
def A64FXReadAdrBase : SchedReadVariant<[
  SchedVar<ScaledIdxPred, [ReadDefault]>,
  SchedVar<NoSchedPred,   [ReadDefault]>]>;
def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>;

// Load pair, immed pre-index, normal
// Load pair, immed pre-index, signed words
// Load pair, immed post-index, normal
// Load pair, immed post-index, signed words
// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.

// Each foreach below expands to one anonymous InstRW record per listed
// opcode, i.e. every opcode gets its own single-instruction record.

// Load pair, non-temporal and signed-offset forms.
foreach I = [LDNPDi, LDNPQi, LDNPSi, LDNPWi, LDNPXi,
             LDPDi, LDPQi, LDPSi, LDPSWi, LDPWi, LDPXi] in
  def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs I)>;

// Load register, unsigned immed.
foreach I = [LDRBui, LDRDui, LDRHui, LDRQui, LDRSui] in
  def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs I)>;

// Load register, literal. These are restricted to the GI6 group.
foreach I = [LDRDl, LDRQl, LDRWl, LDRXl] in
  def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs I)>;

// Load register, immed unprivileged.
foreach I = [LDTRBi, LDTRHi, LDTRWi, LDTRXi,
             LDTRSBWi, LDTRSBXi, LDTRSHWi, LDTRSHXi, LDTRSWi] in
  def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs I)>;

// Load pair, immed pre-index.
foreach I = [LDPDpre, LDPQpre, LDPSpre, LDPWpre, LDPXpre] in
  def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs I)>;

// Load register, immed pre-index, plus the pre- and post-index forms of the
// sign-extending and byte/halfword loads.
foreach I = [LDRBpre, LDRDpre, LDRHpre, LDRQpre, LDRSpre, LDRWpre, LDRXpre,
             LDRSBWpre, LDRSBXpre, LDRSBWpost, LDRSBXpost,
             LDRSHWpre, LDRSHXpre, LDRSHWpost, LDRSHXpost,
             LDRBBpre, LDRBBpost,
             LDRHHpre, LDRHHpost] in
  def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs I)>;

// Load pair, immed post-index.
foreach I = [LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost] in
  def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs I)>;

// Load register, immed post-index.
foreach I = [LDRBpost, LDRDpost, LDRHpost, LDRQpost, LDRSpost,
             LDRWpost, LDRXpost] in
  def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs I)>;

// Load register, register offset (W- and X-register index).
foreach I = [LDRBroW, LDRDroW, LDRHroW, LDRHHroW, LDRQroW, LDRSroW,
             LDRSHWroW, LDRSHXroW, LDRWroW, LDRXroW,
             LDRBroX, LDRDroX, LDRHHroX, LDRHroX, LDRQroX, LDRSroX,
             LDRSHWroX, LDRSHXroX, LDRWroX, LDRXroX] in
  def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs I)>;

// Load register, unscaled immed.
foreach I = [LDURBi, LDURBBi, LDURDi, LDURHi, LDURHHi, LDURQi, LDURSi,
             LDURXi, LDURSBWi, LDURSBXi, LDURSHWi, LDURSHXi, LDURSWi] in
  def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs I)>;

//---
// Prefetch
//---
def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>;
foreach I = [PRFUMi, PRFMui, PRFMroW, PRFMroX] in
  def : InstRW<[A64FXWrite_PREF1], (instrs I)>;

//--
// 3.7 Store Instructions
// 3.11 FP Store Instructions
//--

// Store register, unscaled immed
// Store register, immed unprivileged
// Store register, unsigned immed
def : WriteRes<WriteST, [A64FXGI56]> { let Latency = 1; }

// Store register, immed post-index
// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase

// Store register, immed pre-index
// NOTE: Handled by WriteAdr, WriteST

// Store register, register offset, basic
// Store register, register offset, scaled by 4/8
// Store register, register offset, scaled by 2
// Store register, register offset, extend
// Store register, register offset, extend, scale by 4/8
// Store register, register offset, extend, scale by 1
def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]> { let Latency = 1; }

// Store pair, immed offset, W-form
// Store pair, immed offset, X-form
def : WriteRes<WriteSTP, [A64FXGI56]> { let Latency = 1; }

// Store pair, immed post-index, W-form
// Store pair, immed post-index, X-form
// Store pair, immed pre-index, W-form
// Store pair, immed pre-index, X-form
// NOTE: Handled by WriteAdr, WriteSTP.
// Per-opcode store overrides.
//
// Every opcode appears in exactly one InstRW below. Listing an opcode more than
// once is only accepted because this model sets FullInstRWOverlapCheck = 0;
// a repeated entry adds nothing, and a repeated entry with a different
// read/write list is silently shadowed.

// STUR*: A64FXWrite_STUR only.
def : InstRW<[A64FXWrite_STUR], (instrs STURBi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURBBi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURDi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURHi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURHHi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURQi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURSi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURWi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURXi)>;

// STTR*: WriteAdr followed by A64FXWrite_STUR.
def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRBi)>;
def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRHi)>;
def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRWi)>;
def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRXi)>;

// STNP*i and STP*i both use A64FXWrite_STNP.
def : InstRW<[A64FXWrite_STNP], (instrs STNPDi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STNPQi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STNPXi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STNPWi)>;

def : InstRW<[A64FXWrite_STNP], (instrs STPDi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STPQi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STPXi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STPWi)>;

// STR*ui: A64FXWrite_STUR only.
def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>;

// STP* pre/post-index: A64FXWrite_STP01.
// NOTE(review): ReadAdrBase is intentionally absent from these lists. That
// keeps the earliest definition for each opcode, which is presumed to be the
// one TableGen honours when several InstRW match in one model -- confirm
// before adding ReadAdrBase here.
def : InstRW<[A64FXWrite_STP01], (instrs STPDpre, STPDpost)>;
def : InstRW<[A64FXWrite_STP01], (instrs STPQpre, STPQpost)>;
def : InstRW<[A64FXWrite_STP01], (instrs STPSpre, STPSpost)>;
def : InstRW<[A64FXWrite_STP01], (instrs STPWpre, STPWpost)>;
def : InstRW<[A64FXWrite_STP01], (instrs STPXpre, STPXpost)>;

// STR* pre/post-index: WriteAdr followed by A64FXWrite_STP01.
// NOTE(review): same remark as above regarding the absent ReadAdrBase.
def : InstRW<[WriteAdr, A64FXWrite_STP01], (instrs STRBpre, STRBpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01], (instrs STRBBpre, STRBBpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01], (instrs STRDpre, STRDpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01], (instrs STRHpre, STRHpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01], (instrs STRHHpre, STRHHpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01], (instrs STRQpre, STRQpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01], (instrs STRSpre, STRSpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01], (instrs STRWpre, STRWpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01], (instrs STRXpre, STRXpost)>;

// STR*roW / STR*roX: A64FXWrite_STUR plus ReadAdrBase.
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRBroW, STRBroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRBBroW, STRBBroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRDroW, STRDroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRHroW, STRHroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRHHroW, STRHHroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRQroW, STRQroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRSroW, STRSroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRWroW, STRWroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRXroW, STRXroX)>;
// NOTE(review): this STRXroW/STRXroX entry is identical to the one that
// immediately precedes it in the file, so it looks redundant.
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRXroW, STRXroX)>;

//---
// 3.8 FP Data Processing Instructions
//---

// FP absolute value
// FP min/max
// FP negate
def : WriteRes<WriteF, [A64FXGI03]> {
  let Latency = 4;
  let ResourceCycles = [2];
}

// FP arithmetic
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FADDDrr, FADDHrr)>;
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FSUBDrr, FSUBHrr)>;

// FP compare
def : WriteRes<WriteFCmp, [A64FXGI03]> {
  let Latency = 4;
  let ResourceCycles = [2];
}

// FP Div, Sqrt
// Default WriteFDiv latency, followed by the named writes used by the
// per-opcode divide / square-root overrides below.
def : WriteRes<WriteFDiv, [A64FXGI0]> { let Latency = 43; }

def A64FXXWriteFDiv    : SchedWriteRes<[A64FXGI0]> { let Latency = 38; }
def A64FXXWriteFDivSP  : SchedWriteRes<[A64FXGI0]> { let Latency = 29; }
def A64FXXWriteFDivDP  : SchedWriteRes<[A64FXGI0]> { let Latency = 43; }
def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]> { let Latency = 29; }
def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]> { let Latency = 43; }

// FP divide, S-form
// FP square root, S-form
// NOTE(review): "^.*SQRT.*32$" (and the "...64$" twin below) matches any
// opcode name containing SQRT with that suffix, which would appear to include
// reciprocal-square-root opcodes as well -- confirm this is intended.
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>;
def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>;
def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVSrr")>;
def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^FSQRTSr")>;

// FP divide, D-form
// FP square root, D-form
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>;
def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>;
def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVDrr")>;
def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^FSQRTDr")>;

// FP round to integral
def : InstRW<[A64FXWrite_9Cyc_GI03],
  (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;

// FP select
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>;

//---
// 3.9 FP Miscellaneous Instructions
//---

// FP convert, from vec to vec reg
// FP convert, from gen to vec reg
// FP convert, from vec to gen reg
def : WriteRes<WriteFCvt, [A64FXGI03]> {
  let Latency = 9;
  let ResourceCycles = [2];
}

// FP move, immed
// FP move, register
def : WriteRes<WriteFImm, [A64FXGI0]> {
  let Latency = 4;
  let ResourceCycles = [2];
}

// FP transfer, from gen to vec reg
// FP transfer, from vec to gen reg
def : WriteRes<WriteFCopy, [A64FXGI0]> {
  let Latency = 4;
  let ResourceCycles = [2];
}

def : InstRW<[A64FXWrite_FMOV_GV], (instrs FMOVXDHighr)>;
def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;

//---
// 3.12 ASIMD Integer Instructions
//---

// Categories covered by the WriteVd / WriteVq defaults:
// ASIMD absolute diff, D-form
// ASIMD absolute diff, Q-form
// ASIMD absolute diff accum, D-form
// ASIMD absolute diff accum, Q-form
// ASIMD absolute diff accum long
// ASIMD absolute diff long
// ASIMD arith, basic
// ASIMD arith, complex
// ASIMD compare
// ASIMD logical (AND, BIC, EOR)
// ASIMD max/min, basic
// ASIMD max/min, reduce, 4H/4S
// ASIMD max/min, reduce, 8B/8H
// ASIMD max/min, reduce, 16B
// ASIMD multiply, D-form
// ASIMD multiply, Q-form
// ASIMD multiply accumulate long
// ASIMD multiply accumulate saturating long
// ASIMD multiply long
// ASIMD pairwise add and accumulate
// ASIMD shift accumulate
// ASIMD shift by immed, basic
// ASIMD shift by immed and insert, basic, D-form
// ASIMD shift by immed and insert, basic, Q-form
// ASIMD shift by immed, complex
// ASIMD shift by register, basic, D-form
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
def : WriteRes<WriteVd, [A64FXGI03]> { let Latency = 4; }
def : WriteRes<WriteVq, [A64FXGI03]> { let Latency = 4; }

// ASIMD arith, reduce, 4H/4S
// ASIMD arith, reduce, 8B/8H
// ASIMD arith, reduce, 16B

// ASIMD logical (MVN (alias for NOT), ORN, ORR)
def : InstRW<[A64FXWrite_4Cyc_GI03],
  (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;

// ASIMD arith, reduce
def : InstRW<[A64FXWrite_ADDLV],
  (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;

// ASIMD polynomial (8x8) multiply long
def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>;
def : InstRW<[A64FXWrite_MULLV],
  (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v8i8|v16i8)")>;
def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v1i64|v2i64)")>;

// ASIMD absolute diff accum, D-form
def : InstRW<[A64FXWrite_ABA],
  (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
// ASIMD absolute diff accum, Q-form
def : InstRW<[A64FXWrite_ABA],
  (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
// ASIMD absolute diff accum long
def : InstRW<[A64FXWrite_ABAL], (instregex "^[SU]ABAL")>;
// ASIMD arith, reduce, 4H/4S
def : InstRW<[A64FXWrite_ADDLV1],
  (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
// ASIMD arith, reduce, 8B
def : InstRW<[A64FXWrite_ADDLV1],
  (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
// ASIMD arith, reduce, 16B/16H
def : InstRW<[A64FXWrite_ADDLV1],
  (instregex "^[SU]?ADDL?Vv16i8v$")>;
// ASIMD max/min, reduce, 4H/4S
def : InstRW<[A64FXWrite_MINMAXV],
  (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
// ASIMD max/min, reduce, 8B/8H
def : InstRW<[A64FXWrite_MINMAXV],
  (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
// ASIMD max/min, reduce, 16B/16H
def : InstRW<[A64FXWrite_MINMAXV],
  (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
// ASIMD multiply, D-form
def : InstRW<[A64FXWrite_PMUL],
  (instregex "^(P?MUL|SQR?DMUL)" #
             "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
             "(_indexed)?$")>;

// ASIMD multiply, Q-form
def : InstRW<[A64FXWrite_PMUL],
  (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD multiply, Q-form (SQDMULH / SQRDMULH)
def : InstRW<[A64FXWrite_SQRDMULH],
  (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD multiply accumulate, D-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
  (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
// ASIMD multiply accumulate, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
  (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
// ASIMD shift accumulate
def : InstRW<[A64FXWrite_SRSRAV], (instregex "SRSRAv", "URSRAv")>;
def : InstRW<[A64FXWrite_SSRAV], (instregex "SSRAv", "USRAv")>;

// ASIMD shift by immed, basic
def : InstRW<[A64FXWrite_RSHRN],
  (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>;
def : InstRW<[A64FXWrite_SHRN],
  (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>;

def : InstRW<[A64FXWrite_6Cyc_GI3],
  (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>;

// ASIMD shift by immed, complex
def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>;
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^SQSHLU")>;
// ASIMD shift by register, basic, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3],
  (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
// ASIMD shift by register, complex, D-form
def : InstRW<[A64FXWrite_6Cyc_GI3],
  (instregex "^[SU][QR]{1,2}SHL" #
             "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
// ASIMD shift by register, complex, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3],
  (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;

// ASIMD Arithmetic
// NOTE(review): several pattern lists in this group overlap with each other
// (for example the saturating add/sub prefixes are listed more than once), so
// the relative order of these definitions is kept as-is.
def : InstRW<[A64FXWrite_4Cyc_GI03],
  (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
  (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>;
def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
  (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
             "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
def : InstRW<[A64FXWrite_ADDP],
  (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
  (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
             "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
def : InstRW<[A64FXWrite_4Cyc_GI0],
  (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>;
def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>;
def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>;
def : InstRW<[A64FXWrite_MINMAXV],
  (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv", "^UMINVv")>;
def : InstRW<[A64FXWrite_ABA],
  (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
  (instregex "^SQADDv", "^SQSUBv", "^UQADDv", "^UQSUBv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^SUQADDv", "^USQADDv")>;
def : InstRW<[A64FXWrite_SHRN], (instregex "^ADDHNv", "^SUBHNv")>;
def : InstRW<[A64FXWrite_RSHRN], (instregex "^RADDHNv", "^RSUBHNv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
  (instregex "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
             "^SRHADD", "^SUQADD", "^UQADD", "^UQSUB",
             "^URHADD", "^USQADD")>;

// Compares, element-wise and pairwise max/min, absolute difference.
def : InstRW<[A64FXWrite_4Cyc_GI03],
  (instregex "^CMEQv", "^CMGEv", "^CMGTv",
             "^CMLEv", "^CMLTv", "^CMHIv", "^CMHSv")>;
def : InstRW<[A64FXWrite_MINMAXV],
  (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
def : InstRW<[A64FXWrite_ADDP],
  (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^SABDv", "^UABDv")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^SABDLv", "^UABDLv")>;

//---
// 3.13 ASIMD Floating-point Instructions
//---

def : WriteRes<WriteFMul, [A64FXGI03]> { let Latency = 9; }

// ASIMD FP absolute value
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FABSv")>;

// ASIMD FP arith, normal, D-form
// ASIMD FP arith, normal, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
  (instregex "^FABDv", "^FADDv", "^FSUBv")>;

// ASIMD FP arith, pairwise, D-form
// ASIMD FP arith, pairwise, Q-form
def : InstRW<[A64FXWrite_FADDPV], (instregex "^FADDPv")>;

// ASIMD FP compare, D-form
// ASIMD FP compare, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FACGEv", "^FACGTv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
  (instregex "^FCMEQv", "^FCMGEv", "^FCMGTv", "^FCMLEv", "^FCMLTv")>;
// ASIMD FP round, D-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
  (instregex "^FRINT[AIMNPXZ](v2f32)")>;
// ASIMD FP round, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
  (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;

// ASIMD FP convert, long
// ASIMD FP convert, narrow
// ASIMD FP convert, other, D-form
// ASIMD FP convert, other, Q-form

// ASIMD FP convert, long and narrow
def : InstRW<[A64FXWrite_FCVTXNV], (instregex "^FCVT(L|N|XN)v")>;
// ASIMD FP convert, other, D-form
def : InstRW<[A64FXWrite_FCVTXNV],
  (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
// ASIMD FP convert, other, Q-form
def : InstRW<[A64FXWrite_FCVTXNV],
  (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP divide, D-form, F32
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "FDIVv2f32")>;

// ASIMD FP divide, Q-form, F32
def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>;
def : InstRW<[A64FXXWriteFDiv], (instregex "FDIVv4f32")>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "FDIVv2f64")>;

// ASIMD FP max/min, normal, D-form
// ASIMD FP max/min, normal, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI0],
  (instregex "^FMAXv", "^FMAXNMv", "^FMINv", "^FMINNMv")>;

// ASIMD FP max/min, pairwise, D-form
// ASIMD FP max/min, pairwise, Q-form
def : InstRW<[A64FXWrite_ADDP],
  (instregex "^FMAXPv", "^FMAXNMPv", "^FMINPv", "^FMINNMPv")>;

// ASIMD FP max/min, reduce
def : InstRW<[A64FXWrite_FMAXVVH],
  (instregex "^FMAXVv", "^FMAXNMVv", "^FMINVv", "^FMINNMVv")>;

// ASIMD FP multiply, D-form, FZ
// ASIMD FP multiply, D-form, no FZ
// ASIMD FP multiply, Q-form, FZ
// ASIMD FP multiply, Q-form, no FZ
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>;
def : InstRW<[A64FXWrite_FMULXE],
  (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
def : InstRW<[A64FXWrite_FMULXE],
  (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP multiply accumulate, Dform, FZ
// ASIMD FP multiply accumulate, Dform, no FZ
// ASIMD FP multiply accumulate, Qform, FZ
// ASIMD FP multiply accumulate, Qform, no FZ
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>;
def : InstRW<[A64FXWrite_FMULXE],
  (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
def : InstRW<[A64FXWrite_FMULXE],
  (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP negate
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>;

//--
// 3.14 ASIMD Miscellaneous Instructions
//--

// ASIMD bit reverse
def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>;

// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
def : InstRW<[A64FXWrite_BIF], (instregex "^BIFv", "^BITv", "^BSLv")>;

// ASIMD count, D-form
// ASIMD count, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^CLSv", "^CLZv", "^CNTv")>;

// ASIMD duplicate, gen reg
// ASIMD duplicate, element
def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>;
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUP(i8|i16|i32|i64)$")>;
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>;

// ASIMD extract
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>;

// ASIMD extract narrow
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>;

// ASIMD extract narrow, saturating
def : InstRW<[A64FXWrite_6Cyc_GI3],
  (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;

// ASIMD insert, element to element
// (the "^INSv" and "^[SU]MOVv" patterns are listed again further down)
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;

// ASIMD transfer, element to gen reg
def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;

// ASIMD move, integer immed
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv")>;

// ASIMD move, FP immed
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMOVv")>;

// ASIMD table lookup, D-form
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv8i8Two")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv8i8Three")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv8i8Four")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv8i8One")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv8i8Two")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv8i8Three")>;
def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv8i8Four")>;

// ASIMD table lookup, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv16i8Two")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv16i8Three")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv16i8Four")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv16i8One")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv16i8Two")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv16i8Three")>;
def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv16i8Four")>;

// ASIMD unzip/zip
def : InstRW<[A64FXWrite_6Cyc_GI0],
  (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;

// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI03],
  (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
             "^FRSQRTEv", "^URSQRTEv")>;

// ASIMD reciprocal step, D-form, FZ
// ASIMD reciprocal step, D-form, no FZ
// ASIMD reciprocal step, Q-form, FZ
// ASIMD reciprocal step, Q-form, no FZ
def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex "^FRECPSv", "^FRSQRTSv")>;

// ASIMD reverse
def : InstRW<[A64FXWrite_4Cyc_GI03],
  (instregex "^REV16v", "^REV32v", "^REV64v")>;

// ASIMD table lookup, D-form
// ASIMD table lookup, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>;

// ASIMD transfer, element to word or word
def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;

// ASIMD transfer, element to gen reg
def : InstRW<[A64FXWrite_SMOV], (instregex "(S|U)MOVv.*")>;

// ASIMD transfer gen reg to element
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;

// ASIMD transpose
def : InstRW<[A64FXWrite_6Cyc_GI0],
  (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>;

// ASIMD unzip/zip
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^ZIP1v", "^ZIP2v")>;

//--
// 3.15 ASIMD Load Instructions
//--
// In this section every "_POST" pattern lists WriteAdr after the load write.

// ASIMD load, 1 element, multiple, 1 reg, D-form
// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[A64FXWrite_8Cyc_GI56],
  (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_11Cyc_GI56],
  (instregex "^LD1Onev(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
  (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD109, WriteAdr],
  (instregex "^LD1Onev(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, D-form
// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[A64FXWrite_LD102],
  (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD103],
  (instregex "^LD1Twov(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
  (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
  (instregex "^LD1Twov(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, D-form
// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[A64FXWrite_LD104],
  (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD105],
  (instregex "^LD1Threev(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
  (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
  (instregex "^LD1Threev(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, D-form
// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[A64FXWrite_LD106],
  (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD107],
  (instregex "^LD1Fourv(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
  (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
  (instregex "^LD1Fourv(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[A64FXWrite_LD1I0], (instregex "^LD1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD1I1, WriteAdr],
  (instregex "^LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
// ASIMD load, 1 element, all lanes, Q-form
def : InstRW<[A64FXWrite_8Cyc_GI03],
  (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
  (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, multiple, D-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_LD103],
  (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
  (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[A64FXWrite_LD2I0], (instregex "^LD2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD2I1, WriteAdr],
  (instregex "^LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
// ASIMD load, 2 element, all lanes, Q-form
def : InstRW<[A64FXWrite_LD102],
  (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
  (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, multiple, D-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_LD105],
  (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
  (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[A64FXWrite_LD3I0], (instregex "^LD3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD3I1, WriteAdr],
  (instregex "^LD3i(8|16|32|64)_POST$")>;

// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
def : InstRW<[A64FXWrite_LD104],
  (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
  (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, multiple, D-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_LD107],
  (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
  (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[A64FXWrite_LD4I0], (instregex "^LD4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD4I1, WriteAdr],
  (instregex "^LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
def : InstRW<[A64FXWrite_LD106],
  (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
  (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

//--
// 3.16 ASIMD Store Instructions
//--
// As with the loads, every "_POST" pattern lists WriteAdr after the store
// write.

// ASIMD store, 1 element, multiple, 1 reg, D-form
// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[A64FXWrite_ST10],
  (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST14, WriteAdr],
  (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, D-form
// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[A64FXWrite_ST11],
  (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
  (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, D-form
// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[A64FXWrite_ST12],
  (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
  (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, D-form
// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[A64FXWrite_ST13],
  (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
  (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
def : InstRW<[A64FXWrite_ST10], (instregex "^ST1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST14, WriteAdr],
  (instregex "^ST1i(8|16|32|64)_POST$")>;

// ASIMD store, 2 element, multiple, D-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_ST11],
  (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
  (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
def : InstRW<[A64FXWrite_ST11], (instregex "^ST2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
  (instregex "^ST2i(8|16|32|64)_POST$")>;

// ASIMD store, 3 element, multiple (D-form B/H/S; Q-form B/H/S and D).
// ASIMD store, 3 element, one lane (B/H, S and D).
// Both forms share the same writes, so they are bound together.
def : InstRW<[A64FXWrite_ST12],
            (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$",
                       "^ST3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
            (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$",
                       "^ST3i(8|16|32|64)_POST$")>;

// ASIMD store, 4 element, multiple (D-form B/H/S; Q-form B/H/S and D).
// ASIMD store, 4 element, one lane (B/H, S and D).
def : InstRW<[A64FXWrite_ST13],
            (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$",
                       "^ST4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
            (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$",
                       "^ST4i(8|16|32|64)_POST$")>;

// V8.1a Atomics (LSE)
//
// Each operation is listed in its four suffix variants (none, A, L, AL) and
// four sizes (B, H, W, X); all variants of one operation share one binding.

// Compare and swap.
def : InstRW<[A64FXWrite_CAS, WriteAtomic],
            (instrs CASB,   CASH,   CASW,   CASX,
                    CASAB,  CASAH,  CASAW,  CASAX,
                    CASLB,  CASLH,  CASLW,  CASLX,
                    CASALB, CASALH, CASALW, CASALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
            (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;

// Atomic load-and-operate instructions.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
            (instrs LDADDB,   LDADDH,   LDADDW,   LDADDX,
                    LDADDAB,  LDADDAH,  LDADDAW,  LDADDAX,
                    LDADDLB,  LDADDLH,  LDADDLW,  LDADDLX,
                    LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
            (instrs LDCLRB,   LDCLRH,   LDCLRW,   LDCLRX,
                    LDCLRAB,  LDCLRAH,  LDCLRAW,  LDCLRAX,
                    LDCLRLB,  LDCLRLH,  LDCLRLW,  LDCLRLX,
                    LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
            (instrs LDEORB,   LDEORH,   LDEORW,   LDEORX,
                    LDEORAB,  LDEORAH,  LDEORAW,  LDEORAX,
                    LDEORLB,  LDEORLH,  LDEORLW,  LDEORLX,
                    LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
            (instrs LDSETB,   LDSETH,   LDSETW,   LDSETX,
                    LDSETAB,  LDSETAH,  LDSETAW,  LDSETAX,
                    LDSETLB,  LDSETLH,  LDSETLW,  LDSETLX,
                    LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
            (instrs LDSMAXB,   LDSMAXH,   LDSMAXW,   LDSMAXX,
                    LDSMAXAB,  LDSMAXAH,  LDSMAXAW,  LDSMAXAX,
                    LDSMAXLB,  LDSMAXLH,  LDSMAXLW,  LDSMAXLX,
                    LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
            (instrs LDSMINB,   LDSMINH,   LDSMINW,   LDSMINX,
                    LDSMINAB,  LDSMINAH,  LDSMINAW,  LDSMINAX,
                    LDSMINLB,  LDSMINLH,  LDSMINLW,  LDSMINLX,
                    LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
            (instrs LDUMAXB,   LDUMAXH,   LDUMAXW,   LDUMAXX,
                    LDUMAXAB,  LDUMAXAH,  LDUMAXAW,  LDUMAXAX,
                    LDUMAXLB,  LDUMAXLH,  LDUMAXLW,  LDUMAXLX,
                    LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
            (instrs LDUMINB,   LDUMINH,   LDUMINW,   LDUMINX,
                    LDUMINAB,  LDUMINAH,  LDUMINAW,  LDUMINAX,
                    LDUMINLB,  LDUMINLH,  LDUMINLW,  LDUMINLX,
                    LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;

// Swap.
def : InstRW<[A64FXWrite_SWP, WriteAtomic],
            (instrs SWPB,   SWPH,   SWPW,   SWPX,
                    SWPAB,  SWPAH,  SWPAW,  SWPAX,
                    SWPLB,  SWPLH,  SWPLW,  SWPLX,
                    SWPALB, SWPALH, SWPALW, SWPALX)>;

def : InstRW<[A64FXWrite_STUR, WriteAtomic],
            (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;

// SVE instructions

// SVE instructions are modeled more accurately than the other instructions
// in this file.
// TODO: Model the other instructions in the same way.

// -----------------------------------------------------------------------------
// SVE instructions that reuse the generic "<latency>Cyc_<port group>" writes.
// The port-group suffix refers to the ProcResGroups defined at the top of this
// file (GI0 = FLA, GI1 = PR, GI3 = FLB, GI03 = FLA/FLB, GI24 = EXA/EXB,
// GI5 = EAGA, GI56 = EAGA/EAGB).
// -----------------------------------------------------------------------------

def : InstRW<[A64FXWrite_4Cyc_GI0],
            (instregex "^AND_ZI", "^CL[SZ]_Z", "^CPY_ZP[mz]I", "^DUP_ZZ?I", "^DUPM_Z",
                       "^EOR_ZI", "^ORR_ZI", "^FCM(EQ|GT|GE|LT|LE|NE|UO)_P",
                       "^FCPY_Z", "^F(MAX|MIN).*I_", "^NEG_Z", "^[SU](MAX|MIN)_ZI",
                       "^SUBR?_ZI")>;

def : InstRW<[A64FXWrite_6Cyc_GI0],
            (instregex "^CLAST[AB]_[VZ]", "^COMPACT_Z", "^CPY_ZPmV", "^DUP_ZR",
                       "^EXT_Z", "^FDUP_Z", "^INSR_ZV", "^LAST[AB]_V", "^REV_Z",
                       "^SPLICE_Z", "^[SU]UNPK(HI|LO)_Z", "^TBL_Z", "^TRN[12]_Z")>;

def : InstRW<[A64FXWrite_9Cyc_GI0],
            (instregex "^F(ADD|SUBR?)_.*I_", "^FRECPS_Z", "^FRSQRTS_Z",
                       "^INDEX_II_[SD]", "^MUL_ZI")>;

def : InstRW<[A64FXWrite_4Cyc_GI3],
            (instregex "^CNT_Z")>;

def : InstRW<[A64FXWrite_4Cyc_GI03],
            (instregex "^ABS_Z", "^ADD_Z", "^AND_Z[^I]", "^ASRR?_(WIDE_)?Z",
                       "^BIC_Z", "^ADR_[SU]XTW_Z", "^CNOT_Z", "^DEC[BHWD]_Z",
                       "^EOR_Z[^I]", "^INC[BHWD]_Z", "^ORR_Z[^I]", "^FABS_Z",
                       "^FACG[ET]_P", "^FEXPA_Z", "^F(MAX|MIN)[^V]*Z_",
                       "^FNEG_Z", "^FRECP[EX]_Z", "^FRSQRTE_Z", "^FTSSEL_Z",
                       "^LS[LR]R?(_WIDE)?_Z", "^NOT_Z", "^RBIT_Z", "^REV[BHW]_Z", "^SABD_Z",
                       "^SEL_Z", "^[SU](MAX|MIN)_ZP", "^[SU]Q(INC|DEC)[^P]_Z",
                       "^SUBR?_Z[^I]", "^[SU]XT._Z", "^UABD_Z")>;

def : InstRW<[A64FXWrite_9Cyc_GI03],
            (instregex "^FABD_Z", "^F(ADD|SUBR?)_.*Z_", "^FN?(MAD|MLA|MLS|MSB)_ZP",
                       "^FMUL_(ZP|ZZZ_)", "^FMULX_Z", "^FCVT(ZS|ZU)?_Z",
                       "^FRINT._Z", "^FSCALE_Z", "^FTMAD_Z", "^FTSMUL_Z",
                       "^MAD_Z", "^MLA_Z", "^MLS_Z", "^MSB_Z", "^MUL_ZP",
                       "^[SU]CVTF_Z", "^[SU]DOT_ZZZ_", "^[SU]MULH_Z")>;

def : InstRW<[A64FXWrite_3Cyc_GI1],
            (instregex "^ANDS?_P", "^BICS?_P", "^BRK.*_P", "^EORS?_P", "^ORRS?_P",
                       "^NANDS?_P", "^NORS?_P", "^ORNS?_P", "^PFALSE", "^PNEXT",
                       "^PFIRST", "^PTEST", "^PTRUES?", "^PUNPK(HI|LO)",
                       "^RDFFRS?", "^REV_P", "^SEL_P", "^TRN[12]_P")>;

def : InstRW<[A64FXWrite_1Cyc_GI24],
            (instregex "^ADD[PV]L", "^CNT[BHWD]_X", "^DEC[BHWD]_X", "^INC[BHWD]_X",
                       "^RDVLI")>;

def : InstRW<[A64FXWrite_11Cyc_GI5],
            (instregex "^LDR_[PZ]XI")>;

def : InstRW<[A64FXWrite_11Cyc_GI56],
            (instregex "^LD(NF|FF|NT)?1R?S?[BHSWDQ]")>;

// -----------------------------------------------------------------------------
// SVE instructions with dedicated writes. Each SchedWriteRes is immediately
// followed by the InstRW that binds it to its opcodes.
// -----------------------------------------------------------------------------

// Write that consumes no processor resources.
def A64FXWrite_None : SchedWriteRes<[]> {
}
def : InstRW<[A64FXWrite_None], (instregex "^SETFFR", "^MOVPRFX")>;

def A64FXWrite_FMAIndexed : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def : InstRW<[A64FXWrite_FMAIndexed], (instregex "^F(MLA|MLS|MUL)_ZZZI")>;

def A64FXWrite_ADR_LSL_Z : SchedWriteRes<[A64FXGI0]> {
  let Latency = 5;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def : InstRW<[A64FXWrite_ADR_LSL_Z], (instregex "^ADR_LSL_Z")>;

def A64FXWrite_ASRD : SchedWriteRes<[A64FXGI0, A64FXGI01]> {
  let Latency = 8;
  let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_ASRD], (instregex "^ASRD_Z")>;

// Integer/logical reductions, one write per element size (B/H/S/D).
def A64FXWrite_Reduction4CycB : SchedWriteRes<[A64FXGI03]> {
  let Latency = 46;
  let NumMicroOps = 10;
  let ResourceCycles = [10];
}
def : InstRW<[A64FXWrite_Reduction4CycB],
            (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_B")>;

def A64FXWrite_Reduction4CycH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 42;
  let NumMicroOps = 9;
  let ResourceCycles = [9];
}
def : InstRW<[A64FXWrite_Reduction4CycH],
            (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_H")>;

def A64FXWrite_Reduction4CycS : SchedWriteRes<[A64FXGI03]> {
  let Latency = 38;
  let NumMicroOps = 8;
  let ResourceCycles = [8];
}
def : InstRW<[A64FXWrite_Reduction4CycS],
            (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_S")>;

def A64FXWrite_Reduction4CycD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 34;
  let NumMicroOps = 7;
  let ResourceCycles = [7];
}
def : InstRW<[A64FXWrite_Reduction4CycD],
            (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_D")>;

def A64FXWrite_CLAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 29;
}
def : InstRW<[A64FXWrite_CLAST_R], (instregex "^CLAST[AB]_R")>;

def A64FXWrite_CMP : SchedWriteRes<[A64FXGI0, A64FXGI1]> {
  let Latency = 4;
}
def : InstRW<[A64FXWrite_CMP], (instregex "^CMP.*_P")>;

def A64FXWrite_CNTP : SchedWriteRes<[A64FXGI1, A64FXGI2]> {
  let Latency = 6;
}
def : InstRW<[A64FXWrite_CNTP], (instregex "^CNTP_X")>;

def A64FXWrite_CPYScalar : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 8;
}
def : InstRW<[A64FXWrite_CPYScalar], (instregex "^CPY_ZPmR")>;

def A64FXWrite_CTERM : SchedWriteRes<[A64FXGI24]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def : InstRW<[A64FXWrite_CTERM], (instregex "^CTERM")>;

def A64FXWrite_INCPScalar : SchedWriteRes<[A64FXGI1, A64FXGI2, A64FXGI4]> {
  let Latency = 7;
  let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_INCPScalar], (instregex "^DECP_X", "^INCP_X")>;

def A64FXWrite_INCPVector : SchedWriteRes<[A64FXGI0, A64FXGI1]> {
  let Latency = 12;
}
def : InstRW<[A64FXWrite_INCPVector], (instregex "^DECP_Z", "^INCP_Z")>;

// FADDV, one write per element size (H/S/D).
def A64FXWrite_FADDVH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 75;
  let NumMicroOps = 11;
  let ResourceCycles = [11];
}
def : InstRW<[A64FXWrite_FADDVH], (instrs FADDV_VPZ_H)>;

def A64FXWrite_FADDVS : SchedWriteRes<[A64FXGI03]> {
  let Latency = 60;
  let NumMicroOps = 9;
  let ResourceCycles = [9];
}
def : InstRW<[A64FXWrite_FADDVS], (instrs FADDV_VPZ_S)>;

def A64FXWrite_FADDVD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 45;
  let NumMicroOps = 7;
  let ResourceCycles = [7];
}
def : InstRW<[A64FXWrite_FADDVD], (instrs FADDV_VPZ_D)>;

// FADDA, one write per element size (H/S/D).
def A64FXWrite_FADDAH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 468;
  let NumMicroOps = 63;
  let ResourceCycles = [63];
}
def : InstRW<[A64FXWrite_FADDAH], (instrs FADDA_VPZ_H)>;

def A64FXWrite_FADDAS : SchedWriteRes<[A64FXGI03]> {
  let Latency = 228;
  let NumMicroOps = 31;
  let ResourceCycles = [31];
}
def : InstRW<[A64FXWrite_FADDAS], (instrs FADDA_VPZ_S)>;

def A64FXWrite_FADDAD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 108;
  let NumMicroOps = 15;
  let ResourceCycles = [15];
}
def : InstRW<[A64FXWrite_FADDAD], (instrs FADDA_VPZ_D)>;

def A64FXWrite_FCADDZ : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
  let Latency = 15;
  let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_FCADDZ], (instregex "^FCADD_Z")>;

def A64FXWrite_FCMLAZ : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15;
  let NumMicroOps = 3;
  let ResourceCycles = [3];
}
def : InstRW<[A64FXWrite_FCMLAZ], (instregex "^FCMLA_Z")>;

// FDIV/FDIVR/FSQRT, one write per element size (H/S/D). The write holds
// GI0 for as many cycles as its latency.
def A64FXWrite_FDIVH : SchedWriteRes<[A64FXGI0]> {
  let Latency = 134;
  let ResourceCycles = [134];
}
def : InstRW<[A64FXWrite_FDIVH], (instregex "^F(DIVR?|SQRT)_Z.*_H")>;

def A64FXWrite_FDIVS : SchedWriteRes<[A64FXGI0]> {
  let Latency = 98;
  let ResourceCycles = [98];
}
def : InstRW<[A64FXWrite_FDIVS], (instregex "^F(DIVR?|SQRT)_Z.*_S")>;

def A64FXWrite_FDIVD : SchedWriteRes<[A64FXGI0]> {
  let Latency = 154;
  let ResourceCycles = [154];
}
def : InstRW<[A64FXWrite_FDIVD], (instregex "^F(DIVR?|SQRT)_Z.*_D")>;

// FMAXV/FMINV/FMAXNMV/FMINNMV, one write per element size (H/S/D).
def A64FXWrite_FMAXVH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 54;
  let NumMicroOps = 11;
  let ResourceCycles = [11];
}
def : InstRW<[A64FXWrite_FMAXVH], (instregex "^F(MAX|MIN)(NM)?V_VPZ_H")>;

def A64FXWrite_FMAXVS : SchedWriteRes<[A64FXGI03]> {
  let Latency = 44;
  let NumMicroOps = 9;
  let ResourceCycles = [9];
}
def : InstRW<[A64FXWrite_FMAXVS], (instregex "^F(MAX|MIN)(NM)?V_VPZ_S")>;

def A64FXWrite_FMAXVD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 34;
  let NumMicroOps = 7;
  let ResourceCycles = [7];
}
// The D-element opcodes must use the D write defined just above (not the
// H-element write), otherwise A64FXWrite_FMAXVD is never referenced.
def : InstRW<[A64FXWrite_FMAXVD], (instregex "^F(MAX|MIN)(NM)?V_VPZ_D")>;

// INDEX, split by operand form (RI/IR, II, RR) and element size.
def A64FXWrite_INDEX_RI_BH : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 17;
  let NumMicroOps = 2;
  let ResourceCycles = [2, 2];
}
def : InstRW<[A64FXWrite_INDEX_RI_BH], (instregex "^INDEX_(RI|IR)_[BH]")>;

def A64FXWrite_INDEX_RI_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 13;
  let NumMicroOps = 1;
}
def : InstRW<[A64FXWrite_INDEX_RI_SD], (instregex "^INDEX_(RI|IR)_[SD]")>;

def A64FXWrite_INDEX_II_BH : SchedWriteRes<[A64FXGI0]> {
  let Latency = 13;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def : InstRW<[A64FXWrite_INDEX_II_BH], (instregex "^INDEX_II_[BH]")>;

def A64FXWrite_INDEX_RR_BH : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI3]> {
  let Latency = 17;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 2, 1];
}
def : InstRW<[A64FXWrite_INDEX_RR_BH], (instregex "^INDEX_RR_[BH]")>;

def A64FXWrite_INDEX_RR_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 17;
  let NumMicroOps = 2;
  let ResourceCycles = [2, 1];
}
def : InstRW<[A64FXWrite_INDEX_RR_SD], (instregex "^INDEX_RR_[SD]")>;

def A64FXWrite_INSR_ZR : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 10;
}
def : InstRW<[A64FXWrite_INSR_ZR], (instregex "^INSR_ZR")>;

def A64FXWrite_LAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 25;
}
// LAST[AB]_R uses its own 25-cycle write defined just above, not the 29-cycle
// A64FXWrite_CLAST_R that belongs to CLAST[AB]_R.
def : InstRW<[A64FXWrite_LAST_R], (instregex "^LAST[AB]_R")>;

// Gather loads, split by element container (S/D) and addressing form
// (ZI = "_IMM" opcodes, RZ = all other forms).
def A64FXWrite_GLD_S_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 19;
  let ResourceCycles = [2, 4, 4];
}
def : InstRW<[A64FXWrite_GLD_S_ZI],
            (instregex "^GLD(FF)?1W_IMM", "^GLD(FF)?1S?[BHW]_S_IMM")>;

def A64FXWrite_GLD_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 16;
  let ResourceCycles = [1, 2, 2];
}
def : InstRW<[A64FXWrite_GLD_D_ZI],
            (instregex "^GLD(FF)?1D_IMM", "^GLD(FF)?1S?[BHW]_D_IMM")>;

def A64FXWrite_GLD_S_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 23;
  let ResourceCycles = [2, 1, 4, 4];
}
def : InstRW<[A64FXWrite_GLD_S_RZ],
            (instregex "^GLD(FF)?1W_[^DI]", "^GLD(FF)?1S?[BHW]_S_[^I]")>;

def A64FXWrite_GLD_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 20;
  let ResourceCycles = [1, 1, 2, 2];
}
def : InstRW<[A64FXWrite_GLD_D_RZ],
            (instregex "^GLD(FF)?1D_[^I]", "^GLD(FF)?1D$", "^GLD(FF)?1S?[BHW]_D_[^I]",
                       "^GLD(FF)?1S?[BHW]_D$")>;

// Structured loads LD2/LD3/LD4. B/H opcodes share one write per register
// count; W/D opcodes are split into the "_IMM" form and the unsuffixed form.
def A64FXWrite_LD2_BH : SchedWriteRes<[A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 3;
  let ResourceCycles = [9];
}
def : InstRW<[A64FXWrite_LD2_BH], (instregex "^LD2[BH]")>;

def A64FXWrite_LD2_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ResourceCycles = [2];
}
def : InstRW<[A64FXWrite_LD2_WD_IMM], (instregex "^LD2[WD]_IMM")>;

def A64FXWrite_LD2_WD : SchedWriteRes<[A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ResourceCycles = [3];
}
def : InstRW<[A64FXWrite_LD2_WD], (instregex "^LD2[WD]$")>;

def A64FXWrite_LD3_BH : SchedWriteRes<[A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 4;
  let ResourceCycles = [13];
}
def : InstRW<[A64FXWrite_LD3_BH], (instregex "^LD3[BH]")>;

def A64FXWrite_LD3_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ResourceCycles = [3];
}
def : InstRW<[A64FXWrite_LD3_WD_IMM], (instregex "^LD3[WD]_IMM")>;

def A64FXWrite_LD3_WD : SchedWriteRes<[A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 4;
  let ResourceCycles = [4];
}
def : InstRW<[A64FXWrite_LD3_WD], (instregex "^LD3[WD]$")>;

def A64FXWrite_LD4_BH : SchedWriteRes<[A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 5;
  let ResourceCycles = [17];
}
def : InstRW<[A64FXWrite_LD4_BH], (instregex "^LD4[BH]")>;

def A64FXWrite_LD4_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ResourceCycles = [4];
}
def : InstRW<[A64FXWrite_LD4_WD_IMM], (instregex "^LD4[WD]_IMM")>;

def A64FXWrite_LD4_WD : SchedWriteRes<[A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 5;
  let ResourceCycles = [5];
}
def : InstRW<[A64FXWrite_LD4_WD], (instregex "^LD4[WD]$")>;

// Prefetches. No Latency is set on these writes; only resource usage is
// described.
def A64FXWrite_PRF : SchedWriteRes<[A64FXGI56]> {
}
def : InstRW<[A64FXWrite_PRF], (instregex "^PRF._PR")>;

def A64FXWrite_PRF_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
  let ResourceCycles = [2, 1, 4];
}
def : InstRW<[A64FXWrite_PRF_W_RZ], (instregex "^PRF._S_[^P]")>;

def A64FXWrite_PRF_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let ResourceCycles = [2, 4];
}
def : InstRW<[A64FXWrite_PRF_W_ZI], (instregex "^PRF._S_PZI")>;

def A64FXWrite_PRF_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
  let ResourceCycles = [1, 1, 2];
}
def : InstRW<[A64FXWrite_PRF_D_RZ], (instregex "^PRF._D_[^P]")>;

def A64FXWrite_PRF_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let ResourceCycles = [1, 2];
}
def : InstRW<[A64FXWrite_PRF_D_ZI], (instregex "^PRF._D_PZI")>;

// Integer divides, one write per element size (S/D). The write holds GI0 for
// as many cycles as its latency.
def A64FXWrite_SDIV_S : SchedWriteRes<[A64FXGI0]> {
  let Latency = 114;
  let ResourceCycles = [114];
}
def : InstRW<[A64FXWrite_SDIV_S], (instregex "^[SU]DIVR?.*_S")>;

def A64FXWrite_SDIV_D : SchedWriteRes<[A64FXGI0]> {
  let Latency = 178;
  let ResourceCycles = [178];
}
def : InstRW<[A64FXWrite_SDIV_D], (instregex "^[SU]DIVR?.*_D")>;

def A64FXWrite_SDOT_I : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
  let Latency = 15;
  let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_SDOT_I], (instregex "^[SU]DOT_ZZZI")>;

def A64FXWrite_SQINC_Scalar : SchedWriteRes<[A64FXGI24]> {
  let Latency = 2;
  let ResourceCycles = [2];
}
def : InstRW<[A64FXWrite_SQINC_Scalar], (instregex "^[SU]Q(INC|DEC)[BHWD]_[WX]")>;

def A64FXWrite_SQINCP_X : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
  let Latency = 6;
  let NumMicroOps = 2;
  let ResourceCycles = [3, 1];
}
def : InstRW<[A64FXWrite_SQINCP_X], (instregex "^[SU]Q(INC|DEC)P_[WX]")>;

def A64FXWrite_SQINCP_Z : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
  let Latency = 12;
}
def : InstRW<[A64FXWrite_SQINCP_Z], (instregex "^[SU]Q(INC|DEC)P_Z")>;

// Contiguous stores.
def A64FXWrite_ST1 : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_ST1], (instregex "^ST(NT)?1[BHWD]")>;

// Scatter stores, split by element container (W/D) and addressing form
// (ZI = "_I..." opcodes, RZ = all other forms).
def A64FXWrite_SST1_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 20;
  let NumMicroOps = 8;
  let ResourceCycles = [8, 8, 8, 8];
}
def : InstRW<[A64FXWrite_SST1_W_RZ],
            (instregex "^SST1[BH]_S(_[^I]|$)", "^SST1W(_[^ID]|$)")>;

def A64FXWrite_SST1_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 20;
  let NumMicroOps = 4;
  let ResourceCycles = [4, 4, 4, 4];
}
def : InstRW<[A64FXWrite_SST1_D_RZ],
            (instregex "^SST1[BHW]_D(_[^I]|$)", "^SST1D(_[^I]|$)")>;

def A64FXWrite_SST1_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 16;
  let NumMicroOps = 8;
  let ResourceCycles = [12, 8, 8];
}
def : InstRW<[A64FXWrite_SST1_W_ZI],
            (instregex "^SST1[BH]_S_I", "^SST1W_I")>;

def A64FXWrite_SST1_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 16;
  let NumMicroOps = 4;
  let ResourceCycles = [4, 4, 4];
}
def : InstRW<[A64FXWrite_SST1_D_ZI],
            (instregex "^SST1[BHW]_D_I", "^SST1D_I")>;

// Structured stores ST2/ST3. B/H opcodes share one write per register count.
// For W/D opcodes the "_RI" write is bound to the "_I..." (immediate-offset)
// opcode names and the "_RR" write to the unsuffixed opcode names, mirroring
// the LDn[WD]_IMM / LDn[WD] split used for the structured loads above.
// NOTE(review): the RI/RR-to-opcode mapping is derived from the write names and
// the load model; confirm against the A64FX microarchitecture manual.
def A64FXWrite_ST2_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ResourceCycles = [8, 9];
}
def : InstRW<[A64FXWrite_ST2_BH], (instregex "^ST2[BH]")>;

def A64FXWrite_ST2_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ResourceCycles = [2, 2];
}
def : InstRW<[A64FXWrite_ST2_WD_RI], (instregex "^ST2[WD]_I")>;

def A64FXWrite_ST2_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ResourceCycles = [2, 3];
}
def : InstRW<[A64FXWrite_ST2_WD_RR], (instregex "^ST2[WD]$")>;

def A64FXWrite_ST3_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 4;
  let ResourceCycles = [12, 13];
}
def : InstRW<[A64FXWrite_ST3_BH], (instregex "^ST3[BH]")>;

def A64FXWrite_ST3_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ResourceCycles = [3, 3];
}
def : InstRW<[A64FXWrite_ST3_WD_RI], (instregex "^ST3[WD]_I")>;

def A64FXWrite_ST3_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 4;
  let ResourceCycles = [3, 4];
}
def : InstRW<[A64FXWrite_ST3_WD_RR], (instregex "^ST3[WD]$")>;

// Structured stores ST4.
def A64FXWrite_ST4_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 5;
  let ResourceCycles = [16, 17];
}
def : InstRW<[A64FXWrite_ST4_BH], (instregex "^ST4[BH]")>;

def A64FXWrite_ST4_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ResourceCycles = [4, 4];
}
// ST4W/ST4D: the "_RI" write is bound to the "_I..." (immediate-offset) opcode
// names and the "_RR" write to the unsuffixed opcode names, mirroring the
// LD4[WD]_IMM / LD4[WD] split used for the structured loads earlier in this
// file.
// NOTE(review): the RI/RR-to-opcode mapping is derived from the write names and
// the load model; confirm against the A64FX microarchitecture manual.
def : InstRW<[A64FXWrite_ST4_WD_RI], (instregex "^ST4[WD]_I")>;

def A64FXWrite_ST4_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 5;
  let ResourceCycles = [4, 5];
}
def : InstRW<[A64FXWrite_ST4_WD_RR], (instregex "^ST4[WD]$")>;

// Predicate register store (STR_PXI).
def A64FXWrite_STR_P : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_STR_P], (instrs STR_PXI)>;

// Vector register store (STR_ZXI).
def A64FXWrite_STR_Z : SchedWriteRes<[A64FXGI0, A64FXGI5]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_STR_Z], (instrs STR_ZXI)>;

// WHILEL* opcodes (any single character after "WHILEL", then "_P").
def A64FXWrite_WHILE : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let Latency = 4;
}
def : InstRW<[A64FXWrite_WHILE], (instregex "^WHILEL._P")>;

def A64FXWrite_WRFFR : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let Latency = 3;
  let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_WRFFR], (instrs WRFFR)>;

} // SchedModel = A64FXModel