//=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Fujitsu A64FX processors.
//
//===----------------------------------------------------------------------===//

// Machine-level parameters of the A64FX scheduling model.
def A64FXModel : SchedMachineModel {
  // 6 micro-ops dispatched at a time.
  let IssueWidth = 6;
  // 180 entries in micro-op re-order buffer.
  let MicroOpBufferSize = 180;
  // Optimistic load latency.
  let LoadLatency = 5;
  // Extra cycles for mispredicted branch.
  let MispredictPenalty = 12;
  // Determined via a mix of micro-arch details and experimentation.
  let LoopMicroOpBufferSize = 128;
  // Using PostRA sched.
  let PostRAScheduler = 1;
  let CompleteModel = 1;

  // Predicates this model lists as unsupported.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SMEUnsupported.F, SVEUnsupported.F,
                  [HasMTE, HasMatMulInt8, HasBF16,
                   HasPAuth, HasPAuthLR, HasCPA]);

  // NOTE(review): presumably disabled because several opcodes in this file
  // are matched by more than one InstRW - confirm before re-enabling.
  let FullInstRWOverlapCheck = 0;
}

// Definitions inside this block get SchedModel = A64FXModel.
let SchedModel = A64FXModel in {

// Define the issue ports (A64FXIP*), one unit per port.
def A64FXIPFLA  : ProcResource<1>; // Port 0
def A64FXIPPR   : ProcResource<1>; // Port 1
def A64FXIPEXA  : ProcResource<1>; // Port 2
def A64FXIPFLB  : ProcResource<1>; // Port 3
def A64FXIPEXB  : ProcResource<1>; // Port 4
def A64FXIPEAGA : ProcResource<1>; // Port 5
def A64FXIPEAGB : ProcResource<1>; // Port 6
def A64FXIPBR   : ProcResource<1>; // Port 7

// Define groups for the functional units on each issue port. Each group
// created will be used by a WriteRes later on.

// Groups that wrap a single issue port.
def A64FXGI7 : ProcResGroup<[A64FXIPBR]>;
def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>;
def A64FXGI1 : ProcResGroup<[A64FXIPPR]>;
def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>;
def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>;
def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>;
def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>;
def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>;

// Groups that span several issue ports.
def A64FXGI03   : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>;
def A64FXGI01   : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>;
def A64FXGI24   : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>;
def A64FXGI56   : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>;
def A64FXGI056  : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>;
def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB,
                                A64FXIPEAGA, A64FXIPEAGB]>;

// Group containing every issue port.
def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB,
                             A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB,
                             A64FXIPBR]>;

// Latency-only writes, named A64FXWrite_<latency>Cyc_<group>.
def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]> { let Latency = 1; }

def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 2; }
def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 4; }
def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 6; }
def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 8; }
def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 9; }

def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]> { let Latency = 3; }

def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { let Latency = 5; }

def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { let Latency = 4; }
def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { let Latency = 6; }

def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 4; }
def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }
def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 9; }

def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 10; }
def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 12; }
def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 20; }

def A64FXWrite_5Cyc_GI5  : SchedWriteRes<[A64FXGI5]> { let Latency = 5; }
def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]> { let Latency = 11; }

def A64FXWrite_5Cyc_GI6 : SchedWriteRes<[A64FXGI6]> { let Latency = 5; }

def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { let Latency = 1; }
def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { let Latency = 2; }
def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]> {
  let Latency = 4; let NumMicroOps = 4;
}

def A64FXWrite_1Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_5Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 5; }
def A64FXWrite_8Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 8; }
def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { let Latency = 11; }

// Load writes that also set a micro-op count.
def A64FXWrite_LDNP : SchedWriteRes<[A64FXGI56]> {
  let Latency = 5; let NumMicroOps = 2;
}
def A64FXWrite_LDP01 : SchedWriteRes<[A64FXGI2456]> {
  let Latency = 5; let NumMicroOps = 3;
}
def A64FXWrite_LDR01 : SchedWriteRes<[A64FXGI2456]> {
  let Latency = 5; let NumMicroOps = 2;
}

def A64FXWrite_LD102 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD103 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 2;
}
def A64FXWrite_LD104 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD105 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 3;
}
def A64FXWrite_LD106 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD107 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 4;
}
def A64FXWrite_LD108 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD109 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 2;
}
def A64FXWrite_LD110 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD111 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 3;
}
def A64FXWrite_LD112 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD113 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 4;
}
def A64FXWrite_LD114 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 5;
}
def A64FXWrite_LD115 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 5;
}

def A64FXWrite_LD1I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD1I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD2I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD2I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 5;
}
def A64FXWrite_LD3I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 6;
}
def A64FXWrite_LD3I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 7;
}
def A64FXWrite_LD4I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 8;
}
def A64FXWrite_LD4I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 9;
}

def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]> { let Latency = 1; }

// Writes named after the operation they are meant for.
def A64FXWrite_FMOV_GV   : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }
def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_ADDLV     : SchedWriteRes<[A64FXGI03]> { let Latency = 12; }
def A64FXWrite_MULLE     : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_MULLV     : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_MADDL     : SchedWriteRes<[A64FXGI03]> { let Latency = 6; }
def A64FXWrite_ABA       : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }
def A64FXWrite_ABAL      : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }

def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 12; let NumMicroOps = 6;
}
def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 14; let NumMicroOps = 6;
}

def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]> { let Latency = 9; }
def A64FXWrite_PMUL     : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }

def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 2;
}

def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 2;
}
def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 3;
}
def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 2;
}
def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 2;
}
def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 14; let NumMicroOps = 7;
}

def A64FXWrite_BIF        : SchedWriteRes<[A64FXGI03]> { let Latency = 5; }
def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }
def A64FXWrite_SHA00      : SchedWriteRes<[A64FXGI0]>  { let Latency = 9; }
def A64FXWrite_SHA01      : SchedWriteRes<[A64FXGI0]>  { let Latency = 12; }
def A64FXWrite_SMOV       : SchedWriteRes<[A64FXGI03]> { let Latency = 25; }

def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 5;
}
def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 7;
}
def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 9;
}

// Prefetch, swap and store writes on the A64FXGI56 group.
def A64FXWrite_PREF0 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_PREF1 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_SWP   : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STUR  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STNP  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STP01 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST10  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST11  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST12  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST13  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST14  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST15  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST16  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST17  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_CAS   : SchedWriteRes<[A64FXGI56]> { let Latency = 7; }

// Define commonly used read types.

// No forwarding is provided for these types (every advance is 0 cycles).
def : ReadAdvance<ReadI,       0>;
def : ReadAdvance<ReadISReg,   0>;
def : ReadAdvance<ReadIEReg,   0>;
def : ReadAdvance<ReadIM,      0>;
def : ReadAdvance<ReadIMA,     0>;
def : ReadAdvance<ReadID,      0>;
def : ReadAdvance<ReadExtrHi,  0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadST,      0>;
def : ReadAdvance<ReadVLD,     0>;

//===----------------------------------------------------------------------===//
// 3. Instruction Tables.

//---
// 3.1 Branch Instructions
//---

// Branch, immed
// Branch and link, immed
// Compare and branch
def : WriteRes<WriteBr, [A64FXGI7]> { let Latency = 1; }

// Branch, register
// Branch and link, register != LR
// Branch and link, register = LR
def : WriteRes<WriteBrReg, [A64FXGI7]> { let Latency = 1; }

// System, barrier, hint and atomic writes consume no issue-port resource.
def : WriteRes<WriteSys,     []> { let Latency = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint,    []> { let Latency = 1; }
def : WriteRes<WriteAtomic,  []> { let Latency = 4; }

//---
// Branch
//---
def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR)>;
def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs RET)>;
def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^B..$")>;
def : InstRW<[A64FXWrite_1Cyc_GI7],
            (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;

//---
// 3.2 Arithmetic and Logical Instructions
// 3.3 Move and Shift Instructions
//---

// ALU, basic
// Conditional compare
// Conditional select
// Address generation
def : WriteRes<WriteI, [A64FXGI2456]> { let Latency = 1; }

def : InstRW<[WriteI],
            (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
                       "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
                       "ADC(W|X)r",
                       "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
                       "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
                       "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
                       "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
                       "SBCS(W|X)r", "CCMN(W|X)(i|r)",
                       "CCMP(W|X)(i|r)", "CSEL(W|X)r",
                       "CSINC(W|X)r", "CSINV(W|X)r",
                       "CSNEG(W|X)r")>;

def : InstRW<[WriteI], (instrs COPY)>;

// ALU, extend and/or shift
def : WriteRes<WriteISReg, [A64FXGI2456]> { let Latency = 2; }

// NOTE(review): this regex list is identical to the one bound to WriteI
// above, so the same opcodes are matched again - confirm which binding is
// intended before narrowing either list.
def : InstRW<[WriteISReg],
            (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
                       "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
                       "ADC(W|X)r",
                       "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
                       "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
                       "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
                       "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
                       "SBCS(W|X)r", "CCMN(W|X)(i|r)",
                       "CCMP(W|X)(i|r)", "CSEL(W|X)r",
                       "CSINC(W|X)r", "CSINV(W|X)r",
                       "CSNEG(W|X)r")>;

def : WriteRes<WriteIEReg, [A64FXGI2456]> { let Latency = 1; }

// NOTE(review): third binding of the same regex list (see WriteI and
// WriteISReg above) - confirm which one is intended.
def : InstRW<[WriteIEReg],
            (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
                       "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
                       "ADC(W|X)r",
                       "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
                       "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
                       "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
                       "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
                       "SBCS(W|X)r", "CCMN(W|X)(i|r)",
                       "CCMP(W|X)(i|r)", "CSEL(W|X)r",
                       "CSINC(W|X)r", "CSINV(W|X)r",
                       "CSNEG(W|X)r")>;

// Move immed
def : WriteRes<WriteImm, [A64FXGI2456]> { let Latency = 1; }

def : InstRW<[A64FXWrite_1Cyc_GI2456],
            (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;

// Variable shift
def : InstRW<[A64FXWrite_2Cyc_GI24],
            (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;

def : WriteRes<WriteIS, [A64FXGI2456]> { let Latency = 1; }

//---
// 3.4 Divide and Multiply Instructions
//---

// Divide, W-form
// The unit is held for the full latency (ReleaseAtCycles == Latency).
def : WriteRes<WriteID32, [A64FXGI4]> {
  let Latency = 39;
  let ReleaseAtCycles = [39];
}

// Divide, X-form
def : WriteRes<WriteID64, [A64FXGI4]> {
  let Latency = 23;
  let ReleaseAtCycles = [23];
}

// Multiply accumulate, W-form
def : WriteRes<WriteIM32, [A64FXGI2456]> { let Latency = 5; }

// Multiply accumulate, X-form
def : WriteRes<WriteIM64, [A64FXGI2456]> { let Latency = 5; }

// W-form opcodes use WriteIM32 and X-form opcodes use WriteIM64. Both writes
// have the same resources and latency in this model, so only the
// classification differs.
def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
def : InstRW<[WriteIM64], (instrs MADDXrrr, MSUBXrrr)>;
def : InstRW<[A64FXWrite_MADDL],
            (instregex "(S|U)(MADDL|MSUBL)rrr")>;

def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;

// Bitfield extract, two reg
def : WriteRes<WriteExtr, [A64FXGI2456]> { let Latency = 1; }

// Multiply high
def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>;

// Miscellaneous Data-Processing Instructions
// Bitfield extract
def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move - basic
def : InstRW<[A64FXWrite_1Cyc_GI24],
            (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;

// Bitfield move, insert
// NOTE(review): the second regex is unanchored and also matches every opcode
// the first one matches - confirm the overlap is intended.
def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>;
def : InstRW<[A64FXWrite_1Cyc_GI24], (instregex "(S|U)?BFM.*")>;

// Count leading
def : InstRW<[A64FXWrite_2Cyc_GI0], (instregex "^CLS(W|X)r$",
                                               "^CLZ(W|X)r$")>;

// Reverse bits
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>;

// Cryptography Extensions
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AES[DE]")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AESI?MC")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^PMULL")>;
def : InstRW<[A64FXWrite_SHA00], (instregex "^SHA1SU0")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA1(H|SU1)")>;
def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA1[CMP]")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU0")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU1")>;
def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA256(H|H2)")>;

// CRC Instructions
def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32Brr, CRC32Hrr)>;
def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr)>;
def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr)>;

def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32CBrr, CRC32CHrr)>;
def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32CWrr)>;
def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32CXrr)>;

// Reverse bits/bytes
// NOTE: Handled by WriteI.

//---
// 3.6 Load Instructions
// 3.10 FP Load Instructions
//---

// Load register, literal
// Load register, unscaled immed
// Load register, immed unprivileged
// Load register, unsigned immed
def : WriteRes<WriteLD, [A64FXGI56]> { let Latency = 4; }

// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
def : WriteRes<WriteAdr, [A64FXGI2456]> { let Latency = 1; }

// Load pair, immed offset, normal
// Load pair, immed offset, signed words, base != SP
// Load pair, immed offset signed words, base = SP
// LDP only breaks into *one* LS micro-op. Thus
// the resources are handled by WriteLD.
def : WriteRes<WriteLDHi, []> { let Latency = 5; }

// Load register offset, basic
// Load register, register offset, scale by 4/8
// Load register, register offset, scale by 2
// Load register offset, extend
// Load register, register offset, extend, scale by 4/8
// Load register, register offset, extend, scale by 2
//
// Both variants (scaled index and the default) select the same write.
def A64FXWriteLDIdx : SchedWriteVariant<
    [SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>,
     SchedVar<NoSchedPred, [A64FXWrite_1Cyc_GI56]>]>;
def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>;

// Both variants select ReadDefault for the address base.
def A64FXReadAdrBase : SchedReadVariant<
    [SchedVar<ScaledIdxPred, [ReadDefault]>,
     SchedVar<NoSchedPred, [ReadDefault]>]>;
def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>;

// Load pair, immed pre-index, normal
// Load pair, immed pre-index, signed words
// Load pair, immed post-index, normal
// Load pair, immed post-index, signed words
// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.

// Per-opcode load bindings. Each opcode is listed exactly once, grouped by
// the write list it uses.

// Load pair, immed offset (LDNP* and LDP*).
def : InstRW<[A64FXWrite_LDNP, WriteLDHi],
            (instrs LDNPDi, LDNPQi, LDNPSi, LDNPWi, LDNPXi)>;
def : InstRW<[A64FXWrite_LDNP, WriteLDHi],
            (instrs LDPDi, LDPQi, LDPSi, LDPSWi, LDPWi, LDPXi)>;

// Load register, unsigned immed.
def : InstRW<[A64FXWrite_5Cyc_GI56],
            (instrs LDRBui, LDRDui, LDRHui, LDRQui, LDRSui)>;

// Load register, literal (uses the A64FXGI6-only write).
def : InstRW<[A64FXWrite_5Cyc_GI6],
            (instrs LDRDl, LDRQl, LDRWl, LDRXl)>;

// Load register, immed unprivileged.
def : InstRW<[A64FXWrite_5Cyc_GI56],
            (instrs LDTRBi, LDTRHi, LDTRWi, LDTRXi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56],
            (instrs LDTRSBWi, LDTRSBXi, LDTRSHWi, LDTRSHXi, LDTRSWi)>;

// Load pair, immed pre-index.
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPDpre, LDPQpre, LDPSpre, LDPWpre, LDPXpre)>;

// Load register, immed pre-index.
def : InstRW<[A64FXWrite_LDR01, WriteAdr],
            (instrs LDRBpre, LDRDpre, LDRHpre, LDRQpre, LDRSpre,
                    LDRWpre, LDRXpre)>;

// Sign-extending and BB/HH loads: pre- and post-index forms share one
// write list.
def : InstRW<[A64FXWrite_LDR01, WriteAdr],
            (instrs LDRSBWpre, LDRSBXpre, LDRSBWpost, LDRSBXpost)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr],
            (instrs LDRSHWpre, LDRSHXpre, LDRSHWpost, LDRSHXpost)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr],
            (instrs LDRBBpre, LDRBBpost)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr],
            (instrs LDRHHpre, LDRHHpost)>;

// Load pair, immed post-index.
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr],
            (instrs LDPDpost, LDPQpost, LDPSpost, LDPWpost, LDPXpost)>;

// Load register, immed post-index.
def : InstRW<[A64FXWrite_LDR01, WriteI],
            (instrs LDRBpost, LDRDpost, LDRHpost, LDRQpost, LDRSpost,
                    LDRWpost, LDRXpost)>;

// Load register, register offset, W index.
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
            (instrs LDRBroW, LDRDroW, LDRHroW, LDRHHroW, LDRQroW, LDRSroW,
                    LDRSHWroW, LDRSHXroW, LDRWroW, LDRXroW)>;

// Load register, register offset, X index.
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase],
            (instrs LDRBroX, LDRDroX, LDRHroX, LDRHHroX, LDRQroX, LDRSroX,
                    LDRSHWroX, LDRSHXroX, LDRWroX, LDRXroX)>;

// Load register, unscaled immed.
def : InstRW<[A64FXWrite_5Cyc_GI56],
            (instrs LDURBi, LDURBBi, LDURDi, LDURHi, LDURHHi, LDURQi,
                    LDURSi, LDURXi, LDURSBWi, LDURSBXi, LDURSHWi, LDURSHXi,
                    LDURSWi)>;

//---
// Prefetch
//---
def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>;
def : InstRW<[A64FXWrite_PREF1],
            (instrs PRFUMi, PRFMui, PRFMroW, PRFMroX)>;

//--
// 3.7 Store Instructions
// 3.11 FP Store Instructions
//--

// Store register, unscaled immed
// Store register, immed unprivileged
// Store register, unsigned immed
def : WriteRes<WriteST, [A64FXGI56]> { let Latency = 1; }

// Store register, immed post-index
// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase

// Store register, immed pre-index
// NOTE: Handled by WriteAdr, WriteST

// Store register, register offset, basic
// Store register, register offset, scaled by 4/8
// Store register, register offset, scaled by 2
// Store register, register offset, extend
// Store register, register offset, extend, scale by 4/8
// Store register, register offset, extend, scale by 1
def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]> { let Latency = 1; }

// Store pair, immed offset, W-form
// Store pair, immed offset, X-form
def : WriteRes<WriteSTP, [A64FXGI56]> { let Latency = 1; }

// Store pair, immed post-index, W-form
// Store pair, immed post-index, X-form
// Store pair, immed pre-index, W-form
// Store pair, immed pre-index, X-form
// NOTE: Handled by WriteAdr, WriteSTP.
// Per-opcode overrides for scalar and FP stores.
//
// Each InstRW record below binds a write list to an explicit opcode list.
// Records that repeated an earlier record verbatim (same write list, same
// opcodes) have been folded away; the relative order of the remaining
// bindings for any given opcode is unchanged.

// STUR*: store register, unscaled immed.
def : InstRW<[A64FXWrite_STUR],
             (instrs STURBi, STURBBi, STURDi, STURHi, STURHHi,
                     STURQi, STURSi, STURWi, STURXi)>;

// STTR*: store register, immed unprivileged.
def : InstRW<[WriteAdr, A64FXWrite_STUR],
             (instrs STTRBi, STTRHi, STTRWi, STTRXi)>;

// STNP*i and STP*i both use A64FXWrite_STNP.
def : InstRW<[A64FXWrite_STNP],
             (instrs STNPDi, STNPQi, STNPXi, STNPWi,
                     STPDi, STPQi, STPXi, STPWi)>;

// STR*ui: store register, unsigned immed.
// NOTE(review): STRBBui, STRHHui, STRSui are not listed in this group;
// confirm whether they were meant to be (each listed opcode used to appear
// twice, which suggests a copy/paste slip).
def : InstRW<[A64FXWrite_STUR],
             (instrs STRBui, STRDui, STRHui, STRQui, STRXui, STRWui)>;

// STP*pre / STP*post: store pair, immed pre-/post-index.
// NOTE(review): every opcode is bound twice with different operand lists,
// first without and then with ReadAdrBase. The model sets
// FullInstRWOverlapCheck = 0, so TableGen does not reject the overlap.
// Confirm which binding is intended and drop the other.
def : InstRW<[A64FXWrite_STP01],
             (instrs STPDpre, STPDpost, STPQpre, STPQpost,
                     STPSpre, STPSpost, STPWpre, STPWpost,
                     STPXpre, STPXpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
             (instrs STPDpre, STPDpost, STPQpre, STPQpost,
                     STPSpre, STPSpost, STPWpre, STPWpost,
                     STPXpre, STPXpost)>;

// STR*pre / STR*post: store register, immed pre-/post-index.
// NOTE(review): same double binding as the store-pair records above
// (without, then with, ReadAdrBase); confirm which one is intended.
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRBpre, STRBpost, STRBBpre, STRBBpost,
                     STRDpre, STRDpost, STRHpre, STRHpost,
                     STRHHpre, STRHHpost, STRQpre, STRQpost,
                     STRSpre, STRSpost, STRWpre, STRWpost,
                     STRXpre, STRXpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRBpre, STRBpost, STRBBpre, STRBBpost,
                     STRDpre, STRDpost, STRHpre, STRHpost,
                     STRHHpre, STRHHpost, STRQpre, STRQpost,
                     STRSpre, STRSpost, STRWpre, STRWpost,
                     STRXpre, STRXpost)>;

// STR*roW / STR*roX: store register, register offset.
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRBroW, STRBroX, STRBBroW, STRBBroX,
                     STRDroW, STRDroX, STRHroW, STRHroX,
                     STRHHroW, STRHHroX, STRQroW, STRQroX,
                     STRSroW, STRSroX, STRWroW, STRWroX,
                     STRXroW, STRXroX)>;
// STRXroW / STRXroX: store register, register offset.
def : InstRW<[A64FXWrite_STUR, ReadAdrBase], (instrs STRXroW, STRXroX)>;

//---
// 3.8 FP Data Processing Instructions
//---

// FP absolute value
// FP min/max
// FP negate
let Latency = 4, ReleaseAtCycles = [2] in
def : WriteRes<WriteF, [A64FXGI03]>;

// FP arithmetic
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instrs FADDDrr, FADDHrr, FSUBDrr, FSUBHrr)>;

// FP compare
let Latency = 4, ReleaseAtCycles = [2] in
def : WriteRes<WriteFCmp, [A64FXGI03]>;

// FP Div, Sqrt
// Default for anything that only carries the generic WriteFDiv.
let Latency = 43 in
def : WriteRes<WriteFDiv, [A64FXGI0]>;

// Named divide / square-root writes, all issued on A64FXGI0 and differing
// only in latency.
let Latency = 38 in
def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]>;

let Latency = 29 in {
  def A64FXXWriteFDivSP  : SchedWriteRes<[A64FXGI0]>;
  def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]>;
}

let Latency = 43 in {
  def A64FXXWriteFDivDP  : SchedWriteRes<[A64FXGI0]>;
  def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]>;
}

// FP divide, S-form
// FP square root, S-form
// The scalar opcodes are bound by name; the former instregex "^FDIVSrr" /
// "^FSQRTSr" records matched exactly the same opcodes with the same write
// and have been dropped.
// NOTE(review): "^.*SQRT.*32$" is not anchored to FSQRT, so it would also
// match any FRSQRTE*/FRSQRTS*/URSQRTE* opcode whose name ends in 32 --
// verify that giving those the full square-root latency is intended.
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>;
def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>;
def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;

// FP divide, D-form
// FP square root, D-form
// Same layout as the S-form group; the redundant "^FDIVDrr" / "^FSQRTDr"
// instregex records have been dropped.
// NOTE(review): "^.*SQRT.*64$" has the same over-broad shape as the 32-bit
// pattern above -- verify.
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>;
def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>;
def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;

// FP round to integral
def : InstRW<[A64FXWrite_9Cyc_GI03],
             (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;

// FP select
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>;

//---
// 3.9 FP Miscellaneous Instructions
//---

// FP convert, from vec to vec reg
// FP convert, from gen to vec reg
// FP convert, from vec to gen reg
let Latency = 9, ReleaseAtCycles = [2] in
def : WriteRes<WriteFCvt, [A64FXGI03]>;

// FP move, immed
// FP move, register
let Latency = 4, ReleaseAtCycles = [2] in
def : WriteRes<WriteFImm, [A64FXGI0]>;

// FP transfer, from gen to vec reg
// FP transfer, from vec to gen reg
let Latency = 4, ReleaseAtCycles = [2] in
def : WriteRes<WriteFCopy, [A64FXGI0]>;

// Moves to / from the high half of a vector register get dedicated writes.
def : InstRW<[A64FXWrite_FMOV_GV], (instrs FMOVXDHighr)>;
def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;

//---
// 3.12 ASIMD Integer Instructions
//---

// ASIMD absolute diff, D-form
// ASIMD absolute diff, Q-form
// ASIMD absolute diff accum, D-form
// ASIMD absolute diff accum, Q-form
// ASIMD absolute diff accum long
// ASIMD absolute diff long
// ASIMD arith, basic
// ASIMD arith, complex
// ASIMD compare
// ASIMD logical (AND, BIC, EOR)
// ASIMD max/min, basic
// ASIMD max/min, reduce, 4H/4S
// ASIMD max/min, reduce, 8B/8H
// ASIMD max/min, reduce, 16B
// ASIMD multiply, D-form
// ASIMD multiply, Q-form
// ASIMD multiply accumulate long
// ASIMD multiply accumulate saturating long
// ASIMD multiply long
// ASIMD pairwise add and accumulate
// ASIMD shift accumulate
// ASIMD shift by immed, basic
// ASIMD shift by immed and insert, basic, D-form
// ASIMD shift by immed and insert, basic, Q-form
// ASIMD shift by immed, complex
// ASIMD shift by register, basic, D-form
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
// Generic D-form / Q-form vector writes: 4 cycles on A64FXGI03.
let Latency = 4 in {
  def : WriteRes<WriteVd, [A64FXGI03]>;
  def : WriteRes<WriteVq, [A64FXGI03]>;
}

// NOTE(review): many instregex records in this section overlap (the same
// opcode is matched by several records, sometimes with different write
// lists). The model sets FullInstRWOverlapCheck = 0, so TableGen does not
// diagnose this. Records that merely repeated an earlier record of this
// section (same write list, same or narrower pattern set) have been removed;
// every remaining record keeps its original relative position so that the
// resolution of the remaining overlaps is unchanged.

// ASIMD arith, reduce, 4H/4S
// ASIMD arith, reduce, 8B/8H
// ASIMD arith, reduce, 16B

// ASIMD logical (AND, BIC, EOR, MVN (alias for NOT), ORN, ORR)
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;

// ASIMD arith, reduce
// NOTE(review): ADDV / [SU]ADDLV are matched again further down with
// A64FXWrite_ADDLV1 and A64FXWrite_MINMAXV -- confirm the intended write.
def : InstRW<[A64FXWrite_ADDLV],
             (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;

// ASIMD multiply long / multiply accumulate long
def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>;
def : InstRW<[A64FXWrite_MULLV],
             (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;

// ASIMD polynomial multiply long
def : InstRW<[A64FXWrite_8Cyc_GI03],
             (instregex "^PMULL(v8i8|v16i8)", "^PMULL(v1i64|v2i64)")>;

// ASIMD absolute diff accum, D-form
// ASIMD absolute diff accum, Q-form
def : InstRW<[A64FXWrite_ABA],
             (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$",
                        "^[SU]ABA(v16i8|v8i16|v4i32)$")>;

// ASIMD absolute diff accum long
def : InstRW<[A64FXWrite_ABAL], (instregex "^[SU]ABAL")>;

// ASIMD arith, reduce: 8B/4H/2S, then 8H/4S, then 16B sources
def : InstRW<[A64FXWrite_ADDLV1],
             (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$",
                        "^[SU]?ADDL?V(v8i16|v4i32)v$",
                        "^[SU]?ADDL?Vv16i8v$")>;

// ASIMD max/min, reduce: 4H/4S, then 8B/8H, then 16B sources
def : InstRW<[A64FXWrite_MINMAXV],
             (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$",
                        "^[SU](MIN|MAX)V(v8i8|v8i16)v$",
                        "^[SU](MIN|MAX)Vv16i8v$")>;

// ASIMD multiply, D-form
def : InstRW<[A64FXWrite_PMUL],
             (instregex "^(P?MUL|SQR?DMUL)" #
                        "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
                        "(_indexed)?$")>;

// ASIMD multiply, Q-form
def : InstRW<[A64FXWrite_PMUL],
             (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD multiply (SQDMULH / SQRDMULH), Q-form
def : InstRW<[A64FXWrite_SQRDMULH],
             (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD multiply accumulate, D-form
// ASIMD multiply accumulate, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
             (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$",
                        "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD shift accumulate
def : InstRW<[A64FXWrite_SRSRAV], (instregex "SRSRAv", "URSRAv")>;
def : InstRW<[A64FXWrite_SSRAV], (instregex "SSRAv", "USRAv")>;

// ASIMD shift by immed, basic
def : InstRW<[A64FXWrite_RSHRN],
             (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>;
def : InstRW<[A64FXWrite_SHRN],
             (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>;

// ASIMD extract narrow, saturating
def : InstRW<[A64FXWrite_6Cyc_GI3],
             (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>;

// ASIMD shift by immed, complex
def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>;
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^SQSHLU")>;

// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3],
             (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)",
                        "^[SU][QR]{1,2}SHL" #
                        "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)",
                        "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;

// ASIMD Arithmetic
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)",
                        "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>;
def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
                        "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
def : InstRW<[A64FXWrite_ADDP],
             (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
                        "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
def : InstRW<[A64FXWrite_4Cyc_GI0],
             (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>;
def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>;
def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>;
def : InstRW<[A64FXWrite_MINMAXV],
             (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv", "^UMINVv")>;
def : InstRW<[A64FXWrite_ABA],
             (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
// The saturating add/sub and the narrowing add/sub (ADDHN/SUBHN/RADDHN/
// RSUBHN) opcodes are already bound above with the same writes, so the
// records that repeated them here have been dropped. Only the superset
// record that additionally covers SQABS remains.
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
                        "^SRHADD", "^SUQADD", "^UQADD", "^UQSUB",
                        "^URHADD", "^USQADD")>;

// ASIMD compare
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^CMEQv", "^CMGEv", "^CMGTv",
                        "^CMLEv", "^CMLTv", "^CMHIv", "^CMHSv")>;
// ASIMD max/min, basic
def : InstRW<[A64FXWrite_MINMAXV],
             (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
// ASIMD max/min, pairwise
def : InstRW<[A64FXWrite_ADDP],
             (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
// ASIMD absolute diff
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^SABDv", "^UABDv")>;
// ASIMD absolute diff long
def : InstRW<[A64FXWrite_TBX1],
             (instregex "^SABDLv", "^UABDLv")>;

//---
// 3.13 ASIMD Floating-point Instructions
//---

let Latency = 9 in
def : WriteRes<WriteFMul, [A64FXGI03]>;

// ASIMD FP absolute value
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FABSv")>;

// ASIMD FP arith, normal, D-form
// ASIMD FP arith, normal, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
             (instregex "^FABDv", "^FADDv", "^FSUBv")>;

// ASIMD FP arith, pairwise, D-form
// ASIMD FP arith, pairwise, Q-form
def : InstRW<[A64FXWrite_FADDPV], (instregex "^FADDPv")>;

// ASIMD FP compare, D-form
// ASIMD FP compare, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^FACGEv", "^FACGTv",
                        "^FCMEQv", "^FCMGEv", "^FCMGTv",
                        "^FCMLEv", "^FCMLTv")>;

// ASIMD FP round, D-form
// ASIMD FP round, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
             (instregex "^FRINT[AIMNPXZ](v2f32)",
                        "^FRINT[AIMNPXZ](v4f32|v2f64)")>;

// ASIMD FP convert, long
// ASIMD FP convert, narrow
// ASIMD FP convert, other, D-form
// ASIMD FP convert, other, Q-form
def : InstRW<[A64FXWrite_FCVTXNV],
             (instregex "^FCVT(L|N|XN)v",
                        "^[FVSU]CVT([AMNPZ][SU])?(_Int)?" #
                        "(v2f32|v1i32|v2i32|v1i64)",
                        "^[FVSU]CVT([AMNPZ][SU])?(_Int)?" #
                        "(v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP divide, D-form, F32
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>;

// ASIMD FP divide, Q-form, F32
def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>;

// ASIMD FP max/min, normal, D-form
// ASIMD FP max/min, normal, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI0],
             (instregex "^FMAXv", "^FMAXNMv", "^FMINv", "^FMINNMv")>;

// ASIMD FP max/min, pairwise, D-form
// ASIMD FP max/min, pairwise, Q-form
def : InstRW<[A64FXWrite_ADDP],
             (instregex "^FMAXPv", "^FMAXNMPv", "^FMINPv", "^FMINNMPv")>;

// ASIMD FP max/min, reduce
def : InstRW<[A64FXWrite_FMAXVVH],
             (instregex "^FMAXVv", "^FMAXNMVv", "^FMINVv", "^FMINNMVv")>;

// ASIMD FP multiply, D-form, FZ
// ASIMD FP multiply, D-form, no FZ
// ASIMD FP multiply, Q-form, FZ
// ASIMD FP multiply, Q-form, no FZ
// NOTE(review): the two A64FXWrite_FMULXE patterns overlap with the
// "^FMULv"/"^FMULXv" record that precedes them -- confirm the intended write.
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>;
def : InstRW<[A64FXWrite_FMULXE],
             (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)",
                        "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP multiply accumulate, Dform, FZ
// ASIMD FP multiply accumulate, Dform, no FZ
// ASIMD FP multiply accumulate, Qform, FZ
// ASIMD FP multiply accumulate, Qform, no FZ
// NOTE(review): same overlap shape as the FP multiply group above.
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>;
def : InstRW<[A64FXWrite_FMULXE],
             (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)",
                        "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP negate
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>;

//--
// 3.14 ASIMD Miscellaneous Instructions
//--

// ASIMD bit reverse
def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>;

// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
def : InstRW<[A64FXWrite_BIF],
             (instregex "^BIFv", "^BITv", "^BSLv")>;

// ASIMD count, D-form
// ASIMD count, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI0],
             (instregex "^CLSv", "^CLZv", "^CNTv")>;

// ASIMD duplicate, gen reg
// ASIMD duplicate, element
def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>;
def : InstRW<[A64FXWrite_6Cyc_GI0],
             (instregex "^DUP(i8|i16|i32|i64)$", "^DUPv.+gpr")>;

// ASIMD extract
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>;

// ASIMD extract narrow
// (The saturating forms SQXTN/SQXTUN/UQXTN are bound to the same write in
// the integer section above.)
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>;

// ASIMD insert, element to element
// ASIMD transfer gen reg to element
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;

// ASIMD transfer, element to gen reg
// ASIMD transfer, element to word or word
def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;

// ASIMD move, integer immed
// ASIMD move, FP immed
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv", "^FMOVv")>;

// ASIMD table lookup, D-form
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv8i8Two")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv8i8Three")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv8i8Four")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv8i8One")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv8i8Two")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv8i8Three")>;
def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv8i8Four")>;

// ASIMD table lookup, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv16i8Two")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv16i8Three")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv16i8Four")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv16i8One")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv16i8Two")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv16i8Three")>;
def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv16i8Four")>;

// ASIMD unzip/zip
def : InstRW<[A64FXWrite_6Cyc_GI0],
             (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;

// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
                        "^FRSQRTEv", "^URSQRTEv")>;

// ASIMD reciprocal step, D-form, FZ
// ASIMD reciprocal step, D-form, no FZ
// ASIMD reciprocal step, Q-form, FZ
// ASIMD reciprocal step, Q-form, no FZ
def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex "^FRECPSv", "^FRSQRTSv")>;

// ASIMD reverse
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^REV16v", "^REV32v", "^REV64v")>;

// ASIMD table lookup, D-form
// ASIMD table lookup, Q-form
// NOTE(review): generic catch-all that overlaps the per-variant TBL/TBX
// records above with a different write -- confirm it is still wanted.
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>;

// ASIMD transpose
def : InstRW<[A64FXWrite_6Cyc_GI0],
             (instregex "^TRN1v", "^TRN2v", "^UZP1v", "^UZP2v")>;

// ASIMD unzip/zip
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^ZIP1v", "^ZIP2v")>;

//--
// 3.15 ASIMD Load Instructions
//--

// ASIMD load, 1 element, multiple, 1 reg, D-form
// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[A64FXWrite_8Cyc_GI56],
             (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_11Cyc_GI56],
             (instregex "^LD1Onev(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
             (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD109, WriteAdr],
             (instregex "^LD1Onev(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, D-form
// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[A64FXWrite_LD102],
             (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD103],
             (instregex "^LD1Twov(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
             (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
             (instregex "^LD1Twov(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, D-form
// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[A64FXWrite_LD104],
             (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD105],
             (instregex "^LD1Threev(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
             (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
             (instregex "^LD1Threev(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, D-form
// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[A64FXWrite_LD106],
             (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD107],
             (instregex "^LD1Fourv(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
             (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
             (instregex "^LD1Fourv(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[A64FXWrite_LD1I0], (instregex "^LD1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD1I1, WriteAdr],
             (instregex "^LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
// ASIMD load, 1 element, all lanes, Q-form
def : InstRW<[A64FXWrite_8Cyc_GI03],
             (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
             (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, multiple, D-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_LD103],
             (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
             (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[A64FXWrite_LD2I0], (instregex "^LD2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD2I1, WriteAdr],
             (instregex "^LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
// ASIMD load, 2 element, all lanes, Q-form
def : InstRW<[A64FXWrite_LD102],
             (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
             (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, multiple, D-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_LD105],
             (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
             (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[A64FXWrite_LD3I0], (instregex "^LD3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD3I1, WriteAdr],
             (instregex "^LD3i(8|16|32|64)_POST$")>;

// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
def : InstRW<[A64FXWrite_LD104],
             (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
             (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, multiple, D-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_LD107],
             (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
             (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[A64FXWrite_LD4I0], (instregex "^LD4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD4I1, WriteAdr],
             (instregex "^LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
def : InstRW<[A64FXWrite_LD106],
             (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
             (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

//--
// 3.16 ASIMD Store Instructions
//--

// ASIMD store, 1 element, multiple, 1 reg, D-form
// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[A64FXWrite_ST10],
             (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST14, WriteAdr],
             (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, D-form
// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[A64FXWrite_ST11],
             (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
             (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, D-form
// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[A64FXWrite_ST12],
             (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
             (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, D-form
// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[A64FXWrite_ST13],
             (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
             (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
def : InstRW<[A64FXWrite_ST10],
             (instregex "^ST1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST14, WriteAdr],
             (instregex "^ST1i(8|16|32|64)_POST$")>;

// ASIMD store, 2 element, multiple, D-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_ST11],
             (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
             (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
def : InstRW<[A64FXWrite_ST11],
             (instregex "^ST2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
             (instregex "^ST2i(8|16|32|64)_POST$")>;
// ASIMD store, 3 element, multiple, D-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, B/H/S
// ASIMD store, 3 element, multiple, Q-form, D
// ASIMD store, 3 element, one lane, B/H
// ASIMD store, 3 element, one lane, S
// ASIMD store, 3 element, one lane, D
//
// The whole-register and single-lane forms share one write; the
// post-indexed forms additionally produce the updated base (WriteAdr).
def : InstRW<[A64FXWrite_ST12],
             (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$",
                        "^ST3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
             (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$",
                        "^ST3i(8|16|32|64)_POST$")>;

// ASIMD store, 4 element, multiple, D-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, B/H/S
// ASIMD store, 4 element, multiple, Q-form, D
// ASIMD store, 4 element, one lane, B/H
// ASIMD store, 4 element, one lane, S
// ASIMD store, 4 element, one lane, D
def : InstRW<[A64FXWrite_ST13],
             (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$",
                        "^ST4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
             (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$",
                        "^ST4i(8|16|32|64)_POST$")>;

// V8.1a Atomics (LSE)
//
// One record per mnemonic family. Within a family the rows are the memory
// ordering variants in the order: plain, A, L, AL; the columns are the
// B/H/W/X access sizes.

// Compare-and-swap.
def : InstRW<[A64FXWrite_CAS, WriteAtomic],
             (instrs CASB,   CASH,   CASW,   CASX,
                     CASAB,  CASAH,  CASAW,  CASAX,
                     CASLB,  CASLH,  CASLW,  CASLX,
                     CASALB, CASALH, CASALW, CASALX)>;

// Load-acquire (LDLAR*).
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;

// Atomic add.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDADDB,   LDADDH,   LDADDW,   LDADDX,
                     LDADDAB,  LDADDAH,  LDADDAW,  LDADDAX,
                     LDADDLB,  LDADDLH,  LDADDLW,  LDADDLX,
                     LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;

// Atomic bit clear.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDCLRB,   LDCLRH,   LDCLRW,   LDCLRX,
                     LDCLRAB,  LDCLRAH,  LDCLRAW,  LDCLRAX,
                     LDCLRLB,  LDCLRLH,  LDCLRLW,  LDCLRLX,
                     LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;

// Atomic exclusive OR.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDEORB,   LDEORH,   LDEORW,   LDEORX,
                     LDEORAB,  LDEORAH,  LDEORAW,  LDEORAX,
                     LDEORLB,  LDEORLH,  LDEORLW,  LDEORLX,
                     LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;

// Atomic bit set.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDSETB,   LDSETH,   LDSETW,   LDSETX,
                     LDSETAB,  LDSETAH,  LDSETAW,  LDSETAX,
                     LDSETLB,  LDSETLH,  LDSETLW,  LDSETLX,
                     LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;

// Atomic signed maximum.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDSMAXB,   LDSMAXH,   LDSMAXW,   LDSMAXX,
                     LDSMAXAB,  LDSMAXAH,  LDSMAXAW,  LDSMAXAX,
                     LDSMAXLB,  LDSMAXLH,  LDSMAXLW,  LDSMAXLX,
                     LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;

// Atomic signed minimum.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDSMINB,   LDSMINH,   LDSMINW,   LDSMINX,
                     LDSMINAB,  LDSMINAH,  LDSMINAW,  LDSMINAX,
                     LDSMINLB,  LDSMINLH,  LDSMINLW,  LDSMINLX,
                     LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;

// Atomic unsigned maximum.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDUMAXB,   LDUMAXH,   LDUMAXW,   LDUMAXX,
                     LDUMAXAB,  LDUMAXAH,  LDUMAXAW,  LDUMAXAX,
                     LDUMAXLB,  LDUMAXLH,  LDUMAXLW,  LDUMAXLX,
                     LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;

// Atomic unsigned minimum.
def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
             (instrs LDUMINB,   LDUMINH,   LDUMINW,   LDUMINX,
                     LDUMINAB,  LDUMINAH,  LDUMINAW,  LDUMINAX,
                     LDUMINLB,  LDUMINLH,  LDUMINLW,  LDUMINLX,
                     LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;

// Swap.
def : InstRW<[A64FXWrite_SWP, WriteAtomic],
             (instrs SWPB,   SWPH,   SWPW,   SWPX,
                     SWPAB,  SWPAH,  SWPAW,  SWPAX,
                     SWPLB,  SWPLH,  SWPLW,  SWPLX,
                     SWPALB, SWPALH, SWPALW, SWPALX)>;

// Store-release (STLLR*).
def : InstRW<[A64FXWrite_STUR, WriteAtomic],
             (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;

// SVE instructions

// The modeling method for SVE instructions is more accurate than others.
// TODO: modify the model of other instructions similarly.
// Resource group legend (see the ProcResGroup defs at the top of this file):
//   GI0 = FLA, GI1 = PR, GI2 = EXA, GI3 = FLB, GI4 = EXB, GI5 = EAGA,
//   GI6 = EAGB; a multi-digit suffix is the union, e.g. GI03 = FLA or FLB.

// SVE operations bound to the FLA pipe only, 4-cycle write.
def : InstRW<[A64FXWrite_4Cyc_GI0],
             (instregex "^AND_ZI",
                        "^CL[SZ]_Z",
                        "^CPY_ZP[mz]I",
                        "^DUP_ZZ?I",
                        "^DUPM_Z",
                        "^EOR_ZI",
                        "^ORR_ZI",
                        "^FCM(EQ|GT|GE|LT|LE|NE|UO)_P",
                        "^FCPY_Z",
                        "^F(MAX|MIN).*I_",
                        "^NEG_Z",
                        "^[SU](MAX|MIN)_ZI",
                        "^SUBR?_ZI")>;

// SVE operations bound to the FLA pipe only, 6-cycle write.
def : InstRW<[A64FXWrite_6Cyc_GI0],
             (instregex "^CLAST[AB]_[VZ]",
                        "^COMPACT_Z",
                        "^CPY_ZPmV",
                        "^DUP_ZR",
                        "^EXT_Z",
                        "^FDUP_Z",
                        "^INSR_ZV",
                        "^LAST[AB]_V",
                        "^REV_Z",
                        "^SPLICE_Z",
                        "^[SU]UNPK(HI|LO)_Z",
                        "^TBL_Z",
                        "^TRN[12]_Z")>;

// SVE operations bound to the FLA pipe only, 9-cycle write.
def : InstRW<[A64FXWrite_9Cyc_GI0],
             (instregex "^F(ADD|SUBR?)_.*I_",
                        "^FRECPS_Z",
                        "^FRSQRTS_Z",
                        "^INDEX_II_[SD]",
                        "^MUL_ZI")>;

// SVE operations bound to the FLB pipe only, 4-cycle write.
def : InstRW<[A64FXWrite_4Cyc_GI3],
             (instregex "^CNT_Z")>;

// SVE operations that may use either FL pipe, 4-cycle write.
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^ABS_Z",
                        "^ADD_Z",
                        "^AND_Z[^I]",
                        "^ASRR?_(WIDE_)?Z",
                        "^BIC_Z",
                        "^ADR_[SU]XTW_Z",
                        "^CNOT_Z",
                        "^DEC[BHWD]_Z",
                        "^EOR_Z[^I]",
                        "^INC[BHWD]_Z",
                        "^ORR_Z[^I]",
                        "^FABS_Z",
                        "^FACG[ET]_P",
                        "^FEXPA_Z",
                        "^F(MAX|MIN)[^V]*Z_",
                        "^FNEG_Z",
                        "^FRECP[EX]_Z",
                        "^FRSQRTE_Z",
                        "^FTSSEL_Z",
                        "^LS[LR]R?(_WIDE)?_Z",
                        "^NOT_Z",
                        "^RBIT_Z",
                        "^REV[BHW]_Z",
                        "^SABD_Z",
                        "^SEL_Z",
                        "^[SU](MAX|MIN)_ZP",
                        "^[SU]Q(INC|DEC)[^P]_Z",
                        "^SUBR?_Z[^I]",
                        "^[SU]XT._Z",
                        "^UABD_Z")>;

// SVE arithmetic mapped to A64FXWrite_9Cyc_GI03.
def : InstRW<[A64FXWrite_9Cyc_GI03],
             (instregex "^FABD_Z",
                        "^F(ADD|SUBR?)_.*Z_",
                        "^FN?(MAD|MLA|MLS|MSB)_ZP",
                        "^FMUL_(ZP|ZZZ_)",
                        "^FMULX_Z",
                        "^FCVT(ZS|ZU)?_Z",
                        "^FRINT._Z",
                        "^FSCALE_Z",
                        "^FTMAD_Z",
                        "^FTSMUL_Z",
                        "^MAD_Z",
                        "^MLA_Z",
                        "^MLS_Z",
                        "^MSB_Z",
                        "^MUL_ZP",
                        "^[SU]CVTF_Z",
                        "^[SU]DOT_ZZZ_",
                        "^[SU]MULH_Z")>;

// Predicate-register operations on the PR pipe, 3-cycle write.
def : InstRW<[A64FXWrite_3Cyc_GI1],
             (instregex "^ANDS?_P",
                        "^BICS?_P",
                        "^BRK.*_P",
                        "^EORS?_P",
                        "^ORRS?_P",
                        "^NANDS?_P",
                        "^NORS?_P",
                        "^ORNS?_P",
                        "^PFALSE",
                        "^PNEXT",
                        "^PFIRST",
                        "^PTEST",
                        "^PTRUES?",
                        "^PUNPK(HI|LO)",
                        "^RDFFRS?",
                        "^REV_P",
                        "^SEL_P",
                        "^TRN[12]_P")>;

// Scalar-destination element-count / vector-length arithmetic.
def : InstRW<[A64FXWrite_1Cyc_GI24],
             (instregex "^ADD[PV]L",
                        "^CNT[BHWD]_X",
                        "^DEC[BHWD]_X",
                        "^INC[BHWD]_X",
                        "^RDVLI")>;

// Whole-register predicate / vector fill loads.
def : InstRW<[A64FXWrite_11Cyc_GI5],
             (instregex "^LDR_[PZ]XI")>;

// Contiguous, replicating, first-fault, non-fault and non-temporal loads.
def : InstRW<[A64FXWrite_11Cyc_GI56],
             (instregex "^LD(NF|FF|NT)?1R?S?[BHSWDQ]")>;

// Write that names no processor resource and keeps every default.
def A64FXWrite_None : SchedWriteRes<[]>;
def : InstRW<[A64FXWrite_None], (instregex "^SETFFR", "^MOVPRFX")>;

// Indexed FP multiply / multiply-accumulate: two micro-ops on either FL pipe.
def A64FXWrite_FMAIndexed : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_FMAIndexed], (instregex "^F(MLA|MLS|MUL)_ZZZI")>;

// Vector address generation with LSL: two micro-ops on FLA.
def A64FXWrite_ADR_LSL_Z : SchedWriteRes<[A64FXGI0]> {
  let Latency = 5; let NumMicroOps = 2; let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_ADR_LSL_Z], (instregex "^ADR_LSL_Z")>;

// ASRD: one slot on FLA plus one on the FLA-or-PR group.
def A64FXWrite_ASRD : SchedWriteRes<[A64FXGI0, A64FXGI01]> {
  let Latency = 8; let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_ASRD], (instregex "^ASRD_Z")>;

// Integer / logical horizontal reductions. The micro-op count and the
// occupancy of the FL pipes shrink as the element size grows (B > H > S > D).
def A64FXWrite_Reduction4CycB : SchedWriteRes<[A64FXGI03]> {
  let Latency = 46; let NumMicroOps = 10; let ReleaseAtCycles = [10];
}
def : InstRW<[A64FXWrite_Reduction4CycB],
             (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_B")>;

def A64FXWrite_Reduction4CycH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 42; let NumMicroOps = 9; let ReleaseAtCycles = [9];
}
def : InstRW<[A64FXWrite_Reduction4CycH],
             (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_H")>;

def A64FXWrite_Reduction4CycS : SchedWriteRes<[A64FXGI03]> {
  let Latency = 38; let NumMicroOps = 8; let ReleaseAtCycles = [8];
}
def : InstRW<[A64FXWrite_Reduction4CycS],
             (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_S")>;

def A64FXWrite_Reduction4CycD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 34; let NumMicroOps = 7; let ReleaseAtCycles = [7];
}
def : InstRW<[A64FXWrite_Reduction4CycD],
             (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_D")>;

// Conditional last-element extract to a general-purpose register.
def A64FXWrite_CLAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 29;
}
def : InstRW<[A64FXWrite_CLAST_R], (instregex "^CLAST[AB]_R")>;

// Integer compares producing a predicate: FLA and PR.
def A64FXWrite_CMP : SchedWriteRes<[A64FXGI0, A64FXGI1]> {
  let Latency = 4;
}
def : InstRW<[A64FXWrite_CMP], (instregex "^CMP.*_P")>;

// Predicate population count to a scalar: PR and EXA.
def A64FXWrite_CNTP : SchedWriteRes<[A64FXGI1, A64FXGI2]> {
  let Latency = 6;
}
def : InstRW<[A64FXWrite_CNTP], (instregex "^CNTP_X")>;

// Predicated copy of a scalar register into a vector: FLA and EXA.
def A64FXWrite_CPYScalar : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 8;
}
def : InstRW<[A64FXWrite_CPYScalar], (instregex "^CPY_ZPmR")>;

// CTERM*: either EX pipe, held for two cycles.
def A64FXWrite_CTERM : SchedWriteRes<[A64FXGI24]> {
  let Latency = 2; let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_CTERM], (instregex "^CTERM")>;

// INCP/DECP with a scalar destination: PR, EXA and EXB.
def A64FXWrite_INCPScalar : SchedWriteRes<[A64FXGI1, A64FXGI2, A64FXGI4]> {
  let Latency = 7; let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_INCPScalar], (instregex "^DECP_X", "^INCP_X")>;

// INCP/DECP with a vector destination: FLA and PR.
def A64FXWrite_INCPVector : SchedWriteRes<[A64FXGI0, A64FXGI1]> {
  let Latency = 12;
}
def : InstRW<[A64FXWrite_INCPVector], (instregex "^DECP_Z", "^INCP_Z")>;

// FADDV (FP add reduction), one write per element size.
def A64FXWrite_FADDVH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 75; let NumMicroOps = 11; let ReleaseAtCycles = [11];
}
def : InstRW<[A64FXWrite_FADDVH], (instrs FADDV_VPZ_H)>;

def A64FXWrite_FADDVS : SchedWriteRes<[A64FXGI03]> {
  let Latency = 60; let NumMicroOps = 9; let ReleaseAtCycles = [9];
}
def : InstRW<[A64FXWrite_FADDVS], (instrs FADDV_VPZ_S)>;

def A64FXWrite_FADDVD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 45; let NumMicroOps = 7; let ReleaseAtCycles = [7];
}
def : InstRW<[A64FXWrite_FADDVD], (instrs FADDV_VPZ_D)>;

// FADDA (FP add accumulate), one write per element size.
def A64FXWrite_FADDAH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 468; let NumMicroOps = 63; let ReleaseAtCycles = [63];
}
def : InstRW<[A64FXWrite_FADDAH], (instrs FADDA_VPZ_H)>;

def A64FXWrite_FADDAS : SchedWriteRes<[A64FXGI03]> {
  let Latency = 228; let NumMicroOps = 31; let ReleaseAtCycles = [31];
}
def : InstRW<[A64FXWrite_FADDAS], (instrs FADDA_VPZ_S)>;

def A64FXWrite_FADDAD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 108; let NumMicroOps = 15; let ReleaseAtCycles = [15];
}
def : InstRW<[A64FXWrite_FADDAD], (instrs FADDA_VPZ_D)>;

// Complex FP add: one micro-op on each FL pipe.
def A64FXWrite_FCADDZ : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
  let Latency = 15; let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_FCADDZ], (instregex "^FCADD_Z")>;

// Complex FP multiply-accumulate: three micro-ops on either FL pipe.
def A64FXWrite_FCMLAZ : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 3; let ReleaseAtCycles = [3];
}
def : InstRW<[A64FXWrite_FCMLAZ], (instregex "^FCMLA_Z")>;

// FP divide / square root: FLA stays reserved for the full latency.
def A64FXWrite_FDIVH : SchedWriteRes<[A64FXGI0]> {
  let Latency = 134; let ReleaseAtCycles = [134];
}
def : InstRW<[A64FXWrite_FDIVH], (instregex "^F(DIVR?|SQRT)_Z.*_H")>;

def A64FXWrite_FDIVS : SchedWriteRes<[A64FXGI0]> {
  let Latency = 98; let ReleaseAtCycles = [98];
}
def : InstRW<[A64FXWrite_FDIVS], (instregex "^F(DIVR?|SQRT)_Z.*_S")>;

def A64FXWrite_FDIVD : SchedWriteRes<[A64FXGI0]> {
  let Latency = 154; let ReleaseAtCycles = [154];
}
def : InstRW<[A64FXWrite_FDIVD], (instregex "^F(DIVR?|SQRT)_Z.*_D")>;

// FP max/min reductions (FMAXV, FMINV, FMAXNMV, FMINNMV) per element size.
def A64FXWrite_FMAXVH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 54; let NumMicroOps = 11; let ReleaseAtCycles = [11];
}
def : InstRW<[A64FXWrite_FMAXVH], (instregex "^F(MAX|MIN)(NM)?V_VPZ_H")>;

def A64FXWrite_FMAXVS : SchedWriteRes<[A64FXGI03]> {
  let Latency = 44; let NumMicroOps = 9; let ReleaseAtCycles = [9];
}
def : InstRW<[A64FXWrite_FMAXVS], (instregex "^F(MAX|MIN)(NM)?V_VPZ_S")>;

// FP max/min reduction, D elements.
def A64FXWrite_FMAXVD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 34;
  let NumMicroOps = 7;
  let ReleaseAtCycles = [7];
}
// The D-element reductions use the D write defined just above (they were
// mapped to the H write, which left A64FXWrite_FMAXVD unused).
def : InstRW<[A64FXWrite_FMAXVD], (instregex "^F(MAX|MIN)(NM)?V_VPZ_D")>;

// INDEX with one register and one immediate operand, B/H elements.
def A64FXWrite_INDEX_RI_BH : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 17;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2, 2];
}
def : InstRW<[A64FXWrite_INDEX_RI_BH], (instregex "^INDEX_(RI|IR)_[BH]")>;

// INDEX with one register and one immediate operand, S/D elements.
def A64FXWrite_INDEX_RI_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 13;
  let NumMicroOps = 1;
}
def : InstRW<[A64FXWrite_INDEX_RI_SD], (instregex "^INDEX_(RI|IR)_[SD]")>;

// INDEX with two immediate operands, B/H elements.
def A64FXWrite_INDEX_II_BH : SchedWriteRes<[A64FXGI0]> {
  let Latency = 13;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_INDEX_II_BH], (instregex "^INDEX_II_[BH]")>;

// INDEX with two register operands, B/H elements.
def A64FXWrite_INDEX_RR_BH : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI3]> {
  let Latency = 17;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2, 2, 1];
}
def : InstRW<[A64FXWrite_INDEX_RR_BH], (instregex "^INDEX_RR_[BH]")>;

// INDEX with two register operands, S/D elements.
def A64FXWrite_INDEX_RR_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 17;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2, 1];
}
def : InstRW<[A64FXWrite_INDEX_RR_SD], (instregex "^INDEX_RR_[SD]")>;

// Insert a general-purpose register into a shifted vector.
def A64FXWrite_INSR_ZR : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 10;
}
def : InstRW<[A64FXWrite_INSR_ZR], (instregex "^INSR_ZR")>;

// Unconditional last-element extract to a general-purpose register.
def A64FXWrite_LAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 25;
}
// LASTA/LASTB use their own write (they were mapped to the CLAST write,
// which left A64FXWrite_LAST_R unused).
def : InstRW<[A64FXWrite_LAST_R], (instregex "^LAST[AB]_R")>;

// Gather loads, 32-bit elements, vector base plus immediate.
def A64FXWrite_GLD_S_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 19;
  let ReleaseAtCycles = [2, 4, 4];
}
def : InstRW<[A64FXWrite_GLD_S_ZI],
             (instregex "^GLD(FF)?1W_IMM", "^GLD(FF)?1S?[BHW]_S_IMM")>;

// Gather loads, 64-bit elements, vector base plus immediate.
def A64FXWrite_GLD_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 16;
  let ReleaseAtCycles = [1, 2, 2];
}
def : InstRW<[A64FXWrite_GLD_D_ZI],
             (instregex "^GLD(FF)?1D_IMM", "^GLD(FF)?1S?[BHW]_D_IMM")>;

// Gather loads, 32-bit elements, scalar base plus vector offset.
def A64FXWrite_GLD_S_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 23;
  let ReleaseAtCycles = [2, 1, 4, 4];
}
def : InstRW<[A64FXWrite_GLD_S_RZ],
             (instregex "^GLD(FF)?1W_[^DI]", "^GLD(FF)?1S?[BHW]_S_[^I]")>;

// Gather loads, 64-bit elements, scalar base plus vector offset.
def A64FXWrite_GLD_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 20;
  let ReleaseAtCycles = [1, 1, 2, 2];
}
def : InstRW<[A64FXWrite_GLD_D_RZ],
             (instregex "^GLD(FF)?1D_[^I]", "^GLD(FF)?1D$", "^GLD(FF)?1S?[BHW]_D_[^I]",
                        "^GLD(FF)?1S?[BHW]_D$")>;

// Structured loads. For W/D elements the "_IMM" opcodes get the cheaper
// write and the suffix-less opcodes get one extra micro-op and cycle.
def A64FXWrite_LD2_BH : SchedWriteRes<[A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [9];
}
def : InstRW<[A64FXWrite_LD2_BH], (instregex "^LD2[BH]")>;

def A64FXWrite_LD2_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_LD2_WD_IMM], (instregex "^LD2[WD]_IMM")>;

def A64FXWrite_LD2_WD : SchedWriteRes<[A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : InstRW<[A64FXWrite_LD2_WD], (instregex "^LD2[WD]$")>;

def A64FXWrite_LD3_BH : SchedWriteRes<[A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [13];
}
def : InstRW<[A64FXWrite_LD3_BH], (instregex "^LD3[BH]")>;

def A64FXWrite_LD3_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : InstRW<[A64FXWrite_LD3_WD_IMM], (instregex "^LD3[WD]_IMM")>;

def A64FXWrite_LD3_WD : SchedWriteRes<[A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4];
}
def : InstRW<[A64FXWrite_LD3_WD], (instregex "^LD3[WD]$")>;

def A64FXWrite_LD4_BH : SchedWriteRes<[A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [17];
}
def : InstRW<[A64FXWrite_LD4_BH], (instregex "^LD4[BH]")>;

def A64FXWrite_LD4_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4];
}
def : InstRW<[A64FXWrite_LD4_WD_IMM], (instregex "^LD4[WD]_IMM")>;

def A64FXWrite_LD4_WD : SchedWriteRes<[A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [5];
}
def : InstRW<[A64FXWrite_LD4_WD], (instregex "^LD4[WD]$")>;

// Prefetches. These writes keep the default latency and micro-op count and
// only describe resource occupancy.
def A64FXWrite_PRF : SchedWriteRes<[A64FXGI56]> {
}
def : InstRW<[A64FXWrite_PRF], (instregex "^PRF._PR")>;

def A64FXWrite_PRF_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
  let ReleaseAtCycles = [2, 1, 4];
}
def : InstRW<[A64FXWrite_PRF_W_RZ], (instregex "^PRF._S_[^P]")>;

def A64FXWrite_PRF_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let ReleaseAtCycles = [2, 4];
}
def : InstRW<[A64FXWrite_PRF_W_ZI], (instregex "^PRF._S_PZI")>;

def A64FXWrite_PRF_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
  let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[A64FXWrite_PRF_D_RZ], (instregex "^PRF._D_[^P]")>;

def A64FXWrite_PRF_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let ReleaseAtCycles = [1, 2];
}
def : InstRW<[A64FXWrite_PRF_D_ZI], (instregex "^PRF._D_PZI")>;

// Integer divide: FLA stays reserved for the full latency.
def A64FXWrite_SDIV_S : SchedWriteRes<[A64FXGI0]> {
  let Latency = 114;
  let ReleaseAtCycles = [114];
}
def : InstRW<[A64FXWrite_SDIV_S], (instregex "^[SU]DIVR?.*_S")>;

def A64FXWrite_SDIV_D : SchedWriteRes<[A64FXGI0]> {
  let Latency = 178;
  let ReleaseAtCycles = [178];
}
def : InstRW<[A64FXWrite_SDIV_D], (instregex "^[SU]DIVR?.*_D")>;

// Indexed dot product: one micro-op on each FL pipe.
def A64FXWrite_SDOT_I : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
  let Latency = 15;
  let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_SDOT_I], (instregex "^[SU]DOT_ZZZI")>;

// Saturating increment/decrement by element count, scalar destination.
def A64FXWrite_SQINC_Scalar : SchedWriteRes<[A64FXGI24]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_SQINC_Scalar], (instregex "^[SU]Q(INC|DEC)[BHWD]_[WX]")>;

// Saturating increment/decrement by predicate count, scalar destination.
def A64FXWrite_SQINCP_X : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
  let Latency = 6;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [3, 1];
}
def : InstRW<[A64FXWrite_SQINCP_X], (instregex "^[SU]Q(INC|DEC)P_[WX]")>;

// Saturating increment/decrement by predicate count, vector destination.
def A64FXWrite_SQINCP_Z : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
  let Latency = 12;
}
def : InstRW<[A64FXWrite_SQINCP_Z], (instregex "^[SU]Q(INC|DEC)P_Z")>;

// Contiguous and non-temporal stores.
def A64FXWrite_ST1 : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_ST1], (instregex "^ST(NT)?1[BHWD]")>;

// Scatter stores, 32-bit elements, scalar base plus vector offset.
def A64FXWrite_SST1_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 20;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [8, 8, 8, 8];
}
def : InstRW<[A64FXWrite_SST1_W_RZ],
             (instregex "^SST1[BH]_S(_[^I]|$)", "^SST1W(_[^ID]|$)")>;

// Scatter stores, 64-bit elements, scalar base plus vector offset.
def A64FXWrite_SST1_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 20;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4, 4, 4, 4];
}
def : InstRW<[A64FXWrite_SST1_D_RZ],
             (instregex "^SST1[BHW]_D(_[^I]|$)", "^SST1D(_[^I]|$)")>;

// Scatter stores, 32-bit elements, vector base plus immediate.
def A64FXWrite_SST1_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 16;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [12, 8, 8];
}
def : InstRW<[A64FXWrite_SST1_W_ZI],
             (instregex "^SST1[BH]_S_I", "^SST1W_I")>;

// Scatter stores, 64-bit elements, vector base plus immediate.
def A64FXWrite_SST1_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 16;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4, 4, 4];
}
def : InstRW<[A64FXWrite_SST1_D_ZI],
             (instregex "^SST1[BHW]_D_I", "^SST1D_I")>;

// Structured stores.
//
// For W/D elements the "_RI" writes model the register-plus-immediate
// opcodes (the ones whose names end in "_IMM") and the "_RR" writes model
// the register-plus-register opcodes (no suffix). This mirrors the LDn[WD]
// entries, where the "_IMM" opcodes also get the 11-cycle write and the
// suffix-less opcodes the 12-cycle write with one extra micro-op.
// NOTE(review): the two regexes of each RI/RR pair were previously the other
// way round; confirm the pairing against the A64FX microarchitecture manual.
def A64FXWrite_ST2_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [8, 9];
}
def : InstRW<[A64FXWrite_ST2_BH], (instregex "^ST2[BH]")>;

def A64FXWrite_ST2_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2, 2];
}
def : InstRW<[A64FXWrite_ST2_WD_RI], (instregex "^ST2[WD]_I")>;

def A64FXWrite_ST2_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2, 3];
}
def : InstRW<[A64FXWrite_ST2_WD_RR], (instregex "^ST2[WD]$")>;

def A64FXWrite_ST3_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [12, 13];
}
def : InstRW<[A64FXWrite_ST3_BH], (instregex "^ST3[BH]")>;

def A64FXWrite_ST3_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3, 3];
}
def : InstRW<[A64FXWrite_ST3_WD_RI], (instregex "^ST3[WD]_I")>;

def A64FXWrite_ST3_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3, 4];
}
def : InstRW<[A64FXWrite_ST3_WD_RR], (instregex "^ST3[WD]$")>;

def A64FXWrite_ST4_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [16, 17];
}
def : InstRW<[A64FXWrite_ST4_BH], (instregex "^ST4[BH]")>;

def A64FXWrite_ST4_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4, 4];
}
def : InstRW<[A64FXWrite_ST4_WD_RI], (instregex "^ST4[WD]_I")>;

def A64FXWrite_ST4_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [4, 5];
}
def : InstRW<[A64FXWrite_ST4_WD_RR], (instregex "^ST4[WD]$")>;

// Whole predicate-register spill store.
def A64FXWrite_STR_P : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_STR_P], (instrs STR_PXI)>;

// Whole vector-register spill store.
def A64FXWrite_STR_Z : SchedWriteRes<[A64FXGI0, A64FXGI5]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_STR_Z], (instrs STR_ZXI)>;

// WHILELx predicate generation.
def A64FXWrite_WHILE : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let Latency = 4;
}
def : InstRW<[A64FXWrite_WHILE], (instregex "^WHILEL._P")>;

// Write to the first-fault register.
def A64FXWrite_WRFFR : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let Latency = 3;
  let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_WRFFR], (instrs WRFFR)>;

} // SchedModel = A64FXModel