//=- AArch64SchedA64FX.td - Fujitsu A64FX Scheduling Defs -*- tablegen -*-=//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the scheduling model for the Fujitsu A64FX processors.
//
//===----------------------------------------------------------------------===//

// Machine-wide parameters of the A64FX scheduling model.
def A64FXModel : SchedMachineModel {
  // Dispatch width and buffer sizes.
  let IssueWidth        =   6; // 6 micro-ops dispatched at a time.
  let MicroOpBufferSize = 180; // 180 entries in micro-op re-order buffer.
  // Determined via a mix of micro-arch details and experimentation.
  let LoopMicroOpBufferSize = 128;

  // Latency assumptions.
  let LoadLatency       =  5; // Optimistic load latency.
  let MispredictPenalty = 12; // Extra cycles for mispredicted branch.

  let PostRAScheduler = 1; // Using PostRA sched.
  let CompleteModel   = 1;

  // Features whose instructions this model does not describe.
  list<Predicate> UnsupportedFeatures =
      !listconcat(SMEUnsupported.F, SVEUnsupported.F,
                  [HasMTE, HasMatMulInt8, HasBF16,
                   HasPAuth, HasPAuthLR, HasCPA,
                   HasCSSC]);

  // Overlapping InstRW records are used in the tables of this file.
  let FullInstRWOverlapCheck = 0;
}

let SchedModel = A64FXModel in {

// Define the issue ports.

// A64FXIP*

def A64FXIPFLA  : ProcResource<1>; // Port 0
def A64FXIPPR   : ProcResource<1>; // Port 1
def A64FXIPEXA  : ProcResource<1>; // Port 2
def A64FXIPFLB  : ProcResource<1>; // Port 3
def A64FXIPEXB  : ProcResource<1>; // Port 4
def A64FXIPEAGA : ProcResource<1>; // Port 5
def A64FXIPEAGB : ProcResource<1>; // Port 6
def A64FXIPBR   : ProcResource<1>; // Port 7

// Define groups for the functional units on each issue port.  Each group
// created will be used by a WriteRes later on.
// Groups that contain exactly one issue port.
def A64FXGI7 : ProcResGroup<[A64FXIPBR]>;
def A64FXGI0 : ProcResGroup<[A64FXIPFLA]>;
def A64FXGI1 : ProcResGroup<[A64FXIPPR]>;
def A64FXGI2 : ProcResGroup<[A64FXIPEXA]>;
def A64FXGI3 : ProcResGroup<[A64FXIPFLB]>;
def A64FXGI4 : ProcResGroup<[A64FXIPEXB]>;
def A64FXGI5 : ProcResGroup<[A64FXIPEAGA]>;
def A64FXGI6 : ProcResGroup<[A64FXIPEAGB]>;

// Groups that span several issue ports.
def A64FXGI03   : ProcResGroup<[A64FXIPFLA, A64FXIPFLB]>;
def A64FXGI01   : ProcResGroup<[A64FXIPFLA, A64FXIPPR]>;
def A64FXGI24   : ProcResGroup<[A64FXIPEXA, A64FXIPEXB]>;
def A64FXGI56   : ProcResGroup<[A64FXIPEAGA, A64FXIPEAGB]>;
def A64FXGI056  : ProcResGroup<[A64FXIPFLA, A64FXIPEAGA, A64FXIPEAGB]>;
def A64FXGI2456 : ProcResGroup<[A64FXIPEXA, A64FXIPEXB,
                                A64FXIPEAGA, A64FXIPEAGB]>;

// Every issue port.
def A64FXAny : ProcResGroup<[A64FXIPFLA, A64FXIPPR, A64FXIPEXA, A64FXIPFLB,
                             A64FXIPEXB, A64FXIPEAGA, A64FXIPEAGB, A64FXIPBR]>;

// Generic writes.  A64FXWrite_<N>Cyc_<group> has latency N and uses <group>.
def A64FXWrite_1Cyc_GI7 : SchedWriteRes<[A64FXGI7]> { let Latency = 1; }

def A64FXWrite_2Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 2; }
def A64FXWrite_4Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 4; }
def A64FXWrite_6Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 6; }
def A64FXWrite_8Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 8; }
def A64FXWrite_9Cyc_GI0 : SchedWriteRes<[A64FXGI0]> { let Latency = 9; }

def A64FXWrite_3Cyc_GI1 : SchedWriteRes<[A64FXGI1]> { let Latency = 3; }

def A64FXWrite_5Cyc_GI2 : SchedWriteRes<[A64FXGI2]> { let Latency = 5; }

def A64FXWrite_4Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { let Latency = 4; }
def A64FXWrite_6Cyc_GI3 : SchedWriteRes<[A64FXGI3]> { let Latency = 6; }

def A64FXWrite_4Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 4; }
def A64FXWrite_8Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }
def A64FXWrite_9Cyc_GI03 : SchedWriteRes<[A64FXGI03]> { let Latency = 9; }

def A64FXWrite_10Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 10; }
def A64FXWrite_12Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 12; }
def A64FXWrite_20Cyc_GI4 : SchedWriteRes<[A64FXGI4]> { let Latency = 20; }

def A64FXWrite_5Cyc_GI5  : SchedWriteRes<[A64FXGI5]> { let Latency = 5; }
def A64FXWrite_11Cyc_GI5 : SchedWriteRes<[A64FXGI5]> { let Latency = 11; }

def A64FXWrite_5Cyc_GI6 : SchedWriteRes<[A64FXGI6]> { let Latency = 5; }

def A64FXWrite_1Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { let Latency = 1; }
def A64FXWrite_2Cyc_GI24 : SchedWriteRes<[A64FXGI24]> { let Latency = 2; }
def A64FXWrite_4Cyc_NGI24 : SchedWriteRes<[A64FXGI24]> {
  let Latency = 4; let NumMicroOps = 4;
}

def A64FXWrite_1Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_5Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 5; }
def A64FXWrite_8Cyc_GI56  : SchedWriteRes<[A64FXGI56]> { let Latency = 8; }
def A64FXWrite_11Cyc_GI56 : SchedWriteRes<[A64FXGI56]> { let Latency = 11; }

// Load writes (latency and micro-op count per variant).
def A64FXWrite_LDNP : SchedWriteRes<[A64FXGI56]> {
  let Latency = 5; let NumMicroOps = 2;
}
def A64FXWrite_LDP01 : SchedWriteRes<[A64FXGI2456]> {
  let Latency = 5; let NumMicroOps = 3;
}
def A64FXWrite_LDR01 : SchedWriteRes<[A64FXGI2456]> {
  let Latency = 5; let NumMicroOps = 2;
}

def A64FXWrite_LD102 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD103 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 2;
}
def A64FXWrite_LD104 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD105 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 3;
}
def A64FXWrite_LD106 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD107 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 4;
}
def A64FXWrite_LD108 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD109 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 2;
}
def A64FXWrite_LD110 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD111 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 3;
}
def A64FXWrite_LD112 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD113 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 4;
}
def A64FXWrite_LD114 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 8; let NumMicroOps = 5;
}
def A64FXWrite_LD115 : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11; let NumMicroOps = 5;
}

def A64FXWrite_LD1I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_LD1I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_LD2I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 4;
}
def A64FXWrite_LD2I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 5;
}
def A64FXWrite_LD3I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 6;
}
def A64FXWrite_LD3I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 7;
}
def A64FXWrite_LD4I0 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 8;
}
def A64FXWrite_LD4I1 : SchedWriteRes<[A64FXGI056]> {
  let Latency = 8; let NumMicroOps = 9;
}

def A64FXWrite_1Cyc_GI2456 : SchedWriteRes<[A64FXGI2456]> { let Latency = 1; }

// Writes named after the instruction (family) they are used for.
def A64FXWrite_FMOV_GV   : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }
def A64FXWrite_FMOV_VG14 : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_ADDLV     : SchedWriteRes<[A64FXGI03]> { let Latency = 12; }
def A64FXWrite_MULLE     : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_MULLV     : SchedWriteRes<[A64FXGI03]> { let Latency = 14; }
def A64FXWrite_MADDL     : SchedWriteRes<[A64FXGI03]> { let Latency = 6; }
def A64FXWrite_ABA       : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }
def A64FXWrite_ABAL      : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }

def A64FXWrite_ADDLV1 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 12; let NumMicroOps = 6;
}
def A64FXWrite_MINMAXV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 14; let NumMicroOps = 6;
}

def A64FXWrite_SQRDMULH : SchedWriteRes<[A64FXGI03]> { let Latency = 9; }
def A64FXWrite_PMUL     : SchedWriteRes<[A64FXGI03]> { let Latency = 8; }

def A64FXWrite_SRSRAV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 8; let NumMicroOps = 3;
}
def A64FXWrite_SSRAV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 8; let NumMicroOps = 2;
}
def A64FXWrite_RSHRN : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_SHRN : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 2;
}

def A64FXWrite_ADDP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_FMULXE : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 2;
}
def A64FXWrite_FADDPV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 3;
}
def A64FXWrite_SADALP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_SADDLP : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 2;
}
def A64FXWrite_FCVTXNV : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15; let NumMicroOps = 2;
}
def A64FXWrite_FMAXVVH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 14; let NumMicroOps = 7;
}

def A64FXWrite_BIF        : SchedWriteRes<[A64FXGI03]> { let Latency = 5; }
def A64FXWrite_DUPGENERAL : SchedWriteRes<[A64FXGI03]> { let Latency = 10; }

def A64FXWrite_SHA00 : SchedWriteRes<[A64FXGI0]> { let Latency = 9; }
def A64FXWrite_SHA01 : SchedWriteRes<[A64FXGI0]> { let Latency = 12; }

def A64FXWrite_SMOV : SchedWriteRes<[A64FXGI03]> { let Latency = 25; }

def A64FXWrite_TBX1 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 3;
}
def A64FXWrite_TBX2 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 5;
}
def A64FXWrite_TBX3 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 7;
}
def A64FXWrite_TBX4 : SchedWriteRes<[A64FXGI03]> {
  let Latency = 10; let NumMicroOps = 9;
}

// Prefetch, swap and store writes.
def A64FXWrite_PREF0 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_PREF1 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_SWP   : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STUR  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STNP  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_STP01 : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST10  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST11  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST12  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST13  : SchedWriteRes<[A64FXGI56]> { let Latency = 0; }
def A64FXWrite_ST14  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST15  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST16  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }
def A64FXWrite_ST17  : SchedWriteRes<[A64FXGI56]> { let Latency = 1; }

def A64FXWrite_CAS : SchedWriteRes<[A64FXGI56]> { let Latency = 7; }

// Define commonly used read types.

// No forwarding is provided for these types.
def : ReadAdvance<ReadI,       0>;
def : ReadAdvance<ReadISReg,   0>;
def : ReadAdvance<ReadIEReg,   0>;
def : ReadAdvance<ReadIM,      0>;
def : ReadAdvance<ReadIMA,     0>;
def : ReadAdvance<ReadID,      0>;
def : ReadAdvance<ReadExtrHi,  0>;
def : ReadAdvance<ReadAdrBase, 0>;
def : ReadAdvance<ReadST,      0>;
def : ReadAdvance<ReadVLD,     0>;

//===----------------------------------------------------------------------===//
// 3. Instruction Tables.
//---
// 3.1 Branch Instructions
//---

// Branch, immed
// Branch and link, immed
// Compare and branch
def : WriteRes<WriteBr, [A64FXGI7]> { let Latency = 1; }

// Branch, register
// Branch and link, register != LR
// Branch and link, register = LR
def : WriteRes<WriteBrReg, [A64FXGI7]> { let Latency = 1; }

// These writes are given a latency but no processor resources.
def : WriteRes<WriteSys,     []> { let Latency = 1; }
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
def : WriteRes<WriteHint,    []> { let Latency = 1; }

def : WriteRes<WriteAtomic,  []> { let Latency = 4; }

//---
// Branch
//---
def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs B, BL, BR, BLR)>;
def : InstRW<[A64FXWrite_1Cyc_GI7], (instrs RET)>;
def : InstRW<[A64FXWrite_1Cyc_GI7], (instregex "^B..$")>;
def : InstRW<[A64FXWrite_1Cyc_GI7],
             (instregex "^CBZ", "^CBNZ", "^TBZ", "^TBNZ")>;

//---
// 3.2 Arithmetic and Logical Instructions
// 3.3 Move and Shift Instructions
//---

// NOTE(review): the instregex list below is repeated verbatim for WriteI,
// WriteISReg and WriteIEReg, so the three InstRW records match the same
// opcodes (the model sets FullInstRWOverlapCheck = 0).  Confirm which of the
// three mappings is intended to take effect before changing any of them.

// ALU, basic
// Conditional compare
// Conditional select
// Address generation
def : WriteRes<WriteI, [A64FXGI2456]> { let Latency = 1; }

def : InstRW<[WriteI],
             (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
                        "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
                        "ADC(W|X)r",
                        "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
                        "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
                        "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
                        "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
                        "SBCS(W|X)r", "CCMN(W|X)(i|r)",
                        "CCMP(W|X)(i|r)", "CSEL(W|X)r",
                        "CSINC(W|X)r", "CSINV(W|X)r",
                        "CSNEG(W|X)r")>;

def : InstRW<[WriteI], (instrs COPY)>;

// ALU, extend and/or shift
def : WriteRes<WriteISReg, [A64FXGI2456]> { let Latency = 2; }

def : InstRW<[WriteISReg],
             (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
                        "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
                        "ADC(W|X)r",
                        "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
                        "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
                        "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
                        "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
                        "SBCS(W|X)r", "CCMN(W|X)(i|r)",
                        "CCMP(W|X)(i|r)", "CSEL(W|X)r",
                        "CSINC(W|X)r", "CSINV(W|X)r",
                        "CSNEG(W|X)r")>;

def : WriteRes<WriteIEReg, [A64FXGI2456]> { let Latency = 1; }

def : InstRW<[WriteIEReg],
             (instregex "ADD?(W|X)r(i|r|s|x)", "ADDS?(W|X)r(i|r|s|x)(64)?",
                        "AND?(W|X)r(i|r|s|x)", "ANDS?(W|X)r(i|r|s|x)",
                        "ADC(W|X)r",
                        "BIC?(W|X)r(i|r|s|x)", "BICS?(W|X)r(i|r|s|x)",
                        "EON?(W|X)r(i|r|s|x)", "ORN?(W|X)r(i|r|s|x)",
                        "ORR?(W|X)r(i|r|s|x)", "SUB?(W|X)r(i|r|s|x)",
                        "SUBS?(W|X)r(i|r|s|x)", "SBC(W|X)r",
                        "SBCS(W|X)r", "CCMN(W|X)(i|r)",
                        "CCMP(W|X)(i|r)", "CSEL(W|X)r",
                        "CSINC(W|X)r", "CSINV(W|X)r",
                        "CSNEG(W|X)r")>;

// Move immed
def : WriteRes<WriteImm, [A64FXGI2456]> { let Latency = 1; }

def : InstRW<[A64FXWrite_1Cyc_GI2456],
             (instrs MOVKWi, MOVKXi, MOVNWi, MOVNXi, MOVZWi, MOVZXi)>;

def : InstRW<[A64FXWrite_2Cyc_GI24],
             (instrs ASRVWr, ASRVXr, LSLVWr, LSLVXr, RORVWr, RORVXr)>;

// Variable shift
def : WriteRes<WriteIS, [A64FXGI2456]> { let Latency = 1; }

//---
// 3.4 Divide and Multiply Instructions
//---

// Divide, W-form
def : WriteRes<WriteID32, [A64FXGI4]> {
  let Latency = 39;
  let ReleaseAtCycles = [39];
}

// Divide, X-form
def : WriteRes<WriteID64, [A64FXGI4]> {
  let Latency = 23;
  let ReleaseAtCycles = [23];
}

// Multiply accumulate, W-form
def : WriteRes<WriteIM32, [A64FXGI2456]> { let Latency = 5; }

// Multiply accumulate, X-form
def : WriteRes<WriteIM64, [A64FXGI2456]> { let Latency = 5; }

// W-form uses WriteIM32 and X-form uses WriteIM64.  Both writes are defined
// just above with the same latency and resources.
def : InstRW<[WriteIM32], (instrs MADDWrrr, MSUBWrrr)>;
def : InstRW<[WriteIM64], (instrs MADDXrrr, MSUBXrrr)>;
def : InstRW<[A64FXWrite_MADDL],
             (instregex "(S|U)(MADDL|MSUBL)rrr")>;

def : InstRW<[WriteID32], (instrs SDIVWr, UDIVWr)>;
def : InstRW<[WriteID64], (instrs SDIVXr, UDIVXr)>;

// Bitfield extract, two reg
def : WriteRes<WriteExtr, [A64FXGI2456]> { let Latency = 1; }

// Multiply high
def : InstRW<[A64FXWrite_5Cyc_GI2], (instrs SMULHrr, UMULHrr)>;

// Miscellaneous Data-Processing Instructions
// Bitfield extract
def : InstRW<[A64FXWrite_2Cyc_GI24], (instrs EXTRWrri, EXTRXrri)>;

// Bitfield move - basic
def : InstRW<[A64FXWrite_1Cyc_GI24],
             (instrs SBFMWri, SBFMXri, UBFMWri, UBFMXri)>;

// Bitfield move, insert
def : InstRW<[A64FXWrite_4Cyc_NGI24], (instregex "^BFM")>;
def : InstRW<[A64FXWrite_1Cyc_GI24], (instregex "(S|U)?BFM.*")>;

// Count leading
def : InstRW<[A64FXWrite_2Cyc_GI0], (instregex "^CLS(W|X)r$",
                                               "^CLZ(W|X)r$")>;

// Reverse bits
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs RBITWr, RBITXr)>;

// Cryptography Extensions
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AES[DE]")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^AESI?MC")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^PMULL")>;
def : InstRW<[A64FXWrite_SHA00], (instregex "^SHA1SU0")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA1(H|SU1)")>;
def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA1[CMP]")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU0")>;
def : InstRW<[A64FXWrite_8Cyc_GI0], (instregex "^SHA256SU1")>;
def : InstRW<[A64FXWrite_SHA01], (instregex "^SHA256(H|H2)")>;

// CRC Instructions
def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32Brr, CRC32Hrr)>;
def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32Wrr)>;
def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32Xrr)>;

def : InstRW<[A64FXWrite_10Cyc_GI4], (instrs CRC32CBrr, CRC32CHrr)>;
def : InstRW<[A64FXWrite_12Cyc_GI4], (instrs CRC32CWrr)>;
def : InstRW<[A64FXWrite_20Cyc_GI4], (instrs CRC32CXrr)>;
// Reverse bits/bytes
// NOTE: Handled by WriteI.

//---
// 3.6 Load Instructions
// 3.10 FP Load Instructions
//---

// Covers:
//   Load register, literal
//   Load register, unscaled immed
//   Load register, immed unprivileged
//   Load register, unsigned immed
def : WriteRes<WriteLD, [A64FXGI56]> { let Latency = 4; }

// Load register, immed post-index
// NOTE: Handled by WriteLD, WriteI.
// Load register, immed pre-index
// NOTE: Handled by WriteLD, WriteAdr.
def : WriteRes<WriteAdr, [A64FXGI2456]> { let Latency = 1; }

// Covers:
//   Load pair, immed offset, normal
//   Load pair, immed offset, signed words, base != SP
//   Load pair, immed offset signed words, base = SP
// LDP only breaks into *one* LS micro-op.  Thus
// the resources are handled by WriteLD.
def : WriteRes<WriteLDHi, []> { let Latency = 5; }

// Covers:
//   Load register offset, basic
//   Load register, register offset, scale by 4/8
//   Load register, register offset, scale by 2
//   Load register offset, extend
//   Load register, register offset, extend, scale by 4/8
//   Load register, register offset, extend, scale by 2
// Both variants resolve to the same write.
def A64FXWriteLDIdx : SchedWriteVariant<[
  SchedVar<ScaledIdxPred, [A64FXWrite_1Cyc_GI56]>,
  SchedVar<NoSchedPred,   [A64FXWrite_1Cyc_GI56]>
]>;
def : SchedAlias<WriteLDIdx, A64FXWriteLDIdx>;

// Both variants resolve to ReadDefault.
def A64FXReadAdrBase : SchedReadVariant<[
  SchedVar<ScaledIdxPred, [ReadDefault]>,
  SchedVar<NoSchedPred,   [ReadDefault]>
]>;
def : SchedAlias<ReadAdrBase, A64FXReadAdrBase>;

// Load pair, immed pre-index, normal
// Load pair, immed pre-index, signed words
// Load pair, immed post-index, normal
// Load pair, immed post-index, signed words
// NOTE: Handled by WriteLD, WriteLDHi, WriteAdr.
// Load pair, non-temporal, immed offset.
def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPDi)>;
def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPQi)>;
def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPSi)>;
def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPWi)>;
def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDNPXi)>;

// Load pair, immed offset.
def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPDi)>;
def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPQi)>;
def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSi)>;
def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPSWi)>;
def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPWi)>;
def : InstRW<[A64FXWrite_LDNP, WriteLDHi], (instrs LDPXi)>;

// Load register, unsigned immed.
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRBui)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRDui)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRHui)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRQui)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDRSui)>;

// Load register, literal.
def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRDl)>;
def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRQl)>;
def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRWl)>;
def : InstRW<[A64FXWrite_5Cyc_GI6], (instrs LDRXl)>;

// Load register, immed unprivileged.
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRBi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRHi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRWi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRXi)>;

def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBWi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSBXi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHWi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSHXi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDTRSWi)>;

// Load pair, immed pre-index (D, Q, S, W and X forms).
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs LDPDpre)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs LDPQpre)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs LDPSpre)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs LDPWpre)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs LDPXpre)>;

// Load register, immed pre-index.
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBpre)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRDpre)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHpre)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRQpre)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSpre)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRWpre)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRXpre)>;

// Load register, sign-/zero-extending byte and halfword, pre- and post-index.
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpre)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpre)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBWpost)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSBXpost)>;

def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpre)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpre)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHWpost)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRSHXpost)>;

def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpre)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRBBpost)>;

def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpre)>;
def : InstRW<[A64FXWrite_LDR01, WriteAdr], (instrs LDRHHpost)>;

// Load pair, immed post-index.
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs LDPDpost)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs LDPQpost)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs LDPSpost)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs LDPWpost)>;
def : InstRW<[A64FXWrite_LDP01, WriteLDHi, WriteAdr], (instrs LDPXpost)>;

// Load register, immed post-index.
def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRBpost)>;
def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRDpost)>;
def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRHpost)>;
def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRQpost)>;
def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRSpost)>;
def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRWpost)>;
def : InstRW<[A64FXWrite_LDR01, WriteI], (instrs LDRXpost)>;

// Load register, register offset (W index register).
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroW)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroW)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroW)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroW)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroW)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroW)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroW)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroW)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroW)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroW)>;

// Load register, register offset (X index register).
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRBroX)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRDroX)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHHroX)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRHroX)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRQroX)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSroX)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHWroX)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRSHXroX)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRWroX)>;
def : InstRW<[A64FXWrite_5Cyc_GI56, ReadAdrBase], (instrs LDRXroX)>;

// Load register, unscaled immed.
// NOTE(review): LDURWi has no record in this list while LDURXi does; confirm
// whether that is intentional.
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURBBi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURDi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURHHi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURQi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURXi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBWi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSBXi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHWi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSHXi)>;
def : InstRW<[A64FXWrite_5Cyc_GI56], (instrs LDURSWi)>;

//---
// Prefetch
//---
def : InstRW<[A64FXWrite_PREF0], (instrs PRFMl)>;
def : InstRW<[A64FXWrite_PREF1], (instrs PRFUMi)>;
def : InstRW<[A64FXWrite_PREF1], (instrs PRFMui)>;
def : InstRW<[A64FXWrite_PREF1], (instrs PRFMroW)>;
def : InstRW<[A64FXWrite_PREF1], (instrs PRFMroX)>;

//--
// 3.7 Store Instructions
// 3.11 FP Store Instructions
//--

// Store register, unscaled immed
// Store register, immed unprivileged
// Store register, unsigned immed
def : WriteRes<WriteST, [A64FXGI56]> { let Latency = 1; }

// Store register, immed post-index
// NOTE: Handled by WriteAdr, WriteST, ReadAdrBase

// Store register, immed pre-index
// NOTE: Handled by WriteAdr, WriteST

// Store register, register offset, basic
// Store register, register offset, scaled by 4/8
// Store register, register offset, scaled by 2
// Store register, register offset, extend
// Store register, register offset, extend, scale by 4/8
// Store register, register offset, extend, scale by 1
def : WriteRes<WriteSTIdx, [A64FXGI56, A64FXGI2456]> { let Latency = 1; }

// Store pair, immed offset, W-form
// Store pair, immed offset, X-form
def : WriteRes<WriteSTP, [A64FXGI56]> { let Latency = 1; }

// Store pair, immed post-index, W-form
// Store pair, immed post-index, X-form
// Store pair, immed pre-index, W-form
// Store pair, immed pre-index, X-form
// NOTE: Handled by WriteAdr, WriteSTP.
//---
// Per-instruction store overrides (InstRW).
//
// Every (write list, instruction set) record in this section appears exactly
// once. The model sets FullInstRWOverlapCheck = 0, so TableGen would accept
// repeated records silently; keep them out of this list by hand.
//---

// Store register, unscaled immed
def : InstRW<[A64FXWrite_STUR], (instrs STURBi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURBBi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURDi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURHi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURHHi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURQi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURSi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURWi)>;
def : InstRW<[A64FXWrite_STUR], (instrs STURXi)>;

// Store register, immed unprivileged
def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRBi)>;
def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRHi)>;
def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRWi)>;
def : InstRW<[WriteAdr, A64FXWrite_STUR], (instrs STTRXi)>;

// Store pair, no-allocate (STNP*)
def : InstRW<[A64FXWrite_STNP], (instrs STNPDi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STNPQi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STNPXi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STNPWi)>;

// Store pair, immed offset (same write as STNP)
def : InstRW<[A64FXWrite_STNP], (instrs STPDi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STPQi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STPXi)>;
def : InstRW<[A64FXWrite_STNP], (instrs STPWi)>;

// Store register, unsigned immed
// NOTE(review): STRBBui, STRHHui and STRSui are not listed in this group --
// confirm whether they are meant to fall back to the generic WriteST mapping.
def : InstRW<[A64FXWrite_STUR], (instrs STRBui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRDui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRHui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRQui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRXui)>;
def : InstRW<[A64FXWrite_STUR], (instrs STRWui)>;

// Store pair, immed pre-/post-index.
// Each register class has two overlapping records: one without and one with
// ReadAdrBase. They are kept in this order on purpose.
// NOTE(review): both records match the same instructions -- confirm which of
// the two is the intended mapping before removing either.
def : InstRW<[A64FXWrite_STP01],
             (instrs STPDpre, STPDpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
             (instrs STPDpre, STPDpost)>;
def : InstRW<[A64FXWrite_STP01],
             (instrs STPQpre, STPQpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
             (instrs STPQpre, STPQpost)>;
def : InstRW<[A64FXWrite_STP01],
             (instrs STPSpre, STPSpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
             (instrs STPSpre, STPSpost)>;
def : InstRW<[A64FXWrite_STP01],
             (instrs STPWpre, STPWpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
             (instrs STPWpre, STPWpost)>;
def : InstRW<[A64FXWrite_STP01],
             (instrs STPXpre, STPXpost)>;
def : InstRW<[A64FXWrite_STP01, ReadAdrBase],
             (instrs STPXpre, STPXpost)>;

// Store register, immed pre-/post-index.
// Same two-record pattern as the store-pair group above, with WriteAdr listed
// first in each write list.
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRBpre, STRBpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRBpre, STRBpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRBBpre, STRBBpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRBBpre, STRBBpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRDpre, STRDpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRDpre, STRDpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRHpre, STRHpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRHpre, STRHpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRHHpre, STRHHpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRHHpre, STRHHpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRQpre, STRQpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRQpre, STRQpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRSpre, STRSpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRSpre, STRSpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRWpre, STRWpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRWpre, STRWpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01],
             (instrs STRXpre, STRXpost)>;
def : InstRW<[WriteAdr, A64FXWrite_STP01, ReadAdrBase],
             (instrs STRXpre, STRXpost)>;

// Store register, register offset (W- and X-register index forms)
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRBroW, STRBroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRBBroW, STRBBroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRDroW, STRDroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRHroW, STRHroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRHHroW, STRHHroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRQroW, STRQroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRSroW, STRSroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRWroW, STRWroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRXroW, STRXroX)>;
def : InstRW<[A64FXWrite_STUR, ReadAdrBase],
             (instrs STRXroW, STRXroX)>;

//---
// 3.8 FP Data Processing Instructions
//---

// Generic WriteF covers:
//   FP absolute value
//   FP min/max
//   FP negate
let Latency = 4, ReleaseAtCycles = [2] in
def : WriteRes<WriteF, [A64FXGI03]>;

// FP arithmetic (scalar D- and H-register add/sub)
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FADDDrr, FADDHrr)>;
def : InstRW<[A64FXWrite_4Cyc_GI03], (instrs FSUBDrr, FSUBHrr)>;

// FP compare
let Latency = 4, ReleaseAtCycles = [2] in
def : WriteRes<WriteFCmp, [A64FXGI03]>;

// FP Div, Sqrt: generic mapping, then the named writes used by the
// per-instruction overrides further down. All of them use A64FXGI0 and
// differ only in latency.
let Latency = 43 in
def : WriteRes<WriteFDiv, [A64FXGI0]>;

let Latency = 38 in
def A64FXXWriteFDiv : SchedWriteRes<[A64FXGI0]>;

let Latency = 29 in
def A64FXXWriteFDivSP : SchedWriteRes<[A64FXGI0]>;

let Latency = 43 in
def A64FXXWriteFDivDP : SchedWriteRes<[A64FXGI0]>;

let Latency = 29 in
def A64FXXWriteFSqrtSP : SchedWriteRes<[A64FXGI0]>;

let Latency = 43 in
def A64FXXWriteFSqrtDP : SchedWriteRes<[A64FXGI0]>;

// FP divide, S-form
// FP square root, S-form
// NOTE(review): "^.*SQRT.*32$" is not restricted to FSQRT; it matches any
// opcode name that contains SQRT and ends in 32 -- confirm that is intended.
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVSrr)>;
def : InstRW<[A64FXXWriteFSqrtSP], (instrs FSQRTSr)>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVv.*32$")>;
def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "^FDIVSrr")>;
def : InstRW<[A64FXXWriteFSqrtSP], (instregex "^FSQRTSr")>;

// FP divide, D-form
// FP square root, D-form
// Same layout as the S-form group, using the DP writes.
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVDrr)>;
def : InstRW<[A64FXXWriteFSqrtDP], (instrs FSQRTDr)>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVv.*64$")>;
def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "^FDIVDrr")>;
def : InstRW<[A64FXXWriteFSqrtDP], (instregex "^FSQRTDr")>;

// FP round to integral
def : InstRW<[A64FXWrite_9Cyc_GI03],
             (instregex "^FRINT(A|I|M|N|P|X|Z)(Sr|Dr)")>;

// FP select
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCSEL")>;

//---
// 3.9 FP Miscellaneous Instructions
//---

// Generic WriteFCvt covers:
//   FP convert, from vec to vec reg
//   FP convert, from gen to vec reg
//   FP convert, from vec to gen reg
let Latency = 9, ReleaseAtCycles = [2] in
def : WriteRes<WriteFCvt, [A64FXGI03]>;

// WriteFImm and WriteFCopy share the same resource, latency and release
// cycle, so they are set together.
let Latency = 4, ReleaseAtCycles = [2] in {
  // FP move, immed
  // FP move, register
  def : WriteRes<WriteFImm, [A64FXGI0]>;

  // FP transfer, from gen to vec reg
  // FP transfer, from vec to gen reg
  def : WriteRes<WriteFCopy, [A64FXGI0]>;
}

def : InstRW<[A64FXWrite_FMOV_GV], (instrs FMOVXDHighr)>;
def : InstRW<[A64FXWrite_FMOV_VG14], (instrs FMOVDXHighr)>;

//---
// 3.12 ASIMD Integer Instructions
//---

// ASIMD absolute diff, D-form
// ASIMD absolute diff, Q-form
// ASIMD absolute diff accum, D-form
// ASIMD absolute diff accum, Q-form
// ASIMD absolute diff accum long
// ASIMD absolute diff long
// ASIMD arith, basic
// ASIMD arith, complex
// ASIMD compare
// ASIMD logical (AND, BIC, EOR)
// ASIMD max/min, basic
// ASIMD max/min, reduce, 4H/4S
// ASIMD max/min, reduce, 8B/8H
// ASIMD max/min, reduce, 16B
// ASIMD multiply, D-form
// ASIMD multiply, Q-form
// ASIMD multiply accumulate long
// ASIMD multiply accumulate saturating long
// ASIMD multiply long
// ASIMD pairwise add and accumulate
// ASIMD shift accumulate
// ASIMD shift by immed, basic
// ASIMD shift by immed and insert, basic, D-form
// ASIMD shift by immed and insert, basic, Q-form
// ASIMD shift by immed, complex
// ASIMD shift by register, basic, D-form
// ASIMD shift by register, basic, Q-form
// ASIMD shift by register, complex, D-form
// ASIMD shift by register, complex, Q-form
// Generic mapping for the D- and Q-form vector writes (WriteVd / WriteVq):
// FLA/FLB group (A64FXGI03), latency 4.
def : WriteRes<WriteVd, [A64FXGI03]> {
  let Latency = 4;
}
def : WriteRes<WriteVq, [A64FXGI03]> {
  let Latency = 4;
}

// NOTE(review): many instregex patterns in the ASIMD sections below overlap
// (for example "^INSv" and "^[SU]MOVv" each appear more than once). The model
// sets FullInstRWOverlapCheck = 0, so such overlaps are not diagnosed --
// confirm the intended precedence before reordering or merging records.

// ASIMD arith, reduce, 4H/4S
// ASIMD arith, reduce, 8B/8H
// ASIMD arith, reduce, 16B

// ASIMD logical (MVN (alias for NOT), ORN, ORR)
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^ANDv", "^BICv", "^EORv", "^ORRv", "^ORNv", "^NOTv")>;

// ASIMD arith, reduce
def : InstRW<[A64FXWrite_ADDLV],
             (instregex "^ADDVv", "^SADDLVv", "^UADDLVv")>;

// ASIMD polynomial (8x8) multiply long
def : InstRW<[A64FXWrite_MULLE], (instregex "^(S|U|SQD)MULL")>;
def : InstRW<[A64FXWrite_MULLV],
             (instregex "(S|U|SQD)(MLAL|MLSL|MULL)v.*")>;
def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v8i8|v16i8)")>;
def : InstRW<[A64FXWrite_8Cyc_GI03], (instregex "^PMULL(v1i64|v2i64)")>;

// ASIMD absolute diff accum, D-form
def : InstRW<[A64FXWrite_ABA],
             (instregex "^[SU]ABA(v8i8|v4i16|v2i32)$")>;
// ASIMD absolute diff accum, Q-form
def : InstRW<[A64FXWrite_ABA],
             (instregex "^[SU]ABA(v16i8|v8i16|v4i32)$")>;
// ASIMD absolute diff accum long
def : InstRW<[A64FXWrite_ABAL],
             (instregex "^[SU]ABAL")>;
// ASIMD arith, reduce, 8B/4H/2S
def : InstRW<[A64FXWrite_ADDLV1],
             (instregex "^[SU]?ADDL?V(v8i8|v4i16|v2i32)v$")>;
// ASIMD arith, reduce, 8H/4S
def : InstRW<[A64FXWrite_ADDLV1],
             (instregex "^[SU]?ADDL?V(v8i16|v4i32)v$")>;
// ASIMD arith, reduce, 16B
def : InstRW<[A64FXWrite_ADDLV1],
             (instregex "^[SU]?ADDL?Vv16i8v$")>;
// ASIMD max/min, reduce, 4H/4S
def : InstRW<[A64FXWrite_MINMAXV],
             (instregex "^[SU](MIN|MAX)V(v4i16|v4i32)v$")>;
// ASIMD max/min, reduce, 8B/8H
def : InstRW<[A64FXWrite_MINMAXV],
             (instregex "^[SU](MIN|MAX)V(v8i8|v8i16)v$")>;
// ASIMD max/min, reduce, 16B
def : InstRW<[A64FXWrite_MINMAXV],
             (instregex "^[SU](MIN|MAX)Vv16i8v$")>;
// ASIMD multiply, D-form
def : InstRW<[A64FXWrite_PMUL],
             (instregex "^(P?MUL|SQR?DMUL)" #
                        "(v8i8|v4i16|v2i32|v1i8|v1i16|v1i32|v1i64)" #
                        "(_indexed)?$")>;

// ASIMD multiply, Q-form
def : InstRW<[A64FXWrite_PMUL],
             (instregex "^(P?MUL)(v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD multiply, Q-form (saturating doubling multiply high)
def : InstRW<[A64FXWrite_SQRDMULH],
             (instregex "^(SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;

// ASIMD multiply accumulate, D-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
             (instregex "^ML[AS](v8i8|v4i16|v2i32)(_indexed)?$")>;
// ASIMD multiply accumulate, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
             (instregex "^ML[AS](v16i8|v8i16|v4i32)(_indexed)?$")>;
// ASIMD shift accumulate
def : InstRW<[A64FXWrite_SRSRAV],
             (instregex "SRSRAv", "URSRAv")>;
def : InstRW<[A64FXWrite_SSRAV],
             (instregex "SSRAv", "USRAv")>;

// ASIMD shift by immed, basic
def : InstRW<[A64FXWrite_RSHRN],
             (instregex "RSHRNv", "SQRSHRNv", "SQRSHRUNv", "UQRSHRNv")>;
def : InstRW<[A64FXWrite_SHRN],
             (instregex "SHRNv", "SQSHRNv", "SQSHRUNv", "UQSHRNv")>;

def : InstRW<[A64FXWrite_6Cyc_GI3],
             (instregex "SQXTNv", "SQXTUNv", "UQXTNv")>;

// ASIMD shift by immed, complex
def : InstRW<[A64FXWrite_ABA], (instregex "^[SU]?(Q|R){1,2}SHR")>;
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^SQSHLU")>;
// ASIMD shift by register, basic, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3],
             (instregex "^[SU]SHL(v16i8|v8i16|v4i32|v2i64)")>;
// ASIMD shift by register, complex, D-form
def : InstRW<[A64FXWrite_6Cyc_GI3],
             (instregex "^[SU][QR]{1,2}SHL" #
                        "(v1i8|v1i16|v1i32|v1i64|v8i8|v4i16|v2i32|b|d|h|s)")>;
// ASIMD shift by register, complex, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3],
             (instregex "^[SU][QR]{1,2}SHL(v16i8|v8i16|v4i32|v2i64)")>;

// ASIMD Arithmetic
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "(ADD|SUB)(v8i8|v4i16|v2i32|v1i64)")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "(ADD|SUB)(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[A64FXWrite_SHRN], (instregex "(ADD|SUB)HNv.*")>;
def : InstRW<[A64FXWrite_RSHRN], (instregex "(RADD|RSUB)HNv.*")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^SQADD", "^SQNEG", "^SQSUB", "^SRHADD",
                        "^SUQADD", "^UQADD", "^UQSUB", "^URHADD", "^USQADD")>;
def : InstRW<[A64FXWrite_ADDP],
             (instregex "ADDP(v16i8|v8i16|v4i32|v2i64)")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "((AND|ORN|EOR|EON)S?(Xr[rsi]|v16i8|v8i16|v4i32)|" #
                        "(ORR|BIC)S?(Xr[rs]|v16i8|v8i16|v4i32))")>;
def : InstRW<[A64FXWrite_4Cyc_GI0],
             (instregex "(CLS|CLZ|CNT)(v4i32|v8i16|v16i8)")>;
def : InstRW<[A64FXWrite_SADALP], (instregex "^SADALP", "^UADALP")>;
def : InstRW<[A64FXWrite_SADDLP], (instregex "^SADDLPv", "^UADDLPv")>;
def : InstRW<[A64FXWrite_ADDLV1], (instregex "^SADDLV", "^UADDLV")>;
def : InstRW<[A64FXWrite_MINMAXV],
             (instregex "^ADDVv", "^SMAXVv", "^UMAXVv", "^SMINVv", "^UMINVv")>;
def : InstRW<[A64FXWrite_ABA],
             (instregex "^SABAv", "^UABAv", "^SABALv", "^UABALv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^SQADDv", "^SQSUBv", "^UQADDv", "^UQSUBv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^SUQADDv", "^USQADDv")>;
def : InstRW<[A64FXWrite_SHRN],
             (instregex "^ADDHNv", "^SUBHNv")>;
def : InstRW<[A64FXWrite_RSHRN],
             (instregex "^RADDHNv", "^RSUBHNv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^SQABS", "^SQADD", "^SQNEG", "^SQSUB",
                        "^SRHADD", "^SUQADD", "^UQADD", "^UQSUB",
                        "^URHADD", "^USQADD")>;

// ASIMD compare, basic max/min, pairwise max/min, absolute difference
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^CMEQv", "^CMGEv", "^CMGTv",
                        "^CMLEv", "^CMLTv", "^CMHIv", "^CMHSv")>;
def : InstRW<[A64FXWrite_MINMAXV],
             (instregex "^SMAXv", "^SMINv", "^UMAXv", "^UMINv")>;
def : InstRW<[A64FXWrite_ADDP],
             (instregex "^SMAXPv", "^SMINPv", "^UMAXPv", "^UMINPv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^SABDv", "^UABDv")>;
def : InstRW<[A64FXWrite_TBX1],
             (instregex "^SABDLv", "^UABDLv")>;

//---
// 3.13 ASIMD Floating-point Instructions
//---

// Generic WriteFMul mapping: A64FXGI03, latency 9.
def : WriteRes<WriteFMul, [A64FXGI03]> {
  let Latency = 9;
}

// ASIMD FP absolute value
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FABSv")>;

// ASIMD FP arith, normal, D-form
// ASIMD FP arith, normal, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
             (instregex "^FABDv", "^FADDv", "^FSUBv")>;

// ASIMD FP arith, pairwise, D-form
// ASIMD FP arith, pairwise, Q-form
def : InstRW<[A64FXWrite_FADDPV], (instregex "^FADDPv")>;

// ASIMD FP compare, D-form
// ASIMD FP compare, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FACGEv", "^FACGTv")>;
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FCMEQv", "^FCMGEv",
                                                 "^FCMGTv", "^FCMLEv",
                                                 "^FCMLTv")>;
// ASIMD FP round, D-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
             (instregex "^FRINT[AIMNPXZ](v2f32)")>;
// ASIMD FP round, Q-form
def : InstRW<[A64FXWrite_9Cyc_GI03],
             (instregex "^FRINT[AIMNPXZ](v4f32|v2f64)")>;

// ASIMD FP convert, long
// ASIMD FP convert, narrow
// ASIMD FP convert, other, D-form
// ASIMD FP convert, other, Q-form

// ASIMD FP convert, long and narrow
def : InstRW<[A64FXWrite_FCVTXNV], (instregex "^FCVT(L|N|XN)v")>;
// ASIMD FP convert, other, D-form
def : InstRW<[A64FXWrite_FCVTXNV],
      (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v2f32|v1i32|v2i32|v1i64)")>;
// ASIMD FP convert, other, Q-form
def : InstRW<[A64FXWrite_FCVTXNV],
      (instregex "^[FVSU]CVT([AMNPZ][SU])?(_Int)?(v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP divide, D-form, F32
def : InstRW<[A64FXXWriteFDivSP], (instrs FDIVv2f32)>;
def : InstRW<[A64FXXWriteFDivSP], (instregex "FDIVv2f32")>;

// ASIMD FP divide, Q-form, F32
def : InstRW<[A64FXXWriteFDiv], (instrs FDIVv4f32)>;
def : InstRW<[A64FXXWriteFDiv], (instregex "FDIVv4f32")>;

// ASIMD FP divide, Q-form, F64
def : InstRW<[A64FXXWriteFDivDP], (instrs FDIVv2f64)>;
def : InstRW<[A64FXXWriteFDivDP], (instregex "FDIVv2f64")>;

// ASIMD FP max/min, normal, D-form
// ASIMD FP max/min, normal, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMAXv", "^FMAXNMv",
                                                "^FMINv", "^FMINNMv")>;

// ASIMD FP max/min, pairwise, D-form
// ASIMD FP max/min, pairwise, Q-form
def : InstRW<[A64FXWrite_ADDP], (instregex "^FMAXPv", "^FMAXNMPv",
                                            "^FMINPv", "^FMINNMPv")>;

// ASIMD FP max/min, reduce
def : InstRW<[A64FXWrite_FMAXVVH], (instregex "^FMAXVv", "^FMAXNMVv",
                                               "^FMINVv", "^FMINNMVv")>;

// ASIMD FP multiply, D-form, FZ
// ASIMD FP multiply, D-form, no FZ
// ASIMD FP multiply, Q-form, FZ
// ASIMD FP multiply, Q-form, no FZ
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMULv", "^FMULXv")>;
def : InstRW<[A64FXWrite_FMULXE],
             (instregex "^FMULX?(v2f32|v1i32|v2i32|v1i64|32|64)")>;
def : InstRW<[A64FXWrite_FMULXE],
             (instregex "^FMULX?(v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP multiply accumulate, Dform, FZ
// ASIMD FP multiply accumulate, Dform, no FZ
// ASIMD FP multiply accumulate, Qform, FZ
// ASIMD FP multiply accumulate, Qform, no FZ
def : InstRW<[A64FXWrite_9Cyc_GI03], (instregex "^FMLAv", "^FMLSv")>;
def : InstRW<[A64FXWrite_FMULXE],
             (instregex "^FML[AS](v2f32|v1i32|v2i32|v1i64)")>;
def : InstRW<[A64FXWrite_FMULXE],
             (instregex "^FML[AS](v4f32|v2f64|v4i32|v2i64)")>;

// ASIMD FP negate
def : InstRW<[A64FXWrite_4Cyc_GI03], (instregex "^FNEGv")>;

//--
// 3.14 ASIMD Miscellaneous Instructions
//--

// ASIMD bit reverse
def : InstRW<[A64FXWrite_1Cyc_GI2456], (instregex "^RBITv")>;

// ASIMD bitwise insert, D-form
// ASIMD bitwise insert, Q-form
def : InstRW<[A64FXWrite_BIF],
             (instregex "^BIFv", "^BITv", "^BSLv")>;

// ASIMD count, D-form
// ASIMD count, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI0],
             (instregex "^CLSv", "^CLZv", "^CNTv")>;

// ASIMD duplicate, gen reg
// ASIMD duplicate, element
def : InstRW<[A64FXWrite_DUPGENERAL], (instregex "^DUPv")>;
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUP(i8|i16|i32|i64)$")>;
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^DUPv.+gpr")>;

// ASIMD extract
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^EXTv")>;

// ASIMD extract narrow
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^XTNv")>;

// ASIMD extract narrow, saturating
def : InstRW<[A64FXWrite_6Cyc_GI3],
             (instregex "^SQXTNv", "^SQXTUNv", "^UQXTNv")>;

// ASIMD insert, element to element
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;

// ASIMD transfer, element to gen reg
def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;

// ASIMD move, integer immed
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^MOVIv")>;

// ASIMD move, FP immed
def : InstRW<[A64FXWrite_4Cyc_GI0], (instregex "^FMOVv")>;

// ASIMD table lookup, D-form
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv8i8One")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv8i8Two")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv8i8Three")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv8i8Four")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv8i8One")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv8i8Two")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv8i8Three")>;
def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv8i8Four")>;

// ASIMD table lookup, Q-form
def : InstRW<[A64FXWrite_6Cyc_GI3], (instregex "^TBLv16i8One")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBLv16i8Two")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBLv16i8Three")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBLv16i8Four")>;
def : InstRW<[A64FXWrite_TBX1], (instregex "^TBXv16i8One")>;
def : InstRW<[A64FXWrite_TBX2], (instregex "^TBXv16i8Two")>;
def : InstRW<[A64FXWrite_TBX3], (instregex "^TBXv16i8Three")>;
def : InstRW<[A64FXWrite_TBX4], (instregex "^TBXv16i8Four")>;

// ASIMD unzip/zip
def : InstRW<[A64FXWrite_6Cyc_GI0],
             (instregex "^UZP1", "^UZP2", "^ZIP1", "^ZIP2")>;

// ASIMD reciprocal estimate, D-form
// ASIMD reciprocal estimate, Q-form
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^FRECPEv", "^FRECPXv", "^URECPEv",
                        "^FRSQRTEv", "^URSQRTEv")>;

// ASIMD reciprocal step, D-form, FZ
// ASIMD reciprocal step, D-form, no FZ
// ASIMD reciprocal step, Q-form, FZ
// ASIMD reciprocal step, Q-form, no FZ
def : InstRW<[A64FXWrite_9Cyc_GI0], (instregex "^FRECPSv", "^FRSQRTSv")>;

// ASIMD reverse
def : InstRW<[A64FXWrite_4Cyc_GI03],
             (instregex "^REV16v", "^REV32v", "^REV64v")>;

// ASIMD table lookup, D-form
// ASIMD table lookup, Q-form
// NOTE(review): overlaps the per-variant TBL/TBX records above -- confirm
// which mapping is meant to apply.
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TBLv", "^TBXv")>;

// ASIMD transfer, element to word or word
def : InstRW<[A64FXWrite_SMOV], (instregex "^[SU]MOVv")>;

// ASIMD transfer, element to gen reg
def : InstRW<[A64FXWrite_SMOV], (instregex "(S|U)MOVv.*")>;

// ASIMD transfer gen reg to element
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^INSv")>;

// ASIMD transpose
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^TRN1v", "^TRN2v",
                                                "^UZP1v", "^UZP2v")>;

// ASIMD unzip/zip
def : InstRW<[A64FXWrite_6Cyc_GI0], (instregex "^ZIP1v", "^ZIP2v")>;

//--
// 3.15 ASIMD Load Instructions
//--
// In this section every _POST (post-index) form lists WriteAdr after its
// load write; the non-post forms list the load write only.

// ASIMD load, 1 element, multiple, 1 reg, D-form
// ASIMD load, 1 element, multiple, 1 reg, Q-form
def : InstRW<[A64FXWrite_8Cyc_GI56],
             (instregex "^LD1Onev(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_11Cyc_GI56],
             (instregex "^LD1Onev(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
             (instregex "^LD1Onev(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD109, WriteAdr],
             (instregex "^LD1Onev(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 2 reg, D-form
// ASIMD load, 1 element, multiple, 2 reg, Q-form
def : InstRW<[A64FXWrite_LD102],
             (instregex "^LD1Twov(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD103],
             (instregex "^LD1Twov(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
             (instregex "^LD1Twov(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
             (instregex "^LD1Twov(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 3 reg, D-form
// ASIMD load, 1 element, multiple, 3 reg, Q-form
def : InstRW<[A64FXWrite_LD104],
             (instregex "^LD1Threev(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD105],
             (instregex "^LD1Threev(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
             (instregex "^LD1Threev(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
             (instregex "^LD1Threev(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, multiple, 4 reg, D-form
// ASIMD load, 1 element, multiple, 4 reg, Q-form
def : InstRW<[A64FXWrite_LD106],
             (instregex "^LD1Fourv(8b|4h|2s|1d|2d)$")>;
def : InstRW<[A64FXWrite_LD107],
             (instregex "^LD1Fourv(16b|8h|4s)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
             (instregex "^LD1Fourv(8b|4h|2s|1d|2d)_POST$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
             (instregex "^LD1Fourv(16b|8h|4s)_POST$")>;

// ASIMD load, 1 element, one lane, B/H/S
// ASIMD load, 1 element, one lane, D
def : InstRW<[A64FXWrite_LD1I0], (instregex "^LD1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD1I1, WriteAdr],
             (instregex "^LD1i(8|16|32|64)_POST$")>;

// ASIMD load, 1 element, all lanes, D-form, B/H/S
// ASIMD load, 1 element, all lanes, D-form, D
// ASIMD load, 1 element, all lanes, Q-form
def : InstRW<[A64FXWrite_8Cyc_GI03],
             (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD108, WriteAdr],
             (instregex "^LD1Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, multiple, D-form, B/H/S
// ASIMD load, 2 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_LD103],
             (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD111, WriteAdr],
             (instregex "^LD2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 2 element, one lane, B/H
// ASIMD load, 2 element, one lane, S
// ASIMD load, 2 element, one lane, D
def : InstRW<[A64FXWrite_LD2I0], (instregex "^LD2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD2I1, WriteAdr],
             (instregex "^LD2i(8|16|32|64)_POST$")>;

// ASIMD load, 2 element, all lanes, D-form, B/H/S
// ASIMD load, 2 element, all lanes, D-form, D
// ASIMD load, 2 element, all lanes, Q-form
def : InstRW<[A64FXWrite_LD102],
             (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD110, WriteAdr],
             (instregex "^LD2Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, multiple, D-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, B/H/S
// ASIMD load, 3 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_LD105],
             (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD113, WriteAdr],
             (instregex "^LD3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 3 element, one lane, B/H
// ASIMD load, 3 element, one lane, S
// ASIMD load, 3 element, one lane, D
def : InstRW<[A64FXWrite_LD3I0], (instregex "^LD3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD3I1, WriteAdr],
             (instregex "^LD3i(8|16|32|64)_POST$")>;

// ASIMD load, 3 element, all lanes, D-form, B/H/S
// ASIMD load, 3 element, all lanes, D-form, D
// ASIMD load, 3 element, all lanes, Q-form, B/H/S
// ASIMD load, 3 element, all lanes, Q-form, D
def : InstRW<[A64FXWrite_LD104],
             (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD112, WriteAdr],
             (instregex "^LD3Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, multiple, D-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, B/H/S
// ASIMD load, 4 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_LD107],
             (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD115, WriteAdr],
             (instregex "^LD4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD load, 4 element, one lane, B/H
// ASIMD load, 4 element, one lane, S
// ASIMD load, 4 element, one lane, D
def : InstRW<[A64FXWrite_LD4I0], (instregex "^LD4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_LD4I1, WriteAdr],
             (instregex "^LD4i(8|16|32|64)_POST$")>;

// ASIMD load, 4 element, all lanes, D-form, B/H/S
// ASIMD load, 4 element, all lanes, D-form, D
// ASIMD load, 4 element, all lanes, Q-form, B/H/S
// ASIMD load, 4 element, all lanes, Q-form, D
def : InstRW<[A64FXWrite_LD106],
             (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_LD114, WriteAdr],
             (instregex "^LD4Rv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

//--
// 3.16 ASIMD Store Instructions
//--
// As with the loads, every _POST form lists WriteAdr after its store write.

// ASIMD store, 1 element, multiple, 1 reg, D-form
// ASIMD store, 1 element, multiple, 1 reg, Q-form
def : InstRW<[A64FXWrite_ST10],
             (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST14, WriteAdr],
             (instregex "^ST1Onev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 2 reg, D-form
// ASIMD store, 1 element, multiple, 2 reg, Q-form
def : InstRW<[A64FXWrite_ST11],
             (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
             (instregex "^ST1Twov(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 3 reg, D-form
// ASIMD store, 1 element, multiple, 3 reg, Q-form
def : InstRW<[A64FXWrite_ST12],
             (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
             (instregex "^ST1Threev(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, multiple, 4 reg, D-form
// ASIMD store, 1 element, multiple, 4 reg, Q-form
def : InstRW<[A64FXWrite_ST13],
             (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
             (instregex "^ST1Fourv(8b|4h|2s|1d|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 1 element, one lane, B/H/S
// ASIMD store, 1 element, one lane, D
def : InstRW<[A64FXWrite_ST10],
             (instregex "^ST1i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST14, WriteAdr],
             (instregex "^ST1i(8|16|32|64)_POST$")>;

// ASIMD store, 2 element, multiple, D-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, B/H/S
// ASIMD store, 2 element, multiple, Q-form, D
def : InstRW<[A64FXWrite_ST11],
             (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
             (instregex "^ST2Twov(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 2 element, one lane, B/H/S
// ASIMD store, 2 element, one lane, D
def : InstRW<[A64FXWrite_ST11],
             (instregex "^ST2i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST15, WriteAdr],
             (instregex "^ST2i(8|16|32|64)_POST$")>;

// ASIMD store, 3 element, multiple (D-form B/H/S, Q-form B/H/S, Q-form D)
def : InstRW<[A64FXWrite_ST12],
      (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
      (instregex "^ST3Threev(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 3 element, one lane (B/H, S and D)
def : InstRW<[A64FXWrite_ST12],
      (instregex "^ST3i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST16, WriteAdr],
      (instregex "^ST3i(8|16|32|64)_POST$")>;

// ASIMD store, 4 element, multiple (D-form B/H/S, Q-form B/H/S, Q-form D)
def : InstRW<[A64FXWrite_ST13],
      (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
      (instregex "^ST4Fourv(8b|4h|2s|16b|8h|4s|2d)_POST$")>;

// ASIMD store, 4 element, one lane (B/H, S and D)
def : InstRW<[A64FXWrite_ST13],
      (instregex "^ST4i(8|16|32|64)$")>;
def : InstRW<[A64FXWrite_ST17, WriteAdr],
      (instregex "^ST4i(8|16|32|64)_POST$")>;

// V8.1a Atomics (LSE)
//
// Each record below lists one operation family with its plain, A, L and AL
// variants in B/H/W/X sizes; all members of a family share one write list.

def : InstRW<[A64FXWrite_CAS, WriteAtomic],
      (instrs CASB,   CASH,   CASW,   CASX,
              CASAB,  CASAH,  CASAW,  CASAX,
              CASLB,  CASLH,  CASLW,  CASLX,
              CASALB, CASALH, CASALW, CASALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDLARB, LDLARH, LDLARW, LDLARX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDADDB,   LDADDH,   LDADDW,   LDADDX,
              LDADDAB,  LDADDAH,  LDADDAW,  LDADDAX,
              LDADDLB,  LDADDLH,  LDADDLW,  LDADDLX,
              LDADDALB, LDADDALH, LDADDALW, LDADDALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDCLRB,   LDCLRH,   LDCLRW,   LDCLRX,
              LDCLRAB,  LDCLRAH,  LDCLRAW,  LDCLRAX,
              LDCLRLB,  LDCLRLH,  LDCLRLW,  LDCLRLX,
              LDCLRALB, LDCLRALH, LDCLRALW, LDCLRALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDEORB,   LDEORH,   LDEORW,   LDEORX,
              LDEORAB,  LDEORAH,  LDEORAW,  LDEORAX,
              LDEORLB,  LDEORLH,  LDEORLW,  LDEORLX,
              LDEORALB, LDEORALH, LDEORALW, LDEORALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDSETB,   LDSETH,   LDSETW,   LDSETX,
              LDSETAB,  LDSETAH,  LDSETAW,  LDSETAX,
              LDSETLB,  LDSETLH,  LDSETLW,  LDSETLX,
              LDSETALB, LDSETALH, LDSETALW, LDSETALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDSMAXB,   LDSMAXH,   LDSMAXW,   LDSMAXX,
              LDSMAXAB,  LDSMAXAH,  LDSMAXAW,  LDSMAXAX,
              LDSMAXLB,  LDSMAXLH,  LDSMAXLW,  LDSMAXLX,
              LDSMAXALB, LDSMAXALH, LDSMAXALW, LDSMAXALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDSMINB,   LDSMINH,   LDSMINW,   LDSMINX,
              LDSMINAB,  LDSMINAH,  LDSMINAW,  LDSMINAX,
              LDSMINLB,  LDSMINLH,  LDSMINLW,  LDSMINLX,
              LDSMINALB, LDSMINALH, LDSMINALW, LDSMINALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDUMAXB,   LDUMAXH,   LDUMAXW,   LDUMAXX,
              LDUMAXAB,  LDUMAXAH,  LDUMAXAW,  LDUMAXAX,
              LDUMAXLB,  LDUMAXLH,  LDUMAXLW,  LDUMAXLX,
              LDUMAXALB, LDUMAXALH, LDUMAXALW, LDUMAXALX)>;

def : InstRW<[A64FXWrite_5Cyc_GI5, WriteAtomic],
      (instrs LDUMINB,   LDUMINH,   LDUMINW,   LDUMINX,
              LDUMINAB,  LDUMINAH,  LDUMINAW,  LDUMINAX,
              LDUMINLB,  LDUMINLH,  LDUMINLW,  LDUMINLX,
              LDUMINALB, LDUMINALH, LDUMINALW, LDUMINALX)>;

def : InstRW<[A64FXWrite_SWP, WriteAtomic],
      (instrs SWPB,   SWPH,   SWPW,   SWPX,
              SWPAB,  SWPAH,  SWPAW,  SWPAX,
              SWPLB,  SWPLH,  SWPLW,  SWPLX,
              SWPALB, SWPALH, SWPALW, SWPALX)>;

def : InstRW<[A64FXWrite_STUR, WriteAtomic],
      (instrs STLLRB, STLLRH, STLLRW, STLLRX)>;

// SVE instructions

// The SVE instructions below are modeled more accurately than the other
// instructions in this file.
// TODO: rework the model of the other instructions in the same way.

// Opcode-pattern tables that reuse the generic fixed-latency write resources.

def : InstRW<[A64FXWrite_4Cyc_GI0],
      (instregex "^AND_ZI", "^CL[SZ]_Z", "^CPY_ZP[mz]I", "^DUP_ZZ?I",
                 "^DUPM_Z", "^EOR_ZI", "^ORR_ZI",
                 "^FCM(EQ|GT|GE|LT|LE|NE|UO)_P", "^FCPY_Z",
                 "^F(MAX|MIN).*I_", "^NEG_Z", "^[SU](MAX|MIN)_ZI",
                 "^SUBR?_ZI")>;

def : InstRW<[A64FXWrite_6Cyc_GI0],
      (instregex "^CLAST[AB]_[VZ]", "^COMPACT_Z", "^CPY_ZPmV", "^DUP_ZR",
                 "^EXT_Z", "^FDUP_Z", "^INSR_ZV", "^LAST[AB]_V",
                 "^REV_Z", "^SPLICE_Z", "^[SU]UNPK(HI|LO)_Z", "^TBL_Z",
                 "^TRN[12]_Z")>;

def : InstRW<[A64FXWrite_9Cyc_GI0],
      (instregex "^F(ADD|SUBR?)_.*I_", "^FRECPS_Z", "^FRSQRTS_Z",
                 "^INDEX_II_[SD]", "^MUL_ZI")>;

def : InstRW<[A64FXWrite_4Cyc_GI3], (instregex "^CNT_Z")>;

def : InstRW<[A64FXWrite_4Cyc_GI03],
      (instregex "^ABS_Z", "^ADD_Z", "^AND_Z[^I]", "^ASRR?_(WIDE_)?Z",
                 "^BIC_Z", "^ADR_[SU]XTW_Z", "^CNOT_Z", "^DEC[BHWD]_Z",
                 "^EOR_Z[^I]", "^INC[BHWD]_Z", "^ORR_Z[^I]", "^FABS_Z",
                 "^FACG[ET]_P", "^FEXPA_Z", "^F(MAX|MIN)[^V]*Z_",
                 "^FNEG_Z", "^FRECP[EX]_Z", "^FRSQRTE_Z", "^FTSSEL_Z",
                 "^LS[LR]R?(_WIDE)?_Z", "^NOT_Z", "^RBIT_Z",
                 "^REV[BHW]_Z", "^SABD_Z", "^SEL_Z",
                 "^[SU](MAX|MIN)_ZP", "^[SU]Q(INC|DEC)[^P]_Z",
                 "^SUBR?_Z[^I]", "^[SU]XT._Z", "^UABD_Z")>;

def : InstRW<[A64FXWrite_9Cyc_GI03],
      (instregex "^FABD_Z", "^F(ADD|SUBR?)_.*Z_",
                 "^FN?(MAD|MLA|MLS|MSB)_ZP", "^FMUL_(ZP|ZZZ_)",
                 "^FMULX_Z", "^FCVT(ZS|ZU)?_Z", "^FRINT._Z", "^FSCALE_Z",
                 "^FTMAD_Z", "^FTSMUL_Z", "^MAD_Z", "^MLA_Z", "^MLS_Z",
                 "^MSB_Z", "^MUL_ZP", "^[SU]CVTF_Z", "^[SU]DOT_ZZZ_",
                 "^[SU]MULH_Z")>;

def : InstRW<[A64FXWrite_3Cyc_GI1],
      (instregex "^ANDS?_P", "^BICS?_P", "^BRK.*_P", "^EORS?_P",
                 "^ORRS?_P", "^NANDS?_P", "^NORS?_P", "^ORNS?_P",
                 "^PFALSE", "^PNEXT", "^PFIRST", "^PTEST", "^PTRUES?",
                 "^PUNPK(HI|LO)", "^RDFFRS?", "^REV_P", "^SEL_P",
                 "^TRN[12]_P")>;

def : InstRW<[A64FXWrite_1Cyc_GI24],
      (instregex "^ADD[PV]L", "^CNT[BHWD]_X", "^DEC[BHWD]_X",
                 "^INC[BHWD]_X", "^RDVLI")>;

def : InstRW<[A64FXWrite_11Cyc_GI5], (instregex "^LDR_[PZ]XI")>;

def : InstRW<[A64FXWrite_11Cyc_GI56],
      (instregex "^LD(NF|FF|NT)?1R?S?[BHSWDQ]")>;

// Write resource that names no processor resources.
def A64FXWrite_None : SchedWriteRes<[]>;
def : InstRW<[A64FXWrite_None], (instregex "^SETFFR", "^MOVPRFX")>;

// Dedicated write resources. Each record is followed directly by the InstRW
// that attaches it to its opcodes.

def A64FXWrite_FMAIndexed : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_FMAIndexed], (instregex "^F(MLA|MLS|MUL)_ZZZI")>;

def A64FXWrite_ADR_LSL_Z : SchedWriteRes<[A64FXGI0]> {
  let Latency = 5;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_ADR_LSL_Z], (instregex "^ADR_LSL_Z")>;

def A64FXWrite_ASRD : SchedWriteRes<[A64FXGI0, A64FXGI01]> {
  let Latency = 8;
  let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_ASRD], (instregex "^ASRD_Z")>;

// Reductions matched by (AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_*,
// one record per element-size suffix (B, H, S, D).

def A64FXWrite_Reduction4CycB : SchedWriteRes<[A64FXGI03]> {
  let Latency = 46;
  let NumMicroOps = 10;
  let ReleaseAtCycles = [10];
}
def : InstRW<[A64FXWrite_Reduction4CycB],
      (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_B")>;

def A64FXWrite_Reduction4CycH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 42;
  let NumMicroOps = 9;
  let ReleaseAtCycles = [9];
}
def : InstRW<[A64FXWrite_Reduction4CycH],
      (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_H")>;

def A64FXWrite_Reduction4CycS : SchedWriteRes<[A64FXGI03]> {
  let Latency = 38;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [8];
}
def : InstRW<[A64FXWrite_Reduction4CycS],
      (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_S")>;

def A64FXWrite_Reduction4CycD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 34;
  let NumMicroOps = 7;
  let ReleaseAtCycles = [7];
}
def : InstRW<[A64FXWrite_Reduction4CycD],
      (instregex "^(AND|EOR|OR|SADD|SMAX|SMIN|UADD|UMAX|UMIN)V_VPZ_D")>;

def A64FXWrite_CLAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 29;
}
def : InstRW<[A64FXWrite_CLAST_R], (instregex "^CLAST[AB]_R")>;

def A64FXWrite_CMP : SchedWriteRes<[A64FXGI0, A64FXGI1]> {
  let Latency = 4;
}
def : InstRW<[A64FXWrite_CMP], (instregex "^CMP.*_P")>;

def A64FXWrite_CNTP : SchedWriteRes<[A64FXGI1, A64FXGI2]> {
  let Latency = 6;
}
def : InstRW<[A64FXWrite_CNTP], (instregex "^CNTP_X")>;

def A64FXWrite_CPYScalar : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 8;
}
def : InstRW<[A64FXWrite_CPYScalar], (instregex "^CPY_ZPmR")>;

def A64FXWrite_CTERM : SchedWriteRes<[A64FXGI24]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_CTERM], (instregex "^CTERM")>;

def A64FXWrite_INCPScalar : SchedWriteRes<[A64FXGI1, A64FXGI2, A64FXGI4]> {
  let Latency = 7;
  let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_INCPScalar], (instregex "^DECP_X", "^INCP_X")>;

def A64FXWrite_INCPVector : SchedWriteRes<[A64FXGI0, A64FXGI1]> {
  let Latency = 12;
}
def : InstRW<[A64FXWrite_INCPVector], (instregex "^DECP_Z", "^INCP_Z")>;

// FADDV_VPZ_{H,S,D}

def A64FXWrite_FADDVH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 75;
  let NumMicroOps = 11;
  let ReleaseAtCycles = [11];
}
def : InstRW<[A64FXWrite_FADDVH], (instrs FADDV_VPZ_H)>;

def A64FXWrite_FADDVS : SchedWriteRes<[A64FXGI03]> {
  let Latency = 60;
  let NumMicroOps = 9;
  let ReleaseAtCycles = [9];
}
def : InstRW<[A64FXWrite_FADDVS], (instrs FADDV_VPZ_S)>;

def A64FXWrite_FADDVD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 45;
  let NumMicroOps = 7;
  let ReleaseAtCycles = [7];
}
def : InstRW<[A64FXWrite_FADDVD], (instrs FADDV_VPZ_D)>;

// FADDA_VPZ_{H,S,D}

def A64FXWrite_FADDAH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 468;
  let NumMicroOps = 63;
  let ReleaseAtCycles = [63];
}
def : InstRW<[A64FXWrite_FADDAH], (instrs FADDA_VPZ_H)>;

def A64FXWrite_FADDAS : SchedWriteRes<[A64FXGI03]> {
  let Latency = 228;
  let NumMicroOps = 31;
  let ReleaseAtCycles = [31];
}
def : InstRW<[A64FXWrite_FADDAS], (instrs FADDA_VPZ_S)>;

def A64FXWrite_FADDAD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 108;
  let NumMicroOps = 15;
  let ReleaseAtCycles = [15];
}
def : InstRW<[A64FXWrite_FADDAD], (instrs FADDA_VPZ_D)>;

def A64FXWrite_FCADDZ : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
  let Latency = 15;
  let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_FCADDZ], (instregex "^FCADD_Z")>;

def A64FXWrite_FCMLAZ : SchedWriteRes<[A64FXGI03]> {
  let Latency = 15;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : InstRW<[A64FXWrite_FCMLAZ], (instregex "^FCMLA_Z")>;

// F(DIVR?|SQRT)_Z*, one record per element-size suffix. Each record holds
// A64FXGI0 for as many cycles as its latency.

def A64FXWrite_FDIVH : SchedWriteRes<[A64FXGI0]> {
  let Latency = 134;
  let ReleaseAtCycles = [134];
}
def : InstRW<[A64FXWrite_FDIVH], (instregex "^F(DIVR?|SQRT)_Z.*_H")>;

def A64FXWrite_FDIVS : SchedWriteRes<[A64FXGI0]> {
  let Latency = 98;
  let ReleaseAtCycles = [98];
}
def : InstRW<[A64FXWrite_FDIVS], (instregex "^F(DIVR?|SQRT)_Z.*_S")>;

def A64FXWrite_FDIVD : SchedWriteRes<[A64FXGI0]> {
  let Latency = 154;
  let ReleaseAtCycles = [154];
}
def : InstRW<[A64FXWrite_FDIVD], (instregex "^F(DIVR?|SQRT)_Z.*_D")>;

// F(MAX|MIN)(NM)?V_VPZ_*, one record per element-size suffix.

def A64FXWrite_FMAXVH : SchedWriteRes<[A64FXGI03]> {
  let Latency = 54;
  let NumMicroOps = 11;
  let ReleaseAtCycles = [11];
}
def : InstRW<[A64FXWrite_FMAXVH], (instregex "^F(MAX|MIN)(NM)?V_VPZ_H")>;

def A64FXWrite_FMAXVS : SchedWriteRes<[A64FXGI03]> {
  let Latency = 44;
  let NumMicroOps = 9;
  let ReleaseAtCycles = [9];
}
def : InstRW<[A64FXWrite_FMAXVS], (instregex "^F(MAX|MIN)(NM)?V_VPZ_S")>;

def A64FXWrite_FMAXVD : SchedWriteRes<[A64FXGI03]> {
  let Latency = 34;
  let NumMicroOps = 7;
  let ReleaseAtCycles = [7];
}
// The _D reductions use the D-specific record defined directly above
// (previously mapped to A64FXWrite_FMAXVH, which left this record unused).
def : InstRW<[A64FXWrite_FMAXVD], (instregex "^F(MAX|MIN)(NM)?V_VPZ_D")>;

// INDEX_* forms, split by operand kinds (RI/IR, II, RR) and element-size
// suffix ([BH] vs [SD]). INDEX_II_[SD] is covered by a generic table elsewhere.

def A64FXWrite_INDEX_RI_BH : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 17;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2, 2];
}
def : InstRW<[A64FXWrite_INDEX_RI_BH], (instregex "^INDEX_(RI|IR)_[BH]")>;

def A64FXWrite_INDEX_RI_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 13;
  let NumMicroOps = 1;
}
def : InstRW<[A64FXWrite_INDEX_RI_SD], (instregex "^INDEX_(RI|IR)_[SD]")>;

def A64FXWrite_INDEX_II_BH : SchedWriteRes<[A64FXGI0]> {
  let Latency = 13;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_INDEX_II_BH], (instregex "^INDEX_II_[BH]")>;

def A64FXWrite_INDEX_RR_BH : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI3]> {
  let Latency = 17;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2, 2, 1];
}
def : InstRW<[A64FXWrite_INDEX_RR_BH], (instregex "^INDEX_RR_[BH]")>;

def A64FXWrite_INDEX_RR_SD : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 17;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2, 1];
}
def : InstRW<[A64FXWrite_INDEX_RR_SD], (instregex "^INDEX_RR_[SD]")>;

def A64FXWrite_INSR_ZR : SchedWriteRes<[A64FXGI0, A64FXGI2]> {
  let Latency = 10;
}
def : InstRW<[A64FXWrite_INSR_ZR], (instregex "^INSR_ZR")>;

def A64FXWrite_LAST_R : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 25;
}
// LAST[AB]_R uses its own 25-cycle record defined directly above (previously
// mapped to the 29-cycle A64FXWrite_CLAST_R, which left this record unused).
def : InstRW<[A64FXWrite_LAST_R], (instregex "^LAST[AB]_R")>;

// GLD1*/GLDFF1* forms. The _ZI records cover the *_IMM opcode names, the _RZ
// records cover the remaining names; _S_/_D_ select the element-size suffix.

def A64FXWrite_GLD_S_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 19;
  let ReleaseAtCycles = [2, 4, 4];
}
def : InstRW<[A64FXWrite_GLD_S_ZI],
      (instregex "^GLD(FF)?1W_IMM", "^GLD(FF)?1S?[BHW]_S_IMM")>;

def A64FXWrite_GLD_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 16;
  let ReleaseAtCycles = [1, 2, 2];
}
def : InstRW<[A64FXWrite_GLD_D_ZI],
      (instregex "^GLD(FF)?1D_IMM", "^GLD(FF)?1S?[BHW]_D_IMM")>;

def A64FXWrite_GLD_S_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 23;
  let ReleaseAtCycles = [2, 1, 4, 4];
}
def : InstRW<[A64FXWrite_GLD_S_RZ],
      (instregex "^GLD(FF)?1W_[^DI]", "^GLD(FF)?1S?[BHW]_S_[^I]")>;

def A64FXWrite_GLD_D_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 20;
  let ReleaseAtCycles = [1, 1, 2, 2];
}
def : InstRW<[A64FXWrite_GLD_D_RZ],
      (instregex "^GLD(FF)?1D_[^I]", "^GLD(FF)?1D$",
                 "^GLD(FF)?1S?[BHW]_D_[^I]", "^GLD(FF)?1S?[BHW]_D$")>;

// LD2/LD3/LD4 forms: [BH] names, [WD] names with the _IMM suffix, and [WD]
// names without a suffix.

def A64FXWrite_LD2_BH : SchedWriteRes<[A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [9];
}
def : InstRW<[A64FXWrite_LD2_BH], (instregex "^LD2[BH]")>;

def A64FXWrite_LD2_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_LD2_WD_IMM], (instregex "^LD2[WD]_IMM")>;

def A64FXWrite_LD2_WD : SchedWriteRes<[A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : InstRW<[A64FXWrite_LD2_WD], (instregex "^LD2[WD]$")>;

def A64FXWrite_LD3_BH : SchedWriteRes<[A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [13];
}
def : InstRW<[A64FXWrite_LD3_BH], (instregex "^LD3[BH]")>;

def A64FXWrite_LD3_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3];
}
def : InstRW<[A64FXWrite_LD3_WD_IMM], (instregex "^LD3[WD]_IMM")>;

def A64FXWrite_LD3_WD : SchedWriteRes<[A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4];
}
def : InstRW<[A64FXWrite_LD3_WD], (instregex "^LD3[WD]$")>;

def A64FXWrite_LD4_BH : SchedWriteRes<[A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [17];
}
def : InstRW<[A64FXWrite_LD4_BH], (instregex "^LD4[BH]")>;

def A64FXWrite_LD4_WD_IMM : SchedWriteRes<[A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4];
}
def : InstRW<[A64FXWrite_LD4_WD_IMM], (instregex "^LD4[WD]_IMM")>;

def A64FXWrite_LD4_WD : SchedWriteRes<[A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [5];
}
def : InstRW<[A64FXWrite_LD4_WD], (instregex "^LD4[WD]$")>;

// PRF* forms. These records set no Latency; only the resources and, for the
// _S_/_D_ forms, their release cycles are given.

def A64FXWrite_PRF : SchedWriteRes<[A64FXGI56]> {
}
def : InstRW<[A64FXWrite_PRF], (instregex "^PRF._PR")>;

def A64FXWrite_PRF_W_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
  let ReleaseAtCycles = [2, 1, 4];
}
def : InstRW<[A64FXWrite_PRF_W_RZ], (instregex "^PRF._S_[^P]")>;

def A64FXWrite_PRF_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let ReleaseAtCycles = [2, 4];
}
def : InstRW<[A64FXWrite_PRF_W_ZI], (instregex "^PRF._S_PZI")>;

def A64FXWrite_PRF_D_RZ : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI56]> {
  let ReleaseAtCycles = [1, 1, 2];
}
def : InstRW<[A64FXWrite_PRF_D_RZ], (instregex "^PRF._D_[^P]")>;

def A64FXWrite_PRF_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let ReleaseAtCycles = [1, 2];
}
def : InstRW<[A64FXWrite_PRF_D_ZI], (instregex "^PRF._D_PZI")>;

// [SU]DIVR? forms. Each record holds A64FXGI0 for as many cycles as its
// latency.

def A64FXWrite_SDIV_S : SchedWriteRes<[A64FXGI0]> {
  let Latency = 114;
  let ReleaseAtCycles = [114];
}
def : InstRW<[A64FXWrite_SDIV_S], (instregex "^[SU]DIVR?.*_S")>;

def A64FXWrite_SDIV_D : SchedWriteRes<[A64FXGI0]> {
  let Latency = 178;
  let ReleaseAtCycles = [178];
}
def : InstRW<[A64FXWrite_SDIV_D], (instregex "^[SU]DIVR?.*_D")>;

def A64FXWrite_SDOT_I : SchedWriteRes<[A64FXGI0, A64FXGI3]> {
  let Latency = 15;
  let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_SDOT_I], (instregex "^[SU]DOT_ZZZI")>;

def A64FXWrite_SQINC_Scalar : SchedWriteRes<[A64FXGI24]> {
  let Latency = 2;
  let ReleaseAtCycles = [2];
}
def : InstRW<[A64FXWrite_SQINC_Scalar],
      (instregex "^[SU]Q(INC|DEC)[BHWD]_[WX]")>;

def A64FXWrite_SQINCP_X : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
  let Latency = 6;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [3, 1];
}
def : InstRW<[A64FXWrite_SQINCP_X], (instregex "^[SU]Q(INC|DEC)P_[WX]")>;

def A64FXWrite_SQINCP_Z : SchedWriteRes<[A64FXGI24, A64FXGI3]> {
  let Latency = 12;
}
def : InstRW<[A64FXWrite_SQINCP_Z], (instregex "^[SU]Q(INC|DEC)P_Z")>;

def A64FXWrite_ST1 : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_ST1], (instregex "^ST(NT)?1[BHWD]")>;

// SST1* forms. The _ZI records cover opcode names whose suffix starts with
// _I; the _RZ records cover the remaining names.

def A64FXWrite_SST1_W_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 20;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [8, 8, 8, 8];
}
def : InstRW<[A64FXWrite_SST1_W_RZ],
      (instregex "^SST1[BH]_S(_[^I]|$)", "^SST1W(_[^ID]|$)")>;

def A64FXWrite_SST1_D_RZ
    : SchedWriteRes<[A64FXGI0, A64FXGI2, A64FXGI5, A64FXGI6]> {
  let Latency = 20;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4, 4, 4, 4];
}
def : InstRW<[A64FXWrite_SST1_D_RZ],
      (instregex "^SST1[BHW]_D(_[^I]|$)", "^SST1D(_[^I]|$)")>;

def A64FXWrite_SST1_W_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 16;
  let NumMicroOps = 8;
  let ReleaseAtCycles = [12, 8, 8];
}
def : InstRW<[A64FXWrite_SST1_W_ZI],
      (instregex "^SST1[BH]_S_I", "^SST1W_I")>;

def A64FXWrite_SST1_D_ZI : SchedWriteRes<[A64FXGI0, A64FXGI5, A64FXGI6]> {
  let Latency = 16;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4, 4, 4];
}
def : InstRW<[A64FXWrite_SST1_D_ZI],
      (instregex "^SST1[BHW]_D_I", "^SST1D_I")>;

// ST2/ST3/ST4 forms.
//
// NOTE(review): for the [WD] stores the *_WD_RI records are attached to the
// opcode names WITHOUT a suffix ("^STn[WD]$") and the *_WD_RR records to the
// names whose suffix starts with _I ("^STn[WD]_I"). The loads above pair the
// cheaper record (Latency 11) with the _IMM names instead. The store pairing
// is left exactly as found; confirm against the A64FX documentation whether
// the RI/RR mappings are swapped.

def A64FXWrite_ST2_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [8, 9];
}
def : InstRW<[A64FXWrite_ST2_BH], (instregex "^ST2[BH]")>;

def A64FXWrite_ST2_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 2;
  let ReleaseAtCycles = [2, 2];
}
def : InstRW<[A64FXWrite_ST2_WD_RI], (instregex "^ST2[WD]$")>;

def A64FXWrite_ST2_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [2, 3];
}
def : InstRW<[A64FXWrite_ST2_WD_RR], (instregex "^ST2[WD]_I")>;

def A64FXWrite_ST3_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [12, 13];
}
def : InstRW<[A64FXWrite_ST3_BH], (instregex "^ST3[BH]")>;

def A64FXWrite_ST3_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 3;
  let ReleaseAtCycles = [3, 3];
}
def : InstRW<[A64FXWrite_ST3_WD_RI], (instregex "^ST3[WD]$")>;

def A64FXWrite_ST3_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [3, 4];
}
def : InstRW<[A64FXWrite_ST3_WD_RR], (instregex "^ST3[WD]_I")>;

def A64FXWrite_ST4_BH : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 15;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [16, 17];
}
def : InstRW<[A64FXWrite_ST4_BH], (instregex "^ST4[BH]")>;

def A64FXWrite_ST4_WD_RI : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 11;
  let NumMicroOps = 4;
  let ReleaseAtCycles = [4, 4];
}
def : InstRW<[A64FXWrite_ST4_WD_RI], (instregex "^ST4[WD]$")>;

def A64FXWrite_ST4_WD_RR : SchedWriteRes<[A64FXGI0, A64FXGI56]> {
  let Latency = 12;
  let NumMicroOps = 5;
  let ReleaseAtCycles = [4, 5];
}
def : InstRW<[A64FXWrite_ST4_WD_RR], (instregex "^ST4[WD]_I")>;

def A64FXWrite_STR_P : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_STR_P], (instrs STR_PXI)>;

def A64FXWrite_STR_Z : SchedWriteRes<[A64FXGI0, A64FXGI5]> {
  let Latency = 11;
}
def : InstRW<[A64FXWrite_STR_Z], (instrs STR_ZXI)>;

def A64FXWrite_WHILE : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let Latency = 4;
}
def : InstRW<[A64FXWrite_WHILE], (instregex "^WHILEL._P")>;

def A64FXWrite_WRFFR : SchedWriteRes<[A64FXGI3, A64FXGI5]> {
  let Latency = 3;
  let NumMicroOps = 2;
}
def : InstRW<[A64FXWrite_WRFFR], (instrs WRFFR)>;

} // SchedModel = A64FXModel