//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the resources required by P9 instructions. This is part of
// the P9 processor model used for instruction scheduling. This file should
// contain all the instructions that may be used on Power 9. This is not
// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//     for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//

// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    MTVSRDD, VEQV, VNAND, VNEGD, VNEGW, VNOR, VOR, VORC, VSEL, VXOR,
    XVNEGDP, XVNEGSP, XXLAND, XXLANDC, XXLEQV, XXLEQVOnes, XXLNAND,
    XXLNOR, XXLOR, XXLORf, XXLORC, XXLXOR, XXLXORdpz, XXLXORspz,
    XXLXORz, XXSEL, XSABSQP, XSCPSGNQP, XSIEXPQP, XSNABSQP, XSNEGQP,
    XSXEXPQP
)>;

// Three cycle ALU operation with Restricted Dispatch. It executes on one
// slice, but being Restricted it takes all 3 dispatches (DISP) of that
// superslice.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMP(O|U)(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    FTDIV, FTSQRT, CMPEQB
)>;

// Three cycle ALU operation with Standard Dispatch. A single slice is used.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
  (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    (instregex "SETB(8)?$"),
    XSTDIVDP, XSTSQRTDP, XSXSIGDP, XSCVSPDPN, BPERMD
)>;

// Two cycle ALU operation with Standard Dispatch. A single slice is used.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
  (instrs
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTV(S)?RD$"),
    (instregex "MTV(S)?RW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?(O)?$"),
    (instregex "ANDI(S)?(8)?(_rec)?$"),
    (instregex "ADDC(8)?(O)?$"),
    (instregex "ADDIC(8)?(_rec)?$"),
    (instregex "ADD(8|4)(O)?(_rec)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
    (instregex "NEG(8)?(O)?(_rec)?$"),
    (instregex "POPCNTB$"),
    (instregex "POPCNTB8$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
    (instregex "NAND(8)?(_rec)?$"),
    (instregex "AND(C)?(8)?(_rec)?$"),
    (instregex "NOR(8)?(_rec)?$"),
    (instregex "OR(C)?(8)?(_rec)?$"),
    (instregex "EQV(8)?(_rec)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    (instregex "NEG(8)?(O)?$"),
    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
    (instregex "LA(8)?$"),
    COPY, MCRF, MCRXRX, XSNABSDP, XSNABSDPs, XSXEXPDP, XSABSDP, XSNEGDP,
    XSCPSGNDP, MFVSRWZ, MFVRWZ, EXTSWSLI, SRADI_32, RLDIC, RFEBB, TBEGIN,
    TRECHKPT, NOP, WAIT
)>;

// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    RLDIMI, XSIEXPDP, FMR, CREQV, CRNOT, CRXOR, TRECLAIM, TSR, TABORT
)>;

// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
    (instregex "VCMPBFP(_rec)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    VBPERMD, VADDCUW, VPOPCNTW, VPOPCNTD, VPRTYBD, VPRTYBW, VSHASIGMAD,
    VSHASIGMAW, VSUBSBS, VSUBSHS, VSUBSWS, VSUBUBS, VSUBUHS, VSUBUWS,
    VSUBCUW,
    VCMPGTSB, VCMPGTSB_rec, VCMPGTSD, VCMPGTSD_rec,
    VCMPGTSH, VCMPGTSH_rec, VCMPGTSW, VCMPGTSW_rec,
    VCMPGTUB, VCMPGTUB_rec, VCMPGTUD, VCMPGTUD_rec,
    VCMPGTUH, VCMPGTUH_rec, VCMPGTUW, VCMPGTUW_rec,
    VCMPNEB_rec, VCMPNEH_rec, VCMPNEW_rec,
    VCMPNEZB_rec, VCMPNEZH_rec, VCMPNEZW_rec,
    VCMPEQUB_rec, VCMPEQUD_rec, VCMPEQUH_rec, VCMPEQUW_rec,
    XVCMPEQDP, XVCMPEQDP_rec, XVCMPEQSP, XVCMPEQSP_rec,
    XVCMPGEDP, XVCMPGEDP_rec, XVCMPGESP, XVCMPGESP_rec,
    XVCMPGTDP, XVCMPGTDP_rec, XVCMPGTSP, XVCMPGTSP_rec,
    XVMAXDP, XVMAXSP, XVMINDP, XVMINSP, XVTDIVDP, XVTDIVSP, XVTSQRTDP,
    XVTSQRTSP, XVTSTDCDP, XVTSTDCSP, XVXSIGDP, XVXSIGSP
)>;

// 7 cycle DP vector operation that uses an entire superslice.
// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
// EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    VADDFP, VCTSXS, VCTSXS_0, VCTUXS, VCTUXS_0, VEXPTEFP, VLOGEFP,
    VMADDFP, VMHADDSHS, VNMSUBFP, VREFP, VRFIM, VRFIN, VRFIP, VRFIZ,
    VRSQRTEFP, VSUBFP,
    XVADDDP, XVADDSP, XVCVDPSP, XVCVDPSXDS, XVCVDPSXWS, XVCVDPUXDS,
    XVCVDPUXWS, XVCVHPSP, XVCVSPDP, XVCVSPHP, XVCVSPSXDS, XVCVSPSXWS,
    XVCVSPUXDS, XVCVSPUXWS, XVCVSXDDP, XVCVSXDSP, XVCVSXWDP, XVCVSXWSP,
    XVCVUXDDP, XVCVUXDSP, XVCVUXWDP, XVCVUXWSP,
    XVMADDADP, XVMADDASP, XVMADDMDP, XVMADDMSP,
    XVMSUBADP, XVMSUBASP, XVMSUBMDP, XVMSUBMSP,
    XVMULDP, XVMULSP,
    XVNMADDADP, XVNMADDASP, XVNMADDMDP, XVNMADDMSP,
    XVNMSUBADP, XVNMSUBASP, XVNMSUBMDP, XVNMSUBMSP,
    XVRDPI, XVRDPIC, XVRDPIM, XVRDPIP, XVRDPIZ, XVREDP, XVRESP,
    XVRSPI, XVRSPIC, XVRSPIM, XVRSPIP, XVRSPIZ,
    XVRSQRTEDP, XVRSQRTESP, XVSUBDP, XVSUBSP,
    VCFSX, VCFSX_0, VCFUX, VCFUX_0, VMHRADDSHS, VMLADDUHM,
    VMSUMMBM, VMSUMSHM, VMSUMSHS, VMSUMUBM, VMSUMUHM, VMSUMUDM, VMSUMUHS,
    VMULESB, VMULESH, VMULESW, VMULEUB, VMULEUH, VMULEUW,
    VMULOSB, VMULOSH, VMULOSW, VMULOUB, VMULOUH, VMULOUW, VMULUWM,
    VSUM2SWS, VSUM4SBS, VSUM4SHS, VSUM4UBS, VSUMSWS
)>;

// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MADD(HD|HDU|LD|LD8)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
)>;

// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    FRSP,
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    FNMADDS, FNMADD, FNMSUBS, FNMSUB, FSELD, FSELS, FMULS, FMUL,
    XSMADDADP, XSMADDASP, XSMADDMDP, XSMADDMSP,
    XSMSUBADP, XSMSUBASP, XSMSUBMDP, XSMSUBMSP,
    XSMULDP, XSMULSP,
    XSNMADDADP, XSNMADDASP, XSNMADDMDP, XSNMADDMSP,
    XSNMSUBADP, XSNMSUBASP, XSNMSUBMDP, XSNMSUBMSP
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations can be done in parallel. The DP is restricted so we need a
// full 4 dispatches.
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "FSEL(D|S)_rec$")
)>;

// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations must be done sequentially. The DP is restricted so we need
// a full 4 dispatches.
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
    (instregex "FRE(S)?_rec$"),
    (instregex "FADD(S)?_rec$"),
    (instregex "FSUB(S)?_rec$"),
    (instregex "F(N)?MSUB(S)?_rec$"),
    (instregex "F(N)?MADD(S)?_rec$"),
    (instregex "FCFID(U)?(S)?_rec$"),
    (instregex "FCTID(U)?(Z)?_rec$"),
    (instregex "FCTIW(U)?(Z)?_rec$"),
    (instregex "FMUL(S)?_rec$"),
    (instregex "FRSQRTE(S)?_rec$"),
    FRSP_rec
)>;

// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
  (instrs
    XSADDDP, XSADDSP, XSCVDPHP, XSCVDPSP,
    XSCVDPSXDS, XSCVDPSXDSs, XSCVDPSXWS,
    XSCVDPUXDS, XSCVDPUXDSs, XSCVDPUXWS,
    XSCVDPSXWSs, XSCVDPUXWSs,
    XSCVHPDP, XSCVSPDP, XSCVSXDDP, XSCVSXDSP, XSCVUXDDP, XSCVUXDSP,
    XSRDPI, XSRDPIC, XSRDPIM, XSRDPIP, XSRDPIZ, XSREDP, XSRESP,
    XSRSQRTEDP, XSRSQRTESP, XSSUBDP, XSSUBSP, XSCVDPSPN, XSRSP
)>;

// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
  (instrs
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    MFVSRLD, MTVSRWS, VBPERMQ, VCLZLSBB, VCTZLSBB, VEXTRACTD,
    VEXTUBLX, VEXTUBRX, VEXTUHLX, VEXTUHRX, VEXTUWLX, VEXTUWRX,
    VGBBD, VMRGHB, VMRGHH, VMRGHW, VMRGLB, VMRGLH, VMRGLW,
    VPERM, VPERMR, VPERMXOR,
    VPKPX, VPKSDSS, VPKSDUS, VPKSHSS, VPKSHUS, VPKSWSS, VPKSWUS,
    VPKUDUM, VPKUDUS, VPKUHUM, VPKUHUS, VPKUWUM, VPKUWUS,
    VPRTYBQ, VSL, VSLDOI, VSLO, VSLV, VSR, VSRO, VSRV,
    VUPKHPX, VUPKHSB, VUPKHSH, VUPKHSW,
    VUPKLPX, VUPKLSB, VUPKLSH, VUPKLSW,
    XXBRD, XXBRH, XXBRQ, XXBRW, XXEXTRACTUW, XXINSERTW,
    XXMRGHW, XXMRGLW, XXPERM, XXPERMR, XXSLDWI, XXSLDWIs,
    XXSPLTIB, XXSPLTW, XXSPLTWs, XXPERMDI, XXPERMDIs,
    VADDCUQ, VADDECUQ, VADDEUQM, VADDUQM,
    VMUL10CUQ, VMUL10ECUQ, VMUL10EUQ, VMUL10UQ,
    VSUBCUQ, VSUBECUQ, VSUBEUQM, VSUBUQM,
    XSCMPEXPQP, XSCMPOQP, XSCMPUQP, XSTSTDCQP, XSXSIGQP,
    BCDCFN_rec, BCDCFZ_rec, BCDCPSGN_rec, BCDCTN_rec, BCDCTZ_rec,
    BCDSETSGN_rec, BCDS_rec, BCDTRUNC_rec, BCDUS_rec, BCDUTRUNC_rec,
    BCDADD_rec, BCDSUB_rec
)>;

// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    BCDSR_rec, XSADDQP, XSADDQPO, XSCVDPQP, XSCVQPDP, XSCVQPDPO,
    XSCVQPSDZ, XSCVQPSWZ, XSCVQPUDZ, XSCVQPUWZ, XSCVSDQP, XSCVUDQP,
    XSRQPI, XSRQPIX, XSRQPXP, XSSUBQP, XSSUBQPO
)>;

// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    BCDCTSQ_rec
)>;

// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    XSMADDQP, XSMADDQPO, XSMSUBQP, XSMSUBQPO, XSMULQP, XSMULQPO,
    XSNMADDQP, XSNMADDQPO, XSNMSUBQP, XSNMSUBQPO
)>;

// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    BCDCFSQ_rec
)>;

// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    XSDIVQP, XSDIVQPO
)>;

// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    XSSQRTQP, XSSQRTQPO
)>;

// 6 Cycle Load uses a single slice.
// NOTE(review): the pattern below has no trailing '$', unlike most patterns in
// this file - confirm the looser match is intended.
def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "LXVL(L)?")
)>;

// Load taking 5 cycles on a single slice.
// NOTE(review): "LVX(L)?" likewise has no trailing '$' - confirm intended.
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "LVE(B|H|W)X$"),
    (instregex "LVX(L)?"),
    (instregex "LXSI(B|H)ZX$"),
    LXSDX, LXVB16X, LXVD2X, LXVWSX, LXSIWZX, LXV, LXVX, LXSD,
    DFLOADf64, XFLOADf64, LIWZX
)>;

// Load taking 4 cycles on a single slice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    CP_ABORT, DARN, EnforceIEIO, ISYNC, MSGSYNC, TLBSYNC, SYNC, LMW, LSWI
)>;

// Restricted 4 cycle load. It runs on a single slice but takes the dispatch
// for the whole superslice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
  (instrs
    LFIWZX, LFDX, LFD
)>;

// Cracked Load Instructions.
// The load pieces can be done in parallel.
def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_PAIR_1C],
  (instrs
    SLBIA, SLBIE, SLBMFEE, SLBMFEV, SLBMTE, TLBIEL
)>;

// Cracked Load Instruction.
// Needs a Load piece and an ALU piece totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
  (instrs
    (instregex "L(W|H)ZU(X)?(8)?$")
)>;

// Cracked TEND Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
  (instrs
    TEND
)>;


// Cracked Store Instruction
// Consecutive Store and ALU instructions. The store is restricted and requires
// three dispatches.
def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "ST(B|H|W|D)CX$")
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
  (instrs
    (instregex "LHA(X)?(8)?$"),
    (instregex "CP_PASTE(8)?_rec$"),
    (instregex "LWA(X)?(_32)?$"),
    TCHECK
)>;

// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    LFIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// One DISP_1C dispatch is listed for each of the two cracked pieces.
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    LXSIWAX, LIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
// cycles. The Load and ALU operations cannot be done at the same time and so
// their latencies are added.
// Full 6 dispatches are required as this is a restricted instruction.
def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    LFSX, LFS
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// One DISP_1C dispatch is listed for each of the two cracked pieces.
def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    LXSSP, LXSSPX, XFLOADf32, DFLOADf32
)>;

// Cracked 3-Way Load Instruction
// Load with two ALU operations that depend on each other
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
  (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX
)>;

// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
// added. Requires 8 cycles. Since the PM requires the full superslice we need
// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
// requires the remaining 1 dispatch.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C],
  (instrs
    LXVH8X, LXVDSX, LXVW4X
)>;

// Single slice Restricted store operation. The restricted operation requires
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "STF(S|D|IWX|SX|DX)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    STIWX, SLBIEG, STMW, STSWI, TLBIE
)>;

// Vector Store Instruction
// Requires the whole superslice and therefore requires one dispatch
// as well as both the Even and Odd exec pipelines.
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
)>;

// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$")
)>;

// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "M(T|F)(U)?DSCR$"),
    (instregex "MTSPR(8)?$")
)>;

// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVW, DIVWO, DIVWU, DIVWUO, MODSW
)>;

// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVWE, DIVWEO, DIVD, DIVDO, DIVWEU, DIVWEUO, DIVDU, DIVDUO,
    MODSD, MODUD, MODUW
)>;

// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVDE, DIVDEO, DIVDEU, DIVDEUO
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 18.
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
  (instrs
    (instregex "DIVW(U)?(O)?_rec$")
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
  (instrs
    DIVD_rec, DIVDO_rec, DIVDU_rec, DIVDUO_rec,
    DIVWE_rec, DIVWEO_rec, DIVWEU_rec, DIVWEUO_rec
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
  (instrs
    DIVDE_rec, DIVDEO_rec, DIVDEU_rec, DIVDEUO_rec
)>;

// CR access instructions in _BrMCR, IIC_BrMCRX.

// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    MTCRF, MTCRF8
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
  (instrs
    (instregex "ADDC(8)?(O)?_rec$"),
    (instregex "SUBFC(8)?(O)?_rec$")
)>;

// Cracked ALU operations.
// Two ALU ops can be done in parallel.
// One is three cycle ALU the other is a two cycle ALU.
// One of the ALU ops is restricted the other is not so we have a total of
// 4 dispatches.
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "F(N)?ABS(D|S)_rec$"),
    (instregex "FCPSGN(D|S)_rec$"),
    (instregex "FNEG(D|S)_rec$"),
    FMR_rec
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
  (instrs
    MCRFS
)>;

// Cracked Restricted ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MTFSF(b|_rec)?$"),
    (instregex "MTFSFI(_rec)?$"),
    MTFSFIb
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// One of the ALU ops is restricted and takes 3 dispatches.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "RLD(I)?C(R|L)_rec$"),
    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
    (instregex "SLW(8)?_rec$"),
    (instregex "SRAW(I)?_rec$"),
    (instregex "SRW(8)?_rec$"),
    RLDICL_32_rec, RLDIMI_rec
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// Both of the ALU ops are restricted and take 3 dispatches.
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MFFS(L|CE|_rec)?$")
)>;

// Cracked ALU instruction composed of three consecutive 2 cycle ALU operations
// for a total of 6 cycles. All of the ALU operations are also restricted so
// each takes 3 dispatches for a total of 9.
def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MFCR(8)?$")
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "EXTSWSLI_32_64_rec$"),
    (instregex "SRAD(I)?_rec$"),
    EXTSWSLI_rec, SLD_rec, SRD_rec, RLDIC_rec
)>;

// Restricted 33 Cycle DP Instruction. Uses one slice and 3 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs FDIV)>;

// Restricted 33 Cycle DP Instruction, Cracked with a 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs FDIV_rec)>;

// 36 Cycle DP Instruction.
// The instruction can be done on a single slice.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
  (instrs XSSQRTDP)>;

// Restricted 36 Cycle DP Instruction. Uses one slice and 3 dispatches.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs FSQRT)>;

// 36 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
  (instrs XVSQRTDP)>;

// 27 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
  (instrs XVSQRTSP)>;

// Restricted 36 Cycle DP Instruction, Cracked with a 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs FSQRT_rec)>;

// 26 Cycle DP Instruction.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
  (instrs XSSQRTSP)>;

// Restricted 26 Cycle DP Instruction. Uses one slice and 3 dispatches.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs FSQRTS)>;

// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    FSQRTS_rec
)>;

// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
  (instrs
    XSDIVDP
)>;

// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    FDIVS
)>;

// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    FDIVS_rec
)>;

// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
  (instrs
    XSDIVSP
)>;

// 24 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
  (instrs
    XVDIVSP
)>;

// 33 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
  (instrs
    XVDIVDP
)>;

// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final dispatch comes from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "LF(SU|SUX)$")
)>;

// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
// the store and so it can be run at the same time as the store. The store is
// also restricted, hence DISP_3SLOTS_1C in addition to the DISP_1C entry.
// Covers the floating point and fixed point store-with-update forms matched
// by the two regular expressions below.
def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "STF(S|D)U(X)?$"),
    (instregex "ST(B|H|W|D)U(X)?(8)?$")
)>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load.
// Dispatch is modeled with two DISP_PAIR_1C entries.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
      (instrs
    (instregex "LBZU(X)?(8)?$"),
    (instregex "LDU(X)?$")
)>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. The load is also
// restricted. DISP_3SLOTS_1C is from the restricted load while DISP_1C is from
// the ALU. The AGEN pipeline is from the load and the EXEC pipeline is
// required for the ALU.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "LF(DU|DUX)$")
)>;

// Crypto Instructions

// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
      (instrs
    (instregex "VPMSUM(B|H|W|D)$"),
    (instregex "V(N)?CIPHER(LAST)?$"),
    VSBOX
)>;

// Branch Instructions

// Two Cycle Branch
// Only the branch write (P9_BR_2C) and the branch dispatch (DISP_BR_1C) are
// listed; no execution pipeline entry is attached to these instructions.
def : InstRW<[P9_BR_2C, DISP_BR_1C],
      (instrs
    (instregex "BCCCTR(L)?(8)?$"),
    (instregex "BCCL(A|R|RL)?$"),
    (instregex "BCCTR(L)?(8)?(n)?$"),
    (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
    (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
    (instregex "BL(_TLS|_NOP)?(_RM)?$"),
    (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
    (instregex "BLA(8|8_NOP)?(_RM)?$"),
    (instregex "BLR(8|L)?$"),
    (instregex "TAILB(A)?(8)?$"),
    (instregex "TAILBCTR(8)?$"),
    (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
    (instregex "BCLR(L)?(n)?$"),
    (instregex "BCTR(L)?(8)?(_RM)?$"),
    B,
    BA,
    BC,
    BCC,
    BCCA,
    BCL,
    BCLalways,
    BCLn,
    BCTRL8_LDinto_toc,
    BCTRL_LWZinto_toc,
    BCTRL8_LDinto_toc_RM,
    BCTRL_LWZinto_toc_RM,
    BCn,
    CTRL_DEP
)>;

// Five Cycle Branch with a 2 Cycle ALU Op
// Operations must be done consecutively and not in parallel, so the write
// name carries the summed latency (5 + 2 = 7).
def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
      (instrs
    ADDPCIS
)>;

// Special Extracted Instructions For Atomics

// Atomic Load
// Modeled with five load/store writes (two P9_LS_1C, three P9_LS_4C), two
// EXEC and five AGEN pipeline entries, and six dispatch entries, two of which
// are DISP_3SLOTS_1C.
def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "L(D|W)AT$")
)>;

// Atomic Store
// Modeled with three load/store writes (one P9_LS_1C, two P9_LS_4C), one EXEC
// and three AGEN pipeline entries, and three dispatch entries, one of which
// is DISP_3SLOTS_1C.
def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "ST(D|W)AT$")
)>;

// Signal Processing Engine (SPE) Instructions
// These instructions are not supported on Power 9.
// They get an empty write list and are flagged with Unsupported = 1.
def : InstRW<[],
      (instrs
    BRINC,
    EVABS,
    EVEQV,
    EVMRA,
    EVNAND,
    EVNEG,
    (instregex "EVADD(I)?W$"),
    (instregex "EVADD(SM|SS|UM|US)IAAW$"),
    (instregex "EVAND(C)?$"),
    (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
    (instregex "EVCNTL(S|Z)W$"),
    (instregex "EVDIVW(S|U)$"),
    (instregex "EVEXTS(B|H)$"),
    (instregex "EVLD(H|W|D)(X)?$"),
    (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
    (instregex "EVLWHE(X)?$"),
    (instregex "EVLWHO(S|U)(X)?$"),
    (instregex "EVLW(H|W)SPLAT(X)?$"),
    (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
    (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
    (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
    (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
    (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
    (instregex "EVMWHUMI(A)?$"),
    (instregex "EVMWLS(M|S)IA(A|N)W$"),
    (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
    (instregex "EVMWSSF(A|AA|AN)?$"),
    (instregex "EVMWUMI(A|AA|AN)?$"),
    (instregex "EV(N|X)?OR(C)?$"),
    (instregex "EVR(LW|LWI|NDW)$"),
    (instregex "EVSLW(I)?$"),
    (instregex "EVSPLAT(F)?I$"),
    (instregex "EVSRW(I)?(S|U)$"),
    (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
    (instregex "EVSUBF(S|U)(M|S)IAAW$"),
    (instregex "EVSUB(I)?FW$")
)> { let Unsupported = 1; }

// General Instructions without scheduling support.
// Like the SPE list above, these get an empty write list and are flagged with
// Unsupported = 1.
def : InstRW<[],
      (instrs
    (instregex "(H)?RFI(D)?$"),
    (instregex "DSS(ALL)?$"),
    (instregex "DST(ST)?(T)?(64)?$"),
    (instregex "ICBL(C|Q)$"),
    (instregex "L(W|H|B)EPX$"),
    (instregex "ST(W|H|B)EPX$"),
    (instregex "(L|ST)FDEPX$"),
    (instregex "M(T|F)SR(IN)?$"),
    (instregex "M(T|F)DCR$"),
    (instregex "NOP_GT_PWR(6|7)$"),
    (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
    (instregex "WRTEE(I)?$"),
    (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"),
    ATTN,
    CLRBHRB,
    MFBHRBE,
    MBAR,
    MSYNC,
    SLBSYNC,
    SLBFEE_rec,
    NAP,
    STOP,
    TRAP,
    RFCI,
    RFDI,
    RFMCI,
    SC,
    DCBA,
    DCBI,
    DCCCI,
    ICCCI,
    ADDEX,
    ADDEX8
)> { let Unsupported = 1; }