//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the resources required by P9 instructions. This is part of
// the P9 processor model used for instruction scheduling. This file should
// contain all the instructions that may be used on Power 9. This is not
// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//     for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//

// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    // Instruction families selected by name pattern.
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    // Individually listed instructions.
    MTVSRDD,
    VEQV,
    VNAND,
    VNEGD,
    VNEGW,
    VNOR,
    VOR,
    VORC,
    VSEL,
    VXOR,
    XVNEGDP,
    XVNEGSP,
    XXLAND,
    XXLANDC,
    XXLEQV,
    XXLEQVOnes,
    XXLNAND,
    XXLNOR,
    XXLOR,
    XXLORf,
    XXLORC,
    XXLXOR,
    XXLXORdpz,
    XXLXORspz,
    XXLXORz,
    XXSEL,
    XSABSQP,
    XSCPSGNQP,
    XSIEXPQP,
    XSNABSQP,
    XSNEGQP,
    XSXEXPQP
)>;

// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
// Resource list: one 3 cycle ALU (P9_ALU_3C), one EXEC pipeline (IP_EXEC_1C)
// and the three-slot dispatch resource (DISP_3SLOTS_1C).
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMP(O|U)(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    FTDIV,
    FTSQRT,
    CMPEQB
)>;

// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
// Same ALU and EXEC resources as the restricted group above, but with a
// single dispatch (DISP_1C) instead of all three.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
      (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    (instregex "SETB(8)?$"),
    XSTDIVDP,
    XSTSQRTDP,
    XSXSIGDP,
    XSCVSPDPN,
    BPERMD
)>;

// Standard Dispatch ALU operation for 2 cycles.
// Only one slice used.
// Resource list: one 2 cycle ALU (P9_ALU_2C), one EXEC pipeline (IP_EXEC_1C)
// and a single dispatch (DISP_1C).
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
      (instrs
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTV(S)?RD$"),
    (instregex "MTV(S)?RW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?(O)?$"),
    (instregex "ANDI(S)?(8)?(_rec)?$"),
    (instregex "ADDC(8)?(O)?$"),
    (instregex "ADDIC(8)?(_rec)?$"),
    (instregex "ADD(8|4)(O)?(_rec)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
    (instregex "NEG(8)?(O)?(_rec)?$"),
    (instregex "POPCNTB$"),
    (instregex "POPCNTB8$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
    (instregex "NAND(8)?(_rec)?$"),
    (instregex "AND(C)?(8)?(_rec)?$"),
    (instregex "NOR(8)?(_rec)?$"),
    (instregex "OR(C)?(8)?(_rec)?$"),
    (instregex "EQV(8)?(_rec)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    // NOTE(review): every name this pattern can match is also matched by
    // "NEG(8)?(O)?(_rec)?$" above, so it looks redundant - confirm before
    // removing.
    (instregex "NEG(8)?(O)?$"),
    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
    (instregex "LA(8)?$"),
    COPY,
    MCRF,
    MCRXRX,
    XSNABSDP,
    XSNABSDPs,
    XSXEXPDP,
    XSABSDP,
    XSNEGDP,
    XSCPSGNDP,
    MFVSRWZ,
    MFVRWZ,
    EXTSWSLI,
    SRADI_32,
    RLDIC,
    RFEBB,
    TBEGIN,
    TRECHKPT,
    NOP,
    WAIT
)>;

// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    RLDIMI,
    XSIEXPDP,
    FMR,
    CREQV,
    CRXOR,
    TRECLAIM,
    TSR,
    TABORT
)>;

// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
    (instregex "VCMPBFP(_rec)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    VBPERMD,
    VADDCUW,
    VPOPCNTW,
    VPOPCNTD,
    VPRTYBD,
    VPRTYBW,
    VSHASIGMAD,
    VSHASIGMAW,
    VSUBSBS,
    VSUBSHS,
    VSUBSWS,
    VSUBUBS,
    VSUBUHS,
    VSUBUWS,
    VSUBCUW,
    VCMPGTSB,
    VCMPGTSB_rec,
    VCMPGTSD,
    VCMPGTSD_rec,
    VCMPGTSH,
    VCMPGTSH_rec,
    VCMPGTSW,
    VCMPGTSW_rec,
    VCMPGTUB,
    VCMPGTUB_rec,
    VCMPGTUD,
    VCMPGTUD_rec,
    VCMPGTUH,
    VCMPGTUH_rec,
    VCMPGTUW,
    VCMPGTUW_rec,
    VCMPNEB_rec,
    VCMPNEH_rec,
    VCMPNEW_rec,
    VCMPNEZB_rec,
    VCMPNEZH_rec,
    VCMPNEZW_rec,
    VCMPEQUB_rec,
    VCMPEQUD_rec,
    VCMPEQUH_rec,
    VCMPEQUW_rec,
    XVCMPEQDP,
    XVCMPEQDP_rec,
    XVCMPEQSP,
    XVCMPEQSP_rec,
    XVCMPGEDP,
    XVCMPGEDP_rec,
    XVCMPGESP,
    XVCMPGESP_rec,
    XVCMPGTDP,
    XVCMPGTDP_rec,
    XVCMPGTSP,
    XVCMPGTSP_rec,
    XVMAXDP,
    XVMAXSP,
    XVMINDP,
    XVMINSP,
    XVTDIVDP,
    XVTDIVSP,
    XVTSQRTDP,
    XVTSQRTSP,
    XVTSTDCDP,
    XVTSTDCSP,
    XVXSIGDP,
    XVXSIGSP
)>;

// 7 cycle DP vector operation that uses an entire superslice.
// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
// EXECO) and 1 dispatch (DISP) to the given superslice.
// NOTE(review): this comment used to say "all three dispatches", but the
// resource list uses DISP_1C, the same dispatch resource as the three cycle
// ALU vector group above - confirm that a single dispatch is intended.
def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    VADDFP,
    VCTSXS,
    VCTSXS_0,
    VCTUXS,
    VCTUXS_0,
    VEXPTEFP,
    VLOGEFP,
    VMADDFP,
    VMHADDSHS,
    VNMSUBFP,
    VREFP,
    VRFIM,
    VRFIN,
    VRFIP,
    VRFIZ,
    VRSQRTEFP,
    VSUBFP,
    XVADDDP,
    XVADDSP,
    XVCVDPSP,
    XVCVDPSXDS,
    XVCVDPSXWS,
    XVCVDPUXDS,
    XVCVDPUXWS,
    XVCVHPSP,
    XVCVSPDP,
    XVCVSPHP,
    XVCVSPSXDS,
    XVCVSPSXWS,
    XVCVSPUXDS,
    XVCVSPUXWS,
    XVCVSXDDP,
    XVCVSXDSP,
    XVCVSXWDP,
    XVCVSXWSP,
    XVCVUXDDP,
    XVCVUXDSP,
    XVCVUXWDP,
    XVCVUXWSP,
    XVMADDADP,
    XVMADDASP,
    XVMADDMDP,
    XVMADDMSP,
    XVMSUBADP,
    XVMSUBASP,
    XVMSUBMDP,
    XVMSUBMSP,
    XVMULDP,
    XVMULSP,
    XVNMADDADP,
    XVNMADDASP,
    XVNMADDMDP,
    XVNMADDMSP,
    XVNMSUBADP,
    XVNMSUBASP,
    XVNMSUBMDP,
    XVNMSUBMSP,
    XVRDPI,
    XVRDPIC,
    XVRDPIM,
    XVRDPIP,
    XVRDPIZ,
    XVREDP,
    XVRESP,
    XVRSPI,
    XVRSPIC,
    XVRSPIM,
    XVRSPIP,
    XVRSPIZ,
    XVRSQRTEDP,
    XVRSQRTESP,
    XVSUBDP,
    XVSUBSP,
    VCFSX,
    VCFSX_0,
    VCFUX,
    VCFUX_0,
    VMHRADDSHS,
    VMLADDUHM,
    VMSUMMBM,
    VMSUMSHM,
    VMSUMSHS,
    VMSUMUBM,
    VMSUMUHM,
    VMSUMUDM,
    VMSUMUHS,
    VMULESB,
    VMULESH,
    VMULESW,
    VMULEUB,
    VMULEUH,
    VMULEUW,
    VMULOSB,
    VMULOSH,
    VMULOSW,
    VMULOUB,
    VMULOUH,
    VMULOUW,
    VMULUWM,
    VSUM2SWS,
    VSUM4SBS,
    VSUM4SHS,
    VSUM4UBS,
    VSUMSWS
)>;

// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MADD(HD|HDU|LD|LD8)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
)>;

// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FRSP,
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    FNMADDS,
    FNMADD,
    FNMSUBS,
    FNMSUB,
    FSELD,
    FSELS,
    FMULS,
    FMUL,
    XSMADDADP,
    XSMADDASP,
    XSMADDMDP,
    XSMADDMSP,
    XSMSUBADP,
    XSMSUBASP,
    XSMSUBMDP,
    XSMSUBMSP,
    XSMULDP,
    XSMULSP,
    XSNMADDADP,
    XSNMADDASP,
    XSNMADDMDP,
    XSNMADDMSP,
    XSNMSUBADP,
    XSNMSUBASP,
    XSNMSUBMDP,
    XSNMSUBMSP
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations can be done in parallel. The DP is restricted so we need a
// full 4 dispatches (3 for the restricted DP op plus 1 for the ALU op).
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "FSEL(D|S)_rec$")
)>;

// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
// Modeled with the combined P9_DPOpAndALUOp_7C resource, two EXEC pipelines
// and 4 dispatches (3 for the restricted DP op plus 1 for the ALU op).
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations must be done sequentially. The DP is restricted so we need
// a full 4 dispatches (3 for the restricted DP op plus 1 for the ALU op).
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
    (instregex "FRE(S)?_rec$"),
    (instregex "FADD(S)?_rec$"),
    (instregex "FSUB(S)?_rec$"),
    (instregex "F(N)?MSUB(S)?_rec$"),
    (instregex "F(N)?MADD(S)?_rec$"),
    (instregex "FCFID(U)?(S)?_rec$"),
    (instregex "FCTID(U)?(Z)?_rec$"),
    (instregex "FCTIW(U)?(Z)?_rec$"),
    (instregex "FMUL(S)?_rec$"),
    (instregex "FRSQRTE(S)?_rec$"),
    FRSP_rec
)>;

// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
      (instrs
    XSADDDP,
    XSADDSP,
    XSCVDPHP,
    XSCVDPSP,
    XSCVDPSXDS,
    XSCVDPSXDSs,
    XSCVDPSXWS,
    XSCVDPUXDS,
    XSCVDPUXDSs,
    XSCVDPUXWS,
    XSCVDPSXWSs,
    XSCVDPUXWSs,
    XSCVHPDP,
    XSCVSPDP,
    XSCVSXDDP,
    XSCVSXDSP,
    XSCVUXDDP,
    XSCVUXDSP,
    XSRDPI,
    XSRDPIC,
    XSRDPIM,
    XSRDPIP,
    XSRDPIZ,
    XSREDP,
    XSRESP,
    XSRSQRTEDP,
    XSRSQRTESP,
    XSSUBDP,
    XSSUBSP,
    XSCVDPSPN,
    XSRSP
)>;

// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
      (instrs
    // Instruction families selected by name pattern.
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    // Individually listed instructions.
    MFVSRLD,
    MTVSRWS,
    VBPERMQ,
    VCLZLSBB,
    VCTZLSBB,
    VEXTRACTD,
    VEXTUBLX,
    VEXTUBRX,
    VEXTUHLX,
    VEXTUHRX,
    VEXTUWLX,
    VEXTUWRX,
    VGBBD,
    VMRGHB,
    VMRGHH,
    VMRGHW,
    VMRGLB,
    VMRGLH,
    VMRGLW,
    VPERM,
    VPERMR,
    VPERMXOR,
    VPKPX,
    VPKSDSS,
    VPKSDUS,
    VPKSHSS,
    VPKSHUS,
    VPKSWSS,
    VPKSWUS,
    VPKUDUM,
    VPKUDUS,
    VPKUHUM,
    VPKUHUS,
    VPKUWUM,
    VPKUWUS,
    VPRTYBQ,
    VSL,
    VSLDOI,
    VSLO,
    VSLV,
    VSR,
    VSRO,
    VSRV,
    VUPKHPX,
    VUPKHSB,
    VUPKHSH,
    VUPKHSW,
    VUPKLPX,
    VUPKLSB,
    VUPKLSH,
    VUPKLSW,
    XXBRD,
    XXBRH,
    XXBRQ,
    XXBRW,
    XXEXTRACTUW,
    XXINSERTW,
    XXMRGHW,
    XXMRGLW,
    XXPERM,
    XXPERMR,
    XXSLDWI,
    XXSLDWIs,
    XXSPLTIB,
    XXSPLTW,
    XXSPLTWs,
    XXPERMDI,
    XXPERMDIs,
    VADDCUQ,
    VADDECUQ,
    VADDEUQM,
    VADDUQM,
    VMUL10CUQ,
    VMUL10ECUQ,
    VMUL10EUQ,
    VMUL10UQ,
    VSUBCUQ,
    VSUBECUQ,
    VSUBEUQM,
    VSUBUQM,
    XSCMPEXPQP,
    XSCMPOQP,
    XSCMPUQP,
    XSTSTDCQP,
    XSXSIGQP,
    BCDCFN_rec,
    BCDCFZ_rec,
    BCDCPSGN_rec,
    BCDCTN_rec,
    BCDCTZ_rec,
    BCDSETSGN_rec,
    BCDS_rec,
    BCDTRUNC_rec,
    BCDUS_rec,
    BCDUTRUNC_rec,
    BCDADD_rec,
    BCDSUB_rec
)>;

// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDSR_rec,
    XSADDQP,
    XSADDQPO,
    XSCVDPQP,
    XSCVQPDP,
    XSCVQPDPO,
    XSCVQPSDZ,
    XSCVQPSWZ,
    XSCVQPUDZ,
    XSCVQPUWZ,
    XSCVSDQP,
    XSCVUDQP,
    XSRQPI,
    XSRQPIX,
    XSRQPXP,
    XSSUBQP,
    XSSUBQPO
)>;

// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDCTSQ_rec
)>;

// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSMADDQP,
    XSMADDQPO,
    XSMSUBQP,
    XSMSUBQPO,
    XSMULQP,
    XSMULQPO,
    XSNMADDQP,
    XSNMADDQPO,
    XSNMSUBQP,
    XSNMSUBQPO
)>;

// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDCFSQ_rec
)>;

// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSDIVQP,
    XSDIVQPO
)>;

// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
// NOTE(review): this comment used to say "all three dispatches", but the
// resource list uses DISP_1C exactly like the other DFU groups above, which
// are documented as one dispatch - confirm.
def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSSQRTQP,
    XSSQRTQPO
)>;

// 6 Cycle Load uses a single slice.
def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
      (instrs
    // NOTE(review): no trailing `$` here, unlike most patterns in this file -
    // confirm that also matching names with extra trailing text is intended.
    (instregex "LXVL(L)?")
)>;

// 5 Cycle Load uses a single slice.
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "LVE(B|H|W)X$"),
    // NOTE(review): no trailing `$` on the next pattern either - confirm.
    (instregex "LVX(L)?"),
    (instregex "LXSI(B|H)ZX$"),
    LXSDX,
    LXVB16X,
    LXVD2X,
    LXVWSX,
    LXSIWZX,
    LXV,
    LXVX,
    LXSD,
    DFLOADf64,
    XFLOADf64,
    LIWZX
)>;

// 4 Cycle Load uses a single slice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    CP_ABORT,
    DARN,
    EnforceIEIO,
    ISYNC,
    MSGSYNC,
    TLBSYNC,
    SYNC,
    LMW,
    LSWI
)>;

// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
// superslice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
    LFIWZX,
    LFDX,
    LFD
)>;

// Cracked Load Instructions.
// Load instructions that can be done in parallel.
// Two 4 cycle load pieces, each with its own AGEN pipeline, sharing one
// DISP_PAIR_1C dispatch resource.
def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_PAIR_1C],
      (instrs
    SLBIA,
    SLBIE,
    SLBMFEE,
    SLBMFEV,
    SLBMTE,
    TLBIEL
)>;

// Cracked Load Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
      (instrs
    (instregex "L(W|H)ZU(X)?(8)?$")
)>;

// Cracked TEND Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    TEND
)>;


// Cracked Store Instruction
// Consecutive Store and ALU instructions. The store is restricted and requires
// three dispatches.
def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "ST(B|H|W|D)CX$")
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    (instregex "LHA(X)?(8)?$"),
    (instregex "CP_PASTE(8)?_rec$"),
    (instregex "LWA(X)?(_32)?$"),
    TCHECK
)>;

// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    LFIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// One dispatch per piece (two DISP_1C entries) as this is a cracked
// instruction.
// NOTE(review): this comment used to say "Full 4 dispatches", which does not
// match the two DISP_1C entries in the resource list - confirm.
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    LXSIWAX,
    LIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
// cycles. The Load and ALU operations cannot be done at the same time and so
// their latencies are added.
// Full 6 dispatches are required as this is a restricted instruction.
def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    LFSX,
    LFS
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// One dispatch per piece (two DISP_1C entries) as this is a cracked
// instruction.
// NOTE(review): this comment used to say "Full 4 dispatches", which does not
// match the two DISP_1C entries in the resource list - confirm.
def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    LXSSP,
    LXSSPX,
    XFLOADf32,
    DFLOADf32
)>;

// Cracked 3-Way Load Instruction
// Load with two ALU operations that depend on each other
// One AGEN pipeline for the load and two EXEC pipelines for the ALU ops.
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
      (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX
)>;

// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
// added. Requires 8 cycles. Since the PM requires the full superslice we need
// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
// requires the remaining 1 dispatch.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C],
      (instrs
    LXVH8X,
    LXVDSX,
    LXVW4X
)>;

// Single slice Restricted store operation. The restricted operation requires
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "STF(S|D|IWX|SX|DX)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    STIWX,
    SLBIEG,
    STMW,
    STSWI,
    TLBIE
)>;

// Vector Store Instruction
// Requires the whole superslice and therefore requires one dispatch
// as well as both the Even and Odd exec pipelines.
// The AGEN pipeline (IP_AGEN_1C) is also listed for the store.
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
)>;

// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
// The dispatch resource for all DIV groups is DISP_EVEN_1C.
def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$")
)>;

// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "M(T|F)(U)?DSCR$"),
    (instregex "MTSPR(8)?$")
)>;

// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVW,
    DIVWO,
    DIVWU,
    DIVWUO,
    MODSW
)>;

// 24 Cycle DIV operation.
// Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVWE,
    DIVWEO,
    DIVD,
    DIVDO,
    DIVWEU,
    DIVWEUO,
    DIVDU,
    DIVDUO,
    MODSD,
    MODUD,
    MODUW
)>;

// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
// NOTE(review): this comment used to say "all three dispatches", but the
// resource list uses the same DISP_EVEN_1C as the 24 cycle group above, which
// is documented as two dispatches - confirm.
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVDE,
    DIVDEO,
    DIVDEU,
    DIVDEUO
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 18 (per P9_IntDivAndALUOp_18C_8; the
// comment previously said 26, which belongs to the next group).
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    (instregex "DIVW(U)?(O)?_rec$")
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    DIVD_rec,
    DIVDO_rec,
    DIVDU_rec,
    DIVDUO_rec,
    DIVWE_rec,
    DIVWEO_rec,
    DIVWEU_rec,
    DIVWEUO_rec
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    DIVDE_rec,
    DIVDEO_rec,
    DIVDEU_rec,
    DIVDEUO_rec
)>;

// CR access instructions in _BrMCR, IIC_BrMCRX.

// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    MTCRF,
    MTCRF8
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
    (instregex "ADDC(8)?(O)?_rec$"),
    (instregex "SUBFC(8)?(O)?_rec$")
)>;

// Cracked ALU operations.
// Two ALU ops can be done in parallel.
// One is a three cycle ALU, the other is a two cycle ALU.
// One of the ALU ops is restricted, the other is not, so we have a total of
// 4 dispatches (DISP_3SLOTS_1C plus DISP_1C).
// NOTE(review): this comment used to say 5 dispatches, which does not match
// the resource list - confirm.
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "F(N)?ABS(D|S)_rec$"),
    (instregex "FCPSGN(D|S)_rec$"),
    (instregex "FNEG(D|S)_rec$"),
    FMR_rec
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
    MCRFS
)>;

// Cracked Restricted ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MTFSF(b|_rec)?$"),
    (instregex "MTFSFI(_rec)?$"),
    MTFSFIb
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// One of the ALU ops is restricted and takes 3 dispatches.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "RLD(I)?C(R|L)_rec$"),
    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
    (instregex "SLW(8)?_rec$"),
    (instregex "SRAW(I)?_rec$"),
    (instregex "SRW(8)?_rec$"),
    RLDICL_32_rec,
    RLDIMI_rec
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// Both of the ALU ops are restricted and take 3 dispatches.
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MFFS(L|CE|_rec)?$")
)>;

// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
// total of 6 cycles. All of the ALU operations are also restricted so each
// takes 3 dispatches for a total of 9.
// (The comment previously said "loads"; the resource list holds three EXEC
// pipelines and no AGEN pipeline.)
def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MFCR(8)?$")
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "EXTSWSLI_32_64_rec$"),
    (instregex "SRAD(I)?_rec$"),
    EXTSWSLI_rec,
    SLD_rec,
    SRD_rec,
    RLDIC_rec
)>;

// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FDIV
)>;

// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// 3 dispatches for the restricted DP op plus 1 for the ALU op.
def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FDIV_rec
)>;

// 36 Cycle DP Instruction.
// Instruction can be done on a single slice.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
      (instrs
    XSSQRTDP
)>;

// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FSQRT
)>;

// 36 Cycle DP Vector Instruction.
// Uses both DP units (even DPE and odd DPO), both exec pipelines (EXECE,
// EXECO) and 1 dispatch.
def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVSQRTDP
)>;

// 27 Cycle DP Vector Instruction.
// Uses both DP units (even DPE and odd DPO), both exec pipelines (EXECE,
// EXECO) and 1 dispatch.
def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVSQRTSP
)>;

// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FSQRT_rec
)>;

// 26 Cycle DP Instruction.
// Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
      (instrs
    XSSQRTSP
)>;

// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FSQRTS
)>;

// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FSQRTS_rec
)>;

// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
      (instrs
    XSDIVDP
)>;

// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FDIVS
)>;

// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// NOTE(review): this comment used to say "2 Cycle ALU"; the resource is
// P9_DPOpAndALU2Op_25C_5 (22 + 3 = 25), presumably a 3 cycle ALU op - confirm.
def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FDIVS_rec
)>;

// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
      (instrs
    XSDIVSP
)>;

// 24 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVDIVSP
)>;

// 33 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVDIVDP
)>;

// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final dispatch comes from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
// NOTE(review): the cycle and dispatch counts above were corrected to match
// the resource list that follows (P9_LoadAndALU2Op_7C, two DISP_3SLOTS_1C and
// one DISP_1C); the previous text said 6 cycles, 7 dispatches and 2 final
// dispatches - confirm.
def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
             (instrs
               (instregex "LF(SU|SUX)$"))>;

// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
// the store and so it can be run at the same time as the store. The store is
// also restricted.
def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
             (instrs
               (instregex "STF(S|D)U(X)?$"),
               (instregex "ST(B|H|W|D)U(X)?(8)?$"))>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
             (instrs
               (instregex "LBZU(X)?(8)?$"),
               (instregex "LDU(X)?$"))>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. The load is also
// restricted, so it takes all 3 dispatches (DISP_3SLOTS_1C), while the ALU
// takes one more dispatch (DISP_1C). The AGEN pipeline is from the load and
// the EXEC pipeline is required for the ALU.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
             (instrs
               (instregex "LF(DU|DUX)$"))>;

// Crypto Instructions

// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
             (instrs
               (instregex "VPMSUM(B|H|W|D)$"),
               (instregex "V(N)?CIPHER(LAST)?$"),
               VSBOX)>;

// Branch Instructions

// Branches with a two cycle latency.
def : InstRW<[P9_BR_2C, DISP_BR_1C],
             (instrs
               (instregex "BCCCTR(L)?(8)?$"),
               (instregex "BCCL(A|R|RL)?$"),
               (instregex "BCCTR(L)?(8)?(n)?$"),
               (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
               (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
               (instregex "BL(_TLS|_NOP)?(_RM)?$"),
               (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
               (instregex "BLA(8|8_NOP)?(_RM)?$"),
               (instregex "BLR(8|L)?$"),
               (instregex "TAILB(A)?(8)?$"),
               (instregex "TAILBCTR(8)?$"),
               (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
               (instregex "BCLR(L)?(n)?$"),
               (instregex "BCTR(L)?(8)?(_RM)?$"),
               B,
               BA,
               BC,
               BCC,
               BCCA,
               BCL,
               BCLalways,
               BCLn,
               BCTRL8_LDinto_toc,
               BCTRL_LWZinto_toc,
               BCTRL8_LDinto_toc_RM,
               BCTRL_LWZinto_toc_RM,
               BCn,
               CTRL_DEP)>;

// Five Cycle Branch with a 2 Cycle ALU Op
// Operations must be done consecutively and not in parallel.
def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
             (instrs ADDPCIS)>;

// Special Extracted Instructions For Atomics

// Atomic loads (LDAT, LWAT).
def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C, IP_EXEC_1C,
              IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C,
              DISP_1C, DISP_1C, DISP_1C],
             (instrs
               (instregex "L(D|W)AT$"))>;

// Atomic stores (STDAT, STWAT).
def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C,
              IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_3SLOTS_1C, DISP_1C],
             (instrs
               (instregex "ST(D|W)AT$"))>;

// Signal Processing Engine (SPE) Instructions
// Power 9 does not support these instructions, so they get no resources and
// are marked Unsupported.
def : InstRW<[],
             (instrs
               BRINC,
               EVABS,
               EVEQV,
               EVMRA,
               EVNAND,
               EVNEG,
               (instregex "EVADD(I)?W$"),
               (instregex "EVADD(SM|SS|UM|US)IAAW$"),
               (instregex "EVAND(C)?$"),
               (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
               (instregex "EVCNTL(S|Z)W$"),
               (instregex "EVDIVW(S|U)$"),
               (instregex "EVEXTS(B|H)$"),
               (instregex "EVLD(H|W|D)(X)?$"),
               (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
               (instregex "EVLWHE(X)?$"),
               (instregex "EVLWHO(S|U)(X)?$"),
               (instregex "EVLW(H|W)SPLAT(X)?$"),
               (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
               (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
               (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
               (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
               (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
               (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
               (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
               (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
               (instregex "EVMWHUMI(A)?$"),
               (instregex "EVMWLS(M|S)IA(A|N)W$"),
               (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
               (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
               (instregex "EVMWSSF(A|AA|AN)?$"),
               (instregex "EVMWUMI(A|AA|AN)?$"),
               (instregex "EV(N|X)?OR(C)?$"),
               (instregex "EVR(LW|LWI|NDW)$"),
               (instregex "EVSLW(I)?$"),
               (instregex "EVSPLAT(F)?I$"),
               (instregex "EVSRW(I)?(S|U)$"),
               (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
               (instregex "EVSUBF(S|U)(M|S)IAAW$"),
               (instregex "EVSUB(I)?FW$"))> {
  let Unsupported = 1;
}

// General instructions that have no scheduling support; they get no
// resources and are marked Unsupported.
def : InstRW<[],
             (instrs
               (instregex "(H)?RFI(D)?$"),
               (instregex "DSS(ALL)?$"),
               (instregex "DST(ST)?(T)?(64)?$"),
               (instregex "ICBL(C|Q)$"),
               (instregex "L(W|H|B)EPX$"),
               (instregex "ST(W|H|B)EPX$"),
               (instregex "(L|ST)FDEPX$"),
               (instregex "M(T|F)SR(IN)?$"),
               (instregex "M(T|F)DCR$"),
               (instregex "NOP_GT_PWR(6|7)$"),
               (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
               (instregex "WRTEE(I)?$"),
               (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"),
               ATTN,
               CLRBHRB,
               MFBHRBE,
               MBAR,
               MSYNC,
               SLBSYNC,
               SLBFEE_rec,
               NAP,
               STOP,
               TRAP,
               RFCI,
               RFDI,
               RFMCI,
               SC,
               DCBA,
               DCBI,
               DCCCI,
               ICCCI,
               ADDEX,
               ADDEX8)> {
  let Unsupported = 1;
}