//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the resources required by P9 instructions. This is part of
// the P9 processor model used for instruction scheduling. This file should
// contain all the instructions that may be used on Power 9. This is not
// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//     for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//

// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    MTVSRDD,
    VEQV,
    VNAND,
    VNEGD,
    VNEGW,
    VNOR,
    VOR,
    VORC,
    VSEL,
    VXOR,
    XVNEGDP,
    XVNEGSP,
    XXLAND,
    XXLANDC,
    XXLEQV,
    XXLEQVOnes,
    XXLNAND,
    XXLNOR,
    XXLOR,
    XXLORf,
    XXLORC,
    XXLXOR,
    XXLXORdpz,
    XXLXORspz,
    XXLXORz,
    XXSEL,
    XSABSQP,
    XSCPSGNQP,
    XSIEXPQP,
    XSNABSQP,
    XSNEGQP,
    XSXEXPQP
)>;

// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMP(O|U)(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    FTDIV,
    FTSQRT,
    CMPEQB
)>;

// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
      (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    (instregex "SETB(8)?$"),
    XSTDIVDP,
    XSTSQRTDP,
    XSXSIGDP,
    XSCVSPDPN,
    BPERMD
)>;

// Standard Dispatch ALU operation for 2 cycles.
// Only one slice used.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
      (instrs
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTV(S)?RD$"),
    (instregex "MTV(S)?RW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?(O)?$"),
    (instregex "ANDI(S)?(8)?(_rec)?$"),
    (instregex "ADDC(8)?(O)?$"),
    (instregex "ADDIC(8)?(_rec)?$"),
    (instregex "ADD(8|4)(O)?(_rec)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
    (instregex "NEG(8)?(O)?(_rec)?$"),
    (instregex "POPCNTB$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
    (instregex "NAND(8)?(_rec)?$"),
    (instregex "AND(C)?(8)?(_rec)?$"),
    (instregex "NOR(8)?(_rec)?$"),
    (instregex "OR(C)?(8)?(_rec)?$"),
    (instregex "EQV(8)?(_rec)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    (instregex "NEG(8)?(O)?$"),
    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
    COPY,
    MCRF,
    MCRXRX,
    XSNABSDP,
    XSXEXPDP,
    XSABSDP,
    XSNEGDP,
    XSCPSGNDP,
    MFVSRWZ,
    MFVRWZ,
    EXTSWSLI,
    SRADI_32,
    RLDIC,
    RFEBB,
    LA,
    TBEGIN,
    TRECHKPT,
    NOP,
    WAIT
)>;

// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    RLDIMI,
    XSIEXPDP,
    FMR,
    CREQV,
    CRXOR,
    TRECLAIM,
    TSR,
    TABORT
)>;

// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
    (instregex "VCMPBFP(_rec)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    VBPERMD,
    VADDCUW,
    VPOPCNTW,
    VPOPCNTD,
    VPRTYBD,
    VPRTYBW,
    VSHASIGMAD,
    VSHASIGMAW,
    VSUBSBS,
    VSUBSHS,
    VSUBSWS,
    VSUBUBS,
    VSUBUHS,
    VSUBUWS,
    VSUBCUW,
    VCMPGTSB,
    VCMPGTSB_rec,
    VCMPGTSD,
    VCMPGTSD_rec,
    VCMPGTSH,
    VCMPGTSH_rec,
    VCMPGTSW,
    VCMPGTSW_rec,
    VCMPGTUB,
    VCMPGTUB_rec,
    VCMPGTUD,
    VCMPGTUD_rec,
    VCMPGTUH,
    VCMPGTUH_rec,
    VCMPGTUW,
    VCMPGTUW_rec,
    VCMPNEB_rec,
    VCMPNEH_rec,
    VCMPNEW_rec,
    VCMPNEZB_rec,
    VCMPNEZH_rec,
    VCMPNEZW_rec,
    VCMPEQUB_rec,
    VCMPEQUD_rec,
    VCMPEQUH_rec,
    VCMPEQUW_rec,
    XVCMPEQDP,
    XVCMPEQDP_rec,
    XVCMPEQSP,
    XVCMPEQSP_rec,
    XVCMPGEDP,
    XVCMPGEDP_rec,
    XVCMPGESP,
    XVCMPGESP_rec,
    XVCMPGTDP,
    XVCMPGTDP_rec,
    XVCMPGTSP,
    XVCMPGTSP_rec,
    XVMAXDP,
    XVMAXSP,
    XVMINDP,
    XVMINSP,
    XVTDIVDP,
    XVTDIVSP,
    XVTSQRTDP,
    XVTSQRTSP,
    XVTSTDCDP,
    XVTSTDCSP,
    XVXSIGDP,
    XVXSIGSP
)>;

// 7 cycle DP vector operation that uses an entire superslice.
// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
// EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    VADDFP,
    VCTSXS,
    VCTSXS_0,
    VCTUXS,
    VCTUXS_0,
    VEXPTEFP,
    VLOGEFP,
    VMADDFP,
    VMHADDSHS,
    VNMSUBFP,
    VREFP,
    VRFIM,
    VRFIN,
    VRFIP,
    VRFIZ,
    VRSQRTEFP,
    VSUBFP,
    XVADDDP,
    XVADDSP,
    XVCVDPSP,
    XVCVDPSXDS,
    XVCVDPSXWS,
    XVCVDPUXDS,
    XVCVDPUXWS,
    XVCVHPSP,
    XVCVSPDP,
    XVCVSPHP,
    XVCVSPSXDS,
    XVCVSPSXWS,
    XVCVSPUXDS,
    XVCVSPUXWS,
    XVCVSXDDP,
    XVCVSXDSP,
    XVCVSXWDP,
    XVCVSXWSP,
    XVCVUXDDP,
    XVCVUXDSP,
    XVCVUXWDP,
    XVCVUXWSP,
    XVMADDADP,
    XVMADDASP,
    XVMADDMDP,
    XVMADDMSP,
    XVMSUBADP,
    XVMSUBASP,
    XVMSUBMDP,
    XVMSUBMSP,
    XVMULDP,
    XVMULSP,
    XVNMADDADP,
    XVNMADDASP,
    XVNMADDMDP,
    XVNMADDMSP,
    XVNMSUBADP,
    XVNMSUBASP,
    XVNMSUBMDP,
    XVNMSUBMSP,
    XVRDPI,
    XVRDPIC,
    XVRDPIM,
    XVRDPIP,
    XVRDPIZ,
    XVREDP,
    XVRESP,
    XVRSPI,
    XVRSPIC,
    XVRSPIM,
    XVRSPIP,
    XVRSPIZ,
    XVRSQRTEDP,
    XVRSQRTESP,
    XVSUBDP,
    XVSUBSP,
    VCFSX,
    VCFSX_0,
    VCFUX,
    VCFUX_0,
    VMHRADDSHS,
    VMLADDUHM,
    VMSUMMBM,
    VMSUMSHM,
    VMSUMSHS,
    VMSUMUBM,
    VMSUMUHM,
    VMSUMUDM,
    VMSUMUHS,
    VMULESB,
    VMULESH,
    VMULESW,
    VMULEUB,
    VMULEUH,
    VMULEUW,
    VMULOSB,
    VMULOSH,
    VMULOSW,
    VMULOUB,
    VMULOUH,
    VMULOUW,
    VMULUWM,
    VSUM2SWS,
    VSUM4SBS,
    VSUM4SHS,
    VSUM4UBS,
    VSUMSWS
)>;

// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MADD(HD|HDU|LD|LD8)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
)>;

// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FRSP,
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    FNMADDS,
    FNMADD,
    FNMSUBS,
    FNMSUB,
    FSELD,
    FSELS,
    FMULS,
    FMUL,
    XSMADDADP,
    XSMADDASP,
    XSMADDMDP,
    XSMADDMSP,
    XSMSUBADP,
    XSMSUBASP,
    XSMSUBMDP,
    XSMSUBMSP,
    XSMULDP,
    XSMULSP,
    XSNMADDADP,
    XSNMADDASP,
    XSNMADDMDP,
    XSNMADDMSP,
    XSNMSUBADP,
    XSNMSUBASP,
    XSNMSUBMDP,
    XSNMSUBMSP
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations can be done in parallel. The DP is restricted so we need a
// full 4 dispatches.
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "FSEL(D|S)_rec$")
)>;

// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations must be done sequentially. The DP is restricted so we need
// a full 4 dispatches.
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
    (instregex "FRE(S)?_rec$"),
    (instregex "FADD(S)?_rec$"),
    (instregex "FSUB(S)?_rec$"),
    (instregex "F(N)?MSUB(S)?_rec$"),
    (instregex "F(N)?MADD(S)?_rec$"),
    (instregex "FCFID(U)?(S)?_rec$"),
    (instregex "FCTID(U)?(Z)?_rec$"),
    (instregex "FCTIW(U)?(Z)?_rec$"),
    (instregex "FMUL(S)?_rec$"),
    (instregex "FRSQRTE(S)?_rec$"),
    FRSP_rec
)>;

// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
      (instrs
    XSADDDP,
    XSADDSP,
    XSCVDPHP,
    XSCVDPSP,
    XSCVDPSXDS,
    XSCVDPSXDSs,
    XSCVDPSXWS,
    XSCVDPUXDS,
    XSCVDPUXDSs,
    XSCVDPUXWS,
    XSCVDPSXWSs,
    XSCVDPUXWSs,
    XSCVHPDP,
    XSCVSPDP,
    XSCVSXDDP,
    XSCVSXDSP,
    XSCVUXDDP,
    XSCVUXDSP,
    XSRDPI,
    XSRDPIC,
    XSRDPIM,
    XSRDPIP,
    XSRDPIZ,
    XSREDP,
    XSRESP,
    XSRSQRTEDP,
    XSRSQRTESP,
    XSSUBDP,
    XSSUBSP,
    XSCVDPSPN,
    XSRSP
)>;

// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
      (instrs
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    MFVSRLD,
    MTVSRWS,
    VBPERMQ,
    VCLZLSBB,
    VCTZLSBB,
    VEXTRACTD,
    VEXTUBLX,
    VEXTUBRX,
    VEXTUHLX,
    VEXTUHRX,
    VEXTUWLX,
    VEXTUWRX,
    VGBBD,
    VMRGHB,
    VMRGHH,
    VMRGHW,
    VMRGLB,
    VMRGLH,
    VMRGLW,
    VPERM,
    VPERMR,
    VPERMXOR,
    VPKPX,
    VPKSDSS,
    VPKSDUS,
    VPKSHSS,
    VPKSHUS,
    VPKSWSS,
    VPKSWUS,
    VPKUDUM,
    VPKUDUS,
    VPKUHUM,
    VPKUHUS,
    VPKUWUM,
    VPKUWUS,
    VPRTYBQ,
    VSL,
    VSLDOI,
    VSLO,
    VSLV,
    VSR,
    VSRO,
    VSRV,
    VUPKHPX,
    VUPKHSB,
    VUPKHSH,
    VUPKHSW,
    VUPKLPX,
    VUPKLSB,
    VUPKLSH,
    VUPKLSW,
    XXBRD,
    XXBRH,
    XXBRQ,
    XXBRW,
    XXEXTRACTUW,
    XXINSERTW,
    XXMRGHW,
    XXMRGLW,
    XXPERM,
    XXPERMR,
    XXSLDWI,
    XXSLDWIs,
    XXSPLTIB,
    XXSPLTW,
    XXSPLTWs,
    XXPERMDI,
    XXPERMDIs,
    VADDCUQ,
    VADDECUQ,
    VADDEUQM,
    VADDUQM,
    VMUL10CUQ,
    VMUL10ECUQ,
    VMUL10EUQ,
    VMUL10UQ,
    VSUBCUQ,
    VSUBECUQ,
    VSUBEUQM,
    VSUBUQM,
    XSCMPEXPQP,
    XSCMPOQP,
    XSCMPUQP,
    XSTSTDCQP,
    XSXSIGQP,
    BCDCFN_rec,
    BCDCFZ_rec,
    BCDCPSGN_rec,
    BCDCTN_rec,
    BCDCTZ_rec,
    BCDSETSGN_rec,
    BCDS_rec,
    BCDTRUNC_rec,
    BCDUS_rec,
    BCDUTRUNC_rec
)>;

// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDSR_rec,
    XSADDQP,
    XSADDQPO,
    XSCVDPQP,
    XSCVQPDP,
    XSCVQPDPO,
    XSCVQPSDZ,
    XSCVQPSWZ,
    XSCVQPUDZ,
    XSCVQPUWZ,
    XSCVSDQP,
    XSCVUDQP,
    XSRQPI,
    XSRQPIX,
    XSRQPXP,
    XSSUBQP,
    XSSUBQPO
)>;

// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDCTSQ_rec
)>;

// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSMADDQP,
    XSMADDQPO,
    XSMSUBQP,
    XSMSUBQPO,
    XSMULQP,
    XSMULQPO,
    XSNMADDQP,
    XSNMADDQPO,
    XSNMSUBQP,
    XSNMSUBQPO
)>;

// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDCFSQ_rec
)>;

// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSDIVQP,
    XSDIVQPO
)>;

// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSSQRTQP,
    XSSQRTQPO
)>;

// 6 Cycle Load uses a single slice.
def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
      (instrs
    // NOTE(review): unlike the other patterns in this file this one has no
    // trailing '$', so it presumably also matches any longer opcode name that
    // starts with this prefix - confirm that this is intended.
    (instregex "LXVL(L)?")
)>;

// 5 Cycle Load uses a single slice.
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "LVE(B|H|W)X$"),
    // NOTE(review): no trailing '$' on this pattern either - confirm intent.
    (instregex "LVX(L)?"),
    (instregex "LXSI(B|H)ZX$"),
    LXSDX,
    LXVB16X,
    LXVD2X,
    LXVWSX,
    LXSIWZX,
    LXV,
    LXVX,
    LXSD,
    DFLOADf64,
    XFLOADf64,
    LIWZX
)>;

// 4 Cycle Load uses a single slice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "CP_PASTE(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    CP_ABORT,
    DARN,
    EnforceIEIO,
    ISYNC,
    MSGSYNC,
    TLBSYNC,
    SYNC,
    LMW,
    LSWI
)>;

// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
// superslice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
    LFIWZX,
    LFDX,
    LFD
)>;

// Cracked Load Instructions.
// Load instructions that can be done in parallel.
def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_PAIR_1C],
      (instrs
    SLBIA,
    SLBIE,
    SLBMFEE,
    SLBMFEV,
    SLBMTE,
    TLBIEL
)>;

// Cracked Load Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
      (instrs
    (instregex "L(W|H)ZU(X)?(8)?$")
)>;

// Cracked TEND Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    TEND
)>;


// Cracked Store Instruction
// Consecutive Store and ALU instructions. The store is restricted and requires
// three dispatches.
def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "ST(B|H|W|D)CX$")
)>;

// Cracked Load Instruction.
// Two consecutive load operations for a total of 8 cycles.
def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    LDMX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    (instregex "LHA(X)?(8)?$"),
    (instregex "CP_PASTE(8)?_rec$"),
    (instregex "LWA(X)?(_32)?$"),
    TCHECK
)>;

// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    LFIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// 2 dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    LXSIWAX,
    LIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
// cycles. The Load and ALU operations cannot be done at the same time and so
// their latencies are added.
// Full 6 dispatches are required as this is a restricted instruction.
def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    LFSX,
    LFS
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// 2 dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    LXSSP,
    LXSSPX,
    XFLOADf32,
    DFLOADf32
)>;

// Cracked 3-Way Load Instruction
// Load with two ALU operations that depend on each other
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
      (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX
)>;

// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
// added. Requires 8 cycles. Since the PM requires the full superslice we need
// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
// requires the remaining 1 dispatch.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C],
      (instrs
    LXVH8X,
    LXVDSX,
    LXVW4X
)>;

// Single slice Restricted store operation. The restricted operation requires
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "STF(S|D|IWX|SX|DX)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    STIWX,
    SLBIEG,
    STMW,
    STSWI,
    TLBIE
)>;

// Vector Store Instruction
// Requires the whole superslice and therefore requires one dispatch
// as well as both the Even and Odd exec pipelines.
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
)>;

// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$")
)>;

// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "MTSPR(8)?$")
)>;

// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVW,
    DIVWO,
    DIVWU,
    DIVWUO,
    MODSW
)>;

// 24 Cycle DIV operation.
// Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVWE,
    DIVWEO,
    DIVD,
    DIVDO,
    DIVWEU,
    DIVWEUO,
    DIVDU,
    DIVDUO,
    MODSD,
    MODUD,
    MODUW
)>;

// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVDE,
    DIVDEO,
    DIVDEU,
    DIVDEUO
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 18.
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    (instregex "DIVW(U)?(O)?_rec$")
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    DIVD_rec,
    DIVDO_rec,
    DIVDU_rec,
    DIVDUO_rec,
    DIVWE_rec,
    DIVWEO_rec,
    DIVWEU_rec,
    DIVWEUO_rec
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    DIVDE_rec,
    DIVDEO_rec,
    DIVDEU_rec,
    DIVDEUO_rec
)>;

// CR access instructions in _BrMCR, IIC_BrMCRX.

// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    MTCRF,
    MTCRF8
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
    (instregex "ADDC(8)?(O)?_rec$"),
    (instregex "SUBFC(8)?(O)?_rec$")
)>;

// Cracked ALU operations.
// Two ALU ops can be done in parallel.
// One is three cycle ALU the other is a two cycle ALU.
// One of the ALU ops is restricted the other is not so we have a total of
// 4 dispatches.
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "F(N)?ABS(D|S)_rec$"),
    (instregex "FCPSGN(D|S)_rec$"),
    (instregex "FNEG(D|S)_rec$"),
    FMR_rec
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
    MCRFS
)>;

// Cracked Restricted ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MTFSF(b|_rec)?$"),
    (instregex "MTFSFI(_rec)?$")
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// One of the ALU ops is restricted and takes 3 dispatches.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "RLD(I)?C(R|L)_rec$"),
    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
    (instregex "SLW(8)?_rec$"),
    (instregex "SRAW(I)?_rec$"),
    (instregex "SRW(8)?_rec$"),
    RLDICL_32_rec,
    RLDIMI_rec
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// Both of the ALU ops are restricted and take 3 dispatches.
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MFFS(L|CE|_rec)?$")
)>;

// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
// total of 6 cycles. All of the ALU operations are also restricted so each
// takes 3 dispatches for a total of 9.
def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MFCR(8)?$")
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "EXTSWSLI_32_64_rec$"),
    (instregex "SRAD(I)?_rec$"),
    EXTSWSLI_rec,
    SLD_rec,
    SRD_rec,
    RLDIC_rec
)>;

// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FDIV
)>;

// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FDIV_rec
)>;

// 36 Cycle DP Instruction.
// Instruction can be done on a single slice.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
      (instrs
    XSSQRTDP
)>;

// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FSQRT
)>;

// 36 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVSQRTDP
)>;

// 27 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVSQRTSP
)>;

// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FSQRT_rec
)>;

// 26 Cycle DP Instruction.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
      (instrs
    XSSQRTSP
)>;

// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FSQRTS
)>;

// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FSQRTS_rec
)>;

// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
      (instrs
    XSDIVDP
)>;

// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FDIVS
)>;

// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FDIVS_rec
)>;

// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
      (instrs
    XSDIVSP
)>;

// 24 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVDIVSP
)>;

// 33 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVDIVDP
)>;

// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final dispatch comes from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<
  [P9_LoadAndALU2Op_7C, P9_ALU_2C,
   IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "LF(SU|SUX)$")
  )
>;

// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
// the store and so it can be run at the same time as the store. The store is
// also restricted, so it takes all 3 dispatches (DISP_3SLOTS_1C).
def : InstRW<
  [P9_LS_1C, P9_ALU_2C,
   IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "STF(S|D)U(X)?$"),
    (instregex "ST(B|H|W|D)U(X)?(8)?$")
  )
>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, DISP_PAIR_1C, DISP_PAIR_1C],
  (instrs
    (instregex "LBZU(X)?(8)?$"),
    (instregex "LDU(X)?$")
  )
>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. The load is also
// restricted. 3 dispatches are from the restricted load (DISP_3SLOTS_1C) while
// the remaining one (DISP_1C) is from the ALU. The AGEN pipeline is from the
// load and the EXEC pipeline is required for the ALU.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "LF(DU|DUX)$")
  )
>;

// Crypto Instructions

// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<
  [P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
  (instrs
    (instregex "VPMSUM(B|H|W|D)$"),
    (instregex "V(N)?CIPHER(LAST)?$"),
    VSBOX
  )
>;

// Branch Instructions

// Branch taking two cycles.
def : InstRW<
  [P9_BR_2C, DISP_BR_1C],
  (instrs
    (instregex "BCCCTR(L)?(8)?$"),
    (instregex "BCCL(A|R|RL)?$"),
    (instregex "BCCTR(L)?(8)?(n)?$"),
    (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
    (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
    (instregex "BL(_TLS|_NOP)?$"),
    (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
    (instregex "BLA(8|8_NOP)?$"),
    (instregex "BLR(8|L)?$"),
    (instregex "TAILB(A)?(8)?$"),
    (instregex "TAILBCTR(8)?$"),
    (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
    (instregex "BCLR(L)?(n)?$"),
    (instregex "BCTR(L)?(8)?$"),
    B,
    BA,
    BC,
    BCC,
    BCCA,
    BCL,
    BCLalways,
    BCLn,
    BCTRL8_LDinto_toc,
    BCTRL_LWZinto_toc,
    BCn,
    CTRL_DEP
  )
>;

// Branch of five cycles combined with a 2 cycle ALU op.
// The two operations run back to back, never in parallel.
def : InstRW<
  [P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
  (instrs ADDPCIS)
>;

// Special Extracted Instructions For Atomics

// Atomic load (LDAT / LWAT).
def : InstRW<
  [P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
   IP_EXEC_1C, IP_EXEC_1C,
   IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
   DISP_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "L(D|W)AT$")
  )
>;

// Atomic store (STDAT / STWAT).
def : InstRW<
  [P9_LS_1C, P9_LS_4C, P9_LS_4C,
   IP_EXEC_1C,
   IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
   DISP_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "ST(D|W)AT$")
  )
>;

// Signal Processing Engine (SPE) Instructions
// Power 9 does not support these instructions, so they get no resources and
// are flagged as Unsupported.
def : InstRW<
  [],
  (instrs
    BRINC,
    EVABS,
    EVEQV,
    EVMRA,
    EVNAND,
    EVNEG,
    (instregex "EVADD(I)?W$"),
    (instregex "EVADD(SM|SS|UM|US)IAAW$"),
    (instregex "EVAND(C)?$"),
    (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
    (instregex "EVCNTL(S|Z)W$"),
    (instregex "EVDIVW(S|U)$"),
    (instregex "EVEXTS(B|H)$"),
    (instregex "EVLD(H|W|D)(X)?$"),
    (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
    (instregex "EVLWHE(X)?$"),
    (instregex "EVLWHO(S|U)(X)?$"),
    (instregex "EVLW(H|W)SPLAT(X)?$"),
    (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
    (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
    (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
    (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
    (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
    (instregex "EVMWHUMI(A)?$"),
    (instregex "EVMWLS(M|S)IA(A|N)W$"),
    (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
    (instregex "EVMWSSF(A|AA|AN)?$"),
    (instregex "EVMWUMI(A|AA|AN)?$"),
    (instregex "EV(N|X)?OR(C)?$"),
    (instregex "EVR(LW|LWI|NDW)$"),
    (instregex "EVSLW(I)?$"),
    (instregex "EVSPLAT(F)?I$"),
    (instregex "EVSRW(I)?(S|U)$"),
    (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
    (instregex "EVSUBF(S|U)(M|S)IAAW$"),
    (instregex "EVSUB(I)?FW$")
  )
> {
  let Unsupported = 1;
}

// General instructions that have no scheduling support; they are likewise
// flagged as Unsupported.
def : InstRW<
  [],
  (instrs
    (instregex "(H)?RFI(D)?$"),
    (instregex "DSS(ALL)?$"),
    (instregex "DST(ST)?(T)?(64)?$"),
    (instregex "ICBL(C|Q)$"),
    (instregex "L(W|H|B)EPX$"),
    (instregex "ST(W|H|B)EPX$"),
    (instregex "(L|ST)FDEPX$"),
    (instregex "M(T|F)SR(IN)?$"),
    (instregex "M(T|F)DCR$"),
    (instregex "NOP_GT_PWR(6|7)$"),
    (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
    (instregex "WRTEE(I)?$"),
    ATTN,
    CLRBHRB,
    MFBHRBE,
    MBAR,
    MSYNC,
    SLBSYNC,
    SLBFEE_rec,
    NAP,
    STOP,
    TRAP,
    RFCI,
    RFDI,
    RFMCI,
    SC,
    DCBA,
    DCBI,
    DCCCI,
    ICCCI
  )
> {
  let Unsupported = 1;
}