//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the resources required by P9 instructions. This is part of
// the P9 processor model used for instruction scheduling. This file should
// contain all the instructions that may be used on Power 9. This is not
// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//     for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//

// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    MTVSRDD,
    VEQV,
    VNAND,
    VNEGD,
    VNEGW,
    VNOR,
    VOR,
    VORC,
    VSEL,
    VXOR,
    XVNEGDP,
    XVNEGSP,
    XXLAND,
    XXLANDC,
    XXLEQV,
    XXLEQVOnes,
    XXLNAND,
    XXLNOR,
    XXLOR,
    XXLORf,
    XXLORC,
    XXLXOR,
    XXLXORdpz,
    XXLXORspz,
    XXLXORz,
    XXSEL,
    XSABSQP,
    XSCPSGNQP,
    XSIEXPQP,
    XSNABSQP,
    XSNEGQP,
    XSXEXPQP
)>;

// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMPU(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    FTDIV,
    FTSQRT,
    CMPEQB
)>;

// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
      (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    (instregex "SETB(8)?$"),
    XSTDIVDP,
    XSTSQRTDP,
    XSXSIGDP,
    XSCVSPDPN,
    BPERMD
)>;

// Standard Dispatch ALU operation for 2 cycles.
// Only one slice used.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
      (instrs
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTV(S)?RD$"),
    (instregex "MTV(S)?RW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?(O)?$"),
    (instregex "ANDI(S)?(8)?(_rec)?$"),
    (instregex "ADDC(8)?(O)?$"),
    (instregex "ADDIC(8)?(_rec)?$"),
    (instregex "ADD(8|4)(O)?(_rec)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
    (instregex "NEG(8)?(O)?(_rec)?$"),
    (instregex "POPCNTB$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
    (instregex "NAND(8)?(_rec)?$"),
    (instregex "AND(C)?(8)?(_rec)?$"),
    (instregex "NOR(8)?(_rec)?$"),
    (instregex "OR(C)?(8)?(_rec)?$"),
    (instregex "EQV(8)?(_rec)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    (instregex "NEG(8)?(O)?$"),
    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
    COPY,
    MCRF,
    MCRXRX,
    XSNABSDP,
    XSXEXPDP,
    XSABSDP,
    XSNEGDP,
    XSCPSGNDP,
    MFVSRWZ,
    MFVRWZ,
    EXTSWSLI,
    SRADI_32,
    RLDIC,
    RFEBB,
    LA,
    TBEGIN,
    TRECHKPT,
    NOP,
    WAIT
)>;

// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    RLDIMI,
    XSIEXPDP,
    FMR,
    CREQV,
    CRXOR,
    TRECLAIM,
    TSR,
    TABORT
)>;

// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
    (instregex "VCMPBFP(_rec)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    VBPERMD,
    VADDCUW,
    VPOPCNTW,
    VPOPCNTD,
    VPRTYBD,
    VPRTYBW,
    VSHASIGMAD,
    VSHASIGMAW,
    VSUBSBS,
    VSUBSHS,
    VSUBSWS,
    VSUBUBS,
    VSUBUHS,
    VSUBUWS,
    VSUBCUW,
    VCMPGTSB,
    VCMPGTSB_rec,
    VCMPGTSD,
    VCMPGTSD_rec,
    VCMPGTSH,
    VCMPGTSH_rec,
    VCMPGTSW,
    VCMPGTSW_rec,
    VCMPGTUB,
    VCMPGTUB_rec,
    VCMPGTUD,
    VCMPGTUD_rec,
    VCMPGTUH,
    VCMPGTUH_rec,
    VCMPGTUW,
    VCMPGTUW_rec,
    VCMPNEB_rec,
    VCMPNEH_rec,
    VCMPNEW_rec,
    VCMPNEZB_rec,
    VCMPNEZH_rec,
    VCMPNEZW_rec,
    VCMPEQUB_rec,
    VCMPEQUD_rec,
    VCMPEQUH_rec,
    VCMPEQUW_rec,
    XVCMPEQDP,
    XVCMPEQDP_rec,
    XVCMPEQSP,
    XVCMPEQSP_rec,
    XVCMPGEDP,
    XVCMPGEDP_rec,
    XVCMPGESP,
    XVCMPGESP_rec,
    XVCMPGTDP,
    XVCMPGTDP_rec,
    XVCMPGTSP,
    XVCMPGTSP_rec,
    XVMAXDP,
    XVMAXSP,
    XVMINDP,
    XVMINSP,
    XVTDIVDP,
    XVTDIVSP,
    XVTSQRTDP,
    XVTSQRTSP,
    XVTSTDCDP,
    XVTSTDCSP,
    XVXSIGDP,
    XVXSIGSP
)>;

// 7 cycle DP vector operation that uses an entire superslice.
// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
// EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    VADDFP,
    VCTSXS,
    VCTSXS_0,
    VCTUXS,
    VCTUXS_0,
    VEXPTEFP,
    VLOGEFP,
    VMADDFP,
    VMHADDSHS,
    VNMSUBFP,
    VREFP,
    VRFIM,
    VRFIN,
    VRFIP,
    VRFIZ,
    VRSQRTEFP,
    VSUBFP,
    XVADDDP,
    XVADDSP,
    XVCVDPSP,
    XVCVDPSXDS,
    XVCVDPSXWS,
    XVCVDPUXDS,
    XVCVDPUXWS,
    XVCVHPSP,
    XVCVSPDP,
    XVCVSPHP,
    XVCVSPSXDS,
    XVCVSPSXWS,
    XVCVSPUXDS,
    XVCVSPUXWS,
    XVCVSXDDP,
    XVCVSXDSP,
    XVCVSXWDP,
    XVCVSXWSP,
    XVCVUXDDP,
    XVCVUXDSP,
    XVCVUXWDP,
    XVCVUXWSP,
    XVMADDADP,
    XVMADDASP,
    XVMADDMDP,
    XVMADDMSP,
    XVMSUBADP,
    XVMSUBASP,
    XVMSUBMDP,
    XVMSUBMSP,
    XVMULDP,
    XVMULSP,
    XVNMADDADP,
    XVNMADDASP,
    XVNMADDMDP,
    XVNMADDMSP,
    XVNMSUBADP,
    XVNMSUBASP,
    XVNMSUBMDP,
    XVNMSUBMSP,
    XVRDPI,
    XVRDPIC,
    XVRDPIM,
    XVRDPIP,
    XVRDPIZ,
    XVREDP,
    XVRESP,
    XVRSPI,
    XVRSPIC,
    XVRSPIM,
    XVRSPIP,
    XVRSPIZ,
    XVRSQRTEDP,
    XVRSQRTESP,
    XVSUBDP,
    XVSUBSP,
    VCFSX,
    VCFSX_0,
    VCFUX,
    VCFUX_0,
    VMHRADDSHS,
    VMLADDUHM,
    VMSUMMBM,
    VMSUMSHM,
    VMSUMSHS,
    VMSUMUBM,
    VMSUMUHM,
    VMSUMUHS,
    VMULESB,
    VMULESH,
    VMULESW,
    VMULEUB,
    VMULEUH,
    VMULEUW,
    VMULOSB,
    VMULOSH,
    VMULOSW,
    VMULOUB,
    VMULOUH,
    VMULOUW,
    VMULUWM,
    VSUM2SWS,
    VSUM4SBS,
    VSUM4SHS,
    VSUM4UBS,
    VSUMSWS
)>;

// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MADD(HD|HDU|LD|LD8)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
)>;

// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FRSP,
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    FNMADDS,
    FNMADD,
    FNMSUBS,
    FNMSUB,
    FSELD,
    FSELS,
    FMULS,
    FMUL,
    XSMADDADP,
    XSMADDASP,
    XSMADDMDP,
    XSMADDMSP,
    XSMSUBADP,
    XSMSUBASP,
    XSMSUBMDP,
    XSMSUBMSP,
    XSMULDP,
    XSMULSP,
    XSNMADDADP,
    XSNMADDASP,
    XSNMADDMDP,
    XSNMADDMSP,
    XSNMSUBADP,
    XSNMSUBASP,
    XSNMSUBMDP,
    XSNMSUBMSP
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations can be done in parallel. The DP is restricted so we need a
// full 4 dispatches.
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "FSEL(D|S)_rec$")
)>;

// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations must be done sequentially. The DP is restricted so we need
// a full 4 dispatches.
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
    (instregex "FRE(S)?_rec$"),
    (instregex "FADD(S)?_rec$"),
    (instregex "FSUB(S)?_rec$"),
    (instregex "F(N)?MSUB(S)?_rec$"),
    (instregex "F(N)?MADD(S)?_rec$"),
    (instregex "FCFID(U)?(S)?_rec$"),
    (instregex "FCTID(U)?(Z)?_rec$"),
    (instregex "FCTIW(U)?(Z)?_rec$"),
    (instregex "FMUL(S)?_rec$"),
    (instregex "FRSQRTE(S)?_rec$"),
    FRSP_rec
)>;

// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
      (instrs
    XSADDDP,
    XSADDSP,
    XSCVDPHP,
    XSCVDPSP,
    XSCVDPSXDS,
    XSCVDPSXDSs,
    XSCVDPSXWS,
    XSCVDPUXDS,
    XSCVDPUXDSs,
    XSCVDPUXWS,
    XSCVDPSXWSs,
    XSCVDPUXWSs,
    XSCVHPDP,
    XSCVSPDP,
    XSCVSXDDP,
    XSCVSXDSP,
    XSCVUXDDP,
    XSCVUXDSP,
    XSRDPI,
    XSRDPIC,
    XSRDPIM,
    XSRDPIP,
    XSRDPIZ,
    XSREDP,
    XSRESP,
    XSRSQRTEDP,
    XSRSQRTESP,
    XSSUBDP,
    XSSUBSP,
    XSCVDPSPN,
    XSRSP
)>;

// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
      (instrs
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    MFVSRLD,
    MTVSRWS,
    VBPERMQ,
    VCLZLSBB,
    VCTZLSBB,
    VEXTRACTD,
    VEXTUBLX,
    VEXTUBRX,
    VEXTUHLX,
    VEXTUHRX,
    VEXTUWLX,
    VEXTUWRX,
    VGBBD,
    VMRGHB,
    VMRGHH,
    VMRGHW,
    VMRGLB,
    VMRGLH,
    VMRGLW,
    VPERM,
    VPERMR,
    VPERMXOR,
    VPKPX,
    VPKSDSS,
    VPKSDUS,
    VPKSHSS,
    VPKSHUS,
    VPKSWSS,
    VPKSWUS,
    VPKUDUM,
    VPKUDUS,
    VPKUHUM,
    VPKUHUS,
    VPKUWUM,
    VPKUWUS,
    VPRTYBQ,
    VSL,
    VSLDOI,
    VSLO,
    VSLV,
    VSR,
    VSRO,
    VSRV,
    VUPKHPX,
    VUPKHSB,
    VUPKHSH,
    VUPKHSW,
    VUPKLPX,
    VUPKLSB,
    VUPKLSH,
    VUPKLSW,
    XXBRD,
    XXBRH,
    XXBRQ,
    XXBRW,
    XXEXTRACTUW,
    XXINSERTW,
    XXMRGHW,
    XXMRGLW,
    XXPERM,
    XXPERMR,
    XXSLDWI,
    XXSLDWIs,
    XXSPLTIB,
    XXSPLTW,
    XXSPLTWs,
    XXPERMDI,
    XXPERMDIs,
    VADDCUQ,
    VADDECUQ,
    VADDEUQM,
    VADDUQM,
    VMUL10CUQ,
    VMUL10ECUQ,
    VMUL10EUQ,
    VMUL10UQ,
    VSUBCUQ,
    VSUBECUQ,
    VSUBEUQM,
    VSUBUQM,
    XSCMPEXPQP,
    XSCMPOQP,
    XSCMPUQP,
    XSTSTDCQP,
    XSXSIGQP,
    BCDCFN_rec,
    BCDCFZ_rec,
    BCDCPSGN_rec,
    BCDCTN_rec,
    BCDCTZ_rec,
    BCDSETSGN_rec,
    BCDS_rec,
    BCDTRUNC_rec,
    BCDUS_rec,
    BCDUTRUNC_rec
)>;

// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDSR_rec,
    XSADDQP,
    XSADDQPO,
    XSCVDPQP,
    XSCVQPDP,
    XSCVQPDPO,
    XSCVQPSDZ,
    XSCVQPSWZ,
    XSCVQPUDZ,
    XSCVQPUWZ,
    XSCVSDQP,
    XSCVUDQP,
    XSRQPI,
    XSRQPIX,
    XSRQPXP,
    XSSUBQP,
    XSSUBQPO
)>;

// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDCTSQ_rec
)>;

// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSMADDQP,
    XSMADDQPO,
    XSMSUBQP,
    XSMSUBQPO,
    XSMULQP,
    XSMULQPO,
    XSNMADDQP,
    XSNMADDQPO,
    XSNMSUBQP,
    XSNMSUBQPO
)>;

// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDCFSQ_rec
)>;

// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSDIVQP,
    XSDIVQPO
)>;

// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSSQRTQP,
    XSSQRTQPO
)>;

// 6 Cycle Load uses a single slice.
def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
      (instrs
    // NOTE(review): no trailing `$` here, unlike most patterns in this file;
    // presumably this also matches longer names with this prefix - confirm
    // that is intended.
    (instregex "LXVL(L)?")
)>;

// 5 Cycle Load uses a single slice.
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "LVE(B|H|W)X$"),
    // NOTE(review): no trailing `$` here either - confirm intended.
    (instregex "LVX(L)?"),
    (instregex "LXSI(B|H)ZX$"),
    LXSDX,
    LXVB16X,
    LXVD2X,
    LXVWSX,
    LXSIWZX,
    LXV,
    LXVX,
    LXSD,
    DFLOADf64,
    XFLOADf64,
    LIWZX
)>;

// 4 Cycle Load uses a single slice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "CP_PASTE(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    CP_ABORT,
    DARN,
    EnforceIEIO,
    ISYNC,
    MSGSYNC,
    TLBSYNC,
    SYNC,
    LMW,
    LSWI
)>;

// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
// superslice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
    LFIWZX,
    LFDX,
    LFD
)>;

// Cracked Load Instructions.
// Load instructions that can be done in parallel.
def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_PAIR_1C],
      (instrs
    SLBIA,
    SLBIE,
    SLBMFEE,
    SLBMFEV,
    SLBMTE,
    TLBIEL
)>;

// Cracked Load Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
      (instrs
    (instregex "L(W|H)ZU(X)?(8)?$")
)>;

// Cracked TEND Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    TEND
)>;


// Cracked Store Instruction
// Consecutive Store and ALU instructions. The store is restricted and requires
// three dispatches.
def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "ST(B|H|W|D)CX$")
)>;

// Cracked Load Instruction.
// Two consecutive load operations for a total of 8 cycles.
def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    LDMX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    (instregex "LHA(X)?(8)?$"),
    (instregex "CP_PASTE(8)?_rec$"),
    (instregex "LWA(X)?(_32)?$"),
    TCHECK
)>;

// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    LFIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// 2 dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    LXSIWAX,
    LIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
// cycles. The Load and ALU operations cannot be done at the same time and so
// their latencies are added.
// Full 6 dispatches are required as this is a restricted instruction.
def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    LFSX,
    LFS
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// 2 dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    LXSSP,
    LXSSPX,
    XFLOADf32,
    DFLOADf32
)>;

// Cracked 3-Way Load Instruction
// Load with two ALU operations that depend on each other
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
      (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX
)>;

// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
// added. Requires 8 cycles. Since the PM requires the full superslice we need
// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
// requires the remaining 1 dispatch.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C],
      (instrs
    LXVH8X,
    LXVDSX,
    LXVW4X
)>;

// Single slice Restricted store operation. The restricted operation requires
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "STF(S|D|IWX|SX|DX)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    STIWX,
    SLBIEG,
    STMW,
    STSWI,
    TLBIE
)>;

// Vector Store Instruction
// Requires the whole superslice and therefore requires one dispatch
// as well as both the Even and Odd exec pipelines.
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
)>;

// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$")
)>;

// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "MTSPR(8)?$")
)>;

// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVW,
    DIVWO,
    DIVWU,
    DIVWUO,
    MODSW
)>;

// 24 Cycle DIV operation.
// Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVWE,
    DIVWEO,
    DIVD,
    DIVDO,
    DIVWEU,
    DIVWEUO,
    DIVDU,
    DIVDUO,
    MODSD,
    MODUD,
    MODUW
)>;

// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVDE,
    DIVDEO,
    DIVDEU,
    DIVDEUO
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 18.
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    (instregex "DIVW(U)?(O)?_rec$")
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    DIVD_rec,
    DIVDO_rec,
    DIVDU_rec,
    DIVDUO_rec,
    DIVWE_rec,
    DIVWEO_rec,
    DIVWEU_rec,
    DIVWEUO_rec
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    DIVDE_rec,
    DIVDEO_rec,
    DIVDEU_rec,
    DIVDEUO_rec
)>;

// CR access instructions in _BrMCR, IIC_BrMCRX.

// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    MTCRF,
    MTCRF8
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
    (instregex "ADDC(8)?(O)?_rec$"),
    (instregex "SUBFC(8)?(O)?_rec$")
)>;

// Cracked ALU operations.
// Two ALU ops can be done in parallel.
// One is a three cycle ALU, the other is a two cycle ALU.
// One of the ALU ops is restricted the other is not so we have a total of
// 4 dispatches.
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "F(N)?ABS(D|S)_rec$"),
    (instregex "FCPSGN(D|S)_rec$"),
    (instregex "FNEG(D|S)_rec$"),
    FMR_rec
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
    MCRFS
)>;

// Cracked Restricted ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MTFSF(b|_rec)?$"),
    (instregex "MTFSFI(_rec)?$")
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// One of the ALU ops is restricted and takes 3 dispatches.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "RLD(I)?C(R|L)_rec$"),
    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
    (instregex "SLW(8)?_rec$"),
    (instregex "SRAW(I)?_rec$"),
    (instregex "SRW(8)?_rec$"),
    RLDICL_32_rec,
    RLDIMI_rec
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// Both of the ALU ops are restricted and take 3 dispatches.
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MFFS(L|CE|_rec)?$")
)>;

// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
// total of 6 cycles. All of the ALU operations are also restricted so each
// takes 3 dispatches for a total of 9.
def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MFCR(8)?$")
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "EXTSWSLI_32_64_rec$"),
    (instregex "SRAD(I)?_rec$"),
    EXTSWSLI_rec,
    SLD_rec,
    SRD_rec,
    RLDIC_rec
)>;

// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FDIV
)>;

// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FDIV_rec
)>;

// 36 Cycle DP Instruction.
// Instruction can be done on a single slice.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
      (instrs
    XSSQRTDP
)>;

// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FSQRT
)>;

// 36 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVSQRTDP
)>;

// 27 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVSQRTSP
)>;

// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FSQRT_rec
)>;

// 26 Cycle DP Instruction.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
      (instrs
    XSSQRTSP
)>;

// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FSQRTS
)>;

// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FSQRTS_rec
)>;

// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
      (instrs
    XSDIVDP
)>;

// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FDIVS
)>;

// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FDIVS_rec
)>;

// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
      (instrs
    XSDIVSP
)>;

// 24 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVDIVSP
)>;

// 33 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVDIVDP
)>;

// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// 6 dispatches. The final dispatch comes from the second ALU op, for a total
// of 7.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "LF(SU|SUX)$")
)>;

// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
// the store and so it can be run at the same time as the store. The store is
// also restricted, so it takes a full 3 slot dispatch (DISP_3SLOTS_1C) while
// the ALU takes a single dispatch (DISP_1C).
def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "STF(S|D)U(X)?$"),
    (instregex "ST(B|H|W|D)U(X)?(8)?$")
)>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. Neither piece is
// restricted; each one is dispatched with DISP_PAIR_1C.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
  (instrs
    (instregex "LBZU(X)?(8)?$"),
    (instregex "LDU(X)?$")
)>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. The load is also
// restricted. The restricted load takes a full 3 slot dispatch
// (DISP_3SLOTS_1C) while the remaining dispatch (DISP_1C) is for the ALU.
// The AGEN pipeline is from the load and the EXEC pipeline is required for
// the ALU.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "LF(DU|DUX)$")
)>;

// Crypto Instructions

// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
  (instrs
    (instregex "VPMSUM(B|H|W|D)$"),
    (instregex "V(N)?CIPHER(LAST)?$"),
    VSBOX
)>;

// Branch Instructions

// Two Cycle Branch. Uses the branch resource (P9_BR_2C) and the branch
// dispatch (DISP_BR_1C) only; no EXEC or AGEN pipeline is listed.
def : InstRW<[P9_BR_2C, DISP_BR_1C],
  (instrs
    (instregex "BCCCTR(L)?(8)?$"),
    (instregex "BCCL(A|R|RL)?$"),
    (instregex "BCCTR(L)?(8)?(n)?$"),
    (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
    (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
    (instregex "BL(_TLS|_NOP)?$"),
    (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
    (instregex "BLA(8|8_NOP)?$"),
    (instregex "BLR(8|L)?$"),
    (instregex "TAILB(A)?(8)?$"),
    (instregex "TAILBCTR(8)?$"),
    (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
    (instregex "BCLR(L)?(n)?$"),
    (instregex "BCTR(L)?(8)?$"),
    B,
    BA,
    BC,
    BCC,
    BCCA,
    BCL,
    BCLalways,
    BCLn,
    BCTRL8_LDinto_toc,
    BCTRL_LWZinto_toc,
    BCn,
    CTRL_DEP
)>;

// Five Cycle Branch with a 2 Cycle ALU Op.
// Operations must be done consecutively and not in parallel.
def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
  (instrs
    ADDPCIS
)>;

// Special Extracted Instructions For Atomics

// Atomic Load
def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "L(D|W)AT$")
)>;

// Atomic Store
def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "ST(D|W)AT$")
)>;

// Signal Processing Engine (SPE) Instructions.
// These instructions are not supported on Power 9. They are given an empty
// resource list and are flagged with Unsupported = 1.
def : InstRW<[],
  (instrs
    BRINC,
    EVABS,
    EVEQV,
    EVMRA,
    EVNAND,
    EVNEG,
    (instregex "EVADD(I)?W$"),
    (instregex "EVADD(SM|SS|UM|US)IAAW$"),
    (instregex "EVAND(C)?$"),
    (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
    (instregex "EVCNTL(S|Z)W$"),
    (instregex "EVDIVW(S|U)$"),
    (instregex "EVEXTS(B|H)$"),
    (instregex "EVLD(H|W|D)(X)?$"),
    (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
    (instregex "EVLWHE(X)?$"),
    (instregex "EVLWHO(S|U)(X)?$"),
    (instregex "EVLW(H|W)SPLAT(X)?$"),
    (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
    (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
    (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
    (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
    (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
    (instregex "EVMWHUMI(A)?$"),
    (instregex "EVMWLS(M|S)IA(A|N)W$"),
    (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
    (instregex "EVMWSSF(A|AA|AN)?$"),
    (instregex "EVMWUMI(A|AA|AN)?$"),
    (instregex "EV(N|X)?OR(C)?$"),
    (instregex "EVR(LW|LWI|NDW)$"),
    (instregex "EVSLW(I)?$"),
    (instregex "EVSPLAT(F)?I$"),
    (instregex "EVSRW(I)?(S|U)$"),
    (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
    (instregex "EVSUBF(S|U)(M|S)IAAW$"),
    (instregex "EVSUB(I)?FW$")
)> { let Unsupported = 1; }

// General Instructions without scheduling support.
// As with the SPE group, the resource list is empty and the group is flagged
// with Unsupported = 1.
def : InstRW<[],
  (instrs
    (instregex "(H)?RFI(D)?$"),
    (instregex "DSS(ALL)?$"),
    (instregex "DST(ST)?(T)?(64)?$"),
    (instregex "ICBL(C|Q)$"),
    (instregex "L(W|H|B)EPX$"),
    (instregex "ST(W|H|B)EPX$"),
    (instregex "(L|ST)FDEPX$"),
    (instregex "M(T|F)SR(IN)?$"),
    (instregex "M(T|F)DCR$"),
    (instregex "NOP_GT_PWR(6|7)$"),
    (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
    (instregex "WRTEE(I)?$"),
    ATTN,
    CLRBHRB,
    MFBHRBE,
    MBAR,
    MSYNC,
    SLBSYNC,
    SLBFEE_rec,
    NAP,
    STOP,
    TRAP,
    RFCI,
    RFDI,
    RFMCI,
    SC,
    DCBA,
    DCBI,
    DCCCI,
    ICCCI
)> { let Unsupported = 1; }