//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the resources required by P9 instructions. This is part of
// the P9 processor model used for instruction scheduling. This file should
// contain all the instructions that may be used on Power 9. This is not
// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//   for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//

// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    MTVSRDD,
    VEQV,
    VNAND,
    VNEGD,
    VNEGW,
    VNOR,
    VOR,
    VORC,
    VSEL,
    VXOR,
    XVNEGDP,
    XVNEGSP,
    XXLAND,
    XXLANDC,
    XXLEQV,
    XXLEQVOnes,
    XXLNAND,
    XXLNOR,
    XXLOR,
    XXLORf,
    XXLORC,
    XXLXOR,
    XXLXORdpz,
    XXLXORspz,
    XXLXORz,
    XXSEL,
    XSABSQP,
    XSCPSGNQP,
    XSIEXPQP,
    XSNABSQP,
    XSNEGQP,
    XSXEXPQP
)>;

// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMP(O|U)(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    FTDIV,
    FTSQRT,
    CMPEQB
)>;

// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
      (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    (instregex "SETB(8)?$"),
    XSTDIVDP,
    XSTSQRTDP,
    XSXSIGDP,
    XSCVSPDPN,
    BPERMD
)>;

// Standard Dispatch ALU operation for 2 cycles.
// Only one slice used.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
      (instrs
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTV(S)?RD$"),
    (instregex "MTV(S)?RW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?(O)?$"),
    (instregex "ANDI(S)?(8)?(_rec)?$"),
    (instregex "ADDC(8)?(O)?$"),
    (instregex "ADDIC(8)?(_rec)?$"),
    (instregex "ADD(8|4)(O)?(_rec)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
    (instregex "NEG(8)?(O)?(_rec)?$"),
    (instregex "POPCNTB$"),
    (instregex "POPCNTB8$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
    (instregex "NAND(8)?(_rec)?$"),
    (instregex "AND(C)?(8)?(_rec)?$"),
    (instregex "NOR(8)?(_rec)?$"),
    (instregex "OR(C)?(8)?(_rec)?$"),
    (instregex "EQV(8)?(_rec)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    (instregex "NEG(8)?(O)?$"),
    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
    COPY,
    MCRF,
    MCRXRX,
    XSNABSDP,
    XSXEXPDP,
    XSABSDP,
    XSNEGDP,
    XSCPSGNDP,
    MFVSRWZ,
    MFVRWZ,
    EXTSWSLI,
    SRADI_32,
    RLDIC,
    RFEBB,
    LA,
    TBEGIN,
    TRECHKPT,
    NOP,
    WAIT
)>;

// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    RLDIMI,
    XSIEXPDP,
    FMR,
    CREQV,
    CRXOR,
    TRECLAIM,
    TSR,
    TABORT
)>;

// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
    (instregex "VCMPBFP(_rec)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    VBPERMD,
    VADDCUW,
    VPOPCNTW,
    VPOPCNTD,
    VPRTYBD,
    VPRTYBW,
    VSHASIGMAD,
    VSHASIGMAW,
    VSUBSBS,
    VSUBSHS,
    VSUBSWS,
    VSUBUBS,
    VSUBUHS,
    VSUBUWS,
    VSUBCUW,
    VCMPGTSB,
    VCMPGTSB_rec,
    VCMPGTSD,
    VCMPGTSD_rec,
    VCMPGTSH,
    VCMPGTSH_rec,
    VCMPGTSW,
    VCMPGTSW_rec,
    VCMPGTUB,
    VCMPGTUB_rec,
    VCMPGTUD,
    VCMPGTUD_rec,
    VCMPGTUH,
    VCMPGTUH_rec,
    VCMPGTUW,
    VCMPGTUW_rec,
    VCMPNEB_rec,
    VCMPNEH_rec,
    VCMPNEW_rec,
    VCMPNEZB_rec,
    VCMPNEZH_rec,
    VCMPNEZW_rec,
    VCMPEQUB_rec,
    VCMPEQUD_rec,
    VCMPEQUH_rec,
    VCMPEQUW_rec,
    XVCMPEQDP,
    XVCMPEQDP_rec,
    XVCMPEQSP,
    XVCMPEQSP_rec,
    XVCMPGEDP,
    XVCMPGEDP_rec,
    XVCMPGESP,
    XVCMPGESP_rec,
    XVCMPGTDP,
    XVCMPGTDP_rec,
    XVCMPGTSP,
    XVCMPGTSP_rec,
    XVMAXDP,
    XVMAXSP,
    XVMINDP,
    XVMINSP,
    XVTDIVDP,
    XVTDIVSP,
    XVTSQRTDP,
    XVTSQRTSP,
    XVTSTDCDP,
    XVTSTDCSP,
    XVXSIGDP,
    XVXSIGSP
)>;

// 7 cycle DP vector operation that uses an entire superslice.
// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
// EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    VADDFP,
    VCTSXS,
    VCTSXS_0,
    VCTUXS,
    VCTUXS_0,
    VEXPTEFP,
    VLOGEFP,
    VMADDFP,
    VMHADDSHS,
    VNMSUBFP,
    VREFP,
    VRFIM,
    VRFIN,
    VRFIP,
    VRFIZ,
    VRSQRTEFP,
    VSUBFP,
    XVADDDP,
    XVADDSP,
    XVCVDPSP,
    XVCVDPSXDS,
    XVCVDPSXWS,
    XVCVDPUXDS,
    XVCVDPUXWS,
    XVCVHPSP,
    XVCVSPDP,
    XVCVSPHP,
    XVCVSPSXDS,
    XVCVSPSXWS,
    XVCVSPUXDS,
    XVCVSPUXWS,
    XVCVSXDDP,
    XVCVSXDSP,
    XVCVSXWDP,
    XVCVSXWSP,
    XVCVUXDDP,
    XVCVUXDSP,
    XVCVUXWDP,
    XVCVUXWSP,
    XVMADDADP,
    XVMADDASP,
    XVMADDMDP,
    XVMADDMSP,
    XVMSUBADP,
    XVMSUBASP,
    XVMSUBMDP,
    XVMSUBMSP,
    XVMULDP,
    XVMULSP,
    XVNMADDADP,
    XVNMADDASP,
    XVNMADDMDP,
    XVNMADDMSP,
    XVNMSUBADP,
    XVNMSUBASP,
    XVNMSUBMDP,
    XVNMSUBMSP,
    XVRDPI,
    XVRDPIC,
    XVRDPIM,
    XVRDPIP,
    XVRDPIZ,
    XVREDP,
    XVRESP,
    XVRSPI,
    XVRSPIC,
    XVRSPIM,
    XVRSPIP,
    XVRSPIZ,
    XVRSQRTEDP,
    XVRSQRTESP,
    XVSUBDP,
    XVSUBSP,
    VCFSX,
    VCFSX_0,
    VCFUX,
    VCFUX_0,
    VMHRADDSHS,
    VMLADDUHM,
    VMSUMMBM,
    VMSUMSHM,
    VMSUMSHS,
    VMSUMUBM,
    VMSUMUHM,
    VMSUMUDM,
    VMSUMUHS,
    VMULESB,
    VMULESH,
    VMULESW,
    VMULEUB,
    VMULEUH,
    VMULEUW,
    VMULOSB,
    VMULOSH,
    VMULOSW,
    VMULOUB,
    VMULOUH,
    VMULOUW,
    VMULUWM,
    VSUM2SWS,
    VSUM4SBS,
    VSUM4SHS,
    VSUM4UBS,
    VSUMSWS
)>;

// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MADD(HD|HDU|LD|LD8)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
)>;

// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FRSP,
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    FNMADDS,
    FNMADD,
    FNMSUBS,
    FNMSUB,
    FSELD,
    FSELS,
    FMULS,
    FMUL,
    XSMADDADP,
    XSMADDASP,
    XSMADDMDP,
    XSMADDMSP,
    XSMSUBADP,
    XSMSUBASP,
    XSMSUBMDP,
    XSMSUBMSP,
    XSMULDP,
    XSMULSP,
    XSNMADDADP,
    XSNMADDASP,
    XSNMADDMDP,
    XSNMADDMSP,
    XSNMSUBADP,
    XSNMSUBASP,
    XSNMSUBMDP,
    XSNMSUBMSP
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations can be done in parallel. The DP is restricted so we need a
// full 4 dispatches.
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "FSEL(D|S)_rec$")
)>;

// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations must be done sequentially. The DP is restricted so we need a
// full 4 dispatches.
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
    (instregex "FRE(S)?_rec$"),
    (instregex "FADD(S)?_rec$"),
    (instregex "FSUB(S)?_rec$"),
    (instregex "F(N)?MSUB(S)?_rec$"),
    (instregex "F(N)?MADD(S)?_rec$"),
    (instregex "FCFID(U)?(S)?_rec$"),
    (instregex "FCTID(U)?(Z)?_rec$"),
    (instregex "FCTIW(U)?(Z)?_rec$"),
    (instregex "FMUL(S)?_rec$"),
    (instregex "FRSQRTE(S)?_rec$"),
    FRSP_rec
)>;

// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
      (instrs
    XSADDDP,
    XSADDSP,
    XSCVDPHP,
    XSCVDPSP,
    XSCVDPSXDS,
    XSCVDPSXDSs,
    XSCVDPSXWS,
    XSCVDPUXDS,
    XSCVDPUXDSs,
    XSCVDPUXWS,
    XSCVDPSXWSs,
    XSCVDPUXWSs,
    XSCVHPDP,
    XSCVSPDP,
    XSCVSXDDP,
    XSCVSXDSP,
    XSCVUXDDP,
    XSCVUXDSP,
    XSRDPI,
    XSRDPIC,
    XSRDPIM,
    XSRDPIP,
    XSRDPIZ,
    XSREDP,
    XSRESP,
    XSRSQRTEDP,
    XSRSQRTESP,
    XSSUBDP,
    XSSUBSP,
    XSCVDPSPN,
    XSRSP
)>;

// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
      (instrs
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    MFVSRLD,
    MTVSRWS,
    VBPERMQ,
    VCLZLSBB,
    VCTZLSBB,
    VEXTRACTD,
    VEXTUBLX,
    VEXTUBRX,
    VEXTUHLX,
    VEXTUHRX,
    VEXTUWLX,
    VEXTUWRX,
    VGBBD,
    VMRGHB,
    VMRGHH,
    VMRGHW,
    VMRGLB,
    VMRGLH,
    VMRGLW,
    VPERM,
    VPERMR,
    VPERMXOR,
    VPKPX,
    VPKSDSS,
    VPKSDUS,
    VPKSHSS,
    VPKSHUS,
    VPKSWSS,
    VPKSWUS,
    VPKUDUM,
    VPKUDUS,
    VPKUHUM,
    VPKUHUS,
    VPKUWUM,
    VPKUWUS,
    VPRTYBQ,
    VSL,
    VSLDOI,
    VSLO,
    VSLV,
    VSR,
    VSRO,
    VSRV,
    VUPKHPX,
    VUPKHSB,
    VUPKHSH,
    VUPKHSW,
    VUPKLPX,
    VUPKLSB,
    VUPKLSH,
    VUPKLSW,
    XXBRD,
    XXBRH,
    XXBRQ,
    XXBRW,
    XXEXTRACTUW,
    XXINSERTW,
    XXMRGHW,
    XXMRGLW,
    XXPERM,
    XXPERMR,
    XXSLDWI,
    XXSLDWIs,
    XXSPLTIB,
    XXSPLTW,
    XXSPLTWs,
    XXPERMDI,
    XXPERMDIs,
    VADDCUQ,
    VADDECUQ,
    VADDEUQM,
    VADDUQM,
    VMUL10CUQ,
    VMUL10ECUQ,
    VMUL10EUQ,
    VMUL10UQ,
    VSUBCUQ,
    VSUBECUQ,
    VSUBEUQM,
    VSUBUQM,
    XSCMPEXPQP,
    XSCMPOQP,
    XSCMPUQP,
    XSTSTDCQP,
    XSXSIGQP,
    BCDCFN_rec,
    BCDCFZ_rec,
    BCDCPSGN_rec,
    BCDCTN_rec,
    BCDCTZ_rec,
    BCDSETSGN_rec,
    BCDS_rec,
    BCDTRUNC_rec,
    BCDUS_rec,
    BCDUTRUNC_rec
)>;

// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDSR_rec,
    XSADDQP,
    XSADDQPO,
    XSCVDPQP,
    XSCVQPDP,
    XSCVQPDPO,
    XSCVQPSDZ,
    XSCVQPSWZ,
    XSCVQPUDZ,
    XSCVQPUWZ,
    XSCVSDQP,
    XSCVUDQP,
    XSRQPI,
    XSRQPIX,
    XSRQPXP,
    XSSUBQP,
    XSSUBQPO
)>;

// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDCTSQ_rec
)>;

// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSMADDQP,
    XSMADDQPO,
    XSMSUBQP,
    XSMSUBQPO,
    XSMULQP,
    XSMULQPO,
    XSNMADDQP,
    XSNMADDQPO,
    XSNMSUBQP,
    XSNMSUBQPO
)>;

// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDCFSQ_rec
)>;

// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSDIVQP,
    XSDIVQPO
)>;

// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSSQRTQP,
    XSSQRTQPO
)>;

// 6 Cycle Load uses a single slice.
def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "LXVL(L)?")
)>;

// 5 Cycle Load uses a single slice.
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "LVE(B|H|W)X$"),
    (instregex "LVX(L)?"),
    (instregex "LXSI(B|H)ZX$"),
    LXSDX,
    LXVB16X,
    LXVD2X,
    LXVWSX,
    LXSIWZX,
    LXV,
    LXVX,
    LXSD,
    DFLOADf64,
    XFLOADf64,
    LIWZX
)>;

// 4 Cycle Load uses a single slice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    CP_ABORT,
    DARN,
    EnforceIEIO,
    ISYNC,
    MSGSYNC,
    TLBSYNC,
    SYNC,
    LMW,
    LSWI
)>;

// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
// superslice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
    LFIWZX,
    LFDX,
    LFD
)>;

// Cracked Load Instructions.
// Load instructions that can be done in parallel.
def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_PAIR_1C],
      (instrs
    SLBIA,
    SLBIE,
    SLBMFEE,
    SLBMFEV,
    SLBMTE,
    TLBIEL
)>;

// Cracked Load Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
      (instrs
    (instregex "L(W|H)ZU(X)?(8)?$")
)>;

// Cracked TEND Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    TEND
)>;


// Cracked Store Instruction
// Consecutive Store and ALU instructions. The store is restricted and requires
// three dispatches.
def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "ST(B|H|W|D)CX$")
)>;

// Cracked Load Instruction.
// Two consecutive load operations for a total of 8 cycles.
def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    LDMX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    (instregex "LHA(X)?(8)?$"),
    (instregex "CP_PASTE(8)?_rec$"),
    (instregex "LWA(X)?(_32)?$"),
    TCHECK
)>;

// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    LFIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Two dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    LXSIWAX,
    LIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
// cycles. The Load and ALU operations cannot be done at the same time and so
// their latencies are added.
// Full 6 dispatches are required as this is a restricted instruction.
def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    LFSX,
    LFS
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Two dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    LXSSP,
    LXSSPX,
    XFLOADf32,
    DFLOADf32
)>;

// Cracked 3-Way Load Instruction
// Load with two ALU operations that depend on each other
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
      (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX
)>;

// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
// added. Requires 8 cycles. Since the PM requires the full superslice we need
// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
// requires the remaining 1 dispatch.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C],
      (instrs
    LXVH8X,
    LXVDSX,
    LXVW4X
)>;

// Single slice Restricted store operation. The restricted operation requires
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "STF(S|D|IWX|SX|DX)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    STIWX,
    SLBIEG,
    STMW,
    STSWI,
    TLBIE
)>;

// Vector Store Instruction
// Requires the whole superslice and therefore requires one dispatch
// as well as both the Even and Odd exec pipelines.
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
)>;

// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$")
)>;

// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "MTSPR(8)?$")
)>;

// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVW,
    DIVWO,
    DIVWU,
    DIVWUO,
    MODSW
)>;

// 24 Cycle DIV operation.
// Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVWE,
    DIVWEO,
    DIVD,
    DIVDO,
    DIVWEU,
    DIVWEUO,
    DIVDU,
    DIVDUO,
    MODSD,
    MODUD,
    MODUW
)>;

// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVDE,
    DIVDEO,
    DIVDEU,
    DIVDEUO
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 18.
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    (instregex "DIVW(U)?(O)?_rec$")
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    DIVD_rec,
    DIVDO_rec,
    DIVDU_rec,
    DIVDUO_rec,
    DIVWE_rec,
    DIVWEO_rec,
    DIVWEU_rec,
    DIVWEUO_rec
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    DIVDE_rec,
    DIVDEO_rec,
    DIVDEU_rec,
    DIVDEUO_rec
)>;

// CR access instructions in _BrMCR, IIC_BrMCRX.

// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    MTCRF,
    MTCRF8
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
    (instregex "ADDC(8)?(O)?_rec$"),
    (instregex "SUBFC(8)?(O)?_rec$")
)>;

// Cracked ALU operations.
// Two ALU ops can be done in parallel.
// One is three cycle ALU the other is a two cycle ALU.
// One of the ALU ops is restricted the other is not so we have a total of
// 4 dispatches.
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "F(N)?ABS(D|S)_rec$"),
    (instregex "FCPSGN(D|S)_rec$"),
    (instregex "FNEG(D|S)_rec$"),
    FMR_rec
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
    MCRFS
)>;

// Cracked Restricted ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MTFSF(b|_rec)?$"),
    (instregex "MTFSFI(_rec)?$"),
    MTFSFIb
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// One of the ALU ops is restricted and takes 3 dispatches.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "RLD(I)?C(R|L)_rec$"),
    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
    (instregex "SLW(8)?_rec$"),
    (instregex "SRAW(I)?_rec$"),
    (instregex "SRW(8)?_rec$"),
    RLDICL_32_rec,
    RLDIMI_rec
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// Both of the ALU ops are restricted and take 3 dispatches.
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MFFS(L|CE|_rec)?$")
)>;

// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
// total of 6 cycles. All of the ALU operations are also restricted so each
// takes 3 dispatches for a total of 9.
def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MFCR(8)?$")
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "EXTSWSLI_32_64_rec$"),
    (instregex "SRAD(I)?_rec$"),
    EXTSWSLI_rec,
    SLD_rec,
    SRD_rec,
    RLDIC_rec
)>;

// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FDIV
)>;

// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FDIV_rec
)>;

// 36 Cycle DP Instruction.
// Instruction can be done on a single slice.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
      (instrs
    XSSQRTDP
)>;

// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FSQRT
)>;

// 36 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVSQRTDP
)>;

// 27 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVSQRTSP
)>;

// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FSQRT_rec
)>;

// 26 Cycle DP Instruction.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
      (instrs
    XSSQRTSP
)>;

// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FSQRTS
)>;

// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FSQRTS_rec
)>;

// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
      (instrs
    XSDIVDP
)>;

// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FDIVS
)>;

// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FDIVS_rec
)>;

// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
      (instrs
    XSDIVSP
)>;

// 24 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVDIVSP
)>;

// 33 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVDIVDP
)>;

// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final dispatch comes from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "LF(SU|SUX)$")
)>;

// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
// the store and so it can be run at the same time as the store. The store is
// also restricted, hence the 3-slot dispatch next to the single dispatch.
def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "STF(S|D)U(X)?$"),
    (instregex "ST(B|H|W|D)U(X)?(8)?$")
)>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load.
// Neither piece is restricted; both dispatches are written as DISP_PAIR_1C.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
      (instrs
    (instregex "LBZU(X)?(8)?$"),
    (instregex "LDU(X)?$")
)>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. The load is also
// restricted. 3 dispatches are from the restricted load (DISP_3SLOTS_1C) while
// the remaining one (DISP_1C) is from the ALU. The AGEN pipeline is from the
// load and the EXEC pipeline is required for the ALU.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "LF(DU|DUX)$")
)>;

// Crypto Instructions

// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
      (instrs
    (instregex "VPMSUM(B|H|W|D)$"),
    (instregex "V(N)?CIPHER(LAST)?$"),
    VSBOX
)>;

// Branch Instructions

// Two Cycle Branch
// Branches are written with the branch resource and the branch dispatch
// (DISP_BR_1C) only; no EXEC or AGEN pipeline is listed.
def : InstRW<[P9_BR_2C, DISP_BR_1C],
      (instrs
    (instregex "BCCCTR(L)?(8)?$"),
    (instregex "BCCL(A|R|RL)?$"),
    (instregex "BCCTR(L)?(8)?(n)?$"),
    (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
    (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
    (instregex "BL(_TLS|_NOP)?$"),
    (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
    (instregex "BLA(8|8_NOP)?$"),
    (instregex "BLR(8|L)?$"),
    (instregex "TAILB(A)?(8)?$"),
    (instregex "TAILBCTR(8)?$"),
    (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
    (instregex "BCLR(L)?(n)?$"),
    (instregex "BCTR(L)?(8)?$"),
    B,
    BA,
    BC,
    BCC,
    BCCA,
    BCL,
    BCLalways,
    BCLn,
    BCTRL8_LDinto_toc,
    BCTRL_LWZinto_toc,
    BCn,
    CTRL_DEP
)>;

// Five Cycle Branch with a 2 Cycle ALU Op
// Operations must be done consecutively and not in parallel, so the
// latencies add up to 7 cycles.
def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
      (instrs
    ADDPCIS
)>;

// Special Extracted Instructions For Atomics

// Atomic Load
// The write list carries five load/store resources, two EXEC pipelines,
// five AGEN pipelines and six dispatch entries (two of them 3-slot).
def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "L(D|W)AT$")
)>;

// Atomic Store
// The write list carries three load/store resources, one EXEC pipeline,
// three AGEN pipelines and three dispatch entries (one of them 3-slot).
def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "ST(D|W)AT$")
)>;

// Signal Processing Engine (SPE) Instructions
// These instructions are not supported on Power 9, so they get an empty
// write list and are flagged Unsupported.
def : InstRW<[],
      (instrs
    BRINC,
    EVABS,
    EVEQV,
    EVMRA,
    EVNAND,
    EVNEG,
    (instregex "EVADD(I)?W$"),
    (instregex "EVADD(SM|SS|UM|US)IAAW$"),
    (instregex "EVAND(C)?$"),
    (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
    (instregex "EVCNTL(S|Z)W$"),
    (instregex "EVDIVW(S|U)$"),
    (instregex "EVEXTS(B|H)$"),
    (instregex "EVLD(H|W|D)(X)?$"),
    (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
    (instregex "EVLWHE(X)?$"),
    (instregex "EVLWHO(S|U)(X)?$"),
    (instregex "EVLW(H|W)SPLAT(X)?$"),
    (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
    (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
    (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
    (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
    (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
    (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
    (instregex "EVMWHUMI(A)?$"),
    (instregex "EVMWLS(M|S)IA(A|N)W$"),
    (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
    (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
    (instregex "EVMWSSF(A|AA|AN)?$"),
    (instregex "EVMWUMI(A|AA|AN)?$"),
    (instregex "EV(N|X)?OR(C)?$"),
    (instregex "EVR(LW|LWI|NDW)$"),
    (instregex "EVSLW(I)?$"),
    (instregex "EVSPLAT(F)?I$"),
    (instregex "EVSRW(I)?(S|U)$"),
    (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
    (instregex "EVSUBF(S|U)(M|S)IAAW$"),
    (instregex "EVSUB(I)?FW$")
)> { let Unsupported = 1; }

// General Instructions without scheduling support.
// Like the SPE group, these get an empty write list and are flagged
// Unsupported.
def : InstRW<[],
      (instrs
    (instregex "(H)?RFI(D)?$"),
    (instregex "DSS(ALL)?$"),
    (instregex "DST(ST)?(T)?(64)?$"),
    (instregex "ICBL(C|Q)$"),
    (instregex "L(W|H|B)EPX$"),
    (instregex "ST(W|H|B)EPX$"),
    (instregex "(L|ST)FDEPX$"),
    (instregex "M(T|F)SR(IN)?$"),
    (instregex "M(T|F)DCR$"),
    (instregex "NOP_GT_PWR(6|7)$"),
    (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
    (instregex "WRTEE(I)?$"),
    (instregex "HASH(ST|STP|CHK|CHKP)$"),
    ATTN,
    CLRBHRB,
    MFBHRBE,
    MBAR,
    MSYNC,
    SLBSYNC,
    SLBFEE_rec,
    NAP,
    STOP,
    TRAP,
    RFCI,
    RFDI,
    RFMCI,
    SC,
    DCBA,
    DCBI,
    DCCCI,
    ICCCI,
    ADDEX
)> { let Unsupported = 1; }