//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the resources required by P9 instructions. This is part of
// the P9 processor model used for instruction scheduling. This file should
// contain all the instructions that may be used on Power 9. This is not
// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//     for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//

// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    MTVSRDD,
    VEQV,
    VNAND,
    VNEGD,
    VNEGW,
    VNOR,
    VOR,
    VORC,
    VSEL,
    VXOR,
    XVNEGDP,
    XVNEGSP,
    XXLAND,
    XXLANDC,
    XXLEQV,
    XXLEQVOnes,
    XXLNAND,
    XXLNOR,
    XXLOR,
    XXLORf,
    XXLORC,
    XXLXOR,
    XXLXORdpz,
    XXLXORspz,
    XXLXORz,
    XXSEL,
    XSABSQP,
    XSCPSGNQP,
    XSIEXPQP,
    XSNABSQP,
    XSNEGQP,
    XSXEXPQP
)>;

// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMP(O|U)(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    FTDIV,
    FTSQRT,
    CMPEQB
)>;

// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
      (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    (instregex "SETB(8)?$"),
    XSTDIVDP,
    XSTSQRTDP,
    XSXSIGDP,
    XSCVSPDPN,
    BPERMD
)>;

// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
      (instrs
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTV(S)?RD$"),
    (instregex "MTV(S)?RW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?(O)?$"),
    (instregex "ANDI(S)?(8)?(_rec)?$"),
    (instregex "ADDC(8)?(O)?$"),
    (instregex "ADDIC(8)?(_rec)?$"),
    (instregex "ADD(8|4)(O)?(_rec)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
    (instregex "NEG(8)?(O)?(_rec)?$"),
    (instregex "POPCNTB$"),
    (instregex "POPCNTB8$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
    (instregex "NAND(8)?(_rec)?$"),
    (instregex "AND(C)?(8)?(_rec)?$"),
    (instregex "NOR(8)?(_rec)?$"),
    (instregex "OR(C)?(8)?(_rec)?$"),
    (instregex "EQV(8)?(_rec)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    (instregex "NEG(8)?(O)?$"),
    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
    (instregex "LA(8)?$"),
    COPY,
    MCRF,
    MCRXRX,
    XSNABSDP,
    XSNABSDPs,
    XSXEXPDP,
    XSABSDP,
    XSNEGDP,
    XSCPSGNDP,
    MFVSRWZ,
    MFVRWZ,
    EXTSWSLI,
    SRADI_32,
    RLDIC,
    RFEBB,
    TBEGIN,
    TRECHKPT,
    NOP,
    WAIT
)>;

// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    RLDIMI,
    XSIEXPDP,
    FMR,
    CREQV,
    CRNOT,
    CRXOR,
    TRECLAIM,
    TSR,
    TABORT
)>;

// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
    (instregex "VCMPBFP(_rec)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    VBPERMD,
    VADDCUW,
    VPOPCNTW,
    VPOPCNTD,
    VPRTYBD,
    VPRTYBW,
    VSHASIGMAD,
    VSHASIGMAW,
    VSUBSBS,
    VSUBSHS,
    VSUBSWS,
    VSUBUBS,
    VSUBUHS,
    VSUBUWS,
    VSUBCUW,
    VCMPGTSB,
    VCMPGTSB_rec,
    VCMPGTSD,
    VCMPGTSD_rec,
    VCMPGTSH,
    VCMPGTSH_rec,
    VCMPGTSW,
    VCMPGTSW_rec,
    VCMPGTUB,
    VCMPGTUB_rec,
    VCMPGTUD,
    VCMPGTUD_rec,
    VCMPGTUH,
    VCMPGTUH_rec,
    VCMPGTUW,
    VCMPGTUW_rec,
    VCMPNEB_rec,
    VCMPNEH_rec,
    VCMPNEW_rec,
    VCMPNEZB_rec,
    VCMPNEZH_rec,
    VCMPNEZW_rec,
    VCMPEQUB_rec,
    VCMPEQUD_rec,
    VCMPEQUH_rec,
    VCMPEQUW_rec,
    XVCMPEQDP,
    XVCMPEQDP_rec,
    XVCMPEQSP,
    XVCMPEQSP_rec,
    XVCMPGEDP,
    XVCMPGEDP_rec,
    XVCMPGESP,
    XVCMPGESP_rec,
    XVCMPGTDP,
    XVCMPGTDP_rec,
    XVCMPGTSP,
    XVCMPGTSP_rec,
    XVMAXDP,
    XVMAXSP,
    XVMINDP,
    XVMINSP,
    XVTDIVDP,
    XVTDIVSP,
    XVTSQRTDP,
    XVTSQRTSP,
    XVTSTDCDP,
    XVTSTDCSP,
    XVXSIGDP,
    XVXSIGSP
)>;

// 7 cycle DP vector operation that uses an entire superslice.
// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
// EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    VADDFP,
    VCTSXS,
    VCTSXS_0,
    VCTUXS,
    VCTUXS_0,
    VEXPTEFP,
    VLOGEFP,
    VMADDFP,
    VMHADDSHS,
    VNMSUBFP,
    VREFP,
    VRFIM,
    VRFIN,
    VRFIP,
    VRFIZ,
    VRSQRTEFP,
    VSUBFP,
    XVADDDP,
    XVADDSP,
    XVCVDPSP,
    XVCVDPSXDS,
    XVCVDPSXWS,
    XVCVDPUXDS,
    XVCVDPUXWS,
    XVCVHPSP,
    XVCVSPDP,
    XVCVSPHP,
    XVCVSPSXDS,
    XVCVSPSXWS,
    XVCVSPUXDS,
    XVCVSPUXWS,
    XVCVSXDDP,
    XVCVSXDSP,
    XVCVSXWDP,
    XVCVSXWSP,
    XVCVUXDDP,
    XVCVUXDSP,
    XVCVUXWDP,
    XVCVUXWSP,
    XVMADDADP,
    XVMADDASP,
    XVMADDMDP,
    XVMADDMSP,
    XVMSUBADP,
    XVMSUBASP,
    XVMSUBMDP,
    XVMSUBMSP,
    XVMULDP,
    XVMULSP,
    XVNMADDADP,
    XVNMADDASP,
    XVNMADDMDP,
    XVNMADDMSP,
    XVNMSUBADP,
    XVNMSUBASP,
    XVNMSUBMDP,
    XVNMSUBMSP,
    XVRDPI,
    XVRDPIC,
    XVRDPIM,
    XVRDPIP,
    XVRDPIZ,
    XVREDP,
    XVRESP,
    XVRSPI,
    XVRSPIC,
    XVRSPIM,
    XVRSPIP,
    XVRSPIZ,
    XVRSQRTEDP,
    XVRSQRTESP,
    XVSUBDP,
    XVSUBSP,
    VCFSX,
    VCFSX_0,
    VCFUX,
    VCFUX_0,
    VMHRADDSHS,
    VMLADDUHM,
    VMSUMMBM,
    VMSUMSHM,
    VMSUMSHS,
    VMSUMUBM,
    VMSUMUHM,
    VMSUMUDM,
    VMSUMUHS,
    VMULESB,
    VMULESH,
    VMULESW,
    VMULEUB,
    VMULEUH,
    VMULEUW,
    VMULOSB,
    VMULOSH,
    VMULOSW,
    VMULOUB,
    VMULOUH,
    VMULOUW,
    VMULUWM,
    VSUM2SWS,
    VSUM4SBS,
    VSUM4SHS,
    VSUM4UBS,
    VSUMSWS
)>;

// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MADD(HD|HDU|LD|LD8)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
)>;

// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FRSP,
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    FNMADDS,
    FNMADD,
    FNMSUBS,
    FNMSUB,
    FSELD,
    FSELS,
    FMULS,
    FMUL,
    XSMADDADP,
    XSMADDASP,
    XSMADDMDP,
    XSMADDMSP,
    XSMSUBADP,
    XSMSUBASP,
    XSMSUBMDP,
    XSMSUBMSP,
    XSMULDP,
    XSMULSP,
    XSNMADDADP,
    XSNMADDASP,
    XSNMADDMDP,
    XSNMADDMSP,
    XSNMSUBADP,
    XSNMSUBASP,
    XSNMSUBMDP,
    XSNMSUBMSP
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations can be done in parallel. The DP is restricted so we need a
// full 4 dispatches.
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "FSEL(D|S)_rec$")
)>;

// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations must be done sequentially. The DP is restricted so we need a
// full 4 dispatches.
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
    (instregex "FRE(S)?_rec$"),
    (instregex "FADD(S)?_rec$"),
    (instregex "FSUB(S)?_rec$"),
    (instregex "F(N)?MSUB(S)?_rec$"),
    (instregex "F(N)?MADD(S)?_rec$"),
    (instregex "FCFID(U)?(S)?_rec$"),
    (instregex "FCTID(U)?(Z)?_rec$"),
    (instregex "FCTIW(U)?(Z)?_rec$"),
    (instregex "FMUL(S)?_rec$"),
    (instregex "FRSQRTE(S)?_rec$"),
    FRSP_rec
)>;

// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
      (instrs
    XSADDDP,
    XSADDSP,
    XSCVDPHP,
    XSCVDPSP,
    XSCVDPSXDS,
    XSCVDPSXDSs,
    XSCVDPSXWS,
    XSCVDPUXDS,
    XSCVDPUXDSs,
    XSCVDPUXWS,
    XSCVDPSXWSs,
    XSCVDPUXWSs,
    XSCVHPDP,
    XSCVSPDP,
    XSCVSXDDP,
    XSCVSXDSP,
    XSCVUXDDP,
    XSCVUXDSP,
    XSRDPI,
    XSRDPIC,
    XSRDPIM,
    XSRDPIP,
    XSRDPIZ,
    XSREDP,
    XSRESP,
    XSRSQRTEDP,
    XSRSQRTESP,
    XSSUBDP,
    XSSUBSP,
    XSCVDPSPN,
    XSRSP
)>;

// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
      (instrs
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    MFVSRLD,
    MTVSRWS,
    VBPERMQ,
    VCLZLSBB,
    VCTZLSBB,
    VEXTRACTD,
    VEXTUBLX,
    VEXTUBRX,
    VEXTUHLX,
    VEXTUHRX,
    VEXTUWLX,
    VEXTUWRX,
    VGBBD,
    VMRGHB,
    VMRGHH,
    VMRGHW,
    VMRGLB,
    VMRGLH,
    VMRGLW,
    VPERM,
    VPERMR,
    VPERMXOR,
    VPKPX,
    VPKSDSS,
    VPKSDUS,
    VPKSHSS,
    VPKSHUS,
    VPKSWSS,
    VPKSWUS,
    VPKUDUM,
    VPKUDUS,
    VPKUHUM,
    VPKUHUS,
    VPKUWUM,
    VPKUWUS,
    VPRTYBQ,
    VSL,
    VSLDOI,
    VSLO,
    VSLV,
    VSR,
    VSRO,
    VSRV,
    VUPKHPX,
    VUPKHSB,
    VUPKHSH,
    VUPKHSW,
    VUPKLPX,
    VUPKLSB,
    VUPKLSH,
    VUPKLSW,
    XXBRD,
    XXBRH,
    XXBRQ,
    XXBRW,
    XXEXTRACTUW,
    XXINSERTW,
    XXMRGHW,
    XXMRGLW,
    XXPERM,
    XXPERMR,
    XXSLDWI,
    XXSLDWIs,
    XXSPLTIB,
    XXSPLTW,
    XXSPLTWs,
    XXPERMDI,
    XXPERMDIs,
    VADDCUQ,
    VADDECUQ,
    VADDEUQM,
    VADDUQM,
    VMUL10CUQ,
    VMUL10ECUQ,
    VMUL10EUQ,
    VMUL10UQ,
    VSUBCUQ,
    VSUBECUQ,
    VSUBEUQM,
    VSUBUQM,
    XSCMPEXPQP,
    XSCMPOQP,
    XSCMPUQP,
    XSTSTDCQP,
    XSXSIGQP,
    BCDCFN_rec,
    BCDCFZ_rec,
    BCDCPSGN_rec,
    BCDCTN_rec,
    BCDCTZ_rec,
    BCDSETSGN_rec,
    BCDS_rec,
    BCDTRUNC_rec,
    BCDUS_rec,
    BCDUTRUNC_rec,
    BCDADD_rec,
    BCDSUB_rec
)>;

// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDSR_rec,
    XSADDQP,
    XSADDQPO,
    XSCVDPQP,
    XSCVQPDP,
    XSCVQPDPO,
    XSCVQPSDZ,
    XSCVQPSWZ,
    XSCVQPUDZ,
    XSCVQPUWZ,
    XSCVSDQP,
    XSCVUDQP,
    XSRQPI,
    XSRQPIX,
    XSRQPXP,
    XSSUBQP,
    XSSUBQPO
)>;

// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDCTSQ_rec
)>;

// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSMADDQP,
    XSMADDQPO,
    XSMSUBQP,
    XSMSUBQPO,
    XSMULQP,
    XSMULQPO,
    XSNMADDQP,
    XSNMADDQPO,
    XSNMSUBQP,
    XSNMSUBQPO
)>;

// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDCFSQ_rec
)>;

// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSDIVQP,
    XSDIVQPO
)>;

// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSSQRTQP,
    XSSQRTQPO
)>;

// 6 Cycle Load uses a single slice.
def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "LXVL(L)?")
)>;

// 5 Cycle Load uses a single slice.
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "LVE(B|H|W)X$"),
    (instregex "LVX(L)?"),
    (instregex "LXSI(B|H)ZX$"),
    LXSDX,
    LXVB16X,
    LXVD2X,
    LXVWSX,
    LXSIWZX,
    LXV,
    LXVX,
    LXSD,
    DFLOADf64,
    XFLOADf64,
    LIWZX
)>;

// 4 Cycle Load uses a single slice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    CP_ABORT,
    DARN,
    EnforceIEIO,
    ISYNC,
    MSGSYNC,
    TLBSYNC,
    SYNC,
    LMW,
    LSWI
)>;

// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
// superslice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
    LFIWZX,
    LFDX,
    (instregex "LFDXTLS?(_)?$"),
    LFD
)>;

// Cracked Load Instructions.
// Load instructions that can be done in parallel.
def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_PAIR_1C],
      (instrs
    SLBIA,
    SLBIE,
    SLBMFEE,
    SLBMFEV,
    SLBMTE,
    TLBIEL
)>;

// Cracked Load Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
      (instrs
    (instregex "L(W|H)ZU(X)?(8)?$")
)>;

// Cracked TEND Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    TEND
)>;


// Cracked Store Instruction
// Consecutive Store and ALU instructions. The store is restricted and requires
// three dispatches.
def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "ST(B|H|W|D)CX$")
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    (instregex "LHA(X)?(TLS)?(8)?(_32)?(_)?$"),
    (instregex "CP_PASTE(8)?_rec$"),
    (instregex "LWA(X)?(TLS)?(_32)?(_)?$"),
    TCHECK
)>;

// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    LFIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// 2 dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    LXSIWAX,
    LIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
// cycles. The Load and ALU operations cannot be done at the same time and so
// their latencies are added.
// Full 6 dispatches are required as this is a restricted instruction.
def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    LFSX,
    (instregex "LFSXTLS?(_)?$"),
    LFS
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// 2 dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    LXSSP,
    LXSSPX,
    XFLOADf32,
    DFLOADf32
)>;

// Cracked 3-Way Load Instruction
// Load with two ALU operations that depend on each other
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
      (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX
)>;

// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
// added. Requires 8 cycles. Since the PM requires the full superslice we need
// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
// requires the remaining 1 dispatch.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C],
      (instrs
    LXVH8X,
    LXVDSX,
    LXVW4X
)>;

// Single slice Restricted store operation. The restricted operation requires
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "STF(S|D|IWX|SX|DX|SXTLS|DXTLS|SXTLS_|DXTLS_)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    STIWX,
    SLBIEG,
    STMW,
    STSWI,
    TLBIE
)>;

// Vector Store Instruction
// Requires the whole superslice and therefore requires one dispatch
// as well as both the Even and Odd exec pipelines.
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
)>;

// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$")
)>;

// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "M(T|F)(U)?DSCR$"),
    (instregex "MTSPR(8)?$")
)>;

// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVW,
    DIVWO,
    DIVWU,
    DIVWUO,
    MODSW
)>;

// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVWE,
    DIVWEO,
    DIVD,
    DIVDO,
    DIVWEU,
    DIVWEUO,
    DIVDU,
    DIVDUO,
    MODSD,
    MODUD,
    MODUW
)>;

// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVDE,
    DIVDEO,
    DIVDEU,
    DIVDEUO
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 18.
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    (instregex "DIVW(U)?(O)?_rec$")
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    DIVD_rec,
    DIVDO_rec,
    DIVDU_rec,
    DIVDUO_rec,
    DIVWE_rec,
    DIVWEO_rec,
    DIVWEU_rec,
    DIVWEUO_rec
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    DIVDE_rec,
    DIVDEO_rec,
    DIVDEU_rec,
    DIVDEUO_rec
)>;

// CR access instructions in _BrMCR, IIC_BrMCRX.

// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    MTCRF,
    MTCRF8
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
    (instregex "ADDC(8)?(O)?_rec$"),
    (instregex "SUBFC(8)?(O)?_rec$")
)>;

// Cracked ALU operations.
// Two ALU ops can be done in parallel.
// One is three cycle ALU the other is a two cycle ALU.
// One of the ALU ops is restricted the other is not so we have a total of
// 4 dispatches.
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "F(N)?ABS(D|S)_rec$"),
    (instregex "FCPSGN(D|S)_rec$"),
    (instregex "FNEG(D|S)_rec$"),
    FMR_rec
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
    MCRFS
)>;

// Cracked Restricted ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MTFSF(b|_rec)?$"),
    (instregex "MTFSFI(_rec)?$"),
    MTFSFIb
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// One of the ALU ops is restricted and takes 3 dispatches.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "RLD(I)?C(R|L)_rec$"),
    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
    (instregex "SLW(8)?_rec$"),
    (instregex "SRAW(I)?_rec$"),
    (instregex "SRW(8)?_rec$"),
    RLDICL_32_rec,
    RLDIMI_rec
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// Both of the ALU ops are restricted and take 3 dispatches.
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MFFS(L|CE|_rec)?$")
)>;

// Cracked ALU instruction composed of three consecutive 2 cycle ALU operations
// for a total of 6 cycles. All of the ALU operations are also restricted so
// each takes 3 dispatches for a total of 9.
def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MFCR(8)?$")
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "EXTSWSLI_32_64_rec$"),
    (instregex "SRAD(I)?_rec$"),
    EXTSWSLI_rec,
    SLD_rec,
    SRD_rec,
    RLDIC_rec
)>;

// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FDIV
)>;

// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FDIV_rec
)>;

// 36 Cycle DP Instruction.
// Instruction can be done on a single slice.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
      (instrs
    XSSQRTDP
)>;

// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FSQRT
)>;

// 36 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVSQRTDP
)>;

// 27 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVSQRTSP
)>;

// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FSQRT_rec
)>;

// 26 Cycle DP Instruction.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
      (instrs
    XSSQRTSP
)>;

// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FSQRTS
)>;

// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FSQRTS_rec
)>;

// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
      (instrs
    XSDIVDP
)>;

// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FDIVS
)>;

// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FDIVS_rec
)>;

// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
      (instrs
    XSDIVSP
)>;

// 24 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVDIVSP
)>;

// 33 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVDIVDP
)>;

// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final dispatch comes from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
    (instrs
        (instregex "LF(SU|SUX)$"))>;

// Cracked instruction: a Store plus an ALU op. The ALU op is independent of
// the store, so the two can execute at the same time. The store is restricted.
def : InstRW<[P9_LS_1C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
    (instrs
        (instregex "STF(S|D)U(X)?$"),
        (instregex "ST(B|H|W|D)U(X)?(8)?$"))>;

// Cracked instruction: a Load plus an ALU op. The ALU op is independent of
// the load, so the two can execute at the same time.
def : InstRW<[P9_LS_4C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
    (instrs
        (instregex "LBZU(X)?(8)?$"),
        (instregex "LDU(X)?$"))>;

// Cracked instruction: a restricted Load plus an ALU op. The ALU op is
// independent of the load, so the two can execute at the same time.
// The restricted load accounts for the 3-slot dispatch and the AGEN pipeline;
// the ALU op accounts for the remaining dispatch and the EXEC pipeline.
// NOTE(review): earlier wording said the ALU op takes two dispatches, but the
// list below carries a single DISP_1C for it. Confirm which is intended.
def : InstRW<[P9_LS_4C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
    (instrs
        (instregex "LF(DU|DUX)$"))>;

// Crypto Instructions

// 6 Cycle CY operation. There is only one CY unit per CPU, so a whole
// superslice is used: both exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<[P9_CY_6C,
              IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C],
    (instrs
        (instregex "VPMSUM(B|H|W|D)$"),
        (instregex "V(N)?CIPHER(LAST)?$"),
        VSBOX)>;

// Branch Instructions

// Two Cycle Branch. Uses the branch write (P9_BR_2C) and the branch
// dispatch (DISP_BR_1C) only.
def : InstRW<[P9_BR_2C, DISP_BR_1C],
    (instrs
        (instregex "BCCCTR(L)?(8)?$"),
        (instregex "BCCL(A|R|RL)?$"),
        (instregex "BCCTR(L)?(8)?(n)?$"),
        (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
        (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
        (instregex "BL(_TLS|_NOP)?(_RM)?$"),
        (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
        (instregex "BLA(8|8_NOP)?(_RM)?$"),
        (instregex "BLR(8|L)?$"),
        (instregex "TAILB(A)?(8)?$"),
        (instregex "TAILBCTR(8)?$"),
        (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
        (instregex "BCLR(L)?(n)?$"),
        (instregex "BCTR(L)?(8)?(_RM)?$"),
        B,
        BA,
        BC,
        BCC,
        BCCA,
        BCL,
        BCLalways,
        BCLn,
        BCTRL8_LDinto_toc,
        BCTRL_LWZinto_toc,
        BCTRL8_LDinto_toc_RM,
        BCTRL_LWZinto_toc_RM,
        BCn,
        CTRL_DEP)>;

// Five Cycle Branch combined with a 2 Cycle ALU Op.
// The two operations run back to back, never in parallel.
def : InstRW<[P9_BROpAndALUOp_7C,
              IP_EXEC_1C,
              DISP_BR_1C, DISP_1C],
    (instrs ADDPCIS)>;

// Special Extracted Instructions For Atomics

// Atomic Load (LDAT / LWAT).
// Listed as five LS writes, two EXEC and five AGEN pipeline uses, and six
// dispatch entries (two of them 3-slot).
def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C, IP_EXEC_1C,
              IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C,
              DISP_1C, DISP_1C, DISP_1C],
    (instrs
        (instregex "L(D|W)AT$"))>;

// Atomic Store (STDAT / STWAT).
// Listed as three LS writes, one EXEC and three AGEN pipeline uses, and three
// dispatch entries (one of them 3-slot).
def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C,
              IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_3SLOTS_1C, DISP_1C],
    (instrs
        (instregex "ST(D|W)AT$"))>;

// Signal Processing Engine (SPE) Instructions.
// Power 9 does not support these, so they get an empty resource list and are
// flagged Unsupported.
def : InstRW<[],
    (instrs
        BRINC,
        EVABS,
        EVEQV,
        EVMRA,
        EVNAND,
        EVNEG,
        (instregex "EVADD(I)?W$"),
        (instregex "EVADD(SM|SS|UM|US)IAAW$"),
        (instregex "EVAND(C)?$"),
        (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
        (instregex "EVCNTL(S|Z)W$"),
        (instregex "EVDIVW(S|U)$"),
        (instregex "EVEXTS(B|H)$"),
        (instregex "EVLD(H|W|D)(X)?$"),
        (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
        (instregex "EVLWHE(X)?$"),
        (instregex "EVLWHO(S|U)(X)?$"),
        (instregex "EVLW(H|W)SPLAT(X)?$"),
        (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
        (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
        (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
        (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
        (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
        (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
        (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
        (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
        (instregex "EVMWHUMI(A)?$"),
        (instregex "EVMWLS(M|S)IA(A|N)W$"),
        (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
        (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
        (instregex "EVMWSSF(A|AA|AN)?$"),
        (instregex "EVMWUMI(A|AA|AN)?$"),
        (instregex "EV(N|X)?OR(C)?$"),
        (instregex "EVR(LW|LWI|NDW)$"),
        (instregex "EVSLW(I)?$"),
        (instregex "EVSPLAT(F)?I$"),
        (instregex "EVSRW(I)?(S|U)$"),
        (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
        (instregex "EVSUBF(S|U)(M|S)IAAW$"),
        (instregex "EVSUB(I)?FW$"))> {
  let Unsupported = 1;
}

// General Instructions without scheduling support.
// These get an empty resource list and are flagged Unsupported.
def : InstRW<[],
    (instrs
        (instregex "(H)?RFI(D)?$"),
        (instregex "DSS(ALL)?$"),
        (instregex "DST(ST)?(T)?(64)?$"),
        (instregex "ICBL(C|Q)$"),
        (instregex "L(W|H|B)EPX$"),
        (instregex "ST(W|H|B)EPX$"),
        (instregex "(L|ST)FDEPX$"),
        (instregex "M(T|F)SR(IN)?$"),
        (instregex "M(T|F)DCR$"),
        (instregex "NOP_GT_PWR(6|7)$"),
        (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
        (instregex "WRTEE(I)?$"),
        (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"),
        ATTN,
        CLRBHRB,
        MFBHRBE,
        MBAR,
        MSYNC,
        SLBSYNC,
        SLBFEE_rec,
        NAP,
        STOP,
        TRAP,
        RFCI,
        RFDI,
        RFMCI,
        SC,
        DCBA,
        DCBI,
        DCCCI,
        ICCCI,
        ADDEX,
        ADDEX8,
        CDTBCD,
        CDTBCD8,
        CBCDTD,
        CBCDTD8,
        ADDG6S,
        ADDG6S8)> {
  let Unsupported = 1;
}