//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the resources required by P9 instructions. This is part of
// the P9 processor model used for instruction scheduling. This file should
// contain all the instructions that may be used on Power 9. This is not
// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//     for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//

// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
        (instregex "VADDU(B|H|W|D)M$"),
        (instregex "VAND(C)?$"),
        (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
        (instregex "V_SET0(B|H)?$"),
        (instregex "VS(R|L)(B|H|W|D)$"),
        (instregex "VSUBU(B|H|W|D)M$"),
        (instregex "VPOPCNT(B|H)$"),
        (instregex "VRL(B|H|W|D)$"),
        (instregex "VSRA(B|H|W|D)$"),
        (instregex "XV(N)?ABS(D|S)P$"),
        (instregex "XVCPSGN(D|S)P$"),
        (instregex "XV(I|X)EXP(D|S)P$"),
        (instregex "VRL(D|W)(MI|NM)$"),
        (instregex "VMRG(E|O)W$"),
        MTVSRDD,
        VEQV,
        VNAND,
        VNEGD,
        VNEGW,
        VNOR,
        VOR,
        VORC,
        VSEL,
        VXOR,
        XVNEGDP,
        XVNEGSP,
        XXLAND,
        XXLANDC,
        XXLEQV,
        XXLEQVOnes,
        XXLNAND,
        XXLNOR,
        XXLOR,
        XXLORf,
        XXLORC,
        XXLXOR,
        XXLXORdpz,
        XXLXORspz,
        XXLXORz,
        XXSEL,
        XSABSQP,
        XSCPSGNQP,
        XSIEXPQP,
        XSNABSQP,
        XSNEGQP,
        XSXEXPQP
)>;

// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
        (instregex "TABORT(D|W)C(I)?$"),
        (instregex "MTFSB(0|1)$"),
        (instregex "MFFSC(D)?RN(I)?$"),
        (instregex "CMPRB(8)?$"),
        (instregex "TD(I)?$"),
        (instregex "TW(I)?$"),
        (instregex "FCMP(O|U)(S|D)$"),
        (instregex "XSTSTDC(S|D)P$"),
        FTDIV,
        FTSQRT,
        CMPEQB
)>;

// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
      (instrs
        (instregex "XSMAX(C|J)?DP$"),
        (instregex "XSMIN(C|J)?DP$"),
        (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
        (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
        (instregex "POPCNT(D|W)$"),
        (instregex "CMPB(8)?$"),
        (instregex "SETB(8)?$"),
        XSTDIVDP,
        XSTSQRTDP,
        XSXSIGDP,
        XSCVSPDPN,
        BPERMD
)>;

// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
      (instrs
        (instregex "S(L|R)D$"),
        (instregex "SRAD(I)?$"),
        (instregex "EXTSWSLI_32_64$"),
        (instregex "MFV(S)?RD$"),
        (instregex "MTV(S)?RD$"),
        (instregex "MTV(S)?RW(A|Z)$"),
        (instregex "CMP(WI|LWI|W|LW)(8)?$"),
        (instregex "CMP(L)?D(I)?$"),
        (instregex "SUBF(I)?C(8)?(O)?$"),
        (instregex "ANDI(S)?(8)?(_rec)?$"),
        (instregex "ADDC(8)?(O)?$"),
        (instregex "ADDIC(8)?(_rec)?$"),
        (instregex "ADD(8|4)(O)?(_rec)?$"),
        (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
        (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
        (instregex "NEG(8)?(O)?(_rec)?$"),
        (instregex "POPCNTB$"),
        (instregex "POPCNTB8$"),
        (instregex "ADD(I|IS)?(8)?$"),
        (instregex "LI(S)?(8)?$"),
        (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
        (instregex "NAND(8)?(_rec)?$"),
        (instregex "AND(C)?(8)?(_rec)?$"),
        (instregex "NOR(8)?(_rec)?$"),
        (instregex "OR(C)?(8)?(_rec)?$"),
        (instregex "EQV(8)?(_rec)?$"),
        (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
        (instregex "ADD(4|8)(TLS)?(_)?$"),
        (instregex "NEG(8)?(O)?$"),
        (instregex "ADDI(S)?toc(HA|L)(8)?$"),
        (instregex "LA(8)?$"),
        COPY,
        MCRF,
        MCRXRX,
        XSNABSDP,
        XSXEXPDP,
        XSABSDP,
        XSNEGDP,
        XSCPSGNDP,
        MFVSRWZ,
        MFVRWZ,
        EXTSWSLI,
        SRADI_32,
        RLDIC,
        RFEBB,
        TBEGIN,
        TRECHKPT,
        NOP,
        WAIT
)>;

// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
        (instregex "RLDC(L|R)$"),
        (instregex "RLWIMI(8)?$"),
        (instregex "RLDIC(L|R)(_32)?(_64)?$"),
        (instregex "M(F|T)OCRF(8)?$"),
        (instregex "CR(6)?(UN)?SET$"),
        (instregex "CR(N)?(OR|AND)(C)?$"),
        (instregex "S(L|R)W(8)?$"),
        (instregex "RLW(INM|NM)(8)?$"),
        (instregex "F(N)?ABS(D|S)$"),
        (instregex "FNEG(D|S)$"),
        (instregex "FCPSGN(D|S)$"),
        (instregex "SRAW(I)?$"),
        (instregex "ISEL(8)?$"),
        RLDIMI,
        XSIEXPDP,
        FMR,
        CREQV,
        CRXOR,
        TRECLAIM,
        TSR,
        TABORT
)>;

// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
        (instregex "M(T|F)VSCR$"),
        (instregex "VCMPNEZ(B|H|W)$"),
        (instregex "VCMPEQU(B|H|W|D)$"),
        (instregex "VCMPNE(B|H|W)$"),
        (instregex "VABSDU(B|H|W)$"),
        (instregex "VADDU(B|H|W)S$"),
        (instregex "VAVG(S|U)(B|H|W)$"),
        (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
        (instregex "VCMPBFP(_rec)?$"),
        (instregex "VC(L|T)Z(B|H|W|D)$"),
        (instregex "VADDS(B|H|W)S$"),
        (instregex "V(MIN|MAX)FP$"),
        (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
        VBPERMD,
        VADDCUW,
        VPOPCNTW,
        VPOPCNTD,
        VPRTYBD,
        VPRTYBW,
        VSHASIGMAD,
        VSHASIGMAW,
        VSUBSBS,
        VSUBSHS,
        VSUBSWS,
        VSUBUBS,
        VSUBUHS,
        VSUBUWS,
        VSUBCUW,
        VCMPGTSB,
        VCMPGTSB_rec,
        VCMPGTSD,
        VCMPGTSD_rec,
        VCMPGTSH,
        VCMPGTSH_rec,
        VCMPGTSW,
        VCMPGTSW_rec,
        VCMPGTUB,
        VCMPGTUB_rec,
        VCMPGTUD,
        VCMPGTUD_rec,
        VCMPGTUH,
        VCMPGTUH_rec,
        VCMPGTUW,
        VCMPGTUW_rec,
        VCMPNEB_rec,
        VCMPNEH_rec,
        VCMPNEW_rec,
        VCMPNEZB_rec,
        VCMPNEZH_rec,
        VCMPNEZW_rec,
        VCMPEQUB_rec,
        VCMPEQUD_rec,
        VCMPEQUH_rec,
        VCMPEQUW_rec,
        XVCMPEQDP,
        XVCMPEQDP_rec,
        XVCMPEQSP,
        XVCMPEQSP_rec,
        XVCMPGEDP,
        XVCMPGEDP_rec,
        XVCMPGESP,
        XVCMPGESP_rec,
        XVCMPGTDP,
        XVCMPGTDP_rec,
        XVCMPGTSP,
        XVCMPGTSP_rec,
        XVMAXDP,
        XVMAXSP,
        XVMINDP,
        XVMINSP,
        XVTDIVDP,
        XVTDIVSP,
        XVTSQRTDP,
        XVTSQRTSP,
        XVTSTDCDP,
        XVTSTDCSP,
        XVXSIGDP,
        XVXSIGSP
)>;

// 7 cycle DP vector operation that uses an entire superslice.
// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
// EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
        VADDFP,
        VCTSXS,
        VCTSXS_0,
        VCTUXS,
        VCTUXS_0,
        VEXPTEFP,
        VLOGEFP,
        VMADDFP,
        VMHADDSHS,
        VNMSUBFP,
        VREFP,
        VRFIM,
        VRFIN,
        VRFIP,
        VRFIZ,
        VRSQRTEFP,
        VSUBFP,
        XVADDDP,
        XVADDSP,
        XVCVDPSP,
        XVCVDPSXDS,
        XVCVDPSXWS,
        XVCVDPUXDS,
        XVCVDPUXWS,
        XVCVHPSP,
        XVCVSPDP,
        XVCVSPHP,
        XVCVSPSXDS,
        XVCVSPSXWS,
        XVCVSPUXDS,
        XVCVSPUXWS,
        XVCVSXDDP,
        XVCVSXDSP,
        XVCVSXWDP,
        XVCVSXWSP,
        XVCVUXDDP,
        XVCVUXDSP,
        XVCVUXWDP,
        XVCVUXWSP,
        XVMADDADP,
        XVMADDASP,
        XVMADDMDP,
        XVMADDMSP,
        XVMSUBADP,
        XVMSUBASP,
        XVMSUBMDP,
        XVMSUBMSP,
        XVMULDP,
        XVMULSP,
        XVNMADDADP,
        XVNMADDASP,
        XVNMADDMDP,
        XVNMADDMSP,
        XVNMSUBADP,
        XVNMSUBASP,
        XVNMSUBMDP,
        XVNMSUBMSP,
        XVRDPI,
        XVRDPIC,
        XVRDPIM,
        XVRDPIP,
        XVRDPIZ,
        XVREDP,
        XVRESP,
        XVRSPI,
        XVRSPIC,
        XVRSPIM,
        XVRSPIP,
        XVRSPIZ,
        XVRSQRTEDP,
        XVRSQRTESP,
        XVSUBDP,
        XVSUBSP,
        VCFSX,
        VCFSX_0,
        VCFUX,
        VCFUX_0,
        VMHRADDSHS,
        VMLADDUHM,
        VMSUMMBM,
        VMSUMSHM,
        VMSUMSHS,
        VMSUMUBM,
        VMSUMUHM,
        VMSUMUDM,
        VMSUMUHS,
        VMULESB,
        VMULESH,
        VMULESW,
        VMULEUB,
        VMULEUH,
        VMULEUW,
        VMULOSB,
        VMULOSH,
        VMULOSW,
        VMULOUB,
        VMULOUH,
        VMULOUW,
        VMULUWM,
        VSUM2SWS,
        VSUM4SBS,
        VSUM4SHS,
        VSUM4UBS,
        VSUMSWS
)>;

// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
        (instregex "MADD(HD|HDU|LD|LD8)$"),
        (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
)>;

// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
        FRSP,
        (instregex "FRI(N|P|Z|M)(D|S)$"),
        (instregex "FRE(S)?$"),
        (instregex "FADD(S)?$"),
        (instregex "FMSUB(S)?$"),
        (instregex "FMADD(S)?$"),
        (instregex "FSUB(S)?$"),
        (instregex "FCFID(U)?(S)?$"),
        (instregex "FCTID(U)?(Z)?$"),
        (instregex "FCTIW(U)?(Z)?$"),
        (instregex "FRSQRTE(S)?$"),
        FNMADDS,
        FNMADD,
        FNMSUBS,
        FNMSUB,
        FSELD,
        FSELS,
        FMULS,
        FMUL,
        XSMADDADP,
        XSMADDASP,
        XSMADDMDP,
        XSMADDMSP,
        XSMSUBADP,
        XSMSUBASP,
        XSMSUBMDP,
        XSMSUBMSP,
        XSMULDP,
        XSMULSP,
        XSNMADDADP,
        XSNMADDASP,
        XSNMADDMDP,
        XSNMADDMSP,
        XSNMSUBADP,
        XSNMSUBASP,
        XSNMSUBMDP,
        XSNMSUBMSP
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations can be done in parallel. The DP is restricted so we need a
// full 4 dispatches.
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
        (instregex "FSEL(D|S)_rec$")
)>;

// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
        (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations must be done sequentially. The DP is restricted so we need
// a full 4 dispatches.
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
        (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
        (instregex "FRE(S)?_rec$"),
        (instregex "FADD(S)?_rec$"),
        (instregex "FSUB(S)?_rec$"),
        (instregex "F(N)?MSUB(S)?_rec$"),
        (instregex "F(N)?MADD(S)?_rec$"),
        (instregex "FCFID(U)?(S)?_rec$"),
        (instregex "FCTID(U)?(Z)?_rec$"),
        (instregex "FCTIW(U)?(Z)?_rec$"),
        (instregex "FMUL(S)?_rec$"),
        (instregex "FRSQRTE(S)?_rec$"),
        FRSP_rec
)>;

// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
      (instrs
        XSADDDP,
        XSADDSP,
        XSCVDPHP,
        XSCVDPSP,
        XSCVDPSXDS,
        XSCVDPSXDSs,
        XSCVDPSXWS,
        XSCVDPUXDS,
        XSCVDPUXDSs,
        XSCVDPUXWS,
        XSCVDPSXWSs,
        XSCVDPUXWSs,
        XSCVHPDP,
        XSCVSPDP,
        XSCVSXDDP,
        XSCVSXDSP,
        XSCVUXDDP,
        XSCVUXDSP,
        XSRDPI,
        XSRDPIC,
        XSRDPIM,
        XSRDPIP,
        XSRDPIZ,
        XSREDP,
        XSRESP,
        XSRSQRTEDP,
        XSRSQRTESP,
        XSSUBDP,
        XSSUBSP,
        XSCVDPSPN,
        XSRSP
)>;

// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
      (instrs
        (instregex "LVS(L|R)$"),
        (instregex "VSPLTIS(W|H|B)$"),
        (instregex "VSPLT(W|H|B)(s)?$"),
        (instregex "V_SETALLONES(B|H)?$"),
        (instregex "VEXTRACTU(B|H|W)$"),
        (instregex "VINSERT(B|H|W|D)$"),
        MFVSRLD,
        MTVSRWS,
        VBPERMQ,
        VCLZLSBB,
        VCTZLSBB,
        VEXTRACTD,
        VEXTUBLX,
        VEXTUBRX,
        VEXTUHLX,
        VEXTUHRX,
        VEXTUWLX,
        VEXTUWRX,
        VGBBD,
        VMRGHB,
        VMRGHH,
        VMRGHW,
        VMRGLB,
        VMRGLH,
        VMRGLW,
        VPERM,
        VPERMR,
        VPERMXOR,
        VPKPX,
        VPKSDSS,
        VPKSDUS,
        VPKSHSS,
        VPKSHUS,
        VPKSWSS,
        VPKSWUS,
        VPKUDUM,
        VPKUDUS,
        VPKUHUM,
        VPKUHUS,
        VPKUWUM,
        VPKUWUS,
        VPRTYBQ,
        VSL,
        VSLDOI,
        VSLO,
        VSLV,
        VSR,
        VSRO,
        VSRV,
        VUPKHPX,
        VUPKHSB,
        VUPKHSH,
        VUPKHSW,
        VUPKLPX,
        VUPKLSB,
        VUPKLSH,
        VUPKLSW,
        XXBRD,
        XXBRH,
        XXBRQ,
        XXBRW,
        XXEXTRACTUW,
        XXINSERTW,
        XXMRGHW,
        XXMRGLW,
        XXPERM,
        XXPERMR,
        XXSLDWI,
        XXSLDWIs,
        XXSPLTIB,
        XXSPLTW,
        XXSPLTWs,
        XXPERMDI,
        XXPERMDIs,
        VADDCUQ,
        VADDECUQ,
        VADDEUQM,
        VADDUQM,
        VMUL10CUQ,
        VMUL10ECUQ,
        VMUL10EUQ,
        VMUL10UQ,
        VSUBCUQ,
        VSUBECUQ,
        VSUBEUQM,
        VSUBUQM,
        XSCMPEXPQP,
        XSCMPOQP,
        XSCMPUQP,
        XSTSTDCQP,
        XSXSIGQP,
        BCDCFN_rec,
        BCDCFZ_rec,
        BCDCPSGN_rec,
        BCDCTN_rec,
        BCDCTZ_rec,
        BCDSETSGN_rec,
        BCDS_rec,
        BCDTRUNC_rec,
        BCDUS_rec,
        BCDUTRUNC_rec,
        BCDADD_rec,
        BCDSUB_rec
)>;

// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
        BCDSR_rec,
        XSADDQP,
        XSADDQPO,
        XSCVDPQP,
        XSCVQPDP,
        XSCVQPDPO,
        XSCVQPSDZ,
        XSCVQPSWZ,
        XSCVQPUDZ,
        XSCVQPUWZ,
        XSCVSDQP,
        XSCVUDQP,
        XSRQPI,
        XSRQPIX,
        XSRQPXP,
        XSSUBQP,
        XSSUBQPO
)>;

// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
        BCDCTSQ_rec
)>;

// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
        XSMADDQP,
        XSMADDQPO,
        XSMSUBQP,
        XSMSUBQPO,
        XSMULQP,
        XSMULQPO,
        XSNMADDQP,
        XSNMADDQPO,
        XSNMSUBQP,
        XSNMSUBQPO
)>;

// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
        BCDCFSQ_rec
)>;

// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
        XSDIVQP,
        XSDIVQPO
)>;

// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
        XSSQRTQP,
        XSSQRTQPO
)>;

// 6 Cycle Load uses a single slice.
def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
      (instrs
        (instregex "LXVL(L)?")
)>;

// 5 Cycle Load uses a single slice.
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
      (instrs
        (instregex "LVE(B|H|W)X$"),
        (instregex "LVX(L)?"),
        (instregex "LXSI(B|H)ZX$"),
        LXSDX,
        LXVB16X,
        LXVD2X,
        LXVWSX,
        LXSIWZX,
        LXV,
        LXVX,
        LXSD,
        DFLOADf64,
        XFLOADf64,
        LIWZX
)>;

// 4 Cycle Load uses a single slice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
      (instrs
        (instregex "DCB(F|T|ST)(EP)?$"),
        (instregex "DCBZ(L)?(EP)?$"),
        (instregex "DCBTST(EP)?$"),
        (instregex "CP_COPY(8)?$"),
        (instregex "ICBI(EP)?$"),
        (instregex "ICBT(LS)?$"),
        (instregex "LBARX(L)?$"),
        (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
        (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
        (instregex "LH(A|B)RX(L)?(8)?$"),
        (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
        (instregex "LWARX(L)?$"),
        (instregex "LWBRX(8)?$"),
        (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
        CP_ABORT,
        DARN,
        EnforceIEIO,
        ISYNC,
        MSGSYNC,
        TLBSYNC,
        SYNC,
        LMW,
        LSWI
)>;

// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
// superslice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
        LFIWZX,
        LFDX,
        LFD
)>;

// Cracked Load Instructions.
// Load instructions that can be done in parallel.
def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_PAIR_1C],
      (instrs
        SLBIA,
        SLBIE,
        SLBMFEE,
        SLBMFEV,
        SLBMTE,
        TLBIEL
)>;

// Cracked Load Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
      (instrs
        (instregex "L(W|H)ZU(X)?(8)?$")
)>;

// Cracked TEND Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
        TEND
)>;


// Cracked Store Instruction
// Consecutive Store and ALU instructions. The store is restricted and requires
// three dispatches.
def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
        (instregex "ST(B|H|W|D)CX$")
)>;

// Cracked Load Instruction.
// Two consecutive load operations for a total of 8 cycles.
def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
        LDMX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
        (instregex "LHA(X)?(8)?$"),
        (instregex "CP_PASTE(8)?_rec$"),
        (instregex "LWA(X)?(_32)?$"),
        TCHECK
)>;

// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
        LFIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Two dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
        LXSIWAX,
        LIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
// cycles. The Load and ALU operations cannot be done at the same time and so
// their latencies are added.
// Full 6 dispatches are required as this is a restricted instruction.
def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
        LFSX,
        LFS
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Two dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
        LXSSP,
        LXSSPX,
        XFLOADf32,
        DFLOADf32
)>;

// Cracked 3-Way Load Instruction
// Load with two ALU operations that depend on each other
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
      (instrs
        (instregex "LHAU(X)?(8)?$"),
        LWAUX
)>;

// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
// added. Requires 8 cycles. Since the PM requires the full superslice we need
// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
// requires the remaining 1 dispatch.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C],
      (instrs
        LXVH8X,
        LXVDSX,
        LXVW4X
)>;

// Single slice Restricted store operation. The restricted operation requires
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
        (instregex "STF(S|D|IWX|SX|DX)$"),
        (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
        (instregex "STW(8)?$"),
        (instregex "(D|X)FSTORE(f32|f64)$"),
        (instregex "ST(W|H|D)BRX$"),
        (instregex "ST(B|H|D)(8)?$"),
        (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
        STIWX,
        SLBIEG,
        STMW,
        STSWI,
        TLBIE
)>;

// Vector Store Instruction
// Requires the whole superslice and therefore requires one dispatch
// as well as both the Even and Odd exec pipelines.
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
      (instrs
        (instregex "STVE(B|H|W)X$"),
        (instregex "STVX(L)?$"),
        (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
)>;

// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
        (instregex "MTCTR(8)?(loop)?$"),
        (instregex "MTLR(8)?$")
)>;

// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
        (instregex "M(T|F)VRSAVE(v)?$"),
        (instregex "M(T|F)PMR$"),
        (instregex "M(T|F)TB(8)?$"),
        (instregex "MF(SPR|CTR|LR)(8)?$"),
        (instregex "M(T|F)MSR(D)?$"),
        (instregex "MTSPR(8)?$")
)>;

// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
        DIVW,
        DIVWO,
        DIVWU,
        DIVWUO,
        MODSW
)>;

// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
        DIVWE,
        DIVWEO,
        DIVD,
        DIVDO,
        DIVWEU,
        DIVWEUO,
        DIVDU,
        DIVDUO,
        MODSD,
        MODUD,
        MODUW
)>;

// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
        DIVDE,
        DIVDEO,
        DIVDEU,
        DIVDEUO
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 18.
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
        (instregex "DIVW(U)?(O)?_rec$")
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
        DIVD_rec,
        DIVDO_rec,
        DIVDU_rec,
        DIVDUO_rec,
        DIVWE_rec,
        DIVWEO_rec,
        DIVWEU_rec,
        DIVWEUO_rec
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
        DIVDE_rec,
        DIVDEO_rec,
        DIVDEU_rec,
        DIVDEUO_rec
)>;

// CR access instructions in _BrMCR, IIC_BrMCRX.

// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
        MTCRF,
        MTCRF8
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
        (instregex "ADDC(8)?(O)?_rec$"),
        (instregex "SUBFC(8)?(O)?_rec$")
)>;

// Cracked ALU operations.
// Two ALU ops can be done in parallel.
// One is three cycle ALU the other is a two cycle ALU.
// One of the ALU ops is restricted the other is not so we have a total of
// 4 dispatches.
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
        (instregex "F(N)?ABS(D|S)_rec$"),
        (instregex "FCPSGN(D|S)_rec$"),
        (instregex "FNEG(D|S)_rec$"),
        FMR_rec
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
        MCRFS
)>;

// Cracked Restricted ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
        (instregex "MTFSF(b|_rec)?$"),
        (instregex "MTFSFI(_rec)?$"),
        MTFSFIb
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// One of the ALU ops is restricted and takes 3 dispatches.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
        (instregex "RLD(I)?C(R|L)_rec$"),
        (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
        (instregex "SLW(8)?_rec$"),
        (instregex "SRAW(I)?_rec$"),
        (instregex "SRW(8)?_rec$"),
        RLDICL_32_rec,
        RLDIMI_rec
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// Both of the ALU ops are restricted and take 3 dispatches.
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
        (instregex "MFFS(L|CE|_rec)?$")
)>;

// Cracked ALU instruction composed of three consecutive 2 cycle ALU
// operations for a total of 6 cycles. All of the ALU operations are also
// restricted so each takes 3 dispatches for a total of 9.
def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
        (instregex "MFCR(8)?$")
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
        (instregex "EXTSWSLI_32_64_rec$"),
        (instregex "SRAD(I)?_rec$"),
        EXTSWSLI_rec,
        SLD_rec,
        SRD_rec,
        RLDIC_rec
)>;

// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
        FDIV
)>;

// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
        FDIV_rec
)>;

// 36 Cycle DP Instruction.
// Instruction can be done on a single slice.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
      (instrs
        XSSQRTDP
)>;

// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
        FSQRT
)>;

// 36 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
        XVSQRTDP
)>;

// 27 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
        XVSQRTSP
)>;

// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
        FSQRT_rec
)>;

// 26 Cycle DP Instruction.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
      (instrs
        XSSQRTSP
)>;

// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
        FSQRTS
)>;

// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
        FSQRTS_rec
)>;

// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
      (instrs
        XSDIVDP
)>;

// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
        FDIVS
)>;

// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
        FDIVS_rec
)>;

// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
      (instrs
        XSDIVSP
)>;

// 24 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
        XVDIVSP
)>;

// 33 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
        XVDIVDP
)>;

// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final dispatch comes from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
      (instrs
      (instregex "LF(SU|SUX)$")
)>;

// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
// the store and so it can be run at the same time as the store. The store is
// also restricted (DISP_3SLOTS_1C); the ALU takes one dispatch (DISP_1C).
def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
      (instregex "STF(S|D)U(X)?$"),
      (instregex "ST(B|H|W|D)U(X)?(8)?$")
)>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
      (instrs
      (instregex "LBZU(X)?(8)?$"),
      (instregex "LDU(X)?$")
)>;

// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
// the load and so it can be run at the same time as the load. The load is also
// restricted: DISP_3SLOTS_1C is for the restricted load while DISP_1C is for
// the ALU. The AGEN pipeline is from the load and the EXEC pipeline is
// required for the ALU.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
      (instregex "LF(DU|DUX)$")
)>;

// Crypto Instructions

// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
      (instrs
      (instregex "VPMSUM(B|H|W|D)$"),
      (instregex "V(N)?CIPHER(LAST)?$"),
      VSBOX
)>;

// Branch Instructions

// Two Cycle Branch
// Uses the branch resource (P9_BR_2C) and a branch dispatch (DISP_BR_1C).
def : InstRW<[P9_BR_2C, DISP_BR_1C],
      (instrs
      (instregex "BCCCTR(L)?(8)?$"),
      (instregex "BCCL(A|R|RL)?$"),
      (instregex "BCCTR(L)?(8)?(n)?$"),
      (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
      (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
      (instregex "BL(_TLS|_NOP)?(_RM)?$"),
      (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
      (instregex "BLA(8|8_NOP)?(_RM)?$"),
      (instregex "BLR(8|L)?$"),
      (instregex "TAILB(A)?(8)?$"),
      (instregex "TAILBCTR(8)?$"),
      (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
      (instregex "BCLR(L)?(n)?$"),
      (instregex "BCTR(L)?(8)?(_RM)?$"),
      B,
      BA,
      BC,
      BCC,
      BCCA,
      BCL,
      BCLalways,
      BCLn,
      BCTRL8_LDinto_toc,
      BCTRL_LWZinto_toc,
      BCTRL8_LDinto_toc_RM,
      BCTRL_LWZinto_toc_RM,
      BCn,
      CTRL_DEP
)>;

// Five Cycle Branch with a 2 Cycle ALU Op
// Operations must be done consecutively and not in parallel.
def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
      (instrs
      ADDPCIS
)>;

// Special Extracted Instructions For Atomics

// Atomic Load
def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
      (instrs
      (instregex "L(D|W)AT$")
)>;

// Atomic Store
def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
      (instrs
      (instregex "ST(D|W)AT$")
)>;

// Signal Processing Engine (SPE) Instructions
// These instructions are not supported on Power 9. They are given an empty
// resource list and are marked Unsupported.
def : InstRW<[],
      (instrs
      BRINC,
      EVABS,
      EVEQV,
      EVMRA,
      EVNAND,
      EVNEG,
      (instregex "EVADD(I)?W$"),
      (instregex "EVADD(SM|SS|UM|US)IAAW$"),
      (instregex "EVAND(C)?$"),
      (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
      (instregex "EVCNTL(S|Z)W$"),
      (instregex "EVDIVW(S|U)$"),
      (instregex "EVEXTS(B|H)$"),
      (instregex "EVLD(H|W|D)(X)?$"),
      (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
      (instregex "EVLWHE(X)?$"),
      (instregex "EVLWHO(S|U)(X)?$"),
      (instregex "EVLW(H|W)SPLAT(X)?$"),
      (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
      (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
      (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
      (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
      (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
      (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
      (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
      (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
      (instregex "EVMWHUMI(A)?$"),
      (instregex "EVMWLS(M|S)IA(A|N)W$"),
      (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
      (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
      (instregex "EVMWSSF(A|AA|AN)?$"),
      (instregex "EVMWUMI(A|AA|AN)?$"),
      (instregex "EV(N|X)?OR(C)?$"),
      (instregex "EVR(LW|LWI|NDW)$"),
      (instregex "EVSLW(I)?$"),
      (instregex "EVSPLAT(F)?I$"),
      (instregex "EVSRW(I)?(S|U)$"),
      (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
      (instregex "EVSUBF(S|U)(M|S)IAAW$"),
      (instregex "EVSUB(I)?FW$")
)> { let Unsupported = 1; }

// General Instructions without scheduling support.
// Like the SPE group, these get an empty resource list and are marked
// Unsupported.
def : InstRW<[],
      (instrs
      (instregex "(H)?RFI(D)?$"),
      (instregex "DSS(ALL)?$"),
      (instregex "DST(ST)?(T)?(64)?$"),
      (instregex "ICBL(C|Q)$"),
      (instregex "L(W|H|B)EPX$"),
      (instregex "ST(W|H|B)EPX$"),
      (instregex "(L|ST)FDEPX$"),
      (instregex "M(T|F)SR(IN)?$"),
      (instregex "M(T|F)DCR$"),
      (instregex "NOP_GT_PWR(6|7)$"),
      (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
      (instregex "WRTEE(I)?$"),
      (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"),
      ATTN,
      CLRBHRB,
      MFBHRBE,
      MBAR,
      MSYNC,
      SLBSYNC,
      SLBFEE_rec,
      NAP,
      STOP,
      TRAP,
      RFCI,
      RFDI,
      RFMCI,
      SC,
      DCBA,
      DCBI,
      DCCCI,
      ICCCI,
      ADDEX,
      ADDEX8
)> { let Unsupported = 1; }