//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the resources required by P9 instructions. This is part of
// the P9 processor model used for instruction scheduling. This file should
// contain all the instructions that may be used on Power 9. This is not
// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//   for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//

// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    MTVSRDD,
    VEQV,
    VNAND,
    VNEGD,
    VNEGW,
    VNOR,
    VOR,
    VORC,
    VSEL,
    VXOR,
    XVNEGDP,
    XVNEGSP,
    XXLAND,
    XXLANDC,
    XXLEQV,
    XXLNAND,
    XXLNOR,
    XXLOR,
    XXLORf,
    XXLORC,
    XXLXOR,
    XXLXORdpz,
    XXLXORspz,
    XXLXORz,
    XXSEL,
    XSABSQP,
    XSCPSGNQP,
    XSIEXPQP,
    XSNABSQP,
    XSNEGQP,
    XSXEXPQP
)>;

// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP_3SLOTS_1C) for that superslice.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMPU(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    FTDIV,
    FTSQRT,
    CMPEQB
)>;

// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
      (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    (instregex "SETB(8)?$"),
    XSTDIVDP,
    XSTSQRTDP,
    XSXSIGDP,
    XSCVSPDPN,
    BPERMD
)>;

// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
      (instrs
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTVSRD$"),
    (instregex "MTVSRW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?$"),
    (instregex "ANDI(S)?o(8)?$"),
    (instregex "ADDC(8)?$"),
    (instregex "ADDIC(8)?(o)?$"),
    (instregex "ADD(8|4)(o)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(o)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(o)?$"),
    (instregex "NEG(8)?(o)?$"),
    (instregex "POPCNTB$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(o)?$"),
    (instregex "NAND(8)?(o)?$"),
    (instregex "AND(C)?(8)?(o)?$"),
    (instregex "NOR(8)?(o)?$"),
    (instregex "OR(C)?(8)?(o)?$"),
    (instregex "EQV(8)?(o)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    // NOTE(review): everything the next pattern can match is already matched
    // by "NEG(8)?(o)?$" earlier in this list, so it looks redundant.
    (instregex "NEG(8)?$"),
    (instregex "ADDI(S)?toc(HA|L)$"),
    COPY,
    MCRF,
    MCRXRX,
    XSNABSDP,
    XSXEXPDP,
    XSABSDP,
    XSNEGDP,
    XSCPSGNDP,
    MFVSRWZ,
    EXTSWSLI,
    SRADI_32,
    RLDIC,
    RFEBB,
    LA,
    TBEGIN,
    TRECHKPT,
    NOP,
    WAIT
)>;

// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    RLDIMI,
    XSIEXPDP,
    FMR,
    CREQV,
    CRXOR,
    TRECLAIM,
    TSR,
    TABORT
)>;

// Three cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(o)?$"),
    (instregex "VCMPBFP(o)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    VBPERMD,
    VADDCUW,
    VPOPCNTW,
    VPOPCNTD,
    VPRTYBD,
    VPRTYBW,
    VSHASIGMAD,
    VSHASIGMAW,
    VSUBSBS,
    VSUBSHS,
    VSUBSWS,
    VSUBUBS,
    VSUBUHS,
    VSUBUWS,
    VSUBCUW,
    VCMPGTSB,
    VCMPGTSBo,
    VCMPGTSD,
    VCMPGTSDo,
    VCMPGTSH,
    VCMPGTSHo,
    VCMPGTSW,
    VCMPGTSWo,
    VCMPGTUB,
    VCMPGTUBo,
    VCMPGTUD,
    VCMPGTUDo,
    VCMPGTUH,
    VCMPGTUHo,
    VCMPGTUW,
    VCMPGTUWo,
    VCMPNEBo,
    VCMPNEHo,
    VCMPNEWo,
    VCMPNEZBo,
    VCMPNEZHo,
    VCMPNEZWo,
    VCMPEQUBo,
    VCMPEQUDo,
    VCMPEQUHo,
    VCMPEQUWo,
    XVCMPEQDP,
    XVCMPEQDPo,
    XVCMPEQSP,
    XVCMPEQSPo,
    XVCMPGEDP,
    XVCMPGEDPo,
    XVCMPGESP,
    XVCMPGESPo,
    XVCMPGTDP,
    XVCMPGTDPo,
    XVCMPGTSP,
    XVCMPGTSPo,
    XVMAXDP,
    XVMAXSP,
    XVMINDP,
    XVMINSP,
    XVTDIVDP,
    XVTDIVSP,
    XVTSQRTDP,
    XVTSQRTSP,
    XVTSTDCDP,
    XVTSTDCSP,
    XVXSIGDP,
    XVXSIGSP
)>;

// 7 cycle DP vector operation that uses an entire superslice.
// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
// EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    VADDFP,
    VCTSXS,
    VCTSXS_0,
    VCTUXS,
    VCTUXS_0,
    VEXPTEFP,
    VLOGEFP,
    VMADDFP,
    VMHADDSHS,
    VNMSUBFP,
    VREFP,
    VRFIM,
    VRFIN,
    VRFIP,
    VRFIZ,
    VRSQRTEFP,
    VSUBFP,
    XVADDDP,
    XVADDSP,
    XVCVDPSP,
    XVCVDPSXDS,
    XVCVDPSXWS,
    XVCVDPUXDS,
    XVCVDPUXWS,
    XVCVHPSP,
    XVCVSPDP,
    XVCVSPHP,
    XVCVSPSXDS,
    XVCVSPSXWS,
    XVCVSPUXDS,
    XVCVSPUXWS,
    XVCVSXDDP,
    XVCVSXDSP,
    XVCVSXWDP,
    XVCVSXWSP,
    XVCVUXDDP,
    XVCVUXDSP,
    XVCVUXWDP,
    XVCVUXWSP,
    XVMADDADP,
    XVMADDASP,
    XVMADDMDP,
    XVMADDMSP,
    XVMSUBADP,
    XVMSUBASP,
    XVMSUBMDP,
    XVMSUBMSP,
    XVMULDP,
    XVMULSP,
    XVNMADDADP,
    XVNMADDASP,
    XVNMADDMDP,
    XVNMADDMSP,
    XVNMSUBADP,
    XVNMSUBASP,
    XVNMSUBMDP,
    XVNMSUBMSP,
    XVRDPI,
    XVRDPIC,
    XVRDPIM,
    XVRDPIP,
    XVRDPIZ,
    XVREDP,
    XVRESP,
    XVRSPI,
    XVRSPIC,
    XVRSPIM,
    XVRSPIP,
    XVRSPIZ,
    XVRSQRTEDP,
    XVRSQRTESP,
    XVSUBDP,
    XVSUBSP,
    VCFSX,
    VCFSX_0,
    VCFUX,
    VCFUX_0,
    VMHRADDSHS,
    VMLADDUHM,
    VMSUMMBM,
    VMSUMSHM,
    VMSUMSHS,
    VMSUMUBM,
    VMSUMUHM,
    VMSUMUHS,
    VMULESB,
    VMULESH,
    VMULESW,
    VMULEUB,
    VMULEUH,
    VMULEUW,
    VMULOSB,
    VMULOSH,
    VMULOSW,
    VMULOUB,
    VMULOUH,
    VMULOUW,
    VMULUWM,
    VSUM2SWS,
    VSUM4SBS,
    VSUM4SHS,
    VSUM4UBS,
    VSUMSWS
)>;

// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MADD(HD|HDU|LD|LD8)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$")
)>;

// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
// dispatch units for the superslice.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FRSP,
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    FNMADDS,
    FNMADD,
    FNMSUBS,
    FNMSUB,
    FSELD,
    FSELS,
    FMULS,
    FMUL,
    XSMADDADP,
    XSMADDASP,
    XSMADDMDP,
    XSMADDMSP,
    XSMSUBADP,
    XSMSUBASP,
    XSMSUBMDP,
    XSMSUBMSP,
    XSMULDP,
    XSMULSP,
    XSNMADDADP,
    XSNMADDASP,
    XSNMADDMDP,
    XSNMADDMSP,
    XSNMSUBADP,
    XSNMSUBASP,
    XSNMSUBMDP,
    XSNMSUBMSP
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations can be done in parallel. The DP is restricted so we need a
// full 4 dispatches.
def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "FSEL(D|S)o$")
)>;

// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "MUL(H|L)(D|W)(U)?o$")
)>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations must be done sequentially. The DP is restricted so we need
// a full 4 dispatches.
def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "FRI(N|P|Z|M)(D|S)o$"),
    (instregex "FRE(S)?o$"),
    (instregex "FADD(S)?o$"),
    (instregex "FSUB(S)?o$"),
    (instregex "F(N)?MSUB(S)?o$"),
    (instregex "F(N)?MADD(S)?o$"),
    (instregex "FCFID(U)?(S)?o$"),
    (instregex "FCTID(U)?(Z)?o$"),
    (instregex "FCTIW(U)?(Z)?o$"),
    (instregex "FMUL(S)?o$"),
    (instregex "FRSQRTE(S)?o$"),
    FRSPo
)>;

// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
      (instrs
    XSADDDP,
    XSADDSP,
    XSCVDPHP,
    XSCVDPSP,
    XSCVDPSXDS,
    XSCVDPSXDSs,
    XSCVDPSXWS,
    XSCVDPUXDS,
    XSCVDPUXDSs,
    XSCVDPUXWS,
    XSCVDPSXWSs,
    XSCVDPUXWSs,
    XSCVHPDP,
    XSCVSPDP,
    XSCVSXDDP,
    XSCVSXDSP,
    XSCVUXDDP,
    XSCVUXDSP,
    XSRDPI,
    XSRDPIC,
    XSRDPIM,
    XSRDPIP,
    XSRDPIZ,
    XSREDP,
    XSRESP,
    XSRSQRTEDP,
    XSRSQRTESP,
    XSSUBDP,
    XSSUBSP,
    XSCVDPSPN,
    XSRSP
)>;

// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
      (instrs
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    MFVSRLD,
    MTVSRWS,
    VBPERMQ,
    VCLZLSBB,
    VCTZLSBB,
    VEXTRACTD,
    VEXTUBLX,
    VEXTUBRX,
    VEXTUHLX,
    VEXTUHRX,
    VEXTUWLX,
    VEXTUWRX,
    VGBBD,
    VMRGHB,
    VMRGHH,
    VMRGHW,
    VMRGLB,
    VMRGLH,
    VMRGLW,
    VPERM,
    VPERMR,
    VPERMXOR,
    VPKPX,
    VPKSDSS,
    VPKSDUS,
    VPKSHSS,
    VPKSHUS,
    VPKSWSS,
    VPKSWUS,
    VPKUDUM,
    VPKUDUS,
    VPKUHUM,
    VPKUHUS,
    VPKUWUM,
    VPKUWUS,
    VPRTYBQ,
    VSL,
    VSLDOI,
    VSLO,
    VSLV,
    VSR,
    VSRO,
    VSRV,
    VUPKHPX,
    VUPKHSB,
    VUPKHSH,
    VUPKHSW,
    VUPKLPX,
    VUPKLSB,
    VUPKLSH,
    VUPKLSW,
    XXBRD,
    XXBRH,
    XXBRQ,
    XXBRW,
    XXEXTRACTUW,
    XXINSERTW,
    XXMRGHW,
    XXMRGLW,
    XXPERM,
    XXPERMR,
    XXSLDWI,
    XXSLDWIs,
    XXSPLTIB,
    XXSPLTW,
    XXSPLTWs,
    XXPERMDI,
    XXPERMDIs,
    VADDCUQ,
    VADDECUQ,
    VADDEUQM,
    VADDUQM,
    VMUL10CUQ,
    VMUL10ECUQ,
    VMUL10EUQ,
    VMUL10UQ,
    VSUBCUQ,
    VSUBECUQ,
    VSUBEUQM,
    VSUBUQM,
    XSCMPEXPQP,
    XSCMPOQP,
    XSCMPUQP,
    XSTSTDCQP,
    XSXSIGQP,
    BCDCFNo,
    BCDCFZo,
    BCDCPSGNo,
    BCDCTNo,
    BCDCTZo,
    BCDSETSGNo,
    BCDSo,
    BCDTRUNCo,
    BCDUSo,
    BCDUTRUNCo
)>;

// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDSRo,
    XSADDQP,
    XSADDQPO,
    XSCVDPQP,
    XSCVQPDP,
    XSCVQPDPO,
    XSCVQPSDZ,
    XSCVQPSWZ,
    XSCVQPUDZ,
    XSCVQPUWZ,
    XSCVSDQP,
    XSCVUDQP,
    XSRQPI,
    XSRQPIX,
    XSRQPXP,
    XSSUBQP,
    XSSUBQPO
)>;

// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDCTSQo
)>;

// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSMADDQP,
    XSMADDQPO,
    XSMSUBQP,
    XSMSUBQPO,
    XSMULQP,
    XSMULQPO,
    XSNMADDQP,
    XSNMADDQPO,
    XSNMSUBQP,
    XSNMSUBQPO
)>;

// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    BCDCFSQo
)>;

// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSDIVQP,
    XSDIVQPO
)>;

// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
      (instrs
    XSSQRTQP,
    XSSQRTQPO
)>;

// 6 Cycle Load uses a single slice.
def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
      (instrs
    // NOTE(review): unlike most patterns in this file, this one has no
    // trailing '$' anchor - confirm that is intended.
    (instregex "LXVL(L)?")
)>;

// 5 Cycle Load uses a single slice.
def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "LVE(B|H|W)X$"),
    // NOTE(review): unlike its neighbours, the next pattern has no trailing
    // '$' anchor - confirm that is intended.
    (instregex "LVX(L)?"),
    (instregex "LXSI(B|H)ZX$"),
    LXSDX,
    LXVB16X,
    LXVD2X,
    LXVWSX,
    LXSIWZX,
    LXV,
    LXVX,
    LXSD,
    DFLOADf64,
    XFLOADf64,
    LIWZX
)>;

// 4 Cycle Load uses a single slice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "CP_PASTE(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    CP_ABORT,
    DARN,
    EnforceIEIO,
    ISYNC,
    MSGSYNC,
    TLBSYNC,
    SYNC,
    LMW,
    LSWI
)>;

// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
// superslice.
def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
    LFIWZX,
    LFDX,
    LFD
)>;

// Cracked Load Instructions.
// Load instructions that can be done in parallel.
def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_PAIR_1C],
      (instrs
    SLBIA,
    SLBIE,
    SLBMFEE,
    SLBMFEV,
    SLBMTE,
    TLBIEL
)>;

// Cracked Load Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
      (instrs
    (instregex "L(W|H)ZU(X)?(8)?$")
)>;

// Cracked TEND Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    TEND
)>;


// Cracked Store Instruction
// Consecutive Store and ALU instructions. The store is restricted and requires
// three dispatches.
def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "ST(B|H|W|D)CX$")
)>;

// Cracked Load Instruction.
// Two consecutive load operations for a total of 8 cycles.
def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    LDMX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_1C, DISP_1C],
      (instrs
    (instregex "LHA(X)?(8)?$"),
    (instregex "CP_PASTE(8)?o$"),
    (instregex "LWA(X)?(_32)?$"),
    TCHECK
)>;

// Cracked Restricted Load instruction.
// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Full 6 dispatches are required as this is both cracked and restricted.
def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    LFIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Two dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    LXSIWAX,
    LIWAX
)>;

// Cracked Load instruction.
// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
// cycles. The Load and ALU operations cannot be done at the same time and so
// their latencies are added.
// Full 6 dispatches are required as this is a restricted instruction.
def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    LFSX,
    LFS
)>;

// Cracked Load instruction.
// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
// operations cannot be done at the same time and so their latencies are added.
// Two dispatches are required as this is a cracked instruction.
def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    LXSSP,
    LXSSPX,
    XFLOADf32,
    DFLOADf32
)>;

// Cracked 3-Way Load Instruction
// Load with two ALU operations that depend on each other
def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
      (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX
)>;

// Cracked Load that requires the PM resource.
// Since the Load and the PM cannot be done at the same time the latencies are
// added. Requires 8 cycles. Since the PM requires the full superslice we need
// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
// requires the remaining 1 dispatch.
def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C, DISP_1C],
      (instrs
    LXVH8X,
    LXVDSX,
    LXVW4X
)>;

// Single slice Restricted store operation. The restricted operation requires
// all three dispatches for the superslice.
def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "STF(S|D|IWX|SX|DX)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    STIWX,
    SLBIEG,
    STMW,
    STSWI,
    TLBIE
)>;

// Vector Store Instruction
// Requires the whole superslice and therefore requires one dispatch
// as well as both the Even and Odd exec pipelines.
def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
      (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
)>;

// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$")
)>;

// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
      (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "MTSPR(8)?$")
)>;

// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVW,
    DIVWU,
    MODSW
)>;

// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVWE,
    DIVD,
    DIVWEU,
    DIVDU,
    MODSD,
    MODUD,
    MODUW
)>;

// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and two
// dispatches.
def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
      (instrs
    DIVDE,
    DIVDEU
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 18.
def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    (instregex "DIVW(U)?(O)?o$")
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 26.
def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    DIVDo,
    DIVDUo,
    DIVWEo,
    DIVWEUo
)>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV per
// superslice. Latency of DIV plus ALU is 42.
def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
              DISP_EVEN_1C, DISP_1C],
      (instrs
    DIVDEo,
    DIVDEUo
)>;

// CR access instructions in _BrMCR, IIC_BrMCRX.

// Cracked, restricted, ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    MTCRF,
    MTCRF8
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches. ALU ops are
// 2 cycles each.
def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
    (instregex "ADDC(8)?o$"),
    (instregex "SUBFC(8)?o$")
)>;

// Cracked ALU operations.
// Two ALU ops can be done in parallel.
// One is a three cycle ALU, the other is a two cycle ALU.
// One of the ALU ops is restricted, the other is not, so we have a total of
// 4 dispatches.
def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "F(N)?ABS(D|S)o$"),
    (instregex "FCPSGN(D|S)o$"),
    (instregex "FNEG(D|S)o$"),
    FMRo
)>;

// Cracked ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 2 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_1C, DISP_1C],
      (instrs
    MCRFS
)>;

// Cracked Restricted ALU operations.
// Here the two ALU ops can actually be done in parallel and therefore the
// latencies are not added together. Otherwise this is like having two
// instructions running together on two pipelines and 6 dispatches.
// ALU ops are 3 cycles each.
def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MTFSF(b|o)?$"),
    (instregex "MTFSFI(o)?$")
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// One of the ALU ops is restricted and takes 3 dispatches.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "RLD(I)?C(R|L)o$"),
    (instregex "RLW(IMI|INM|NM)(8)?o$"),
    (instregex "SLW(8)?o$"),
    (instregex "SRAW(I)?o$"),
    (instregex "SRW(8)?o$"),
    RLDICL_32o,
    RLDIMIo
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// Both of the ALU ops are restricted and take 3 dispatches each.
def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MFFS(L|CE|o)?$")
)>;

// Cracked ALU instruction composed of three consecutive 2 cycle ALU operations
// for a total of 6 cycles. All of the ALU operations are also restricted so
// each takes 3 dispatches for a total of 9.
def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
      (instrs
    (instregex "MFCR(8)?$")
)>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
      (instrs
    (instregex "EXTSWSLI_32_64o$"),
    (instregex "SRAD(I)?o$"),
    EXTSWSLIo,
    SLDo,
    SRDo,
    RLDICo
)>;

// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FDIV
)>;

// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FDIVo
)>;

// 36 Cycle DP Instruction.
// Instruction can be done on a single slice.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
      (instrs
    XSSQRTDP
)>;

// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FSQRT
)>;

// 36 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVSQRTDP
)>;

// 27 Cycle DP Vector Instruction.
def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVSQRTSP
)>;

// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FSQRTo
)>;

// 26 Cycle DP Instruction.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
      (instrs
    XSSQRTSP
)>;

// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FSQRTS
)>;

// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FSQRTSo
)>;

// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
      (instrs
    XSDIVDP
)>;

// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
      (instrs
    FDIVS
)>;

// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
      (instrs
    FDIVSo
)>;

// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
      (instrs
    XSDIVSP
)>;

// 24 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVDIVSP
)>;

// 33 Cycle DP Vector Instruction. Takes one full superslice.
// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
// superslice.
def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
              DISP_1C],
      (instrs
    XVDIVDP
)>;

// Instruction cracked into three pieces. One Load and two ALU operations.
// The Load and one of the ALU ops cannot be run at the same time and so the
// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
// Both the load and the ALU that depends on it are restricted and so they take
// a total of 6 dispatches. The final dispatch comes from the second ALU op.
// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
      (instrs
    (instregex "LF(SU|SUX)$")
)>;

// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
// the store and so it can be run at the same time as the store. The store is
// also restricted.
def : InstRW<[P9_LS_1C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
             (instrs
               (instregex "STF(S|D)U(X)?$"),
               (instregex "ST(B|H|W|D)U(X)?(8)?$"))>;

// Cracked instruction made up of a Load and an ALU op. The ALU op does not
// depend on the Load, so the two can run at the same time.
def : InstRW<[P9_LS_4C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
             (instrs
               (instregex "LBZU(X)?(8)?$"),
               (instregex "LDU(X)?$"))>;

// Cracked instruction made up of a Load and an ALU op. The ALU op does not
// depend on the Load, so the two can run at the same time. The Load is also
// restricted: it takes all 3 dispatches (DISP_3SLOTS_1C) while the ALU op
// takes one (DISP_1C). The AGEN pipeline is for the Load and the EXEC pipeline
// is for the ALU op.
def : InstRW<[P9_LS_4C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
             (instrs (instregex "LF(DU|DUX)$"))>;

// Crypto Instructions

// 6 Cycle CY operation. There is only one CY unit per CPU, so a whole
// superslice is used: both exec pipelines (EXECO, EXECE) and one dispatch.
def : InstRW<[P9_CY_6C,
              IP_EXECO_1C, IP_EXECE_1C,
              DISP_1C],
             (instrs
               (instregex "VPMSUM(B|H|W|D)$"),
               (instregex "V(N)?CIPHER(LAST)?$"),
               VSBOX)>;

// Branch Instructions

// Two Cycle Branch. Uses the branch write resource and the branch dispatch
// only; no EXEC or AGEN pipeline is listed.
def : InstRW<[P9_BR_2C, DISP_BR_1C],
             (instrs
               // Opcode families matched by regular expression.
               (instregex "BCCCTR(L)?(8)?$"),
               (instregex "BCCL(A|R|RL)?$"),
               (instregex "BCCTR(L)?(8)?(n)?$"),
               (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
               (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
               (instregex "BL(_TLS|_NOP)?$"),
               (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
               (instregex "BLA(8|8_NOP)?$"),
               (instregex "BLR(8|L)?$"),
               (instregex "TAILB(A)?(8)?$"),
               (instregex "TAILBCTR(8)?$"),
               (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
               (instregex "BCLR(L)?(n)?$"),
               (instregex "BCTR(L)?(8)?$"),
               // Individually named opcodes.
               B,
               BA,
               BC,
               BCC,
               BCCA,
               BCL,
               BCLalways,
               BCLn,
               BCTRL8_LDinto_toc,
               BCn,
               CTRL_DEP)>;

// Five Cycle Branch with a 2 Cycle ALU Op.
// The two operations must be done consecutively and not in parallel.
def : InstRW<[P9_BROpAndALUOp_7C,
              IP_EXEC_1C,
              DISP_BR_1C, DISP_1C],
             (instrs ADDPCIS)>;

// Special Extracted Instructions For Atomics

// Atomic Load.
// Lists five LS write resources (two 1-cycle, three 4-cycle), two EXEC and
// five AGEN pipelines, and six dispatch entries (two of them restricted).
def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C, IP_EXEC_1C,
              IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C,
              DISP_1C, DISP_1C, DISP_1C],
             (instrs (instregex "L(D|W)AT$"))>;

// Atomic Store.
// Lists three LS write resources (one 1-cycle, two 4-cycle), one EXEC and
// three AGEN pipelines, and three dispatch entries (one of them restricted).
def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C,
              IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_3SLOTS_1C, DISP_1C],
             (instrs (instregex "ST(D|W)AT$"))>;

// Signal Processing Engine (SPE) Instructions.
// These instructions are not supported on Power 9, so they get an empty
// resource list and are flagged as Unsupported.
def : InstRW<[],
             (instrs
               BRINC,
               EVABS,
               EVEQV,
               EVMRA,
               EVNAND,
               EVNEG,
               (instregex "EVADD(I)?W$"),
               (instregex "EVADD(SM|SS|UM|US)IAAW$"),
               (instregex "EVAND(C)?$"),
               (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
               (instregex "EVCNTL(S|Z)W$"),
               (instregex "EVDIVW(S|U)$"),
               (instregex "EVEXTS(B|H)$"),
               (instregex "EVLD(H|W|D)(X)?$"),
               (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
               (instregex "EVLWHE(X)?$"),
               (instregex "EVLWHO(S|U)(X)?$"),
               (instregex "EVLW(H|W)SPLAT(X)?$"),
               (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
               (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
               (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
               (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
               (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
               (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
               (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
               (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
               (instregex "EVMWHUMI(A)?$"),
               (instregex "EVMWLS(M|S)IA(A|N)W$"),
               (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
               (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
               (instregex "EVMWSSF(A|AA|AN)?$"),
               (instregex "EVMWUMI(A|AA|AN)?$"),
               (instregex "EV(N|X)?OR(C)?$"),
               (instregex "EVR(LW|LWI|NDW)$"),
               (instregex "EVSLW(I)?$"),
               (instregex "EVSPLAT(F)?I$"),
               (instregex "EVSRW(I)?(S|U)$"),
               (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
               (instregex "EVSUBF(S|U)(M|S)IAAW$"),
               (instregex "EVSUB(I)?FW$"))> {
  let Unsupported = 1;
}

// General Instructions without scheduling support.
// They get an empty resource list and are flagged as Unsupported.
def : InstRW<[],
             (instrs
               // Opcode families matched by regular expression.
               (instregex "(H)?RFI(D)?$"),
               (instregex "DSS(ALL)?$"),
               (instregex "DST(ST)?(T)?(64)?$"),
               (instregex "ICBL(C|Q)$"),
               (instregex "L(W|H|B)EPX$"),
               (instregex "ST(W|H|B)EPX$"),
               (instregex "(L|ST)FDEPX$"),
               (instregex "M(T|F)SR(IN)?$"),
               (instregex "M(T|F)DCR$"),
               (instregex "NOP_GT_PWR(6|7)$"),
               (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
               (instregex "WRTEE(I)?$"),
               // Individually named opcodes.
               ATTN,
               CLRBHRB,
               MFBHRBE,
               MBAR,
               MSYNC,
               SLBSYNC,
               SLBFEEo,
               NAP,
               STOP,
               TRAP,
               RFCI,
               RFDI,
               RFMCI,
               SC,
               DCBA,
               DCBI,
               DCCCI,
               ICCCI)> {
  let Unsupported = 1;
}