//===- P9InstrResources.td - P9 Instruction Resource Defs -*- tablegen -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the resources required by P9 instructions. This is part of
// the P9 processor model used for instruction scheduling. This file should
// contain all the instructions that may be used on Power 9. This is not
// just instructions that are new on Power 9 but also instructions that were
// available on earlier architectures and are still used in Power 9.
//
// The makeup of the P9 CPU is modeled as follows:
//   - Each CPU is made up of two superslices.
//   - Each superslice is made up of two slices. Therefore, there are 4 slices
//     for each CPU.
//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
//   - Each CPU has:
//     - One CY (Crypto) unit P9_CY_*
//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
//     - Two PM (Permute) units. One on each superslice. P9_PM_*
//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
//       This also includes fixed point multiply add.
//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
//     - Four Load/Store Queues. P9_LS_*
//   - Each set of instructions will require a number of these resources.
//===----------------------------------------------------------------------===//

// Two cycle ALU vector operation that uses an entire superslice.
// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
def : InstRW<
  [P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    (instregex "VADDU(B|H|W|D)M$"),
    (instregex "VAND(C)?$"),
    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
    (instregex "V_SET0(B|H)?$"),
    (instregex "VS(R|L)(B|H|W|D)$"),
    (instregex "VSUBU(B|H|W|D)M$"),
    (instregex "VPOPCNT(B|H)$"),
    (instregex "VRL(B|H|W|D)$"),
    (instregex "VSRA(B|H|W|D)$"),
    (instregex "XV(N)?ABS(D|S)P$"),
    (instregex "XVCPSGN(D|S)P$"),
    (instregex "XV(I|X)EXP(D|S)P$"),
    (instregex "VRL(D|W)(MI|NM)$"),
    (instregex "VMRG(E|O)W$"),
    MTVSRDD,
    VEQV,
    VNAND,
    VNEGD,
    VNEGW,
    VNOR,
    VOR,
    VORC,
    VSEL,
    VXOR,
    XVNEGDP,
    XVNEGSP,
    XXLAND,
    XXLANDC,
    XXLEQV,
    XXLNAND,
    XXLNOR,
    XXLOR,
    XXLORf,
    XXLORC,
    XXLXOR,
    XXLXORdpz,
    XXLXORspz,
    XXLXORz,
    XXSEL,
    XSABSQP,
    XSCPSGNQP,
    XSIEXPQP,
    XSNABSQP,
    XSNEGQP,
    XSXEXPQP
  )
>;

// Restricted-dispatch ALU operation, 3 cycles. Executes on a single slice,
// but because it is restricted it occupies all three dispatch slots
// (DISP_3SLOTS) of that superslice.
def : InstRW<
  [P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "TABORT(D|W)C(I)?$"),
    (instregex "MTFSB(0|1)$"),
    (instregex "MFFSC(D)?RN(I)?$"),
    (instregex "CMPRB(8)?$"),
    (instregex "TD(I)?$"),
    (instregex "TW(I)?$"),
    (instregex "FCMPU(S|D)$"),
    (instregex "XSTSTDC(S|D)P$"),
    FTDIV,
    FTSQRT,
    CMPEQB
  )
>;

// Standard-dispatch ALU operation, 3 cycles. Uses a single slice.
def : InstRW<
  [P9_ALU_3C, IP_EXEC_1C, DISP_1C],
  (instrs
    (instregex "XSMAX(C|J)?DP$"),
    (instregex "XSMIN(C|J)?DP$"),
    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
    (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
    (instregex "POPCNT(D|W)$"),
    (instregex "CMPB(8)?$"),
    (instregex "SETB(8)?$"),
    XSTDIVDP,
    XSTSQRTDP,
    XSXSIGDP,
    XSCVSPDPN,
    BPERMD
  )
>;

// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
def : InstRW<
  [P9_ALU_2C, IP_EXEC_1C, DISP_1C],
  (instrs
    (instregex "S(L|R)D$"),
    (instregex "SRAD(I)?$"),
    (instregex "EXTSWSLI_32_64$"),
    (instregex "MFV(S)?RD$"),
    (instregex "MTVSRD$"),
    (instregex "MTVSRW(A|Z)$"),
    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
    (instregex "CMP(L)?D(I)?$"),
    (instregex "SUBF(I)?C(8)?(O)?$"),
    (instregex "ANDI(S)?o(8)?$"),
    (instregex "ADDC(8)?(O)?$"),
    (instregex "ADDIC(8)?(o)?$"),
    (instregex "ADD(8|4)(O)?(o)?$"),
    (instregex "ADD(E|ME|ZE)(8)?(O)?(o)?$"),
    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(o)?$"),
    // The optional "(o)?" suffix means this pattern also matches the
    // non-record forms NEG, NEG8, NEGO and NEG8O, so no separate
    // "NEG(8)?(O)?$" entry is needed.
    (instregex "NEG(8)?(O)?(o)?$"),
    (instregex "POPCNTB$"),
    (instregex "ADD(I|IS)?(8)?$"),
    (instregex "LI(S)?(8)?$"),
    (instregex "(X)?OR(I|IS)?(8)?(o)?$"),
    (instregex "NAND(8)?(o)?$"),
    (instregex "AND(C)?(8)?(o)?$"),
    (instregex "NOR(8)?(o)?$"),
    (instregex "OR(C)?(8)?(o)?$"),
    (instregex "EQV(8)?(o)?$"),
    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
    (instregex "ADD(4|8)(TLS)?(_)?$"),
    (instregex "ADDI(S)?toc(HA|L)$"),
    COPY,
    MCRF,
    MCRXRX,
    XSNABSDP,
    XSXEXPDP,
    XSABSDP,
    XSNEGDP,
    XSCPSGNDP,
    MFVSRWZ,
    EXTSWSLI,
    SRADI_32,
    RLDIC,
    RFEBB,
    LA,
    TBEGIN,
    TRECHKPT,
    NOP,
    WAIT
  )
>;

// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
// single slice. However, since it is Restricted, it requires all 3 dispatches
// (DISP) for that superslice.
def : InstRW<
  [P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "RLDC(L|R)$"),
    (instregex "RLWIMI(8)?$"),
    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
    (instregex "M(F|T)OCRF(8)?$"),
    (instregex "CR(6)?(UN)?SET$"),
    (instregex "CR(N)?(OR|AND)(C)?$"),
    (instregex "S(L|R)W(8)?$"),
    (instregex "RLW(INM|NM)(8)?$"),
    (instregex "F(N)?ABS(D|S)$"),
    (instregex "FNEG(D|S)$"),
    (instregex "FCPSGN(D|S)$"),
    (instregex "SRAW(I)?$"),
    (instregex "ISEL(8)?$"),
    RLDIMI,
    XSIEXPDP,
    FMR,
    CREQV,
    CRXOR,
    TRECLAIM,
    TSR,
    TABORT
  )
>;

// Three cycle ALU vector operation occupying an entire superslice: both ALU
// units (even ALUE and odd ALUO), both pipelines (EXECE, EXECO) and one
// dispatch (DISP) to that superslice.
def : InstRW<
  [P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    (instregex "M(T|F)VSCR$"),
    (instregex "VCMPNEZ(B|H|W)$"),
    (instregex "VCMPEQU(B|H|W|D)$"),
    (instregex "VCMPNE(B|H|W)$"),
    (instregex "VABSDU(B|H|W)$"),
    (instregex "VADDU(B|H|W)S$"),
    (instregex "VAVG(S|U)(B|H|W)$"),
    (instregex "VCMP(EQ|GE|GT)FP(o)?$"),
    (instregex "VCMPBFP(o)?$"),
    (instregex "VC(L|T)Z(B|H|W|D)$"),
    (instregex "VADDS(B|H|W)S$"),
    (instregex "V(MIN|MAX)FP$"),
    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
    VBPERMD,
    VADDCUW,
    VPOPCNTW,
    VPOPCNTD,
    VPRTYBD,
    VPRTYBW,
    VSHASIGMAD,
    VSHASIGMAW,
    VSUBSBS,
    VSUBSHS,
    VSUBSWS,
    VSUBUBS,
    VSUBUHS,
    VSUBUWS,
    VSUBCUW,
    VCMPGTSB,
    VCMPGTSBo,
    VCMPGTSD,
    VCMPGTSDo,
    VCMPGTSH,
    VCMPGTSHo,
    VCMPGTSW,
    VCMPGTSWo,
    VCMPGTUB,
    VCMPGTUBo,
    VCMPGTUD,
    VCMPGTUDo,
    VCMPGTUH,
    VCMPGTUHo,
    VCMPGTUW,
    VCMPGTUWo,
    VCMPNEBo,
    VCMPNEHo,
    VCMPNEWo,
    VCMPNEZBo,
    VCMPNEZHo,
    VCMPNEZWo,
    VCMPEQUBo,
    VCMPEQUDo,
    VCMPEQUHo,
    VCMPEQUWo,
    XVCMPEQDP,
    XVCMPEQDPo,
    XVCMPEQSP,
    XVCMPEQSPo,
    XVCMPGEDP,
    XVCMPGEDPo,
    XVCMPGESP,
    XVCMPGESPo,
    XVCMPGTDP,
    XVCMPGTDPo,
    XVCMPGTSP,
    XVCMPGTSPo,
    XVMAXDP,
    XVMAXSP,
    XVMINDP,
    XVMINSP,
    XVTDIVDP,
    XVTDIVSP,
    XVTSQRTDP,
    XVTSQRTSP,
    XVTSTDCDP,
    XVTSTDCSP,
    XVXSIGDP,
    XVXSIGSP
  )
>;

// 7 cycle DP vector operation occupying an entire superslice: both DP units
// (even DPE and odd DPO), both pipelines (EXECE, EXECO) and one dispatch
// (DISP_1C) to that superslice.
def : InstRW<
  [P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    VADDFP,
    VCTSXS,
    VCTSXS_0,
    VCTUXS,
    VCTUXS_0,
    VEXPTEFP,
    VLOGEFP,
    VMADDFP,
    VMHADDSHS,
    VNMSUBFP,
    VREFP,
    VRFIM,
    VRFIN,
    VRFIP,
    VRFIZ,
    VRSQRTEFP,
    VSUBFP,
    XVADDDP,
    XVADDSP,
    XVCVDPSP,
    XVCVDPSXDS,
    XVCVDPSXWS,
    XVCVDPUXDS,
    XVCVDPUXWS,
    XVCVHPSP,
    XVCVSPDP,
    XVCVSPHP,
    XVCVSPSXDS,
    XVCVSPSXWS,
    XVCVSPUXDS,
    XVCVSPUXWS,
    XVCVSXDDP,
    XVCVSXDSP,
    XVCVSXWDP,
    XVCVSXWSP,
    XVCVUXDDP,
    XVCVUXDSP,
    XVCVUXWDP,
    XVCVUXWSP,
    XVMADDADP,
    XVMADDASP,
    XVMADDMDP,
    XVMADDMSP,
    XVMSUBADP,
    XVMSUBASP,
    XVMSUBMDP,
    XVMSUBMSP,
    XVMULDP,
    XVMULSP,
    XVNMADDADP,
    XVNMADDASP,
    XVNMADDMDP,
    XVNMADDMSP,
    XVNMSUBADP,
    XVNMSUBASP,
    XVNMSUBMDP,
    XVNMSUBMSP,
    XVRDPI,
    XVRDPIC,
    XVRDPIM,
    XVRDPIP,
    XVRDPIZ,
    XVREDP,
    XVRESP,
    XVRSPI,
    XVRSPIC,
    XVRSPIM,
    XVRSPIP,
    XVRSPIZ,
    XVRSQRTEDP,
    XVRSQRTESP,
    XVSUBDP,
    XVSUBSP,
    VCFSX,
    VCFSX_0,
    VCFUX,
    VCFUX_0,
    VMHRADDSHS,
    VMLADDUHM,
    VMSUMMBM,
    VMSUMSHM,
    VMSUMSHS,
    VMSUMUBM,
    VMSUMUHM,
    VMSUMUHS,
    VMULESB,
    VMULESH,
    VMULESW,
    VMULEUB,
    VMULEUH,
    VMULEUW,
    VMULOSB,
    VMULOSH,
    VMULOSW,
    VMULOUB,
    VMULOUH,
    VMULOUW,
    VMULUWM,
    VSUM2SWS,
    VSUM4SBS,
    VSUM4SHS,
    VSUM4UBS,
    VSUMSWS
  )
>;

// 5 cycle restricted DP operation. One DP unit, one EXEC pipeline and all
// three dispatch slots of the superslice.
def : InstRW<
  [P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MADD(HD|HDU|LD|LD8)$"),
    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
  )
>;

// 7 cycle restricted DP operation. One DP unit, one EXEC pipeline and all
// three dispatch slots of the superslice.
def : InstRW<
  [P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    FRSP,
    (instregex "FRI(N|P|Z|M)(D|S)$"),
    (instregex "FRE(S)?$"),
    (instregex "FADD(S)?$"),
    (instregex "FMSUB(S)?$"),
    (instregex "FMADD(S)?$"),
    (instregex "FSUB(S)?$"),
    (instregex "FCFID(U)?(S)?$"),
    (instregex "FCTID(U)?(Z)?$"),
    (instregex "FCTIW(U)?(Z)?$"),
    (instregex "FRSQRTE(S)?$"),
    FNMADDS,
    FNMADD,
    FNMSUBS,
    FNMSUB,
    FSELD,
    FSELS,
    FMULS,
    FMUL,
    XSMADDADP,
    XSMADDASP,
    XSMADDMDP,
    XSMADDMSP,
    XSMSUBADP,
    XSMSUBASP,
    XSMSUBMDP,
    XSMSUBMSP,
    XSMULDP,
    XSMULSP,
    XSNMADDADP,
    XSNMADDASP,
    XSNMADDMDP,
    XSNMADDMSP,
    XSNMSUBADP,
    XSNMSUBASP,
    XSNMSUBMDP,
    XSNMSUBMSP
  )
>;

// 7 cycle restricted DP operation plus one 3 cycle ALU operation.
// The two operations can run in parallel. The DP op is restricted (three
// dispatch slots) and the ALU op needs one more, for a total of 4.
def : InstRW<
  [P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "FSEL(D|S)o$")
  )
>;

// 5 cycle restricted DP operation plus one 2 cycle ALU operation.
def : InstRW<
  [P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "MUL(H|L)(D|W)(U)?(O)?o$")
  )
>;

// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
// These operations must be done sequentially. The DP is restricted so we need
// a full 4 dispatches.
def : InstRW<
  [P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "FRI(N|P|Z|M)(D|S)o$"),
    (instregex "FRE(S)?o$"),
    (instregex "FADD(S)?o$"),
    (instregex "FSUB(S)?o$"),
    (instregex "F(N)?MSUB(S)?o$"),
    (instregex "F(N)?MADD(S)?o$"),
    (instregex "FCFID(U)?(S)?o$"),
    (instregex "FCTID(U)?(Z)?o$"),
    (instregex "FCTIW(U)?(Z)?o$"),
    (instregex "FMUL(S)?o$"),
    (instregex "FRSQRTE(S)?o$"),
    FRSPo
  )
>;

// 7 cycle DP operation. One DP unit, one EXEC pipeline and one dispatch.
def : InstRW<
  [P9_DP_7C, IP_EXEC_1C, DISP_1C],
  (instrs
    XSADDDP,
    XSADDSP,
    XSCVDPHP,
    XSCVDPSP,
    XSCVDPSXDS,
    XSCVDPSXDSs,
    XSCVDPSXWS,
    XSCVDPUXDS,
    XSCVDPUXDSs,
    XSCVDPUXWS,
    XSCVDPSXWSs,
    XSCVDPUXWSs,
    XSCVHPDP,
    XSCVSPDP,
    XSCVSXDDP,
    XSCVSXDSP,
    XSCVUXDDP,
    XSCVUXDSP,
    XSRDPI,
    XSRDPIC,
    XSRDPIM,
    XSRDPIP,
    XSRDPIZ,
    XSREDP,
    XSRESP,
    XSRSQRTEDP,
    XSRSQRTESP,
    XSSUBDP,
    XSSUBSP,
    XSCVDPSPN,
    XSRSP
  )
>;

// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<
  [P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
  (instrs
    (instregex "LVS(L|R)$"),
    (instregex "VSPLTIS(W|H|B)$"),
    (instregex "VSPLT(W|H|B)(s)?$"),
    (instregex "V_SETALLONES(B|H)?$"),
    (instregex "VEXTRACTU(B|H|W)$"),
    (instregex "VINSERT(B|H|W|D)$"),
    MFVSRLD,
    MTVSRWS,
    VBPERMQ,
    VCLZLSBB,
    VCTZLSBB,
    VEXTRACTD,
    VEXTUBLX,
    VEXTUBRX,
    VEXTUHLX,
    VEXTUHRX,
    VEXTUWLX,
    VEXTUWRX,
    VGBBD,
    VMRGHB,
    VMRGHH,
    VMRGHW,
    VMRGLB,
    VMRGLH,
    VMRGLW,
    VPERM,
    VPERMR,
    VPERMXOR,
    VPKPX,
    VPKSDSS,
    VPKSDUS,
    VPKSHSS,
    VPKSHUS,
    VPKSWSS,
    VPKSWUS,
    VPKUDUM,
    VPKUDUS,
    VPKUHUM,
    VPKUHUS,
    VPKUWUM,
    VPKUWUS,
    VPRTYBQ,
    VSL,
    VSLDOI,
    VSLO,
    VSLV,
    VSR,
    VSRO,
    VSRV,
    VUPKHPX,
    VUPKHSB,
    VUPKHSH,
    VUPKHSW,
    VUPKLPX,
    VUPKLSB,
    VUPKLSH,
    VUPKLSW,
    XXBRD,
    XXBRH,
    XXBRQ,
    XXBRW,
    XXEXTRACTUW,
    XXINSERTW,
    XXMRGHW,
    XXMRGLW,
    XXPERM,
    XXPERMR,
    XXSLDWI,
    XXSLDWIs,
    XXSPLTIB,
    XXSPLTW,
    XXSPLTWs,
    XXPERMDI,
    XXPERMDIs,
    VADDCUQ,
    VADDECUQ,
    VADDEUQM,
    VADDUQM,
    VMUL10CUQ,
    VMUL10ECUQ,
    VMUL10EUQ,
    VMUL10UQ,
    VSUBCUQ,
    VSUBECUQ,
    VSUBEUQM,
    VSUBUQM,
    XSCMPEXPQP,
    XSCMPOQP,
    XSCMPUQP,
    XSTSTDCQP,
    XSXSIGQP,
    BCDCFNo,
    BCDCFZo,
    BCDCPSGNo,
    BCDCTNo,
    BCDCTZo,
    BCDSETSGNo,
    BCDSo,
    BCDTRUNCo,
    BCDUSo,
    BCDUTRUNCo
  )
>;

// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<
  [P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    BCDSRo,
    XSADDQP,
    XSADDQPO,
    XSCVDPQP,
    XSCVQPDP,
    XSCVQPDPO,
    XSCVQPSDZ,
    XSCVQPSWZ,
    XSCVQPUDZ,
    XSCVQPUWZ,
    XSCVSDQP,
    XSCVUDQP,
    XSRQPI,
    XSRQPIX,
    XSRQPXP,
    XSSUBQP,
    XSSUBQPO
  )
>;

// 23 cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<
  [P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    BCDCTSQo
  )
>;

// 24 cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<
  [P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    XSMADDQP,
    XSMADDQPO,
    XSMSUBQP,
    XSMSUBQPO,
    XSMULQP,
    XSMULQPO,
    XSNMADDQP,
    XSNMADDQPO,
    XSNMSUBQP,
    XSNMSUBQPO
  )
>;

// 37 cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<
  [P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    BCDCFSQo
  )
>;

// 58 cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<
  [P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    XSDIVQP,
    XSDIVQPO
  )
>;

// 76 cycle DFU operation. Only one DFU unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch (DISP_1C), like the other DFU entries.
def : InstRW<
  [P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
  (instrs
    XSSQRTQP,
    XSSQRTQPO
  )
>;

// 6 cycle load. Uses a single slice.
def : InstRW<
  [P9_LS_6C, IP_AGEN_1C, DISP_1C],
  (instrs
    // End-anchored like the other patterns in this file so that only LXVL
    // and LXVLL match, not every opcode that merely starts with "LXVL".
    (instregex "LXVL(L)?$")
  )
>;

// 5 Cycle Load uses a single slice.
def : InstRW<
  [P9_LS_5C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "LVE(B|H|W)X$"),
    // End-anchored like the neighbouring patterns so that only LVX and LVXL
    // match, not every opcode that merely starts with "LVX".
    (instregex "LVX(L)?$"),
    (instregex "LXSI(B|H)ZX$"),
    LXSDX,
    LXVB16X,
    LXVD2X,
    LXVWSX,
    LXSIWZX,
    LXV,
    LXVX,
    LXSD,
    DFLOADf64,
    XFLOADf64,
    LIWZX
  )
>;

// 4 cycle load. Uses a single slice.
def : InstRW<
  [P9_LS_4C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "DCB(F|T|ST)(EP)?$"),
    (instregex "DCBZ(L)?(EP)?$"),
    (instregex "DCBTST(EP)?$"),
    (instregex "CP_COPY(8)?$"),
    (instregex "CP_PASTE(8)?$"),
    (instregex "ICBI(EP)?$"),
    (instregex "ICBT(LS)?$"),
    (instregex "LBARX(L)?$"),
    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
    (instregex "LH(A|B)RX(L)?(8)?$"),
    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    (instregex "LWARX(L)?$"),
    (instregex "LWBRX(8)?$"),
    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
    CP_ABORT,
    DARN,
    EnforceIEIO,
    ISYNC,
    MSGSYNC,
    TLBSYNC,
    SYNC,
    LMW,
    LSWI
  )
>;

// 4 cycle restricted load. Uses a single slice but takes the dispatch slots
// of the whole superslice.
def : InstRW<
  [P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
  (instrs
    LFIWZX,
    LFDX,
    LFD
  )
>;

// Cracked load instructions.
// Two load pieces that can be executed in parallel.
def : InstRW<
  [P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
   DISP_PAIR_1C],
  (instrs
    SLBIA,
    SLBIE,
    SLBMFEE,
    SLBMFEV,
    SLBMTE,
    TLBIEL
  )
>;

// Cracked load instruction.
// Made of a 4 cycle load piece and a 2 cycle ALU piece; the two pieces can
// be run in parallel.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_PAIR_1C, DISP_PAIR_1C],
  (instrs
    (instregex "L(W|H)ZU(X)?(8)?$")
  )
>;

// Cracked TEND Instruction.
// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
// operations can be run in parallel.
def : InstRW<
  [P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_1C, DISP_1C],
  (instrs
    TEND
  )
>;

// Cracked store instruction.
// Consecutive store and ALU pieces. The store is restricted and takes three
// dispatch slots; the ALU piece takes one.
def : InstRW<
  [P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "ST(B|H|W|D)CX$")
  )
>;

// Cracked load instruction.
// Two consecutive load operations for a total of 8 cycles.
def : InstRW<
  [P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
   DISP_1C, DISP_1C],
  (instrs
    LDMX
  )
>;

// Cracked load instruction.
// Consecutive load and ALU pieces totaling 6 cycles. The pieces cannot
// overlap, so their latencies are added.
def : InstRW<
  [P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_1C, DISP_1C],
  (instrs
    (instregex "LHA(X)?(8)?$"),
    (instregex "CP_PASTE(8)?o$"),
    (instregex "LWA(X)?(_32)?$"),
    TCHECK
  )
>;

// Cracked restricted load instruction.
// Consecutive load and ALU pieces totaling 6 cycles. The pieces cannot
// overlap, so their latencies are added. Being both cracked and restricted,
// it needs a full 6 dispatch slots (3 per piece).
def : InstRW<
  [P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    LFIWAX
  )
>;

// Cracked load instruction.
// Consecutive load and ALU pieces totaling 7 cycles. The pieces cannot
// overlap, so their latencies are added. One dispatch is needed for each of
// the two cracked pieces.
def : InstRW<
  [P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    LXSIWAX,
    LIWAX
  )
>;

// Cracked restricted load instruction.
// Consecutive load (4 cycles) and ALU (3 cycles) pieces totaling 7 cycles.
// The pieces cannot overlap, so their latencies are added. Both pieces are
// restricted, so a full 6 dispatch slots are required.
def : InstRW<
  [P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    LFSX,
    LFS
  )
>;

// Cracked load instruction.
// Consecutive load and ALU pieces totaling 8 cycles. The pieces cannot
// overlap, so their latencies are added. One dispatch is needed for each of
// the two cracked pieces.
def : InstRW<
  [P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    LXSSP,
    LXSSPX,
    XFLOADf32,
    DFLOADf32
  )
>;

// Cracked 3-way load instruction.
// A load with two ALU operations that depend on each other.
def : InstRW<
  [P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
  (instrs
    (instregex "LHAU(X)?(8)?$"),
    LWAUX
  )
>;

// Cracked load that requires the PM resource.
// The load and the PM op cannot overlap, so the latencies are added for a
// total of 8 cycles. The PM op needs the full superslice: both EXECE and
// EXECO pipelines plus one dispatch. The load takes the other dispatch.
def : InstRW<
  [P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C, DISP_1C],
  (instrs
    LXVH8X,
    LXVDSX,
    LXVW4X
  )
>;

// Single slice Restricted store operation. The restricted operation requires
// all three dispatches for the superslice.
def : InstRW<
  [P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "STF(S|D|IWX|SX|DX)$"),
    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
    (instregex "STW(8)?$"),
    (instregex "(D|X)FSTORE(f32|f64)$"),
    (instregex "ST(W|H|D)BRX$"),
    (instregex "ST(B|H|D)(8)?$"),
    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
    STIWX,
    SLBIEG,
    STMW,
    STSWI,
    TLBIE
  )
>;

// Vector store instruction.
// Takes the whole superslice: one dispatch plus both the even and odd exec
// pipelines, in addition to the AGEN pipeline.
def : InstRW<
  [P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
  (instrs
    (instregex "STVE(B|H|W)X$"),
    (instregex "STVX(L)?$"),
    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
  )
>;

// 5 cycle DIV operation. Only one DIV unit per superslice so we use the whole
// superslice: both exec pipelines (EXECO, EXECE), dispatched via
// DISP_EVEN_1C.
def : InstRW<
  [P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  (instrs
    (instregex "MTCTR(8)?(loop)?$"),
    (instregex "MTLR(8)?$")
  )
>;

// 12 cycle DIV operation. Only one DIV unit per superslice so we use the
// whole superslice: both exec pipelines (EXECO, EXECE), dispatched via
// DISP_EVEN_1C.
def : InstRW<
  [P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
  (instrs
    (instregex "M(T|F)VRSAVE(v)?$"),
    (instregex "M(T|F)PMR$"),
    (instregex "M(T|F)TB(8)?$"),
    (instregex "MF(SPR|CTR|LR)(8)?$"),
    (instregex "M(T|F)MSR(D)?$"),
    (instregex "MTSPR(8)?$")
  )
>;

// 16 cycle DIV operation. Only one DIV unit per superslice so we use the
// whole superslice: both exec pipelines (EXECO, EXECE), dispatched via
// DISP_EVEN_1C.
def : InstRW<
  [P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVW,
    DIVWO,
    DIVWU,
    DIVWUO,
    MODSW
  )
>;

// 24 cycle DIV operation. Only one DIV unit per superslice so we use the
// whole superslice: both exec pipelines (EXECO, EXECE), dispatched via
// DISP_EVEN_1C.
def : InstRW<
  [P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVWE,
    DIVWEO,
    DIVD,
    DIVDO,
    DIVWEU,
    DIVWEUO,
    DIVDU,
    DIVDUO,
    MODSD,
    MODUD,
    MODUW
  )
>;

// 40 cycle DIV operation. Only one DIV unit per superslice so we use the
// whole superslice: both exec pipelines (EXECO, EXECE), dispatched via
// DISP_EVEN_1C.
def : InstRW<
  [P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
  (instrs
    DIVDE,
    DIVDEO,
    DIVDEU,
    DIVDEUO
  )
>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV
// per superslice. Latency of DIV plus ALU is 18.
def : InstRW<
  [P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs
    (instregex "DIVW(U)?(O)?o$")
  )
>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV
// per superslice. Latency of DIV plus ALU is 26.
def : InstRW<
  [P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs
    DIVDo,
    DIVDOo,
    DIVDUo,
    DIVDUOo,
    DIVWEo,
    DIVWEOo,
    DIVWEUo,
    DIVWEUOo
  )
>;

// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
// and one full superslice for the DIV operation since there is only one DIV
// per superslice. Latency of DIV plus ALU is 42.
def : InstRW<
  [P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
   DISP_EVEN_1C, DISP_1C],
  (instrs
    DIVDEo,
    DIVDEOo,
    DIVDEUo,
    DIVDEUOo
  )
>;

// CR access instructions in _BrMCR, IIC_BrMCRX.

// Cracked, restricted ALU operations.
// The two ALU ops can run in parallel, so their latencies are not added.
// Otherwise this is like two instructions running together on two pipelines
// with 6 dispatches. ALU ops are 2 cycles each.
def : InstRW<
  [P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    MTCRF,
    MTCRF8
  )
>;

// Cracked ALU operations.
// The two ALU ops can run in parallel, so their latencies are not added.
// Otherwise this is like two instructions running together on two pipelines
// with 2 dispatches. ALU ops are 2 cycles each.
def : InstRW<
  [P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C],
  (instrs
    (instregex "ADDC(8)?(O)?o$"),
    (instregex "SUBFC(8)?(O)?o$")
  )
>;

// Cracked ALU operations.
// Two ALU ops can be done in parallel.
// One is a three cycle ALU op, the other is a two cycle ALU op.
// One of the ALU ops is restricted (3 dispatch slots) and the other is not
// (1 dispatch), for a total of 4 dispatches.
def : InstRW<
  [P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "F(N)?ABS(D|S)o$"),
    (instregex "FCPSGN(D|S)o$"),
    (instregex "FNEG(D|S)o$"),
    FMRo
  )
>;

// Cracked ALU operations.
// The two ALU ops can run in parallel, so their latencies are not added.
// Otherwise this is like two instructions running together on two pipelines
// with 2 dispatches. ALU ops are 3 cycles each.
def : InstRW<
  [P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_1C, DISP_1C],
  (instrs
    MCRFS
  )
>;

// Cracked restricted ALU operations.
// The two ALU ops can run in parallel, so their latencies are not added.
// Otherwise this is like two instructions running together on two pipelines
// with 6 dispatches. ALU ops are 3 cycles each.
def : InstRW<
  [P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MTFSF(b|o)?$"),
    (instregex "MTFSFI(o)?$")
  )
>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// One of the ALU ops is restricted and takes 3 dispatches.
def : InstRW<
  [P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "RLD(I)?C(R|L)o$"),
    (instregex "RLW(IMI|INM|NM)(8)?o$"),
    (instregex "SLW(8)?o$"),
    (instregex "SRAW(I)?o$"),
    (instregex "SRW(8)?o$"),
    RLDICL_32o,
    RLDIMIo
  )
>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
// Both of the ALU ops are restricted and take 3 dispatches each.
def : InstRW<
  [P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MFFS(L|CE|o)?$")
  )
>;

// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
// total of 6 cycles. All of the ALU operations are also restricted so each
// takes 3 dispatches for a total of 9.
def : InstRW<
  [P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
  (instrs
    (instregex "MFCR(8)?$")
  )
>;

// Cracked instruction made of two ALU ops.
// The two ops cannot be done in parallel.
def : InstRW<
  [P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
  (instrs
    (instregex "EXTSWSLI_32_64o$"),
    (instregex "SRAD(I)?o$"),
    EXTSWSLIo,
    SLDo,
    SRDo,
    RLDICo
  )
>;

// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<
  [P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    FDIV
  )
>;

// 33 cycle restricted DP instruction, cracked with a 3 cycle ALU op.
def : InstRW<
  [P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    FDIVo
  )
>;

// 36 cycle DP instruction.
// Can be done on a single slice.
def : InstRW<
  [P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
  (instrs
    XSSQRTDP
  )
>;

// 36 cycle restricted DP instruction. Takes one slice and 3 dispatches.
def : InstRW<
  [P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    FSQRT
  )
>;

// 36 cycle DP vector instruction.
def : InstRW<
  [P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C],
  (instrs
    XVSQRTDP
  )
>;

// 27 cycle DP vector instruction.
def : InstRW<
  [P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C],
  (instrs
    XVSQRTSP
  )
>;

// 36 cycle restricted DP instruction, cracked with a 3 cycle ALU op.
def : InstRW<
  [P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    FSQRTo
  )
>;

// 26 cycle DP instruction.
def : InstRW<
  [P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
  (instrs
    XSSQRTSP
  )
>;

// 26 cycle restricted DP instruction. Takes one slice and 3 dispatches.
def : InstRW<
  [P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    FSQRTS
  )
>;

// 26 cycle restricted DP instruction, cracked with a 3 cycle ALU op.
def : InstRW<
  [P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    FSQRTSo
  )
>;

// 33 cycle DP instruction. Takes one slice and 1 dispatch.
def : InstRW<
  [P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
  (instrs
    XSDIVDP
  )
>;

// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
def : InstRW<
  [P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
  (instrs
    FDIVS
  )
>;

// 22 cycle restricted DP instruction, cracked with a 3 cycle ALU op
// (22 + 3 = 25 cycles, matching the P9_DPOpAndALU2Op_25C_5 resource).
def : InstRW<
  [P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_1C],
  (instrs
    FDIVSo
  )
>;

// 22 cycle DP instruction. Takes one slice and 1 dispatch.
def : InstRW<
  [P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
  (instrs
    XSDIVSP
  )
>;

// 24 cycle DP vector instruction. Takes one full superslice: both the EXECE
// and EXECO pipelines and 1 dispatch for that superslice.
def : InstRW<
  [P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C],
  (instrs
    XVDIVSP
  )
>;

// 33 cycle DP vector instruction. Takes one full superslice: both the EXECE
// and EXECO pipelines and 1 dispatch for that superslice.
def : InstRW<
  [P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
   DISP_1C],
  (instrs
    XVDIVDP
  )
>;

// Instruction cracked into three pieces: one load and two ALU operations.
// The load and one of the ALU ops cannot run at the same time, so their
// latencies are added together for 7 cycles. The remaining ALU op is 2 cycles.
// Both the load and the ALU op that depends on it are restricted, so each
// takes 3 dispatch slots; the second ALU op takes 1 more, for a total of 7.
// The two EXEC pipelines are for the 2 ALU ops; the AGEN is for the load.
def : InstRW<
  [P9_LoadAndALU2Op_7C, P9_ALU_2C,
   IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
   DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
  (instrs
    (instregex "LF(SU|SUX)$")
  )
>;

// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
// the store and so it can be run at the same time as the store. The store is
// also restricted.
def : InstRW<[P9_LS_1C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
             (instrs (instregex "STF(S|D)U(X)?$"),
                     (instregex "ST(B|H|W|D)U(X)?(8)?$"))>;

// Cracked into a Load and an ALU op. The ALU op is independent of the load,
// so the two can execute at the same time.
def : InstRW<[P9_LS_4C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C,
              DISP_PAIR_1C, DISP_PAIR_1C],
             (instrs (instregex "LBZU(X)?(8)?$"),
                     (instregex "LDU(X)?$"))>;

// Cracked into a Load and an ALU op. The ALU op is independent of the load,
// so the two can execute at the same time. The load is restricted and so takes
// a 3-slot dispatch (DISP_3SLOTS_1C); the ALU op accounts for the remaining
// dispatch (DISP_1C). The AGEN pipeline belongs to the load and the EXEC
// pipeline is required for the ALU op.
def : InstRW<[P9_LS_4C, P9_ALU_2C,
              IP_AGEN_1C, IP_EXEC_1C,
              DISP_3SLOTS_1C, DISP_1C],
             (instrs (instregex "LF(DU|DUX)$"))>;

// Crypto Instructions

// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
// superslice. That includes both exec pipelines (EXECO, EXECE) and one
// dispatch.
def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
             (instrs (instregex "VPMSUM(B|H|W|D)$"),
                     (instregex "V(N)?CIPHER(LAST)?$"),
                     VSBOX)>;

// Branch Instructions

// Branches with a two cycle latency. They use the branch resource and the
// branch dispatch only.
def : InstRW<[P9_BR_2C, DISP_BR_1C],
             (instrs
               // Families selected by pattern.
               (instregex "BCCCTR(L)?(8)?$"),
               (instregex "BCCL(A|R|RL)?$"),
               (instregex "BCCTR(L)?(8)?(n)?$"),
               (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
               (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
               (instregex "BL(_TLS|_NOP)?$"),
               (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
               (instregex "BLA(8|8_NOP)?$"),
               (instregex "BLR(8|L)?$"),
               (instregex "TAILB(A)?(8)?$"),
               (instregex "TAILBCTR(8)?$"),
               (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
               (instregex "BCLR(L)?(n)?$"),
               (instregex "BCTR(L)?(8)?$"),
               // Individually named instructions.
               B, BA, BC, BCC, BCCA, BCL, BCLalways, BCLn,
               BCTRL8_LDinto_toc, BCn, CTRL_DEP)>;

// Five Cycle Branch with a 2 Cycle ALU Op
// Operations must be done consecutively and not in parallel.
def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
             (instrs ADDPCIS)>;

// Special Extracted Instructions For Atomics

// Atomic Load
def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C, IP_EXEC_1C,
              IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C,
              DISP_1C, DISP_1C, DISP_1C],
             (instrs (instregex "L(D|W)AT$"))>;

// Atomic Store
def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C,
              IP_EXEC_1C,
              IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
              DISP_1C, DISP_3SLOTS_1C, DISP_1C],
             (instrs (instregex "ST(D|W)AT$"))>;

// Signal Processing Engine (SPE) Instructions
// Power 9 does not support these instructions, so they get an empty resource
// list and are flagged as Unsupported.
def : InstRW<[],
             (instrs
               // Individually named instructions.
               BRINC, EVABS, EVEQV, EVMRA, EVNAND, EVNEG,
               // Families selected by pattern.
               (instregex "EVADD(I)?W$"),
               (instregex "EVADD(SM|SS|UM|US)IAAW$"),
               (instregex "EVAND(C)?$"),
               (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
               (instregex "EVCNTL(S|Z)W$"),
               (instregex "EVDIVW(S|U)$"),
               (instregex "EVEXTS(B|H)$"),
               (instregex "EVLD(H|W|D)(X)?$"),
               (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
               (instregex "EVLWHE(X)?$"),
               (instregex "EVLWHO(S|U)(X)?$"),
               (instregex "EVLW(H|W)SPLAT(X)?$"),
               (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
               (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
               (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
               (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
               (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
               (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
               (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
               (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
               (instregex "EVMWHUMI(A)?$"),
               (instregex "EVMWLS(M|S)IA(A|N)W$"),
               (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
               (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
               (instregex "EVMWSSF(A|AA|AN)?$"),
               (instregex "EVMWUMI(A|AA|AN)?$"),
               (instregex "EV(N|X)?OR(C)?$"),
               (instregex "EVR(LW|LWI|NDW)$"),
               (instregex "EVSLW(I)?$"),
               (instregex "EVSPLAT(F)?I$"),
               (instregex "EVSRW(I)?(S|U)$"),
               (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
               (instregex "EVSUBF(S|U)(M|S)IAAW$"),
               (instregex "EVSUB(I)?FW$"))> {
  let Unsupported = 1;
}

// General instructions that have no scheduling support. They get an empty
// resource list and are flagged as Unsupported.
def : InstRW<[],
             (instrs
               // Families selected by pattern.
               (instregex "(H)?RFI(D)?$"),
               (instregex "DSS(ALL)?$"),
               (instregex "DST(ST)?(T)?(64)?$"),
               (instregex "ICBL(C|Q)$"),
               (instregex "L(W|H|B)EPX$"),
               (instregex "ST(W|H|B)EPX$"),
               (instregex "(L|ST)FDEPX$"),
               (instregex "M(T|F)SR(IN)?$"),
               (instregex "M(T|F)DCR$"),
               (instregex "NOP_GT_PWR(6|7)$"),
               (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
               (instregex "WRTEE(I)?$"),
               // Individually named instructions.
               ATTN, CLRBHRB, MFBHRBE, MBAR, MSYNC, SLBSYNC, SLBFEEo,
               NAP, STOP, TRAP, RFCI, RFDI, RFMCI, SC,
               DCBA, DCBI, DCCCI, ICCCI)> {
  let Unsupported = 1;
}