xref: /freebsd/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td (revision 63f537551380d2dab29fa402ad1269feae17e594)
1//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the resources required by P9 instructions. This is part of
10// the P9 processor model used for instruction scheduling. This file should
11// contain all the instructions that may be used on Power 9. This is not
12// just instructions that are new on Power 9 but also instructions that were
13// available on earlier architectures and are still used in Power 9.
14//
15// The makeup of the P9 CPU is modeled as follows:
16//   - Each CPU is made up of two superslices.
17//   - Each superslice is made up of two slices. Therefore, there are 4 slices
18//   for each CPU.
19//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
20//   - Each CPU has:
21//     - One CY (Crypto) unit P9_CY_*
22//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
23//     - Two PM (Permute) units. One on each superslice. P9_PM_*
24//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
25//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
26//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
27//       This also includes fixed point multiply add.
28//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
29//     - Four Load/Store Queues. P9_LS_*
30//   - Each set of instructions will require a number of these resources.
31//===----------------------------------------------------------------------===//
32
33// Two-cycle ALU vector operation that occupies an entire superslice.
34// Uses both ALU units (even ALUE and odd ALUO), both execution pipelines
35// (EXECE, EXECO) and one dispatch (DISP_1C) of the given superslice.
36def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
37      (instrs
38    (instregex "VADDU(B|H|W|D)M$"),
39    (instregex "VAND(C)?$"),
40    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
41    (instregex "V_SET0(B|H)?$"),
42    (instregex "VS(R|L)(B|H|W|D)$"),
43    (instregex "VSUBU(B|H|W|D)M$"),
44    (instregex "VPOPCNT(B|H)$"),
45    (instregex "VRL(B|H|W|D)$"),
46    (instregex "VSRA(B|H|W|D)$"),
47    (instregex "XV(N)?ABS(D|S)P$"),
48    (instregex "XVCPSGN(D|S)P$"),
49    (instregex "XV(I|X)EXP(D|S)P$"),
50    (instregex "VRL(D|W)(MI|NM)$"),
51    (instregex "VMRG(E|O)W$"),
52    MTVSRDD,
53    VEQV,
54    VNAND,
55    VNEGD,
56    VNEGW,
57    VNOR,
58    VOR,
59    VORC,
60    VSEL,
61    VXOR,
62    XVNEGDP,
63    XVNEGSP,
64    XXLAND,
65    XXLANDC,
66    XXLEQV,
67    XXLEQVOnes,
68    XXLNAND,
69    XXLNOR,
70    XXLOR,
71    XXLORf,
72    XXLORC,
73    XXLXOR,
74    XXLXORdpz,
75    XXLXORspz,
76    XXLXORz,
77    XXSEL,
78    XSABSQP,
79    XSCPSGNQP,
80    XSIEXPQP,
81    XSNABSQP,
82    XSNEGQP,
83    XSXEXPQP
84)>;
85
86// Restricted-dispatch 3-cycle ALU operation. It executes on a single slice,
87// but because it is restricted it claims all three dispatches
88// (DISP_3SLOTS_1C) of that superslice.
89def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
90      (instrs
91    (instregex "TABORT(D|W)C(I)?$"),
92    (instregex "MTFSB(0|1)$"),
93    (instregex "MFFSC(D)?RN(I)?$"),
94    (instregex "CMPRB(8)?$"),
95    (instregex "TD(I)?$"),
96    (instregex "TW(I)?$"),
97    (instregex "FCMP(O|U)(S|D)$"),
98    (instregex "XSTSTDC(S|D)P$"),
99    FTDIV,
100    FTSQRT,
101    CMPEQB
102)>;
103
104// Standard-dispatch 3-cycle ALU operation: one slice, one dispatch (DISP_1C).
105def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
106      (instrs
107    (instregex "XSMAX(C|J)?DP$"),
108    (instregex "XSMIN(C|J)?DP$"),
109    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
110    (instregex "CNT(L|T)Z(D|W)(8)?(_rec)?$"),
111    (instregex "POPCNT(D|W)$"),
112    (instregex "CMPB(8)?$"),
113    (instregex "SETB(8)?$"),
114    XSTDIVDP,
115    XSTSQRTDP,
116    XSXSIGDP,
117    XSCVSPDPN,
118    BPERMD
119)>;
120
121// Standard-dispatch 2-cycle ALU operation: one slice, one dispatch (DISP_1C).
122def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
123      (instrs
124    (instregex "S(L|R)D$"),
125    (instregex "SRAD(I)?$"),
126    (instregex "EXTSWSLI_32_64$"),
127    (instregex "MFV(S)?RD$"),
128    (instregex "MTV(S)?RD$"),
129    (instregex "MTV(S)?RW(A|Z)$"),
130    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
131    (instregex "CMP(L)?D(I)?$"),
132    (instregex "SUBF(I)?C(8)?(O)?$"),
133    (instregex "ANDI(S)?(8)?(_rec)?$"),
134    (instregex "ADDC(8)?(O)?$"),
135    (instregex "ADDIC(8)?(_rec)?$"),
136    (instregex "ADD(8|4)(O)?(_rec)?$"),
137    (instregex "ADD(E|ME|ZE)(8)?(O)?(_rec)?$"),
138    (instregex "SUBF(E|ME|ZE)?(8)?(O)?(_rec)?$"),
139    (instregex "NEG(8)?(O)?(_rec)?$"),
140    (instregex "POPCNTB$"),
141    (instregex "POPCNTB8$"),  // Could be folded into "POPCNTB(8)?$" above.
142    (instregex "ADD(I|IS)?(8)?$"),
143    (instregex "LI(S)?(8)?$"),
144    (instregex "(X)?OR(I|IS)?(8)?(_rec)?$"),
145    (instregex "NAND(8)?(_rec)?$"),
146    (instregex "AND(C)?(8)?(_rec)?$"),
147    (instregex "NOR(8)?(_rec)?$"),
148    (instregex "OR(C)?(8)?(_rec)?$"),
149    (instregex "EQV(8)?(_rec)?$"),
150    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(_rec)?$"),
151    (instregex "ADD(4|8)(TLS)?(_)?$"),  // Also matches ADD4/ADD8, covered above.
152    (instregex "NEG(8)?(O)?$"),  // Redundant: subset of the NEG pattern above.
153    (instregex "ADDI(S)?toc(HA|L)(8)?$"),
154    (instregex "LA(8)?$"),
155    COPY,
156    MCRF,
157    MCRXRX,
158    XSNABSDP,
159    XSNABSDPs,
160    XSXEXPDP,
161    XSABSDP,
162    XSNEGDP,
163    XSCPSGNDP,
164    MFVSRWZ,
165    MFVRWZ,
166    EXTSWSLI,
167    SRADI_32,
168    RLDIC,
169    RFEBB,
170    TBEGIN,
171    TRECHKPT,
172    NOP,
173    WAIT
174)>;
175
176// Restricted-dispatch 2-cycle ALU operation. It executes on a single slice,
177// but because it is restricted it claims all three dispatches
178// (DISP_3SLOTS_1C) of that superslice.
179def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
180      (instrs
181    (instregex "RLDC(L|R)$"),
182    (instregex "RLWIMI(8)?$"),
183    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
184    (instregex "M(F|T)OCRF(8)?$"),
185    (instregex "CR(6)?(UN)?SET$"),
186    (instregex "CR(N)?(OR|AND)(C)?$"),
187    (instregex "S(L|R)W(8)?$"),
188    (instregex "RLW(INM|NM)(8)?$"),
189    (instregex "F(N)?ABS(D|S)$"),
190    (instregex "FNEG(D|S)$"),
191    (instregex "FCPSGN(D|S)$"),
192    (instregex "SRAW(I)?$"),
193    (instregex "ISEL(8)?$"),
194    RLDIMI,
195    XSIEXPDP,
196    FMR,
197    CREQV,
198    CRNOT,
199    CRXOR,
200    TRECLAIM,
201    TSR,
202    TABORT
203)>;
204
205// Three-cycle ALU vector operation that occupies an entire superslice.
206// Uses both ALU units (even ALUE and odd ALUO), both execution pipelines
207// (EXECE, EXECO) and one dispatch (DISP_1C) of the given superslice.
208def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
209      (instrs
210    (instregex "M(T|F)VSCR$"),
211    (instregex "VCMPNEZ(B|H|W)$"),
212    (instregex "VCMPEQU(B|H|W|D)$"),
213    (instregex "VCMPNE(B|H|W)$"),
214    (instregex "VABSDU(B|H|W)$"),
215    (instregex "VADDU(B|H|W)S$"),
216    (instregex "VAVG(S|U)(B|H|W)$"),
217    (instregex "VCMP(EQ|GE|GT)FP(_rec)?$"),
218    (instregex "VCMPBFP(_rec)?$"),
219    (instregex "VC(L|T)Z(B|H|W|D)$"),
220    (instregex "VADDS(B|H|W)S$"),
221    (instregex "V(MIN|MAX)FP$"),
222    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
223    VBPERMD,
224    VADDCUW,
225    VPOPCNTW,
226    VPOPCNTD,
227    VPRTYBD,
228    VPRTYBW,
229    VSHASIGMAD,
230    VSHASIGMAW,
231    VSUBSBS,
232    VSUBSHS,
233    VSUBSWS,
234    VSUBUBS,
235    VSUBUHS,
236    VSUBUWS,
237    VSUBCUW,
238    VCMPGTSB,
239    VCMPGTSB_rec,
240    VCMPGTSD,
241    VCMPGTSD_rec,
242    VCMPGTSH,
243    VCMPGTSH_rec,
244    VCMPGTSW,
245    VCMPGTSW_rec,
246    VCMPGTUB,
247    VCMPGTUB_rec,
248    VCMPGTUD,
249    VCMPGTUD_rec,
250    VCMPGTUH,
251    VCMPGTUH_rec,
252    VCMPGTUW,
253    VCMPGTUW_rec,
254    VCMPNEB_rec,
255    VCMPNEH_rec,
256    VCMPNEW_rec,
257    VCMPNEZB_rec,
258    VCMPNEZH_rec,
259    VCMPNEZW_rec,
260    VCMPEQUB_rec,
261    VCMPEQUD_rec,
262    VCMPEQUH_rec,
263    VCMPEQUW_rec,
264    XVCMPEQDP,
265    XVCMPEQDP_rec,
266    XVCMPEQSP,
267    XVCMPEQSP_rec,
268    XVCMPGEDP,
269    XVCMPGEDP_rec,
270    XVCMPGESP,
271    XVCMPGESP_rec,
272    XVCMPGTDP,
273    XVCMPGTDP_rec,
274    XVCMPGTSP,
275    XVCMPGTSP_rec,
276    XVMAXDP,
277    XVMAXSP,
278    XVMINDP,
279    XVMINSP,
280    XVTDIVDP,
281    XVTDIVSP,
282    XVTSQRTDP,
283    XVTSQRTSP,
284    XVTSTDCDP,
285    XVTSTDCSP,
286    XVXSIGDP,
287    XVXSIGSP
288)>;
289
290// 7 cycle DP vector operation that uses an entire superslice.
291// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
292// EXECO) and one dispatch (DISP_1C) to the given superslice.
293def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
294      (instrs
295    VADDFP,
296    VCTSXS,
297    VCTSXS_0,
298    VCTUXS,
299    VCTUXS_0,
300    VEXPTEFP,
301    VLOGEFP,
302    VMADDFP,
303    VMHADDSHS,
304    VNMSUBFP,
305    VREFP,
306    VRFIM,
307    VRFIN,
308    VRFIP,
309    VRFIZ,
310    VRSQRTEFP,
311    VSUBFP,
312    XVADDDP,
313    XVADDSP,
314    XVCVDPSP,
315    XVCVDPSXDS,
316    XVCVDPSXWS,
317    XVCVDPUXDS,
318    XVCVDPUXWS,
319    XVCVHPSP,
320    XVCVSPDP,
321    XVCVSPHP,
322    XVCVSPSXDS,
323    XVCVSPSXWS,
324    XVCVSPUXDS,
325    XVCVSPUXWS,
326    XVCVSXDDP,
327    XVCVSXDSP,
328    XVCVSXWDP,
329    XVCVSXWSP,
330    XVCVUXDDP,
331    XVCVUXDSP,
332    XVCVUXWDP,
333    XVCVUXWSP,
334    XVMADDADP,
335    XVMADDASP,
336    XVMADDMDP,
337    XVMADDMSP,
338    XVMSUBADP,
339    XVMSUBASP,
340    XVMSUBMDP,
341    XVMSUBMSP,
342    XVMULDP,
343    XVMULSP,
344    XVNMADDADP,
345    XVNMADDASP,
346    XVNMADDMDP,
347    XVNMADDMSP,
348    XVNMSUBADP,
349    XVNMSUBASP,
350    XVNMSUBMDP,
351    XVNMSUBMSP,
352    XVRDPI,
353    XVRDPIC,
354    XVRDPIM,
355    XVRDPIP,
356    XVRDPIZ,
357    XVREDP,
358    XVRESP,
359    XVRSPI,
360    XVRSPIC,
361    XVRSPIM,
362    XVRSPIP,
363    XVRSPIZ,
364    XVRSQRTEDP,
365    XVRSQRTESP,
366    XVSUBDP,
367    XVSUBSP,
368    VCFSX,
369    VCFSX_0,
370    VCFUX,
371    VCFUX_0,
372    VMHRADDSHS,
373    VMLADDUHM,
374    VMSUMMBM,
375    VMSUMSHM,
376    VMSUMSHS,
377    VMSUMUBM,
378    VMSUMUHM,
379    VMSUMUDM,
380    VMSUMUHS,
381    VMULESB,
382    VMULESH,
383    VMULESW,
384    VMULEUB,
385    VMULEUH,
386    VMULEUW,
387    VMULOSB,
388    VMULOSH,
389    VMULOSW,
390    VMULOUB,
391    VMULOUH,
392    VMULOUW,
393    VMULUWM,
394    VSUM2SWS,
395    VSUM4SBS,
396    VSUM4SHS,
397    VSUM4UBS,
398    VSUMSWS
399)>;
400
401// 5-cycle restricted DP operation. One DP unit, one EXEC pipeline and all
402// three dispatches (DISP_3SLOTS_1C) of the superslice.
403def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
404      (instrs
405    (instregex "MADD(HD|HDU|LD|LD8)$"),
406    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?(O)?$")
407)>;
408
409// 7-cycle restricted DP operation. One DP unit, one EXEC pipeline and all
410// three dispatches (DISP_3SLOTS_1C) of the superslice.
411def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
412      (instrs
413    FRSP,
414    (instregex "FRI(N|P|Z|M)(D|S)$"),
415    (instregex "FRE(S)?$"),
416    (instregex "FADD(S)?$"),
417    (instregex "FMSUB(S)?$"),
418    (instregex "FMADD(S)?$"),
419    (instregex "FSUB(S)?$"),
420    (instregex "FCFID(U)?(S)?$"),
421    (instregex "FCTID(U)?(Z)?$"),
422    (instregex "FCTIW(U)?(Z)?$"),
423    (instregex "FRSQRTE(S)?$"),
424    FNMADDS,
425    FNMADD,
426    FNMSUBS,
427    FNMSUB,
428    FSELD,
429    FSELS,
430    FMULS,
431    FMUL,
432    XSMADDADP,
433    XSMADDASP,
434    XSMADDMDP,
435    XSMADDMSP,
436    XSMSUBADP,
437    XSMSUBASP,
438    XSMSUBMDP,
439    XSMSUBMSP,
440    XSMULDP,
441    XSMULSP,
442    XSNMADDADP,
443    XSNMADDASP,
444    XSNMADDMDP,
445    XSNMADDMSP,
446    XSNMSUBADP,
447    XSNMSUBASP,
448    XSNMSUBMDP,
449    XSNMSUBMSP
450)>;
451
452// 7-cycle restricted DP operation paired with one 3-cycle ALU operation.
453// The two operations can run in parallel. The DP op is restricted, so it takes
454// 3 dispatches (DISP_3SLOTS_1C); with the ALU's DISP_1C that is 4 in total.
455def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
456              DISP_3SLOTS_1C, DISP_1C],
457      (instrs
458    (instregex "FSEL(D|S)_rec$")
459)>;
460
461// 5-cycle restricted DP operation plus one 2-cycle ALU operation (7C combined).
462def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
463              DISP_3SLOTS_1C, DISP_1C],
464      (instrs
465    (instregex "MUL(H|L)(D|W)(U)?(O)?_rec$")
466)>;
467
468// 7-cycle restricted DP operation and one 3-cycle ALU operation.
469// These operations must be done sequentially. The DP is restricted so we need
470// a full 4 dispatches (DISP_3SLOTS_1C plus DISP_1C).
471def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
472              DISP_3SLOTS_1C, DISP_1C],
473      (instrs
474    (instregex "FRI(N|P|Z|M)(D|S)_rec$"),
475    (instregex "FRE(S)?_rec$"),
476    (instregex "FADD(S)?_rec$"),
477    (instregex "FSUB(S)?_rec$"),
478    (instregex "F(N)?MSUB(S)?_rec$"),
479    (instregex "F(N)?MADD(S)?_rec$"),
480    (instregex "FCFID(U)?(S)?_rec$"),
481    (instregex "FCTID(U)?(Z)?_rec$"),
482    (instregex "FCTIW(U)?(Z)?_rec$"),
483    (instregex "FMUL(S)?_rec$"),
484    (instregex "FRSQRTE(S)?_rec$"),
485    FRSP_rec
486)>;
487
488// 7-cycle DP operation. One DP unit, one EXEC pipeline and one dispatch unit.
489def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
490      (instrs
491    XSADDDP,
492    XSADDSP,
493    XSCVDPHP,
494    XSCVDPSP,
495    XSCVDPSXDS,
496    XSCVDPSXDSs,
497    XSCVDPSXWS,
498    XSCVDPUXDS,
499    XSCVDPUXDSs,
500    XSCVDPUXWS,
501    XSCVDPSXWSs,
502    XSCVDPUXWSs,
503    XSCVHPDP,
504    XSCVSPDP,
505    XSCVSXDDP,
506    XSCVSXDSP,
507    XSCVUXDDP,
508    XSCVUXDSP,
509    XSRDPI,
510    XSRDPIC,
511    XSRDPIM,
512    XSRDPIP,
513    XSRDPIZ,
514    XSREDP,
515    XSRESP,
516    XSRSQRTEDP,
517    XSRSQRTESP,
518    XSSUBDP,
519    XSSUBSP,
520    XSCVDPSPN,
521    XSRSP
522)>;
523
524// Three-cycle PM operation. There is only one PM unit per superslice, so the
525// whole superslice is used: both exec pipelines (EXECO, EXECE) and one
526// dispatch (DISP_1C).
527def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
528      (instrs
529    (instregex "LVS(L|R)$"),
530    (instregex "VSPLTIS(W|H|B)$"),
531    (instregex "VSPLT(W|H|B)(s)?$"),
532    (instregex "V_SETALLONES(B|H)?$"),
533    (instregex "VEXTRACTU(B|H|W)$"),
534    (instregex "VINSERT(B|H|W|D)$"),
535    MFVSRLD,
536    MTVSRWS,
537    VBPERMQ,
538    VCLZLSBB,
539    VCTZLSBB,
540    VEXTRACTD,
541    VEXTUBLX,
542    VEXTUBRX,
543    VEXTUHLX,
544    VEXTUHRX,
545    VEXTUWLX,
546    VEXTUWRX,
547    VGBBD,
548    VMRGHB,
549    VMRGHH,
550    VMRGHW,
551    VMRGLB,
552    VMRGLH,
553    VMRGLW,
554    VPERM,
555    VPERMR,
556    VPERMXOR,
557    VPKPX,
558    VPKSDSS,
559    VPKSDUS,
560    VPKSHSS,
561    VPKSHUS,
562    VPKSWSS,
563    VPKSWUS,
564    VPKUDUM,
565    VPKUDUS,
566    VPKUHUM,
567    VPKUHUS,
568    VPKUWUM,
569    VPKUWUS,
570    VPRTYBQ,
571    VSL,
572    VSLDOI,
573    VSLO,
574    VSLV,
575    VSR,
576    VSRO,
577    VSRV,
578    VUPKHPX,
579    VUPKHSB,
580    VUPKHSH,
581    VUPKHSW,
582    VUPKLPX,
583    VUPKLSB,
584    VUPKLSH,
585    VUPKLSW,
586    XXBRD,
587    XXBRH,
588    XXBRQ,
589    XXBRW,
590    XXEXTRACTUW,
591    XXINSERTW,
592    XXMRGHW,
593    XXMRGLW,
594    XXPERM,
595    XXPERMR,
596    XXSLDWI,
597    XXSLDWIs,
598    XXSPLTIB,
599    XXSPLTW,
600    XXSPLTWs,
601    XXPERMDI,
602    XXPERMDIs,
603    VADDCUQ,
604    VADDECUQ,
605    VADDEUQM,
606    VADDUQM,
607    VMUL10CUQ,
608    VMUL10ECUQ,
609    VMUL10EUQ,
610    VMUL10UQ,
611    VSUBCUQ,
612    VSUBECUQ,
613    VSUBEUQM,
614    VSUBUQM,
615    XSCMPEXPQP,
616    XSCMPOQP,
617    XSCMPUQP,
618    XSTSTDCQP,
619    XSXSIGQP,
620    BCDCFN_rec,
621    BCDCFZ_rec,
622    BCDCPSGN_rec,
623    BCDCTN_rec,
624    BCDCTZ_rec,
625    BCDSETSGN_rec,
626    BCDS_rec,
627    BCDTRUNC_rec,
628    BCDUS_rec,
629    BCDUTRUNC_rec,
630    BCDADD_rec,
631    BCDSUB_rec
632)>;
633
634// 12-cycle DFU operation. There is only one DFU unit per CPU, so a whole
635// superslice is used: both exec pipelines (EXECO, EXECE) and one
636// dispatch (DISP_1C).
637def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
638      (instrs
639    BCDSR_rec,
640    XSADDQP,
641    XSADDQPO,
642    XSCVDPQP,
643    XSCVQPDP,
644    XSCVQPDPO,
645    XSCVQPSDZ,
646    XSCVQPSWZ,
647    XSCVQPUDZ,
648    XSCVQPUWZ,
649    XSCVSDQP,
650    XSCVUDQP,
651    XSRQPI,
652    XSRQPIX,
653    XSRQPXP,
654    XSSUBQP,
655    XSSUBQPO
656)>;
657
658// 23-cycle DFU operation. There is only one DFU unit per CPU, so a whole
659// superslice is used: both exec pipelines (EXECO, EXECE) and one
660// dispatch (DISP_1C).
661def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
662      (instrs
663    BCDCTSQ_rec
664)>;
665
666// 24-cycle DFU operation. There is only one DFU unit per CPU, so a whole
667// superslice is used: both exec pipelines (EXECO, EXECE) and one
668// dispatch (DISP_1C).
669def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
670      (instrs
671    XSMADDQP,
672    XSMADDQPO,
673    XSMSUBQP,
674    XSMSUBQPO,
675    XSMULQP,
676    XSMULQPO,
677    XSNMADDQP,
678    XSNMADDQPO,
679    XSNMSUBQP,
680    XSNMSUBQPO
681)>;
682
683// 37-cycle DFU operation. There is only one DFU unit per CPU, so a whole
684// superslice is used: both exec pipelines (EXECO, EXECE) and one
685// dispatch (DISP_1C).
686def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
687      (instrs
688    BCDCFSQ_rec
689)>;
690
691// 58-cycle DFU operation. There is only one DFU unit per CPU, so a whole
692// superslice is used: both exec pipelines (EXECO, EXECE) and one
693// dispatch (DISP_1C).
694def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
695      (instrs
696    XSDIVQP,
697    XSDIVQPO
698)>;
699
700// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
701// superslice. That includes both exec pipelines (EXECO, EXECE) and one
702// dispatch (DISP_1C).
703def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
704      (instrs
705    XSSQRTQP,
706    XSSQRTQPO
707)>;
708
709// 6-cycle load that uses a single slice (one AGEN pipeline, one dispatch).
710def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
711      (instrs
712    (instregex "LXVL(L)?")  // NOTE(review): no trailing `$`, unlike most patterns.
713)>;
714
715// 5-cycle load that uses a single slice (one AGEN pipeline, one dispatch).
716def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
717      (instrs
718    (instregex "LVE(B|H|W)X$"),
719    (instregex "LVX(L)?"),  // NOTE(review): no trailing `$`, unlike most patterns.
720    (instregex "LXSI(B|H)ZX$"),
721    LXSDX,
722    LXVB16X,
723    LXVD2X,
724    LXVWSX,
725    LXSIWZX,
726    LXV,
727    LXVX,
728    LXSD,
729    DFLOADf64,
730    XFLOADf64,
731    LIWZX
732)>;
733
734// 4-cycle load that uses a single slice (one AGEN pipeline, one dispatch).
735def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
736      (instrs
737    (instregex "DCB(F|T|ST)(EP)?$"),
738    (instregex "DCBZ(L)?(EP)?$"),
739    (instregex "DCBTST(EP)?$"),  // DCB(F|T|ST) above yields DCBST, not DCBTST.
740    (instregex "CP_COPY(8)?$"),
741    (instregex "ICBI(EP)?$"),
742    (instregex "ICBT(LS)?$"),
743    (instregex "LBARX(L)?$"),
744    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
745    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
746    (instregex "LH(A|B)RX(L)?(8)?$"),
747    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
748    (instregex "LWARX(L)?$"),
749    (instregex "LWBRX(8)?$"),
750    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
751    CP_ABORT,
752    DARN,
753    EnforceIEIO,
754    ISYNC,
755    MSGSYNC,
756    TLBSYNC,
757    SYNC,
758    LMW,
759    LSWI
760)>;
761
762// 4-cycle restricted load. Uses a single slice but takes the dispatch for the
763// whole superslice (DISP_3SLOTS_1C).
764def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
765      (instrs
766    LFIWZX,
767    LFDX,
768    LFD
769)>;
770
771// Cracked load instructions.
772// The two load pieces (2 x P9_LS_4C) can be done in parallel.
773def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
774              DISP_PAIR_1C],
775      (instrs
776    SLBIA,
777    SLBIE,
778    SLBMFEE,
779    SLBMFEV,
780    SLBMTE,
781    TLBIEL
782)>;
783
784// Cracked load instruction.
785// Made of a load piece (P9_LS_4C) and an ALU piece (P9_ALU_2C) totaling
786// 6 cycles. The load and ALU operations can be run in parallel.
787def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
788              DISP_PAIR_1C, DISP_PAIR_1C],
789      (instrs
790    (instregex "L(W|H)ZU(X)?(8)?$")
791)>;
792
793// Cracked TEND instruction.
794// Made of a load piece (P9_LS_4C) and an ALU piece (P9_ALU_2C) totaling
795// 6 cycles. The load and ALU operations can be run in parallel.
796def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
797              DISP_1C, DISP_1C],
798      (instrs
799    TEND
800)>;
801
802
803// Cracked store instruction.
804// Consecutive store and ALU operations. The store is restricted and requires
805// three dispatches (DISP_3SLOTS_1C); the ALU op takes one more (DISP_1C).
806def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
807              DISP_3SLOTS_1C, DISP_1C],
808      (instrs
809    (instregex "ST(B|H|W|D)CX$")
810)>;
811
812// Cracked load instruction.
813// Requires consecutive load and ALU pieces totaling 6 cycles. The load and ALU
814// operations cannot overlap, so their latencies are added together.
815def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
816              DISP_1C, DISP_1C],
817      (instrs
818    (instregex "LHA(X)?(8)?$"),
819    (instregex "CP_PASTE(8)?_rec$"),
820    (instregex "LWA(X)?(_32)?$"),
821    TCHECK
822)>;
823
824// Cracked restricted load instruction.
825// Requires consecutive load and ALU pieces totaling 6 cycles. The load and ALU
826// operations cannot overlap, so their latencies are added together.
827// A full 6 dispatches (2 x DISP_3SLOTS_1C): both cracked and restricted.
828def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
829              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
830      (instrs
831    LFIWAX
832)>;
833
834// Cracked Load instruction.
835// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
836// operations cannot be done at the same time and so their latencies are added.
837// Two dispatches (2 x DISP_1C) are required as this is a cracked instruction.
838def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
839      (instrs
840    LXSIWAX,
841    LIWAX
842)>;
843
844// Cracked load instruction.
845// Requires consecutive load (4 cycles) and ALU (3 cycles) pieces totaling 7
846// cycles. The load and ALU operations cannot overlap, so their latencies
847// are added together.
848// A full 6 dispatches (2 x DISP_3SLOTS_1C): this is a restricted instruction.
849def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
850              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
851      (instrs
852    LFSX,
853    LFS
854)>;
855
856// Cracked Load instruction.
857// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
858// operations cannot be done at the same time and so their latencies are added.
859// Two dispatches (2 x DISP_1C) are required as this is a cracked instruction.
860def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
861      (instrs
862    LXSSP,
863    LXSSPX,
864    XFLOADf32,
865    DFLOADf32
866)>;
867
868// Cracked 3-way load instruction.
869// A load with two ALU operations that depend on each other.
870def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
871              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
872      (instrs
873    (instregex "LHAU(X)?(8)?$"),
874    LWAUX
875)>;
876
877// Cracked load that requires the PM resource.
878// The load and the PM op cannot overlap, so their latencies are added for a
879// total of 8 cycles. The PM needs the full superslice, so both the EXECE and
880// EXECO pipelines are used, plus 1 dispatch for the PM. The load
881// takes the remaining 1 dispatch.
882def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
883              DISP_1C, DISP_1C],
884      (instrs
885    LXVH8X,
886    LXVDSX,
887    LXVW4X
888)>;
889
890// Single-slice restricted store operation. The restricted operation requires
891// all three dispatches (DISP_3SLOTS_1C) for the superslice.
892def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
893      (instrs
894    (instregex "STF(S|D|IWX|SX|DX)$"),
895    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
896    (instregex "STW(8)?$"),
897    (instregex "(D|X)FSTORE(f32|f64)$"),
898    (instregex "ST(W|H|D)BRX$"),
899    (instregex "ST(B|H|D)(8)?$"),
900    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
901    STIWX,
902    SLBIEG,
903    STMW,
904    STSWI,
905    TLBIE
906)>;
907
908// Vector store instruction.
909// Requires the whole superslice and therefore requires one dispatch (DISP_1C)
910// as well as both the even and odd exec pipelines (EXECE, EXECO).
911def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
912      (instrs
913    (instregex "STVE(B|H|W)X$"),
914    (instregex "STVX(L)?$"),
915    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
916)>;
917
918// 5-cycle DIV operation. There is only one DIV unit per superslice, so the
919// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
920// dispatches (DISP_EVEN_1C).
921def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
922      (instrs
923    (instregex "MTCTR(8)?(loop)?$"),
924    (instregex "MTLR(8)?$")
925)>;
926
927// 12-cycle DIV operation. There is only one DIV unit per superslice, so the
928// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
929// dispatches (DISP_EVEN_1C).
930def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
931      (instrs
932    (instregex "M(T|F)VRSAVE(v)?$"),
933    (instregex "M(T|F)PMR$"),
934    (instregex "M(T|F)TB(8)?$"),
935    (instregex "MF(SPR|CTR|LR)(8)?$"),
936    (instregex "M(T|F)MSR(D)?$"),
937    (instregex "M(T|F)(U)?DSCR$"),
938    (instregex "MTSPR(8)?$")
939)>;
940
941// 16-cycle DIV operation. There is only one DIV unit per superslice, so the
942// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
943// dispatches (DISP_EVEN_1C).
944def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
945      (instrs
946    DIVW,
947    DIVWO,
948    DIVWU,
949    DIVWUO,
950    MODSW
951)>;
952
953// 24-cycle DIV operation. There is only one DIV unit per superslice, so the
954// whole superslice is used: both exec pipelines (EXECO, EXECE) and two
955// dispatches (DISP_EVEN_1C).
956def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
957      (instrs
958    DIVWE,
959    DIVWEO,
960    DIVD,
961    DIVDO,
962    DIVWEU,
963    DIVWEUO,
964    DIVDU,
965    DIVDUO,
966    MODSD,
967    MODUD,
968    MODUW
969)>;
970
971// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
972// superslice. That includes both exec pipelines (EXECO, EXECE) and two
973// dispatches (DISP_EVEN_1C, as for the other DIV operations).
974def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
975      (instrs
976    DIVDE,
977    DIVDEO,
978    DIVDEU,
979    DIVDEUO
980)>;
981
982// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
983// and one full superslice for the DIV operation since there is only one DIV per
984// superslice. Latency of DIV plus ALU is 18.
985def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
986              DISP_EVEN_1C, DISP_1C],
987      (instrs
988    (instregex "DIVW(U)?(O)?_rec$")
989)>;
990
991// Cracked DIV and ALU operation. The ALU operation needs one full slice and
992// the DIV operation needs one full superslice, since there is only one DIV
993// unit per superslice. Latency of DIV plus ALU is 26.
994def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
995              DISP_EVEN_1C, DISP_1C],
996      (instrs
997    DIVD_rec,
998    DIVDO_rec,
999    DIVDU_rec,
1000    DIVDUO_rec,
1001    DIVWE_rec,
1002    DIVWEO_rec,
1003    DIVWEU_rec,
1004    DIVWEUO_rec
1005)>;
1006
1007// Cracked DIV and ALU operation. The ALU operation needs one full slice and
1008// the DIV operation needs one full superslice, since there is only one DIV
1009// unit per superslice. Latency of DIV plus ALU is 42.
1010def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
1011              DISP_EVEN_1C, DISP_1C],
1012      (instrs
1013    DIVDE_rec,
1014    DIVDEO_rec,
1015    DIVDEU_rec,
1016    DIVDEUO_rec
1017)>;
1018
1019// CR access instructions in _BrMCR, IIC_BrMCRX.
1020
1021// Cracked, restricted ALU operations.
1022// The two ALU ops can be done in parallel, so their latencies are not
1023// added together. Otherwise this is like having two instructions
1024// running together on two pipelines and 6 dispatches (2 x DISP_3SLOTS_1C).
1025// The ALU ops are 2 cycles each.
1026def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
1027              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1028      (instrs
1029    MTCRF,
1030    MTCRF8
1031)>;
1032
1033// Cracked ALU operations.
1034// The two ALU ops can be done in parallel, so their latencies are not
1035// added together. Otherwise this is like having two instructions
1036// running together on two pipelines and 2 dispatches (2 x DISP_1C).
1037// The ALU ops are 2 cycles each.
1038def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
1039              DISP_1C, DISP_1C],
1040      (instrs
1041    (instregex "ADDC(8)?(O)?_rec$"),
1042    (instregex "SUBFC(8)?(O)?_rec$")
1043)>;
1044
1045// Cracked ALU operations.
1046// Two ALU ops can be done in parallel.
1047// One is a three cycle ALU op, the other is a two cycle ALU op.
1048// One of the ALU ops is restricted and the other is not, so we have a total
1049// of 4 dispatches (DISP_3SLOTS_1C plus DISP_1C).
1050def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1051              DISP_3SLOTS_1C, DISP_1C],
1052      (instrs
1053    (instregex "F(N)?ABS(D|S)_rec$"),
1054    (instregex "FCPSGN(D|S)_rec$"),
1055    (instregex "FNEG(D|S)_rec$"),
1056    FMR_rec
1057)>;
1058
1059// Cracked ALU operations.
1060// The two ALU ops can be done in parallel, so their latencies are not
1061// added together. Otherwise this is like having two instructions
1062// running together on two pipelines and 2 dispatches (2 x DISP_1C).
1063// The ALU ops are 3 cycles each.
1064def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1065              DISP_1C, DISP_1C],
1066      (instrs
1067    MCRFS
1068)>;
1069
1070// Cracked restricted ALU operations.
1071// The two ALU ops can be done in parallel, so their latencies are not
1072// added together. Otherwise this is like having two instructions
1073// running together on two pipelines and 6 dispatches (2 x DISP_3SLOTS_1C).
1074// The ALU ops are 3 cycles each.
1075def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1076              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1077      (instrs
1078    (instregex "MTFSF(b|_rec)?$"),
1079    (instregex "MTFSFI(_rec)?$"),
1080    MTFSFIb
1081)>;
1082
1083// Cracked instruction made of two ALU ops.
1084// The two ops cannot be done in parallel (P9_ALUOpAndALUOp_4C).
1085// One of the ALU ops is restricted and takes 3 dispatches (DISP_3SLOTS_1C).
1086def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
1087              DISP_3SLOTS_1C, DISP_1C],
1088      (instrs
1089    (instregex "RLD(I)?C(R|L)_rec$"),
1090    (instregex "RLW(IMI|INM|NM)(8)?_rec$"),
1091    (instregex "SLW(8)?_rec$"),
1092    (instregex "SRAW(I)?_rec$"),
1093    (instregex "SRW(8)?_rec$"),
1094    RLDICL_32_rec,
1095    RLDIMI_rec
1096)>;
1097
1098// Cracked instruction made of two ALU ops.
1099// The two ops cannot be done in parallel (P9_ALU2OpAndALU2Op_6C).
1100// Both ALU ops are restricted and take 3 dispatches each (DISP_3SLOTS_1C).
1101def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
1102              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1103      (instrs
1104    (instregex "MFFS(L|CE|_rec)?$")
1105)>;
1106
1107// Cracked ALU instruction composed of three consecutive 2 cycle ALU operations
1108// for a total of 6 cycles. All of the ALU operations are also restricted so
1109// each takes 3 dispatches for a total of 9.
1110def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,  // one combined resource; one EXEC pipeline per ALU op
1111              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],  // three restricted ops: 3 x 3 = 9 dispatches
1112      (instrs
1113    (instregex "MFCR(8)?$")  // pattern spells MFCR, MFCR8
1114)>;
1115
1116// Cracked instruction made of two ALU ops.
1117// The two ops cannot be done in parallel.
1118def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],  // neither op is restricted: 1 dispatch each
1119      (instrs
1120    (instregex "EXTSWSLI_32_64_rec$"),
1121    (instregex "SRAD(I)?_rec$"),  // pattern spells SRAD_rec, SRADI_rec
1122    EXTSWSLI_rec,
1123    SLD_rec,
1124    SRD_rec,
1125    RLDIC_rec
1126)>;
1127
1128// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1129def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],  // same DP resource as the XSDIVDP record; only the dispatch entry differs
1130      (instrs
1131    FDIV
1132)>;
1133
1134// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
1135def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,  // 33-cycle DP op followed by 3-cycle ALU op: 36 cycles combined
1136              DISP_3SLOTS_1C, DISP_1C],  // 3 dispatches for the restricted op, 1 for the other
1137      (instrs
1138    FDIV_rec
1139)>;
1140
1141// 36 Cycle DP Instruction.
1142// Instruction can be done on a single slice.
1143def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],  // not restricted: a single dispatch
1144      (instrs
1145    XSSQRTDP
1146)>;
1147
1148// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1149def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],  // same DP resource as the XSSQRTDP record; only the dispatch entry differs
1150      (instrs
1151    FSQRT
1152)>;
1153
1154// 36 Cycle DP Vector Instruction.
1155def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,  // DPE/DPO resources paired with the EXECE/EXECO pipelines
1156              DISP_1C],  // a single dispatch for the whole vector op
1157      (instrs
1158    XVSQRTDP
1159)>;
1160
1161// 27 Cycle DP Vector Instruction.
1162def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,  // DPE/DPO resources paired with the EXECE/EXECO pipelines
1163              DISP_1C],  // a single dispatch for the whole vector op
1164      (instrs
1165    XVSQRTSP
1166)>;
1167
1168// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
1169def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,  // 36-cycle DP op followed by 3-cycle ALU op: 39 cycles combined
1170              DISP_3SLOTS_1C, DISP_1C],  // 3 dispatches for the restricted op, 1 for the other
1171      (instrs
1172    FSQRT_rec
1173)>;
1174
1175// 26 Cycle DP Instruction.
1176def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],  // not restricted: a single dispatch
1177      (instrs
1178    XSSQRTSP
1179)>;
1180
1181// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1182def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],  // same DP resource as the XSSQRTSP record; only the dispatch entry differs
1183      (instrs
1184    FSQRTS
1185)>;
1186
1187// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
1188def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,  // 26-cycle DP op followed by 3-cycle ALU op: 29 cycles combined
1189              DISP_3SLOTS_1C, DISP_1C],  // 3 dispatches for the restricted op, 1 for the other
1190      (instrs
1191    FSQRTS_rec
1192)>;
1193
1194// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
1195def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],  // same DP resource as the FDIV record, but with a single dispatch
1196      (instrs
1197    XSDIVDP
1198)>;
1199
1200// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
1201def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],  // same DP resource as the XSDIVSP record; only the dispatch entry differs
1202      (instrs
1203    FDIVS
1204)>;
1205
1206// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
1207def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,  // combined resource is 25 cycles: 22 for the DP op plus 3 for the ALU op
1208              DISP_3SLOTS_1C, DISP_1C],  // 3 dispatches for the restricted op, 1 for the other
1209      (instrs
1210    FDIVS_rec
1211)>;
1212
1213// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
1214def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],  // same DP resource as the FDIVS record, but with a single dispatch
1215      (instrs
1216    XSDIVSP
1217)>;
1218
1219// 24 Cycle DP Vector Instruction. Takes one full superslice.
1220// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
1221// superslice.
1222def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,  // DPE/DPO resources paired with the EXECE/EXECO pipelines
1223              DISP_1C],  // the single dispatch for the superslice
1224      (instrs
1225    XVDIVSP
1226)>;
1227
1228// 33 Cycle DP Vector Instruction. Takes one full superslice.
1229// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
1230// superslice.
1231def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,  // DPE/DPO resources paired with the EXECE/EXECO pipelines
1232              DISP_1C],  // the single dispatch for the superslice
1233      (instrs
1234    XVDIVDP
1235)>;
1236
1237// Instruction cracked into three pieces. One Load and two ALU operations.
1238// The Load and one of the ALU ops cannot be run at the same time and so the
1239// latencies are added together for 7 cycles. The remaining ALU is 2 cycles.
1240// The load and the ALU that depends on it are restricted (3 dispatches each);
1241// the second ALU op adds 1 more dispatch, for a total of 7 dispatches.
1242// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
1243def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,  // combined load + dependent ALU resource, then the independent 2-cycle ALU
1244              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,  // AGEN for the load, one EXEC pipeline per ALU op
1245              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],  // 3 + 3 + 1 dispatches
1246      (instrs
1247    (instregex "LF(SU|SUX)$")  // pattern spells LFSU, LFSUX
1248)>;
1249
1250// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
1251// the store and so it can be run at the same time as the store. The store is
1252// also restricted.
1253def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,  // separate LS and ALU resources because the two ops can overlap
1254              DISP_3SLOTS_1C, DISP_1C],  // 3 dispatches for the restricted store, 1 for the ALU
1255      (instrs
1256    (instregex "STF(S|D)U(X)?$"),  // pattern spells STFSU, STFSUX, STFDU, STFDUX
1257    (instregex "ST(B|H|W|D)U(X)?(8)?$")  // STBU/STHU/STWU/STDU with optional "X" and optional "8" suffixes
1258)>;
1259
1260// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1261// the load and so it can be run at the same time as the load.
1262def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,  // separate LS and ALU resources because the two ops can overlap
1263              DISP_PAIR_1C, DISP_PAIR_1C],  // NOTE(review): DISP_PAIR_1C is defined outside this excerpt; presumably a paired dispatch slot - confirm
1264      (instrs
1265    (instregex "LBZU(X)?(8)?$"),  // LBZU with optional "X" and optional "8" suffixes
1266    (instregex "LDU(X)?$")  // pattern spells LDU, LDUX
1267)>;
1268
1269// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1270// the load and so it can be run at the same time as the load. The load is also
1271// restricted. 3 dispatches are from the restricted load while the other one
1272// is from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
1273// is required for the ALU.
1274def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,  // same resources as the LBZU/LDU record; only the dispatch entries differ
1275              DISP_3SLOTS_1C, DISP_1C],  // 3-slot dispatch for the restricted load, single dispatch for the ALU
1276      (instrs
1277    (instregex "LF(DU|DUX)$")  // pattern spells LFDU, LFDUX
1278)>;
1279
1280// Crypto Instructions
1281
1282// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
1283// superslice. That includes both exec pipelines (EXECO, EXECE) and one
1284// dispatch.
1285def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],  // the CY resource plus both exec pipelines of the superslice, one dispatch
1286      (instrs
1287    (instregex "VPMSUM(B|H|W|D)$"),  // pattern spells VPMSUMB, VPMSUMH, VPMSUMW, VPMSUMD
1288    (instregex "V(N)?CIPHER(LAST)?$"),  // pattern spells VCIPHER, VCIPHERLAST, VNCIPHER, VNCIPHERLAST
1289    VSBOX
1290)>;
1291
1292// Branch Instructions
1293
1294// Two Cycle Branch
1295def : InstRW<[P9_BR_2C, DISP_BR_1C],  // only a branch resource and a branch dispatch entry; no IP_EXEC/IP_AGEN pipeline is listed
1296      (instrs
  // Regex-matched branch families first, then individually named opcodes.
1297  (instregex "BCCCTR(L)?(8)?$"),
1298  (instregex "BCCL(A|R|RL)?$"),
1299  (instregex "BCCTR(L)?(8)?(n)?$"),
1300  (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
1301  (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
1302  (instregex "BL(_TLS|_NOP)?(_RM)?$"),
1303  (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?(_RM)?$"),
1304  (instregex "BLA(8|8_NOP)?(_RM)?$"),
1305  (instregex "BLR(8|L)?$"),
1306  (instregex "TAILB(A)?(8)?$"),
1307  (instregex "TAILBCTR(8)?$"),
1308  (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
1309  (instregex "BCLR(L)?(n)?$"),
1310  (instregex "BCTR(L)?(8)?(_RM)?$"),
1311  B,
1312  BA,
1313  BC,
1314  BCC,
1315  BCCA,
1316  BCL,
1317  BCLalways,
1318  BCLn,
1319  BCTRL8_LDinto_toc,
1320  BCTRL_LWZinto_toc,
1321  BCTRL8_LDinto_toc_RM,
1322  BCTRL_LWZinto_toc_RM,
1323  BCn,
1324  CTRL_DEP
1325)>;
1326
1327// Five Cycle Branch with a 2 Cycle ALU Op
1328// Operations must be done consecutively and not in parallel.
1329def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],  // combined 5 + 2 = 7 cycle resource; one EXEC pipeline; one dispatch entry per op
1330      (instrs
1331    ADDPCIS
1332)>;
1333
1334// Special Extracted Instructions For Atomics
1335
1336// Atomic Load
// Entry tally: 5 LS resources (2x P9_LS_1C, 3x P9_LS_4C), 2 EXEC pipelines,
// 5 AGEN pipelines and 6 dispatch entries (two of them 3-slot).
1337def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
1338              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
1339              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
1340              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
1341      (instrs
1342    (instregex "L(D|W)AT$")  // pattern spells LDAT, LWAT
1343)>;
1344
1345// Atomic Store
// Entry tally: 3 LS resources (1x P9_LS_1C, 2x P9_LS_4C), 1 EXEC pipeline,
// 3 AGEN pipelines and 3 dispatch entries (one of them 3-slot).
1346def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
1347              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
1348      (instrs
1349    (instregex "ST(D|W)AT$")  // pattern spells STDAT, STWAT
1350)>;
1351
1352// Signal Processing Engine (SPE) Instructions
1353// These instructions are not supported on Power 9
1354def : InstRW<[],  // empty resource list: no P9 resources are assigned to these opcodes
1355    (instrs
  // Individually named opcodes first, then regex-matched EV* families.
1356  BRINC,
1357  EVABS,
1358  EVEQV,
1359  EVMRA,
1360  EVNAND,
1361  EVNEG,
1362  (instregex "EVADD(I)?W$"),
1363  (instregex "EVADD(SM|SS|UM|US)IAAW$"),
1364  (instregex "EVAND(C)?$"),
1365  (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
1366  (instregex "EVCNTL(S|Z)W$"),
1367  (instregex "EVDIVW(S|U)$"),
1368  (instregex "EVEXTS(B|H)$"),
1369  (instregex "EVLD(H|W|D)(X)?$"),
1370  (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
1371  (instregex "EVLWHE(X)?$"),
1372  (instregex "EVLWHO(S|U)(X)?$"),
1373  (instregex "EVLW(H|W)SPLAT(X)?$"),
1374  (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
1375  (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
1376  (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1377  (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
1378  (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
1379  (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1380  (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
1381  (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
1382  (instregex "EVMWHUMI(A)?$"),
1383  (instregex "EVMWLS(M|S)IA(A|N)W$"),
1384  (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
1385  (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
1386  (instregex "EVMWSSF(A|AA|AN)?$"),
1387  (instregex "EVMWUMI(A|AA|AN)?$"),
1388  (instregex "EV(N|X)?OR(C)?$"),
1389  (instregex "EVR(LW|LWI|NDW)$"),
1390  (instregex "EVSLW(I)?$"),
1391  (instregex "EVSPLAT(F)?I$"),
1392  (instregex "EVSRW(I)?(S|U)$"),
1393  (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
1394  (instregex "EVSUBF(S|U)(M|S)IAAW$"),
1395  (instregex "EVSUB(I)?FW$")
1396)> { let Unsupported = 1; }  // sets the record's Unsupported field, matching the "not supported on Power 9" note above
1397
1398// General Instructions without scheduling support.
1399def : InstRW<[],  // empty resource list: these opcodes get no P9 scheduling resources
1400    (instrs
  // Regex-matched families first, then individually named opcodes.
1401  (instregex "(H)?RFI(D)?$"),  // pattern spells RFI, RFID, HRFI, HRFID
1402  (instregex "DSS(ALL)?$"),    // pattern spells DSS, DSSALL
1403  (instregex "DST(ST)?(T)?(64)?$"),
1404  (instregex "ICBL(C|Q)$"),
1405  (instregex "L(W|H|B)EPX$"),
1406  (instregex "ST(W|H|B)EPX$"),
1407  (instregex "(L|ST)FDEPX$"),
1408  (instregex "M(T|F)SR(IN)?$"),
1409  (instregex "M(T|F)DCR$"),
1410  (instregex "NOP_GT_PWR(6|7)$"),
1411  (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
1412  (instregex "WRTEE(I)?$"),
1413  (instregex "HASH(ST|STP|CHK|CHKP)(8)?$"),
1414  ATTN,
1415  CLRBHRB,
1416  MFBHRBE,
1417  MBAR,
1418  MSYNC,
1419  SLBSYNC,
1420  SLBFEE_rec,
1421  NAP,
1422  STOP,
1423  TRAP,
1424  RFCI,
1425  RFDI,
1426  RFMCI,
1427  SC,
1428  DCBA,
1429  DCBI,
1430  DCCCI,
1431  ICCCI,
1432  ADDEX,
1433  ADDEX8
1434)> { let Unsupported = 1; }  // sets the record's Unsupported field for every opcode listed above
1435