xref: /freebsd/contrib/llvm-project/llvm/lib/Target/PowerPC/P9InstrResources.td (revision 47ce20aef1e636e601ef26a4bc7e05c64a000640)
1//===- P9InstrResources.td - P9 Instruction Resource Defs  -*- tablegen -*-==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the resources required by P9 instructions. This is part of
10// the P9 processor model used for instruction scheduling. This file should
11// contain all the instructions that may be used on Power 9. This is not
12// just instructions that are new on Power 9 but also instructions that were
13// available on earlier architectures and are still used in Power 9.
14//
15// The makeup of the P9 CPU is modeled as follows:
16//   - Each CPU is made up of two superslices.
17//   - Each superslice is made up of two slices. Therefore, there are 4 slices
18//   for each CPU.
19//   - Up to 6 instructions can be dispatched to each CPU. Three per superslice.
20//   - Each CPU has:
21//     - One CY (Crypto) unit P9_CY_*
22//     - One DFU (Decimal Floating Point and Quad Precision) unit P9_DFU_*
23//     - Two PM (Permute) units. One on each superslice. P9_PM_*
24//     - Two DIV (Fixed Point Divide) units. One on each superslice. P9_DIV_*
25//     - Four ALU (Fixed Point Arithmetic) units. One on each slice. P9_ALU_*
26//     - Four DP (Floating Point) units. One on each slice. P9_DP_*
27//       This also includes fixed point multiply add.
28//     - Four AGEN (Address Generation) units. One for each slice. P9_AGEN_*
29//     - Four Load/Store Queues. P9_LS_*
30//   - Each set of instructions will require a number of these resources.
31//===----------------------------------------------------------------------===//
32
33// Two cycle ALU vector operation that uses an entire superslice.
34// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
35// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
36def : InstRW<[P9_ALUE_2C, P9_ALUO_2C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
37      (instrs
38    (instregex "VADDU(B|H|W|D)M$"),
39    (instregex "VAND(C)?$"),
40    (instregex "VEXTS(B|H|W)2(D|W)(s)?$"),
41    (instregex "V_SET0(B|H)?$"),
42    (instregex "VS(R|L)(B|H|W|D)$"),
43    (instregex "VSUBU(B|H|W|D)M$"),
44    (instregex "VPOPCNT(B|H)$"),
45    (instregex "VRL(B|H|W|D)$"),
46    (instregex "VSRA(B|H|W|D)$"),
47    (instregex "XV(N)?ABS(D|S)P$"),
48    (instregex "XVCPSGN(D|S)P$"),
49    (instregex "XV(I|X)EXP(D|S)P$"),
50    (instregex "VRL(D|W)(MI|NM)$"),
51    (instregex "VMRG(E|O)W$"),
52    MTVSRDD,
53    VEQV,
54    VNAND,
55    VNEGD,
56    VNEGW,
57    VNOR,
58    VOR,
59    VORC,
60    VSEL,
61    VXOR,
62    XVNEGDP,
63    XVNEGSP,
64    XXLAND,
65    XXLANDC,
66    XXLEQV,
67    XXLNAND,
68    XXLNOR,
69    XXLOR,
70    XXLORf,
71    XXLORC,
72    XXLXOR,
73    XXLXORdpz,
74    XXLXORspz,
75    XXLXORz,
76    XXSEL,
77    XSABSQP,
78    XSCPSGNQP,
79    XSIEXPQP,
80    XSNABSQP,
81    XSNEGQP,
82    XSXEXPQP
83)>;
84
85// Restricted Dispatch ALU operation for 3 cycles. The operation runs on a
86// single slice. However, since it is Restricted, it requires all 3 dispatches
87// (DISP) for that superslice.
88def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_3SLOTS_1C],
89      (instrs
90    (instregex "TABORT(D|W)C(I)?$"),
91    (instregex "MTFSB(0|1)$"),
92    (instregex "MFFSC(D)?RN(I)?$"),
93    (instregex "CMPRB(8)?$"),
94    (instregex "TD(I)?$"),
95    (instregex "TW(I)?$"),
96    (instregex "FCMPU(S|D)$"),
97    (instregex "XSTSTDC(S|D)P$"),
98    FTDIV,
99    FTSQRT,
100    CMPEQB
101)>;
102
103// Standard Dispatch ALU operation for 3 cycles. Only one slice used.
104def : InstRW<[P9_ALU_3C, IP_EXEC_1C, DISP_1C],
105      (instrs
106    (instregex "XSMAX(C|J)?DP$"),
107    (instregex "XSMIN(C|J)?DP$"),
108    (instregex "XSCMP(EQ|EXP|GE|GT|O|U)DP$"),
109    (instregex "CNT(L|T)Z(D|W)(8)?(o)?$"),
110    (instregex "POPCNT(D|W)$"),
111    (instregex "CMPB(8)?$"),
112    (instregex "SETB(8)?$"),
113    XSTDIVDP,
114    XSTSQRTDP,
115    XSXSIGDP,
116    XSCVSPDPN,
117    BPERMD
118)>;
119
120// Standard Dispatch ALU operation for 2 cycles. Only one slice used.
121def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_1C],
122      (instrs
123    (instregex "S(L|R)D$"),
124    (instregex "SRAD(I)?$"),
125    (instregex "EXTSWSLI_32_64$"),
126    (instregex "MFV(S)?RD$"),
127    (instregex "MTVSRD$"),
128    (instregex "MTVSRW(A|Z)$"),
129    (instregex "CMP(WI|LWI|W|LW)(8)?$"),
130    (instregex "CMP(L)?D(I)?$"),
131    (instregex "SUBF(I)?C(8)?$"),
132    (instregex "ANDI(S)?o(8)?$"),
133    (instregex "ADDC(8)?$"),
134    (instregex "ADDIC(8)?(o)?$"),
135    (instregex "ADD(8|4)(o)?$"),
136    (instregex "ADD(E|ME|ZE)(8)?(o)?$"),
137    (instregex "SUBF(E|ME|ZE)?(8)?(o)?$"),
138    (instregex "NEG(8)?(o)?$"),
139    (instregex "POPCNTB$"),
140    (instregex "ADD(I|IS)?(8)?$"),
141    (instregex "LI(S)?(8)?$"),
142    (instregex "(X)?OR(I|IS)?(8)?(o)?$"),
143    (instregex "NAND(8)?(o)?$"),
144    (instregex "AND(C)?(8)?(o)?$"),
145    (instregex "NOR(8)?(o)?$"),
146    (instregex "OR(C)?(8)?(o)?$"),
147    (instregex "EQV(8)?(o)?$"),
148    (instregex "EXTS(B|H|W)(8)?(_32)?(_64)?(o)?$"),
149    (instregex "ADD(4|8)(TLS)?(_)?$"),
150    (instregex "NEG(8)?$"),
151    (instregex "ADDI(S)?toc(HA|L)$"),
152    COPY,
153    MCRF,
154    MCRXRX,
155    XSNABSDP,
156    XSXEXPDP,
157    XSABSDP,
158    XSNEGDP,
159    XSCPSGNDP,
160    MFVSRWZ,
161    EXTSWSLI,
162    SRADI_32,
163    RLDIC,
164    RFEBB,
165    LA,
166    TBEGIN,
167    TRECHKPT,
168    NOP,
169    WAIT
170)>;
171
172// Restricted Dispatch ALU operation for 2 cycles. The operation runs on a
173// single slice. However, since it is Restricted, it requires all 3 dispatches
174// (DISP) for that superslice.
175def : InstRW<[P9_ALU_2C, IP_EXEC_1C, DISP_3SLOTS_1C],
176      (instrs
177    (instregex "RLDC(L|R)$"),
178    (instregex "RLWIMI(8)?$"),
179    (instregex "RLDIC(L|R)(_32)?(_64)?$"),
180    (instregex "M(F|T)OCRF(8)?$"),
181    (instregex "CR(6)?(UN)?SET$"),
182    (instregex "CR(N)?(OR|AND)(C)?$"),
183    (instregex "S(L|R)W(8)?$"),
184    (instregex "RLW(INM|NM)(8)?$"),
185    (instregex "F(N)?ABS(D|S)$"),
186    (instregex "FNEG(D|S)$"),
187    (instregex "FCPSGN(D|S)$"),
188    (instregex "SRAW(I)?$"),
189    (instregex "ISEL(8)?$"),
190    RLDIMI,
191    XSIEXPDP,
192    FMR,
193    CREQV,
194    CRXOR,
195    TRECLAIM,
196    TSR,
197    TABORT
198)>;
199
200// Three cycle ALU vector operation that uses an entire superslice.
201// Uses both ALU units (the even ALUE and odd ALUO units), two pipelines
202// (EXECE, EXECO) and 1 dispatch (DISP) to the given superslice.
203def : InstRW<[P9_ALUE_3C, P9_ALUO_3C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
204      (instrs
205    (instregex "M(T|F)VSCR$"),
206    (instregex "VCMPNEZ(B|H|W)$"),
207    (instregex "VCMPEQU(B|H|W|D)$"),
208    (instregex "VCMPNE(B|H|W)$"),
209    (instregex "VABSDU(B|H|W)$"),
210    (instregex "VADDU(B|H|W)S$"),
211    (instregex "VAVG(S|U)(B|H|W)$"),
212    (instregex "VCMP(EQ|GE|GT)FP(o)?$"),
213    (instregex "VCMPBFP(o)?$"),
214    (instregex "VC(L|T)Z(B|H|W|D)$"),
215    (instregex "VADDS(B|H|W)S$"),
216    (instregex "V(MIN|MAX)FP$"),
217    (instregex "V(MIN|MAX)(S|U)(B|H|W|D)$"),
218    VBPERMD,
219    VADDCUW,
220    VPOPCNTW,
221    VPOPCNTD,
222    VPRTYBD,
223    VPRTYBW,
224    VSHASIGMAD,
225    VSHASIGMAW,
226    VSUBSBS,
227    VSUBSHS,
228    VSUBSWS,
229    VSUBUBS,
230    VSUBUHS,
231    VSUBUWS,
232    VSUBCUW,
233    VCMPGTSB,
234    VCMPGTSBo,
235    VCMPGTSD,
236    VCMPGTSDo,
237    VCMPGTSH,
238    VCMPGTSHo,
239    VCMPGTSW,
240    VCMPGTSWo,
241    VCMPGTUB,
242    VCMPGTUBo,
243    VCMPGTUD,
244    VCMPGTUDo,
245    VCMPGTUH,
246    VCMPGTUHo,
247    VCMPGTUW,
248    VCMPGTUWo,
249    VCMPNEBo,
250    VCMPNEHo,
251    VCMPNEWo,
252    VCMPNEZBo,
253    VCMPNEZHo,
254    VCMPNEZWo,
255    VCMPEQUBo,
256    VCMPEQUDo,
257    VCMPEQUHo,
258    VCMPEQUWo,
259    XVCMPEQDP,
260    XVCMPEQDPo,
261    XVCMPEQSP,
262    XVCMPEQSPo,
263    XVCMPGEDP,
264    XVCMPGEDPo,
265    XVCMPGESP,
266    XVCMPGESPo,
267    XVCMPGTDP,
268    XVCMPGTDPo,
269    XVCMPGTSP,
270    XVCMPGTSPo,
271    XVMAXDP,
272    XVMAXSP,
273    XVMINDP,
274    XVMINSP,
275    XVTDIVDP,
276    XVTDIVSP,
277    XVTSQRTDP,
278    XVTSQRTSP,
279    XVTSTDCDP,
280    XVTSTDCSP,
281    XVXSIGDP,
282    XVXSIGSP
283)>;
284
285// 7 cycle DP vector operation that uses an entire superslice.
286// Uses both DP units (the even DPE and odd DPO units), two pipelines (EXECE,
287// EXECO) and 1 dispatch (DISP) to the given superslice.
288def : InstRW<[P9_DPE_7C, P9_DPO_7C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
289      (instrs
290    VADDFP,
291    VCTSXS,
292    VCTSXS_0,
293    VCTUXS,
294    VCTUXS_0,
295    VEXPTEFP,
296    VLOGEFP,
297    VMADDFP,
298    VMHADDSHS,
299    VNMSUBFP,
300    VREFP,
301    VRFIM,
302    VRFIN,
303    VRFIP,
304    VRFIZ,
305    VRSQRTEFP,
306    VSUBFP,
307    XVADDDP,
308    XVADDSP,
309    XVCVDPSP,
310    XVCVDPSXDS,
311    XVCVDPSXWS,
312    XVCVDPUXDS,
313    XVCVDPUXWS,
314    XVCVHPSP,
315    XVCVSPDP,
316    XVCVSPHP,
317    XVCVSPSXDS,
318    XVCVSPSXWS,
319    XVCVSPUXDS,
320    XVCVSPUXWS,
321    XVCVSXDDP,
322    XVCVSXDSP,
323    XVCVSXWDP,
324    XVCVSXWSP,
325    XVCVUXDDP,
326    XVCVUXDSP,
327    XVCVUXWDP,
328    XVCVUXWSP,
329    XVMADDADP,
330    XVMADDASP,
331    XVMADDMDP,
332    XVMADDMSP,
333    XVMSUBADP,
334    XVMSUBASP,
335    XVMSUBMDP,
336    XVMSUBMSP,
337    XVMULDP,
338    XVMULSP,
339    XVNMADDADP,
340    XVNMADDASP,
341    XVNMADDMDP,
342    XVNMADDMSP,
343    XVNMSUBADP,
344    XVNMSUBASP,
345    XVNMSUBMDP,
346    XVNMSUBMSP,
347    XVRDPI,
348    XVRDPIC,
349    XVRDPIM,
350    XVRDPIP,
351    XVRDPIZ,
352    XVREDP,
353    XVRESP,
354    XVRSPI,
355    XVRSPIC,
356    XVRSPIM,
357    XVRSPIP,
358    XVRSPIZ,
359    XVRSQRTEDP,
360    XVRSQRTESP,
361    XVSUBDP,
362    XVSUBSP,
363    VCFSX,
364    VCFSX_0,
365    VCFUX,
366    VCFUX_0,
367    VMHRADDSHS,
368    VMLADDUHM,
369    VMSUMMBM,
370    VMSUMSHM,
371    VMSUMSHS,
372    VMSUMUBM,
373    VMSUMUHM,
374    VMSUMUHS,
375    VMULESB,
376    VMULESH,
377    VMULESW,
378    VMULEUB,
379    VMULEUH,
380    VMULEUW,
381    VMULOSB,
382    VMULOSH,
383    VMULOSW,
384    VMULOUB,
385    VMULOUH,
386    VMULOUW,
387    VMULUWM,
388    VSUM2SWS,
389    VSUM4SBS,
390    VSUM4SHS,
391    VSUM4UBS,
392    VSUMSWS
393)>;
394
395// 5 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
396// dispatch units for the superslice.
397def : InstRW<[P9_DP_5C, IP_EXEC_1C, DISP_3SLOTS_1C],
398      (instrs
399    (instregex "MADD(HD|HDU|LD|LD8)$"),
400    (instregex "MUL(HD|HW|LD|LI|LI8|LW)(U)?$")
401)>;
402
403// 7 cycle Restricted DP operation. One DP unit, one EXEC pipeline and all three
404// dispatch units for the superslice.
405def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_3SLOTS_1C],
406      (instrs
407    FRSP,
408    (instregex "FRI(N|P|Z|M)(D|S)$"),
409    (instregex "FRE(S)?$"),
410    (instregex "FADD(S)?$"),
411    (instregex "FMSUB(S)?$"),
412    (instregex "FMADD(S)?$"),
413    (instregex "FSUB(S)?$"),
414    (instregex "FCFID(U)?(S)?$"),
415    (instregex "FCTID(U)?(Z)?$"),
416    (instregex "FCTIW(U)?(Z)?$"),
417    (instregex "FRSQRTE(S)?$"),
418    FNMADDS,
419    FNMADD,
420    FNMSUBS,
421    FNMSUB,
422    FSELD,
423    FSELS,
424    FMULS,
425    FMUL,
426    XSMADDADP,
427    XSMADDASP,
428    XSMADDMDP,
429    XSMADDMSP,
430    XSMSUBADP,
431    XSMSUBASP,
432    XSMSUBMDP,
433    XSMSUBMSP,
434    XSMULDP,
435    XSMULSP,
436    XSNMADDADP,
437    XSNMADDASP,
438    XSNMADDMDP,
439    XSNMADDMSP,
440    XSNMSUBADP,
441    XSNMSUBASP,
442    XSNMSUBMDP,
443    XSNMSUBMSP
444)>;
445
446// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
447// These operations can be done in parallel. The DP is restricted so we need a
448// full 4 dispatches.
449def : InstRW<[P9_DP_7C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
450              DISP_3SLOTS_1C, DISP_1C],
451      (instrs
452    (instregex "FSEL(D|S)o$")
453)>;
454
455// 5 Cycle Restricted DP operation and one 2 cycle ALU operation.
456def : InstRW<[P9_DPOpAndALUOp_7C, IP_EXEC_1C, IP_EXEC_1C,
457              DISP_3SLOTS_1C, DISP_1C],
458      (instrs
459    (instregex "MUL(H|L)(D|W)(U)?o$")
460)>;
461
462// 7 cycle Restricted DP operation and one 3 cycle ALU operation.
463// These operations must be done sequentially. The DP is restricted so we need
464// a full 4 dispatches.
465def : InstRW<[P9_DPOpAndALU2Op_10C, IP_EXEC_1C, IP_EXEC_1C,
466              DISP_3SLOTS_1C, DISP_1C],
467      (instrs
468    (instregex "FRI(N|P|Z|M)(D|S)o$"),
469    (instregex "FRE(S)?o$"),
470    (instregex "FADD(S)?o$"),
471    (instregex "FSUB(S)?o$"),
472    (instregex "F(N)?MSUB(S)?o$"),
473    (instregex "F(N)?MADD(S)?o$"),
474    (instregex "FCFID(U)?(S)?o$"),
475    (instregex "FCTID(U)?(Z)?o$"),
476    (instregex "FCTIW(U)?(Z)?o$"),
477    (instregex "FMUL(S)?o$"),
478    (instregex "FRSQRTE(S)?o$"),
479    FRSPo
480)>;
481
482// 7 cycle DP operation. One DP unit, one EXEC pipeline and 1 dispatch unit.
483def : InstRW<[P9_DP_7C, IP_EXEC_1C, DISP_1C],
484      (instrs
485    XSADDDP,
486    XSADDSP,
487    XSCVDPHP,
488    XSCVDPSP,
489    XSCVDPSXDS,
490    XSCVDPSXDSs,
491    XSCVDPSXWS,
492    XSCVDPUXDS,
493    XSCVDPUXDSs,
494    XSCVDPUXWS,
495    XSCVDPSXWSs,
496    XSCVDPUXWSs,
497    XSCVHPDP,
498    XSCVSPDP,
499    XSCVSXDDP,
500    XSCVSXDSP,
501    XSCVUXDDP,
502    XSCVUXDSP,
503    XSRDPI,
504    XSRDPIC,
505    XSRDPIM,
506    XSRDPIP,
507    XSRDPIZ,
508    XSREDP,
509    XSRESP,
510    XSRSQRTEDP,
511    XSRSQRTESP,
512    XSSUBDP,
513    XSSUBSP,
514    XSCVDPSPN,
515    XSRSP
516)>;
517
518// Three Cycle PM operation. Only one PM unit per superslice so we use the whole
519// superslice. That includes both exec pipelines (EXECO, EXECE) and one
520// dispatch.
521def : InstRW<[P9_PM_3C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
522      (instrs
523    (instregex "LVS(L|R)$"),
524    (instregex "VSPLTIS(W|H|B)$"),
525    (instregex "VSPLT(W|H|B)(s)?$"),
526    (instregex "V_SETALLONES(B|H)?$"),
527    (instregex "VEXTRACTU(B|H|W)$"),
528    (instregex "VINSERT(B|H|W|D)$"),
529    MFVSRLD,
530    MTVSRWS,
531    VBPERMQ,
532    VCLZLSBB,
533    VCTZLSBB,
534    VEXTRACTD,
535    VEXTUBLX,
536    VEXTUBRX,
537    VEXTUHLX,
538    VEXTUHRX,
539    VEXTUWLX,
540    VEXTUWRX,
541    VGBBD,
542    VMRGHB,
543    VMRGHH,
544    VMRGHW,
545    VMRGLB,
546    VMRGLH,
547    VMRGLW,
548    VPERM,
549    VPERMR,
550    VPERMXOR,
551    VPKPX,
552    VPKSDSS,
553    VPKSDUS,
554    VPKSHSS,
555    VPKSHUS,
556    VPKSWSS,
557    VPKSWUS,
558    VPKUDUM,
559    VPKUDUS,
560    VPKUHUM,
561    VPKUHUS,
562    VPKUWUM,
563    VPKUWUS,
564    VPRTYBQ,
565    VSL,
566    VSLDOI,
567    VSLO,
568    VSLV,
569    VSR,
570    VSRO,
571    VSRV,
572    VUPKHPX,
573    VUPKHSB,
574    VUPKHSH,
575    VUPKHSW,
576    VUPKLPX,
577    VUPKLSB,
578    VUPKLSH,
579    VUPKLSW,
580    XXBRD,
581    XXBRH,
582    XXBRQ,
583    XXBRW,
584    XXEXTRACTUW,
585    XXINSERTW,
586    XXMRGHW,
587    XXMRGLW,
588    XXPERM,
589    XXPERMR,
590    XXSLDWI,
591    XXSLDWIs,
592    XXSPLTIB,
593    XXSPLTW,
594    XXSPLTWs,
595    XXPERMDI,
596    XXPERMDIs,
597    VADDCUQ,
598    VADDECUQ,
599    VADDEUQM,
600    VADDUQM,
601    VMUL10CUQ,
602    VMUL10ECUQ,
603    VMUL10EUQ,
604    VMUL10UQ,
605    VSUBCUQ,
606    VSUBECUQ,
607    VSUBEUQM,
608    VSUBUQM,
609    XSCMPEXPQP,
610    XSCMPOQP,
611    XSCMPUQP,
612    XSTSTDCQP,
613    XSXSIGQP,
614    BCDCFNo,
615    BCDCFZo,
616    BCDCPSGNo,
617    BCDCTNo,
618    BCDCTZo,
619    BCDSETSGNo,
620    BCDSo,
621    BCDTRUNCo,
622    BCDUSo,
623    BCDUTRUNCo
624)>;
625
626// 12 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
627// superslice. That includes both exec pipelines (EXECO, EXECE) and one
628// dispatch.
629def : InstRW<[P9_DFU_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
630      (instrs
631    BCDSRo,
632    XSADDQP,
633    XSADDQPO,
634    XSCVDPQP,
635    XSCVQPDP,
636    XSCVQPDPO,
637    XSCVQPSDZ,
638    XSCVQPSWZ,
639    XSCVQPUDZ,
640    XSCVQPUWZ,
641    XSCVSDQP,
642    XSCVUDQP,
643    XSRQPI,
644    XSRQPIX,
645    XSRQPXP,
646    XSSUBQP,
647    XSSUBQPO
648)>;
649
650// 23 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
651// superslice. That includes both exec pipelines (EXECO, EXECE) and one
652// dispatch.
653def : InstRW<[P9_DFU_23C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
654      (instrs
655    BCDCTSQo
656)>;
657
658// 24 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
659// superslice. That includes both exec pipelines (EXECO, EXECE) and one
660// dispatch.
661def : InstRW<[P9_DFU_24C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
662      (instrs
663    XSMADDQP,
664    XSMADDQPO,
665    XSMSUBQP,
666    XSMSUBQPO,
667    XSMULQP,
668    XSMULQPO,
669    XSNMADDQP,
670    XSNMADDQPO,
671    XSNMSUBQP,
672    XSNMSUBQPO
673)>;
674
675// 37 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
676// superslice. That includes both exec pipelines (EXECO, EXECE) and one
677// dispatch.
678def : InstRW<[P9_DFU_37C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
679      (instrs
680    BCDCFSQo
681)>;
682
683// 58 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
684// superslice. That includes both exec pipelines (EXECO, EXECE) and one
685// dispatch.
686def : InstRW<[P9_DFU_58C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
687      (instrs
688    XSDIVQP,
689    XSDIVQPO
690)>;
691
692// 76 Cycle DFU operation. Only one DFU unit per CPU so we use a whole
693// superslice. That includes both exec pipelines (EXECO, EXECE) and one
694// dispatch.
695def : InstRW<[P9_DFU_76C, IP_EXECE_1C, IP_EXECO_1C, DISP_1C],
696      (instrs
697    XSSQRTQP,
698    XSSQRTQPO
699)>;
700
701// 6 Cycle Load uses a single slice.
702def : InstRW<[P9_LS_6C, IP_AGEN_1C, DISP_1C],
703      (instrs
704    (instregex "LXVL(L)?")
705)>;
706
707// 5 Cycle Load uses a single slice.
708def : InstRW<[P9_LS_5C, IP_AGEN_1C, DISP_1C],
709      (instrs
710    (instregex "LVE(B|H|W)X$"),
711    (instregex "LVX(L)?"),
712    (instregex "LXSI(B|H)ZX$"),
713    LXSDX,
714    LXVB16X,
715    LXVD2X,
716    LXVWSX,
717    LXSIWZX,
718    LXV,
719    LXVX,
720    LXSD,
721    DFLOADf64,
722    XFLOADf64,
723    LIWZX
724)>;
725
726// 4 Cycle Load uses a single slice.
727def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_1C],
728      (instrs
729    (instregex "DCB(F|T|ST)(EP)?$"),
730    (instregex "DCBZ(L)?(EP)?$"),
731    (instregex "DCBTST(EP)?$"),
732    (instregex "CP_COPY(8)?$"),
733    (instregex "CP_PASTE(8)?$"),
734    (instregex "ICBI(EP)?$"),
735    (instregex "ICBT(LS)?$"),
736    (instregex "LBARX(L)?$"),
737    (instregex "LBZ(CIX|8|X|X8|XTLS|XTLS_32)?(_)?$"),
738    (instregex "LD(ARX|ARXL|BRX|CIX|X|XTLS)?(_)?$"),
739    (instregex "LH(A|B)RX(L)?(8)?$"),
740    (instregex "LHZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
741    (instregex "LWARX(L)?$"),
742    (instregex "LWBRX(8)?$"),
743    (instregex "LWZ(8|CIX|X|X8|XTLS|XTLS_32)?(_)?$"),
744    CP_ABORT,
745    DARN,
746    EnforceIEIO,
747    ISYNC,
748    MSGSYNC,
749    TLBSYNC,
750    SYNC,
751    LMW,
752    LSWI
753)>;
754
755// 4 Cycle Restricted load uses a single slice but the dispatch for the whole
756// superslice.
757def : InstRW<[P9_LS_4C, IP_AGEN_1C, DISP_3SLOTS_1C],
758      (instrs
759    LFIWZX,
760    LFDX,
761    LFD
762)>;
763
764// Cracked Load Instructions.
765// Load instructions that can be done in parallel.
766def : InstRW<[P9_LS_4C, P9_LS_4C, IP_AGEN_1C, IP_AGEN_1C,
767              DISP_PAIR_1C],
768      (instrs
769    SLBIA,
770    SLBIE,
771    SLBMFEE,
772    SLBMFEV,
773    SLBMTE,
774    TLBIEL
775)>;
776
777// Cracked Load Instruction.
778// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
779// operations can be run in parallel.
780def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
781              DISP_PAIR_1C, DISP_PAIR_1C],
782      (instrs
783    (instregex "L(W|H)ZU(X)?(8)?$")
784)>;
785
786// Cracked TEND Instruction.
787// Requires Load and ALU pieces totaling 6 cycles. The Load and ALU
788// operations can be run in parallel.
789def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_EXEC_1C, IP_AGEN_1C,
790              DISP_1C, DISP_1C],
791      (instrs
792    TEND
793)>;
794
795
796// Cracked Store Instruction
797// Consecutive Store and ALU instructions. The store is restricted and requires
798// three dispatches.
799def : InstRW<[P9_StoreAndALUOp_3C, IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C,
800              DISP_3SLOTS_1C, DISP_1C],
801      (instrs
802    (instregex "ST(B|H|W|D)CX$")
803)>;
804
805// Cracked Load Instruction.
806// Two consecutive load operations for a total of 8 cycles.
807def : InstRW<[P9_LoadAndLoadOp_8C, IP_AGEN_1C, IP_AGEN_1C,
808              DISP_1C, DISP_1C],
809      (instrs
810    LDMX
811)>;
812
813// Cracked Load instruction.
814// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
815// operations cannot be done at the same time and so their latencies are added.
816def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
817              DISP_1C, DISP_1C],
818      (instrs
819    (instregex "LHA(X)?(8)?$"),
820    (instregex "CP_PASTE(8)?o$"),
821    (instregex "LWA(X)?(_32)?$"),
822    TCHECK
823)>;
824
825// Cracked Restricted Load instruction.
826// Requires consecutive Load and ALU pieces totaling 6 cycles. The Load and ALU
827// operations cannot be done at the same time and so their latencies are added.
828// Full 6 dispatches are required as this is both cracked and restricted.
829def : InstRW<[P9_LoadAndALUOp_6C, IP_EXEC_1C, IP_AGEN_1C,
830              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
831      (instrs
832    LFIWAX
833)>;
834
835// Cracked Load instruction.
836// Requires consecutive Load and ALU pieces totaling 7 cycles. The Load and ALU
837// operations cannot be done at the same time and so their latencies are added.
838// Two dispatches are required as this is a cracked instruction.
839def : InstRW<[P9_LoadAndALUOp_7C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
840      (instrs
841    LXSIWAX,
842    LIWAX
843)>;
844
845// Cracked Load instruction.
846// Requires consecutive Load (4 cycles) and ALU (3 cycles) pieces totaling 7
847// cycles. The Load and ALU operations cannot be done at the same time and so
848// their latencies are added.
849// Full 6 dispatches are required as this is a restricted instruction.
850def : InstRW<[P9_LoadAndALU2Op_7C, IP_AGEN_1C, IP_EXEC_1C,
851              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
852      (instrs
853    LFSX,
854    LFS
855)>;
856
857// Cracked Load instruction.
858// Requires consecutive Load and ALU pieces totaling 8 cycles. The Load and ALU
859// operations cannot be done at the same time and so their latencies are added.
860// Two dispatches are required as this is a cracked instruction.
861def : InstRW<[P9_LoadAndALU2Op_8C, IP_AGEN_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
862      (instrs
863    LXSSP,
864    LXSSPX,
865    XFLOADf32,
866    DFLOADf32
867)>;
868
869// Cracked 3-Way Load Instruction
870// Load with two ALU operations that depend on each other
871def : InstRW<[P9_LoadAndALUOp_6C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
872              DISP_PAIR_1C, DISP_PAIR_1C, DISP_1C],
873      (instrs
874    (instregex "LHAU(X)?(8)?$"),
875    LWAUX
876)>;
877
878// Cracked Load that requires the PM resource.
879// Since the Load and the PM cannot be done at the same time the latencies are
880// added. Requires 8 cycles. Since the PM requires the full superslice we need
881// both EXECE, EXECO pipelines as well as 1 dispatch for the PM. The Load
882// requires the remaining 1 dispatch.
883def : InstRW<[P9_LoadAndPMOp_8C, IP_AGEN_1C, IP_EXECE_1C, IP_EXECO_1C,
884              DISP_1C, DISP_1C],
885      (instrs
886    LXVH8X,
887    LXVDSX,
888    LXVW4X
889)>;
890
891// Single slice Restricted store operation. The restricted operation requires
892// all three dispatches for the superslice.
893def : InstRW<[P9_LS_1C, IP_EXEC_1C, IP_AGEN_1C, DISP_3SLOTS_1C],
894      (instrs
895    (instregex "STF(S|D|IWX|SX|DX)$"),
896    (instregex "STXS(D|DX|SPX|IWX|IBX|IHX|SP)(v)?$"),
897    (instregex "STW(8)?$"),
898    (instregex "(D|X)FSTORE(f32|f64)$"),
899    (instregex "ST(W|H|D)BRX$"),
900    (instregex "ST(B|H|D)(8)?$"),
901    (instregex "ST(B|W|H|D)(CI)?X(TLS|TLS_32)?(8)?(_)?$"),
902    STIWX,
903    SLBIEG,
904    STMW,
905    STSWI,
906    TLBIE
907)>;
908
909// Vector Store Instruction
910// Requires the whole superslice and therefore requires one dispatch
911// as well as both the Even and Odd exec pipelines.
912def : InstRW<[P9_LS_1C, IP_EXECE_1C, IP_EXECO_1C, IP_AGEN_1C, DISP_1C],
913      (instrs
914    (instregex "STVE(B|H|W)X$"),
915    (instregex "STVX(L)?$"),
916    (instregex "STXV(B16X|H8X|W4X|D2X|L|LL|X)?$")
917)>;
918
919// 5 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
920// superslice. That includes both exec pipelines (EXECO, EXECE) and two
921// dispatches.
922def : InstRW<[P9_DIV_5C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
923      (instrs
924    (instregex "MTCTR(8)?(loop)?$"),
925    (instregex "MTLR(8)?$")
926)>;
927
928// 12 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
929// superslice. That includes both exec pipelines (EXECO, EXECE) and two
930// dispatches.
931def : InstRW<[P9_DIV_12C, IP_EXECE_1C, IP_EXECO_1C, DISP_EVEN_1C],
932      (instrs
933    (instregex "M(T|F)VRSAVE(v)?$"),
934    (instregex "M(T|F)PMR$"),
935    (instregex "M(T|F)TB(8)?$"),
936    (instregex "MF(SPR|CTR|LR)(8)?$"),
937    (instregex "M(T|F)MSR(D)?$"),
938    (instregex "MTSPR(8)?$")
939)>;
940
941// 16 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
942// superslice. That includes both exec pipelines (EXECO, EXECE) and two
943// dispatches.
944def : InstRW<[P9_DIV_16C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
945      (instrs
946    DIVW,
947    DIVWU,
948    MODSW
949)>;
950
951// 24 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
952// superslice. That includes both exec pipelines (EXECO, EXECE) and two
953// dispatches.
954def : InstRW<[P9_DIV_24C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
955      (instrs
956    DIVWE,
957    DIVD,
958    DIVWEU,
959    DIVDU,
960    MODSD,
961    MODUD,
962    MODUW
963)>;
964
965// 40 Cycle DIV operation. Only one DIV unit per superslice so we use the whole
966// superslice. That includes both exec pipelines (EXECO, EXECE) and two
967// dispatches.
968def : InstRW<[P9_DIV_40C_8, IP_EXECO_1C, IP_EXECE_1C, DISP_EVEN_1C],
969      (instrs
970    DIVDE,
971    DIVDEU
972)>;
973
974// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
975// and one full superslice for the DIV operation since there is only one DIV per
976// superslice. Latency of DIV plus ALU is 18.
977def : InstRW<[P9_IntDivAndALUOp_18C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
978              DISP_EVEN_1C, DISP_1C],
979      (instrs
980    (instregex "DIVW(U)?(O)?o$")
981)>;
982
983// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
984// and one full superslice for the DIV operation since there is only one DIV per
985// superslice. Latency of DIV plus ALU is 26.
986def : InstRW<[P9_IntDivAndALUOp_26C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
987              DISP_EVEN_1C, DISP_1C],
988      (instrs
989    DIVDo,
990    DIVDUo,
991    DIVWEo,
992    DIVWEUo
993)>;
994
995// Cracked DIV and ALU operation. Requires one full slice for the ALU operation
996// and one full superslice for the DIV operation since there is only one DIV per
997// superslice. Latency of DIV plus ALU is 42.
998def : InstRW<[P9_IntDivAndALUOp_42C_8, IP_EXECE_1C, IP_EXECO_1C, IP_EXEC_1C,
999              DISP_EVEN_1C, DISP_1C],
1000      (instrs
1001    DIVDEo,
1002    DIVDEUo
1003)>;
1004
1005// CR access instructions in _BrMCR, IIC_BrMCRX.
1006
1007// Cracked, restricted, ALU operations.
1008// Here the two ALU ops can actually be done in parallel and therefore the
1009// latencies are not added together. Otherwise this is like having two
1010// instructions running together on two pipelines and 6 dispatches. ALU ops are
1011// 2 cycles each.
1012def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
1013              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1014      (instrs
1015    MTCRF,
1016    MTCRF8
1017)>;
1018
1019// Cracked ALU operations.
1020// Here the two ALU ops can actually be done in parallel and therefore the
1021// latencies are not added together. Otherwise this is like having two
1022// instructions running together on two pipelines and 2 dispatches. ALU ops are
1023// 2 cycles each.
1024def : InstRW<[P9_ALU_2C, P9_ALU_2C, IP_EXEC_1C, IP_EXEC_1C,
1025              DISP_1C, DISP_1C],
1026      (instrs
1027    (instregex "ADDC(8)?o$"),
1028    (instregex "SUBFC(8)?o$")
1029)>;
1030
1031// Cracked ALU operations.
1032// Two ALU ops can be done in parallel.
1033// One is a three cycle ALU op, the other is a two cycle ALU op.
1034// One of the ALU ops is restricted the other is not so we have a total of
1035// 4 dispatches.
1036def : InstRW<[P9_ALU_2C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1037              DISP_3SLOTS_1C, DISP_1C],
1038      (instrs
1039    (instregex "F(N)?ABS(D|S)o$"),
1040    (instregex "FCPSGN(D|S)o$"),
1041    (instregex "FNEG(D|S)o$"),
1042    FMRo
1043)>;
1044
1045// Cracked ALU operations.
1046// Here the two ALU ops can actually be done in parallel and therefore the
1047// latencies are not added together. Otherwise this is like having two
1048// instructions running together on two pipelines and 2 dispatches.
1049// ALU ops are 3 cycles each.
1050def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1051              DISP_1C, DISP_1C],
1052      (instrs
1053    MCRFS
1054)>;
1055
1056// Cracked Restricted ALU operations.
1057// Here the two ALU ops can actually be done in parallel and therefore the
1058// latencies are not added together. Otherwise this is like having two
1059// instructions running together on two pipelines and 6 dispatches.
1060// ALU ops are 3 cycles each.
1061def : InstRW<[P9_ALU_3C, P9_ALU_3C, IP_EXEC_1C, IP_EXEC_1C,
1062              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1063      (instrs
1064    (instregex "MTFSF(b|o)?$"),
1065    (instregex "MTFSFI(o)?$")
1066)>;
1067
1068// Cracked instruction made of two ALU ops.
1069// The two ops cannot be done in parallel.
1070// One of the ALU ops is restricted and takes 3 dispatches.
1071def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C,
1072              DISP_3SLOTS_1C, DISP_1C],
1073      (instrs
1074    (instregex "RLD(I)?C(R|L)o$"),
1075    (instregex "RLW(IMI|INM|NM)(8)?o$"),
1076    (instregex "SLW(8)?o$"),
1077    (instregex "SRAW(I)?o$"),
1078    (instregex "SRW(8)?o$"),
1079    RLDICL_32o,
1080    RLDIMIo
1081)>;
1082
1083// Cracked instruction made of two ALU ops.
1084// The two ops cannot be done in parallel.
1085// Both of the ALU ops are restricted and take 3 dispatches.
1086def : InstRW<[P9_ALU2OpAndALU2Op_6C, IP_EXEC_1C, IP_EXEC_1C,
1087              DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1088      (instrs
1089    (instregex "MFFS(L|CE|o)?$")
1090)>;
1091
1092// Cracked ALU instruction composed of three consecutive 2 cycle ALU ops for a
1093// total of 6 cycles. All of the ALU operations are also restricted so each
1094// takes 3 dispatches for a total of 9.
1095def : InstRW<[P9_ALUOpAndALUOpAndALUOp_6C, IP_EXEC_1C, IP_EXEC_1C, IP_EXEC_1C,
1096              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_3SLOTS_1C],
1097      (instrs
1098    (instregex "MFCR(8)?$")
1099)>;
1100
1101// Cracked instruction made of two ALU ops.
1102// The two ops cannot be done in parallel.
// Neither op is restricted here: both dispatch entries are plain DISP_1C.
1103def : InstRW<[P9_ALUOpAndALUOp_4C, IP_EXEC_1C, IP_EXEC_1C, DISP_1C, DISP_1C],
1104      (instrs
1105    (instregex "EXTSWSLI_32_64o$"),
1106    (instregex "SRAD(I)?o$"),
1107    EXTSWSLIo,
1108    SLDo,
1109    SRDo,
1110    RLDICo
1111)>;
1112
1113// 33 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
// Uses the same P9_DP_33C_8 resource as XSDIVDP; only the dispatch entry
// differs (DISP_3SLOTS_1C here, DISP_1C for XSDIVDP).
1114def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_3SLOTS_1C],
1115      (instrs
1116    FDIV
1117)>;
1118
1119// 33 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// The combined resource is 36 cycles (33 + 3). Dispatch entries are
// DISP_3SLOTS_1C and DISP_1C, presumably the restricted DP op and the ALU op
// respectively.
1120def : InstRW<[P9_DPOpAndALU2Op_36C_8, IP_EXEC_1C, IP_EXEC_1C,
1121              DISP_3SLOTS_1C, DISP_1C],
1122      (instrs
1123    FDIVo
1124)>;
1125
1126// 36 Cycle DP Instruction.
1127// Instruction can be done on a single slice.
// Not restricted: one IP_EXEC_1C pipeline and a single DISP_1C dispatch.
1128def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_1C],
1129      (instrs
1130    XSSQRTDP
1131)>;
1132
1133// 36 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
// Same P9_DP_36C_10 resource as XSSQRTDP, dispatched with DISP_3SLOTS_1C.
1134def : InstRW<[P9_DP_36C_10, IP_EXEC_1C, DISP_3SLOTS_1C],
1135      (instrs
1136    FSQRT
1137)>;
1138
1139// 36 Cycle DP Vector Instruction.
// Lists both the DPE and DPO resources and both the EXECE and EXECO pipelines
// with one DISP_1C, the same shape as the other vector DP entries in this file.
1140def : InstRW<[P9_DPE_36C_10, P9_DPO_36C_10, IP_EXECE_1C, IP_EXECO_1C,
1141              DISP_1C],
1142      (instrs
1143    XVSQRTDP
1144)>;
1145
1146// 27 Cycle DP Vector Instruction.
// Lists both the DPE and DPO resources and both the EXECE and EXECO pipelines
// with one DISP_1C, the same shape as the other vector DP entries in this file.
1147def : InstRW<[P9_DPE_27C_10, P9_DPO_27C_10, IP_EXECE_1C, IP_EXECO_1C,
1148              DISP_1C],
1149      (instrs
1150    XVSQRTSP
1151)>;
1152
1153// 36 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// The combined resource is 39 cycles (36 + 3). Dispatch entries are
// DISP_3SLOTS_1C and DISP_1C, presumably the restricted DP op and the ALU op
// respectively.
1154def : InstRW<[P9_DPOpAndALU2Op_39C_10, IP_EXEC_1C, IP_EXEC_1C,
1155              DISP_3SLOTS_1C, DISP_1C],
1156      (instrs
1157    FSQRTo
1158)>;
1159
1160// 26 Cycle DP Instruction.
// Not restricted: one IP_EXEC_1C pipeline and a single DISP_1C dispatch.
1161def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_1C],
1162      (instrs
1163    XSSQRTSP
1164)>;
1165
1166// 26 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
// Same P9_DP_26C_5 resource as XSSQRTSP, dispatched with DISP_3SLOTS_1C.
1167def : InstRW<[P9_DP_26C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
1168      (instrs
1169    FSQRTS
1170)>;
1171
1172// 26 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// The combined resource is 29 cycles (26 + 3). Dispatch entries are
// DISP_3SLOTS_1C and DISP_1C, presumably the restricted DP op and the ALU op
// respectively.
1173def : InstRW<[P9_DPOpAndALU2Op_29C_5, IP_EXEC_1C, IP_EXEC_1C,
1174              DISP_3SLOTS_1C, DISP_1C],
1175      (instrs
1176    FSQRTSo
1177)>;
1178
1179// 33 Cycle DP Instruction. Takes one slice and 1 dispatch.
// Same P9_DP_33C_8 resource as FDIV, but not restricted (DISP_1C).
1180def : InstRW<[P9_DP_33C_8, IP_EXEC_1C, DISP_1C],
1181      (instrs
1182    XSDIVDP
1183)>;
1184
1185// 22 Cycle DP Instruction Restricted. Takes one slice and 3 dispatches.
// Same P9_DP_22C_5 resource as XSDIVSP, dispatched with DISP_3SLOTS_1C.
1186def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_3SLOTS_1C],
1187      (instrs
1188    FDIVS
1189)>;
1190
1191// 22 Cycle DP Instruction Restricted and Cracked with 3 Cycle ALU.
// The combined resource is 25 cycles (22 + 3), matching the other
// DPOpAndALU2Op entries, which all add a 3 cycle ALU op.
1192def : InstRW<[P9_DPOpAndALU2Op_25C_5, IP_EXEC_1C, IP_EXEC_1C,
1193              DISP_3SLOTS_1C, DISP_1C],
1194      (instrs
1195    FDIVSo
1196)>;
1197
1198// 22 Cycle DP Instruction. Takes one slice and 1 dispatch.
// Same P9_DP_22C_5 resource as FDIVS, but not restricted (DISP_1C).
1199def : InstRW<[P9_DP_22C_5, IP_EXEC_1C, DISP_1C],
1200      (instrs
1201    XSDIVSP
1202)>;
1203
1204// 24 Cycle DP Vector Instruction. Takes one full superslice.
1205// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
1206// superslice.
// Both the DPE and DPO 24 cycle resources are listed alongside the pipelines.
1207def : InstRW<[P9_DPE_24C_8, P9_DPO_24C_8, IP_EXECE_1C, IP_EXECO_1C,
1208              DISP_1C],
1209      (instrs
1210    XVDIVSP
1211)>;
1212
1213// 33 Cycle DP Vector Instruction. Takes one full superslice.
1214// Includes both EXECE, EXECO pipelines and 1 dispatch for the given
1215// superslice.
// Both the DPE and DPO 33 cycle resources are listed alongside the pipelines.
1216def : InstRW<[P9_DPE_33C_8, P9_DPO_33C_8, IP_EXECE_1C, IP_EXECO_1C,
1217              DISP_1C],
1218      (instrs
1219    XVDIVDP
1220)>;
1221
1222// Instruction cracked into three pieces. One Load and two ALU operations.
1223// The Load and one of the ALU ops cannot be run at the same time and so the
1224// latencies are added together for 6 cycles. The remaining ALU is 2 cycles.
1225// Both the load and the ALU that depends on it are restricted and so they take
1226// a total of 7 dispatches. The final 2 dispatches come from the second ALU op.
1227// The two EXEC pipelines are for the 2 ALUs while the AGEN is for the load.
// NOTE(review): the counts above do not line up with the list below. The
// combined resource is named P9_LoadAndALU2Op_7C (not 6C) and the dispatch
// entries are DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C. Confirm which side is
// stale before relying on the numbers in this comment.
1228def : InstRW<[P9_LoadAndALU2Op_7C, P9_ALU_2C,
1229              IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1230              DISP_3SLOTS_1C, DISP_3SLOTS_1C, DISP_1C],
1231      (instrs
1232    (instregex "LF(SU|SUX)$")
1233)>;
1234
1235// Cracked instruction made up of a Store and an ALU. The ALU does not depend on
1236// the store and so it can be run at the same time as the store. The store is
1237// also restricted.
// Dispatch entries: DISP_3SLOTS_1C for the restricted store, DISP_1C for the
// ALU op.
// NOTE(review): two IP_EXEC_1C entries are listed although only one ALU op is
// described; the Load + ALU entries in this file list a single IP_EXEC_1C.
// Confirm the second pipeline is intended.
1238def : InstRW<[P9_LS_1C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C, IP_EXEC_1C,
1239              DISP_3SLOTS_1C, DISP_1C],
1240      (instrs
1241    (instregex "STF(S|D)U(X)?$"),
1242    (instregex "ST(B|H|W|D)U(X)?(8)?$")
1243)>;
1244
1245// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1246// the load and so it can be run at the same time as the load.
// The AGEN pipeline serves the load and the EXEC pipeline the ALU op. Unlike
// the restricted LFDU/LFDUX entry, both dispatch entries here are
// DISP_PAIR_1C.
1247def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1248              DISP_PAIR_1C, DISP_PAIR_1C],
1249      (instrs
1250    (instregex "LBZU(X)?(8)?$"),
1251    (instregex "LDU(X)?$")
1252)>;
1253
1254// Cracked instruction made up of a Load and an ALU. The ALU does not depend on
1255// the load and so it can be run at the same time as the load. The load is also
1256// restricted. 3 dispatches are from the restricted load while the other two
1257// are from the ALU. The AGEN pipeline is from the load and the EXEC pipeline
1258// is required for the ALU.
// NOTE(review): the ALU's dispatch entry below is a single DISP_1C, which does
// not obviously match the "other two" wording above -- confirm.
1259def : InstRW<[P9_LS_4C, P9_ALU_2C, IP_AGEN_1C, IP_EXEC_1C,
1260              DISP_3SLOTS_1C, DISP_1C],
1261      (instrs
1262    (instregex "LF(DU|DUX)$")
1263)>;
1264
1265// Crypto Instructions
1266
1267// 6 Cycle CY operation. Only one CY unit per CPU so we use a whole
1268// superslice. That includes both exec pipelines (EXECO, EXECE) and one
1269// dispatch.
// Covers the VPMSUM*, V(N)CIPHER(LAST) and VSBOX instructions named below.
1270def : InstRW<[P9_CY_6C, IP_EXECO_1C, IP_EXECE_1C, DISP_1C],
1271      (instrs
1272    (instregex "VPMSUM(B|H|W|D)$"),
1273    (instregex "V(N)?CIPHER(LAST)?$"),
1274    VSBOX
1275)>;
1276
1277// Branch Instructions
1278
1279// Two Cycle Branch
// No IP_* execution pipeline is listed for these: the list holds only the
// branch resource (P9_BR_2C) and the branch dispatch entry (DISP_BR_1C).
// The instruction set mixes regex matches with explicitly named opcodes.
1280def : InstRW<[P9_BR_2C, DISP_BR_1C],
1281      (instrs
1282  (instregex "BCCCTR(L)?(8)?$"),
1283  (instregex "BCCL(A|R|RL)?$"),
1284  (instregex "BCCTR(L)?(8)?(n)?$"),
1285  (instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
1286  (instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
1287  (instregex "BL(_TLS|_NOP)?$"),
1288  (instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
1289  (instregex "BLA(8|8_NOP)?$"),
1290  (instregex "BLR(8|L)?$"),
1291  (instregex "TAILB(A)?(8)?$"),
1292  (instregex "TAILBCTR(8)?$"),
1293  (instregex "gBC(A|Aat|CTR|CTRL|L|LA|LAat|LR|LRL|Lat|at)?$"),
1294  (instregex "BCLR(L)?(n)?$"),
1295  (instregex "BCTR(L)?(8)?$"),
1296  B,
1297  BA,
1298  BC,
1299  BCC,
1300  BCCA,
1301  BCL,
1302  BCLalways,
1303  BCLn,
1304  BCTRL8_LDinto_toc,
1305  BCn,
1306  CTRL_DEP
1307)>;
1308
1309// Five Cycle Branch with a 2 Cycle ALU Op
1310// Operations must be done consecutively and not in parallel.
// Latencies therefore add up to the 7 cycle combined resource. The list has
// one EXEC pipeline for the ALU op, plus one branch dispatch (DISP_BR_1C) and
// one regular dispatch (DISP_1C).
1311def : InstRW<[P9_BROpAndALUOp_7C, IP_EXEC_1C, DISP_BR_1C, DISP_1C],
1312      (instrs
1313    ADDPCIS
1314)>;
1315
1316// Special Extracted Instructions For Atomics
1317
1318// Atomic Load
// List contents: five P9_LS_* entries (two 1C, three 4C), two IP_EXEC_1C and
// five IP_AGEN_1C pipelines, and six dispatch entries, two of which are
// DISP_3SLOTS_1C. Applies to LDAT and LWAT.
1319def : InstRW<[P9_LS_1C, P9_LS_1C, P9_LS_4C, P9_LS_4C, P9_LS_4C,
1320              IP_EXEC_1C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C, IP_AGEN_1C,
1321              IP_AGEN_1C, IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C,
1322              DISP_3SLOTS_1C, DISP_1C, DISP_1C, DISP_1C],
1323      (instrs
1324    (instregex "L(D|W)AT$")
1325)>;
1326
1327// Atomic Store
// List contents: three P9_LS_* entries (one 1C, two 4C), one IP_EXEC_1C and
// three IP_AGEN_1C pipelines, and three dispatch entries, one of which is
// DISP_3SLOTS_1C. Applies to STDAT and STWAT.
1328def : InstRW<[P9_LS_1C, P9_LS_4C, P9_LS_4C, IP_EXEC_1C, IP_AGEN_1C, IP_AGEN_1C,
1329              IP_AGEN_1C, DISP_1C, DISP_3SLOTS_1C, DISP_1C],
1330      (instrs
1331    (instregex "ST(D|W)AT$")
1332)>;
1333
1334// Signal Processing Engine (SPE) Instructions
1335// These instructions are not supported on Power 9
// They are given an empty resource list and the record sets Unsupported = 1.
1336def : InstRW<[],
1337    (instrs
1338  BRINC,
1339  EVABS,
1340  EVEQV,
1341  EVMRA,
1342  EVNAND,
1343  EVNEG,
1344  (instregex "EVADD(I)?W$"),
1345  (instregex "EVADD(SM|SS|UM|US)IAAW$"),
1346  (instregex "EVAND(C)?$"),
1347  (instregex "EVCMP(EQ|GTS|GTU|LTS|LTU)$"),
1348  (instregex "EVCNTL(S|Z)W$"),
1349  (instregex "EVDIVW(S|U)$"),
1350  (instregex "EVEXTS(B|H)$"),
1351  (instregex "EVLD(H|W|D)(X)?$"),
1352  (instregex "EVLHH(E|OS|OU)SPLAT(X)?$"),
1353  (instregex "EVLWHE(X)?$"),
1354  (instregex "EVLWHO(S|U)(X)?$"),
1355  (instregex "EVLW(H|W)SPLAT(X)?$"),
1356  (instregex "EVMERGE(HI|LO|HILO|LOHI)$"),
1357  (instregex "EVMHEG(S|U)M(F|I)A(A|N)$"),
1358  (instregex "EVMHES(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1359  (instregex "EVMHEU(M|S)I(A|AA|AAW|ANW)?$"),
1360  (instregex "EVMHOG(U|S)M(F|I)A(A|N)$"),
1361  (instregex "EVMHOS(M|S)(F|I)(A|AA|AAW|ANW)?$"),
1362  (instregex "EVMHOU(M|S)I(A|AA|ANW|AAW)?$"),
1363  (instregex "EVMWHS(M|S)(F|FA|I|IA)$"),
1364  (instregex "EVMWHUMI(A)?$"),
1365  (instregex "EVMWLS(M|S)IA(A|N)W$"),
1366  (instregex "EVMWLU(M|S)I(A|AA|AAW|ANW)?$"),
1367  (instregex "EVMWSM(F|I)(A|AA|AN)?$"),
1368  (instregex "EVMWSSF(A|AA|AN)?$"),
1369  (instregex "EVMWUMI(A|AA|AN)?$"),
1370  (instregex "EV(N|X)?OR(C)?$"),
1371  (instregex "EVR(LW|LWI|NDW)$"),
1372  (instregex "EVSLW(I)?$"),
1373  (instregex "EVSPLAT(F)?I$"),
1374  (instregex "EVSRW(I)?(S|U)$"),
1375  (instregex "EVST(DD|DH|DW|WHE|WHO|WWE|WWO)(X)?$"),
1376  (instregex "EVSUBF(S|U)(M|S)IAAW$"),
1377  (instregex "EVSUB(I)?FW$")
1378)> { let Unsupported = 1; }
1379
1380// General Instructions without scheduling support.
// As with the SPE list, the resource list is empty and the record sets
// Unsupported = 1, so no P9 resources are assigned to these opcodes.
1381def : InstRW<[],
1382    (instrs
1383  (instregex "(H)?RFI(D)?$"),
1384  (instregex "DSS(ALL)?$"),
1385  (instregex "DST(ST)?(T)?(64)?$"),
1386  (instregex "ICBL(C|Q)$"),
1387  (instregex "L(W|H|B)EPX$"),
1388  (instregex "ST(W|H|B)EPX$"),
1389  (instregex "(L|ST)FDEPX$"),
1390  (instregex "M(T|F)SR(IN)?$"),
1391  (instregex "M(T|F)DCR$"),
1392  (instregex "NOP_GT_PWR(6|7)$"),
1393  (instregex "TLB(IA|IVAX|SX|SX2|SX2D|LD|LI|RE|RE2|WE|WE2)$"),
1394  (instregex "WRTEE(I)?$"),
1395  ATTN,
1396  CLRBHRB,
1397  MFBHRBE,
1398  MBAR,
1399  MSYNC,
1400  SLBSYNC,
1401  SLBFEEo,
1402  NAP,
1403  STOP,
1404  TRAP,
1405  RFCI,
1406  RFDI,
1407  RFMCI,
1408  SC,
1409  DCBA,
1410  DCBI,
1411  DCCCI,
1412  ICCCI
1413)> { let Unsupported = 1; }
1414