xref: /freebsd/contrib/llvm-project/llvm/lib/Target/PowerPC/README_P9.txt (revision 1719886f6d08408b834d270c59ffcfd821c8f63a)
1//===- README_P9.txt - Notes for improving Power9 code gen ----------------===//
2
3TODO: Instructions That Need Intrinsics Implemented or Mapped to LLVM IR
4
5Altivec:
6- Vector Compare Not Equal (Zero):
7  vcmpneb(.) vcmpneh(.) vcmpnew(.)
8  vcmpnezb(.) vcmpnezh(.) vcmpnezw(.)
9  . Same as other VCMP*, use VCMP/VCMPo form (support intrinsic)
10
11- Vector Extract Unsigned: vextractub vextractuh vextractuw vextractd
12  . Don't use llvm extractelement because they have different semantics
13  . Use intrinsics:
14    (set v2i64:$vD, (int_ppc_altivec_vextractub v16i8:$vA, imm:$UIMM))
15    (set v2i64:$vD, (int_ppc_altivec_vextractuh v8i16:$vA, imm:$UIMM))
16    (set v2i64:$vD, (int_ppc_altivec_vextractuw v4i32:$vA, imm:$UIMM))
17    (set v2i64:$vD, (int_ppc_altivec_vextractd  v2i64:$vA, imm:$UIMM))
18
19- Vector Extract Unsigned Byte Left/Right-Indexed:
20  vextublx vextubrx vextuhlx vextuhrx vextuwlx vextuwrx
21  . Use intrinsics:
22    // Left-Indexed
23    (set i64:$rD, (int_ppc_altivec_vextublx i64:$rA, v16i8:$vB))
24    (set i64:$rD, (int_ppc_altivec_vextuhlx i64:$rA, v8i16:$vB))
25    (set i64:$rD, (int_ppc_altivec_vextuwlx i64:$rA, v4i32:$vB))
26
27    // Right-Indexed
28    (set i64:$rD, (int_ppc_altivec_vextubrx i64:$rA, v16i8:$vB))
29    (set i64:$rD, (int_ppc_altivec_vextuhrx i64:$rA, v8i16:$vB))
30    (set i64:$rD, (int_ppc_altivec_vextuwrx i64:$rA, v4i32:$vB))
31
32- Vector Insert Element Instructions: vinsertb vinsertd vinserth vinsertw
33    (set v16i8:$vD, (int_ppc_altivec_vinsertb v16i8:$vA, imm:$UIMM))
34    (set v8i16:$vD, (int_ppc_altivec_vinserth v8i16:$vA, imm:$UIMM))
35    (set v4i32:$vD, (int_ppc_altivec_vinsertw v4i32:$vA, imm:$UIMM))
36    (set v2i64:$vD, (int_ppc_altivec_vinsertd v2i64:$vA, imm:$UIMM))
37
38- Vector Count Leading/Trailing Zero LSB. Result is placed into GPR[rD]:
39  vclzlsbb vctzlsbb
40  . Use intrinsic:
41    (set i64:$rD, (int_ppc_altivec_vclzlsbb v16i8:$vB))
42    (set i64:$rD, (int_ppc_altivec_vctzlsbb v16i8:$vB))
43
44- Vector Count Trailing Zeros: vctzb vctzh vctzw vctzd
45  . Map to llvm cttz
46    (set v16i8:$vD, (cttz v16i8:$vB))     // vctzb
47    (set v8i16:$vD, (cttz v8i16:$vB))     // vctzh
48    (set v4i32:$vD, (cttz v4i32:$vB))     // vctzw
49    (set v2i64:$vD, (cttz v2i64:$vB))     // vctzd
50
51- Vector Extend Sign: vextsb2w vextsh2w vextsb2d vextsh2d vextsw2d
52  . vextsb2w:
53    (set v4i32:$vD, (sext v4i8:$vB))
54
55    // PowerISA_V3.0:
56    do i = 0 to 3
57       VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].byte[3])
58    end
59
60  . vextsh2w:
61    (set v4i32:$vD, (sext v4i16:$vB))
62
63    // PowerISA_V3.0:
64    do i = 0 to 3
65       VR[VRT].word[i] ← EXTS32(VR[VRB].word[i].hword[1])
66    end
67
68  . vextsb2d
69    (set v2i64:$vD, (sext v2i8:$vB))
70
71    // PowerISA_V3.0:
72    do i = 0 to 1
73       VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].byte[7])
74    end
75
76  . vextsh2d
77    (set v2i64:$vD, (sext v2i16:$vB))
78
79    // PowerISA_V3.0:
80    do i = 0 to 1
81       VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].hword[3])
82    end
83
84  . vextsw2d
85    (set v2i64:$vD, (sext v2i32:$vB))
86
87    // PowerISA_V3.0:
88    do i = 0 to 1
89       VR[VRT].dword[i] ← EXTS64(VR[VRB].dword[i].word[1])
90    end
91
92- Vector Integer Negate: vnegw vnegd
93  . Map to llvm ineg
94    (set v4i32:$rT, (ineg v4i32:$rA))       // vnegw
95    (set v2i64:$rT, (ineg v2i64:$rA))       // vnegd
96
97- Vector Parity Byte: vprtybw vprtybd vprtybq
98  . Use intrinsic:
99    (set v4i32:$rD, (int_ppc_altivec_vprtybw v4i32:$vB))
100    (set v2i64:$rD, (int_ppc_altivec_vprtybd v2i64:$vB))
101    (set v1i128:$rD, (int_ppc_altivec_vprtybq v1i128:$vB))
102
103- Vector (Bit) Permute (Right-indexed):
104  . vbpermd: Same as "vbpermq", use VX1_Int_Ty2:
105    VX1_Int_Ty2<1484, "vbpermd", int_ppc_altivec_vbpermd, v2i64, v2i64>;
106
107  . vpermr: use VA1a_Int_Ty3
108    VA1a_Int_Ty3<59, "vpermr", int_ppc_altivec_vpermr, v16i8, v16i8, v16i8>;
109
110- Vector Rotate Left Mask/Mask-Insert: vrlwnm vrlwmi vrldnm vrldmi
111  . Use intrinsic:
112    VX1_Int_Ty<389, "vrlwnm", int_ppc_altivec_vrlwnm, v4i32>;
113    VX1_Int_Ty<133, "vrlwmi", int_ppc_altivec_vrlwmi, v4i32>;
114    VX1_Int_Ty<453, "vrldnm", int_ppc_altivec_vrldnm, v2i64>;
115    VX1_Int_Ty<197, "vrldmi", int_ppc_altivec_vrldmi, v2i64>;
116
117- Vector Shift Left/Right: vslv vsrv
118  . Use intrinsic, don't map to llvm shl and lshr, because they have different
119    semantics, e.g. vslv:
120
121      do i = 0 to 15
122         sh ← VR[VRB].byte[i].bit[5:7]
123         VR[VRT].byte[i] ← src.byte[i:i+1].bit[sh:sh+7]
124      end
125
126    VR[VRT].byte[i] is composed of 2 bytes from src.byte[i:i+1]
127
128  . VX1_Int_Ty<1860, "vslv", int_ppc_altivec_vslv, v16i8>;
129    VX1_Int_Ty<1796, "vsrv", int_ppc_altivec_vsrv, v16i8>;
130
131- Vector Multiply-by-10 (& Write Carry) Unsigned Quadword:
132  vmul10uq vmul10cuq
133  . Use intrinsic:
134    VX1_Int_Ty<513, "vmul10uq",   int_ppc_altivec_vmul10uq,  v1i128>;
135    VX1_Int_Ty<  1, "vmul10cuq",  int_ppc_altivec_vmul10cuq, v1i128>;
136
137- Vector Multiply-by-10 Extended (& Write Carry) Unsigned Quadword:
138  vmul10euq vmul10ecuq
139  . Use intrinsic:
140    VX1_Int_Ty<577, "vmul10euq",  int_ppc_altivec_vmul10euq, v1i128>;
141    VX1_Int_Ty< 65, "vmul10ecuq", int_ppc_altivec_vmul10ecuq, v1i128>;
142
143- Decimal Convert From/to National/Zoned/Signed-QWord:
144  bcdcfn. bcdcfz. bcdctn. bcdctz. bcdcfsq. bcdctsq.
145  . Use intrinsics:
146    (set v1i128:$vD, (int_ppc_altivec_bcdcfno  v1i128:$vB, i1:$PS))
147    (set v1i128:$vD, (int_ppc_altivec_bcdcfzo  v1i128:$vB, i1:$PS))
148    (set v1i128:$vD, (int_ppc_altivec_bcdctno  v1i128:$vB))
149    (set v1i128:$vD, (int_ppc_altivec_bcdctzo  v1i128:$vB, i1:$PS))
150    (set v1i128:$vD, (int_ppc_altivec_bcdcfsqo v1i128:$vB, i1:$PS))
151    (set v1i128:$vD, (int_ppc_altivec_bcdctsqo v1i128:$vB))
152
153- Decimal Copy-Sign/Set-Sign: bcdcpsgn. bcdsetsgn.
154  . Use intrinsics:
155    (set v1i128:$vD, (int_ppc_altivec_bcdcpsgno v1i128:$vA, v1i128:$vB))
156    (set v1i128:$vD, (int_ppc_altivec_bcdsetsgno v1i128:$vB, i1:$PS))
157
158- Decimal Shift/Unsigned-Shift/Shift-and-Round: bcds. bcdus. bcdsr.
159  . Use intrinsics:
160    (set v1i128:$vD, (int_ppc_altivec_bcdso  v1i128:$vA, v1i128:$vB, i1:$PS))
161    (set v1i128:$vD, (int_ppc_altivec_bcduso v1i128:$vA, v1i128:$vB))
162    (set v1i128:$vD, (int_ppc_altivec_bcdsro v1i128:$vA, v1i128:$vB, i1:$PS))
163
164  . Note! Only 1 byte of their VA is accessed, i.e. VA.byte[7]
165
166- Decimal (Unsigned) Truncate: bcdtrunc. bcdutrunc.
167  . Use intrinsics:
168    (set v1i128:$vD, (int_ppc_altivec_bcdtrunco  v1i128:$vA, v1i128:$vB, i1:$PS))
169    (set v1i128:$vD, (int_ppc_altivec_bcdutrunco v1i128:$vA, v1i128:$vB))
170
171  . Note! Only 2 bytes of their VA are accessed, i.e. VA.hword[3] (VA.bit[48:63])
172
173VSX:
174- QP Copy Sign: xscpsgnqp
175  . Similar to xscpsgndp
176  . (set f128:$vT, (fcopysign f128:$vB, f128:$vA))
177
178- QP Absolute/Negative-Absolute/Negate: xsabsqp xsnabsqp xsnegqp
179  . Similar to xsabsdp/xsnabsdp/xsnegdp
180  . (set f128:$vT, (fabs f128:$vB))             // xsabsqp
181    (set f128:$vT, (fneg (fabs f128:$vB)))      // xsnabsqp
182    (set f128:$vT, (fneg f128:$vB))             // xsnegqp
183
184- QP Add/Divide/Multiply/Subtract/Square-Root:
185  xsaddqp xsdivqp xsmulqp xssubqp xssqrtqp
186  . Similar to xsadddp
187  . isCommutable = 1
188    (set f128:$vT, (fadd f128:$vA, f128:$vB))   // xsaddqp
189    (set f128:$vT, (fmul f128:$vA, f128:$vB))   // xsmulqp
190
191  . isCommutable = 0
192    (set f128:$vT, (fdiv f128:$vA, f128:$vB))   // xsdivqp
193    (set f128:$vT, (fsub f128:$vA, f128:$vB))   // xssubqp
194    (set f128:$vT, (fsqrt f128:$vB))            // xssqrtqp
195
196- Round to Odd of QP Add/Divide/Multiply/Subtract/Square-Root:
197  xsaddqpo xsdivqpo xsmulqpo xssubqpo xssqrtqpo
198  . Similar to xsrsqrtedp??
199      def XSRSQRTEDP : XX2Form<60, 74,
200                               (outs vsfrc:$XT), (ins vsfrc:$XB),
201                               "xsrsqrtedp $XT, $XB", IIC_VecFP,
202                               [(set f64:$XT, (PPCfrsqrte f64:$XB))]>;
203
204  . Define DAG Node in PPCInstrInfo.td:
205    def PPCfaddrto: SDNode<"PPCISD::FADDRTO", SDTFPBinOp, []>;
206    def PPCfdivrto: SDNode<"PPCISD::FDIVRTO", SDTFPBinOp, []>;
207    def PPCfmulrto: SDNode<"PPCISD::FMULRTO", SDTFPBinOp, []>;
208    def PPCfsubrto: SDNode<"PPCISD::FSUBRTO", SDTFPBinOp, []>;
209    def PPCfsqrtrto: SDNode<"PPCISD::FSQRTRTO", SDTFPUnaryOp, []>;
210
211    DAG patterns of each instruction (PPCInstrVSX.td):
212    . isCommutable = 1
213      (set f128:$vT, (PPCfaddrto f128:$vA, f128:$vB))   // xsaddqpo
214      (set f128:$vT, (PPCfmulrto f128:$vA, f128:$vB))   // xsmulqpo
215
216    . isCommutable = 0
217      (set f128:$vT, (PPCfdivrto f128:$vA, f128:$vB))   // xsdivqpo
218      (set f128:$vT, (PPCfsubrto f128:$vA, f128:$vB))   // xssubqpo
219      (set f128:$vT, (PPCfsqrtrto f128:$vB))            // xssqrtqpo
220
221- QP (Negative) Multiply-{Add/Subtract}: xsmaddqp xsmsubqp xsnmaddqp xsnmsubqp
222  . Ref: xsmaddadp/xsmsubadp/xsnmaddadp/xsnmsubadp
223
224  . isCommutable = 1
225    // xsmaddqp
226    [(set f128:$vT, (fma f128:$vA, f128:$vB, f128:$vTi))]>,
227    RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
228    AltVSXFMARel;
229
230    // xsmsubqp
231    [(set f128:$vT, (fma f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
232    RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
233    AltVSXFMARel;
234
235    // xsnmaddqp
236    [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, f128:$vTi)))]>,
237    RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
238    AltVSXFMARel;
239
240    // xsnmsubqp
241    [(set f128:$vT, (fneg (fma f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
242    RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
243    AltVSXFMARel;
244
245- Round to Odd of QP (Negative) Multiply-{Add/Subtract}:
246  xsmaddqpo xsmsubqpo xsnmaddqpo xsnmsubqpo
247  . Similar to xsrsqrtedp??
248
249  . Define DAG Node in PPCInstrInfo.td:
250    def PPCfmarto: SDNode<"PPCISD::FMARTO", SDTFPTernaryOp, []>;
251
252    It looks like we only need to define "PPCfmarto" for these instructions,
253    because according to PowerISA_V3.0, these instructions perform RTO on
254    fma's result:
255        xsmaddqp(o)
256        v      ← bfp_MULTIPLY_ADD(src1, src3, src2)
257        rnd    ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
258        result ← bfp_CONVERT_TO_BFP128(rnd)
259
260        xsmsubqp(o)
261        v      ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
262        rnd    ← bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v)
263        result ← bfp_CONVERT_TO_BFP128(rnd)
264
265        xsnmaddqp(o)
266        v      ← bfp_MULTIPLY_ADD(src1,src3,src2)
267        rnd    ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
268        result ← bfp_CONVERT_TO_BFP128(rnd)
269
270        xsnmsubqp(o)
271        v      ← bfp_MULTIPLY_ADD(src1, src3, bfp_NEGATE(src2))
272        rnd    ← bfp_NEGATE(bfp_ROUND_TO_BFP128(RO, FPSCR.RN, v))
273        result ← bfp_CONVERT_TO_BFP128(rnd)
274
275    DAG patterns of each instruction (PPCInstrVSX.td):
276    . isCommutable = 1
277      // xsmaddqpo
278      [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, f128:$vTi))]>,
279      RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
280      AltVSXFMARel;
281
282      // xsmsubqpo
283      [(set f128:$vT, (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi)))]>,
284      RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
285      AltVSXFMARel;
286
287      // xsnmaddqpo
288      [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, f128:$vTi)))]>,
289      RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
290      AltVSXFMARel;
291
292      // xsnmsubqpo
293      [(set f128:$vT, (fneg (PPCfmarto f128:$vA, f128:$vB, (fneg f128:$vTi))))]>,
294      RegConstraint<"$vTi = $vT">, NoEncode<"$vTi">,
295      AltVSXFMARel;
296
297- QP Compare Ordered/Unordered: xscmpoqp xscmpuqp
298  . ref: XSCMPUDP
299      def XSCMPUDP : XX3Form_1<60, 35,
300                               (outs crrc:$crD), (ins vsfrc:$XA, vsfrc:$XB),
301                               "xscmpudp $crD, $XA, $XB", IIC_FPCompare, []>;
302
303  . No SDAG, intrinsic, builtin are required??
304    Or llvm fcmp order/unorder compare??
305
306- DP/QP Compare Exponents: xscmpexpdp xscmpexpqp
307  . No SDAG, intrinsic, builtin are required?
308
309- DP Compare ==, >=, >, !=: xscmpeqdp xscmpgedp xscmpgtdp xscmpnedp
310  . I checked the existing instruction "XSCMPUDP". They differ in the target
311    register: "XSCMPUDP" writes to a CR field, xscmp*dp write to a VSX register
312
313  . Use intrinsic:
314    (set i128:$XT, (int_ppc_vsx_xscmpeqdp f64:$XA, f64:$XB))
315    (set i128:$XT, (int_ppc_vsx_xscmpgedp f64:$XA, f64:$XB))
316    (set i128:$XT, (int_ppc_vsx_xscmpgtdp f64:$XA, f64:$XB))
317    (set i128:$XT, (int_ppc_vsx_xscmpnedp f64:$XA, f64:$XB))
318
319- Vector Compare Not Equal: xvcmpnedp xvcmpnedp. xvcmpnesp xvcmpnesp.
320  . Similar to xvcmpeqdp:
321      defm XVCMPEQDP : XX3Form_Rcr<60, 99,
322                                 "xvcmpeqdp", "$XT, $XA, $XB", IIC_VecFPCompare,
323                                 int_ppc_vsx_xvcmpeqdp, v2i64, v2f64>;
324
325  . So we should use "XX3Form_Rcr" to implement intrinsic
326
327- Convert DP -> QP: xscvdpqp
328  . Similar to XSCVDPSP:
329      def XSCVDPSP : XX2Form<60, 265,
330                          (outs vsfrc:$XT), (ins vsfrc:$XB),
331                          "xscvdpsp $XT, $XB", IIC_VecFP, []>;
332  . So, No SDAG, intrinsic, builtin are required??
333
334- Round & Convert QP -> DP (dword[1] is set to zero): xscvqpdp xscvqpdpo
335  . Similar to XSCVDPSP
336  . No SDAG, intrinsic, builtin are required??
337
338- Truncate & Convert QP -> (Un)Signed (D)Word (dword[1] is set to zero):
339  xscvqpsdz xscvqpswz xscvqpudz xscvqpuwz
340  . According to PowerISA_V3.0, these are similar to "XSCVDPSXDS", "XSCVDPSXWS",
341    "XSCVDPUXDS", "XSCVDPUXWS"
342
343  . DAG patterns:
344    (set f128:$XT, (PPCfctidz f128:$XB))    // xscvqpsdz
345    (set f128:$XT, (PPCfctiwz f128:$XB))    // xscvqpswz
346    (set f128:$XT, (PPCfctiduz f128:$XB))   // xscvqpudz
347    (set f128:$XT, (PPCfctiwuz f128:$XB))   // xscvqpuwz
348
349- Convert (Un)Signed DWord -> QP: xscvsdqp xscvudqp
350  . Similar to XSCVSXDSP
351  . (set f128:$XT, (PPCfcfids f64:$XB))     // xscvsdqp
352    (set f128:$XT, (PPCfcfidus f64:$XB))    // xscvudqp
353
354- (Round &) Convert DP <-> HP: xscvdphp xscvhpdp
355  . Similar to XSCVDPSP
356  . No SDAG, intrinsic, builtin are required??
357
358- Vector HP <-> SP: xvcvhpsp xvcvsphp
359  . Similar to XVCVDPSP:
360      def XVCVDPSP : XX2Form<60, 393,
361                          (outs vsrc:$XT), (ins vsrc:$XB),
362                          "xvcvdpsp $XT, $XB", IIC_VecFP, []>;
363  . No SDAG, intrinsic, builtin are required??
364
365- Round to Quad-Precision Integer: xsrqpi xsrqpix
366  . These are combinations of "XSRDPI", "XSRDPIC", "XSRDPIM", ..., because you
367    need to specify the rounding mode in the instruction
368  . Provide builtin?
369    (set f128:$vT, (int_ppc_vsx_xsrqpi f128:$vB))
370    (set f128:$vT, (int_ppc_vsx_xsrqpix f128:$vB))
371
372- Round Quad-Precision to Double-Extended Precision (fp80): xsrqpxp
373  . Provide builtin?
374    (set f128:$vT, (int_ppc_vsx_xsrqpxp f128:$vB))
375
376Fixed Point Facility:
377
378- Exploit cmprb and cmpeqb (perhaps for something like
379  isalpha/isdigit/isupper/islower and isspace respectively). This can
380  perhaps be done through a builtin.
381
382- Provide testing for cnttz[dw]
383- Insert Exponent DP/QP: xsiexpdp xsiexpqp
384  . Use intrinsic?
385  . xsiexpdp:
386    // Note: rA and rB are the unsigned integer value.
387    (set f64:$XT, (int_ppc_vsx_xsiexpdp i64:$rA, i64:$rB))
388
389  . xsiexpqp:
390    (set f128:$vT, (int_ppc_vsx_xsiexpqp f128:$vA, f64:$vB))
391
392- Extract Exponent/Significand DP/QP: xsxexpdp xsxsigdp xsxexpqp xsxsigqp
393  . Use intrinsic?
394  . (set i64:$rT, (int_ppc_vsx_xsxexpdp f64:$XB))    // xsxexpdp
395    (set i64:$rT, (int_ppc_vsx_xsxsigdp f64:$XB))    // xsxsigdp
396    (set f128:$vT, (int_ppc_vsx_xsxexpqp f128:$vB))  // xsxexpqp
397    (set f128:$vT, (int_ppc_vsx_xsxsigqp f128:$vB))  // xsxsigqp
398
399- Vector Insert Word: xxinsertw
400  - Useful for inserting f32/i32 elements into vectors (the element to be
401    inserted needs to be prepared)
402  . Note: llvm has insertelem in "Vector Operations"
403    ; yields <n x <ty>>
404    <result> = insertelement <n x <ty>> <val>, <ty> <elt>, <ty2> <idx>
405
406    But how to map to it??
407    [(set v1f128:$XT, (insertelement v1f128:$XTi, f128:$XB, i4:$UIMM))]>,
408    RegConstraint<"$XTi = $XT">, NoEncode<"$XTi">,
409
410  . Or use intrinsic?
411    (set v1f128:$XT, (int_ppc_vsx_xxinsertw v1f128:$XTi, f128:$XB, i4:$UIMM))
412
413- Vector Extract Unsigned Word: xxextractuw
414  - Not useful for extraction of f32 from v4f32 (the current pattern is better -
415    shift->convert)
416  - It is useful for (uint_to_fp (vector_extract v4i32, N))
417  - Unfortunately, it can't be used for (sint_to_fp (vector_extract v4i32, N))
418  . Note: llvm has extractelement in "Vector Operations"
419    ; yields <ty>
420    <result> = extractelement <n x <ty>> <val>, <ty2> <idx>
421
422    How to map to it??
423    [(set f128:$XT, (extractelement v1f128:$XB, i4:$UIMM))]
424
425  . Or use intrinsic?
426    (set f128:$XT, (int_ppc_vsx_xxextractuw v1f128:$XB, i4:$UIMM))
427
428- Vector Insert Exponent DP/SP: xviexpdp xviexpsp
429  . Use intrinsic
430    (set v2f64:$XT, (int_ppc_vsx_xviexpdp v2f64:$XA, v2f64:$XB))
431    (set v4f32:$XT, (int_ppc_vsx_xviexpsp v4f32:$XA, v4f32:$XB))
432
433- Vector Extract Exponent/Significand DP/SP: xvxexpdp xvxexpsp xvxsigdp xvxsigsp
434  . Use intrinsic
435    (set v2f64:$XT, (int_ppc_vsx_xvxexpdp v2f64:$XB))
436    (set v4f32:$XT, (int_ppc_vsx_xvxexpsp v4f32:$XB))
437    (set v2f64:$XT, (int_ppc_vsx_xvxsigdp v2f64:$XB))
438    (set v4f32:$XT, (int_ppc_vsx_xvxsigsp v4f32:$XB))
439
440- Test Data Class SP/DP/QP: xststdcsp xststdcdp xststdcqp
441  . No SDAG, intrinsic, builtin are required?
442    Because it seems that we have no way to map BF field?
443
444    Instruction Form: [PO T XO B XO BX TX]
445    Asm: xststd* BF,XB,DCMX
446
447    BF is an index to CR register field.
448
449- Vector Test Data Class SP/DP: xvtstdcsp xvtstdcdp
450  . Use intrinsic
451    (set v4f32:$XT, (int_ppc_vsx_xvtstdcsp v4f32:$XB, i7:$DCMX))
452    (set v2f64:$XT, (int_ppc_vsx_xvtstdcdp v2f64:$XB, i7:$DCMX))
453
454- Maximum/Minimum Type-C/Type-J DP: xsmaxcdp xsmaxjdp xsmincdp xsminjdp
455  . PowerISA_V3.0:
456    "xsmaxcdp can be used to implement the C/C++/Java conditional operation
457     (x>y)?x:y for single-precision and double-precision arguments."
458
459    Note! c type and j type have different behavior when:
460    1. Either input is NaN
461    2. Both inputs are +-Infinity, +-Zero
462
463  . ctype maps to llvm fmaxnum/fminnum
464    jtype uses an intrinsic
465
466  . xsmaxcdp xsmincdp
467    (set f64:$XT, (fmaxnum f64:$XA, f64:$XB))
468    (set f64:$XT, (fminnum f64:$XA, f64:$XB))
469
470  . xsmaxjdp xsminjdp
471    (set f64:$XT, (int_ppc_vsx_xsmaxjdp f64:$XA, f64:$XB))
472    (set f64:$XT, (int_ppc_vsx_xsminjdp f64:$XA, f64:$XB))
473
474- Vector Byte-Reverse H/W/D/Q Word: xxbrh xxbrw xxbrd xxbrq
475  . Use intrinsic
476    (set v8i16:$XT, (int_ppc_vsx_xxbrh v8i16:$XB))
477    (set v4i32:$XT, (int_ppc_vsx_xxbrw v4i32:$XB))
478    (set v2i64:$XT, (int_ppc_vsx_xxbrd v2i64:$XB))
479    (set v1i128:$XT, (int_ppc_vsx_xxbrq v1i128:$XB))
480
481- Vector Permute: xxperm xxpermr
482  . I have checked "PPCxxswapd" in PPCInstrVSX.td, but they are different
483  . Use intrinsic
484    (set v16i8:$XT, (int_ppc_vsx_xxperm v16i8:$XA, v16i8:$XB))
485    (set v16i8:$XT, (int_ppc_vsx_xxpermr v16i8:$XA, v16i8:$XB))
486
487- Vector Splat Immediate Byte: xxspltib
488  . Similar to XXSPLTW:
489      def XXSPLTW : XX2Form_2<60, 164,
490                           (outs vsrc:$XT), (ins vsrc:$XB, u2imm:$UIM),
491                           "xxspltw $XT, $XB, $UIM", IIC_VecPerm, []>;
492
493  . No SDAG, intrinsic, builtin are required?
494
495- Load/Store Vector: lxv stxv
496  . Has likely SDAG match:
497    (set v?:$XT, (load ix16addr:$src))
498    (set v?:$XT, (store ix16addr:$dst))
499
500  . Need to define ix16addr in PPCInstrInfo.td
501    ix16addr: 16-byte aligned, see "def memrix16" in PPCInstrInfo.td
502
503- Load/Store Vector Indexed: lxvx stxvx
504  . Has likely SDAG match:
505    (set v?:$XT, (load xoaddr:$src))
506    (set v?:$XT, (store xoaddr:$dst))
507
508- Load/Store DWord: lxsd stxsd
509  . Similar to lxsdx/stxsdx:
510    def LXSDX : XX1Form<31, 588,
511                        (outs vsfrc:$XT), (ins memrr:$src),
512                        "lxsdx $XT, $src", IIC_LdStLFD,
513                        [(set f64:$XT, (load xoaddr:$src))]>;
514
515  . (set f64:$XT, (load iaddrX4:$src))
516    (set f64:$XT, (store iaddrX4:$dst))
517
518- Load/Store SP, with conversion from/to DP: lxssp stxssp
519  . Similar to lxsspx/stxsspx:
520    def LXSSPX : XX1Form<31, 524, (outs vssrc:$XT), (ins memrr:$src),
521                         "lxsspx $XT, $src", IIC_LdStLFD,
522                         [(set f32:$XT, (load xoaddr:$src))]>;
523
524  . (set f32:$XT, (load iaddrX4:$src))
525    (set f32:$XT, (store iaddrX4:$dst))
526
527- Load as Integer Byte/Halfword & Zero Indexed: lxsibzx lxsihzx
528  . Similar to lxsiwzx:
529    def LXSIWZX : XX1Form<31, 12, (outs vsfrc:$XT), (ins memrr:$src),
530                          "lxsiwzx $XT, $src", IIC_LdStLFD,
531                          [(set f64:$XT, (PPClfiwzx xoaddr:$src))]>;
532
533  . (set f64:$XT, (PPClfiwzx xoaddr:$src))
534
535- Store as Integer Byte/Halfword Indexed: stxsibx stxsihx
536  . Similar to stxsiwx:
537    def STXSIWX : XX1Form<31, 140, (outs), (ins vsfrc:$XT, memrr:$dst),
538                          "stxsiwx $XT, $dst", IIC_LdStSTFD,
539                          [(PPCstfiwx f64:$XT, xoaddr:$dst)]>;
540
541  . (PPCstfiwx f64:$XT, xoaddr:$dst)
542
543- Load Vector Halfword*8/Byte*16 Indexed: lxvh8x lxvb16x
544  . Similar to lxvd2x/lxvw4x:
545    def LXVD2X : XX1Form<31, 844,
546                         (outs vsrc:$XT), (ins memrr:$src),
547                         "lxvd2x $XT, $src", IIC_LdStLFD,
548                         [(set v2f64:$XT, (int_ppc_vsx_lxvd2x xoaddr:$src))]>;
549
550  . (set v8i16:$XT, (int_ppc_vsx_lxvh8x xoaddr:$src))
551    (set v16i8:$XT, (int_ppc_vsx_lxvb16x xoaddr:$src))
552
553- Store Vector Halfword*8/Byte*16 Indexed: stxvh8x stxvb16x
554  . Similar to stxvd2x/stxvw4x:
555    def STXVD2X : XX1Form<31, 972,
556                         (outs), (ins vsrc:$XT, memrr:$dst),
557                         "stxvd2x $XT, $dst", IIC_LdStSTFD,
558                         [(store v2f64:$XT, xoaddr:$dst)]>;
559
560  . (store v8i16:$XT, xoaddr:$dst)
561    (store v16i8:$XT, xoaddr:$dst)
562
563- Load/Store Vector (Left-justified) with Length: lxvl lxvll stxvl stxvll
564  . Likely needs an intrinsic
565  . (set v?:$XT, (int_ppc_vsx_lxvl xoaddr:$src))
566    (set v?:$XT, (int_ppc_vsx_lxvll xoaddr:$src))
567
568  . (int_ppc_vsx_stxvl xoaddr:$dst)
569    (int_ppc_vsx_stxvll xoaddr:$dst)
570
571- Load Vector Word & Splat Indexed: lxvwsx
572  . Likely needs an intrinsic
573  . (set v?:$XT, (int_ppc_vsx_lxvwsx xoaddr:$src))
574
575Atomic operations (l[dw]at, st[dw]at):
576- Provide custom lowering for common atomic operations to use these
577  instructions with the correct Function Code
578- Ensure the operands are in the correct register (i.e. RT+1, RT+2)
579- Provide builtins since not all FC's necessarily have an existing LLVM
580  atomic operation
581
582Move to CR from XER Extended (mcrxrx):
583- Is there a use for this in LLVM?
584
585Fixed Point Facility:
586
587- Copy-Paste Facility: copy copy_first cp_abort paste paste. paste_last
588  . Use intrinsics:
589    (int_ppc_copy_first i32:$rA, i32:$rB)
590    (int_ppc_copy i32:$rA, i32:$rB)
591
592    (int_ppc_paste i32:$rA, i32:$rB)
593    (int_ppc_paste_last i32:$rA, i32:$rB)
594
595    (int_ppc_cp_abort)
596
597- Message Synchronize: msgsync
598- SLB*: slbieg slbsync
599- stop
600  . No intrinsics
601