//===-- PPCISelLowering.h - PPC32 DAG Lowering Interface --------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that PPC uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
#define LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H

#include "PPCInstrInfo.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/CodeGenTypes/MachineValueType.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Type.h"
#include <optional>
#include <utility>

namespace llvm {

  namespace PPCISD {

  // When adding a NEW PPCISD node please add it to the correct position in
  // the enum. The order of elements in this enum matters!
  // Values that are added between FIRST_MEMORY_OPCODE and LAST_MEMORY_OPCODE
  // are considered memory opcodes and are treated differently than other
  // entries.
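  // As a hedged illustration (not part of this header), code that wants to
  // test for a memory opcode can rely on that ordering with a simple range
  // check:
  //   bool IsMemOp = Opc >= PPCISD::FIRST_MEMORY_OPCODE &&
  //                  Opc <= PPCISD::LAST_MEMORY_OPCODE;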
  enum NodeType : unsigned {
    // Start the numbering where the builtin ops and target ops leave off.
    FIRST_NUMBER = ISD::BUILTIN_OP_END,

    /// FSEL - Traditional three-operand fsel node.
    ///
    FSEL,

    /// XSMAXC[DQ]P, XSMINC[DQ]P - C-type min/max instructions.
    XSMAXC,
    XSMINC,

    /// FCFID - The FCFID instruction, taking an f64 operand and producing
    /// an f64 value containing the FP representation of the integer that
    /// was temporarily in the f64 operand.
    FCFID,

    /// Newer FCFID[US] integer-to-floating-point conversion instructions for
    /// unsigned integers and single-precision outputs.
    FCFIDU,
    FCFIDS,
    FCFIDUS,

    /// FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64
    /// operand, producing an f64 value containing the integer representation
    /// of that FP value.
    FCTIDZ,
    FCTIWZ,

    /// Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for
    /// unsigned integers with round toward zero.
    FCTIDUZ,
    FCTIWUZ,

    /// VEXTS, ByteWidth - takes an input in VSFRC and produces an output in
    /// VSFRC that is sign-extended from ByteWidth to a 64-bit integer.
    VEXTS,

    /// Reciprocal estimate instructions (unary FP ops).
    FRE,
    FRSQRTE,

    /// Test instruction for software square root.
    FTSQRT,

    /// Square root instruction.
    FSQRT,

    /// VPERM - The PPC VPERM Instruction.
    ///
    VPERM,

    /// XXSPLT - The PPC VSX splat instructions
    ///
    XXSPLT,

    /// XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for
    /// converting immediate single precision numbers to double precision
    /// vector or scalar.
    XXSPLTI_SP_TO_DP,

    /// XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
    ///
    XXSPLTI32DX,

    /// VECINSERT - The PPC vector insert instruction
    ///
    VECINSERT,

    /// VECSHL - The PPC vector shift left instruction
    ///
    VECSHL,

    /// XXPERMDI - The PPC XXPERMDI instruction
    ///
    XXPERMDI,
    XXPERM,

    /// The CMPB instruction (takes two operands of i32 or i64).
    CMPB,

    /// Hi/Lo - These represent the high and low 16-bit parts of a global
    /// address respectively.  These nodes have two operands, the first of
    /// which must be a TargetGlobalAddress, and the second of which must be a
    /// Constant.  Selected naively, these turn into 'lis G+C' and 'li G+C',
    /// though these are usually folded into other nodes.
    Hi,
    Lo,
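
    // A hedged illustration: selected naively, Hi/Lo for a global G and
    // constant C would become
    //   lis r3, (G+C)@ha   ; PPCISD::Hi
    //   li  r3, (G+C)@l    ; PPCISD::Lo
    // before typically being folded into addis/addi or D-form memory accesses.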

    /// The following two target-specific nodes are used for calls through
    /// function pointers in the 64-bit SVR4 ABI.

    /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
    /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
    /// compute an allocation on the stack.
    DYNALLOC,

    /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
    /// compute an offset from native SP to the address of the most recent
    /// dynamic alloca.
    DYNAREAOFFSET,

    /// To avoid stack clash, allocation is performed by block and each block is
    /// probed.
    PROBED_ALLOCA,

    /// The result of the mflr at function entry, used for PIC code.
    GlobalBaseReg,

    /// These nodes represent PPC shifts.
    ///
    /// For scalar types, only the last `n + 1` bits of the shift amounts
    /// are used, where n is log2(sizeof(element) * 8). See sld/slw, etc.
    /// for exact behaviors.
    ///
    /// For vector types, only the last n bits are used. See vsld.
    SRL,
    SRA,
    SHL,
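
    // A hedged illustration of the scalar rule: for an i32 shift, n = 5, so
    // slw reads the low 6 bits of the amount, and an amount of 32..63 yields
    // 0 rather than wrapping modulo 32. Such a node is created in the usual
    // SelectionDAG fashion, e.g.
    //   SDValue Shl = DAG.getNode(PPCISD::SHL, dl, MVT::i32, Val, Amt);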

    /// These nodes represent PPC arithmetic operations with carry.
    ADDC,
    ADDE,
    SUBC,
    SUBE,

    /// FNMSUB - Negated multiply-subtract instruction.
    FNMSUB,

    /// EXTSWSLI = The PPC extswsli instruction, which does an extend-sign
    /// word and shift left immediate.
    EXTSWSLI,

    /// The combination of sra[wd]i and addze used to implement signed
    /// integer division by a power of 2. The first operand is the dividend,
    /// and the second is the constant shift amount (representing the
    /// divisor).
    SRA_ADDZE,
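
    // A hedged worked example: 'sdiv i32 %x, 4' can be selected through this
    // node as
    //   srawi r4, r3, 2   ; shift right algebraic, sets CA for negative x
    //   addze r4, r4      ; add the carry back in, rounding toward zero
    // with operands %x and the shift amount 2.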

    /// CALL - A direct function call.
    /// CALL_NOP is a call with the special NOP which follows 64-bit
    /// SVR4 calls and 32-bit/64-bit AIX calls.
    /// CALL_NOTOC is a call for when the caller does not use the TOC.
    CALL,
    CALL_NOP,
    CALL_NOTOC,

    /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
    /// MTCTR instruction.
    MTCTR,

    /// CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a
    /// BCTRL instruction.
    BCTRL,

    /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl
    /// instruction and the TOC reload required on 64-bit ELF, 32-bit AIX
    /// and 64-bit AIX.
    BCTRL_LOAD_TOC,

    /// The variants that implicitly define rounding mode for calls with
    /// strictfp semantics.
    CALL_RM,
    CALL_NOP_RM,
    CALL_NOTOC_RM,
    BCTRL_RM,
    BCTRL_LOAD_TOC_RM,

    /// Return with a glue operand, matched by 'blr'
    RET_GLUE,

    /// R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
    /// This copies the bits corresponding to the specified CRREG into the
    /// resultant GPR.  Bits corresponding to other CR regs are undefined.
    MFOCRF,

    /// Direct move from a VSX register to a GPR
    MFVSR,

    /// Direct move from a GPR to a VSX register (algebraic)
    MTVSRA,

    /// Direct move from a GPR to a VSX register (zero)
    MTVSRZ,

    /// Direct move of 2 consecutive GPR to a VSX register.
    BUILD_FP128,

    /// BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and
    /// EXTRACT_ELEMENT but take f64 arguments instead of i64, as i64 is
    /// unsupported for this target.
    /// Merge 2 GPRs to a single SPE register.
    BUILD_SPE64,

    /// Extract SPE register component, second argument is high or low.
    EXTRACT_SPE,

    /// Extract a subvector from signed integer vector and convert to FP.
    /// It is primarily used to convert a (widened) illegal integer vector
    /// type to a legal floating point vector type.
    /// For example v2i32 -> widened to v4i32 -> v2f64
    SINT_VEC_TO_FP,

    /// Extract a subvector from unsigned integer vector and convert to FP.
    /// As with SINT_VEC_TO_FP, used for converting illegal types.
    UINT_VEC_TO_FP,

    /// PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to
    /// place the value into the least significant element of the most
    /// significant doubleword in the vector. This is not element zero for
    /// anything smaller than a doubleword on either endianness. This node has
    /// the same semantics as SCALAR_TO_VECTOR except that the value remains in
    /// the aforementioned location in the vector register.
    SCALAR_TO_VECTOR_PERMUTED,

    // FIXME: Remove these once the ANDI glue bug is fixed:
    /// i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the
    /// eq or gt bit of CR0 after executing andi. x, 1. This is used to
    /// implement truncation of i32 or i64 to i1.
    ANDI_rec_1_EQ_BIT,
    ANDI_rec_1_GT_BIT,

    // READ_TIME_BASE - A read of the 64-bit time-base register on a 32-bit
    // target (returns (Lo, Hi)). It takes a chain operand.
    READ_TIME_BASE,

    // EH_SJLJ_SETJMP - SjLj exception handling setjmp.
    EH_SJLJ_SETJMP,

    // EH_SJLJ_LONGJMP - SjLj exception handling longjmp.
    EH_SJLJ_LONGJMP,

    /// RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP*
    /// instructions.  For lack of a better number, we use the opcode number
    /// encoding for the OPC field to identify the compare.  For example, 838
    /// is VCMPGTSH.
    VCMP,

    /// RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the
    /// altivec VCMP*_rec instructions.  For lack of a better number, we use
    /// the opcode number encoding for the OPC field to identify the compare.
    /// For example, 838 is VCMPGTSH.
    VCMP_rec,

    /// CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This
    /// corresponds to the COND_BRANCH pseudo instruction.  CRRC is the
    /// condition register to branch on, OPC is the branch opcode to use (e.g.
    /// PPC::BLE), DESTBB is the destination block to branch to, and INFLAG is
    /// an optional input flag argument.
    COND_BRANCH,

    /// CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based
    /// loops.
    BDNZ,
    BDZ,

    /// F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding
    /// towards zero.  Used only as part of the long double-to-int
    /// conversion sequence.
    FADDRTZ,

    /// F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
    MFFS,

    /// TC_RETURN - A tail call return.
    ///   operand #0 chain
    ///   operand #1 callee (register or absolute)
    ///   operand #2 stack adjustment
    ///   operand #3 optional in flag
    TC_RETURN,

    /// ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
    CR6SET,
    CR6UNSET,

    /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by initial-exec TLS
    /// for non-position independent code on PPC32.
    PPC32_GOT,

    /// GPRC = address of _GLOBAL_OFFSET_TABLE_. Used by general dynamic and
    /// local dynamic TLS and position independent code on PPC32.
    PPC32_PICGOT,

    /// G8RC = ADDIS_GOT_TPREL_HA %x2, Symbol - Used by the initial-exec
    /// TLS model, produces an ADDIS8 instruction that adds the GOT
    /// base to sym\@got\@tprel\@ha.
    ADDIS_GOT_TPREL_HA,

    /// G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec
    /// TLS model, produces a LD instruction with base register G8RReg
    /// and offset sym\@got\@tprel\@l.  This completes the addition that
    /// finds the offset of "sym" relative to the thread pointer.
    LD_GOT_TPREL_L,

    /// G8RC = ADD_TLS G8RReg, Symbol - Can be used by the initial-exec
    /// and local-exec TLS models, produces an ADD instruction that adds
    /// the contents of G8RReg to the thread pointer.  Symbol contains a
    /// relocation sym\@tls which is to be replaced by the thread pointer
    /// and identifies to the linker that the instruction is part of a
    /// TLS sequence.
    ADD_TLS,
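
    // A hedged illustration of the 64-bit ELF initial-exec sequence these
    // three nodes produce:
    //   addis r9, r2, sym@got@tprel@ha   ; ADDIS_GOT_TPREL_HA
    //   ld    r9, sym@got@tprel@l(r9)    ; LD_GOT_TPREL_L
    //   add   r9, r9, sym@tls            ; ADD_TLS (r13 is the thread pointer)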

    /// G8RC = ADDIS_TLSGD_HA %x2, Symbol - For the general-dynamic TLS
    /// model, produces an ADDIS8 instruction that adds the GOT base
    /// register to sym\@got\@tlsgd\@ha.
    ADDIS_TLSGD_HA,

    /// %x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
    /// model, produces an ADDI8 instruction that adds G8RReg to
    /// sym\@got\@tlsgd\@l and stores the result in X3.  Hidden by
    /// ADDIS_TLSGD_L_ADDR until after register assignment.
    ADDI_TLSGD_L,

    /// %x3 = GET_TLS_ADDR %x3, Symbol - For the general-dynamic TLS
    /// model, produces a call to __tls_get_addr(sym\@tlsgd).  Hidden by
    /// ADDIS_TLSGD_L_ADDR until after register assignment.
    GET_TLS_ADDR,

    /// %x3 = GET_TPOINTER - Used for the local- and initial-exec TLS model on
    /// 32-bit AIX, produces a call to .__get_tpointer to retrieve the thread
    /// pointer. At the end of the call, the thread pointer is found in R3.
    GET_TPOINTER,

    /// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
    /// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
    /// register assignment.
    ADDI_TLSGD_L_ADDR,

    /// GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
    /// G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY
    /// Op that combines two register copies of TOC entries
    /// (region handle into R3 and variable offset into R4) followed by a
    /// GET_TLS_ADDR node which will be expanded to a call to .__tls_get_addr.
    /// This node is used in 64-bit mode as well (in which case the result is
    /// G8RC and inputs are X3/X4).
    TLSGD_AIX,

    /// %x3 = GET_TLS_MOD_AIX _$TLSML - For the AIX local-dynamic TLS model,
    /// produces a call to .__tls_get_mod(_$TLSML\@ml).
    GET_TLS_MOD_AIX,

    /// [GP|G8]RC = TLSLD_AIX, TOC_ENTRY(module handle)
    /// Op that requires a single input of the module handle TOC entry in R3,
    /// and generates a GET_TLS_MOD_AIX node which will be expanded into a call
    /// to .__tls_get_mod. This node is used in both 32-bit and 64-bit modes.
    /// The only difference is the register class.
    TLSLD_AIX,

    /// G8RC = ADDIS_TLSLD_HA %x2, Symbol - For the local-dynamic TLS
    /// model, produces an ADDIS8 instruction that adds the GOT base
    /// register to sym\@got\@tlsld\@ha.
    ADDIS_TLSLD_HA,

    /// %x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
    /// model, produces an ADDI8 instruction that adds G8RReg to
    /// sym\@got\@tlsld\@l and stores the result in X3.  Hidden by
    /// ADDIS_TLSLD_L_ADDR until after register assignment.
    ADDI_TLSLD_L,

    /// %x3 = GET_TLSLD_ADDR %x3, Symbol - For the local-dynamic TLS
    /// model, produces a call to __tls_get_addr(sym\@tlsld).  Hidden by
    /// ADDIS_TLSLD_L_ADDR until after register assignment.
    GET_TLSLD_ADDR,

    /// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
    /// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
    /// following register assignment.
    ADDI_TLSLD_L_ADDR,

    /// G8RC = ADDIS_DTPREL_HA %x3, Symbol - For the local-dynamic TLS
    /// model, produces an ADDIS8 instruction that adds X3 to
    /// sym\@dtprel\@ha.
    ADDIS_DTPREL_HA,

    /// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
    /// model, produces an ADDI8 instruction that adds G8RReg to
    /// sym\@got\@dtprel\@l.
    ADDI_DTPREL_L,

    /// G8RC = PADDI_DTPREL %x3, Symbol - For the pc-rel based local-dynamic TLS
    /// model, produces a PADDI8 instruction that adds X3 to sym\@dtprel.
    PADDI_DTPREL,

    /// VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded
    /// during instruction selection to optimize a BUILD_VECTOR into
    /// operations on splats.  This is necessary to avoid losing these
    /// optimizations due to constant folding.
    VADD_SPLAT,

    /// CHAIN = SC CHAIN, Imm128 - System call.  The 7-bit unsigned
    /// operand identifies the operating system entry point.
    SC,

    /// CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
    CLRBHRB,

    /// GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch
    /// history rolling buffer entry.
    MFBHRBE,

    /// CHAIN = RFEBB CHAIN, State - Return from event-based branch.
    RFEBB,

    /// VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little
    /// endian.  Maps to an xxswapd instruction that corrects an lxvd2x
    /// or stxvd2x instruction.  The chain is necessary because the
    /// sequence replaces a load and needs to provide the same number
    /// of outputs.
    XXSWAPD,

    /// An SDNode for swaps that are not associated with any loads/stores
    /// and thereby have no chain.
    SWAP_NO_CHAIN,

    /// FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or
    /// lower (IDX=1) half of v4f32 to v2f64.
    FP_EXTEND_HALF,

    /// MAT_PCREL_ADDR = Materialize a PC Relative address. This can be done
    /// either through an add like PADDI or through a PC Relative load like
    /// PLD.
    MAT_PCREL_ADDR,

    /// TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for
    /// TLS global address when using dynamic access models. This can be done
    /// through an add like PADDI.
    TLS_DYNAMIC_MAT_PCREL_ADDR,

    /// TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address
    /// when using local exec access models, and when prefixed instructions are
    /// available. This is used with ADD_TLS to produce an add like PADDI.
    TLS_LOCAL_EXEC_MAT_ADDR,

    /// ACC_BUILD = Build an accumulator register from 4 VSX registers.
    ACC_BUILD,

    /// PAIR_BUILD = Build a vector pair register from 2 VSX registers.
    PAIR_BUILD,

    /// EXTRACT_VSX_REG = Extract one of the underlying vsx registers of
    /// an accumulator or pair register. This node is needed because
    /// EXTRACT_SUBVECTOR expects the input and output vectors to have the same
    /// element type.
    EXTRACT_VSX_REG,

    /// XXMFACC = This corresponds to the xxmfacc instruction.
    XXMFACC,

    // Constrained conversion from floating point to int
    FIRST_STRICTFP_OPCODE,
    STRICT_FCTIDZ = FIRST_STRICTFP_OPCODE,
    STRICT_FCTIWZ,
    STRICT_FCTIDUZ,
    STRICT_FCTIWUZ,

    /// Constrained integer-to-floating-point conversion instructions.
    STRICT_FCFID,
    STRICT_FCFIDU,
    STRICT_FCFIDS,
    STRICT_FCFIDUS,

    /// Constrained floating point add in round-to-zero mode.
    STRICT_FADDRTZ,
    LAST_STRICTFP_OPCODE = STRICT_FADDRTZ,

    /// SETBC - The ISA 3.1 (P10) SETBC instruction.
    SETBC,

    /// SETBCR - The ISA 3.1 (P10) SETBCR instruction.
    SETBCR,

    // NOTE: The nodes below may require PC-Rel specific patterns if the
    // address could be PC-Relative. When adding new nodes below, consider
    // whether or not the address can be PC-Relative and add the corresponding
    // PC-relative patterns and tests.

    /// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
    /// byte-swapping store instruction.  It byte-swaps the low "Type" bits of
    /// the GPRC input, then stores it through Ptr.  Type can be either i16 or
    /// i32.
    FIRST_MEMORY_OPCODE,
    STBRX = FIRST_MEMORY_OPCODE,

    /// GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a
    /// byte-swapping load instruction.  It loads "Type" bits, byte-swaps them,
    /// then puts them in the bottom bits of the GPRC.  Type can be either i16
    /// or i32.
    LBRX,
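
    // A hedged illustration: a big-endian i32 access on a little-endian
    // target maps naturally onto this pair of nodes, e.g.
    //   lwbrx  r3, 0, r4   ; LBRX
    //   stwbrx r3, 0, r5   ; STBRX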

    /// STFIWX - The STFIWX instruction.  The first operand is an input token
    /// chain, then an f64 value to store, then an address to store it to.
    STFIWX,

    /// GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point
    /// load which sign-extends from a 32-bit integer value into the
    /// destination 64-bit register.
    LFIWAX,

    /// GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point
    /// load which zero-extends from a 32-bit integer value into the
    /// destination 64-bit register.
    LFIWZX,

    /// GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an
    /// integer smaller than 64 bits into a VSR. The integer is zero-extended.
    /// This can be used for converting loaded integers to floating point.
    LXSIZX,

    /// STXSIX - The STXSI[bh]X instruction. The first operand is an input
    /// chain, then an f64 value to store, then an address to store it to,
    /// followed by a byte-width for the store.
    STXSIX,

    /// VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
    /// Maps directly to an lxvd2x instruction that will be followed by
    /// an xxswapd.
    LXVD2X,

    /// LXVRZX - Load VSX Vector Rightmost and Zero Extend
    /// This node represents v1i128 BUILD_VECTOR of a zero extending load
    /// instruction from <byte, halfword, word, or doubleword> to i128.
    /// Allows utilization of the Load VSX Vector Rightmost Instructions.
    LXVRZX,

    /// VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
    /// Maps directly to one of lxvd2x/lxvw4x/lxvh8x/lxvb16x depending on
    /// the vector type to load vector in big-endian element order.
    LOAD_VEC_BE,

    /// VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a
    /// v2f32 value into the lower half of a VSR register.
    LD_VSX_LH,

    /// VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting memory load
    /// instruction, such as LXVDSX or LXVWSX.
    LD_SPLAT,

    /// VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load
    /// that zero-extends.
    ZEXT_LD_SPLAT,

    /// VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting memory load
    /// that sign-extends.
    SEXT_LD_SPLAT,

    /// CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
    /// Maps directly to an stxvd2x instruction that will be preceded by
    /// an xxswapd.
    STXVD2X,

    /// CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
    /// Maps directly to one of stxvd2x/stxvw4x/stxvh8x/stxvb16x depending on
    /// the vector type to store vector in big-endian element order.
    STORE_VEC_BE,

    /// Store scalar integers from VSR.
    ST_VSR_SCAL_INT,

    /// ATOMIC_CMP_SWAP - the exact same as the target-independent nodes
    /// except they ensure that the compare input is zero-extended for
    /// sub-word versions because the atomic loads zero-extend.
    ATOMIC_CMP_SWAP_8,
    ATOMIC_CMP_SWAP_16,

    /// CHAIN,Glue = STORE_COND CHAIN, GPR, Ptr
    /// The store conditional instruction ST[BHWD]CX that produces a glue
    /// result to attach it to a conditional branch.
    STORE_COND,

    /// GPRC = TOC_ENTRY GA, TOC
    /// Loads the entry for GA from the TOC, where the TOC base is given by
    /// the last operand.
    TOC_ENTRY,
    LAST_MEMORY_OPCODE = TOC_ENTRY,
  };

  } // end namespace PPCISD

  /// Define some predicates that are used for node matching.
  namespace PPC {

    /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
    /// VPKUHUM instruction.
    bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                              SelectionDAG &DAG);

    /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
    /// VPKUWUM instruction.
    bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                              SelectionDAG &DAG);

    /// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
    /// VPKUDUM instruction.
    bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind,
                              SelectionDAG &DAG);

    /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
    /// a VMRGL* instruction with the specified unit size (1, 2 or 4 bytes).
    bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                            unsigned ShuffleKind, SelectionDAG &DAG);

    /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
    /// a VMRGH* instruction with the specified unit size (1, 2 or 4 bytes).
    bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize,
                            unsigned ShuffleKind, SelectionDAG &DAG);

    /// isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for
    /// a VMRGEW or VMRGOW instruction
    bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven,
                             unsigned ShuffleKind, SelectionDAG &DAG);
    /// isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXSLDWI instruction.
    bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                              bool &Swap, bool IsLE);

    /// isXXBRHShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXBRH instruction.
    bool isXXBRHShuffleMask(ShuffleVectorSDNode *N);

    /// isXXBRWShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXBRW instruction.
    bool isXXBRWShuffleMask(ShuffleVectorSDNode *N);

    /// isXXBRDShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXBRD instruction.
    bool isXXBRDShuffleMask(ShuffleVectorSDNode *N);

    /// isXXBRQShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXBRQ instruction.
    bool isXXBRQShuffleMask(ShuffleVectorSDNode *N);

    /// isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable
    /// for a XXPERMDI instruction.
    bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                               bool &Swap, bool IsLE);

    /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the
    /// shift amount, otherwise return -1.
    int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
                            SelectionDAG &DAG);

    /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
    /// specifies a splat of a single element that is suitable for input to
    /// VSPLTB/VSPLTH/VSPLTW.
    bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize);

    /// isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by
    /// the XXINSERTW instruction introduced in ISA 3.0. This is essentially any
    /// shuffle of v4f32/v4i32 vectors that just inserts one element from one
    /// vector into the other. This function will also set a couple of
    /// output parameters for how much the source vector needs to be shifted and
    /// what byte number needs to be specified for the instruction to put the
    /// element in the desired location of the target vector.
    bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                         unsigned &InsertAtByte, bool &Swap, bool IsLE);

    /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
    /// appropriate for PPC mnemonics (which have a big endian bias - namely
    /// elements are counted from the left of the vector register).
    unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
                                        SelectionDAG &DAG);

    /// get_VSPLTI_elt - If this is a build_vector of constants which can be
    /// formed by using a vspltis[bhw] instruction of the specified element
    /// size, return the constant being splatted.  The ByteSize field indicates
    /// the number of bytes of each element [124] -> [bhw].
    SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG);

    // Flags for computing the optimal addressing mode for loads and stores.
    enum MemOpFlags {
      MOF_None = 0,

      // Extension mode for integer loads.
      MOF_SExt = 1,
      MOF_ZExt = 1 << 1,
      MOF_NoExt = 1 << 2,

      // Address computation flags.
      MOF_NotAddNorCst = 1 << 5,      // Not const. or sum of ptr and scalar.
      MOF_RPlusSImm16 = 1 << 6,       // Reg plus signed 16-bit constant.
      MOF_RPlusLo = 1 << 7,           // Reg plus signed 16-bit relocation
      MOF_RPlusSImm16Mult4 = 1 << 8,  // Reg plus 16-bit signed multiple of 4.
      MOF_RPlusSImm16Mult16 = 1 << 9, // Reg plus 16-bit signed multiple of 16.
      MOF_RPlusSImm34 = 1 << 10,      // Reg plus 34-bit signed constant.
      MOF_RPlusR = 1 << 11,           // Sum of two variables.
      MOF_PCRel = 1 << 12,            // PC-Relative relocation.
      MOF_AddrIsSImm32 = 1 << 13,     // A simple 32-bit constant.

      // The in-memory type.
      MOF_SubWordInt = 1 << 15,
      MOF_WordInt = 1 << 16,
      MOF_DoubleWordInt = 1 << 17,
      MOF_ScalarFloat = 1 << 18, // Scalar single or double precision.
      MOF_Vector = 1 << 19,      // Vector types and quad precision scalars.
      MOF_Vector256 = 1 << 20,

      // Subtarget features.
      MOF_SubtargetBeforeP9 = 1 << 22,
      MOF_SubtargetP9 = 1 << 23,
      MOF_SubtargetP10 = 1 << 24,
      MOF_SubtargetSPE = 1 << 25
    };
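
    // A hedged illustration (not part of the interface): a zero-extending
    // i32 load from [Reg + signed 16-bit imm] on a Power9 subtarget would
    // carry a flag set along the lines of
    //   unsigned Flags = MOF_ZExt | MOF_WordInt | MOF_RPlusSImm16 |
    //                    MOF_SubtargetP9;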

    // The addressing modes for loads and stores.
    enum AddrMode {
      AM_None,
      AM_DForm,
      AM_DSForm,
      AM_DQForm,
      AM_PrefixDForm,
      AM_XForm,
      AM_PCRel
    };
  } // end namespace PPC

  class PPCTargetLowering : public TargetLowering {
    const PPCSubtarget &Subtarget;

  public:
    explicit PPCTargetLowering(const PPCTargetMachine &TM,
                               const PPCSubtarget &STI);

    /// getTargetNodeName() - This method returns the name of a target specific
    /// DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    bool isSelectSupported(SelectSupportKind Kind) const override {
      // PowerPC does not support scalar condition selects on vectors.
      return (Kind != SelectSupportKind::ScalarCondVectorVal);
    }

    /// getPreferredVectorAction - The code we generate when vector types are
    /// legalized by promoting the integer element type is often much worse
    /// than code we generate if we widen the type for applicable vector types.
    /// The issue with promoting is that the vector is scalarized, individual
    /// elements promoted and then the vector is rebuilt. So say we load a pair
    /// of v4i8's and shuffle them. This will turn into a mess of 8 extending
    /// loads, moves back into VSRs (or memory ops if we don't have moves) and
    /// then the VPERM for the shuffle. All in all a very slow sequence.
    TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT)
      const override {
      // Default handling for scalable and single-element vectors.
      if (VT.isScalableVector() || VT.getVectorNumElements() == 1)
        return TargetLoweringBase::getPreferredVectorAction(VT);

      // Split and promote vNi1 vectors so we don't produce v256i1/v512i1
      // types as those are only for MMA instructions.
      if (VT.getScalarSizeInBits() == 1 && VT.getSizeInBits() > 16)
        return TypeSplitVector;
      if (VT.getScalarSizeInBits() == 1)
        return TypePromoteInteger;

      // Widen vectors that have reasonably sized elements.
      if (VT.getScalarSizeInBits() % 8 == 0)
        return TypeWidenVector;
      return TargetLoweringBase::getPreferredVectorAction(VT);
    }

    bool useSoftFloat() const override;

    bool hasSPE() const;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override {
      return MVT::i32;
    }

    bool isCheapToSpeculateCttz(Type *Ty) const override {
      return true;
    }

    bool isCheapToSpeculateCtlz(Type *Ty) const override {
      return true;
    }

    bool
    shallExtractConstSplatVectorElementToStore(Type *VectorTy,
                                               unsigned ElemSizeInBits,
                                               unsigned &Index) const override;

    bool isCtlzFast() const override {
      return true;
    }

    bool isEqualityCmpFoldedWithSignedCmp() const override {
      return false;
    }

    bool hasAndNotCompare(SDValue) const override {
      return true;
    }

    bool preferIncOfAddToSubOfNot(EVT VT) const override;

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
                                 bool OptForSize, NegatibleCost &Cost,
                                 unsigned Depth = 0) const override;

    /// getSetCCResultType - Return the ISD::SETCC ValueType
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    /// Return true if target always benefits from combining into FMA for a
    /// given value type. This must typically return false on targets where FMA
    /// takes more cycles to execute than FADD.
    bool enableAggressiveFMAFusion(EVT VT) const override;

    /// getPreIndexedAddressParts - returns true by value, base pointer and
    /// offset pointer and addressing mode by reference if the node's address
    /// can be legally represented as pre-indexed load / store address.
    bool getPreIndexedAddressParts(SDNode *N, SDValue &Base,
                                   SDValue &Offset,
                                   ISD::MemIndexedMode &AM,
                                   SelectionDAG &DAG) const override;

    /// SelectAddressEVXRegReg - Given the specified address, check to see if
    /// it can be more efficiently represented as [r+imm].
    bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index,
                                SelectionDAG &DAG) const;

    /// SelectAddressRegReg - Given the specified address, check to see if it
    /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment
    /// is non-zero, only accept displacements which are not suitable for
    /// [r+imm]. Returns false if the address can be represented by [r+imm],
    /// which is preferred.
    bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index,
                             SelectionDAG &DAG,
                             MaybeAlign EncodingAlignment = std::nullopt) const;

    /// SelectAddressRegImm - Returns true if the address N can be represented
    /// by a base register plus a signed 16-bit displacement [r+imm], and if it
    /// is not better represented as reg+reg. If \p EncodingAlignment is
    /// non-zero, only accept displacements suitable for instruction encoding
    /// requirements, i.e. multiples of 4 for DS form.
    bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base,
                             SelectionDAG &DAG,
                             MaybeAlign EncodingAlignment) const;
    bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base,
                               SelectionDAG &DAG) const;

    /// SelectAddressRegRegOnly - Given the specified address, force it to be
    /// represented as an indexed [r+r] operation.
    bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index,
                                 SelectionDAG &DAG) const;

    /// SelectAddressPCRel - Returns true if the specified address can be
    /// represented as pc-relative, i.e. [pc+imm].
    bool SelectAddressPCRel(SDValue N, SDValue &Base) const;

    Sched::Preference getSchedulingPreference(SDNode *N) const override;

    /// LowerOperation - Provide custom lowering hooks for some operations.
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// ReplaceNodeResults - Replace the results of node with an illegal result
    /// type with new values built out of custom code.
    ///
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                            SelectionDAG &DAG) const override;

    SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const;
    SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;

    Register getRegisterByName(const char* RegName, LLT VT,
                               const MachineFunction &MF) const override;

    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    Align getPrefLoopAlignment(MachineLoop *ML) const override;

    bool shouldInsertFencesForAtomic(const Instruction *I) const override {
      return true;
    }

    Value *emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr,
                          AtomicOrdering Ord) const override;

    Value *emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr,
                                AtomicOrdering Ord) const override;
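
    // A hedged illustration: together these hooks let atomic expansion build
    // the classic load-reserve / store-conditional retry loop, e.g. for an
    // atomicrmw add on i32 (illustrative assembly):
    //   loop: lwarx  r5, 0, r3
    //         add    r5, r5, r4
    //         stwcx. r5, 0, r3
    //         bne-   loop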

    Instruction *emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst,
                                  AtomicOrdering Ord) const override;
    Instruction *emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst,
                                   AtomicOrdering Ord) const override;

    bool shouldInlineQuadwordAtomics() const;

    TargetLowering::AtomicExpansionKind
    shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;

    TargetLowering::AtomicExpansionKind
    shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;

    Value *emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder,
                                        AtomicRMWInst *AI, Value *AlignedAddr,
                                        Value *Incr, Value *Mask,
                                        Value *ShiftAmt,
                                        AtomicOrdering Ord) const override;
    Value *emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder,
                                            AtomicCmpXchgInst *CI,
                                            Value *AlignedAddr, Value *CmpVal,
                                            Value *NewVal, Value *Mask,
                                            AtomicOrdering Ord) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;
    MachineBasicBlock *EmitAtomicBinary(MachineInstr &MI,
                                        MachineBasicBlock *MBB,
                                        unsigned AtomicSize,
                                        unsigned BinOpcode,
                                        unsigned CmpOpcode = 0,
                                        unsigned CmpPred = 0) const;
    MachineBasicBlock *EmitPartwordAtomicBinary(MachineInstr &MI,
                                                MachineBasicBlock *MBB,
                                                bool is8bit,
                                                unsigned Opcode,
                                                unsigned CmpOpcode = 0,
                                                unsigned CmpPred = 0) const;

    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitProbedAlloca(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    bool hasInlineStackProbe(const MachineFunction &MF) const override;

    unsigned getStackProbeSize(const MachineFunction &MF) const;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight getSingleConstraintMatchWeight(
      AsmOperandInfo &info, const char *constraint) const override;

    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area.
    Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override;

    /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
    /// vector.  If it is invalid, don't add anything to Ops.
    void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    InlineAsm::ConstraintCode
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "es")
        return InlineAsm::ConstraintCode::es;
      else if (ConstraintCode == "Q")
        return InlineAsm::ConstraintCode::Q;
      else if (ConstraintCode == "Z")
        return InlineAsm::ConstraintCode::Z;
      else if (ConstraintCode == "Zy")
        return InlineAsm::ConstraintCode::Zy;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }
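
    // A hedged illustration of how these memory constraints surface in user
    // code: "Z" accepts an indexed or indirect memory operand and is commonly
    // paired with the %y operand modifier, e.g.
    //   int v;
    //   asm("lwbrx %0, %y1" : "=r"(v) : "Z"(*ptr));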

    void CollectTargetIntrinsicOperands(const CallInst &I,
                                 SmallVectorImpl<SDValue> &Ops,
                                 SelectionDAG &DAG) const override;

    /// isLegalAddressingMode - Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    /// isLegalICmpImmediate - Return true if the specified immediate is a legal
    /// icmp immediate, that is, the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// isLegalAddImmediate - Return true if the specified immediate is a legal
    /// add immediate, that is, the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    /// isTruncateFree - Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On PPC it's free to truncate an i64 value in
    /// register X1 to i32 by referencing its sub-register R1.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool isZExtFree(SDValue Val, EVT VT2) const override;

    bool isFPExtFree(EVT DestVT, EVT SrcVT) const override;

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool convertSelectOfConstantsToMath(EVT VT) const override {
      return true;
    }

    bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                SDValue C) const override;

    bool isDesirableToTransformToIntegerOp(unsigned Opc,
                                           EVT VT) const override {
      // Only handle float load/store pair because float(fpr) load/store
      // instruction has more cycles than integer(gpr) load/store in PPC.
      if (Opc != ISD::LOAD && Opc != ISD::STORE)
        return false;
      if (VT != MVT::f32 && VT != MVT::f64)
        return false;

      return true;
    }

    // Returns true if the address of the global is stored in TOC entry.
    bool isAccessedAsGotIndirect(SDValue N) const;

    bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override;

    bool getTgtMemIntrinsic(IntrinsicInfo &Info,
                            const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// It returns EVT::Other if the type should be determined using generic
    /// target-independent logic.
    EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                            const AttributeList &FuncAttributes) const override;

    /// Is unaligned memory access allowed for the given type, and is it fast
    /// relative to software emulation.
    bool allowsMisalignedMemoryAccesses(
        EVT VT, unsigned AddrSpace, Align Alignment = Align(1),
        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
        unsigned *Fast = nullptr) const override;

    /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster
    /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
    /// expanded to FMAs when this method returns true, otherwise fmuladd is
    /// expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                    EVT VT) const override;

    bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;

    /// isProfitableToHoist - Check if it is profitable to hoist instruction
    /// \p I to its dominator block.
    /// For example, it is not profitable if \p I and its only user can form a
    /// FMA instruction, because PowerPC prefers FMADD.
    bool isProfitableToHoist(Instruction *I) const override;

    const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;

    // Should we expand the build vector with shuffles?
    bool
    shouldExpandBuildVectorWithShuffles(EVT VT,
                                        unsigned DefinedValues) const override;

    // Keep the zero-extensions for arguments to libcalls.
    bool shouldKeepZExtForFP16Conv() const override { return true; }

    /// createFastISel - This method returns a target-specific FastISel object,
    /// or null if the target does not support "fast" instruction selection.
    FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
                             const TargetLibraryInfo *LibInfo) const override;

    /// Returns true if an argument of type Ty needs to be passed in a
    /// contiguous block of registers in calling convention CallConv.
    bool functionArgumentNeedsConsecutiveRegisters(
        Type *Ty, CallingConv::ID CallConv, bool isVarArg,
        const DataLayout &DL) const override {
      // We support any array type as "consecutive" block in the parameter
      // save area.  The element type defines the alignment requirement and
      // whether the argument should go in GPRs, FPRs, or VRs if available.
      //
      // Note that clang uses this capability both to implement the ELFv2
      // homogeneous float/vector aggregate ABI, and to avoid having to use
      // "byval" when passing aggregates that might fully fit in registers.
      return Ty->isArrayTy();
    }
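
    // A hedged illustration: under the ELFv2 homogeneous-aggregate ABI, a C
    // argument such as
    //   struct HFA { double x, y; };
    // is typically lowered by clang to an IR parameter of type [2 x double],
    // which this hook then reports as needing consecutive FPRs.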

    /// If a physical register, this returns the register that receives the
    /// exception address on entry to an EH pad.
    Register
    getExceptionPointerRegister(const Constant *PersonalityFn) const override;

    /// If a physical register, this returns the register that receives the
    /// exception typeid on entry to a landing pad.
    Register
    getExceptionSelectorRegister(const Constant *PersonalityFn) const override;

    /// Override to support customized stack guard loading.
    bool useLoadStackGuardNode(const Module &M) const override;
    void insertSSPDeclarations(Module &M) const override;
    Value *getSDagStackGuard(const Module &M) const override;

    bool isFPImmLegal(const APFloat &Imm, EVT VT,
                      bool ForCodeSize) const override;

    unsigned getJumpTableEncoding() const override;
    bool isJumpTableRelative() const override;
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                               unsigned JTI,
                                               MCContext &Ctx) const override;

    /// SelectOptimalAddrMode - Based on a node N and its Parent (a MemSDNode),
    /// compute the address flags of the node, get the optimal address mode
    /// based on the flags, and set the Base and Disp based on the address mode.
    PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N,
                                        SDValue &Disp, SDValue &Base,
                                        SelectionDAG &DAG,
                                        MaybeAlign Align) const;
1179     /// SelectForceXFormMode - Given the specified address, force it to be
1180     /// represented as an indexed [r+r] operation (an XForm instruction).
1181     PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base,
1182                                        SelectionDAG &DAG) const;
1183 
1184     bool splitValueIntoRegisterParts(
1185         SelectionDAG & DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
1186         unsigned NumParts, MVT PartVT, std::optional<CallingConv::ID> CC)
1187         const override;
1188     /// Structure that collects some common arguments that get passed around
1189     /// between the functions for call lowering.
1190     struct CallFlags {
1191       const CallingConv::ID CallConv;
1192       const bool IsTailCall : 1;
1193       const bool IsVarArg : 1;
1194       const bool IsPatchPoint : 1;
1195       const bool IsIndirect : 1;
1196       const bool HasNest : 1;
1197       const bool NoMerge : 1;
1198 
CallFlagsCallFlags1199       CallFlags(CallingConv::ID CC, bool IsTailCall, bool IsVarArg,
1200                 bool IsPatchPoint, bool IsIndirect, bool HasNest, bool NoMerge)
1201           : CallConv(CC), IsTailCall(IsTailCall), IsVarArg(IsVarArg),
1202             IsPatchPoint(IsPatchPoint), IsIndirect(IsIndirect),
1203             HasNest(HasNest), NoMerge(NoMerge) {}
1204     };
1205 
1206     CCAssignFn *ccAssignFnForCall(CallingConv::ID CC, bool Return,
1207                                   bool IsVarArg) const;
1208     bool supportsTailCallFor(const CallBase *CB) const;
1209 
1210   private:
1211     struct ReuseLoadInfo {
1212       SDValue Ptr;
1213       SDValue Chain;
1214       SDValue ResChain;
1215       MachinePointerInfo MPI;
1216       bool IsDereferenceable = false;
1217       bool IsInvariant = false;
1218       Align Alignment;
1219       AAMDNodes AAInfo;
1220       const MDNode *Ranges = nullptr;
1221 
1222       ReuseLoadInfo() = default;
1223 
MMOFlagsReuseLoadInfo1224       MachineMemOperand::Flags MMOFlags() const {
1225         MachineMemOperand::Flags F = MachineMemOperand::MONone;
1226         if (IsDereferenceable)
1227           F |= MachineMemOperand::MODereferenceable;
1228         if (IsInvariant)
1229           F |= MachineMemOperand::MOInvariant;
1230         return F;
1231       }
1232     };
1233 
1234     // Map that relates a set of common address flags to PPC addressing modes.
1235     std::map<PPC::AddrMode, SmallVector<unsigned, 16>> AddrModesMap;
1236     void initializeAddrModeMap();
1237 
1238     bool canReuseLoadAddress(SDValue Op, EVT MemVT, ReuseLoadInfo &RLI,
1239                              SelectionDAG &DAG,
1240                              ISD::LoadExtType ET = ISD::NON_EXTLOAD) const;
1241 
1242     void LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
1243                                 SelectionDAG &DAG, const SDLoc &dl) const;
1244     SDValue LowerFP_TO_INTDirectMove(SDValue Op, SelectionDAG &DAG,
1245                                      const SDLoc &dl) const;
1246 
1247     bool directMoveIsProfitable(const SDValue &Op) const;
1248     SDValue LowerINT_TO_FPDirectMove(SDValue Op, SelectionDAG &DAG,
1249                                      const SDLoc &dl) const;
1250 
1251     SDValue LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
1252                                  const SDLoc &dl) const;
1253 
1254     SDValue LowerTRUNCATEVector(SDValue Op, SelectionDAG &DAG) const;
1255 
1256     SDValue getFramePointerFrameIndex(SelectionDAG &DAG) const;
1257     SDValue getReturnAddrFrameIndex(SelectionDAG &DAG) const;
1258 
1259     bool IsEligibleForTailCallOptimization(
1260         const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
1261         CallingConv::ID CallerCC, bool isVarArg,
1262         const SmallVectorImpl<ISD::InputArg> &Ins) const;
1263 
1264     bool IsEligibleForTailCallOptimization_64SVR4(
1265         const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
1266         CallingConv::ID CallerCC, const CallBase *CB, bool isVarArg,
1267         const SmallVectorImpl<ISD::OutputArg> &Outs,
1268         const SmallVectorImpl<ISD::InputArg> &Ins, const Function *CallerFunc,
1269         bool isCalleeExternalSymbol) const;
1270 
1271     bool isEligibleForTCO(const GlobalValue *CalleeGV, CallingConv::ID CalleeCC,
1272                           CallingConv::ID CallerCC, const CallBase *CB,
1273                           bool isVarArg,
1274                           const SmallVectorImpl<ISD::OutputArg> &Outs,
1275                           const SmallVectorImpl<ISD::InputArg> &Ins,
1276                           const Function *CallerFunc,
1277                           bool isCalleeExternalSymbol) const;
1278 
1279     SDValue EmitTailCallLoadFPAndRetAddr(SelectionDAG &DAG, int SPDiff,
1280                                          SDValue Chain, SDValue &LROpOut,
1281                                          SDValue &FPOpOut,
1282                                          const SDLoc &dl) const;
1283 
1284     SDValue getTOCEntry(SelectionDAG &DAG, const SDLoc &dl, SDValue GA) const;
1285 
1286     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1287     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1288     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1289     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1290     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1291     SDValue LowerGlobalTLSAddressAIX(SDValue Op, SelectionDAG &DAG) const;
1292     SDValue LowerGlobalTLSAddressLinux(SDValue Op, SelectionDAG &DAG) const;
1293     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1294     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1295     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1296     SDValue LowerSSUBO(SDValue Op, SelectionDAG &DAG) const;
1297     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1298     SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1299     SDValue LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const;
1300     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1301     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1302     SDValue LowerVACOPY(SDValue Op, SelectionDAG &DAG) const;
1303     SDValue LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
1304     SDValue LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1305     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1306     SDValue LowerEH_DWARF_CFA(SDValue Op, SelectionDAG &DAG) const;
1307     SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
1308     SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
1309     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1310     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
1311     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
1312                            const SDLoc &dl) const;
1313     SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1314     SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1315     SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1316     SDValue LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const;
1317     SDValue LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const;
1318     SDValue LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const;
1319     SDValue LowerFunnelShift(SDValue Op, SelectionDAG &DAG) const;
1320     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1321     SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
1322     SDValue LowerVPERM(SDValue Op, SelectionDAG &DAG, ArrayRef<int> PermMask,
1323                        EVT VT, SDValue V1, SDValue V2) const;
1324     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1325     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1326     SDValue LowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const;
1327     SDValue LowerBSWAP(SDValue Op, SelectionDAG &DAG) const;
1328     SDValue LowerATOMIC_CMP_SWAP(SDValue Op, SelectionDAG &DAG) const;
1329     SDValue LowerIS_FPCLASS(SDValue Op, SelectionDAG &DAG) const;
1330     SDValue LowerADDSUBO_CARRY(SDValue Op, SelectionDAG &DAG) const;
1331     SDValue LowerADDSUBO(SDValue Op, SelectionDAG &DAG) const;
1332     SDValue lowerToLibCall(const char *LibCallName, SDValue Op,
1333                            SelectionDAG &DAG) const;
1334     SDValue lowerLibCallBasedOnType(const char *LibCallFloatName,
1335                                     const char *LibCallDoubleName, SDValue Op,
1336                                     SelectionDAG &DAG) const;
1337     bool isLowringToMASSFiniteSafe(SDValue Op) const;
1338     bool isLowringToMASSSafe(SDValue Op) const;
1339     bool isScalarMASSConversionEnabled() const;
1340     SDValue lowerLibCallBase(const char *LibCallDoubleName,
1341                              const char *LibCallFloatName,
1342                              const char *LibCallDoubleNameFinite,
1343                              const char *LibCallFloatNameFinite, SDValue Op,
1344                              SelectionDAG &DAG) const;
1345     SDValue lowerPow(SDValue Op, SelectionDAG &DAG) const;
1346     SDValue lowerSin(SDValue Op, SelectionDAG &DAG) const;
1347     SDValue lowerCos(SDValue Op, SelectionDAG &DAG) const;
1348     SDValue lowerLog(SDValue Op, SelectionDAG &DAG) const;
1349     SDValue lowerLog10(SDValue Op, SelectionDAG &DAG) const;
1350     SDValue lowerExp(SDValue Op, SelectionDAG &DAG) const;
1351     SDValue LowerATOMIC_LOAD_STORE(SDValue Op, SelectionDAG &DAG) const;
1352     SDValue LowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1353     SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) const;
1354     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1355     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1356     SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
1357 
1358     SDValue LowerVectorLoad(SDValue Op, SelectionDAG &DAG) const;
1359     SDValue LowerVectorStore(SDValue Op, SelectionDAG &DAG) const;
1360     SDValue LowerDMFVectorLoad(SDValue Op, SelectionDAG &DAG) const;
1361     SDValue LowerDMFVectorStore(SDValue Op, SelectionDAG &DAG) const;
1362 
1363     SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1364                             CallingConv::ID CallConv, bool isVarArg,
1365                             const SmallVectorImpl<ISD::InputArg> &Ins,
1366                             const SDLoc &dl, SelectionDAG &DAG,
1367                             SmallVectorImpl<SDValue> &InVals) const;
1368 
1369     SDValue FinishCall(CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
1370                        SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
1371                        SDValue InGlue, SDValue Chain, SDValue CallSeqStart,
1372                        SDValue &Callee, int SPDiff, unsigned NumBytes,
1373                        const SmallVectorImpl<ISD::InputArg> &Ins,
1374                        SmallVectorImpl<SDValue> &InVals,
1375                        const CallBase *CB) const;
1376 
1377     SDValue
1378     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1379                          const SmallVectorImpl<ISD::InputArg> &Ins,
1380                          const SDLoc &dl, SelectionDAG &DAG,
1381                          SmallVectorImpl<SDValue> &InVals) const override;
1382 
1383     SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
1384                       SmallVectorImpl<SDValue> &InVals) const override;
1385 
1386     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1387                         bool isVarArg,
1388                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1389                         LLVMContext &Context, const Type *RetTy) const override;
1390 
1391     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1392                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1393                         const SmallVectorImpl<SDValue> &OutVals,
1394                         const SDLoc &dl, SelectionDAG &DAG) const override;
1395 
1396     SDValue extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT,
1397                               SelectionDAG &DAG, SDValue ArgVal,
1398                               const SDLoc &dl) const;
1399 
1400     SDValue LowerFormalArguments_AIX(
1401         SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1402         const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1403         SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
1404     SDValue LowerFormalArguments_64SVR4(
1405         SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1406         const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1407         SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
1408     SDValue LowerFormalArguments_32SVR4(
1409         SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1410         const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1411         SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const;
1412 
1413     SDValue createMemcpyOutsideCallSeq(SDValue Arg, SDValue PtrOff,
1414                                        SDValue CallSeqStart,
1415                                        ISD::ArgFlagsTy Flags, SelectionDAG &DAG,
1416                                        const SDLoc &dl) const;
1417 
1418     SDValue LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags,
1419                              const SmallVectorImpl<ISD::OutputArg> &Outs,
1420                              const SmallVectorImpl<SDValue> &OutVals,
1421                              const SmallVectorImpl<ISD::InputArg> &Ins,
1422                              const SDLoc &dl, SelectionDAG &DAG,
1423                              SmallVectorImpl<SDValue> &InVals,
1424                              const CallBase *CB) const;
1425     SDValue LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallFlags CFlags,
1426                              const SmallVectorImpl<ISD::OutputArg> &Outs,
1427                              const SmallVectorImpl<SDValue> &OutVals,
1428                              const SmallVectorImpl<ISD::InputArg> &Ins,
1429                              const SDLoc &dl, SelectionDAG &DAG,
1430                              SmallVectorImpl<SDValue> &InVals,
1431                              const CallBase *CB) const;
1432     SDValue LowerCall_AIX(SDValue Chain, SDValue Callee, CallFlags CFlags,
1433                           const SmallVectorImpl<ISD::OutputArg> &Outs,
1434                           const SmallVectorImpl<SDValue> &OutVals,
1435                           const SmallVectorImpl<ISD::InputArg> &Ins,
1436                           const SDLoc &dl, SelectionDAG &DAG,
1437                           SmallVectorImpl<SDValue> &InVals,
1438                           const CallBase *CB) const;
1439 
1440     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1441     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1442     SDValue LowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
1443 
1444     SDValue DAGCombineExtBoolTrunc(SDNode *N, DAGCombinerInfo &DCI) const;
1445     SDValue DAGCombineBuildVector(SDNode *N, DAGCombinerInfo &DCI) const;
1446     SDValue DAGCombineTruncBoolExt(SDNode *N, DAGCombinerInfo &DCI) const;
1447     SDValue combineStoreFPToInt(SDNode *N, DAGCombinerInfo &DCI) const;
1448     SDValue combineFPToIntToFP(SDNode *N, DAGCombinerInfo &DCI) const;
1449     SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
1450     SDValue combineVectorShift(SDNode *N, DAGCombinerInfo &DCI) const;
1451     SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
1452     SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
1453     SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
1454     SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
1455     SDValue combineFMALike(SDNode *N, DAGCombinerInfo &DCI) const;
1456     SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
1457     SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
1458     SDValue combineVectorShuffle(ShuffleVectorSDNode *SVN,
1459                                  SelectionDAG &DAG) const;
1460     SDValue combineVReverseMemOP(ShuffleVectorSDNode *SVN, LSBaseSDNode *LSBase,
1461                                  DAGCombinerInfo &DCI) const;
1462 
1463     /// ConvertSETCCToSubtract - Looks at a SETCC that compares integers and
1464     /// replaces it with an integer subtraction when (1) there is a legal way
1465     /// of doing so and (2) keeping the comparison result in a GPR is profitable.
1466     SDValue ConvertSETCCToSubtract(SDNode *N, DAGCombinerInfo &DCI) const;
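    // Illustrative equivalence (not a verbatim excerpt of the combine): with
    // 32-bit unsigned operands zero-extended into 64-bit GPRs,
    //   a u< b   ==   ((zext a) - (zext b)) >> 63   (logical shift)
    // i.e. the sign bit of the 64-bit difference, so the boolean can stay in
    // a GPR instead of routing through a condition-register field.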
1467 
1468     SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1469                             int &RefinementSteps, bool &UseOneConstNR,
1470                             bool Reciprocal) const override;
1471     SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1472                              int &RefinementSteps) const override;
1473     SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG,
1474                              const DenormalMode &Mode) const override;
1475     SDValue getSqrtResultForDenormInput(SDValue Operand,
1476                                         SelectionDAG &DAG) const override;
1477     unsigned combineRepeatedFPDivisors() const override;
1478 
1479     SDValue
1480     combineElementTruncationToVectorTruncation(SDNode *N,
1481                                                DAGCombinerInfo &DCI) const;
1482 
1483     /// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be
1484     /// handled by the VINSERTH instruction introduced in ISA 3.0. This is
1485     /// essentially any shuffle of v8i16 vectors that just inserts one element
1486     /// from one vector into the other.
1487     SDValue lowerToVINSERTH(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
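    // For example (illustrative): the v8i16 shuffle mask
    //   <0, 1, 2, 11, 4, 5, 6, 7>
    // keeps V1 except for element 3, which is replaced by element 3 of V2
    // (mask index 11 = 8 + 3), so the whole shuffle maps to one VINSERTH.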
1488 
1489     /// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be
1490     /// handled by the VINSERTB instruction introduced in ISA 3.0. This is
1491     /// essentially the v16i8 version of VINSERTH.
1492     SDValue lowerToVINSERTB(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
1493 
1494     /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
1495     /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1.
1496     SDValue lowerToXXSPLTI32DX(ShuffleVectorSDNode *N, SelectionDAG &DAG) const;
1497 
1498     // Return whether the call instruction can potentially be optimized to a
1499     // tail call. This will cause the optimizers to attempt to move or
1500     // duplicate return instructions to help enable tail call optimizations.
1501     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1502     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1503 
1504     /// getAddrModeForFlags - Based on the set of address flags, select the
1505     /// optimal instruction format to match against.
1506     PPC::AddrMode getAddrModeForFlags(unsigned Flags) const;
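    // A minimal sketch of the intended lookup (assuming the AddrModesMap
    // member above; the real implementation may differ):
    //   for (const auto &[Mode, FlagSets] : AddrModesMap)
    //     if (llvm::is_contained(FlagSets, Flags))
    //       return Mode;
    //   return PPC::AM_None;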
1507 
1508     /// computeMOFlags - Given a node N and its Parent (a MemSDNode), compute
1509     /// the address flags of the load/store instruction that is to be matched.
1510     /// The address flags are stored in a map, which is then searched to
1511     /// determine the optimal load/store instruction format.
1512     unsigned computeMOFlags(const SDNode *Parent, SDValue N,
1513                             SelectionDAG &DAG) const;
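    // For instance (illustrative; flag names follow the PPC::MOF_* values
    // declared earlier in this header): a word-sized integer load addressed
    // as base + signed 16-bit offset might accumulate
    //   unsigned Flags = PPC::MOF_WordInt | PPC::MOF_RPlusSImm16;
    // which getAddrModeForFlags() would then resolve to a D-Form access.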
1514   }; // end class PPCTargetLowering
1515 
1516   namespace PPC {
1517 
1518     FastISel *createFastISel(FunctionLoweringInfo &FuncInfo,
1519                              const TargetLibraryInfo *LibInfo);
1520 
1521   } // end namespace PPC
1522 
1523   bool isIntS16Immediate(SDNode *N, int16_t &Imm);
1524   bool isIntS16Immediate(SDValue Op, int16_t &Imm);
1525   bool isIntS34Immediate(SDNode *N, int64_t &Imm);
1526   bool isIntS34Immediate(SDValue Op, int64_t &Imm);
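  // Usage sketch (hypothetical caller): these helpers test and extract in one
  // step, e.g. when deciding whether an operand fits a D-Form displacement:
  //   int16_t Imm;
  //   if (isIntS16Immediate(N->getOperand(1), Imm))
  //     ; // the operand is a constant signed 16-bit immediate, value in Imm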
1527 
1528   bool convertToNonDenormSingle(APInt &ArgAPInt);
1529   bool convertToNonDenormSingle(APFloat &ArgAPFloat);
1530   bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat);
1531 
1532 } // end namespace llvm
1533 
1534 #endif // LLVM_LIB_TARGET_POWERPC_PPCISELLOWERING_H
1535