//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
  class X86Subtarget;
  class X86TargetMachine;

  namespace X86ISD {
    // X86 Specific DAG Nodes
  enum NodeType : unsigned {
    // Start the numbering where the builtin ops leave off.
    FIRST_NUMBER = ISD::BUILTIN_OP_END,

    /// Bit scan forward.
    BSF,
    /// Bit scan reverse.
    BSR,

    /// X86 funnel/double shift i16 instructions. These correspond to
    /// X86::SHLDW and X86::SHRDW instructions, which have different amount
    /// modulo rules from generic funnel shifts.
    /// NOTE: The operand order matches ISD::FSHL/FSHR, not SHLD/SHRD.
    FSHL,
    FSHR,

    /// Bitwise logical AND of floating point values. This corresponds
    /// to X86::ANDPS or X86::ANDPD.
    FAND,

    /// Bitwise logical OR of floating point values. This corresponds
    /// to X86::ORPS or X86::ORPD.
    FOR,

    /// Bitwise logical XOR of floating point values. This corresponds
    /// to X86::XORPS or X86::XORPD.
    FXOR,

    /// Bitwise logical ANDNOT of floating point values. This
    /// corresponds to X86::ANDNPS or X86::ANDNPD.
    FANDN,

    /// These operations represent an abstract X86 call
    /// instruction, which includes a bunch of information. In particular the
    /// operands of these nodes are:
    ///
    ///     #0 - The incoming token chain
    ///     #1 - The callee
    ///     #2 - The number of arg bytes the caller pushes on the stack.
    ///     #3 - The number of arg bytes the callee pops off the stack.
    ///     #4 - The value to pass in AL/AX/EAX (optional)
    ///     #5 - The value to pass in DL/DX/EDX (optional)
    ///
    /// The result values of these nodes are:
    ///
    ///     #0 - The outgoing token chain
    ///     #1 - The first register result value (optional)
    ///     #2 - The second register result value (optional)
    ///
    CALL,

    /// Same as call except it adds the NoTrack prefix.
    NT_CALL,

    // Pseudo for an Objective-C call that gets emitted together with a
    // special marker instruction.
    CALL_RVMARKER,

    /// The same as ISD::CopyFromReg except that this node makes it explicit
    /// that it may lower to an x87 FPU stack pop. Optimizations should be more
    /// cautious when handling this node than a normal CopyFromReg to avoid
    /// removing a required FPU stack pop. A key requirement is optimizations
    /// should not optimize any users of a chain that contains a
    /// POP_FROM_X87_REG to use a chain from a point earlier than the
    /// POP_FROM_X87_REG (which may remove a required FPU stack pop).
    POP_FROM_X87_REG,

    // Pseudo for a call to an imported function to ensure the correct machine
    // instruction is emitted for Import Call Optimization.
    IMP_CALL,

    /// X86 compare and logical compare instructions.
    CMP,
    FCMP,
    COMI,
    UCOMI,

    // X86 compare with Intrinsics similar to COMI.
    COMX,
    UCOMX,

    /// X86 bit-test instructions.
    BT,

    /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
    /// operand, usually produced by a CMP instruction.
    SETCC,
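    // A minimal illustrative sketch of building the CMP + SETCC pair
    // (`DAG`, `DL`, `LHS` and `RHS` are placeholders, not part of this file):
    //   SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, LHS, RHS);
    //   SDValue Res = DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
    //                             DAG.getTargetConstant(X86::COND_E, DL,
    //                                                   MVT::i8),
    //                             Flags);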

    /// X86 Select
    SELECTS,

    // Same as SETCC except it's materialized with a SBB and the value is all
    // ones or all zeros.
    SETCC_CARRY, // R = carry_bit ? ~0 : 0

    /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
    /// Operands are two FP values to compare; result is a mask of
    /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
    FSETCC,

    /// X86 FP SETCC, similar to above, but with output as an i1 mask, and
    /// a version with SAE.
    FSETCCM,
    FSETCCM_SAE,

    /// X86 conditional moves. Operand 0 and operand 1 are the two values
    /// to select from. Operand 2 is the condition code, and operand 3 is the
    /// flag operand produced by a CMP or TEST instruction.
    CMOV,
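    // Sketch of the operand order documented above (`V0`, `V1`, `DL`, `VT`
    // and `EFLAGS` are placeholder values):
    //   SDValue Ops[] = {V0, V1,
    //                    DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8),
    //                    EFLAGS};
    //   SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, VT, Ops);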

    /// X86 conditional branches. Operand 0 is the chain operand, operand 1
    /// is the block to branch if condition is true, operand 2 is the
    /// condition code, and operand 3 is the flag operand produced by a CMP
    /// or TEST instruction.
    BRCOND,

    /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
    /// operand 1 is the target address.
    NT_BRIND,

    /// Return with a glue operand. Operand 0 is the chain operand, operand
    /// 1 is the number of bytes of stack to pop.
    RET_GLUE,

    /// Return from interrupt. Operand 0 is the number of bytes to pop.
    IRET,

    /// Repeat fill, corresponds to X86::REP_STOSx.
    REP_STOS,

    /// Repeat move, corresponds to X86::REP_MOVSx.
    REP_MOVS,

    /// On Darwin, this node represents the result of the popl
    /// at function entry, used for PIC code.
    GlobalBaseReg,

    /// A wrapper node for TargetConstantPool, TargetJumpTable,
    /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
    /// MCSymbol and TargetBlockAddress.
    Wrapper,

    /// Special wrapper used under X86-64 PIC mode for RIP
    /// relative displacements.
    WrapperRIP,

    /// Copies a 64-bit value from an MMX vector to the low word
    /// of an XMM vector, with the high word zero filled.
    MOVQ2DQ,

    /// Copies a 64-bit value from the low word of an XMM vector
    /// to an MMX vector.
    MOVDQ2Q,

    /// Copies a 32-bit value from the low word of a MMX
    /// vector to a GPR.
    MMX_MOVD2W,

    /// Copies a GPR into the low 32-bit word of a MMX vector
    /// and zero out the high word.
    MMX_MOVW2D,

    /// Extract an 8-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRB.
    PEXTRB,

    /// Extract a 16-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRW.
    PEXTRW,

    /// Insert any element of a 4 x float vector into any element
    /// of a destination 4 x float vector.
    INSERTPS,

    /// Insert the lower 8-bits of a 32-bit value to a vector,
    /// corresponds to X86::PINSRB.
    PINSRB,

    /// Insert the lower 16-bits of a 32-bit value to a vector,
    /// corresponds to X86::PINSRW.
    PINSRW,

    /// Shuffle 16 8-bit values within a vector.
    PSHUFB,

    /// Compute Sum of Absolute Differences.
    PSADBW,
    /// Compute Double Block Packed Sum-Absolute-Differences
    DBPSADBW,

    /// Bitwise Logical AND NOT of Packed FP values.
    ANDNP,

    /// Blend where the selector is an immediate.
    BLENDI,

    /// Dynamic (non-constant condition) vector blend where only the sign bits
    /// of the condition elements are used. This is used to enforce that the
    /// condition mask is not valid for generic VSELECT optimizations. This
    /// is also used to implement the intrinsics.
    /// Operands are in VSELECT order: MASK, TRUE, FALSE
    BLENDV,

    /// Combined add and sub on an FP vector.
    ADDSUB,

    // FP vector ops with rounding mode.
    FADD_RND,
    FADDS,
    FADDS_RND,
    FSUB_RND,
    FSUBS,
    FSUBS_RND,
    FMUL_RND,
    FMULS,
    FMULS_RND,
    FDIV_RND,
    FDIVS,
    FDIVS_RND,
    FMAX_SAE,
    FMAXS_SAE,
    FMIN_SAE,
    FMINS_SAE,
    FSQRT_RND,
    FSQRTS,
    FSQRTS_RND,

    // FP vector get exponent.
    FGETEXP,
    FGETEXP_SAE,
    FGETEXPS,
    FGETEXPS_SAE,
    // Extract Normalized Mantissas.
    VGETMANT,
    VGETMANT_SAE,
    VGETMANTS,
    VGETMANTS_SAE,
    // FP Scale.
    SCALEF,
    SCALEF_RND,
    SCALEFS,
    SCALEFS_RND,

    /// Integer horizontal add/sub.
    HADD,
    HSUB,

    /// Floating point horizontal add/sub.
    FHADD,
    FHSUB,

    // Detect Conflicts Within a Vector
    CONFLICT,

    /// Floating point max and min.
    FMAX,
    FMIN,

    /// Commutative FMIN and FMAX.
    FMAXC,
    FMINC,

    /// Scalar intrinsic floating point max and min.
    FMAXS,
    FMINS,

    /// Floating point reciprocal-sqrt and reciprocal approximation.
    /// Note that these typically require refinement
    /// in order to obtain suitable precision.
    FRSQRT,
    FRCP,

    // AVX-512 reciprocal approximations with a little more precision.
    RSQRT14,
    RSQRT14S,
    RCP14,
    RCP14S,

    // Thread Local Storage.
    TLSADDR,

    // Thread Local Storage. A call to get the start address
    // of the TLS block for the current module.
    TLSBASEADDR,

    // Thread Local Storage.  When calling to an OS provided
    // thunk at the address from an earlier relocation.
    TLSCALL,

    // Thread Local Storage. A descriptor containing pointer to
    // code and to argument to get the TLS offset for the symbol.
    TLSDESC,

    // Exception Handling helpers.
    EH_RETURN,

    // SjLj exception handling setjmp.
    EH_SJLJ_SETJMP,

    // SjLj exception handling longjmp.
    EH_SJLJ_LONGJMP,

    // SjLj exception handling dispatch.
    EH_SJLJ_SETUP_DISPATCH,

    /// Tail call return. See X86TargetLowering::LowerCall for
    /// the list of operands.
    TC_RETURN,

    // Vector move to low scalar and zero higher vector elements.
    VZEXT_MOVL,

    // Vector integer truncate.
    VTRUNC,
    // Vector integer truncate with unsigned/signed saturation.
    VTRUNCUS,
    VTRUNCS,

    // Masked version of the above. Used when less than a 128-bit result is
    // produced since the mask only applies to the lower elements and can't
    // be represented by a select.
    // SRC, PASSTHRU, MASK
    VMTRUNC,
    VMTRUNCUS,
    VMTRUNCS,

    // Vector FP extend.
    VFPEXT,
    VFPEXT_SAE,
    VFPEXTS,
    VFPEXTS_SAE,

    // Vector FP round.
    VFPROUND,
    // Convert TWO packed single data to one packed data
    VFPROUND2,
    VFPROUND2_RND,
    VFPROUND_RND,
    VFPROUNDS,
    VFPROUNDS_RND,

    // Masked version of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    VMFPROUND,

    // 128-bit vector logical left / right shift
    VSHLDQ,
    VSRLDQ,

    // Vector shift elements
    VSHL,
    VSRL,
    VSRA,

    // Vector variable shift
    VSHLV,
    VSRLV,
    VSRAV,

    // Vector shift elements by immediate
    VSHLI,
    VSRLI,
    VSRAI,

    // Shifts of mask registers.
    KSHIFTL,
    KSHIFTR,

    // Bit rotate by immediate
    VROTLI,
    VROTRI,

    // Vector packed double/float comparison.
    CMPP,

    // Vector integer comparisons.
    PCMPEQ,
    PCMPGT,

    // v8i16 Horizontal minimum and position.
    PHMINPOS,

    MULTISHIFT,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    CMPM,
    // Vector mask comparison generating mask bits for FP values.
    CMPMM,
    // Vector mask comparison with SAE for FP values.
    CMPMM_SAE,

    // Arithmetic operations with FLAGS results.
    ADD,
    SUB,
    ADC,
    SBB,
    SMUL,
    UMUL,
    OR,
    XOR,
    AND,

    // Bit field extract.
    BEXTR,
    BEXTRI,

    // Zero High Bits Starting with Specified Bit Position.
    BZHI,

    // Parallel extract and deposit.
    PDEP,
    PEXT,

    // X86-specific multiply by immediate.
    MUL_IMM,

    // Vector sign bit extraction.
    MOVMSK,

    // Vector bitwise comparisons.
    PTEST,

    // Vector packed fp sign bitwise comparisons.
    TESTP,

    // OR/AND test for masks.
    KORTEST,
    KTEST,

    // ADD for masks.
    KADD,

    // Several flavors of instructions with vector shuffle behaviors.
    // Saturated signed/unsigned packing.
    PACKSS,
    PACKUS,
    // Intra-lane alignr.
    PALIGNR,
    // AVX512 inter-lane alignr.
    VALIGN,
    PSHUFD,
    PSHUFHW,
    PSHUFLW,
    SHUFP,
    // VBMI2 Concat & Shift.
    VSHLD,
    VSHRD,
    VSHLDV,
    VSHRDV,
    // Shuffle Packed Values at 128-bit granularity.
    SHUF128,
    MOVDDUP,
    MOVSHDUP,
    MOVSLDUP,
    MOVLHPS,
    MOVHLPS,
    MOVSD,
    MOVSS,
    MOVSH,
    UNPCKL,
    UNPCKH,
    VPERMILPV,
    VPERMILPI,
    VPERMI,
    VPERM2X128,

    // Variable Permute (VPERM).
    // Res = VPERMV MaskV, V0
    VPERMV,

    // 3-op Variable Permute (VPERMT2).
    // Res = VPERMV3 V0, MaskV, V1
    VPERMV3,

    // Bitwise ternary logic.
    VPTERNLOG,
    // Fix Up Special Packed Float32/64 values.
    VFIXUPIMM,
    VFIXUPIMM_SAE,
    VFIXUPIMMS,
    VFIXUPIMMS_SAE,
    // Range Restriction Calculation For Packed Pairs of Float32/64 values.
    VRANGE,
    VRANGE_SAE,
    VRANGES,
    VRANGES_SAE,
    // Reduce - Perform Reduction Transformation on scalar/packed FP.
    VREDUCE,
    VREDUCE_SAE,
    VREDUCES,
    VREDUCES_SAE,
    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    VRNDSCALE,
    VRNDSCALE_SAE,
    VRNDSCALES,
    VRNDSCALES_SAE,
    // Tests types of FP values, for packed types.
    VFPCLASS,
    // Tests types of FP values, for scalar types.
    VFPCLASSS,

    // Broadcast (splat) scalar or element 0 of a vector. If the operand is
    // a vector, this node may change the vector length as part of the splat.
    VBROADCAST,
    // Broadcast mask to vector.
    VBROADCASTM,

    /// SSE4A Extraction and Insertion.
    EXTRQI,
    INSERTQI,

    // XOP arithmetic/logical shifts.
    VPSHA,
    VPSHL,
    // XOP signed/unsigned integer comparisons.
    VPCOM,
    VPCOMU,
    // XOP packed permute bytes.
    VPPERM,
    // XOP two source permutation.
    VPERMIL2,

    // Vector multiply packed unsigned doubleword integers.
    PMULUDQ,
    // Vector multiply packed signed doubleword integers.
    PMULDQ,
    // Vector Multiply Packed Unsigned Integers with Round and Scale.
    MULHRS,

    // Multiply and Add Packed Integers.
    VPMADDUBSW,
    VPMADDWD,

    // AVX512IFMA multiply and add.
    // NOTE: These are different from the instruction and perform
    // op0 x op1 + op2.
    VPMADD52L,
    VPMADD52H,

    // VNNI
    VPDPBUSD,
    VPDPBUSDS,
    VPDPWSSD,
    VPDPWSSDS,

    // FMA nodes.
    // We use the target independent ISD::FMA for the non-inverted case.
    FNMADD,
    FMSUB,
    FNMSUB,
    FMADDSUB,
    FMSUBADD,

    // FMA with rounding mode.
    FMADD_RND,
    FNMADD_RND,
    FMSUB_RND,
    FNMSUB_RND,
    FMADDSUB_RND,
    FMSUBADD_RND,

    // AVX512-FP16 complex addition and multiplication.
    VFMADDC,
    VFMADDC_RND,
    VFCMADDC,
    VFCMADDC_RND,

    VFMULC,
    VFMULC_RND,
    VFCMULC,
    VFCMULC_RND,

    VFMADDCSH,
    VFMADDCSH_RND,
    VFCMADDCSH,
    VFCMADDCSH_RND,

    VFMULCSH,
    VFMULCSH_RND,
    VFCMULCSH,
    VFCMULCSH_RND,

    VPDPBSUD,
    VPDPBSUDS,
    VPDPBUUD,
    VPDPBUUDS,
    VPDPBSSD,
    VPDPBSSDS,

    VPDPWSUD,
    VPDPWSUDS,
    VPDPWUSD,
    VPDPWUSDS,
    VPDPWUUD,
    VPDPWUUDS,

    VMINMAX,
    VMINMAX_SAE,
    VMINMAXS,
    VMINMAXS_SAE,

    CVTP2IBS,
    CVTP2IUBS,
    CVTP2IBS_RND,
    CVTP2IUBS_RND,
    CVTTP2IBS,
    CVTTP2IUBS,
    CVTTP2IBS_SAE,
    CVTTP2IUBS_SAE,

    MPSADBW,

    VCVT2PH2BF8,
    VCVT2PH2BF8S,
    VCVT2PH2HF8,
    VCVT2PH2HF8S,
    VCVTBIASPH2BF8,
    VCVTBIASPH2BF8S,
    VCVTBIASPH2HF8,
    VCVTBIASPH2HF8S,
    VCVTPH2BF8,
    VCVTPH2BF8S,
    VCVTPH2HF8,
    VCVTPH2HF8S,
    VMCVTBIASPH2BF8,
    VMCVTBIASPH2BF8S,
    VMCVTBIASPH2HF8,
    VMCVTBIASPH2HF8S,
    VMCVTPH2BF8,
    VMCVTPH2BF8S,
    VMCVTPH2HF8,
    VMCVTPH2HF8S,
    VCVTHF82PH,

    // Compress and expand.
    COMPRESS,
    EXPAND,

    // Bits shuffle
    VPSHUFBITQMB,

    // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
    SINT_TO_FP_RND,
    UINT_TO_FP_RND,
    SCALAR_SINT_TO_FP,
    SCALAR_UINT_TO_FP,
    SCALAR_SINT_TO_FP_RND,
    SCALAR_UINT_TO_FP_RND,

    // Vector float/double to signed/unsigned integer.
    CVTP2SI,
    CVTP2UI,
    CVTP2SI_RND,
    CVTP2UI_RND,
    // Scalar float/double to signed/unsigned integer.
    CVTS2SI,
    CVTS2UI,
    CVTS2SI_RND,
    CVTS2UI_RND,

    // Vector float/double to signed/unsigned integer with truncation.
    CVTTP2SI,
    CVTTP2UI,
    CVTTP2SI_SAE,
    CVTTP2UI_SAE,

    // Saturation enabled Vector float/double to signed/unsigned
    // integer with truncation.
    CVTTP2SIS,
    CVTTP2UIS,
    CVTTP2SIS_SAE,
    CVTTP2UIS_SAE,
    // Masked versions of above. Used for v2f64 to v4i32.
    // SRC, PASSTHRU, MASK
    MCVTTP2SIS,
    MCVTTP2UIS,

    // Scalar float/double to signed/unsigned integer with truncation.
    CVTTS2SI,
    CVTTS2UI,
    CVTTS2SI_SAE,
    CVTTS2UI_SAE,

    // Vector signed/unsigned integer to float/double.
    CVTSI2P,
    CVTUI2P,

    // Scalar float/double to signed/unsigned integer with saturation.
    CVTTS2SIS,
    CVTTS2UIS,
    CVTTS2SIS_SAE,
    CVTTS2UIS_SAE,

    // Masked versions of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    MCVTP2SI,
    MCVTP2UI,
    MCVTTP2SI,
    MCVTTP2UI,
    MCVTSI2P,
    MCVTUI2P,

    // Custom handling for FP_TO_xINT_SAT
    FP_TO_SINT_SAT,
    FP_TO_UINT_SAT,

    // Vector float to bfloat16.
    // Convert packed single data to packed BF16 data
    CVTNEPS2BF16,
    // Masked version of above.
    // SRC, PASSTHRU, MASK
    MCVTNEPS2BF16,

    // Dot product of BF16/FP16 pairs accumulated into
    // packed single precision.
    DPBF16PS,
    DPFP16PS,

    // A stack checking function call. On Windows it's _chkstk call.
    DYN_ALLOCA,

    // For allocating variable amounts of stack space when using
    // segmented stacks. Check if the current stacklet has enough space, and
    // falls back to heap allocation if not.
    SEG_ALLOCA,

    // For allocating stack space when using stack clash protector.
    // Allocation is performed by block, and each block is probed.
    PROBED_ALLOCA,

    // Memory barriers.
    MFENCE,

    // Get a random integer and indicate whether it is valid in CF.
    RDRAND,

    // Get a NIST SP800-90B & C compliant random integer and
    // indicate whether it is valid in CF.
    RDSEED,

    // Protection keys
    // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
    // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
    // value for ECX.
    RDPKRU,
    WRPKRU,

    // SSE42 string comparisons.
    // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
    // will emit one or two instructions based on which results are used. If
    // flags and index/mask are both used, this allows us to use a single
    // instruction since we won't have to pick an opcode for flags. Instead we
    // can rely on the DAG to CSE everything and decide at isel.
    PCMPISTR,
    PCMPESTR,

    // Test if in transactional execution.
    XTEST,

    // Conversions between float and half-float.
    CVTPS2PH,
    CVTPS2PH_SAE,
    CVTPH2PS,
    CVTPH2PS_SAE,

    // Masked version of above.
    // SRC, RND, PASSTHRU, MASK
    MCVTPS2PH,
    MCVTPS2PH_SAE,

    // Galois Field Arithmetic Instructions
    GF2P8AFFINEINVQB,
    GF2P8AFFINEQB,
    GF2P8MULB,

    // LWP insert record.
    LWPINS,

    // User level wait
    UMWAIT,
    TPAUSE,

    // Enqueue Stores Instructions
    ENQCMD,
    ENQCMDS,

    // For avx512-vp2intersect
    VP2INTERSECT,

    // User level interrupts - testui
    TESTUI,

    // Perform an FP80 add after changing precision control in FPCW.
    FP80_ADD,

    // Conditional compare instructions
    CCMP,
    CTEST,

    /// X86 strict FP compare instructions.
    FIRST_STRICTFP_OPCODE,
    STRICT_FCMP = FIRST_STRICTFP_OPCODE,
    STRICT_FCMPS,

    // Vector packed double/float comparison.
    STRICT_CMPP,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    STRICT_CMPM,

    // Vector float/double to signed/unsigned integer with truncation.
    STRICT_CVTTP2SI,
    STRICT_CVTTP2UI,

    // Vector FP extend.
    STRICT_VFPEXT,

    // Vector FP round.
    STRICT_VFPROUND,

    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    STRICT_VRNDSCALE,

    // Vector signed/unsigned integer to float/double.
    STRICT_CVTSI2P,
    STRICT_CVTUI2P,

    // Strict FMA nodes.
    STRICT_FNMADD,
    STRICT_FMSUB,
    STRICT_FNMSUB,

    // Conversions between float and half-float.
    STRICT_CVTPS2PH,
    STRICT_CVTPH2PS,

    // Perform an FP80 add after changing precision control in FPCW.
    STRICT_FP80_ADD,

    /// Floating point max and min.
    STRICT_FMAX,
    STRICT_FMIN,
    LAST_STRICTFP_OPCODE = STRICT_FMIN,

    // Compare and swap.
    FIRST_MEMORY_OPCODE,
    LCMPXCHG_DAG = FIRST_MEMORY_OPCODE,
    LCMPXCHG8_DAG,
    LCMPXCHG16_DAG,
    LCMPXCHG16_SAVE_RBX_DAG,

    /// LOCK-prefixed arithmetic read-modify-write instructions.
    /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
    LADD,
    LSUB,
    LOR,
    LXOR,
    LAND,
    LBTS,
    LBTC,
    LBTR,
    LBTS_RM,
    LBTC_RM,
    LBTR_RM,

    /// RAO arithmetic instructions.
    /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
    AADD,
    AOR,
    AXOR,
    AAND,

    // Load, scalar_to_vector, and zero extend.
    VZEXT_LOAD,

    // extract_vector_elt, store.
    VEXTRACT_STORE,

    // scalar broadcast from memory.
    VBROADCAST_LOAD,

    // subvector broadcast from memory.
    SUBV_BROADCAST_LOAD,

    // Store FP control word into i16 memory.
    FNSTCW16m,

    // Load FP control word from i16 memory.
    FLDCW16m,

    // Store x87 FPU environment into memory.
    FNSTENVm,

    // Load x87 FPU environment from memory.
    FLDENVm,

    /// This instruction implements FP_TO_SINT with the
    /// integer destination in memory and a FP reg source.  This corresponds
    /// to the X86::FIST*m instructions and the rounding mode change stuff. It
    /// has two inputs (token chain and address) and two outputs (int value
    /// and token chain). Memory VT specifies the type to store to.
    FP_TO_INT_IN_MEM,

    /// This instruction implements SINT_TO_FP with the
    /// integer source in memory and FP reg result.  This corresponds to the
    /// X86::FILD*m instructions. It has two inputs (token chain and address)
    /// and two outputs (FP value and token chain). The integer source type is
    /// specified by the memory VT.
    FILD,

    /// This instruction implements a fp->int store from FP stack
    /// slots. This corresponds to the fist instruction. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FIST,

    /// This instruction implements an extending load to FP stack slots.
    /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
    /// operand, and ptr to load from. The memory VT specifies the type to
    /// load from.
    FLD,

    /// This instruction implements a truncating store from FP stack
    /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FST,

    /// These instructions grab the address of the next argument
    /// from a va_list. (reads and modifies the va_list in memory)
    VAARG_64,
    VAARG_X32,

    // Vector truncating store with unsigned/signed saturation
    VTRUNCSTOREUS,
    VTRUNCSTORES,
    // Vector truncating masked store with unsigned/signed saturation
    VMTRUNCSTOREUS,
    VMTRUNCSTORES,

    // X86 specific gather and scatter
    MGATHER,
    MSCATTER,

    // Key locker nodes that produce flags.
    AESENC128KL,
    AESDEC128KL,
    AESENC256KL,
    AESDEC256KL,
    AESENCWIDE128KL,
    AESDECWIDE128KL,
    AESENCWIDE256KL,
    AESDECWIDE256KL,

    /// Compare and Add if Condition is Met. Compare the value in operand 2
    /// with the value in memory at operand 1. If the condition of operand 4
    /// is met, add the value of operand 3 to m32 and write the new value back
    /// to operand 1. Operand 2 is always updated with the original value from
    /// operand 1.
    CMPCCXADD,

    // Save xmm argument registers to the stack, according to %al. An operator
    // is needed so that this can be expanded with control flow.
    VASTART_SAVE_XMM_REGS,

    // Conditional load/store instructions
    CLOAD,
    CSTORE,
    LAST_MEMORY_OPCODE = CSTORE,
  };
  } // end namespace X86ISD

  namespace X86 {
    /// The current rounding mode is represented in bits 11:10 of the FP
    /// control word (FPCW). These values are the same as the corresponding
    /// constants used for rounding modes in glibc.
    enum RoundingMode {
      rmToNearest   = 0,        // FE_TONEAREST
      rmDownward    = 1 << 10,  // FE_DOWNWARD
      rmUpward      = 2 << 10,  // FE_UPWARD
      rmTowardZero  = 3 << 10,  // FE_TOWARDZERO
      rmMask        = 3 << 10   // Bit mask selecting rounding mode
    };
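
    // Illustrative use of the mask above (`FPCW` stands for a control-word
    // value previously stored, e.g. by FNSTCW):
    //   unsigned RM = FPCW & X86::rmMask;
    //   bool RoundsDown = (RM == X86::rmDownward);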
  }

  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// Returns true if Elt is a constant zero or floating point constant +0.0.
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset can fit into
    /// the displacement field of the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement);

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

    /// If Op is a constant whose elements are all the same constant or
    /// undefined, return true and return the constant value in \p SplatVal.
    /// If we have undef bits that don't cover an entire element, we treat these
    /// as zero if AllowPartialUndefs is set, else we fail and return false.
    bool isConstantSplat(SDValue Op, APInt &SplatVal,
                         bool AllowPartialUndefs = true);
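    // Typical use (an illustrative sketch, not a call made in this header):
    //   APInt SplatVal;
    //   if (X86::isConstantSplat(Op, SplatVal) && SplatVal.isSignMask())
    //     ...every defined lane of Op is the sign-mask constant...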

    /// Check if Op is a load operation that could be folded into some other x86
    /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
    bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
                     bool AssumeSingleUse = false);

    /// Check if Op is a load operation that could be folded into a vector splat
    /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
    bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
                                         const X86Subtarget &Subtarget,
                                         bool AssumeSingleUse = false);

    /// Check if Op is a value that could be used to fold a store into some
    /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
    bool mayFoldIntoStore(SDValue Op);

    /// Check if Op is an operation that could be folded into a zero extend x86
    /// instruction.
    bool mayFoldIntoZeroExtend(SDValue Op);

    /// True if the target supports the extended frame for async Swift
    /// functions.
    bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
                                            const MachineFunction &MF);
  } // end namespace X86

  //===--------------------------------------------------------------------===//
  //  X86 Implementation of the TargetLowering interface
  class X86TargetLowering final : public TargetLowering {
  public:
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                               ArgListTy &Args) const override;

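    /// Shift amounts are i8: hardware shift instructions take their count in
    /// the 8-bit CL register.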
    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
      return MVT::i8;
    }

    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;

    /// Returns relocation base for the given PIC jumptable.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;

    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
    Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override;

    EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op,
                            const AttributeList &FuncAttributes) const override;

    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2, f64 loads / stores are done with fldl / fstpl,
    /// which also do type conversion. Note the specified type doesn't have to
    /// be legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;

    bool isMemoryAccessFast(EVT VT, Align Alignment) const;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Returns whether it is "fast" in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
                                        MachineMemOperand::Flags Flags,
                                        unsigned *Fast) const override;

    /// This function returns true if the memory access is aligned or if the
    /// target allows this specific unaligned memory access. If the access is
    /// allowed, the optional final parameter returns a relative speed of the
    /// access (as defined by the target).
    bool allowsMemoryAccess(
        LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
        Align Alignment,
        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
        unsigned *Fast = nullptr) const override;

    bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
                            const MachineMemOperand &MMO,
                            unsigned *Fast) const {
      return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
                                MMO.getAlign(), MMO.getFlags(), Fast);
    }

    /// Provide custom lowering hooks for some operations.
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Replace the results of a node with an illegal result
    /// type with new values built out of custom code.
    ///
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
                            SelectionDAG &DAG) const override;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    bool preferABDSToABSWithNSW(EVT VT) const override;

    bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
                                   EVT ExtVT) const override;

    bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
                                           EVT VT) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for the
    /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
    /// instruction encodings are longer and some i16 instructions are slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On x86
    /// i16 is legal, but undesirable since i16 instruction encodings are longer
    /// and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

    /// Return the preferred fold type: Abs if this is a vector, AddAnd if it's
    /// an integer, None otherwise.
    TargetLowering::AndOrSETCCFoldKind
    isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
                                       const SDNode *SETCC0,
                                       const SDNode *SETCC1) const override;

    /// Return the newly negated expression if the cost is not expensive and
    /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
    /// do the negation.
    SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                 bool LegalOperations, bool ForCodeSize,
                                 NegatibleCost &Cost,
                                 unsigned Depth) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    /// Do not merge vector stores after legalization because that may conflict
    /// with x86-specific store splitting optimizations.
    bool mergeStoresAfterLegalization(EVT MemVT) const override {
      return !MemVT.isVector();
    }

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const MachineFunction &MF) const override;

    bool isCheapToSpeculateCttz(Type *Ty) const override;

    bool isCheapToSpeculateCtlz(Type *Ty) const override;

    bool isCtlzFast() const override;

    bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction and
      // increase one store instruction. There is potentially a more
      // significant benefit because it avoids the float->int domain switch
      // for the input value, so it is more likely a win.
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
          (LTy.isInteger() && HTy.isFloatingPoint()))
        return true;
      // If the pair only contains int values, we will save two bitwise
      // instructions and increase one store instruction (costing one more
      // store buffer). Since the benefit is less clear, we leave such pairs
      // out until we have a test case proving it is a win.
      return false;
    }
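
    // e.g. an {f32, i32} pair returns true: two scalar stores avoid the
    // float->int domain crossing a merged i64 store would need, while an
    // {i32, i32} pair returns false.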

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool hasAndNotCompare(SDValue Y) const override;

    bool hasAndNot(SDValue Y) const override;

    bool hasBitTest(SDValue X, SDValue Y) const override;

    bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const override;

    unsigned preferedOpcodeForCmpEqPiecesOfOperand(
        EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
        const APInt &ShiftOrRotateAmt,
        const std::optional<APInt> &AndMask) const override;

    bool preferScalarizeSplat(SDNode *N) const override;

    CondMergingParams
    getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
                                  const Value *Rhs) const override;

    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                           CombineLevel Level) const override;

    bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;

    bool
    shouldTransformSignedTruncationCheck(EVT XVT,
                                         unsigned KeptBits) const override {
      // For vectors, we don't have a preference.
      if (XVT.isVector())
        return false;

      auto VTIsOk = [](EVT VT) -> bool {
        return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
               VT == MVT::i64;
      };

      // We are ok with KeptBitsVT being byte/word/dword, which is what MOVSX
      // supports. XVT will be larger than KeptBitsVT.
      MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
      return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
    }
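
    // e.g. XVT = i64 and KeptBits = 8 gives KeptBitsVT = i8; both satisfy
    // VTIsOk, so the transform is allowed.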

    ShiftLegalizationStrategy
    preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                       unsigned ExpansionFactor) const override;

    bool shouldSplatInsEltVarIndex(EVT VT) const override;

    bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
      // Converting to sat variants holds little benefit on X86 as we will just
      // need to saturate the value back using fp arithmetic.
      return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
    }

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
    MVT hasFastEqualityCompare(unsigned NumBits) const override;

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                      const APInt &DemandedElts,
                                      TargetLoweringOpt &TLO) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                                 const APInt &DemandedElts,
                                                 APInt &KnownUndef,
                                                 APInt &KnownZero,
                                                 TargetLoweringOpt &TLO,
                                                 unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
                                                    const APInt &DemandedElts,
                                                    unsigned MaskIndex,
                                                    TargetLoweringOpt &TLO,
                                                    unsigned Depth) const;

    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedBits,
                                           const APInt &DemandedElts,
                                           KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const override;

    SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
        SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
        SelectionDAG &DAG, unsigned Depth) const override;

    bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
        SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
        bool PoisonOnly, unsigned Depth) const override;

    bool canCreateUndefOrPoisonForTargetNode(
        SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
        bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;

    bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
                                   APInt &UndefElts, const SelectionDAG &DAG,
                                   unsigned Depth) const override;

    bool isTargetCanonicalConstantNode(SDValue Op) const override {
      // Peek through bitcasts/extracts/inserts to see if we have a vector
      // load/broadcast from memory.
      while (Op.getOpcode() == ISD::BITCAST ||
             Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
             (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
              Op.getOperand(0).isUndef()))
        Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);

      return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
             Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD ||
             (Op.getOpcode() == ISD::LOAD &&
              getTargetConstantFromLoad(cast<LoadSDNode>(Op))) ||
             TargetLowering::isTargetCanonicalConstantNode(Op);
    }
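
    // e.g. a (bitcast (X86ISD::VBROADCAST_LOAD ...)) is canonical: the loop
    // above peeks through the bitcast before checking the opcode.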

    bool isTargetCanonicalSelect(SDNode *N) const override;

    const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

    SDValue unwrapAddress(SDValue N) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight
      getSingleConstraintMatchWeight(AsmOperandInfo &Info,
                                     const char *Constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops. If hasMemory is true it means one of the asm
    /// constraints of the inline asm instruction being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    InlineAsm::ConstraintCode
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "v")
        return InlineAsm::ConstraintCode::v;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// Handle Lowering flag assembly outputs.
    SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                        const SDLoc &DL,
                                        const AsmOperandInfo &Constraint,
                                        SelectionDAG &DAG) const override;

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for the
    /// register.  This should only be used for C_Register constraints.  On
    /// error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    bool addressingModeSupportsTLS(const GlobalValue &GV) const override;

    /// Return true if the specified immediate is legal
    /// icmp immediate, that is the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is legal
    /// add immediate, that is the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    bool isLegalStoreImmediate(int64_t Imm) const override;

    /// Add x86-specific opcodes to the default list.
    bool isBinOp(unsigned Opcode) const override;

    /// Returns true if the opcode is a commutative binary operation.
    bool isCommutativeBinOp(unsigned Opcode) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
    /// register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
    /// register. This does not necessarily include registers defined in
    /// unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;

    bool shouldConvertPhiType(Type *From, Type *To) const override;

    /// Return true if folding a vector load into ExtVal (a sign, zero, or any
    /// extend node) is profitable.
    bool isVectorLoadExtDesirable(SDValue) const override;

    /// Return true if an FMA operation is faster than a pair of fmul and fadd
    /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                    EVT VT) const override;

    /// Return true if it's profitable to narrow operations of type SrcVT to
    /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
    /// from i32 to i16.
    bool isNarrowingProfitable(SDNode *N, EVT SrcVT, EVT DestVT) const override;

    bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode, EVT VT,
                                              unsigned SelectOpcode, SDValue X,
                                              SDValue Y) const override;

    /// Given an intrinsic, checks if on the target the intrinsic will need to map
    /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
    /// true and stores the intrinsic information into the IntrinsicInfo that was
    /// passed to the function.
    bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                            MachineFunction &MF,
                            unsigned Intrinsic) const override;

    /// Returns true if the target can instruction select the
    /// specified FP immediate natively. If false, the legalizer will
    /// materialize the FP immediate as a load from a constant pool.
    bool isFPImmLegal(const APFloat &Imm, EVT VT,
                      bool ForCodeSize) const override;

    /// Targets can use this to indicate that they only support *some*
    /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
    /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
    /// be legal.
    bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
    /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
    /// constant pool entry.
    bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;

    /// Returns true if lowering to a jump table is allowed.
    bool areJTsAllowed(const Function *Fn) const override;

    MVT getPreferredSwitchConditionType(LLVMContext &Context,
                                        EVT ConditionVT) const override;

    /// If true, then instruction selection should
    /// seek to shrink the FP constant of the specified type to a smaller type
    /// in order to save space and / or reduce runtime.
    bool ShouldShrinkFPConstant(EVT VT) const override;

    /// Return true if we believe it is correct and profitable to reduce the
    /// load node to a smaller type.
    bool
    shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, EVT NewVT,
                          std::optional<unsigned> ByteOffset) const override;

    /// Return true if the specified scalar FP type is computed in an SSE
    /// register, not on the X87 floating point stack.
    bool isScalarFPTypeInSSEReg(EVT VT) const;

    /// Returns true if it is beneficial to convert a load of a constant
    /// to just the constant itself.
    bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
                                           Type *Ty) const override;

    bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;

    bool convertSelectOfConstantsToMath(EVT VT) const override;

    bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                SDValue C) const override;

    /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
    /// with this index.
    bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                 unsigned Index) const override;

    /// Scalar ops always have equal or better analysis/performance/power than
    /// the vector equivalent, so this always makes sense if the scalar op is
    /// supported.
1542     bool shouldScalarizeBinop(SDValue) const override;
1543 
1544     /// Extract of a scalar FP value from index 0 of a vector is free.
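         /// (The scalar value already lives in the low element of an XMM
         /// register, so no shuffle or move is required.)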
1545     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1546       EVT EltVT = VT.getScalarType();
1547       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1548     }
1549 
1550     /// Overflow nodes should get combined/lowered to optimal instructions
1551     /// (they should allow eliminating explicit compares by getting flags from
1552     /// math ops).
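         /// e.g. a uaddo can lower to ADD with the carry flag feeding SETC/JC,
         /// avoiding a separate CMP to detect the wrap.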
1553     bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1554                               bool MathUsed) const override;
1555 
1556     bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
1557                                       unsigned AddrSpace) const override {
1558       // If we can replace more than 2 scalar stores, there will be a reduction
1559       // in instructions even after we add a vector constant load.
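           // e.g. four scalar i32 zero stores can become a single 16-byte
           // store from a zeroed XMM register.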
1560       return IsZero || NumElem > 2;
1561     }
1562 
1563     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1564                                  const SelectionDAG &DAG,
1565                                  const MachineMemOperand &MMO) const override;
1566 
1567     Register getRegisterByName(const char* RegName, LLT VT,
1568                                const MachineFunction &MF) const override;
1569 
1570     /// If a physical register, this returns the register that receives the
1571     /// exception address on entry to an EH pad.
1572     Register
1573     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1574 
1575     /// If a physical register, this returns the register that receives the
1576     /// exception typeid on entry to a landing pad.
1577     Register
1578     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1579 
1580     bool needsFixedCatchObjects() const override;
1581 
1582     /// This method returns a target specific FastISel object,
1583     /// or null if the target does not support "fast" ISel.
1584     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1585                              const TargetLibraryInfo *libInfo) const override;
1586 
1587     /// If the target has a standard location for the stack protector cookie,
1588     /// returns the address of that location. Otherwise, returns nullptr.
1589     Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1590 
1591     bool useLoadStackGuardNode(const Module &M) const override;
1592     bool useStackGuardXorFP() const override;
1593     void insertSSPDeclarations(Module &M) const override;
1594     Value *getSDagStackGuard(const Module &M) const override;
1595     Function *getSSPStackGuardCheck(const Module &M) const override;
1596     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1597                                 const SDLoc &DL) const override;
1598 
1600     /// Return the location of the SafeStack pointer for this target, which
1601     /// is stored at a fixed offset in a non-standard address space, with the
1602     /// address space and offset chosen as appropriate for the target OS.
1603     Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1604 
1605     std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1606                                           SDValue Chain, SDValue Pointer,
1607                                           MachinePointerInfo PtrInfo,
1608                                           Align Alignment,
1609                                           SelectionDAG &DAG) const;
1610 
1611     /// Customize the preferred legalization strategy for certain types.
1612     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1613 
1614     bool softPromoteHalfType() const override { return true; }
1615 
1616     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1617                                       EVT VT) const override;
1618 
1619     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1620                                            CallingConv::ID CC,
1621                                            EVT VT) const override;
1622 
1623     unsigned getVectorTypeBreakdownForCallingConv(
1624         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1625         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1626 
1627     bool functionArgumentNeedsConsecutiveRegisters(
1628         Type *Ty, CallingConv::ID CallConv, bool isVarArg,
1629         const DataLayout &DL) const override;
1630 
1631     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1632 
1633     bool supportSwiftError() const override;
1634 
1635     bool supportKCFIBundles() const override { return true; }
1636 
1637     MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
1638                                 MachineBasicBlock::instr_iterator &MBBI,
1639                                 const TargetInstrInfo *TII) const override;
1640 
1641     bool hasStackProbeSymbol(const MachineFunction &MF) const override;
1642     bool hasInlineStackProbe(const MachineFunction &MF) const override;
1643     StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
1644 
1645     unsigned getStackProbeSize(const MachineFunction &MF) const;
1646 
1647     bool hasVectorBlend() const override { return true; }
1648 
1649     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1650 
1651     bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
1652                                  unsigned OpNo) const override;
1653 
1654     SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1655                             MachineMemOperand *MMO, SDValue &NewLoad,
1656                             SDValue Ptr, SDValue PassThru,
1657                             SDValue Mask) const override;
1658     SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1659                              MachineMemOperand *MMO, SDValue Ptr, SDValue Val,
1660                              SDValue Mask) const override;
1661 
1662     /// Lower interleaved load(s) into target-specific
1663     /// instructions/intrinsics.
1664     bool lowerInterleavedLoad(LoadInst *LI,
1665                               ArrayRef<ShuffleVectorInst *> Shuffles,
1666                               ArrayRef<unsigned> Indices,
1667                               unsigned Factor) const override;
1668 
1669     /// Lower interleaved store(s) into target-specific
1670     /// instructions/intrinsics.
1671     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1672                                unsigned Factor) const override;
1673 
1674     SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
1675                                    int JTI, SelectionDAG &DAG) const override;
1676 
1677     Align getPrefLoopAlignment(MachineLoop *ML) const override;
1678 
1679     EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
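           // Soften f80 to a 96-bit integer: 80 value bits rounded up to the
           // next 32-bit multiple.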
1680       if (VT == MVT::f80)
1681         return EVT::getIntegerVT(Context, 96);
1682       return TargetLoweringBase::getTypeToTransformTo(Context, VT);
1683     }
1684 
1685   protected:
1686     std::pair<const TargetRegisterClass *, uint8_t>
1687     findRepresentativeClass(const TargetRegisterInfo *TRI,
1688                             MVT VT) const override;
1689 
1690   private:
1691     /// Keep a reference to the X86Subtarget around so that we can
1692     /// make the right decision when generating code for different targets.
1693     const X86Subtarget &Subtarget;
1694 
1695     /// A list of legal FP immediates.
1696     std::vector<APFloat> LegalFPImmediates;
1697 
1698     /// Indicate that this x86 target can instruction
1699     /// select the specified FP immediate natively.
1700     void addLegalFPImmediate(const APFloat& Imm) {
1701       LegalFPImmediates.push_back(Imm);
1702     }
1703 
1704     SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1705                             CallingConv::ID CallConv, bool isVarArg,
1706                             const SmallVectorImpl<ISD::InputArg> &Ins,
1707                             const SDLoc &dl, SelectionDAG &DAG,
1708                             SmallVectorImpl<SDValue> &InVals,
1709                             uint32_t *RegMask) const;
1710     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1711                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1712                              const SDLoc &dl, SelectionDAG &DAG,
1713                              const CCValAssign &VA, MachineFrameInfo &MFI,
1714                              unsigned i) const;
1715     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1716                              const SDLoc &dl, SelectionDAG &DAG,
1717                              const CCValAssign &VA,
1718                              ISD::ArgFlagsTy Flags, bool isByval) const;
1719 
1720     // Call lowering helpers.
1721 
1722     /// Check whether the call is eligible for tail call optimization. Targets
1723     /// that want to do tail call optimization should implement this function.
1724     bool IsEligibleForTailCallOptimization(
1725         TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
1726         SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const;
1727     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1728                                     SDValue Chain, bool IsTailCall,
1729                                     bool Is64Bit, int FPDiff,
1730                                     const SDLoc &dl) const;
1731 
1732     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1733                                          SelectionDAG &DAG) const;
1734 
1735     unsigned getAddressSpace() const;
1736 
1737     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1738                             SDValue &Chain) const;
1739     SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1740 
1741     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1742     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1743     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1744     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1745 
1746     unsigned getGlobalWrapperKind(const GlobalValue *GV,
1747                                   const unsigned char OpFlags) const;
1748     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1749     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1750     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1751     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1752     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1753 
1754     /// Creates target global address or external symbol nodes for calls or
1755     /// other uses.
1756     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, bool ForCall,
1757                                   bool *IsImpCall) const;
1758 
1759     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1760     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1761     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1762     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1763     SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1764     SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
1765     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1766     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1767     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1768     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1769     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1770     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1771     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1772     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1773     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1774     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1775     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1776     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1777     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1778     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1779     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1780     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1781     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1782     SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1783     SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1784     SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
1785     SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
1786     SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
1787     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1788     SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
1789                                     SDValue &Chain) const;
1790     SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1791     SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
1792     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1793     SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1794     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1795     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1796     SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
1797 
1798     SDValue
1799     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1800                          const SmallVectorImpl<ISD::InputArg> &Ins,
1801                          const SDLoc &dl, SelectionDAG &DAG,
1802                          SmallVectorImpl<SDValue> &InVals) const override;
1803     SDValue LowerCall(CallLoweringInfo &CLI,
1804                       SmallVectorImpl<SDValue> &InVals) const override;
1805 
1806     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1807                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1808                         const SmallVectorImpl<SDValue> &OutVals,
1809                         const SDLoc &dl, SelectionDAG &DAG) const override;
1810 
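         /// Support splitting callee-saved register save/restore out of the
         /// prologue/epilogue: values are copied to virtual registers in the
         /// entry block and copied back at the exits. Only enabled for
         /// CXX_FAST_TLS functions that cannot unwind.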
1811     bool supportSplitCSR(MachineFunction *MF) const override {
1812       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1813           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1814     }
1815     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1816     void insertCopiesSplitCSR(
1817       MachineBasicBlock *Entry,
1818       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1819 
1820     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1821 
1822     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1823 
1824     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1825                             ISD::NodeType ExtendKind) const override;
1826 
1827     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1828                         bool isVarArg,
1829                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1830                         LLVMContext &Context,
1831                         const Type *RetTy) const override;
1832 
1833     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1834     ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
1835 
1836     TargetLoweringBase::AtomicExpansionKind
1837     shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1838     TargetLoweringBase::AtomicExpansionKind
1839     shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1840     TargetLoweringBase::AtomicExpansionKind
1841     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1842     TargetLoweringBase::AtomicExpansionKind
1843     shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
1844     void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
1845     void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
1846 
1847     LoadInst *
1848     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1849 
1850     bool needsCmpXchgNb(Type *MemType) const;
1851 
1852     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1853                                 MachineBasicBlock *DispatchBB, int FI) const;
1854 
1855     // Utility function to emit the low-level va_arg code for X86-64.
1856     MachineBasicBlock *
1857     EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
1858 
1859     /// Utility function to lower a pair of cascaded CMOV pseudo instructions.
1860     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1861                                                  MachineInstr &MI2,
1862                                                  MachineBasicBlock *BB) const;
1863 
1864     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1865                                          MachineBasicBlock *BB) const;
1866 
1867     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1868                                            MachineBasicBlock *BB) const;
1869 
1870     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1871                                             MachineBasicBlock *BB) const;
1872 
1873     MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
1874                                                MachineBasicBlock *BB) const;
1875 
1876     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1877                                           MachineBasicBlock *BB) const;
1878 
1879     MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
1880                                                 MachineBasicBlock *BB) const;
1881 
1882     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1883                                         MachineBasicBlock *MBB) const;
1884 
1885     void emitSetJmpShadowStackFix(MachineInstr &MI,
1886                                   MachineBasicBlock *MBB) const;
1887 
1888     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1889                                          MachineBasicBlock *MBB) const;
1890 
1891     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1892                                                  MachineBasicBlock *MBB) const;
1893 
1894     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1895                                              MachineBasicBlock *MBB) const;
1896 
1897     MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI,
1898                                               MachineBasicBlock *MBB) const;
1899 
1900     /// Emit flags for the given setcc condition and operands. Also returns the
1901     /// corresponding X86 condition code constant in X86CC.
1902     SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
1903                               const SDLoc &dl, SelectionDAG &DAG,
1904                               SDValue &X86CC) const;
1905 
1906     bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
1907                                              SDValue IntPow2) const override;
1908 
1909     /// Check if replacement of SQRT with RSQRT should be disabled.
1910     bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;
1911 
1912     /// Use rsqrt* to speed up sqrt calculations.
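         /// e.g. sqrt(x) can be approximated as x * RSQRTSS(x), refined with
         /// RefinementSteps Newton-Raphson iterations.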
1913     SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1914                             int &RefinementSteps, bool &UseOneConstNR,
1915                             bool Reciprocal) const override;
1916 
1917     /// Use rcp* to speed up fdiv calculations.
1918     SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1919                              int &RefinementSteps) const override;
1920 
1921     /// Reassociate floating point divisions into multiply by reciprocal.
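         /// e.g. once at least the returned number of divisions share a
         /// divisor d, x/d and y/d become r = 1.0/d; x*r; y*r.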
1922     unsigned combineRepeatedFPDivisors() const override;
1923 
1924     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1925                           SmallVectorImpl<SDNode *> &Created) const override;
1926 
1927     SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
1928                     SDValue V2) const;
1929   };
1930 
1931   namespace X86 {
1932     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1933                              const TargetLibraryInfo *libInfo);
1934   } // end namespace X86
1935 
1936   // X86 specific Gather/Scatter nodes.
1937   // The class has the same order of operands as MaskedGatherScatterSDNode for
1938   // convenience.
1939   class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1940   public:
1941     // This is intended as a utility class and should never be created directly.
1942     X86MaskedGatherScatterSDNode() = delete;
1943     ~X86MaskedGatherScatterSDNode() = delete;
1944 
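         // Operand layout: 0 = chain, 1 = pass-thru / stored value, 2 = mask,
         //                 3 = base pointer, 4 = index, 5 = scale.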
1945     const SDValue &getBasePtr() const { return getOperand(3); }
1946     const SDValue &getIndex()   const { return getOperand(4); }
1947     const SDValue &getMask()    const { return getOperand(2); }
1948     const SDValue &getScale()   const { return getOperand(5); }
1949 
1950     static bool classof(const SDNode *N) {
1951       return N->getOpcode() == X86ISD::MGATHER ||
1952              N->getOpcode() == X86ISD::MSCATTER;
1953     }
1954   };
1955 
1956   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1957   public:
1958     const SDValue &getPassThru() const { return getOperand(1); }
1959 
1960     static bool classof(const SDNode *N) {
1961       return N->getOpcode() == X86ISD::MGATHER;
1962     }
1963   };
1964 
1965   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1966   public:
1967     const SDValue &getValue() const { return getOperand(1); }
1968 
1969     static bool classof(const SDNode *N) {
1970       return N->getOpcode() == X86ISD::MSCATTER;
1971     }
1972   };
1973 
1974   /// Generate unpacklo/unpackhi shuffle mask.
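       /// e.g. for v4i32: Lo --> <0, 4, 1, 5> (<0, 0, 1, 1> if Unary),
       ///                 Hi --> <2, 6, 3, 7> (<2, 2, 3, 3> if Unary).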
1975   void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
1976                                bool Unary);
1977 
1978   /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
1979   /// imposed by AVX and specific to the unary pattern. Example:
1980   /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
1981   /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
1982   void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
1983 
1984 } // end namespace llvm
1985 
1986 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
1987