//===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that X86 uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
#define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
  class X86Subtarget;
  class X86TargetMachine;

  namespace X86ISD {
    // X86 Specific DAG Nodes
  enum NodeType : unsigned {
    // Start the numbering where the builtin ops leave off.
    FIRST_NUMBER = ISD::BUILTIN_OP_END,

    /// Bit scan forward.
    BSF,
    /// Bit scan reverse.
    BSR,

    /// X86 funnel/double shift i16 instructions. These correspond to
    /// X86::SHLDW and X86::SHRDW instructions which have different amt
    /// modulo rules to generic funnel shifts.
    /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
    FSHL,
    FSHR,
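    /// Illustrative note (not from the original source): generic ISD::FSHL
    /// on i16 reduces the shift amount modulo 16, whereas SHLDW masks it to
    /// 5 bits, so amounts in the range [16, 31] do not behave equivalently.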

    /// Bitwise logical AND of floating point values. This corresponds
    /// to X86::ANDPS or X86::ANDPD.
    FAND,

    /// Bitwise logical OR of floating point values. This corresponds
    /// to X86::ORPS or X86::ORPD.
    FOR,

    /// Bitwise logical XOR of floating point values. This corresponds
    /// to X86::XORPS or X86::XORPD.
    FXOR,

    /// Bitwise logical ANDNOT of floating point values. This
    /// corresponds to X86::ANDNPS or X86::ANDNPD.
    FANDN,

    /// These operations represent an abstract X86 call
    /// instruction, which includes a bunch of information.  In particular,
    /// the operands of this node are:
    ///
    ///     #0 - The incoming token chain
    ///     #1 - The callee
    ///     #2 - The number of arg bytes the caller pushes on the stack.
    ///     #3 - The number of arg bytes the callee pops off the stack.
    ///     #4 - The value to pass in AL/AX/EAX (optional)
    ///     #5 - The value to pass in DL/DX/EDX (optional)
    ///
    /// The result values of these nodes are:
    ///
    ///     #0 - The outgoing token chain
    ///     #1 - The first register result value (optional)
    ///     #2 - The second register result value (optional)
    ///
    CALL,

    /// Same as call except it adds the NoTrack prefix.
    NT_CALL,

    // Pseudo for an ObjC call that gets emitted together with a special
    // marker instruction.
    CALL_RVMARKER,

    /// X86 compare and logical compare instructions.
    CMP,
    FCMP,
    COMI,
    UCOMI,

    /// X86 bit-test instructions.
    BT,

    /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
    /// operand, usually produced by a CMP instruction.
    SETCC,
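    /// For illustration (a sketch, not from the original source), lowering
    /// code typically builds this node as:
    ///   DAG.getNode(X86ISD::SETCC, DL, MVT::i8,
    ///               DAG.getTargetConstant(X86::COND_E, DL, MVT::i8), EFLAGS)
    /// where EFLAGS was produced by, e.g., an X86ISD::CMP node.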

    /// X86 Select
    SELECTS,

    // Same as SETCC except it's materialized with an SBB and the value is
    // all ones or all zeros.
    SETCC_CARRY, // R = carry_bit ? ~0 : 0

    /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
    /// Operands are two FP values to compare; result is a mask of
    /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
    FSETCC,

    /// X86 FP SETCC, similar to above, but with the output as an i1 mask,
    /// and a version with SAE.
    FSETCCM,
    FSETCCM_SAE,

    /// X86 conditional moves. Operand 0 and operand 1 are the two values
    /// to select from. Operand 2 is the condition code, and operand 3 is the
    /// flag operand produced by a CMP or TEST instruction.
    CMOV,
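    /// For illustration (a sketch, not from the original source), a select
    /// lowered to CMOV might be built as:
    ///   DAG.getNode(X86ISD::CMOV, DL, VT, FalseVal, TrueVal,
    ///               DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8), EFLAGS)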

    /// X86 conditional branches. Operand 0 is the chain operand, operand 1
    /// is the block to branch if condition is true, operand 2 is the
    /// condition code, and operand 3 is the flag operand produced by a CMP
    /// or TEST instruction.
    BRCOND,

    /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
    /// operand 1 is the target address.
    NT_BRIND,

    /// Return with a glue operand. Operand 0 is the chain operand, operand
    /// 1 is the number of bytes of stack to pop.
    RET_GLUE,

    /// Return from interrupt. Operand 0 is the number of bytes to pop.
    IRET,

    /// Repeat fill, corresponds to X86::REP_STOSx.
    REP_STOS,

    /// Repeat move, corresponds to X86::REP_MOVSx.
    REP_MOVS,

    /// On Darwin, this node represents the result of the popl
    /// at function entry, used for PIC code.
    GlobalBaseReg,

    /// A wrapper node for TargetConstantPool, TargetJumpTable,
    /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
    /// MCSymbol and TargetBlockAddress.
    Wrapper,

    /// Special wrapper used under X86-64 PIC mode for RIP
    /// relative displacements.
    WrapperRIP,

    /// Copies a 64-bit value from an MMX vector to the low word
    /// of an XMM vector, with the high word zero filled.
    MOVQ2DQ,

    /// Copies a 64-bit value from the low word of an XMM vector
    /// to an MMX vector.
    MOVDQ2Q,

    /// Copies a 32-bit value from the low word of an MMX
    /// vector to a GPR.
    MMX_MOVD2W,

    /// Copies a GPR into the low 32-bit word of an MMX vector
    /// and zeroes out the high word.
    MMX_MOVW2D,

    /// Extract an 8-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRB.
    PEXTRB,

    /// Extract a 16-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRW.
    PEXTRW,

    /// Insert any element of a 4 x float vector into any element
    /// of a destination 4 x float vector.
    INSERTPS,

    /// Insert the lower 8 bits of a 32-bit value into a vector,
    /// corresponds to X86::PINSRB.
    PINSRB,

    /// Insert the lower 16 bits of a 32-bit value into a vector,
    /// corresponds to X86::PINSRW.
    PINSRW,

    /// Shuffle 16 8-bit values within a vector.
    PSHUFB,

    /// Compute Sum of Absolute Differences.
    PSADBW,
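    /// Illustrative note (not from the original source): with v16i8 inputs,
    /// PSADBW produces a v2i64 result in which each 64-bit lane holds the
    /// sum of absolute differences of the corresponding eight byte pairs.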
    /// Compute Double Block Packed Sum-Absolute-Differences
    DBPSADBW,

    /// Bitwise Logical AND NOT of Packed FP values.
    ANDNP,

    /// Blend where the selector is an immediate.
    BLENDI,

    /// Dynamic (non-constant condition) vector blend where only the sign bits
    /// of the condition elements are used. This is used to enforce that the
    /// condition mask is not valid for generic VSELECT optimizations. This
    /// is also used to implement the intrinsics.
    /// Operands are in VSELECT order: MASK, TRUE, FALSE
    BLENDV,
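    /// Illustrative note (not from the original source): lane i of the
    /// result is TRUE[i] when the sign bit of MASK[i] is set and FALSE[i]
    /// otherwise, matching the BLENDVPS-style instructions.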

    /// Combined add and sub on an FP vector.
    ADDSUB,

    // FP vector ops with rounding mode.
    FADD_RND,
    FADDS,
    FADDS_RND,
    FSUB_RND,
    FSUBS,
    FSUBS_RND,
    FMUL_RND,
    FMULS,
    FMULS_RND,
    FDIV_RND,
    FDIVS,
    FDIVS_RND,
    FMAX_SAE,
    FMAXS_SAE,
    FMIN_SAE,
    FMINS_SAE,
    FSQRT_RND,
    FSQRTS,
    FSQRTS_RND,

    // FP vector get exponent.
    FGETEXP,
    FGETEXP_SAE,
    FGETEXPS,
    FGETEXPS_SAE,
    // Extract Normalized Mantissas.
    VGETMANT,
    VGETMANT_SAE,
    VGETMANTS,
    VGETMANTS_SAE,
    // FP Scale.
    SCALEF,
    SCALEF_RND,
    SCALEFS,
    SCALEFS_RND,

    /// Integer horizontal add/sub.
    HADD,
    HSUB,

    /// Floating point horizontal add/sub.
    FHADD,
    FHSUB,

    // Detect Conflicts Within a Vector
    CONFLICT,

    /// Floating point max and min.
    FMAX,
    FMIN,

    /// Commutative FMIN and FMAX.
    FMAXC,
    FMINC,

    /// Scalar intrinsic floating point max and min.
    FMAXS,
    FMINS,

    /// Floating point reciprocal-sqrt and reciprocal approximation.
    /// Note that these typically require refinement
    /// in order to obtain suitable precision.
    FRSQRT,
    FRCP,

    // AVX-512 reciprocal approximations with a little more precision.
    RSQRT14,
    RSQRT14S,
    RCP14,
    RCP14S,

    // Thread Local Storage.
    TLSADDR,

    // Thread Local Storage. A call to get the start address
    // of the TLS block for the current module.
    TLSBASEADDR,

    // Thread Local Storage.  When calling to an OS provided
    // thunk at the address from an earlier relocation.
    TLSCALL,

    // Thread Local Storage. A descriptor containing a pointer to
    // code and a pointer to the argument used to get the TLS offset
    // for the symbol.
    TLSDESC,

    // Exception Handling helpers.
    EH_RETURN,

    // SjLj exception handling setjmp.
    EH_SJLJ_SETJMP,

    // SjLj exception handling longjmp.
    EH_SJLJ_LONGJMP,

    // SjLj exception handling dispatch.
    EH_SJLJ_SETUP_DISPATCH,

    /// Tail call return. See X86TargetLowering::LowerCall for
    /// the list of operands.
    TC_RETURN,

    // Vector move to low scalar and zero higher vector elements.
    VZEXT_MOVL,

    // Vector integer truncate.
    VTRUNC,
    // Vector integer truncate with unsigned/signed saturation.
    VTRUNCUS,
    VTRUNCS,

    // Masked version of the above. Used when less than a 128-bit result is
    // produced since the mask only applies to the lower elements and can't
    // be represented by a select.
    // SRC, PASSTHRU, MASK
    VMTRUNC,
    VMTRUNCUS,
    VMTRUNCS,

    // Vector FP extend.
    VFPEXT,
    VFPEXT_SAE,
    VFPEXTS,
    VFPEXTS_SAE,

    // Vector FP round.
    VFPROUND,
    VFPROUND_RND,
    VFPROUNDS,
    VFPROUNDS_RND,

    // Masked version of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    VMFPROUND,

    // 128-bit vector logical left / right shift
    VSHLDQ,
    VSRLDQ,

    // Vector shift elements
    VSHL,
    VSRL,
    VSRA,

    // Vector variable shift
    VSHLV,
    VSRLV,
    VSRAV,

    // Vector shift elements by immediate
    VSHLI,
    VSRLI,
    VSRAI,

    // Shifts of mask registers.
    KSHIFTL,
    KSHIFTR,

    // Bit rotate by immediate
    VROTLI,
    VROTRI,

    // Vector packed double/float comparison.
    CMPP,

    // Vector integer comparisons.
    PCMPEQ,
    PCMPGT,

    // v8i16 Horizontal minimum and position.
    PHMINPOS,

    MULTISHIFT,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    CMPM,
    // Vector mask comparison generating mask bits for FP values.
    CMPMM,
    // Vector mask comparison with SAE for FP values.
    CMPMM_SAE,

    // Arithmetic operations with FLAGS results.
    ADD,
    SUB,
    ADC,
    SBB,
    SMUL,
    UMUL,
    OR,
    XOR,
    AND,

    // Bit field extract.
    BEXTR,
    BEXTRI,

    // Zero High Bits Starting with Specified Bit Position.
    BZHI,

    // Parallel extract and deposit.
    PDEP,
    PEXT,

    // X86-specific multiply by immediate.
    MUL_IMM,

    // Vector sign bit extraction.
    MOVMSK,

    // Vector bitwise comparisons.
    PTEST,

    // Vector packed fp sign bitwise comparisons.
    TESTP,

    // OR/AND test for masks.
    KORTEST,
    KTEST,

    // ADD for masks.
    KADD,

    // Several flavors of instructions with vector shuffle behaviors.
    // Saturated signed/unsigned packing.
    PACKSS,
    PACKUS,
    // Intra-lane alignr.
    PALIGNR,
    // AVX512 inter-lane alignr.
    VALIGN,
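    // Illustrative note (not from the original source): PALIGNR concatenates
    // and byte-shifts within each 128-bit lane independently, while VALIGN
    // shifts whole elements across the full vector width.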
    PSHUFD,
    PSHUFHW,
    PSHUFLW,
    SHUFP,
    // VBMI2 Concat & Shift.
    VSHLD,
    VSHRD,
    VSHLDV,
    VSHRDV,
    // Shuffle Packed Values at 128-bit granularity.
    SHUF128,
    MOVDDUP,
    MOVSHDUP,
    MOVSLDUP,
    MOVLHPS,
    MOVHLPS,
    MOVSD,
    MOVSS,
    MOVSH,
    UNPCKL,
    UNPCKH,
    VPERMILPV,
    VPERMILPI,
    VPERMI,
    VPERM2X128,

    // Variable Permute (VPERM).
    // Res = VPERMV MaskV, V0
    VPERMV,

    // 3-op Variable Permute (VPERMT2).
    // Res = VPERMV3 V0, MaskV, V1
    VPERMV3,
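    // Illustrative note (not from the original source): for sources with N
    // elements each, lane i of the result is element MaskV[i] of the
    // 2N-element concatenation of V0 and V1 (indices 0..N-1 pick from V0,
    // N..2N-1 from V1).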

    // Bitwise ternary logic.
    VPTERNLOG,
    // Fix Up Special Packed Float32/64 values.
    VFIXUPIMM,
    VFIXUPIMM_SAE,
    VFIXUPIMMS,
    VFIXUPIMMS_SAE,
    // Range Restriction Calculation For Packed Pairs of Float32/64 values.
    VRANGE,
    VRANGE_SAE,
    VRANGES,
    VRANGES_SAE,
    // Reduce - Perform Reduction Transformation on scalar/packed FP.
    VREDUCE,
    VREDUCE_SAE,
    VREDUCES,
    VREDUCES_SAE,
    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    VRNDSCALE,
    VRNDSCALE_SAE,
    VRNDSCALES,
    VRNDSCALES_SAE,
    // Tests Types Of FP Values for packed types.
    VFPCLASS,
    // Tests Types Of FP Values for scalar types.
    VFPCLASSS,

    // Broadcast (splat) scalar or element 0 of a vector. If the operand is
    // a vector, this node may change the vector length as part of the splat.
    VBROADCAST,
    // Broadcast mask to vector.
    VBROADCASTM,

    /// SSE4A Extraction and Insertion.
    EXTRQI,
    INSERTQI,

    // XOP arithmetic/logical shifts.
    VPSHA,
    VPSHL,
    // XOP signed/unsigned integer comparisons.
    VPCOM,
    VPCOMU,
    // XOP packed permute bytes.
    VPPERM,
    // XOP two source permutation.
    VPERMIL2,

    // Vector multiply packed unsigned doubleword integers.
    PMULUDQ,
    // Vector multiply packed signed doubleword integers.
    PMULDQ,
    // Vector Multiply Packed Unsigned Integers with Round and Scale.
    MULHRS,

    // Multiply and Add Packed Integers.
    VPMADDUBSW,
    VPMADDWD,

    // AVX512IFMA multiply and add.
    // NOTE: These are different from the instruction and perform
    // op0 x op1 + op2.
    VPMADD52L,
    VPMADD52H,
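    // Illustrative note (not from the original source): per 64-bit lane, a
    // 104-bit product is formed from the low 52 bits of op0 and op1, and
    // VPMADD52L/VPMADD52H add the low/high 52 bits of that product to op2.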

    // VNNI
    VPDPBUSD,
    VPDPBUSDS,
    VPDPWSSD,
    VPDPWSSDS,

    // FMA nodes.
    // We use the target independent ISD::FMA for the non-inverted case.
    FNMADD,
    FMSUB,
    FNMSUB,
    FMADDSUB,
    FMSUBADD,

    // FMA with rounding mode.
    FMADD_RND,
    FNMADD_RND,
    FMSUB_RND,
    FNMSUB_RND,
    FMADDSUB_RND,
    FMSUBADD_RND,

    // AVX512-FP16 complex addition and multiplication.
    VFMADDC,
    VFMADDC_RND,
    VFCMADDC,
    VFCMADDC_RND,

    VFMULC,
    VFMULC_RND,
    VFCMULC,
    VFCMULC_RND,

    VFMADDCSH,
    VFMADDCSH_RND,
    VFCMADDCSH,
    VFCMADDCSH_RND,

    VFMULCSH,
    VFMULCSH_RND,
    VFCMULCSH,
    VFCMULCSH_RND,

    VPDPBSUD,
    VPDPBSUDS,
    VPDPBUUD,
    VPDPBUUDS,
    VPDPBSSD,
    VPDPBSSDS,

    // Compress and expand.
    COMPRESS,
    EXPAND,

    // Bits shuffle
    VPSHUFBITQMB,

    // Convert Signed/Unsigned Integer to Floating-Point Value with rounding
    // mode.
    SINT_TO_FP_RND,
    UINT_TO_FP_RND,
    SCALAR_SINT_TO_FP,
    SCALAR_UINT_TO_FP,
    SCALAR_SINT_TO_FP_RND,
    SCALAR_UINT_TO_FP_RND,

    // Vector float/double to signed/unsigned integer.
    CVTP2SI,
    CVTP2UI,
    CVTP2SI_RND,
    CVTP2UI_RND,
    // Scalar float/double to signed/unsigned integer.
    CVTS2SI,
    CVTS2UI,
    CVTS2SI_RND,
    CVTS2UI_RND,

    // Vector float/double to signed/unsigned integer with truncation.
    CVTTP2SI,
    CVTTP2UI,
    CVTTP2SI_SAE,
    CVTTP2UI_SAE,
    // Scalar float/double to signed/unsigned integer with truncation.
    CVTTS2SI,
    CVTTS2UI,
    CVTTS2SI_SAE,
    CVTTS2UI_SAE,

    // Vector signed/unsigned integer to float/double.
    CVTSI2P,
    CVTUI2P,

    // Masked versions of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    MCVTP2SI,
    MCVTP2UI,
    MCVTTP2SI,
    MCVTTP2UI,
    MCVTSI2P,
    MCVTUI2P,

    // Vector float to bfloat16.
    // Convert TWO packed single data to one packed BF16 data
    CVTNE2PS2BF16,
    // Convert packed single data to packed BF16 data
    CVTNEPS2BF16,
    // Masked version of above.
    // SRC, PASSTHRU, MASK
    MCVTNEPS2BF16,

    // Dot product of BF16 pairs, accumulated into
    // packed single precision.
    DPBF16PS,

    // A stack checking function call. On Windows it's the _chkstk call.
    DYN_ALLOCA,

    // For allocating variable amounts of stack space when using
    // segmented stacks. Checks if the current stacklet has enough space,
    // and falls back to heap allocation if not.
    SEG_ALLOCA,

    // For allocating stack space when using stack clash protector.
    // Allocation is performed by block, and each block is probed.
    PROBED_ALLOCA,

    // Memory barriers.
    MFENCE,

    // Get a random integer and indicate whether it is valid in CF.
    RDRAND,

    // Get a NIST SP800-90B & C compliant random integer and
    // indicate whether it is valid in CF.
    RDSEED,

    // Protection keys
    // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
    // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
    // value for ECX.
    RDPKRU,
    WRPKRU,

    // SSE42 string comparisons.
    // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
    // will emit one or two instructions based on which results are used. If
    // flags and index/mask are both used, this allows us to use a single
    // instruction since we won't have to pick an opcode for flags. Instead we
    // can rely on the DAG to CSE everything and decide at isel.
    PCMPISTR,
    PCMPESTR,

    // Test if in transactional execution.
    XTEST,

    // Conversions between float and half-float.
    CVTPS2PH,
    CVTPS2PH_SAE,
    CVTPH2PS,
    CVTPH2PS_SAE,

    // Masked version of above.
    // SRC, RND, PASSTHRU, MASK
    MCVTPS2PH,
    MCVTPS2PH_SAE,

    // Galois Field Arithmetic Instructions
    GF2P8AFFINEINVQB,
    GF2P8AFFINEQB,
    GF2P8MULB,

    // LWP insert record.
    LWPINS,

    // User level wait
    UMWAIT,
    TPAUSE,

    // Enqueue Stores Instructions
    ENQCMD,
    ENQCMDS,

    // For avx512-vp2intersect
    VP2INTERSECT,

    // User level interrupts - testui
    TESTUI,

    // Perform an FP80 add after changing precision control in FPCW.
    FP80_ADD,

    // Conditional compare instructions
    CCMP,
    CTEST,

    /// X86 strict FP compare instructions.
    STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
    STRICT_FCMPS,

    // Vector packed double/float comparison.
    STRICT_CMPP,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    STRICT_CMPM,

    // Vector float/double to signed/unsigned integer with truncation.
    STRICT_CVTTP2SI,
    STRICT_CVTTP2UI,

    // Vector FP extend.
    STRICT_VFPEXT,

    // Vector FP round.
    STRICT_VFPROUND,

    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    STRICT_VRNDSCALE,

    // Vector signed/unsigned integer to float/double.
    STRICT_CVTSI2P,
    STRICT_CVTUI2P,

    // Strict FMA nodes.
    STRICT_FNMADD,
    STRICT_FMSUB,
    STRICT_FNMSUB,

    // Conversions between float and half-float.
    STRICT_CVTPS2PH,
    STRICT_CVTPH2PS,

    // Perform an FP80 add after changing precision control in FPCW.
    STRICT_FP80_ADD,

    // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
    // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.

    // Compare and swap.
    LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
    LCMPXCHG8_DAG,
    LCMPXCHG16_DAG,
    LCMPXCHG16_SAVE_RBX_DAG,

    /// LOCK-prefixed arithmetic read-modify-write instructions.
    /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
    LADD,
    LSUB,
    LOR,
    LXOR,
    LAND,
    LBTS,
    LBTC,
    LBTR,
    LBTS_RM,
    LBTC_RM,
    LBTR_RM,

    /// RAO arithmetic instructions.
    /// OUTCHAIN = AADD(INCHAIN, PTR, RHS)
    AADD,
    AOR,
    AXOR,
    AAND,

    // Load, scalar_to_vector, and zero extend.
    VZEXT_LOAD,

    // extract_vector_elt, store.
    VEXTRACT_STORE,

    // scalar broadcast from memory.
    VBROADCAST_LOAD,

    // subvector broadcast from memory.
    SUBV_BROADCAST_LOAD,

    // Store FP control word into i16 memory.
    FNSTCW16m,

    // Load FP control word from i16 memory.
    FLDCW16m,

    // Store x87 FPU environment into memory.
    FNSTENVm,

    // Load x87 FPU environment from memory.
    FLDENVm,

    /// This instruction implements FP_TO_SINT with the
    /// integer destination in memory and a FP reg source.  This corresponds
    /// to the X86::FIST*m instructions and the rounding mode change stuff. It
    /// has two inputs (token chain and address) and two outputs (int value
    /// and token chain). Memory VT specifies the type to store to.
    FP_TO_INT_IN_MEM,

    /// This instruction implements SINT_TO_FP with the
    /// integer source in memory and FP reg result.  This corresponds to the
    /// X86::FILD*m instructions. It has two inputs (token chain and address)
    /// and two outputs (FP value and token chain). The integer source type is
    /// specified by the memory VT.
    FILD,

    /// This instruction implements a fp->int store from FP stack
    /// slots. This corresponds to the fist instruction. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FIST,

    /// This instruction implements an extending load to FP stack slots.
    /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
    /// operand, and ptr to load from. The memory VT specifies the type to
    /// load from.
    FLD,

    /// This instruction implements a truncating store from FP stack
    /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FST,

    /// These instructions grab the address of the next argument
    /// from a va_list. (reads and modifies the va_list in memory)
    VAARG_64,
    VAARG_X32,

    // Vector truncating store with unsigned/signed saturation
    VTRUNCSTOREUS,
    VTRUNCSTORES,
    // Vector truncating masked store with unsigned/signed saturation
    VMTRUNCSTOREUS,
    VMTRUNCSTORES,

    // X86 specific gather and scatter
    MGATHER,
    MSCATTER,

    // Key locker nodes that produce flags.
    AESENC128KL,
    AESDEC128KL,
    AESENC256KL,
    AESDEC256KL,
    AESENCWIDE128KL,
    AESDECWIDE128KL,
    AESENCWIDE256KL,
    AESDECWIDE256KL,

    /// Compare and Add if Condition is Met. Compare the value in operand 2
    /// with the value in memory of operand 1. If the condition of operand 4
    /// is met, add value operand 3 to m32 and write the new value to
    /// operand 1. Operand 2 is always updated with the original value from
    /// operand 1.
    CMPCCXADD,

    // Save xmm argument registers to the stack, according to %al. An operator
    // is needed so that this can be expanded with control flow.
    VASTART_SAVE_XMM_REGS,

    // Conditional load/store instructions
    CLOAD,
    CSTORE,

    // WARNING: Do not add anything at the end unless you want the node to
    // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
    // opcodes will be treated as target memory ops!
  };
  } // end namespace X86ISD

  namespace X86 {
    /// The current rounding mode is represented in bits 11:10 of FPSR. These
    /// values are the same as the corresponding constants for rounding mode
    /// used in glibc.
    enum RoundingMode {
      rmToNearest   = 0,        // FE_TONEAREST
      rmDownward    = 1 << 10,  // FE_DOWNWARD
      rmUpward      = 2 << 10,  // FE_UPWARD
      rmTowardZero  = 3 << 10,  // FE_TOWARDZERO
      rmMask        = 3 << 10   // Bit mask selecting rounding mode
    };
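    // For illustration (a usage sketch, not from the original source): given
    // a value CW holding these bits, the mode can be classified with the
    // mask above, e.g. (CW & rmMask) == rmTowardZero.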
  }

  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// Returns true if Elt is a constant zero or floating point constant +0.0.
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset can
    /// fit into the displacement field of the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement);

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

    /// If Op is a constant whose elements are all the same constant or
    /// undefined, return true and return the constant value in \p SplatVal.
    /// If we have undef bits that don't cover an entire element, we treat these
    /// as zero if AllowPartialUndefs is set, else we fail and return false.
    bool isConstantSplat(SDValue Op, APInt &SplatVal,
                         bool AllowPartialUndefs = true);
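    // For illustration (a usage sketch, not from the original source):
    //   APInt SplatVal;
    //   if (X86::isConstantSplat(Op, SplatVal) && SplatVal.isSignMask())
    //     ...; // every defined element is the sign-mask constant.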

    /// Check if Op is a load operation that could be folded into some other x86
    /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
    bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
                     bool AssumeSingleUse = false);

    /// Check if Op is a load operation that could be folded into a vector splat
    /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
    bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
                                         const X86Subtarget &Subtarget,
                                         bool AssumeSingleUse = false);

    /// Check if Op is a value that could be used to fold a store into some
    /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
    bool mayFoldIntoStore(SDValue Op);

    /// Check if Op is an operation that could be folded into a zero extend x86
    /// instruction.
    bool mayFoldIntoZeroExtend(SDValue Op);

    /// True if the target supports the extended frame for async Swift
    /// functions.
    bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget,
                                            const MachineFunction &MF);
  } // end namespace X86

  //===--------------------------------------------------------------------===//
  //  X86 Implementation of the TargetLowering interface
  class X86TargetLowering final : public TargetLowering {
  public:
    explicit X86TargetLowering(const X86TargetMachine &TM,
                               const X86Subtarget &STI);

    unsigned getJumpTableEncoding() const override;
    bool useSoftFloat() const override;

    void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                               ArgListTy &Args) const override;

    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
      return MVT::i8;
    }

    const MCExpr *
    LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
                              const MachineBasicBlock *MBB, unsigned uid,
                              MCContext &Ctx) const override;

    /// Returns relocation base for the given PIC jumptable.
    SDValue getPICJumpTableRelocBase(SDValue Table,
                                     SelectionDAG &DAG) const override;
    const MCExpr *
    getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
                                 unsigned JTI, MCContext &Ctx) const override;

    /// Return the desired alignment for ByVal aggregate
    /// function arguments in the caller parameter area. For X86, aggregates
    /// that contain SSE vectors are placed at 16-byte boundaries while the
    /// rest are at 4-byte boundaries.
    uint64_t getByValTypeAlignment(Type *Ty,
                                   const DataLayout &DL) const override;

    EVT getOptimalMemOpType(const MemOp &Op,
                            const AttributeList &FuncAttributes) const override;

    /// Returns true if it's safe to use load / store of the
    /// specified type to expand memcpy / memset inline. This is mostly true
    /// for all types except for some special cases. For example, on X86
    /// targets without SSE2 f64 load / store are done with fldl / fstpl which
    /// also does type conversion. Note the specified type doesn't have to be
    /// legal as the hook is used before type legalization.
    bool isSafeMemOpType(MVT VT) const override;

    bool isMemoryAccessFast(EVT VT, Align Alignment) const;

    /// Returns true if the target allows unaligned memory accesses of the
    /// specified type. Returns whether it is "fast" in the last argument.
    bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
                                        MachineMemOperand::Flags Flags,
                                        unsigned *Fast) const override;

    /// This function returns true if the memory access is aligned or if the
    /// target allows this specific unaligned memory access. If the access is
    /// allowed, the optional final parameter returns a relative speed of the
    /// access (as defined by the target).
    bool allowsMemoryAccess(
        LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
        Align Alignment,
        MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
        unsigned *Fast = nullptr) const override;

    bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
                            const MachineMemOperand &MMO,
                            unsigned *Fast) const {
      return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
                                MMO.getAlign(), MMO.getFlags(), Fast);
    }

    /// Provide custom lowering hooks for some operations.
    ///
    SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;

    /// Replace the results of a node with an illegal result
    /// type with new values built out of custom code.
    ///
    void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
                            SelectionDAG &DAG) const override;

    SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;

    bool preferABDSToABSWithNSW(EVT VT) const override;

    bool preferSextInRegOfTruncate(EVT TruncVT, EVT VT,
                                   EVT ExtVT) const override;

    bool isXAndYEqZeroPreferableToXAndYEqY(ISD::CondCode Cond,
                                           EVT VT) const override;

    /// Return true if the target has native support for
    /// the specified value type and it is 'desirable' to use the type for the
    /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
    /// instruction encodings are longer and some i16 instructions are slow.
    bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;

    /// Return true if the target has native support for the
    /// specified value type and it is 'desirable' to use the type. e.g. On x86
    /// i16 is legal, but undesirable since i16 instruction encodings are longer
    /// and some i16 instructions are slow.
    bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;

    /// Return the preferred fold type: Abs if this is a vector, AddAnd if
    /// it's an integer, None otherwise.
    TargetLowering::AndOrSETCCFoldKind
    isDesirableToCombineLogicOpOfSETCC(const SDNode *LogicOp,
                                       const SDNode *SETCC0,
                                       const SDNode *SETCC1) const override;

    /// Return the newly negated expression if the cost is not expensive and
    /// set the cost in \p Cost to indicate whether it is cheaper or neutral
    /// to do the negation.
    SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                 bool LegalOperations, bool ForCodeSize,
                                 NegatibleCost &Cost,
                                 unsigned Depth) const override;

    MachineBasicBlock *
    EmitInstrWithCustomInserter(MachineInstr &MI,
                                MachineBasicBlock *MBB) const override;

    /// This method returns the name of a target specific DAG node.
    const char *getTargetNodeName(unsigned Opcode) const override;

    /// Do not merge vector stores after legalization because that may conflict
    /// with x86-specific store splitting optimizations.
    bool mergeStoresAfterLegalization(EVT MemVT) const override {
      return !MemVT.isVector();
    }

    bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                          const MachineFunction &MF) const override;

    bool isCheapToSpeculateCttz(Type *Ty) const override;

    bool isCheapToSpeculateCtlz(Type *Ty) const override;

    bool isCtlzFast() const override;

    bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
      // If the pair to store is a mixture of float and int values, we will
      // save two bitwise instructions and one float-to-int instruction and
      // increase one store instruction. There is potentially a more
      // significant benefit because it avoids the float->int domain switch
      // for the input value. So it is more likely a win.
      if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
          (LTy.isInteger() && HTy.isFloatingPoint()))
        return true;
      // If the pair only contains int values, we will save two bitwise
      // instructions and increase one store instruction (costing one more
      // store buffer). Since the benefit is less clear, we leave such
      // pairs out until we have a test case proving it is a win.
      return false;
    }

    bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;

    bool hasAndNotCompare(SDValue Y) const override;

    bool hasAndNot(SDValue Y) const override;

    bool hasBitTest(SDValue X, SDValue Y) const override;

    bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
        SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
        unsigned OldShiftOpcode, unsigned NewShiftOpcode,
        SelectionDAG &DAG) const override;

    unsigned preferedOpcodeForCmpEqPiecesOfOperand(
        EVT VT, unsigned ShiftOpc, bool MayTransformRotate,
        const APInt &ShiftOrRotateAmt,
        const std::optional<APInt> &AndMask) const override;

    bool preferScalarizeSplat(SDNode *N) const override;

    CondMergingParams
    getJumpConditionMergingParams(Instruction::BinaryOps Opc, const Value *Lhs,
                                  const Value *Rhs) const override;

    bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                           CombineLevel Level) const override;

    bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;

    bool
    shouldTransformSignedTruncationCheck(EVT XVT,
                                         unsigned KeptBits) const override {
      // For vectors, we don't have a preference.
      if (XVT.isVector())
        return false;

      auto VTIsOk = [](EVT VT) -> bool {
        return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
               VT == MVT::i64;
      };

      // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
      // XVT will be larger than KeptBitsVT.
      MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
      return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
    }

    ShiftLegalizationStrategy
    preferredShiftLegalizationStrategy(SelectionDAG &DAG, SDNode *N,
                                       unsigned ExpansionFactor) const override;

    bool shouldSplatInsEltVarIndex(EVT VT) const override;

    bool shouldConvertFpToSat(unsigned Op, EVT FPVT, EVT VT) const override {
      // Converting to sat variants holds little benefit on X86 as we will
      // just need to saturate the value back using fp arithmetic.
      return Op != ISD::FP_TO_UINT_SAT && isOperationLegalOrCustom(Op, VT);
    }

    bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
      return VT.isScalarInteger();
    }

    /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
    MVT hasFastEqualityCompare(unsigned NumBits) const override;

    /// Return the value type to use for ISD::SETCC.
    EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                           EVT VT) const override;

    bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
                                      const APInt &DemandedElts,
                                      TargetLoweringOpt &TLO) const override;

    /// Determine which of the bits specified in Mask are known to be either
    /// zero or one and return them in the KnownZero/KnownOne bitsets.
    void computeKnownBitsForTargetNode(const SDValue Op,
                                       KnownBits &Known,
                                       const APInt &DemandedElts,
                                       const SelectionDAG &DAG,
                                       unsigned Depth = 0) const override;

    /// Determine the number of bits in the operation that are sign bits.
    unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
                                             const APInt &DemandedElts,
                                             const SelectionDAG &DAG,
                                             unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
                                                 const APInt &DemandedElts,
                                                 APInt &KnownUndef,
                                                 APInt &KnownZero,
                                                 TargetLoweringOpt &TLO,
                                                 unsigned Depth) const override;

    bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
                                                    const APInt &DemandedElts,
                                                    unsigned MaskIndex,
                                                    TargetLoweringOpt &TLO,
                                                    unsigned Depth) const;

    bool SimplifyDemandedBitsForTargetNode(SDValue Op,
                                           const APInt &DemandedBits,
                                           const APInt &DemandedElts,
                                           KnownBits &Known,
                                           TargetLoweringOpt &TLO,
                                           unsigned Depth) const override;

    SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
        SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
        SelectionDAG &DAG, unsigned Depth) const override;

    bool isGuaranteedNotToBeUndefOrPoisonForTargetNode(
        SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
        bool PoisonOnly, unsigned Depth) const override;

    bool canCreateUndefOrPoisonForTargetNode(
        SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
        bool PoisonOnly, bool ConsiderFlags, unsigned Depth) const override;

    bool isSplatValueForTargetNode(SDValue Op, const APInt &DemandedElts,
                                   APInt &UndefElts, const SelectionDAG &DAG,
                                   unsigned Depth) const override;

    bool isTargetCanonicalConstantNode(SDValue Op) const override {
      // Peek through bitcasts/extracts/inserts to see if we have a broadcast
      // vector from memory.
      while (Op.getOpcode() == ISD::BITCAST ||
             Op.getOpcode() == ISD::EXTRACT_SUBVECTOR ||
             (Op.getOpcode() == ISD::INSERT_SUBVECTOR &&
              Op.getOperand(0).isUndef()))
        Op = Op.getOperand(Op.getOpcode() == ISD::INSERT_SUBVECTOR ? 1 : 0);

      return Op.getOpcode() == X86ISD::VBROADCAST_LOAD ||
             TargetLowering::isTargetCanonicalConstantNode(Op);
    }

    const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;

    SDValue unwrapAddress(SDValue N) const override;

    SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;

    bool ExpandInlineAsm(CallInst *CI) const override;

    ConstraintType getConstraintType(StringRef Constraint) const override;

    /// Examine constraint string and operand type and determine a weight value.
    /// The operand object must already have been set up with the operand type.
    ConstraintWeight
      getSingleConstraintMatchWeight(AsmOperandInfo &Info,
                                     const char *Constraint) const override;

    const char *LowerXConstraint(EVT ConstraintVT) const override;

    /// Lower the specified operand into the Ops vector. If it is invalid,
    /// don't add anything to Ops. If hasMemory is true it means one of the
    /// asm constraints of the inline asm instruction being processed is 'm'.
    void LowerAsmOperandForConstraint(SDValue Op, StringRef Constraint,
                                      std::vector<SDValue> &Ops,
                                      SelectionDAG &DAG) const override;

    InlineAsm::ConstraintCode
    getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
      if (ConstraintCode == "v")
        return InlineAsm::ConstraintCode::v;
      return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
    }

    /// Handle Lowering flag assembly outputs.
    SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
                                        const SDLoc &DL,
                                        const AsmOperandInfo &Constraint,
                                        SelectionDAG &DAG) const override;

    /// Given a physical register constraint
    /// (e.g. {edx}), return the register number and the register class for the
    /// register.  This should only be used for C_Register constraints.  On
    /// error, this returns a register number of 0.
    std::pair<unsigned, const TargetRegisterClass *>
    getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                 StringRef Constraint, MVT VT) const override;

    /// Return true if the addressing mode represented
    /// by AM is legal for this target, for a load/store of the specified type.
    bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
                               Type *Ty, unsigned AS,
                               Instruction *I = nullptr) const override;

    bool addressingModeSupportsTLS(const GlobalValue &GV) const override;

    /// Return true if the specified immediate is legal
    /// icmp immediate, that is the target has icmp instructions which can
    /// compare a register against the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalICmpImmediate(int64_t Imm) const override;

    /// Return true if the specified immediate is legal
    /// add immediate, that is the target has add instructions which can
    /// add a register and the immediate without having to materialize
    /// the immediate into a register.
    bool isLegalAddImmediate(int64_t Imm) const override;

    bool isLegalStoreImmediate(int64_t Imm) const override;

    /// This is used to enable splatted operand transforms for vector shifts
    /// and vector funnel shifts.
    bool isVectorShiftByScalarCheap(Type *Ty) const override;

    /// Add x86-specific opcodes to the default list.
    bool isBinOp(unsigned Opcode) const override;

    /// Returns true if the opcode is a commutative binary operation.
    bool isCommutativeBinOp(unsigned Opcode) const override;

    /// Return true if it's free to truncate a value of
    /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate an i32 value in
    /// register EAX to i16 by referencing its sub-register AX.
    bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
    bool isTruncateFree(EVT VT1, EVT VT2) const override;

    bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;

    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the
    /// result register. This does not necessarily include registers defined in
    /// unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
    bool isZExtFree(Type *Ty1, Type *Ty2) const override;
    bool isZExtFree(EVT VT1, EVT VT2) const override;
    bool isZExtFree(SDValue Val, EVT VT2) const override;
1367 
1368     bool shouldSinkOperands(Instruction *I,
1369                             SmallVectorImpl<Use *> &Ops) const override;
1370     bool shouldConvertPhiType(Type *From, Type *To) const override;
1371 
1372     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1373     /// extend node) is profitable.
1374     bool isVectorLoadExtDesirable(SDValue) const override;
1375 
1376     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1377     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1378     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1379     bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1380                                     EVT VT) const override;
1381 
1382     /// Return true if it's profitable to narrow operations of type SrcVT to
1383     /// DestVT. e.g. on x86, it's profitable to narrow from i32 to i8 but not
1384     /// from i32 to i16.
1385     bool isNarrowingProfitable(EVT SrcVT, EVT DestVT) const override;
1386 
1387     bool shouldFoldSelectWithIdentityConstant(unsigned BinOpcode,
1388                                               EVT VT) const override;
1389 
1390     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1391     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1392     /// true and stores the intrinsic information into the IntrinsicInfo that was
1393     /// passed to the function.
1394     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1395                             MachineFunction &MF,
1396                             unsigned Intrinsic) const override;
1397 
1398     /// Returns true if the target can instruction select the
1399     /// specified FP immediate natively. If false, the legalizer will
1400     /// materialize the FP immediate as a load from a constant pool.
1401     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1402                       bool ForCodeSize) const override;
1403 
1404     /// Targets can use this to indicate that they only support *some*
1405     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1406     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1407     /// be legal.
1408     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1409 
1410     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1411     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1412     /// constant pool entry.
1413     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1414 
1415     /// Returns true if lowering to a jump table is allowed.
1416     bool areJTsAllowed(const Function *Fn) const override;
1417 
1418     MVT getPreferredSwitchConditionType(LLVMContext &Context,
1419                                         EVT ConditionVT) const override;
1420 
1421     /// If true, then instruction selection should
1422     /// seek to shrink the FP constant of the specified type to a smaller type
1423     /// in order to save space and / or reduce runtime.
1424     bool ShouldShrinkFPConstant(EVT VT) const override;
1425 
1426     /// Return true if we believe it is correct and profitable to reduce the
1427     /// load node to a smaller type.
1428     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1429                                EVT NewVT) const override;
1430 
1431     /// Return true if the specified scalar FP type is computed in an SSE
1432     /// register, not on the X87 floating point stack.
1433     bool isScalarFPTypeInSSEReg(EVT VT) const;
1434 
1435     /// Returns true if it is beneficial to convert a load of a constant
1436     /// to just the constant itself.
1437     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1438                                            Type *Ty) const override;
1439 
1440     bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1441 
1442     bool convertSelectOfConstantsToMath(EVT VT) const override;
1443 
1444     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1445                                 SDValue C) const override;
1446 
1447     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1448     /// with this index.
1449     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1450                                  unsigned Index) const override;
1451 
1452     /// Scalar ops always have equal or better analysis/performance/power than
1453     /// the vector equivalent, so this always makes sense if the scalar op is
1454     /// supported.
shouldScalarizeBinop(SDValue)1455     bool shouldScalarizeBinop(SDValue) const override;
1456 
1457     /// Extract of a scalar FP value from index 0 of a vector is free.
1458     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1459       EVT EltVT = VT.getScalarType();
1460       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1461     }
1462 
1463     /// Overflow nodes should get combined/lowered to optimal instructions
1464     /// (they should allow eliminating explicit compares by getting flags from
1465     /// math ops).
1466     bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1467                               bool MathUsed) const override;
1468 
storeOfVectorConstantIsCheap(bool IsZero,EVT MemVT,unsigned NumElem,unsigned AddrSpace)1469     bool storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, unsigned NumElem,
1470                                       unsigned AddrSpace) const override {
1471       // If we can replace more than 2 scalar stores, there will be a reduction
1472       // in instructions even after we add a vector constant load.
1473       return IsZero || NumElem > 2;
1474     }
1475 
1476     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1477                                  const SelectionDAG &DAG,
1478                                  const MachineMemOperand &MMO) const override;
1479 
1480     Register getRegisterByName(const char* RegName, LLT VT,
1481                                const MachineFunction &MF) const override;
1482 
1483     /// If a physical register, this returns the register that receives the
1484     /// exception address on entry to an EH pad.
1485     Register
1486     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1487 
1488     /// If a physical register, this returns the register that receives the
1489     /// exception typeid on entry to a landing pad.
1490     Register
1491     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1492 
1493     bool needsFixedCatchObjects() const override;
1494 
1495     /// This method returns a target-specific FastISel object, or null if
1496     /// the target does not support "fast" ISel.
1497     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1498                              const TargetLibraryInfo *libInfo) const override;
1499 
1500     /// If the target has a standard location for the stack protector cookie,
1501     /// returns the address of that location. Otherwise, returns nullptr.
1502     Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1503 
1504     bool useLoadStackGuardNode() const override;
1505     bool useStackGuardXorFP() const override;
1506     void insertSSPDeclarations(Module &M) const override;
1507     Value *getSDagStackGuard(const Module &M) const override;
1508     Function *getSSPStackGuardCheck(const Module &M) const override;
1509     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1510                                 const SDLoc &DL) const override;
1511 
1512 
1513     /// Return the location of the SafeStack pointer: x86 stores it at a
1514     /// fixed offset in a non-standard address space rather than the
1515     /// default location.
1516     Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1517 
1518     std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1519                                           SDValue Chain, SDValue Pointer,
1520                                           MachinePointerInfo PtrInfo,
1521                                           Align Alignment,
1522                                           SelectionDAG &DAG) const;
1523 
1524     /// Customize the preferred legalization strategy for certain types.
1525     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1526 
1527     bool softPromoteHalfType() const override { return true; }
1528 
1529     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1530                                       EVT VT) const override;
1531 
1532     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1533                                            CallingConv::ID CC,
1534                                            EVT VT) const override;
1535 
1536     unsigned getVectorTypeBreakdownForCallingConv(
1537         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1538         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1539 
1540     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1541 
1542     bool supportSwiftError() const override;
1543 
1544     bool supportKCFIBundles() const override { return true; }
1545 
1546     MachineInstr *EmitKCFICheck(MachineBasicBlock &MBB,
1547                                 MachineBasicBlock::instr_iterator &MBBI,
1548                                 const TargetInstrInfo *TII) const override;
1549 
1550     bool hasStackProbeSymbol(const MachineFunction &MF) const override;
1551     bool hasInlineStackProbe(const MachineFunction &MF) const override;
1552     StringRef getStackProbeSymbolName(const MachineFunction &MF) const override;
1553 
1554     unsigned getStackProbeSize(const MachineFunction &MF) const;
1555 
1556     bool hasVectorBlend() const override { return true; }
1557 
1558     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1559 
1560     bool isInlineAsmTargetBranch(const SmallVectorImpl<StringRef> &AsmStrs,
1561                                  unsigned OpNo) const override;
1562 
1563     SDValue visitMaskedLoad(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1564                             MachineMemOperand *MMO, SDValue &NewLoad,
1565                             SDValue Ptr, SDValue PassThru,
1566                             SDValue Mask) const override;
1567     SDValue visitMaskedStore(SelectionDAG &DAG, const SDLoc &DL, SDValue Chain,
1568                              MachineMemOperand *MMO, SDValue Ptr, SDValue Val,
1569                              SDValue Mask) const override;
1570 
1571     /// Lower interleaved load(s) into target-specific
1572     /// instructions/intrinsics.
1573     bool lowerInterleavedLoad(LoadInst *LI,
1574                               ArrayRef<ShuffleVectorInst *> Shuffles,
1575                               ArrayRef<unsigned> Indices,
1576                               unsigned Factor) const override;
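    // For illustration: with the interleave factor capped at 4 (see
    // getMaxSupportedInterleaveFactor above), a stride-4 interleaved load
    // is rebuilt from wide contiguous vector loads followed by shuffles,
    // rather than one scalar load per element.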
1577 
1578     /// Lower interleaved store(s) into target-specific
1579     /// instructions/intrinsics.
1580     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1581                                unsigned Factor) const override;
1582 
1583     SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value, SDValue Addr,
1584                                    int JTI, SelectionDAG &DAG) const override;
1585 
1586     Align getPrefLoopAlignment(MachineLoop *ML) const override;
1587 
1588     EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override {
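      // Soft-promote f80: its 80 value bits round up to the next 32-bit
      // multiple, i.e. a 96-bit integer.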
1589       if (VT == MVT::f80)
1590         return EVT::getIntegerVT(Context, 96);
1591       return TargetLoweringBase::getTypeToTransformTo(Context, VT);
1592     }
1593 
1594   protected:
1595     std::pair<const TargetRegisterClass *, uint8_t>
1596     findRepresentativeClass(const TargetRegisterInfo *TRI,
1597                             MVT VT) const override;
1598 
1599   private:
1600     /// Keep a reference to the X86Subtarget around so that we can
1601     /// make the right decision when generating code for different targets.
1602     const X86Subtarget &Subtarget;
1603 
1604     /// A list of legal FP immediates.
1605     std::vector<APFloat> LegalFPImmediates;
1606 
1607     /// Indicate that this x86 target can instruction
1608     /// select the specified FP immediate natively.
1609     void addLegalFPImmediate(const APFloat& Imm) {
1610       LegalFPImmediates.push_back(Imm);
1611     }
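    // Typical usage (a sketch): the constructor registers immediates with
    // dedicated materialization patterns, e.g. +0.0 via xorps/fldz, so no
    // constant-pool load is needed for them.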
1612 
1613     SDValue LowerCallResult(SDValue Chain, SDValue InGlue,
1614                             CallingConv::ID CallConv, bool isVarArg,
1615                             const SmallVectorImpl<ISD::InputArg> &Ins,
1616                             const SDLoc &dl, SelectionDAG &DAG,
1617                             SmallVectorImpl<SDValue> &InVals,
1618                             uint32_t *RegMask) const;
1619     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1620                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1621                              const SDLoc &dl, SelectionDAG &DAG,
1622                              const CCValAssign &VA, MachineFrameInfo &MFI,
1623                              unsigned i) const;
1624     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1625                              const SDLoc &dl, SelectionDAG &DAG,
1626                              const CCValAssign &VA,
1627                              ISD::ArgFlagsTy Flags, bool isByval) const;
1628 
1629     // Call lowering helpers.
1630 
1631     /// Check whether the call is eligible for tail call optimization. Targets
1632     /// that want to do tail call optimization should implement this function.
1633     bool IsEligibleForTailCallOptimization(
1634         TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
1635         SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const;
1636     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1637                                     SDValue Chain, bool IsTailCall,
1638                                     bool Is64Bit, int FPDiff,
1639                                     const SDLoc &dl) const;
1640 
1641     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1642                                          SelectionDAG &DAG) const;
1643 
1644     unsigned getAddressSpace() const;
1645 
1646     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1647                             SDValue &Chain) const;
1648     SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1649 
1650     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1651     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1652     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1653     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1654 
1655     unsigned getGlobalWrapperKind(const GlobalValue *GV,
1656                                   const unsigned char OpFlags) const;
1657     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1658     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1659     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1660     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1661     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1662 
1663     /// Creates target global address or external symbol nodes for calls or
1664     /// other uses.
1665     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1666                                   bool ForCall) const;
1667 
1668     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1669     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1670     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1671     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1672     SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1673     SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
1674     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1675     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1676     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1677     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1678     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1679     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1680     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1681     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1682     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1683     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1684     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1685     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1686     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1687     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1688     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1689     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1690     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1691     SDValue LowerGET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1692     SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1693     SDValue LowerGET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
1694     SDValue LowerSET_FPENV_MEM(SDValue Op, SelectionDAG &DAG) const;
1695     SDValue LowerRESET_FPENV(SDValue Op, SelectionDAG &DAG) const;
1696     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1697     SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
1698                                     SDValue &Chain) const;
1699     SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1700     SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
1701     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1702     SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1703     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1704     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1705     SDValue LowerFP_TO_BF16(SDValue Op, SelectionDAG &DAG) const;
1706 
1707     SDValue
1708     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1709                          const SmallVectorImpl<ISD::InputArg> &Ins,
1710                          const SDLoc &dl, SelectionDAG &DAG,
1711                          SmallVectorImpl<SDValue> &InVals) const override;
1712     SDValue LowerCall(CallLoweringInfo &CLI,
1713                       SmallVectorImpl<SDValue> &InVals) const override;
1714 
1715     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1716                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1717                         const SmallVectorImpl<SDValue> &OutVals,
1718                         const SDLoc &dl, SelectionDAG &DAG) const override;
1719 
1720     bool supportSplitCSR(MachineFunction *MF) const override {
1721       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1722           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1723     }
1724     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1725     void insertCopiesSplitCSR(
1726       MachineBasicBlock *Entry,
1727       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1728 
1729     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1730 
1731     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1732 
1733     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1734                             ISD::NodeType ExtendKind) const override;
1735 
1736     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1737                         bool isVarArg,
1738                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1739                         LLVMContext &Context) const override;
1740 
1741     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1742     ArrayRef<MCPhysReg> getRoundingControlRegisters() const override;
1743 
1744     TargetLoweringBase::AtomicExpansionKind
1745     shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1746     TargetLoweringBase::AtomicExpansionKind
1747     shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1748     TargetLoweringBase::AtomicExpansionKind
1749     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1750     TargetLoweringBase::AtomicExpansionKind
1751     shouldExpandLogicAtomicRMWInIR(AtomicRMWInst *AI) const;
1752     void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
1753     void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const override;
1754 
1755     LoadInst *
1756     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1757 
1758     bool needsCmpXchgNb(Type *MemType) const;
1759 
1760     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1761                                 MachineBasicBlock *DispatchBB, int FI) const;
1762 
1763     // Utility function to emit the low-level va_arg code for X86-64.
1764     MachineBasicBlock *
1765     EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
1766 
1767     /// Utility function to lower two cascaded CMOV (select) instructions.
1768     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1769                                                  MachineInstr &MI2,
1770                                                  MachineBasicBlock *BB) const;
1771 
1772     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1773                                          MachineBasicBlock *BB) const;
1774 
1775     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1776                                            MachineBasicBlock *BB) const;
1777 
1778     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1779                                             MachineBasicBlock *BB) const;
1780 
1781     MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
1782                                                MachineBasicBlock *BB) const;
1783 
1784     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1785                                           MachineBasicBlock *BB) const;
1786 
1787     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1788                                           MachineBasicBlock *BB) const;
1789 
1790     MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
1791                                                 MachineBasicBlock *BB) const;
1792 
1793     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1794                                         MachineBasicBlock *MBB) const;
1795 
1796     void emitSetJmpShadowStackFix(MachineInstr &MI,
1797                                   MachineBasicBlock *MBB) const;
1798 
1799     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1800                                          MachineBasicBlock *MBB) const;
1801 
1802     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1803                                                  MachineBasicBlock *MBB) const;
1804 
1805     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1806                                              MachineBasicBlock *MBB) const;
1807 
1808     MachineBasicBlock *emitPatchableEventCall(MachineInstr &MI,
1809                                               MachineBasicBlock *MBB) const;
1810 
1811     /// Emit flags for the given setcc condition and operands. Also return
1812     /// the corresponding X86 condition code constant in X86CC.
1813     SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
1814                               const SDLoc &dl, SelectionDAG &DAG,
1815                               SDValue &X86CC) const;
1816 
1817     bool optimizeFMulOrFDivAsShiftAddBitcast(SDNode *N, SDValue FPConst,
1818                                              SDValue IntPow2) const override;
1819 
1820     /// Check if replacement of SQRT with RSQRT should be disabled.
1821     bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;
1822 
1823     /// Use rsqrt* to speed up sqrt calculations.
1824     SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1825                             int &RefinementSteps, bool &UseOneConstNR,
1826                             bool Reciprocal) const override;
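    // For illustration: one Newton-Raphson step on the ~12-bit RSQRTSS
    // estimate,
    //   x1 = x0 * (1.5 - 0.5 * a * x0 * x0),
    // brings the result near full float precision.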
1827 
1828     /// Use rcp* to speed up fdiv calculations.
1829     SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1830                              int &RefinementSteps) const override;
1831 
1832     /// Reassociate floating point divisions into multiply by reciprocal.
1833     unsigned combineRepeatedFPDivisors() const override;
1834 
1835     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1836                           SmallVectorImpl<SDNode *> &Created) const override;
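    // For illustration: signed division by a power of two biases negative
    // inputs before the arithmetic shift, e.g. for i32, x sdiv 4 becomes
    //   t = (x >>s 31) >>u 30;  // 3 if x < 0, else 0
    //   q = (x + t) >>s 2;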
1837 
1838     SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
1839                     SDValue V2) const;
1840   };
1841 
1842   namespace X86 {
1843     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1844                              const TargetLibraryInfo *libInfo);
1845   } // end namespace X86
1846 
1847   // X86-specific Gather/Scatter nodes.
1848   // The class has the same order of operands as MaskedGatherScatterSDNode for
1849   // convenience.
1850   class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1851   public:
1852     // This is intended as a utility and should never be created directly.
1853     X86MaskedGatherScatterSDNode() = delete;
1854     ~X86MaskedGatherScatterSDNode() = delete;
1855 
1856     const SDValue &getBasePtr() const { return getOperand(3); }
1857     const SDValue &getIndex()   const { return getOperand(4); }
1858     const SDValue &getMask()    const { return getOperand(2); }
1859     const SDValue &getScale()   const { return getOperand(5); }
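    // Operand layout implied by the accessors above:
    //   0 = chain, 1 = pass-through (gather) / value (scatter),
    //   2 = mask, 3 = base pointer, 4 = index, 5 = scale.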
1860 
1861     static bool classof(const SDNode *N) {
1862       return N->getOpcode() == X86ISD::MGATHER ||
1863              N->getOpcode() == X86ISD::MSCATTER;
1864     }
1865   };
1866 
1867   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1868   public:
1869     const SDValue &getPassThru() const { return getOperand(1); }
1870 
1871     static bool classof(const SDNode *N) {
1872       return N->getOpcode() == X86ISD::MGATHER;
1873     }
1874   };
1875 
1876   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1877   public:
1878     const SDValue &getValue() const { return getOperand(1); }
1879 
1880     static bool classof(const SDNode *N) {
1881       return N->getOpcode() == X86ISD::MSCATTER;
1882     }
1883   };
1884 
1885   /// Generate unpacklo/unpackhi shuffle mask.
1886   void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
1887                                bool Unary);
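  // For illustration: for v4i32, Lo gives <0, 4, 1, 5> and Hi gives
  // <2, 6, 3, 7>; the unary form repeats the first operand's lanes
  // instead, e.g. Lo --> <0, 0, 1, 1>.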
1888 
1889   /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
1890   /// that AVX imposes, and specific to the unary pattern. Example:
1891   /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
1892   /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
1893   void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
1894 
1895 } // end namespace llvm
1896 
1897 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
1898