1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16 
17 #include "llvm/CodeGen/TargetLowering.h"
18 
19 namespace llvm {
20   class X86Subtarget;
21   class X86TargetMachine;
22 
23   namespace X86ISD {
24     // X86 Specific DAG Nodes
25   enum NodeType : unsigned {
26     // Start the numbering where the builtin ops leave off.
27     FIRST_NUMBER = ISD::BUILTIN_OP_END,
28 
29     /// Bit scan forward.
30     BSF,
31     /// Bit scan reverse.
32     BSR,
33 
34     /// X86 funnel/double shift i16 instructions. These correspond to the
35     /// X86::SHLDW and X86::SHRDW instructions, which have different
36     /// shift-amount modulo rules from generic funnel shifts.
37     /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
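    /// (Illustrative clarification, not taken from the surrounding text: the
    /// 16-bit SHLD/SHRD forms mask their count modulo 32, whereas
    /// ISD::FSHL/FSHR on i16 interprets the amount modulo 16, which is why
    /// these need dedicated opcodes.)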
38     FSHL,
39     FSHR,
40 
41     /// Bitwise logical AND of floating point values. This corresponds
42     /// to X86::ANDPS or X86::ANDPD.
43     FAND,
44 
45     /// Bitwise logical OR of floating point values. This corresponds
46     /// to X86::ORPS or X86::ORPD.
47     FOR,
48 
49     /// Bitwise logical XOR of floating point values. This corresponds
50     /// to X86::XORPS or X86::XORPD.
51     FXOR,
52 
53     /// Bitwise logical ANDNOT of floating point values. This
54     /// corresponds to X86::ANDNPS or X86::ANDNPD.
55     FANDN,
56 
57     /// These operations represent an abstract X86 call
58     /// instruction, which includes a bunch of information.  In particular,
59     /// the operands of these nodes are:
60     ///
61     ///     #0 - The incoming token chain
62     ///     #1 - The callee
63     ///     #2 - The number of arg bytes the caller pushes on the stack.
64     ///     #3 - The number of arg bytes the callee pops off the stack.
65     ///     #4 - The value to pass in AL/AX/EAX (optional)
66     ///     #5 - The value to pass in DL/DX/EDX (optional)
67     ///
68     /// The result values of these nodes are:
69     ///
70     ///     #0 - The outgoing token chain
71     ///     #1 - The first register result value (optional)
72     ///     #2 - The second register result value (optional)
73     ///
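    /// A minimal sketch (illustrative only, not lifted from the lowering
    /// code): a call that passes no stack bytes and returns one value in a
    /// register could be modeled as
    ///     OutChain, RetVal = X86ISD::CALL InChain, Callee, 0, 0
    /// with the optional AL/AX/EAX and DL/DX/EDX operands omitted.
    ///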
74     CALL,
75 
76     /// Same as call except it adds the NoTrack prefix.
77     NT_CALL,
78 
79     /// X86 compare and logical compare instructions.
80     CMP,
81     FCMP,
82     COMI,
83     UCOMI,
84 
85     /// X86 bit-test instructions.
86     BT,
87 
88     /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
89     /// operand, usually produced by a CMP instruction.
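    /// For example (sketch, not taken from the lowering code): an i8 "equal"
    /// result can be modeled as
    ///     (X86ISD::SETCC (i8 X86::COND_E), EFLAGS)
    /// which selects to a SETE of the zero flag.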
90     SETCC,
91 
92     /// X86 Select
93     SELECTS,
94 
95     // Same as SETCC except it's materialized with an SBB and the value is all
96     // ones or all zeros.
97     SETCC_CARRY, // R = carry_bit ? ~0 : 0
98 
99     /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
100     /// Operands are two FP values to compare; result is a mask of
101     /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
102     FSETCC,
103 
104     /// X86 FP SETCC, similar to above, but with output as an i1 mask, and
105     /// a version with SAE.
106     FSETCCM,
107     FSETCCM_SAE,
108 
109     /// X86 conditional moves. Operand 0 and operand 1 are the two values
110     /// to select from. Operand 2 is the condition code, and operand 3 is the
111     /// flag operand produced by a CMP or TEST instruction.
112     CMOV,
113 
114     /// X86 conditional branches. Operand 0 is the chain operand, operand 1
115     /// is the block to branch if condition is true, operand 2 is the
116     /// condition code, and operand 3 is the flag operand produced by a CMP
117     /// or TEST instruction.
118     BRCOND,
119 
120     /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
121     /// operand 1 is the target address.
122     NT_BRIND,
123 
124     /// Return with a flag operand. Operand 0 is the chain operand, operand
125     /// 1 is the number of bytes of stack to pop.
126     RET_FLAG,
127 
128     /// Return from interrupt. Operand 0 is the number of bytes to pop.
129     IRET,
130 
131     /// Repeat fill, corresponds to X86::REP_STOSx.
132     REP_STOS,
133 
134     /// Repeat move, corresponds to X86::REP_MOVSx.
135     REP_MOVS,
136 
137     /// On Darwin, this node represents the result of the popl
138     /// at function entry, used for PIC code.
139     GlobalBaseReg,
140 
141     /// A wrapper node for TargetConstantPool, TargetJumpTable,
142     /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
143     /// MCSymbol and TargetBlockAddress.
144     Wrapper,
145 
146     /// Special wrapper used under X86-64 PIC mode for RIP
147     /// relative displacements.
148     WrapperRIP,
149 
150     /// Copies a 64-bit value from an MMX vector to the low word
151     /// of an XMM vector, with the high word zero filled.
152     MOVQ2DQ,
153 
154     /// Copies a 64-bit value from the low word of an XMM vector
155     /// to an MMX vector.
156     MOVDQ2Q,
157 
158     /// Copies a 32-bit value from the low word of an MMX
159     /// vector to a GPR.
160     MMX_MOVD2W,
161 
162     /// Copies a GPR into the low 32-bit word of an MMX vector
163     /// and zeroes out the high word.
164     MMX_MOVW2D,
165 
166     /// Extract an 8-bit value from a vector and zero extend it to
167     /// i32, corresponds to X86::PEXTRB.
168     PEXTRB,
169 
170     /// Extract a 16-bit value from a vector and zero extend it to
171     /// i32, corresponds to X86::PEXTRW.
172     PEXTRW,
173 
174     /// Insert any element of a 4 x float vector into any element
175     /// of a destination 4 x float vector.
176     INSERTPS,
177 
178     /// Insert the lower 8 bits of a 32-bit value into a vector,
179     /// corresponds to X86::PINSRB.
180     PINSRB,
181 
182     /// Insert the lower 16 bits of a 32-bit value into a vector,
183     /// corresponds to X86::PINSRW.
184     PINSRW,
185 
186     /// Shuffle 16 8-bit values within a vector.
187     PSHUFB,
188 
189     /// Compute Sum of Absolute Differences.
190     PSADBW,
191     /// Compute Double Block Packed Sum-Absolute-Differences
192     DBPSADBW,
193 
194     /// Bitwise Logical AND NOT of Packed FP values.
195     ANDNP,
196 
197     /// Blend where the selector is an immediate.
198     BLENDI,
199 
200     /// Dynamic (non-constant condition) vector blend where only the sign bits
201     /// of the condition elements are used. This is used to enforce that the
202     /// condition mask is not valid for generic VSELECT optimizations. This
203     /// is also used to implement the intrinsics.
204     /// Operands are in VSELECT order: MASK, TRUE, FALSE
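    /// (Illustrative semantics: lane i of (BLENDV Mask, T, F) is T[i] when
    /// the sign bit of Mask[i] is set, and F[i] otherwise.)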
205     BLENDV,
206 
207     /// Combined add and sub on an FP vector.
208     ADDSUB,
209 
210     //  FP vector ops with rounding mode.
211     FADD_RND,
212     FADDS,
213     FADDS_RND,
214     FSUB_RND,
215     FSUBS,
216     FSUBS_RND,
217     FMUL_RND,
218     FMULS,
219     FMULS_RND,
220     FDIV_RND,
221     FDIVS,
222     FDIVS_RND,
223     FMAX_SAE,
224     FMAXS_SAE,
225     FMIN_SAE,
226     FMINS_SAE,
227     FSQRT_RND,
228     FSQRTS,
229     FSQRTS_RND,
230 
231     // FP vector get exponent.
232     FGETEXP,
233     FGETEXP_SAE,
234     FGETEXPS,
235     FGETEXPS_SAE,
236     // Extract Normalized Mantissas.
237     VGETMANT,
238     VGETMANT_SAE,
239     VGETMANTS,
240     VGETMANTS_SAE,
241     // FP Scale.
242     SCALEF,
243     SCALEF_RND,
244     SCALEFS,
245     SCALEFS_RND,
246 
247     // Unsigned Integer average.
248     AVG,
249 
250     /// Integer horizontal add/sub.
251     HADD,
252     HSUB,
253 
254     /// Floating point horizontal add/sub.
255     FHADD,
256     FHSUB,
257 
258     // Detect Conflicts Within a Vector
259     CONFLICT,
260 
261     /// Floating point max and min.
262     FMAX,
263     FMIN,
264 
265     /// Commutative FMIN and FMAX.
266     FMAXC,
267     FMINC,
268 
269     /// Scalar intrinsic floating point max and min.
270     FMAXS,
271     FMINS,
272 
273     /// Floating point reciprocal-sqrt and reciprocal approximation.
274     /// Note that these typically require refinement
275     /// in order to obtain suitable precision.
276     FRSQRT,
277     FRCP,
278 
279     // AVX-512 reciprocal approximations with a little more precision.
280     RSQRT14,
281     RSQRT14S,
282     RCP14,
283     RCP14S,
284 
285     // Thread Local Storage.
286     TLSADDR,
287 
288     // Thread Local Storage. A call to get the start address
289     // of the TLS block for the current module.
290     TLSBASEADDR,
291 
292     // Thread Local Storage.  When calling to an OS provided
293     // thunk at the address from an earlier relocation.
294     TLSCALL,
295 
296     // Exception Handling helpers.
297     EH_RETURN,
298 
299     // SjLj exception handling setjmp.
300     EH_SJLJ_SETJMP,
301 
302     // SjLj exception handling longjmp.
303     EH_SJLJ_LONGJMP,
304 
305     // SjLj exception handling dispatch.
306     EH_SJLJ_SETUP_DISPATCH,
307 
308     /// Tail call return. See X86TargetLowering::LowerCall for
309     /// the list of operands.
310     TC_RETURN,
311 
312     // Vector move to low scalar and zero higher vector elements.
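    // (Sketch: on v4i32 this keeps element 0 and zeroes elements 1-3, the
    // way a MOVQ/MOVSS-with-zeroing would.)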
313     VZEXT_MOVL,
314 
315     // Vector integer truncate.
316     VTRUNC,
317     // Vector integer truncate with unsigned/signed saturation.
318     VTRUNCUS,
319     VTRUNCS,
320 
321     // Masked version of the above. Used when less than a 128-bit result is
322     // produced since the mask only applies to the lower elements and can't
323     // be represented by a select.
324     // SRC, PASSTHRU, MASK
325     VMTRUNC,
326     VMTRUNCUS,
327     VMTRUNCS,
328 
329     // Vector FP extend.
330     VFPEXT,
331     VFPEXT_SAE,
332     VFPEXTS,
333     VFPEXTS_SAE,
334 
335     // Vector FP round.
336     VFPROUND,
337     VFPROUND_RND,
338     VFPROUNDS,
339     VFPROUNDS_RND,
340 
341     // Masked version of above. Used for v2f64->v4f32.
342     // SRC, PASSTHRU, MASK
343     VMFPROUND,
344 
345     // 128-bit vector logical left / right shift
346     VSHLDQ,
347     VSRLDQ,
348 
349     // Vector shift elements
350     VSHL,
351     VSRL,
352     VSRA,
353 
354     // Vector variable shift
355     VSHLV,
356     VSRLV,
357     VSRAV,
358 
359     // Vector shift elements by immediate
360     VSHLI,
361     VSRLI,
362     VSRAI,
363 
364     // Shifts of mask registers.
365     KSHIFTL,
366     KSHIFTR,
367 
368     // Bit rotate by immediate
369     VROTLI,
370     VROTRI,
371 
372     // Vector packed double/float comparison.
373     CMPP,
374 
375     // Vector integer comparisons.
376     PCMPEQ,
377     PCMPGT,
378 
379     // v8i16 Horizontal minimum and position.
380     PHMINPOS,
381 
382     MULTISHIFT,
383 
384     /// Vector comparison generating mask bits for fp and
385     /// integer signed and unsigned data types.
386     CMPM,
387     // Vector comparison with SAE for FP values
388     CMPM_SAE,
389 
390     // Arithmetic operations with FLAGS results.
391     ADD,
392     SUB,
393     ADC,
394     SBB,
395     SMUL,
396     UMUL,
397     OR,
398     XOR,
399     AND,
400 
401     // Bit field extract.
402     BEXTR,
403 
404     // Zero High Bits Starting with Specified Bit Position.
405     BZHI,
406 
407     // Parallel extract and deposit.
408     PDEP,
409     PEXT,
410 
411     // X86-specific multiply by immediate.
412     MUL_IMM,
413 
414     // Vector sign bit extraction.
415     MOVMSK,
416 
417     // Vector bitwise comparisons.
418     PTEST,
419 
420     // Vector packed fp sign bitwise comparisons.
421     TESTP,
422 
423     // OR/AND test for masks.
424     KORTEST,
425     KTEST,
426 
427     // ADD for masks.
428     KADD,
429 
430     // Several flavors of instructions with vector shuffle behaviors.
431     // Saturated signed/unsigned packing.
432     PACKSS,
433     PACKUS,
434     // Intra-lane alignr.
435     PALIGNR,
436     // AVX512 inter-lane alignr.
437     VALIGN,
438     PSHUFD,
439     PSHUFHW,
440     PSHUFLW,
441     SHUFP,
442     // VBMI2 Concat & Shift.
443     VSHLD,
444     VSHRD,
445     VSHLDV,
446     VSHRDV,
447     // Shuffle Packed Values at 128-bit granularity.
448     SHUF128,
449     MOVDDUP,
450     MOVSHDUP,
451     MOVSLDUP,
452     MOVLHPS,
453     MOVHLPS,
454     MOVSD,
455     MOVSS,
456     UNPCKL,
457     UNPCKH,
458     VPERMILPV,
459     VPERMILPI,
460     VPERMI,
461     VPERM2X128,
462 
463     // Variable Permute (VPERM).
464     // Res = VPERMV MaskV, V0
465     VPERMV,
466 
467     // 3-op Variable Permute (VPERMT2).
468     // Res = VPERMV3 V0, MaskV, V1
469     VPERMV3,
470 
471     // Bitwise ternary logic.
472     VPTERNLOG,
473     // Fix Up Special Packed Float32/64 values.
474     VFIXUPIMM,
475     VFIXUPIMM_SAE,
476     VFIXUPIMMS,
477     VFIXUPIMMS_SAE,
478     // Range Restriction Calculation For Packed Pairs of Float32/64 values.
479     VRANGE,
480     VRANGE_SAE,
481     VRANGES,
482     VRANGES_SAE,
483     // Reduce - Perform Reduction Transformation on scalar/packed FP.
484     VREDUCE,
485     VREDUCE_SAE,
486     VREDUCES,
487     VREDUCES_SAE,
488     // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
489     // Also used by the legacy (V)ROUND intrinsics where we mask out the
490     // scaling part of the immediate.
491     VRNDSCALE,
492     VRNDSCALE_SAE,
493     VRNDSCALES,
494     VRNDSCALES_SAE,
495     // Tests types of packed FP values.
496     VFPCLASS,
497     // Tests types of scalar FP values.
498     VFPCLASSS,
499 
500     // Broadcast (splat) scalar or element 0 of a vector. If the operand is
501     // a vector, this node may change the vector length as part of the splat.
502     VBROADCAST,
503     // Broadcast mask to vector.
504     VBROADCASTM,
505     // Broadcast subvector to vector.
506     SUBV_BROADCAST,
507 
508     /// SSE4A Extraction and Insertion.
509     EXTRQI,
510     INSERTQI,
511 
512     // XOP arithmetic/logical shifts.
513     VPSHA,
514     VPSHL,
515     // XOP signed/unsigned integer comparisons.
516     VPCOM,
517     VPCOMU,
518     // XOP packed permute bytes.
519     VPPERM,
520     // XOP two source permutation.
521     VPERMIL2,
522 
523     // Vector multiply packed unsigned doubleword integers.
524     PMULUDQ,
525     // Vector multiply packed signed doubleword integers.
526     PMULDQ,
527     // Vector multiply packed signed words with round and scale (PMULHRSW).
528     MULHRS,
529 
530     // Multiply and Add Packed Integers.
531     VPMADDUBSW,
532     VPMADDWD,
533 
534     // AVX512IFMA multiply and add.
535     // NOTE: These are different from the instructions and perform
536     // op0 x op1 + op2.
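    // (Sketch of the difference: these nodes take the two multiplicands as
    // op0/op1 and the accumulator as op2, whereas the VPMADD52[LH]UQ
    // instructions accumulate into their destination operand.)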
537     VPMADD52L,
538     VPMADD52H,
539 
540     // VNNI
541     VPDPBUSD,
542     VPDPBUSDS,
543     VPDPWSSD,
544     VPDPWSSDS,
545 
546     // FMA nodes.
547     // We use the target independent ISD::FMA for the non-inverted case.
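    // (Illustrative expansion of the names, assuming the usual convention:
    // FNMADD is -(a*b)+c, FMSUB is (a*b)-c, FNMSUB is -(a*b)-c, and
    // FMADDSUB/FMSUBADD alternate add and subtract across vector lanes.)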
548     FNMADD,
549     FMSUB,
550     FNMSUB,
551     FMADDSUB,
552     FMSUBADD,
553 
554     // FMA with rounding mode.
555     FMADD_RND,
556     FNMADD_RND,
557     FMSUB_RND,
558     FNMSUB_RND,
559     FMADDSUB_RND,
560     FMSUBADD_RND,
561 
562     // Compress and expand.
563     COMPRESS,
564     EXPAND,
565 
566     // Bits shuffle
567     VPSHUFBITQMB,
568 
569     // Convert unsigned/signed integer to floating-point value with rounding mode.
570     SINT_TO_FP_RND,
571     UINT_TO_FP_RND,
572     SCALAR_SINT_TO_FP,
573     SCALAR_UINT_TO_FP,
574     SCALAR_SINT_TO_FP_RND,
575     SCALAR_UINT_TO_FP_RND,
576 
577     // Vector float/double to signed/unsigned integer.
578     CVTP2SI,
579     CVTP2UI,
580     CVTP2SI_RND,
581     CVTP2UI_RND,
582     // Scalar float/double to signed/unsigned integer.
583     CVTS2SI,
584     CVTS2UI,
585     CVTS2SI_RND,
586     CVTS2UI_RND,
587 
588     // Vector float/double to signed/unsigned integer with truncation.
589     CVTTP2SI,
590     CVTTP2UI,
591     CVTTP2SI_SAE,
592     CVTTP2UI_SAE,
593     // Scalar float/double to signed/unsigned integer with truncation.
594     CVTTS2SI,
595     CVTTS2UI,
596     CVTTS2SI_SAE,
597     CVTTS2UI_SAE,
598 
599     // Vector signed/unsigned integer to float/double.
600     CVTSI2P,
601     CVTUI2P,
602 
603     // Masked versions of above. Used for v2f64->v4f32.
604     // SRC, PASSTHRU, MASK
605     MCVTP2SI,
606     MCVTP2UI,
607     MCVTTP2SI,
608     MCVTTP2UI,
609     MCVTSI2P,
610     MCVTUI2P,
611 
612     // Vector float to bfloat16.
613     // Convert two packed single-precision values to one packed BF16 result.
614     CVTNE2PS2BF16,
615     // Convert packed single-precision values to packed BF16 results.
616     CVTNEPS2BF16,
617     // Masked version of above.
618     // SRC, PASSTHRU, MASK
619     MCVTNEPS2BF16,
620 
621     // Dot product of BF16 pairs accumulated into
622     // packed single precision.
623     DPBF16PS,
624 
625     // Save xmm argument registers to the stack, according to %al. An operator
626     // is needed so that this can be expanded with control flow.
627     VASTART_SAVE_XMM_REGS,
628 
629     // Windows's _chkstk call to do stack probing.
630     WIN_ALLOCA,
631 
632     // For allocating variable amounts of stack space when using
633     // segmented stacks. Checks if the current stacklet has enough space, and
634     // falls back to heap allocation if not.
635     SEG_ALLOCA,
636 
637     // For allocating stack space when using stack clash protector.
638     // Allocation is performed by block, and each block is probed.
639     PROBED_ALLOCA,
640 
641     // Memory barriers.
642     MEMBARRIER,
643     MFENCE,
644 
645     // Get a random integer and indicate whether it is valid in CF.
646     RDRAND,
647 
648     // Get a NIST SP800-90B & C compliant random integer and
649     // indicate whether it is valid in CF.
650     RDSEED,
651 
652     // Protection keys
653     // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
654     // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
655     // value for ECX.
656     RDPKRU,
657     WRPKRU,
658 
659     // SSE42 string comparisons.
660     // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
661     // will emit one or two instructions based on which results are used. If
662     // both flags and index/mask are used, this allows us to use a single
663     // instruction since we won't have to pick an opcode for flags. Instead we
664     // can rely on the DAG to CSE everything and decide at isel.
665     PCMPISTR,
666     PCMPESTR,
667 
668     // Test if in transactional execution.
669     XTEST,
670 
671     // ERI instructions.
672     RSQRT28,
673     RSQRT28_SAE,
674     RSQRT28S,
675     RSQRT28S_SAE,
676     RCP28,
677     RCP28_SAE,
678     RCP28S,
679     RCP28S_SAE,
680     EXP2,
681     EXP2_SAE,
682 
683     // Conversions between float and half-float.
684     CVTPS2PH,
685     CVTPH2PS,
686     CVTPH2PS_SAE,
687 
688     // Masked version of above.
689     // SRC, RND, PASSTHRU, MASK
690     MCVTPS2PH,
691 
692     // Galois Field Arithmetic Instructions
693     GF2P8AFFINEINVQB,
694     GF2P8AFFINEQB,
695     GF2P8MULB,
696 
697     // LWP insert record.
698     LWPINS,
699 
700     // User level wait
701     UMWAIT,
702     TPAUSE,
703 
704     // Enqueue Stores Instructions
705     ENQCMD,
706     ENQCMDS,
707 
708     // For avx512-vp2intersect
709     VP2INTERSECT,
710 
711     /// X86 strict FP compare instructions.
712     STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
713     STRICT_FCMPS,
714 
715     // Vector packed double/float comparison.
716     STRICT_CMPP,
717 
718     /// Vector comparison generating mask bits for fp and
719     /// integer signed and unsigned data types.
720     STRICT_CMPM,
721 
722     // Vector float/double to signed/unsigned integer with truncation.
723     STRICT_CVTTP2SI,
724     STRICT_CVTTP2UI,
725 
726     // Vector FP extend.
727     STRICT_VFPEXT,
728 
729     // Vector FP round.
730     STRICT_VFPROUND,
731 
732     // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
733     // Also used by the legacy (V)ROUND intrinsics where we mask out the
734     // scaling part of the immediate.
735     STRICT_VRNDSCALE,
736 
737     // Vector signed/unsigned integer to float/double.
738     STRICT_CVTSI2P,
739     STRICT_CVTUI2P,
740 
741     // Strict FMA nodes.
742     STRICT_FNMADD,
743     STRICT_FMSUB,
744     STRICT_FNMSUB,
745 
746     // Conversions between float and half-float.
747     STRICT_CVTPS2PH,
748     STRICT_CVTPH2PS,
749 
750     // Compare and swap.
751     LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
752     LCMPXCHG8_DAG,
753     LCMPXCHG16_DAG,
754     LCMPXCHG8_SAVE_EBX_DAG,
755     LCMPXCHG16_SAVE_RBX_DAG,
756 
757     /// LOCK-prefixed arithmetic read-modify-write instructions.
758     /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
759     LADD,
760     LSUB,
761     LOR,
762     LXOR,
763     LAND,
764 
765     // Load, scalar_to_vector, and zero extend.
766     VZEXT_LOAD,
767 
768     // extract_vector_elt, store.
769     VEXTRACT_STORE,
770 
771     // scalar broadcast from memory
772     VBROADCAST_LOAD,
773 
774     // Store FP control word into i16 memory.
775     FNSTCW16m,
776 
777     /// This instruction implements FP_TO_SINT with the
778     /// integer destination in memory and a FP reg source.  This corresponds
779     /// to the X86::FIST*m instructions and the rounding mode change stuff. It
780     /// has two inputs (token chain and address) and two outputs (int value
781     /// and token chain). Memory VT specifies the type to store to.
782     FP_TO_INT_IN_MEM,
783 
784     /// This instruction implements SINT_TO_FP with the
785     /// integer source in memory and FP reg result.  This corresponds to the
786     /// X86::FILD*m instructions. It has two inputs (token chain and address)
787     /// and two outputs (FP value and token chain). The integer source type is
788     /// specified by the memory VT.
789     FILD,
790 
791     /// This instruction implements a fp->int store from FP stack
792     /// slots. This corresponds to the fist instruction. It takes a
793     /// chain operand, value to store, address, and glue. The memory VT
794     /// specifies the type to store as.
795     FIST,
796 
797     /// This instruction implements an extending load to FP stack slots.
798     /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
799     /// operand, and ptr to load from. The memory VT specifies the type to
800     /// load from.
801     FLD,
802 
803     /// This instruction implements a truncating store from FP stack
804     /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
805     /// chain operand, value to store, address, and glue. The memory VT
806     /// specifies the type to store as.
807     FST,
808 
809     /// This instruction grabs the address of the next argument
810     /// from a va_list. (reads and modifies the va_list in memory)
811     VAARG_64,
812 
813     // Vector truncating store with unsigned/signed saturation
814     VTRUNCSTOREUS,
815     VTRUNCSTORES,
816     // Vector truncating masked store with unsigned/signed saturation
817     VMTRUNCSTOREUS,
818     VMTRUNCSTORES,
819 
820     // X86 specific gather and scatter
821     MGATHER,
822     MSCATTER,
823 
824     // WARNING: Do not add anything at the end unless you want the node to
825     // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE, all
826     // opcodes will be treated as target memory ops!
827   };
828   } // end namespace X86ISD
829 
830   /// Define some predicates that are used for node matching.
831   namespace X86 {
832     /// Returns true if Elt is a constant zero or floating point constant +0.0.
833     bool isZeroNode(SDValue Elt);
834 
835     /// Returns true if the given offset can
836     /// fit into the displacement field of the instruction.
837     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
838                                       bool hasSymbolicDisplacement = true);
839 
840     /// Determines whether the callee is required to pop its
841     /// own arguments. Callee pop is necessary to support tail calls.
842     bool isCalleePop(CallingConv::ID CallingConv,
843                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
844 
845     /// If Op is a constant whose elements are all the same constant or
846     /// undefined, return true and return the constant value in \p SplatVal.
847     /// If we have undef bits that don't cover an entire element, we treat these
848     /// as zero if AllowPartialUndefs is set, else we fail and return false.
849     bool isConstantSplat(SDValue Op, APInt &SplatVal,
850                          bool AllowPartialUndefs = true);
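    /// A minimal usage sketch (illustrative only; the surrounding combine is
    /// hypothetical):
    /// \code
    ///   APInt SplatVal;
    ///   if (X86::isConstantSplat(Op.getOperand(1), SplatVal) &&
    ///       SplatVal.isSignMask())
    ///     ; // every (defined) element is the sign-bit mask
    /// \endcode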
851   } // end namespace X86
852 
853   //===--------------------------------------------------------------------===//
854   //  X86 Implementation of the TargetLowering interface
855   class X86TargetLowering final : public TargetLowering {
856   public:
857     explicit X86TargetLowering(const X86TargetMachine &TM,
858                                const X86Subtarget &STI);
859 
860     unsigned getJumpTableEncoding() const override;
861     bool useSoftFloat() const override;
862 
863     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
864                                ArgListTy &Args) const override;
865 
866     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
867       return MVT::i8;
868     }
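    // (Rationale, noted here for clarity: variable shift counts on x86 live
    // in CL, so i8 is the natural shift-amount type.)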
869 
870     const MCExpr *
871     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
872                               const MachineBasicBlock *MBB, unsigned uid,
873                               MCContext &Ctx) const override;
874 
875     /// Returns relocation base for the given PIC jumptable.
876     SDValue getPICJumpTableRelocBase(SDValue Table,
877                                      SelectionDAG &DAG) const override;
878     const MCExpr *
879     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
880                                  unsigned JTI, MCContext &Ctx) const override;
881 
882     /// Return the desired alignment for ByVal aggregate
883     /// function arguments in the caller parameter area. For X86, aggregates
884     /// that contain SSE vectors are placed at 16-byte boundaries while the
885     /// rest are at 4-byte boundaries.
886     unsigned getByValTypeAlignment(Type *Ty,
887                                    const DataLayout &DL) const override;
888 
889     EVT getOptimalMemOpType(const MemOp &Op,
890                             const AttributeList &FuncAttributes) const override;
891 
892     /// Returns true if it's safe to use load / store of the
893     /// specified type to expand memcpy / memset inline. This is mostly true
894     /// for all types except for some special cases. For example, on X86
895     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
896     /// also does type conversion. Note the specified type doesn't have to be
897     /// legal as the hook is used before type legalization.
898     bool isSafeMemOpType(MVT VT) const override;
899 
900     /// Returns true if the target allows unaligned memory accesses of the
901     /// specified type. Returns whether it is "fast" in the last argument.
902     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
903                                         MachineMemOperand::Flags Flags,
904                                         bool *Fast) const override;
905 
906     /// Provide custom lowering hooks for some operations.
907     ///
908     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
909 
910     /// Places new result values for the node in Results (their number
911     /// and types must exactly match those of the original return values of
912     /// the node), or leaves Results empty, which indicates that the node is not
913     /// to be custom lowered after all.
914     void LowerOperationWrapper(SDNode *N,
915                                SmallVectorImpl<SDValue> &Results,
916                                SelectionDAG &DAG) const override;
917 
918     /// Replace the results of a node with an illegal result
919     /// type with new values built out of custom code.
920     ///
921     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
922                             SelectionDAG &DAG) const override;
923 
924     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
925 
926     /// Return true if the target has native support for
927     /// the specified value type and it is 'desirable' to use the type for the
928     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
929     /// instruction encodings are longer and some i16 instructions are slow.
930     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
931 
932     /// Return true if the target has native support for the
933     /// specified value type and it is 'desirable' to use the type. e.g. On x86
934     /// i16 is legal, but undesirable since i16 instruction encodings are longer
935     /// and some i16 instructions are slow.
936     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
937 
938     /// Return the newly negated expression if the cost is not expensive and
939     /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
940     /// do the negation.
941     SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
942                                  bool LegalOperations, bool ForCodeSize,
943                                  NegatibleCost &Cost,
944                                  unsigned Depth) const override;
945 
946     MachineBasicBlock *
947     EmitInstrWithCustomInserter(MachineInstr &MI,
948                                 MachineBasicBlock *MBB) const override;
949 
950     /// This method returns the name of a target specific DAG node.
951     const char *getTargetNodeName(unsigned Opcode) const override;
952 
953     /// Do not merge vector stores after legalization because that may conflict
954     /// with x86-specific store splitting optimizations.
955     bool mergeStoresAfterLegalization(EVT MemVT) const override {
956       return !MemVT.isVector();
957     }
958 
959     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
960                           const SelectionDAG &DAG) const override;
961 
962     bool isCheapToSpeculateCttz() const override;
963 
964     bool isCheapToSpeculateCtlz() const override;
965 
966     bool isCtlzFast() const override;
967 
968     bool hasBitPreservingFPLogic(EVT VT) const override {
969       return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
970     }
971 
972     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
973       // If the pair to store is a mixture of float and int values, we will
974       // save two bitwise instructions and one float-to-int instruction and
975       // add one extra store instruction. There is potentially a more
976       // significant benefit because it avoids the float->int domain switch
977       // for the input value. So it is more likely a win.
978       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
979           (LTy.isInteger() && HTy.isFloatingPoint()))
980         return true;
981       // If the pair only contains int values, we will save two bitwise
982       // instructions and add one extra store instruction (costing one more
983       // store buffer entry). Since the benefit is less clear, we leave
984       // such pairs out until we have a test case proving it is a win.
985       return false;
986     }
987 
988     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
989 
990     bool hasAndNotCompare(SDValue Y) const override;
991 
992     bool hasAndNot(SDValue Y) const override;
993 
994     bool hasBitTest(SDValue X, SDValue Y) const override;
995 
996     bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
997         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
998         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
999         SelectionDAG &DAG) const override;
1000 
1001     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1002                                            CombineLevel Level) const override;
1003 
1004     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1005 
1006     bool
1007     shouldTransformSignedTruncationCheck(EVT XVT,
1008                                          unsigned KeptBits) const override {
1009       // For vectors, we don't have a preference.
1010       if (XVT.isVector())
1011         return false;
1012 
1013       auto VTIsOk = [](EVT VT) -> bool {
1014         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1015                VT == MVT::i64;
1016       };
1017 
1018       // We are ok with KeptBitsVT being byte/word/dword, which is what
1019       // MOVSX supports. XVT will be larger than KeptBitsVT.
1020       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1021       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1022     }
1023 
1024     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
1025 
1026     bool shouldSplatInsEltVarIndex(EVT VT) const override;
1027 
1028     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1029       return VT.isScalarInteger();
1030     }
1031 
1032     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
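    /// (Illustrative consequence, assuming SSE2: a 16-byte equality compare
    /// can become one pair of vector loads + PCMPEQB + PMOVMSKB + a compare
    /// of the mask against 0xFFFF, instead of two 8-byte integer compares.)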
1033     MVT hasFastEqualityCompare(unsigned NumBits) const override;
1034 
1035     /// Return the value type to use for ISD::SETCC.
1036     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1037                            EVT VT) const override;
1038 
1039     bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1040                                       const APInt &DemandedElts,
1041                                       TargetLoweringOpt &TLO) const override;
1042 
1043     /// Determine which of the bits specified in Mask are known to be either
1044     /// zero or one and return them in the KnownZero/KnownOne bitsets.
1045     void computeKnownBitsForTargetNode(const SDValue Op,
1046                                        KnownBits &Known,
1047                                        const APInt &DemandedElts,
1048                                        const SelectionDAG &DAG,
1049                                        unsigned Depth = 0) const override;
1050 
1051     /// Determine the number of bits in the operation that are sign bits.
1052     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1053                                              const APInt &DemandedElts,
1054                                              const SelectionDAG &DAG,
1055                                              unsigned Depth) const override;
1056 
1057     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1058                                                  const APInt &DemandedElts,
1059                                                  APInt &KnownUndef,
1060                                                  APInt &KnownZero,
1061                                                  TargetLoweringOpt &TLO,
1062                                                  unsigned Depth) const override;
1063 
1064     bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1065                                                     const APInt &DemandedElts,
1066                                                     unsigned MaskIndex,
1067                                                     TargetLoweringOpt &TLO,
1068                                                     unsigned Depth) const;
1069 
1070     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1071                                            const APInt &DemandedBits,
1072                                            const APInt &DemandedElts,
1073                                            KnownBits &Known,
1074                                            TargetLoweringOpt &TLO,
1075                                            unsigned Depth) const override;
1076 
1077     SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1078         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1079         SelectionDAG &DAG, unsigned Depth) const override;
1080 
1081     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1082 
1083     SDValue unwrapAddress(SDValue N) const override;
1084 
1085     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1086 
1087     bool ExpandInlineAsm(CallInst *CI) const override;
1088 
1089     ConstraintType getConstraintType(StringRef Constraint) const override;
1090 
1091     /// Examine constraint string and operand type and determine a weight value.
1092     /// The operand object must already have been set up with the operand type.
1093     ConstraintWeight
1094       getSingleConstraintMatchWeight(AsmOperandInfo &info,
1095                                      const char *constraint) const override;
1096 
1097     const char *LowerXConstraint(EVT ConstraintVT) const override;
1098 
1099     /// Lower the specified operand into the Ops vector. If it is invalid, don't
1100     /// add anything to Ops. If hasMemory is true it means one of the asm
1101     /// constraints of the inline asm instruction being processed is 'm'.
1102     void LowerAsmOperandForConstraint(SDValue Op,
1103                                       std::string &Constraint,
1104                                       std::vector<SDValue> &Ops,
1105                                       SelectionDAG &DAG) const override;
1106 
1107     unsigned
1108     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1109       if (ConstraintCode == "o")
1110         return InlineAsm::Constraint_o;
1111       else if (ConstraintCode == "v")
1112         return InlineAsm::Constraint_v;
1113       else if (ConstraintCode == "X")
1114         return InlineAsm::Constraint_X;
1115       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1116     }
1117 
1118     /// Handle Lowering flag assembly outputs.
1119     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
1120                                         const AsmOperandInfo &Constraint,
1121                                         SelectionDAG &DAG) const override;
1122 
1123     /// Given a physical register constraint
1124     /// (e.g. {edx}), return the register number and the register class for the
1125     /// register.  This should only be used for C_Register constraints.  On
1126     /// error, this returns a register number of 0.
1127     std::pair<unsigned, const TargetRegisterClass *>
1128     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1129                                  StringRef Constraint, MVT VT) const override;
1130 
1131     /// Return true if the addressing mode represented
1132     /// by AM is legal for this target, for a load/store of the specified type.
1133     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1134                                Type *Ty, unsigned AS,
1135                                Instruction *I = nullptr) const override;
1136 
1137     /// Return true if the specified immediate is a legal
1138     /// icmp immediate, that is, the target has icmp instructions which can
1139     /// compare a register against the immediate without having to materialize
1140     /// the immediate into a register.
1141     bool isLegalICmpImmediate(int64_t Imm) const override;
1142 
1143     /// Return true if the specified immediate is a legal
1144     /// add immediate, that is, the target has add instructions which can
1145     /// add a register and the immediate without having to materialize
1146     /// the immediate into a register.
1147     bool isLegalAddImmediate(int64_t Imm) const override;
1148 
1149     bool isLegalStoreImmediate(int64_t Imm) const override;
1150 
1151     /// Return the cost of the scaling factor used in the addressing
1152     /// mode represented by AM for this target, for a load/store
1153     /// of the specified type.
1154     /// If the AM is supported, the return value must be >= 0.
1155     /// If the AM is not supported, it returns a negative value.
1156     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
1157                              unsigned AS) const override;
1158 
1159     /// This is used to enable splatted operand transforms for vector shifts
1160     /// and vector funnel shifts.
1161     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1162 
1163     /// Add x86-specific opcodes to the default list.
1164     bool isBinOp(unsigned Opcode) const override;
1165 
1166     /// Returns true if the opcode is a commutative binary operation.
1167     bool isCommutativeBinOp(unsigned Opcode) const override;
1168 
1169     /// Return true if it's free to truncate a value of
1170     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1171     /// register EAX to i16 by referencing its sub-register AX.
1172     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1173     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1174 
1175     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1176 
1177     /// Return true if any actual instruction that defines a
1178     /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result
1179     /// register. This does not necessarily include registers defined in
1180     /// unknown ways, such as incoming arguments, or copies from unknown
1181     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1182     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1183     /// all instructions that define 32-bit values implicitly zero-extend the
1184     /// result out to 64 bits.
1185     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1186     bool isZExtFree(EVT VT1, EVT VT2) const override;
1187     bool isZExtFree(SDValue Val, EVT VT2) const override;
1188 
1189     bool shouldSinkOperands(Instruction *I,
1190                             SmallVectorImpl<Use *> &Ops) const override;
1191     bool shouldConvertPhiType(Type *From, Type *To) const override;
1192 
1193     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1194     /// extend node) is profitable.
1195     bool isVectorLoadExtDesirable(SDValue) const override;
1196 
1197     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1198     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1199     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1200     bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1201                                     EVT VT) const override;
1202 
1203     /// Return true if it's profitable to narrow
1204     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1205     /// from i32 to i8 but not from i32 to i16.
1206     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1207 
1208     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1209     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1210     /// true and stores the intrinsic information into the IntrinsicInfo that was
1211     /// passed to the function.
1212     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1213                             MachineFunction &MF,
1214                             unsigned Intrinsic) const override;
1215 
1216     /// Returns true if the target can instruction select the
1217     /// specified FP immediate natively. If false, the legalizer will
1218     /// materialize the FP immediate as a load from a constant pool.
1219     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1220                       bool ForCodeSize) const override;
1221 
1222     /// Targets can use this to indicate that they only support *some*
1223     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1224     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1225     /// be legal.
1226     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1227 
1228     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1229     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1230     /// constant pool entry.
1231     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1232 
1233     /// Returns true if lowering to a jump table is allowed.
1234     bool areJTsAllowed(const Function *Fn) const override;
1235 
1236     /// If true, then instruction selection should
1237     /// seek to shrink the FP constant of the specified type to a smaller type
1238     /// in order to save space and / or reduce runtime.
1239     bool ShouldShrinkFPConstant(EVT VT) const override {
1240       // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1241       // expensive than a straight movsd. On the other hand, it's important to
1242       // shrink long double fp constant since fldt is very slow.
1243       return !X86ScalarSSEf64 || VT == MVT::f80;
1244     }
1245 
1246     /// Return true if we believe it is correct and profitable to reduce the
1247     /// load node to a smaller type.
1248     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1249                                EVT NewVT) const override;
1250 
1251     /// Return true if the specified scalar FP type is computed in an SSE
1252     /// register, not on the X87 floating point stack.
1253     bool isScalarFPTypeInSSEReg(EVT VT) const {
1254       return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1255              (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
1256     }
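    // (Note, added for clarity: f80 is intentionally excluded; x86 long
    // double always stays on the x87 stack.)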
1257 
1258     /// Returns true if it is beneficial to convert a load of a constant
1259     /// to just the constant itself.
1260     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1261                                            Type *Ty) const override;
1262 
1263     bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1264 
1265     bool convertSelectOfConstantsToMath(EVT VT) const override;
1266 
1267     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1268                                 SDValue C) const override;
1269 
1270     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1271     /// with this index.
1272     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1273                                  unsigned Index) const override;
1274 
1275     /// Scalar ops always have equal or better analysis/performance/power than
1276     /// the vector equivalent, so this always makes sense if the scalar op is
1277     /// supported.
1278     bool shouldScalarizeBinop(SDValue) const override;
1279 
1280     /// Extract of a scalar FP value from index 0 of a vector is free.
1281     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1282       EVT EltVT = VT.getScalarType();
1283       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1284     }
1285 
1286     /// Overflow nodes should get combined/lowered to optimal instructions
1287     /// (they should allow eliminating explicit compares by getting flags from
1288     /// math ops).
1289     bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1290                               bool MathUsed) const override;
1291 
1292     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1293                                       unsigned AddrSpace) const override {
1294       // If we can replace more than 2 scalar stores, there will be a reduction
1295       // in instructions even after we add a vector constant load.
1296       return NumElem > 2;
1297     }
1298 
1299     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1300                                  const SelectionDAG &DAG,
1301                                  const MachineMemOperand &MMO) const override;
1302 
1303     /// Intel processors have a unified instruction and data cache
1304     const char * getClearCacheBuiltinName() const override {
1305       return nullptr; // nothing to do, move along.
1306     }
1307 
1308     Register getRegisterByName(const char* RegName, LLT VT,
1309                                const MachineFunction &MF) const override;
1310 
1311     /// If a physical register, this returns the register that receives the
1312     /// exception address on entry to an EH pad.
1313     Register
1314     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1315 
1316     /// If a physical register, this returns the register that receives the
1317     /// exception typeid on entry to a landing pad.
1318     Register
1319     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1320 
1321     bool needsFixedCatchObjects() const override;
1322 
1323     /// This method returns a target specific FastISel object,
1324     /// or null if the target does not support "fast" ISel.
1325     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1326                              const TargetLibraryInfo *libInfo) const override;
1327 
1328     /// If the target has a standard location for the stack protector cookie,
1329     /// returns the address of that location. Otherwise, returns nullptr.
1330     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1331 
1332     bool useLoadStackGuardNode() const override;
1333     bool useStackGuardXorFP() const override;
1334     void insertSSPDeclarations(Module &M) const override;
1335     Value *getSDagStackGuard(const Module &M) const override;
1336     Function *getSSPStackGuardCheck(const Module &M) const override;
1337     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1338                                 const SDLoc &DL) const override;
1339 
1340 
1341     /// Return the location where the target stores the SafeStack pointer; on
1342     /// X86 this may be at a fixed offset in a non-standard address space when
1343     /// the target provides one.
1344     Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1345 
1346     std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1347                                           SDValue Chain, SDValue Pointer,
1348                                           MachinePointerInfo PtrInfo,
1349                                           Align Alignment,
1350                                           SelectionDAG &DAG) const;
1351 
1352     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1353 
1354     /// Customize the preferred legalization strategy for certain types.
1355     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1356 
1357     bool softPromoteHalfType() const override { return true; }
1358 
1359     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1360                                       EVT VT) const override;
1361 
1362     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1363                                            CallingConv::ID CC,
1364                                            EVT VT) const override;
1365 
1366     unsigned getVectorTypeBreakdownForCallingConv(
1367         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1368         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1369 
1370     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1371 
1372     bool supportSwiftError() const override;
1373 
1374     bool hasStackProbeSymbol(MachineFunction &MF) const override;
1375     bool hasInlineStackProbe(MachineFunction &MF) const override;
1376     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1377 
1378     unsigned getStackProbeSize(MachineFunction &MF) const;
1379 
1380     bool hasVectorBlend() const override { return true; }
1381 
1382     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1383 
1384     /// Lower interleaved load(s) into target specific
1385     /// instructions/intrinsics.
1386     bool lowerInterleavedLoad(LoadInst *LI,
1387                               ArrayRef<ShuffleVectorInst *> Shuffles,
1388                               ArrayRef<unsigned> Indices,
1389                               unsigned Factor) const override;
1390 
1391     /// Lower interleaved store(s) into target specific
1392     /// instructions/intrinsics.
1393     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1394                                unsigned Factor) const override;
1395 
1396     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1397                                    SDValue Addr, SelectionDAG &DAG)
1398                                    const override;
1399 
1400   protected:
1401     std::pair<const TargetRegisterClass *, uint8_t>
1402     findRepresentativeClass(const TargetRegisterInfo *TRI,
1403                             MVT VT) const override;
1404 
1405   private:
1406     /// Keep a reference to the X86Subtarget around so that we can
1407     /// make the right decision when generating code for different targets.
1408     const X86Subtarget &Subtarget;
1409 
1410     /// Select between SSE or x87 floating point ops.
1411     /// When SSE is available, use it for f32 operations.
1412     /// When SSE2 is available, use it for f64 operations.
1413     bool X86ScalarSSEf32;
1414     bool X86ScalarSSEf64;
1415 
1416     /// A list of legal FP immediates.
1417     std::vector<APFloat> LegalFPImmediates;
1418 
1419     /// Indicate that this x86 target can instruction
1420     /// select the specified FP immediate natively.
1421     void addLegalFPImmediate(const APFloat& Imm) {
1422       LegalFPImmediates.push_back(Imm);
1423     }
1424 
1425     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1426                             CallingConv::ID CallConv, bool isVarArg,
1427                             const SmallVectorImpl<ISD::InputArg> &Ins,
1428                             const SDLoc &dl, SelectionDAG &DAG,
1429                             SmallVectorImpl<SDValue> &InVals,
1430                             uint32_t *RegMask) const;
1431     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1432                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1433                              const SDLoc &dl, SelectionDAG &DAG,
1434                              const CCValAssign &VA, MachineFrameInfo &MFI,
1435                              unsigned i) const;
1436     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1437                              const SDLoc &dl, SelectionDAG &DAG,
1438                              const CCValAssign &VA,
1439                              ISD::ArgFlagsTy Flags, bool isByval) const;
1440 
1441     // Call lowering helpers.
1442 
1443     /// Check whether the call is eligible for tail call optimization. Targets
1444     /// that want to do tail call optimization should implement this function.
1445     bool IsEligibleForTailCallOptimization(SDValue Callee,
1446                                            CallingConv::ID CalleeCC,
1447                                            bool isVarArg,
1448                                            bool isCalleeStructRet,
1449                                            bool isCallerStructRet,
1450                                            Type *RetTy,
1451                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1452                                     const SmallVectorImpl<SDValue> &OutVals,
1453                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1454                                            SelectionDAG& DAG) const;
1455     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1456                                     SDValue Chain, bool IsTailCall,
1457                                     bool Is64Bit, int FPDiff,
1458                                     const SDLoc &dl) const;
1459 
1460     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1461                                          SelectionDAG &DAG) const;
1462 
1463     unsigned getAddressSpace() const;
1464 
1465     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1466                             SDValue &Chain) const;
1467     SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1468 
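    // Per-opcode custom lowering routines. LowerOperation() presumably
    // switches on the node's opcode and forwards to the matching routine below
    // for any operation marked Custom via setOperationAction().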
1469     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1470     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1471     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1472     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1473 
1474     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1475                                   const unsigned char OpFlags = 0) const;
1476     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1477     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1478     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1479     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1480     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1481 
1482     /// Creates target global address or external symbol nodes for calls or
1483     /// other uses.
1484     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1485                                   bool ForCall) const;
1486 
1487     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1488     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1489     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1490     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1491     SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
1492     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1493     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1494     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1495     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1496     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1497     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1498     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1499     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1500     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1501     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1502     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1503     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1504     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1505     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1506     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1507     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1508     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1509     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1510     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1511     SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
1512     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1513     SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1514     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1515     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1516 
1517     SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
1518                           RTLIB::Libcall Call) const;
1519 
1520     SDValue
1521     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1522                          const SmallVectorImpl<ISD::InputArg> &Ins,
1523                          const SDLoc &dl, SelectionDAG &DAG,
1524                          SmallVectorImpl<SDValue> &InVals) const override;
1525     SDValue LowerCall(CallLoweringInfo &CLI,
1526                       SmallVectorImpl<SDValue> &InVals) const override;
1527 
1528     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1529                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1530                         const SmallVectorImpl<SDValue> &OutVals,
1531                         const SDLoc &dl, SelectionDAG &DAG) const override;
1532 
1533     bool supportSplitCSR(MachineFunction *MF) const override {
1534       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1535           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1536     }
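    // Note (assumption): with split CSR, saving/restoring callee-saved
    // registers moves out of the prologue/epilogue and into the entry/exit
    // blocks via initializeSplitCSR()/insertCopiesSplitCSR() below, letting
    // the CXX_FAST_TLS fast path skip the full spill/restore sequence.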
1537     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1538     void insertCopiesSplitCSR(
1539       MachineBasicBlock *Entry,
1540       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1541 
1542     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1543 
1544     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1545 
1546     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1547                             ISD::NodeType ExtendKind) const override;
1548 
1549     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1550                         bool isVarArg,
1551                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1552                         LLVMContext &Context) const override;
1553 
1554     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1555 
1556     TargetLoweringBase::AtomicExpansionKind
1557     shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1558     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1559     TargetLoweringBase::AtomicExpansionKind
1560     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1561 
1562     LoadInst *
1563     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1564 
1565     bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
1566     bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
1567 
1568     bool needsCmpXchgNb(Type *MemType) const;
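    // Assumption: "CmpXchgNb" refers to the double-width CMPXCHG8B/CMPXCHG16B
    // instructions, needed when the atomic type is twice the native word size
    // (i64 in 32-bit mode, i128 in 64-bit mode).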
1569 
1570     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1571                                 MachineBasicBlock *DispatchBB, int FI) const;
1572 
1573     // Utility function to emit the low-level va_arg code for X86-64.
1574     MachineBasicBlock *
1575     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1576                                   MachineBasicBlock *MBB) const;
1577 
1578     /// Utility function to emit the xmm reg save portion of va_start.
1579     MachineBasicBlock *
1580     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1581                                              MachineBasicBlock *BB) const;
1582 
1583     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1584                                                  MachineInstr &MI2,
1585                                                  MachineBasicBlock *BB) const;
1586 
1587     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1588                                          MachineBasicBlock *BB) const;
1589 
1590     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1591                                            MachineBasicBlock *BB) const;
1592 
1593     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1594                                             MachineBasicBlock *BB) const;
1595 
1596     MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
1597                                                MachineBasicBlock *BB) const;
1598 
1599     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1600                                           MachineBasicBlock *BB) const;
1601 
1602     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1603                                           MachineBasicBlock *BB) const;
1604 
1605     MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
1606                                                 MachineBasicBlock *BB) const;
1607 
1608     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1609                                         MachineBasicBlock *MBB) const;
1610 
1611     void emitSetJmpShadowStackFix(MachineInstr &MI,
1612                                   MachineBasicBlock *MBB) const;
1613 
1614     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1615                                          MachineBasicBlock *MBB) const;
1616 
1617     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1618                                                  MachineBasicBlock *MBB) const;
1619 
1620     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1621                                              MachineBasicBlock *MBB) const;
1622 
1623     /// Emit flags for the given setcc condition and operands. Also returns the
1624     /// corresponding X86 condition code constant in X86CC.
1625     SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
1626                               const SDLoc &dl, SelectionDAG &DAG,
1627                               SDValue &X86CC) const;
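    // Illustrative pairing (a sketch, not prescribed by this header): the
    // returned EFLAGS value and X86CC feed an X86ISD::SETCC node, whose
    // operand 0 is the condition code and operand 1 the EFLAGS operand:
    //   SDValue X86CC;
    //   SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
    //   SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);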
1628 
1629     /// Check if replacement of SQRT with RSQRT should be disabled.
1630     bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;
1631 
1632     /// Use rsqrt* to speed up sqrt calculations.
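    /// Each refinement step presumably applies one Newton-Raphson iteration
    /// to the hardware estimate, x1 = x0 * (1.5 - 0.5 * A * x0 * x0), roughly
    /// doubling the number of correct bits per step.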
1633     SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1634                             int &RefinementSteps, bool &UseOneConstNR,
1635                             bool Reciprocal) const override;
1636 
1637     /// Use rcp* to speed up fdiv calculations.
1638     SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
1639                              int &RefinementSteps) const override;
1640 
1641     /// Reassociate FP divisions into a multiply by the reciprocal.
1642     unsigned combineRepeatedFPDivisors() const override;
1643 
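    // Assumption: this specializes the generic signed divide-by-power-of-two
    // expansion, which is roughly
    //   Tmp = N + ((N >>s (BitWidth - 1)) >>u (BitWidth - Log2(Divisor)))
    //   Res = Tmp >>s Log2(Divisor)
    // i.e. add Divisor-1 when N is negative, then arithmetic shift right.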
1644     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1645                           SmallVectorImpl<SDNode *> &Created) const override;
1646   };
1647 
1648   namespace X86 {
1649     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1650                              const TargetLibraryInfo *libInfo);
1651   } // end namespace X86
1652 
1653   // X86-specific Gather/Scatter nodes.
1654   // The class uses the same operand order as MaskedGatherScatterSDNode for
1655   // convenience.
1656   class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1657   public:
1658     // This is intended as a utility and should never be created directly.
1659     X86MaskedGatherScatterSDNode() = delete;
1660     ~X86MaskedGatherScatterSDNode() = delete;
1661 
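    // Operand layout, as implied by the accessors below (operand 0 is
    // presumably the chain; operand 1 is the pass-through or stored value,
    // see the subclasses):
    //   2 = mask, 3 = base pointer, 4 = index, 5 = scale.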
1662     const SDValue &getBasePtr() const { return getOperand(3); }
1663     const SDValue &getIndex()   const { return getOperand(4); }
1664     const SDValue &getMask()    const { return getOperand(2); }
1665     const SDValue &getScale()   const { return getOperand(5); }
1666 
1667     static bool classof(const SDNode *N) {
1668       return N->getOpcode() == X86ISD::MGATHER ||
1669              N->getOpcode() == X86ISD::MSCATTER;
1670     }
1671   };
1672 
1673   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1674   public:
1675     const SDValue &getPassThru() const { return getOperand(1); }
1676 
1677     static bool classof(const SDNode *N) {
1678       return N->getOpcode() == X86ISD::MGATHER;
1679     }
1680   };
1681 
1682   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1683   public:
1684     const SDValue &getValue() const { return getOperand(1); }
1685 
1686     static bool classof(const SDNode *N) {
1687       return N->getOpcode() == X86ISD::MSCATTER;
1688     }
1689   };
1690 
1691   /// Generate unpacklo/unpackhi shuffle mask.
1692   void createUnpackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo,
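  /// For example, for a 256-bit v8i32 type with two 128-bit lanes this
  /// presumably yields:
  ///   Lo, Unary:  <0, 0, 1, 1, 4, 4, 5, 5>
  ///   Lo, Binary: <0, 8, 1, 9, 4, 12, 5, 13>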
1693                                bool Unary);
1694 
1695   /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
1696   /// imposed by AVX and specific to the unary pattern. Example:
1697   /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
1698   /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
1699   void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
1700 
1701 } // end namespace llvm
1702 
1703 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
1704