xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h (revision 5e801ac66d24704442eba426ed13c3effb8a34e7)
1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16 
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/TargetLowering.h"
19 
20 namespace llvm {
21   class X86Subtarget;
22   class X86TargetMachine;
23 
  namespace X86ISD {
    // X86 Specific DAG Nodes
  enum NodeType : unsigned {
    // Start the numbering where the builtin ops leave off.
    FIRST_NUMBER = ISD::BUILTIN_OP_END,

    /// Bit scan forward.
    BSF,
    /// Bit scan reverse.
    BSR,

    /// X86 funnel/double shift i16 instructions. These correspond to
    /// X86::SHLDW and X86::SHRDW instructions which have different amt
    /// modulo rules to generic funnel shifts.
    /// NOTE: The operand order matches ISD::FSHL/FSHR not SHLD/SHRD.
    FSHL,
    FSHR,

    /// Bitwise logical AND of floating point values. This corresponds
    /// to X86::ANDPS or X86::ANDPD.
    FAND,

    /// Bitwise logical OR of floating point values. This corresponds
    /// to X86::ORPS or X86::ORPD.
    FOR,

    /// Bitwise logical XOR of floating point values. This corresponds
    /// to X86::XORPS or X86::XORPD.
    FXOR,

    /// Bitwise logical ANDNOT of floating point values. This
    /// corresponds to X86::ANDNPS or X86::ANDNPD.
    FANDN,

    /// These operations represent an abstract X86 call
    /// instruction, which includes a bunch of information.  In particular the
    /// operands of these node are:
    ///
    ///     #0 - The incoming token chain
    ///     #1 - The callee
    ///     #2 - The number of arg bytes the caller pushes on the stack.
    ///     #3 - The number of arg bytes the callee pops off the stack.
    ///     #4 - The value to pass in AL/AX/EAX (optional)
    ///     #5 - The value to pass in DL/DX/EDX (optional)
    ///
    /// The result values of these nodes are:
    ///
    ///     #0 - The outgoing token chain
    ///     #1 - The first register result value (optional)
    ///     #2 - The second register result value (optional)
    ///
    CALL,

    /// Same as call except it adds the NoTrack prefix.
    NT_CALL,

    // Pseudo for an ObjC call that gets emitted together with a special
    // marker instruction.
    CALL_RVMARKER,

    /// X86 compare and logical compare instructions.
    CMP,
    FCMP,
    COMI,
    UCOMI,

    /// X86 bit-test instructions.
    BT,

    /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
    /// operand, usually produced by a CMP instruction.
    SETCC,

    /// X86 Select
    SELECTS,

    // Same as SETCC except it's materialized with a sbb and the value is all
    // ones or all zeros.
    SETCC_CARRY, // R = carry_bit ? ~0 : 0

    /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
    /// Operands are two FP values to compare; result is a mask of
    /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
    FSETCC,

    /// X86 FP SETCC, similar to above, but with output as an i1 mask
    /// and a version with SAE.
    FSETCCM,
    FSETCCM_SAE,

    /// X86 conditional moves. Operand 0 and operand 1 are the two values
    /// to select from. Operand 2 is the condition code, and operand 3 is the
    /// flag operand produced by a CMP or TEST instruction.
    CMOV,

    /// X86 conditional branches. Operand 0 is the chain operand, operand 1
    /// is the block to branch if condition is true, operand 2 is the
    /// condition code, and operand 3 is the flag operand produced by a CMP
    /// or TEST instruction.
    BRCOND,

    /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
    /// operand 1 is the target address.
    NT_BRIND,

    /// Return with a flag operand. Operand 0 is the chain operand, operand
    /// 1 is the number of bytes of stack to pop.
    RET_FLAG,

    /// Return from interrupt. Operand 0 is the number of bytes to pop.
    IRET,

    /// Repeat fill, corresponds to X86::REP_STOSx.
    REP_STOS,

    /// Repeat move, corresponds to X86::REP_MOVSx.
    REP_MOVS,

    /// On Darwin, this node represents the result of the popl
    /// at function entry, used for PIC code.
    GlobalBaseReg,

    /// A wrapper node for TargetConstantPool, TargetJumpTable,
    /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
    /// MCSymbol and TargetBlockAddress.
    Wrapper,

    /// Special wrapper used under X86-64 PIC mode for RIP
    /// relative displacements.
    WrapperRIP,

    /// Copies a 64-bit value from an MMX vector to the low word
    /// of an XMM vector, with the high word zero filled.
    MOVQ2DQ,

    /// Copies a 64-bit value from the low word of an XMM vector
    /// to an MMX vector.
    MOVDQ2Q,

    /// Copies a 32-bit value from the low word of a MMX
    /// vector to a GPR.
    MMX_MOVD2W,

    /// Copies a GPR into the low 32-bit word of a MMX vector
    /// and zero out the high word.
    MMX_MOVW2D,

    /// Extract an 8-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRB.
    PEXTRB,

    /// Extract a 16-bit value from a vector and zero extend it to
    /// i32, corresponds to X86::PEXTRW.
    PEXTRW,

    /// Insert any element of a 4 x float vector into any element
    /// of a destination 4 x float vector.
    INSERTPS,

    /// Insert the lower 8-bits of a 32-bit value to a vector,
    /// corresponds to X86::PINSRB.
    PINSRB,

    /// Insert the lower 16-bits of a 32-bit value to a vector,
    /// corresponds to X86::PINSRW.
    PINSRW,

    /// Shuffle 16 8-bit values within a vector.
    PSHUFB,

    /// Compute Sum of Absolute Differences.
    PSADBW,
    /// Compute Double Block Packed Sum-Absolute-Differences
    DBPSADBW,

    /// Bitwise Logical AND NOT of Packed FP values.
    ANDNP,

    /// Blend where the selector is an immediate.
    BLENDI,

    /// Dynamic (non-constant condition) vector blend where only the sign bits
    /// of the condition elements are used. This is used to enforce that the
    /// condition mask is not valid for generic VSELECT optimizations. This
    /// is also used to implement the intrinsics.
    /// Operands are in VSELECT order: MASK, TRUE, FALSE
    BLENDV,

    /// Combined add and sub on an FP vector.
    ADDSUB,

    //  FP vector ops with rounding mode.
    FADD_RND,
    FADDS,
    FADDS_RND,
    FSUB_RND,
    FSUBS,
    FSUBS_RND,
    FMUL_RND,
    FMULS,
    FMULS_RND,
    FDIV_RND,
    FDIVS,
    FDIVS_RND,
    FMAX_SAE,
    FMAXS_SAE,
    FMIN_SAE,
    FMINS_SAE,
    FSQRT_RND,
    FSQRTS,
    FSQRTS_RND,

    // FP vector get exponent.
    FGETEXP,
    FGETEXP_SAE,
    FGETEXPS,
    FGETEXPS_SAE,
    // Extract Normalized Mantissas.
    VGETMANT,
    VGETMANT_SAE,
    VGETMANTS,
    VGETMANTS_SAE,
    // FP Scale.
    SCALEF,
    SCALEF_RND,
    SCALEFS,
    SCALEFS_RND,

    // Unsigned Integer average.
    AVG,

    /// Integer horizontal add/sub.
    HADD,
    HSUB,

    /// Floating point horizontal add/sub.
    FHADD,
    FHSUB,

    // Detect Conflicts Within a Vector
    CONFLICT,

    /// Floating point max and min.
    FMAX,
    FMIN,

    /// Commutative FMIN and FMAX.
    FMAXC,
    FMINC,

    /// Scalar intrinsic floating point max and min.
    FMAXS,
    FMINS,

    /// Floating point reciprocal-sqrt and reciprocal approximation.
    /// Note that these typically require refinement
    /// in order to obtain suitable precision.
    FRSQRT,
    FRCP,

    // AVX-512 reciprocal approximations with a little more precision.
    RSQRT14,
    RSQRT14S,
    RCP14,
    RCP14S,

    // Thread Local Storage.
    TLSADDR,

    // Thread Local Storage. A call to get the start address
    // of the TLS block for the current module.
    TLSBASEADDR,

    // Thread Local Storage.  When calling to an OS provided
    // thunk at the address from an earlier relocation.
    TLSCALL,

    // Exception Handling helpers.
    EH_RETURN,

    // SjLj exception handling setjmp.
    EH_SJLJ_SETJMP,

    // SjLj exception handling longjmp.
    EH_SJLJ_LONGJMP,

    // SjLj exception handling dispatch.
    EH_SJLJ_SETUP_DISPATCH,

    /// Tail call return. See X86TargetLowering::LowerCall for
    /// the list of operands.
    TC_RETURN,

    // Vector move to low scalar and zero higher vector elements.
    VZEXT_MOVL,

    // Vector integer truncate.
    VTRUNC,
    // Vector integer truncate with unsigned/signed saturation.
    VTRUNCUS,
    VTRUNCS,

    // Masked version of the above. Used when less than a 128-bit result is
    // produced since the mask only applies to the lower elements and can't
    // be represented by a select.
    // SRC, PASSTHRU, MASK
    VMTRUNC,
    VMTRUNCUS,
    VMTRUNCS,

    // Vector FP extend.
    VFPEXT,
    VFPEXT_SAE,
    VFPEXTS,
    VFPEXTS_SAE,

    // Vector FP round.
    VFPROUND,
    VFPROUND_RND,
    VFPROUNDS,
    VFPROUNDS_RND,

    // Masked version of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    VMFPROUND,

    // 128-bit vector logical left / right shift
    VSHLDQ,
    VSRLDQ,

    // Vector shift elements
    VSHL,
    VSRL,
    VSRA,

    // Vector variable shift
    VSHLV,
    VSRLV,
    VSRAV,

    // Vector shift elements by immediate
    VSHLI,
    VSRLI,
    VSRAI,

    // Shifts of mask registers.
    KSHIFTL,
    KSHIFTR,

    // Bit rotate by immediate
    VROTLI,
    VROTRI,

    // Vector packed double/float comparison.
    CMPP,

    // Vector integer comparisons.
    PCMPEQ,
    PCMPGT,

    // v8i16 Horizontal minimum and position.
    PHMINPOS,

    MULTISHIFT,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    CMPM,
    // Vector mask comparison generating mask bits for FP values.
    CMPMM,
    // Vector mask comparison with SAE for FP values.
    CMPMM_SAE,

    // Arithmetic operations with FLAGS results.
    ADD,
    SUB,
    ADC,
    SBB,
    SMUL,
    UMUL,
    OR,
    XOR,
    AND,

    // Bit field extract.
    BEXTR,
    BEXTRI,

    // Zero High Bits Starting with Specified Bit Position.
    BZHI,

    // Parallel extract and deposit.
    PDEP,
    PEXT,

    // X86-specific multiply by immediate.
    MUL_IMM,

    // Vector sign bit extraction.
    MOVMSK,

    // Vector bitwise comparisons.
    PTEST,

    // Vector packed fp sign bitwise comparisons.
    TESTP,

    // OR/AND test for masks.
    KORTEST,
    KTEST,

    // ADD for masks.
    KADD,

    // Several flavors of instructions with vector shuffle behaviors.
    // Saturated signed/unsigned packing.
    PACKSS,
    PACKUS,
    // Intra-lane alignr.
    PALIGNR,
    // AVX512 inter-lane alignr.
    VALIGN,
    PSHUFD,
    PSHUFHW,
    PSHUFLW,
    SHUFP,
    // VBMI2 Concat & Shift.
    VSHLD,
    VSHRD,
    VSHLDV,
    VSHRDV,
    // Shuffle Packed Values at 128-bit granularity.
    SHUF128,
    MOVDDUP,
    MOVSHDUP,
    MOVSLDUP,
    MOVLHPS,
    MOVHLPS,
    MOVSD,
    MOVSS,
    MOVSH,
    UNPCKL,
    UNPCKH,
    VPERMILPV,
    VPERMILPI,
    VPERMI,
    VPERM2X128,

    // Variable Permute (VPERM).
    // Res = VPERMV MaskV, V0
    VPERMV,

    // 3-op Variable Permute (VPERMT2).
    // Res = VPERMV3 V0, MaskV, V1
    VPERMV3,

    // Bitwise ternary logic.
    VPTERNLOG,
    // Fix Up Special Packed Float32/64 values.
    VFIXUPIMM,
    VFIXUPIMM_SAE,
    VFIXUPIMMS,
    VFIXUPIMMS_SAE,
    // Range Restriction Calculation For Packed Pairs of Float32/64 values.
    VRANGE,
    VRANGE_SAE,
    VRANGES,
    VRANGES_SAE,
    // Reduce - Perform Reduction Transformation on scalar/packed FP.
    VREDUCE,
    VREDUCE_SAE,
    VREDUCES,
    VREDUCES_SAE,
    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    VRNDSCALE,
    VRNDSCALE_SAE,
    VRNDSCALES,
    VRNDSCALES_SAE,
    // Tests Types Of a FP Values for packed types.
    VFPCLASS,
    // Tests Types Of a FP Values for scalar types.
    VFPCLASSS,

    // Broadcast (splat) scalar or element 0 of a vector. If the operand is
    // a vector, this node may change the vector length as part of the splat.
    VBROADCAST,
    // Broadcast mask to vector.
    VBROADCASTM,

    /// SSE4A Extraction and Insertion.
    EXTRQI,
    INSERTQI,

    // XOP arithmetic/logical shifts.
    VPSHA,
    VPSHL,
    // XOP signed/unsigned integer comparisons.
    VPCOM,
    VPCOMU,
    // XOP packed permute bytes.
    VPPERM,
    // XOP two source permutation.
    VPERMIL2,

    // Vector multiply packed unsigned doubleword integers.
    PMULUDQ,
    // Vector multiply packed signed doubleword integers.
    PMULDQ,
    // Vector Multiply Packed Unsigned Integers with Round and Scale.
    MULHRS,

    // Multiply and Add Packed Integers.
    VPMADDUBSW,
    VPMADDWD,

    // AVX512IFMA multiply and add.
    // NOTE: These are different than the instruction and perform
    // op0 x op1 + op2.
    VPMADD52L,
    VPMADD52H,

    // VNNI
    VPDPBUSD,
    VPDPBUSDS,
    VPDPWSSD,
    VPDPWSSDS,

    // FMA nodes.
    // We use the target independent ISD::FMA for the non-inverted case.
    FNMADD,
    FMSUB,
    FNMSUB,
    FMADDSUB,
    FMSUBADD,

    // FMA with rounding mode.
    FMADD_RND,
    FNMADD_RND,
    FMSUB_RND,
    FNMSUB_RND,
    FMADDSUB_RND,
    FMSUBADD_RND,

    // AVX512-FP16 complex addition and multiplication.
    VFMADDC,
    VFMADDC_RND,
    VFCMADDC,
    VFCMADDC_RND,

    VFMULC,
    VFMULC_RND,
    VFCMULC,
    VFCMULC_RND,

    VFMADDCSH,
    VFMADDCSH_RND,
    VFCMADDCSH,
    VFCMADDCSH_RND,

    VFMULCSH,
    VFMULCSH_RND,
    VFCMULCSH,
    VFCMULCSH_RND,

    // Compress and expand.
    COMPRESS,
    EXPAND,

    // Bits shuffle
    VPSHUFBITQMB,

    // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
    SINT_TO_FP_RND,
    UINT_TO_FP_RND,
    SCALAR_SINT_TO_FP,
    SCALAR_UINT_TO_FP,
    SCALAR_SINT_TO_FP_RND,
    SCALAR_UINT_TO_FP_RND,

    // Vector float/double to signed/unsigned integer.
    CVTP2SI,
    CVTP2UI,
    CVTP2SI_RND,
    CVTP2UI_RND,
    // Scalar float/double to signed/unsigned integer.
    CVTS2SI,
    CVTS2UI,
    CVTS2SI_RND,
    CVTS2UI_RND,

    // Vector float/double to signed/unsigned integer with truncation.
    CVTTP2SI,
    CVTTP2UI,
    CVTTP2SI_SAE,
    CVTTP2UI_SAE,
    // Scalar float/double to signed/unsigned integer with truncation.
    CVTTS2SI,
    CVTTS2UI,
    CVTTS2SI_SAE,
    CVTTS2UI_SAE,

    // Vector signed/unsigned integer to float/double.
    CVTSI2P,
    CVTUI2P,

    // Masked versions of above. Used for v2f64->v4f32.
    // SRC, PASSTHRU, MASK
    MCVTP2SI,
    MCVTP2UI,
    MCVTTP2SI,
    MCVTTP2UI,
    MCVTSI2P,
    MCVTUI2P,

    // Vector float to bfloat16.
    // Convert TWO packed single data to one packed BF16 data
    CVTNE2PS2BF16,
    // Convert packed single data to packed BF16 data
    CVTNEPS2BF16,
    // Masked version of above.
    // SRC, PASSTHRU, MASK
    MCVTNEPS2BF16,

    // Dot product of BF16 pairs accumulated into
    // packed single precision.
    DPBF16PS,

    // A stack checking function call. On Windows it's _chkstk call.
    DYN_ALLOCA,

    // For allocating variable amounts of stack space when using
    // segmented stacks. Check if the current stacklet has enough space, and
    // falls back to heap allocation if not.
    SEG_ALLOCA,

    // For allocating stack space when using stack clash protector.
    // Allocation is performed by block, and each block is probed.
    PROBED_ALLOCA,

    // Memory barriers.
    MEMBARRIER,
    MFENCE,

    // Get a random integer and indicate whether it is valid in CF.
    RDRAND,

    // Get a NIST SP800-90B & C compliant random integer and
    // indicate whether it is valid in CF.
    RDSEED,

    // Protection keys
    // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
    // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
    // value for ECX.
    RDPKRU,
    WRPKRU,

    // SSE42 string comparisons.
    // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
    // will emit one or two instructions based on which results are used. If
    // flags and index/mask are both used, this allows us to use a single
    // instruction since we won't have to pick an opcode for flags. Instead we
    // can rely on the DAG to CSE everything and decide at isel.
    PCMPISTR,
    PCMPESTR,

    // Test if in transactional execution.
    XTEST,

    // ERI instructions.
    RSQRT28,
    RSQRT28_SAE,
    RSQRT28S,
    RSQRT28S_SAE,
    RCP28,
    RCP28_SAE,
    RCP28S,
    RCP28S_SAE,
    EXP2,
    EXP2_SAE,

    // Conversions between float and half-float.
    CVTPS2PH,
    CVTPH2PS,
    CVTPH2PS_SAE,

    // Masked version of above.
    // SRC, RND, PASSTHRU, MASK
    MCVTPS2PH,

    // Galois Field Arithmetic Instructions
    GF2P8AFFINEINVQB,
    GF2P8AFFINEQB,
    GF2P8MULB,

    // LWP insert record.
    LWPINS,

    // User level wait
    UMWAIT,
    TPAUSE,

    // Enqueue Stores Instructions
    ENQCMD,
    ENQCMDS,

    // For avx512-vp2intersect
    VP2INTERSECT,

    // User level interrupts - testui
    TESTUI,

    /// X86 strict FP compare instructions.
    STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
    STRICT_FCMPS,

    // Vector packed double/float comparison.
    STRICT_CMPP,

    /// Vector comparison generating mask bits for fp and
    /// integer signed and unsigned data types.
    STRICT_CMPM,

    // Vector float/double to signed/unsigned integer with truncation.
    STRICT_CVTTP2SI,
    STRICT_CVTTP2UI,

    // Vector FP extend.
    STRICT_VFPEXT,

    // Vector FP round.
    STRICT_VFPROUND,

    // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
    // Also used by the legacy (V)ROUND intrinsics where we mask out the
    // scaling part of the immediate.
    STRICT_VRNDSCALE,

    // Vector signed/unsigned integer to float/double.
    STRICT_CVTSI2P,
    STRICT_CVTUI2P,

    // Strict FMA nodes.
    STRICT_FNMADD,
    STRICT_FMSUB,
    STRICT_FNMSUB,

    // Conversions between float and half-float.
    STRICT_CVTPS2PH,
    STRICT_CVTPH2PS,

    // WARNING: Only add nodes here if they are strict FP nodes. Non-memory and
    // non-strict FP nodes should be above FIRST_TARGET_STRICTFP_OPCODE.

    // Compare and swap.
    LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
    LCMPXCHG8_DAG,
    LCMPXCHG16_DAG,
    LCMPXCHG16_SAVE_RBX_DAG,

    /// LOCK-prefixed arithmetic read-modify-write instructions.
    /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
    LADD,
    LSUB,
    LOR,
    LXOR,
    LAND,

    // Load, scalar_to_vector, and zero extend.
    VZEXT_LOAD,

    // extract_vector_elt, store.
    VEXTRACT_STORE,

    // scalar broadcast from memory.
    VBROADCAST_LOAD,

    // subvector broadcast from memory.
    SUBV_BROADCAST_LOAD,

    // Store FP control word into i16 memory.
    FNSTCW16m,

    // Load FP control word from i16 memory.
    FLDCW16m,

    /// This instruction implements FP_TO_SINT with the
    /// integer destination in memory and a FP reg source.  This corresponds
    /// to the X86::FIST*m instructions and the rounding mode change stuff. It
    /// has two inputs (token chain and address) and two outputs (int value
    /// and token chain). Memory VT specifies the type to store to.
    FP_TO_INT_IN_MEM,

    /// This instruction implements SINT_TO_FP with the
    /// integer source in memory and FP reg result.  This corresponds to the
    /// X86::FILD*m instructions. It has two inputs (token chain and address)
    /// and two outputs (FP value and token chain). The integer source type is
    /// specified by the memory VT.
    FILD,

    /// This instruction implements a fp->int store from FP stack
    /// slots. This corresponds to the fist instruction. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FIST,

    /// This instruction implements an extending load to FP stack slots.
    /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
    /// operand, and ptr to load from. The memory VT specifies the type to
    /// load from.
    FLD,

    /// This instruction implements a truncating store from FP stack
    /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
    /// chain operand, value to store, address, and glue. The memory VT
    /// specifies the type to store as.
    FST,

    /// These instructions grab the address of the next argument
    /// from a va_list. (reads and modifies the va_list in memory)
    VAARG_64,
    VAARG_X32,

    // Vector truncating store with unsigned/signed saturation
    VTRUNCSTOREUS,
    VTRUNCSTORES,
    // Vector truncating masked store with unsigned/signed saturation
    VMTRUNCSTOREUS,
    VMTRUNCSTORES,

    // X86 specific gather and scatter
    MGATHER,
    MSCATTER,

    // Key locker nodes that produce flags.
    AESENC128KL,
    AESDEC128KL,
    AESENC256KL,
    AESDEC256KL,
    AESENCWIDE128KL,
    AESDECWIDE128KL,
    AESENCWIDE256KL,
    AESDECWIDE256KL,

    // Save xmm argument registers to the stack, according to %al. An operator
    // is needed so that this can be expanded with control flow.
    VASTART_SAVE_XMM_REGS,

    // WARNING: Do not add anything in the end unless you want the node to
    // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
    // opcodes will be thought as target memory ops!
  };
  } // end namespace X86ISD
879 
  namespace X86 {
    /// Current rounding mode is represented in bits 11:10 of FPSR. These
    /// values are the same as the corresponding glibc rounding-mode
    /// constants (FE_TONEAREST etc.), pre-shifted into bit positions 11:10.
    /// NOTE(review): on x86 the rounding-control field lives in the x87 FP
    /// control word; "FPSR" here follows the original wording — confirm.
    enum RoundingMode {
      rmToNearest   = 0,        // FE_TONEAREST
      rmDownward    = 1 << 10,  // FE_DOWNWARD
      rmUpward      = 2 << 10,  // FE_UPWARD
      rmTowardZero  = 3 << 10,  // FE_TOWARDZERO
      rmMask        = 3 << 10   // Bit mask selecting rounding mode
    };
  } // end namespace X86
892 
  /// Define some predicates that are used for node matching.
  namespace X86 {
    /// Returns true if Elt is a constant zero or floating point constant +0.0.
    bool isZeroNode(SDValue Elt);

    /// Returns true if the given offset can
    /// fit into the displacement field of the instruction.
    bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
                                      bool hasSymbolicDisplacement);

    /// Determines whether the callee is required to pop its
    /// own arguments. Callee pop is necessary to support tail calls.
    bool isCalleePop(CallingConv::ID CallingConv,
                     bool is64Bit, bool IsVarArg, bool GuaranteeTCO);

    /// If Op is a constant whose elements are all the same constant or
    /// undefined, return true and return the constant value in \p SplatVal.
    /// If we have undef bits that don't cover an entire element, we treat these
    /// as zero if AllowPartialUndefs is set, else we fail and return false.
    bool isConstantSplat(SDValue Op, APInt &SplatVal,
                         bool AllowPartialUndefs = true);

    /// Check if Op is a load operation that could be folded into some other x86
    /// instruction as a memory operand. Example: vpaddd (%rdi), %xmm0, %xmm0.
    bool mayFoldLoad(SDValue Op, const X86Subtarget &Subtarget,
                     bool AssumeSingleUse = false);

    /// Check if Op is a load operation that could be folded into a vector splat
    /// instruction as a memory operand. Example: vbroadcastss 16(%rdi), %xmm2.
    bool mayFoldLoadIntoBroadcastFromMem(SDValue Op, MVT EltVT,
                                         const X86Subtarget &Subtarget,
                                         bool AssumeSingleUse = false);

    /// Check if Op is a value that could be used to fold a store into some
    /// other x86 instruction as a memory operand. Ex: pextrb $0, %xmm0, (%rdi).
    bool mayFoldIntoStore(SDValue Op);

    /// Check if Op is an operation that could be folded into a zero extend x86
    /// instruction.
    bool mayFoldIntoZeroExtend(SDValue Op);
  } // end namespace X86
934 
935   //===--------------------------------------------------------------------===//
936   //  X86 Implementation of the TargetLowering interface
937   class X86TargetLowering final : public TargetLowering {
938   public:
939     explicit X86TargetLowering(const X86TargetMachine &TM,
940                                const X86Subtarget &STI);
941 
942     unsigned getJumpTableEncoding() const override;
943     bool useSoftFloat() const override;
944 
945     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
946                                ArgListTy &Args) const override;
947 
    /// Shift amounts are always i8 on x86, independent of the type being
    /// shifted (x86 shift instructions take an 8-bit immediate or CL).
    MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
      return MVT::i8;
    }
951 
952     const MCExpr *
953     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
954                               const MachineBasicBlock *MBB, unsigned uid,
955                               MCContext &Ctx) const override;
956 
957     /// Returns relocation base for the given PIC jumptable.
958     SDValue getPICJumpTableRelocBase(SDValue Table,
959                                      SelectionDAG &DAG) const override;
960     const MCExpr *
961     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
962                                  unsigned JTI, MCContext &Ctx) const override;
963 
964     /// Return the desired alignment for ByVal aggregate
965     /// function arguments in the caller parameter area. For X86, aggregates
966     /// that contains are placed at 16-byte boundaries while the rest are at
967     /// 4-byte boundaries.
968     uint64_t getByValTypeAlignment(Type *Ty,
969                                    const DataLayout &DL) const override;
970 
971     EVT getOptimalMemOpType(const MemOp &Op,
972                             const AttributeList &FuncAttributes) const override;
973 
974     /// Returns true if it's safe to use load / store of the
975     /// specified type to expand memcpy / memset inline. This is mostly true
976     /// for all types except for some special cases. For example, on X86
977     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
978     /// also does type conversion. Note the specified type doesn't have to be
979     /// legal as the hook is used before type legalization.
980     bool isSafeMemOpType(MVT VT) const override;
981 
982     /// Returns true if the target allows unaligned memory accesses of the
983     /// specified type. Returns whether it is "fast" in the last argument.
984     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment,
985                                         MachineMemOperand::Flags Flags,
986                                         bool *Fast) const override;
987 
988     /// Provide custom lowering hooks for some operations.
989     ///
990     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
991 
992     /// Replace the results of node with an illegal result
993     /// type with new values built out of custom code.
994     ///
995     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
996                             SelectionDAG &DAG) const override;
997 
998     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
999 
1000     /// Return true if the target has native support for
1001     /// the specified value type and it is 'desirable' to use the type for the
1002     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
1003     /// instruction encodings are longer and some i16 instructions are slow.
1004     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
1005 
1006     /// Return true if the target has native support for the
1007     /// specified value type and it is 'desirable' to use the type. e.g. On x86
1008     /// i16 is legal, but undesirable since i16 instruction encodings are longer
1009     /// and some i16 instructions are slow.
1010     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
1011 
1012     /// Return the newly negated expression if the cost is not expensive and
1013     /// set the cost in \p Cost to indicate that if it is cheaper or neutral to
1014     /// do the negation.
1015     SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
1016                                  bool LegalOperations, bool ForCodeSize,
1017                                  NegatibleCost &Cost,
1018                                  unsigned Depth) const override;
1019 
1020     MachineBasicBlock *
1021     EmitInstrWithCustomInserter(MachineInstr &MI,
1022                                 MachineBasicBlock *MBB) const override;
1023 
1024     /// This method returns the name of a target specific DAG node.
1025     const char *getTargetNodeName(unsigned Opcode) const override;
1026 
1027     /// Do not merge vector stores after legalization because that may conflict
1028     /// with x86-specific store splitting optimizations.
1029     bool mergeStoresAfterLegalization(EVT MemVT) const override {
1030       return !MemVT.isVector();
1031     }
1032 
1033     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
1034                           const MachineFunction &MF) const override;
1035 
1036     bool isCheapToSpeculateCttz() const override;
1037 
1038     bool isCheapToSpeculateCtlz() const override;
1039 
1040     bool isCtlzFast() const override;
1041 
1042     bool hasBitPreservingFPLogic(EVT VT) const override {
1043       return VT == MVT::f32 || VT == MVT::f64 || VT.isVector() ||
1044              (VT == MVT::f16 && X86ScalarSSEf16);
1045     }
1046 
1047     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
1048       // If the pair to store is a mixture of float and int values, we will
1049       // save two bitwise instructions and one float-to-int instruction and
1050       // increase one store instruction. There is potentially a more
1051       // significant benefit because it avoids the float->int domain switch
1052       // for input value. So It is more likely a win.
1053       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
1054           (LTy.isInteger() && HTy.isFloatingPoint()))
1055         return true;
1056       // If the pair only contains int values, we will save two bitwise
1057       // instructions and increase one store instruction (costing one more
1058       // store buffer). Since the benefit is more blurred so we leave
1059       // such pair out until we get testcase to prove it is a win.
1060       return false;
1061     }
1062 
1063     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
1064 
1065     bool hasAndNotCompare(SDValue Y) const override;
1066 
1067     bool hasAndNot(SDValue Y) const override;
1068 
1069     bool hasBitTest(SDValue X, SDValue Y) const override;
1070 
1071     bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
1072         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
1073         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
1074         SelectionDAG &DAG) const override;
1075 
1076     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
1077                                            CombineLevel Level) const override;
1078 
1079     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
1080 
1081     bool
1082     shouldTransformSignedTruncationCheck(EVT XVT,
1083                                          unsigned KeptBits) const override {
1084       // For vectors, we don't have a preference..
1085       if (XVT.isVector())
1086         return false;
1087 
1088       auto VTIsOk = [](EVT VT) -> bool {
1089         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
1090                VT == MVT::i64;
1091       };
1092 
1093       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
1094       // XVT will be larger than KeptBitsVT.
1095       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
1096       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
1097     }
1098 
1099     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
1100 
1101     bool shouldSplatInsEltVarIndex(EVT VT) const override;
1102 
1103     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
1104       return VT.isScalarInteger();
1105     }
1106 
1107     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
1108     MVT hasFastEqualityCompare(unsigned NumBits) const override;
1109 
1110     /// Return the value type to use for ISD::SETCC.
1111     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
1112                            EVT VT) const override;
1113 
1114     bool targetShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
1115                                       const APInt &DemandedElts,
1116                                       TargetLoweringOpt &TLO) const override;
1117 
1118     /// Determine which of the bits specified in Mask are known to be either
1119     /// zero or one and return them in the KnownZero/KnownOne bitsets.
1120     void computeKnownBitsForTargetNode(const SDValue Op,
1121                                        KnownBits &Known,
1122                                        const APInt &DemandedElts,
1123                                        const SelectionDAG &DAG,
1124                                        unsigned Depth = 0) const override;
1125 
1126     /// Determine the number of bits in the operation that are sign bits.
1127     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
1128                                              const APInt &DemandedElts,
1129                                              const SelectionDAG &DAG,
1130                                              unsigned Depth) const override;
1131 
1132     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
1133                                                  const APInt &DemandedElts,
1134                                                  APInt &KnownUndef,
1135                                                  APInt &KnownZero,
1136                                                  TargetLoweringOpt &TLO,
1137                                                  unsigned Depth) const override;
1138 
1139     bool SimplifyDemandedVectorEltsForTargetShuffle(SDValue Op,
1140                                                     const APInt &DemandedElts,
1141                                                     unsigned MaskIndex,
1142                                                     TargetLoweringOpt &TLO,
1143                                                     unsigned Depth) const;
1144 
1145     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
1146                                            const APInt &DemandedBits,
1147                                            const APInt &DemandedElts,
1148                                            KnownBits &Known,
1149                                            TargetLoweringOpt &TLO,
1150                                            unsigned Depth) const override;
1151 
1152     SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
1153         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
1154         SelectionDAG &DAG, unsigned Depth) const override;
1155 
1156     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
1157 
1158     SDValue unwrapAddress(SDValue N) const override;
1159 
1160     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
1161 
1162     bool ExpandInlineAsm(CallInst *CI) const override;
1163 
1164     ConstraintType getConstraintType(StringRef Constraint) const override;
1165 
1166     /// Examine constraint string and operand type and determine a weight value.
1167     /// The operand object must already have been set up with the operand type.
1168     ConstraintWeight
1169       getSingleConstraintMatchWeight(AsmOperandInfo &info,
1170                                      const char *constraint) const override;
1171 
1172     const char *LowerXConstraint(EVT ConstraintVT) const override;
1173 
1174     /// Lower the specified operand into the Ops vector. If it is invalid, don't
1175     /// add anything to Ops. If hasMemory is true it means one of the asm
1176     /// constraint of the inline asm instruction being processed is 'm'.
1177     void LowerAsmOperandForConstraint(SDValue Op,
1178                                       std::string &Constraint,
1179                                       std::vector<SDValue> &Ops,
1180                                       SelectionDAG &DAG) const override;
1181 
1182     unsigned
1183     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1184       if (ConstraintCode == "v")
1185         return InlineAsm::Constraint_v;
1186       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1187     }
1188 
1189     /// Handle Lowering flag assembly outputs.
1190     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
1191                                         const SDLoc &DL,
1192                                         const AsmOperandInfo &Constraint,
1193                                         SelectionDAG &DAG) const override;
1194 
1195     /// Given a physical register constraint
1196     /// (e.g. {edx}), return the register number and the register class for the
1197     /// register.  This should only be used for C_Register constraints.  On
1198     /// error, this returns a register number of 0.
1199     std::pair<unsigned, const TargetRegisterClass *>
1200     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1201                                  StringRef Constraint, MVT VT) const override;
1202 
1203     /// Return true if the addressing mode represented
1204     /// by AM is legal for this target, for a load/store of the specified type.
1205     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1206                                Type *Ty, unsigned AS,
1207                                Instruction *I = nullptr) const override;
1208 
1209     /// Return true if the specified immediate is legal
1210     /// icmp immediate, that is the target has icmp instructions which can
1211     /// compare a register against the immediate without having to materialize
1212     /// the immediate into a register.
1213     bool isLegalICmpImmediate(int64_t Imm) const override;
1214 
1215     /// Return true if the specified immediate is legal
1216     /// add immediate, that is the target has add instructions which can
1217     /// add a register and the immediate without having to materialize
1218     /// the immediate into a register.
1219     bool isLegalAddImmediate(int64_t Imm) const override;
1220 
1221     bool isLegalStoreImmediate(int64_t Imm) const override;
1222 
1223     /// Return the cost of the scaling factor used in the addressing
1224     /// mode represented by AM for this target, for a load/store
1225     /// of the specified type.
1226     /// If the AM is supported, the return value must be >= 0.
1227     /// If the AM is not supported, it returns a negative value.
1228     InstructionCost getScalingFactorCost(const DataLayout &DL,
1229                                          const AddrMode &AM, Type *Ty,
1230                                          unsigned AS) const override;
1231 
1232     /// This is used to enable splatted operand transforms for vector shifts
1233     /// and vector funnel shifts.
1234     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1235 
1236     /// Add x86-specific opcodes to the default list.
1237     bool isBinOp(unsigned Opcode) const override;
1238 
1239     /// Returns true if the opcode is a commutative binary operation.
1240     bool isCommutativeBinOp(unsigned Opcode) const override;
1241 
1242     /// Return true if it's free to truncate a value of
1243     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1244     /// register EAX to i16 by referencing its sub-register AX.
1245     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1246     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1247 
1248     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1249 
1250     /// Return true if any actual instruction that defines a
1251     /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1252     /// register. This does not necessarily include registers defined in
1253     /// unknown ways, such as incoming arguments, or copies from unknown
1254     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1255     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1256     /// all instructions that define 32-bit values implicit zero-extend the
1257     /// result out to 64 bits.
1258     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1259     bool isZExtFree(EVT VT1, EVT VT2) const override;
1260     bool isZExtFree(SDValue Val, EVT VT2) const override;
1261 
1262     bool shouldSinkOperands(Instruction *I,
1263                             SmallVectorImpl<Use *> &Ops) const override;
1264     bool shouldConvertPhiType(Type *From, Type *To) const override;
1265 
1266     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1267     /// extend node) is profitable.
1268     bool isVectorLoadExtDesirable(SDValue) const override;
1269 
1270     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1271     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1272     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1273     bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1274                                     EVT VT) const override;
1275 
1276     /// Return true if it's profitable to narrow
1277     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1278     /// from i32 to i8 but not from i32 to i16.
1279     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1280 
1281     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1282     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1283     /// true and stores the intrinsic information into the IntrinsicInfo that was
1284     /// passed to the function.
1285     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1286                             MachineFunction &MF,
1287                             unsigned Intrinsic) const override;
1288 
1289     /// Returns true if the target can instruction select the
1290     /// specified FP immediate natively. If false, the legalizer will
1291     /// materialize the FP immediate as a load from a constant pool.
1292     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1293                       bool ForCodeSize) const override;
1294 
1295     /// Targets can use this to indicate that they only support *some*
1296     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1297     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1298     /// be legal.
1299     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1300 
1301     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1302     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1303     /// constant pool entry.
1304     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1305 
1306     /// Returns true if lowering to a jump table is allowed.
1307     bool areJTsAllowed(const Function *Fn) const override;
1308 
1309     /// If true, then instruction selection should
1310     /// seek to shrink the FP constant of the specified type to a smaller type
1311     /// in order to save space and / or reduce runtime.
1312     bool ShouldShrinkFPConstant(EVT VT) const override {
1313       // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1314       // expensive than a straight movsd. On the other hand, it's important to
1315       // shrink long double fp constant since fldt is very slow.
1316       return !X86ScalarSSEf64 || VT == MVT::f80;
1317     }
1318 
1319     /// Return true if we believe it is correct and profitable to reduce the
1320     /// load node to a smaller type.
1321     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1322                                EVT NewVT) const override;
1323 
1324     /// Return true if the specified scalar FP type is computed in an SSE
1325     /// register, not on the X87 floating point stack.
1326     bool isScalarFPTypeInSSEReg(EVT VT) const {
1327       return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1328              (VT == MVT::f32 && X86ScalarSSEf32) || // f32 is when SSE1
1329              (VT == MVT::f16 && X86ScalarSSEf16);   // f16 is when AVX512FP16
1330     }
1331 
1332     /// Returns true if it is beneficial to convert a load of a constant
1333     /// to just the constant itself.
1334     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1335                                            Type *Ty) const override;
1336 
1337     bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1338 
1339     bool convertSelectOfConstantsToMath(EVT VT) const override;
1340 
1341     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1342                                 SDValue C) const override;
1343 
1344     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1345     /// with this index.
1346     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1347                                  unsigned Index) const override;
1348 
1349     /// Scalar ops always have equal or better analysis/performance/power than
1350     /// the vector equivalent, so this always makes sense if the scalar op is
1351     /// supported.
1352     bool shouldScalarizeBinop(SDValue) const override;
1353 
1354     /// Extract of a scalar FP value from index 0 of a vector is free.
1355     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1356       EVT EltVT = VT.getScalarType();
1357       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1358     }
1359 
1360     /// Overflow nodes should get combined/lowered to optimal instructions
1361     /// (they should allow eliminating explicit compares by getting flags from
1362     /// math ops).
1363     bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
1364                               bool MathUsed) const override;
1365 
1366     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1367                                       unsigned AddrSpace) const override {
1368       // If we can replace more than 2 scalar stores, there will be a reduction
1369       // in instructions even after we add a vector constant load.
1370       return NumElem > 2;
1371     }
1372 
1373     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1374                                  const SelectionDAG &DAG,
1375                                  const MachineMemOperand &MMO) const override;
1376 
1377     /// Intel processors have a unified instruction and data cache
1378     const char * getClearCacheBuiltinName() const override {
1379       return nullptr; // nothing to do, move along.
1380     }
1381 
1382     Register getRegisterByName(const char* RegName, LLT VT,
1383                                const MachineFunction &MF) const override;
1384 
1385     /// If a physical register, this returns the register that receives the
1386     /// exception address on entry to an EH pad.
1387     Register
1388     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1389 
1390     /// If a physical register, this returns the register that receives the
1391     /// exception typeid on entry to a landing pad.
1392     Register
1393     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1394 
1395     virtual bool needsFixedCatchObjects() const override;
1396 
1397     /// This method returns a target specific FastISel object,
1398     /// or null if the target does not support "fast" ISel.
1399     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1400                              const TargetLibraryInfo *libInfo) const override;
1401 
1402     /// If the target has a standard location for the stack protector cookie,
1403     /// returns the address of that location. Otherwise, returns nullptr.
1404     Value *getIRStackGuard(IRBuilderBase &IRB) const override;
1405 
1406     bool useLoadStackGuardNode() const override;
1407     bool useStackGuardXorFP() const override;
1408     void insertSSPDeclarations(Module &M) const override;
1409     Value *getSDagStackGuard(const Module &M) const override;
1410     Function *getSSPStackGuardCheck(const Module &M) const override;
1411     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1412                                 const SDLoc &DL) const override;
1413 
1414 
1415     /// Return true if the target stores SafeStack pointer at a fixed offset in
1416     /// some non-standard address space, and populates the address space and
1417     /// offset as appropriate.
1418     Value *getSafeStackPointerLocation(IRBuilderBase &IRB) const override;
1419 
1420     std::pair<SDValue, SDValue> BuildFILD(EVT DstVT, EVT SrcVT, const SDLoc &DL,
1421                                           SDValue Chain, SDValue Pointer,
1422                                           MachinePointerInfo PtrInfo,
1423                                           Align Alignment,
1424                                           SelectionDAG &DAG) const;
1425 
1426     /// Customize the preferred legalization strategy for certain types.
1427     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1428 
1429     bool softPromoteHalfType() const override { return true; }
1430 
1431     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1432                                       EVT VT) const override;
1433 
1434     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1435                                            CallingConv::ID CC,
1436                                            EVT VT) const override;
1437 
1438     unsigned getVectorTypeBreakdownForCallingConv(
1439         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1440         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1441 
1442     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1443 
1444     bool supportSwiftError() const override;
1445 
1446     bool hasStackProbeSymbol(MachineFunction &MF) const override;
1447     bool hasInlineStackProbe(MachineFunction &MF) const override;
1448     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1449 
1450     unsigned getStackProbeSize(MachineFunction &MF) const;
1451 
1452     bool hasVectorBlend() const override { return true; }
1453 
1454     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1455 
1456     /// Lower interleaved load(s) into target specific
1457     /// instructions/intrinsics.
1458     bool lowerInterleavedLoad(LoadInst *LI,
1459                               ArrayRef<ShuffleVectorInst *> Shuffles,
1460                               ArrayRef<unsigned> Indices,
1461                               unsigned Factor) const override;
1462 
1463     /// Lower interleaved store(s) into target specific
1464     /// instructions/intrinsics.
1465     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1466                                unsigned Factor) const override;
1467 
1468     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1469                                    SDValue Addr, SelectionDAG &DAG)
1470                                    const override;
1471 
1472     Align getPrefLoopAlignment(MachineLoop *ML) const override;
1473 
1474   protected:
1475     std::pair<const TargetRegisterClass *, uint8_t>
1476     findRepresentativeClass(const TargetRegisterInfo *TRI,
1477                             MVT VT) const override;
1478 
1479   private:
1480     /// Keep a reference to the X86Subtarget around so that we can
1481     /// make the right decision when generating code for different targets.
1482     const X86Subtarget &Subtarget;
1483 
1484     /// Select between SSE or x87 floating point ops.
1485     /// When SSE is available, use it for f32 operations.
1486     /// When SSE2 is available, use it for f64 operations.
1487     bool X86ScalarSSEf32;
1488     bool X86ScalarSSEf64;
1489     bool X86ScalarSSEf16;
1490 
1491     /// A list of legal FP immediates.
1492     std::vector<APFloat> LegalFPImmediates;
1493 
1494     /// Indicate that this x86 target can instruction
1495     /// select the specified FP immediate natively.
1496     void addLegalFPImmediate(const APFloat& Imm) {
1497       LegalFPImmediates.push_back(Imm);
1498     }
1499 
1500     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1501                             CallingConv::ID CallConv, bool isVarArg,
1502                             const SmallVectorImpl<ISD::InputArg> &Ins,
1503                             const SDLoc &dl, SelectionDAG &DAG,
1504                             SmallVectorImpl<SDValue> &InVals,
1505                             uint32_t *RegMask) const;
1506     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1507                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1508                              const SDLoc &dl, SelectionDAG &DAG,
1509                              const CCValAssign &VA, MachineFrameInfo &MFI,
1510                              unsigned i) const;
1511     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1512                              const SDLoc &dl, SelectionDAG &DAG,
1513                              const CCValAssign &VA,
1514                              ISD::ArgFlagsTy Flags, bool isByval) const;
1515 
1516     // Call lowering helpers.
1517 
1518     /// Check whether the call is eligible for tail call optimization. Targets
1519     /// that want to do tail call optimization should implement this function.
1520     bool IsEligibleForTailCallOptimization(
1521         SDValue Callee, CallingConv::ID CalleeCC, bool IsCalleeStackStructRet,
1522         bool isVarArg, Type *RetTy, const SmallVectorImpl<ISD::OutputArg> &Outs,
1523         const SmallVectorImpl<SDValue> &OutVals,
1524         const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const;
1525     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1526                                     SDValue Chain, bool IsTailCall,
1527                                     bool Is64Bit, int FPDiff,
1528                                     const SDLoc &dl) const;
1529 
1530     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1531                                          SelectionDAG &DAG) const;
1532 
1533     unsigned getAddressSpace(void) const;
1534 
1535     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned,
1536                             SDValue &Chain) const;
1537     SDValue LRINT_LLRINTHelper(SDNode *N, SelectionDAG &DAG) const;
1538 
1539     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1540     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1541     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1542     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1543 
1544     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1545                                   const unsigned char OpFlags = 0) const;
1546     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1547     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1548     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1549     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1550     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1551 
1552     /// Creates target global address or external symbol nodes for calls or
1553     /// other uses.
1554     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1555                                   bool ForCall) const;
1556 
1557     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1558     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1559     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1560     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1561     SDValue LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const;
1562     SDValue LowerLRINT_LLRINT(SDValue Op, SelectionDAG &DAG) const;
1563     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1564     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1565     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1566     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1567     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1568     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1569     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1570     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1571     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1572     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1573     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1574     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1575     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1576     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1577     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1578     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1579     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1580     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1581     SDValue LowerSET_ROUNDING(SDValue Op, SelectionDAG &DAG) const;
1582     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1583     SDValue LowerWin64_FP_TO_INT128(SDValue Op, SelectionDAG &DAG,
1584                                     SDValue &Chain) const;
1585     SDValue LowerWin64_INT128_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1586     SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
1587     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1588     SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1589     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1590     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1591 
1592     SDValue
1593     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1594                          const SmallVectorImpl<ISD::InputArg> &Ins,
1595                          const SDLoc &dl, SelectionDAG &DAG,
1596                          SmallVectorImpl<SDValue> &InVals) const override;
1597     SDValue LowerCall(CallLoweringInfo &CLI,
1598                       SmallVectorImpl<SDValue> &InVals) const override;
1599 
1600     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1601                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1602                         const SmallVectorImpl<SDValue> &OutVals,
1603                         const SDLoc &dl, SelectionDAG &DAG) const override;
1604 
1605     bool supportSplitCSR(MachineFunction *MF) const override {
1606       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1607           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1608     }
    // Split-CSR support (see supportSplitCSR above): prepare the entry block,
    // then insert the callee-saved-register copies in entry and exit blocks.
    void initializeSplitCSR(MachineBasicBlock *Entry) const override;
    void insertCopiesSplitCSR(
      MachineBasicBlock *Entry,
      const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1613 
1614     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1615 
1616     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1617 
1618     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1619                             ISD::NodeType ExtendKind) const override;
1620 
1621     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1622                         bool isVarArg,
1623                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1624                         LLVMContext &Context) const override;
1625 
1626     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1627 
1628     TargetLoweringBase::AtomicExpansionKind
1629     shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1630     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1631     TargetLoweringBase::AtomicExpansionKind
1632     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1633 
1634     LoadInst *
1635     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1636 
1637     bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
1638     bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
1639 
1640     bool needsCmpXchgNb(Type *MemType) const;
1641 
1642     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1643                                 MachineBasicBlock *DispatchBB, int FI) const;
1644 
    // Utility function to emit the low-level va_arg code for X86-64.
    MachineBasicBlock *
    EmitVAARGWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

    // NOTE(review): the previous comment here ("xmm reg save portion of
    // va_start") was stale and described a different routine. This presumably
    // expands two back-to-back select pseudos (MI1, MI2) together — confirm
    // against the implementation.
    MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
                                                 MachineInstr &MI2,
                                                 MachineBasicBlock *BB) const;

    // Custom-inserter helpers: each expands one pseudo-instruction into real
    // machine IR, possibly creating new basic blocks, and returns the block
    // in which emission should continue.
    MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
                                         MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
                                           MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
                                            MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredProbedAlloca(MachineInstr &MI,
                                               MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
                                          MachineBasicBlock *BB) const;

    MachineBasicBlock *EmitLoweredIndirectThunk(MachineInstr &MI,
                                                MachineBasicBlock *BB) const;

    // SjLj exception-handling expansion helpers; the ShadowStack variants
    // additionally patch the CET shadow stack.
    MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
                                        MachineBasicBlock *MBB) const;

    void emitSetJmpShadowStackFix(MachineInstr &MI,
                                  MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
                                         MachineBasicBlock *MBB) const;

    MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const;

    MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
                                             MachineBasicBlock *MBB) const;
1689 
    /// Emit flags for the given setcc condition and operands. Also returns the
    /// corresponding X86 condition code constant in X86CC.
    SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              SDValue &X86CC) const;

    /// Check if replacement of SQRT with RSQRT should be disabled.
    bool isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const override;

    /// Use rsqrt* to speed up sqrt calculations.
    SDValue getSqrtEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                            int &RefinementSteps, bool &UseOneConstNR,
                            bool Reciprocal) const override;

    /// Use rcp* to speed up fdiv calculations.
    SDValue getRecipEstimate(SDValue Op, SelectionDAG &DAG, int Enabled,
                             int &RefinementSteps) const override;

    /// Reassociate floating point divisions into multiply by reciprocal.
    unsigned combineRepeatedFPDivisors() const override;

    /// Build a target-specific sequence for a signed division by a power of
    /// two; any new nodes are recorded in Created (TargetLowering override).
    SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
                          SmallVectorImpl<SDNode *> &Created) const override;
1713   };
1714 
  namespace X86 {
    /// Create an X86-specific FastISel instance for fast (-O0 style)
    /// instruction selection.
    FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
                             const TargetLibraryInfo *libInfo);
  } // end namespace X86
1719 
1720   // X86 specific Gather/Scatter nodes.
1721   // The class has the same order of operands as MaskedGatherScatterSDNode for
1722   // convenience.
1723   class X86MaskedGatherScatterSDNode : public MemIntrinsicSDNode {
1724   public:
1725     // This is a intended as a utility and should never be directly created.
1726     X86MaskedGatherScatterSDNode() = delete;
1727     ~X86MaskedGatherScatterSDNode() = delete;
1728 
1729     const SDValue &getBasePtr() const { return getOperand(3); }
1730     const SDValue &getIndex()   const { return getOperand(4); }
1731     const SDValue &getMask()    const { return getOperand(2); }
1732     const SDValue &getScale()   const { return getOperand(5); }
1733 
1734     static bool classof(const SDNode *N) {
1735       return N->getOpcode() == X86ISD::MGATHER ||
1736              N->getOpcode() == X86ISD::MSCATTER;
1737     }
1738   };
1739 
1740   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1741   public:
1742     const SDValue &getPassThru() const { return getOperand(1); }
1743 
1744     static bool classof(const SDNode *N) {
1745       return N->getOpcode() == X86ISD::MGATHER;
1746     }
1747   };
1748 
1749   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1750   public:
1751     const SDValue &getValue() const { return getOperand(1); }
1752 
1753     static bool classof(const SDNode *N) {
1754       return N->getOpcode() == X86ISD::MSCATTER;
1755     }
1756   };
1757 
  /// Generate unpacklo/unpackhi shuffle mask.
  /// NOTE(review): presumably \p Lo selects the low-half (unpacklo) pattern
  /// and \p Unary makes both shuffle sources refer to the first input —
  /// confirm against the implementation.
  void createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, bool Lo,
                               bool Unary);

  /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation
  /// imposed by AVX and specific to the unary pattern. Example:
  /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3>
  /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7>
  void createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, bool Lo);
1767 
1768 } // end namespace llvm
1769 
1770 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
1771