1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16 
17 #include "llvm/CodeGen/CallingConvLower.h"
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLowering.h"
20 
21 namespace llvm {
22   class X86Subtarget;
23   class X86TargetMachine;
24 
25   namespace X86ISD {
26     // X86 Specific DAG Nodes
27     enum NodeType : unsigned {
28       // Start the numbering where the builtin ops leave off.
29       FIRST_NUMBER = ISD::BUILTIN_OP_END,
30 
31       /// Bit scan forward.
32       BSF,
33       /// Bit scan reverse.
34       BSR,
35 
36       /// Double shift instructions. These correspond to
37       /// X86::SHLDxx and X86::SHRDxx instructions.
38       SHLD,
39       SHRD,
40 
41       /// Bitwise logical AND of floating point values. This corresponds
42       /// to X86::ANDPS or X86::ANDPD.
43       FAND,
44 
45       /// Bitwise logical OR of floating point values. This corresponds
46       /// to X86::ORPS or X86::ORPD.
47       FOR,
48 
49       /// Bitwise logical XOR of floating point values. This corresponds
50       /// to X86::XORPS or X86::XORPD.
51       FXOR,
52 
53       /// Bitwise logical ANDNOT of floating point values. This
54       /// corresponds to X86::ANDNPS or X86::ANDNPD.
55       FANDN,
56 
57       /// These operations represent an abstract X86 call
58       /// instruction, which includes a bunch of information. In particular, the
59       /// operands of these nodes are:
60       ///
61       ///     #0 - The incoming token chain
62       ///     #1 - The callee
63       ///     #2 - The number of arg bytes the caller pushes on the stack.
64       ///     #3 - The number of arg bytes the callee pops off the stack.
65       ///     #4 - The value to pass in AL/AX/EAX (optional)
66       ///     #5 - The value to pass in DL/DX/EDX (optional)
67       ///
68       /// The result values of these nodes are:
69       ///
70       ///     #0 - The outgoing token chain
71       ///     #1 - The first register result value (optional)
72       ///     #2 - The second register result value (optional)
73       ///
74       CALL,
75 
76       /// Same as call except it adds the NoTrack prefix.
77       NT_CALL,
78 
79       /// X86 compare and logical compare instructions.
80       CMP, COMI, UCOMI,
81 
82       /// X86 bit-test instructions.
83       BT,
84 
85       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
86       /// operand, usually produced by a CMP instruction.
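      /// As an illustrative sketch (not a normative recipe), such a node is
      /// typically created along these lines:
      ///   SDValue CC  = DAG.getTargetConstant(X86::COND_E, DL, MVT::i8);
      ///   SDValue Res = DAG.getNode(X86ISD::SETCC, DL, MVT::i8, CC, EFLAGS);
      /// where EFLAGS is the flag result of an earlier X86ISD::CMP.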
87       SETCC,
88 
89       /// X86 Select
90       SELECTS,
91 
92       // Same as SETCC except it's materialized with an SBB and the value is all
93       // ones or all zeros.
94       SETCC_CARRY,  // R = carry_bit ? ~0 : 0
95 
96       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
97       /// Operands are two FP values to compare; result is a mask of
98       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
99       FSETCC,
100 
101       /// X86 FP SETCC, similar to above, but with output as an i1 mask,
102       /// and a version with SAE.
103       FSETCCM, FSETCCM_SAE,
104 
105       /// X86 conditional moves. Operand 0 and operand 1 are the two values
106       /// to select from. Operand 2 is the condition code, and operand 3 is the
107       /// flag operand produced by a CMP or TEST instruction.
108       CMOV,
109 
110       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
111       /// is the block to branch if condition is true, operand 2 is the
112       /// condition code, and operand 3 is the flag operand produced by a CMP
113       /// or TEST instruction.
114       BRCOND,
115 
116       /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
117       /// operand 1 is the target address.
118       NT_BRIND,
119 
120       /// Return with a flag operand. Operand 0 is the chain operand, operand
121       /// 1 is the number of bytes of stack to pop.
122       RET_FLAG,
123 
124       /// Return from interrupt. Operand 0 is the number of bytes to pop.
125       IRET,
126 
127       /// Repeat fill, corresponds to X86::REP_STOSx.
128       REP_STOS,
129 
130       /// Repeat move, corresponds to X86::REP_MOVSx.
131       REP_MOVS,
132 
133       /// On Darwin, this node represents the result of the popl
134       /// at function entry, used for PIC code.
135       GlobalBaseReg,
136 
137       /// A wrapper node for TargetConstantPool, TargetJumpTable,
138       /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
139       /// MCSymbol and TargetBlockAddress.
140       Wrapper,
141 
142       /// Special wrapper used under X86-64 PIC mode for RIP
143       /// relative displacements.
144       WrapperRIP,
145 
146       /// Copies a 64-bit value from an MMX vector to the low word
147       /// of an XMM vector, with the high word zero filled.
148       MOVQ2DQ,
149 
150       /// Copies a 64-bit value from the low word of an XMM vector
151       /// to an MMX vector.
152       MOVDQ2Q,
153 
154       /// Copies a 32-bit value from the low word of an MMX
155       /// vector to a GPR.
156       MMX_MOVD2W,
157 
158       /// Copies a GPR into the low 32-bit word of an MMX vector
159       /// and zeroes out the high word.
160       MMX_MOVW2D,
161 
162       /// Extract an 8-bit value from a vector and zero extend it to
163       /// i32, corresponds to X86::PEXTRB.
164       PEXTRB,
165 
166       /// Extract a 16-bit value from a vector and zero extend it to
167       /// i32, corresponds to X86::PEXTRW.
168       PEXTRW,
169 
170       /// Insert any element of a 4 x float vector into any element
171       /// of a destination 4 x float vector.
172       INSERTPS,
173 
174       /// Insert the lower 8 bits of a 32-bit value into a vector,
175       /// corresponds to X86::PINSRB.
176       PINSRB,
177 
178       /// Insert the lower 16 bits of a 32-bit value into a vector,
179       /// corresponds to X86::PINSRW.
180       PINSRW,
181 
182       /// Shuffle 16 8-bit values within a vector.
183       PSHUFB,
184 
185       /// Compute Sum of Absolute Differences.
186       PSADBW,
187       /// Compute Double Block Packed Sum-Absolute-Differences.
188       DBPSADBW,
189 
190       /// Bitwise Logical AND NOT of Packed FP values.
191       ANDNP,
192 
193       /// Blend where the selector is an immediate.
194       BLENDI,
195 
196       /// Dynamic (non-constant condition) vector blend where only the sign bits
197       /// of the condition elements are used. This is used to enforce that the
198       /// condition mask is not valid for generic VSELECT optimizations. This
199       /// is also used to implement the intrinsics.
200       /// Operands are in VSELECT order: MASK, TRUE, FALSE
201       BLENDV,
202 
203       /// Combined add and sub on an FP vector.
204       ADDSUB,
205 
206       //  FP vector ops with rounding mode.
207       FADD_RND, FADDS, FADDS_RND,
208       FSUB_RND, FSUBS, FSUBS_RND,
209       FMUL_RND, FMULS, FMULS_RND,
210       FDIV_RND, FDIVS, FDIVS_RND,
211       FMAX_SAE, FMAXS_SAE,
212       FMIN_SAE, FMINS_SAE,
213       FSQRT_RND, FSQRTS, FSQRTS_RND,
214 
215       // FP vector get exponent.
216       FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
217       // Extract Normalized Mantissas.
218       VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
219       // FP Scale.
220       SCALEF, SCALEF_RND,
221       SCALEFS, SCALEFS_RND,
222 
223       // Unsigned Integer average.
224       AVG,
225 
226       /// Integer horizontal add/sub.
227       HADD,
228       HSUB,
229 
230       /// Floating point horizontal add/sub.
231       FHADD,
232       FHSUB,
233 
234       // Detect Conflicts Within a Vector
235       CONFLICT,
236 
237       /// Floating point max and min.
238       FMAX, FMIN,
239 
240       /// Commutative FMIN and FMAX.
241       FMAXC, FMINC,
242 
243       /// Scalar intrinsic floating point max and min.
244       FMAXS, FMINS,
245 
246       /// Floating point reciprocal-sqrt and reciprocal approximation.
247       /// Note that these typically require refinement
248       /// in order to obtain suitable precision.
249       FRSQRT, FRCP,
250 
251       // AVX-512 reciprocal approximations with a little more precision.
252       RSQRT14, RSQRT14S, RCP14, RCP14S,
253 
254       // Thread Local Storage.
255       TLSADDR,
256 
257       // Thread Local Storage. A call to get the start address
258       // of the TLS block for the current module.
259       TLSBASEADDR,
260 
261       // Thread Local Storage. A call to an OS-provided thunk at the
262       // address from an earlier relocation.
263       TLSCALL,
264 
265       // Exception Handling helpers.
266       EH_RETURN,
267 
268       // SjLj exception handling setjmp.
269       EH_SJLJ_SETJMP,
270 
271       // SjLj exception handling longjmp.
272       EH_SJLJ_LONGJMP,
273 
274       // SjLj exception handling dispatch.
275       EH_SJLJ_SETUP_DISPATCH,
276 
277       /// Tail call return. See X86TargetLowering::LowerCall for
278       /// the list of operands.
279       TC_RETURN,
280 
281       // Vector move to low scalar and zero higher vector elements.
282       VZEXT_MOVL,
283 
284       // Vector integer truncate.
285       VTRUNC,
286       // Vector integer truncate with unsigned/signed saturation.
287       VTRUNCUS, VTRUNCS,
288 
289       // Masked version of the above. Used when less than a 128-bit result is
290       // produced since the mask only applies to the lower elements and can't
291       // be represented by a select.
292       // SRC, PASSTHRU, MASK
293       VMTRUNC, VMTRUNCUS, VMTRUNCS,
294 
295       // Vector FP extend.
296       VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
297 
298       // Vector FP round.
299       VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
300 
301       // Masked version of above. Used for v2f64->v4f32.
302       // SRC, PASSTHRU, MASK
303       VMFPROUND,
304 
305       // 128-bit vector logical left / right shift
306       VSHLDQ, VSRLDQ,
307 
308       // Vector shift elements
309       VSHL, VSRL, VSRA,
310 
311       // Vector variable shift
312       VSHLV, VSRLV, VSRAV,
313 
314       // Vector shift elements by immediate
315       VSHLI, VSRLI, VSRAI,
316 
317       // Shifts of mask registers.
318       KSHIFTL, KSHIFTR,
319 
320       // Bit rotate by immediate
321       VROTLI, VROTRI,
322 
323       // Vector packed double/float comparison.
324       CMPP,
325 
326       // Vector integer comparisons.
327       PCMPEQ, PCMPGT,
328 
329       // v8i16 Horizontal minimum and position.
330       PHMINPOS,
331 
332       MULTISHIFT,
333 
334       /// Vector comparison generating mask bits for fp and
335       /// integer signed and unsigned data types.
336       CMPM,
337       // Vector comparison with SAE for FP values
338       CMPM_SAE,
339 
340       // Arithmetic operations with FLAGS results.
341       ADD, SUB, ADC, SBB, SMUL, UMUL,
342       OR, XOR, AND,
343 
344       // Bit field extract.
345       BEXTR,
346 
347       // Zero High Bits Starting with Specified Bit Position.
348       BZHI,
349 
350       // X86-specific multiply by immediate.
351       MUL_IMM,
352 
353       // Vector sign bit extraction.
354       MOVMSK,
355 
356       // Vector bitwise comparisons.
357       PTEST,
358 
359       // Vector packed fp sign bitwise comparisons.
360       TESTP,
361 
362       // OR/AND test for masks.
363       KORTEST,
364       KTEST,
365 
366       // ADD for masks.
367       KADD,
368 
369       // Several flavors of instructions with vector shuffle behaviors.
370       // Saturated signed/unsigned packing.
371       PACKSS,
372       PACKUS,
373       // Intra-lane alignr.
374       PALIGNR,
375       // AVX512 inter-lane alignr.
376       VALIGN,
377       PSHUFD,
378       PSHUFHW,
379       PSHUFLW,
380       SHUFP,
381       // VBMI2 Concat & Shift.
382       VSHLD,
383       VSHRD,
384       VSHLDV,
385       VSHRDV,
386       // Shuffle Packed Values at 128-bit granularity.
387       SHUF128,
388       MOVDDUP,
389       MOVSHDUP,
390       MOVSLDUP,
391       MOVLHPS,
392       MOVHLPS,
393       MOVSD,
394       MOVSS,
395       UNPCKL,
396       UNPCKH,
397       VPERMILPV,
398       VPERMILPI,
399       VPERMI,
400       VPERM2X128,
401 
402       // Variable Permute (VPERM).
403       // Res = VPERMV MaskV, V0
404       VPERMV,
405 
406       // 3-op Variable Permute (VPERMT2).
407       // Res = VPERMV3 V0, MaskV, V1
408       VPERMV3,
409 
410       // Bitwise ternary logic.
411       VPTERNLOG,
412       // Fix Up Special Packed Float32/64 values.
413       VFIXUPIMM, VFIXUPIMM_SAE,
414       VFIXUPIMMS, VFIXUPIMMS_SAE,
415       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
416       VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
417       // Reduce - Perform Reduction Transformation on scalar/packed FP.
418       VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
419       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
420       // Also used by the legacy (V)ROUND intrinsics where we mask out the
421       // scaling part of the immediate.
422       VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
423       // Tests types of packed FP values.
424       VFPCLASS,
425       // Tests types of scalar FP values.
426       VFPCLASSS,
427 
428       // Broadcast (splat) scalar or element 0 of a vector. If the operand is
429       // a vector, this node may change the vector length as part of the splat.
430       VBROADCAST,
431       // Broadcast mask to vector.
432       VBROADCASTM,
433       // Broadcast subvector to vector.
434       SUBV_BROADCAST,
435 
436       /// SSE4A Extraction and Insertion.
437       EXTRQI, INSERTQI,
438 
439       // XOP arithmetic/logical shifts.
440       VPSHA, VPSHL,
441       // XOP signed/unsigned integer comparisons.
442       VPCOM, VPCOMU,
443       // XOP packed permute bytes.
444       VPPERM,
445       // XOP two source permutation.
446       VPERMIL2,
447 
448       // Vector multiply packed unsigned doubleword integers.
449       PMULUDQ,
450       // Vector multiply packed signed doubleword integers.
451       PMULDQ,
452       // Vector Multiply Packed Unsigned Integers with Round and Scale.
453       MULHRS,
454 
455       // Multiply and Add Packed Integers.
456       VPMADDUBSW, VPMADDWD,
457 
458       // AVX512IFMA multiply and add.
459       // NOTE: These are different from the instruction and perform
460       // op0 x op1 + op2.
461       VPMADD52L, VPMADD52H,
462 
463       // VNNI
464       VPDPBUSD,
465       VPDPBUSDS,
466       VPDPWSSD,
467       VPDPWSSDS,
468 
469       // FMA nodes.
470       // We use the target independent ISD::FMA for the non-inverted case.
471       FNMADD,
472       FMSUB,
473       FNMSUB,
474       FMADDSUB,
475       FMSUBADD,
476 
477       // FMA with rounding mode.
478       FMADD_RND,
479       FNMADD_RND,
480       FMSUB_RND,
481       FNMSUB_RND,
482       FMADDSUB_RND,
483       FMSUBADD_RND,
484 
485       // Compress and expand.
486       COMPRESS,
487       EXPAND,
488 
489       // Bits shuffle
490       VPSHUFBITQMB,
491 
492       // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
493       SINT_TO_FP_RND, UINT_TO_FP_RND,
494       SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
495       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
496 
497       // Vector float/double to signed/unsigned integer.
498       CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
499       // Scalar float/double to signed/unsigned integer.
500       CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
501 
502       // Vector float/double to signed/unsigned integer with truncation.
503       CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
504       // Scalar float/double to signed/unsigned integer with truncation.
505       CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
506 
507       // Vector signed/unsigned integer to float/double.
508       CVTSI2P, CVTUI2P,
509 
510       // Masked versions of above. Used for v2f64->v4f32.
511       // SRC, PASSTHRU, MASK
512       MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
513       MCVTSI2P, MCVTUI2P,
514 
515       // Vector float to bfloat16.
516       // Convert TWO packed single data to one packed BF16 data
517       CVTNE2PS2BF16,
518       // Convert packed single data to packed BF16 data
519       CVTNEPS2BF16,
520       // Masked version of above.
521       // SRC, PASSTHRU, MASK
522       MCVTNEPS2BF16,
523 
524       // Dot product of BF16 pairs, accumulated into
525       // packed single precision.
526       DPBF16PS,
527 
528       // Save xmm argument registers to the stack, according to %al. An operator
529       // is needed so that this can be expanded with control flow.
530       VASTART_SAVE_XMM_REGS,
531 
532       // Windows's _chkstk call to do stack probing.
533       WIN_ALLOCA,
534 
535       // For allocating variable amounts of stack space when using
536       // segmented stacks. Checks if the current stacklet has enough space, and
537       // falls back to heap allocation if not.
538       SEG_ALLOCA,
539 
540       // Memory barriers.
541       MEMBARRIER,
542       MFENCE,
543 
544       // Store FP status word into i16 register.
545       FNSTSW16r,
546 
547       // Store contents of %ah into %eflags.
548       SAHF,
549 
550       // Get a random integer and indicate whether it is valid in CF.
551       RDRAND,
552 
553       // Get a NIST SP800-90B & C compliant random integer and
554       // indicate whether it is valid in CF.
555       RDSEED,
556 
557       // Protection keys
558       // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
559       // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
560       // value for ECX.
561       RDPKRU, WRPKRU,
562 
563       // SSE42 string comparisons.
564       // These nodes produce 3 results: index, mask, and flags. X86ISelDAGToDAG
565       // will emit one or two instructions based on which results are used. If
566       // both flags and index/mask are used, this allows us to use a single
567       // instruction since we won't have to pick an opcode for flags. Instead we
568       // can rely on the DAG to CSE everything and decide at isel.
569       PCMPISTR,
570       PCMPESTR,
571 
572       // Test if in transactional execution.
573       XTEST,
574 
575       // ERI instructions.
576       RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
577       RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
578 
579       // Conversions between float and half-float.
580       CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
581 
582       // Masked version of above.
583       // SRC, RND, PASSTHRU, MASK
584       MCVTPS2PH,
585 
586       // Galois Field Arithmetic Instructions
587       GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
588 
589       // LWP insert record.
590       LWPINS,
591 
592       // User level wait
593       UMWAIT, TPAUSE,
594 
595       // Enqueue Stores Instructions
596       ENQCMD, ENQCMDS,
597 
598       // For avx512-vp2intersect
599       VP2INTERSECT,
600 
601       // Compare and swap.
602       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
603       LCMPXCHG8_DAG,
604       LCMPXCHG16_DAG,
605       LCMPXCHG8_SAVE_EBX_DAG,
606       LCMPXCHG16_SAVE_RBX_DAG,
607 
608       /// LOCK-prefixed arithmetic read-modify-write instructions.
609       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
610       LADD, LSUB, LOR, LXOR, LAND,
611 
612       // Load, scalar_to_vector, and zero extend.
613       VZEXT_LOAD,
614 
615       // extract_vector_elt, store.
616       VEXTRACT_STORE,
617 
618       // scalar broadcast from memory
619       VBROADCAST_LOAD,
620 
621       // Store FP control word into i16 memory.
622       FNSTCW16m,
623 
624       /// This instruction implements FP_TO_SINT with the
625       /// integer destination in memory and an FP reg source. This corresponds
626       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
627       /// has two inputs (token chain and address) and two outputs (int value
628       /// and token chain). Memory VT specifies the type to store to.
629       FP_TO_INT_IN_MEM,
630 
631       /// This instruction implements SINT_TO_FP with the
632       /// integer source in memory and FP reg result.  This corresponds to the
633       /// X86::FILD*m instructions. It has two inputs (token chain and address)
634       /// and two outputs (FP value and token chain). FILD_FLAG also produces a
635       /// flag. The integer source type is specified by the memory VT.
636       FILD,
637       FILD_FLAG,
638 
639       /// This instruction implements a fp->int store from FP stack
640       /// slots. This corresponds to the fist instruction. It takes a
641       /// chain operand, value to store, address, and glue. The memory VT
642       /// specifies the type to store as.
643       FIST,
644 
645       /// This instruction implements an extending load to FP stack slots.
646       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
647       /// operand, and a pointer to load from. The memory VT specifies the type to
648       /// load from.
649       FLD,
650 
651       /// This instruction implements a truncating store from FP stack
652       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
653       /// chain operand, value to store, address, and glue. The memory VT
654       /// specifies the type to store as.
655       FST,
656 
657       /// This instruction grabs the address of the next argument
658       /// from a va_list. (reads and modifies the va_list in memory)
659       VAARG_64,
660 
661       // Vector truncating store with unsigned/signed saturation
662       VTRUNCSTOREUS, VTRUNCSTORES,
663       // Vector truncating masked store with unsigned/signed saturation
664       VMTRUNCSTOREUS, VMTRUNCSTORES,
665 
666       // X86 specific gather and scatter
667       MGATHER, MSCATTER,
668 
669       // WARNING: Do not add anything at the end unless you want the node to
670       // have a memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE, all
671       // opcodes will be treated as target memory ops!
672     };
673   } // end namespace X86ISD
674 
675   /// Define some predicates that are used for node matching.
676   namespace X86 {
677     /// Returns true if Elt is a constant zero or floating point constant +0.0.
678     bool isZeroNode(SDValue Elt);
679 
680     /// Returns true if the given offset can
681     /// fit into the displacement field of the instruction.
682     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
683                                       bool hasSymbolicDisplacement = true);
684 
685     /// Determines whether the callee is required to pop its
686     /// own arguments. Callee pop is necessary to support tail calls.
687     bool isCalleePop(CallingConv::ID CallingConv,
688                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
689 
690     /// If Op is a constant whose elements are all the same constant or
691     /// undefined, return true and return the constant value in \p SplatVal.
692     bool isConstantSplat(SDValue Op, APInt &SplatVal);
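
    // Illustrative (hypothetical call site) use of the predicate above, e.g.
    // inside a DAG combine:
    //   APInt SplatVal;
    //   if (X86::isConstantSplat(Mask, SplatVal) && SplatVal.isAllOnesValue())
    //     ...; // Mask is a splat of all-ones elements.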
693   } // end namespace X86
694 
695   //===--------------------------------------------------------------------===//
696   //  X86 Implementation of the TargetLowering interface
697   class X86TargetLowering final : public TargetLowering {
698   public:
699     explicit X86TargetLowering(const X86TargetMachine &TM,
700                                const X86Subtarget &STI);
701 
702     unsigned getJumpTableEncoding() const override;
703     bool useSoftFloat() const override;
704 
705     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
706                                ArgListTy &Args) const override;
707 
708     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
709       return MVT::i8;
710     }
711 
712     const MCExpr *
713     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
714                               const MachineBasicBlock *MBB, unsigned uid,
715                               MCContext &Ctx) const override;
716 
717     /// Returns relocation base for the given PIC jumptable.
718     SDValue getPICJumpTableRelocBase(SDValue Table,
719                                      SelectionDAG &DAG) const override;
720     const MCExpr *
721     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
722                                  unsigned JTI, MCContext &Ctx) const override;
723 
724     /// Return the desired alignment for ByVal aggregate
725     /// function arguments in the caller parameter area. For X86, aggregates
726     /// that contain SSE vectors are placed at 16-byte boundaries while the
727     /// rest are at 4-byte boundaries.
728     unsigned getByValTypeAlignment(Type *Ty,
729                                    const DataLayout &DL) const override;
730 
731     /// Returns the target specific optimal type for load
732     /// and store operations as a result of memset, memcpy, and memmove
733     /// lowering. If DstAlign is zero, it means the destination
734     /// alignment can satisfy any constraint. Similarly, if SrcAlign is zero it
735     /// means there isn't a need to check it against the alignment requirement,
736     /// probably because the source does not need to be loaded. If 'IsMemset' is
737     /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
738     /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
739     /// source is constant so it does not need to be loaded.
740     /// It returns EVT::Other if the type should be determined using generic
741     /// target-independent logic.
742     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
743                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
744                             const AttributeList &FuncAttributes) const override;
745 
746     /// Returns true if it's safe to use load / store of the
747     /// specified type to expand memcpy / memset inline. This is mostly true
748     /// for all types except for some special cases. For example, on X86
749     /// targets without SSE2, f64 load / store are done with fldl / fstpl, which
750     /// also do type conversion. Note the specified type doesn't have to be
751     /// legal as the hook is used before type legalization.
752     bool isSafeMemOpType(MVT VT) const override;
753 
754     /// Returns true if the target allows unaligned memory accesses of the
755     /// specified type. Returns whether it is "fast" in the last argument.
756     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
757                                         MachineMemOperand::Flags Flags,
758                                         bool *Fast) const override;
759 
760     /// Provide custom lowering hooks for some operations.
761     ///
762     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
763 
764     /// Places new result values for the node in Results (their number
765     /// and types must exactly match those of the original return values of
766     /// the node), or leaves Results empty, which indicates that the node is not
767     /// to be custom lowered after all.
768     void LowerOperationWrapper(SDNode *N,
769                                SmallVectorImpl<SDValue> &Results,
770                                SelectionDAG &DAG) const override;
771 
772     /// Replace the results of a node with an illegal result
773     /// type with new values built out of custom code.
774     ///
775     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
776                             SelectionDAG &DAG) const override;
777 
778     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
779 
780     // Return true if it is profitable to combine a BUILD_VECTOR with a
781     // stride-pattern to a shuffle and a truncate.
782     // Example of such a combine:
783     // v4i32 build_vector((extract_elt V, 1),
784     //                    (extract_elt V, 3),
785     //                    (extract_elt V, 5),
786     //                    (extract_elt V, 7))
787     //  -->
788     // v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to
789     // v4i64)
790     bool isDesirableToCombineBuildVectorToShuffleTruncate(
791         ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
792 
793     /// Return true if the target has native support for
794     /// the specified value type and it is 'desirable' to use the type for the
795     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
796     /// instruction encodings are longer and some i16 instructions are slow.
797     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
798 
799     /// Return true if the target has native support for the
800     /// specified value type and it is 'desirable' to use the type. e.g. On x86
801     /// i16 is legal, but undesirable since i16 instruction encodings are longer
802     /// and some i16 instructions are slow.
803     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
804 
805     /// Return 1 if we can compute the negated form of the specified expression
806     /// for the same cost as the expression itself, or 2 if we can compute the
807     /// negated form more cheaply than the expression itself. Else return 0.
808     char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations,
809                             bool ForCodeSize, unsigned Depth) const override;
810 
811     /// If isNegatibleForFree returns true, return the newly negated expression.
812     SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
813                                  bool LegalOperations, bool ForCodeSize,
814                                  unsigned Depth) const override;
815 
816     MachineBasicBlock *
817     EmitInstrWithCustomInserter(MachineInstr &MI,
818                                 MachineBasicBlock *MBB) const override;
819 
820     /// This method returns the name of a target specific DAG node.
821     const char *getTargetNodeName(unsigned Opcode) const override;
822 
823     /// Do not merge vector stores after legalization because that may conflict
824     /// with x86-specific store splitting optimizations.
825     bool mergeStoresAfterLegalization(EVT MemVT) const override {
826       return !MemVT.isVector();
827     }
828 
829     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
830                           const SelectionDAG &DAG) const override;
831 
832     bool isCheapToSpeculateCttz() const override;
833 
834     bool isCheapToSpeculateCtlz() const override;
835 
836     bool isCtlzFast() const override;
837 
838     bool hasBitPreservingFPLogic(EVT VT) const override {
839       return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
840     }
841 
842     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
843       // If the pair to store is a mixture of float and int values, we will
844       // save two bitwise instructions and one float-to-int instruction at the
845       // cost of one extra store instruction. There is potentially a more
846       // significant benefit because it avoids the float->int domain switch
847       // for the input value, so it is more likely a win.
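      // For example (illustrative only): storing an {f32, i32} pair as two
      // scalar stores avoids moving the f32 into a GPR to merge the bits into
      // one i64 store, and so avoids the FP->int domain switch.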
848       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
849           (LTy.isInteger() && HTy.isFloatingPoint()))
850         return true;
851       // If the pair only contains int values, we will save two bitwise
852       // instructions at the cost of one extra store instruction (and one more
853       // store buffer entry). Since the benefit is less clear, we leave such
854       // pairs out until we get a test case proving it is a win.
855       return false;
856     }
857 
858     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
859 
860     bool hasAndNotCompare(SDValue Y) const override;
861 
862     bool hasAndNot(SDValue Y) const override;
863 
864     bool hasBitTest(SDValue X, SDValue Y) const override;
865 
866     bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
867         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
868         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
869         SelectionDAG &DAG) const override;
870 
871     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
872                                            CombineLevel Level) const override;
873 
874     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
875 
876     bool
877     shouldTransformSignedTruncationCheck(EVT XVT,
878                                          unsigned KeptBits) const override {
879       // For vectors, we don't have a preference.
880       if (XVT.isVector())
881         return false;
882 
883       auto VTIsOk = [](EVT VT) -> bool {
884         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
885                VT == MVT::i64;
886       };
887 
888       // We are OK with KeptBitsVT being byte/word/dword, which is what MOVS
889       // supports. XVT will be larger than KeptBitsVT.
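      // Illustrative example: for XVT = MVT::i32 and KeptBits = 8, KeptBitsVT is
      // MVT::i8 and we return true; for KeptBits = 12 the computed type is not
      // byte/word/dword, so we return false.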
890       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
891       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
892     }
893 
894     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
895 
896     bool shouldSplatInsEltVarIndex(EVT VT) const override;
897 
898     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
899       return VT.isScalarInteger();
900     }
901 
902     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
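    /// Illustrative expansion (a sketch, not the exact lowering): a 128-bit
    /// equality test such as memcmp(a, b, 16) == 0 can become a PCMPEQB of the
    /// two 16-byte loads followed by PMOVMSKB and a compare of the mask against
    /// 0xFFFF (or a PTEST of the XORed values).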
903     MVT hasFastEqualityCompare(unsigned NumBits) const override;
904 
905     /// Return the value type to use for ISD::SETCC.
906     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
907                            EVT VT) const override;
908 
909     bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
910                                       TargetLoweringOpt &TLO) const override;
911 
912     /// Determine which of the bits specified in Mask are known to be either
913     /// zero or one and return them in the KnownZero/KnownOne bitsets.
914     void computeKnownBitsForTargetNode(const SDValue Op,
915                                        KnownBits &Known,
916                                        const APInt &DemandedElts,
917                                        const SelectionDAG &DAG,
918                                        unsigned Depth = 0) const override;
919 
920     /// Determine the number of bits in the operation that are sign bits.
921     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
922                                              const APInt &DemandedElts,
923                                              const SelectionDAG &DAG,
924                                              unsigned Depth) const override;
925 
926     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
927                                                  const APInt &DemandedElts,
928                                                  APInt &KnownUndef,
929                                                  APInt &KnownZero,
930                                                  TargetLoweringOpt &TLO,
931                                                  unsigned Depth) const override;
932 
933     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
934                                            const APInt &DemandedBits,
935                                            const APInt &DemandedElts,
936                                            KnownBits &Known,
937                                            TargetLoweringOpt &TLO,
938                                            unsigned Depth) const override;
939 
940     SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
941         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
942         SelectionDAG &DAG, unsigned Depth) const override;
943 
944     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
945 
946     SDValue unwrapAddress(SDValue N) const override;
947 
948     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
949 
950     bool ExpandInlineAsm(CallInst *CI) const override;
951 
952     ConstraintType getConstraintType(StringRef Constraint) const override;
953 
954     /// Examine constraint string and operand type and determine a weight value.
955     /// The operand object must already have been set up with the operand type.
956     ConstraintWeight
957       getSingleConstraintMatchWeight(AsmOperandInfo &info,
958                                      const char *constraint) const override;
959 
960     const char *LowerXConstraint(EVT ConstraintVT) const override;
961 
962     /// Lower the specified operand into the Ops vector. If it is invalid, don't
963     /// add anything to Ops. If hasMemory is true it means one of the asm
964     /// constraints of the inline asm instruction being processed is 'm'.
965     void LowerAsmOperandForConstraint(SDValue Op,
966                                       std::string &Constraint,
967                                       std::vector<SDValue> &Ops,
968                                       SelectionDAG &DAG) const override;
969 
970     unsigned
971     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
972       if (ConstraintCode == "i")
973         return InlineAsm::Constraint_i;
974       else if (ConstraintCode == "o")
975         return InlineAsm::Constraint_o;
976       else if (ConstraintCode == "v")
977         return InlineAsm::Constraint_v;
978       else if (ConstraintCode == "X")
979         return InlineAsm::Constraint_X;
980       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
981     }
982 
983     /// Handle lowering of flag assembly outputs.
984     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
985                                         const AsmOperandInfo &Constraint,
986                                         SelectionDAG &DAG) const override;
987 
988     /// Given a physical register constraint
989     /// (e.g. {edx}), return the register number and the register class for the
990     /// register.  This should only be used for C_Register constraints.  On
991     /// error, this returns a register number of 0.
992     std::pair<unsigned, const TargetRegisterClass *>
993     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
994                                  StringRef Constraint, MVT VT) const override;
995 
996     /// Return true if the addressing mode represented
997     /// by AM is legal for this target, for a load/store of the specified type.
998     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
999                                Type *Ty, unsigned AS,
1000                                Instruction *I = nullptr) const override;
1001 
1002     /// Return true if the specified immediate is a legal
1003     /// icmp immediate, that is, the target has icmp instructions which can
1004     /// compare a register against the immediate without having to materialize
1005     /// the immediate into a register.
1006     bool isLegalICmpImmediate(int64_t Imm) const override;
1007 
1008     /// Return true if the specified immediate is a legal
1009     /// add immediate, that is, the target has add instructions which can
1010     /// add a register and the immediate without having to materialize
1011     /// the immediate into a register.
1012     bool isLegalAddImmediate(int64_t Imm) const override;
1013 
1014     bool isLegalStoreImmediate(int64_t Imm) const override;
1015 
1016     /// Return the cost of the scaling factor used in the addressing
1017     /// mode represented by AM for this target, for a load/store
1018     /// of the specified type.
1019     /// If the AM is supported, the return value must be >= 0.
1020     /// If the AM is not supported, it returns a negative value.
1021     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
1022                              unsigned AS) const override;
1023 
1024     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1025 
1026     /// Add x86-specific opcodes to the default list.
1027     bool isBinOp(unsigned Opcode) const override;
1028 
1029     /// Returns true if the opcode is a commutative binary operation.
1030     bool isCommutativeBinOp(unsigned Opcode) const override;
1031 
1032     /// Return true if it's free to truncate a value of
1033     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1034     /// register EAX to i16 by referencing its sub-register AX.
1035     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1036     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1037 
1038     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1039 
1040     /// Return true if any actual instruction that defines a
1041     /// value of type Ty1 implicit zero-extends the value to Ty2 in the result
1042     /// register. This does not necessarily include registers defined in
1043     /// unknown ways, such as incoming arguments, or copies from unknown
1044     /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
1045     /// does not necessarily apply to truncate instructions. e.g. on x86-64,
1046     /// all instructions that define 32-bit values implicit zero-extend the
1047     /// result out to 64 bits.
1048     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1049     bool isZExtFree(EVT VT1, EVT VT2) const override;
1050     bool isZExtFree(SDValue Val, EVT VT2) const override;
1051 
1052     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1053     /// extend node) is profitable.
1054     bool isVectorLoadExtDesirable(SDValue) const override;
1055 
1056     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1057     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1058     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1059     bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
1060 
1061     /// Return true if it's profitable to narrow
1062     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1063     /// from i32 to i8 but not from i32 to i16.
1064     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1065 
1066     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1067     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1068     /// true and stores the intrinsic information into the IntrinsicInfo that was
1069     /// passed to the function.
1070     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1071                             MachineFunction &MF,
1072                             unsigned Intrinsic) const override;
1073 
1074     /// Returns true if the target can instruction select the
1075     /// specified FP immediate natively. If false, the legalizer will
1076     /// materialize the FP immediate as a load from a constant pool.
1077     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1078                       bool ForCodeSize) const override;
1079 
1080     /// Targets can use this to indicate that they only support *some*
1081     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1082     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1083     /// be legal.
1084     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1085 
1086     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1087     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1088     /// constant pool entry.
1089     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1090 
1091     /// Returns true if lowering to a jump table is allowed.
1092     bool areJTsAllowed(const Function *Fn) const override;
1093 
1094     /// If true, then instruction selection should
1095     /// seek to shrink the FP constant of the specified type to a smaller type
1096     /// in order to save space and / or reduce runtime.
1097     bool ShouldShrinkFPConstant(EVT VT) const override {
1098       // Don't shrink FP constant pool entries if SSE2 is available, since
1099       // cvtss2sd is more expensive than a straight movsd. On the other hand,
1100       // it's important to shrink long double FP constants since fldt is very slow.
1101       return !X86ScalarSSEf64 || VT == MVT::f80;
1102     }
1103 
1104     /// Return true if we believe it is correct and profitable to reduce the
1105     /// load node to a smaller type.
1106     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1107                                EVT NewVT) const override;
1108 
1109     /// Return true if the specified scalar FP type is computed in an SSE
1110     /// register, not on the X87 floating point stack.
1111     bool isScalarFPTypeInSSEReg(EVT VT) const {
1112       return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 when SSE2 is available
1113              (VT == MVT::f32 && X86ScalarSSEf32);   // f32 when SSE1 is available
1114     }
1115 
1116     /// Returns true if it is beneficial to convert a load of a constant
1117     /// to just the constant itself.
1118     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1119                                            Type *Ty) const override;
1120 
1121     bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1122 
1123     bool convertSelectOfConstantsToMath(EVT VT) const override;
1124 
1125     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1126                                 SDValue C) const override;
1127 
1128     bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1129                                   bool IsSigned) const override;
1130 
1131     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1132     /// with this index.
1133     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1134                                  unsigned Index) const override;
1135 
1136     /// Scalar ops always have equal or better analysis/performance/power than
1137     /// the vector equivalent, so this always makes sense if the scalar op is
1138     /// supported.
1139     bool shouldScalarizeBinop(SDValue) const override;
1140 
1141     /// Extract of a scalar FP value from index 0 of a vector is free.
1142     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1143       EVT EltVT = VT.getScalarType();
1144       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1145     }
1146 
1147     /// Overflow nodes should get combined/lowered to optimal instructions
1148     /// (they should allow eliminating explicit compares by getting flags from
1149     /// math ops).
1150     bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
1151 
1152     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1153                                       unsigned AddrSpace) const override {
1154       // If we can replace more than 2 scalar stores, there will be a reduction
1155       // in instructions even after we add a vector constant load.
1156       return NumElem > 2;
1157     }
1158 
1159     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1160                                  const SelectionDAG &DAG,
1161                                  const MachineMemOperand &MMO) const override;
1162 
1163     /// Intel processors have a unified instruction and data cache.
1164     const char * getClearCacheBuiltinName() const override {
1165       return nullptr; // nothing to do, move along.
1166     }
1167 
1168     Register getRegisterByName(const char* RegName, EVT VT,
1169                                const MachineFunction &MF) const override;
1170 
1171     /// If a physical register, this returns the register that receives the
1172     /// exception address on entry to an EH pad.
1173     unsigned
1174     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1175 
1176     /// If a physical register, this returns the register that receives the
1177     /// exception typeid on entry to a landing pad.
1178     unsigned
1179     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1180 
1181     virtual bool needsFixedCatchObjects() const override;
1182 
1183     /// This method returns a target specific FastISel object,
1184     /// or null if the target does not support "fast" ISel.
1185     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1186                              const TargetLibraryInfo *libInfo) const override;
1187 
1188     /// If the target has a standard location for the stack protector cookie,
1189     /// returns the address of that location. Otherwise, returns nullptr.
1190     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1191 
1192     bool useLoadStackGuardNode() const override;
1193     bool useStackGuardXorFP() const override;
1194     void insertSSPDeclarations(Module &M) const override;
1195     Value *getSDagStackGuard(const Module &M) const override;
1196     Function *getSSPStackGuardCheck(const Module &M) const override;
1197     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1198                                 const SDLoc &DL) const override;
1199 
1200 
1201     /// Return true if the target stores SafeStack pointer at a fixed offset in
1202     /// some non-standard address space, and populates the address space and
1203     /// offset as appropriate.
1204     Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1205 
1206     SDValue BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, SDValue StackSlot,
1207                       SelectionDAG &DAG) const;
1208 
1209     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1210 
1211     /// Customize the preferred legalization strategy for certain types.
1212     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1213 
1214     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1215                                       EVT VT) const override;
1216 
1217     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1218                                            CallingConv::ID CC,
1219                                            EVT VT) const override;
1220 
1221     unsigned getVectorTypeBreakdownForCallingConv(
1222         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1223         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1224 
1225     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1226 
1227     bool supportSwiftError() const override;
1228 
1229     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1230 
1231     unsigned getStackProbeSize(MachineFunction &MF) const;
1232 
1233     bool hasVectorBlend() const override { return true; }
1234 
1235     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1236 
1237     /// Lower interleaved load(s) into target specific
1238     /// instructions/intrinsics.
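    /// As an illustrative IR shape (a sketch): for Factor == 4 this sees a
    /// single wide load such as
    ///   %wide = load <16 x i32>, <16 x i32>* %p
    /// followed by four shufflevectors extracting lanes {0,4,8,12}, {1,5,9,13},
    /// {2,6,10,14} and {3,7,11,15}, one per interleaved member vector.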
1239     bool lowerInterleavedLoad(LoadInst *LI,
1240                               ArrayRef<ShuffleVectorInst *> Shuffles,
1241                               ArrayRef<unsigned> Indices,
1242                               unsigned Factor) const override;
1243 
1244     /// Lower interleaved store(s) into target specific
1245     /// instructions/intrinsics.
1246     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1247                                unsigned Factor) const override;
1248 
1249     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1250                                    SDValue Addr, SelectionDAG &DAG)
1251                                    const override;
1252 
1253   protected:
1254     std::pair<const TargetRegisterClass *, uint8_t>
1255     findRepresentativeClass(const TargetRegisterInfo *TRI,
1256                             MVT VT) const override;
1257 
1258   private:
1259     /// Keep a reference to the X86Subtarget around so that we can
1260     /// make the right decision when generating code for different targets.
1261     const X86Subtarget &Subtarget;
1262 
1263     /// Select between SSE or x87 floating point ops.
1264     /// When SSE is available, use it for f32 operations.
1265     /// When SSE2 is available, use it for f64 operations.
1266     bool X86ScalarSSEf32;
1267     bool X86ScalarSSEf64;
1268 
1269     /// A list of legal FP immediates.
1270     std::vector<APFloat> LegalFPImmediates;
1271 
1272     /// Indicate that this x86 target can instruction
1273     /// select the specified FP immediate natively.
1274     void addLegalFPImmediate(const APFloat& Imm) {
1275       LegalFPImmediates.push_back(Imm);
1276     }
1277 
1278     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1279                             CallingConv::ID CallConv, bool isVarArg,
1280                             const SmallVectorImpl<ISD::InputArg> &Ins,
1281                             const SDLoc &dl, SelectionDAG &DAG,
1282                             SmallVectorImpl<SDValue> &InVals,
1283                             uint32_t *RegMask) const;
1284     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1285                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1286                              const SDLoc &dl, SelectionDAG &DAG,
1287                              const CCValAssign &VA, MachineFrameInfo &MFI,
1288                              unsigned i) const;
1289     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1290                              const SDLoc &dl, SelectionDAG &DAG,
1291                              const CCValAssign &VA,
1292                              ISD::ArgFlagsTy Flags) const;
1293 
1294     // Call lowering helpers.
1295 
1296     /// Check whether the call is eligible for tail call optimization. Targets
1297     /// that want to do tail call optimization should implement this function.
1298     bool IsEligibleForTailCallOptimization(SDValue Callee,
1299                                            CallingConv::ID CalleeCC,
1300                                            bool isVarArg,
1301                                            bool isCalleeStructRet,
1302                                            bool isCallerStructRet,
1303                                            Type *RetTy,
1304                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1305                                     const SmallVectorImpl<SDValue> &OutVals,
1306                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1307                                            SelectionDAG& DAG) const;
1308     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1309                                     SDValue Chain, bool IsTailCall,
1310                                     bool Is64Bit, int FPDiff,
1311                                     const SDLoc &dl) const;
1312 
1313     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1314                                          SelectionDAG &DAG) const;
1315 
1316     unsigned getAddressSpace(void) const;
1317 
1318     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned) const;
1319 
1320     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1321     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1322     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1323     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1324 
1325     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1326                                   const unsigned char OpFlags = 0) const;
1327     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1328     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1329     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1330     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1331     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1332 
1333     /// Creates target global address or external symbol nodes for calls or
1334     /// other uses.
1335     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1336                                   bool ForCall) const;
1337 
1338     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1339     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1340     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1341     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1342     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1343     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1344     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1345     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1346     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1347     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1348     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1349     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1350     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1351     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1352     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1353     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1354     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1355     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1356     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1357     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1358     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1359     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1360     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1361     SDValue LowerGC_TRANSITION_START(SDValue Op, SelectionDAG &DAG) const;
1362     SDValue LowerGC_TRANSITION_END(SDValue Op, SelectionDAG &DAG) const;
1363     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1364     SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1365     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1366     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1367 
1368     SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
1369                           RTLIB::Libcall Call) const;
1370 
1371     SDValue
1372     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1373                          const SmallVectorImpl<ISD::InputArg> &Ins,
1374                          const SDLoc &dl, SelectionDAG &DAG,
1375                          SmallVectorImpl<SDValue> &InVals) const override;
1376     SDValue LowerCall(CallLoweringInfo &CLI,
1377                       SmallVectorImpl<SDValue> &InVals) const override;
1378 
1379     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1380                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1381                         const SmallVectorImpl<SDValue> &OutVals,
1382                         const SDLoc &dl, SelectionDAG &DAG) const override;
1383 
1384     bool supportSplitCSR(MachineFunction *MF) const override {
1385       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1386           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1387     }
1388     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1389     void insertCopiesSplitCSR(
1390       MachineBasicBlock *Entry,
1391       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1392 
1393     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1394 
1395     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1396 
1397     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1398                             ISD::NodeType ExtendKind) const override;
1399 
1400     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1401                         bool isVarArg,
1402                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1403                         LLVMContext &Context) const override;
1404 
1405     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1406 
1407     TargetLoweringBase::AtomicExpansionKind
1408     shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
1409     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1410     TargetLoweringBase::AtomicExpansionKind
1411     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1412 
1413     LoadInst *
1414     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1415 
1416     bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
1417     bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
1418 
1419     bool needsCmpXchgNb(Type *MemType) const;
1420 
1421     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1422                                 MachineBasicBlock *DispatchBB, int FI) const;
1423 
1424     /// Utility function to emit the low-level va_arg code for X86-64.
1425     MachineBasicBlock *
1426     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1427                                   MachineBasicBlock *MBB) const;
1428 
1429     /// Utility function to emit the xmm reg save portion of va_start.
1430     MachineBasicBlock *
1431     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1432                                              MachineBasicBlock *BB) const;
1433 
1434     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1435                                                  MachineInstr &MI2,
1436                                                  MachineBasicBlock *BB) const;
1437 
1438     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1439                                          MachineBasicBlock *BB) const;
1440 
1441     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1442                                            MachineBasicBlock *BB) const;
1443 
1444     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1445                                            MachineBasicBlock *BB) const;
1446 
1447     MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1448                                            MachineBasicBlock *BB) const;
1449 
1450     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1451                                             MachineBasicBlock *BB) const;
1452 
1453     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1454                                           MachineBasicBlock *BB) const;
1455 
1456     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1457                                           MachineBasicBlock *BB) const;
1458 
1459     MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1460                                             MachineBasicBlock *BB) const;
1461 
1462     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1463                                         MachineBasicBlock *MBB) const;
1464 
1465     void emitSetJmpShadowStackFix(MachineInstr &MI,
1466                                   MachineBasicBlock *MBB) const;
1467 
1468     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1469                                          MachineBasicBlock *MBB) const;
1470 
1471     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1472                                                  MachineBasicBlock *MBB) const;
1473 
1474     MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1475                                      MachineBasicBlock *MBB) const;
1476 
1477     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1478                                              MachineBasicBlock *MBB) const;
1479 
1480     /// Emit nodes that will be selected as "cmp Op0,Op1", or something
1481     /// equivalent, for use with the given x86 condition code.
1482     SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, const SDLoc &dl,
1483                     SelectionDAG &DAG) const;
1484 
1485     /// Convert a comparison if required by the subtarget.
1486     SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1487 
1488     /// Emit flags for the given setcc condition and operands. Also returns the
1489     /// corresponding X86 condition code constant in X86CC.
1490     SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1,
1491                               ISD::CondCode CC, const SDLoc &dl,
1492                               SelectionDAG &DAG,
1493                               SDValue &X86CC) const;
1494 
1495     /// Check if replacement of SQRT with RSQRT should be disabled.
1496     bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1497 
1498     /// Use rsqrt* to speed up sqrt calculations.
1499     SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1500                             int &RefinementSteps, bool &UseOneConstNR,
1501                             bool Reciprocal) const override;
1502 
1503     /// Use rcp* to speed up fdiv calculations.
1504     SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1505                              int &RefinementSteps) const override;
1506 
1507     /// Reassociate floating point divisions into multiply by reciprocal.
1508     unsigned combineRepeatedFPDivisors() const override;
1509 
1510     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1511                           SmallVectorImpl<SDNode *> &Created) const override;
1512   };
1513 
1514   namespace X86 {
1515     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1516                              const TargetLibraryInfo *libInfo);
1517   } // end namespace X86
1518 
1519   // Base class for all X86 non-masked store operations.
1520   class X86StoreSDNode : public MemSDNode {
1521   public:
1522     X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1523                    SDVTList VTs, EVT MemVT,
1524                    MachineMemOperand *MMO)
1525       : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1526     const SDValue &getValue() const { return getOperand(1); }
1527     const SDValue &getBasePtr() const { return getOperand(2); }
1528 
1529     static bool classof(const SDNode *N) {
1530       return N->getOpcode() == X86ISD::VTRUNCSTORES ||
1531         N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1532     }
1533   };
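
  // Illustrative sketch (not part of this interface): the classof hook above
  // is what allows LLVM's cast machinery to narrow a generic node, e.g.
  //   if (auto *St = dyn_cast<X86StoreSDNode>(N))
  //     SDValue StoredVal = St->getValue();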
1534 
1535   // Base class for all X86 masked store operations.
1536   // The class has the same order of operands as MaskedStoreSDNode for
1537   // convenience.
1538   class X86MaskedStoreSDNode : public MemSDNode {
1539   public:
1540     X86MaskedStoreSDNode(unsigned Opcode, unsigned Order,
1541                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1542                          MachineMemOperand *MMO)
1543       : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1544 
1545     const SDValue &getValue()   const { return getOperand(1); }
1546     const SDValue &getBasePtr() const { return getOperand(2); }
1547     const SDValue &getMask()    const { return getOperand(3); }
1548 
1549     static bool classof(const SDNode *N) {
1550       return N->getOpcode() == X86ISD::VMTRUNCSTORES ||
1551         N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1552     }
1553   };
1554 
1555   // X86 Truncating Store with Signed saturation.
1556   class TruncSStoreSDNode : public X86StoreSDNode {
1557   public:
1558     TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1559                         SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1560       : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1561 
1562     static bool classof(const SDNode *N) {
1563       return N->getOpcode() == X86ISD::VTRUNCSTORES;
1564     }
1565   };
1566 
1567   // X86 Truncating Store with Unsigned saturation.
1568   class TruncUSStoreSDNode : public X86StoreSDNode {
1569   public:
1570     TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1571                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1572       : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1573 
1574     static bool classof(const SDNode *N) {
1575       return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1576     }
1577   };
1578 
1579   // X86 Truncating Masked Store with Signed saturation.
1580   class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1581   public:
1582     MaskedTruncSStoreSDNode(unsigned Order,
1583                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1584                          MachineMemOperand *MMO)
1585       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1586 
1587     static bool classof(const SDNode *N) {
1588       return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1589     }
1590   };
1591 
1592   // X86 Truncating Masked Store with Unsigned saturation.
1593   class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1594   public:
1595     MaskedTruncUSStoreSDNode(unsigned Order,
1596                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1597                             MachineMemOperand *MMO)
1598       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1599 
1600     static bool classof(const SDNode *N) {
1601       return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1602     }
1603   };
1604 
1605   // X86 specific Gather/Scatter nodes.
1606   // The class has the same order of operands as MaskedGatherScatterSDNode for
1607   // convenience.
1608   class X86MaskedGatherScatterSDNode : public MemSDNode {
1609   public:
1610     X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1611                                  const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1612                                  MachineMemOperand *MMO)
1613         : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1614 
1615     const SDValue &getBasePtr() const { return getOperand(3); }
1616     const SDValue &getIndex()   const { return getOperand(4); }
1617     const SDValue &getMask()    const { return getOperand(2); }
1618     const SDValue &getScale()   const { return getOperand(5); }
1619 
1620     static bool classof(const SDNode *N) {
1621       return N->getOpcode() == X86ISD::MGATHER ||
1622              N->getOpcode() == X86ISD::MSCATTER;
1623     }
1624   };
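
  // For orientation (inferred from the accessors above rather than stated
  // explicitly): operand 0 is the chain, operand 1 the pass-thru/stored value
  // (see the subclasses below), operand 2 the mask, operand 3 the base
  // pointer, operand 4 the index, and operand 5 the scale.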
1625 
1626   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1627   public:
1628     X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1629                           EVT MemVT, MachineMemOperand *MMO)
1630         : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1631                                        MMO) {}
1632 
1633     const SDValue &getPassThru() const { return getOperand(1); }
1634 
1635     static bool classof(const SDNode *N) {
1636       return N->getOpcode() == X86ISD::MGATHER;
1637     }
1638   };
1639 
1640   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1641   public:
1642     X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1643                            EVT MemVT, MachineMemOperand *MMO)
1644         : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1645                                        MMO) {}
1646 
1647     const SDValue &getValue() const { return getOperand(1); }
1648 
1649     static bool classof(const SDNode *N) {
1650       return N->getOpcode() == X86ISD::MSCATTER;
1651     }
1652   };
1653 
1654   /// Generate unpacklo/unpackhi shuffle mask.
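  ///
  /// A minimal usage sketch (illustrative; results traced from the loop
  /// below):
  ///   SmallVector<int, 8> Mask;
  ///   createUnpackShuffleMask(MVT::v4i32, Mask, /*Lo=*/true,
  ///                           /*Unary=*/false);
  ///   // Mask == {0, 4, 1, 5} (unpcklps); Lo = false gives {2, 6, 3, 7}.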
1655   template <typename T = int>
1656   void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1657                                bool Unary) {
1658     assert(Mask.empty() && "Expected an empty shuffle mask vector");
1659     int NumElts = VT.getVectorNumElements();
1660     int NumEltsInLane = 128 / VT.getScalarSizeInBits();
1661     for (int i = 0; i < NumElts; ++i) {
1662       unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
1663       int Pos = (i % NumEltsInLane) / 2 + LaneStart;
1664       Pos += (Unary ? 0 : NumElts * (i % 2));
1665       Pos += (Lo ? 0 : NumEltsInLane / 2);
1666       Mask.push_back(Pos);
1667     }
1668   }
1669 
1670   /// Helper function to scale a shuffle or target shuffle mask, replacing each
1671   /// mask index with the scaled sequential indices for an equivalent narrowed
1672   /// mask. This is the reverse process to canWidenShuffleElements, but can
1673   /// always succeed.
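  ///
  /// A minimal sketch of the effect (illustrative; values traced from the
  /// loop below):
  ///   SmallVector<int, 8> Scaled;
  ///   scaleShuffleMask<int>(2, {1, -1, 0}, Scaled);
  ///   // Scaled == {2, 3, -1, -1, 0, 1}; sentinel values are repeated.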
1674   template <typename T>
1675   void scaleShuffleMask(size_t Scale, ArrayRef<T> Mask,
1676                         SmallVectorImpl<T> &ScaledMask) {
1677     assert(0 < Scale && "Unexpected scaling factor");
1678     size_t NumElts = Mask.size();
1679     ScaledMask.assign(NumElts * Scale, -1);
1680 
1681     for (size_t i = 0; i != NumElts; ++i) {
1682       int M = Mask[i];
1683 
1684       // Repeat sentinel values in every mask element.
1685       if (M < 0) {
1686         for (size_t s = 0; s != Scale; ++s)
1687           ScaledMask[(Scale * i) + s] = M;
1688         continue;
1689       }
1690 
1691       // Expand M into Scale sequential indices starting at Scale * M.
1692       for (size_t s = 0; s != Scale; ++s)
1693         ScaledMask[(Scale * i) + s] = (Scale * M) + s;
1694     }
1695   }
1696 } // end namespace llvm
1697 
1698 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
1699