xref: /freebsd/contrib/llvm-project/llvm/lib/Target/X86/X86ISelLowering.h (revision 6f63e88c0166ed3e5f2805a9e667c7d24d304cf1)
1 //===-- X86ISelLowering.h - X86 DAG Lowering Interface ----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the interfaces that X86 uses to lower LLVM code into a
10 // selection DAG.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
15 #define LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
16 
17 #include "llvm/CodeGen/CallingConvLower.h"
18 #include "llvm/CodeGen/SelectionDAG.h"
19 #include "llvm/CodeGen/TargetLowering.h"
20 
21 namespace llvm {
22   class X86Subtarget;
23   class X86TargetMachine;
24 
25   namespace X86ISD {
26     // X86 Specific DAG Nodes
27     enum NodeType : unsigned {
28       // Start the numbering where the builtin ops leave off.
29       FIRST_NUMBER = ISD::BUILTIN_OP_END,
30 
31       /// Bit scan forward.
32       BSF,
33       /// Bit scan reverse.
34       BSR,
35 
36       /// Double shift instructions. These correspond to
37       /// X86::SHLDxx and X86::SHRDxx instructions.
38       SHLD,
39       SHRD,
40 
41       /// Bitwise logical AND of floating point values. This corresponds
42       /// to X86::ANDPS or X86::ANDPD.
43       FAND,
44 
45       /// Bitwise logical OR of floating point values. This corresponds
46       /// to X86::ORPS or X86::ORPD.
47       FOR,
48 
49       /// Bitwise logical XOR of floating point values. This corresponds
50       /// to X86::XORPS or X86::XORPD.
51       FXOR,
52 
53       ///  Bitwise logical ANDNOT of floating point values. This
54       /// corresponds to X86::ANDNPS or X86::ANDNPD.
55       FANDN,
56 
57       /// These operations represent an abstract X86 call
58       /// instruction, which includes a bunch of information.  In particular the
59       /// operands of these nodes are:
60       ///
61       ///     #0 - The incoming token chain
62       ///     #1 - The callee
63       ///     #2 - The number of arg bytes the caller pushes on the stack.
64       ///     #3 - The number of arg bytes the callee pops off the stack.
65       ///     #4 - The value to pass in AL/AX/EAX (optional)
66       ///     #5 - The value to pass in DL/DX/EDX (optional)
67       ///
68       /// The result values of these nodes are:
69       ///
70       ///     #0 - The outgoing token chain
71       ///     #1 - The first register result value (optional)
72       ///     #2 - The second register result value (optional)
73       ///
74       CALL,
75 
76       /// Same as call except it adds the NoTrack prefix.
77       NT_CALL,
78 
79       /// X86 compare and logical compare instructions.
80       CMP, COMI, UCOMI,
81 
82       /// X86 bit-test instructions.
83       BT,
84 
85       /// X86 SetCC. Operand 0 is condition code, and operand 1 is the EFLAGS
86       /// operand, usually produced by a CMP instruction.
87       SETCC,
88 
89       /// X86 Select
90       SELECTS,
91 
92       // Same as SETCC except it's materialized with a sbb and the value is all
93       // one's or all zero's.
94       SETCC_CARRY,  // R = carry_bit ? ~0 : 0
95 
96       /// X86 FP SETCC, implemented with CMP{cc}SS/CMP{cc}SD.
97       /// Operands are two FP values to compare; result is a mask of
98       /// 0s or 1s.  Generally DTRT for C/C++ with NaNs.
99       FSETCC,
100 
101       /// X86 FP SETCC, similar to above, but with output as an i1 mask and
102       /// a version with SAE.
103       FSETCCM, FSETCCM_SAE,
104 
105       /// X86 conditional moves. Operand 0 and operand 1 are the two values
106       /// to select from. Operand 2 is the condition code, and operand 3 is the
107       /// flag operand produced by a CMP or TEST instruction.
108       CMOV,
109 
110       /// X86 conditional branches. Operand 0 is the chain operand, operand 1
111       /// is the block to branch if condition is true, operand 2 is the
112       /// condition code, and operand 3 is the flag operand produced by a CMP
113       /// or TEST instruction.
114       BRCOND,
115 
116       /// BRIND node with NoTrack prefix. Operand 0 is the chain operand and
117       /// operand 1 is the target address.
118       NT_BRIND,
119 
120       /// Return with a flag operand. Operand 0 is the chain operand, operand
121       /// 1 is the number of bytes of stack to pop.
122       RET_FLAG,
123 
124       /// Return from interrupt. Operand 0 is the number of bytes to pop.
125       IRET,
126 
127       /// Repeat fill, corresponds to X86::REP_STOSx.
128       REP_STOS,
129 
130       /// Repeat move, corresponds to X86::REP_MOVSx.
131       REP_MOVS,
132 
133       /// On Darwin, this node represents the result of the popl
134       /// at function entry, used for PIC code.
135       GlobalBaseReg,
136 
137       /// A wrapper node for TargetConstantPool, TargetJumpTable,
138       /// TargetExternalSymbol, TargetGlobalAddress, TargetGlobalTLSAddress,
139       /// MCSymbol and TargetBlockAddress.
140       Wrapper,
141 
142       /// Special wrapper used under X86-64 PIC mode for RIP
143       /// relative displacements.
144       WrapperRIP,
145 
146       /// Copies a 64-bit value from an MMX vector to the low word
147       /// of an XMM vector, with the high word zero filled.
148       MOVQ2DQ,
149 
150       /// Copies a 64-bit value from the low word of an XMM vector
151       /// to an MMX vector.
152       MOVDQ2Q,
153 
154       /// Copies a 32-bit value from the low word of a MMX
155       /// vector to a GPR.
156       MMX_MOVD2W,
157 
158       /// Copies a GPR into the low 32-bit word of a MMX vector
159       /// and zero out the high word.
160       MMX_MOVW2D,
161 
162       /// Extract an 8-bit value from a vector and zero extend it to
163       /// i32, corresponds to X86::PEXTRB.
164       PEXTRB,
165 
166       /// Extract a 16-bit value from a vector and zero extend it to
167       /// i32, corresponds to X86::PEXTRW.
168       PEXTRW,
169 
170       /// Insert any element of a 4 x float vector into any element
171       /// of a destination 4 x float vector.
172       INSERTPS,
173 
174       /// Insert the lower 8-bits of a 32-bit value to a vector,
175       /// corresponds to X86::PINSRB.
176       PINSRB,
177 
178       /// Insert the lower 16-bits of a 32-bit value to a vector,
179       /// corresponds to X86::PINSRW.
180       PINSRW,
181 
182       /// Shuffle 16 8-bit values within a vector.
183       PSHUFB,
184 
185       /// Compute Sum of Absolute Differences.
186       PSADBW,
187       /// Compute Double Block Packed Sum-Absolute-Differences
188       DBPSADBW,
189 
190       /// Bitwise Logical AND NOT of Packed FP values.
191       ANDNP,
192 
193       /// Blend where the selector is an immediate.
194       BLENDI,
195 
196       /// Dynamic (non-constant condition) vector blend where only the sign bits
197       /// of the condition elements are used. This is used to enforce that the
198       /// condition mask is not valid for generic VSELECT optimizations. This
199       /// is also used to implement the intrinsics.
200       /// Operands are in VSELECT order: MASK, TRUE, FALSE
201       BLENDV,
202 
203       /// Combined add and sub on an FP vector.
204       ADDSUB,
205 
206       //  FP vector ops with rounding mode.
207       FADD_RND, FADDS, FADDS_RND,
208       FSUB_RND, FSUBS, FSUBS_RND,
209       FMUL_RND, FMULS, FMULS_RND,
210       FDIV_RND, FDIVS, FDIVS_RND,
211       FMAX_SAE, FMAXS_SAE,
212       FMIN_SAE, FMINS_SAE,
213       FSQRT_RND, FSQRTS, FSQRTS_RND,
214 
215       // FP vector get exponent.
216       FGETEXP, FGETEXP_SAE, FGETEXPS, FGETEXPS_SAE,
217       // Extract Normalized Mantissas.
218       VGETMANT, VGETMANT_SAE, VGETMANTS, VGETMANTS_SAE,
219       // FP Scale.
220       SCALEF, SCALEF_RND,
221       SCALEFS, SCALEFS_RND,
222 
223       // Unsigned Integer average.
224       AVG,
225 
226       /// Integer horizontal add/sub.
227       HADD,
228       HSUB,
229 
230       /// Floating point horizontal add/sub.
231       FHADD,
232       FHSUB,
233 
234       // Detect Conflicts Within a Vector
235       CONFLICT,
236 
237       /// Floating point max and min.
238       FMAX, FMIN,
239 
240       /// Commutative FMIN and FMAX.
241       FMAXC, FMINC,
242 
243       /// Scalar intrinsic floating point max and min.
244       FMAXS, FMINS,
245 
246       /// Floating point reciprocal-sqrt and reciprocal approximation.
247       /// Note that these typically require refinement
248       /// in order to obtain suitable precision.
249       FRSQRT, FRCP,
250 
251       // AVX-512 reciprocal approximations with a little more precision.
252       RSQRT14, RSQRT14S, RCP14, RCP14S,
253 
254       // Thread Local Storage.
255       TLSADDR,
256 
257       // Thread Local Storage. A call to get the start address
258       // of the TLS block for the current module.
259       TLSBASEADDR,
260 
261       // Thread Local Storage.  When calling to an OS provided
262       // thunk at the address from an earlier relocation.
263       TLSCALL,
264 
265       // Exception Handling helpers.
266       EH_RETURN,
267 
268       // SjLj exception handling setjmp.
269       EH_SJLJ_SETJMP,
270 
271       // SjLj exception handling longjmp.
272       EH_SJLJ_LONGJMP,
273 
274       // SjLj exception handling dispatch.
275       EH_SJLJ_SETUP_DISPATCH,
276 
277       /// Tail call return. See X86TargetLowering::LowerCall for
278       /// the list of operands.
279       TC_RETURN,
280 
281       // Vector move to low scalar and zero higher vector elements.
282       VZEXT_MOVL,
283 
284       // Vector integer truncate.
285       VTRUNC,
286       // Vector integer truncate with unsigned/signed saturation.
287       VTRUNCUS, VTRUNCS,
288 
289       // Masked version of the above. Used when less than a 128-bit result is
290       // produced since the mask only applies to the lower elements and can't
291       // be represented by a select.
292       // SRC, PASSTHRU, MASK
293       VMTRUNC, VMTRUNCUS, VMTRUNCS,
294 
295       // Vector FP extend.
296       VFPEXT, VFPEXT_SAE, VFPEXTS, VFPEXTS_SAE,
297 
298       // Vector FP round.
299       VFPROUND, VFPROUND_RND, VFPROUNDS, VFPROUNDS_RND,
300 
301       // Masked version of above. Used for v2f64->v4f32.
302       // SRC, PASSTHRU, MASK
303       VMFPROUND,
304 
305       // 128-bit vector logical left / right shift
306       VSHLDQ, VSRLDQ,
307 
308       // Vector shift elements
309       VSHL, VSRL, VSRA,
310 
311       // Vector variable shift
312       VSHLV, VSRLV, VSRAV,
313 
314       // Vector shift elements by immediate
315       VSHLI, VSRLI, VSRAI,
316 
317       // Shifts of mask registers.
318       KSHIFTL, KSHIFTR,
319 
320       // Bit rotate by immediate
321       VROTLI, VROTRI,
322 
323       // Vector packed double/float comparison.
324       CMPP,
325 
326       // Vector integer comparisons.
327       PCMPEQ, PCMPGT,
328 
329       // v8i16 Horizontal minimum and position.
330       PHMINPOS,
331 
332       MULTISHIFT,
333 
334       /// Vector comparison generating mask bits for fp and
335       /// integer signed and unsigned data types.
336       CMPM,
337       // Vector comparison with SAE for FP values
338       CMPM_SAE,
339 
340       // Arithmetic operations with FLAGS results.
341       ADD, SUB, ADC, SBB, SMUL, UMUL,
342       OR, XOR, AND,
343 
344       // Bit field extract.
345       BEXTR,
346 
347       // Zero High Bits Starting with Specified Bit Position.
348       BZHI,
349 
350       // X86-specific multiply by immediate.
351       MUL_IMM,
352 
353       // Vector sign bit extraction.
354       MOVMSK,
355 
356       // Vector bitwise comparisons.
357       PTEST,
358 
359       // Vector packed fp sign bitwise comparisons.
360       TESTP,
361 
362       // OR/AND test for masks.
363       KORTEST,
364       KTEST,
365 
366       // ADD for masks.
367       KADD,
368 
369       // Several flavors of instructions with vector shuffle behaviors.
370       // Saturated signed/unsigned packing.
371       PACKSS,
372       PACKUS,
373       // Intra-lane alignr.
374       PALIGNR,
375       // AVX512 inter-lane alignr.
376       VALIGN,
377       PSHUFD,
378       PSHUFHW,
379       PSHUFLW,
380       SHUFP,
381       // VBMI2 Concat & Shift.
382       VSHLD,
383       VSHRD,
384       VSHLDV,
385       VSHRDV,
386       // Shuffle Packed Values at 128-bit granularity.
387       SHUF128,
388       MOVDDUP,
389       MOVSHDUP,
390       MOVSLDUP,
391       MOVLHPS,
392       MOVHLPS,
393       MOVSD,
394       MOVSS,
395       UNPCKL,
396       UNPCKH,
397       VPERMILPV,
398       VPERMILPI,
399       VPERMI,
400       VPERM2X128,
401 
402       // Variable Permute (VPERM).
403       // Res = VPERMV MaskV, V0
404       VPERMV,
405 
406       // 3-op Variable Permute (VPERMT2).
407       // Res = VPERMV3 V0, MaskV, V1
408       VPERMV3,
409 
410       // Bitwise ternary logic.
411       VPTERNLOG,
412       // Fix Up Special Packed Float32/64 values.
413       VFIXUPIMM, VFIXUPIMM_SAE,
414       VFIXUPIMMS, VFIXUPIMMS_SAE,
415       // Range Restriction Calculation For Packed Pairs of Float32/64 values.
416       VRANGE, VRANGE_SAE, VRANGES, VRANGES_SAE,
417       // Reduce - Perform Reduction Transformation on scalar/packed FP.
418       VREDUCE, VREDUCE_SAE, VREDUCES, VREDUCES_SAE,
419       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
420       // Also used by the legacy (V)ROUND intrinsics where we mask out the
421       // scaling part of the immediate.
422       VRNDSCALE, VRNDSCALE_SAE, VRNDSCALES, VRNDSCALES_SAE,
423       // Tests the types of FP values for packed types.
424       VFPCLASS,
425       // Tests the types of FP values for scalar types.
426       VFPCLASSS,
427 
428       // Broadcast (splat) scalar or element 0 of a vector. If the operand is
429       // a vector, this node may change the vector length as part of the splat.
430       VBROADCAST,
431       // Broadcast mask to vector.
432       VBROADCASTM,
433       // Broadcast subvector to vector.
434       SUBV_BROADCAST,
435 
436       /// SSE4A Extraction and Insertion.
437       EXTRQI, INSERTQI,
438 
439       // XOP arithmetic/logical shifts.
440       VPSHA, VPSHL,
441       // XOP signed/unsigned integer comparisons.
442       VPCOM, VPCOMU,
443       // XOP packed permute bytes.
444       VPPERM,
445       // XOP two source permutation.
446       VPERMIL2,
447 
448       // Vector multiply packed unsigned doubleword integers.
449       PMULUDQ,
450       // Vector multiply packed signed doubleword integers.
451       PMULDQ,
452       // Vector Multiply Packed UnsignedIntegers with Round and Scale.
453       MULHRS,
454 
455       // Multiply and Add Packed Integers.
456       VPMADDUBSW, VPMADDWD,
457 
458       // AVX512IFMA multiply and add.
459       // NOTE: These are different than the instruction and perform
460       // op0 x op1 + op2.
461       VPMADD52L, VPMADD52H,
462 
463       // VNNI
464       VPDPBUSD,
465       VPDPBUSDS,
466       VPDPWSSD,
467       VPDPWSSDS,
468 
469       // FMA nodes.
470       // We use the target independent ISD::FMA for the non-inverted case.
471       FNMADD,
472       FMSUB,
473       FNMSUB,
474       FMADDSUB,
475       FMSUBADD,
476 
477       // FMA with rounding mode.
478       FMADD_RND,
479       FNMADD_RND,
480       FMSUB_RND,
481       FNMSUB_RND,
482       FMADDSUB_RND,
483       FMSUBADD_RND,
484 
485       // Compress and expand.
486       COMPRESS,
487       EXPAND,
488 
489       // Bits shuffle
490       VPSHUFBITQMB,
491 
492       // Convert signed/unsigned integer to floating-point with rounding mode.
493       SINT_TO_FP_RND, UINT_TO_FP_RND,
494       SCALAR_SINT_TO_FP, SCALAR_UINT_TO_FP,
495       SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
496 
497       // Vector float/double to signed/unsigned integer.
498       CVTP2SI, CVTP2UI, CVTP2SI_RND, CVTP2UI_RND,
499       // Scalar float/double to signed/unsigned integer.
500       CVTS2SI, CVTS2UI, CVTS2SI_RND, CVTS2UI_RND,
501 
502       // Vector float/double to signed/unsigned integer with truncation.
503       CVTTP2SI, CVTTP2UI, CVTTP2SI_SAE, CVTTP2UI_SAE,
504       // Scalar float/double to signed/unsigned integer with truncation.
505       CVTTS2SI, CVTTS2UI, CVTTS2SI_SAE, CVTTS2UI_SAE,
506 
507       // Vector signed/unsigned integer to float/double.
508       CVTSI2P, CVTUI2P,
509 
510       // Masked versions of above. Used for v2f64->v4f32.
511       // SRC, PASSTHRU, MASK
512       MCVTP2SI, MCVTP2UI, MCVTTP2SI, MCVTTP2UI,
513       MCVTSI2P, MCVTUI2P,
514 
515       // Vector float to bfloat16.
516       // Convert TWO packed single data to one packed BF16 data
517       CVTNE2PS2BF16,
518       // Convert packed single data to packed BF16 data
519       CVTNEPS2BF16,
520       // Masked version of above.
521       // SRC, PASSTHRU, MASK
522       MCVTNEPS2BF16,
523 
524       // Dot product of BF16 pairs accumulated into
525       // packed single precision.
526       DPBF16PS,
527 
528       // Save xmm argument registers to the stack, according to %al. An operator
529       // is needed so that this can be expanded with control flow.
530       VASTART_SAVE_XMM_REGS,
531 
532       // Windows's _chkstk call to do stack probing.
533       WIN_ALLOCA,
534 
535       // For allocating variable amounts of stack space when using
536       // segmented stacks. Check if the current stacklet has enough space, and
537       // falls back to heap allocation if not.
538       SEG_ALLOCA,
539 
540       // Memory barriers.
541       MEMBARRIER,
542       MFENCE,
543 
544       // Store FP status word into i16 register.
545       FNSTSW16r,
546 
547       // Store contents of %ah into %eflags.
548       SAHF,
549 
550       // Get a random integer and indicate whether it is valid in CF.
551       RDRAND,
552 
553       // Get a NIST SP800-90B & C compliant random integer and
554       // indicate whether it is valid in CF.
555       RDSEED,
556 
557       // Protection keys
558       // RDPKRU - Operand 0 is chain. Operand 1 is value for ECX.
559       // WRPKRU - Operand 0 is chain. Operand 1 is value for EDX. Operand 2 is
560       // value for ECX.
561       RDPKRU, WRPKRU,
562 
563       // SSE42 string comparisons.
564       // These nodes produce 3 results, index, mask, and flags. X86ISelDAGToDAG
565       // will emit one or two instructions based on which results are used. If
566       // flags and index/mask are both used, this allows us to use a single
567       // instruction since we won't have to pick an opcode for flags. Instead we
568       // can rely on the DAG to CSE everything and decide at isel.
569       PCMPISTR,
570       PCMPESTR,
571 
572       // Test if in transactional execution.
573       XTEST,
574 
575       // ERI instructions.
576       RSQRT28, RSQRT28_SAE, RSQRT28S, RSQRT28S_SAE,
577       RCP28, RCP28_SAE, RCP28S, RCP28S_SAE, EXP2, EXP2_SAE,
578 
579       // Conversions between float and half-float.
580       CVTPS2PH, CVTPH2PS, CVTPH2PS_SAE,
581 
582       // Masked version of above.
583       // SRC, RND, PASSTHRU, MASK
584       MCVTPS2PH,
585 
586       // Galois Field Arithmetic Instructions
587       GF2P8AFFINEINVQB, GF2P8AFFINEQB, GF2P8MULB,
588 
589       // LWP insert record.
590       LWPINS,
591 
592       // User level wait
593       UMWAIT, TPAUSE,
594 
595       // Enqueue Stores Instructions
596       ENQCMD, ENQCMDS,
597 
598       // For avx512-vp2intersect
599       VP2INTERSECT,
600 
601       /// X86 strict FP compare instructions.
602       STRICT_FCMP = ISD::FIRST_TARGET_STRICTFP_OPCODE,
603       STRICT_FCMPS,
604 
605       // Vector packed double/float comparison.
606       STRICT_CMPP,
607 
608       /// Vector comparison generating mask bits for fp and
609       /// integer signed and unsigned data types.
610       STRICT_CMPM,
611 
612       // Vector float/double to signed/unsigned integer with truncation.
613       STRICT_CVTTP2SI, STRICT_CVTTP2UI,
614 
615       // Vector FP extend.
616       STRICT_VFPEXT,
617 
618       // Vector FP round.
619       STRICT_VFPROUND,
620 
621       // RndScale - Round FP Values To Include A Given Number Of Fraction Bits.
622       // Also used by the legacy (V)ROUND intrinsics where we mask out the
623       // scaling part of the immediate.
624       STRICT_VRNDSCALE,
625 
626       // Vector signed/unsigned integer to float/double.
627       STRICT_CVTSI2P, STRICT_CVTUI2P,
628 
629       // Compare and swap.
630       LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE,
631       LCMPXCHG8_DAG,
632       LCMPXCHG16_DAG,
633       LCMPXCHG8_SAVE_EBX_DAG,
634       LCMPXCHG16_SAVE_RBX_DAG,
635 
636       /// LOCK-prefixed arithmetic read-modify-write instructions.
637       /// EFLAGS, OUTCHAIN = LADD(INCHAIN, PTR, RHS)
638       LADD, LSUB, LOR, LXOR, LAND,
639 
640       // Load, scalar_to_vector, and zero extend.
641       VZEXT_LOAD,
642 
643       // extract_vector_elt, store.
644       VEXTRACT_STORE,
645 
646       // scalar broadcast from memory
647       VBROADCAST_LOAD,
648 
649       // Store FP control word into i16 memory.
650       FNSTCW16m,
651 
652       /// This instruction implements FP_TO_SINT with the
653       /// integer destination in memory and a FP reg source.  This corresponds
654       /// to the X86::FIST*m instructions and the rounding mode change stuff. It
655       /// has two inputs (token chain and address) and two outputs (int value
656       /// and token chain). Memory VT specifies the type to store to.
657       FP_TO_INT_IN_MEM,
658 
659       /// This instruction implements SINT_TO_FP with the
660       /// integer source in memory and FP reg result.  This corresponds to the
661       /// X86::FILD*m instructions. It has two inputs (token chain and address)
662       /// and two outputs (FP value and token chain; FILD_FLAG also produces a
663       /// flag). The integer source type is specified by the memory VT.
664       FILD,
665       FILD_FLAG,
666 
667       /// This instruction implements a fp->int store from FP stack
668       /// slots. This corresponds to the fist instruction. It takes a
669       /// chain operand, value to store, address, and glue. The memory VT
670       /// specifies the type to store as.
671       FIST,
672 
673       /// This instruction implements an extending load to FP stack slots.
674       /// This corresponds to the X86::FLD32m / X86::FLD64m. It takes a chain
675       /// operand, and ptr to load from. The memory VT specifies the type to
676       /// load from.
677       FLD,
678 
679       /// This instruction implements a truncating store from FP stack
680       /// slots. This corresponds to the X86::FST32m / X86::FST64m. It takes a
681       /// chain operand, value to store, address, and glue. The memory VT
682       /// specifies the type to store as.
683       FST,
684 
685       /// This instruction grabs the address of the next argument
686       /// from a va_list. (reads and modifies the va_list in memory)
687       VAARG_64,
688 
689       // Vector truncating store with unsigned/signed saturation
690       VTRUNCSTOREUS, VTRUNCSTORES,
691       // Vector truncating masked store with unsigned/signed saturation
692       VMTRUNCSTOREUS, VMTRUNCSTORES,
693 
694       // X86 specific gather and scatter
695       MGATHER, MSCATTER,
696 
697       // WARNING: Do not add anything at the end unless you want the node to
698       // have memop! In fact, starting from FIRST_TARGET_MEMORY_OPCODE all
699       // opcodes will be treated as target memory ops!
700     };
701   } // end namespace X86ISD
702 
703   /// Define some predicates that are used for node matching.
704   namespace X86 {
705     /// Returns true if Elt is a constant zero or floating point constant +0.0.
706     bool isZeroNode(SDValue Elt);
707 
708     /// Returns true if the given offset can
709     /// fit into the displacement field of the instruction.
710     bool isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
711                                       bool hasSymbolicDisplacement = true);
712 
713     /// Determines whether the callee is required to pop its
714     /// own arguments. Callee pop is necessary to support tail calls.
715     bool isCalleePop(CallingConv::ID CallingConv,
716                      bool is64Bit, bool IsVarArg, bool GuaranteeTCO);
717 
718     /// If Op is a constant whose elements are all the same constant or
719     /// undefined, return true and store the constant value in \p SplatVal.
720     bool isConstantSplat(SDValue Op, APInt &SplatVal);
721   } // end namespace X86
722 
723   //===--------------------------------------------------------------------===//
724   //  X86 Implementation of the TargetLowering interface
725   class X86TargetLowering final : public TargetLowering {
726   public:
727     explicit X86TargetLowering(const X86TargetMachine &TM,
728                                const X86Subtarget &STI);
729 
730     unsigned getJumpTableEncoding() const override;
731     bool useSoftFloat() const override;
732 
733     void markLibCallAttributes(MachineFunction *MF, unsigned CC,
734                                ArgListTy &Args) const override;
735 
736     MVT getScalarShiftAmountTy(const DataLayout &, EVT VT) const override {
737       return MVT::i8; // Always i8; the DataLayout and VT arguments are unused.
738     }
739 
740     const MCExpr *
741     LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
742                               const MachineBasicBlock *MBB, unsigned uid,
743                               MCContext &Ctx) const override;
744 
745     /// Returns relocation base for the given PIC jumptable.
746     SDValue getPICJumpTableRelocBase(SDValue Table,
747                                      SelectionDAG &DAG) const override;
748     const MCExpr *
749     getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
750                                  unsigned JTI, MCContext &Ctx) const override;
751 
752     /// Return the desired alignment for ByVal aggregate
753     /// function arguments in the caller parameter area. For X86, aggregates
754     /// that contain SSE vectors are placed at 16-byte boundaries while the
755     /// rest are at 4-byte boundaries.
756     unsigned getByValTypeAlignment(Type *Ty,
757                                    const DataLayout &DL) const override;
758 
759     /// Returns the target specific optimal type for load
760     /// and store operations as a result of memset, memcpy, and memmove
761     /// lowering. If DstAlign is zero that means the destination
762     /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it
763     /// means there isn't a need to check it against alignment requirement,
764     /// probably because the source does not need to be loaded. If 'IsMemset' is
765     /// true, that means it's expanding a memset. If 'ZeroMemset' is true, that
766     /// means it's a memset of zero. 'MemcpyStrSrc' indicates whether the memcpy
767     /// source is constant so it does not need to be loaded.
768     /// It returns MVT::Other if the type should be determined using generic
769     /// target-independent logic.
770     EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign,
771                             bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
772                             const AttributeList &FuncAttributes) const override;
773 
774     /// Returns true if it's safe to use load / store of the
775     /// specified type to expand memcpy / memset inline. This is mostly true
776     /// for all types except for some special cases. For example, on X86
777     /// targets without SSE2 f64 load / store are done with fldl / fstpl which
778     /// also does type conversion. Note the specified type doesn't have to be
779     /// legal as the hook is used before type legalization.
780     bool isSafeMemOpType(MVT VT) const override;
781 
782     /// Returns true if the target allows unaligned memory accesses of the
783     /// specified type. Returns whether it is "fast" in the last argument.
784     bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align,
785                                         MachineMemOperand::Flags Flags,
786                                         bool *Fast) const override;
787 
788     /// Provide custom lowering hooks for some operations.
789     ///
790     SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override;
791 
792     /// Places new result values for the node in Results (their number
793     /// and types must exactly match those of the original return values of
794     /// the node), or leaves Results empty, which indicates that the node is not
795     /// to be custom lowered after all.
796     void LowerOperationWrapper(SDNode *N,
797                                SmallVectorImpl<SDValue> &Results,
798                                SelectionDAG &DAG) const override;
799 
800     /// Replace the results of node with an illegal result
801     /// type with new values built out of custom code.
802     ///
803     void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue>&Results,
804                             SelectionDAG &DAG) const override;
805 
806     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
807 
808     // Return true if it is profitable to combine a BUILD_VECTOR with a
809     // stride-pattern to a shuffle and a truncate.
810     // Example of such a combine:
811     // v4i32 build_vector((extract_elt V, 1),
812     //                    (extract_elt V, 3),
813     //                    (extract_elt V, 5),
814     //                    (extract_elt V, 7))
815     //  -->
816     // v4i32 truncate (bitcast (shuffle<1,u,3,u,4,u,5,u,6,u,7,u> V, u) to
817     // v4i64)
818     bool isDesirableToCombineBuildVectorToShuffleTruncate(
819         ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const override;
820 
821     /// Return true if the target has native support for
822     /// the specified value type and it is 'desirable' to use the type for the
823     /// given node type. e.g. On x86 i16 is legal, but undesirable since i16
824     /// instruction encodings are longer and some i16 instructions are slow.
825     bool isTypeDesirableForOp(unsigned Opc, EVT VT) const override;
826 
827     /// Return true if the target has native support for the
828     /// specified value type and it is 'desirable' to use the type. e.g. On x86
829     /// i16 is legal, but undesirable since i16 instruction encodings are longer
830     /// and some i16 instructions are slow.
831     bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const override;
832 
833     /// Return 1 if we can compute the negated form of the specified expression
834     /// for the same cost as the expression itself, or 2 if we can compute the
835     /// negated form more cheaply than the expression itself. Else return 0.
836     char isNegatibleForFree(SDValue Op, SelectionDAG &DAG, bool LegalOperations,
837                             bool ForCodeSize, unsigned Depth) const override;
838 
839     /// If isNegatibleForFree returns true, return the newly negated expression.
840     SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
841                                  bool LegalOperations, bool ForCodeSize,
842                                  unsigned Depth) const override;
843 
844     MachineBasicBlock *
845     EmitInstrWithCustomInserter(MachineInstr &MI,
846                                 MachineBasicBlock *MBB) const override;
847 
848     /// This method returns the name of a target specific DAG node.
849     const char *getTargetNodeName(unsigned Opcode) const override;
850 
851     /// Do not merge vector stores after legalization because that may conflict
852     /// with x86-specific store splitting optimizations.
853     bool mergeStoresAfterLegalization(EVT MemVT) const override {
854       return !MemVT.isVector();
855     }
856 
857     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
858                           const SelectionDAG &DAG) const override;
859 
860     bool isCheapToSpeculateCttz() const override;
861 
862     bool isCheapToSpeculateCtlz() const override;
863 
864     bool isCtlzFast() const override;
865 
866     bool hasBitPreservingFPLogic(EVT VT) const override {
867       return VT == MVT::f32 || VT == MVT::f64 || VT.isVector();
868     }
869 
870     bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const override {
871       // If the pair to store is a mixture of float and int values, we will
872       // save two bitwise instructions and one float-to-int instruction and
873       // increase one store instruction. There is potentially a more
874       // significant benefit because it avoids the float->int domain switch
875       // for input value. So It is more likely a win.
876       if ((LTy.isFloatingPoint() && HTy.isInteger()) ||
877           (LTy.isInteger() && HTy.isFloatingPoint()))
878         return true;
879       // If the pair only contains int values, we will save two bitwise
880       // instructions and increase one store instruction (costing one more
881       // store buffer). Since the benefit is more blurred so we leave
882       // such pair out until we get testcase to prove it is a win.
883       return false;
884     }
885 
886     bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const override;
887 
888     bool hasAndNotCompare(SDValue Y) const override;
889 
890     bool hasAndNot(SDValue Y) const override;
891 
892     bool hasBitTest(SDValue X, SDValue Y) const override;
893 
894     bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
895         SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
896         unsigned OldShiftOpcode, unsigned NewShiftOpcode,
897         SelectionDAG &DAG) const override;
898 
899     bool shouldFoldConstantShiftPairToMask(const SDNode *N,
900                                            CombineLevel Level) const override;
901 
902     bool shouldFoldMaskToVariableShiftPair(SDValue Y) const override;
903 
904     bool
905     shouldTransformSignedTruncationCheck(EVT XVT,
906                                          unsigned KeptBits) const override {
907       // For vectors, we don't have a preference..
908       if (XVT.isVector())
909         return false;
910 
911       auto VTIsOk = [](EVT VT) -> bool {
912         return VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
913                VT == MVT::i64;
914       };
915 
916       // We are ok with KeptBitsVT being byte/word/dword, what MOVS supports.
917       // XVT will be larger than KeptBitsVT.
918       MVT KeptBitsVT = MVT::getIntegerVT(KeptBits);
919       return VTIsOk(XVT) && VTIsOk(KeptBitsVT);
920     }
921 
922     bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override;
923 
924     bool shouldSplatInsEltVarIndex(EVT VT) const override;
925 
926     bool convertSetCCLogicToBitwiseLogic(EVT VT) const override {
927       return VT.isScalarInteger();
928     }
929 
930     /// Vector-sized comparisons are fast using PCMPEQ + PMOVMSK or PTEST.
931     MVT hasFastEqualityCompare(unsigned NumBits) const override;
932 
933     /// Return the value type to use for ISD::SETCC.
934     EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
935                            EVT VT) const override;
936 
937     bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
938                                       TargetLoweringOpt &TLO) const override;
939 
    /// Determine which bits of Op are known to be either zero or one and
    /// return them in Known.
942     void computeKnownBitsForTargetNode(const SDValue Op,
943                                        KnownBits &Known,
944                                        const APInt &DemandedElts,
945                                        const SelectionDAG &DAG,
946                                        unsigned Depth = 0) const override;
947 
948     /// Determine the number of bits in the operation that are sign bits.
949     unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
950                                              const APInt &DemandedElts,
951                                              const SelectionDAG &DAG,
952                                              unsigned Depth) const override;
953 
954     bool SimplifyDemandedVectorEltsForTargetNode(SDValue Op,
955                                                  const APInt &DemandedElts,
956                                                  APInt &KnownUndef,
957                                                  APInt &KnownZero,
958                                                  TargetLoweringOpt &TLO,
959                                                  unsigned Depth) const override;
960 
961     bool SimplifyDemandedBitsForTargetNode(SDValue Op,
962                                            const APInt &DemandedBits,
963                                            const APInt &DemandedElts,
964                                            KnownBits &Known,
965                                            TargetLoweringOpt &TLO,
966                                            unsigned Depth) const override;
967 
968     SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
969         SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
970         SelectionDAG &DAG, unsigned Depth) const override;
971 
972     const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
973 
974     SDValue unwrapAddress(SDValue N) const override;
975 
976     SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const;
977 
978     bool ExpandInlineAsm(CallInst *CI) const override;
979 
980     ConstraintType getConstraintType(StringRef Constraint) const override;
981 
982     /// Examine constraint string and operand type and determine a weight value.
983     /// The operand object must already have been set up with the operand type.
984     ConstraintWeight
985       getSingleConstraintMatchWeight(AsmOperandInfo &info,
986                                      const char *constraint) const override;
987 
988     const char *LowerXConstraint(EVT ConstraintVT) const override;
989 
    /// Lower the specified operand into the Ops vector. If it is invalid, don't
    /// add anything to Ops.
    /// NOTE(review): this comment used to describe a 'hasMemory' flag (true
    /// when one of the asm constraints of the inline asm instruction being
    /// processed is 'm'); the declaration below takes no such parameter.
993     void LowerAsmOperandForConstraint(SDValue Op,
994                                       std::string &Constraint,
995                                       std::vector<SDValue> &Ops,
996                                       SelectionDAG &DAG) const override;
997 
998     unsigned
999     getInlineAsmMemConstraint(StringRef ConstraintCode) const override {
1000       if (ConstraintCode == "o")
1001         return InlineAsm::Constraint_o;
1002       else if (ConstraintCode == "v")
1003         return InlineAsm::Constraint_v;
1004       else if (ConstraintCode == "X")
1005         return InlineAsm::Constraint_X;
1006       return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
1007     }
1008 
1009     /// Handle Lowering flag assembly outputs.
1010     SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, SDLoc DL,
1011                                         const AsmOperandInfo &Constraint,
1012                                         SelectionDAG &DAG) const override;
1013 
1014     /// Given a physical register constraint
1015     /// (e.g. {edx}), return the register number and the register class for the
1016     /// register.  This should only be used for C_Register constraints.  On
1017     /// error, this returns a register number of 0.
1018     std::pair<unsigned, const TargetRegisterClass *>
1019     getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
1020                                  StringRef Constraint, MVT VT) const override;
1021 
1022     /// Return true if the addressing mode represented
1023     /// by AM is legal for this target, for a load/store of the specified type.
1024     bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
1025                                Type *Ty, unsigned AS,
1026                                Instruction *I = nullptr) const override;
1027 
1028     /// Return true if the specified immediate is legal
1029     /// icmp immediate, that is the target has icmp instructions which can
1030     /// compare a register against the immediate without having to materialize
1031     /// the immediate into a register.
1032     bool isLegalICmpImmediate(int64_t Imm) const override;
1033 
1034     /// Return true if the specified immediate is legal
1035     /// add immediate, that is the target has add instructions which can
1036     /// add a register and the immediate without having to materialize
1037     /// the immediate into a register.
1038     bool isLegalAddImmediate(int64_t Imm) const override;
1039 
1040     bool isLegalStoreImmediate(int64_t Imm) const override;
1041 
1042     /// Return the cost of the scaling factor used in the addressing
1043     /// mode represented by AM for this target, for a load/store
1044     /// of the specified type.
1045     /// If the AM is supported, the return value must be >= 0.
1046     /// If the AM is not supported, it returns a negative value.
1047     int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty,
1048                              unsigned AS) const override;
1049 
1050     bool isVectorShiftByScalarCheap(Type *Ty) const override;
1051 
1052     /// Add x86-specific opcodes to the default list.
1053     bool isBinOp(unsigned Opcode) const override;
1054 
1055     /// Returns true if the opcode is a commutative binary operation.
1056     bool isCommutativeBinOp(unsigned Opcode) const override;
1057 
1058     /// Return true if it's free to truncate a value of
1059     /// type Ty1 to type Ty2. e.g. On x86 it's free to truncate a i32 value in
1060     /// register EAX to i16 by referencing its sub-register AX.
1061     bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
1062     bool isTruncateFree(EVT VT1, EVT VT2) const override;
1063 
1064     bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override;
1065 
    /// Return true if any actual instruction that defines a
    /// value of type Ty1 implicitly zero-extends the value to Ty2 in the
    /// result register. This does not necessarily include registers defined
    /// in unknown ways, such as incoming arguments, or copies from unknown
    /// virtual registers. Also, if isTruncateFree(Ty2, Ty1) is true, this
    /// does not necessarily apply to truncate instructions. e.g. on x86-64,
    /// all instructions that define 32-bit values implicitly zero-extend the
    /// result out to 64 bits.
1074     bool isZExtFree(Type *Ty1, Type *Ty2) const override;
1075     bool isZExtFree(EVT VT1, EVT VT2) const override;
1076     bool isZExtFree(SDValue Val, EVT VT2) const override;
1077 
1078     /// Return true if folding a vector load into ExtVal (a sign, zero, or any
1079     /// extend node) is profitable.
1080     bool isVectorLoadExtDesirable(SDValue) const override;
1081 
1082     /// Return true if an FMA operation is faster than a pair of fmul and fadd
1083     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
1084     /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
1085     bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
1086                                     EVT VT) const override;
1087 
1088     /// Return true if it's profitable to narrow
1089     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
1090     /// from i32 to i8 but not from i32 to i16.
1091     bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
1092 
1093     /// Given an intrinsic, checks if on the target the intrinsic will need to map
1094     /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
1095     /// true and stores the intrinsic information into the IntrinsicInfo that was
1096     /// passed to the function.
1097     bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
1098                             MachineFunction &MF,
1099                             unsigned Intrinsic) const override;
1100 
1101     /// Returns true if the target can instruction select the
1102     /// specified FP immediate natively. If false, the legalizer will
1103     /// materialize the FP immediate as a load from a constant pool.
1104     bool isFPImmLegal(const APFloat &Imm, EVT VT,
1105                       bool ForCodeSize) const override;
1106 
1107     /// Targets can use this to indicate that they only support *some*
1108     /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1109     /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to
1110     /// be legal.
1111     bool isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1112 
1113     /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1114     /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1115     /// constant pool entry.
1116     bool isVectorClearMaskLegal(ArrayRef<int> Mask, EVT VT) const override;
1117 
1118     /// Returns true if lowering to a jump table is allowed.
1119     bool areJTsAllowed(const Function *Fn) const override;
1120 
1121     /// If true, then instruction selection should
1122     /// seek to shrink the FP constant of the specified type to a smaller type
1123     /// in order to save space and / or reduce runtime.
1124     bool ShouldShrinkFPConstant(EVT VT) const override {
1125       // Don't shrink FP constpool if SSE2 is available since cvtss2sd is more
1126       // expensive than a straight movsd. On the other hand, it's important to
1127       // shrink long double fp constant since fldt is very slow.
1128       return !X86ScalarSSEf64 || VT == MVT::f80;
1129     }
1130 
1131     /// Return true if we believe it is correct and profitable to reduce the
1132     /// load node to a smaller type.
1133     bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1134                                EVT NewVT) const override;
1135 
1136     /// Return true if the specified scalar FP type is computed in an SSE
1137     /// register, not on the X87 floating point stack.
1138     bool isScalarFPTypeInSSEReg(EVT VT) const {
1139       return (VT == MVT::f64 && X86ScalarSSEf64) || // f64 is when SSE2
1140              (VT == MVT::f32 && X86ScalarSSEf32);   // f32 is when SSE1
1141     }
1142 
1143     /// Returns true if it is beneficial to convert a load of a constant
1144     /// to just the constant itself.
1145     bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
1146                                            Type *Ty) const override;
1147 
1148     bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const override;
1149 
1150     bool convertSelectOfConstantsToMath(EVT VT) const override;
1151 
1152     bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
1153                                 SDValue C) const override;
1154 
1155     /// Return true if EXTRACT_SUBVECTOR is cheap for this result type
1156     /// with this index.
1157     bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
1158                                  unsigned Index) const override;
1159 
1160     /// Scalar ops always have equal or better analysis/performance/power than
1161     /// the vector equivalent, so this always makes sense if the scalar op is
1162     /// supported.
1163     bool shouldScalarizeBinop(SDValue) const override;
1164 
1165     /// Extract of a scalar FP value from index 0 of a vector is free.
1166     bool isExtractVecEltCheap(EVT VT, unsigned Index) const override {
1167       EVT EltVT = VT.getScalarType();
1168       return (EltVT == MVT::f32 || EltVT == MVT::f64) && Index == 0;
1169     }
1170 
1171     /// Overflow nodes should get combined/lowered to optimal instructions
1172     /// (they should allow eliminating explicit compares by getting flags from
1173     /// math ops).
1174     bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const override;
1175 
1176     bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem,
1177                                       unsigned AddrSpace) const override {
1178       // If we can replace more than 2 scalar stores, there will be a reduction
1179       // in instructions even after we add a vector constant load.
1180       return NumElem > 2;
1181     }
1182 
1183     bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
1184                                  const SelectionDAG &DAG,
1185                                  const MachineMemOperand &MMO) const override;
1186 
1187     /// Intel processors have a unified instruction and data cache
1188     const char * getClearCacheBuiltinName() const override {
1189       return nullptr; // nothing to do, move along.
1190     }
1191 
1192     Register getRegisterByName(const char* RegName, LLT VT,
1193                                const MachineFunction &MF) const override;
1194 
1195     /// If a physical register, this returns the register that receives the
1196     /// exception address on entry to an EH pad.
1197     unsigned
1198     getExceptionPointerRegister(const Constant *PersonalityFn) const override;
1199 
1200     /// If a physical register, this returns the register that receives the
1201     /// exception typeid on entry to a landing pad.
1202     unsigned
1203     getExceptionSelectorRegister(const Constant *PersonalityFn) const override;
1204 
1205     virtual bool needsFixedCatchObjects() const override;
1206 
1207     /// This method returns a target specific FastISel object,
1208     /// or null if the target does not support "fast" ISel.
1209     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1210                              const TargetLibraryInfo *libInfo) const override;
1211 
1212     /// If the target has a standard location for the stack protector cookie,
1213     /// returns the address of that location. Otherwise, returns nullptr.
1214     Value *getIRStackGuard(IRBuilder<> &IRB) const override;
1215 
1216     bool useLoadStackGuardNode() const override;
1217     bool useStackGuardXorFP() const override;
1218     void insertSSPDeclarations(Module &M) const override;
1219     Value *getSDagStackGuard(const Module &M) const override;
1220     Function *getSSPStackGuardCheck(const Module &M) const override;
1221     SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
1222                                 const SDLoc &DL) const override;
1223 
1224 
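    /// Return the location of the SafeStack pointer as an IR Value.
    /// NOTE(review): the earlier wording ("Return true if the target stores
    /// SafeStack pointer at a fixed offset in some non-standard address space,
    /// and populates the address space and offset as appropriate") does not
    /// match this Value*-returning signature -- confirm the exact contract
    /// against the implementation.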
1228     Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const override;
1229 
1230     std::pair<SDValue, SDValue> BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
1231                                           SDValue StackSlot,
1232                                           SelectionDAG &DAG) const;
1233 
1234     bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const override;
1235 
1236     /// Customize the preferred legalization strategy for certain types.
1237     LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
1238 
1239     MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC,
1240                                       EVT VT) const override;
1241 
1242     unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1243                                            CallingConv::ID CC,
1244                                            EVT VT) const override;
1245 
1246     unsigned getVectorTypeBreakdownForCallingConv(
1247         LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
1248         unsigned &NumIntermediates, MVT &RegisterVT) const override;
1249 
1250     bool isIntDivCheap(EVT VT, AttributeList Attr) const override;
1251 
1252     bool supportSwiftError() const override;
1253 
1254     StringRef getStackProbeSymbolName(MachineFunction &MF) const override;
1255 
1256     unsigned getStackProbeSize(MachineFunction &MF) const;
1257 
1258     bool hasVectorBlend() const override { return true; }
1259 
1260     unsigned getMaxSupportedInterleaveFactor() const override { return 4; }
1261 
1262     /// Lower interleaved load(s) into target specific
1263     /// instructions/intrinsics.
1264     bool lowerInterleavedLoad(LoadInst *LI,
1265                               ArrayRef<ShuffleVectorInst *> Shuffles,
1266                               ArrayRef<unsigned> Indices,
1267                               unsigned Factor) const override;
1268 
1269     /// Lower interleaved store(s) into target specific
1270     /// instructions/intrinsics.
1271     bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
1272                                unsigned Factor) const override;
1273 
1274     SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value,
1275                                    SDValue Addr, SelectionDAG &DAG)
1276                                    const override;
1277 
1278   protected:
1279     std::pair<const TargetRegisterClass *, uint8_t>
1280     findRepresentativeClass(const TargetRegisterInfo *TRI,
1281                             MVT VT) const override;
1282 
1283   private:
1284     /// Keep a reference to the X86Subtarget around so that we can
1285     /// make the right decision when generating code for different targets.
1286     const X86Subtarget &Subtarget;
1287 
1288     /// Select between SSE or x87 floating point ops.
1289     /// When SSE is available, use it for f32 operations.
1290     /// When SSE2 is available, use it for f64 operations.
1291     bool X86ScalarSSEf32;
1292     bool X86ScalarSSEf64;
1293 
1294     /// A list of legal FP immediates.
1295     std::vector<APFloat> LegalFPImmediates;
1296 
1297     /// Indicate that this x86 target can instruction
1298     /// select the specified FP immediate natively.
1299     void addLegalFPImmediate(const APFloat& Imm) {
1300       LegalFPImmediates.push_back(Imm);
1301     }
1302 
1303     SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
1304                             CallingConv::ID CallConv, bool isVarArg,
1305                             const SmallVectorImpl<ISD::InputArg> &Ins,
1306                             const SDLoc &dl, SelectionDAG &DAG,
1307                             SmallVectorImpl<SDValue> &InVals,
1308                             uint32_t *RegMask) const;
1309     SDValue LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1310                              const SmallVectorImpl<ISD::InputArg> &ArgInfo,
1311                              const SDLoc &dl, SelectionDAG &DAG,
1312                              const CCValAssign &VA, MachineFrameInfo &MFI,
1313                              unsigned i) const;
1314     SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg,
1315                              const SDLoc &dl, SelectionDAG &DAG,
1316                              const CCValAssign &VA,
1317                              ISD::ArgFlagsTy Flags) const;
1318 
1319     // Call lowering helpers.
1320 
1321     /// Check whether the call is eligible for tail call optimization. Targets
1322     /// that want to do tail call optimization should implement this function.
1323     bool IsEligibleForTailCallOptimization(SDValue Callee,
1324                                            CallingConv::ID CalleeCC,
1325                                            bool isVarArg,
1326                                            bool isCalleeStructRet,
1327                                            bool isCallerStructRet,
1328                                            Type *RetTy,
1329                                     const SmallVectorImpl<ISD::OutputArg> &Outs,
1330                                     const SmallVectorImpl<SDValue> &OutVals,
1331                                     const SmallVectorImpl<ISD::InputArg> &Ins,
1332                                            SelectionDAG& DAG) const;
1333     SDValue EmitTailCallLoadRetAddr(SelectionDAG &DAG, SDValue &OutRetAddr,
1334                                     SDValue Chain, bool IsTailCall,
1335                                     bool Is64Bit, int FPDiff,
1336                                     const SDLoc &dl) const;
1337 
1338     unsigned GetAlignedArgumentStackSize(unsigned StackSize,
1339                                          SelectionDAG &DAG) const;
1340 
1341     unsigned getAddressSpace(void) const;
1342 
1343     SDValue FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool isSigned,
1344                             SDValue &Chain) const;
1345 
1346     SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
1347     SDValue LowerVSELECT(SDValue Op, SelectionDAG &DAG) const;
1348     SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1349     SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
1350 
1351     unsigned getGlobalWrapperKind(const GlobalValue *GV = nullptr,
1352                                   const unsigned char OpFlags = 0) const;
1353     SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
1354     SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
1355     SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
1356     SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
1357     SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const;
1358 
1359     /// Creates target global address or external symbol nodes for calls or
1360     /// other uses.
1361     SDValue LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
1362                                   bool ForCall) const;
1363 
1364     SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1365     SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
1366     SDValue LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const;
1367     SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const;
1368     SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
1369     SDValue LowerSTRICT_FSETCC(SDValue Op, SelectionDAG &DAG) const;
1370     SDValue LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const;
1371     SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
1372     SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const;
1373     SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
1374     SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const;
1375     SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const;
1376     SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const;
1377     SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1378     SDValue LowerADDROFRETURNADDR(SDValue Op, SelectionDAG &DAG) const;
1379     SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
1380     SDValue LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const;
1381     SDValue LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const;
1382     SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
1383     SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
1384     SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const;
1385     SDValue LowerINIT_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) const;
1386     SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) const;
1387     SDValue LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const;
1388     SDValue LowerGC_TRANSITION(SDValue Op, SelectionDAG &DAG) const;
1389     SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
1390     SDValue lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const;
1391     SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const;
1392     SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const;
1393 
1394     SDValue LowerF128Call(SDValue Op, SelectionDAG &DAG,
1395                           RTLIB::Libcall Call) const;
1396 
1397     SDValue
1398     LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1399                          const SmallVectorImpl<ISD::InputArg> &Ins,
1400                          const SDLoc &dl, SelectionDAG &DAG,
1401                          SmallVectorImpl<SDValue> &InVals) const override;
1402     SDValue LowerCall(CallLoweringInfo &CLI,
1403                       SmallVectorImpl<SDValue> &InVals) const override;
1404 
1405     SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1406                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1407                         const SmallVectorImpl<SDValue> &OutVals,
1408                         const SDLoc &dl, SelectionDAG &DAG) const override;
1409 
1410     bool supportSplitCSR(MachineFunction *MF) const override {
1411       return MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS &&
1412           MF->getFunction().hasFnAttribute(Attribute::NoUnwind);
1413     }
1414     void initializeSplitCSR(MachineBasicBlock *Entry) const override;
1415     void insertCopiesSplitCSR(
1416       MachineBasicBlock *Entry,
1417       const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
1418 
1419     bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
1420 
1421     bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
1422 
1423     EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
1424                             ISD::NodeType ExtendKind) const override;
1425 
1426     bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
1427                         bool isVarArg,
1428                         const SmallVectorImpl<ISD::OutputArg> &Outs,
1429                         LLVMContext &Context) const override;
1430 
1431     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
1432 
1433     TargetLoweringBase::AtomicExpansionKind
1434     shouldExpandAtomicLoadInIR(LoadInst *SI) const override;
1435     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
1436     TargetLoweringBase::AtomicExpansionKind
1437     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
1438 
1439     LoadInst *
1440     lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const override;
1441 
1442     bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const override;
1443     bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const override;
1444 
1445     bool needsCmpXchgNb(Type *MemType) const;
1446
         // NOTE(review): presumably prepares the setjmp/longjmp entry block for
         // MI, with DispatchBB the dispatch block and FI a frame index -- confirm
         // against the definition.
1447     void SetupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB,
1448                                 MachineBasicBlock *DispatchBB, int FI) const;
1449
1450     // Utility function to emit the low-level va_arg code for X86-64.
         // Takes the instruction and a basic block, and returns a basic block.
1451     MachineBasicBlock *
1452     EmitVAARG64WithCustomInserter(MachineInstr &MI,
1453                                   MachineBasicBlock *MBB) const;
1454
1455     /// Utility function to emit the xmm reg save portion of va_start.
         /// Same shape as the va_arg helper above, but with the parameters
         /// named BInstr and BB.
1456     MachineBasicBlock *
1457     EmitVAStartSaveXMMRegsWithCustomInserter(MachineInstr &BInstr,
1458                                              MachineBasicBlock *BB) const;
1459
         // EmitLowered* helpers. Each takes the MachineInstr to expand
         // (EmitLoweredCascadedSelect takes two) plus a MachineBasicBlock and
         // returns a MachineBasicBlock pointer.
         // NOTE(review): presumably BB is the block holding the instruction and
         // the result is the block where emission continues -- confirm against
         // the definitions.
1460     MachineBasicBlock *EmitLoweredCascadedSelect(MachineInstr &MI1,
1461                                                  MachineInstr &MI2,
1462                                                  MachineBasicBlock *BB) const;
1463
1464     MachineBasicBlock *EmitLoweredSelect(MachineInstr &I,
1465                                          MachineBasicBlock *BB) const;
1466
1467     MachineBasicBlock *EmitLoweredAtomicFP(MachineInstr &I,
1468                                            MachineBasicBlock *BB) const;
1469
1470     MachineBasicBlock *EmitLoweredCatchRet(MachineInstr &MI,
1471                                            MachineBasicBlock *BB) const;
1472
1473     MachineBasicBlock *EmitLoweredCatchPad(MachineInstr &MI,
1474                                            MachineBasicBlock *BB) const;
1475
1476     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr &MI,
1477                                             MachineBasicBlock *BB) const;
1478
1479     MachineBasicBlock *EmitLoweredTLSAddr(MachineInstr &MI,
1480                                           MachineBasicBlock *BB) const;
1481
1482     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr &MI,
1483                                           MachineBasicBlock *BB) const;
1484
1485     MachineBasicBlock *EmitLoweredRetpoline(MachineInstr &MI,
1486                                             MachineBasicBlock *BB) const;
1487
         // setjmp/longjmp, shadow-stack and FMA3 helpers. All take the
         // MachineInstr plus a MachineBasicBlock; every one returns a
         // MachineBasicBlock pointer except emitSetJmpShadowStackFix, which
         // returns void. Naming is mixed in this group: most start with a
         // lower-case "emit", EmitSjLjDispatchBlock with an upper-case one.
         // NOTE(review): what the returned block denotes is not visible here --
         // confirm against the definitions.
1488     MachineBasicBlock *emitEHSjLjSetJmp(MachineInstr &MI,
1489                                         MachineBasicBlock *MBB) const;
1490
1491     void emitSetJmpShadowStackFix(MachineInstr &MI,
1492                                   MachineBasicBlock *MBB) const;
1493
1494     MachineBasicBlock *emitEHSjLjLongJmp(MachineInstr &MI,
1495                                          MachineBasicBlock *MBB) const;
1496
1497     MachineBasicBlock *emitLongJmpShadowStackFix(MachineInstr &MI,
1498                                                  MachineBasicBlock *MBB) const;
1499
1500     MachineBasicBlock *emitFMA3Instr(MachineInstr &MI,
1501                                      MachineBasicBlock *MBB) const;
1502
1503     MachineBasicBlock *EmitSjLjDispatchBlock(MachineInstr &MI,
1504                                              MachineBasicBlock *MBB) const;
1505 
1506     /// Convert a comparison if required by the subtarget. Takes the
         /// comparison value and returns an SDValue.
1507     SDValue ConvertCmpIfNecessary(SDValue Cmp, SelectionDAG &DAG) const;
1508
1509     /// Emit flags for the given setcc condition and operands. Also returns the
1510     /// corresponding X86 condition code constant in X86CC.
         /// Chain is likewise passed by non-const reference.
         /// NOTE(review): IsSignaling presumably selects a signaling FP compare
         /// -- confirm against the definition.
1511     SDValue emitFlagsForSetcc(SDValue Op0, SDValue Op1, ISD::CondCode CC,
1512                               const SDLoc &dl, SelectionDAG &DAG,
1513                               SDValue &X86CC, SDValue &Chain,
1514                               bool IsSignaling) const;
1515
1516     /// Check if replacement of SQRT with RSQRT should be disabled.
1517     bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override;
1518
1519     /// Use rsqrt* to speed up sqrt calculations.
         /// RefinementSteps and UseOneConstNR are passed by non-const reference.
1520     SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1521                             int &RefinementSteps, bool &UseOneConstNR,
1522                             bool Reciprocal) const override;
1523
1524     /// Use rcp* to speed up fdiv calculations.
         /// RefinementSteps is passed by non-const reference.
1525     SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, int Enabled,
1526                              int &RefinementSteps) const override;
1527
1528     /// Reassociate floating point divisions into multiply by reciprocal.
         /// NOTE(review): the meaning of the unsigned result is not visible
         /// here -- confirm against the base-class documentation.
1529     unsigned combineRepeatedFPDivisors() const override;
1530
         /// NOTE(review): by its name this builds the DAG for a signed division
         /// of N by the power-of-two Divisor and records new nodes in Created --
         /// confirm against the definition.
1531     SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG,
1532                           SmallVectorImpl<SDNode *> &Created) const override;
1533   };
1534 
1535   namespace X86 {
         // Declares a factory that takes the function lowering info and the
         // target library info and returns a FastISel pointer.
         // NOTE(review): ownership of the returned object is not visible here.
1536     FastISel *createFastISel(FunctionLoweringInfo &funcInfo,
1537                              const TargetLibraryInfo *libInfo);
1538   } // end namespace X86
1539 
1540   // Shared base of the X86 non-masked store nodes. The stored value is
1541   // operand 1 and the base pointer operand 2.
1542   class X86StoreSDNode : public MemSDNode {
1543   public:
1544     X86StoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1545                    SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1546         : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1547     const SDValue &getValue() const { return getOperand(1); }
1548     const SDValue &getBasePtr() const { return getOperand(2); }
1549
1550     static bool classof(const SDNode *N) {
1551       const unsigned Opc = N->getOpcode();
1552       return Opc == X86ISD::VTRUNCSTORES || Opc == X86ISD::VTRUNCSTOREUS;
1553     }
1554   };
1555 
1556   // Shared base of the X86 masked store nodes. Its operands are laid out in
1557   // the same order as MaskedStoreSDNode's for convenience: value (1), base
1558   // pointer (2) and mask (3).
1559   class X86MaskedStoreSDNode : public MemSDNode {
1560   public:
1561     X86MaskedStoreSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
1562                          SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1563         : MemSDNode(Opcode, Order, dl, VTs, MemVT, MMO) {}
1564
1565     const SDValue &getValue() const { return getOperand(1); }
1566     const SDValue &getBasePtr() const { return getOperand(2); }
1567     const SDValue &getMask() const { return getOperand(3); }
1568
1569     // Only the two masked truncating-store opcodes qualify.
1570     static bool classof(const SDNode *N) {
1571       const unsigned Opc = N->getOpcode();
1572       return Opc == X86ISD::VMTRUNCSTORES || Opc == X86ISD::VMTRUNCSTOREUS;
1573     }
1574   };
1575 
1576   // X86 Truncating Store with Signed saturation.
       // Derives from X86StoreSDNode and fixes the opcode to
       // X86ISD::VTRUNCSTORES; the remaining constructor arguments are
       // forwarded unchanged.
1577   class TruncSStoreSDNode : public X86StoreSDNode {
1578   public:
1579     TruncSStoreSDNode(unsigned Order, const DebugLoc &dl,
1580                         SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1581       : X86StoreSDNode(X86ISD::VTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1582
         // True only for nodes whose opcode is X86ISD::VTRUNCSTORES.
1583     static bool classof(const SDNode *N) {
1584       return N->getOpcode() == X86ISD::VTRUNCSTORES;
1585     }
1586   };
1587 
1588   // X86 Truncating Store with Unsigned saturation.
       // Derives from X86StoreSDNode and fixes the opcode to
       // X86ISD::VTRUNCSTOREUS; the remaining constructor arguments are
       // forwarded unchanged.
1589   class TruncUSStoreSDNode : public X86StoreSDNode {
1590   public:
1591     TruncUSStoreSDNode(unsigned Order, const DebugLoc &dl,
1592                       SDVTList VTs, EVT MemVT, MachineMemOperand *MMO)
1593       : X86StoreSDNode(X86ISD::VTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1594
         // True only for nodes whose opcode is X86ISD::VTRUNCSTOREUS.
1595     static bool classof(const SDNode *N) {
1596       return N->getOpcode() == X86ISD::VTRUNCSTOREUS;
1597     }
1598   };
1599 
1600   // X86 Truncating Masked Store with Signed saturation.
       // Derives from X86MaskedStoreSDNode and fixes the opcode to
       // X86ISD::VMTRUNCSTORES; the remaining constructor arguments are
       // forwarded unchanged.
1601   class MaskedTruncSStoreSDNode : public X86MaskedStoreSDNode {
1602   public:
1603     MaskedTruncSStoreSDNode(unsigned Order,
1604                          const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1605                          MachineMemOperand *MMO)
1606       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTORES, Order, dl, VTs, MemVT, MMO) {}
1607
         // True only for nodes whose opcode is X86ISD::VMTRUNCSTORES.
1608     static bool classof(const SDNode *N) {
1609       return N->getOpcode() == X86ISD::VMTRUNCSTORES;
1610     }
1611   };
1612 
1613   // X86 Truncating Masked Store with Unsigned saturation.
       // Derives from X86MaskedStoreSDNode and fixes the opcode to
       // X86ISD::VMTRUNCSTOREUS; the remaining constructor arguments are
       // forwarded unchanged.
1614   class MaskedTruncUSStoreSDNode : public X86MaskedStoreSDNode {
1615   public:
1616     MaskedTruncUSStoreSDNode(unsigned Order,
1617                             const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1618                             MachineMemOperand *MMO)
1619       : X86MaskedStoreSDNode(X86ISD::VMTRUNCSTOREUS, Order, dl, VTs, MemVT, MMO) {}
1620
         // True only for nodes whose opcode is X86ISD::VMTRUNCSTOREUS.
1621     static bool classof(const SDNode *N) {
1622       return N->getOpcode() == X86ISD::VMTRUNCSTOREUS;
1623     }
1624   };
1625 
1626   // Shared base of the X86 specific gather and scatter nodes. Operands use
1627   // the MaskedGatherScatterSDNode order for convenience; the accessors here
1628   // cover mask (2), base pointer (3), index (4) and scale (5).
1629   class X86MaskedGatherScatterSDNode : public MemSDNode {
1630   public:
1631     X86MaskedGatherScatterSDNode(unsigned Opc, unsigned Order,
1632                                  const DebugLoc &dl, SDVTList VTs, EVT MemVT,
1633                                  MachineMemOperand *MMO)
1634         : MemSDNode(Opc, Order, dl, VTs, MemVT, MMO) {}
1635
1636     const SDValue &getMask() const { return getOperand(2); }
1637     const SDValue &getBasePtr() const { return getOperand(3); }
1638     const SDValue &getIndex() const { return getOperand(4); }
1639     const SDValue &getScale() const { return getOperand(5); }
1640
1641     static bool classof(const SDNode *N) {
1642       const unsigned NodeOpc = N->getOpcode();
1643       return NodeOpc == X86ISD::MGATHER || NodeOpc == X86ISD::MSCATTER;
1644     }
1645   };
1646
       // Gather flavour of X86MaskedGatherScatterSDNode: fixes the opcode to
       // X86ISD::MGATHER and forwards the remaining constructor arguments.
1647   class X86MaskedGatherSDNode : public X86MaskedGatherScatterSDNode {
1648   public:
1649     X86MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1650                           EVT MemVT, MachineMemOperand *MMO)
1651         : X86MaskedGatherScatterSDNode(X86ISD::MGATHER, Order, dl, VTs, MemVT,
1652                                        MMO) {}
1653
         // Operand 1. NOTE(review): presumably the value kept for lanes the
         // mask disables -- confirm against the node's construction site.
1654     const SDValue &getPassThru() const { return getOperand(1); }
1655
         // True only for nodes whose opcode is X86ISD::MGATHER.
1656     static bool classof(const SDNode *N) {
1657       return N->getOpcode() == X86ISD::MGATHER;
1658     }
1659   };
1660
       // Scatter flavour of X86MaskedGatherScatterSDNode: fixes the opcode to
       // X86ISD::MSCATTER and forwards the remaining constructor arguments.
1661   class X86MaskedScatterSDNode : public X86MaskedGatherScatterSDNode {
1662   public:
1663     X86MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
1664                            EVT MemVT, MachineMemOperand *MMO)
1665         : X86MaskedGatherScatterSDNode(X86ISD::MSCATTER, Order, dl, VTs, MemVT,
1666                                        MMO) {}
1667
         // Operand 1. NOTE(review): presumably the data being scattered --
         // confirm against the node's construction site.
1668     const SDValue &getValue() const { return getOperand(1); }
1669
         // True only for nodes whose opcode is X86ISD::MSCATTER.
1670     static bool classof(const SDNode *N) {
1671       return N->getOpcode() == X86ISD::MSCATTER;
1672     }
1673   };
1674 
1675   /// Append the unpacklo (Lo) or unpackhi (!Lo) shuffle mask of VT to Mask.
1676   template <typename T = int>
1677   void createUnpackShuffleMask(MVT VT, SmallVectorImpl<T> &Mask, bool Lo,
1678                                bool Unary) {
1679     assert(Mask.empty() && "Expected an empty shuffle mask vector");
1680     const int EltCount = VT.getVectorNumElements();
1681     const int LaneElts = 128 / VT.getScalarSizeInBits(); // per 128-bit lane
1682     const int HalfBias = Lo ? 0 : LaneElts / 2; // !Lo skips half a lane
1683     for (int Idx = 0; Idx < EltCount; ++Idx) {
1684       const int LaneBase = (Idx / LaneElts) * LaneElts;
1685       // Odd results of a non-unary unpack are offset by the element count.
1686       const int SrcBias = (Unary || Idx % 2 == 0) ? 0 : EltCount;
1687       Mask.push_back((Idx % LaneElts) / 2 + LaneBase + SrcBias + HalfBias);
1688     }
1689   }
1690 
1691   /// Rewrite a shuffle or target shuffle mask as the equivalent narrowed
1692   /// mask with Scale entries per original element. Each non-negative index
1693   /// M expands to the sequential indices Scale * M ... Scale * M + Scale - 1
1694   /// and each negative (sentinel) value is repeated Scale times. ScaledMask
1695   /// is overwritten with Mask.size() * Scale entries. This is the reverse
1696   /// process to canWidenShuffleElements, but can always succeed.
1697   template <typename T>
1698   void scaleShuffleMask(size_t Scale, ArrayRef<T> Mask,
1699                         SmallVectorImpl<T> &ScaledMask) {
1700     assert(0 < Scale && "Unexpected scaling factor");
1701     const size_t SrcSize = Mask.size();
1702     ScaledMask.assign(SrcSize * Scale, -1);
1703
1704     // Out advances in step with the nested loops, so it always equals
1705     // Scale * In + Sub.
1706     size_t Out = 0;
1707     for (size_t In = 0; In != SrcSize; ++In) {
1708       const int SrcIdx = Mask[In];
1709       for (size_t Sub = 0; Sub != Scale; ++Sub, ++Out) {
1710         if (SrcIdx < 0)
1711           ScaledMask[Out] = SrcIdx; // Sentinel: copied through unchanged.
1712         else
1713           ScaledMask[Out] = (Scale * SrcIdx) + Sub;
1714       }
1715     }
1716   }
1717 } // end namespace llvm
1718 
1719 #endif // LLVM_LIB_TARGET_X86_X86ISELLOWERING_H
1720