//===- X86InstructionSelector.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// X86.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86RegisterBankInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <tuple>

#define DEBUG_TYPE "X86-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class X86InstructionSelector : public InstructionSelector {
public:
  X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI,
                         const X86RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // TODO: remove after supported by Tablegen-erated instruction selection.
  unsigned getLoadStoreOp(const LLT &Ty, const RegisterBank &RB, unsigned Opc,
                          Align Alignment) const;

  bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
  bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI,
                             MachineFunction &MF) const;
  bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
  bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
                      MachineFunction &MF) const;
  bool selectTruncOrPtrToInt(MachineInstr &I, MachineRegisterInfo &MRI,
                             MachineFunction &MF) const;
  bool selectZext(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
  bool selectAnyext(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                 MachineFunction &MF) const;
  bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
  bool selectUAddSub(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
                           MachineFunction &MF);
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF);
  bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectCondBranch(MachineInstr &I, MachineRegisterInfo &MRI,
                        MachineFunction &MF) const;
  bool selectTurnIntoCOPY(MachineInstr &I, MachineRegisterInfo &MRI,
                          const unsigned DstReg,
                          const TargetRegisterClass *DstRC,
                          const unsigned SrcReg,
                          const TargetRegisterClass *SrcRC) const;
  bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMulDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
                       MachineFunction &MF) const;
  bool selectSelect(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;

  // Emit an insert-subreg instruction and insert it before MachineInstr &I.
  bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
                        MachineRegisterInfo &MRI, MachineFunction &MF) const;
  // Emit an extract-subreg instruction and insert it before MachineInstr &I.
  bool emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
                         MachineRegisterInfo &MRI, MachineFunction &MF) const;

  const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const;
  const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg,
                                         MachineRegisterInfo &MRI) const;

  const X86TargetMachine &TM;
  const X86Subtarget &STI;
  const X86InstrInfo &TII;
  const X86RegisterInfo &TRI;
  const X86RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM,
                                               const X86Subtarget &STI,
                                               const X86RegisterBankInfo &RBI)
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
      RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
const TargetRegisterClass *
X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
  if (RB.getID() == X86::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 8)
      return &X86::GR8RegClass;
    if (Ty.getSizeInBits() == 16)
      return &X86::GR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &X86::GR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &X86::GR64RegClass;
  }
  if (RB.getID() == X86::VECRRegBankID) {
    if (Ty.getSizeInBits() == 16)
      return STI.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return STI.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return STI.hasAVX512() ? &X86::VR128XRegClass : &X86::VR128RegClass;
    if (Ty.getSizeInBits() == 256)
      return STI.hasAVX512() ? &X86::VR256XRegClass : &X86::VR256RegClass;
    if (Ty.getSizeInBits() == 512)
      return &X86::VR512RegClass;
  }

  if (RB.getID() == X86::PSRRegBankID) {
    if (Ty.getSizeInBits() == 80)
      return &X86::RFP80RegClass;
    if (Ty.getSizeInBits() == 64)
      return &X86::RFP64RegClass;
    if (Ty.getSizeInBits() == 32)
      return &X86::RFP32RegClass;
  }

  llvm_unreachable("Unknown RegBank!");
}

const TargetRegisterClass *
X86InstructionSelector::getRegClass(LLT Ty, unsigned Reg,
                                    MachineRegisterInfo &MRI) const {
  const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI);
  return getRegClass(Ty, RegBank);
}

static unsigned getSubRegIndex(const TargetRegisterClass *RC) {
  unsigned SubIdx = X86::NoSubRegister;
  if (RC == &X86::GR32RegClass) {
    SubIdx = X86::sub_32bit;
  } else if (RC == &X86::GR16RegClass) {
    SubIdx = X86::sub_16bit;
  } else if (RC == &X86::GR8RegClass) {
    SubIdx = X86::sub_8bit;
  }

  return SubIdx;
}

static const TargetRegisterClass *getRegClassFromGRPhysReg(Register Reg) {
  assert(Reg.isPhysical());
  if (X86::GR64RegClass.contains(Reg))
    return &X86::GR64RegClass;
  if (X86::GR32RegClass.contains(Reg))
    return &X86::GR32RegClass;
  if (X86::GR16RegClass.contains(Reg))
    return &X86::GR16RegClass;
  if (X86::GR8RegClass.contains(Reg))
    return &X86::GR8RegClass;

  llvm_unreachable("Unknown RegClass for PhysReg!");
}

// FIXME: We need some sort of API in RBI/TRI to allow generic code to
// constrain operands of simple instructions given a TargetRegisterClass
// and LLT
bool X86InstructionSelector::selectDebugInstr(MachineInstr &I,
                                              MachineRegisterInfo &MRI) const {
  for (MachineOperand &MO : I.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isPhysical())
      continue;
    LLT Ty = MRI.getType(Reg);
    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
    const TargetRegisterClass *RC =
        dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
    if (!RC) {
      const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
      RC = getRegClass(Ty, RB);
      if (!RC) {
        LLVM_DEBUG(
            dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
        break;
      }
    }
    RBI.constrainGenericRegister(Reg, *RC, MRI);
  }

  return true;
}

// Set X86 Opcode and constrain DestReg.
bool X86InstructionSelector::selectCopy(MachineInstr &I,
                                        MachineRegisterInfo &MRI) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  Register SrcReg = I.getOperand(1).getReg();
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  if (DstReg.isPhysical()) {
    assert(I.isCopy() && "Generic operators do not allow physical registers");

    if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID &&
        DstRegBank.getID() == X86::GPRRegBankID) {

      const TargetRegisterClass *SrcRC =
          getRegClass(MRI.getType(SrcReg), SrcRegBank);
      const TargetRegisterClass *DstRC = getRegClassFromGRPhysReg(DstReg);

      if (SrcRC != DstRC) {
        // This case can be generated by ABI lowering; perform an anyext.
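        // SUBREG_TO_REG is effectively an IMPLICIT_DEF plus a sub-register
        // insert: SrcReg lands in the low sub-register of a fresh DstRC
        // value, which is all the widening an anyext needs here.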
        Register ExtSrc = MRI.createVirtualRegister(DstRC);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG))
            .addDef(ExtSrc)
            .addImm(0)
            .addReg(SrcReg)
            .addImm(getSubRegIndex(SrcRC));

        I.getOperand(1).setReg(ExtSrc);
      }
    }

    return true;
  }

  assert((!SrcReg.isPhysical() || I.isCopy()) &&
         "No phys reg on generic operators");
  assert((DstSize == SrcSize ||
          // Copies are a means to set up initial types; the number of
          // bits may not match exactly.
          (SrcReg.isPhysical() &&
           DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) &&
         "Copy with different width?!");

  const TargetRegisterClass *DstRC =
      getRegClass(MRI.getType(DstReg), DstRegBank);

  if (SrcRegBank.getID() == X86::GPRRegBankID &&
      DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize &&
      SrcReg.isPhysical()) {
    // Change the physical register to perform the truncate.

    const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg);

    if (DstRC != SrcRC) {
      I.getOperand(1).setSubReg(getSubRegIndex(DstRC));
      I.getOperand(1).substPhysReg(SrcReg, TRI);
    }
  }

  // No need to constrain SrcReg. It will get constrained when
  // we hit another of its uses or defs.
  // Copies do not have constraints.
  const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg);
  if (!OldRC || !DstRC->hasSubClassEq(OldRC)) {
    if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }
  I.setDesc(TII.get(X86::COPY));
  return true;
}

bool X86InstructionSelector::select(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  if (!isPreISelGenericOpcode(Opcode)) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return false;

    if (I.isCopy())
      return selectCopy(I, MRI);

    if (I.isDebugInstr())
      return selectDebugInstr(I, MRI);

    return true;
  }

  assert(I.getNumOperands() == I.getNumExplicitOperands() &&
         "Generic instruction has unexpected implicit operands\n");

  if (selectImpl(I, *CoverageInfo))
    return true;

  LLVM_DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs()));

  // TODO: This should be implemented by tblgen.
  switch (I.getOpcode()) {
  default:
    return false;
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_LOAD:
    return selectLoadStoreOp(I, MRI, MF);
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_FRAME_INDEX:
    return selectFrameIndexOrGep(I, MRI, MF);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectGlobalValue(I, MRI, MF);
  case TargetOpcode::G_CONSTANT:
    return selectConstant(I, MRI, MF);
  case TargetOpcode::G_FCONSTANT:
    return materializeFP(I, MRI, MF);
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC:
    return selectTruncOrPtrToInt(I, MRI, MF);
  case TargetOpcode::G_INTTOPTR:
    return selectCopy(I, MRI);
  case TargetOpcode::G_ZEXT:
    return selectZext(I, MRI, MF);
  case TargetOpcode::G_ANYEXT:
    return selectAnyext(I, MRI, MF);
  case TargetOpcode::G_ICMP:
    return selectCmp(I, MRI, MF);
  case TargetOpcode::G_FCMP:
    return selectFCmp(I, MRI, MF);
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_USUBO:
    return selectUAddSub(I, MRI, MF);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectUnmergeValues(I, MRI, MF);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectMergeValues(I, MRI, MF);
  case TargetOpcode::G_EXTRACT:
    return selectExtract(I, MRI, MF);
  case TargetOpcode::G_INSERT:
    return selectInsert(I, MRI, MF);
  case TargetOpcode::G_BRCOND:
    return selectCondBranch(I, MRI, MF);
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_PHI:
    return selectImplicitDefOrPHI(I, MRI);
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SMULH:
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
    return selectMulDivRem(I, MRI, MF);
  case TargetOpcode::G_SELECT:
    return selectSelect(I, MRI, MF);
  }

  return false;
}

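// Map a G_LOAD/G_STORE to the concrete X86 move opcode for the given type,
// register bank, and alignment. Returns Opc unchanged when no suitable
// opcode exists, which callers can detect as a selection failure.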
unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
                                                const RegisterBank &RB,
                                                unsigned Opc,
                                                Align Alignment) const {
  bool Isload = (Opc == TargetOpcode::G_LOAD);
  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (Ty == LLT::scalar(8)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV8rm : X86::MOV8mr;
  } else if (Ty == LLT::scalar(16)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV16rm : X86::MOV16mr;
  } else if (Ty == LLT::scalar(32) || Ty == LLT::pointer(0, 32)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV32rm : X86::MOV32mr;
    if (X86::VECRRegBankID == RB.getID())
      return Isload ? (HasAVX512 ? X86::VMOVSSZrm_alt :
                       HasAVX    ? X86::VMOVSSrm_alt :
                                   X86::MOVSSrm_alt)
                    : (HasAVX512 ? X86::VMOVSSZmr :
                       HasAVX    ? X86::VMOVSSmr :
                                   X86::MOVSSmr);
    if (X86::PSRRegBankID == RB.getID())
      return Isload ? X86::LD_Fp32m : X86::ST_Fp32m;
  } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV64rm : X86::MOV64mr;
    if (X86::VECRRegBankID == RB.getID())
      return Isload ? (HasAVX512 ? X86::VMOVSDZrm_alt :
                       HasAVX    ? X86::VMOVSDrm_alt :
                                   X86::MOVSDrm_alt)
                    : (HasAVX512 ? X86::VMOVSDZmr :
                       HasAVX    ? X86::VMOVSDmr :
                                   X86::MOVSDmr);
    if (X86::PSRRegBankID == RB.getID())
      return Isload ? X86::LD_Fp64m : X86::ST_Fp64m;
  } else if (Ty == LLT::scalar(80)) {
    return Isload ? X86::LD_Fp80m : X86::ST_FpP80m;
  } else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
    if (Alignment >= Align(16))
      return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
                              : HasAVX512
                                    ? X86::VMOVAPSZ128rm_NOVLX
                                    : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm)
                    : (HasVLX ? X86::VMOVAPSZ128mr
                              : HasAVX512
                                    ? X86::VMOVAPSZ128mr_NOVLX
                                    : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr);
    else
      return Isload ? (HasVLX ? X86::VMOVUPSZ128rm
                              : HasAVX512
                                    ? X86::VMOVUPSZ128rm_NOVLX
                                    : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm)
                    : (HasVLX ? X86::VMOVUPSZ128mr
                              : HasAVX512
                                    ? X86::VMOVUPSZ128mr_NOVLX
                                    : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr);
  } else if (Ty.isVector() && Ty.getSizeInBits() == 256) {
    if (Alignment >= Align(32))
      return Isload ? (HasVLX ? X86::VMOVAPSZ256rm
                              : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
                                          : X86::VMOVAPSYrm)
                    : (HasVLX ? X86::VMOVAPSZ256mr
                              : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
                                          : X86::VMOVAPSYmr);
    else
      return Isload ? (HasVLX ? X86::VMOVUPSZ256rm
                              : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
                                          : X86::VMOVUPSYrm)
                    : (HasVLX ? X86::VMOVUPSZ256mr
                              : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
                                          : X86::VMOVUPSYmr);
  } else if (Ty.isVector() && Ty.getSizeInBits() == 512) {
    if (Alignment >= Align(64))
      return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
    else
      return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
  }
  return Opc;
}

// Fill in an address from the given instruction.
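// A G_PTR_ADD whose constant offset fits in 32 bits is folded into the
// displacement, and a G_FRAME_INDEX becomes a frame-index base; anything
// else falls back to using the pointer register itself as the base.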
static void X86SelectAddress(const MachineInstr &I,
                             const MachineRegisterInfo &MRI,
                             X86AddressMode &AM) {
  assert(I.getOperand(0).isReg() && "unsupported operand.");
  assert(MRI.getType(I.getOperand(0).getReg()).isPointer() &&
         "unsupported type.");

  if (I.getOpcode() == TargetOpcode::G_PTR_ADD) {
    if (auto COff = getIConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) {
      int64_t Imm = *COff;
      if (isInt<32>(Imm)) { // Check for displacement overflow.
        AM.Disp = static_cast<int32_t>(Imm);
        AM.Base.Reg = I.getOperand(1).getReg();
        return;
      }
    }
  } else if (I.getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    AM.Base.FrameIndex = I.getOperand(1).getIndex();
    AM.BaseType = X86AddressMode::FrameIndexBase;
    return;
  }

  // Default behavior.
  AM.Base.Reg = I.getOperand(0).getReg();
}

bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  unsigned Opc = I.getOpcode();

  assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) &&
         "Only G_STORE and G_LOAD are expected for selection");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);
  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

  assert(I.hasOneMemOperand());
  auto &MemOp = **I.memoperands_begin();
  if (MemOp.isAtomic()) {
    // Note: for unordered operations, we rely on the fact that the appropriate
    // MMO is already on the instruction we're mutating, and thus we don't need
    // to make any changes. So long as we select an opcode which is capable of
    // loading or storing the appropriate size atomically, the rest of the
    // backend is required to respect the MMO state.
    if (!MemOp.isUnordered()) {
      LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n");
      return false;
    }
    if (MemOp.getAlign() < Ty.getSizeInBits() / 8) {
      LLVM_DEBUG(dbgs() << "Unaligned atomics not supported yet\n");
      return false;
    }
  }

  unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlign());
  if (NewOpc == Opc)
    return false;

  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);
  const MachineInstr *Ptr = MRI.getVRegDef(I.getOperand(1).getReg());

  if (Ptr->getOpcode() == TargetOpcode::G_CONSTANT_POOL) {
    assert(Opc == TargetOpcode::G_LOAD &&
           "Only G_LOAD from constant pool is expected");
    // TODO: Need a separate move for the large code model.
    if (TM.getCodeModel() == CodeModel::Large)
      return false;

    unsigned char OpFlag = STI.classifyLocalReference(nullptr);
    unsigned PICBase = 0;
    if (OpFlag == X86II::MO_GOTOFF)
      PICBase = TII.getGlobalBaseReg(&MF);
    else if (STI.is64Bit())
      PICBase = X86::RIP;

    I.removeOperand(1);
    addConstantPoolReference(MIB, Ptr->getOperand(1).getIndex(), PICBase,
                             OpFlag);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  X86AddressMode AM;
  X86SelectAddress(*Ptr, MRI, AM);
  if (Opc == TargetOpcode::G_LOAD) {
    I.removeOperand(1);
    addFullAddress(MIB, AM);
  } else {
    // G_STORE has (VAL, Addr) operands, but the X86 store expects (Addr, VAL).
    I.removeOperand(1);
    I.removeOperand(0);
    addFullAddress(MIB, AM).addUse(DefReg);
  }
  bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  I.addImplicitDefUseOperands(MF);
  return Constrained;
}

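// Return the LEA opcode for the given pointer type. On 64-bit ILP32 (x32)
// targets, 32-bit pointers still use a 64-bit address computation with a
// 32-bit destination (LEA64_32r).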
static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
  if (Ty == LLT::pointer(0, 64))
    return X86::LEA64r;
  else if (Ty == LLT::pointer(0, 32))
    return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r;
  else
    llvm_unreachable("Can't get LEA opcode. Unsupported type.");
}

bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I,
                                                   MachineRegisterInfo &MRI,
                                                   MachineFunction &MF) const {
  unsigned Opc = I.getOpcode();

  assert((Opc == TargetOpcode::G_FRAME_INDEX || Opc == TargetOpcode::G_PTR_ADD) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);

  // Use LEA to compute the frame-index address or the pointer addition.
  unsigned NewOpc = getLeaOP(Ty, STI);
  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);

  if (Opc == TargetOpcode::G_FRAME_INDEX) {
    addOffset(MIB, 0);
  } else {
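    // Rebuild the operands as an X86 memory reference:
    //   base = operand 1, scale = 1, index = old operand 2, disp = 0,
    //   segment = none.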
    MachineOperand &InxOp = I.getOperand(2);
    I.addOperand(InxOp);        // set IndexReg
    InxOp.ChangeToImmediate(1); // set Scale
    MIB.addImm(0).addReg(0);
  }

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::selectGlobalValue(MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) &&
         "unexpected instruction");

  auto GV = I.getOperand(1).getGlobal();
  if (GV->isThreadLocal()) {
    return false; // TODO: we don't support TLS yet.
  }

  // Can't handle alternate code models yet.
  if (TM.getCodeModel() != CodeModel::Small)
    return false;

  X86AddressMode AM;
  AM.GV = GV;
  AM.GVOpFlags = STI.classifyGlobalReference(GV);

  // TODO: The ABI requires an extra load; not supported yet.
  if (isGlobalStubReference(AM.GVOpFlags))
    return false;

  // TODO: This reference is relative to the PIC base; not supported yet.
  if (isGlobalRelativeToPICBase(AM.GVOpFlags))
    return false;

  if (STI.isPICStyleRIPRel()) {
    // Use rip-relative addressing.
    assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
    AM.Base.Reg = X86::RIP;
  }

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);
  unsigned NewOpc = getLeaOP(Ty, STI);

  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);

  I.removeOperand(1);
  addFullAddress(MIB, AM);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::selectConstant(MachineInstr &I,
                                            MachineRegisterInfo &MRI,
                                            MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_CONSTANT) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);

  if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID)
    return false;

  uint64_t Val = 0;
  if (I.getOperand(1).isCImm()) {
    Val = I.getOperand(1).getCImm()->getZExtValue();
    I.getOperand(1).ChangeToImmediate(Val);
  } else if (I.getOperand(1).isImm()) {
    Val = I.getOperand(1).getImm();
  } else
    llvm_unreachable("Unsupported operand type.");

  unsigned NewOpc;
  switch (Ty.getSizeInBits()) {
  case 8:
    NewOpc = X86::MOV8ri;
    break;
  case 16:
    NewOpc = X86::MOV16ri;
    break;
  case 32:
    NewOpc = X86::MOV32ri;
    break;
  case 64:
    // TODO: when isUInt<32>(Val), X86::MOV32ri can be used.
    if (isInt<32>(Val))
      NewOpc = X86::MOV64ri32;
    else
      NewOpc = X86::MOV64ri;
    break;
  default:
    llvm_unreachable("Can't select G_CONSTANT, unsupported type.");
  }

  I.setDesc(TII.get(NewOpc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

// Helper function for selectTruncOrPtrToInt and selectAnyext.
// Returns true if DstRC lives in a floating-point register class and
// SrcRC lives in a 128-bit vector class.
static bool canTurnIntoCOPY(const TargetRegisterClass *DstRC,
                            const TargetRegisterClass *SrcRC) {
  return (DstRC == &X86::FR32RegClass || DstRC == &X86::FR32XRegClass ||
          DstRC == &X86::FR64RegClass || DstRC == &X86::FR64XRegClass) &&
         (SrcRC == &X86::VR128RegClass || SrcRC == &X86::VR128XRegClass);
}

bool X86InstructionSelector::selectTurnIntoCOPY(
    MachineInstr &I, MachineRegisterInfo &MRI, const unsigned DstReg,
    const TargetRegisterClass *DstRC, const unsigned SrcReg,
    const TargetRegisterClass *SrcRC) const {

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(X86::COPY));
  return true;
}

bool X86InstructionSelector::selectTruncOrPtrToInt(MachineInstr &I,
                                                   MachineRegisterInfo &MRI,
                                                   MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_TRUNC ||
          I.getOpcode() == TargetOpcode::G_PTRTOINT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

  if (DstRB.getID() != SrcRB.getID()) {
    LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode())
                      << " input/output on different banks\n");
    return false;
  }

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

  if (!DstRC || !SrcRC)
    return false;

  // If this truncates a value that lives in a vector class into a
  // floating-point class, just replace it with a copy, as we are able to
  // select it as a regular move.
  if (canTurnIntoCOPY(DstRC, SrcRC))
    return selectTurnIntoCOPY(I, MRI, DstReg, DstRC, SrcReg, SrcRC);

  if (DstRB.getID() != X86::GPRRegBankID)
    return false;

  unsigned SubIdx;
  if (DstRC == SrcRC) {
    // Nothing to be done
    SubIdx = X86::NoSubRegister;
  } else if (DstRC == &X86::GR32RegClass) {
    SubIdx = X86::sub_32bit;
  } else if (DstRC == &X86::GR16RegClass) {
    SubIdx = X86::sub_16bit;
  } else if (DstRC == &X86::GR8RegClass) {
    SubIdx = X86::sub_8bit;
  } else {
    return false;
  }

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << "\n");
    return false;
  }

  I.getOperand(1).setSubReg(SubIdx);

  I.setDesc(TII.get(X86::COPY));
  return true;
}

bool X86InstructionSelector::selectZext(MachineInstr &I,
                                        MachineRegisterInfo &MRI,
                                        MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(16)) &&
         "8=>16 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(32)) &&
         "8=>32 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(32)) &&
         "16=>32 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(64)) &&
         "8=>64 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(64)) &&
         "16=>64 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(32) && DstTy == LLT::scalar(64)) &&
         "32=>64 Zext is handled by tablegen");

  if (SrcTy != LLT::scalar(1))
    return false;

  unsigned AndOpc;
  if (DstTy == LLT::scalar(8))
    AndOpc = X86::AND8ri;
  else if (DstTy == LLT::scalar(16))
    AndOpc = X86::AND16ri;
  else if (DstTy == LLT::scalar(32))
    AndOpc = X86::AND32ri;
  else if (DstTy == LLT::scalar(64))
    AndOpc = X86::AND64ri32;
  else
    return false;

  Register DefReg = SrcReg;
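  // The s1 source is held in an 8-bit register. For wider destinations,
  // insert it into the low byte of an undef value of the destination class
  // first, so the AND below can operate at the destination width.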
  if (DstTy != LLT::scalar(8)) {
    Register ImpDefReg =
        MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(TargetOpcode::IMPLICIT_DEF), ImpDefReg);

    DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(TargetOpcode::INSERT_SUBREG), DefReg)
        .addReg(ImpDefReg)
        .addReg(SrcReg)
        .addImm(X86::sub_8bit);
  }

  MachineInstr &AndInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg)
           .addReg(DefReg)
           .addImm(1);

  constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectAnyext(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

  assert(DstRB.getID() == SrcRB.getID() &&
         "G_ANYEXT input/output on different banks\n");

  assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
         "G_ANYEXT incorrect operand size");

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

  // If this is an ANY_EXT of a value that lives in a floating-point class and
  // goes into a vector class, just replace it with a copy, as we are able to
  // select it as a regular move.
  if (canTurnIntoCOPY(SrcRC, DstRC))
    return selectTurnIntoCOPY(I, MRI, SrcReg, SrcRC, DstReg, DstRC);

  if (DstRB.getID() != X86::GPRRegBankID)
    return false;

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  if (SrcRC == DstRC) {
    I.setDesc(TII.get(X86::COPY));
    return true;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(),
          TII.get(TargetOpcode::SUBREG_TO_REG))
      .addDef(DstReg)
      .addImm(0)
      .addReg(SrcReg)
      .addImm(getSubRegIndex(SrcRC));

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectCmp(MachineInstr &I,
                                       MachineRegisterInfo &MRI,
                                       MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ICMP) && "unexpected instruction");

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(
      (CmpInst::Predicate)I.getOperand(1).getPredicate());

  Register LHS = I.getOperand(2).getReg();
  Register RHS = I.getOperand(3).getReg();

  if (SwapArgs)
    std::swap(LHS, RHS);

  unsigned OpCmp;
  LLT Ty = MRI.getType(LHS);

  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 8:
    OpCmp = X86::CMP8rr;
    break;
  case 16:
    OpCmp = X86::CMP16rr;
    break;
  case 32:
    OpCmp = X86::CMP32rr;
    break;
  case 64:
    OpCmp = X86::CMP64rr;
    break;
  }

  MachineInstr &CmpInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
           .addReg(LHS)
           .addReg(RHS);

  MachineInstr &SetInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                   TII.get(X86::SETCCr), I.getOperand(0).getReg()).addImm(CC);

  constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectFCmp(MachineInstr &I,
                                        MachineRegisterInfo &MRI,
                                        MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction");

  Register LhsReg = I.getOperand(2).getReg();
  Register RhsReg = I.getOperand(3).getReg();
  CmpInst::Predicate Predicate =
      (CmpInst::Predicate)I.getOperand(1).getPredicate();

  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
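  // UCOMISS/UCOMISD set ZF/PF/CF to (1,1,1) for unordered and (1,0,0) for
  // equal, so OEQ is ZF == 1 && PF == 0 (two SETCCs combined with AND) and
  // UNE is ZF == 0 || PF == 1 (two SETCCs combined with OR).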
  static const uint16_t SETFOpcTable[2][3] = {
      {X86::COND_E, X86::COND_NP, X86::AND8rr},
      {X86::COND_NE, X86::COND_P, X86::OR8rr}};
  const uint16_t *SETFOpc = nullptr;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_OEQ:
    SETFOpc = &SETFOpcTable[0][0];
    break;
  case CmpInst::FCMP_UNE:
    SETFOpc = &SETFOpcTable[1][0];
    break;
  }

  // Compute the opcode for the CMP instruction.
  unsigned OpCmp;
  LLT Ty = MRI.getType(LhsReg);
  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 32:
    OpCmp = X86::UCOMISSrr;
    break;
  case 64:
    OpCmp = X86::UCOMISDrr;
    break;
  }

  Register ResultReg = I.getOperand(0).getReg();
  RBI.constrainGenericRegister(
      ResultReg,
      *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI);
  if (SETFOpc) {
    MachineInstr &CmpInst =
        *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
             .addReg(LhsReg)
             .addReg(RhsReg);

    Register FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
    Register FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
    MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(X86::SETCCr), FlagReg1).addImm(SETFOpc[0]);
    MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(X86::SETCCr), FlagReg2).addImm(SETFOpc[1]);
    MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(SETFOpc[2]), ResultReg)
                              .addReg(FlagReg1)
                              .addReg(FlagReg2);
    constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set1, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set2, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set3, TII, TRI, RBI);

    I.eraseFromParent();
    return true;
  }

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

  if (SwapArgs)
    std::swap(LhsReg, RhsReg);

  // Emit a compare of LHS/RHS.
  MachineInstr &CmpInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
           .addReg(LhsReg)
           .addReg(RhsReg);

  MachineInstr &Set =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), ResultReg).addImm(CC);
  constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  constrainSelectedInstRegOperands(Set, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectUAddSub(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_UADDE ||
          I.getOpcode() == TargetOpcode::G_UADDO ||
          I.getOpcode() == TargetOpcode::G_USUBE ||
          I.getOpcode() == TargetOpcode::G_USUBO) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register CarryOutReg = I.getOperand(1).getReg();
  const Register Op0Reg = I.getOperand(2).getReg();
  const Register Op1Reg = I.getOperand(3).getReg();
  bool IsSub = I.getOpcode() == TargetOpcode::G_USUBE ||
               I.getOpcode() == TargetOpcode::G_USUBO;
  bool HasCarryIn = I.getOpcode() == TargetOpcode::G_UADDE ||
                    I.getOpcode() == TargetOpcode::G_USUBE;

  const LLT DstTy = MRI.getType(DstReg);
  assert(DstTy.isScalar() && "selectUAddSub only supported for scalar types");

  // TODO: Handle immediate argument variants?
  unsigned OpADC, OpADD, OpSBB, OpSUB;
  switch (DstTy.getSizeInBits()) {
  case 8:
    OpADC = X86::ADC8rr;
    OpADD = X86::ADD8rr;
    OpSBB = X86::SBB8rr;
    OpSUB = X86::SUB8rr;
    break;
  case 16:
    OpADC = X86::ADC16rr;
    OpADD = X86::ADD16rr;
    OpSBB = X86::SBB16rr;
    OpSUB = X86::SUB16rr;
    break;
  case 32:
    OpADC = X86::ADC32rr;
    OpADD = X86::ADD32rr;
    OpSBB = X86::SBB32rr;
    OpSUB = X86::SUB32rr;
    break;
  case 64:
    OpADC = X86::ADC64rr;
    OpADD = X86::ADD64rr;
    OpSBB = X86::SBB64rr;
    OpSUB = X86::SUB64rr;
    break;
  default:
    llvm_unreachable("selectUAddSub unsupported type.");
  }

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);

  unsigned Opcode = IsSub ? OpSUB : OpADD;

  // G_UADDE/G_USUBE - find CarryIn def instruction.
  if (HasCarryIn) {
    Register CarryInReg = I.getOperand(4).getReg();
    MachineInstr *Def = MRI.getVRegDef(CarryInReg);
    while (Def->getOpcode() == TargetOpcode::G_TRUNC) {
      CarryInReg = Def->getOperand(1).getReg();
      Def = MRI.getVRegDef(CarryInReg);
    }

    // TODO - handle more CF generating instructions
    if (Def->getOpcode() == TargetOpcode::G_UADDE ||
        Def->getOpcode() == TargetOpcode::G_UADDO ||
        Def->getOpcode() == TargetOpcode::G_USUBE ||
        Def->getOpcode() == TargetOpcode::G_USUBO) {
      // carry set by prev ADD/SUB.
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY),
              X86::EFLAGS)
          .addReg(CarryInReg);

      if (!RBI.constrainGenericRegister(CarryInReg, *DstRC, MRI))
        return false;

      Opcode = IsSub ? OpSBB : OpADC;
    } else if (auto val = getIConstantVRegVal(CarryInReg, MRI)) {
      // The carry-in is a constant; only 0 is supported.
      if (*val != 0)
        return false;

      Opcode = IsSub ? OpSUB : OpADD;
    } else
      return false;
  }

  MachineInstr &Inst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
           .addReg(Op0Reg)
           .addReg(Op1Reg);

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg)
      .addReg(X86::EFLAGS);

  if (!constrainSelectedInstRegOperands(Inst, TII, TRI, RBI) ||
      !RBI.constrainGenericRegister(CarryOutReg, *DstRC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectExtract(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_EXTRACT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  int64_t Index = I.getOperand(2).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  // For now, handle vector types only.
  if (!DstTy.isVector())
    return false;

  if (Index % DstTy.getSizeInBits() != 0)
    return false; // Not extracting a subvector.

  if (Index == 0) {
    // Replace by extract subreg copy.
    if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF))
      return false;

    I.eraseFromParent();
    return true;
  }

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VEXTRACTF32x4Z256rr));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VEXTRACTF128rr));
    else
      return false;
  } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) {
    if (DstTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VEXTRACTF32x4Zrr));
    else if (DstTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VEXTRACTF64x4Zrr));
    else
      return false;
  } else
    return false;

  // Convert to X86 VEXTRACT immediate.
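  // The hardware immediate is a lane index in units of the destination
  // width (128- or 256-bit lanes), not a bit offset.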
  Index = Index / DstTy.getSizeInBits();
  I.getOperand(2).setImm(Index);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg,
                                               MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  unsigned SubIdx = X86::NoSubRegister;

  if (!DstTy.isVector() || !SrcTy.isVector())
    return false;

  assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() &&
         "Incorrect Src/Dst register size");

  if (DstTy.getSizeInBits() == 128)
    SubIdx = X86::sub_xmm;
  else if (DstTy.getSizeInBits() == 256)
    SubIdx = X86::sub_ymm;
  else
    return false;

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n");
    return false;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg)
      .addReg(SrcReg, 0, SubIdx);

  return true;
}

bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg,
                                              MachineInstr &I,
                                              MachineRegisterInfo &MRI,
                                              MachineFunction &MF) const {
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  unsigned SubIdx = X86::NoSubRegister;

  // TODO: support scalar types
  if (!DstTy.isVector() || !SrcTy.isVector())
    return false;

  assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() &&
         "Incorrect Src/Dst register size");

  if (SrcTy.getSizeInBits() == 128)
    SubIdx = X86::sub_xmm;
  else if (SrcTy.getSizeInBits() == 256)
    SubIdx = X86::sub_ymm;
  else
    return false;

  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n");
    return false;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY))
      .addReg(DstReg, RegState::DefineNoRead, SubIdx)
      .addReg(SrcReg);

  return true;
}

bool X86InstructionSelector::selectInsert(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const Register InsertReg = I.getOperand(2).getReg();
  int64_t Index = I.getOperand(3).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT InsertRegTy = MRI.getType(InsertReg);

  // For now, handle vector types only.
  if (!DstTy.isVector())
    return false;

  if (Index % InsertRegTy.getSizeInBits() != 0)
    return false; // Not inserting a subvector.

  if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) {
    // Replace by subreg copy.
    if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF))
      return false;

    I.eraseFromParent();
    return true;
  }

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VINSERTF32x4Z256rr));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VINSERTF128rr));
    else
      return false;
  } else if (DstTy.getSizeInBits() == 512 && HasAVX512) {
    if (InsertRegTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VINSERTF32x4Zrr));
    else if (InsertRegTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VINSERTF64x4Zrr));
    else
      return false;
  } else
    return false;

  // Convert to X86 VINSERT immediate.
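  // The hardware immediate is a lane index in units of the inserted value's
  // width, not a bit offset.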
1407   Index = Index / InsertRegTy.getSizeInBits();
1408 
1409   I.getOperand(3).setImm(Index);
1410 
1411   return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
1412 }
1413 
1414 bool X86InstructionSelector::selectUnmergeValues(
1415     MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
1416   assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) &&
1417          "unexpected instruction");
1418 
1419   // Split to extracts.
1420   unsigned NumDefs = I.getNumOperands() - 1;
1421   Register SrcReg = I.getOperand(NumDefs).getReg();
1422   unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();
1423 
1424   for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
1425     MachineInstr &ExtrInst =
1426         *BuildMI(*I.getParent(), I, I.getDebugLoc(),
1427                  TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg())
1428              .addReg(SrcReg)
1429              .addImm(Idx * DefSize);
1430 
1431     if (!select(ExtrInst))
1432       return false;
1433   }
1434 
1435   I.eraseFromParent();
1436   return true;
1437 }
1438 
bool X86InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
  assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES ||
          I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) &&
         "unexpected instruction");

  // Split into inserts.
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg0 = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg0);
  unsigned SrcSize = SrcTy.getSizeInBits();

  const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);

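  // E.g. concatenating four 128-bit sources into a 512-bit value emits a
  // subregister copy of the first source followed by G_INSERTs at bit
  // offsets 128, 256 and 384, each selected recursively.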
  // For the first source use emitInsertSubreg.
  Register DefReg = MRI.createGenericVirtualRegister(DstTy);
  MRI.setRegBank(DefReg, RegBank);
  if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF))
    return false;

  for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) {
    Register Tmp = MRI.createGenericVirtualRegister(DstTy);
    MRI.setRegBank(Tmp, RegBank);

    MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                        TII.get(TargetOpcode::G_INSERT), Tmp)
                                    .addReg(DefReg)
                                    .addReg(I.getOperand(Idx).getReg())
                                    .addImm((Idx - 1) * SrcSize);

    DefReg = Tmp;

    if (!select(InsertInst))
      return false;
  }

  MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::COPY), DstReg)
                                .addReg(DefReg);

  if (!select(CopyInst))
    return false;

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectCondBranch(MachineInstr &I,
                                              MachineRegisterInfo &MRI,
                                              MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction");

  const Register CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

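  // TEST8ri sets ZF to !(Cond & 1), so the COND_NE (JNE) branch below is
  // taken exactly when the low bit of the condition is set.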
  MachineInstr &TestInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TEST8ri))
           .addReg(CondReg)
           .addImm(1);
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JCC_1))
      .addMBB(DestMBB).addImm(X86::COND_NE);

  constrainSelectedInstRegOperands(TestInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::materializeFP(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_FCONSTANT) &&
         "unexpected instruction");

  // Can't handle alternate code models yet.
  CodeModel::Model CM = TM.getCodeModel();
  if (CM != CodeModel::Small && CM != CodeModel::Large)
    return false;

  const Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  // Create the load from the constant pool.
  const ConstantFP *CFP = I.getOperand(1).getFPImm();
  const auto &DL = MF.getDataLayout();
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
  const DebugLoc &DbgLoc = I.getDebugLoc();

  unsigned Opc =
      getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment);

  unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
  MachineInstr *LoadInst = nullptr;
  unsigned char OpFlag = STI.classifyLocalReference(nullptr);

  if (CM == CodeModel::Large && STI.is64Bit()) {
    // Under the X86-64 non-small code models, addresses of globals (and
    // friends) are 64 bits wide, so they cannot be folded into immediate
    // fields.

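    // Materialize the 64-bit constant-pool address with MOV64ri first, then
    // issue the load through the resulting register.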
    Register AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass);
    BuildMI(*I.getParent(), I, DbgLoc, TII.get(X86::MOV64ri), AddrReg)
        .addConstantPoolIndex(CPI, 0, OpFlag);

    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
        LLT::pointer(0, DL.getPointerSizeInBits()), Alignment);

    LoadInst =
        addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
                     AddrReg)
            .addMemOperand(MMO);

  } else if (CM == CodeModel::Small || !STI.is64Bit()) {
    // Handle the case when globals fit in the immediate field.
    // This is always true for X86-32 and for X86-64 in -mcmodel=small mode.

    // x86-32 PIC requires a PIC base register for constant pools.
    unsigned PICBase = 0;
    if (OpFlag == X86II::MO_PIC_BASE_OFFSET || OpFlag == X86II::MO_GOTOFF) {
      // PICBase can be allocated by TII.getGlobalBaseReg(&MF); in DAG ISel
      // the code that initializes it is generated by the CGBR pass.
      return false; // TODO: support this mode.
    } else if (STI.is64Bit() && TM.getCodeModel() == CodeModel::Small)
      PICBase = X86::RIP;

    LoadInst = addConstantPoolReference(
        BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), CPI, PICBase,
        OpFlag);
  } else
    return false;

  constrainSelectedInstRegOperands(*LoadInst, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

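// G_IMPLICIT_DEF and G_PHI need no X86-specific lowering: constrain the
// destination to a register class if it has none yet, then rewrite the
// opcode in place to IMPLICIT_DEF / PHI.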
bool X86InstructionSelector::selectImplicitDefOrPHI(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert((I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
          I.getOpcode() == TargetOpcode::G_PHI) &&
         "unexpected instruction");

  Register DstReg = I.getOperand(0).getReg();

  if (!MRI.getRegClassOrNull(DstReg)) {
    const LLT DstTy = MRI.getType(DstReg);
    const TargetRegisterClass *RC = getRegClass(DstTy, DstReg, MRI);

    if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }

  if (I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
    I.setDesc(TII.get(X86::IMPLICIT_DEF));
  else
    I.setDesc(TII.get(X86::PHI));

  return true;
}

bool X86InstructionSelector::selectMulDivRem(MachineInstr &I,
                                             MachineRegisterInfo &MRI,
                                             MachineFunction &MF) const {
  // The implementation of this function is adapted from X86FastISel.
  assert((I.getOpcode() == TargetOpcode::G_MUL ||
          I.getOpcode() == TargetOpcode::G_SMULH ||
          I.getOpcode() == TargetOpcode::G_UMULH ||
          I.getOpcode() == TargetOpcode::G_SDIV ||
          I.getOpcode() == TargetOpcode::G_SREM ||
          I.getOpcode() == TargetOpcode::G_UDIV ||
          I.getOpcode() == TargetOpcode::G_UREM) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register Op1Reg = I.getOperand(1).getReg();
  const Register Op2Reg = I.getOperand(2).getReg();

  const LLT RegTy = MRI.getType(DstReg);
  assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
         "Arguments and return value types must match");

  const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (!RegRB || RegRB->getID() != X86::GPRRegBankID)
    return false;

  const static unsigned NumTypes = 4; // i8, i16, i32, i64
  const static unsigned NumOps = 7;   // SDiv/SRem/UDiv/URem/Mul/SMulH/UMulH
  const static bool S = true;         // IsSigned
  const static bool U = false;        // !IsSigned
  const static unsigned Copy = TargetOpcode::COPY;

  // For the X86 IDIV instruction, in most cases the dividend
  // (numerator) must be in a specific register pair highreg:lowreg,
  // producing the quotient in lowreg and the remainder in highreg.
  // For most data types, to set up the instruction, the dividend is
  // copied into lowreg, and lowreg is sign-extended into highreg.  The
  // exception is i8, where the dividend is defined as a single register rather
  // than a register pair, and we therefore directly sign-extend the dividend
  // into lowreg, instead of copying, and ignore the highreg.
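  //
  // E.g. a 32-bit G_SDIV becomes: EAX = COPY Op1; CDQ (sign-extend EAX into
  // EDX); IDIV32r Op2 divides EDX:EAX, leaving the quotient in EAX and the
  // remainder in EDX; the quotient is then copied into DstReg.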
  const static struct MulDivRemEntry {
    // The following portion depends only on the data type.
    unsigned SizeInBits;
    unsigned LowInReg;  // low part of the register pair
    unsigned HighInReg; // high part of the register pair
    // The following portion depends on both the data type and the operation.
    struct MulDivRemResult {
      unsigned OpMulDivRem;     // The specific MUL/DIV opcode to use.
      unsigned OpSignExtend;    // Opcode for sign-extending lowreg into
                                // highreg, or copying a zero into highreg.
      unsigned OpCopy;          // Opcode for copying dividend into lowreg, or
                                // zero/sign-extending into lowreg for i8.
      unsigned ResultReg;       // Register containing the desired result.
      bool IsOpSigned;          // Whether to use signed or unsigned form.
    } ResultTable[NumOps];
  } OpTable[NumTypes] = {
      {8,
       X86::AX,
       0,
       {
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S}, // SDiv
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U},  // UDiv
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U},  // URem
           {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AL, S}, // Mul
           {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SMulH
           {X86::MUL8r, 0, X86::MOVZX16rr8, X86::AH, U},  // UMulH
       }},                                                // i8
      {16,
       X86::AX,
       X86::DX,
       {
           {X86::IDIV16r, X86::CWD, Copy, X86::AX, S},     // SDiv
           {X86::IDIV16r, X86::CWD, Copy, X86::DX, S},     // SRem
           {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U},  // UDiv
           {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U},  // URem
           {X86::IMUL16r, X86::MOV32r0, Copy, X86::AX, S}, // Mul
           {X86::IMUL16r, X86::MOV32r0, Copy, X86::DX, S}, // SMulH
           {X86::MUL16r, X86::MOV32r0, Copy, X86::DX, U},  // UMulH
       }},                                                 // i16
      {32,
       X86::EAX,
       X86::EDX,
       {
           {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S},     // SDiv
           {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S},     // SRem
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U},  // UDiv
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U},  // URem
           {X86::IMUL32r, X86::MOV32r0, Copy, X86::EAX, S}, // Mul
           {X86::IMUL32r, X86::MOV32r0, Copy, X86::EDX, S}, // SMulH
           {X86::MUL32r, X86::MOV32r0, Copy, X86::EDX, U},  // UMulH
       }},                                                  // i32
      {64,
       X86::RAX,
       X86::RDX,
       {
           {X86::IDIV64r, X86::CQO, Copy, X86::RAX, S},     // SDiv
           {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S},     // SRem
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U},  // UDiv
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U},  // URem
           {X86::IMUL64r, X86::MOV32r0, Copy, X86::RAX, S}, // Mul
           {X86::IMUL64r, X86::MOV32r0, Copy, X86::RDX, S}, // SMulH
           {X86::MUL64r, X86::MOV32r0, Copy, X86::RDX, U},  // UMulH
       }},                                                  // i64
  };

  auto OpEntryIt = llvm::find_if(OpTable, [RegTy](const MulDivRemEntry &El) {
    return El.SizeInBits == RegTy.getSizeInBits();
  });
  if (OpEntryIt == std::end(OpTable))
    return false;

  unsigned OpIndex;
  switch (I.getOpcode()) {
  default:
    llvm_unreachable("Unexpected mul/div/rem opcode");
  case TargetOpcode::G_SDIV:
    OpIndex = 0;
    break;
  case TargetOpcode::G_SREM:
    OpIndex = 1;
    break;
  case TargetOpcode::G_UDIV:
    OpIndex = 2;
    break;
  case TargetOpcode::G_UREM:
    OpIndex = 3;
    break;
  case TargetOpcode::G_MUL:
    OpIndex = 4;
    break;
  case TargetOpcode::G_SMULH:
    OpIndex = 5;
    break;
  case TargetOpcode::G_UMULH:
    OpIndex = 6;
    break;
  }

  const MulDivRemEntry &TypeEntry = *OpEntryIt;
  const MulDivRemEntry::MulDivRemResult &OpEntry =
      TypeEntry.ResultTable[OpIndex];

  const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
  if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // Move op1 into low-order input register.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
          TypeEntry.LowInReg)
      .addReg(Op1Reg);

  // Zero-extend or sign-extend into high-order input register.
  if (OpEntry.OpSignExtend) {
    if (OpEntry.IsOpSigned)
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(OpEntry.OpSignExtend));
    else {
      Register Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0),
              Zero32);

      // Copy the zero into the appropriate sub/super/identical physical
      // register. Unfortunately the operations needed are not uniform enough
      // to fit neatly into the table above.
      if (RegTy.getSizeInBits() == 16) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32, 0, X86::sub_16bit);
      } else if (RegTy.getSizeInBits() == 32) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32);
      } else if (RegTy.getSizeInBits() == 64) {
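        // Writes to a 32-bit register zero the upper half of the 64-bit
        // register, so widening the zero with SUBREG_TO_REG is enough here.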
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
            .addImm(0)
            .addReg(Zero32)
            .addImm(X86::sub_32bit);
      }
    }
  }

  // Generate the DIV/IDIV/MUL/IMUL instruction.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpMulDivRem))
      .addReg(Op2Reg);

  // For the i8 remainder, we can't reference AH directly, as we'll end
  // up with bogus copies like %r9b = COPY %ah. Reference AX
  // instead to avoid AH references in a REX-prefixed instruction.
  //
  // The current assumption of the fast register allocator is that isel
  // won't generate explicit references to the GR8_NOREX registers. If
  // the allocator and/or the backend get enhanced to be more robust in
  // that regard, this can be, and should be, removed.
  if (OpEntry.ResultReg == X86::AH && STI.is64Bit()) {
    Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
        .addReg(X86::AX);

    // Shift AX right by 8 bits instead of using AH.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SHR16ri),
            ResultSuperReg)
        .addReg(SourceSuperReg)
        .addImm(8);

    // Now reference the 8-bit subreg of the result.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
            DstReg)
        .addReg(ResultSuperReg, 0, X86::sub_8bit);
  } else {
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
            DstReg)
        .addReg(OpEntry.ResultReg);
  }
  I.eraseFromParent();

  return true;
}

bool X86InstructionSelector::selectSelect(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  GSelect &Sel = cast<GSelect>(I);
  unsigned DstReg = Sel.getReg(0);
  BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::TEST32rr))
      .addReg(Sel.getCondReg())
      .addReg(Sel.getCondReg());
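  // TEST sets ZF when the condition is zero, so the CMOV below with COND_E
  // moves in the false operand; otherwise the tied true operand is kept.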

  unsigned OpCmp;
  LLT Ty = MRI.getType(DstReg);
  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 8:
    OpCmp = X86::CMOV_GR8;
    break;
  case 16:
    OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
    break;
  case 32:
    OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
    break;
  case 64:
    assert(STI.is64Bit() && STI.canUseCMOV());
    OpCmp = X86::CMOV64rr;
    break;
  }
  BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
      .addReg(Sel.getTrueReg())
      .addReg(Sel.getFalseReg())
      .addImm(X86::COND_E);

  const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI);
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n");
    return false;
  }

  Sel.eraseFromParent();
  return true;
}

InstructionSelector *
llvm::createX86InstructionSelector(const X86TargetMachine &TM,
                                   const X86Subtarget &Subtarget,
                                   const X86RegisterBankInfo &RBI) {
  return new X86InstructionSelector(TM, Subtarget, RBI);
}