//===- X86InstructionSelector.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the InstructionSelector class for
/// X86.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86RegisterBankInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "X86TargetMachine.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterBank.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGenTypes/LowLevelType.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <tuple>

#define DEBUG_TYPE "X86-isel"

using namespace llvm;

namespace {

#define GET_GLOBALISEL_PREDICATE_BITSET
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATE_BITSET

class X86InstructionSelector : public InstructionSelector {
public:
  X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI,
                         const X86RegisterBankInfo &RBI);

  bool select(MachineInstr &I) override;
  static const char *getName() { return DEBUG_TYPE; }

private:
  /// tblgen-erated 'select' implementation, used as the initial selector for
  /// the patterns that don't require complex C++.
  bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const;

  // TODO: remove after supported by Tablegen-erated instruction selection.
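  // Returns the X86 load/store opcode for the given type, register bank and
  // alignment, or returns Opc unchanged if no suitable instruction exists.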
  unsigned getLoadStoreOp(const LLT &Ty, const RegisterBank &RB, unsigned Opc,
                          Align Alignment) const;

  bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
  bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI,
                             MachineFunction &MF) const;
  bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF) const;
  bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI,
                      MachineFunction &MF) const;
  bool selectTruncOrPtrToInt(MachineInstr &I, MachineRegisterInfo &MRI,
                             MachineFunction &MF) const;
  bool selectZext(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
  bool selectAnyext(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                 MachineFunction &MF) const;
  bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI,
                  MachineFunction &MF) const;
  bool selectUAddSub(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
                           MachineFunction &MF);
  bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI,
                         MachineFunction &MF);
  bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;
  bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectCondBranch(MachineInstr &I, MachineRegisterInfo &MRI,
                        MachineFunction &MF) const;
  bool selectTurnIntoCOPY(MachineInstr &I, MachineRegisterInfo &MRI,
                          const unsigned DstReg,
                          const TargetRegisterClass *DstRC,
                          const unsigned SrcReg,
                          const TargetRegisterClass *SrcRC) const;
  bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI,
                     MachineFunction &MF) const;
  bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const;
  bool selectMulDivRem(MachineInstr &I, MachineRegisterInfo &MRI,
                       MachineFunction &MF) const;
  bool selectSelect(MachineInstr &I, MachineRegisterInfo &MRI,
                    MachineFunction &MF) const;

  // emit insert subreg instruction and insert it before MachineInstr &I
  bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
                        MachineRegisterInfo &MRI, MachineFunction &MF) const;
  // emit extract subreg instruction and insert it before MachineInstr &I
  bool emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I,
                         MachineRegisterInfo &MRI, MachineFunction &MF) const;

  const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const;
  const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg,
                                         MachineRegisterInfo &MRI) const;

  const X86TargetMachine &TM;
  const X86Subtarget &STI;
  const X86InstrInfo &TII;
  const X86RegisterInfo &TRI;
  const X86RegisterBankInfo &RBI;

#define GET_GLOBALISEL_PREDICATES_DECL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_DECL

#define GET_GLOBALISEL_TEMPORARIES_DECL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_DECL
};

} // end anonymous namespace

#define GET_GLOBALISEL_IMPL
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_IMPL

X86InstructionSelector::X86InstructionSelector(
    const X86TargetMachine &TM, const X86Subtarget &STI,
    const X86RegisterBankInfo &RBI)
    : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()),
      RBI(RBI),
#define GET_GLOBALISEL_PREDICATES_INIT
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_PREDICATES_INIT
#define GET_GLOBALISEL_TEMPORARIES_INIT
#include "X86GenGlobalISel.inc"
#undef GET_GLOBALISEL_TEMPORARIES_INIT
{
}

// FIXME: This should be target-independent, inferred from the types declared
// for each class in the bank.
const TargetRegisterClass *
X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const {
  if (RB.getID() == X86::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 8)
      return &X86::GR8RegClass;
    if (Ty.getSizeInBits() == 16)
      return &X86::GR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &X86::GR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return &X86::GR64RegClass;
  }
  if (RB.getID() == X86::VECRRegBankID) {
    if (Ty.getSizeInBits() == 16)
      return STI.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
    if (Ty.getSizeInBits() == 64)
      return STI.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
    if (Ty.getSizeInBits() == 128)
      return STI.hasAVX512() ? &X86::VR128XRegClass : &X86::VR128RegClass;
    if (Ty.getSizeInBits() == 256)
      return STI.hasAVX512() ? &X86::VR256XRegClass : &X86::VR256RegClass;
    if (Ty.getSizeInBits() == 512)
      return &X86::VR512RegClass;
  }

  if (RB.getID() == X86::PSRRegBankID) {
    if (Ty.getSizeInBits() == 80)
      return &X86::RFP80RegClass;
    if (Ty.getSizeInBits() == 64)
      return &X86::RFP64RegClass;
    if (Ty.getSizeInBits() == 32)
      return &X86::RFP32RegClass;
  }

  llvm_unreachable("Unknown RegBank!");
}

const TargetRegisterClass *
X86InstructionSelector::getRegClass(LLT Ty, unsigned Reg,
                                    MachineRegisterInfo &MRI) const {
  const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI);
  return getRegClass(Ty, RegBank);
}

static unsigned getSubRegIndex(const TargetRegisterClass *RC) {
  unsigned SubIdx = X86::NoSubRegister;
  if (RC == &X86::GR32RegClass) {
    SubIdx = X86::sub_32bit;
  } else if (RC == &X86::GR16RegClass) {
    SubIdx = X86::sub_16bit;
  } else if (RC == &X86::GR8RegClass) {
    SubIdx = X86::sub_8bit;
  }

  return SubIdx;
}

static const TargetRegisterClass *getRegClassFromGRPhysReg(Register Reg) {
  assert(Reg.isPhysical());
  if (X86::GR64RegClass.contains(Reg))
    return &X86::GR64RegClass;
  if (X86::GR32RegClass.contains(Reg))
    return &X86::GR32RegClass;
  if (X86::GR16RegClass.contains(Reg))
    return &X86::GR16RegClass;
  if (X86::GR8RegClass.contains(Reg))
    return &X86::GR8RegClass;

  llvm_unreachable("Unknown RegClass for PhysReg!");
}

// FIXME: We need some sort of API in RBI/TRI to allow generic code to
// constrain operands of simple instructions given a TargetRegisterClass
// and LLT
bool X86InstructionSelector::selectDebugInstr(MachineInstr &I,
                                              MachineRegisterInfo &MRI) const {
  for (MachineOperand &MO : I.operands()) {
    if (!MO.isReg())
      continue;
    Register Reg = MO.getReg();
    if (!Reg)
      continue;
    if (Reg.isPhysical())
      continue;
    LLT Ty = MRI.getType(Reg);
    const RegClassOrRegBank &RegClassOrBank =
        MRI.getRegClassOrRegBank(Reg);
    const TargetRegisterClass *RC =
        dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank);
    if (!RC) {
      const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank);
      RC = getRegClass(Ty, RB);
      if (!RC) {
        LLVM_DEBUG(
            dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n");
        break;
      }
    }
    RBI.constrainGenericRegister(Reg, *RC, MRI);
  }

  return true;
}

// Set X86 Opcode and constrain DestReg.
bool X86InstructionSelector::selectCopy(MachineInstr &I,
                                        MachineRegisterInfo &MRI) const {
  Register DstReg = I.getOperand(0).getReg();
  const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI);
  const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  Register SrcReg = I.getOperand(1).getReg();
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
  const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI);

  if (DstReg.isPhysical()) {
    assert(I.isCopy() && "Generic operators do not allow physical registers");

    if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID &&
        DstRegBank.getID() == X86::GPRRegBankID) {

      const TargetRegisterClass *SrcRC =
          getRegClass(MRI.getType(SrcReg), SrcRegBank);
      const TargetRegisterClass *DstRC = getRegClassFromGRPhysReg(DstReg);

      if (SrcRC != DstRC) {
        // This case can be generated by ABI lowering; perform an anyext.
        Register ExtSrc = MRI.createVirtualRegister(DstRC);
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG))
            .addDef(ExtSrc)
            .addImm(0)
            .addReg(SrcReg)
            .addImm(getSubRegIndex(SrcRC));

        I.getOperand(1).setReg(ExtSrc);
      }
    }

    return true;
  }

  assert((!SrcReg.isPhysical() || I.isCopy()) &&
         "No phys reg on generic operators");
  assert((DstSize == SrcSize ||
          // Copies are a means to set up initial types; the number of
          // bits may not exactly match.
          (SrcReg.isPhysical() &&
           DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) &&
         "Copy with different width?!");

  const TargetRegisterClass *DstRC =
      getRegClass(MRI.getType(DstReg), DstRegBank);

  if (SrcRegBank.getID() == X86::GPRRegBankID &&
      DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize &&
      SrcReg.isPhysical()) {
    // Change the physical register to perform the truncate.

    const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg);

    if (DstRC != SrcRC) {
      I.getOperand(1).setSubReg(getSubRegIndex(DstRC));
      I.getOperand(1).substPhysReg(SrcReg, TRI);
    }
  }

  // No need to constrain SrcReg. It will get constrained when
  // we hit another of its uses or its defs.
  // Copies do not have constraints.
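  // Only re-constrain the destination if its current class is not already at
  // least as strict as DstRC.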
  const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg);
  if (!OldRC || !DstRC->hasSubClassEq(OldRC)) {
    if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }
  I.setDesc(TII.get(X86::COPY));
  return true;
}

bool X86InstructionSelector::select(MachineInstr &I) {
  assert(I.getParent() && "Instruction should be in a basic block!");
  assert(I.getParent()->getParent() && "Instruction should be in a function!");

  MachineBasicBlock &MBB = *I.getParent();
  MachineFunction &MF = *MBB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  unsigned Opcode = I.getOpcode();
  if (!isPreISelGenericOpcode(Opcode)) {
    // Certain non-generic instructions also need some special handling.

    if (Opcode == TargetOpcode::LOAD_STACK_GUARD)
      return false;

    if (I.isCopy())
      return selectCopy(I, MRI);

    if (I.isDebugInstr())
      return selectDebugInstr(I, MRI);

    return true;
  }

  assert(I.getNumOperands() == I.getNumExplicitOperands() &&
         "Generic instruction has unexpected implicit operands\n");

  if (selectImpl(I, *CoverageInfo))
    return true;

  LLVM_DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs()));

  // TODO: This should be implemented by tblgen.
  switch (I.getOpcode()) {
  default:
    return false;
  case TargetOpcode::G_STORE:
  case TargetOpcode::G_LOAD:
    return selectLoadStoreOp(I, MRI, MF);
  case TargetOpcode::G_PTR_ADD:
  case TargetOpcode::G_FRAME_INDEX:
    return selectFrameIndexOrGep(I, MRI, MF);
  case TargetOpcode::G_GLOBAL_VALUE:
    return selectGlobalValue(I, MRI, MF);
  case TargetOpcode::G_CONSTANT:
    return selectConstant(I, MRI, MF);
  case TargetOpcode::G_FCONSTANT:
    return materializeFP(I, MRI, MF);
  case TargetOpcode::G_PTRTOINT:
  case TargetOpcode::G_TRUNC:
    return selectTruncOrPtrToInt(I, MRI, MF);
  case TargetOpcode::G_INTTOPTR:
    return selectCopy(I, MRI);
  case TargetOpcode::G_ZEXT:
    return selectZext(I, MRI, MF);
  case TargetOpcode::G_ANYEXT:
    return selectAnyext(I, MRI, MF);
  case TargetOpcode::G_ICMP:
    return selectCmp(I, MRI, MF);
  case TargetOpcode::G_FCMP:
    return selectFCmp(I, MRI, MF);
  case TargetOpcode::G_UADDE:
  case TargetOpcode::G_UADDO:
  case TargetOpcode::G_USUBE:
  case TargetOpcode::G_USUBO:
    return selectUAddSub(I, MRI, MF);
  case TargetOpcode::G_UNMERGE_VALUES:
    return selectUnmergeValues(I, MRI, MF);
  case TargetOpcode::G_MERGE_VALUES:
  case TargetOpcode::G_CONCAT_VECTORS:
    return selectMergeValues(I, MRI, MF);
  case TargetOpcode::G_EXTRACT:
    return selectExtract(I, MRI, MF);
  case TargetOpcode::G_INSERT:
    return selectInsert(I, MRI, MF);
  case TargetOpcode::G_BRCOND:
    return selectCondBranch(I, MRI, MF);
  case TargetOpcode::G_IMPLICIT_DEF:
  case TargetOpcode::G_PHI:
    return selectImplicitDefOrPHI(I, MRI);
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_SMULH:
  case TargetOpcode::G_UMULH:
  case TargetOpcode::G_SDIV:
  case TargetOpcode::G_UDIV:
  case TargetOpcode::G_SREM:
  case TargetOpcode::G_UREM:
    return selectMulDivRem(I, MRI, MF);
  case TargetOpcode::G_SELECT:
    return selectSelect(I, MRI, MF);
  }

  return false;
}

unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty,
                                                const RegisterBank &RB,
                                                unsigned Opc,
                                                Align Alignment) const {
  bool Isload = (Opc == TargetOpcode::G_LOAD);
  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (Ty == LLT::scalar(8)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV8rm : X86::MOV8mr;
  } else if (Ty == LLT::scalar(16)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV16rm : X86::MOV16mr;
  } else if (Ty == LLT::scalar(32) || Ty == LLT::pointer(0, 32)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV32rm : X86::MOV32mr;
    if (X86::VECRRegBankID == RB.getID())
      return Isload ? (HasAVX512 ? X86::VMOVSSZrm_alt
                       : HasAVX  ? X86::VMOVSSrm_alt
                                 : X86::MOVSSrm_alt)
                    : (HasAVX512 ? X86::VMOVSSZmr
                       : HasAVX  ? X86::VMOVSSmr
                                 : X86::MOVSSmr);
    if (X86::PSRRegBankID == RB.getID())
      return Isload ? X86::LD_Fp32m : X86::ST_Fp32m;
  } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) {
    if (X86::GPRRegBankID == RB.getID())
      return Isload ? X86::MOV64rm : X86::MOV64mr;
    if (X86::VECRRegBankID == RB.getID())
      return Isload ? (HasAVX512 ? X86::VMOVSDZrm_alt
                       : HasAVX  ? X86::VMOVSDrm_alt
                                 : X86::MOVSDrm_alt)
                    : (HasAVX512 ? X86::VMOVSDZmr
                       : HasAVX  ? X86::VMOVSDmr
                                 : X86::MOVSDmr);
    if (X86::PSRRegBankID == RB.getID())
      return Isload ? X86::LD_Fp64m : X86::ST_Fp64m;
  } else if (Ty == LLT::scalar(80)) {
    return Isload ? X86::LD_Fp80m : X86::ST_FpP80m;
  } else if (Ty.isVector() && Ty.getSizeInBits() == 128) {
    if (Alignment >= Align(16))
      return Isload ? (HasVLX ? X86::VMOVAPSZ128rm
                       : HasAVX512 ? X86::VMOVAPSZ128rm_NOVLX
                       : HasAVX    ? X86::VMOVAPSrm
                                   : X86::MOVAPSrm)
                    : (HasVLX ? X86::VMOVAPSZ128mr
                       : HasAVX512 ? X86::VMOVAPSZ128mr_NOVLX
                       : HasAVX    ? X86::VMOVAPSmr
                                   : X86::MOVAPSmr);
    else
      return Isload ? (HasVLX ? X86::VMOVUPSZ128rm
                       : HasAVX512 ? X86::VMOVUPSZ128rm_NOVLX
                       : HasAVX    ? X86::VMOVUPSrm
                                   : X86::MOVUPSrm)
                    : (HasVLX ? X86::VMOVUPSZ128mr
                       : HasAVX512 ? X86::VMOVUPSZ128mr_NOVLX
                       : HasAVX    ? X86::VMOVUPSmr
                                   : X86::MOVUPSmr);
  } else if (Ty.isVector() && Ty.getSizeInBits() == 256) {
    if (Alignment >= Align(32))
      return Isload ? (HasVLX ? X86::VMOVAPSZ256rm
                       : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX
                                   : X86::VMOVAPSYrm)
                    : (HasVLX ? X86::VMOVAPSZ256mr
                       : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX
                                   : X86::VMOVAPSYmr);
    else
      return Isload ? (HasVLX ? X86::VMOVUPSZ256rm
                       : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX
                                   : X86::VMOVUPSYrm)
                    : (HasVLX ? X86::VMOVUPSZ256mr
                       : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX
                                   : X86::VMOVUPSYmr);
  } else if (Ty.isVector() && Ty.getSizeInBits() == 512) {
    if (Alignment >= Align(64))
      return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr;
    else
      return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr;
  }
  return Opc;
}

// Fill in an address from the given instruction.
static void X86SelectAddress(const MachineInstr &I,
                             const MachineRegisterInfo &MRI,
                             X86AddressMode &AM) {
  assert(I.getOperand(0).isReg() && "unsupported operand.");
  assert(MRI.getType(I.getOperand(0).getReg()).isPointer() &&
         "unsupported type.");

  if (I.getOpcode() == TargetOpcode::G_PTR_ADD) {
    if (auto COff = getIConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) {
      int64_t Imm = *COff;
      if (isInt<32>(Imm)) { // Check for displacement overflow.
        AM.Disp = static_cast<int32_t>(Imm);
        AM.Base.Reg = I.getOperand(1).getReg();
        return;
      }
    }
  } else if (I.getOpcode() == TargetOpcode::G_FRAME_INDEX) {
    AM.Base.FrameIndex = I.getOperand(1).getIndex();
    AM.BaseType = X86AddressMode::FrameIndexBase;
    return;
  }

  // Default behavior.
  AM.Base.Reg = I.getOperand(0).getReg();
}

bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  unsigned Opc = I.getOpcode();

  assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) &&
         "Only G_STORE and G_LOAD are expected for selection");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);
  const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI);

  assert(I.hasOneMemOperand());
  auto &MemOp = **I.memoperands_begin();
  if (MemOp.isAtomic()) {
    // Note: for unordered operations, we rely on the fact the appropriate MMO
    // is already on the instruction we're mutating, and thus we don't need to
    // make any changes. So long as we select an opcode which is capable of
    // loading or storing the appropriate size atomically, the rest of the
    // backend is required to respect the MMO state.
    if (!MemOp.isUnordered()) {
      LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n");
      return false;
    }
    if (MemOp.getAlign() < Ty.getSizeInBits() / 8) {
      LLVM_DEBUG(dbgs() << "Unaligned atomics not supported yet\n");
      return false;
    }
  }

  unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlign());
  if (NewOpc == Opc)
    return false;

  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);
  const MachineInstr *Ptr = MRI.getVRegDef(I.getOperand(1).getReg());

  if (Ptr->getOpcode() == TargetOpcode::G_CONSTANT_POOL) {
    assert(Opc == TargetOpcode::G_LOAD &&
           "Only G_LOAD from constant pool is expected");
    // TODO: Need a separate move for Large model
    if (TM.getCodeModel() == CodeModel::Large)
      return false;

    unsigned char OpFlag = STI.classifyLocalReference(nullptr);
    unsigned PICBase = 0;
    if (OpFlag == X86II::MO_GOTOFF)
      PICBase = TII.getGlobalBaseReg(&MF);
    else if (STI.is64Bit())
      PICBase = X86::RIP;

    I.removeOperand(1);
    addConstantPoolReference(MIB, Ptr->getOperand(1).getIndex(), PICBase,
                             OpFlag);
    return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  }

  X86AddressMode AM;
  X86SelectAddress(*Ptr, MRI, AM);
  if (Opc == TargetOpcode::G_LOAD) {
    I.removeOperand(1);
    addFullAddress(MIB, AM);
  } else {
    // G_STORE (VAL, Addr), X86Store instruction (Addr, VAL)
    I.removeOperand(1);
    I.removeOperand(0);
    addFullAddress(MIB, AM).addUse(DefReg);
  }
  bool Constrained = constrainSelectedInstRegOperands(I, TII, TRI, RBI);
  I.addImplicitDefUseOperands(MF);
  return Constrained;
}

static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) {
  if (Ty == LLT::pointer(0, 64))
    return X86::LEA64r;
  else if (Ty == LLT::pointer(0, 32))
    return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r;
  else
    llvm_unreachable(
        "Can't get LEA opcode. Unsupported type.");
}

bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I,
                                                   MachineRegisterInfo &MRI,
                                                   MachineFunction &MF) const {
  unsigned Opc = I.getOpcode();

  assert((Opc == TargetOpcode::G_FRAME_INDEX || Opc == TargetOpcode::G_PTR_ADD) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);

  // Use LEA to calculate the frame index and the GEP.
  unsigned NewOpc = getLeaOP(Ty, STI);
  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);

  if (Opc == TargetOpcode::G_FRAME_INDEX) {
    addOffset(MIB, 0);
  } else {
    MachineOperand &InxOp = I.getOperand(2);
    I.addOperand(InxOp);        // set IndexReg
    InxOp.ChangeToImmediate(1); // set Scale
    MIB.addImm(0).addReg(0);
  }

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::selectGlobalValue(MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) &&
         "unexpected instruction");

  auto GV = I.getOperand(1).getGlobal();
  if (GV->isThreadLocal()) {
    return false; // TODO: we don't support TLS yet.
  }

  // Can't handle alternate code models yet.
  if (TM.getCodeModel() != CodeModel::Small)
    return false;

  X86AddressMode AM;
  AM.GV = GV;
  AM.GVOpFlags = STI.classifyGlobalReference(GV);

  // TODO: The ABI requires an extra load; not supported yet.
  if (isGlobalStubReference(AM.GVOpFlags))
    return false;

  // TODO: This reference is relative to the PIC base; not supported yet.
  if (isGlobalRelativeToPICBase(AM.GVOpFlags))
    return false;

  if (STI.isPICStyleRIPRel()) {
    // Use rip-relative addressing.
    assert(AM.Base.Reg == 0 && AM.IndexReg == 0);
    AM.Base.Reg = X86::RIP;
  }

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);
  unsigned NewOpc = getLeaOP(Ty, STI);

  I.setDesc(TII.get(NewOpc));
  MachineInstrBuilder MIB(MF, I);

  I.removeOperand(1);
  addFullAddress(MIB, AM);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::selectConstant(MachineInstr &I,
                                            MachineRegisterInfo &MRI,
                                            MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_CONSTANT) &&
         "unexpected instruction");

  const Register DefReg = I.getOperand(0).getReg();
  LLT Ty = MRI.getType(DefReg);

  if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID)
    return false;

  uint64_t Val = 0;
  if (I.getOperand(1).isCImm()) {
    Val = I.getOperand(1).getCImm()->getZExtValue();
    I.getOperand(1).ChangeToImmediate(Val);
  } else if (I.getOperand(1).isImm()) {
    Val = I.getOperand(1).getImm();
  } else
    llvm_unreachable("Unsupported operand type.");

  unsigned NewOpc;
  switch (Ty.getSizeInBits()) {
  case 8:
    NewOpc = X86::MOV8ri;
    break;
  case 16:
    NewOpc = X86::MOV16ri;
    break;
  case 32:
    NewOpc = X86::MOV32ri;
    break;
  case 64:
    // TODO: in case isUInt<32>(Val), X86::MOV32ri can be used
    if (isInt<32>(Val))
      NewOpc = X86::MOV64ri32;
    else
      NewOpc = X86::MOV64ri;
    break;
  default:
    llvm_unreachable("Can't select G_CONSTANT, unsupported type.");
  }

  I.setDesc(TII.get(NewOpc));
  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

// Helper function for selectTruncOrPtrToInt and selectAnyext.
// Returns true if DstRC lives on a floating register class and
// SrcRC lives on a 128-bit vector class.
static bool canTurnIntoCOPY(const TargetRegisterClass *DstRC,
                            const TargetRegisterClass *SrcRC) {
  return (DstRC == &X86::FR32RegClass || DstRC == &X86::FR32XRegClass ||
          DstRC == &X86::FR64RegClass || DstRC == &X86::FR64XRegClass) &&
         (SrcRC == &X86::VR128RegClass || SrcRC == &X86::VR128XRegClass);
}

bool X86InstructionSelector::selectTurnIntoCOPY(
    MachineInstr &I, MachineRegisterInfo &MRI, const unsigned DstReg,
    const TargetRegisterClass *DstRC, const unsigned SrcReg,
    const TargetRegisterClass *SrcRC) const {

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }
  I.setDesc(TII.get(X86::COPY));
  return true;
}

bool X86InstructionSelector::selectTruncOrPtrToInt(MachineInstr &I,
                                                   MachineRegisterInfo &MRI,
                                                   MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_TRUNC ||
          I.getOpcode() == TargetOpcode::G_PTRTOINT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

  if (DstRB.getID() != SrcRB.getID()) {
    LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode())
                      << " input/output on different banks\n");
    return false;
  }

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

  if (!DstRC || !SrcRC)
    return false;

  // If this is a truncation of a value that lives in a vector class and goes
  // into a floating-point class, just replace it with a copy, as we are able
  // to select it as a regular move.
  if (canTurnIntoCOPY(DstRC, SrcRC))
    return selectTurnIntoCOPY(I, MRI, DstReg, DstRC, SrcReg, SrcRC);

  if (DstRB.getID() != X86::GPRRegBankID)
    return false;

  unsigned SubIdx;
  if (DstRC == SrcRC) {
    // Nothing to be done
    SubIdx = X86::NoSubRegister;
  } else if (DstRC == &X86::GR32RegClass) {
    SubIdx = X86::sub_32bit;
  } else if (DstRC == &X86::GR16RegClass) {
    SubIdx = X86::sub_16bit;
  } else if (DstRC == &X86::GR8RegClass) {
    SubIdx = X86::sub_8bit;
  } else {
    return false;
  }

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << "\n");
    return false;
  }

  I.getOperand(1).setSubReg(SubIdx);

  I.setDesc(TII.get(X86::COPY));
  return true;
}

bool X86InstructionSelector::selectZext(MachineInstr &I,
                                        MachineRegisterInfo &MRI,
                                        MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(16)) &&
         "8=>16 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(32)) &&
         "8=>32 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(32)) &&
         "16=>32 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(64)) &&
         "8=>64 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(64)) &&
         "16=>64 Zext is handled by tablegen");
  assert(!(SrcTy == LLT::scalar(32) && DstTy == LLT::scalar(64)) &&
         "32=>64 Zext is handled by tablegen");

  if (SrcTy != LLT::scalar(1))
    return false;

  unsigned AndOpc;
  if (DstTy == LLT::scalar(8))
    AndOpc = X86::AND8ri;
  else if (DstTy == LLT::scalar(16))
    AndOpc = X86::AND16ri;
  else if (DstTy == LLT::scalar(32))
    AndOpc = X86::AND32ri;
  else if (DstTy == LLT::scalar(64))
    AndOpc = X86::AND64ri32;
  else
    return false;

  Register DefReg = SrcReg;
  if (DstTy != LLT::scalar(8)) {
    Register ImpDefReg =
        MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(TargetOpcode::IMPLICIT_DEF), ImpDefReg);

    DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI));
    BuildMI(*I.getParent(), I, I.getDebugLoc(),
            TII.get(TargetOpcode::INSERT_SUBREG), DefReg)
        .addReg(ImpDefReg)
        .addReg(SrcReg)
        .addImm(X86::sub_8bit);
  }

  MachineInstr &AndInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg)
           .addReg(DefReg)
           .addImm(1);

  constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectAnyext(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI);

  assert(DstRB.getID() == SrcRB.getID() &&
         "G_ANYEXT input/output on different banks\n");

  assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() &&
         "G_ANYEXT incorrect operand size");

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB);

  // If this is an ANY_EXT of a value that lives in a floating-point class and
  // goes into a vector class, just replace it with a copy, as we are able to
  // select it as a regular move.
  if (canTurnIntoCOPY(SrcRC, DstRC))
    return selectTurnIntoCOPY(I, MRI, SrcReg, SrcRC, DstReg, DstRC);

  if (DstRB.getID() != X86::GPRRegBankID)
    return false;

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  if (SrcRC == DstRC) {
    I.setDesc(TII.get(X86::COPY));
    return true;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(),
          TII.get(TargetOpcode::SUBREG_TO_REG))
      .addDef(DstReg)
      .addImm(0)
      .addReg(SrcReg)
      .addImm(getSubRegIndex(SrcRC));

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectCmp(MachineInstr &I,
                                       MachineRegisterInfo &MRI,
                                       MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_ICMP) && "unexpected instruction");

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(
      (CmpInst::Predicate)I.getOperand(1).getPredicate());

  Register LHS = I.getOperand(2).getReg();
  Register RHS = I.getOperand(3).getReg();

  if (SwapArgs)
    std::swap(LHS, RHS);

  unsigned OpCmp;
  LLT Ty = MRI.getType(LHS);

  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 8:
    OpCmp = X86::CMP8rr;
    break;
  case 16:
    OpCmp = X86::CMP16rr;
    break;
  case 32:
    OpCmp = X86::CMP32rr;
    break;
  case 64:
    OpCmp = X86::CMP64rr;
    break;
  }

  MachineInstr &CmpInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
           .addReg(LHS)
           .addReg(RHS);

  MachineInstr &SetInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr),
               I.getOperand(0).getReg())
           .addImm(CC);

  constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectFCmp(MachineInstr &I,
                                        MachineRegisterInfo &MRI,
                                        MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction");

  Register LhsReg = I.getOperand(2).getReg();
  Register RhsReg = I.getOperand(3).getReg();
  CmpInst::Predicate Predicate =
      (CmpInst::Predicate)I.getOperand(1).getPredicate();

  // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction.
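  // UCOMIS sets ZF/PF/CF: OEQ holds when ZF is set and PF is clear
  // (COND_E and COND_NP), while UNE holds when ZF is clear or PF is set
  // (COND_NE or COND_P); hence the two SETcc results are combined with
  // AND8rr / OR8rr below.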
  static const uint16_t SETFOpcTable[2][3] = {
      {X86::COND_E, X86::COND_NP, X86::AND8rr},
      {X86::COND_NE, X86::COND_P, X86::OR8rr}};
  const uint16_t *SETFOpc = nullptr;
  switch (Predicate) {
  default:
    break;
  case CmpInst::FCMP_OEQ:
    SETFOpc = &SETFOpcTable[0][0];
    break;
  case CmpInst::FCMP_UNE:
    SETFOpc = &SETFOpcTable[1][0];
    break;
  }

  // Compute the opcode for the CMP instruction.
  unsigned OpCmp;
  LLT Ty = MRI.getType(LhsReg);
  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 32:
    OpCmp = X86::UCOMISSrr;
    break;
  case 64:
    OpCmp = X86::UCOMISDrr;
    break;
  }

  Register ResultReg = I.getOperand(0).getReg();
  RBI.constrainGenericRegister(
      ResultReg,
      *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI);
  if (SETFOpc) {
    MachineInstr &CmpInst =
        *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
             .addReg(LhsReg)
             .addReg(RhsReg);

    Register FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass);
    Register FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass);
    MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(X86::SETCCr), FlagReg1)
                              .addImm(SETFOpc[0]);
    MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(X86::SETCCr), FlagReg2)
                              .addImm(SETFOpc[1]);
    MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                  TII.get(SETFOpc[2]), ResultReg)
                              .addReg(FlagReg1)
                              .addReg(FlagReg2);
    constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set1, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set2, TII, TRI, RBI);
    constrainSelectedInstRegOperands(Set3, TII, TRI, RBI);

    I.eraseFromParent();
    return true;
  }

  X86::CondCode CC;
  bool SwapArgs;
  std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate);
  assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code.");

  if (SwapArgs)
    std::swap(LhsReg, RhsReg);

  // Emit a compare of LHS/RHS.
  MachineInstr &CmpInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp))
           .addReg(LhsReg)
           .addReg(RhsReg);

  MachineInstr &Set =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr),
               ResultReg)
           .addImm(CC);
  constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI);
  constrainSelectedInstRegOperands(Set, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectUAddSub(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_UADDE ||
          I.getOpcode() == TargetOpcode::G_UADDO ||
          I.getOpcode() == TargetOpcode::G_USUBE ||
          I.getOpcode() == TargetOpcode::G_USUBO) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register CarryOutReg = I.getOperand(1).getReg();
  const Register Op0Reg = I.getOperand(2).getReg();
  const Register Op1Reg = I.getOperand(3).getReg();
  bool IsSub = I.getOpcode() == TargetOpcode::G_USUBE ||
               I.getOpcode() == TargetOpcode::G_USUBO;
  bool HasCarryIn = I.getOpcode() == TargetOpcode::G_UADDE ||
                    I.getOpcode() == TargetOpcode::G_USUBE;

  const LLT DstTy = MRI.getType(DstReg);
  assert(DstTy.isScalar() && "selectUAddSub only supported for scalar types");

  // TODO: Handle immediate argument variants?
  unsigned OpADC, OpADD, OpSBB, OpSUB;
  switch (DstTy.getSizeInBits()) {
  case 8:
    OpADC = X86::ADC8rr;
    OpADD = X86::ADD8rr;
    OpSBB = X86::SBB8rr;
    OpSUB = X86::SUB8rr;
    break;
  case 16:
    OpADC = X86::ADC16rr;
    OpADD = X86::ADD16rr;
    OpSBB = X86::SBB16rr;
    OpSUB = X86::SUB16rr;
    break;
  case 32:
    OpADC = X86::ADC32rr;
    OpADD = X86::ADD32rr;
    OpSBB = X86::SBB32rr;
    OpSUB = X86::SUB32rr;
    break;
  case 64:
    OpADC = X86::ADC64rr;
    OpADD = X86::ADD64rr;
    OpSBB = X86::SBB64rr;
    OpSUB = X86::SUB64rr;
    break;
  default:
    llvm_unreachable("selectUAddSub unsupported type.");
  }

  const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI);
  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB);

  unsigned Opcode = IsSub ? OpSUB : OpADD;

  // G_UADDE/G_USUBE - find CarryIn def instruction.
  if (HasCarryIn) {
    Register CarryInReg = I.getOperand(4).getReg();
    MachineInstr *Def = MRI.getVRegDef(CarryInReg);
    while (Def->getOpcode() == TargetOpcode::G_TRUNC) {
      CarryInReg = Def->getOperand(1).getReg();
      Def = MRI.getVRegDef(CarryInReg);
    }

    // TODO - handle more CF generating instructions
    if (Def->getOpcode() == TargetOpcode::G_UADDE ||
        Def->getOpcode() == TargetOpcode::G_UADDO ||
        Def->getOpcode() == TargetOpcode::G_USUBE ||
        Def->getOpcode() == TargetOpcode::G_USUBO) {
      // carry set by prev ADD/SUB.
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY),
              X86::EFLAGS)
          .addReg(CarryInReg);

      if (!RBI.constrainGenericRegister(CarryInReg, *DstRC, MRI))
        return false;

      Opcode = IsSub ? OpSBB : OpADC;
    } else if (auto val = getIConstantVRegVal(CarryInReg, MRI)) {
      // carry is constant, support only 0.
      if (*val != 0)
        return false;

      Opcode = IsSub ? OpSUB : OpADD;
    } else
      return false;
  }

  MachineInstr &Inst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg)
           .addReg(Op0Reg)
           .addReg(Op1Reg);

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg)
      .addReg(X86::EFLAGS);

  if (!constrainSelectedInstRegOperands(Inst, TII, TRI, RBI) ||
      !RBI.constrainGenericRegister(CarryOutReg, *DstRC, MRI))
    return false;

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectExtract(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_EXTRACT) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  int64_t Index = I.getOperand(2).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);

  // For now, handle vector types only.
  if (!DstTy.isVector())
    return false;

  if (Index % DstTy.getSizeInBits() != 0)
    return false; // Not extract subvector.

  if (Index == 0) {
    // Replace by extract subreg copy.
    if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF))
      return false;

    I.eraseFromParent();
    return true;
  }

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VEXTRACTF32x4Z256rr));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VEXTRACTF128rr));
    else
      return false;
  } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) {
    if (DstTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VEXTRACTF32x4Zrr));
    else if (DstTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VEXTRACTF64x4Zrr));
    else
      return false;
  } else
    return false;

  // Convert to X86 VEXTRACT immediate.
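  // G_EXTRACT indexes in bits, while the VEXTRACT immediate counts in units
  // of the destination vector width (e.g. the upper 128-bit half of a 256-bit
  // source is immediate 1).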
  Index = Index / DstTy.getSizeInBits();
  I.getOperand(2).setImm(Index);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg,
                                               MachineInstr &I,
                                               MachineRegisterInfo &MRI,
                                               MachineFunction &MF) const {
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  unsigned SubIdx = X86::NoSubRegister;

  if (!DstTy.isVector() || !SrcTy.isVector())
    return false;

  assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() &&
         "Incorrect Src/Dst register size");

  if (DstTy.getSizeInBits() == 128)
    SubIdx = X86::sub_xmm;
  else if (DstTy.getSizeInBits() == 256)
    SubIdx = X86::sub_ymm;
  else
    return false;

  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);
  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);

  SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n");
    return false;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg)
      .addReg(SrcReg, 0, SubIdx);

  return true;
}

bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg,
                                              MachineInstr &I,
                                              MachineRegisterInfo &MRI,
                                              MachineFunction &MF) const {
  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg);
  unsigned SubIdx = X86::NoSubRegister;

  // TODO: support scalar types
  if (!DstTy.isVector() || !SrcTy.isVector())
    return false;

  assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() &&
         "Incorrect Src/Dst register size");

  if (SrcTy.getSizeInBits() == 128)
    SubIdx = X86::sub_xmm;
  else if (SrcTy.getSizeInBits() == 256)
    SubIdx = X86::sub_ymm;
  else
    return false;

  const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI);
  const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI);

  if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n");
    return false;
  }

  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY))
      .addReg(DstReg, RegState::DefineNoRead, SubIdx)
      .addReg(SrcReg);

  return true;
}

bool X86InstructionSelector::selectInsert(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register SrcReg = I.getOperand(1).getReg();
  const Register InsertReg = I.getOperand(2).getReg();
  int64_t Index = I.getOperand(3).getImm();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT InsertRegTy = MRI.getType(InsertReg);

  // For now, handle vector types only.
  if (!DstTy.isVector())
    return false;

  if (Index % InsertRegTy.getSizeInBits() != 0)
    return false; // Not insert subvector.

  if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) {
    // Replace by subreg copy.
    if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF))
      return false;

    I.eraseFromParent();
    return true;
  }

  bool HasAVX = STI.hasAVX();
  bool HasAVX512 = STI.hasAVX512();
  bool HasVLX = STI.hasVLX();

  if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) {
    if (HasVLX)
      I.setDesc(TII.get(X86::VINSERTF32x4Z256rr));
    else if (HasAVX)
      I.setDesc(TII.get(X86::VINSERTF128rr));
    else
      return false;
  } else if (DstTy.getSizeInBits() == 512 && HasAVX512) {
    if (InsertRegTy.getSizeInBits() == 128)
      I.setDesc(TII.get(X86::VINSERTF32x4Zrr));
    else if (InsertRegTy.getSizeInBits() == 256)
      I.setDesc(TII.get(X86::VINSERTF64x4Zrr));
    else
      return false;
  } else
    return false;

  // Convert to X86 VINSERT immediate.
  Index = Index / InsertRegTy.getSizeInBits();

  I.getOperand(3).setImm(Index);

  return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
}

bool X86InstructionSelector::selectUnmergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
  assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) &&
         "unexpected instruction");

  // Split to extracts.
  unsigned NumDefs = I.getNumOperands() - 1;
  Register SrcReg = I.getOperand(NumDefs).getReg();
  unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits();

  for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
    MachineInstr &ExtrInst =
        *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                 TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg())
             .addReg(SrcReg)
             .addImm(Idx * DefSize);

    if (!select(ExtrInst))
      return false;
  }

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectMergeValues(
    MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) {
  assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES ||
          I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) &&
         "unexpected instruction");

  // Split to inserts.
  Register DstReg = I.getOperand(0).getReg();
  Register SrcReg0 = I.getOperand(1).getReg();

  const LLT DstTy = MRI.getType(DstReg);
  const LLT SrcTy = MRI.getType(SrcReg0);
  unsigned SrcSize = SrcTy.getSizeInBits();

  const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);

  // For the first src use insertSubReg.
  Register DefReg = MRI.createGenericVirtualRegister(DstTy);
  MRI.setRegBank(DefReg, RegBank);
  if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF))
    return false;

  for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) {
    Register Tmp = MRI.createGenericVirtualRegister(DstTy);
    MRI.setRegBank(Tmp, RegBank);

    MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                        TII.get(TargetOpcode::G_INSERT), Tmp)
                                    .addReg(DefReg)
                                    .addReg(I.getOperand(Idx).getReg())
                                    .addImm((Idx - 1) * SrcSize);

    DefReg = Tmp;

    if (!select(InsertInst))
      return false;
  }

  MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(),
                                    TII.get(TargetOpcode::COPY), DstReg)
                                .addReg(DefReg);

  if (!select(CopyInst))
    return false;

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectCondBranch(MachineInstr &I,
                                              MachineRegisterInfo &MRI,
                                              MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction");

  const Register CondReg = I.getOperand(0).getReg();
  MachineBasicBlock *DestMBB = I.getOperand(1).getMBB();

  MachineInstr &TestInst =
      *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TEST8ri))
           .addReg(CondReg)
           .addImm(1);
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JCC_1))
      .addMBB(DestMBB)
      .addImm(X86::COND_NE);

  constrainSelectedInstRegOperands(TestInst, TII, TRI, RBI);

  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::materializeFP(MachineInstr &I,
                                           MachineRegisterInfo &MRI,
                                           MachineFunction &MF) const {
  assert((I.getOpcode() == TargetOpcode::G_FCONSTANT) &&
         "unexpected instruction");

  // Can't handle alternate code models yet.
  CodeModel::Model CM = TM.getCodeModel();
  if (CM != CodeModel::Small && CM != CodeModel::Large)
    return false;

  const Register DstReg = I.getOperand(0).getReg();
  const LLT DstTy = MRI.getType(DstReg);
  const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  // Create the load from the constant pool.
  const ConstantFP *CFP = I.getOperand(1).getFPImm();
  const auto &DL = MF.getDataLayout();
  Align Alignment = DL.getPrefTypeAlign(CFP->getType());
  const DebugLoc &DbgLoc = I.getDebugLoc();

  unsigned Opc =
      getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment);

  unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment);
  MachineInstr *LoadInst = nullptr;
  unsigned char OpFlag = STI.classifyLocalReference(nullptr);

  if (CM == CodeModel::Large && STI.is64Bit()) {
    // Under X86-64 non-small code model, GV (and friends) are 64-bits, so
    // they cannot be folded into immediate fields.
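    // Materialize the constant-pool address in a 64-bit register first and
    // load through it.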
    Register AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass);
    BuildMI(*I.getParent(), I, DbgLoc, TII.get(X86::MOV64ri), AddrReg)
        .addConstantPoolIndex(CPI, 0, OpFlag);

    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad,
        LLT::pointer(0, DL.getPointerSizeInBits()), Alignment);

    LoadInst =
        addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg),
                     AddrReg)
            .addMemOperand(MMO);

  } else if (CM == CodeModel::Small || !STI.is64Bit()) {
    // Handle the case when globals fit in our immediate field.
    // This is true for X86-32 always and X86-64 when in -mcmodel=small mode.

    // x86-32 PIC requires a PIC base register for constant pools.
    unsigned PICBase = 0;
    if (OpFlag == X86II::MO_PIC_BASE_OFFSET || OpFlag == X86II::MO_GOTOFF) {
      // PICBase can be allocated by TII.getGlobalBaseReg(&MF).
      // In DAGISel, the code that initializes it is generated by the CGBR pass.
      return false; // TODO: support this mode.
    } else if (STI.is64Bit() && TM.getCodeModel() == CodeModel::Small)
      PICBase = X86::RIP;

    LoadInst = addConstantPoolReference(
        BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), CPI, PICBase,
        OpFlag);
  } else
    return false;

  constrainSelectedInstRegOperands(*LoadInst, TII, TRI, RBI);
  I.eraseFromParent();
  return true;
}

bool X86InstructionSelector::selectImplicitDefOrPHI(
    MachineInstr &I, MachineRegisterInfo &MRI) const {
  assert((I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF ||
          I.getOpcode() == TargetOpcode::G_PHI) &&
         "unexpected instruction");

  Register DstReg = I.getOperand(0).getReg();

  if (!MRI.getRegClassOrNull(DstReg)) {
    const LLT DstTy = MRI.getType(DstReg);
    const TargetRegisterClass *RC = getRegClass(DstTy, DstReg, MRI);

    if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) {
      LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                        << " operand\n");
      return false;
    }
  }

  if (I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF)
    I.setDesc(TII.get(X86::IMPLICIT_DEF));
  else
    I.setDesc(TII.get(X86::PHI));

  return true;
}

bool X86InstructionSelector::selectMulDivRem(MachineInstr &I,
                                             MachineRegisterInfo &MRI,
                                             MachineFunction &MF) const {
  // The implementation of this function is adapted from X86FastISel.
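  // x86 integer mul/div implicitly uses fixed registers (AL/AX/EAX/RAX and
  // AH/DX/EDX/RDX), so the operands are first copied into place and the
  // result is copied back out of the fixed register afterwards.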
  assert((I.getOpcode() == TargetOpcode::G_MUL ||
          I.getOpcode() == TargetOpcode::G_SMULH ||
          I.getOpcode() == TargetOpcode::G_UMULH ||
          I.getOpcode() == TargetOpcode::G_SDIV ||
          I.getOpcode() == TargetOpcode::G_SREM ||
          I.getOpcode() == TargetOpcode::G_UDIV ||
          I.getOpcode() == TargetOpcode::G_UREM) &&
         "unexpected instruction");

  const Register DstReg = I.getOperand(0).getReg();
  const Register Op1Reg = I.getOperand(1).getReg();
  const Register Op2Reg = I.getOperand(2).getReg();

  const LLT RegTy = MRI.getType(DstReg);
  assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) &&
         "Arguments and return value types must match");

  const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI);
  if (!RegRB || RegRB->getID() != X86::GPRRegBankID)
    return false;

  const static unsigned NumTypes = 4; // i8, i16, i32, i64
  const static unsigned NumOps = 7;   // SDiv/SRem/UDiv/URem/Mul/SMulH/UMulh
  const static bool S = true;         // IsSigned
  const static bool U = false;        // !IsSigned
  const static unsigned Copy = TargetOpcode::COPY;

  // For the X86 IDIV instruction, in most cases the dividend
  // (numerator) must be in a specific register pair highreg:lowreg,
  // producing the quotient in lowreg and the remainder in highreg.
  // For most data types, to set up the instruction, the dividend is
  // copied into lowreg, and lowreg is sign-extended into highreg. The
  // exception is i8, where the dividend is defined as a single register rather
  // than a register pair, and we therefore directly sign-extend the dividend
  // into lowreg, instead of copying, and ignore the highreg.
  const static struct MulDivRemEntry {
    // The following portion depends only on the data type.
    unsigned SizeInBits;
    unsigned LowInReg;  // low part of the register pair
    unsigned HighInReg; // high part of the register pair
    // The following portion depends on both the data type and the operation.
    struct MulDivRemResult {
      unsigned OpMulDivRem;  // The specific MUL/DIV opcode to use.
      unsigned OpSignExtend; // Opcode for sign-extending lowreg into
                             // highreg, or copying a zero into highreg.
      unsigned OpCopy;       // Opcode for copying dividend into lowreg, or
                             // zero/sign-extending into lowreg for i8.
      unsigned ResultReg;    // Register containing the desired result.
      bool IsOpSigned;       // Whether to use signed or unsigned form.
  const static struct MulDivRemEntry {
    // The following portion depends only on the data type.
    unsigned SizeInBits;
    unsigned LowInReg;  // low part of the register pair
    unsigned HighInReg; // high part of the register pair
    // The following portion depends on both the data type and the operation.
    struct MulDivRemResult {
      unsigned OpMulDivRem;  // The specific MUL/DIV opcode to use.
      unsigned OpSignExtend; // Opcode for sign-extending lowreg into
                             // highreg, or copying a zero into highreg.
      unsigned OpCopy;       // Opcode for copying dividend into lowreg, or
                             // zero/sign-extending into lowreg for i8.
      unsigned ResultReg;    // Register containing the desired result.
      bool IsOpSigned;       // Whether to use signed or unsigned form.
    } ResultTable[NumOps];
  } OpTable[NumTypes] = {
      {8,
       X86::AX,
       0,
       {
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S}, // SDiv
           {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U},  // UDiv
           {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U},  // URem
           {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AL, S}, // Mul
           {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SMulH
           {X86::MUL8r, 0, X86::MOVZX16rr8, X86::AH, U},  // UMulH
       }},                                                // i8
      {16,
       X86::AX,
       X86::DX,
       {
           {X86::IDIV16r, X86::CWD, Copy, X86::AX, S},     // SDiv
           {X86::IDIV16r, X86::CWD, Copy, X86::DX, S},     // SRem
           {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U},  // UDiv
           {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U},  // URem
           {X86::IMUL16r, X86::MOV32r0, Copy, X86::AX, S}, // Mul
           {X86::IMUL16r, X86::MOV32r0, Copy, X86::DX, S}, // SMulH
           {X86::MUL16r, X86::MOV32r0, Copy, X86::DX, U},  // UMulH
       }},                                                 // i16
      {32,
       X86::EAX,
       X86::EDX,
       {
           {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S},     // SDiv
           {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S},     // SRem
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U},  // UDiv
           {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U},  // URem
           {X86::IMUL32r, X86::MOV32r0, Copy, X86::EAX, S}, // Mul
           {X86::IMUL32r, X86::MOV32r0, Copy, X86::EDX, S}, // SMulH
           {X86::MUL32r, X86::MOV32r0, Copy, X86::EDX, U},  // UMulH
       }},                                                  // i32
      {64,
       X86::RAX,
       X86::RDX,
       {
           {X86::IDIV64r, X86::CQO, Copy, X86::RAX, S},     // SDiv
           {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S},     // SRem
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U},  // UDiv
           {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U},  // URem
           {X86::IMUL64r, X86::MOV32r0, Copy, X86::RAX, S}, // Mul
           {X86::IMUL64r, X86::MOV32r0, Copy, X86::RDX, S}, // SMulH
           {X86::MUL64r, X86::MOV32r0, Copy, X86::RDX, U},  // UMulH
       }},                                                  // i64
  };

  auto OpEntryIt = llvm::find_if(OpTable, [RegTy](const MulDivRemEntry &El) {
    return El.SizeInBits == RegTy.getSizeInBits();
  });
  if (OpEntryIt == std::end(OpTable))
    return false;

  unsigned OpIndex;
  switch (I.getOpcode()) {
  default:
    llvm_unreachable("Unexpected mul/div/rem opcode");
  case TargetOpcode::G_SDIV:
    OpIndex = 0;
    break;
  case TargetOpcode::G_SREM:
    OpIndex = 1;
    break;
  case TargetOpcode::G_UDIV:
    OpIndex = 2;
    break;
  case TargetOpcode::G_UREM:
    OpIndex = 3;
    break;
  case TargetOpcode::G_MUL:
    OpIndex = 4;
    break;
  case TargetOpcode::G_SMULH:
    OpIndex = 5;
    break;
  case TargetOpcode::G_UMULH:
    OpIndex = 6;
    break;
  }

  const MulDivRemEntry &TypeEntry = *OpEntryIt;
  const MulDivRemEntry::MulDivRemResult &OpEntry =
      TypeEntry.ResultTable[OpIndex];

  const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB);
  if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) ||
      !RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode())
                      << " operand\n");
    return false;
  }

  // Move op1 into low-order input register.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy),
          TypeEntry.LowInReg)
      .addReg(Op1Reg);

  // Zero-extend or sign-extend into high-order input register.
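  // For the signed 16/32/64-bit operations this is the CWD/CDQ/CQO named in
  // the table; for the unsigned ones the table names MOV32r0 at every width,
  // since there is no dedicated 16- or 64-bit zeroing pseudo, so a 32-bit
  // zero is materialized and then moved into the high register. For a 64-bit
  // G_UDIV, for instance, this amounts to roughly:
  //   %zero32 = MOV32r0
  //   $rdx = SUBREG_TO_REG 0, %zero32, %subreg.sub_32bit
  // (illustrative MIR; the actual virtual register names will differ).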
  if (OpEntry.OpSignExtend) {
    if (OpEntry.IsOpSigned)
      BuildMI(*I.getParent(), I, I.getDebugLoc(),
              TII.get(OpEntry.OpSignExtend));
    else {
      Register Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass);
      BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0),
              Zero32);

      // Copy the zero into the appropriate sub/super/identical physical
      // register. Unfortunately the operations needed are not uniform enough
      // to fit neatly into the table above.
      if (RegTy.getSizeInBits() == 16) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32, 0, X86::sub_16bit);
      } else if (RegTy.getSizeInBits() == 32) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy),
                TypeEntry.HighInReg)
            .addReg(Zero32);
      } else if (RegTy.getSizeInBits() == 64) {
        BuildMI(*I.getParent(), I, I.getDebugLoc(),
                TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg)
            .addImm(0)
            .addReg(Zero32)
            .addImm(X86::sub_32bit);
      }
    }
  }

  // Generate the DIV/IDIV/MUL/IMUL instruction.
  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpMulDivRem))
      .addReg(Op2Reg);

  // For i8 remainder, we can't reference ah directly, as we'll end
  // up with bogus copies like %r9b = COPY %ah. Reference ax
  // instead to prevent ah references in a rex instruction.
  //
  // The current assumption of the fast register allocator is that isel
  // won't generate explicit references to the GR8_NOREX registers. If
  // the allocator and/or the backend get enhanced to be more robust in
  // that regard, this can be, and should be, removed.
  if (OpEntry.ResultReg == X86::AH && STI.is64Bit()) {
    Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass);
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg)
        .addReg(X86::AX);

    // Shift AX right by 8 bits instead of using AH.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SHR16ri),
            ResultSuperReg)
        .addReg(SourceSuperReg)
        .addImm(8);

    // Now reference the 8-bit subreg of the result.
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
            DstReg)
        .addReg(ResultSuperReg, 0, X86::sub_8bit);
  } else {
    BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY),
            DstReg)
        .addReg(OpEntry.ResultReg);
  }
  I.eraseFromParent();

  return true;
}
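// selectSelect lowers G_SELECT by testing the condition register against
// itself: TEST32rr sets ZF exactly when the condition is zero. The CMOV
// emitted below uses COND_E, so the false operand is taken when ZF is set
// and the (tied) true operand is kept otherwise, i.e. Dst = Cond ? True :
// False. For 8-bit values, and whenever CMOV is unavailable, the CMOV_GRxx
// pseudos are selected and later expanded into a branch sequence.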
bool X86InstructionSelector::selectSelect(MachineInstr &I,
                                          MachineRegisterInfo &MRI,
                                          MachineFunction &MF) const {
  GSelect &Sel = cast<GSelect>(I);
  unsigned DstReg = Sel.getReg(0);
  BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::TEST32rr))
      .addReg(Sel.getCondReg())
      .addReg(Sel.getCondReg());

  unsigned OpCmp;
  LLT Ty = MRI.getType(DstReg);
  switch (Ty.getSizeInBits()) {
  default:
    return false;
  case 8:
    OpCmp = X86::CMOV_GR8;
    break;
  case 16:
    OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16;
    break;
  case 32:
    OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32;
    break;
  case 64:
    assert(STI.is64Bit() && STI.canUseCMOV());
    OpCmp = X86::CMOV64rr;
    break;
  }
  BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg)
      .addReg(Sel.getTrueReg())
      .addReg(Sel.getFalseReg())
      .addImm(X86::COND_E);

  const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI);
  if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) {
    LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n");
    return false;
  }

  Sel.eraseFromParent();
  return true;
}

InstructionSelector *
llvm::createX86InstructionSelector(const X86TargetMachine &TM,
                                   const X86Subtarget &Subtarget,
                                   const X86RegisterBankInfo &RBI) {
  return new X86InstructionSelector(TM, Subtarget, RBI);
}