1 //===- X86InstructionSelector.cpp -----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file implements the targeting of the InstructionSelector class for 10 /// X86. 11 /// \todo This should be generated by TableGen. 12 //===----------------------------------------------------------------------===// 13 14 #include "MCTargetDesc/X86BaseInfo.h" 15 #include "X86.h" 16 #include "X86InstrBuilder.h" 17 #include "X86InstrInfo.h" 18 #include "X86RegisterBankInfo.h" 19 #include "X86RegisterInfo.h" 20 #include "X86Subtarget.h" 21 #include "X86TargetMachine.h" 22 #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" 23 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" 24 #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" 25 #include "llvm/CodeGen/GlobalISel/Utils.h" 26 #include "llvm/CodeGen/LowLevelType.h" 27 #include "llvm/CodeGen/MachineBasicBlock.h" 28 #include "llvm/CodeGen/MachineConstantPool.h" 29 #include "llvm/CodeGen/MachineFunction.h" 30 #include "llvm/CodeGen/MachineInstr.h" 31 #include "llvm/CodeGen/MachineInstrBuilder.h" 32 #include "llvm/CodeGen/MachineMemOperand.h" 33 #include "llvm/CodeGen/MachineOperand.h" 34 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/CodeGen/RegisterBank.h" 36 #include "llvm/CodeGen/TargetOpcodes.h" 37 #include "llvm/CodeGen/TargetRegisterInfo.h" 38 #include "llvm/IR/DataLayout.h" 39 #include "llvm/IR/InstrTypes.h" 40 #include "llvm/IR/IntrinsicsX86.h" 41 #include "llvm/Support/AtomicOrdering.h" 42 #include "llvm/Support/CodeGen.h" 43 #include "llvm/Support/Debug.h" 44 #include "llvm/Support/ErrorHandling.h" 45 #include "llvm/Support/MathExtras.h" 46 #include "llvm/Support/raw_ostream.h" 47 #include <cassert> 48 #include <cstdint> 49 #include <tuple> 50 51 #define DEBUG_TYPE "X86-isel" 52 53 using namespace llvm; 54 55 namespace { 56 57 #define GET_GLOBALISEL_PREDICATE_BITSET 58 #include "X86GenGlobalISel.inc" 59 #undef GET_GLOBALISEL_PREDICATE_BITSET 60 61 class X86InstructionSelector : public InstructionSelector { 62 public: 63 X86InstructionSelector(const X86TargetMachine &TM, const X86Subtarget &STI, 64 const X86RegisterBankInfo &RBI); 65 66 bool select(MachineInstr &I) override; 67 static const char *getName() { return DEBUG_TYPE; } 68 69 private: 70 /// tblgen-erated 'select' implementation, used as the initial selector for 71 /// the patterns that don't require complex C++. 72 bool selectImpl(MachineInstr &I, CodeGenCoverage &CoverageInfo) const; 73 74 // TODO: remove after supported by Tablegen-erated instruction selection. 75 unsigned getLoadStoreOp(const LLT &Ty, const RegisterBank &RB, unsigned Opc, 76 Align Alignment) const; 77 78 bool selectLoadStoreOp(MachineInstr &I, MachineRegisterInfo &MRI, 79 MachineFunction &MF) const; 80 bool selectFrameIndexOrGep(MachineInstr &I, MachineRegisterInfo &MRI, 81 MachineFunction &MF) const; 82 bool selectGlobalValue(MachineInstr &I, MachineRegisterInfo &MRI, 83 MachineFunction &MF) const; 84 bool selectConstant(MachineInstr &I, MachineRegisterInfo &MRI, 85 MachineFunction &MF) const; 86 bool selectTruncOrPtrToInt(MachineInstr &I, MachineRegisterInfo &MRI, 87 MachineFunction &MF) const; 88 bool selectZext(MachineInstr &I, MachineRegisterInfo &MRI, 89 MachineFunction &MF) const; 90 bool selectAnyext(MachineInstr &I, MachineRegisterInfo &MRI, 91 MachineFunction &MF) const; 92 bool selectCmp(MachineInstr &I, MachineRegisterInfo &MRI, 93 MachineFunction &MF) const; 94 bool selectFCmp(MachineInstr &I, MachineRegisterInfo &MRI, 95 MachineFunction &MF) const; 96 bool selectUAddSub(MachineInstr &I, MachineRegisterInfo &MRI, 97 MachineFunction &MF) const; 98 bool selectDebugInstr(MachineInstr &I, MachineRegisterInfo &MRI) const; 99 bool selectCopy(MachineInstr &I, MachineRegisterInfo &MRI) const; 100 bool selectUnmergeValues(MachineInstr &I, MachineRegisterInfo &MRI, 101 MachineFunction &MF); 102 bool selectMergeValues(MachineInstr &I, MachineRegisterInfo &MRI, 103 MachineFunction &MF); 104 bool selectInsert(MachineInstr &I, MachineRegisterInfo &MRI, 105 MachineFunction &MF) const; 106 bool selectExtract(MachineInstr &I, MachineRegisterInfo &MRI, 107 MachineFunction &MF) const; 108 bool selectCondBranch(MachineInstr &I, MachineRegisterInfo &MRI, 109 MachineFunction &MF) const; 110 bool selectTurnIntoCOPY(MachineInstr &I, MachineRegisterInfo &MRI, 111 const unsigned DstReg, 112 const TargetRegisterClass *DstRC, 113 const unsigned SrcReg, 114 const TargetRegisterClass *SrcRC) const; 115 bool materializeFP(MachineInstr &I, MachineRegisterInfo &MRI, 116 MachineFunction &MF) const; 117 bool selectImplicitDefOrPHI(MachineInstr &I, MachineRegisterInfo &MRI) const; 118 bool selectMulDivRem(MachineInstr &I, MachineRegisterInfo &MRI, 119 MachineFunction &MF) const; 120 bool selectSelect(MachineInstr &I, MachineRegisterInfo &MRI, 121 MachineFunction &MF) const; 122 bool selectIntrinsicWSideEffects(MachineInstr &I, MachineRegisterInfo &MRI, 123 MachineFunction &MF) const; 124 125 // emit insert subreg instruction and insert it before MachineInstr &I 126 bool emitInsertSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I, 127 MachineRegisterInfo &MRI, MachineFunction &MF) const; 128 // emit extract subreg instruction and insert it before MachineInstr &I 129 bool emitExtractSubreg(unsigned DstReg, unsigned SrcReg, MachineInstr &I, 130 MachineRegisterInfo &MRI, MachineFunction &MF) const; 131 132 const TargetRegisterClass *getRegClass(LLT Ty, const RegisterBank &RB) const; 133 const TargetRegisterClass *getRegClass(LLT Ty, unsigned Reg, 134 MachineRegisterInfo &MRI) const; 135 136 const X86TargetMachine &TM; 137 const X86Subtarget &STI; 138 const X86InstrInfo &TII; 139 const X86RegisterInfo &TRI; 140 const X86RegisterBankInfo &RBI; 141 142 #define GET_GLOBALISEL_PREDICATES_DECL 143 #include "X86GenGlobalISel.inc" 144 #undef GET_GLOBALISEL_PREDICATES_DECL 145 146 #define GET_GLOBALISEL_TEMPORARIES_DECL 147 #include "X86GenGlobalISel.inc" 148 #undef GET_GLOBALISEL_TEMPORARIES_DECL 149 }; 150 151 } // end anonymous namespace 152 153 #define GET_GLOBALISEL_IMPL 154 #include "X86GenGlobalISel.inc" 155 #undef GET_GLOBALISEL_IMPL 156 157 X86InstructionSelector::X86InstructionSelector(const X86TargetMachine &TM, 158 const X86Subtarget &STI, 159 const X86RegisterBankInfo &RBI) 160 : TM(TM), STI(STI), TII(*STI.getInstrInfo()), TRI(*STI.getRegisterInfo()), 161 RBI(RBI), 162 #define GET_GLOBALISEL_PREDICATES_INIT 163 #include "X86GenGlobalISel.inc" 164 #undef GET_GLOBALISEL_PREDICATES_INIT 165 #define GET_GLOBALISEL_TEMPORARIES_INIT 166 #include "X86GenGlobalISel.inc" 167 #undef GET_GLOBALISEL_TEMPORARIES_INIT 168 { 169 } 170 171 // FIXME: This should be target-independent, inferred from the types declared 172 // for each class in the bank. 173 const TargetRegisterClass * 174 X86InstructionSelector::getRegClass(LLT Ty, const RegisterBank &RB) const { 175 if (RB.getID() == X86::GPRRegBankID) { 176 if (Ty.getSizeInBits() <= 8) 177 return &X86::GR8RegClass; 178 if (Ty.getSizeInBits() == 16) 179 return &X86::GR16RegClass; 180 if (Ty.getSizeInBits() == 32) 181 return &X86::GR32RegClass; 182 if (Ty.getSizeInBits() == 64) 183 return &X86::GR64RegClass; 184 } 185 if (RB.getID() == X86::VECRRegBankID) { 186 if (Ty.getSizeInBits() == 16) 187 return STI.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass; 188 if (Ty.getSizeInBits() == 32) 189 return STI.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; 190 if (Ty.getSizeInBits() == 64) 191 return STI.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; 192 if (Ty.getSizeInBits() == 128) 193 return STI.hasAVX512() ? &X86::VR128XRegClass : &X86::VR128RegClass; 194 if (Ty.getSizeInBits() == 256) 195 return STI.hasAVX512() ? &X86::VR256XRegClass : &X86::VR256RegClass; 196 if (Ty.getSizeInBits() == 512) 197 return &X86::VR512RegClass; 198 } 199 200 llvm_unreachable("Unknown RegBank!"); 201 } 202 203 const TargetRegisterClass * 204 X86InstructionSelector::getRegClass(LLT Ty, unsigned Reg, 205 MachineRegisterInfo &MRI) const { 206 const RegisterBank &RegBank = *RBI.getRegBank(Reg, MRI, TRI); 207 return getRegClass(Ty, RegBank); 208 } 209 210 static unsigned getSubRegIndex(const TargetRegisterClass *RC) { 211 unsigned SubIdx = X86::NoSubRegister; 212 if (RC == &X86::GR32RegClass) { 213 SubIdx = X86::sub_32bit; 214 } else if (RC == &X86::GR16RegClass) { 215 SubIdx = X86::sub_16bit; 216 } else if (RC == &X86::GR8RegClass) { 217 SubIdx = X86::sub_8bit; 218 } 219 220 return SubIdx; 221 } 222 223 static const TargetRegisterClass *getRegClassFromGRPhysReg(Register Reg) { 224 assert(Reg.isPhysical()); 225 if (X86::GR64RegClass.contains(Reg)) 226 return &X86::GR64RegClass; 227 if (X86::GR32RegClass.contains(Reg)) 228 return &X86::GR32RegClass; 229 if (X86::GR16RegClass.contains(Reg)) 230 return &X86::GR16RegClass; 231 if (X86::GR8RegClass.contains(Reg)) 232 return &X86::GR8RegClass; 233 234 llvm_unreachable("Unknown RegClass for PhysReg!"); 235 } 236 237 // FIXME: We need some sort of API in RBI/TRI to allow generic code to 238 // constrain operands of simple instructions given a TargetRegisterClass 239 // and LLT 240 bool X86InstructionSelector::selectDebugInstr(MachineInstr &I, 241 MachineRegisterInfo &MRI) const { 242 for (MachineOperand &MO : I.operands()) { 243 if (!MO.isReg()) 244 continue; 245 Register Reg = MO.getReg(); 246 if (!Reg) 247 continue; 248 if (Reg.isPhysical()) 249 continue; 250 LLT Ty = MRI.getType(Reg); 251 const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(Reg); 252 const TargetRegisterClass *RC = 253 dyn_cast_if_present<const TargetRegisterClass *>(RegClassOrBank); 254 if (!RC) { 255 const RegisterBank &RB = *cast<const RegisterBank *>(RegClassOrBank); 256 RC = getRegClass(Ty, RB); 257 if (!RC) { 258 LLVM_DEBUG( 259 dbgs() << "Warning: DBG_VALUE operand has unexpected size/bank\n"); 260 break; 261 } 262 } 263 RBI.constrainGenericRegister(Reg, *RC, MRI); 264 } 265 266 return true; 267 } 268 269 // Set X86 Opcode and constrain DestReg. 270 bool X86InstructionSelector::selectCopy(MachineInstr &I, 271 MachineRegisterInfo &MRI) const { 272 Register DstReg = I.getOperand(0).getReg(); 273 const unsigned DstSize = RBI.getSizeInBits(DstReg, MRI, TRI); 274 const RegisterBank &DstRegBank = *RBI.getRegBank(DstReg, MRI, TRI); 275 276 Register SrcReg = I.getOperand(1).getReg(); 277 const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI); 278 const RegisterBank &SrcRegBank = *RBI.getRegBank(SrcReg, MRI, TRI); 279 280 if (DstReg.isPhysical()) { 281 assert(I.isCopy() && "Generic operators do not allow physical registers"); 282 283 if (DstSize > SrcSize && SrcRegBank.getID() == X86::GPRRegBankID && 284 DstRegBank.getID() == X86::GPRRegBankID) { 285 286 const TargetRegisterClass *SrcRC = 287 getRegClass(MRI.getType(SrcReg), SrcRegBank); 288 const TargetRegisterClass *DstRC = getRegClassFromGRPhysReg(DstReg); 289 290 if (SrcRC != DstRC) { 291 // This case can be generated by ABI lowering, performe anyext 292 Register ExtSrc = MRI.createVirtualRegister(DstRC); 293 BuildMI(*I.getParent(), I, I.getDebugLoc(), 294 TII.get(TargetOpcode::SUBREG_TO_REG)) 295 .addDef(ExtSrc) 296 .addImm(0) 297 .addReg(SrcReg) 298 .addImm(getSubRegIndex(SrcRC)); 299 300 I.getOperand(1).setReg(ExtSrc); 301 } 302 } 303 304 return true; 305 } 306 307 assert((!SrcReg.isPhysical() || I.isCopy()) && 308 "No phys reg on generic operators"); 309 assert((DstSize == SrcSize || 310 // Copies are a mean to setup initial types, the number of 311 // bits may not exactly match. 312 (SrcReg.isPhysical() && 313 DstSize <= RBI.getSizeInBits(SrcReg, MRI, TRI))) && 314 "Copy with different width?!"); 315 316 const TargetRegisterClass *DstRC = 317 getRegClass(MRI.getType(DstReg), DstRegBank); 318 319 if (SrcRegBank.getID() == X86::GPRRegBankID && 320 DstRegBank.getID() == X86::GPRRegBankID && SrcSize > DstSize && 321 SrcReg.isPhysical()) { 322 // Change the physical register to performe truncate. 323 324 const TargetRegisterClass *SrcRC = getRegClassFromGRPhysReg(SrcReg); 325 326 if (DstRC != SrcRC) { 327 I.getOperand(1).setSubReg(getSubRegIndex(DstRC)); 328 I.getOperand(1).substPhysReg(SrcReg, TRI); 329 } 330 } 331 332 // No need to constrain SrcReg. It will get constrained when 333 // we hit another of its use or its defs. 334 // Copies do not have constraints. 335 const TargetRegisterClass *OldRC = MRI.getRegClassOrNull(DstReg); 336 if (!OldRC || !DstRC->hasSubClassEq(OldRC)) { 337 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 338 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 339 << " operand\n"); 340 return false; 341 } 342 } 343 I.setDesc(TII.get(X86::COPY)); 344 return true; 345 } 346 347 bool X86InstructionSelector::select(MachineInstr &I) { 348 assert(I.getParent() && "Instruction should be in a basic block!"); 349 assert(I.getParent()->getParent() && "Instruction should be in a function!"); 350 351 MachineBasicBlock &MBB = *I.getParent(); 352 MachineFunction &MF = *MBB.getParent(); 353 MachineRegisterInfo &MRI = MF.getRegInfo(); 354 355 unsigned Opcode = I.getOpcode(); 356 if (!isPreISelGenericOpcode(Opcode)) { 357 // Certain non-generic instructions also need some special handling. 358 359 if (Opcode == TargetOpcode::LOAD_STACK_GUARD) 360 return false; 361 362 if (I.isCopy()) 363 return selectCopy(I, MRI); 364 365 if (I.isDebugInstr()) 366 return selectDebugInstr(I, MRI); 367 368 return true; 369 } 370 371 assert(I.getNumOperands() == I.getNumExplicitOperands() && 372 "Generic instruction has unexpected implicit operands\n"); 373 374 if (selectImpl(I, *CoverageInfo)) 375 return true; 376 377 LLVM_DEBUG(dbgs() << " C++ instruction selection: "; I.print(dbgs())); 378 379 // TODO: This should be implemented by tblgen. 380 switch (I.getOpcode()) { 381 default: 382 return false; 383 case TargetOpcode::G_STORE: 384 case TargetOpcode::G_LOAD: 385 return selectLoadStoreOp(I, MRI, MF); 386 case TargetOpcode::G_PTR_ADD: 387 case TargetOpcode::G_FRAME_INDEX: 388 return selectFrameIndexOrGep(I, MRI, MF); 389 case TargetOpcode::G_GLOBAL_VALUE: 390 return selectGlobalValue(I, MRI, MF); 391 case TargetOpcode::G_CONSTANT: 392 return selectConstant(I, MRI, MF); 393 case TargetOpcode::G_FCONSTANT: 394 return materializeFP(I, MRI, MF); 395 case TargetOpcode::G_PTRTOINT: 396 case TargetOpcode::G_TRUNC: 397 return selectTruncOrPtrToInt(I, MRI, MF); 398 case TargetOpcode::G_INTTOPTR: 399 return selectCopy(I, MRI); 400 case TargetOpcode::G_ZEXT: 401 return selectZext(I, MRI, MF); 402 case TargetOpcode::G_ANYEXT: 403 return selectAnyext(I, MRI, MF); 404 case TargetOpcode::G_ICMP: 405 return selectCmp(I, MRI, MF); 406 case TargetOpcode::G_FCMP: 407 return selectFCmp(I, MRI, MF); 408 case TargetOpcode::G_UADDE: 409 case TargetOpcode::G_UADDO: 410 case TargetOpcode::G_USUBE: 411 case TargetOpcode::G_USUBO: 412 return selectUAddSub(I, MRI, MF); 413 case TargetOpcode::G_UNMERGE_VALUES: 414 return selectUnmergeValues(I, MRI, MF); 415 case TargetOpcode::G_MERGE_VALUES: 416 case TargetOpcode::G_CONCAT_VECTORS: 417 return selectMergeValues(I, MRI, MF); 418 case TargetOpcode::G_EXTRACT: 419 return selectExtract(I, MRI, MF); 420 case TargetOpcode::G_INSERT: 421 return selectInsert(I, MRI, MF); 422 case TargetOpcode::G_BRCOND: 423 return selectCondBranch(I, MRI, MF); 424 case TargetOpcode::G_IMPLICIT_DEF: 425 case TargetOpcode::G_PHI: 426 return selectImplicitDefOrPHI(I, MRI); 427 case TargetOpcode::G_MUL: 428 case TargetOpcode::G_SMULH: 429 case TargetOpcode::G_UMULH: 430 case TargetOpcode::G_SDIV: 431 case TargetOpcode::G_UDIV: 432 case TargetOpcode::G_SREM: 433 case TargetOpcode::G_UREM: 434 return selectMulDivRem(I, MRI, MF); 435 case TargetOpcode::G_SELECT: 436 return selectSelect(I, MRI, MF); 437 case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: 438 return selectIntrinsicWSideEffects(I, MRI, MF); 439 } 440 441 return false; 442 } 443 444 unsigned X86InstructionSelector::getLoadStoreOp(const LLT &Ty, 445 const RegisterBank &RB, 446 unsigned Opc, 447 Align Alignment) const { 448 bool Isload = (Opc == TargetOpcode::G_LOAD); 449 bool HasAVX = STI.hasAVX(); 450 bool HasAVX512 = STI.hasAVX512(); 451 bool HasVLX = STI.hasVLX(); 452 453 if (Ty == LLT::scalar(8)) { 454 if (X86::GPRRegBankID == RB.getID()) 455 return Isload ? X86::MOV8rm : X86::MOV8mr; 456 } else if (Ty == LLT::scalar(16)) { 457 if (X86::GPRRegBankID == RB.getID()) 458 return Isload ? X86::MOV16rm : X86::MOV16mr; 459 } else if (Ty == LLT::scalar(32) || Ty == LLT::pointer(0, 32)) { 460 if (X86::GPRRegBankID == RB.getID()) 461 return Isload ? X86::MOV32rm : X86::MOV32mr; 462 if (X86::VECRRegBankID == RB.getID()) 463 return Isload ? (HasAVX512 ? X86::VMOVSSZrm_alt : 464 HasAVX ? X86::VMOVSSrm_alt : 465 X86::MOVSSrm_alt) 466 : (HasAVX512 ? X86::VMOVSSZmr : 467 HasAVX ? X86::VMOVSSmr : 468 X86::MOVSSmr); 469 } else if (Ty == LLT::scalar(64) || Ty == LLT::pointer(0, 64)) { 470 if (X86::GPRRegBankID == RB.getID()) 471 return Isload ? X86::MOV64rm : X86::MOV64mr; 472 if (X86::VECRRegBankID == RB.getID()) 473 return Isload ? (HasAVX512 ? X86::VMOVSDZrm_alt : 474 HasAVX ? X86::VMOVSDrm_alt : 475 X86::MOVSDrm_alt) 476 : (HasAVX512 ? X86::VMOVSDZmr : 477 HasAVX ? X86::VMOVSDmr : 478 X86::MOVSDmr); 479 } else if (Ty.isVector() && Ty.getSizeInBits() == 128) { 480 if (Alignment >= Align(16)) 481 return Isload ? (HasVLX ? X86::VMOVAPSZ128rm 482 : HasAVX512 483 ? X86::VMOVAPSZ128rm_NOVLX 484 : HasAVX ? X86::VMOVAPSrm : X86::MOVAPSrm) 485 : (HasVLX ? X86::VMOVAPSZ128mr 486 : HasAVX512 487 ? X86::VMOVAPSZ128mr_NOVLX 488 : HasAVX ? X86::VMOVAPSmr : X86::MOVAPSmr); 489 else 490 return Isload ? (HasVLX ? X86::VMOVUPSZ128rm 491 : HasAVX512 492 ? X86::VMOVUPSZ128rm_NOVLX 493 : HasAVX ? X86::VMOVUPSrm : X86::MOVUPSrm) 494 : (HasVLX ? X86::VMOVUPSZ128mr 495 : HasAVX512 496 ? X86::VMOVUPSZ128mr_NOVLX 497 : HasAVX ? X86::VMOVUPSmr : X86::MOVUPSmr); 498 } else if (Ty.isVector() && Ty.getSizeInBits() == 256) { 499 if (Alignment >= Align(32)) 500 return Isload ? (HasVLX ? X86::VMOVAPSZ256rm 501 : HasAVX512 ? X86::VMOVAPSZ256rm_NOVLX 502 : X86::VMOVAPSYrm) 503 : (HasVLX ? X86::VMOVAPSZ256mr 504 : HasAVX512 ? X86::VMOVAPSZ256mr_NOVLX 505 : X86::VMOVAPSYmr); 506 else 507 return Isload ? (HasVLX ? X86::VMOVUPSZ256rm 508 : HasAVX512 ? X86::VMOVUPSZ256rm_NOVLX 509 : X86::VMOVUPSYrm) 510 : (HasVLX ? X86::VMOVUPSZ256mr 511 : HasAVX512 ? X86::VMOVUPSZ256mr_NOVLX 512 : X86::VMOVUPSYmr); 513 } else if (Ty.isVector() && Ty.getSizeInBits() == 512) { 514 if (Alignment >= Align(64)) 515 return Isload ? X86::VMOVAPSZrm : X86::VMOVAPSZmr; 516 else 517 return Isload ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; 518 } 519 return Opc; 520 } 521 522 // Fill in an address from the given instruction. 523 static void X86SelectAddress(const MachineInstr &I, 524 const MachineRegisterInfo &MRI, 525 X86AddressMode &AM) { 526 assert(I.getOperand(0).isReg() && "unsupported opperand."); 527 assert(MRI.getType(I.getOperand(0).getReg()).isPointer() && 528 "unsupported type."); 529 530 if (I.getOpcode() == TargetOpcode::G_PTR_ADD) { 531 if (auto COff = getIConstantVRegSExtVal(I.getOperand(2).getReg(), MRI)) { 532 int64_t Imm = *COff; 533 if (isInt<32>(Imm)) { // Check for displacement overflow. 534 AM.Disp = static_cast<int32_t>(Imm); 535 AM.Base.Reg = I.getOperand(1).getReg(); 536 return; 537 } 538 } 539 } else if (I.getOpcode() == TargetOpcode::G_FRAME_INDEX) { 540 AM.Base.FrameIndex = I.getOperand(1).getIndex(); 541 AM.BaseType = X86AddressMode::FrameIndexBase; 542 return; 543 } 544 545 // Default behavior. 546 AM.Base.Reg = I.getOperand(0).getReg(); 547 } 548 549 bool X86InstructionSelector::selectLoadStoreOp(MachineInstr &I, 550 MachineRegisterInfo &MRI, 551 MachineFunction &MF) const { 552 unsigned Opc = I.getOpcode(); 553 554 assert((Opc == TargetOpcode::G_STORE || Opc == TargetOpcode::G_LOAD) && 555 "unexpected instruction"); 556 557 const Register DefReg = I.getOperand(0).getReg(); 558 LLT Ty = MRI.getType(DefReg); 559 const RegisterBank &RB = *RBI.getRegBank(DefReg, MRI, TRI); 560 561 assert(I.hasOneMemOperand()); 562 auto &MemOp = **I.memoperands_begin(); 563 if (MemOp.isAtomic()) { 564 // Note: for unordered operations, we rely on the fact the appropriate MMO 565 // is already on the instruction we're mutating, and thus we don't need to 566 // make any changes. So long as we select an opcode which is capable of 567 // loading or storing the appropriate size atomically, the rest of the 568 // backend is required to respect the MMO state. 569 if (!MemOp.isUnordered()) { 570 LLVM_DEBUG(dbgs() << "Atomic ordering not supported yet\n"); 571 return false; 572 } 573 if (MemOp.getAlign() < Ty.getSizeInBits() / 8) { 574 LLVM_DEBUG(dbgs() << "Unaligned atomics not supported yet\n"); 575 return false; 576 } 577 } 578 579 unsigned NewOpc = getLoadStoreOp(Ty, RB, Opc, MemOp.getAlign()); 580 if (NewOpc == Opc) 581 return false; 582 583 X86AddressMode AM; 584 X86SelectAddress(*MRI.getVRegDef(I.getOperand(1).getReg()), MRI, AM); 585 586 I.setDesc(TII.get(NewOpc)); 587 MachineInstrBuilder MIB(MF, I); 588 if (Opc == TargetOpcode::G_LOAD) { 589 I.removeOperand(1); 590 addFullAddress(MIB, AM); 591 } else { 592 // G_STORE (VAL, Addr), X86Store instruction (Addr, VAL) 593 I.removeOperand(1); 594 I.removeOperand(0); 595 addFullAddress(MIB, AM).addUse(DefReg); 596 } 597 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 598 } 599 600 static unsigned getLeaOP(LLT Ty, const X86Subtarget &STI) { 601 if (Ty == LLT::pointer(0, 64)) 602 return X86::LEA64r; 603 else if (Ty == LLT::pointer(0, 32)) 604 return STI.isTarget64BitILP32() ? X86::LEA64_32r : X86::LEA32r; 605 else 606 llvm_unreachable("Can't get LEA opcode. Unsupported type."); 607 } 608 609 bool X86InstructionSelector::selectFrameIndexOrGep(MachineInstr &I, 610 MachineRegisterInfo &MRI, 611 MachineFunction &MF) const { 612 unsigned Opc = I.getOpcode(); 613 614 assert((Opc == TargetOpcode::G_FRAME_INDEX || Opc == TargetOpcode::G_PTR_ADD) && 615 "unexpected instruction"); 616 617 const Register DefReg = I.getOperand(0).getReg(); 618 LLT Ty = MRI.getType(DefReg); 619 620 // Use LEA to calculate frame index and GEP 621 unsigned NewOpc = getLeaOP(Ty, STI); 622 I.setDesc(TII.get(NewOpc)); 623 MachineInstrBuilder MIB(MF, I); 624 625 if (Opc == TargetOpcode::G_FRAME_INDEX) { 626 addOffset(MIB, 0); 627 } else { 628 MachineOperand &InxOp = I.getOperand(2); 629 I.addOperand(InxOp); // set IndexReg 630 InxOp.ChangeToImmediate(1); // set Scale 631 MIB.addImm(0).addReg(0); 632 } 633 634 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 635 } 636 637 bool X86InstructionSelector::selectGlobalValue(MachineInstr &I, 638 MachineRegisterInfo &MRI, 639 MachineFunction &MF) const { 640 assert((I.getOpcode() == TargetOpcode::G_GLOBAL_VALUE) && 641 "unexpected instruction"); 642 643 auto GV = I.getOperand(1).getGlobal(); 644 if (GV->isThreadLocal()) { 645 return false; // TODO: we don't support TLS yet. 646 } 647 648 // Can't handle alternate code models yet. 649 if (TM.getCodeModel() != CodeModel::Small) 650 return false; 651 652 X86AddressMode AM; 653 AM.GV = GV; 654 AM.GVOpFlags = STI.classifyGlobalReference(GV); 655 656 // TODO: The ABI requires an extra load. not supported yet. 657 if (isGlobalStubReference(AM.GVOpFlags)) 658 return false; 659 660 // TODO: This reference is relative to the pic base. not supported yet. 661 if (isGlobalRelativeToPICBase(AM.GVOpFlags)) 662 return false; 663 664 if (STI.isPICStyleRIPRel()) { 665 // Use rip-relative addressing. 666 assert(AM.Base.Reg == 0 && AM.IndexReg == 0); 667 AM.Base.Reg = X86::RIP; 668 } 669 670 const Register DefReg = I.getOperand(0).getReg(); 671 LLT Ty = MRI.getType(DefReg); 672 unsigned NewOpc = getLeaOP(Ty, STI); 673 674 I.setDesc(TII.get(NewOpc)); 675 MachineInstrBuilder MIB(MF, I); 676 677 I.removeOperand(1); 678 addFullAddress(MIB, AM); 679 680 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 681 } 682 683 bool X86InstructionSelector::selectConstant(MachineInstr &I, 684 MachineRegisterInfo &MRI, 685 MachineFunction &MF) const { 686 assert((I.getOpcode() == TargetOpcode::G_CONSTANT) && 687 "unexpected instruction"); 688 689 const Register DefReg = I.getOperand(0).getReg(); 690 LLT Ty = MRI.getType(DefReg); 691 692 if (RBI.getRegBank(DefReg, MRI, TRI)->getID() != X86::GPRRegBankID) 693 return false; 694 695 uint64_t Val = 0; 696 if (I.getOperand(1).isCImm()) { 697 Val = I.getOperand(1).getCImm()->getZExtValue(); 698 I.getOperand(1).ChangeToImmediate(Val); 699 } else if (I.getOperand(1).isImm()) { 700 Val = I.getOperand(1).getImm(); 701 } else 702 llvm_unreachable("Unsupported operand type."); 703 704 unsigned NewOpc; 705 switch (Ty.getSizeInBits()) { 706 case 8: 707 NewOpc = X86::MOV8ri; 708 break; 709 case 16: 710 NewOpc = X86::MOV16ri; 711 break; 712 case 32: 713 NewOpc = X86::MOV32ri; 714 break; 715 case 64: 716 // TODO: in case isUInt<32>(Val), X86::MOV32ri can be used 717 if (isInt<32>(Val)) 718 NewOpc = X86::MOV64ri32; 719 else 720 NewOpc = X86::MOV64ri; 721 break; 722 default: 723 llvm_unreachable("Can't select G_CONSTANT, unsupported type."); 724 } 725 726 I.setDesc(TII.get(NewOpc)); 727 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 728 } 729 730 // Helper function for selectTruncOrPtrToInt and selectAnyext. 731 // Returns true if DstRC lives on a floating register class and 732 // SrcRC lives on a 128-bit vector class. 733 static bool canTurnIntoCOPY(const TargetRegisterClass *DstRC, 734 const TargetRegisterClass *SrcRC) { 735 return (DstRC == &X86::FR32RegClass || DstRC == &X86::FR32XRegClass || 736 DstRC == &X86::FR64RegClass || DstRC == &X86::FR64XRegClass) && 737 (SrcRC == &X86::VR128RegClass || SrcRC == &X86::VR128XRegClass); 738 } 739 740 bool X86InstructionSelector::selectTurnIntoCOPY( 741 MachineInstr &I, MachineRegisterInfo &MRI, const unsigned DstReg, 742 const TargetRegisterClass *DstRC, const unsigned SrcReg, 743 const TargetRegisterClass *SrcRC) const { 744 745 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 746 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 747 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 748 << " operand\n"); 749 return false; 750 } 751 I.setDesc(TII.get(X86::COPY)); 752 return true; 753 } 754 755 bool X86InstructionSelector::selectTruncOrPtrToInt(MachineInstr &I, 756 MachineRegisterInfo &MRI, 757 MachineFunction &MF) const { 758 assert((I.getOpcode() == TargetOpcode::G_TRUNC || 759 I.getOpcode() == TargetOpcode::G_PTRTOINT) && 760 "unexpected instruction"); 761 762 const Register DstReg = I.getOperand(0).getReg(); 763 const Register SrcReg = I.getOperand(1).getReg(); 764 765 const LLT DstTy = MRI.getType(DstReg); 766 const LLT SrcTy = MRI.getType(SrcReg); 767 768 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 769 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 770 771 if (DstRB.getID() != SrcRB.getID()) { 772 LLVM_DEBUG(dbgs() << TII.getName(I.getOpcode()) 773 << " input/output on different banks\n"); 774 return false; 775 } 776 777 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB); 778 const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB); 779 780 if (!DstRC || !SrcRC) 781 return false; 782 783 // If that's truncation of the value that lives on the vector class and goes 784 // into the floating class, just replace it with copy, as we are able to 785 // select it as a regular move. 786 if (canTurnIntoCOPY(DstRC, SrcRC)) 787 return selectTurnIntoCOPY(I, MRI, DstReg, DstRC, SrcReg, SrcRC); 788 789 if (DstRB.getID() != X86::GPRRegBankID) 790 return false; 791 792 unsigned SubIdx; 793 if (DstRC == SrcRC) { 794 // Nothing to be done 795 SubIdx = X86::NoSubRegister; 796 } else if (DstRC == &X86::GR32RegClass) { 797 SubIdx = X86::sub_32bit; 798 } else if (DstRC == &X86::GR16RegClass) { 799 SubIdx = X86::sub_16bit; 800 } else if (DstRC == &X86::GR8RegClass) { 801 SubIdx = X86::sub_8bit; 802 } else { 803 return false; 804 } 805 806 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx); 807 808 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 809 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 810 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 811 << "\n"); 812 return false; 813 } 814 815 I.getOperand(1).setSubReg(SubIdx); 816 817 I.setDesc(TII.get(X86::COPY)); 818 return true; 819 } 820 821 bool X86InstructionSelector::selectZext(MachineInstr &I, 822 MachineRegisterInfo &MRI, 823 MachineFunction &MF) const { 824 assert((I.getOpcode() == TargetOpcode::G_ZEXT) && "unexpected instruction"); 825 826 const Register DstReg = I.getOperand(0).getReg(); 827 const Register SrcReg = I.getOperand(1).getReg(); 828 829 const LLT DstTy = MRI.getType(DstReg); 830 const LLT SrcTy = MRI.getType(SrcReg); 831 832 assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(16)) && 833 "8=>16 Zext is handled by tablegen"); 834 assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(32)) && 835 "8=>32 Zext is handled by tablegen"); 836 assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(32)) && 837 "16=>32 Zext is handled by tablegen"); 838 assert(!(SrcTy == LLT::scalar(8) && DstTy == LLT::scalar(64)) && 839 "8=>64 Zext is handled by tablegen"); 840 assert(!(SrcTy == LLT::scalar(16) && DstTy == LLT::scalar(64)) && 841 "16=>64 Zext is handled by tablegen"); 842 assert(!(SrcTy == LLT::scalar(32) && DstTy == LLT::scalar(64)) && 843 "32=>64 Zext is handled by tablegen"); 844 845 if (SrcTy != LLT::scalar(1)) 846 return false; 847 848 unsigned AndOpc; 849 if (DstTy == LLT::scalar(8)) 850 AndOpc = X86::AND8ri; 851 else if (DstTy == LLT::scalar(16)) 852 AndOpc = X86::AND16ri; 853 else if (DstTy == LLT::scalar(32)) 854 AndOpc = X86::AND32ri; 855 else if (DstTy == LLT::scalar(64)) 856 AndOpc = X86::AND64ri32; 857 else 858 return false; 859 860 Register DefReg = SrcReg; 861 if (DstTy != LLT::scalar(8)) { 862 Register ImpDefReg = 863 MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI)); 864 BuildMI(*I.getParent(), I, I.getDebugLoc(), 865 TII.get(TargetOpcode::IMPLICIT_DEF), ImpDefReg); 866 867 DefReg = MRI.createVirtualRegister(getRegClass(DstTy, DstReg, MRI)); 868 BuildMI(*I.getParent(), I, I.getDebugLoc(), 869 TII.get(TargetOpcode::INSERT_SUBREG), DefReg) 870 .addReg(ImpDefReg) 871 .addReg(SrcReg) 872 .addImm(X86::sub_8bit); 873 } 874 875 MachineInstr &AndInst = 876 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AndOpc), DstReg) 877 .addReg(DefReg) 878 .addImm(1); 879 880 constrainSelectedInstRegOperands(AndInst, TII, TRI, RBI); 881 882 I.eraseFromParent(); 883 return true; 884 } 885 886 bool X86InstructionSelector::selectAnyext(MachineInstr &I, 887 MachineRegisterInfo &MRI, 888 MachineFunction &MF) const { 889 assert((I.getOpcode() == TargetOpcode::G_ANYEXT) && "unexpected instruction"); 890 891 const Register DstReg = I.getOperand(0).getReg(); 892 const Register SrcReg = I.getOperand(1).getReg(); 893 894 const LLT DstTy = MRI.getType(DstReg); 895 const LLT SrcTy = MRI.getType(SrcReg); 896 897 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 898 const RegisterBank &SrcRB = *RBI.getRegBank(SrcReg, MRI, TRI); 899 900 assert(DstRB.getID() == SrcRB.getID() && 901 "G_ANYEXT input/output on different banks\n"); 902 903 assert(DstTy.getSizeInBits() > SrcTy.getSizeInBits() && 904 "G_ANYEXT incorrect operand size"); 905 906 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB); 907 const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcRB); 908 909 // If that's ANY_EXT of the value that lives on the floating class and goes 910 // into the vector class, just replace it with copy, as we are able to select 911 // it as a regular move. 912 if (canTurnIntoCOPY(SrcRC, DstRC)) 913 return selectTurnIntoCOPY(I, MRI, SrcReg, SrcRC, DstReg, DstRC); 914 915 if (DstRB.getID() != X86::GPRRegBankID) 916 return false; 917 918 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 919 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 920 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 921 << " operand\n"); 922 return false; 923 } 924 925 if (SrcRC == DstRC) { 926 I.setDesc(TII.get(X86::COPY)); 927 return true; 928 } 929 930 BuildMI(*I.getParent(), I, I.getDebugLoc(), 931 TII.get(TargetOpcode::SUBREG_TO_REG)) 932 .addDef(DstReg) 933 .addImm(0) 934 .addReg(SrcReg) 935 .addImm(getSubRegIndex(SrcRC)); 936 937 I.eraseFromParent(); 938 return true; 939 } 940 941 bool X86InstructionSelector::selectCmp(MachineInstr &I, 942 MachineRegisterInfo &MRI, 943 MachineFunction &MF) const { 944 assert((I.getOpcode() == TargetOpcode::G_ICMP) && "unexpected instruction"); 945 946 X86::CondCode CC; 947 bool SwapArgs; 948 std::tie(CC, SwapArgs) = X86::getX86ConditionCode( 949 (CmpInst::Predicate)I.getOperand(1).getPredicate()); 950 951 Register LHS = I.getOperand(2).getReg(); 952 Register RHS = I.getOperand(3).getReg(); 953 954 if (SwapArgs) 955 std::swap(LHS, RHS); 956 957 unsigned OpCmp; 958 LLT Ty = MRI.getType(LHS); 959 960 switch (Ty.getSizeInBits()) { 961 default: 962 return false; 963 case 8: 964 OpCmp = X86::CMP8rr; 965 break; 966 case 16: 967 OpCmp = X86::CMP16rr; 968 break; 969 case 32: 970 OpCmp = X86::CMP32rr; 971 break; 972 case 64: 973 OpCmp = X86::CMP64rr; 974 break; 975 } 976 977 MachineInstr &CmpInst = 978 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp)) 979 .addReg(LHS) 980 .addReg(RHS); 981 982 MachineInstr &SetInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 983 TII.get(X86::SETCCr), I.getOperand(0).getReg()).addImm(CC); 984 985 constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI); 986 constrainSelectedInstRegOperands(SetInst, TII, TRI, RBI); 987 988 I.eraseFromParent(); 989 return true; 990 } 991 992 bool X86InstructionSelector::selectFCmp(MachineInstr &I, 993 MachineRegisterInfo &MRI, 994 MachineFunction &MF) const { 995 assert((I.getOpcode() == TargetOpcode::G_FCMP) && "unexpected instruction"); 996 997 Register LhsReg = I.getOperand(2).getReg(); 998 Register RhsReg = I.getOperand(3).getReg(); 999 CmpInst::Predicate Predicate = 1000 (CmpInst::Predicate)I.getOperand(1).getPredicate(); 1001 1002 // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. 1003 static const uint16_t SETFOpcTable[2][3] = { 1004 {X86::COND_E, X86::COND_NP, X86::AND8rr}, 1005 {X86::COND_NE, X86::COND_P, X86::OR8rr}}; 1006 const uint16_t *SETFOpc = nullptr; 1007 switch (Predicate) { 1008 default: 1009 break; 1010 case CmpInst::FCMP_OEQ: 1011 SETFOpc = &SETFOpcTable[0][0]; 1012 break; 1013 case CmpInst::FCMP_UNE: 1014 SETFOpc = &SETFOpcTable[1][0]; 1015 break; 1016 } 1017 1018 // Compute the opcode for the CMP instruction. 1019 unsigned OpCmp; 1020 LLT Ty = MRI.getType(LhsReg); 1021 switch (Ty.getSizeInBits()) { 1022 default: 1023 return false; 1024 case 32: 1025 OpCmp = X86::UCOMISSrr; 1026 break; 1027 case 64: 1028 OpCmp = X86::UCOMISDrr; 1029 break; 1030 } 1031 1032 Register ResultReg = I.getOperand(0).getReg(); 1033 RBI.constrainGenericRegister( 1034 ResultReg, 1035 *getRegClass(LLT::scalar(8), *RBI.getRegBank(ResultReg, MRI, TRI)), MRI); 1036 if (SETFOpc) { 1037 MachineInstr &CmpInst = 1038 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp)) 1039 .addReg(LhsReg) 1040 .addReg(RhsReg); 1041 1042 Register FlagReg1 = MRI.createVirtualRegister(&X86::GR8RegClass); 1043 Register FlagReg2 = MRI.createVirtualRegister(&X86::GR8RegClass); 1044 MachineInstr &Set1 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1045 TII.get(X86::SETCCr), FlagReg1).addImm(SETFOpc[0]); 1046 MachineInstr &Set2 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1047 TII.get(X86::SETCCr), FlagReg2).addImm(SETFOpc[1]); 1048 MachineInstr &Set3 = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1049 TII.get(SETFOpc[2]), ResultReg) 1050 .addReg(FlagReg1) 1051 .addReg(FlagReg2); 1052 constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI); 1053 constrainSelectedInstRegOperands(Set1, TII, TRI, RBI); 1054 constrainSelectedInstRegOperands(Set2, TII, TRI, RBI); 1055 constrainSelectedInstRegOperands(Set3, TII, TRI, RBI); 1056 1057 I.eraseFromParent(); 1058 return true; 1059 } 1060 1061 X86::CondCode CC; 1062 bool SwapArgs; 1063 std::tie(CC, SwapArgs) = X86::getX86ConditionCode(Predicate); 1064 assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); 1065 1066 if (SwapArgs) 1067 std::swap(LhsReg, RhsReg); 1068 1069 // Emit a compare of LHS/RHS. 1070 MachineInstr &CmpInst = 1071 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpCmp)) 1072 .addReg(LhsReg) 1073 .addReg(RhsReg); 1074 1075 MachineInstr &Set = 1076 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SETCCr), ResultReg).addImm(CC); 1077 constrainSelectedInstRegOperands(CmpInst, TII, TRI, RBI); 1078 constrainSelectedInstRegOperands(Set, TII, TRI, RBI); 1079 I.eraseFromParent(); 1080 return true; 1081 } 1082 1083 bool X86InstructionSelector::selectUAddSub(MachineInstr &I, 1084 MachineRegisterInfo &MRI, 1085 MachineFunction &MF) const { 1086 assert((I.getOpcode() == TargetOpcode::G_UADDE || 1087 I.getOpcode() == TargetOpcode::G_UADDO || 1088 I.getOpcode() == TargetOpcode::G_USUBE || 1089 I.getOpcode() == TargetOpcode::G_USUBO) && 1090 "unexpected instruction"); 1091 1092 const Register DstReg = I.getOperand(0).getReg(); 1093 const Register CarryOutReg = I.getOperand(1).getReg(); 1094 const Register Op0Reg = I.getOperand(2).getReg(); 1095 const Register Op1Reg = I.getOperand(3).getReg(); 1096 bool IsSub = I.getOpcode() == TargetOpcode::G_USUBE || 1097 I.getOpcode() == TargetOpcode::G_USUBO; 1098 bool HasCarryIn = I.getOpcode() == TargetOpcode::G_UADDE || 1099 I.getOpcode() == TargetOpcode::G_USUBE; 1100 1101 const LLT DstTy = MRI.getType(DstReg); 1102 assert(DstTy.isScalar() && "selectUAddSub only supported for scalar types"); 1103 1104 // TODO: Handle immediate argument variants? 1105 unsigned OpADC, OpADD, OpSBB, OpSUB; 1106 switch (DstTy.getSizeInBits()) { 1107 case 8: 1108 OpADC = X86::ADC8rr; 1109 OpADD = X86::ADD8rr; 1110 OpSBB = X86::SBB8rr; 1111 OpSUB = X86::SUB8rr; 1112 break; 1113 case 16: 1114 OpADC = X86::ADC16rr; 1115 OpADD = X86::ADD16rr; 1116 OpSBB = X86::SBB16rr; 1117 OpSUB = X86::SUB16rr; 1118 break; 1119 case 32: 1120 OpADC = X86::ADC32rr; 1121 OpADD = X86::ADD32rr; 1122 OpSBB = X86::SBB32rr; 1123 OpSUB = X86::SUB32rr; 1124 break; 1125 case 64: 1126 OpADC = X86::ADC64rr; 1127 OpADD = X86::ADD64rr; 1128 OpSBB = X86::SBB64rr; 1129 OpSUB = X86::SUB64rr; 1130 break; 1131 default: 1132 llvm_unreachable("selectUAddSub unsupported type."); 1133 } 1134 1135 const RegisterBank &DstRB = *RBI.getRegBank(DstReg, MRI, TRI); 1136 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstRB); 1137 1138 unsigned Opcode = IsSub ? OpSUB : OpADD; 1139 1140 // G_UADDE/G_USUBE - find CarryIn def instruction. 1141 if (HasCarryIn) { 1142 Register CarryInReg = I.getOperand(4).getReg(); 1143 MachineInstr *Def = MRI.getVRegDef(CarryInReg); 1144 while (Def->getOpcode() == TargetOpcode::G_TRUNC) { 1145 CarryInReg = Def->getOperand(1).getReg(); 1146 Def = MRI.getVRegDef(CarryInReg); 1147 } 1148 1149 // TODO - handle more CF generating instructions 1150 if (Def->getOpcode() == TargetOpcode::G_UADDE || 1151 Def->getOpcode() == TargetOpcode::G_UADDO || 1152 Def->getOpcode() == TargetOpcode::G_USUBE || 1153 Def->getOpcode() == TargetOpcode::G_USUBO) { 1154 // carry set by prev ADD/SUB. 1155 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), 1156 X86::EFLAGS) 1157 .addReg(CarryInReg); 1158 1159 if (!RBI.constrainGenericRegister(CarryInReg, *DstRC, MRI)) 1160 return false; 1161 1162 Opcode = IsSub ? OpSBB : OpADC; 1163 } else if (auto val = getIConstantVRegVal(CarryInReg, MRI)) { 1164 // carry is constant, support only 0. 1165 if (*val != 0) 1166 return false; 1167 1168 Opcode = IsSub ? OpSUB : OpADD; 1169 } else 1170 return false; 1171 } 1172 1173 MachineInstr &Inst = 1174 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Opcode), DstReg) 1175 .addReg(Op0Reg) 1176 .addReg(Op1Reg); 1177 1178 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), CarryOutReg) 1179 .addReg(X86::EFLAGS); 1180 1181 if (!constrainSelectedInstRegOperands(Inst, TII, TRI, RBI) || 1182 !RBI.constrainGenericRegister(CarryOutReg, *DstRC, MRI)) 1183 return false; 1184 1185 I.eraseFromParent(); 1186 return true; 1187 } 1188 1189 bool X86InstructionSelector::selectExtract(MachineInstr &I, 1190 MachineRegisterInfo &MRI, 1191 MachineFunction &MF) const { 1192 assert((I.getOpcode() == TargetOpcode::G_EXTRACT) && 1193 "unexpected instruction"); 1194 1195 const Register DstReg = I.getOperand(0).getReg(); 1196 const Register SrcReg = I.getOperand(1).getReg(); 1197 int64_t Index = I.getOperand(2).getImm(); 1198 1199 const LLT DstTy = MRI.getType(DstReg); 1200 const LLT SrcTy = MRI.getType(SrcReg); 1201 1202 // Meanwile handle vector type only. 1203 if (!DstTy.isVector()) 1204 return false; 1205 1206 if (Index % DstTy.getSizeInBits() != 0) 1207 return false; // Not extract subvector. 1208 1209 if (Index == 0) { 1210 // Replace by extract subreg copy. 1211 if (!emitExtractSubreg(DstReg, SrcReg, I, MRI, MF)) 1212 return false; 1213 1214 I.eraseFromParent(); 1215 return true; 1216 } 1217 1218 bool HasAVX = STI.hasAVX(); 1219 bool HasAVX512 = STI.hasAVX512(); 1220 bool HasVLX = STI.hasVLX(); 1221 1222 if (SrcTy.getSizeInBits() == 256 && DstTy.getSizeInBits() == 128) { 1223 if (HasVLX) 1224 I.setDesc(TII.get(X86::VEXTRACTF32x4Z256rr)); 1225 else if (HasAVX) 1226 I.setDesc(TII.get(X86::VEXTRACTF128rr)); 1227 else 1228 return false; 1229 } else if (SrcTy.getSizeInBits() == 512 && HasAVX512) { 1230 if (DstTy.getSizeInBits() == 128) 1231 I.setDesc(TII.get(X86::VEXTRACTF32x4Zrr)); 1232 else if (DstTy.getSizeInBits() == 256) 1233 I.setDesc(TII.get(X86::VEXTRACTF64x4Zrr)); 1234 else 1235 return false; 1236 } else 1237 return false; 1238 1239 // Convert to X86 VEXTRACT immediate. 1240 Index = Index / DstTy.getSizeInBits(); 1241 I.getOperand(2).setImm(Index); 1242 1243 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1244 } 1245 1246 bool X86InstructionSelector::emitExtractSubreg(unsigned DstReg, unsigned SrcReg, 1247 MachineInstr &I, 1248 MachineRegisterInfo &MRI, 1249 MachineFunction &MF) const { 1250 const LLT DstTy = MRI.getType(DstReg); 1251 const LLT SrcTy = MRI.getType(SrcReg); 1252 unsigned SubIdx = X86::NoSubRegister; 1253 1254 if (!DstTy.isVector() || !SrcTy.isVector()) 1255 return false; 1256 1257 assert(SrcTy.getSizeInBits() > DstTy.getSizeInBits() && 1258 "Incorrect Src/Dst register size"); 1259 1260 if (DstTy.getSizeInBits() == 128) 1261 SubIdx = X86::sub_xmm; 1262 else if (DstTy.getSizeInBits() == 256) 1263 SubIdx = X86::sub_ymm; 1264 else 1265 return false; 1266 1267 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI); 1268 const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI); 1269 1270 SrcRC = TRI.getSubClassWithSubReg(SrcRC, SubIdx); 1271 1272 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 1273 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 1274 LLVM_DEBUG(dbgs() << "Failed to constrain EXTRACT_SUBREG\n"); 1275 return false; 1276 } 1277 1278 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY), DstReg) 1279 .addReg(SrcReg, 0, SubIdx); 1280 1281 return true; 1282 } 1283 1284 bool X86InstructionSelector::emitInsertSubreg(unsigned DstReg, unsigned SrcReg, 1285 MachineInstr &I, 1286 MachineRegisterInfo &MRI, 1287 MachineFunction &MF) const { 1288 const LLT DstTy = MRI.getType(DstReg); 1289 const LLT SrcTy = MRI.getType(SrcReg); 1290 unsigned SubIdx = X86::NoSubRegister; 1291 1292 // TODO: support scalar types 1293 if (!DstTy.isVector() || !SrcTy.isVector()) 1294 return false; 1295 1296 assert(SrcTy.getSizeInBits() < DstTy.getSizeInBits() && 1297 "Incorrect Src/Dst register size"); 1298 1299 if (SrcTy.getSizeInBits() == 128) 1300 SubIdx = X86::sub_xmm; 1301 else if (SrcTy.getSizeInBits() == 256) 1302 SubIdx = X86::sub_ymm; 1303 else 1304 return false; 1305 1306 const TargetRegisterClass *SrcRC = getRegClass(SrcTy, SrcReg, MRI); 1307 const TargetRegisterClass *DstRC = getRegClass(DstTy, DstReg, MRI); 1308 1309 if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, MRI) || 1310 !RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 1311 LLVM_DEBUG(dbgs() << "Failed to constrain INSERT_SUBREG\n"); 1312 return false; 1313 } 1314 1315 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::COPY)) 1316 .addReg(DstReg, RegState::DefineNoRead, SubIdx) 1317 .addReg(SrcReg); 1318 1319 return true; 1320 } 1321 1322 bool X86InstructionSelector::selectInsert(MachineInstr &I, 1323 MachineRegisterInfo &MRI, 1324 MachineFunction &MF) const { 1325 assert((I.getOpcode() == TargetOpcode::G_INSERT) && "unexpected instruction"); 1326 1327 const Register DstReg = I.getOperand(0).getReg(); 1328 const Register SrcReg = I.getOperand(1).getReg(); 1329 const Register InsertReg = I.getOperand(2).getReg(); 1330 int64_t Index = I.getOperand(3).getImm(); 1331 1332 const LLT DstTy = MRI.getType(DstReg); 1333 const LLT InsertRegTy = MRI.getType(InsertReg); 1334 1335 // Meanwile handle vector type only. 1336 if (!DstTy.isVector()) 1337 return false; 1338 1339 if (Index % InsertRegTy.getSizeInBits() != 0) 1340 return false; // Not insert subvector. 1341 1342 if (Index == 0 && MRI.getVRegDef(SrcReg)->isImplicitDef()) { 1343 // Replace by subreg copy. 1344 if (!emitInsertSubreg(DstReg, InsertReg, I, MRI, MF)) 1345 return false; 1346 1347 I.eraseFromParent(); 1348 return true; 1349 } 1350 1351 bool HasAVX = STI.hasAVX(); 1352 bool HasAVX512 = STI.hasAVX512(); 1353 bool HasVLX = STI.hasVLX(); 1354 1355 if (DstTy.getSizeInBits() == 256 && InsertRegTy.getSizeInBits() == 128) { 1356 if (HasVLX) 1357 I.setDesc(TII.get(X86::VINSERTF32x4Z256rr)); 1358 else if (HasAVX) 1359 I.setDesc(TII.get(X86::VINSERTF128rr)); 1360 else 1361 return false; 1362 } else if (DstTy.getSizeInBits() == 512 && HasAVX512) { 1363 if (InsertRegTy.getSizeInBits() == 128) 1364 I.setDesc(TII.get(X86::VINSERTF32x4Zrr)); 1365 else if (InsertRegTy.getSizeInBits() == 256) 1366 I.setDesc(TII.get(X86::VINSERTF64x4Zrr)); 1367 else 1368 return false; 1369 } else 1370 return false; 1371 1372 // Convert to X86 VINSERT immediate. 1373 Index = Index / InsertRegTy.getSizeInBits(); 1374 1375 I.getOperand(3).setImm(Index); 1376 1377 return constrainSelectedInstRegOperands(I, TII, TRI, RBI); 1378 } 1379 1380 bool X86InstructionSelector::selectUnmergeValues( 1381 MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) { 1382 assert((I.getOpcode() == TargetOpcode::G_UNMERGE_VALUES) && 1383 "unexpected instruction"); 1384 1385 // Split to extracts. 1386 unsigned NumDefs = I.getNumOperands() - 1; 1387 Register SrcReg = I.getOperand(NumDefs).getReg(); 1388 unsigned DefSize = MRI.getType(I.getOperand(0).getReg()).getSizeInBits(); 1389 1390 for (unsigned Idx = 0; Idx < NumDefs; ++Idx) { 1391 MachineInstr &ExtrInst = 1392 *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1393 TII.get(TargetOpcode::G_EXTRACT), I.getOperand(Idx).getReg()) 1394 .addReg(SrcReg) 1395 .addImm(Idx * DefSize); 1396 1397 if (!select(ExtrInst)) 1398 return false; 1399 } 1400 1401 I.eraseFromParent(); 1402 return true; 1403 } 1404 1405 bool X86InstructionSelector::selectMergeValues( 1406 MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) { 1407 assert((I.getOpcode() == TargetOpcode::G_MERGE_VALUES || 1408 I.getOpcode() == TargetOpcode::G_CONCAT_VECTORS) && 1409 "unexpected instruction"); 1410 1411 // Split to inserts. 1412 Register DstReg = I.getOperand(0).getReg(); 1413 Register SrcReg0 = I.getOperand(1).getReg(); 1414 1415 const LLT DstTy = MRI.getType(DstReg); 1416 const LLT SrcTy = MRI.getType(SrcReg0); 1417 unsigned SrcSize = SrcTy.getSizeInBits(); 1418 1419 const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); 1420 1421 // For the first src use insertSubReg. 1422 Register DefReg = MRI.createGenericVirtualRegister(DstTy); 1423 MRI.setRegBank(DefReg, RegBank); 1424 if (!emitInsertSubreg(DefReg, I.getOperand(1).getReg(), I, MRI, MF)) 1425 return false; 1426 1427 for (unsigned Idx = 2; Idx < I.getNumOperands(); ++Idx) { 1428 Register Tmp = MRI.createGenericVirtualRegister(DstTy); 1429 MRI.setRegBank(Tmp, RegBank); 1430 1431 MachineInstr &InsertInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1432 TII.get(TargetOpcode::G_INSERT), Tmp) 1433 .addReg(DefReg) 1434 .addReg(I.getOperand(Idx).getReg()) 1435 .addImm((Idx - 1) * SrcSize); 1436 1437 DefReg = Tmp; 1438 1439 if (!select(InsertInst)) 1440 return false; 1441 } 1442 1443 MachineInstr &CopyInst = *BuildMI(*I.getParent(), I, I.getDebugLoc(), 1444 TII.get(TargetOpcode::COPY), DstReg) 1445 .addReg(DefReg); 1446 1447 if (!select(CopyInst)) 1448 return false; 1449 1450 I.eraseFromParent(); 1451 return true; 1452 } 1453 1454 bool X86InstructionSelector::selectCondBranch(MachineInstr &I, 1455 MachineRegisterInfo &MRI, 1456 MachineFunction &MF) const { 1457 assert((I.getOpcode() == TargetOpcode::G_BRCOND) && "unexpected instruction"); 1458 1459 const Register CondReg = I.getOperand(0).getReg(); 1460 MachineBasicBlock *DestMBB = I.getOperand(1).getMBB(); 1461 1462 MachineInstr &TestInst = 1463 *BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TEST8ri)) 1464 .addReg(CondReg) 1465 .addImm(1); 1466 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::JCC_1)) 1467 .addMBB(DestMBB).addImm(X86::COND_NE); 1468 1469 constrainSelectedInstRegOperands(TestInst, TII, TRI, RBI); 1470 1471 I.eraseFromParent(); 1472 return true; 1473 } 1474 1475 bool X86InstructionSelector::materializeFP(MachineInstr &I, 1476 MachineRegisterInfo &MRI, 1477 MachineFunction &MF) const { 1478 assert((I.getOpcode() == TargetOpcode::G_FCONSTANT) && 1479 "unexpected instruction"); 1480 1481 // Can't handle alternate code models yet. 1482 CodeModel::Model CM = TM.getCodeModel(); 1483 if (CM != CodeModel::Small && CM != CodeModel::Large) 1484 return false; 1485 1486 const Register DstReg = I.getOperand(0).getReg(); 1487 const LLT DstTy = MRI.getType(DstReg); 1488 const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI); 1489 Align Alignment = Align(DstTy.getSizeInBytes()); 1490 const DebugLoc &DbgLoc = I.getDebugLoc(); 1491 1492 unsigned Opc = 1493 getLoadStoreOp(DstTy, RegBank, TargetOpcode::G_LOAD, Alignment); 1494 1495 // Create the load from the constant pool. 1496 const ConstantFP *CFP = I.getOperand(1).getFPImm(); 1497 unsigned CPI = MF.getConstantPool()->getConstantPoolIndex(CFP, Alignment); 1498 MachineInstr *LoadInst = nullptr; 1499 unsigned char OpFlag = STI.classifyLocalReference(nullptr); 1500 1501 if (CM == CodeModel::Large && STI.is64Bit()) { 1502 // Under X86-64 non-small code model, GV (and friends) are 64-bits, so 1503 // they cannot be folded into immediate fields. 1504 1505 Register AddrReg = MRI.createVirtualRegister(&X86::GR64RegClass); 1506 BuildMI(*I.getParent(), I, DbgLoc, TII.get(X86::MOV64ri), AddrReg) 1507 .addConstantPoolIndex(CPI, 0, OpFlag); 1508 1509 MachineMemOperand *MMO = MF.getMachineMemOperand( 1510 MachinePointerInfo::getConstantPool(MF), MachineMemOperand::MOLoad, 1511 LLT::pointer(0, MF.getDataLayout().getPointerSizeInBits()), Alignment); 1512 1513 LoadInst = 1514 addDirectMem(BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), 1515 AddrReg) 1516 .addMemOperand(MMO); 1517 1518 } else if (CM == CodeModel::Small || !STI.is64Bit()) { 1519 // Handle the case when globals fit in our immediate field. 1520 // This is true for X86-32 always and X86-64 when in -mcmodel=small mode. 1521 1522 // x86-32 PIC requires a PIC base register for constant pools. 1523 unsigned PICBase = 0; 1524 if (OpFlag == X86II::MO_PIC_BASE_OFFSET || OpFlag == X86II::MO_GOTOFF) { 1525 // PICBase can be allocated by TII.getGlobalBaseReg(&MF). 1526 // In DAGISEL the code that initialize it generated by the CGBR pass. 1527 return false; // TODO support the mode. 1528 } else if (STI.is64Bit() && TM.getCodeModel() == CodeModel::Small) 1529 PICBase = X86::RIP; 1530 1531 LoadInst = addConstantPoolReference( 1532 BuildMI(*I.getParent(), I, DbgLoc, TII.get(Opc), DstReg), CPI, PICBase, 1533 OpFlag); 1534 } else 1535 return false; 1536 1537 constrainSelectedInstRegOperands(*LoadInst, TII, TRI, RBI); 1538 I.eraseFromParent(); 1539 return true; 1540 } 1541 1542 bool X86InstructionSelector::selectImplicitDefOrPHI( 1543 MachineInstr &I, MachineRegisterInfo &MRI) const { 1544 assert((I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF || 1545 I.getOpcode() == TargetOpcode::G_PHI) && 1546 "unexpected instruction"); 1547 1548 Register DstReg = I.getOperand(0).getReg(); 1549 1550 if (!MRI.getRegClassOrNull(DstReg)) { 1551 const LLT DstTy = MRI.getType(DstReg); 1552 const TargetRegisterClass *RC = getRegClass(DstTy, DstReg, MRI); 1553 1554 if (!RBI.constrainGenericRegister(DstReg, *RC, MRI)) { 1555 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 1556 << " operand\n"); 1557 return false; 1558 } 1559 } 1560 1561 if (I.getOpcode() == TargetOpcode::G_IMPLICIT_DEF) 1562 I.setDesc(TII.get(X86::IMPLICIT_DEF)); 1563 else 1564 I.setDesc(TII.get(X86::PHI)); 1565 1566 return true; 1567 } 1568 1569 bool X86InstructionSelector::selectMulDivRem(MachineInstr &I, 1570 MachineRegisterInfo &MRI, 1571 MachineFunction &MF) const { 1572 // The implementation of this function is adapted from X86FastISel. 1573 assert((I.getOpcode() == TargetOpcode::G_MUL || 1574 I.getOpcode() == TargetOpcode::G_SMULH || 1575 I.getOpcode() == TargetOpcode::G_UMULH || 1576 I.getOpcode() == TargetOpcode::G_SDIV || 1577 I.getOpcode() == TargetOpcode::G_SREM || 1578 I.getOpcode() == TargetOpcode::G_UDIV || 1579 I.getOpcode() == TargetOpcode::G_UREM) && 1580 "unexpected instruction"); 1581 1582 const Register DstReg = I.getOperand(0).getReg(); 1583 const Register Op1Reg = I.getOperand(1).getReg(); 1584 const Register Op2Reg = I.getOperand(2).getReg(); 1585 1586 const LLT RegTy = MRI.getType(DstReg); 1587 assert(RegTy == MRI.getType(Op1Reg) && RegTy == MRI.getType(Op2Reg) && 1588 "Arguments and return value types must match"); 1589 1590 const RegisterBank *RegRB = RBI.getRegBank(DstReg, MRI, TRI); 1591 if (!RegRB || RegRB->getID() != X86::GPRRegBankID) 1592 return false; 1593 1594 const static unsigned NumTypes = 4; // i8, i16, i32, i64 1595 const static unsigned NumOps = 7; // SDiv/SRem/UDiv/URem/Mul/SMulH/UMulh 1596 const static bool S = true; // IsSigned 1597 const static bool U = false; // !IsSigned 1598 const static unsigned Copy = TargetOpcode::COPY; 1599 1600 // For the X86 IDIV instruction, in most cases the dividend 1601 // (numerator) must be in a specific register pair highreg:lowreg, 1602 // producing the quotient in lowreg and the remainder in highreg. 1603 // For most data types, to set up the instruction, the dividend is 1604 // copied into lowreg, and lowreg is sign-extended into highreg. The 1605 // exception is i8, where the dividend is defined as a single register rather 1606 // than a register pair, and we therefore directly sign-extend the dividend 1607 // into lowreg, instead of copying, and ignore the highreg. 1608 const static struct MulDivRemEntry { 1609 // The following portion depends only on the data type. 1610 unsigned SizeInBits; 1611 unsigned LowInReg; // low part of the register pair 1612 unsigned HighInReg; // high part of the register pair 1613 // The following portion depends on both the data type and the operation. 1614 struct MulDivRemResult { 1615 unsigned OpMulDivRem; // The specific MUL/DIV opcode to use. 1616 unsigned OpSignExtend; // Opcode for sign-extending lowreg into 1617 // highreg, or copying a zero into highreg. 1618 unsigned OpCopy; // Opcode for copying dividend into lowreg, or 1619 // zero/sign-extending into lowreg for i8. 1620 unsigned ResultReg; // Register containing the desired result. 1621 bool IsOpSigned; // Whether to use signed or unsigned form. 1622 } ResultTable[NumOps]; 1623 } OpTable[NumTypes] = { 1624 {8, 1625 X86::AX, 1626 0, 1627 { 1628 {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AL, S}, // SDiv 1629 {X86::IDIV8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SRem 1630 {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AL, U}, // UDiv 1631 {X86::DIV8r, 0, X86::MOVZX16rr8, X86::AH, U}, // URem 1632 {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AL, S}, // Mul 1633 {X86::IMUL8r, 0, X86::MOVSX16rr8, X86::AH, S}, // SMulH 1634 {X86::MUL8r, 0, X86::MOVZX16rr8, X86::AH, U}, // UMulH 1635 }}, // i8 1636 {16, 1637 X86::AX, 1638 X86::DX, 1639 { 1640 {X86::IDIV16r, X86::CWD, Copy, X86::AX, S}, // SDiv 1641 {X86::IDIV16r, X86::CWD, Copy, X86::DX, S}, // SRem 1642 {X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U}, // UDiv 1643 {X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U}, // URem 1644 {X86::IMUL16r, X86::MOV32r0, Copy, X86::AX, S}, // Mul 1645 {X86::IMUL16r, X86::MOV32r0, Copy, X86::DX, S}, // SMulH 1646 {X86::MUL16r, X86::MOV32r0, Copy, X86::DX, U}, // UMulH 1647 }}, // i16 1648 {32, 1649 X86::EAX, 1650 X86::EDX, 1651 { 1652 {X86::IDIV32r, X86::CDQ, Copy, X86::EAX, S}, // SDiv 1653 {X86::IDIV32r, X86::CDQ, Copy, X86::EDX, S}, // SRem 1654 {X86::DIV32r, X86::MOV32r0, Copy, X86::EAX, U}, // UDiv 1655 {X86::DIV32r, X86::MOV32r0, Copy, X86::EDX, U}, // URem 1656 {X86::IMUL32r, X86::MOV32r0, Copy, X86::EAX, S}, // Mul 1657 {X86::IMUL32r, X86::MOV32r0, Copy, X86::EDX, S}, // SMulH 1658 {X86::MUL32r, X86::MOV32r0, Copy, X86::EDX, U}, // UMulH 1659 }}, // i32 1660 {64, 1661 X86::RAX, 1662 X86::RDX, 1663 { 1664 {X86::IDIV64r, X86::CQO, Copy, X86::RAX, S}, // SDiv 1665 {X86::IDIV64r, X86::CQO, Copy, X86::RDX, S}, // SRem 1666 {X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U}, // UDiv 1667 {X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U}, // URem 1668 {X86::IMUL64r, X86::MOV32r0, Copy, X86::RAX, S}, // Mul 1669 {X86::IMUL64r, X86::MOV32r0, Copy, X86::RDX, S}, // SMulH 1670 {X86::MUL64r, X86::MOV32r0, Copy, X86::RDX, U}, // UMulH 1671 }}, // i64 1672 }; 1673 1674 auto OpEntryIt = llvm::find_if(OpTable, [RegTy](const MulDivRemEntry &El) { 1675 return El.SizeInBits == RegTy.getSizeInBits(); 1676 }); 1677 if (OpEntryIt == std::end(OpTable)) 1678 return false; 1679 1680 unsigned OpIndex; 1681 switch (I.getOpcode()) { 1682 default: 1683 llvm_unreachable("Unexpected mul/div/rem opcode"); 1684 case TargetOpcode::G_SDIV: 1685 OpIndex = 0; 1686 break; 1687 case TargetOpcode::G_SREM: 1688 OpIndex = 1; 1689 break; 1690 case TargetOpcode::G_UDIV: 1691 OpIndex = 2; 1692 break; 1693 case TargetOpcode::G_UREM: 1694 OpIndex = 3; 1695 break; 1696 case TargetOpcode::G_MUL: 1697 OpIndex = 4; 1698 break; 1699 case TargetOpcode::G_SMULH: 1700 OpIndex = 5; 1701 break; 1702 case TargetOpcode::G_UMULH: 1703 OpIndex = 6; 1704 break; 1705 } 1706 1707 const MulDivRemEntry &TypeEntry = *OpEntryIt; 1708 const MulDivRemEntry::MulDivRemResult &OpEntry = 1709 TypeEntry.ResultTable[OpIndex]; 1710 1711 const TargetRegisterClass *RegRC = getRegClass(RegTy, *RegRB); 1712 if (!RBI.constrainGenericRegister(Op1Reg, *RegRC, MRI) || 1713 !RBI.constrainGenericRegister(Op2Reg, *RegRC, MRI) || 1714 !RBI.constrainGenericRegister(DstReg, *RegRC, MRI)) { 1715 LLVM_DEBUG(dbgs() << "Failed to constrain " << TII.getName(I.getOpcode()) 1716 << " operand\n"); 1717 return false; 1718 } 1719 1720 // Move op1 into low-order input register. 1721 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpCopy), 1722 TypeEntry.LowInReg) 1723 .addReg(Op1Reg); 1724 1725 // Zero-extend or sign-extend into high-order input register. 1726 if (OpEntry.OpSignExtend) { 1727 if (OpEntry.IsOpSigned) 1728 BuildMI(*I.getParent(), I, I.getDebugLoc(), 1729 TII.get(OpEntry.OpSignExtend)); 1730 else { 1731 Register Zero32 = MRI.createVirtualRegister(&X86::GR32RegClass); 1732 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::MOV32r0), 1733 Zero32); 1734 1735 // Copy the zero into the appropriate sub/super/identical physical 1736 // register. Unfortunately the operations needed are not uniform enough 1737 // to fit neatly into the table above. 1738 if (RegTy.getSizeInBits() == 16) { 1739 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), 1740 TypeEntry.HighInReg) 1741 .addReg(Zero32, 0, X86::sub_16bit); 1742 } else if (RegTy.getSizeInBits() == 32) { 1743 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), 1744 TypeEntry.HighInReg) 1745 .addReg(Zero32); 1746 } else if (RegTy.getSizeInBits() == 64) { 1747 BuildMI(*I.getParent(), I, I.getDebugLoc(), 1748 TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg) 1749 .addImm(0) 1750 .addReg(Zero32) 1751 .addImm(X86::sub_32bit); 1752 } 1753 } 1754 } 1755 1756 // Generate the DIV/IDIV/MUL/IMUL instruction. 1757 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(OpEntry.OpMulDivRem)) 1758 .addReg(Op2Reg); 1759 1760 // For i8 remainder, we can't reference ah directly, as we'll end 1761 // up with bogus copies like %r9b = COPY %ah. Reference ax 1762 // instead to prevent ah references in a rex instruction. 1763 // 1764 // The current assumption of the fast register allocator is that isel 1765 // won't generate explicit references to the GR8_NOREX registers. If 1766 // the allocator and/or the backend get enhanced to be more robust in 1767 // that regard, this can be, and should be, removed. 1768 if (OpEntry.ResultReg == X86::AH && STI.is64Bit()) { 1769 Register SourceSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass); 1770 Register ResultSuperReg = MRI.createVirtualRegister(&X86::GR16RegClass); 1771 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(Copy), SourceSuperReg) 1772 .addReg(X86::AX); 1773 1774 // Shift AX right by 8 bits instead of using AH. 1775 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::SHR16ri), 1776 ResultSuperReg) 1777 .addReg(SourceSuperReg) 1778 .addImm(8); 1779 1780 // Now reference the 8-bit subreg of the result. 1781 BuildMI(*I.getParent(), I, I.getDebugLoc(), 1782 TII.get(TargetOpcode::SUBREG_TO_REG)) 1783 .addDef(DstReg) 1784 .addImm(0) 1785 .addReg(ResultSuperReg) 1786 .addImm(X86::sub_8bit); 1787 } else { 1788 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY), 1789 DstReg) 1790 .addReg(OpEntry.ResultReg); 1791 } 1792 I.eraseFromParent(); 1793 1794 return true; 1795 } 1796 1797 bool X86InstructionSelector::selectSelect(MachineInstr &I, 1798 MachineRegisterInfo &MRI, 1799 MachineFunction &MF) const { 1800 GSelect &Sel = cast<GSelect>(I); 1801 unsigned DstReg = Sel.getReg(0); 1802 BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(X86::TEST32rr)) 1803 .addReg(Sel.getCondReg()) 1804 .addReg(Sel.getCondReg()); 1805 1806 unsigned OpCmp; 1807 LLT Ty = MRI.getType(DstReg); 1808 switch (Ty.getSizeInBits()) { 1809 default: 1810 return false; 1811 case 8: 1812 OpCmp = X86::CMOV_GR8; 1813 break; 1814 case 16: 1815 OpCmp = STI.canUseCMOV() ? X86::CMOV16rr : X86::CMOV_GR16; 1816 break; 1817 case 32: 1818 OpCmp = STI.canUseCMOV() ? X86::CMOV32rr : X86::CMOV_GR32; 1819 break; 1820 case 64: 1821 assert(STI.is64Bit() && STI.canUseCMOV()); 1822 OpCmp = X86::CMOV64rr; 1823 break; 1824 } 1825 BuildMI(*Sel.getParent(), Sel, Sel.getDebugLoc(), TII.get(OpCmp), DstReg) 1826 .addReg(Sel.getTrueReg()) 1827 .addReg(Sel.getFalseReg()) 1828 .addImm(X86::COND_E); 1829 1830 const TargetRegisterClass *DstRC = getRegClass(Ty, DstReg, MRI); 1831 if (!RBI.constrainGenericRegister(DstReg, *DstRC, MRI)) { 1832 LLVM_DEBUG(dbgs() << "Failed to constrain CMOV\n"); 1833 return false; 1834 } 1835 1836 Sel.eraseFromParent(); 1837 return true; 1838 } 1839 1840 bool X86InstructionSelector::selectIntrinsicWSideEffects( 1841 MachineInstr &I, MachineRegisterInfo &MRI, MachineFunction &MF) const { 1842 1843 assert(I.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS && 1844 "unexpected instruction"); 1845 1846 if (I.getOperand(0).getIntrinsicID() != Intrinsic::trap) 1847 return false; 1848 1849 BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(X86::TRAP)); 1850 1851 I.eraseFromParent(); 1852 return true; 1853 } 1854 1855 InstructionSelector * 1856 llvm::createX86InstructionSelector(const X86TargetMachine &TM, 1857 X86Subtarget &Subtarget, 1858 X86RegisterBankInfo &RBI) { 1859 return new X86InstructionSelector(TM, Subtarget, RBI); 1860 } 1861