//===-- llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h -----*- C++ -*-//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This file contains some helper functions which try to clean up artifacts
// such as G_TRUNCs/G_[ZSA]EXTs that were created during legalization to make
// the types match. This file also contains some combines of merges that happen
// at the end of legalization.
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H
#define LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H

#include "llvm/ADT/SmallBitVector.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "legalizer"

namespace llvm {
class LegalizationArtifactCombiner {
  MachineIRBuilder &Builder;
  MachineRegisterInfo &MRI;
  const LegalizerInfo &LI;
  GISelKnownBits *KB;

  static bool isArtifactCast(unsigned Opc) {
    switch (Opc) {
    case TargetOpcode::G_TRUNC:
    case TargetOpcode::G_SEXT:
    case TargetOpcode::G_ZEXT:
    case TargetOpcode::G_ANYEXT:
      return true;
    default:
      return false;
    }
  }

public:
  LegalizationArtifactCombiner(MachineIRBuilder &B, MachineRegisterInfo &MRI,
                               const LegalizerInfo &LI,
                               GISelKnownBits *KB = nullptr)
      : Builder(B), MRI(MRI), LI(LI), KB(KB) {}

  bool tryCombineAnyExt(MachineInstr &MI,
                        SmallVectorImpl<MachineInstr *> &DeadInsts,
                        SmallVectorImpl<Register> &UpdatedDefs,
                        GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // aext(trunc x) -> aext/copy/trunc x
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      if (MRI.getType(DstReg) == MRI.getType(TruncSrc))
        replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      else
        Builder.buildAnyExtOrTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // aext([asz]ext x) -> [asz]ext x
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GAnyExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)),
                                                    m_GZExt(m_Reg(ExtSrc)))))) {
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *ExtMI, DeadInsts);
      return true;
    }

    // Try to fold aext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        auto *MergedLocation = DILocation::getMergedLocation(
            MI.getDebugLoc().get(), SrcMI->getDebugLoc().get());
        // Set the debug location to the merged location of the SrcMI and the
        // MI if the aext fold is successful.
        Builder.setDebugLoc(MergedLocation);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }

  bool tryCombineZExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs,
                      GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_ZEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // zext(trunc x) -> and (aext/copy/trunc x), mask
    // zext(sext x) -> and (sext x), mask
    Register TruncSrc;
    Register SextSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))) ||
        mi_match(SrcReg, MRI, m_GSExt(m_Reg(SextSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      if (isInstUnsupported({TargetOpcode::G_AND, {DstTy}}) ||
          isConstantUnsupported(DstTy))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      APInt MaskVal = APInt::getAllOnes(SrcTy.getScalarSizeInBits());
      if (SextSrc && (DstTy != MRI.getType(SextSrc)))
        SextSrc = Builder.buildSExtOrTrunc(DstTy, SextSrc).getReg(0);
      if (TruncSrc && (DstTy != MRI.getType(TruncSrc)))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      APInt ExtMaskVal = MaskVal.zext(DstTy.getScalarSizeInBits());
      Register AndSrc = SextSrc ? SextSrc : TruncSrc;
      // Elide G_AND and mask constant if possible.
      // The G_AND would also be removed by the post-legalize redundant_and
      // combine, but in this very common case, eliding early and regardless of
      // OptLevel results in significant compile-time and O0 code-size
      // improvements. Inserting unnecessary instructions between boolean defs
      // and uses hinders a lot of folding during ISel.
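      // An illustrative sketch of the elision (vreg numbers made up): given
      //   %1:_(s16) = G_TRUNC %0(s32)
      //   %2:_(s32) = G_ZEXT %1(s16)
      // the combine normally produces
      //   %m:_(s32) = G_CONSTANT i32 65535
      //   %2:_(s32) = G_AND %0, %m
      // but if known-bits proves the top 16 bits of %0 are already zero, the
      // G_AND is skipped and %2 becomes a plain copy of %0.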
      if (KB && (KB->getKnownZeroes(AndSrc) | ExtMaskVal).isAllOnes()) {
        replaceRegOrBuildCopy(DstReg, AndSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      } else {
        auto Mask = Builder.buildConstant(DstTy, ExtMaskVal);
        Builder.buildAnd(DstReg, AndSrc, Mask);
      }
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // zext(zext x) -> (zext x)
    Register ZextSrc;
    if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(ZextSrc);
      Observer.changedInstr(MI);
      UpdatedDefs.push_back(DstReg);
      markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold zext(g_constant) when the larger constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().zext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }
    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }

  bool tryCombineSExt(MachineInstr &MI,
                      SmallVectorImpl<MachineInstr *> &DeadInsts,
                      SmallVectorImpl<Register> &UpdatedDefs,
                      GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_SEXT);

    Builder.setInstrAndDebugLoc(MI);
    Register DstReg = MI.getOperand(0).getReg();
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // sext(trunc x) -> (sext_inreg (aext/copy/trunc x), c)
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      LLT DstTy = MRI.getType(DstReg);
      if (isInstUnsupported({TargetOpcode::G_SEXT_INREG, {DstTy}}))
        return false;
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI;);
      LLT SrcTy = MRI.getType(SrcReg);
      uint64_t SizeInBits = SrcTy.getScalarSizeInBits();
      if (DstTy != MRI.getType(TruncSrc))
        TruncSrc = Builder.buildAnyExtOrTrunc(DstTy, TruncSrc).getReg(0);
      // Elide G_SEXT_INREG if possible. This is similar to eliding G_AND in
      // tryCombineZExt. Refer to the comment in tryCombineZExt for rationale.
      if (KB && KB->computeNumSignBits(TruncSrc) >
                    DstTy.getScalarSizeInBits() - SizeInBits)
        replaceRegOrBuildCopy(DstReg, TruncSrc, MRI, Builder, UpdatedDefs,
                              Observer);
      else
        Builder.buildSExtInReg(DstReg, TruncSrc, SizeInBits);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // sext(zext x) -> (zext x)
    // sext(sext x) -> (sext x)
    Register ExtSrc;
    MachineInstr *ExtMI;
    if (mi_match(SrcReg, MRI,
                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)),
                                                    m_GSExt(m_Reg(ExtSrc)))))) {
      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
      return true;
    }

    // Try to fold sext(g_constant) when the larger constant type is legal.
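    // A sketch of the constant fold below (illustrative values): with
    //   %0:_(s8) = G_CONSTANT i8 -1
    //   %1:_(s32) = G_SEXT %0(s8)
    // the G_SEXT is replaced by %1:_(s32) = G_CONSTANT i32 -1, provided a
    // 32-bit G_CONSTANT is legal for the target.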
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      const LLT DstTy = MRI.getType(DstReg);
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().sext(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
  }

  bool tryCombineTrunc(MachineInstr &MI,
                       SmallVectorImpl<MachineInstr *> &DeadInsts,
                       SmallVectorImpl<Register> &UpdatedDefs,
                       GISelObserverWrapper &Observer) {
    using namespace llvm::MIPatternMatch;
    assert(MI.getOpcode() == TargetOpcode::G_TRUNC);

    Builder.setInstr(MI);
    Register DstReg = MI.getOperand(0).getReg();
    const LLT DstTy = MRI.getType(DstReg);
    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());

    // Try to fold trunc(g_constant) when the smaller constant type is legal.
    auto *SrcMI = MRI.getVRegDef(SrcReg);
    if (SrcMI->getOpcode() == TargetOpcode::G_CONSTANT) {
      if (isInstLegal({TargetOpcode::G_CONSTANT, {DstTy}})) {
        auto &CstVal = SrcMI->getOperand(1);
        Builder.buildConstant(
            DstReg, CstVal.getCImm()->getValue().trunc(DstTy.getSizeInBits()));
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *SrcMI, DeadInsts);
        return true;
      }
    }

    // Try to fold trunc(merge) to directly use the source of the merge.
    // This gets rid of large, difficult-to-legalize merges.
    if (auto *SrcMerge = dyn_cast<GMerge>(SrcMI)) {
      const Register MergeSrcReg = SrcMerge->getSourceReg(0);
      const LLT MergeSrcTy = MRI.getType(MergeSrcReg);

      // We can only fold if the types are scalar.
      const unsigned DstSize = DstTy.getSizeInBits();
      const unsigned MergeSrcSize = MergeSrcTy.getSizeInBits();
      if (!DstTy.isScalar() || !MergeSrcTy.isScalar())
        return false;

      if (DstSize < MergeSrcSize) {
        // When the merge source is larger than the destination, we can just
        // truncate the merge source directly.
        if (isInstUnsupported({TargetOpcode::G_TRUNC, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_TRUNC: "
                          << MI);

        Builder.buildTrunc(DstReg, MergeSrcReg);
        UpdatedDefs.push_back(DstReg);
      } else if (DstSize == MergeSrcSize) {
        // If the sizes match we can simply try to replace the register.
        LLVM_DEBUG(
            dbgs() << "Replacing G_TRUNC(G_MERGE_VALUES) with merge input: "
                   << MI);
        replaceRegOrBuildCopy(DstReg, MergeSrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      } else if (DstSize % MergeSrcSize == 0) {
        // If the trunc size is a multiple of the merge source size, we can use
        // a smaller merge instead.
        if (isInstUnsupported(
                {TargetOpcode::G_MERGE_VALUES, {DstTy, MergeSrcTy}}))
          return false;

        LLVM_DEBUG(
            dbgs() << "Combining G_TRUNC(G_MERGE_VALUES) to G_MERGE_VALUES: "
                   << MI);

        const unsigned NumSrcs = DstSize / MergeSrcSize;
        assert(NumSrcs < SrcMI->getNumOperands() - 1 &&
               "trunc(merge) should require fewer inputs than merge");
        SmallVector<Register, 8> SrcRegs(NumSrcs);
        for (unsigned i = 0; i < NumSrcs; ++i)
          SrcRegs[i] = SrcMerge->getSourceReg(i);

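        // Illustrative sketch of this case (vreg numbers made up):
        //   %4:_(s128) = G_MERGE_VALUES %0(s32), %1, %2, %3
        //   %5:_(s64) = G_TRUNC %4
        // becomes
        //   %5:_(s64) = G_MERGE_VALUES %0(s32), %1(s32)
        // since the trunc keeps only the low bits, which the leading merge
        // sources provide.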
        Builder.buildMergeValues(DstReg, SrcRegs);
        UpdatedDefs.push_back(DstReg);
      } else {
        // Unable to combine.
        return false;
      }

      markInstAndDefDead(MI, *SrcMerge, DeadInsts);
      return true;
    }

    // trunc(trunc) -> trunc
    Register TruncSrc;
    if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc)))) {
      // Always combine trunc(trunc) since the eventual resulting trunc must be
      // legal anyway as it must be legal for all outputs of the consumer type
      // set.
      LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_TRUNC): " << MI);

      Builder.buildTrunc(DstReg, TruncSrc);
      UpdatedDefs.push_back(DstReg);
      markInstAndDefDead(MI, *MRI.getVRegDef(TruncSrc), DeadInsts);
      return true;
    }

    // trunc(ext x) -> x
    ArtifactValueFinder Finder(MRI, Builder, LI);
    if (Register FoundReg =
            Finder.findValueFromDef(DstReg, 0, DstTy.getSizeInBits())) {
      LLT FoundRegTy = MRI.getType(FoundReg);
      if (DstTy == FoundRegTy) {
        LLVM_DEBUG(dbgs() << ".. Combine G_TRUNC(G_[S,Z,ANY]EXT/G_TRUNC...): "
                          << MI;);

        replaceRegOrBuildCopy(DstReg, FoundReg, MRI, Builder, UpdatedDefs,
                              Observer);
        UpdatedDefs.push_back(DstReg);
        markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
        return true;
      }
    }

    return false;
  }

  /// Try to fold G_[ASZ]EXT (G_IMPLICIT_DEF).
  bool tryFoldImplicitDef(MachineInstr &MI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {
    unsigned Opcode = MI.getOpcode();
    assert(Opcode == TargetOpcode::G_ANYEXT || Opcode == TargetOpcode::G_ZEXT ||
           Opcode == TargetOpcode::G_SEXT);

    if (MachineInstr *DefMI = getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF,
                                           MI.getOperand(1).getReg(), MRI)) {
      Builder.setInstr(MI);
      Register DstReg = MI.getOperand(0).getReg();
      LLT DstTy = MRI.getType(DstReg);

      if (Opcode == TargetOpcode::G_ANYEXT) {
        // G_ANYEXT (G_IMPLICIT_DEF) -> G_IMPLICIT_DEF
        if (!isInstLegal({TargetOpcode::G_IMPLICIT_DEF, {DstTy}}))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_ANYEXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildInstr(TargetOpcode::G_IMPLICIT_DEF, {DstReg}, {});
        UpdatedDefs.push_back(DstReg);
      } else {
        // G_[SZ]EXT (G_IMPLICIT_DEF) -> G_CONSTANT 0 because the top
        // bits will be 0 for G_ZEXT and 0/1 for the G_SEXT.
        if (isConstantUnsupported(DstTy))
          return false;
        LLVM_DEBUG(dbgs() << ".. Combine G_[SZ]EXT(G_IMPLICIT_DEF): " << MI;);
        Builder.buildConstant(DstReg, 0);
        UpdatedDefs.push_back(DstReg);
      }

      markInstAndDefDead(MI, *DefMI, DeadInsts);
      return true;
    }
    return false;
  }

  bool tryFoldUnmergeCast(MachineInstr &MI, MachineInstr &CastMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          SmallVectorImpl<Register> &UpdatedDefs) {

    assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);

    const unsigned CastOpc = CastMI.getOpcode();

    if (!isArtifactCast(CastOpc))
      return false;

    const unsigned NumDefs = MI.getNumOperands() - 1;

    const Register CastSrcReg = CastMI.getOperand(1).getReg();
    const LLT CastSrcTy = MRI.getType(CastSrcReg);
    const LLT DestTy = MRI.getType(MI.getOperand(0).getReg());
    const LLT SrcTy = MRI.getType(MI.getOperand(NumDefs).getReg());

    const unsigned CastSrcSize = CastSrcTy.getSizeInBits();
    const unsigned DestSize = DestTy.getSizeInBits();

    if (CastOpc == TargetOpcode::G_TRUNC) {
      if (SrcTy.isVector() && SrcTy.getScalarType() == DestTy.getScalarType()) {
        // %1:_(<4 x s8>) = G_TRUNC %0(<4 x s32>)
        // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %1
        // =>
        // %6:_(s32), %7:_(s32), %8:_(s32), %9:_(s32) = G_UNMERGE_VALUES %0
        // %2:_(s8) = G_TRUNC %6
        // %3:_(s8) = G_TRUNC %7
        // %4:_(s8) = G_TRUNC %8
        // %5:_(s8) = G_TRUNC %9

        unsigned UnmergeNumElts =
            DestTy.isVector() ? CastSrcTy.getNumElements() / NumDefs : 1;
        LLT UnmergeTy = CastSrcTy.changeElementCount(
            ElementCount::getFixed(UnmergeNumElts));
        LLT SrcWideTy =
            SrcTy.changeElementCount(ElementCount::getFixed(UnmergeNumElts));

        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {UnmergeTy, CastSrcTy}}) ||
            LI.getAction({TargetOpcode::G_TRUNC, {SrcWideTy, UnmergeTy}})
                    .Action == LegalizeActions::MoreElements)
          return false;

        Builder.setInstr(MI);
        auto NewUnmerge = Builder.buildUnmerge(UnmergeTy, CastSrcReg);

        for (unsigned I = 0; I != NumDefs; ++I) {
          Register DefReg = MI.getOperand(I).getReg();
          UpdatedDefs.push_back(DefReg);
          Builder.buildTrunc(DefReg, NewUnmerge.getReg(I));
        }

        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }

      if (CastSrcTy.isScalar() && SrcTy.isScalar() && !DestTy.isVector()) {
        // %1:_(s16) = G_TRUNC %0(s32)
        // %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %1
        // =>
        // %2:_(s8), %3:_(s8), %4:_(s8), %5:_(s8) = G_UNMERGE_VALUES %0

        // Unmerge(trunc) can be combined if the trunc source size is a
        // multiple of the unmerge destination size.
        if (CastSrcSize % DestSize != 0)
          return false;

        // Check if the new unmerge is supported.
        if (isInstUnsupported(
                {TargetOpcode::G_UNMERGE_VALUES, {DestTy, CastSrcTy}}))
          return false;

        // Gather the original destination registers and create new ones for
        // the unused bits.
        const unsigned NewNumDefs = CastSrcSize / DestSize;
        SmallVector<Register, 8> DstRegs(NewNumDefs);
        for (unsigned Idx = 0; Idx < NewNumDefs; ++Idx) {
          if (Idx < NumDefs)
            DstRegs[Idx] = MI.getOperand(Idx).getReg();
          else
            DstRegs[Idx] = MRI.createGenericVirtualRegister(DestTy);
        }

        // Build the new unmerge.
        Builder.setInstr(MI);
        Builder.buildUnmerge(DstRegs, CastSrcReg);
        UpdatedDefs.append(DstRegs.begin(), DstRegs.begin() + NewNumDefs);
        markInstAndDefDead(MI, CastMI, DeadInsts);
        return true;
      }
    }

    // TODO: support combines with other casts as well
    return false;
  }

  static bool canFoldMergeOpcode(unsigned MergeOp, unsigned ConvertOp,
                                 LLT OpTy, LLT DestTy) {
    // Check if we found a definition that is like G_MERGE_VALUES.
    switch (MergeOp) {
    default:
      return false;
    case TargetOpcode::G_BUILD_VECTOR:
    case TargetOpcode::G_MERGE_VALUES:
      // The convert operation that we will need to insert is
      // going to convert the input of that type of instruction (scalar)
      // to the destination type (DestTy).
      // The conversion needs to stay in the same domain (scalar to scalar
      // and vector to vector), so if we were to allow folding the merge
      // we would need to insert some bitcasts.
      // E.g.,
      // <2 x s16> = build_vector s16, s16
      // <2 x s32> = zext <2 x s16>
      // <2 x s16>, <2 x s16> = unmerge <2 x s32>
      //
      // As is, the folding would produce:
      // <2 x s16> = zext s16  <-- scalar to vector
      // <2 x s16> = zext s16  <-- scalar to vector
      // which is invalid.
      // Instead we would want to generate:
      // s32 = zext s16
      // <2 x s16> = bitcast s32
      // s32 = zext s16
      // <2 x s16> = bitcast s32
      //
      // That is not done yet.
      if (ConvertOp == 0)
        return true;
      return !DestTy.isVector() && OpTy.isVector() &&
             DestTy == OpTy.getElementType();
    case TargetOpcode::G_CONCAT_VECTORS: {
      if (ConvertOp == 0)
        return true;
      if (!DestTy.isVector())
        return false;

      const unsigned OpEltSize = OpTy.getElementType().getSizeInBits();

      // Don't handle scalarization with a cast that isn't in the same
      // direction as the vector cast. This could be handled, but it would
      // require more intermediate unmerges.
      if (ConvertOp == TargetOpcode::G_TRUNC)
        return DestTy.getSizeInBits() <= OpEltSize;
      return DestTy.getSizeInBits() >= OpEltSize;
    }
    }
  }

  /// Try to replace DstReg with SrcReg or build a COPY instruction
  /// depending on the register constraints.
  static void replaceRegOrBuildCopy(Register DstReg, Register SrcReg,
                                    MachineRegisterInfo &MRI,
                                    MachineIRBuilder &Builder,
                                    SmallVectorImpl<Register> &UpdatedDefs,
                                    GISelChangeObserver &Observer) {
    if (!llvm::canReplaceReg(DstReg, SrcReg, MRI)) {
      Builder.buildCopy(DstReg, SrcReg);
      UpdatedDefs.push_back(DstReg);
      return;
    }
    SmallVector<MachineInstr *, 4> UseMIs;
    // Get the users and notify the observer before replacing.
    for (auto &UseMI : MRI.use_instructions(DstReg)) {
      UseMIs.push_back(&UseMI);
      Observer.changingInstr(UseMI);
    }
    // Replace the registers.
    MRI.replaceRegWith(DstReg, SrcReg);
    UpdatedDefs.push_back(SrcReg);
    // Notify the observer that we changed the instructions.
    for (auto *UseMI : UseMIs)
      Observer.changedInstr(*UseMI);
  }

  /// Return the operand index in \p MI that defines \p SearchDef.
  static unsigned getDefIndex(const MachineInstr &MI, Register SearchDef) {
    unsigned DefIdx = 0;
    for (const MachineOperand &Def : MI.defs()) {
      if (Def.getReg() == SearchDef)
        break;
      ++DefIdx;
    }

    return DefIdx;
  }

  /// This class provides utilities for finding source registers of specific
  /// bit ranges in an artifact. The routines can look through the source
  /// registers if they're other artifacts to try to find a non-artifact source
  /// of a value.
  class ArtifactValueFinder {
    MachineRegisterInfo &MRI;
    MachineIRBuilder &MIB;
    const LegalizerInfo &LI;

    // Stores the best register found in the current query so far.
    Register CurrentBest = Register();

    /// Given a concat_vectors op \p Concat and a start bit and size, try to
    /// find the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromConcat(GConcatVectors &Concat, unsigned StartBit,
                                 unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      Register Src1Reg = Concat.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;
      // Check that the bits don't span multiple sources.
      // FIXME: we might be able to return multiple sources? Or create an
      // appropriate concat to make it fit.
      if (InRegOffset + Size > SrcSize)
        return CurrentBest;

      Register SrcReg = Concat.getReg(StartSrcIdx);
      if (InRegOffset == 0 && Size == SrcSize) {
        CurrentBest = SrcReg;
        return findValueFromDefImpl(SrcReg, 0, Size);
      }

      return findValueFromDefImpl(SrcReg, InRegOffset, Size);
    }

    /// Given a build_vector op \p BV and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromBuildVector(GBuildVector &BV, unsigned StartBit,
                                      unsigned Size) {
      assert(Size > 0);

      // Find the source operand that provides the bits requested.
      Register Src1Reg = BV.getSourceReg(0);
      unsigned SrcSize = MRI.getType(Src1Reg).getSizeInBits();

      // Operand index of the source that provides the start of the bit range.
      unsigned StartSrcIdx = (StartBit / SrcSize) + 1;
      // Offset into the source at which the bit range starts.
      unsigned InRegOffset = StartBit % SrcSize;

      if (InRegOffset != 0)
        return CurrentBest; // Give up, bits don't start at a scalar source.
      if (Size < SrcSize)
        return CurrentBest; // Scalar source is too large for requested bits.
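      // Illustrative sketch (vreg names made up): for
      //   %v:_(<4 x s16>) = G_BUILD_VECTOR %a(s16), %b, %c, %d
      // a query with StartBit=16, Size=16 resolves to %b, while StartBit=32,
      // Size=32 can be served by building a new <2 x s16> build_vector of %c
      // and %d (if that build_vector is legal), as handled below.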

      // If the bits cover multiple sources evenly, then create a new
      // build_vector to synthesize the required size, if that's been
      // requested.
      if (Size > SrcSize) {
        if (Size % SrcSize > 0)
          return CurrentBest; // Isn't covered exactly by sources.

        unsigned NumSrcsUsed = Size / SrcSize;
        // If we're requesting all of the sources, just return this def.
        if (NumSrcsUsed == BV.getNumSources())
          return BV.getReg(0);

        LLT SrcTy = MRI.getType(Src1Reg);
        LLT NewBVTy = LLT::fixed_vector(NumSrcsUsed, SrcTy);

        // Check if the resulting build vector would be legal.
        LegalizeActionStep ActionStep =
            LI.getAction({TargetOpcode::G_BUILD_VECTOR, {NewBVTy, SrcTy}});
        if (ActionStep.Action != LegalizeActions::Legal)
          return CurrentBest;

        SmallVector<Register> NewSrcs;
        for (unsigned SrcIdx = StartSrcIdx; SrcIdx < StartSrcIdx + NumSrcsUsed;
             ++SrcIdx)
          NewSrcs.push_back(BV.getReg(SrcIdx));
        MIB.setInstrAndDebugLoc(BV);
        return MIB.buildBuildVector(NewBVTy, NewSrcs).getReg(0);
      }
      // A single source is requested, just return it.
      return BV.getReg(StartSrcIdx);
    }

    /// Given a G_INSERT op \p MI and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromInsert(MachineInstr &MI, unsigned StartBit,
                                 unsigned Size) {
      assert(MI.getOpcode() == TargetOpcode::G_INSERT);
      assert(Size > 0);

      Register ContainerSrcReg = MI.getOperand(1).getReg();
      Register InsertedReg = MI.getOperand(2).getReg();
      LLT InsertedRegTy = MRI.getType(InsertedReg);
      unsigned InsertOffset = MI.getOperand(3).getImm();

      // There are 4 possible container/insertreg + requested bit-range layouts
      // that the instruction and query could be representing.
      // For: %_ = G_INSERT %CONTAINER, %INS, InsOff (abbrev. to 'IO')
      // and a start bit 'SB', with size S, giving an end bit 'EB', we could
      // have...
      // Scenario A:
      //   --------------------------
      //   |  INS    |  CONTAINER   |
      //   --------------------------
      //       |   |
      //       SB  EB
      //
      // Scenario B:
      //   --------------------------
      //   |  INS    |  CONTAINER   |
      //   --------------------------
      //                |    |
      //                SB   EB
      //
      // Scenario C:
      //   --------------------------
      //   |  CONTAINER    |  INS   |
      //   --------------------------
      //       |    |
      //       SB   EB
      //
      // Scenario D:
      //   --------------------------
      //   |  CONTAINER    |  INS   |
      //   --------------------------
      //                     |   |
      //                     SB  EB
      //
      // So therefore, A and D are requesting data from the INS operand, while
      // B and C are requesting from the container operand.
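      //
      // A concrete sketch (illustrative values): for
      //   %_ = G_INSERT %cont(s64), %ins(s16), 8
      // a query with StartBit=8, Size=16 lies entirely inside the inserted
      // region and is forwarded to %ins at offset 0, while StartBit=32,
      // Size=16 lies entirely inside the container and is forwarded to %cont
      // unchanged.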

      unsigned InsertedEndBit = InsertOffset + InsertedRegTy.getSizeInBits();
      unsigned EndBit = StartBit + Size;
      unsigned NewStartBit;
      Register SrcRegToUse;
      if (EndBit <= InsertOffset || InsertedEndBit <= StartBit) {
        SrcRegToUse = ContainerSrcReg;
        NewStartBit = StartBit;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      if (InsertOffset <= StartBit && EndBit <= InsertedEndBit) {
        SrcRegToUse = InsertedReg;
        NewStartBit = StartBit - InsertOffset;
        if (NewStartBit == 0 &&
            Size == MRI.getType(SrcRegToUse).getSizeInBits())
          CurrentBest = SrcRegToUse;
        return findValueFromDefImpl(SrcRegToUse, NewStartBit, Size);
      }
      // The bit range spans both the inserted and container regions.
      return Register();
    }

    /// Given a G_SEXT, G_ZEXT, or G_ANYEXT op \p MI and a start bit and
    /// size, try to find the origin of the value defined by that start
    /// position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromExt(MachineInstr &MI, unsigned StartBit,
                              unsigned Size) {
      assert(MI.getOpcode() == TargetOpcode::G_SEXT ||
             MI.getOpcode() == TargetOpcode::G_ZEXT ||
             MI.getOpcode() == TargetOpcode::G_ANYEXT);
      assert(Size > 0);

      Register SrcReg = MI.getOperand(1).getReg();
      LLT SrcType = MRI.getType(SrcReg);
      unsigned SrcSize = SrcType.getSizeInBits();

      // Currently we don't go into vectors.
      if (!SrcType.isScalar())
        return CurrentBest;

      if (StartBit + Size > SrcSize)
        return CurrentBest;

      if (StartBit == 0 && SrcType.getSizeInBits() == Size)
        CurrentBest = SrcReg;
      return findValueFromDefImpl(SrcReg, StartBit, Size);
    }

    /// Given a G_TRUNC op \p MI and a start bit and size, try to find
    /// the origin of the value defined by that start position and size.
    ///
    /// \returns a register with the requested size, or the current best
    /// register found during the current query.
    Register findValueFromTrunc(MachineInstr &MI, unsigned StartBit,
                                unsigned Size) {
      assert(MI.getOpcode() == TargetOpcode::G_TRUNC);
      assert(Size > 0);

      Register SrcReg = MI.getOperand(1).getReg();
      LLT SrcType = MRI.getType(SrcReg);

      // Currently we don't go into vectors.
      if (!SrcType.isScalar())
        return CurrentBest;

      return findValueFromDefImpl(SrcReg, StartBit, Size);
    }

    /// Internal implementation for findValueFromDef(). findValueFromDef()
    /// initializes some data like the CurrentBest register, which this method
    /// and its callees rely upon.
    Register findValueFromDefImpl(Register DefReg, unsigned StartBit,
                                  unsigned Size) {
      std::optional<DefinitionAndSourceRegister> DefSrcReg =
          getDefSrcRegIgnoringCopies(DefReg, MRI);
      MachineInstr *Def = DefSrcReg->MI;
      DefReg = DefSrcReg->Reg;
      // If the instruction has a single def, then simply delegate the search.
      // For unmerge however, with multiple defs, we need to compute the offset
      // into the source of the unmerge.
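      // E.g. (an illustrative sketch): for
      //   %a:_(s8), %b:_(s8) = G_UNMERGE_VALUES %x(s16)
      // a query on %b with StartBit=0 is rewritten as a query on %x with
      // StartBit=8, since %b covers bits [8, 16) of %x.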
      switch (Def->getOpcode()) {
      case TargetOpcode::G_CONCAT_VECTORS:
        return findValueFromConcat(cast<GConcatVectors>(*Def), StartBit, Size);
      case TargetOpcode::G_UNMERGE_VALUES: {
        unsigned DefStartBit = 0;
        unsigned DefSize = MRI.getType(DefReg).getSizeInBits();
        for (const auto &MO : Def->defs()) {
          if (MO.getReg() == DefReg)
            break;
          DefStartBit += DefSize;
        }
        Register SrcReg = Def->getOperand(Def->getNumOperands() - 1).getReg();
        Register SrcOriginReg =
            findValueFromDefImpl(SrcReg, StartBit + DefStartBit, Size);
        if (SrcOriginReg)
          return SrcOriginReg;
        // Failed to find a further value. If the StartBit and Size perfectly
        // covered the requested DefReg, return that since it's better than
        // nothing.
        if (StartBit == 0 && Size == DefSize)
          return DefReg;
        return CurrentBest;
      }
      case TargetOpcode::G_BUILD_VECTOR:
        return findValueFromBuildVector(cast<GBuildVector>(*Def), StartBit,
                                        Size);
      case TargetOpcode::G_INSERT:
        return findValueFromInsert(*Def, StartBit, Size);
      case TargetOpcode::G_TRUNC:
        return findValueFromTrunc(*Def, StartBit, Size);
      case TargetOpcode::G_SEXT:
      case TargetOpcode::G_ZEXT:
      case TargetOpcode::G_ANYEXT:
        return findValueFromExt(*Def, StartBit, Size);
      default:
        return CurrentBest;
      }
    }

  public:
    ArtifactValueFinder(MachineRegisterInfo &Mri, MachineIRBuilder &Builder,
                        const LegalizerInfo &Info)
        : MRI(Mri), MIB(Builder), LI(Info) {}

    /// Try to find a source of the value defined in the def \p DefReg,
    /// starting at position \p StartBit with size \p Size.
    /// \returns a register with the requested size, or an empty Register if no
    /// better value could be found.
    Register findValueFromDef(Register DefReg, unsigned StartBit,
                              unsigned Size) {
      CurrentBest = Register();
      Register FoundReg = findValueFromDefImpl(DefReg, StartBit, Size);
      return FoundReg != DefReg ? FoundReg : Register();
    }

    /// Try to combine the defs of an unmerge \p MI by attempting to find
    /// values that provide the bits for each def reg.
    /// \returns true if all the defs of the unmerge have been made dead.
    bool tryCombineUnmergeDefs(GUnmerge &MI, GISelChangeObserver &Observer,
                               SmallVectorImpl<Register> &UpdatedDefs) {
      unsigned NumDefs = MI.getNumDefs();
      LLT DestTy = MRI.getType(MI.getReg(0));

      SmallBitVector DeadDefs(NumDefs);
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        Register DefReg = MI.getReg(DefIdx);
        if (MRI.use_nodbg_empty(DefReg)) {
          DeadDefs[DefIdx] = true;
          continue;
        }
        Register FoundVal = findValueFromDef(DefReg, 0, DestTy.getSizeInBits());
        if (!FoundVal)
          continue;
        if (MRI.getType(FoundVal) != DestTy)
          continue;

        replaceRegOrBuildCopy(DefReg, FoundVal, MRI, MIB, UpdatedDefs,
                              Observer);
        // We only want to replace the uses, not the def of the old reg.
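        // If the register was replaced, MRI.replaceRegWith also rewrote MI's
        // own def operand; point that operand back at DefReg so the unmerge
        // still defines it (the old register, which now has no uses).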
        Observer.changingInstr(MI);
        MI.getOperand(DefIdx).setReg(DefReg);
        Observer.changedInstr(MI);
        DeadDefs[DefIdx] = true;
      }
      return DeadDefs.all();
    }

    GUnmerge *findUnmergeThatDefinesReg(Register Reg, unsigned Size,
                                        unsigned &DefOperandIdx) {
      if (Register Def = findValueFromDefImpl(Reg, 0, Size)) {
        if (auto *Unmerge = dyn_cast<GUnmerge>(MRI.getVRegDef(Def))) {
          DefOperandIdx =
              Unmerge->findRegisterDefOperandIdx(Def, /*TRI=*/nullptr);
          return Unmerge;
        }
      }
      return nullptr;
    }

    // Check if a sequence of elements from a merge-like instruction is defined
    // by another sequence of elements defined by an unmerge. Most often this
    // is the same sequence. Search for elements using findValueFromDefImpl.
    bool isSequenceFromUnmerge(GMergeLikeInstr &MI, unsigned MergeStartIdx,
                               GUnmerge *Unmerge, unsigned UnmergeIdxStart,
                               unsigned NumElts, unsigned EltSize,
                               bool AllowUndef) {
      assert(MergeStartIdx + NumElts <= MI.getNumSources());
      for (unsigned i = MergeStartIdx; i < MergeStartIdx + NumElts; ++i) {
        unsigned EltUnmergeIdx;
        GUnmerge *EltUnmerge = findUnmergeThatDefinesReg(
            MI.getSourceReg(i), EltSize, EltUnmergeIdx);
        // Check if source i comes from the same Unmerge.
        if (EltUnmerge == Unmerge) {
          // Check that source i's def has the same index in the sequence in
          // Unmerge.
          if (i - MergeStartIdx != EltUnmergeIdx - UnmergeIdxStart)
            return false;
        } else if (!AllowUndef ||
                   MRI.getVRegDef(MI.getSourceReg(i))->getOpcode() !=
                       TargetOpcode::G_IMPLICIT_DEF)
          return false;
      }
      return true;
    }

    bool tryCombineMergeLike(GMergeLikeInstr &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             SmallVectorImpl<Register> &UpdatedDefs,
                             GISelChangeObserver &Observer) {
      Register Elt0 = MI.getSourceReg(0);
      LLT EltTy = MRI.getType(Elt0);
      unsigned EltSize = EltTy.getSizeInBits();

      unsigned Elt0UnmergeIdx;
      // Search for an unmerge that will be a candidate for the combine.
      auto *Unmerge = findUnmergeThatDefinesReg(Elt0, EltSize, Elt0UnmergeIdx);
      if (!Unmerge)
        return false;

      unsigned NumMIElts = MI.getNumSources();
      Register Dst = MI.getReg(0);
      LLT DstTy = MRI.getType(Dst);
      Register UnmergeSrc = Unmerge->getSourceReg();
      LLT UnmergeSrcTy = MRI.getType(UnmergeSrc);

      // Recognize a copy of UnmergeSrc to Dst.
      // Unmerge UnmergeSrc and reassemble it using a merge-like opcode into
      // Dst.
      //
      //   %0:_(EltTy), %1, ... = G_UNMERGE_VALUES %UnmergeSrc:_(Ty)
      //   %Dst:_(Ty) = G_merge_like_opcode %0:_(EltTy), %1, ...
      //
      //   %Dst:_(Ty) = COPY %UnmergeSrc:_(Ty)
      if ((DstTy == UnmergeSrcTy) && (Elt0UnmergeIdx == 0)) {
        if (!isSequenceFromUnmerge(MI, 0, Unmerge, 0, NumMIElts, EltSize,
                                   /*AllowUndef=*/DstTy.isVector()))
          return false;

        replaceRegOrBuildCopy(Dst, UnmergeSrc, MRI, MIB, UpdatedDefs, Observer);
        DeadInsts.push_back(&MI);
        return true;
      }

      // Recognize an UnmergeSrc that can be unmerged to DstTy directly.
      // Types have to be either both vector or both non-vector types.
      // Merge-like opcodes are combined one at a time. The first one creates a
      // new unmerge; the following ones should use the same unmerge (the
      // builder performs CSE).
      //
      //   %0:_(EltTy), %1, %2, %3 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
      //   %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1
      //   %AnotherDst:_(DstTy) = G_merge_like_opcode %2:_(EltTy), %3
      //
      //   %Dst:_(DstTy), %AnotherDst = G_UNMERGE_VALUES %UnmergeSrc
      if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
          (Elt0UnmergeIdx % NumMIElts == 0) &&
          getCoverTy(UnmergeSrcTy, DstTy) == UnmergeSrcTy) {
        if (!isSequenceFromUnmerge(MI, 0, Unmerge, Elt0UnmergeIdx, NumMIElts,
                                   EltSize, false))
          return false;
        MIB.setInstrAndDebugLoc(MI);
        auto NewUnmerge = MIB.buildUnmerge(DstTy, Unmerge->getSourceReg());
        unsigned DstIdx = (Elt0UnmergeIdx * EltSize) / DstTy.getSizeInBits();
        replaceRegOrBuildCopy(Dst, NewUnmerge.getReg(DstIdx), MRI, MIB,
                              UpdatedDefs, Observer);
        DeadInsts.push_back(&MI);
        return true;
      }

      // Recognize when multiple unmerged sources with UnmergeSrcTy type
      // can be merged into Dst with DstTy type directly.
      // Types have to be either both vector or both non-vector types.
      //
      //   %0:_(EltTy), %1 = G_UNMERGE_VALUES %UnmergeSrc:_(UnmergeSrcTy)
      //   %2:_(EltTy), %3 = G_UNMERGE_VALUES %AnotherUnmergeSrc:_(UnmergeSrcTy)
      //   %Dst:_(DstTy) = G_merge_like_opcode %0:_(EltTy), %1, %2, %3
      //
      //   %Dst:_(DstTy) = G_merge_like_opcode %UnmergeSrc, %AnotherUnmergeSrc
      if ((DstTy.isVector() == UnmergeSrcTy.isVector()) &&
          getCoverTy(DstTy, UnmergeSrcTy) == DstTy) {
        SmallVector<Register, 4> ConcatSources;
        unsigned NumElts = Unmerge->getNumDefs();
        for (unsigned i = 0; i < MI.getNumSources(); i += NumElts) {
          unsigned EltUnmergeIdx;
          auto *UnmergeI = findUnmergeThatDefinesReg(MI.getSourceReg(i),
                                                     EltSize, EltUnmergeIdx);
          // All unmerges have to be the same size.
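          // In addition, each group of MI's sources must start at def 0 of
          // its unmerge, so that every unmerge source is consumed whole.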
          if ((!UnmergeI) || (UnmergeI->getNumDefs() != NumElts) ||
              (EltUnmergeIdx != 0))
            return false;
          if (!isSequenceFromUnmerge(MI, i, UnmergeI, 0, NumElts, EltSize,
                                     false))
            return false;
          ConcatSources.push_back(UnmergeI->getSourceReg());
        }

        MIB.setInstrAndDebugLoc(MI);
        MIB.buildMergeLikeInstr(Dst, ConcatSources);
        DeadInsts.push_back(&MI);
        return true;
      }

      return false;
    }
  };

  bool tryCombineUnmergeValues(GUnmerge &MI,
                               SmallVectorImpl<MachineInstr *> &DeadInsts,
                               SmallVectorImpl<Register> &UpdatedDefs,
                               GISelChangeObserver &Observer) {
    unsigned NumDefs = MI.getNumDefs();
    Register SrcReg = MI.getSourceReg();
    MachineInstr *SrcDef = getDefIgnoringCopies(SrcReg, MRI);
    if (!SrcDef)
      return false;

    LLT OpTy = MRI.getType(SrcReg);
    LLT DestTy = MRI.getType(MI.getReg(0));
    unsigned SrcDefIdx = getDefIndex(*SrcDef, SrcReg);

    Builder.setInstrAndDebugLoc(MI);

    ArtifactValueFinder Finder(MRI, Builder, LI);
    if (Finder.tryCombineUnmergeDefs(MI, Observer, UpdatedDefs)) {
      markInstAndDefDead(MI, *SrcDef, DeadInsts, SrcDefIdx);
      return true;
    }

    if (auto *SrcUnmerge = dyn_cast<GUnmerge>(SrcDef)) {
      // %0:_(<4 x s16>) = G_FOO
      // %1:_(<2 x s16>), %2:_(<2 x s16>) = G_UNMERGE_VALUES %0
      // %3:_(s16), %4:_(s16) = G_UNMERGE_VALUES %1
      //
      // %3:_(s16), %4:_(s16), %5:_(s16), %6:_(s16) = G_UNMERGE_VALUES %0
      Register SrcUnmergeSrc = SrcUnmerge->getSourceReg();
      LLT SrcUnmergeSrcTy = MRI.getType(SrcUnmergeSrc);

      // If we need to decrease the number of vector elements in the result
      // type of an unmerge, this would involve the creation of an equivalent
      // unmerge to copy back to the original result registers.
      LegalizeActionStep ActionStep = LI.getAction(
          {TargetOpcode::G_UNMERGE_VALUES, {OpTy, SrcUnmergeSrcTy}});
      switch (ActionStep.Action) {
      case LegalizeActions::Lower:
      case LegalizeActions::Unsupported:
        break;
      case LegalizeActions::FewerElements:
      case LegalizeActions::NarrowScalar:
        if (ActionStep.TypeIdx == 1)
          return false;
        break;
      default:
        return false;
      }

      auto NewUnmerge = Builder.buildUnmerge(DestTy, SrcUnmergeSrc);

      // TODO: Should we try to process out the other defs now? If the other
      // defs of the source unmerge are also unmerged, we end up with a
      // separate unmerge for each one.
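      // Def I of MI covers the same bits as def (SrcDefIdx * NumDefs + I) of
      // the new, flattened unmerge.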
      for (unsigned I = 0; I != NumDefs; ++I) {
        Register Def = MI.getReg(I);
        replaceRegOrBuildCopy(Def, NewUnmerge.getReg(SrcDefIdx * NumDefs + I),
                              MRI, Builder, UpdatedDefs, Observer);
      }

      markInstAndDefDead(MI, *SrcUnmerge, DeadInsts, SrcDefIdx);
      return true;
    }

    MachineInstr *MergeI = SrcDef;
    unsigned ConvertOp = 0;

    // Handle intermediate conversions.
    unsigned SrcOp = SrcDef->getOpcode();
    if (isArtifactCast(SrcOp)) {
      ConvertOp = SrcOp;
      MergeI = getDefIgnoringCopies(SrcDef->getOperand(1).getReg(), MRI);
    }

    if (!MergeI || !canFoldMergeOpcode(MergeI->getOpcode(),
                                       ConvertOp, OpTy, DestTy)) {
      // We might have a chance to combine later by trying to combine
      // unmerge(cast) first.
      return tryFoldUnmergeCast(MI, *SrcDef, DeadInsts, UpdatedDefs);
    }

    const unsigned NumMergeRegs = MergeI->getNumOperands() - 1;

    if (NumMergeRegs < NumDefs) {
      if (NumDefs % NumMergeRegs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to UNMERGEs, for example
      //   %1 = G_MERGE_VALUES %4, %5
      //   %9, %10, %11, %12 = G_UNMERGE_VALUES %1
      // to
      //   %9, %10 = G_UNMERGE_VALUES %4
      //   %11, %12 = G_UNMERGE_VALUES %5

      const unsigned NewNumDefs = NumDefs / NumMergeRegs;
      for (unsigned Idx = 0; Idx < NumMergeRegs; ++Idx) {
        SmallVector<Register, 8> DstRegs;
        for (unsigned j = 0, DefIdx = Idx * NewNumDefs; j < NewNumDefs;
             ++j, ++DefIdx)
          DstRegs.push_back(MI.getReg(DefIdx));

        if (ConvertOp) {
          LLT MergeDstTy = MRI.getType(SrcDef->getOperand(0).getReg());

          // This is a vector that is being split and cast. Extract to the
          // element type, and do the conversion on the scalars (or smaller
          // vectors).
          LLT MergeEltTy = MergeDstTy.divide(NumMergeRegs);

          // Handle split to smaller vectors, with conversions.
          // %2(<8 x s8>) = G_CONCAT_VECTORS %0(<4 x s8>), %1(<4 x s8>)
          // %3(<8 x s16>) = G_SEXT %2
          // %4(<2 x s16>), %5(<2 x s16>), %6(<2 x s16>), %7(<2 x s16>) =
          //     G_UNMERGE_VALUES %3
          //
          // =>
          //
          // %8(<4 x s16>) = G_SEXT %0
          // %9(<4 x s16>) = G_SEXT %1
          // %4(<2 x s16>), %5(<2 x s16>) = G_UNMERGE_VALUES %8
          // %6(<2 x s16>), %7(<2 x s16>) = G_UNMERGE_VALUES %9

          Register TmpReg = MRI.createGenericVirtualRegister(MergeEltTy);
          Builder.buildInstr(ConvertOp, {TmpReg},
                             {MergeI->getOperand(Idx + 1).getReg()});
          Builder.buildUnmerge(DstRegs, TmpReg);
        } else {
          Builder.buildUnmerge(DstRegs, MergeI->getOperand(Idx + 1).getReg());
        }
        UpdatedDefs.append(DstRegs.begin(), DstRegs.end());
      }

    } else if (NumMergeRegs > NumDefs) {
      if (ConvertOp != 0 || NumMergeRegs % NumDefs != 0)
        return false;

      Builder.setInstr(MI);
      // Transform to MERGEs
      //   %6 = G_MERGE_VALUES %17, %18, %19, %20
      //   %7, %8 = G_UNMERGE_VALUES %6
      // to
      //   %7 = G_MERGE_VALUES %17, %18
      //   %8 = G_MERGE_VALUES %19, %20

      const unsigned NumRegs = NumMergeRegs / NumDefs;
      for (unsigned DefIdx = 0; DefIdx < NumDefs; ++DefIdx) {
        SmallVector<Register, 8> Regs;
        for (unsigned j = 0, Idx = NumRegs * DefIdx + 1; j < NumRegs;
             ++j, ++Idx)
          Regs.push_back(MergeI->getOperand(Idx).getReg());

        Register DefReg = MI.getReg(DefIdx);
        Builder.buildMergeLikeInstr(DefReg, Regs);
        UpdatedDefs.push_back(DefReg);
      }

    } else {
      LLT MergeSrcTy = MRI.getType(MergeI->getOperand(1).getReg());

      if (!ConvertOp && DestTy != MergeSrcTy)
        ConvertOp = TargetOpcode::G_BITCAST;

      if (ConvertOp) {
        Builder.setInstr(MI);

        for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
          Register DefReg = MI.getOperand(Idx).getReg();
          Register MergeSrc = MergeI->getOperand(Idx + 1).getReg();

          if (!MRI.use_empty(DefReg)) {
            Builder.buildInstr(ConvertOp, {DefReg}, {MergeSrc});
            UpdatedDefs.push_back(DefReg);
          }
        }

        markInstAndDefDead(MI, *MergeI, DeadInsts);
        return true;
      }

      assert(DestTy == MergeSrcTy &&
             "Bitcast and the other kinds of conversions should "
             "have happened earlier");

      Builder.setInstr(MI);
      for (unsigned Idx = 0; Idx < NumDefs; ++Idx) {
        Register DstReg = MI.getOperand(Idx).getReg();
        Register SrcReg = MergeI->getOperand(Idx + 1).getReg();
        replaceRegOrBuildCopy(DstReg, SrcReg, MRI, Builder, UpdatedDefs,
                              Observer);
      }
    }

    markInstAndDefDead(MI, *MergeI, DeadInsts);
    return true;
  }

  bool tryCombineExtract(MachineInstr &MI,
                         SmallVectorImpl<MachineInstr *> &DeadInsts,
                         SmallVectorImpl<Register> &UpdatedDefs) {
    assert(MI.getOpcode() == TargetOpcode::G_EXTRACT);

    // Try to use the source registers from a G_MERGE_VALUES
    //
    // %2 = G_MERGE_VALUES %0, %1
    // %3 = G_EXTRACT %2, N
    // =>
    //
    // for N < %2.getSizeInBits() / 2
    //   %3 = G_EXTRACT %0, N
    //
    // for N >= %2.getSizeInBits() / 2
    //   %3 = G_EXTRACT %1, (N - %0.getSizeInBits())

    Register SrcReg = lookThroughCopyInstrs(MI.getOperand(1).getReg());
    MachineInstr *MergeI = MRI.getVRegDef(SrcReg);
    if (!MergeI || !isa<GMergeLikeInstr>(MergeI))
      return false;

    Register DstReg = MI.getOperand(0).getReg();
    LLT DstTy = MRI.getType(DstReg);
    LLT SrcTy = MRI.getType(SrcReg);

    // TODO: Do we need to check if the resulting extract is supported?
    unsigned ExtractDstSize = DstTy.getSizeInBits();
    unsigned Offset = MI.getOperand(2).getImm();
    unsigned NumMergeSrcs = MergeI->getNumOperands() - 1;
    unsigned MergeSrcSize = SrcTy.getSizeInBits() / NumMergeSrcs;
    unsigned MergeSrcIdx = Offset / MergeSrcSize;

    // Compute the index of the merge source containing the last bit the
    // extract needs.
    unsigned EndMergeSrcIdx = (Offset + ExtractDstSize - 1) / MergeSrcSize;

    // Can't handle the case where the extract spans multiple inputs.
    if (MergeSrcIdx != EndMergeSrcIdx)
      return false;

    // TODO: We could modify MI in place in most cases.
    Builder.setInstr(MI);
    Builder.buildExtract(DstReg, MergeI->getOperand(MergeSrcIdx + 1).getReg(),
                         Offset - MergeSrcIdx * MergeSrcSize);
    UpdatedDefs.push_back(DstReg);
    markInstAndDefDead(MI, *MergeI, DeadInsts);
    return true;
  }

  /// Try to combine away MI.
  /// Returns true if it combined away the MI.
  /// Adds instructions that are dead as a result of the combine
  /// into DeadInsts, which can include MI.
  bool tryCombineInstruction(MachineInstr &MI,
                             SmallVectorImpl<MachineInstr *> &DeadInsts,
                             GISelObserverWrapper &WrapperObserver) {
    ArtifactValueFinder Finder(MRI, Builder, LI);

    // This might be a recursive call, and we might have DeadInsts already
    // populated. To avoid bad things happening later with multiple vreg defs
    // etc, process the dead instructions now if any.
    if (!DeadInsts.empty())
      deleteMarkedDeadInsts(DeadInsts, WrapperObserver);

    // Put here every vreg that was redefined in such a way that it's at least
    // possible that one (or more) of its users (immediate or COPY-separated)
    // could become artifact combinable with the new definition (or the
    // instruction reachable from it through a chain of copies if any).
    SmallVector<Register, 4> UpdatedDefs;
    bool Changed = false;
    switch (MI.getOpcode()) {
    default:
      return false;
    case TargetOpcode::G_ANYEXT:
      Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_ZEXT:
      Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_SEXT:
      Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_UNMERGE_VALUES:
      Changed = tryCombineUnmergeValues(cast<GUnmerge>(MI), DeadInsts,
                                        UpdatedDefs, WrapperObserver);
      break;
    case TargetOpcode::G_MERGE_VALUES:
    case TargetOpcode::G_BUILD_VECTOR:
    case TargetOpcode::G_CONCAT_VECTORS:
      // If any of the users of this merge are an unmerge, then add them to the
      // artifact worklist in case there's folding that can be done looking up.
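      // G_TRUNC users are thrown in as well, since trunc(merge-like) is also
      // combinable and benefits from a revisit once this merge changes.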
      for (MachineInstr &U : MRI.use_instructions(MI.getOperand(0).getReg())) {
        if (U.getOpcode() == TargetOpcode::G_UNMERGE_VALUES ||
            U.getOpcode() == TargetOpcode::G_TRUNC) {
          UpdatedDefs.push_back(MI.getOperand(0).getReg());
          break;
        }
      }
      Changed = Finder.tryCombineMergeLike(cast<GMergeLikeInstr>(MI),
                                           DeadInsts, UpdatedDefs,
                                           WrapperObserver);
      break;
    case TargetOpcode::G_EXTRACT:
      Changed = tryCombineExtract(MI, DeadInsts, UpdatedDefs);
      break;
    case TargetOpcode::G_TRUNC:
      Changed = tryCombineTrunc(MI, DeadInsts, UpdatedDefs, WrapperObserver);
      if (!Changed) {
        // Try to combine truncates away even if they are legal. As all
        // artifact combines at the moment look only "up" the def-use chains,
        // we achieve that by throwing truncates' users (with look through
        // copies) into the ArtifactList again.
        UpdatedDefs.push_back(MI.getOperand(0).getReg());
      }
      break;
    }
    // If the main loop through the ArtifactList found at least one combinable
    // pair of artifacts, not only combine it away (as done above), but also
    // follow the def-use chain from there to combine everything that can be
    // combined within this def-use chain of artifacts.
    while (!UpdatedDefs.empty()) {
      Register NewDef = UpdatedDefs.pop_back_val();
      assert(NewDef.isVirtual() && "Unexpected redefinition of a physreg");
      for (MachineInstr &Use : MRI.use_instructions(NewDef)) {
        switch (Use.getOpcode()) {
        // Keep this list in sync with the list of all artifact combines.
        case TargetOpcode::G_ANYEXT:
        case TargetOpcode::G_ZEXT:
        case TargetOpcode::G_SEXT:
        case TargetOpcode::G_UNMERGE_VALUES:
        case TargetOpcode::G_EXTRACT:
        case TargetOpcode::G_TRUNC:
        case TargetOpcode::G_BUILD_VECTOR:
          // Adding Use to ArtifactList.
          WrapperObserver.changedInstr(Use);
          break;
        case TargetOpcode::G_ASSERT_SEXT:
        case TargetOpcode::G_ASSERT_ZEXT:
        case TargetOpcode::G_ASSERT_ALIGN:
        case TargetOpcode::COPY: {
          Register Copy = Use.getOperand(0).getReg();
          if (Copy.isVirtual())
            UpdatedDefs.push_back(Copy);
          break;
        }
        default:
          // If we do not have an artifact combine for the opcode, there is no
          // point in adding it to the ArtifactList as nothing interesting will
          // be done to it anyway.
          break;
        }
      }
    }
    return Changed;
  }

private:
  static Register getArtifactSrcReg(const MachineInstr &MI) {
    switch (MI.getOpcode()) {
    case TargetOpcode::COPY:
    case TargetOpcode::G_TRUNC:
    case TargetOpcode::G_ZEXT:
    case TargetOpcode::G_ANYEXT:
    case TargetOpcode::G_SEXT:
    case TargetOpcode::G_EXTRACT:
    case TargetOpcode::G_ASSERT_SEXT:
    case TargetOpcode::G_ASSERT_ZEXT:
    case TargetOpcode::G_ASSERT_ALIGN:
      return MI.getOperand(1).getReg();
    case TargetOpcode::G_UNMERGE_VALUES:
      return MI.getOperand(MI.getNumOperands() - 1).getReg();
    default:
      llvm_unreachable("Not a legalization artifact");
    }
  }

  /// Mark a def of one of MI's original operands, DefMI, as dead if changing
  /// MI (either by killing it or changing operands) results in DefMI being
  /// dead too. In-between COPYs or artifact-casts are also collected if they
  /// are dead.
  /// MI is not marked dead.
  void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
                   SmallVectorImpl<MachineInstr *> &DeadInsts,
                   unsigned DefIdx = 0) {
    // Collect all the copy instructions that are made dead, due to deleting
    // this instruction. Collect all of them until the Trunc(DefMI).
    // Eg,
    //   %1(s1) = G_TRUNC %0(s32)
    //   %2(s1) = COPY %1(s1)
    //   %3(s1) = COPY %2(s1)
    //   %4(s32) = G_ANYEXT %3(s1)
    // In this case, we would have replaced %4 with a copy of %0,
    // and as a result, %3, %2, %1 are dead.
    MachineInstr *PrevMI = &MI;
    while (PrevMI != &DefMI) {
      Register PrevRegSrc = getArtifactSrcReg(*PrevMI);

      MachineInstr *TmpDef = MRI.getVRegDef(PrevRegSrc);
      if (MRI.hasOneUse(PrevRegSrc)) {
        if (TmpDef != &DefMI) {
          assert((TmpDef->getOpcode() == TargetOpcode::COPY ||
                  isArtifactCast(TmpDef->getOpcode()) ||
                  isPreISelGenericOptimizationHint(TmpDef->getOpcode())) &&
                 "Expecting copy or artifact cast here");

          DeadInsts.push_back(TmpDef);
        }
      } else
        break;
      PrevMI = TmpDef;
    }

    if (PrevMI == &DefMI) {
      unsigned I = 0;
      bool IsDead = true;
      for (MachineOperand &Def : DefMI.defs()) {
        if (I != DefIdx) {
          if (!MRI.use_empty(Def.getReg())) {
            IsDead = false;
            break;
          }
        } else {
          if (!MRI.hasOneUse(DefMI.getOperand(DefIdx).getReg()))
            break;
        }

        ++I;
      }

      if (IsDead)
        DeadInsts.push_back(&DefMI);
    }
  }

  /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
  /// dead due to MI being killed, then mark DefMI as dead too.
  /// Some of the combines (extends(trunc)) try to walk through redundant
  /// copies in between the extends and the truncs, and this attempts to
  /// collect the in-between copies if they're dead.
  void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
                          SmallVectorImpl<MachineInstr *> &DeadInsts,
                          unsigned DefIdx = 0) {
    DeadInsts.push_back(&MI);
    markDefDead(MI, DefMI, DeadInsts, DefIdx);
  }

  /// Erase the dead instructions in the list and call the observer hooks.
  /// Normally the Legalizer will deal with erasing instructions that have been
  /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to
  /// process instructions which have been marked dead, but otherwise break the
  /// MIR by introducing multiple vreg defs. For those cases, allow the
  /// combines to explicitly delete the instructions before we run into
  /// trouble.
  void deleteMarkedDeadInsts(SmallVectorImpl<MachineInstr *> &DeadInsts,
                             GISelObserverWrapper &WrapperObserver) {
    for (auto *DeadMI : DeadInsts) {
      LLVM_DEBUG(dbgs() << *DeadMI << "Is dead, eagerly deleting\n");
      WrapperObserver.erasingInstr(*DeadMI);
      DeadMI->eraseFromParent();
    }
    DeadInsts.clear();
  }

  /// Checks if the target legalizer info has specified anything about the
  /// instruction, or if unsupported.
  bool isInstUnsupported(const LegalityQuery &Query) const {
    using namespace LegalizeActions;
    auto Step = LI.getAction(Query);
    return Step.Action == Unsupported || Step.Action == NotFound;
  }

  bool isInstLegal(const LegalityQuery &Query) const {
    return LI.getAction(Query).Action == LegalizeActions::Legal;
  }

  bool isConstantUnsupported(LLT Ty) const {
    if (!Ty.isVector())
      return isInstUnsupported({TargetOpcode::G_CONSTANT, {Ty}});

    LLT EltTy = Ty.getElementType();
    return isInstUnsupported({TargetOpcode::G_CONSTANT, {EltTy}}) ||
           isInstUnsupported({TargetOpcode::G_BUILD_VECTOR, {Ty, EltTy}});
  }

  /// Looks through copy instructions and returns the actual
  /// source register.
  Register lookThroughCopyInstrs(Register Reg) {
    Register TmpReg = getSrcRegIgnoringCopies(Reg, MRI);
    return TmpReg.isValid() ? TmpReg : Reg;
  }
};

} // namespace llvm

#endif // LLVM_CODEGEN_GLOBALISEL_LEGALIZATIONARTIFACTCOMBINER_H