//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Post-legalization lowering for instructions.
///
/// This is used to offload pattern matching from the selector.
///
/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
/// a G_ZIP, G_UZP, etc.
///
/// General optimization combines should be handled by either the
/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
///
//===----------------------------------------------------------------------===//

#include "AArch64GlobalISelUtils.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "TargetInfo/AArch64TargetInfo.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <optional>

#define DEBUG_TYPE "aarch64-postlegalizer-lowering"

using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
///
/// Used for matching target-supported shuffles before codegen.
struct ShuffleVectorPseudo {
  unsigned Opc;                 ///< Opcode for the instruction. (E.g. G_ZIP1)
  Register Dst;                 ///< Destination register.
  SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
  ShuffleVectorPseudo(unsigned Opc, Register Dst,
                      std::initializer_list<SrcOp> SrcOps)
      : Opc(Opc), Dst(Dst), SrcOps(SrcOps){};
  ShuffleVectorPseudo() = default;
};

/// Check if a vector shuffle corresponds to a REV instruction with the
/// specified blocksize.
static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
                      unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for REV are: 16, 32, 64");
  assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");

  unsigned BlockElts = M[0] + 1;

  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSize;

  if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
    return false;

  for (unsigned i = 0; i < NumElts; ++i) {
    // Ignore undef indices.
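    // Every defined index must be the block-reversed position; e.g. a REV64 of
    // <4 x s32> corresponds to the mask <1, 0, 3, 2>.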
    if (M[i] < 0)
      continue;
    if (static_cast<unsigned>(M[i]) !=
        (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}

/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
                      unsigned &WhichResult) {
  if (NumElts % 2 != 0)
    return false;
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i < NumElts; i += 2) {
    if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
        (M[i + 1] >= 0 &&
         static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
      return false;
  }
  return true;
}

/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
/// sources of the shuffle are different.
static std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
                                                           unsigned NumElts) {
  // Look for the first non-undef element.
  auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
  if (FirstRealElt == M.end())
    return std::nullopt;

  // Use APInt to handle overflow when calculating expected element.
  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);

  // The following shuffle indices must be the successive elements after the
  // first real element.
  if (any_of(
          make_range(std::next(FirstRealElt), M.end()),
          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
    return std::nullopt;

  // The index of an EXT is the first element if it is not UNDEF.
  // Watch out for the beginning UNDEFs. The EXT index should be the expected
  // value of the first element. E.g.
  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
  // ExpectedElt is the last mask index plus 1.
  uint64_t Imm = ExpectedElt.getZExtValue();
  bool ReverseExt = false;

  // There are two different cases that require reversing the input vectors.
  // For example, for vector <4 x i32> we have the following cases,
  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
  // For both cases, we finally use mask <5, 6, 7, 0>, which requires
  // reversing the two input vectors.
  if (Imm < NumElts)
    ReverseExt = true;
  else
    Imm -= NumElts;
  return std::make_pair(ReverseExt, Imm);
}

/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
                      unsigned &WhichResult) {
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i != NumElts; ++i) {
    // Skip undef indices.
    if (M[i] < 0)
      continue;
    if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
      return false;
  }
  return true;
}

/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
                      unsigned &WhichResult) {
  if (NumElts % 2 != 0)
    return false;

  // 0 means use ZIP1, 1 means use ZIP2.
  WhichResult = (M[0] == 0 ? 0 : 1);
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
        (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
      return false;
    Idx += 1;
  }
  return true;
}

/// Helper function for matchINS.
///
/// \returns a value when \p M is an ins mask for \p NumInputElements.
///
/// First element of the returned pair is true when the produced
/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
///
/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
static std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
                                                     int NumInputElements) {
  if (M.size() != static_cast<size_t>(NumInputElements))
    return std::nullopt;
  int NumLHSMatch = 0, NumRHSMatch = 0;
  int LastLHSMismatch = -1, LastRHSMismatch = -1;
  for (int Idx = 0; Idx < NumInputElements; ++Idx) {
    if (M[Idx] == -1) {
      ++NumLHSMatch;
      ++NumRHSMatch;
      continue;
    }
    M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
    M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
  }
  const int NumNeededToMatch = NumInputElements - 1;
  if (NumLHSMatch == NumNeededToMatch)
    return std::make_pair(true, LastLHSMismatch);
  if (NumRHSMatch == NumNeededToMatch)
    return std::make_pair(false, LastRHSMismatch);
  return std::nullopt;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
/// G_REV instruction. Stores the appropriate G_REV opcode in \p MatchInfo.
static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Dst);
  unsigned EltSize = Ty.getScalarSizeInBits();

  // Element size for a rev cannot be 64.
  if (EltSize == 64)
    return false;

  unsigned NumElts = Ty.getNumElements();

  // Try to produce G_REV64
  if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) {
    MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src});
    return true;
  }

  // TODO: Produce G_REV32 and G_REV16 once we have proper legalization support.
  // This should be identical to above, but with a constant 32 and constant
  // 16.
  return false;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_TRN1 or G_TRN2 instruction.
static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_UZP1 or G_UZP2 instruction.
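/// e.g. for <4 x s32>, G_UZP1 corresponds to the mask <0, 2, 4, 6> and G_UZP2
/// to the mask <1, 3, 5, 7>.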
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isZipMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

/// Helper function for matchDup.
static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
                                        MachineRegisterInfo &MRI,
                                        ShuffleVectorPseudo &MatchInfo) {
  if (Lane != 0)
    return false;

  // Try to match a vector splat operation into a dup instruction.
  // We're looking for this pattern:
  //
  // %scalar:gpr(s64) = COPY $x0
  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
  // %cst0:gpr(s32) = G_CONSTANT i32 0
  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>)
  //
  // ...into:
  // %splat = G_DUP %scalar

  // Begin matching the insert.
  auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
                             MI.getOperand(1).getReg(), MRI);
  if (!InsMI)
    return false;
  // Match the undef vector operand.
  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
                    MRI))
    return false;

  // Match the index constant 0.
  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
    return false;

  MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
                                  {InsMI->getOperand(2).getReg()});
  return true;
}

/// Helper function for matchDup.
static bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
                                    MachineRegisterInfo &MRI,
                                    ShuffleVectorPseudo &MatchInfo) {
  assert(Lane >= 0 && "Expected positive lane?");
  // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference the
  // lane's definition directly.
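  // e.g. with %bv:(<4 x s32>) = G_BUILD_VECTOR %a, %b, %c, %d, a splat of
  // lane 1 becomes G_DUP %b. (Register names here are illustrative.)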
  auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
                                  MI.getOperand(1).getReg(), MRI);
  if (!BuildVecMI)
    return false;
  Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
  MatchInfo =
      ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
  return true;
}

static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  auto MaybeLane = getSplatIndex(MI);
  if (!MaybeLane)
    return false;
  int Lane = *MaybeLane;
  // If this is undef splat, generate it via "just" vdup, if possible.
  if (Lane < 0)
    Lane = 0;
  if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
    return true;
  if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
    return true;
  return false;
}

// Check if an EXT instruction can handle the shuffle mask when the vector
// sources of the shuffle are the same.
static bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
  unsigned NumElts = Ty.getNumElements();

  // Assume that the first shuffle index is not UNDEF. Fail if it is.
  if (M[0] < 0)
    return false;

  // If this is a VEXT shuffle, the immediate value is the index of the first
  // element. The other shuffle indices must be the successive elements after
  // the first one.
  unsigned ExpectedElt = M[0];
  for (unsigned I = 1; I < NumElts; ++I) {
    // Increment the expected index. If it wraps around, just follow it
    // back to index zero and keep going.
    ++ExpectedElt;
    if (ExpectedElt == NumElts)
      ExpectedElt = 0;

    if (M[I] < 0)
      continue; // Ignore UNDEF indices.
    if (ExpectedElt != static_cast<unsigned>(M[I]))
      return false;
  }

  return true;
}

static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  auto Mask = MI.getOperand(3).getShuffleMask();
  uint64_t Imm;
  auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;

  if (!ExtInfo) {
    if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
        !isSingletonExtMask(Mask, DstTy))
      return false;

    Imm = Mask[0] * ExtFactor;
    MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
    return true;
  }
  bool ReverseExt;
  std::tie(ReverseExt, Imm) = *ExtInfo;
  if (ReverseExt)
    std::swap(V1, V2);
  Imm *= ExtFactor;
  MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
  return true;
}

/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
/// \p Opc is the opcode to use. \p MI is the G_SHUFFLE_VECTOR.
static bool applyShuffleVectorPseudo(MachineInstr &MI,
                                     ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
  MI.eraseFromParent();
  return true;
}

/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
/// Special-cased because the constant operand must be emitted as a G_CONSTANT
/// for the imported tablegen patterns to work.
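/// The immediate is a byte offset: the starting lane multiplied by the element
/// size in bytes.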
static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  // Tablegen patterns expect an i32 G_CONSTANT as the final op.
  auto Cst =
      MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
                        {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
  MI.eraseFromParent();
  return true;
}

/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
///
/// e.g.
///   %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
///
/// Can be represented as
///
///   %extract = G_EXTRACT_VECTOR_ELT %left, 0
///   %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
///
static bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
                     std::tuple<Register, int, Register, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  int NumElts = MRI.getType(Dst).getNumElements();
  auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
  if (!DstIsLeftAndDstLane)
    return false;
  bool DstIsLeft;
  int DstLane;
  std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
  Register Left = MI.getOperand(1).getReg();
  Register Right = MI.getOperand(2).getReg();
  Register DstVec = DstIsLeft ? Left : Right;
  Register SrcVec = Left;

  int SrcLane = ShuffleMask[DstLane];
  if (SrcLane >= NumElts) {
    SrcVec = Right;
    SrcLane -= NumElts;
  }

  MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
  return true;
}

static bool applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
                     MachineIRBuilder &Builder,
                     std::tuple<Register, int, Register, int> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  Register Dst = MI.getOperand(0).getReg();
  auto ScalarTy = MRI.getType(Dst).getElementType();
  Register DstVec, SrcVec;
  int DstLane, SrcLane;
  std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
  auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
  auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
  auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
  Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
  MI.eraseFromParent();
  return true;
}

/// isVShiftRImm - Check if this is a valid vector for the immediate
/// operand of a vector shift right operation. The value must be in the range:
///   1 <= Value <= ElementBits for a right shift.
static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
                         int64_t &Cnt) {
  assert(Ty.isVector() && "vector shift count is not a vector type");
  MachineInstr *MI = MRI.getVRegDef(Reg);
  auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
  if (!Cst)
    return false;
  Cnt = *Cst;
  int64_t ElementBits = Ty.getScalarSizeInBits();
  return Cnt >= 1 && Cnt <= ElementBits;
}

/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
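/// The shift amount must be a constant splat in the range [1, EltSize].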
static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                              int64_t &Imm) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  if (!Ty.isVector())
    return false;
  return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
}

static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                              int64_t &Imm) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
  unsigned NewOpc =
      Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
  MachineIRBuilder MIB(MI);
  auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
  MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
  MI.eraseFromParent();
  return true;
}

/// Determine if it is possible to modify the \p RHS and predicate \p P of a
/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
///
/// \returns A pair containing the updated immediate and predicate which may
/// be used to optimize the instruction.
///
/// \note This assumes that the comparison has been legalized.
std::optional<std::pair<uint64_t, CmpInst::Predicate>>
tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
                        const MachineRegisterInfo &MRI) {
  const auto &Ty = MRI.getType(RHS);
  if (Ty.isVector())
    return std::nullopt;
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");

  // If the RHS is not a constant, or the RHS is already a valid arithmetic
  // immediate, then there is nothing to change.
  auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (!ValAndVReg)
    return std::nullopt;
  uint64_t C = ValAndVReg->Value.getZExtValue();
  if (isLegalArithImmed(C))
    return std::nullopt;

  // We have a non-arithmetic immediate. Check if adjusting the immediate and
  // adjusting the predicate will result in a legal arithmetic immediate.
  switch (P) {
  default:
    return std::nullopt;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // Check for
    //
    // x slt c => x sle c - 1
    // x sge c => x sgt c - 1
    //
    // When c is not the smallest possible negative number.
    if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
        (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
    C -= 1;
    break;
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_UGE:
    // Check for
    //
    // x ult c => x ule c - 1
    // x uge c => x ugt c - 1
    //
    // When c is not zero.
    if (C == 0)
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
    C -= 1;
    break;
  case CmpInst::ICMP_SLE:
  case CmpInst::ICMP_SGT:
    // Check for
    //
    // x sle c => x slt c + 1
    // x sgt c => x sge c + 1
    //
    // When c is not the largest possible signed integer.
    if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
        (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
    C += 1;
    break;
  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_UGT:
    // Check for
    //
    // x ule c => x ult c + 1
    // x ugt c => x uge c + 1
    //
    // When c is not the largest possible unsigned integer.
    if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
        (Size == 64 && C == UINT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
    C += 1;
    break;
  }

  // Check if the new constant is valid, and return the updated constant and
  // predicate if it is.
  if (Size == 32)
    C = static_cast<uint32_t>(C);
  if (!isLegalArithImmed(C))
    return std::nullopt;
  return {{C, P}};
}

/// Determine whether or not it is possible to update the RHS and predicate of
/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
/// immediate.
///
/// \p MI - The G_ICMP instruction
/// \p MatchInfo - The new RHS immediate and predicate on success
///
/// See tryAdjustICmpImmAndPred for valid transformations.
bool matchAdjustICmpImmAndPred(
    MachineInstr &MI, const MachineRegisterInfo &MRI,
    std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  Register RHS = MI.getOperand(3).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
    MatchInfo = *MaybeNewImmAndPred;
    return true;
  }
  return false;
}

bool applyAdjustICmpImmAndPred(
    MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
    MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
  MIB.setInstrAndDebugLoc(MI);
  MachineOperand &RHS = MI.getOperand(3);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
                               MatchInfo.first);
  Observer.changingInstr(MI);
  RHS.setReg(Cst->getOperand(0).getReg());
  MI.getOperand(1).setPredicate(MatchInfo.second);
  Observer.changedInstr(MI);
  return true;
}

bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);
  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  auto LaneIdx = getSplatIndex(MI);
  if (!LaneIdx)
    return false;

  // The lane idx should be within the first source vector.
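  // (getSplatIndex returns an index into the concatenation of both sources.)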
  if (*LaneIdx >= SrcTy.getNumElements())
    return false;

  if (DstTy != SrcTy)
    return false;

  LLT ScalarTy = SrcTy.getElementType();
  unsigned ScalarSize = ScalarTy.getSizeInBits();

  unsigned Opc = 0;
  switch (SrcTy.getNumElements()) {
  case 2:
    if (ScalarSize == 64)
      Opc = AArch64::G_DUPLANE64;
    else if (ScalarSize == 32)
      Opc = AArch64::G_DUPLANE32;
    break;
  case 4:
    if (ScalarSize == 32)
      Opc = AArch64::G_DUPLANE32;
    break;
  case 8:
    if (ScalarSize == 16)
      Opc = AArch64::G_DUPLANE16;
    break;
  case 16:
    if (ScalarSize == 8)
      Opc = AArch64::G_DUPLANE8;
    break;
  default:
    break;
  }
  if (!Opc)
    return false;

  MatchInfo.first = Opc;
  MatchInfo.second = *LaneIdx;
  return true;
}

bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);

  B.setInstrAndDebugLoc(MI);
  auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);

  Register DupSrc = MI.getOperand(1).getReg();
  // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
  // To do this, we can use a G_CONCAT_VECTORS to do the widening.
  if (SrcTy == LLT::fixed_vector(2, LLT::scalar(32))) {
    assert(MRI.getType(MI.getOperand(0).getReg()).getNumElements() == 2 &&
           "Unexpected dest elements");
    auto Undef = B.buildUndef(SrcTy);
    DupSrc = B.buildConcatVectors(
                  SrcTy.changeElementCount(ElementCount::getFixed(4)),
                  {Src1Reg, Undef.getReg(0)})
                 .getReg(0);
  }
  B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
  MI.eraseFromParent();
  return true;
}

static bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  auto Splat = getAArch64VectorSplat(MI, MRI);
  if (!Splat)
    return false;
  if (Splat->isReg())
    return true;
  // Later, during selection, we'll try to match imported patterns using
  // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
  // G_BUILD_VECTORs which could match those patterns.
  int64_t Cst = Splat->getCst();
  return (Cst != 0 && Cst != -1);
}

static bool applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
                                  MachineIRBuilder &B) {
  B.setInstrAndDebugLoc(MI);
  B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
               {MI.getOperand(1).getReg()});
  MI.eraseFromParent();
  return true;
}

/// \returns how many instructions would be saved by folding a G_ICMP's shift
/// and/or extension operations.
static unsigned getCmpOperandFoldingProfit(Register CmpOp,
                                           const MachineRegisterInfo &MRI) {
  // No instructions to save if there's more than one use or no uses.
  if (!MRI.hasOneNonDBGUse(CmpOp))
    return 0;

  // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
  auto IsSupportedExtend = [&](const MachineInstr &MI) {
    if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
      return true;
    if (MI.getOpcode() != TargetOpcode::G_AND)
      return false;
    auto ValAndVReg =
        getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
    if (!ValAndVReg)
      return false;
    uint64_t Mask = ValAndVReg->Value.getZExtValue();
    return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
  };

  MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
  if (IsSupportedExtend(*Def))
    return 1;

  unsigned Opc = Def->getOpcode();
  if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
      Opc != TargetOpcode::G_LSHR)
    return 0;

  auto MaybeShiftAmt =
      getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
  if (!MaybeShiftAmt)
    return 0;
  uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
  MachineInstr *ShiftLHS =
      getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);

  // Check if we can fold an extend and a shift.
  // FIXME: This is duplicated with the selector. (See:
  // selectArithExtendedRegister)
  if (IsSupportedExtend(*ShiftLHS))
    return (ShiftAmt <= 4) ? 2 : 1;

  LLT Ty = MRI.getType(Def->getOperand(0).getReg());
  if (Ty.isVector())
    return 0;
  unsigned ShiftSize = Ty.getSizeInBits();
  if ((ShiftSize == 32 && ShiftAmt <= 31) ||
      (ShiftSize == 64 && ShiftAmt <= 63))
    return 1;
  return 0;
}

/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
/// instruction \p MI.
static bool trySwapICmpOperands(MachineInstr &MI,
                                const MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  // Swap the operands if it would introduce a profitable folding opportunity.
  // (e.g. a shift + extend).
  //
  // For example:
  //   lsl     w13, w11, #1
  //   cmp     w13, w12
  // can be turned into:
  //   cmp     w12, w11, lsl #1

  // Don't swap if there's a constant on the RHS, because we know we can fold
  // that.
  Register RHS = MI.getOperand(3).getReg();
  auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
    return false;

  Register LHS = MI.getOperand(2).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  auto GetRegForProfit = [&](Register Reg) {
    MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
    return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
  };

  // Don't have a constant on the RHS. If we swap the LHS and RHS of the
  // compare, would we be able to fold more instructions?
  Register TheLHS = GetRegForProfit(LHS);
  Register TheRHS = GetRegForProfit(RHS);

  // If the LHS is more likely to give us a folding opportunity, then swap the
  // LHS and RHS.
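  // A tie keeps the operands as-is; only a strictly better fold justifies the
  // swap.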
  return (getCmpOperandFoldingProfit(TheLHS, MRI) >
          getCmpOperandFoldingProfit(TheRHS, MRI));
}

static bool applySwapICmpOperands(MachineInstr &MI,
                                  GISelChangeObserver &Observer) {
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  Observer.changedInstr(MI);
  MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
  MI.getOperand(2).setReg(RHS);
  MI.getOperand(3).setReg(LHS);
  Observer.changedInstr(MI);
  return true;
}

/// \returns a function which builds a vector floating point compare instruction
/// for a condition code \p CC.
/// \param [in] IsZero - True if the comparison is against 0.
/// \param [in] NoNans - True if the target has NoNansFPMath.
static std::function<Register(MachineIRBuilder &)>
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
              bool NoNans, MachineRegisterInfo &MRI) {
  LLT DstTy = MRI.getType(LHS);
  assert(DstTy.isVector() && "Expected vector types only?");
  assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
  switch (CC) {
  default:
    llvm_unreachable("Unexpected condition code!");
  case AArch64CC::NE:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      auto FCmp = IsZero
                      ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
                      : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
      return MIB.buildNot(DstTy, FCmp).getReg(0);
    };
  case AArch64CC::EQ:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
                       .getReg(0);
    };
  case AArch64CC::GE:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
                       .getReg(0);
    };
  case AArch64CC::GT:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
                       .getReg(0);
    };
  case AArch64CC::LS:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
                       .getReg(0);
    };
  case AArch64CC::MI:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
                       .getReg(0);
    };
  }
}

/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
static bool lowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                            MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  if (!DstTy.isVector() || !ST.hasNEON())
    return false;
  const auto Pred =
      static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  Register LHS = MI.getOperand(2).getReg();
  // TODO: Handle v4s16 case.
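  // Only 32-bit and 64-bit element types are handled here.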
  unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
  if (EltSize != 32 && EltSize != 64)
    return false;
  Register RHS = MI.getOperand(3).getReg();
  auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);

  // Compares against 0 have special target-specific pseudos.
  bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;

  bool Invert = false;
  AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
  if (Pred == CmpInst::Predicate::FCMP_ORD && IsZero) {
    // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
    // NaN, so equivalent to a == a and doesn't need the two comparisons an
    // "ord" normally would.
    RHS = LHS;
    IsZero = false;
    CC = AArch64CC::EQ;
  } else
    changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);

  bool NoNans = ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;

  // Instead of having an apply function, just build here to simplify things.
  MIB.setInstrAndDebugLoc(MI);
  auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
  Register CmpRes;
  if (CC2 == AArch64CC::AL)
    CmpRes = Cmp(MIB);
  else {
    auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
    auto Cmp2Dst = Cmp2(MIB);
    auto Cmp1Dst = Cmp(MIB);
    CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
  }
  if (Invert)
    CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
  MRI.replaceRegWith(Dst, CmpRes);
  MI.eraseFromParent();
  return false;
}

static bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                                Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    return false;
  // Match a store of a truncate.
  if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
    return false;
  // Only form truncstores for value types of max 64b.
  return MRI.getType(SrcReg).getSizeInBits() <= 64;
}

static bool applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                                MachineIRBuilder &B,
                                GISelChangeObserver &Observer,
                                Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Observer.changingInstr(MI);
  MI.getOperand(0).setReg(SrcReg);
  Observer.changedInstr(MI);
  return true;
}

// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
// form in the first place for combine opportunities, so any remaining ones
// at this stage need to be lowered back.
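// (The generic LegalizerHelper lowering expands this into a shift-left /
// arithmetic-shift-right pair.)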
static bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  return DstTy.isVector();
}

static void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
                                 MachineIRBuilder &B,
                                 GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  B.setInstrAndDebugLoc(MI);
  LegalizerHelper Helper(*MI.getMF(), Observer, B);
  Helper.lower(MI, 0, /* Unused hint type */ LLT());
}

#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H

class AArch64PostLegalizerLoweringInfo : public CombinerInfo {
public:
  AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg;

  AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize)
      : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, /*OptEnabled = */ true, OptSize,
                     MinSize) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};

bool AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer,
                                               MachineInstr &MI,
                                               MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, /* IsPreLegalize*/ false);
  AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg);
  return Generated.tryCombineAll(Observer, MI, B, Helper);
}

#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP

class AArch64PostLegalizerLowering : public MachineFunctionPass {
public:
  static char ID;

  AArch64PostLegalizerLowering();

  StringRef getPassName() const override {
    return "AArch64PostLegalizerLowering";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};
} // end anonymous namespace

void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}

AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
    : MachineFunctionPass(ID) {
  initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());
}

bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  assert(MF.getProperties().hasProperty(
             MachineFunctionProperties::Property::Legalized) &&
         "Expected a legalized function?");
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
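  // Propagate the function's optsize/minsize attributes to the combiner
  // configuration.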
  AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize());
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AArch64PostLegalizerLowering::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
                      "Lower AArch64 MachineInstrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
                    "Lower AArch64 MachineInstrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAArch64PostLegalizerLowering() {
  return new AArch64PostLegalizerLowering();
}
} // end namespace llvm