//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Post-legalization lowering for instructions.
///
/// This is used to offload pattern matching from the selector.
///
/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
/// a G_ZIP, G_UZP, etc.
///
/// General optimization combines should be handled by either the
/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
///
//===----------------------------------------------------------------------===//

#include "AArch64TargetMachine.h"
#include "AArch64GlobalISelUtils.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "aarch64-postlegalizer-lowering"

using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
///
/// Used for matching target-supported shuffles before codegen.
struct ShuffleVectorPseudo {
  unsigned Opc;                 ///< Opcode for the instruction. (E.g. G_ZIP1)
  Register Dst;                 ///< Destination register.
  SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
  ShuffleVectorPseudo(unsigned Opc, Register Dst,
                      std::initializer_list<SrcOp> SrcOps)
      : Opc(Opc), Dst(Dst), SrcOps(SrcOps) {}
  ShuffleVectorPseudo() {}
};

/// Check if a vector shuffle corresponds to a REV instruction with the
/// specified block size.
static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
                      unsigned BlockSize) {
  assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) &&
         "Only possible block sizes for REV are: 16, 32, 64");
  assert(EltSize != 64 && "EltSize cannot be 64 for REV mask.");

  unsigned BlockElts = M[0] + 1;

  // If the first shuffle index is UNDEF, be optimistic.
  if (M[0] < 0)
    BlockElts = BlockSize / EltSize;

  if (BlockSize <= EltSize || BlockSize != BlockElts * EltSize)
    return false;

  for (unsigned i = 0; i < NumElts; ++i) {
    // Ignore undef indices.
    if (M[i] < 0)
      continue;
    if (static_cast<unsigned>(M[i]) !=
        (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts))
      return false;
  }

  return true;
}
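// Illustrative example of the check above: for an <8 x s16> shuffle, the mask
// <3, 2, 1, 0, 7, 6, 5, 4> reverses the 16-bit elements within each 64-bit
// block, so isREVMask(M, /*EltSize=*/16, /*NumElts=*/8, /*BlockSize=*/64)
// returns true and the shuffle can be lowered to G_REV64.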
/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
                      unsigned &WhichResult) {
  if (NumElts % 2 != 0)
    return false;
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i < NumElts; i += 2) {
    if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
        (M[i + 1] >= 0 &&
         static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
      return false;
  }
  return true;
}

/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
/// sources of the shuffle are different.
static Optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
                                                      unsigned NumElts) {
  // Look for the first non-undef element.
  auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
  if (FirstRealElt == M.end())
    return None;

  // Use APInt to handle overflow when calculating expected element.
  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);

  // The following shuffle indices must be the successive elements after the
  // first real element.
  if (any_of(
          make_range(std::next(FirstRealElt), M.end()),
          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
    return None;

  // The index of an EXT is the first element if it is not UNDEF.
  // Watch out for the beginning UNDEFs. The EXT index should be the expected
  // value of the first element. E.g.
  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
  // ExpectedElt is the last mask index plus 1.
  uint64_t Imm = ExpectedElt.getZExtValue();
  bool ReverseExt = false;

  // There are two different cases that require reversing the input vectors.
  // For example, for the vector type <4 x i32> we have the following cases:
  // Case 1: shufflevector(<4 x i32>, <4 x i32>, <-1, -1, -1, 0>)
  // Case 2: shufflevector(<4 x i32>, <4 x i32>, <-1, -1, 7, 0>)
  // For both cases, we finally use mask <5, 6, 7, 0>, which requires the two
  // input vectors to be reversed.
  if (Imm < NumElts)
    ReverseExt = true;
  else
    Imm -= NumElts;
  return std::make_pair(ReverseExt, Imm);
}

/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
                      unsigned &WhichResult) {
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i != NumElts; ++i) {
    // Skip undef indices.
    if (M[i] < 0)
      continue;
    if (static_cast<unsigned>(M[i]) != 2 * i + WhichResult)
      return false;
  }
  return true;
}

/// \return true if \p M is a zip mask for a shuffle vector of \p NumElts.
/// Whether or not G_ZIP1 or G_ZIP2 should be used is stored in \p WhichResult.
static bool isZipMask(ArrayRef<int> M, unsigned NumElts,
                      unsigned &WhichResult) {
  if (NumElts % 2 != 0)
    return false;

  // 0 means use ZIP1, 1 means use ZIP2.
  WhichResult = (M[0] == 0 ? 0 : 1);
  unsigned Idx = WhichResult * NumElts / 2;
  for (unsigned i = 0; i != NumElts; i += 2) {
    if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != Idx) ||
        (M[i + 1] >= 0 && static_cast<unsigned>(M[i + 1]) != Idx + NumElts))
      return false;
    Idx += 1;
  }
  return true;
}
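// For reference, the masks accepted by the helpers above for a <4 x s32>
// shuffle are (illustrative; undef entries may replace any index):
//   TRN1: <0, 4, 2, 6>   TRN2: <1, 5, 3, 7>
//   UZP1: <0, 2, 4, 6>   UZP2: <1, 3, 5, 7>
//   ZIP1: <0, 4, 1, 5>   ZIP2: <2, 6, 3, 7>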
/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
/// G_REV instruction. Stores the appropriate G_REV opcode and source register
/// in \p MatchInfo.
static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Dst);
  unsigned EltSize = Ty.getScalarSizeInBits();

  // Element size for a rev cannot be 64.
  if (EltSize == 64)
    return false;

  unsigned NumElts = Ty.getNumElements();

  // Try to produce G_REV64.
  if (isREVMask(ShuffleMask, EltSize, NumElts, 64)) {
    MatchInfo = ShuffleVectorPseudo(AArch64::G_REV64, Dst, {Src});
    return true;
  }

  // TODO: Produce G_REV32 and G_REV16 once we have proper legalization
  // support. This should be identical to above, but with a constant 32 and
  // constant 16.
  return false;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_TRN1 or G_TRN2 instruction.
static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_UZP1 or G_UZP2 instruction.
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
static bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_ZIP1 or G_ZIP2 instruction.
static bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isZipMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}
/// Helper function for matchDup.
static bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
                                        MachineRegisterInfo &MRI,
                                        ShuffleVectorPseudo &MatchInfo) {
  if (Lane != 0)
    return false;

  // Try to match a vector splat operation into a dup instruction.
  // We're looking for this pattern:
  //
  // %scalar:gpr(s64) = COPY $x0
  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
  // %cst0:gpr(s32) = G_CONSTANT i32 0
  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef, %zerovec(<2 x s32>)
  //
  // ...into:
  // %splat = G_DUP %scalar

  // Begin matching the insert.
  auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
                             MI.getOperand(1).getReg(), MRI);
  if (!InsMI)
    return false;
  // Match the undef vector operand.
  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
                    MRI))
    return false;

  // Match the index constant 0.
  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
    return false;

  MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
                                  {InsMI->getOperand(2).getReg()});
  return true;
}

/// Helper function for matchDup.
static bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
                                    MachineRegisterInfo &MRI,
                                    ShuffleVectorPseudo &MatchInfo) {
  assert(Lane >= 0 && "Expected a non-negative lane?");
  // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference
  // the lane's definition directly.
  auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
                                  MI.getOperand(1).getReg(), MRI);
  if (!BuildVecMI)
    return false;
  Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
  MatchInfo =
      ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
  return true;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI which splats a single
/// lane can be replaced with a G_DUP instruction.
static bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  auto MaybeLane = getSplatIndex(MI);
  if (!MaybeLane)
    return false;
  int Lane = *MaybeLane;
  // If this is an undef splat, generate it via "just" vdup, if possible.
  if (Lane < 0)
    Lane = 0;
  if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
    return true;
  if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
    return true;
  return false;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_EXT instruction.
static bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
                     ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Dst = MI.getOperand(0).getReg();
  auto ExtInfo = getExtMask(MI.getOperand(3).getShuffleMask(),
                            MRI.getType(Dst).getNumElements());
  if (!ExtInfo)
    return false;
  bool ReverseExt;
  uint64_t Imm;
  std::tie(ReverseExt, Imm) = *ExtInfo;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  if (ReverseExt)
    std::swap(V1, V2);
  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;
  Imm *= ExtFactor;
  MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
  return true;
}
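// Worked example for matchEXT (illustrative): a <4 x s32> shuffle of %v1, %v2
// with mask <1, 2, 3, 4> selects elements 1..3 of %v1 followed by element 0 of
// %v2. getExtMask returns {ReverseExt = false, Imm = 1}, and after scaling by
// the 4-byte element size the match records G_EXT %v1, %v2, 4.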
/// Replace a G_SHUFFLE_VECTOR instruction \p MI with the pseudo described by
/// \p MatchInfo (opcode, destination register, and source operands).
static bool applyShuffleVectorPseudo(MachineInstr &MI,
                                     ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
  MI.eraseFromParent();
  return true;
}

/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
/// Special-cased because the constant operand must be emitted as a G_CONSTANT
/// for the imported tablegen patterns to work.
static bool applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  // Tablegen patterns expect an i32 G_CONSTANT as the final op.
  auto Cst =
      MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
                        {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
  MI.eraseFromParent();
  return true;
}

/// isVShiftRImm - Check if this is a valid build_vector constant splat for the
/// immediate operand of a vector shift right operation. The value must be in
/// the range: 1 <= Value <= ElementBits for a right shift.
static bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
                         int64_t &Cnt) {
  assert(Ty.isVector() && "vector shift count is not a vector type");
  MachineInstr *MI = MRI.getVRegDef(Reg);
  auto Cst = getBuildVectorConstantSplat(*MI, MRI);
  if (!Cst)
    return false;
  Cnt = *Cst;
  int64_t ElementBits = Ty.getScalarSizeInBits();
  return Cnt >= 1 && Cnt <= ElementBits;
}

/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
static bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                              int64_t &Imm) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  if (!Ty.isVector())
    return false;
  return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
}

static bool applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                              int64_t &Imm) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
  unsigned NewOpc =
      Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
  MachineIRBuilder MIB(MI);
  auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
  MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
  MI.eraseFromParent();
  return true;
}
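// For example (illustrative), given a legalized vector shift whose shift
// amount is a constant splat:
//
// %five:_(s32) = G_CONSTANT i32 5
// %amt:_(<4 x s32>) = G_BUILD_VECTOR %five, %five, %five, %five
// %res:_(<4 x s32>) = G_ASHR %x, %amt
//
// the match/apply pair above rewrites %res as
//
// %imm:_(s32) = G_CONSTANT i32 5
// %res:_(<4 x s32>) = G_VASHR %x, %imm
//
// since 1 <= 5 <= 32 holds for the 32-bit element size.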
/// Determine if it is possible to modify the \p RHS and predicate \p P of a
/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
///
/// \returns A pair containing the updated immediate and predicate which may
/// be used to optimize the instruction.
///
/// \note This assumes that the comparison has been legalized.
Optional<std::pair<uint64_t, CmpInst::Predicate>>
tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
                        const MachineRegisterInfo &MRI) {
  const auto &Ty = MRI.getType(RHS);
  if (Ty.isVector())
    return None;
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");

  // If the RHS is not a constant, or the RHS is already a valid arithmetic
  // immediate, then there is nothing to change.
  auto ValAndVReg = getConstantVRegValWithLookThrough(RHS, MRI);
  if (!ValAndVReg)
    return None;
  uint64_t C = ValAndVReg->Value.getZExtValue();
  if (isLegalArithImmed(C))
    return None;

  // We have a non-arithmetic immediate. Check if adjusting the immediate and
  // adjusting the predicate will result in a legal arithmetic immediate.
  switch (P) {
  default:
    return None;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // Check for
    //
    // x slt c => x sle c - 1
    // x sge c => x sgt c - 1
    //
    // When c is not the smallest possible negative number.
    if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
        (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
      return None;
    P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
    C -= 1;
    break;
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_UGE:
    // Check for
    //
    // x ult c => x ule c - 1
    // x uge c => x ugt c - 1
    //
    // When c is not zero.
    if (C == 0)
      return None;
    P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
    C -= 1;
    break;
  case CmpInst::ICMP_SLE:
  case CmpInst::ICMP_SGT:
    // Check for
    //
    // x sle c => x slt c + 1
    // x sgt c => x sge c + 1
    //
    // When c is not the largest possible signed integer.
    if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
        (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
      return None;
    P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
    C += 1;
    break;
  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_UGT:
    // Check for
    //
    // x ule c => x ult c + 1
    // x ugt c => x uge c + 1
    //
    // When c is not the largest possible unsigned integer.
    if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
        (Size == 64 && C == UINT64_MAX))
      return None;
    P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
    C += 1;
    break;
  }

  // Check if the new constant is valid, and return the updated constant and
  // predicate if it is.
  if (Size == 32)
    C = static_cast<uint32_t>(C);
  if (!isLegalArithImmed(C))
    return None;
  return {{C, P}};
}
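// For example (illustrative): 4097 is not a legal arithmetic immediate, but
// 4096 (0x1 << 12) is. So a G_ICMP of %x against a G_CONSTANT of 4097 with
// predicate intpred(slt) can be rewritten by the match/apply pair below to
// compare against 4096 with predicate intpred(sle).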
/// Determine whether or not it is possible to update the RHS and predicate of
/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
/// immediate.
///
/// \p MI - The G_ICMP instruction
/// \p MatchInfo - The new RHS immediate and predicate on success
///
/// See tryAdjustICmpImmAndPred for valid transformations.
bool matchAdjustICmpImmAndPred(
    MachineInstr &MI, const MachineRegisterInfo &MRI,
    std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  Register RHS = MI.getOperand(3).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
    MatchInfo = *MaybeNewImmAndPred;
    return true;
  }
  return false;
}

bool applyAdjustICmpImmAndPred(
    MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
    MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
  MIB.setInstrAndDebugLoc(MI);
  MachineOperand &RHS = MI.getOperand(3);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
                               MatchInfo.first);
  Observer.changingInstr(MI);
  RHS.setReg(Cst->getOperand(0).getReg());
  MI.getOperand(1).setPredicate(MatchInfo.second);
  Observer.changedInstr(MI);
  return true;
}

/// Determine if a G_SHUFFLE_VECTOR \p MI duplicates a single lane and can be
/// lowered to a G_DUPLANE instruction. On success, \p MatchInfo holds the
/// G_DUPLANE opcode and the lane index.
bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);
  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  auto LaneIdx = getSplatIndex(MI);
  if (!LaneIdx)
    return false;

  // The lane idx should be within the first source vector.
  if (*LaneIdx >= SrcTy.getNumElements())
    return false;

  if (DstTy != SrcTy)
    return false;

  LLT ScalarTy = SrcTy.getElementType();
  unsigned ScalarSize = ScalarTy.getSizeInBits();

  unsigned Opc = 0;
  switch (SrcTy.getNumElements()) {
  case 2:
    if (ScalarSize == 64)
      Opc = AArch64::G_DUPLANE64;
    break;
  case 4:
    if (ScalarSize == 32)
      Opc = AArch64::G_DUPLANE32;
    break;
  case 8:
    if (ScalarSize == 16)
      Opc = AArch64::G_DUPLANE16;
    break;
  case 16:
    if (ScalarSize == 8)
      Opc = AArch64::G_DUPLANE8;
    break;
  default:
    break;
  }
  if (!Opc)
    return false;

  MatchInfo.first = Opc;
  MatchInfo.second = *LaneIdx;
  return true;
}

bool applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  B.setInstrAndDebugLoc(MI);
  auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);
  B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()},
               {MI.getOperand(1).getReg(), Lane});
  MI.eraseFromParent();
  return true;
}

#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_H

class AArch64PostLegalizerLoweringInfo : public CombinerInfo {
public:
  AArch64GenPostLegalizerLoweringHelperRuleConfig GeneratedRuleCfg;

  AArch64PostLegalizerLoweringInfo(bool OptSize, bool MinSize)
      : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, /*OptEnabled*/ true, OptSize,
                     MinSize) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
                       MachineIRBuilder &B) const override;
};

bool AArch64PostLegalizerLoweringInfo::combine(GISelChangeObserver &Observer,
                                               MachineInstr &MI,
                                               MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B);
  AArch64GenPostLegalizerLoweringHelper Generated(GeneratedRuleCfg);
  return Generated.tryCombineAll(Observer, MI, B, Helper);
}

#define AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP
#include "AArch64GenPostLegalizeGILowering.inc"
#undef AARCH64POSTLEGALIZERLOWERINGHELPER_GENCOMBINERHELPER_CPP

class AArch64PostLegalizerLowering : public MachineFunctionPass {
public:
  static char ID;

  AArch64PostLegalizerLowering();

  StringRef getPassName() const override {
    return "AArch64PostLegalizerLowering";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};
} // end anonymous namespace

void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}

AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
    : MachineFunctionPass(ID) {
  initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());
}

bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  assert(MF.getProperties().hasProperty(
             MachineFunctionProperties::Property::Legalized) &&
         "Expected a legalized function?");
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize());
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AArch64PostLegalizerLowering::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
                      "Lower AArch64 MachineInstrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
                    "Lower AArch64 MachineInstrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAArch64PostLegalizerLowering() {
  return new AArch64PostLegalizerLowering();
}
} // end namespace llvm