//=== AArch64PostLegalizerLowering.cpp --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// Post-legalization lowering for instructions.
///
/// This is used to offload pattern matching from the selector.
///
/// For example, this combiner will notice that a G_SHUFFLE_VECTOR is actually
/// a G_ZIP, G_UZP, etc.
///
/// General optimization combines should be handled by either the
/// AArch64PostLegalizerCombiner or the AArch64PreLegalizerCombiner.
///
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64GlobalISelUtils.h"
#include "AArch64PerfectShuffle.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "GISel/AArch64LegalizerInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "TargetInfo/AArch64TargetInfo.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include <optional>

#define GET_GICOMBINER_DEPS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "aarch64-postlegalizer-lowering"

using namespace llvm;
using namespace MIPatternMatch;
using namespace AArch64GISelUtils;

namespace {

#define GET_GICOMBINER_TYPES
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_TYPES

/// Represents a pseudo instruction which replaces a G_SHUFFLE_VECTOR.
///
/// Used for matching target-supported shuffles before codegen.
struct ShuffleVectorPseudo {
  unsigned Opc;                 ///< Opcode for the instruction. (E.g. G_ZIP1)
  Register Dst;                 ///< Destination register.
  SmallVector<SrcOp, 2> SrcOps; ///< Source registers.
  ShuffleVectorPseudo(unsigned Opc, Register Dst,
                      std::initializer_list<SrcOp> SrcOps)
      : Opc(Opc), Dst(Dst), SrcOps(SrcOps) {}
  ShuffleVectorPseudo() = default;
};

/// Check if a G_EXT instruction can handle a shuffle mask \p M when the vector
/// sources of the shuffle are different.
std::optional<std::pair<bool, uint64_t>> getExtMask(ArrayRef<int> M,
                                                    unsigned NumElts) {
  // Look for the first non-undef element.
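  // E.g. in <-1, -1, 3, 4, 5, 6, 7, 8> the first real element is 3, and every
  // later defined index must then follow on from it (4, 5, 6, ...).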
  auto FirstRealElt = find_if(M, [](int Elt) { return Elt >= 0; });
  if (FirstRealElt == M.end())
    return std::nullopt;

  // Use APInt to handle overflow when calculating expected element.
  unsigned MaskBits = APInt(32, NumElts * 2).logBase2();
  APInt ExpectedElt = APInt(MaskBits, *FirstRealElt + 1);

  // The following shuffle indices must be the successive elements after the
  // first real element.
  if (any_of(
          make_range(std::next(FirstRealElt), M.end()),
          [&ExpectedElt](int Elt) { return Elt != ExpectedElt++ && Elt >= 0; }))
    return std::nullopt;

  // The index of an EXT is the first element if it is not UNDEF.
  // Watch out for the beginning UNDEFs. The EXT index should be the expected
  // value of the first element. E.g.
  // <-1, -1, 3, ...> is treated as <1, 2, 3, ...>.
  // <-1, -1, 0, 1, ...> is treated as <2*NumElts-2, 2*NumElts-1, 0, 1, ...>.
  // ExpectedElt is the last mask index plus 1.
  uint64_t Imm = ExpectedElt.getZExtValue();
  bool ReverseExt = false;

  // There are two cases that require reversing the input vectors.
  // For example, for a vector <4 x i32> we have the following cases:
  // Case 1: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, -1, 0>)
  // Case 2: shufflevector(<4 x i32>,<4 x i32>,<-1, -1, 7, 0>)
  // For both cases, we finally use mask <5, 6, 7, 0>, which requires
  // reversing the two input vectors.
  if (Imm < NumElts)
    ReverseExt = true;
  else
    Imm -= NumElts;
  return std::make_pair(ReverseExt, Imm);
}

/// Helper function for matchINS.
///
/// \returns a value when \p M is an ins mask for \p NumInputElements.
///
/// First element of the returned pair is true when the produced
/// G_INSERT_VECTOR_ELT destination should be the LHS of the G_SHUFFLE_VECTOR.
///
/// Second element is the destination lane for the G_INSERT_VECTOR_ELT.
std::optional<std::pair<bool, int>> isINSMask(ArrayRef<int> M,
                                              int NumInputElements) {
  if (M.size() != static_cast<size_t>(NumInputElements))
    return std::nullopt;
  int NumLHSMatch = 0, NumRHSMatch = 0;
  int LastLHSMismatch = -1, LastRHSMismatch = -1;
  for (int Idx = 0; Idx < NumInputElements; ++Idx) {
    if (M[Idx] == -1) {
      ++NumLHSMatch;
      ++NumRHSMatch;
      continue;
    }
    M[Idx] == Idx ? ++NumLHSMatch : LastLHSMismatch = Idx;
    M[Idx] == Idx + NumInputElements ? ++NumRHSMatch : LastRHSMismatch = Idx;
  }
  const int NumNeededToMatch = NumInputElements - 1;
  if (NumLHSMatch == NumNeededToMatch)
    return std::make_pair(true, LastLHSMismatch);
  if (NumRHSMatch == NumNeededToMatch)
    return std::make_pair(false, LastRHSMismatch);
  return std::nullopt;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with a
/// G_REV instruction. Populates \p MatchInfo with the appropriate G_REV opcode.
bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(Dst);
  unsigned EltSize = Ty.getScalarSizeInBits();

  // Element size for a rev cannot be 64.
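  // The widest REV container is 64 bits (G_REV64), so there is no REV form
  // that could reorder 64-bit elements.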
  if (EltSize == 64)
    return false;

  unsigned NumElts = Ty.getNumElements();

  // Try to produce a G_REV instruction.
  for (unsigned LaneSize : {64U, 32U, 16U}) {
    if (isREVMask(ShuffleMask, EltSize, NumElts, LaneSize)) {
      unsigned Opcode;
      if (LaneSize == 64U)
        Opcode = AArch64::G_REV64;
      else if (LaneSize == 32U)
        Opcode = AArch64::G_REV32;
      else
        Opcode = AArch64::G_REV16;

      MatchInfo = ShuffleVectorPseudo(Opcode, Dst, {Src});
      return true;
    }
  }

  return false;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_TRN1 or G_TRN2 instruction.
bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
/// a G_UZP1 or G_UZP2 instruction.
///
/// \param [in] MI - The shuffle vector instruction.
/// \param [out] MatchInfo - Either G_UZP1 or G_UZP2 on success.
bool matchUZP(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isUZPMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_UZP1 : AArch64::G_UZP2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

bool matchZip(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  unsigned WhichResult;
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  unsigned NumElts = MRI.getType(Dst).getNumElements();
  if (!isZIPMask(ShuffleMask, NumElts, WhichResult))
    return false;
  unsigned Opc = (WhichResult == 0) ? AArch64::G_ZIP1 : AArch64::G_ZIP2;
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
  return true;
}

/// Helper function for matchDup.
bool matchDupFromInsertVectorElt(int Lane, MachineInstr &MI,
                                 MachineRegisterInfo &MRI,
                                 ShuffleVectorPseudo &MatchInfo) {
  if (Lane != 0)
    return false;

  // Try to match a vector splat operation into a dup instruction.
  // We're looking for this pattern:
  //
  // %scalar:gpr(s64) = COPY $x0
  // %undef:fpr(<2 x s64>) = G_IMPLICIT_DEF
  // %cst0:gpr(s32) = G_CONSTANT i32 0
  // %zerovec:fpr(<2 x s32>) = G_BUILD_VECTOR %cst0(s32), %cst0(s32)
  // %ins:fpr(<2 x s64>) = G_INSERT_VECTOR_ELT %undef, %scalar(s64), %cst0(s32)
  // %splat:fpr(<2 x s64>) = G_SHUFFLE_VECTOR %ins(<2 x s64>), %undef,
  //                                          %zerovec(<2 x s32>)
  //
  // ...into:
  // %splat = G_DUP %scalar

  // Begin matching the insert.
  auto *InsMI = getOpcodeDef(TargetOpcode::G_INSERT_VECTOR_ELT,
                             MI.getOperand(1).getReg(), MRI);
  if (!InsMI)
    return false;
  // Match the undef vector operand.
  if (!getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, InsMI->getOperand(1).getReg(),
                    MRI))
    return false;

  // Match the index constant 0.
  if (!mi_match(InsMI->getOperand(3).getReg(), MRI, m_ZeroInt()))
    return false;

  MatchInfo = ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(),
                                  {InsMI->getOperand(2).getReg()});
  return true;
}

/// Helper function for matchDup.
bool matchDupFromBuildVector(int Lane, MachineInstr &MI,
                             MachineRegisterInfo &MRI,
                             ShuffleVectorPseudo &MatchInfo) {
  assert(Lane >= 0 && "Expected positive lane?");
  // Test if the LHS is a BUILD_VECTOR. If it is, then we can just reference
  // the lane's definition directly.
  auto *BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR,
                                  MI.getOperand(1).getReg(), MRI);
  if (!BuildVecMI)
    return false;
  Register Reg = BuildVecMI->getOperand(Lane + 1).getReg();
  MatchInfo =
      ShuffleVectorPseudo(AArch64::G_DUP, MI.getOperand(0).getReg(), {Reg});
  return true;
}

bool matchDup(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  auto MaybeLane = getSplatIndex(MI);
  if (!MaybeLane)
    return false;
  int Lane = *MaybeLane;
  // If this is an undef splat, generate it via "just" vdup, if possible.
  if (Lane < 0)
    Lane = 0;
  if (matchDupFromInsertVectorElt(Lane, MI, MRI, MatchInfo))
    return true;
  if (matchDupFromBuildVector(Lane, MI, MRI, MatchInfo))
    return true;
  return false;
}

// Check if an EXT instruction can handle the shuffle mask when the vector
// sources of the shuffle are the same.
bool isSingletonExtMask(ArrayRef<int> M, LLT Ty) {
  unsigned NumElts = Ty.getNumElements();

  // Assume that the first shuffle index is not UNDEF. Fail if it is.
  if (M[0] < 0)
    return false;

  // If this is a VEXT shuffle, the immediate value is the index of the first
  // element. The other shuffle indices must be the successive elements after
  // the first one.
  unsigned ExpectedElt = M[0];
  for (unsigned I = 1; I < NumElts; ++I) {
    // Increment the expected index. If it wraps around, just follow it
    // back to index zero and keep going.
    ++ExpectedElt;
    if (ExpectedElt == NumElts)
      ExpectedElt = 0;

    if (M[I] < 0)
      continue; // Ignore UNDEF indices.
    if (ExpectedElt != static_cast<unsigned>(M[I]))
      return false;
  }

  return true;
}

bool matchEXT(MachineInstr &MI, MachineRegisterInfo &MRI,
              ShuffleVectorPseudo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  Register V1 = MI.getOperand(1).getReg();
  Register V2 = MI.getOperand(2).getReg();
  auto Mask = MI.getOperand(3).getShuffleMask();
  uint64_t Imm;
  auto ExtInfo = getExtMask(Mask, DstTy.getNumElements());
  uint64_t ExtFactor = MRI.getType(V1).getScalarSizeInBits() / 8;

  if (!ExtInfo) {
    if (!getOpcodeDef<GImplicitDef>(V2, MRI) ||
        !isSingletonExtMask(Mask, DstTy))
      return false;

    Imm = Mask[0] * ExtFactor;
    MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V1, Imm});
    return true;
  }
  bool ReverseExt;
  std::tie(ReverseExt, Imm) = *ExtInfo;
  if (ReverseExt)
    std::swap(V1, V2);
  Imm *= ExtFactor;
  MatchInfo = ShuffleVectorPseudo(AArch64::G_EXT, Dst, {V1, V2, Imm});
  return true;
}

/// Replace a G_SHUFFLE_VECTOR instruction with a pseudo.
/// \p MatchInfo provides the opcode, destination, and source operands.
/// \p MI is the G_SHUFFLE_VECTOR.
void applyShuffleVectorPseudo(MachineInstr &MI,
                              ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst}, MatchInfo.SrcOps);
  MI.eraseFromParent();
}

/// Replace a G_SHUFFLE_VECTOR instruction with G_EXT.
/// Special-cased because the constant operand must be emitted as a G_CONSTANT
/// for the imported tablegen patterns to work.
void applyEXT(MachineInstr &MI, ShuffleVectorPseudo &MatchInfo) {
  MachineIRBuilder MIRBuilder(MI);
  if (MatchInfo.SrcOps[2].getImm() == 0)
    MIRBuilder.buildCopy(MatchInfo.Dst, MatchInfo.SrcOps[0]);
  else {
    // Tablegen patterns expect an i32 G_CONSTANT as the final op.
    auto Cst =
        MIRBuilder.buildConstant(LLT::scalar(32), MatchInfo.SrcOps[2].getImm());
    MIRBuilder.buildInstr(MatchInfo.Opc, {MatchInfo.Dst},
                          {MatchInfo.SrcOps[0], MatchInfo.SrcOps[1], Cst});
  }
  MI.eraseFromParent();
}

bool matchNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT);

  auto ValAndVReg =
      getIConstantVRegValWithLookThrough(MI.getOperand(3).getReg(), MRI);
  return !ValAndVReg;
}

void applyNonConstInsert(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &Builder) {
  auto &Insert = cast<GInsertVectorElement>(MI);
  Builder.setInstrAndDebugLoc(Insert);

  Register Offset = Insert.getIndexReg();
  LLT VecTy = MRI.getType(Insert.getReg(0));
  LLT EltTy = MRI.getType(Insert.getElementReg());
  LLT IdxTy = MRI.getType(Insert.getIndexReg());

  // Create a stack slot and store the vector into it.
  MachineFunction &MF = Builder.getMF();
  Align Alignment(
      std::min<uint64_t>(VecTy.getSizeInBytes().getKnownMinValue(), 16));
  int FrameIdx = MF.getFrameInfo().CreateStackObject(VecTy.getSizeInBytes(),
                                                     Alignment, false);
  LLT FramePtrTy = LLT::pointer(0, 64);
  MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
  auto StackTemp = Builder.buildFrameIndex(FramePtrTy, FrameIdx);

  Builder.buildStore(Insert.getOperand(1), StackTemp, PtrInfo, Align(8));

  // Get the pointer to the element, and be sure not to hit undefined behavior
  // if the index is out of bounds.
  assert(isPowerOf2_64(VecTy.getNumElements()) &&
         "Expected a power-2 vector size");
  auto Mask = Builder.buildConstant(IdxTy, VecTy.getNumElements() - 1);
  Register And = Builder.buildAnd(IdxTy, Offset, Mask).getReg(0);
  auto EltSize = Builder.buildConstant(IdxTy, EltTy.getSizeInBytes());
  Register Mul = Builder.buildMul(IdxTy, And, EltSize).getReg(0);
  Register EltPtr =
      Builder.buildPtrAdd(MRI.getType(StackTemp.getReg(0)), StackTemp, Mul)
          .getReg(0);

  // Write the inserted element.
  Builder.buildStore(Insert.getElementReg(), EltPtr, PtrInfo, Align(1));
  // Reload the whole vector.
  Builder.buildLoad(Insert.getReg(0), StackTemp, PtrInfo, Align(8));
  Insert.eraseFromParent();
}

/// Match a G_SHUFFLE_VECTOR with a mask which corresponds to a
/// G_INSERT_VECTOR_ELT and G_EXTRACT_VECTOR_ELT pair.
///
/// e.g.
///   %shuf = G_SHUFFLE_VECTOR %left, %right, shufflemask(0, 0)
///
/// Can be represented as
///
///   %extract = G_EXTRACT_VECTOR_ELT %left, 0
///   %ins = G_INSERT_VECTOR_ELT %left, %extract, 1
///
bool matchINS(MachineInstr &MI, MachineRegisterInfo &MRI,
              std::tuple<Register, int, Register, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
  Register Dst = MI.getOperand(0).getReg();
  int NumElts = MRI.getType(Dst).getNumElements();
  auto DstIsLeftAndDstLane = isINSMask(ShuffleMask, NumElts);
  if (!DstIsLeftAndDstLane)
    return false;
  bool DstIsLeft;
  int DstLane;
  std::tie(DstIsLeft, DstLane) = *DstIsLeftAndDstLane;
  Register Left = MI.getOperand(1).getReg();
  Register Right = MI.getOperand(2).getReg();
  Register DstVec = DstIsLeft ? Left : Right;
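  // Assume the inserted element comes from the LHS; if the mask index points
  // into the RHS, switch over to it below.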
  Register SrcVec = Left;

  int SrcLane = ShuffleMask[DstLane];
  if (SrcLane >= NumElts) {
    SrcVec = Right;
    SrcLane -= NumElts;
  }

  MatchInfo = std::make_tuple(DstVec, DstLane, SrcVec, SrcLane);
  return true;
}

void applyINS(MachineInstr &MI, MachineRegisterInfo &MRI,
              MachineIRBuilder &Builder,
              std::tuple<Register, int, Register, int> &MatchInfo) {
  Builder.setInstrAndDebugLoc(MI);
  Register Dst = MI.getOperand(0).getReg();
  auto ScalarTy = MRI.getType(Dst).getElementType();
  Register DstVec, SrcVec;
  int DstLane, SrcLane;
  std::tie(DstVec, DstLane, SrcVec, SrcLane) = MatchInfo;
  auto SrcCst = Builder.buildConstant(LLT::scalar(64), SrcLane);
  auto Extract = Builder.buildExtractVectorElement(ScalarTy, SrcVec, SrcCst);
  auto DstCst = Builder.buildConstant(LLT::scalar(64), DstLane);
  Builder.buildInsertVectorElement(Dst, DstVec, Extract, DstCst);
  MI.eraseFromParent();
}

/// isVShiftRImm - Check if this is a valid vector for the immediate
/// operand of a vector shift right operation. The value must be in the range:
///   1 <= Value <= ElementBits for a right shift.
bool isVShiftRImm(Register Reg, MachineRegisterInfo &MRI, LLT Ty,
                  int64_t &Cnt) {
  assert(Ty.isVector() && "vector shift count is not a vector type");
  MachineInstr *MI = MRI.getVRegDef(Reg);
  auto Cst = getAArch64VectorSplatScalar(*MI, MRI);
  if (!Cst)
    return false;
  Cnt = *Cst;
  int64_t ElementBits = Ty.getScalarSizeInBits();
  return Cnt >= 1 && Cnt <= ElementBits;
}

/// Match a vector G_ASHR or G_LSHR with a valid immediate shift.
bool matchVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                       int64_t &Imm) {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR);
  LLT Ty = MRI.getType(MI.getOperand(1).getReg());
  if (!Ty.isVector())
    return false;
  return isVShiftRImm(MI.getOperand(2).getReg(), MRI, Ty, Imm);
}

void applyVAshrLshrImm(MachineInstr &MI, MachineRegisterInfo &MRI,
                       int64_t &Imm) {
  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ASHR || Opc == TargetOpcode::G_LSHR);
  unsigned NewOpc =
      Opc == TargetOpcode::G_ASHR ? AArch64::G_VASHR : AArch64::G_VLSHR;
  MachineIRBuilder MIB(MI);
  auto ImmDef = MIB.buildConstant(LLT::scalar(32), Imm);
  MIB.buildInstr(NewOpc, {MI.getOperand(0)}, {MI.getOperand(1), ImmDef});
  MI.eraseFromParent();
}

/// Determine if it is possible to modify the \p RHS and predicate \p P of a
/// G_ICMP instruction such that the right-hand side is an arithmetic immediate.
///
/// \returns A pair containing the updated immediate and predicate which may
/// be used to optimize the instruction.
///
/// \note This assumes that the comparison has been legalized.
std::optional<std::pair<uint64_t, CmpInst::Predicate>>
tryAdjustICmpImmAndPred(Register RHS, CmpInst::Predicate P,
                        const MachineRegisterInfo &MRI) {
  const auto &Ty = MRI.getType(RHS);
  if (Ty.isVector())
    return std::nullopt;
  unsigned Size = Ty.getSizeInBits();
  assert((Size == 32 || Size == 64) && "Expected 32 or 64 bit compare only?");

  // If the RHS is not a constant, or the RHS is already a valid arithmetic
  // immediate, then there is nothing to change.
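  // (A legal arithmetic immediate here is a 12-bit unsigned value, optionally
  // shifted left by 12, as accepted by ADD/SUB and therefore by CMP.)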
  auto ValAndVReg = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (!ValAndVReg)
    return std::nullopt;
  uint64_t OriginalC = ValAndVReg->Value.getZExtValue();
  uint64_t C = OriginalC;
  if (isLegalArithImmed(C))
    return std::nullopt;

  // We have a non-arithmetic immediate. Check if adjusting the immediate and
  // adjusting the predicate will result in a legal arithmetic immediate.
  switch (P) {
  default:
    return std::nullopt;
  case CmpInst::ICMP_SLT:
  case CmpInst::ICMP_SGE:
    // Check for
    //
    // x slt c => x sle c - 1
    // x sge c => x sgt c - 1
    //
    // When c is not the smallest possible negative number.
    if ((Size == 64 && static_cast<int64_t>(C) == INT64_MIN) ||
        (Size == 32 && static_cast<int32_t>(C) == INT32_MIN))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLT) ? CmpInst::ICMP_SLE : CmpInst::ICMP_SGT;
    C -= 1;
    break;
  case CmpInst::ICMP_ULT:
  case CmpInst::ICMP_UGE:
    // Check for
    //
    // x ult c => x ule c - 1
    // x uge c => x ugt c - 1
    //
    // When c is not zero.
    if (C == 0)
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULT) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT;
    C -= 1;
    break;
  case CmpInst::ICMP_SLE:
  case CmpInst::ICMP_SGT:
    // Check for
    //
    // x sle c => x slt c + 1
    // x sgt c => x sge c + 1
    //
    // When c is not the largest possible signed integer.
    if ((Size == 32 && static_cast<int32_t>(C) == INT32_MAX) ||
        (Size == 64 && static_cast<int64_t>(C) == INT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_SLE) ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGE;
    C += 1;
    break;
  case CmpInst::ICMP_ULE:
  case CmpInst::ICMP_UGT:
    // Check for
    //
    // x ule c => x ult c + 1
    // x ugt c => x uge c + 1
    //
    // When c is not the largest possible unsigned integer.
    if ((Size == 32 && static_cast<uint32_t>(C) == UINT32_MAX) ||
        (Size == 64 && C == UINT64_MAX))
      return std::nullopt;
    P = (P == CmpInst::ICMP_ULE) ? CmpInst::ICMP_ULT : CmpInst::ICMP_UGE;
    C += 1;
    break;
  }

  // Check if the new constant is valid, and return the updated constant and
  // predicate if it is.
  if (Size == 32)
    C = static_cast<uint32_t>(C);
  if (isLegalArithImmed(C))
    return {{C, P}};

  auto IsMaterializableInSingleInstruction = [=](uint64_t Imm) {
    SmallVector<AArch64_IMM::ImmInsnModel> Insn;
    AArch64_IMM::expandMOVImm(Imm, 32, Insn);
    return Insn.size() == 1;
  };

  if (!IsMaterializableInSingleInstruction(OriginalC) &&
      IsMaterializableInSingleInstruction(C))
    return {{C, P}};

  return std::nullopt;
}

/// Determine whether or not it is possible to update the RHS and predicate of
/// a G_ICMP instruction such that the RHS will be selected as an arithmetic
/// immediate.
///
/// \p MI - The G_ICMP instruction
/// \p MatchInfo - The new RHS immediate and predicate on success
///
/// See tryAdjustICmpImmAndPred for valid transformations.
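///
/// E.g. G_ICMP intpred(ult), %x, 4097 can become G_ICMP intpred(ule), %x, 4096,
/// since 4096 (1 << 12) is encodable as an arithmetic immediate while 4097 is
/// not.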
bool matchAdjustICmpImmAndPred(
    MachineInstr &MI, const MachineRegisterInfo &MRI,
    std::pair<uint64_t, CmpInst::Predicate> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  Register RHS = MI.getOperand(3).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  if (auto MaybeNewImmAndPred = tryAdjustICmpImmAndPred(RHS, Pred, MRI)) {
    MatchInfo = *MaybeNewImmAndPred;
    return true;
  }
  return false;
}

void applyAdjustICmpImmAndPred(
    MachineInstr &MI, std::pair<uint64_t, CmpInst::Predicate> &MatchInfo,
    MachineIRBuilder &MIB, GISelChangeObserver &Observer) {
  MIB.setInstrAndDebugLoc(MI);
  MachineOperand &RHS = MI.getOperand(3);
  MachineRegisterInfo &MRI = *MIB.getMRI();
  auto Cst = MIB.buildConstant(MRI.cloneVirtualRegister(RHS.getReg()),
                               MatchInfo.first);
  Observer.changingInstr(MI);
  RHS.setReg(Cst->getOperand(0).getReg());
  MI.getOperand(1).setPredicate(MatchInfo.second);
  Observer.changedInstr(MI);
}

bool matchDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);
  const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());

  auto LaneIdx = getSplatIndex(MI);
  if (!LaneIdx)
    return false;

  // The lane idx should be within the first source vector.
  if (*LaneIdx >= SrcTy.getNumElements())
    return false;

  if (DstTy != SrcTy)
    return false;

  LLT ScalarTy = SrcTy.getElementType();
  unsigned ScalarSize = ScalarTy.getSizeInBits();

  unsigned Opc = 0;
  switch (SrcTy.getNumElements()) {
  case 2:
    if (ScalarSize == 64)
      Opc = AArch64::G_DUPLANE64;
    else if (ScalarSize == 32)
      Opc = AArch64::G_DUPLANE32;
    break;
  case 4:
    if (ScalarSize == 32)
      Opc = AArch64::G_DUPLANE32;
    else if (ScalarSize == 16)
      Opc = AArch64::G_DUPLANE16;
    break;
  case 8:
    if (ScalarSize == 8)
      Opc = AArch64::G_DUPLANE8;
    else if (ScalarSize == 16)
      Opc = AArch64::G_DUPLANE16;
    break;
  case 16:
    if (ScalarSize == 8)
      Opc = AArch64::G_DUPLANE8;
    break;
  default:
    break;
  }
  if (!Opc)
    return false;

  MatchInfo.first = Opc;
  MatchInfo.second = *LaneIdx;
  return true;
}

void applyDupLane(MachineInstr &MI, MachineRegisterInfo &MRI,
                  MachineIRBuilder &B, std::pair<unsigned, int> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
  Register Src1Reg = MI.getOperand(1).getReg();
  const LLT SrcTy = MRI.getType(Src1Reg);

  B.setInstrAndDebugLoc(MI);
  auto Lane = B.buildConstant(LLT::scalar(64), MatchInfo.second);

  Register DupSrc = MI.getOperand(1).getReg();
  // For types like <2 x s32>, we can use G_DUPLANE32, with a <4 x s32> source.
  // To do this, we can use a G_CONCAT_VECTORS to do the widening.
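  // E.g. the 64-bit source is widened as
  //   %wide:<4 x s32> = G_CONCAT_VECTORS %src, %undef
  // and the G_DUPLANE32 then reads the requested lane from %wide.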
  if (SrcTy.getSizeInBits() == 64) {
    auto Undef = B.buildUndef(SrcTy);
    DupSrc = B.buildConcatVectors(SrcTy.multiplyElements(2),
                                  {Src1Reg, Undef.getReg(0)})
                 .getReg(0);
  }
  B.buildInstr(MatchInfo.first, {MI.getOperand(0).getReg()}, {DupSrc, Lane});
  MI.eraseFromParent();
}

bool matchScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI) {
  auto &Unmerge = cast<GUnmerge>(MI);
  Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
  const LLT SrcTy = MRI.getType(Src1Reg);
  if (SrcTy.getSizeInBits() != 128 && SrcTy.getSizeInBits() != 64)
    return false;
  return SrcTy.isVector() && !SrcTy.isScalable() &&
         Unmerge.getNumOperands() == (unsigned)SrcTy.getNumElements() + 1;
}

void applyScalarizeVectorUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                                 MachineIRBuilder &B) {
  auto &Unmerge = cast<GUnmerge>(MI);
  Register Src1Reg = Unmerge.getReg(Unmerge.getNumOperands() - 1);
  const LLT SrcTy = MRI.getType(Src1Reg);
  assert((SrcTy.isVector() && !SrcTy.isScalable()) &&
         "Expected a fixed length vector");

  for (int I = 0; I < SrcTy.getNumElements(); ++I)
    B.buildExtractVectorElementConstant(Unmerge.getReg(I), Src1Reg, I);
  MI.eraseFromParent();
}

bool matchBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
  auto Splat = getAArch64VectorSplat(MI, MRI);
  if (!Splat)
    return false;
  if (Splat->isReg())
    return true;
  // Later, during selection, we'll try to match imported patterns using
  // immAllOnesV and immAllZerosV. These require G_BUILD_VECTOR. Don't lower
  // G_BUILD_VECTORs which could match those patterns.
  int64_t Cst = Splat->getCst();
  return (Cst != 0 && Cst != -1);
}

void applyBuildVectorToDup(MachineInstr &MI, MachineRegisterInfo &MRI,
                           MachineIRBuilder &B) {
  B.setInstrAndDebugLoc(MI);
  B.buildInstr(AArch64::G_DUP, {MI.getOperand(0).getReg()},
               {MI.getOperand(1).getReg()});
  MI.eraseFromParent();
}

/// \returns how many instructions would be saved by folding a G_ICMP's shift
/// and/or extension operations.
unsigned getCmpOperandFoldingProfit(Register CmpOp, MachineRegisterInfo &MRI) {
  // No instructions to save if there's more than one use or no uses.
  if (!MRI.hasOneNonDBGUse(CmpOp))
    return 0;

  // FIXME: This is duplicated with the selector. (See: selectShiftedRegister)
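  // A "supported extend" here is either a G_SEXT_INREG (selectable as
  // sxtb/sxth/sxtw) or a G_AND with a 0xFF/0xFFFF/0xFFFFFFFF mask (selectable
  // as uxtb/uxth/uxtw).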
  auto IsSupportedExtend = [&](const MachineInstr &MI) {
    if (MI.getOpcode() == TargetOpcode::G_SEXT_INREG)
      return true;
    if (MI.getOpcode() != TargetOpcode::G_AND)
      return false;
    auto ValAndVReg =
        getIConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
    if (!ValAndVReg)
      return false;
    uint64_t Mask = ValAndVReg->Value.getZExtValue();
    return (Mask == 0xFF || Mask == 0xFFFF || Mask == 0xFFFFFFFF);
  };

  MachineInstr *Def = getDefIgnoringCopies(CmpOp, MRI);
  if (IsSupportedExtend(*Def))
    return 1;

  unsigned Opc = Def->getOpcode();
  if (Opc != TargetOpcode::G_SHL && Opc != TargetOpcode::G_ASHR &&
      Opc != TargetOpcode::G_LSHR)
    return 0;

  auto MaybeShiftAmt =
      getIConstantVRegValWithLookThrough(Def->getOperand(2).getReg(), MRI);
  if (!MaybeShiftAmt)
    return 0;
  uint64_t ShiftAmt = MaybeShiftAmt->Value.getZExtValue();
  MachineInstr *ShiftLHS =
      getDefIgnoringCopies(Def->getOperand(1).getReg(), MRI);

  // Check if we can fold an extend and a shift.
  // FIXME: This is duplicated with the selector. (See:
  // selectArithExtendedRegister)
  if (IsSupportedExtend(*ShiftLHS))
    return (ShiftAmt <= 4) ? 2 : 1;

  LLT Ty = MRI.getType(Def->getOperand(0).getReg());
  if (Ty.isVector())
    return 0;
  unsigned ShiftSize = Ty.getSizeInBits();
  if ((ShiftSize == 32 && ShiftAmt <= 31) ||
      (ShiftSize == 64 && ShiftAmt <= 63))
    return 1;
  return 0;
}

/// \returns true if it would be profitable to swap the LHS and RHS of a G_ICMP
/// instruction \p MI.
bool trySwapICmpOperands(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_ICMP);
  // Swap the operands if it would introduce a profitable folding opportunity
  // (e.g. a shift + extend).
  //
  // For example:
  //   lsl     w13, w11, #1
  //   cmp     w13, w12
  // can be turned into:
  //   cmp     w12, w11, lsl #1

  // Don't swap if there's a constant on the RHS, because we know we can fold
  // that.
  Register RHS = MI.getOperand(3).getReg();
  auto RHSCst = getIConstantVRegValWithLookThrough(RHS, MRI);
  if (RHSCst && isLegalArithImmed(RHSCst->Value.getSExtValue()))
    return false;

  Register LHS = MI.getOperand(2).getReg();
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  auto GetRegForProfit = [&](Register Reg) {
    MachineInstr *Def = getDefIgnoringCopies(Reg, MRI);
    return isCMN(Def, Pred, MRI) ? Def->getOperand(2).getReg() : Reg;
  };

  // Don't have a constant on the RHS. If we swap the LHS and RHS of the
  // compare, would we be able to fold more instructions?
  Register TheLHS = GetRegForProfit(LHS);
  Register TheRHS = GetRegForProfit(RHS);

  // If the LHS is more likely to give us a folding opportunity, then swap the
  // LHS and RHS.
  return (getCmpOperandFoldingProfit(TheLHS, MRI) >
          getCmpOperandFoldingProfit(TheRHS, MRI));
}

void applySwapICmpOperands(MachineInstr &MI, GISelChangeObserver &Observer) {
  auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
  Register LHS = MI.getOperand(2).getReg();
  Register RHS = MI.getOperand(3).getReg();
  Observer.changingInstr(MI);
  MI.getOperand(1).setPredicate(CmpInst::getSwappedPredicate(Pred));
  MI.getOperand(2).setReg(RHS);
  MI.getOperand(3).setReg(LHS);
  Observer.changedInstr(MI);
}

/// \returns a function which builds a vector floating point compare instruction
/// for a condition code \p CC.
/// \param [in] IsZero - True if the comparison is against 0.
/// \param [in] NoNans - True if the target has NoNansFPMath.
std::function<Register(MachineIRBuilder &)>
getVectorFCMP(AArch64CC::CondCode CC, Register LHS, Register RHS, bool IsZero,
              bool NoNans, MachineRegisterInfo &MRI) {
  LLT DstTy = MRI.getType(LHS);
  assert(DstTy.isVector() && "Expected vector types only?");
  assert(DstTy == MRI.getType(RHS) && "Src and Dst types must match!");
  switch (CC) {
  default:
    llvm_unreachable("Unexpected condition code!");
  case AArch64CC::NE:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      auto FCmp = IsZero
                      ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS})
                      : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS});
      return MIB.buildNot(DstTy, FCmp).getReg(0);
    };
  case AArch64CC::EQ:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMEQZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMEQ, {DstTy}, {LHS, RHS})
                       .getReg(0);
    };
  case AArch64CC::GE:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMGEZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {LHS, RHS})
                       .getReg(0);
    };
  case AArch64CC::GT:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMGTZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {LHS, RHS})
                       .getReg(0);
    };
  case AArch64CC::LS:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMLEZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMGE, {DstTy}, {RHS, LHS})
                       .getReg(0);
    };
  case AArch64CC::MI:
    return [LHS, RHS, IsZero, DstTy](MachineIRBuilder &MIB) {
      return IsZero
                 ? MIB.buildInstr(AArch64::G_FCMLTZ, {DstTy}, {LHS}).getReg(0)
                 : MIB.buildInstr(AArch64::G_FCMGT, {DstTy}, {RHS, LHS})
                       .getReg(0);
    };
  }
}

/// Try to lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
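///
/// E.g. a v4s32 G_FCMP with the oge predicate can later be selected via
/// G_FCMGE, and a compare against a splat of zero uses the G_FCMGEZ form
/// instead.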
bool matchLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();

  Register Dst = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(Dst);
  if (!DstTy.isVector() || !ST.hasNEON())
    return false;
  Register LHS = MI.getOperand(2).getReg();
  unsigned EltSize = MRI.getType(LHS).getScalarSizeInBits();
  if (EltSize == 16 && !ST.hasFullFP16())
    return false;
  if (EltSize != 16 && EltSize != 32 && EltSize != 64)
    return false;

  return true;
}

/// Lower a vector G_FCMP \p MI into an AArch64-specific pseudo.
void applyLowerVectorFCMP(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &MIB) {
  assert(MI.getOpcode() == TargetOpcode::G_FCMP);
  const auto &ST = MI.getMF()->getSubtarget<AArch64Subtarget>();

  const auto &CmpMI = cast<GFCmp>(MI);

  Register Dst = CmpMI.getReg(0);
  CmpInst::Predicate Pred = CmpMI.getCond();
  Register LHS = CmpMI.getLHSReg();
  Register RHS = CmpMI.getRHSReg();

  LLT DstTy = MRI.getType(Dst);

  auto Splat = getAArch64VectorSplat(*MRI.getVRegDef(RHS), MRI);

  // Compares against 0 have special target-specific pseudos.
  bool IsZero = Splat && Splat->isCst() && Splat->getCst() == 0;

  bool Invert = false;
  AArch64CC::CondCode CC, CC2 = AArch64CC::AL;
  if ((Pred == CmpInst::Predicate::FCMP_ORD ||
       Pred == CmpInst::Predicate::FCMP_UNO) &&
      IsZero) {
    // The special case "fcmp ord %a, 0" is the canonical check that LHS isn't
    // NaN, so equivalent to a == a and doesn't need the two comparisons an
    // "ord" normally would.
    // Similarly, "fcmp uno %a, 0" is the canonical check that LHS is NaN and is
    // thus equivalent to a != a.
    RHS = LHS;
    IsZero = false;
    CC = Pred == CmpInst::Predicate::FCMP_ORD ? AArch64CC::EQ : AArch64CC::NE;
  } else
    changeVectorFCMPPredToAArch64CC(Pred, CC, CC2, Invert);

  // Build the replacement sequence directly here to keep things simple.
  MIB.setInstrAndDebugLoc(MI);

  const bool NoNans =
      ST.getTargetLowering()->getTargetMachine().Options.NoNaNsFPMath;

  auto Cmp = getVectorFCMP(CC, LHS, RHS, IsZero, NoNans, MRI);
  Register CmpRes;
  if (CC2 == AArch64CC::AL)
    CmpRes = Cmp(MIB);
  else {
    auto Cmp2 = getVectorFCMP(CC2, LHS, RHS, IsZero, NoNans, MRI);
    auto Cmp2Dst = Cmp2(MIB);
    auto Cmp1Dst = Cmp(MIB);
    CmpRes = MIB.buildOr(DstTy, Cmp1Dst, Cmp2Dst).getReg(0);
  }
  if (Invert)
    CmpRes = MIB.buildNot(DstTy, CmpRes).getReg(0);
  MRI.replaceRegWith(Dst, CmpRes);
  MI.eraseFromParent();
}

bool matchFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Register DstReg = MI.getOperand(0).getReg();
  if (MRI.getType(DstReg).isVector())
    return false;
  // Match a store of a truncate.
  if (!mi_match(DstReg, MRI, m_GTrunc(m_Reg(SrcReg))))
    return false;
  // Only form truncstores for value types of max 64b.
  return MRI.getType(SrcReg).getSizeInBits() <= 64;
}

void applyFormTruncstore(MachineInstr &MI, MachineRegisterInfo &MRI,
                         MachineIRBuilder &B, GISelChangeObserver &Observer,
                         Register &SrcReg) {
  assert(MI.getOpcode() == TargetOpcode::G_STORE);
  Observer.changingInstr(MI);
  MI.getOperand(0).setReg(SrcReg);
  Observer.changedInstr(MI);
}

// Lower vector G_SEXT_INREG back to shifts for selection. We allowed them to
// form in the first place for combine opportunities, so any remaining ones at
// this stage need to be lowered back.
bool matchVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  Register DstReg = MI.getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  return DstTy.isVector();
}

void applyVectorSextInReg(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
  B.setInstrAndDebugLoc(MI);
  LegalizerHelper Helper(*MI.getMF(), Observer, B);
  Helper.lower(MI, 0, /* Unused hint type */ LLT());
}

/// Combine <N x t>, unused = unmerge(G_EXT <2*N x t> v, undef, N)
///           => unused, <N x t> = unmerge v
bool matchUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              Register &MatchInfo) {
  auto &Unmerge = cast<GUnmerge>(MI);
  if (Unmerge.getNumDefs() != 2)
    return false;
  if (!MRI.use_nodbg_empty(Unmerge.getReg(1)))
    return false;

  LLT DstTy = MRI.getType(Unmerge.getReg(0));
  if (!DstTy.isVector())
    return false;

  MachineInstr *Ext = getOpcodeDef(AArch64::G_EXT, Unmerge.getSourceReg(), MRI);
  if (!Ext)
    return false;

  Register ExtSrc1 = Ext->getOperand(1).getReg();
  Register ExtSrc2 = Ext->getOperand(2).getReg();
  auto LowestVal =
      getIConstantVRegValWithLookThrough(Ext->getOperand(3).getReg(), MRI);
  if (!LowestVal || LowestVal->Value.getZExtValue() != DstTy.getSizeInBytes())
    return false;

  if (!getOpcodeDef<GImplicitDef>(ExtSrc2, MRI))
    return false;

  MatchInfo = ExtSrc1;
  return true;
}

void applyUnmergeExtToUnmerge(MachineInstr &MI, MachineRegisterInfo &MRI,
                              MachineIRBuilder &B,
                              GISelChangeObserver &Observer, Register &SrcReg) {
  Observer.changingInstr(MI);
  // Swap dst registers.
  Register Dst1 = MI.getOperand(0).getReg();
  MI.getOperand(0).setReg(MI.getOperand(1).getReg());
  MI.getOperand(1).setReg(Dst1);
  MI.getOperand(2).setReg(SrcReg);
  Observer.changedInstr(MI);
}

// Match mul({z/s}ext, {z/s}ext) => {u/s}mull, OR
// match v2s64 mul instructions, which will then be scalarised later on.
// Doing these two matches in one function ensures that the order of matching
// is always the same.
// Try lowering MUL to MULL before trying to scalarize if needed.
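//
// E.g. a v4s32 G_MUL whose operands are both G_ZEXT from v4s16 becomes
//   %m:v4s32 = G_UMULL %a:v4s16, %b:v4s16
// (and the G_SEXT form becomes G_SMULL in the same way).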
bool matchExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI) {
  // Get the instructions that defined the source operands.
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);

  if (DstTy.isVector()) {
    // If the source operands were EXTENDED before, then {U/S}MULL can be used.
    unsigned I1Opc = I1->getOpcode();
    unsigned I2Opc = I2->getOpcode();
    if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
         (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
        (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
         MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
        (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
         MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {
      return true;
    }
    // If the result type is v2s64, scalarise the instruction.
    else if (DstTy == LLT::fixed_vector(2, 64)) {
      return true;
    }
  }
  return false;
}

void applyExtMulToMULL(MachineInstr &MI, MachineRegisterInfo &MRI,
                       MachineIRBuilder &B, GISelChangeObserver &Observer) {
  assert(MI.getOpcode() == TargetOpcode::G_MUL &&
         "Expected a G_MUL instruction");

  // Get the instructions that defined the source operands.
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  MachineInstr *I2 = getDefIgnoringCopies(MI.getOperand(2).getReg(), MRI);

  // If the source operands were EXTENDED before, then {U/S}MULL can be used.
  unsigned I1Opc = I1->getOpcode();
  unsigned I2Opc = I2->getOpcode();
  if (((I1Opc == TargetOpcode::G_ZEXT && I2Opc == TargetOpcode::G_ZEXT) ||
       (I1Opc == TargetOpcode::G_SEXT && I2Opc == TargetOpcode::G_SEXT)) &&
      (MRI.getType(I1->getOperand(0).getReg()).getScalarSizeInBits() ==
       MRI.getType(I1->getOperand(1).getReg()).getScalarSizeInBits() * 2) &&
      (MRI.getType(I2->getOperand(0).getReg()).getScalarSizeInBits() ==
       MRI.getType(I2->getOperand(1).getReg()).getScalarSizeInBits() * 2)) {

    B.setInstrAndDebugLoc(MI);
    B.buildInstr(I1->getOpcode() == TargetOpcode::G_ZEXT ? AArch64::G_UMULL
                                                         : AArch64::G_SMULL,
                 {MI.getOperand(0).getReg()},
                 {I1->getOperand(1).getReg(), I2->getOperand(1).getReg()});
    MI.eraseFromParent();
  }
  // If the result type is v2s64, scalarise the instruction.
  else if (DstTy == LLT::fixed_vector(2, 64)) {
    LegalizerHelper Helper(*MI.getMF(), Observer, B);
    B.setInstrAndDebugLoc(MI);
    Helper.fewerElementsVector(
        MI, 0,
        DstTy.changeElementCount(
            DstTy.getElementCount().divideCoefficientBy(2)));
  }
}

class AArch64PostLegalizerLoweringImpl : public Combiner {
protected:
  // TODO: Make CombinerHelper methods const.
  mutable CombinerHelper Helper;
  const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig;
  const AArch64Subtarget &STI;

public:
  AArch64PostLegalizerLoweringImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelCSEInfo *CSEInfo,
      const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
      const AArch64Subtarget &STI);

  static const char *getName() { return "AArch64PostLegalizerLowering"; }

  bool tryCombineAll(MachineInstr &I) const override;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
};

#define GET_GICOMBINER_IMPL
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_IMPL

AArch64PostLegalizerLoweringImpl::AArch64PostLegalizerLoweringImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelCSEInfo *CSEInfo,
    const AArch64PostLegalizerLoweringImplRuleConfig &RuleConfig,
    const AArch64Subtarget &STI)
    : Combiner(MF, CInfo, TPC, /*KB*/ nullptr, CSEInfo),
      Helper(Observer, B, /*IsPreLegalize*/ true), RuleConfig(RuleConfig),
      STI(STI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AArch64GenPostLegalizeGILowering.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

class AArch64PostLegalizerLowering : public MachineFunctionPass {
public:
  static char ID;

  AArch64PostLegalizerLowering();

  StringRef getPassName() const override {
    return "AArch64PostLegalizerLowering";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  AArch64PostLegalizerLoweringImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AArch64PostLegalizerLowering::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}

AArch64PostLegalizerLowering::AArch64PostLegalizerLowering()
    : MachineFunctionPass(ID) {
  initializeAArch64PostLegalizerLoweringPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AArch64PostLegalizerLowering::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  assert(MF.getProperties().hasProperty(
             MachineFunctionProperties::Property::Legalized) &&
         "Expected a legalized function?");
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();

  const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
  CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
                     /*LegalizerInfo*/ nullptr, /*OptEnabled=*/true,
                     F.hasOptSize(), F.hasMinSize());
  AArch64PostLegalizerLoweringImpl Impl(MF, CInfo, TPC, /*CSEInfo*/ nullptr,
                                        RuleConfig, ST);
  return Impl.combineMachineInstrs();
}

char AArch64PostLegalizerLowering::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PostLegalizerLowering, DEBUG_TYPE,
                      "Lower AArch64 MachineInstrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AArch64PostLegalizerLowering, DEBUG_TYPE,
                    "Lower AArch64 MachineInstrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAArch64PostLegalizerLowering() {
  return new AArch64PostLegalizerLowering();
}
} // end namespace llvm