Lines matching "gather" and "scatter" in MVEGatherScatterLowering.cpp

//===- MVEGatherScatterLowering.cpp - Gather/Scatter lowering -------------===//
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
/// This pass custom lowers llvm.gather and llvm.scatter instructions to
/// arm.mve.gather and arm.mve.scatter intrinsics, optimising the code to
/// produce a better final result as we go.
//===----------------------------------------------------------------------===//
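
For orientation, this is the kind of source loop that, once the vectoriser has
turned it into an llvm.masked.gather call, the pass can lower to an MVE gather.
An illustrative sketch, not code from this file; all names are made up:

#include <stdint.h>

/* Hypothetical input pattern: the loop vectoriser emits an
   llvm.masked.gather of <4 x i32> for base[offs[i]], which this pass then
   lowers to an MVE base-plus-offset-vector gather (VLDRW). */
void gather_example(uint32_t *dst, const uint32_t *base,
                    const uint32_t *offs, int n) {
  for (int i = 0; i < n; i++)
    dst[i] = base[offs[i]];
}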

#define DEBUG_TYPE "arm-mve-gather-scatter-lowering"

cl::opt<bool> EnableMaskedGatherScatters(
    "enable-arm-maskedgatscat", cl::Hidden, cl::init(true),
    cl::desc("Enable the generation of masked gathers and scatters"));
67 return "MVE gather/scatter lowering"; in getPassName()

// Helper declarations:
  // Check this is a valid gather with correct alignment
  // Compute the scale of this gather/scatter instruction
  // Create a gather from a base + vector of offsets
  // Create a gather from a vector of pointers
  // Create an incrementing gather from a vector of pointers
  // Create a scatter to a base + vector of offsets
  // Create a scatter to a vector of pointers
  // Create an incrementing scatter to a vector of pointers
  // QI gathers and scatters can increment their offsets on their own if the
  // increment is a constant value (digit) - this creates a writeback QI
  // gather/scatter
165 "MVE gather/scattering lowering pass", false, false)

// In checkOffsetSize():
  // positive offsets - i.e., the offsets are not allowed to be variables we
  // vector with element types smaller or equal the type of the gather we're
  // to fit into the gather type.
  unsigned OffsetElemSize = cast<FixedVectorType>(Offsets->getType())
                                ->getElementType()
                                ->getScalarSizeInBits();
  int SExtValue = OConst->getSExtValue();
  if (isa<FixedVectorType>(ConstOff->getType())) {
    if (!CheckValueSize(ConstOff->getAggregateElement(i)))

// In decomposePtr():
  Scale =
      computeScale(GEP->getSourceElementType()->getPrimitiveSizeInBits(),
                   MemoryTy->getScalarSizeInBits());
  return Scale == -1 ? nullptr : V;
  FixedVectorType *PtrTy = cast<FixedVectorType>(Ptr->getType());
  if (PtrTy->getNumElements() != 4 || MemoryTy->getScalarSizeInBits() == 32)

// In decomposeGEP():
  Value *GEPPtr = GEP->getPointerOperand();
  Offsets = GEP->getOperand(1);
  if (GEPPtr->getType()->isVectorTy() ||
      !isa<FixedVectorType>(Offsets->getType()))
  if (GEP->getNumOperands() != 2) {
  Offsets = GEP->getOperand(1);
  unsigned OffsetsElemCount =
      cast<FixedVectorType>(Offsets->getType())->getNumElements();
  assert(Ty->getNumElements() == OffsetsElemCount);
  Offsets = ZextOffs->getOperand(0);
  FixedVectorType *OffsetType = cast<FixedVectorType>(Offsets->getType());
  // If the offsets are already being zext-ed to <N x i32>, that relieves us of
  if (!ZextOffs || cast<FixedVectorType>(ZextOffs->getDestTy())
                           ->getElementType()
                           ->getScalarSizeInBits() != 32)
  // The offset sizes have been checked; if any truncating or zext-ing is
  if (Ty != Offsets->getType()) {
    if ((Ty->getElementType()->getScalarSizeInBits() <
         OffsetType->getElementType()->getScalarSizeInBits())) {

// In lookThroughBitcast():
  auto *BCTy = cast<FixedVectorType>(BitCast->getType());
  auto *BCSrcTy = cast<FixedVectorType>(BitCast->getOperand(0)->getType());
  if (BCTy->getNumElements() == BCSrcTy->getNumElements()) {
    Ptr = BitCast->getOperand(0);

// In computeScale():
  return -1;
340 if (C && C->getSplatValue()) in getIfConst()
341 return std::optional<int64_t>{C->getUniqueInteger().getSExtValue()}; in getIfConst()
346 if (I->getOpcode() == Instruction::Add || I->getOpcode() == Instruction::Or || in getIfConst()
347 I->getOpcode() == Instruction::Mul || in getIfConst()
348 I->getOpcode() == Instruction::Shl) { in getIfConst()
349 std::optional<int64_t> Op0 = getIfConst(I->getOperand(0)); in getIfConst()
350 std::optional<int64_t> Op1 = getIfConst(I->getOperand(1)); in getIfConst()
353 if (I->getOpcode() == Instruction::Add) in getIfConst()
355 if (I->getOpcode() == Instruction::Mul) in getIfConst()
357 if (I->getOpcode() == Instruction::Shl) in getIfConst()
359 if (I->getOpcode() == Instruction::Or) in getIfConst()

// In isAddLikeOr():
  return I->getOpcode() == Instruction::Or &&
         haveNoCommonBitsSet(I->getOperand(0), I->getOperand(1), DL);

// In getVarAndConst():
  // add-like-or.
      (Add->getOpcode() != Instruction::Add && !isAddLikeOr(Add, *DL)))
  if ((Const = getIfConst(Add->getOperand(0))))
    Summand = Add->getOperand(1);
  else if ((Const = getIfConst(Add->getOperand(1))))
    Summand = Add->getOperand(0);
  // Check that the constant is small enough for an incrementing gather
  if (Immediate > 512 || Immediate < -512 || Immediate % 4 != 0)
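
A minimal restatement of the guard above (the helper name is hypothetical, not
part of this file): a writeback gather/scatter can only absorb a constant
increment that is a multiple of 4 within [-512, 512].

/* Sketch mirroring the check above: nonzero when the constant increment
   can be folded into an incrementing (writeback) gather/scatter. */
static int fitsWritebackIncrement(long long Immediate) {
  return Immediate <= 512 && Immediate >= -512 && Immediate % 4 == 0;
}

So 16 or -508 qualify, while 6 (not a multiple of 4) and 1024 (out of range)
fall back to the non-incrementing path.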

// In lowerGather():
  // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
  // Attempt to turn the masked gather in I into an MVE intrinsic
  auto *Ty = cast<FixedVectorType>(I->getType());
  Value *Ptr = I->getArgOperand(0);
  Align Alignment = cast<ConstantInt>(I->getArgOperand(1))->getAlignValue();
  Value *Mask = I->getArgOperand(2);
  Value *PassThru = I->getArgOperand(3);
  if (!isLegalTypeAndAlignment(Ty->getNumElements(), Ty->getScalarSizeInBits(),
  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
  IRBuilder<> Builder(I->getContext());
  Builder.SetCurrentDebugLocation(I->getDebugLoc());
  LLVM_DEBUG(dbgs() << "masked gathers: found non-trivial passthru - "
  Root->replaceAllUsesWith(Load);
  Root->eraseFromParent();
  // If this was an extending gather, we need to get rid of the sext/zext
  // as well as of the gather itself
  I->eraseFromParent();
  LLVM_DEBUG(dbgs() << "masked gathers: successfully built masked gather\n"

// In tryCreateMaskedGatherBase():
  auto *Ty = cast<FixedVectorType>(I->getType());
  if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32)
  Value *Mask = I->getArgOperand(2);
      {Ty, Ptr->getType()},
      {Ty, Ptr->getType(), Mask->getType()},

// In tryCreateMaskedGatherBaseWB():
  auto *Ty = cast<FixedVectorType>(I->getType());
  if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32)
  Value *Mask = I->getArgOperand(2);
      {Ty, Ptr->getType()},
      {Ty, Ptr->getType(), Mask->getType()},

// In tryCreateMaskedGatherOffset():
  Type *MemoryTy = I->getType();
  // The size of the gather was already checked in isLegalTypeAndAlignment;
  if (MemoryTy->getPrimitiveSizeInBits() < 128) {
    if (I->hasOneUse()) {
      // If the gather has a single extend of the correct type, use an extending
      // gather and replace the ext. In which case the correct root to replace
      Instruction *User = cast<Instruction>(*I->users().begin());
          User->getType()->getPrimitiveSizeInBits() == 128) {
        ResultTy = User->getType();
          User->getType()->getPrimitiveSizeInBits() == 128) {
        ResultTy = User->getType();
    // extending gather and truncate back to the original type.
    if (ResultTy->getPrimitiveSizeInBits() < 128 &&
        ResultTy->isIntOrIntVectorTy()) {
      ResultTy = ResultTy->getWithNewBitWidth(
          128 / cast<FixedVectorType>(ResultTy)->getNumElements());
  // The final size of the gather must be a full vector width
  if (ResultTy->getPrimitiveSizeInBits() != 128) {
  Value *Mask = I->getArgOperand(2);
      {ResultTy, BasePtr->getType(), Offsets->getType(), Mask->getType()},
      {BasePtr, Offsets, Builder.getInt32(MemoryTy->getScalarSizeInBits()),
      {ResultTy, BasePtr->getType(), Offsets->getType()},
      {BasePtr, Offsets, Builder.getInt32(MemoryTy->getScalarSizeInBits()),
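
Seen from source level, the extend-folding above covers widening loads like the
sketch below (illustrative, with made-up names): the gather's lone sext user is
folded into an extending gather (e.g. VLDRH.S32) instead of staying a separate
instruction, and that sext becomes the root to replace.

#include <stdint.h>

/* Hypothetical pattern: i16 elements widened to i32 by the only user of
   the gather, so the extend can be folded into the gather itself. */
void gather_widen(int32_t *dst, const int16_t *base,
                  const uint32_t *offs, int n) {
  for (int i = 0; i < n; i++)
    dst[i] = (int32_t)base[offs[i]]; /* sext folded into the gather */
}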

// In lowerScatter():
  // @llvm.masked.scatter.*(data, ptrs, alignment, mask)
  // Attempt to turn the masked scatter in I into an MVE intrinsic
  Value *Input = I->getArgOperand(0);
  Value *Ptr = I->getArgOperand(1);
  Align Alignment = cast<ConstantInt>(I->getArgOperand(2))->getAlignValue();
  auto *Ty = cast<FixedVectorType>(Input->getType());
  if (!isLegalTypeAndAlignment(Ty->getNumElements(), Ty->getScalarSizeInBits(),
  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
  IRBuilder<> Builder(I->getContext());
  Builder.SetCurrentDebugLocation(I->getDebugLoc());
  LLVM_DEBUG(dbgs() << "masked scatters: successfully built masked scatter\n"
  I->eraseFromParent();

// In tryCreateMaskedScatterBase():
  Value *Input = I->getArgOperand(0);
  auto *Ty = cast<FixedVectorType>(Input->getType());
  if (!(Ty->getNumElements() == 4 && Ty->getScalarSizeInBits() == 32)) {
  Value *Mask = I->getArgOperand(3);
      {Ptr->getType(), Input->getType()},
      {Ptr->getType(), Input->getType(), Mask->getType()},

// In tryCreateMaskedScatterBaseWB():
  Value *Input = I->getArgOperand(0);
  auto *Ty = cast<FixedVectorType>(Input->getType());
  if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32)
  Value *Mask = I->getArgOperand(3);
      {Ptr->getType(), Input->getType()},
      {Ptr->getType(), Input->getType(), Mask->getType()},

// In tryCreateMaskedScatterOffset():
  Value *Input = I->getArgOperand(0);
  Value *Mask = I->getArgOperand(3);
  Type *InputTy = Input->getType();
  // scatter instruction (we don't care about alignment here)
    Value *PreTrunc = Trunc->getOperand(0);
    Type *PreTruncTy = PreTrunc->getType();
    if (PreTruncTy->getPrimitiveSizeInBits() == 128) {
  if (InputTy->getPrimitiveSizeInBits() < 128 &&
      InputTy->isIntOrIntVectorTy()) {
    // implicit one with a zext, so that we can still create a scatter. We know
    InputTy = InputTy->getWithNewBitWidth(
        128 / cast<FixedVectorType>(InputTy)->getNumElements());
  if (InputTy->getPrimitiveSizeInBits() != 128) {
                      "non-standard input types. Expanding.\n");
      {BasePtr->getType(), Offsets->getType(), Input->getType(),
       Mask->getType()},
      Builder.getInt32(MemoryTy->getScalarSizeInBits()),
      {BasePtr->getType(), Offsets->getType(), Input->getType()},
      Builder.getInt32(MemoryTy->getScalarSizeInBits()),
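
Symmetrically, a narrowing store lets the pass fold the trunc of the input into
a truncating scatter (e.g. VSTRH.32). A sketch under the same assumptions as
the gather example above:

#include <stdint.h>

/* Hypothetical pattern: i32 values truncated to i16 on store; the trunc is
   absorbed by the scatter rather than kept as a separate instruction. */
void scatter_narrow(int16_t *base, const uint32_t *offs,
                    const int32_t *src, int n) {
  for (int i = 0; i < n; i++)
    base[offs[i]] = (int16_t)src[i]; /* trunc folded into the scatter */
}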

// In tryCreateIncrementingGatScat():
  if (I->getIntrinsicID() == Intrinsic::masked_gather)
    Ty = cast<FixedVectorType>(I->getType());
  else
    Ty = cast<FixedVectorType>(I->getArgOperand(0)->getType());
  if (Ty->getNumElements() != 4 || Ty->getScalarSizeInBits() != 32)
  Loop *L = LI->getLoopFor(I->getParent());
                    "wb gather/scatter\n");
  // - calculate that factor so it can be applied by hand
  int TypeScale =
      computeScale(DL->getTypeSizeInBits(GEP->getOperand(0)->getType()),
                   DL->getTypeSizeInBits(GEP->getType()) /
                       cast<FixedVectorType>(GEP->getType())->getNumElements());
  if (TypeScale == -1)
  if (GEP->hasOneUse()) {
    // Only in this case do we want to build a wb gather, because the wb will
                    "non-wb gather/scatter\n");
      Builder.CreateVectorSplat(Ty->getNumElements(),
      "ScaledIndex", I->getIterator());
      Ty->getNumElements(),
          cast<VectorType>(ScaledOffsets->getType())->getElementType())),
      "StartIndex", I->getIterator());
  if (I->getIntrinsicID() == Intrinsic::masked_gather)

// In tryCreateIncrementingWBGatScat():
  // Check whether this gather's offset is incremented by a constant - if so,
  // and the load is of the right type, we can merge this into a QI gather
  Loop *L = LI->getLoopFor(I->getParent());
  if (Phi == nullptr || Phi->getNumIncomingValues() != 2 ||
      Phi->getParent() != L->getHeader() || Phi->getNumUses() != 2)
  // one in the gather's gep
      Phi->getIncomingBlock(0) == L->getLoopLatch() ? 0 : 1;
  Offsets = Phi->getIncomingValue(IncrementIndex);
  Builder.SetInsertPoint(&Phi->getIncomingBlock(1 - IncrementIndex)->back());
      cast<FixedVectorType>(OffsetsIncoming->getType())->getNumElements();
      Instruction::Shl, Phi->getIncomingValue(1 - IncrementIndex),
      Phi->getIncomingBlock(1 - IncrementIndex)->back().getIterator());
          cast<VectorType>(ScaledOffsets->getType())->getElementType())),
      Phi->getIncomingBlock(1 - IncrementIndex)->back().getIterator());
  // The gather is pre-incrementing
      Phi->getIncomingBlock(1 - IncrementIndex)->back().getIterator());
  Phi->setIncomingValue(1 - IncrementIndex, OffsetsIncoming);
  if (I->getIntrinsicID() == Intrinsic::masked_gather) {
    // Build the incrementing gather
    // One value to be handed to whoever uses the gather, one is the loop
    EndResult = ExtractValueInst::Create(Load, 0, "Gather");
    // Build the incrementing scatter
  AddInst->replaceAllUsesWith(NewInduction);
  AddInst->eraseFromParent();
  Phi->setIncomingValue(IncrementIndex, NewInduction);
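
The loop shape this merge targets, written by hand with ACLE MVE intrinsics (a
sketch assuming an MVE-enabled toolchain and n % 4 == 0; the pass itself
matches the IR phi/gep pattern above, not this source): the offset vector is
advanced by a constant each round, which can be folded into a single
post-incrementing (writeback) gather.

#include <arm_mve.h>
#include <stdint.h>

uint32x4_t sum_gathered(const uint32_t *base, int n) {
  uint32x4_t offs = vidupq_n_u32(0, 1); /* {0,1,2,3} */
  uint32x4_t acc = vdupq_n_u32(0);
  for (int i = 0; i < n; i += 4) {
    acc = vaddq_u32(acc, vldrwq_gather_shifted_offset_u32(base, offs));
    offs = vaddq_n_u32(offs, 4); /* constant increment: writeback candidate */
  }
  return acc;
}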

// In pushOutAdd():
      Phi->getIncomingBlock(StartIndex)->back().getIterator();
      Instruction::Add, Phi->getIncomingValue(StartIndex), OffsSecondOperand,
  Phi->addIncoming(NewIndex, Phi->getIncomingBlock(StartIndex));
  Phi->addIncoming(Phi->getIncomingValue(IncrementIndex),
                   Phi->getIncomingBlock(IncrementIndex));
  Phi->removeIncomingValue(1);
  Phi->removeIncomingValue((unsigned)0);

// In pushOutMulShl():
      Phi->getIncomingBlock(LoopIncrement == 1 ? 0 : 1)->back().getIterator();
      Phi->getIncomingValue(LoopIncrement == 1 ? 0 : 1),
      Phi->getIncomingBlock(LoopIncrement)->back().getIterator();
  Phi->addIncoming(StartIndex,
                   Phi->getIncomingBlock(LoopIncrement == 1 ? 0 : 1));
  Phi->addIncoming(NewIncrement, Phi->getIncomingBlock(LoopIncrement));
  Phi->removeIncomingValue((unsigned)0);
  Phi->removeIncomingValue((unsigned)0);
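
What pushOutAdd and pushOutMulShl buy, sketched in pseudo-IR (hypothetical
shapes, not this file's output): a loop-invariant operation on the offsets phi
is folded into the phi's start value, and for mul/shl also into its per-round
increment, so the loop body carries no extra instruction.

/* Before:        offs = phi(start, offs.next);      use(offs + inv);
                  offs.next = offs + step;
   pushOutAdd:    offs = phi(start + inv, offs.next); use(offs);
                  offs.next = offs + step;
   pushOutMulShl (multiply by m):
                  offs = phi(start * m, offs.next);   use(offs);
                  offs.next = offs + step * m; */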

// In hasAllGatScatUsers():
  if (I->hasNUses(0)) {
  for (User *U : I->users()) {
    unsigned OpCode = cast<Instruction>(U)->getOpcode();

// In optimiseOffsets():
  if (Offs->getOpcode() != Instruction::Add && !isAddLikeOr(Offs, *DL) &&
      Offs->getOpcode() != Instruction::Mul &&
      Offs->getOpcode() != Instruction::Shl)
  Loop *L = LI->getLoopFor(BB);
  if (!Offs->hasOneUse()) {
  if (isa<PHINode>(Offs->getOperand(0))) {
    Phi = cast<PHINode>(Offs->getOperand(0));
  } else if (isa<PHINode>(Offs->getOperand(1))) {
    Phi = cast<PHINode>(Offs->getOperand(1));
  if (isa<Instruction>(Offs->getOperand(0)) &&
      L->contains(cast<Instruction>(Offs->getOperand(0))))
    Changed |= optimiseOffsets(Offs->getOperand(0), BB, LI);
  if (isa<Instruction>(Offs->getOperand(1)) &&
      L->contains(cast<Instruction>(Offs->getOperand(1))))
    Changed |= optimiseOffsets(Offs->getOperand(1), BB, LI);
  if (isa<PHINode>(Offs->getOperand(0))) {
    Phi = cast<PHINode>(Offs->getOperand(0));
  } else if (isa<PHINode>(Offs->getOperand(1))) {
    Phi = cast<PHINode>(Offs->getOperand(1));
  if (Phi->getParent() != L->getHeader())
      IncInstruction->getOpcode() != Instruction::Add)
  int IncrementingBlock = Phi->getIncomingValue(0) == IncInstruction ? 0 : 1;
  Value *OffsSecondOperand = Offs->getOperand(OffsSecondOp);
  if (IncrementPerRound->getType() != OffsSecondOperand->getType() ||
      !L->isLoopInvariant(OffsSecondOperand))
      !L->contains(cast<Instruction>(IncrementPerRound)))
  if (Phi->getNumUses() == 2) {
    // No other users -> reuse existing phi (One user is the instruction
    if (IncInstruction->getNumUses() != 1) {
      IncInstruction = BinaryOperator::Create(
          Instruction::BinaryOps(IncInstruction->getOpcode()), Phi,
          IncrementPerRound, "LoopIncrement", IncInstruction->getIterator());
      Phi->setIncomingValue(IncrementingBlock, IncInstruction);
    // There are other users -> create a new phi
    NewPhi = PHINode::Create(Phi->getType(), 2, "NewPhi", Phi->getIterator());
    NewPhi->addIncoming(Phi->getIncomingValue(IncrementingBlock == 1 ? 0 : 1),
                        Phi->getIncomingBlock(IncrementingBlock == 1 ? 0 : 1));
    IncInstruction = BinaryOperator::Create(
        Instruction::BinaryOps(IncInstruction->getOpcode()), NewPhi,
        IncrementPerRound, "LoopIncrement", IncInstruction->getIterator());
    NewPhi->addIncoming(IncInstruction,
                        Phi->getIncomingBlock(IncrementingBlock));
  IRBuilder<> Builder(BB->getContext());
  Builder.SetCurrentDebugLocation(Offs->getDebugLoc());
  switch (Offs->getOpcode()) {
    pushOutMulShl(Offs->getOpcode(), NewPhi, IncrementPerRound,
  Offs->replaceAllUsesWith(NewPhi);
  if (Offs->hasNUses(0))
    Offs->eraseFromParent();
  if (IncInstruction->hasNUses(0))
    IncInstruction->eraseFromParent();

// In CheckAndCreateOffsetAdd():
  // Splat the non-vector value to a vector of the given type - if the value is
      VT->getElementType() != NonVectorVal->getType()) {
    unsigned TargetElemSize = VT->getElementType()->getPrimitiveSizeInBits();
    uint64_t N = Const->getZExtValue();
    if (N < (unsigned)(1 << (TargetElemSize - 1))) {
          VT->getNumElements(), Builder.getIntN(TargetElemSize, N));
        Builder.CreateVectorSplat(VT->getNumElements(), NonVectorVal);
  FixedVectorType *XElType = dyn_cast<FixedVectorType>(X->getType());
  FixedVectorType *YElType = dyn_cast<FixedVectorType>(Y->getType());
    YElType = cast<FixedVectorType>(Y->getType());
    XElType = cast<FixedVectorType>(X->getType());
  if (XElType->getElementType()->getScalarSizeInBits() != 32) {
    unsigned TargetElemSize = 128 / XElType->getNumElements();
    for (unsigned i = 0; i < XElType->getNumElements(); i++) {
      ConstantInt *ConstXEl =
          dyn_cast<ConstantInt>(ConstX->getAggregateElement(i));
      ConstantInt *ConstYEl =
          dyn_cast<ConstantInt>(ConstY->getAggregateElement(i));
          ConstXEl->getZExtValue() * ScaleX +
                  ConstYEl->getZExtValue() * ScaleY >=
              (unsigned)(1 << (TargetElemSize - 1)))
      XElType->getNumElements(),
      Builder.getIntN(XElType->getScalarSizeInBits(), ScaleX));
      YElType->getNumElements(),
      Builder.getIntN(YElType->getScalarSizeInBits(), ScaleY));
  if (checkOffsetSize(Add, XElType->getNumElements()))

// In foldGEP():
  Value *GEPPtr = GEP->getPointerOperand();
  Offsets = GEP->getOperand(1);
  Scale = DL->getTypeAllocSize(GEP->getSourceElementType());
  if (GEP->getNumIndices() != 1 || !isa<Constant>(Offsets))
      Offsets, Scale, GEP->getOperand(1),
      DL->getTypeAllocSize(GEP->getSourceElementType()), Builder);

// In optimiseAddress():
  if (GEP->hasOneUse() && isa<GetElementPtrInst>(GEP->getPointerOperand())) {
    IRBuilder<> Builder(GEP->getContext());
    Builder.SetCurrentDebugLocation(GEP->getDebugLoc());
    // used by an MVE gather; thus the offset has to have the correct size
    if (auto *VecTy = dyn_cast<FixedVectorType>(Base->getType()))
        "gep.merged", GEP->getIterator());
    GEP->replaceAllUsesWith(
        Builder.CreateBitCast(NewAddress, GEP->getType()));
  Changed |= optimiseOffsets(GEP->getOperand(1), GEP->getParent(), LI);
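
At source level, the chained-GEP merge above corresponds to nested indexing
whose two offset computations can be combined into one base plus a single
offset vector. A hypothetical shape, with made-up names:

#include <stdint.h>

/* The inner &table[16] is a GEP with one constant index; the outer
   offs[i] is the vector GEP feeding the gather. foldGEP can merge both
   offsets so the gather sees a single base plus one combined offset. */
uint32_t sum_inner(const uint32_t *table, const uint32_t *offs, int n) {
  const uint32_t *base = &table[16]; /* inner GEP */
  uint32_t acc = 0;
  for (int i = 0; i < n; i++)
    acc += base[offs[i]]; /* outer GEP feeding the masked gather */
  return acc;
}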

// In runOnFunction():
  if (!ST->hasMVEIntegerOps())
  if (II && II->getIntrinsicID() == Intrinsic::masked_gather &&
      isa<FixedVectorType>(II->getType())) {
    Changed |= optimiseAddress(II->getArgOperand(0), II->getParent(), LI);
  } else if (II && II->getIntrinsicID() == Intrinsic::masked_scatter &&
             isa<FixedVectorType>(II->getArgOperand(0)->getType())) {
    Changed |= optimiseAddress(II->getArgOperand(1), II->getParent(), LI);
  SimplifyInstructionsInBlock(L->getParent());
  SimplifyInstructionsInBlock(S->getParent());