Lines Matching +full:full +full:- +full:scale

1 //===----------- VectorUtils.cpp - Vectorizer utility functions -----------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
11 //===----------------------------------------------------------------------===//
38 "max-interleave-group-factor", cl::Hidden,
48 case Intrinsic::abs: // Begin integer bit-manipulation. in isTriviallyVectorizable()
68 case Intrinsic::sqrt: // Begin floating-point. in isTriviallyVectorizable()
135 return OpdIdx == -1 || OpdIdx == 0; in isVectorIntrinsicWithOverloadTypeAtArg()
139 return OpdIdx == -1 || OpdIdx == 1; in isVectorIntrinsicWithOverloadTypeAtArg()
141 return OpdIdx == -1; in isVectorIntrinsicWithOverloadTypeAtArg()
166 assert(V->getType()->isVectorTy() && "Not looking at a vector?"); in findScalarElement()
167 VectorType *VTy = cast<VectorType>(V->getType()); in findScalarElement()
168 // For fixed-length vector, return poison for out of range access. in findScalarElement()
170 unsigned Width = FVTy->getNumElements(); in findScalarElement()
172 return PoisonValue::get(FVTy->getElementType()); in findScalarElement()
176 return C->getAggregateElement(EltNo); in findScalarElement()
180 if (!isa<ConstantInt>(III->getOperand(2))) in findScalarElement()
182 unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue(); in findScalarElement()
187 return III->getOperand(1); in findScalarElement()
190 if (III == III->getOperand(0)) in findScalarElement()
195 return findScalarElement(III->getOperand(0), EltNo); in findScalarElement()
199 // Restrict the following transformation to fixed-length vector. in findScalarElement()
200 if (SVI && isa<FixedVectorType>(SVI->getType())) { in findScalarElement()
202 cast<FixedVectorType>(SVI->getOperand(0)->getType())->getNumElements(); in findScalarElement()
203 int InEl = SVI->getMaskValue(EltNo); in findScalarElement()
205 return PoisonValue::get(VTy->getElementType()); in findScalarElement()
207 return findScalarElement(SVI->getOperand(0), InEl); in findScalarElement()
208 return findScalarElement(SVI->getOperand(1), InEl - LHSWidth); in findScalarElement()
215 if (Constant *Elt = C->getAggregateElement(EltNo)) in findScalarElement()
216 if (Elt->isNullValue()) in findScalarElement()
222 if (EltNo < VTy->getElementCount().getKnownMinValue()) in findScalarElement()
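The findScalarElement fragments above walk back through insertelement and shufflevector operands to locate a single extracted lane. As a rough standalone illustration of the shufflevector case only (hypothetical names, masks as plain integer vectors, not the LLVM implementation):

#include <utility>
#include <vector>

// Map an extracted lane of a two-source shuffle back to (operand, lane),
// where operand 0/1 is the first/second shuffle source and lane -1 means the
// result is poison (undef mask element).
std::pair<int, int> traceShuffleLaneSketch(const std::vector<int> &Mask,
                                           int LHSWidth, int EltNo) {
  int InEl = Mask[EltNo];
  if (InEl < 0)
    return {-1, -1};           // undef mask element -> poison result
  if (InEl < LHSWidth)
    return {0, InEl};          // lane comes from the first operand
  return {1, InEl - LHSWidth}; // lane comes from the second operand
}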
230 int SplatIndex = -1; in getSplatIndex()
236 // There can be only 1 non-negative mask element value if this is a splat. in getSplatIndex()
237 if (SplatIndex != -1 && SplatIndex != M) in getSplatIndex()
238 return -1; in getSplatIndex()
240 // Initialize the splat index to the 1st non-negative mask element. in getSplatIndex()
243 assert((SplatIndex == -1 || SplatIndex >= 0) && "Negative index?"); in getSplatIndex()
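The getSplatIndex fragments above scan a shuffle mask for a single repeated source lane. A standalone sketch of that scan (hypothetical names, not the LLVM code itself):

#include <vector>

// Return the one non-negative lane index a splat mask repeats, or -1 if the
// mask names two different lanes. Negative mask elements are undef lanes.
int getSplatIndexSketch(const std::vector<int> &Mask) {
  int SplatIndex = -1;
  for (int M : Mask) {
    if (M < 0)
      continue; // ignore undefined mask elements
    // There can be only one non-negative mask element value in a splat.
    if (SplatIndex != -1 && SplatIndex != M)
      return -1;
    SplatIndex = M; // first non-negative element initializes the splat index
  }
  return SplatIndex;
}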
252 if (isa<VectorType>(V->getType())) in getSplatValue()
254 return C->getSplatValue(); in getSplatValue()
269 if (isa<VectorType>(V->getType())) { in isSplatValue()
275 return C->getSplatValue() != nullptr; in isSplatValue()
281 if (!all_equal(Shuf->getShuffleMask())) in isSplatValue()
285 if (Index == -1) in isSplatValue()
290 return Shuf->getMaskValue(Index) == Index; in isSplatValue()
329 assert((-1 <= M) && (M < (SrcWidth * 2)) && in getShuffleDemandedElts()
343 DemandedRHS.setBit(M - SrcWidth); in getShuffleDemandedElts()
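The getShuffleDemandedElts fragments above split the demanded output lanes of a two-source shuffle back onto the two sources. A simplified standalone sketch (hypothetical names, std::vector<bool> standing in for APInt):

#include <cassert>
#include <vector>

void demandedEltsSketch(int SrcWidth, const std::vector<int> &Mask,
                        const std::vector<bool> &DemandedOut,
                        std::vector<bool> &DemandedLHS,
                        std::vector<bool> &DemandedRHS) {
  DemandedLHS.assign(SrcWidth, false);
  DemandedRHS.assign(SrcWidth, false);
  for (size_t I = 0; I != Mask.size(); ++I) {
    if (!DemandedOut[I])
      continue; // this output lane is never read
    int M = Mask[I];
    assert((-1 <= M) && (M < (SrcWidth * 2)) && "Invalid shuffle mask element");
    if (M < 0)
      continue; // undef lane demands nothing from either source
    if (M < SrcWidth)
      DemandedLHS[M] = true;            // lane comes from the first source
    else
      DemandedRHS[M - SrcWidth] = true; // lane comes from the second source
  }
}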
349 void llvm::narrowShuffleMaskElts(int Scale, ArrayRef<int> Mask, in narrowShuffleMaskElts() argument
351 assert(Scale > 0 && "Unexpected scaling factor"); in narrowShuffleMaskElts()
353 // Fast-path: if no scaling, then it is just a copy. in narrowShuffleMaskElts()
354 if (Scale == 1) { in narrowShuffleMaskElts()
362 assert(((uint64_t)Scale * MaskElt + (Scale - 1)) <= INT32_MAX && in narrowShuffleMaskElts()
363 "Overflowed 32-bits"); in narrowShuffleMaskElts()
365 for (int SliceElt = 0; SliceElt != Scale; ++SliceElt) in narrowShuffleMaskElts()
366 ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt); in narrowShuffleMaskElts()
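narrowShuffleMaskElts, sketched standalone below (hypothetical names, std::vector instead of SmallVectorImpl), expands each wide mask element into Scale consecutive narrow elements and copies negative (undef/poison) sentinels through unchanged; e.g. Scale = 2 turns <1, 0> into <2, 3, 0, 1>:

#include <cassert>
#include <cstdint>
#include <vector>

void narrowMaskSketch(int Scale, const std::vector<int> &Mask,
                      std::vector<int> &ScaledMask) {
  assert(Scale > 0 && "Unexpected scaling factor");
  ScaledMask.clear();
  // Fast path: if no scaling, then it is just a copy.
  if (Scale == 1) {
    ScaledMask = Mask;
    return;
  }
  for (int MaskElt : Mask) {
    if (MaskElt >= 0)
      assert((int64_t)Scale * MaskElt + (Scale - 1) <= INT32_MAX &&
             "Overflowed 32-bits");
    // One wide element becomes Scale narrow elements.
    for (int SliceElt = 0; SliceElt != Scale; ++SliceElt)
      ScaledMask.push_back(MaskElt < 0 ? MaskElt : Scale * MaskElt + SliceElt);
  }
}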
370 bool llvm::widenShuffleMaskElts(int Scale, ArrayRef<int> Mask, in widenShuffleMaskElts() argument
372 assert(Scale > 0 && "Unexpected scaling factor"); in widenShuffleMaskElts()
374 // Fast-path: if no scaling, then it is just a copy. in widenShuffleMaskElts()
375 if (Scale == 1) { in widenShuffleMaskElts()
382 if (NumElts % Scale != 0) in widenShuffleMaskElts()
386 ScaledMask.reserve(NumElts / Scale); in widenShuffleMaskElts()
388 // Step through the input mask by splitting into Scale-sized slices. in widenShuffleMaskElts()
390 ArrayRef<int> MaskSlice = Mask.take_front(Scale); in widenShuffleMaskElts()
391 assert((int)MaskSlice.size() == Scale && "Expected Scale-sized slice."); in widenShuffleMaskElts()
403 if (SliceFront % Scale != 0) in widenShuffleMaskElts()
406 for (int i = 1; i < Scale; ++i) in widenShuffleMaskElts()
409 ScaledMask.push_back(SliceFront / Scale); in widenShuffleMaskElts()
411 Mask = Mask.drop_front(Scale); in widenShuffleMaskElts()
414 assert((int)ScaledMask.size() * Scale == NumElts && "Unexpected scaled mask"); in widenShuffleMaskElts()
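The inverse, widenShuffleMaskElts, can fail: a Scale-sized slice widens only if it is a run of consecutive indices starting on a wide-element boundary, or an all-identical negative sentinel. A standalone sketch with hypothetical names:

#include <vector>

bool widenMaskSketch(int Scale, const std::vector<int> &Mask,
                     std::vector<int> &ScaledMask) {
  ScaledMask.clear();
  // Fast path: if no scaling, then it is just a copy.
  if (Scale == 1) {
    ScaledMask = Mask;
    return true;
  }
  int NumElts = (int)Mask.size();
  if (NumElts % Scale != 0)
    return false; // mask length must be a multiple of the scale factor
  // Step through the input mask in Scale-sized slices.
  for (int Slice = 0; Slice != NumElts; Slice += Scale) {
    int Front = Mask[Slice];
    if (Front < 0) {
      // The whole slice must repeat the same sentinel to widen to it.
      for (int I = 1; I != Scale; ++I)
        if (Mask[Slice + I] != Front)
          return false;
      ScaledMask.push_back(Front);
      continue;
    }
    if (Front % Scale != 0)
      return false; // run must start on a wide-element boundary
    for (int I = 1; I != Scale; ++I)
      if (Mask[Slice + I] != Front + I)
        return false; // run must be consecutive
    ScaledMask.push_back(Front / Scale);
  }
  return true;
}

For example, widening <2, 3, 0, 1> by Scale = 2 yields <1, 0>, while a slice such as <2, 0> cannot be widened.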
426 // Fast-path: if no scaling, then it is just a copy. in scaleShuffleMaskElts()
432 // Ensure we can find a whole scale factor. in scaleShuffleMaskElts()
437 int Scale = NumSrcElts / NumDstElts; in scaleShuffleMaskElts() local
438 return widenShuffleMaskElts(Scale, Mask, ScaledMask); in scaleShuffleMaskElts()
441 int Scale = NumDstElts / NumSrcElts; in scaleShuffleMaskElts() local
442 narrowShuffleMaskElts(Scale, Mask, ScaledMask); in scaleShuffleMaskElts()
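scaleShuffleMaskElts then just dispatches between the two, depending on whether the destination has fewer or more elements than the mask. A hypothetical glue routine building on the two sketches above:

#include <cassert>
#include <vector>

bool scaleMaskSketch(unsigned NumDstElts, const std::vector<int> &Mask,
                     std::vector<int> &ScaledMask) {
  unsigned NumSrcElts = (unsigned)Mask.size();
  // Fast path: if no scaling, then it is just a copy.
  if (NumSrcElts == NumDstElts) {
    ScaledMask = Mask;
    return true;
  }
  // Ensure we can find a whole scale factor.
  assert((NumSrcElts % NumDstElts == 0 || NumDstElts % NumSrcElts == 0) &&
         "Unexpected scaling factor");
  if (NumSrcElts > NumDstElts)
    return widenMaskSketch((int)(NumSrcElts / NumDstElts), Mask, ScaledMask);
  narrowMaskSketch((int)(NumDstElts / NumSrcElts), Mask, ScaledMask);
  return true; // narrowing always succeeds
}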
451 for (unsigned Scale = 2; Scale <= InputMask.size(); ++Scale) { in getShuffleMaskWithWidestElts() local
452 while (widenShuffleMaskElts(Scale, InputMask, *Output)) { in getShuffleMaskWithWidestElts()
535 int FirstIdx = -1; in processShuffleMasks()
536 SecondIdx = -1; in processShuffleMasks()
592 LocalIdx -= HalfEltsPerLane; in getHorizDemandedEltsForFirstOperand()
602 // DemandedBits will give us every value's live-out bits. But we want in computeMinimumValueSizes()
613 // Determine the roots. We work bottom-up, from truncs or icmps. in computeMinimumValueSizes()
620 !TTI->isTypeLegal(I.getOperand(0)->getType())) in computeMinimumValueSizes()
623 // Only deal with non-vector integers up to 64-bits wide. in computeMinimumValueSizes()
625 !I.getType()->isVectorTy() && in computeMinimumValueSizes()
626 I.getOperand(0)->getType()->getScalarSizeInBits() <= 64) { in computeMinimumValueSizes()
629 if (TTI && isa<TruncInst>(&I) && TTI->isTypeLegal(I.getType())) in computeMinimumValueSizes()
640 // Now proceed breadth-first, unioning values together. in computeMinimumValueSizes()
648 // Non-instructions terminate a chain successfully. in computeMinimumValueSizes()
672 !I->getType()->isIntegerTy()) { in computeMinimumValueSizes()
687 for (Value *O : cast<User>(I)->operands()) { in computeMinimumValueSizes()
697 for (auto *U : I.first->users()) in computeMinimumValueSizes()
698 if (U->getType()->isIntegerTy() && DBits.count(U) == 0) in computeMinimumValueSizes()
716 if (isa<PHINode>(M) && MinBW < M->getType()->getScalarSizeInBits()) { in computeMinimumValueSizes()
727 Type *Ty = M->getType(); in computeMinimumValueSizes()
729 Ty = MI->getOperand(0)->getType(); in computeMinimumValueSizes()
731 if (MinBW >= Ty->getScalarSizeInBits()) in computeMinimumValueSizes()
736 if (any_of(MI->operands(), [&DB, MinBW](Use &U) { in computeMinimumValueSizes()
743 return CI->uge(MinBW); in computeMinimumValueSizes()
760 if (AccGroups->getNumOperands() == 0) { in addToAccessGroupList()
766 for (const auto &AccGroupListOp : AccGroups->operands()) { in addToAccessGroupList()
790 LLVMContext &Ctx = AccGroups1->getContext(); in uniteAccessGroups()
796 bool MayAccessMem1 = Inst1->mayReadOrWriteMemory(); in intersectAccessGroups()
797 bool MayAccessMem2 = Inst2->mayReadOrWriteMemory(); in intersectAccessGroups()
802 return Inst2->getMetadata(LLVMContext::MD_access_group); in intersectAccessGroups()
804 return Inst1->getMetadata(LLVMContext::MD_access_group); in intersectAccessGroups()
806 MDNode *MD1 = Inst1->getMetadata(LLVMContext::MD_access_group); in intersectAccessGroups()
807 MDNode *MD2 = Inst2->getMetadata(LLVMContext::MD_access_group); in intersectAccessGroups()
818 if (MD1->getNumOperands() == 0) { in intersectAccessGroups()
823 for (const MDOperand &Node : MD1->operands()) { in intersectAccessGroups()
836 LLVMContext &Ctx = Inst1->getContext(); in intersectAccessGroups()
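The intersectAccessGroups fragments above drop the constraint entirely when one instruction cannot touch memory, and otherwise keep only the access groups common to both lists. A loose standalone illustration of that intersection rule (access groups modeled as opaque string tokens rather than MDNodes):

#include <set>
#include <string>
#include <vector>

std::vector<std::string>
intersectGroupsSketch(const std::vector<std::string> &A,
                      const std::vector<std::string> &B) {
  std::set<std::string> SetB(B.begin(), B.end());
  std::vector<std::string> Result;
  for (const std::string &G : A)
    if (SetB.count(G)) // keep only groups present in both lists
      Result.push_back(G);
  return Result;
}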
846 I0->getAllMetadataOtherThanDebugLoc(Metadata); in propagateMetadata()
852 MDNode *MD = I0->getMetadata(Kind); in propagateMetadata()
855 MDNode *IMD = IJ->getMetadata(Kind); in propagateMetadata()
859 MD = MMRAMetadata::combine(Inst->getContext(), MD, IMD); in propagateMetadata()
884 Inst->setMetadata(Kind, MD); in propagateMetadata()
947 Mask.push_back(-1); in createSequentialMask()
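createSequentialMask, per the fragment above, pads the sequential run with -1 entries. A standalone sketch with hypothetical names; e.g. (Start = 0, NumInts = 4, NumUndefs = 2) gives <0, 1, 2, 3, -1, -1>:

#include <vector>

std::vector<int> sequentialMaskSketch(unsigned Start, unsigned NumInts,
                                      unsigned NumUndefs) {
  std::vector<int> Mask;
  Mask.reserve(NumInts + NumUndefs);
  for (unsigned I = 0; I != NumInts; ++I)
    Mask.push_back((int)(Start + I)); // consecutive lane indices
  for (unsigned I = 0; I != NumUndefs; ++I)
    Mask.push_back(-1);               // trailing undef lanes
  return Mask;
}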
956 assert(NumEltsSigned > 0 && "Expected smaller or non-zero element count"); in createUnaryMask()
963 int UnaryElt = MaskElt >= NumEltsSigned ? MaskElt - NumEltsSigned : MaskElt; in createUnaryMask()
974 VectorType *VecTy1 = dyn_cast<VectorType>(V1->getType()); in concatenateTwoVectors()
975 VectorType *VecTy2 = dyn_cast<VectorType>(V2->getType()); in concatenateTwoVectors()
977 VecTy1->getScalarType() == VecTy2->getScalarType() && in concatenateTwoVectors()
980 unsigned NumElts1 = cast<FixedVectorType>(VecTy1)->getNumElements(); in concatenateTwoVectors()
981 unsigned NumElts2 = cast<FixedVectorType>(VecTy2)->getNumElements(); in concatenateTwoVectors()
987 V2, createSequentialMask(0, NumElts2, NumElts1 - NumElts2)); in concatenateTwoVectors()
1003 for (unsigned i = 0; i < NumVecs - 1; i += 2) { in concatenateVectors()
1005 assert((V0->getType() == V1->getType() || i == NumVecs - 2) && in concatenateVectors()
1013 TmpList.push_back(ResList[NumVecs - 1]); in concatenateVectors()
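concatenateVectors reduces the list pairwise: each round concatenates adjacent pairs and carries an odd trailing vector forward, so N vectors take roughly log2(N) rounds. A simplified standalone sketch (vectors modeled as std::vector<int>; the undef-padding the real code applies to a shorter final operand is omitted):

#include <cassert>
#include <utility>
#include <vector>

using Vec = std::vector<int>;

static Vec concatTwoSketch(const Vec &A, const Vec &B) {
  Vec R(A);
  R.insert(R.end(), B.begin(), B.end());
  return R;
}

Vec concatenateSketch(std::vector<Vec> List) {
  assert(!List.empty() && "Expected at least one vector");
  while (List.size() > 1) {
    std::vector<Vec> Tmp;
    for (size_t I = 0; I + 1 < List.size(); I += 2)
      Tmp.push_back(concatTwoSketch(List[I], List[I + 1]));
    if (List.size() % 2 != 0)
      Tmp.push_back(List.back()); // odd count: carry the last vector forward
    List = std::move(Tmp);
  }
  return List.front();
}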
1023 assert(isa<VectorType>(Mask->getType()) && in maskIsAllZeroOrUndef()
1024 isa<IntegerType>(Mask->getType()->getScalarType()) && in maskIsAllZeroOrUndef()
1025 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == in maskIsAllZeroOrUndef()
1032 if (ConstMask->isNullValue() || isa<UndefValue>(ConstMask)) in maskIsAllZeroOrUndef()
1034 if (isa<ScalableVectorType>(ConstMask->getType())) in maskIsAllZeroOrUndef()
1038 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements(); in maskIsAllZeroOrUndef()
1040 if (auto *MaskElt = ConstMask->getAggregateElement(I)) in maskIsAllZeroOrUndef()
1041 if (MaskElt->isNullValue() || isa<UndefValue>(MaskElt)) in maskIsAllZeroOrUndef()
1049 assert(isa<VectorType>(Mask->getType()) && in maskIsAllOneOrUndef()
1050 isa<IntegerType>(Mask->getType()->getScalarType()) && in maskIsAllOneOrUndef()
1051 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == in maskIsAllOneOrUndef()
1058 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask)) in maskIsAllOneOrUndef()
1060 if (isa<ScalableVectorType>(ConstMask->getType())) in maskIsAllOneOrUndef()
1064 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements(); in maskIsAllOneOrUndef()
1066 if (auto *MaskElt = ConstMask->getAggregateElement(I)) in maskIsAllOneOrUndef()
1067 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt)) in maskIsAllOneOrUndef()
1075 assert(isa<VectorType>(Mask->getType()) && in maskContainsAllOneOrUndef()
1076 isa<IntegerType>(Mask->getType()->getScalarType()) && in maskContainsAllOneOrUndef()
1077 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == in maskContainsAllOneOrUndef()
1084 if (ConstMask->isAllOnesValue() || isa<UndefValue>(ConstMask)) in maskContainsAllOneOrUndef()
1086 if (isa<ScalableVectorType>(ConstMask->getType())) in maskContainsAllOneOrUndef()
1090 E = cast<FixedVectorType>(ConstMask->getType())->getNumElements(); in maskContainsAllOneOrUndef()
1092 if (auto *MaskElt = ConstMask->getAggregateElement(I)) in maskContainsAllOneOrUndef()
1093 if (MaskElt->isAllOnesValue() || isa<UndefValue>(MaskElt)) in maskContainsAllOneOrUndef()
1102 assert(isa<FixedVectorType>(Mask->getType()) && in possiblyDemandedEltsInMask()
1103 isa<IntegerType>(Mask->getType()->getScalarType()) && in possiblyDemandedEltsInMask()
1104 cast<IntegerType>(Mask->getType()->getScalarType())->getBitWidth() == in possiblyDemandedEltsInMask()
1109 cast<FixedVectorType>(Mask->getType())->getNumElements(); in possiblyDemandedEltsInMask()
1113 if (CV->getAggregateElement(i)->isNullValue()) in possiblyDemandedEltsInMask()
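The mask predicates above all walk a constant i1 mask lane by lane; possiblyDemandedEltsInMask keeps a bit for every lane that is not a known zero. A simplified standalone sketch, with std::optional<bool> lanes standing in for constant/undef mask elements:

#include <optional>
#include <vector>

std::vector<bool>
possiblyDemandedLanesSketch(const std::vector<std::optional<bool>> &Mask) {
  std::vector<bool> Demanded(Mask.size(), true);
  for (size_t I = 0; I != Mask.size(); ++I)
    if (Mask[I].has_value() && !*Mask[I])
      Demanded[I] = false; // a known-false lane is definitely not demanded
  return Demanded;
}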
1126 auto &DL = TheLoop->getHeader()->getDataLayout(); in collectConstStrideAccesses()
1150 // part of a full group or a group with gaps. Checking wrapping for all in collectConstStrideAccesses()
1152 // conservative. For full groups, wrapping should be ok since if we would in collectConstStrideAccesses()
1176 // any write-after-read (WAR) dependences.
1186 // The algorithm visits all memory accesses in the loop in bottom-up program
1190 // We visit the memory accesses in bottom-up order because it can simplify the
1191 // construction of store groups in the presence of write-after-write (WAW)
1201 // bottom-up order does not imply that WAW dependences should not be checked.
1205 const auto &Strides = LAI->getSymbolicStrides(); in analyzeInterleaving()
1224 // Search in bottom-up program order for pairs of accesses (A and B) that can in analyzeInterleaving()
1238 Instruction *B = BI->first; in analyzeInterleaving()
1239 StrideDescriptor DesB = BI->second; in analyzeInterleaving()
1242 // create a group for B, we continue with the bottom-up algorithm to ensure in analyzeInterleaving()
1246 (!isPredicated(B->getParent()) || EnablePredicatedInterleavedMemAccesses)) { in analyzeInterleaving()
1252 if (B->mayWriteToMemory()) in analyzeInterleaving()
1260 Instruction *A = AI->first; in analyzeInterleaving()
1261 StrideDescriptor DesA = AI->second; in analyzeInterleaving()
1272 // stride-2 loop: in analyzeInterleaving()
1275 // | A[i-1] = b; // (2) | in analyzeInterleaving()
1276 // A[i-3] = c; // (3) in analyzeInterleaving()
1283 StrideEntry *A) -> Instruction * { in analyzeInterleaving()
1284 for (uint32_t Index = 0; Index < Group->getFactor(); ++Index) { in analyzeInterleaving()
1285 Instruction *MemberOfGroupB = Group->getMember(Index); in analyzeInterleaving()
1298 if (A->mayWriteToMemory() && GroupA != GroupB) { in analyzeInterleaving()
1349 // here, canVectorizeMemory() should have returned false - except for the in analyzeInterleaving()
1352 (A->mayReadFromMemory() != B->mayReadFromMemory()) || in analyzeInterleaving()
1353 (A->mayWriteToMemory() != B->mayWriteToMemory())) in analyzeInterleaving()
1368 PSE.getSE()->getMinusSCEV(DesA.Scev, DesB.Scev)); in analyzeInterleaving()
1371 int64_t DistanceToB = DistToB->getAPInt().getSExtValue(); in analyzeInterleaving()
1378 // All members of a predicated interleave-group must have the same predicate, in analyzeInterleaving()
1380 BasicBlock *BlockA = A->getParent(); in analyzeInterleaving()
1381 BasicBlock *BlockB = B->getParent(); in analyzeInterleaving()
1389 GroupB->getIndex(B) + DistanceToB / static_cast<int64_t>(DesB.Size); in analyzeInterleaving()
1392 if (GroupB->insertMember(A, IndexA, DesA.Alignment)) { in analyzeInterleaving()
1399 if (A->mayReadFromMemory()) in analyzeInterleaving()
1400 GroupB->setInsertPos(A); in analyzeInterleaving()
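The member-index computation above (GroupB->getIndex(B) + DistanceToB / DesB.Size) places A relative to B inside B's group. A worked example with assumed numbers, not taken from the source: if B is member 1 of its group, each member accesses 4 bytes (DesB.Size == 4), and A's address is 8 bytes past B's (DistanceToB == 8), then A lands at index 1 + 8 / 4 == 3:

#include <cstdint>

// Hypothetical helper mirroring the IndexA arithmetic above.
int64_t memberIndexSketch(int64_t IndexOfB, int64_t DistanceToB, int64_t Size) {
  return IndexOfB + DistanceToB / Size;
}
// memberIndexSketch(1, 8, 4) == 3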
1407 std::string FirstOrLast) -> bool { in analyzeInterleaving()
1408 Instruction *Member = Group->getMember(Index); in analyzeInterleaving()
1417 << " group member potentially pointer-wrapping.\n"); in analyzeInterleaving()
1434 // check the first pointer for no-wrap. Once we change to use Assume=true in analyzeInterleaving()
1437 // Case 1: A full group. We can skip the checks; for full groups, if the wide in analyzeInterleaving()
1440 if (Group->getNumMembers() == Group->getFactor()) in analyzeInterleaving()
1446 // and group member Factor - 1; if the latter doesn't exist we rely on in analyzeInterleaving()
1447 // peeling (if it is a non-reversed access -- see Case 3). in analyzeInterleaving()
1450 if (Group->getMember(Group->getFactor() - 1)) in analyzeInterleaving()
1451 InvalidateGroupIfMemberMayWrap(Group, Group->getFactor() - 1, in analyzeInterleaving()
1454 // Case 3: A non-reversed interleaved load group with gaps: We need in analyzeInterleaving()
1456 // we don't speculatively access memory out-of-bounds. We only need in analyzeInterleaving()
1457 // to look for a member at index factor - 1, since every group must have in analyzeInterleaving()
1459 if (Group->isReverse()) { in analyzeInterleaving()
1473 // Case 1: A full group. We can skip the checks; for full groups, if the wide in analyzeInterleaving()
1476 if (Group->getNumMembers() == Group->getFactor()) in analyzeInterleaving()
1479 // Interleave-store-group with gaps is implemented using masked wide store. in analyzeInterleaving()
1481 // masked-interleaved-accesses are not enabled by the target. in analyzeInterleaving()
1494 // stores with gaps, which are implemented with masked-store (rather than in analyzeInterleaving()
1498 for (int Index = Group->getFactor() - 1; Index > 0; Index--) in analyzeInterleaving()
1499 if (Group->getMember(Index)) { in analyzeInterleaving()
1515 if (!Group->requiresScalarEpilogue()) in invalidateGroupsRequiringScalarEpilogue()