Lines Matching +full:non +full:- +full:overlapping

1 //===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass tries to expand memcmp() calls into optimally-sized loads and
12 //===----------------------------------------------------------------------===//
42 #define DEBUG_TYPE "expand-memcmp"
51 "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
56 "max-loads-per-memcmp", cl::Hidden,
60 "max-loads-per-memcmp-opt-size", cl::Hidden,
61 cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));
91 // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
92 // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}.
190 // We try to do as many non-overlapping loads as possible starting from the in computeOverlappingLoadSequence()
194 // There remain 0 to (MaxLoadSize - 1) bytes to load, this will be done with in computeOverlappingLoadSequence()
195 // an overlapping load. in computeOverlappingLoadSequence()
196 Size = Size - NumNonOverlappingLoads * MaxLoadSize; in computeOverlappingLoadSequence()
201 // Bail if the number of loads (non-overlapping + potential overlapping one) in computeOverlappingLoadSequence()
206 // Add non-overlapping loads. in computeOverlappingLoadSequence()
214 // Add the last overlapping load. in computeOverlappingLoadSequence()
216 LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)}); in computeOverlappingLoadSequence()
233 auto Last = LoadSequence[LoadSequence.size() - 1]; in optimiseLoadSequence()
234 auto PreLast = LoadSequence[LoadSequence.size() - 2]; in optimiseLoadSequence()
255 // 1. A list of load compare blocks - LoadCmpBlocks.
282 // If we allow overlapping loads and the load sequence is not already optimal, in MemCmpExpansion()
283 // use overlapping loads. in MemCmpExpansion()
309 BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb", in createLoadCmpBlocks()
310 EndBlock->getParent(), EndBlock); in createLoadCmpBlocks()
316 ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block", in createResultBlock()
317 EndBlock->getParent(), EndBlock); in createResultBlock()
325 Value *LhsSource = CI->getArgOperand(0); in getLoadPair()
326 Value *RhsSource = CI->getArgOperand(1); in getLoadPair()
327 Align LhsAlign = LhsSource->getPointerAlignment(DL); in getLoadPair()
328 Align RhsAlign = RhsSource->getPointerAlignment(DL); in getLoadPair()
330 auto *ByteType = Type::getInt8Ty(CI->getContext()); in getLoadPair()
359 CI->getModule(), Intrinsic::bswap, BSwapSizeType); in getLoadPair()
365 if (CmpSizeType != nullptr && CmpSizeType != Lhs->getType()) { in getLoadPair()
381 getLoadPair(Type::getInt8Ty(CI->getContext()), nullptr, in emitLoadCompareByteBlock()
382 Type::getInt32Ty(CI->getContext()), OffsetBytes); in emitLoadCompareByteBlock()
385 PhiRes->addIncoming(Diff, BB); in emitLoadCompareByteBlock()
387 if (BlockIndex < (LoadCmpBlocks.size() - 1)) { in emitLoadCompareByteBlock()
391 ConstantInt::get(Diff->getType(), 0)); in emitLoadCompareByteBlock()
396 DTU->applyUpdates( in emitLoadCompareByteBlock()
404 DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}}); in emitLoadCompareByteBlock()
419 std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp); in getCompareLoadPairs()
421 // For a single-block expansion, start inserting before the memcmp call. in getCompareLoadPairs()
433 : IntegerType::get(CI->getContext(), MaxLoadSize * 8); in getCompareLoadPairs()
438 IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8), nullptr, in getCompareLoadPairs()
453 auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> { in getCompareLoadPairs()
455 for (unsigned i = 0; i < InList.size() - 1; i = i + 2) { in getCompareLoadPairs()
474 Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0)); in getCompareLoadPairs()
484 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) in emitLoadCompareBlockMultipleLoads()
493 DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB}, in emitLoadCompareBlockMultipleLoads()
499 if (BlockIndex == LoadCmpBlocks.size() - 1) { in emitLoadCompareBlockMultipleLoads()
500 Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); in emitLoadCompareBlockMultipleLoads()
501 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); in emitLoadCompareBlockMultipleLoads()
524 IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8); in emitLoadCompareBlock()
527 ? IntegerType::get(CI->getContext(), in emitLoadCompareBlock()
531 CI->getContext(), in emitLoadCompareBlock()
543 ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]); in emitLoadCompareBlock()
544 ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]); in emitLoadCompareBlock()
548 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1)) in emitLoadCompareBlock()
557 DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB}, in emitLoadCompareBlock()
563 if (BlockIndex == LoadCmpBlocks.size() - 1) { in emitLoadCompareBlock()
564 Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0); in emitLoadCompareBlock()
565 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]); in emitLoadCompareBlock()
570 // memcmp result. It compares the two loaded source values and returns -1 if
576 BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); in emitMemCmpResultBlock()
578 Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1); in emitMemCmpResultBlock()
579 PhiRes->addIncoming(Res, ResBlock.BB); in emitMemCmpResultBlock()
583 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}}); in emitMemCmpResultBlock()
586 BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt(); in emitMemCmpResultBlock()
593 Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1), in emitMemCmpResultBlock()
596 PhiRes->addIncoming(Res, ResBlock.BB); in emitMemCmpResultBlock()
600 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}}); in emitMemCmpResultBlock()
604 Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8); in setupResultBlockPHINodes()
614 Builder.SetInsertPoint(EndBlock, EndBlock->begin()); in setupEndBlockPHINodes()
615 PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res"); in setupEndBlockPHINodes()
637 return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext())); in getMemCmpEqZeroOneBlock()
647 Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8); in getMemCmpOneBlock()
649 NeedsBSwap ? IntegerType::get(CI->getContext(), PowerOf2Ceil(Size * 8)) in getMemCmpOneBlock()
652 IntegerType::get(CI->getContext(), in getMemCmpOneBlock()
669 if (CI->hasOneUser()) { in getMemCmpOneBlock()
670 auto *UI = cast<Instruction>(*CI->user_begin()); in getMemCmpOneBlock()
680 Shift == (CI->getType()->getIntegerBitWidth() - 1)) { in getMemCmpOneBlock()
691 auto *Result = NeedsZExt ? Builder.CreateZExt(Cmp, UI->getType()) : Cmp; in getMemCmpOneBlock()
692 UI->replaceAllUsesWith(Result); in getMemCmpOneBlock()
693 UI->eraseFromParent(); in getMemCmpOneBlock()
694 CI->eraseFromParent(); in getMemCmpOneBlock()
701 // If a target prefers to use selects to get -1/0/1, they should be able in getMemCmpOneBlock()
715 // Create the basic block framework for a multi-block expansion. in getMemCmpExpansion()
717 BasicBlock *StartBlock = CI->getParent(); in getMemCmpExpansion()
734 StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]); in getMemCmpExpansion()
736 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]}, in getMemCmpExpansion()
740 Builder.SetCurrentDebugLocation(CI->getDebugLoc()); in getMemCmpExpansion()
781 /// %11 = select i1 %10, i32 -1, i32 1
836 // Early exit from expansion if -Oz. in expandMemCmp()
837 if (CI->getFunction()->hasMinSize()) in expandMemCmp()
841 ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2)); in expandMemCmp()
846 const uint64_t SizeVal = SizeCast->getZExtValue(); in expandMemCmp()
855 bool OptForSize = CI->getFunction()->hasOptSize() || in expandMemCmp()
856 llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI); in expandMemCmp()
857 auto Options = TTI->enableMemCmpExpansion(OptForSize, in expandMemCmp()
883 CI->replaceAllUsesWith(Res); in expandMemCmp()
884 CI->eraseFromParent(); in expandMemCmp()
918 TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering(); in runOnFunction()
925 auto *BFI = (PSI && PSI->hasProfileSummary()) ? in runOnFunction()
930 DT = &DTWP->getDomTree(); in runOnFunction()
956 if (TLI->getLibFunc(*CI, Func) && in runOnBlock()
999 const auto *TL = TM->getSubtargetImpl(F)->getTargetLowering(); in run()
1004 BlockFrequencyInfo *BFI = (PSI && PSI->hasProfileSummary()) in run()