10b57cec5SDimitry Andric //===- MergeICmps.cpp - Optimize chains of integer comparisons ------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass turns chains of integer comparisons into memcmp (the memcmp is
100b57cec5SDimitry Andric // later typically inlined as a chain of efficient hardware comparisons). This
110b57cec5SDimitry Andric // typically benefits c++ member or nonmember operator==().
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric // The basic idea is to replace a longer chain of integer comparisons loaded
140b57cec5SDimitry Andric // from contiguous memory locations into a shorter chain of larger integer
150b57cec5SDimitry Andric // comparisons. Benefits are double:
160b57cec5SDimitry Andric // - There are less jumps, and therefore less opportunities for mispredictions
170b57cec5SDimitry Andric // and I-cache misses.
180b57cec5SDimitry Andric // - Code size is smaller, both because jumps are removed and because the
190b57cec5SDimitry Andric // encoding of a 2*n byte compare is smaller than that of two n-byte
200b57cec5SDimitry Andric // compares.
210b57cec5SDimitry Andric //
220b57cec5SDimitry Andric // Example:
230b57cec5SDimitry Andric //
240b57cec5SDimitry Andric // struct S {
250b57cec5SDimitry Andric // int a;
260b57cec5SDimitry Andric // char b;
270b57cec5SDimitry Andric // char c;
280b57cec5SDimitry Andric // uint16_t d;
290b57cec5SDimitry Andric // bool operator==(const S& o) const {
300b57cec5SDimitry Andric // return a == o.a && b == o.b && c == o.c && d == o.d;
310b57cec5SDimitry Andric // }
320b57cec5SDimitry Andric // };
330b57cec5SDimitry Andric //
340b57cec5SDimitry Andric // Is optimized as :
350b57cec5SDimitry Andric //
360b57cec5SDimitry Andric // bool S::operator==(const S& o) const {
370b57cec5SDimitry Andric // return memcmp(this, &o, 8) == 0;
380b57cec5SDimitry Andric // }
390b57cec5SDimitry Andric //
400b57cec5SDimitry Andric // Which will later be expanded (ExpandMemCmp) as a single 8-bytes icmp.
410b57cec5SDimitry Andric //
420b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
430b57cec5SDimitry Andric
440b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/MergeICmps.h"
4506c3fb27SDimitry Andric #include "llvm/ADT/SmallString.h"
460b57cec5SDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h"
470b57cec5SDimitry Andric #include "llvm/Analysis/GlobalsModRef.h"
480b57cec5SDimitry Andric #include "llvm/Analysis/Loads.h"
490b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
500b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
510b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
520b57cec5SDimitry Andric #include "llvm/IR/Function.h"
5306c3fb27SDimitry Andric #include "llvm/IR/Instruction.h"
540b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
55480093f4SDimitry Andric #include "llvm/InitializePasses.h"
560b57cec5SDimitry Andric #include "llvm/Pass.h"
570b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h"
580b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
590b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BuildLibCalls.h"
600b57cec5SDimitry Andric #include <algorithm>
610b57cec5SDimitry Andric #include <numeric>
620b57cec5SDimitry Andric #include <utility>
630b57cec5SDimitry Andric #include <vector>
640b57cec5SDimitry Andric
650b57cec5SDimitry Andric using namespace llvm;
660b57cec5SDimitry Andric
670b57cec5SDimitry Andric namespace {
680b57cec5SDimitry Andric
690b57cec5SDimitry Andric #define DEBUG_TYPE "mergeicmps"
700b57cec5SDimitry Andric
710b57cec5SDimitry Andric // A BCE atom "Binary Compare Expression Atom" represents an integer load
720b57cec5SDimitry Andric // that is a constant offset from a base value, e.g. `a` or `o.c` in the example
730b57cec5SDimitry Andric // at the top.
740b57cec5SDimitry Andric struct BCEAtom {
750b57cec5SDimitry Andric BCEAtom() = default;
BCEAtom__anonde3b773f0111::BCEAtom760b57cec5SDimitry Andric BCEAtom(GetElementPtrInst *GEP, LoadInst *LoadI, int BaseId, APInt Offset)
77*0fca6ea1SDimitry Andric : GEP(GEP), LoadI(LoadI), BaseId(BaseId), Offset(std::move(Offset)) {}
780b57cec5SDimitry Andric
790b57cec5SDimitry Andric BCEAtom(const BCEAtom &) = delete;
800b57cec5SDimitry Andric BCEAtom &operator=(const BCEAtom &) = delete;
810b57cec5SDimitry Andric
820b57cec5SDimitry Andric BCEAtom(BCEAtom &&that) = default;
operator =__anonde3b773f0111::BCEAtom830b57cec5SDimitry Andric BCEAtom &operator=(BCEAtom &&that) {
840b57cec5SDimitry Andric if (this == &that)
850b57cec5SDimitry Andric return *this;
860b57cec5SDimitry Andric GEP = that.GEP;
870b57cec5SDimitry Andric LoadI = that.LoadI;
880b57cec5SDimitry Andric BaseId = that.BaseId;
890b57cec5SDimitry Andric Offset = std::move(that.Offset);
900b57cec5SDimitry Andric return *this;
910b57cec5SDimitry Andric }
920b57cec5SDimitry Andric
930b57cec5SDimitry Andric // We want to order BCEAtoms by (Base, Offset). However we cannot use
940b57cec5SDimitry Andric // the pointer values for Base because these are non-deterministic.
950b57cec5SDimitry Andric // To make sure that the sort order is stable, we first assign to each atom
960b57cec5SDimitry Andric // base value an index based on its order of appearance in the chain of
970b57cec5SDimitry Andric // comparisons. We call this index `BaseOrdering`. For example, for:
980b57cec5SDimitry Andric // b[3] == c[2] && a[1] == d[1] && b[4] == c[3]
990b57cec5SDimitry Andric // | block 1 | | block 2 | | block 3 |
1000b57cec5SDimitry Andric // b gets assigned index 0 and a index 1, because b appears as LHS in block 1,
1010b57cec5SDimitry Andric // which is before block 2.
1020b57cec5SDimitry Andric // We then sort by (BaseOrdering[LHS.Base()], LHS.Offset), which is stable.
operator <__anonde3b773f0111::BCEAtom1030b57cec5SDimitry Andric bool operator<(const BCEAtom &O) const {
1040b57cec5SDimitry Andric return BaseId != O.BaseId ? BaseId < O.BaseId : Offset.slt(O.Offset);
1050b57cec5SDimitry Andric }
1060b57cec5SDimitry Andric
1070b57cec5SDimitry Andric GetElementPtrInst *GEP = nullptr;
1080b57cec5SDimitry Andric LoadInst *LoadI = nullptr;
1090b57cec5SDimitry Andric unsigned BaseId = 0;
1100b57cec5SDimitry Andric APInt Offset;
1110b57cec5SDimitry Andric };
1120b57cec5SDimitry Andric
1130b57cec5SDimitry Andric // A class that assigns increasing ids to values in the order in which they are
1140b57cec5SDimitry Andric // seen. See comment in `BCEAtom::operator<()``.
1150b57cec5SDimitry Andric class BaseIdentifier {
1160b57cec5SDimitry Andric public:
1170b57cec5SDimitry Andric // Returns the id for value `Base`, after assigning one if `Base` has not been
1180b57cec5SDimitry Andric // seen before.
getBaseId(const Value * Base)1190b57cec5SDimitry Andric int getBaseId(const Value *Base) {
1200b57cec5SDimitry Andric assert(Base && "invalid base");
1210b57cec5SDimitry Andric const auto Insertion = BaseToIndex.try_emplace(Base, Order);
1220b57cec5SDimitry Andric if (Insertion.second)
1230b57cec5SDimitry Andric ++Order;
1240b57cec5SDimitry Andric return Insertion.first->second;
1250b57cec5SDimitry Andric }
1260b57cec5SDimitry Andric
1270b57cec5SDimitry Andric private:
1280b57cec5SDimitry Andric unsigned Order = 1;
1290b57cec5SDimitry Andric DenseMap<const Value*, int> BaseToIndex;
1300b57cec5SDimitry Andric };
1310b57cec5SDimitry Andric
1320b57cec5SDimitry Andric // If this value is a load from a constant offset w.r.t. a base address, and
1330b57cec5SDimitry Andric // there are no other users of the load or address, returns the base address and
1340b57cec5SDimitry Andric // the offset.
visitICmpLoadOperand(Value * const Val,BaseIdentifier & BaseId)1350b57cec5SDimitry Andric BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
1360b57cec5SDimitry Andric auto *const LoadI = dyn_cast<LoadInst>(Val);
1370b57cec5SDimitry Andric if (!LoadI)
1380b57cec5SDimitry Andric return {};
1390b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "load\n");
1400b57cec5SDimitry Andric if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
1410b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "used outside of block\n");
1420b57cec5SDimitry Andric return {};
1430b57cec5SDimitry Andric }
1440b57cec5SDimitry Andric // Do not optimize atomic loads to non-atomic memcmp
1450b57cec5SDimitry Andric if (!LoadI->isSimple()) {
1460b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "volatile or atomic\n");
1470b57cec5SDimitry Andric return {};
1480b57cec5SDimitry Andric }
14981ad6265SDimitry Andric Value *Addr = LoadI->getOperand(0);
150349cc55cSDimitry Andric if (Addr->getType()->getPointerAddressSpace() != 0) {
151349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "from non-zero AddressSpace\n");
152349cc55cSDimitry Andric return {};
153349cc55cSDimitry Andric }
154*0fca6ea1SDimitry Andric const auto &DL = LoadI->getDataLayout();
15581ad6265SDimitry Andric if (!isDereferenceablePointer(Addr, LoadI->getType(), DL)) {
1560b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "not dereferenceable\n");
1570b57cec5SDimitry Andric // We need to make sure that we can do comparison in any order, so we
158bdd1243dSDimitry Andric // require memory to be unconditionally dereferenceable.
1590b57cec5SDimitry Andric return {};
1600b57cec5SDimitry Andric }
16181ad6265SDimitry Andric
16206c3fb27SDimitry Andric APInt Offset = APInt(DL.getIndexTypeSizeInBits(Addr->getType()), 0);
16381ad6265SDimitry Andric Value *Base = Addr;
16481ad6265SDimitry Andric auto *GEP = dyn_cast<GetElementPtrInst>(Addr);
16581ad6265SDimitry Andric if (GEP) {
16681ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "GEP\n");
16781ad6265SDimitry Andric if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
16881ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "used outside of block\n");
16981ad6265SDimitry Andric return {};
17081ad6265SDimitry Andric }
1710b57cec5SDimitry Andric if (!GEP->accumulateConstantOffset(DL, Offset))
1720b57cec5SDimitry Andric return {};
17381ad6265SDimitry Andric Base = GEP->getPointerOperand();
17481ad6265SDimitry Andric }
17581ad6265SDimitry Andric return BCEAtom(GEP, LoadI, BaseId.getBaseId(Base), Offset);
1760b57cec5SDimitry Andric }
1770b57cec5SDimitry Andric
178fe6060f1SDimitry Andric // A comparison between two BCE atoms, e.g. `a == o.a` in the example at the
179fe6060f1SDimitry Andric // top.
1800b57cec5SDimitry Andric // Note: the terminology is misleading: the comparison is symmetric, so there
1810b57cec5SDimitry Andric // is no real {l/r}hs. What we want though is to have the same base on the
1820b57cec5SDimitry Andric // left (resp. right), so that we can detect consecutive loads. To ensure this
1830b57cec5SDimitry Andric // we put the smallest atom on the left.
184fe6060f1SDimitry Andric struct BCECmp {
185fe6060f1SDimitry Andric BCEAtom Lhs;
186fe6060f1SDimitry Andric BCEAtom Rhs;
187fe6060f1SDimitry Andric int SizeBits;
188fe6060f1SDimitry Andric const ICmpInst *CmpI;
189fe6060f1SDimitry Andric
BCECmp__anonde3b773f0111::BCECmp190fe6060f1SDimitry Andric BCECmp(BCEAtom L, BCEAtom R, int SizeBits, const ICmpInst *CmpI)
191fe6060f1SDimitry Andric : Lhs(std::move(L)), Rhs(std::move(R)), SizeBits(SizeBits), CmpI(CmpI) {
192fe6060f1SDimitry Andric if (Rhs < Lhs) std::swap(Rhs, Lhs);
193fe6060f1SDimitry Andric }
194fe6060f1SDimitry Andric };
195fe6060f1SDimitry Andric
196fe6060f1SDimitry Andric // A basic block with a comparison between two BCE atoms.
197fe6060f1SDimitry Andric // The block might do extra work besides the atom comparison, in which case
198fe6060f1SDimitry Andric // doesOtherWork() returns true. Under some conditions, the block can be
199fe6060f1SDimitry Andric // split into the atom comparison part and the "other work" part
200fe6060f1SDimitry Andric // (see canSplit()).
2010b57cec5SDimitry Andric class BCECmpBlock {
2020b57cec5SDimitry Andric public:
203fe6060f1SDimitry Andric typedef SmallDenseSet<const Instruction *, 8> InstructionSet;
2040b57cec5SDimitry Andric
BCECmpBlock(BCECmp Cmp,BasicBlock * BB,InstructionSet BlockInsts)205fe6060f1SDimitry Andric BCECmpBlock(BCECmp Cmp, BasicBlock *BB, InstructionSet BlockInsts)
206fe6060f1SDimitry Andric : BB(BB), BlockInsts(std::move(BlockInsts)), Cmp(std::move(Cmp)) {}
2070b57cec5SDimitry Andric
Lhs() const208fe6060f1SDimitry Andric const BCEAtom &Lhs() const { return Cmp.Lhs; }
Rhs() const209fe6060f1SDimitry Andric const BCEAtom &Rhs() const { return Cmp.Rhs; }
SizeBits() const210fe6060f1SDimitry Andric int SizeBits() const { return Cmp.SizeBits; }
2110b57cec5SDimitry Andric
2120b57cec5SDimitry Andric // Returns true if the block does other works besides comparison.
2130b57cec5SDimitry Andric bool doesOtherWork() const;
2140b57cec5SDimitry Andric
2150b57cec5SDimitry Andric // Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp
2160b57cec5SDimitry Andric // instructions in the block.
2170b57cec5SDimitry Andric bool canSplit(AliasAnalysis &AA) const;
2180b57cec5SDimitry Andric
2190b57cec5SDimitry Andric // Return true if this all the relevant instructions in the BCE-cmp-block can
2200b57cec5SDimitry Andric // be sunk below this instruction. By doing this, we know we can separate the
2210b57cec5SDimitry Andric // BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the
2220b57cec5SDimitry Andric // block.
223fe6060f1SDimitry Andric bool canSinkBCECmpInst(const Instruction *, AliasAnalysis &AA) const;
2240b57cec5SDimitry Andric
2250b57cec5SDimitry Andric // We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block
2260b57cec5SDimitry Andric // instructions. Split the old block and move all non-BCE-cmp-insts into the
2270b57cec5SDimitry Andric // new parent block.
2280b57cec5SDimitry Andric void split(BasicBlock *NewParent, AliasAnalysis &AA) const;
2290b57cec5SDimitry Andric
2300b57cec5SDimitry Andric // The basic block where this comparison happens.
231fe6060f1SDimitry Andric BasicBlock *BB;
232fe6060f1SDimitry Andric // Instructions relating to the BCECmp and branch.
233fe6060f1SDimitry Andric InstructionSet BlockInsts;
2340b57cec5SDimitry Andric // The block requires splitting.
2350b57cec5SDimitry Andric bool RequireSplit = false;
236349cc55cSDimitry Andric // Original order of this block in the chain.
237349cc55cSDimitry Andric unsigned OrigOrder = 0;
2380b57cec5SDimitry Andric
2390b57cec5SDimitry Andric private:
240fe6060f1SDimitry Andric BCECmp Cmp;
2410b57cec5SDimitry Andric };
2420b57cec5SDimitry Andric
canSinkBCECmpInst(const Instruction * Inst,AliasAnalysis & AA) const2430b57cec5SDimitry Andric bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
2440b57cec5SDimitry Andric AliasAnalysis &AA) const {
245fe6060f1SDimitry Andric // If this instruction may clobber the loads and is in middle of the BCE cmp
246fe6060f1SDimitry Andric // block instructions, then bail for now.
247fe6060f1SDimitry Andric if (Inst->mayWriteToMemory()) {
248349cc55cSDimitry Andric auto MayClobber = [&](LoadInst *LI) {
249349cc55cSDimitry Andric // If a potentially clobbering instruction comes before the load,
250349cc55cSDimitry Andric // we can still safely sink the load.
25181ad6265SDimitry Andric return (Inst->getParent() != LI->getParent() || !Inst->comesBefore(LI)) &&
252349cc55cSDimitry Andric isModSet(AA.getModRefInfo(Inst, MemoryLocation::get(LI)));
253349cc55cSDimitry Andric };
254349cc55cSDimitry Andric if (MayClobber(Cmp.Lhs.LoadI) || MayClobber(Cmp.Rhs.LoadI))
2550b57cec5SDimitry Andric return false;
2560b57cec5SDimitry Andric }
2570b57cec5SDimitry Andric // Make sure this instruction does not use any of the BCE cmp block
2580b57cec5SDimitry Andric // instructions as operand.
259fe6060f1SDimitry Andric return llvm::none_of(Inst->operands(), [&](const Value *Op) {
260fe6060f1SDimitry Andric const Instruction *OpI = dyn_cast<Instruction>(Op);
261fe6060f1SDimitry Andric return OpI && BlockInsts.contains(OpI);
262fe6060f1SDimitry Andric });
2630b57cec5SDimitry Andric }
2640b57cec5SDimitry Andric
split(BasicBlock * NewParent,AliasAnalysis & AA) const2650b57cec5SDimitry Andric void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis &AA) const {
2660b57cec5SDimitry Andric llvm::SmallVector<Instruction *, 4> OtherInsts;
2670b57cec5SDimitry Andric for (Instruction &Inst : *BB) {
2680b57cec5SDimitry Andric if (BlockInsts.count(&Inst))
2690b57cec5SDimitry Andric continue;
270fe6060f1SDimitry Andric assert(canSinkBCECmpInst(&Inst, AA) && "Split unsplittable block");
2710b57cec5SDimitry Andric // This is a non-BCE-cmp-block instruction. And it can be separated
2720b57cec5SDimitry Andric // from the BCE-cmp-block instruction.
2730b57cec5SDimitry Andric OtherInsts.push_back(&Inst);
2740b57cec5SDimitry Andric }
2750b57cec5SDimitry Andric
2760b57cec5SDimitry Andric // Do the actual spliting.
27781ad6265SDimitry Andric for (Instruction *Inst : reverse(OtherInsts))
2785f757f3fSDimitry Andric Inst->moveBeforePreserving(*NewParent, NewParent->begin());
2790b57cec5SDimitry Andric }
2800b57cec5SDimitry Andric
canSplit(AliasAnalysis & AA) const2810b57cec5SDimitry Andric bool BCECmpBlock::canSplit(AliasAnalysis &AA) const {
2820b57cec5SDimitry Andric for (Instruction &Inst : *BB) {
2830b57cec5SDimitry Andric if (!BlockInsts.count(&Inst)) {
284fe6060f1SDimitry Andric if (!canSinkBCECmpInst(&Inst, AA))
2850b57cec5SDimitry Andric return false;
2860b57cec5SDimitry Andric }
2870b57cec5SDimitry Andric }
2880b57cec5SDimitry Andric return true;
2890b57cec5SDimitry Andric }
2900b57cec5SDimitry Andric
doesOtherWork() const2910b57cec5SDimitry Andric bool BCECmpBlock::doesOtherWork() const {
2920b57cec5SDimitry Andric // TODO(courbet): Can we allow some other things ? This is very conservative.
2930b57cec5SDimitry Andric // We might be able to get away with anything does not have any side
2940b57cec5SDimitry Andric // effects outside of the basic block.
2950b57cec5SDimitry Andric // Note: The GEPs and/or loads are not necessarily in the same block.
2960b57cec5SDimitry Andric for (const Instruction &Inst : *BB) {
2970b57cec5SDimitry Andric if (!BlockInsts.count(&Inst))
2980b57cec5SDimitry Andric return true;
2990b57cec5SDimitry Andric }
3000b57cec5SDimitry Andric return false;
3010b57cec5SDimitry Andric }
3020b57cec5SDimitry Andric
3030b57cec5SDimitry Andric // Visit the given comparison. If this is a comparison between two valid
3040b57cec5SDimitry Andric // BCE atoms, returns the comparison.
visitICmp(const ICmpInst * const CmpI,const ICmpInst::Predicate ExpectedPredicate,BaseIdentifier & BaseId)305bdd1243dSDimitry Andric std::optional<BCECmp> visitICmp(const ICmpInst *const CmpI,
3060b57cec5SDimitry Andric const ICmpInst::Predicate ExpectedPredicate,
3070b57cec5SDimitry Andric BaseIdentifier &BaseId) {
3080b57cec5SDimitry Andric // The comparison can only be used once:
3090b57cec5SDimitry Andric // - For intermediate blocks, as a branch condition.
3100b57cec5SDimitry Andric // - For the final block, as an incoming value for the Phi.
3110b57cec5SDimitry Andric // If there are any other uses of the comparison, we cannot merge it with
3120b57cec5SDimitry Andric // other comparisons as we would create an orphan use of the value.
3130b57cec5SDimitry Andric if (!CmpI->hasOneUse()) {
3140b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "cmp has several uses\n");
315bdd1243dSDimitry Andric return std::nullopt;
3160b57cec5SDimitry Andric }
3170b57cec5SDimitry Andric if (CmpI->getPredicate() != ExpectedPredicate)
318bdd1243dSDimitry Andric return std::nullopt;
3190b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "cmp "
3200b57cec5SDimitry Andric << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
3210b57cec5SDimitry Andric << "\n");
3220b57cec5SDimitry Andric auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0), BaseId);
3230b57cec5SDimitry Andric if (!Lhs.BaseId)
324bdd1243dSDimitry Andric return std::nullopt;
3250b57cec5SDimitry Andric auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1), BaseId);
3260b57cec5SDimitry Andric if (!Rhs.BaseId)
327bdd1243dSDimitry Andric return std::nullopt;
328*0fca6ea1SDimitry Andric const auto &DL = CmpI->getDataLayout();
329fe6060f1SDimitry Andric return BCECmp(std::move(Lhs), std::move(Rhs),
330fe6060f1SDimitry Andric DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()), CmpI);
3310b57cec5SDimitry Andric }
3320b57cec5SDimitry Andric
3330b57cec5SDimitry Andric // Visit the given comparison block. If this is a comparison between two valid
3340b57cec5SDimitry Andric // BCE atoms, returns the comparison.
visitCmpBlock(Value * const Val,BasicBlock * const Block,const BasicBlock * const PhiBlock,BaseIdentifier & BaseId)335bdd1243dSDimitry Andric std::optional<BCECmpBlock> visitCmpBlock(Value *const Val,
336bdd1243dSDimitry Andric BasicBlock *const Block,
3370b57cec5SDimitry Andric const BasicBlock *const PhiBlock,
3380b57cec5SDimitry Andric BaseIdentifier &BaseId) {
339bdd1243dSDimitry Andric if (Block->empty())
340bdd1243dSDimitry Andric return std::nullopt;
3410b57cec5SDimitry Andric auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator());
342bdd1243dSDimitry Andric if (!BranchI)
343bdd1243dSDimitry Andric return std::nullopt;
3440b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "branch\n");
345fe6060f1SDimitry Andric Value *Cond;
346fe6060f1SDimitry Andric ICmpInst::Predicate ExpectedPredicate;
3470b57cec5SDimitry Andric if (BranchI->isUnconditional()) {
3480b57cec5SDimitry Andric // In this case, we expect an incoming value which is the result of the
3490b57cec5SDimitry Andric // comparison. This is the last link in the chain of comparisons (note
3500b57cec5SDimitry Andric // that this does not mean that this is the last incoming value, blocks
3510b57cec5SDimitry Andric // can be reordered).
352fe6060f1SDimitry Andric Cond = Val;
353fe6060f1SDimitry Andric ExpectedPredicate = ICmpInst::ICMP_EQ;
3540b57cec5SDimitry Andric } else {
3550b57cec5SDimitry Andric // In this case, we expect a constant incoming value (the comparison is
3560b57cec5SDimitry Andric // chained).
357e8d8bef9SDimitry Andric const auto *const Const = cast<ConstantInt>(Val);
3580b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "const\n");
359bdd1243dSDimitry Andric if (!Const->isZero())
360bdd1243dSDimitry Andric return std::nullopt;
3610b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "false\n");
3620b57cec5SDimitry Andric assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch");
3630b57cec5SDimitry Andric BasicBlock *const FalseBlock = BranchI->getSuccessor(1);
364fe6060f1SDimitry Andric Cond = BranchI->getCondition();
365fe6060f1SDimitry Andric ExpectedPredicate =
366fe6060f1SDimitry Andric FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE;
3670b57cec5SDimitry Andric }
368fe6060f1SDimitry Andric
369fe6060f1SDimitry Andric auto *CmpI = dyn_cast<ICmpInst>(Cond);
370bdd1243dSDimitry Andric if (!CmpI)
371bdd1243dSDimitry Andric return std::nullopt;
372fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "icmp\n");
373fe6060f1SDimitry Andric
374bdd1243dSDimitry Andric std::optional<BCECmp> Result = visitICmp(CmpI, ExpectedPredicate, BaseId);
375fe6060f1SDimitry Andric if (!Result)
376bdd1243dSDimitry Andric return std::nullopt;
377fe6060f1SDimitry Andric
378fe6060f1SDimitry Andric BCECmpBlock::InstructionSet BlockInsts(
37981ad6265SDimitry Andric {Result->Lhs.LoadI, Result->Rhs.LoadI, Result->CmpI, BranchI});
38081ad6265SDimitry Andric if (Result->Lhs.GEP)
38181ad6265SDimitry Andric BlockInsts.insert(Result->Lhs.GEP);
38281ad6265SDimitry Andric if (Result->Rhs.GEP)
38381ad6265SDimitry Andric BlockInsts.insert(Result->Rhs.GEP);
384fe6060f1SDimitry Andric return BCECmpBlock(std::move(*Result), Block, BlockInsts);
3850b57cec5SDimitry Andric }
3860b57cec5SDimitry Andric
enqueueBlock(std::vector<BCECmpBlock> & Comparisons,BCECmpBlock && Comparison)3870b57cec5SDimitry Andric static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
3880b57cec5SDimitry Andric BCECmpBlock &&Comparison) {
3890b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Block '" << Comparison.BB->getName()
3900b57cec5SDimitry Andric << "': Found cmp of " << Comparison.SizeBits()
3910b57cec5SDimitry Andric << " bits between " << Comparison.Lhs().BaseId << " + "
3920b57cec5SDimitry Andric << Comparison.Lhs().Offset << " and "
3930b57cec5SDimitry Andric << Comparison.Rhs().BaseId << " + "
3940b57cec5SDimitry Andric << Comparison.Rhs().Offset << "\n");
3950b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\n");
396349cc55cSDimitry Andric Comparison.OrigOrder = Comparisons.size();
3970b57cec5SDimitry Andric Comparisons.push_back(std::move(Comparison));
3980b57cec5SDimitry Andric }
3990b57cec5SDimitry Andric
4000b57cec5SDimitry Andric // A chain of comparisons.
4010b57cec5SDimitry Andric class BCECmpChain {
4020b57cec5SDimitry Andric public:
403349cc55cSDimitry Andric using ContiguousBlocks = std::vector<BCECmpBlock>;
404349cc55cSDimitry Andric
4050b57cec5SDimitry Andric BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
4060b57cec5SDimitry Andric AliasAnalysis &AA);
4070b57cec5SDimitry Andric
4080b57cec5SDimitry Andric bool simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
4090b57cec5SDimitry Andric DomTreeUpdater &DTU);
4100b57cec5SDimitry Andric
atLeastOneMerged() const411349cc55cSDimitry Andric bool atLeastOneMerged() const {
412349cc55cSDimitry Andric return any_of(MergedBlocks_,
413349cc55cSDimitry Andric [](const auto &Blocks) { return Blocks.size() > 1; });
414349cc55cSDimitry Andric }
415349cc55cSDimitry Andric
4160b57cec5SDimitry Andric private:
417349cc55cSDimitry Andric PHINode &Phi_;
418349cc55cSDimitry Andric // The list of all blocks in the chain, grouped by contiguity.
419349cc55cSDimitry Andric std::vector<ContiguousBlocks> MergedBlocks_;
420349cc55cSDimitry Andric // The original entry block (before sorting);
421349cc55cSDimitry Andric BasicBlock *EntryBlock_;
422349cc55cSDimitry Andric };
423349cc55cSDimitry Andric
areContiguous(const BCECmpBlock & First,const BCECmpBlock & Second)424349cc55cSDimitry Andric static bool areContiguous(const BCECmpBlock &First, const BCECmpBlock &Second) {
4250b57cec5SDimitry Andric return First.Lhs().BaseId == Second.Lhs().BaseId &&
4260b57cec5SDimitry Andric First.Rhs().BaseId == Second.Rhs().BaseId &&
4270b57cec5SDimitry Andric First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
4280b57cec5SDimitry Andric First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
4290b57cec5SDimitry Andric }
4300b57cec5SDimitry Andric
getMinOrigOrder(const BCECmpChain::ContiguousBlocks & Blocks)431349cc55cSDimitry Andric static unsigned getMinOrigOrder(const BCECmpChain::ContiguousBlocks &Blocks) {
432349cc55cSDimitry Andric unsigned MinOrigOrder = std::numeric_limits<unsigned>::max();
433349cc55cSDimitry Andric for (const BCECmpBlock &Block : Blocks)
434349cc55cSDimitry Andric MinOrigOrder = std::min(MinOrigOrder, Block.OrigOrder);
435349cc55cSDimitry Andric return MinOrigOrder;
436349cc55cSDimitry Andric }
437349cc55cSDimitry Andric
438349cc55cSDimitry Andric /// Given a chain of comparison blocks, groups the blocks into contiguous
439349cc55cSDimitry Andric /// ranges that can be merged together into a single comparison.
440349cc55cSDimitry Andric static std::vector<BCECmpChain::ContiguousBlocks>
mergeBlocks(std::vector<BCECmpBlock> && Blocks)441349cc55cSDimitry Andric mergeBlocks(std::vector<BCECmpBlock> &&Blocks) {
442349cc55cSDimitry Andric std::vector<BCECmpChain::ContiguousBlocks> MergedBlocks;
443349cc55cSDimitry Andric
444349cc55cSDimitry Andric // Sort to detect continuous offsets.
445349cc55cSDimitry Andric llvm::sort(Blocks,
446349cc55cSDimitry Andric [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
447349cc55cSDimitry Andric return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) <
448349cc55cSDimitry Andric std::tie(RhsBlock.Lhs(), RhsBlock.Rhs());
449349cc55cSDimitry Andric });
450349cc55cSDimitry Andric
451349cc55cSDimitry Andric BCECmpChain::ContiguousBlocks *LastMergedBlock = nullptr;
452349cc55cSDimitry Andric for (BCECmpBlock &Block : Blocks) {
453349cc55cSDimitry Andric if (!LastMergedBlock || !areContiguous(LastMergedBlock->back(), Block)) {
454349cc55cSDimitry Andric MergedBlocks.emplace_back();
455349cc55cSDimitry Andric LastMergedBlock = &MergedBlocks.back();
456349cc55cSDimitry Andric } else {
457349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "Merging block " << Block.BB->getName() << " into "
458349cc55cSDimitry Andric << LastMergedBlock->back().BB->getName() << "\n");
459349cc55cSDimitry Andric }
460349cc55cSDimitry Andric LastMergedBlock->push_back(std::move(Block));
461349cc55cSDimitry Andric }
462349cc55cSDimitry Andric
463349cc55cSDimitry Andric // While we allow reordering for merging, do not reorder unmerged comparisons.
464349cc55cSDimitry Andric // Doing so may introduce branch on poison.
465349cc55cSDimitry Andric llvm::sort(MergedBlocks, [](const BCECmpChain::ContiguousBlocks &LhsBlocks,
466349cc55cSDimitry Andric const BCECmpChain::ContiguousBlocks &RhsBlocks) {
467349cc55cSDimitry Andric return getMinOrigOrder(LhsBlocks) < getMinOrigOrder(RhsBlocks);
468349cc55cSDimitry Andric });
469349cc55cSDimitry Andric
470349cc55cSDimitry Andric return MergedBlocks;
471349cc55cSDimitry Andric }
4720b57cec5SDimitry Andric
BCECmpChain(const std::vector<BasicBlock * > & Blocks,PHINode & Phi,AliasAnalysis & AA)4730b57cec5SDimitry Andric BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
4740b57cec5SDimitry Andric AliasAnalysis &AA)
4750b57cec5SDimitry Andric : Phi_(Phi) {
4760b57cec5SDimitry Andric assert(!Blocks.empty() && "a chain should have at least one block");
4770b57cec5SDimitry Andric // Now look inside blocks to check for BCE comparisons.
4780b57cec5SDimitry Andric std::vector<BCECmpBlock> Comparisons;
4790b57cec5SDimitry Andric BaseIdentifier BaseId;
480fe6060f1SDimitry Andric for (BasicBlock *const Block : Blocks) {
4810b57cec5SDimitry Andric assert(Block && "invalid block");
482bdd1243dSDimitry Andric std::optional<BCECmpBlock> Comparison = visitCmpBlock(
483fe6060f1SDimitry Andric Phi.getIncomingValueForBlock(Block), Block, Phi.getParent(), BaseId);
484fe6060f1SDimitry Andric if (!Comparison) {
4850b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "chain with invalid BCECmpBlock, no merge.\n");
4860b57cec5SDimitry Andric return;
4870b57cec5SDimitry Andric }
488fe6060f1SDimitry Andric if (Comparison->doesOtherWork()) {
489fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "block '" << Comparison->BB->getName()
4900b57cec5SDimitry Andric << "' does extra work besides compare\n");
4910b57cec5SDimitry Andric if (Comparisons.empty()) {
4920b57cec5SDimitry Andric // This is the initial block in the chain, in case this block does other
4930b57cec5SDimitry Andric // work, we can try to split the block and move the irrelevant
4940b57cec5SDimitry Andric // instructions to the predecessor.
4950b57cec5SDimitry Andric //
4960b57cec5SDimitry Andric // If this is not the initial block in the chain, splitting it wont
4970b57cec5SDimitry Andric // work.
4980b57cec5SDimitry Andric //
4990b57cec5SDimitry Andric // As once split, there will still be instructions before the BCE cmp
5000b57cec5SDimitry Andric // instructions that do other work in program order, i.e. within the
5010b57cec5SDimitry Andric // chain before sorting. Unless we can abort the chain at this point
5020b57cec5SDimitry Andric // and start anew.
5030b57cec5SDimitry Andric //
5040b57cec5SDimitry Andric // NOTE: we only handle blocks a with single predecessor for now.
505fe6060f1SDimitry Andric if (Comparison->canSplit(AA)) {
5060b57cec5SDimitry Andric LLVM_DEBUG(dbgs()
507fe6060f1SDimitry Andric << "Split initial block '" << Comparison->BB->getName()
5080b57cec5SDimitry Andric << "' that does extra work besides compare\n");
509fe6060f1SDimitry Andric Comparison->RequireSplit = true;
510fe6060f1SDimitry Andric enqueueBlock(Comparisons, std::move(*Comparison));
5110b57cec5SDimitry Andric } else {
5120b57cec5SDimitry Andric LLVM_DEBUG(dbgs()
513fe6060f1SDimitry Andric << "ignoring initial block '" << Comparison->BB->getName()
5140b57cec5SDimitry Andric << "' that does extra work besides compare\n");
5150b57cec5SDimitry Andric }
5160b57cec5SDimitry Andric continue;
5170b57cec5SDimitry Andric }
5180b57cec5SDimitry Andric // TODO(courbet): Right now we abort the whole chain. We could be
5190b57cec5SDimitry Andric // merging only the blocks that don't do other work and resume the
5200b57cec5SDimitry Andric // chain from there. For example:
5210b57cec5SDimitry Andric // if (a[0] == b[0]) { // bb1
5220b57cec5SDimitry Andric // if (a[1] == b[1]) { // bb2
5230b57cec5SDimitry Andric // some_value = 3; //bb3
5240b57cec5SDimitry Andric // if (a[2] == b[2]) { //bb3
5250b57cec5SDimitry Andric // do a ton of stuff //bb4
5260b57cec5SDimitry Andric // }
5270b57cec5SDimitry Andric // }
5280b57cec5SDimitry Andric // }
5290b57cec5SDimitry Andric //
5300b57cec5SDimitry Andric // This is:
5310b57cec5SDimitry Andric //
5320b57cec5SDimitry Andric // bb1 --eq--> bb2 --eq--> bb3* -eq--> bb4 --+
5330b57cec5SDimitry Andric // \ \ \ \
5340b57cec5SDimitry Andric // ne ne ne \
5350b57cec5SDimitry Andric // \ \ \ v
5360b57cec5SDimitry Andric // +------------+-----------+----------> bb_phi
5370b57cec5SDimitry Andric //
5380b57cec5SDimitry Andric // We can only merge the first two comparisons, because bb3* does
5390b57cec5SDimitry Andric // "other work" (setting some_value to 3).
5400b57cec5SDimitry Andric // We could still merge bb1 and bb2 though.
5410b57cec5SDimitry Andric return;
5420b57cec5SDimitry Andric }
543fe6060f1SDimitry Andric enqueueBlock(Comparisons, std::move(*Comparison));
5440b57cec5SDimitry Andric }
5450b57cec5SDimitry Andric
5460b57cec5SDimitry Andric // It is possible we have no suitable comparison to merge.
5470b57cec5SDimitry Andric if (Comparisons.empty()) {
5480b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "chain with no BCE basic blocks, no merge\n");
5490b57cec5SDimitry Andric return;
5500b57cec5SDimitry Andric }
5510b57cec5SDimitry Andric EntryBlock_ = Comparisons[0].BB;
552349cc55cSDimitry Andric MergedBlocks_ = mergeBlocks(std::move(Comparisons));
5530b57cec5SDimitry Andric }
5540b57cec5SDimitry Andric
5550b57cec5SDimitry Andric namespace {
5560b57cec5SDimitry Andric
5570b57cec5SDimitry Andric // A class to compute the name of a set of merged basic blocks.
5580b57cec5SDimitry Andric // This is optimized for the common case of no block names.
5590b57cec5SDimitry Andric class MergedBlockName {
5600b57cec5SDimitry Andric // Storage for the uncommon case of several named blocks.
5610b57cec5SDimitry Andric SmallString<16> Scratch;
5620b57cec5SDimitry Andric
5630b57cec5SDimitry Andric public:
MergedBlockName(ArrayRef<BCECmpBlock> Comparisons)5640b57cec5SDimitry Andric explicit MergedBlockName(ArrayRef<BCECmpBlock> Comparisons)
5650b57cec5SDimitry Andric : Name(makeName(Comparisons)) {}
5660b57cec5SDimitry Andric const StringRef Name;
5670b57cec5SDimitry Andric
5680b57cec5SDimitry Andric private:
makeName(ArrayRef<BCECmpBlock> Comparisons)5690b57cec5SDimitry Andric StringRef makeName(ArrayRef<BCECmpBlock> Comparisons) {
5700b57cec5SDimitry Andric assert(!Comparisons.empty() && "no basic block");
5710b57cec5SDimitry Andric // Fast path: only one block, or no names at all.
5720b57cec5SDimitry Andric if (Comparisons.size() == 1)
5730b57cec5SDimitry Andric return Comparisons[0].BB->getName();
5740b57cec5SDimitry Andric const int size = std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
5750b57cec5SDimitry Andric [](int i, const BCECmpBlock &Cmp) {
5760b57cec5SDimitry Andric return i + Cmp.BB->getName().size();
5770b57cec5SDimitry Andric });
5780b57cec5SDimitry Andric if (size == 0)
5790b57cec5SDimitry Andric return StringRef("", 0);
5800b57cec5SDimitry Andric
5810b57cec5SDimitry Andric // Slow path: at least two blocks, at least one block with a name.
5820b57cec5SDimitry Andric Scratch.clear();
5830b57cec5SDimitry Andric // We'll have `size` bytes for name and `Comparisons.size() - 1` bytes for
5840b57cec5SDimitry Andric // separators.
5850b57cec5SDimitry Andric Scratch.reserve(size + Comparisons.size() - 1);
5860b57cec5SDimitry Andric const auto append = [this](StringRef str) {
5870b57cec5SDimitry Andric Scratch.append(str.begin(), str.end());
5880b57cec5SDimitry Andric };
5890b57cec5SDimitry Andric append(Comparisons[0].BB->getName());
5900b57cec5SDimitry Andric for (int I = 1, E = Comparisons.size(); I < E; ++I) {
5910b57cec5SDimitry Andric const BasicBlock *const BB = Comparisons[I].BB;
5920b57cec5SDimitry Andric if (!BB->getName().empty()) {
5930b57cec5SDimitry Andric append("+");
5940b57cec5SDimitry Andric append(BB->getName());
5950b57cec5SDimitry Andric }
5960b57cec5SDimitry Andric }
597fe6060f1SDimitry Andric return Scratch.str();
5980b57cec5SDimitry Andric }
5990b57cec5SDimitry Andric };
6000b57cec5SDimitry Andric } // namespace
6010b57cec5SDimitry Andric
6020b57cec5SDimitry Andric // Merges the given contiguous comparison blocks into one memcmp block.
mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,BasicBlock * const InsertBefore,BasicBlock * const NextCmpBlock,PHINode & Phi,const TargetLibraryInfo & TLI,AliasAnalysis & AA,DomTreeUpdater & DTU)6030b57cec5SDimitry Andric static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
6040b57cec5SDimitry Andric BasicBlock *const InsertBefore,
6050b57cec5SDimitry Andric BasicBlock *const NextCmpBlock,
6060b57cec5SDimitry Andric PHINode &Phi, const TargetLibraryInfo &TLI,
6070b57cec5SDimitry Andric AliasAnalysis &AA, DomTreeUpdater &DTU) {
6080b57cec5SDimitry Andric assert(!Comparisons.empty() && "merging zero comparisons");
6090b57cec5SDimitry Andric LLVMContext &Context = NextCmpBlock->getContext();
6100b57cec5SDimitry Andric const BCECmpBlock &FirstCmp = Comparisons[0];
6110b57cec5SDimitry Andric
6120b57cec5SDimitry Andric // Create a new cmp block before next cmp block.
6130b57cec5SDimitry Andric BasicBlock *const BB =
6140b57cec5SDimitry Andric BasicBlock::Create(Context, MergedBlockName(Comparisons).Name,
6150b57cec5SDimitry Andric NextCmpBlock->getParent(), InsertBefore);
6160b57cec5SDimitry Andric IRBuilder<> Builder(BB);
6170b57cec5SDimitry Andric // Add the GEPs from the first BCECmpBlock.
61881ad6265SDimitry Andric Value *Lhs, *Rhs;
61981ad6265SDimitry Andric if (FirstCmp.Lhs().GEP)
62081ad6265SDimitry Andric Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
62181ad6265SDimitry Andric else
62281ad6265SDimitry Andric Lhs = FirstCmp.Lhs().LoadI->getPointerOperand();
62381ad6265SDimitry Andric if (FirstCmp.Rhs().GEP)
62481ad6265SDimitry Andric Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
62581ad6265SDimitry Andric else
62681ad6265SDimitry Andric Rhs = FirstCmp.Rhs().LoadI->getPointerOperand();
6270b57cec5SDimitry Andric
6280b57cec5SDimitry Andric Value *IsEqual = nullptr;
6290b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> "
6300b57cec5SDimitry Andric << BB->getName() << "\n");
631e8d8bef9SDimitry Andric
632e8d8bef9SDimitry Andric // If there is one block that requires splitting, we do it now, i.e.
633e8d8bef9SDimitry Andric // just before we know we will collapse the chain. The instructions
634e8d8bef9SDimitry Andric // can be executed before any of the instructions in the chain.
635e8d8bef9SDimitry Andric const auto ToSplit = llvm::find_if(
636e8d8bef9SDimitry Andric Comparisons, [](const BCECmpBlock &B) { return B.RequireSplit; });
637e8d8bef9SDimitry Andric if (ToSplit != Comparisons.end()) {
638e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n");
639e8d8bef9SDimitry Andric ToSplit->split(BB, AA);
640e8d8bef9SDimitry Andric }
641e8d8bef9SDimitry Andric
6420b57cec5SDimitry Andric if (Comparisons.size() == 1) {
6430b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
64406c3fb27SDimitry Andric // Use clone to keep the metadata
64506c3fb27SDimitry Andric Instruction *const LhsLoad = Builder.Insert(FirstCmp.Lhs().LoadI->clone());
64606c3fb27SDimitry Andric Instruction *const RhsLoad = Builder.Insert(FirstCmp.Rhs().LoadI->clone());
64706c3fb27SDimitry Andric LhsLoad->replaceUsesOfWith(LhsLoad->getOperand(0), Lhs);
64806c3fb27SDimitry Andric RhsLoad->replaceUsesOfWith(RhsLoad->getOperand(0), Rhs);
6490b57cec5SDimitry Andric // There are no blocks to merge, just do the comparison.
6500b57cec5SDimitry Andric IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad);
6510b57cec5SDimitry Andric } else {
6520b57cec5SDimitry Andric const unsigned TotalSizeBits = std::accumulate(
6530b57cec5SDimitry Andric Comparisons.begin(), Comparisons.end(), 0u,
6540b57cec5SDimitry Andric [](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); });
6550b57cec5SDimitry Andric
656bdd1243dSDimitry Andric // memcmp expects a 'size_t' argument and returns 'int'.
657bdd1243dSDimitry Andric unsigned SizeTBits = TLI.getSizeTSize(*Phi.getModule());
658bdd1243dSDimitry Andric unsigned IntBits = TLI.getIntSize();
659bdd1243dSDimitry Andric
6600b57cec5SDimitry Andric // Create memcmp() == 0.
661*0fca6ea1SDimitry Andric const auto &DL = Phi.getDataLayout();
6620b57cec5SDimitry Andric Value *const MemCmpCall = emitMemCmp(
6630b57cec5SDimitry Andric Lhs, Rhs,
664bdd1243dSDimitry Andric ConstantInt::get(Builder.getIntNTy(SizeTBits), TotalSizeBits / 8),
665bdd1243dSDimitry Andric Builder, DL, &TLI);
6660b57cec5SDimitry Andric IsEqual = Builder.CreateICmpEQ(
667bdd1243dSDimitry Andric MemCmpCall, ConstantInt::get(Builder.getIntNTy(IntBits), 0));
6680b57cec5SDimitry Andric }
6690b57cec5SDimitry Andric
6700b57cec5SDimitry Andric BasicBlock *const PhiBB = Phi.getParent();
6710b57cec5SDimitry Andric // Add a branch to the next basic block in the chain.
6720b57cec5SDimitry Andric if (NextCmpBlock == PhiBB) {
6730b57cec5SDimitry Andric // Continue to phi, passing it the comparison result.
6740b57cec5SDimitry Andric Builder.CreateBr(PhiBB);
6750b57cec5SDimitry Andric Phi.addIncoming(IsEqual, BB);
6760b57cec5SDimitry Andric DTU.applyUpdates({{DominatorTree::Insert, BB, PhiBB}});
6770b57cec5SDimitry Andric } else {
6780b57cec5SDimitry Andric // Continue to next block if equal, exit to phi else.
6790b57cec5SDimitry Andric Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB);
6800b57cec5SDimitry Andric Phi.addIncoming(ConstantInt::getFalse(Context), BB);
6810b57cec5SDimitry Andric DTU.applyUpdates({{DominatorTree::Insert, BB, NextCmpBlock},
6820b57cec5SDimitry Andric {DominatorTree::Insert, BB, PhiBB}});
6830b57cec5SDimitry Andric }
6840b57cec5SDimitry Andric return BB;
6850b57cec5SDimitry Andric }
6860b57cec5SDimitry Andric
simplify(const TargetLibraryInfo & TLI,AliasAnalysis & AA,DomTreeUpdater & DTU)6870b57cec5SDimitry Andric bool BCECmpChain::simplify(const TargetLibraryInfo &TLI, AliasAnalysis &AA,
6880b57cec5SDimitry Andric DomTreeUpdater &DTU) {
689349cc55cSDimitry Andric assert(atLeastOneMerged() && "simplifying trivial BCECmpChain");
6900b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Simplifying comparison chain starting at block "
6910b57cec5SDimitry Andric << EntryBlock_->getName() << "\n");
6920b57cec5SDimitry Andric
6930b57cec5SDimitry Andric // Effectively merge blocks. We go in the reverse direction from the phi block
6940b57cec5SDimitry Andric // so that the next block is always available to branch to.
695349cc55cSDimitry Andric BasicBlock *InsertBefore = EntryBlock_;
6960b57cec5SDimitry Andric BasicBlock *NextCmpBlock = Phi_.getParent();
697349cc55cSDimitry Andric for (const auto &Blocks : reverse(MergedBlocks_)) {
698349cc55cSDimitry Andric InsertBefore = NextCmpBlock = mergeComparisons(
699349cc55cSDimitry Andric Blocks, InsertBefore, NextCmpBlock, Phi_, TLI, AA, DTU);
7000b57cec5SDimitry Andric }
7010b57cec5SDimitry Andric
7020b57cec5SDimitry Andric // Replace the original cmp chain with the new cmp chain by pointing all
7030b57cec5SDimitry Andric // predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp
7040b57cec5SDimitry Andric // blocks in the old chain unreachable.
7050b57cec5SDimitry Andric while (!pred_empty(EntryBlock_)) {
7060b57cec5SDimitry Andric BasicBlock* const Pred = *pred_begin(EntryBlock_);
7070b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Updating jump into old chain from " << Pred->getName()
7080b57cec5SDimitry Andric << "\n");
7090b57cec5SDimitry Andric Pred->getTerminator()->replaceUsesOfWith(EntryBlock_, NextCmpBlock);
7100b57cec5SDimitry Andric DTU.applyUpdates({{DominatorTree::Delete, Pred, EntryBlock_},
7110b57cec5SDimitry Andric {DominatorTree::Insert, Pred, NextCmpBlock}});
7120b57cec5SDimitry Andric }
7130b57cec5SDimitry Andric
7140b57cec5SDimitry Andric // If the old cmp chain was the function entry, we need to update the function
7150b57cec5SDimitry Andric // entry.
716fe6060f1SDimitry Andric const bool ChainEntryIsFnEntry = EntryBlock_->isEntryBlock();
7170b57cec5SDimitry Andric if (ChainEntryIsFnEntry && DTU.hasDomTree()) {
7180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Changing function entry from "
7190b57cec5SDimitry Andric << EntryBlock_->getName() << " to "
7200b57cec5SDimitry Andric << NextCmpBlock->getName() << "\n");
7210b57cec5SDimitry Andric DTU.getDomTree().setNewRoot(NextCmpBlock);
7220b57cec5SDimitry Andric DTU.applyUpdates({{DominatorTree::Delete, NextCmpBlock, EntryBlock_}});
7230b57cec5SDimitry Andric }
7240b57cec5SDimitry Andric EntryBlock_ = nullptr;
7250b57cec5SDimitry Andric
7260b57cec5SDimitry Andric // Delete merged blocks. This also removes incoming values in phi.
7270b57cec5SDimitry Andric SmallVector<BasicBlock *, 16> DeadBlocks;
728349cc55cSDimitry Andric for (const auto &Blocks : MergedBlocks_) {
729349cc55cSDimitry Andric for (const BCECmpBlock &Block : Blocks) {
730349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "Deleting merged block " << Block.BB->getName()
731349cc55cSDimitry Andric << "\n");
732349cc55cSDimitry Andric DeadBlocks.push_back(Block.BB);
733349cc55cSDimitry Andric }
7340b57cec5SDimitry Andric }
7350b57cec5SDimitry Andric DeleteDeadBlocks(DeadBlocks, &DTU);
7360b57cec5SDimitry Andric
737349cc55cSDimitry Andric MergedBlocks_.clear();
7380b57cec5SDimitry Andric return true;
7390b57cec5SDimitry Andric }
7400b57cec5SDimitry Andric
getOrderedBlocks(PHINode & Phi,BasicBlock * const LastBlock,int NumBlocks)7410b57cec5SDimitry Andric std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
7420b57cec5SDimitry Andric BasicBlock *const LastBlock,
7430b57cec5SDimitry Andric int NumBlocks) {
7440b57cec5SDimitry Andric // Walk up from the last block to find other blocks.
7450b57cec5SDimitry Andric std::vector<BasicBlock *> Blocks(NumBlocks);
7460b57cec5SDimitry Andric assert(LastBlock && "invalid last block");
7470b57cec5SDimitry Andric BasicBlock *CurBlock = LastBlock;
7480b57cec5SDimitry Andric for (int BlockIndex = NumBlocks - 1; BlockIndex > 0; --BlockIndex) {
7490b57cec5SDimitry Andric if (CurBlock->hasAddressTaken()) {
7500b57cec5SDimitry Andric // Somebody is jumping to the block through an address, all bets are
7510b57cec5SDimitry Andric // off.
7520b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex
7530b57cec5SDimitry Andric << " has its address taken\n");
7540b57cec5SDimitry Andric return {};
7550b57cec5SDimitry Andric }
7560b57cec5SDimitry Andric Blocks[BlockIndex] = CurBlock;
7570b57cec5SDimitry Andric auto *SinglePredecessor = CurBlock->getSinglePredecessor();
7580b57cec5SDimitry Andric if (!SinglePredecessor) {
7590b57cec5SDimitry Andric // The block has two or more predecessors.
7600b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex
7610b57cec5SDimitry Andric << " has two or more predecessors\n");
7620b57cec5SDimitry Andric return {};
7630b57cec5SDimitry Andric }
7640b57cec5SDimitry Andric if (Phi.getBasicBlockIndex(SinglePredecessor) < 0) {
7650b57cec5SDimitry Andric // The block does not link back to the phi.
7660b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex
7670b57cec5SDimitry Andric << " does not link back to the phi\n");
7680b57cec5SDimitry Andric return {};
7690b57cec5SDimitry Andric }
7700b57cec5SDimitry Andric CurBlock = SinglePredecessor;
7710b57cec5SDimitry Andric }
7720b57cec5SDimitry Andric Blocks[0] = CurBlock;
7730b57cec5SDimitry Andric return Blocks;
7740b57cec5SDimitry Andric }
7750b57cec5SDimitry Andric
processPhi(PHINode & Phi,const TargetLibraryInfo & TLI,AliasAnalysis & AA,DomTreeUpdater & DTU)7760b57cec5SDimitry Andric bool processPhi(PHINode &Phi, const TargetLibraryInfo &TLI, AliasAnalysis &AA,
7770b57cec5SDimitry Andric DomTreeUpdater &DTU) {
7780b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "processPhi()\n");
7790b57cec5SDimitry Andric if (Phi.getNumIncomingValues() <= 1) {
7800b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n");
7810b57cec5SDimitry Andric return false;
7820b57cec5SDimitry Andric }
7830b57cec5SDimitry Andric // We are looking for something that has the following structure:
7840b57cec5SDimitry Andric // bb1 --eq--> bb2 --eq--> bb3 --eq--> bb4 --+
7850b57cec5SDimitry Andric // \ \ \ \
7860b57cec5SDimitry Andric // ne ne ne \
7870b57cec5SDimitry Andric // \ \ \ v
7880b57cec5SDimitry Andric // +------------+-----------+----------> bb_phi
7890b57cec5SDimitry Andric //
7900b57cec5SDimitry Andric // - The last basic block (bb4 here) must branch unconditionally to bb_phi.
7910b57cec5SDimitry Andric // It's the only block that contributes a non-constant value to the Phi.
7920b57cec5SDimitry Andric // - All other blocks (b1, b2, b3) must have exactly two successors, one of
7930b57cec5SDimitry Andric // them being the phi block.
7940b57cec5SDimitry Andric // - All intermediate blocks (bb2, bb3) must have only one predecessor.
7950b57cec5SDimitry Andric // - Blocks cannot do other work besides the comparison, see doesOtherWork()
7960b57cec5SDimitry Andric
7970b57cec5SDimitry Andric // The blocks are not necessarily ordered in the phi, so we start from the
7980b57cec5SDimitry Andric // last block and reconstruct the order.
7990b57cec5SDimitry Andric BasicBlock *LastBlock = nullptr;
8000b57cec5SDimitry Andric for (unsigned I = 0; I < Phi.getNumIncomingValues(); ++I) {
8010b57cec5SDimitry Andric if (isa<ConstantInt>(Phi.getIncomingValue(I))) continue;
8020b57cec5SDimitry Andric if (LastBlock) {
8030b57cec5SDimitry Andric // There are several non-constant values.
8040b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "skip: several non-constant values\n");
8050b57cec5SDimitry Andric return false;
8060b57cec5SDimitry Andric }
8070b57cec5SDimitry Andric if (!isa<ICmpInst>(Phi.getIncomingValue(I)) ||
8080b57cec5SDimitry Andric cast<ICmpInst>(Phi.getIncomingValue(I))->getParent() !=
8090b57cec5SDimitry Andric Phi.getIncomingBlock(I)) {
8100b57cec5SDimitry Andric // Non-constant incoming value is not from a cmp instruction or not
8110b57cec5SDimitry Andric // produced by the last block. We could end up processing the value
8120b57cec5SDimitry Andric // producing block more than once.
8130b57cec5SDimitry Andric //
8140b57cec5SDimitry Andric // This is an uncommon case, so we bail.
8150b57cec5SDimitry Andric LLVM_DEBUG(
8160b57cec5SDimitry Andric dbgs()
8170b57cec5SDimitry Andric << "skip: non-constant value not from cmp or not from last block.\n");
8180b57cec5SDimitry Andric return false;
8190b57cec5SDimitry Andric }
8200b57cec5SDimitry Andric LastBlock = Phi.getIncomingBlock(I);
8210b57cec5SDimitry Andric }
8220b57cec5SDimitry Andric if (!LastBlock) {
8230b57cec5SDimitry Andric // There is no non-constant block.
8240b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "skip: no non-constant block\n");
8250b57cec5SDimitry Andric return false;
8260b57cec5SDimitry Andric }
8270b57cec5SDimitry Andric if (LastBlock->getSingleSuccessor() != Phi.getParent()) {
8280b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "skip: last block non-phi successor\n");
8290b57cec5SDimitry Andric return false;
8300b57cec5SDimitry Andric }
8310b57cec5SDimitry Andric
8320b57cec5SDimitry Andric const auto Blocks =
8330b57cec5SDimitry Andric getOrderedBlocks(Phi, LastBlock, Phi.getNumIncomingValues());
8340b57cec5SDimitry Andric if (Blocks.empty()) return false;
8350b57cec5SDimitry Andric BCECmpChain CmpChain(Blocks, Phi, AA);
8360b57cec5SDimitry Andric
837349cc55cSDimitry Andric if (!CmpChain.atLeastOneMerged()) {
838349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "skip: nothing merged\n");
8390b57cec5SDimitry Andric return false;
8400b57cec5SDimitry Andric }
8410b57cec5SDimitry Andric
8420b57cec5SDimitry Andric return CmpChain.simplify(TLI, AA, DTU);
8430b57cec5SDimitry Andric }
8440b57cec5SDimitry Andric
runImpl(Function & F,const TargetLibraryInfo & TLI,const TargetTransformInfo & TTI,AliasAnalysis & AA,DominatorTree * DT)8450b57cec5SDimitry Andric static bool runImpl(Function &F, const TargetLibraryInfo &TLI,
8460b57cec5SDimitry Andric const TargetTransformInfo &TTI, AliasAnalysis &AA,
8470b57cec5SDimitry Andric DominatorTree *DT) {
8480b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "MergeICmpsLegacyPass: " << F.getName() << "\n");
8490b57cec5SDimitry Andric
8500b57cec5SDimitry Andric // We only try merging comparisons if the target wants to expand memcmp later.
8510b57cec5SDimitry Andric // The rationale is to avoid turning small chains into memcmp calls.
8520b57cec5SDimitry Andric if (!TTI.enableMemCmpExpansion(F.hasOptSize(), true))
8530b57cec5SDimitry Andric return false;
8540b57cec5SDimitry Andric
8550b57cec5SDimitry Andric // If we don't have memcmp avaiable we can't emit calls to it.
8560b57cec5SDimitry Andric if (!TLI.has(LibFunc_memcmp))
8570b57cec5SDimitry Andric return false;
8580b57cec5SDimitry Andric
8590b57cec5SDimitry Andric DomTreeUpdater DTU(DT, /*PostDominatorTree*/ nullptr,
8600b57cec5SDimitry Andric DomTreeUpdater::UpdateStrategy::Eager);
8610b57cec5SDimitry Andric
8620b57cec5SDimitry Andric bool MadeChange = false;
8630b57cec5SDimitry Andric
864349cc55cSDimitry Andric for (BasicBlock &BB : llvm::drop_begin(F)) {
8650b57cec5SDimitry Andric // A Phi operation is always first in a basic block.
866349cc55cSDimitry Andric if (auto *const Phi = dyn_cast<PHINode>(&*BB.begin()))
8670b57cec5SDimitry Andric MadeChange |= processPhi(*Phi, TLI, AA, DTU);
8680b57cec5SDimitry Andric }
8690b57cec5SDimitry Andric
8700b57cec5SDimitry Andric return MadeChange;
8710b57cec5SDimitry Andric }
8720b57cec5SDimitry Andric
8730b57cec5SDimitry Andric class MergeICmpsLegacyPass : public FunctionPass {
8740b57cec5SDimitry Andric public:
8750b57cec5SDimitry Andric static char ID;
8760b57cec5SDimitry Andric
MergeICmpsLegacyPass()8770b57cec5SDimitry Andric MergeICmpsLegacyPass() : FunctionPass(ID) {
8780b57cec5SDimitry Andric initializeMergeICmpsLegacyPassPass(*PassRegistry::getPassRegistry());
8790b57cec5SDimitry Andric }
8800b57cec5SDimitry Andric
runOnFunction(Function & F)8810b57cec5SDimitry Andric bool runOnFunction(Function &F) override {
8820b57cec5SDimitry Andric if (skipFunction(F)) return false;
8838bcb0991SDimitry Andric const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
8840b57cec5SDimitry Andric const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
8850b57cec5SDimitry Andric // MergeICmps does not need the DominatorTree, but we update it if it's
8860b57cec5SDimitry Andric // already available.
8870b57cec5SDimitry Andric auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
8880b57cec5SDimitry Andric auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
8890b57cec5SDimitry Andric return runImpl(F, TLI, TTI, AA, DTWP ? &DTWP->getDomTree() : nullptr);
8900b57cec5SDimitry Andric }
8910b57cec5SDimitry Andric
8920b57cec5SDimitry Andric private:
getAnalysisUsage(AnalysisUsage & AU) const8930b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
8940b57cec5SDimitry Andric AU.addRequired<TargetLibraryInfoWrapperPass>();
8950b57cec5SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>();
8960b57cec5SDimitry Andric AU.addRequired<AAResultsWrapperPass>();
8970b57cec5SDimitry Andric AU.addPreserved<GlobalsAAWrapperPass>();
8980b57cec5SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>();
8990b57cec5SDimitry Andric }
9000b57cec5SDimitry Andric };
9010b57cec5SDimitry Andric
9020b57cec5SDimitry Andric } // namespace
9030b57cec5SDimitry Andric
9040b57cec5SDimitry Andric char MergeICmpsLegacyPass::ID = 0;
9050b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(MergeICmpsLegacyPass, "mergeicmps",
9060b57cec5SDimitry Andric "Merge contiguous icmps into a memcmp", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)9070b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
9080b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
9090b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
9100b57cec5SDimitry Andric INITIALIZE_PASS_END(MergeICmpsLegacyPass, "mergeicmps",
9110b57cec5SDimitry Andric "Merge contiguous icmps into a memcmp", false, false)
9120b57cec5SDimitry Andric
9130b57cec5SDimitry Andric Pass *llvm::createMergeICmpsLegacyPass() { return new MergeICmpsLegacyPass(); }
9140b57cec5SDimitry Andric
run(Function & F,FunctionAnalysisManager & AM)9150b57cec5SDimitry Andric PreservedAnalyses MergeICmpsPass::run(Function &F,
9160b57cec5SDimitry Andric FunctionAnalysisManager &AM) {
9170b57cec5SDimitry Andric auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
9180b57cec5SDimitry Andric auto &TTI = AM.getResult<TargetIRAnalysis>(F);
9190b57cec5SDimitry Andric auto &AA = AM.getResult<AAManager>(F);
9200b57cec5SDimitry Andric auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
9210b57cec5SDimitry Andric const bool MadeChanges = runImpl(F, TLI, TTI, AA, DT);
9220b57cec5SDimitry Andric if (!MadeChanges)
9230b57cec5SDimitry Andric return PreservedAnalyses::all();
9240b57cec5SDimitry Andric PreservedAnalyses PA;
9250b57cec5SDimitry Andric PA.preserve<DominatorTreeAnalysis>();
9260b57cec5SDimitry Andric return PA;
9270b57cec5SDimitry Andric }
928