10b57cec5SDimitry Andric //===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file contains an optimization for div and rem on architectures that
100b57cec5SDimitry Andric // execute short instructions significantly faster than longer instructions.
110b57cec5SDimitry Andric // For example, on Intel Atom 32-bit divides are slow enough that during
120b57cec5SDimitry Andric // runtime it is profitable to check the value of the operands, and if they are
130b57cec5SDimitry Andric // positive and less than 256 use an unsigned 8-bit divide.
140b57cec5SDimitry Andric //
150b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric
170b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BypassSlowDivision.h"
180b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
190b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
200b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
210b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Local.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
230b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
240b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
250b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
260b57cec5SDimitry Andric #include "llvm/IR/Function.h"
270b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
280b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
290b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
300b57cec5SDimitry Andric #include "llvm/IR/Module.h"
310b57cec5SDimitry Andric #include "llvm/IR/Type.h"
320b57cec5SDimitry Andric #include "llvm/IR/Value.h"
330b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
340b57cec5SDimitry Andric #include "llvm/Support/KnownBits.h"
350b57cec5SDimitry Andric #include <cassert>
360b57cec5SDimitry Andric #include <cstdint>
370b57cec5SDimitry Andric
380b57cec5SDimitry Andric using namespace llvm;
390b57cec5SDimitry Andric
400b57cec5SDimitry Andric #define DEBUG_TYPE "bypass-slow-division"
410b57cec5SDimitry Andric
420b57cec5SDimitry Andric namespace {
430b57cec5SDimitry Andric
440b57cec5SDimitry Andric struct QuotRemPair {
450b57cec5SDimitry Andric Value *Quotient;
460b57cec5SDimitry Andric Value *Remainder;
470b57cec5SDimitry Andric
QuotRemPair__anon06c93eea0111::QuotRemPair480b57cec5SDimitry Andric QuotRemPair(Value *InQuotient, Value *InRemainder)
490b57cec5SDimitry Andric : Quotient(InQuotient), Remainder(InRemainder) {}
500b57cec5SDimitry Andric };
510b57cec5SDimitry Andric
520b57cec5SDimitry Andric /// A quotient and remainder, plus a BB from which they logically "originate".
530b57cec5SDimitry Andric /// If you use Quotient or Remainder in a Phi node, you should use BB as its
540b57cec5SDimitry Andric /// corresponding predecessor.
550b57cec5SDimitry Andric struct QuotRemWithBB {
560b57cec5SDimitry Andric BasicBlock *BB = nullptr;
570b57cec5SDimitry Andric Value *Quotient = nullptr;
580b57cec5SDimitry Andric Value *Remainder = nullptr;
590b57cec5SDimitry Andric };
600b57cec5SDimitry Andric
610b57cec5SDimitry Andric using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
620b57cec5SDimitry Andric using BypassWidthsTy = DenseMap<unsigned, unsigned>;
630b57cec5SDimitry Andric using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
640b57cec5SDimitry Andric
650b57cec5SDimitry Andric enum ValueRange {
660b57cec5SDimitry Andric /// Operand definitely fits into BypassType. No runtime checks are needed.
670b57cec5SDimitry Andric VALRNG_KNOWN_SHORT,
680b57cec5SDimitry Andric /// A runtime check is required, as value range is unknown.
690b57cec5SDimitry Andric VALRNG_UNKNOWN,
700b57cec5SDimitry Andric /// Operand is unlikely to fit into BypassType. The bypassing should be
710b57cec5SDimitry Andric /// disabled.
720b57cec5SDimitry Andric VALRNG_LIKELY_LONG
730b57cec5SDimitry Andric };
740b57cec5SDimitry Andric
750b57cec5SDimitry Andric class FastDivInsertionTask {
760b57cec5SDimitry Andric bool IsValidTask = false;
770b57cec5SDimitry Andric Instruction *SlowDivOrRem = nullptr;
780b57cec5SDimitry Andric IntegerType *BypassType = nullptr;
790b57cec5SDimitry Andric BasicBlock *MainBB = nullptr;
800b57cec5SDimitry Andric
810b57cec5SDimitry Andric bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
820b57cec5SDimitry Andric ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
830b57cec5SDimitry Andric QuotRemWithBB createSlowBB(BasicBlock *Successor);
840b57cec5SDimitry Andric QuotRemWithBB createFastBB(BasicBlock *Successor);
850b57cec5SDimitry Andric QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
860b57cec5SDimitry Andric BasicBlock *PhiBB);
870b57cec5SDimitry Andric Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
88bdd1243dSDimitry Andric std::optional<QuotRemPair> insertFastDivAndRem();
890b57cec5SDimitry Andric
isSignedOp()900b57cec5SDimitry Andric bool isSignedOp() {
910b57cec5SDimitry Andric return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
920b57cec5SDimitry Andric SlowDivOrRem->getOpcode() == Instruction::SRem;
930b57cec5SDimitry Andric }
940b57cec5SDimitry Andric
isDivisionOp()950b57cec5SDimitry Andric bool isDivisionOp() {
960b57cec5SDimitry Andric return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
970b57cec5SDimitry Andric SlowDivOrRem->getOpcode() == Instruction::UDiv;
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric
getSlowType()1000b57cec5SDimitry Andric Type *getSlowType() { return SlowDivOrRem->getType(); }
1010b57cec5SDimitry Andric
1020b57cec5SDimitry Andric public:
1030b57cec5SDimitry Andric FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
1040b57cec5SDimitry Andric
1050b57cec5SDimitry Andric Value *getReplacement(DivCacheTy &Cache);
1060b57cec5SDimitry Andric };
1070b57cec5SDimitry Andric
1080b57cec5SDimitry Andric } // end anonymous namespace
1090b57cec5SDimitry Andric
FastDivInsertionTask(Instruction * I,const BypassWidthsTy & BypassWidths)1100b57cec5SDimitry Andric FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
1110b57cec5SDimitry Andric const BypassWidthsTy &BypassWidths) {
1120b57cec5SDimitry Andric switch (I->getOpcode()) {
1130b57cec5SDimitry Andric case Instruction::UDiv:
1140b57cec5SDimitry Andric case Instruction::SDiv:
1150b57cec5SDimitry Andric case Instruction::URem:
1160b57cec5SDimitry Andric case Instruction::SRem:
1170b57cec5SDimitry Andric SlowDivOrRem = I;
1180b57cec5SDimitry Andric break;
1190b57cec5SDimitry Andric default:
1200b57cec5SDimitry Andric // I is not a div/rem operation.
1210b57cec5SDimitry Andric return;
1220b57cec5SDimitry Andric }
1230b57cec5SDimitry Andric
1240b57cec5SDimitry Andric // Skip division on vector types. Only optimize integer instructions.
1250b57cec5SDimitry Andric IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
1260b57cec5SDimitry Andric if (!SlowType)
1270b57cec5SDimitry Andric return;
1280b57cec5SDimitry Andric
1290b57cec5SDimitry Andric // Skip if this bitwidth is not bypassed.
1300b57cec5SDimitry Andric auto BI = BypassWidths.find(SlowType->getBitWidth());
1310b57cec5SDimitry Andric if (BI == BypassWidths.end())
1320b57cec5SDimitry Andric return;
1330b57cec5SDimitry Andric
1340b57cec5SDimitry Andric // Get type for div/rem instruction with bypass bitwidth.
1350b57cec5SDimitry Andric IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
1360b57cec5SDimitry Andric BypassType = BT;
1370b57cec5SDimitry Andric
1380b57cec5SDimitry Andric // The original basic block.
1390b57cec5SDimitry Andric MainBB = I->getParent();
1400b57cec5SDimitry Andric
1410b57cec5SDimitry Andric // The instruction is indeed a slow div or rem operation.
1420b57cec5SDimitry Andric IsValidTask = true;
1430b57cec5SDimitry Andric }
1440b57cec5SDimitry Andric
1450b57cec5SDimitry Andric /// Reuses previously-computed dividend or remainder from the current BB if
1460b57cec5SDimitry Andric /// operands and operation are identical. Otherwise calls insertFastDivAndRem to
1470b57cec5SDimitry Andric /// perform the optimization and caches the resulting dividend and remainder.
1480b57cec5SDimitry Andric /// If no replacement can be generated, nullptr is returned.
getReplacement(DivCacheTy & Cache)1490b57cec5SDimitry Andric Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
1500b57cec5SDimitry Andric // First, make sure that the task is valid.
1510b57cec5SDimitry Andric if (!IsValidTask)
1520b57cec5SDimitry Andric return nullptr;
1530b57cec5SDimitry Andric
1540b57cec5SDimitry Andric // Then, look for a value in Cache.
1550b57cec5SDimitry Andric Value *Dividend = SlowDivOrRem->getOperand(0);
1560b57cec5SDimitry Andric Value *Divisor = SlowDivOrRem->getOperand(1);
1570b57cec5SDimitry Andric DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
1580b57cec5SDimitry Andric auto CacheI = Cache.find(Key);
1590b57cec5SDimitry Andric
1600b57cec5SDimitry Andric if (CacheI == Cache.end()) {
1610b57cec5SDimitry Andric // If previous instance does not exist, try to insert fast div.
162bdd1243dSDimitry Andric std::optional<QuotRemPair> OptResult = insertFastDivAndRem();
1630b57cec5SDimitry Andric // Bail out if insertFastDivAndRem has failed.
1640b57cec5SDimitry Andric if (!OptResult)
1650b57cec5SDimitry Andric return nullptr;
1660b57cec5SDimitry Andric CacheI = Cache.insert({Key, *OptResult}).first;
1670b57cec5SDimitry Andric }
1680b57cec5SDimitry Andric
1690b57cec5SDimitry Andric QuotRemPair &Value = CacheI->second;
1700b57cec5SDimitry Andric return isDivisionOp() ? Value.Quotient : Value.Remainder;
1710b57cec5SDimitry Andric }
1720b57cec5SDimitry Andric
1730b57cec5SDimitry Andric /// Check if a value looks like a hash.
1740b57cec5SDimitry Andric ///
1750b57cec5SDimitry Andric /// The routine is expected to detect values computed using the most common hash
1760b57cec5SDimitry Andric /// algorithms. Typically, hash computations end with one of the following
1770b57cec5SDimitry Andric /// instructions:
1780b57cec5SDimitry Andric ///
1790b57cec5SDimitry Andric /// 1) MUL with a constant wider than BypassType
1800b57cec5SDimitry Andric /// 2) XOR instruction
1810b57cec5SDimitry Andric ///
1820b57cec5SDimitry Andric /// And even if we are wrong and the value is not a hash, it is still quite
1830b57cec5SDimitry Andric /// unlikely that such values will fit into BypassType.
1840b57cec5SDimitry Andric ///
1850b57cec5SDimitry Andric /// To detect string hash algorithms like FNV we have to look through PHI-nodes.
1860b57cec5SDimitry Andric /// It is implemented as a depth-first search for values that look neither long
1870b57cec5SDimitry Andric /// nor hash-like.
isHashLikeValue(Value * V,VisitedSetTy & Visited)1880b57cec5SDimitry Andric bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
1890b57cec5SDimitry Andric Instruction *I = dyn_cast<Instruction>(V);
1900b57cec5SDimitry Andric if (!I)
1910b57cec5SDimitry Andric return false;
1920b57cec5SDimitry Andric
1930b57cec5SDimitry Andric switch (I->getOpcode()) {
1940b57cec5SDimitry Andric case Instruction::Xor:
1950b57cec5SDimitry Andric return true;
1960b57cec5SDimitry Andric case Instruction::Mul: {
1970b57cec5SDimitry Andric // After Constant Hoisting pass, long constants may be represented as
1980b57cec5SDimitry Andric // bitcast instructions. As a result, some constants may look like an
1990b57cec5SDimitry Andric // instruction at first, and an additional check is necessary to find out if
2000b57cec5SDimitry Andric // an operand is actually a constant.
2010b57cec5SDimitry Andric Value *Op1 = I->getOperand(1);
2020b57cec5SDimitry Andric ConstantInt *C = dyn_cast<ConstantInt>(Op1);
2030b57cec5SDimitry Andric if (!C && isa<BitCastInst>(Op1))
2040b57cec5SDimitry Andric C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
20506c3fb27SDimitry Andric return C && C->getValue().getSignificantBits() > BypassType->getBitWidth();
2060b57cec5SDimitry Andric }
2070b57cec5SDimitry Andric case Instruction::PHI:
2080b57cec5SDimitry Andric // Stop IR traversal in case of a crazy input code. This limits recursion
2090b57cec5SDimitry Andric // depth.
2100b57cec5SDimitry Andric if (Visited.size() >= 16)
2110b57cec5SDimitry Andric return false;
2120b57cec5SDimitry Andric // Do not visit nodes that have been visited already. We return true because
2130b57cec5SDimitry Andric // it means that we couldn't find any value that doesn't look hash-like.
2145ffd83dbSDimitry Andric if (!Visited.insert(I).second)
2150b57cec5SDimitry Andric return true;
2160b57cec5SDimitry Andric return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
2170b57cec5SDimitry Andric // Ignore undef values as they probably don't affect the division
2180b57cec5SDimitry Andric // operands.
2190b57cec5SDimitry Andric return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
2200b57cec5SDimitry Andric isa<UndefValue>(V);
2210b57cec5SDimitry Andric });
2220b57cec5SDimitry Andric default:
2230b57cec5SDimitry Andric return false;
2240b57cec5SDimitry Andric }
2250b57cec5SDimitry Andric }
2260b57cec5SDimitry Andric
2270b57cec5SDimitry Andric /// Check if an integer value fits into our bypass type.
getValueRange(Value * V,VisitedSetTy & Visited)2280b57cec5SDimitry Andric ValueRange FastDivInsertionTask::getValueRange(Value *V,
2290b57cec5SDimitry Andric VisitedSetTy &Visited) {
2300b57cec5SDimitry Andric unsigned ShortLen = BypassType->getBitWidth();
2310b57cec5SDimitry Andric unsigned LongLen = V->getType()->getIntegerBitWidth();
2320b57cec5SDimitry Andric
2330b57cec5SDimitry Andric assert(LongLen > ShortLen && "Value type must be wider than BypassType");
2340b57cec5SDimitry Andric unsigned HiBits = LongLen - ShortLen;
2350b57cec5SDimitry Andric
236*0fca6ea1SDimitry Andric const DataLayout &DL = SlowDivOrRem->getDataLayout();
2370b57cec5SDimitry Andric KnownBits Known(LongLen);
2380b57cec5SDimitry Andric
2390b57cec5SDimitry Andric computeKnownBits(V, Known, DL);
2400b57cec5SDimitry Andric
2410b57cec5SDimitry Andric if (Known.countMinLeadingZeros() >= HiBits)
2420b57cec5SDimitry Andric return VALRNG_KNOWN_SHORT;
2430b57cec5SDimitry Andric
2440b57cec5SDimitry Andric if (Known.countMaxLeadingZeros() < HiBits)
2450b57cec5SDimitry Andric return VALRNG_LIKELY_LONG;
2460b57cec5SDimitry Andric
2470b57cec5SDimitry Andric // Long integer divisions are often used in hashtable implementations. It's
2480b57cec5SDimitry Andric // not worth bypassing such divisions because hash values are extremely
2490b57cec5SDimitry Andric // unlikely to have enough leading zeros. The call below tries to detect
2500b57cec5SDimitry Andric // values that are unlikely to fit BypassType (including hashes).
2510b57cec5SDimitry Andric if (isHashLikeValue(V, Visited))
2520b57cec5SDimitry Andric return VALRNG_LIKELY_LONG;
2530b57cec5SDimitry Andric
2540b57cec5SDimitry Andric return VALRNG_UNKNOWN;
2550b57cec5SDimitry Andric }
2560b57cec5SDimitry Andric
2570b57cec5SDimitry Andric /// Add new basic block for slow div and rem operations and put it before
2580b57cec5SDimitry Andric /// SuccessorBB.
createSlowBB(BasicBlock * SuccessorBB)2590b57cec5SDimitry Andric QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
2600b57cec5SDimitry Andric QuotRemWithBB DivRemPair;
2610b57cec5SDimitry Andric DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
2620b57cec5SDimitry Andric MainBB->getParent(), SuccessorBB);
2630b57cec5SDimitry Andric IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
2645ffd83dbSDimitry Andric Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
2650b57cec5SDimitry Andric
2660b57cec5SDimitry Andric Value *Dividend = SlowDivOrRem->getOperand(0);
2670b57cec5SDimitry Andric Value *Divisor = SlowDivOrRem->getOperand(1);
2680b57cec5SDimitry Andric
2690b57cec5SDimitry Andric if (isSignedOp()) {
2700b57cec5SDimitry Andric DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
2710b57cec5SDimitry Andric DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
2720b57cec5SDimitry Andric } else {
2730b57cec5SDimitry Andric DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
2740b57cec5SDimitry Andric DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
2750b57cec5SDimitry Andric }
2760b57cec5SDimitry Andric
2770b57cec5SDimitry Andric Builder.CreateBr(SuccessorBB);
2780b57cec5SDimitry Andric return DivRemPair;
2790b57cec5SDimitry Andric }
2800b57cec5SDimitry Andric
2810b57cec5SDimitry Andric /// Add new basic block for fast div and rem operations and put it before
2820b57cec5SDimitry Andric /// SuccessorBB.
createFastBB(BasicBlock * SuccessorBB)2830b57cec5SDimitry Andric QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
2840b57cec5SDimitry Andric QuotRemWithBB DivRemPair;
2850b57cec5SDimitry Andric DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
2860b57cec5SDimitry Andric MainBB->getParent(), SuccessorBB);
2870b57cec5SDimitry Andric IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
2885ffd83dbSDimitry Andric Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
2890b57cec5SDimitry Andric
2900b57cec5SDimitry Andric Value *Dividend = SlowDivOrRem->getOperand(0);
2910b57cec5SDimitry Andric Value *Divisor = SlowDivOrRem->getOperand(1);
2920b57cec5SDimitry Andric Value *ShortDivisorV =
2930b57cec5SDimitry Andric Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
2940b57cec5SDimitry Andric Value *ShortDividendV =
2950b57cec5SDimitry Andric Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
2960b57cec5SDimitry Andric
2970b57cec5SDimitry Andric // udiv/urem because this optimization only handles positive numbers.
2980b57cec5SDimitry Andric Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
2990b57cec5SDimitry Andric Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
3000b57cec5SDimitry Andric DivRemPair.Quotient =
3010b57cec5SDimitry Andric Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
3020b57cec5SDimitry Andric DivRemPair.Remainder =
3030b57cec5SDimitry Andric Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
3040b57cec5SDimitry Andric Builder.CreateBr(SuccessorBB);
3050b57cec5SDimitry Andric
3060b57cec5SDimitry Andric return DivRemPair;
3070b57cec5SDimitry Andric }
3080b57cec5SDimitry Andric
3090b57cec5SDimitry Andric /// Creates Phi nodes for result of Div and Rem.
createDivRemPhiNodes(QuotRemWithBB & LHS,QuotRemWithBB & RHS,BasicBlock * PhiBB)3100b57cec5SDimitry Andric QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
3110b57cec5SDimitry Andric QuotRemWithBB &RHS,
3120b57cec5SDimitry Andric BasicBlock *PhiBB) {
3130b57cec5SDimitry Andric IRBuilder<> Builder(PhiBB, PhiBB->begin());
3145ffd83dbSDimitry Andric Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
3150b57cec5SDimitry Andric PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
3160b57cec5SDimitry Andric QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
3170b57cec5SDimitry Andric QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
3180b57cec5SDimitry Andric PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
3190b57cec5SDimitry Andric RemPhi->addIncoming(LHS.Remainder, LHS.BB);
3200b57cec5SDimitry Andric RemPhi->addIncoming(RHS.Remainder, RHS.BB);
3210b57cec5SDimitry Andric return QuotRemPair(QuoPhi, RemPhi);
3220b57cec5SDimitry Andric }
3230b57cec5SDimitry Andric
3240b57cec5SDimitry Andric /// Creates a runtime check to test whether both the divisor and dividend fit
3250b57cec5SDimitry Andric /// into BypassType. The check is inserted at the end of MainBB. True return
3260b57cec5SDimitry Andric /// value means that the operands fit. Either of the operands may be NULL if it
3270b57cec5SDimitry Andric /// doesn't need a runtime check.
insertOperandRuntimeCheck(Value * Op1,Value * Op2)3280b57cec5SDimitry Andric Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
3290b57cec5SDimitry Andric assert((Op1 || Op2) && "Nothing to check");
3300b57cec5SDimitry Andric IRBuilder<> Builder(MainBB, MainBB->end());
3315ffd83dbSDimitry Andric Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
3320b57cec5SDimitry Andric
3330b57cec5SDimitry Andric Value *OrV;
3340b57cec5SDimitry Andric if (Op1 && Op2)
3350b57cec5SDimitry Andric OrV = Builder.CreateOr(Op1, Op2);
3360b57cec5SDimitry Andric else
3370b57cec5SDimitry Andric OrV = Op1 ? Op1 : Op2;
3380b57cec5SDimitry Andric
3390b57cec5SDimitry Andric // BitMask is inverted to check if the operands are
3400b57cec5SDimitry Andric // larger than the bypass type
3410b57cec5SDimitry Andric uint64_t BitMask = ~BypassType->getBitMask();
3420b57cec5SDimitry Andric Value *AndV = Builder.CreateAnd(OrV, BitMask);
3430b57cec5SDimitry Andric
3440b57cec5SDimitry Andric // Compare operand values
3450b57cec5SDimitry Andric Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
3460b57cec5SDimitry Andric return Builder.CreateICmpEQ(AndV, ZeroV);
3470b57cec5SDimitry Andric }
3480b57cec5SDimitry Andric
3490b57cec5SDimitry Andric /// Substitutes the div/rem instruction with code that checks the value of the
3500b57cec5SDimitry Andric /// operands and uses a shorter-faster div/rem instruction when possible.
insertFastDivAndRem()351bdd1243dSDimitry Andric std::optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
3520b57cec5SDimitry Andric Value *Dividend = SlowDivOrRem->getOperand(0);
3530b57cec5SDimitry Andric Value *Divisor = SlowDivOrRem->getOperand(1);
3540b57cec5SDimitry Andric
3550b57cec5SDimitry Andric VisitedSetTy SetL;
3560b57cec5SDimitry Andric ValueRange DividendRange = getValueRange(Dividend, SetL);
3570b57cec5SDimitry Andric if (DividendRange == VALRNG_LIKELY_LONG)
358bdd1243dSDimitry Andric return std::nullopt;
3590b57cec5SDimitry Andric
3600b57cec5SDimitry Andric VisitedSetTy SetR;
3610b57cec5SDimitry Andric ValueRange DivisorRange = getValueRange(Divisor, SetR);
3620b57cec5SDimitry Andric if (DivisorRange == VALRNG_LIKELY_LONG)
363bdd1243dSDimitry Andric return std::nullopt;
3640b57cec5SDimitry Andric
3650b57cec5SDimitry Andric bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
3660b57cec5SDimitry Andric bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
3670b57cec5SDimitry Andric
3680b57cec5SDimitry Andric if (DividendShort && DivisorShort) {
3690b57cec5SDimitry Andric // If both operands are known to be short then just replace the long
3700b57cec5SDimitry Andric // division with a short one in-place. Since we're not introducing control
3710b57cec5SDimitry Andric // flow in this case, narrowing the division is always a win, even if the
3720b57cec5SDimitry Andric // divisor is a constant (and will later get replaced by a multiplication).
3730b57cec5SDimitry Andric
3740b57cec5SDimitry Andric IRBuilder<> Builder(SlowDivOrRem);
3750b57cec5SDimitry Andric Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
3760b57cec5SDimitry Andric Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
3770b57cec5SDimitry Andric Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
3780b57cec5SDimitry Andric Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
3790b57cec5SDimitry Andric Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
3800b57cec5SDimitry Andric Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
3810b57cec5SDimitry Andric return QuotRemPair(ExtDiv, ExtRem);
3820b57cec5SDimitry Andric }
3830b57cec5SDimitry Andric
3840b57cec5SDimitry Andric if (isa<ConstantInt>(Divisor)) {
3850b57cec5SDimitry Andric // If the divisor is not a constant, DAGCombiner will convert it to a
3860b57cec5SDimitry Andric // multiplication by a magic constant. It isn't clear if it is worth
3870b57cec5SDimitry Andric // introducing control flow to get a narrower multiply.
388bdd1243dSDimitry Andric return std::nullopt;
3890b57cec5SDimitry Andric }
3900b57cec5SDimitry Andric
3910b57cec5SDimitry Andric // After Constant Hoisting pass, long constants may be represented as
3920b57cec5SDimitry Andric // bitcast instructions. As a result, some constants may look like an
3930b57cec5SDimitry Andric // instruction at first, and an additional check is necessary to find out if
3940b57cec5SDimitry Andric // an operand is actually a constant.
3950b57cec5SDimitry Andric if (auto *BCI = dyn_cast<BitCastInst>(Divisor))
3960b57cec5SDimitry Andric if (BCI->getParent() == SlowDivOrRem->getParent() &&
3970b57cec5SDimitry Andric isa<ConstantInt>(BCI->getOperand(0)))
398bdd1243dSDimitry Andric return std::nullopt;
3990b57cec5SDimitry Andric
4005ffd83dbSDimitry Andric IRBuilder<> Builder(MainBB, MainBB->end());
4015ffd83dbSDimitry Andric Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
4025ffd83dbSDimitry Andric
4030b57cec5SDimitry Andric if (DividendShort && !isSignedOp()) {
4040b57cec5SDimitry Andric // If the division is unsigned and Dividend is known to be short, then
4050b57cec5SDimitry Andric // either
4060b57cec5SDimitry Andric // 1) Divisor is less or equal to Dividend, and the result can be computed
4070b57cec5SDimitry Andric // with a short division.
4080b57cec5SDimitry Andric // 2) Divisor is greater than Dividend. In this case, no division is needed
4090b57cec5SDimitry Andric // at all: The quotient is 0 and the remainder is equal to Dividend.
4100b57cec5SDimitry Andric //
4110b57cec5SDimitry Andric // So instead of checking at runtime whether Divisor fits into BypassType,
4120b57cec5SDimitry Andric // we emit a runtime check to differentiate between these two cases. This
4130b57cec5SDimitry Andric // lets us entirely avoid a long div.
4140b57cec5SDimitry Andric
4150b57cec5SDimitry Andric // Split the basic block before the div/rem.
4160b57cec5SDimitry Andric BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
4170b57cec5SDimitry Andric // Remove the unconditional branch from MainBB to SuccessorBB.
418bdd1243dSDimitry Andric MainBB->back().eraseFromParent();
4190b57cec5SDimitry Andric QuotRemWithBB Long;
4200b57cec5SDimitry Andric Long.BB = MainBB;
4210b57cec5SDimitry Andric Long.Quotient = ConstantInt::get(getSlowType(), 0);
4220b57cec5SDimitry Andric Long.Remainder = Dividend;
4230b57cec5SDimitry Andric QuotRemWithBB Fast = createFastBB(SuccessorBB);
4240b57cec5SDimitry Andric QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
4250b57cec5SDimitry Andric Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
4260b57cec5SDimitry Andric Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
4270b57cec5SDimitry Andric return Result;
4280b57cec5SDimitry Andric } else {
4290b57cec5SDimitry Andric // General case. Create both slow and fast div/rem pairs and choose one of
4300b57cec5SDimitry Andric // them at runtime.
4310b57cec5SDimitry Andric
4320b57cec5SDimitry Andric // Split the basic block before the div/rem.
4330b57cec5SDimitry Andric BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
4340b57cec5SDimitry Andric // Remove the unconditional branch from MainBB to SuccessorBB.
435bdd1243dSDimitry Andric MainBB->back().eraseFromParent();
4360b57cec5SDimitry Andric QuotRemWithBB Fast = createFastBB(SuccessorBB);
4370b57cec5SDimitry Andric QuotRemWithBB Slow = createSlowBB(SuccessorBB);
4380b57cec5SDimitry Andric QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
4390b57cec5SDimitry Andric Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
4400b57cec5SDimitry Andric DivisorShort ? nullptr : Divisor);
4410b57cec5SDimitry Andric Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
4420b57cec5SDimitry Andric return Result;
4430b57cec5SDimitry Andric }
4440b57cec5SDimitry Andric }
4450b57cec5SDimitry Andric
4460b57cec5SDimitry Andric /// This optimization identifies DIV/REM instructions in a BB that can be
4470b57cec5SDimitry Andric /// profitably bypassed and carried out with a shorter, faster divide.
bypassSlowDivision(BasicBlock * BB,const BypassWidthsTy & BypassWidths)4480b57cec5SDimitry Andric bool llvm::bypassSlowDivision(BasicBlock *BB,
4490b57cec5SDimitry Andric const BypassWidthsTy &BypassWidths) {
4500b57cec5SDimitry Andric DivCacheTy PerBBDivCache;
4510b57cec5SDimitry Andric
4520b57cec5SDimitry Andric bool MadeChange = false;
4530b57cec5SDimitry Andric Instruction *Next = &*BB->begin();
4540b57cec5SDimitry Andric while (Next != nullptr) {
4550b57cec5SDimitry Andric // We may add instructions immediately after I, but we want to skip over
4560b57cec5SDimitry Andric // them.
4570b57cec5SDimitry Andric Instruction *I = Next;
4580b57cec5SDimitry Andric Next = Next->getNextNode();
4598bcb0991SDimitry Andric
4608bcb0991SDimitry Andric // Ignore dead code to save time and avoid bugs.
4618bcb0991SDimitry Andric if (I->hasNUses(0))
4628bcb0991SDimitry Andric continue;
4630b57cec5SDimitry Andric
4640b57cec5SDimitry Andric FastDivInsertionTask Task(I, BypassWidths);
4650b57cec5SDimitry Andric if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
4660b57cec5SDimitry Andric I->replaceAllUsesWith(Replacement);
4670b57cec5SDimitry Andric I->eraseFromParent();
4680b57cec5SDimitry Andric MadeChange = true;
4690b57cec5SDimitry Andric }
4700b57cec5SDimitry Andric }
4710b57cec5SDimitry Andric
4720b57cec5SDimitry Andric // Above we eagerly create divs and rems, as pairs, so that we can efficiently
4730b57cec5SDimitry Andric // create divrem machine instructions. Now erase any unused divs / rems so we
4740b57cec5SDimitry Andric // don't leave extra instructions sitting around.
4750b57cec5SDimitry Andric for (auto &KV : PerBBDivCache)
4760b57cec5SDimitry Andric for (Value *V : {KV.second.Quotient, KV.second.Remainder})
4770b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(V);
4780b57cec5SDimitry Andric
4790b57cec5SDimitry Andric return MadeChange;
4800b57cec5SDimitry Andric }
481