xref: /freebsd/contrib/llvm-project/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file contains an optimization for div and rem on architectures that
100b57cec5SDimitry Andric // execute short instructions significantly faster than longer instructions.
110b57cec5SDimitry Andric // For example, on Intel Atom 32-bit divides are slow enough that during
120b57cec5SDimitry Andric // runtime it is profitable to check the value of the operands, and if they are
130b57cec5SDimitry Andric // positive and less than 256 use an unsigned 8-bit divide.
140b57cec5SDimitry Andric //
150b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
160b57cec5SDimitry Andric 
170b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BypassSlowDivision.h"
180b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
190b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
200b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
210b57cec5SDimitry Andric #include "llvm/Transforms/Utils/Local.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
230b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
240b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
250b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
260b57cec5SDimitry Andric #include "llvm/IR/Function.h"
270b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
280b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
290b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
300b57cec5SDimitry Andric #include "llvm/IR/Module.h"
310b57cec5SDimitry Andric #include "llvm/IR/Type.h"
320b57cec5SDimitry Andric #include "llvm/IR/Value.h"
330b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
340b57cec5SDimitry Andric #include "llvm/Support/KnownBits.h"
350b57cec5SDimitry Andric #include <cassert>
360b57cec5SDimitry Andric #include <cstdint>
370b57cec5SDimitry Andric 
380b57cec5SDimitry Andric using namespace llvm;
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric #define DEBUG_TYPE "bypass-slow-division"
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric namespace {
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric   struct QuotRemPair {
450b57cec5SDimitry Andric     Value *Quotient;
460b57cec5SDimitry Andric     Value *Remainder;
470b57cec5SDimitry Andric 
QuotRemPair__anon06c93eea0111::QuotRemPair480b57cec5SDimitry Andric     QuotRemPair(Value *InQuotient, Value *InRemainder)
490b57cec5SDimitry Andric         : Quotient(InQuotient), Remainder(InRemainder) {}
500b57cec5SDimitry Andric   };
510b57cec5SDimitry Andric 
520b57cec5SDimitry Andric   /// A quotient and remainder, plus a BB from which they logically "originate".
530b57cec5SDimitry Andric   /// If you use Quotient or Remainder in a Phi node, you should use BB as its
540b57cec5SDimitry Andric   /// corresponding predecessor.
550b57cec5SDimitry Andric   struct QuotRemWithBB {
560b57cec5SDimitry Andric     BasicBlock *BB = nullptr;
570b57cec5SDimitry Andric     Value *Quotient = nullptr;
580b57cec5SDimitry Andric     Value *Remainder = nullptr;
590b57cec5SDimitry Andric   };
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
620b57cec5SDimitry Andric using BypassWidthsTy = DenseMap<unsigned, unsigned>;
630b57cec5SDimitry Andric using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric enum ValueRange {
660b57cec5SDimitry Andric   /// Operand definitely fits into BypassType. No runtime checks are needed.
670b57cec5SDimitry Andric   VALRNG_KNOWN_SHORT,
680b57cec5SDimitry Andric   /// A runtime check is required, as value range is unknown.
690b57cec5SDimitry Andric   VALRNG_UNKNOWN,
700b57cec5SDimitry Andric   /// Operand is unlikely to fit into BypassType. The bypassing should be
710b57cec5SDimitry Andric   /// disabled.
720b57cec5SDimitry Andric   VALRNG_LIKELY_LONG
730b57cec5SDimitry Andric };
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric class FastDivInsertionTask {
760b57cec5SDimitry Andric   bool IsValidTask = false;
770b57cec5SDimitry Andric   Instruction *SlowDivOrRem = nullptr;
780b57cec5SDimitry Andric   IntegerType *BypassType = nullptr;
790b57cec5SDimitry Andric   BasicBlock *MainBB = nullptr;
800b57cec5SDimitry Andric 
810b57cec5SDimitry Andric   bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
820b57cec5SDimitry Andric   ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
830b57cec5SDimitry Andric   QuotRemWithBB createSlowBB(BasicBlock *Successor);
840b57cec5SDimitry Andric   QuotRemWithBB createFastBB(BasicBlock *Successor);
850b57cec5SDimitry Andric   QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
860b57cec5SDimitry Andric                                    BasicBlock *PhiBB);
870b57cec5SDimitry Andric   Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
88bdd1243dSDimitry Andric   std::optional<QuotRemPair> insertFastDivAndRem();
890b57cec5SDimitry Andric 
isSignedOp()900b57cec5SDimitry Andric   bool isSignedOp() {
910b57cec5SDimitry Andric     return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
920b57cec5SDimitry Andric            SlowDivOrRem->getOpcode() == Instruction::SRem;
930b57cec5SDimitry Andric   }
940b57cec5SDimitry Andric 
isDivisionOp()950b57cec5SDimitry Andric   bool isDivisionOp() {
960b57cec5SDimitry Andric     return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
970b57cec5SDimitry Andric            SlowDivOrRem->getOpcode() == Instruction::UDiv;
980b57cec5SDimitry Andric   }
990b57cec5SDimitry Andric 
getSlowType()1000b57cec5SDimitry Andric   Type *getSlowType() { return SlowDivOrRem->getType(); }
1010b57cec5SDimitry Andric 
1020b57cec5SDimitry Andric public:
1030b57cec5SDimitry Andric   FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
1040b57cec5SDimitry Andric 
1050b57cec5SDimitry Andric   Value *getReplacement(DivCacheTy &Cache);
1060b57cec5SDimitry Andric };
1070b57cec5SDimitry Andric 
1080b57cec5SDimitry Andric } // end anonymous namespace
1090b57cec5SDimitry Andric 
FastDivInsertionTask(Instruction * I,const BypassWidthsTy & BypassWidths)1100b57cec5SDimitry Andric FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
1110b57cec5SDimitry Andric                                            const BypassWidthsTy &BypassWidths) {
1120b57cec5SDimitry Andric   switch (I->getOpcode()) {
1130b57cec5SDimitry Andric   case Instruction::UDiv:
1140b57cec5SDimitry Andric   case Instruction::SDiv:
1150b57cec5SDimitry Andric   case Instruction::URem:
1160b57cec5SDimitry Andric   case Instruction::SRem:
1170b57cec5SDimitry Andric     SlowDivOrRem = I;
1180b57cec5SDimitry Andric     break;
1190b57cec5SDimitry Andric   default:
1200b57cec5SDimitry Andric     // I is not a div/rem operation.
1210b57cec5SDimitry Andric     return;
1220b57cec5SDimitry Andric   }
1230b57cec5SDimitry Andric 
1240b57cec5SDimitry Andric   // Skip division on vector types. Only optimize integer instructions.
1250b57cec5SDimitry Andric   IntegerType *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
1260b57cec5SDimitry Andric   if (!SlowType)
1270b57cec5SDimitry Andric     return;
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric   // Skip if this bitwidth is not bypassed.
1300b57cec5SDimitry Andric   auto BI = BypassWidths.find(SlowType->getBitWidth());
1310b57cec5SDimitry Andric   if (BI == BypassWidths.end())
1320b57cec5SDimitry Andric     return;
1330b57cec5SDimitry Andric 
1340b57cec5SDimitry Andric   // Get type for div/rem instruction with bypass bitwidth.
1350b57cec5SDimitry Andric   IntegerType *BT = IntegerType::get(I->getContext(), BI->second);
1360b57cec5SDimitry Andric   BypassType = BT;
1370b57cec5SDimitry Andric 
1380b57cec5SDimitry Andric   // The original basic block.
1390b57cec5SDimitry Andric   MainBB = I->getParent();
1400b57cec5SDimitry Andric 
1410b57cec5SDimitry Andric   // The instruction is indeed a slow div or rem operation.
1420b57cec5SDimitry Andric   IsValidTask = true;
1430b57cec5SDimitry Andric }
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric /// Reuses previously-computed dividend or remainder from the current BB if
1460b57cec5SDimitry Andric /// operands and operation are identical. Otherwise calls insertFastDivAndRem to
1470b57cec5SDimitry Andric /// perform the optimization and caches the resulting dividend and remainder.
1480b57cec5SDimitry Andric /// If no replacement can be generated, nullptr is returned.
getReplacement(DivCacheTy & Cache)1490b57cec5SDimitry Andric Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
1500b57cec5SDimitry Andric   // First, make sure that the task is valid.
1510b57cec5SDimitry Andric   if (!IsValidTask)
1520b57cec5SDimitry Andric     return nullptr;
1530b57cec5SDimitry Andric 
1540b57cec5SDimitry Andric   // Then, look for a value in Cache.
1550b57cec5SDimitry Andric   Value *Dividend = SlowDivOrRem->getOperand(0);
1560b57cec5SDimitry Andric   Value *Divisor = SlowDivOrRem->getOperand(1);
1570b57cec5SDimitry Andric   DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
1580b57cec5SDimitry Andric   auto CacheI = Cache.find(Key);
1590b57cec5SDimitry Andric 
1600b57cec5SDimitry Andric   if (CacheI == Cache.end()) {
1610b57cec5SDimitry Andric     // If previous instance does not exist, try to insert fast div.
162bdd1243dSDimitry Andric     std::optional<QuotRemPair> OptResult = insertFastDivAndRem();
1630b57cec5SDimitry Andric     // Bail out if insertFastDivAndRem has failed.
1640b57cec5SDimitry Andric     if (!OptResult)
1650b57cec5SDimitry Andric       return nullptr;
1660b57cec5SDimitry Andric     CacheI = Cache.insert({Key, *OptResult}).first;
1670b57cec5SDimitry Andric   }
1680b57cec5SDimitry Andric 
1690b57cec5SDimitry Andric   QuotRemPair &Value = CacheI->second;
1700b57cec5SDimitry Andric   return isDivisionOp() ? Value.Quotient : Value.Remainder;
1710b57cec5SDimitry Andric }
1720b57cec5SDimitry Andric 
1730b57cec5SDimitry Andric /// Check if a value looks like a hash.
1740b57cec5SDimitry Andric ///
1750b57cec5SDimitry Andric /// The routine is expected to detect values computed using the most common hash
1760b57cec5SDimitry Andric /// algorithms. Typically, hash computations end with one of the following
1770b57cec5SDimitry Andric /// instructions:
1780b57cec5SDimitry Andric ///
1790b57cec5SDimitry Andric /// 1) MUL with a constant wider than BypassType
1800b57cec5SDimitry Andric /// 2) XOR instruction
1810b57cec5SDimitry Andric ///
1820b57cec5SDimitry Andric /// And even if we are wrong and the value is not a hash, it is still quite
1830b57cec5SDimitry Andric /// unlikely that such values will fit into BypassType.
1840b57cec5SDimitry Andric ///
1850b57cec5SDimitry Andric /// To detect string hash algorithms like FNV we have to look through PHI-nodes.
1860b57cec5SDimitry Andric /// It is implemented as a depth-first search for values that look neither long
1870b57cec5SDimitry Andric /// nor hash-like.
isHashLikeValue(Value * V,VisitedSetTy & Visited)1880b57cec5SDimitry Andric bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
1890b57cec5SDimitry Andric   Instruction *I = dyn_cast<Instruction>(V);
1900b57cec5SDimitry Andric   if (!I)
1910b57cec5SDimitry Andric     return false;
1920b57cec5SDimitry Andric 
1930b57cec5SDimitry Andric   switch (I->getOpcode()) {
1940b57cec5SDimitry Andric   case Instruction::Xor:
1950b57cec5SDimitry Andric     return true;
1960b57cec5SDimitry Andric   case Instruction::Mul: {
1970b57cec5SDimitry Andric     // After Constant Hoisting pass, long constants may be represented as
1980b57cec5SDimitry Andric     // bitcast instructions. As a result, some constants may look like an
1990b57cec5SDimitry Andric     // instruction at first, and an additional check is necessary to find out if
2000b57cec5SDimitry Andric     // an operand is actually a constant.
2010b57cec5SDimitry Andric     Value *Op1 = I->getOperand(1);
2020b57cec5SDimitry Andric     ConstantInt *C = dyn_cast<ConstantInt>(Op1);
2030b57cec5SDimitry Andric     if (!C && isa<BitCastInst>(Op1))
2040b57cec5SDimitry Andric       C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
20506c3fb27SDimitry Andric     return C && C->getValue().getSignificantBits() > BypassType->getBitWidth();
2060b57cec5SDimitry Andric   }
2070b57cec5SDimitry Andric   case Instruction::PHI:
2080b57cec5SDimitry Andric     // Stop IR traversal in case of a crazy input code. This limits recursion
2090b57cec5SDimitry Andric     // depth.
2100b57cec5SDimitry Andric     if (Visited.size() >= 16)
2110b57cec5SDimitry Andric       return false;
2120b57cec5SDimitry Andric     // Do not visit nodes that have been visited already. We return true because
2130b57cec5SDimitry Andric     // it means that we couldn't find any value that doesn't look hash-like.
2145ffd83dbSDimitry Andric     if (!Visited.insert(I).second)
2150b57cec5SDimitry Andric       return true;
2160b57cec5SDimitry Andric     return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
2170b57cec5SDimitry Andric       // Ignore undef values as they probably don't affect the division
2180b57cec5SDimitry Andric       // operands.
2190b57cec5SDimitry Andric       return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
2200b57cec5SDimitry Andric              isa<UndefValue>(V);
2210b57cec5SDimitry Andric     });
2220b57cec5SDimitry Andric   default:
2230b57cec5SDimitry Andric     return false;
2240b57cec5SDimitry Andric   }
2250b57cec5SDimitry Andric }
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric /// Check if an integer value fits into our bypass type.
getValueRange(Value * V,VisitedSetTy & Visited)2280b57cec5SDimitry Andric ValueRange FastDivInsertionTask::getValueRange(Value *V,
2290b57cec5SDimitry Andric                                                VisitedSetTy &Visited) {
2300b57cec5SDimitry Andric   unsigned ShortLen = BypassType->getBitWidth();
2310b57cec5SDimitry Andric   unsigned LongLen = V->getType()->getIntegerBitWidth();
2320b57cec5SDimitry Andric 
2330b57cec5SDimitry Andric   assert(LongLen > ShortLen && "Value type must be wider than BypassType");
2340b57cec5SDimitry Andric   unsigned HiBits = LongLen - ShortLen;
2350b57cec5SDimitry Andric 
236*0fca6ea1SDimitry Andric   const DataLayout &DL = SlowDivOrRem->getDataLayout();
2370b57cec5SDimitry Andric   KnownBits Known(LongLen);
2380b57cec5SDimitry Andric 
2390b57cec5SDimitry Andric   computeKnownBits(V, Known, DL);
2400b57cec5SDimitry Andric 
2410b57cec5SDimitry Andric   if (Known.countMinLeadingZeros() >= HiBits)
2420b57cec5SDimitry Andric     return VALRNG_KNOWN_SHORT;
2430b57cec5SDimitry Andric 
2440b57cec5SDimitry Andric   if (Known.countMaxLeadingZeros() < HiBits)
2450b57cec5SDimitry Andric     return VALRNG_LIKELY_LONG;
2460b57cec5SDimitry Andric 
2470b57cec5SDimitry Andric   // Long integer divisions are often used in hashtable implementations. It's
2480b57cec5SDimitry Andric   // not worth bypassing such divisions because hash values are extremely
2490b57cec5SDimitry Andric   // unlikely to have enough leading zeros. The call below tries to detect
2500b57cec5SDimitry Andric   // values that are unlikely to fit BypassType (including hashes).
2510b57cec5SDimitry Andric   if (isHashLikeValue(V, Visited))
2520b57cec5SDimitry Andric     return VALRNG_LIKELY_LONG;
2530b57cec5SDimitry Andric 
2540b57cec5SDimitry Andric   return VALRNG_UNKNOWN;
2550b57cec5SDimitry Andric }
2560b57cec5SDimitry Andric 
2570b57cec5SDimitry Andric /// Add new basic block for slow div and rem operations and put it before
2580b57cec5SDimitry Andric /// SuccessorBB.
createSlowBB(BasicBlock * SuccessorBB)2590b57cec5SDimitry Andric QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
2600b57cec5SDimitry Andric   QuotRemWithBB DivRemPair;
2610b57cec5SDimitry Andric   DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
2620b57cec5SDimitry Andric                                      MainBB->getParent(), SuccessorBB);
2630b57cec5SDimitry Andric   IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
2645ffd83dbSDimitry Andric   Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
2650b57cec5SDimitry Andric 
2660b57cec5SDimitry Andric   Value *Dividend = SlowDivOrRem->getOperand(0);
2670b57cec5SDimitry Andric   Value *Divisor = SlowDivOrRem->getOperand(1);
2680b57cec5SDimitry Andric 
2690b57cec5SDimitry Andric   if (isSignedOp()) {
2700b57cec5SDimitry Andric     DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
2710b57cec5SDimitry Andric     DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
2720b57cec5SDimitry Andric   } else {
2730b57cec5SDimitry Andric     DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
2740b57cec5SDimitry Andric     DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
2750b57cec5SDimitry Andric   }
2760b57cec5SDimitry Andric 
2770b57cec5SDimitry Andric   Builder.CreateBr(SuccessorBB);
2780b57cec5SDimitry Andric   return DivRemPair;
2790b57cec5SDimitry Andric }
2800b57cec5SDimitry Andric 
2810b57cec5SDimitry Andric /// Add new basic block for fast div and rem operations and put it before
2820b57cec5SDimitry Andric /// SuccessorBB.
createFastBB(BasicBlock * SuccessorBB)2830b57cec5SDimitry Andric QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
2840b57cec5SDimitry Andric   QuotRemWithBB DivRemPair;
2850b57cec5SDimitry Andric   DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
2860b57cec5SDimitry Andric                                      MainBB->getParent(), SuccessorBB);
2870b57cec5SDimitry Andric   IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
2885ffd83dbSDimitry Andric   Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
2890b57cec5SDimitry Andric 
2900b57cec5SDimitry Andric   Value *Dividend = SlowDivOrRem->getOperand(0);
2910b57cec5SDimitry Andric   Value *Divisor = SlowDivOrRem->getOperand(1);
2920b57cec5SDimitry Andric   Value *ShortDivisorV =
2930b57cec5SDimitry Andric       Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
2940b57cec5SDimitry Andric   Value *ShortDividendV =
2950b57cec5SDimitry Andric       Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
2960b57cec5SDimitry Andric 
2970b57cec5SDimitry Andric   // udiv/urem because this optimization only handles positive numbers.
2980b57cec5SDimitry Andric   Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
2990b57cec5SDimitry Andric   Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
3000b57cec5SDimitry Andric   DivRemPair.Quotient =
3010b57cec5SDimitry Andric       Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
3020b57cec5SDimitry Andric   DivRemPair.Remainder =
3030b57cec5SDimitry Andric       Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
3040b57cec5SDimitry Andric   Builder.CreateBr(SuccessorBB);
3050b57cec5SDimitry Andric 
3060b57cec5SDimitry Andric   return DivRemPair;
3070b57cec5SDimitry Andric }
3080b57cec5SDimitry Andric 
3090b57cec5SDimitry Andric /// Creates Phi nodes for result of Div and Rem.
createDivRemPhiNodes(QuotRemWithBB & LHS,QuotRemWithBB & RHS,BasicBlock * PhiBB)3100b57cec5SDimitry Andric QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
3110b57cec5SDimitry Andric                                                        QuotRemWithBB &RHS,
3120b57cec5SDimitry Andric                                                        BasicBlock *PhiBB) {
3130b57cec5SDimitry Andric   IRBuilder<> Builder(PhiBB, PhiBB->begin());
3145ffd83dbSDimitry Andric   Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
3150b57cec5SDimitry Andric   PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
3160b57cec5SDimitry Andric   QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
3170b57cec5SDimitry Andric   QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
3180b57cec5SDimitry Andric   PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
3190b57cec5SDimitry Andric   RemPhi->addIncoming(LHS.Remainder, LHS.BB);
3200b57cec5SDimitry Andric   RemPhi->addIncoming(RHS.Remainder, RHS.BB);
3210b57cec5SDimitry Andric   return QuotRemPair(QuoPhi, RemPhi);
3220b57cec5SDimitry Andric }
3230b57cec5SDimitry Andric 
3240b57cec5SDimitry Andric /// Creates a runtime check to test whether both the divisor and dividend fit
3250b57cec5SDimitry Andric /// into BypassType. The check is inserted at the end of MainBB. True return
3260b57cec5SDimitry Andric /// value means that the operands fit. Either of the operands may be NULL if it
3270b57cec5SDimitry Andric /// doesn't need a runtime check.
insertOperandRuntimeCheck(Value * Op1,Value * Op2)3280b57cec5SDimitry Andric Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
3290b57cec5SDimitry Andric   assert((Op1 || Op2) && "Nothing to check");
3300b57cec5SDimitry Andric   IRBuilder<> Builder(MainBB, MainBB->end());
3315ffd83dbSDimitry Andric   Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
3320b57cec5SDimitry Andric 
3330b57cec5SDimitry Andric   Value *OrV;
3340b57cec5SDimitry Andric   if (Op1 && Op2)
3350b57cec5SDimitry Andric     OrV = Builder.CreateOr(Op1, Op2);
3360b57cec5SDimitry Andric   else
3370b57cec5SDimitry Andric     OrV = Op1 ? Op1 : Op2;
3380b57cec5SDimitry Andric 
3390b57cec5SDimitry Andric   // BitMask is inverted to check if the operands are
3400b57cec5SDimitry Andric   // larger than the bypass type
3410b57cec5SDimitry Andric   uint64_t BitMask = ~BypassType->getBitMask();
3420b57cec5SDimitry Andric   Value *AndV = Builder.CreateAnd(OrV, BitMask);
3430b57cec5SDimitry Andric 
3440b57cec5SDimitry Andric   // Compare operand values
3450b57cec5SDimitry Andric   Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
3460b57cec5SDimitry Andric   return Builder.CreateICmpEQ(AndV, ZeroV);
3470b57cec5SDimitry Andric }
3480b57cec5SDimitry Andric 
3490b57cec5SDimitry Andric /// Substitutes the div/rem instruction with code that checks the value of the
3500b57cec5SDimitry Andric /// operands and uses a shorter-faster div/rem instruction when possible.
insertFastDivAndRem()351bdd1243dSDimitry Andric std::optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
3520b57cec5SDimitry Andric   Value *Dividend = SlowDivOrRem->getOperand(0);
3530b57cec5SDimitry Andric   Value *Divisor = SlowDivOrRem->getOperand(1);
3540b57cec5SDimitry Andric 
3550b57cec5SDimitry Andric   VisitedSetTy SetL;
3560b57cec5SDimitry Andric   ValueRange DividendRange = getValueRange(Dividend, SetL);
3570b57cec5SDimitry Andric   if (DividendRange == VALRNG_LIKELY_LONG)
358bdd1243dSDimitry Andric     return std::nullopt;
3590b57cec5SDimitry Andric 
3600b57cec5SDimitry Andric   VisitedSetTy SetR;
3610b57cec5SDimitry Andric   ValueRange DivisorRange = getValueRange(Divisor, SetR);
3620b57cec5SDimitry Andric   if (DivisorRange == VALRNG_LIKELY_LONG)
363bdd1243dSDimitry Andric     return std::nullopt;
3640b57cec5SDimitry Andric 
3650b57cec5SDimitry Andric   bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
3660b57cec5SDimitry Andric   bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
3670b57cec5SDimitry Andric 
3680b57cec5SDimitry Andric   if (DividendShort && DivisorShort) {
3690b57cec5SDimitry Andric     // If both operands are known to be short then just replace the long
3700b57cec5SDimitry Andric     // division with a short one in-place.  Since we're not introducing control
3710b57cec5SDimitry Andric     // flow in this case, narrowing the division is always a win, even if the
3720b57cec5SDimitry Andric     // divisor is a constant (and will later get replaced by a multiplication).
3730b57cec5SDimitry Andric 
3740b57cec5SDimitry Andric     IRBuilder<> Builder(SlowDivOrRem);
3750b57cec5SDimitry Andric     Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
3760b57cec5SDimitry Andric     Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
3770b57cec5SDimitry Andric     Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
3780b57cec5SDimitry Andric     Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
3790b57cec5SDimitry Andric     Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
3800b57cec5SDimitry Andric     Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
3810b57cec5SDimitry Andric     return QuotRemPair(ExtDiv, ExtRem);
3820b57cec5SDimitry Andric   }
3830b57cec5SDimitry Andric 
3840b57cec5SDimitry Andric   if (isa<ConstantInt>(Divisor)) {
3850b57cec5SDimitry Andric     // If the divisor is not a constant, DAGCombiner will convert it to a
3860b57cec5SDimitry Andric     // multiplication by a magic constant.  It isn't clear if it is worth
3870b57cec5SDimitry Andric     // introducing control flow to get a narrower multiply.
388bdd1243dSDimitry Andric     return std::nullopt;
3890b57cec5SDimitry Andric   }
3900b57cec5SDimitry Andric 
3910b57cec5SDimitry Andric   // After Constant Hoisting pass, long constants may be represented as
3920b57cec5SDimitry Andric   // bitcast instructions. As a result, some constants may look like an
3930b57cec5SDimitry Andric   // instruction at first, and an additional check is necessary to find out if
3940b57cec5SDimitry Andric   // an operand is actually a constant.
3950b57cec5SDimitry Andric   if (auto *BCI = dyn_cast<BitCastInst>(Divisor))
3960b57cec5SDimitry Andric     if (BCI->getParent() == SlowDivOrRem->getParent() &&
3970b57cec5SDimitry Andric         isa<ConstantInt>(BCI->getOperand(0)))
398bdd1243dSDimitry Andric       return std::nullopt;
3990b57cec5SDimitry Andric 
4005ffd83dbSDimitry Andric   IRBuilder<> Builder(MainBB, MainBB->end());
4015ffd83dbSDimitry Andric   Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
4025ffd83dbSDimitry Andric 
4030b57cec5SDimitry Andric   if (DividendShort && !isSignedOp()) {
4040b57cec5SDimitry Andric     // If the division is unsigned and Dividend is known to be short, then
4050b57cec5SDimitry Andric     // either
4060b57cec5SDimitry Andric     // 1) Divisor is less or equal to Dividend, and the result can be computed
4070b57cec5SDimitry Andric     //    with a short division.
4080b57cec5SDimitry Andric     // 2) Divisor is greater than Dividend. In this case, no division is needed
4090b57cec5SDimitry Andric     //    at all: The quotient is 0 and the remainder is equal to Dividend.
4100b57cec5SDimitry Andric     //
4110b57cec5SDimitry Andric     // So instead of checking at runtime whether Divisor fits into BypassType,
4120b57cec5SDimitry Andric     // we emit a runtime check to differentiate between these two cases. This
4130b57cec5SDimitry Andric     // lets us entirely avoid a long div.
4140b57cec5SDimitry Andric 
4150b57cec5SDimitry Andric     // Split the basic block before the div/rem.
4160b57cec5SDimitry Andric     BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
4170b57cec5SDimitry Andric     // Remove the unconditional branch from MainBB to SuccessorBB.
418bdd1243dSDimitry Andric     MainBB->back().eraseFromParent();
4190b57cec5SDimitry Andric     QuotRemWithBB Long;
4200b57cec5SDimitry Andric     Long.BB = MainBB;
4210b57cec5SDimitry Andric     Long.Quotient = ConstantInt::get(getSlowType(), 0);
4220b57cec5SDimitry Andric     Long.Remainder = Dividend;
4230b57cec5SDimitry Andric     QuotRemWithBB Fast = createFastBB(SuccessorBB);
4240b57cec5SDimitry Andric     QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
4250b57cec5SDimitry Andric     Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
4260b57cec5SDimitry Andric     Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
4270b57cec5SDimitry Andric     return Result;
4280b57cec5SDimitry Andric   } else {
4290b57cec5SDimitry Andric     // General case. Create both slow and fast div/rem pairs and choose one of
4300b57cec5SDimitry Andric     // them at runtime.
4310b57cec5SDimitry Andric 
4320b57cec5SDimitry Andric     // Split the basic block before the div/rem.
4330b57cec5SDimitry Andric     BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
4340b57cec5SDimitry Andric     // Remove the unconditional branch from MainBB to SuccessorBB.
435bdd1243dSDimitry Andric     MainBB->back().eraseFromParent();
4360b57cec5SDimitry Andric     QuotRemWithBB Fast = createFastBB(SuccessorBB);
4370b57cec5SDimitry Andric     QuotRemWithBB Slow = createSlowBB(SuccessorBB);
4380b57cec5SDimitry Andric     QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
4390b57cec5SDimitry Andric     Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
4400b57cec5SDimitry Andric                                             DivisorShort ? nullptr : Divisor);
4410b57cec5SDimitry Andric     Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
4420b57cec5SDimitry Andric     return Result;
4430b57cec5SDimitry Andric   }
4440b57cec5SDimitry Andric }
4450b57cec5SDimitry Andric 
4460b57cec5SDimitry Andric /// This optimization identifies DIV/REM instructions in a BB that can be
4470b57cec5SDimitry Andric /// profitably bypassed and carried out with a shorter, faster divide.
bypassSlowDivision(BasicBlock * BB,const BypassWidthsTy & BypassWidths)4480b57cec5SDimitry Andric bool llvm::bypassSlowDivision(BasicBlock *BB,
4490b57cec5SDimitry Andric                               const BypassWidthsTy &BypassWidths) {
4500b57cec5SDimitry Andric   DivCacheTy PerBBDivCache;
4510b57cec5SDimitry Andric 
4520b57cec5SDimitry Andric   bool MadeChange = false;
4530b57cec5SDimitry Andric   Instruction *Next = &*BB->begin();
4540b57cec5SDimitry Andric   while (Next != nullptr) {
4550b57cec5SDimitry Andric     // We may add instructions immediately after I, but we want to skip over
4560b57cec5SDimitry Andric     // them.
4570b57cec5SDimitry Andric     Instruction *I = Next;
4580b57cec5SDimitry Andric     Next = Next->getNextNode();
4598bcb0991SDimitry Andric 
4608bcb0991SDimitry Andric     // Ignore dead code to save time and avoid bugs.
4618bcb0991SDimitry Andric     if (I->hasNUses(0))
4628bcb0991SDimitry Andric       continue;
4630b57cec5SDimitry Andric 
4640b57cec5SDimitry Andric     FastDivInsertionTask Task(I, BypassWidths);
4650b57cec5SDimitry Andric     if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
4660b57cec5SDimitry Andric       I->replaceAllUsesWith(Replacement);
4670b57cec5SDimitry Andric       I->eraseFromParent();
4680b57cec5SDimitry Andric       MadeChange = true;
4690b57cec5SDimitry Andric     }
4700b57cec5SDimitry Andric   }
4710b57cec5SDimitry Andric 
4720b57cec5SDimitry Andric   // Above we eagerly create divs and rems, as pairs, so that we can efficiently
4730b57cec5SDimitry Andric   // create divrem machine instructions.  Now erase any unused divs / rems so we
4740b57cec5SDimitry Andric   // don't leave extra instructions sitting around.
4750b57cec5SDimitry Andric   for (auto &KV : PerBBDivCache)
4760b57cec5SDimitry Andric     for (Value *V : {KV.second.Quotient, KV.second.Remainder})
4770b57cec5SDimitry Andric       RecursivelyDeleteTriviallyDeadInstructions(V);
4780b57cec5SDimitry Andric 
4790b57cec5SDimitry Andric   return MadeChange;
4800b57cec5SDimitry Andric }
481