1 //===- DivRemPairs.cpp - Hoist/decompose division and remainder -*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass hoists and/or decomposes integer division and remainder 10 // instructions to enable CFG improvements and better codegen. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Scalar/DivRemPairs.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/MapVector.h" 17 #include "llvm/ADT/Statistic.h" 18 #include "llvm/Analysis/GlobalsModRef.h" 19 #include "llvm/Analysis/TargetTransformInfo.h" 20 #include "llvm/IR/Dominators.h" 21 #include "llvm/IR/Function.h" 22 #include "llvm/Pass.h" 23 #include "llvm/Support/DebugCounter.h" 24 #include "llvm/Transforms/Scalar.h" 25 #include "llvm/Transforms/Utils/BypassSlowDivision.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "div-rem-pairs" 30 STATISTIC(NumPairs, "Number of div/rem pairs"); 31 STATISTIC(NumHoisted, "Number of instructions hoisted"); 32 STATISTIC(NumDecomposed, "Number of instructions decomposed"); 33 DEBUG_COUNTER(DRPCounter, "div-rem-pairs-transform", 34 "Controls transformations in div-rem-pairs pass"); 35 36 /// A thin wrapper to store two values that we matched as div-rem pair. 37 /// We want this extra indirection to avoid dealing with RAUW'ing the map keys. 38 struct DivRemPairWorklistEntry { 39 /// The actual udiv/sdiv instruction. Source of truth. 40 AssertingVH<Instruction> DivInst; 41 42 /// The instruction that we have matched as a remainder instruction. 43 /// Should only be used as Value, don't introspect it. 44 AssertingVH<Instruction> RemInst; 45 46 DivRemPairWorklistEntry(Instruction *DivInst_, Instruction *RemInst_) 47 : DivInst(DivInst_), RemInst(RemInst_) { 48 assert((DivInst->getOpcode() == Instruction::UDiv || 49 DivInst->getOpcode() == Instruction::SDiv) && 50 "Not a division."); 51 assert(DivInst->getType() == RemInst->getType() && "Types should match."); 52 // We can't check anything else about remainder instruction, 53 // it's not strictly required to be a urem/srem. 54 } 55 56 /// The type for this pair, identical for both the div and rem. 57 Type *getType() const { return DivInst->getType(); } 58 59 /// Is this pair signed or unsigned? 60 bool isSigned() const { return DivInst->getOpcode() == Instruction::SDiv; } 61 62 /// In this pair, what are the divident and divisor? 63 Value *getDividend() const { return DivInst->getOperand(0); } 64 Value *getDivisor() const { return DivInst->getOperand(1); } 65 }; 66 using DivRemWorklistTy = SmallVector<DivRemPairWorklistEntry, 4>; 67 68 /// Find matching pairs of integer div/rem ops (they have the same numerator, 69 /// denominator, and signedness). Place those pairs into a worklist for further 70 /// processing. This indirection is needed because we have to use TrackingVH<> 71 /// because we will be doing RAUW, and if one of the rem instructions we change 72 /// happens to be an input to another div/rem in the maps, we'd have problems. 73 static DivRemWorklistTy getWorklist(Function &F) { 74 // Insert all divide and remainder instructions into maps keyed by their 75 // operands and opcode (signed or unsigned). 76 DenseMap<DivRemMapKey, Instruction *> DivMap; 77 // Use a MapVector for RemMap so that instructions are moved/inserted in a 78 // deterministic order. 79 MapVector<DivRemMapKey, Instruction *> RemMap; 80 for (auto &BB : F) { 81 for (auto &I : BB) { 82 if (I.getOpcode() == Instruction::SDiv) 83 DivMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I; 84 else if (I.getOpcode() == Instruction::UDiv) 85 DivMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I; 86 else if (I.getOpcode() == Instruction::SRem) 87 RemMap[DivRemMapKey(true, I.getOperand(0), I.getOperand(1))] = &I; 88 else if (I.getOpcode() == Instruction::URem) 89 RemMap[DivRemMapKey(false, I.getOperand(0), I.getOperand(1))] = &I; 90 } 91 } 92 93 // We'll accumulate the matching pairs of div-rem instructions here. 94 DivRemWorklistTy Worklist; 95 96 // We can iterate over either map because we are only looking for matched 97 // pairs. Choose remainders for efficiency because they are usually even more 98 // rare than division. 99 for (auto &RemPair : RemMap) { 100 // Find the matching division instruction from the division map. 101 Instruction *DivInst = DivMap[RemPair.first]; 102 if (!DivInst) 103 continue; 104 105 // We have a matching pair of div/rem instructions. 106 NumPairs++; 107 Instruction *RemInst = RemPair.second; 108 109 // Place it in the worklist. 110 Worklist.emplace_back(DivInst, RemInst); 111 } 112 113 return Worklist; 114 } 115 116 /// Find matching pairs of integer div/rem ops (they have the same numerator, 117 /// denominator, and signedness). If they exist in different basic blocks, bring 118 /// them together by hoisting or replace the common division operation that is 119 /// implicit in the remainder: 120 /// X % Y <--> X - ((X / Y) * Y). 121 /// 122 /// We can largely ignore the normal safety and cost constraints on speculation 123 /// of these ops when we find a matching pair. This is because we are already 124 /// guaranteed that any exceptions and most cost are already incurred by the 125 /// first member of the pair. 126 /// 127 /// Note: This transform could be an oddball enhancement to EarlyCSE, GVN, or 128 /// SimplifyCFG, but it's split off on its own because it's different enough 129 /// that it doesn't quite match the stated objectives of those passes. 130 static bool optimizeDivRem(Function &F, const TargetTransformInfo &TTI, 131 const DominatorTree &DT) { 132 bool Changed = false; 133 134 // Get the matching pairs of div-rem instructions. We want this extra 135 // indirection to avoid dealing with having to RAUW the keys of the maps. 136 DivRemWorklistTy Worklist = getWorklist(F); 137 138 // Process each entry in the worklist. 139 for (DivRemPairWorklistEntry &E : Worklist) { 140 bool HasDivRemOp = TTI.hasDivRemOp(E.getType(), E.isSigned()); 141 142 auto &DivInst = E.DivInst; 143 auto &RemInst = E.RemInst; 144 145 // If the target supports div+rem and the instructions are in the same block 146 // already, there's nothing to do. The backend should handle this. If the 147 // target does not support div+rem, then we will decompose the rem. 148 if (HasDivRemOp && RemInst->getParent() == DivInst->getParent()) 149 continue; 150 151 bool DivDominates = DT.dominates(DivInst, RemInst); 152 if (!DivDominates && !DT.dominates(RemInst, DivInst)) 153 continue; 154 155 if (!DebugCounter::shouldExecute(DRPCounter)) 156 continue; 157 158 if (HasDivRemOp) { 159 // The target has a single div/rem operation. Hoist the lower instruction 160 // to make the matched pair visible to the backend. 161 if (DivDominates) 162 RemInst->moveAfter(DivInst); 163 else 164 DivInst->moveAfter(RemInst); 165 NumHoisted++; 166 } else { 167 // The target does not have a single div/rem operation. Decompose the 168 // remainder calculation as: 169 // X % Y --> X - ((X / Y) * Y). 170 Value *X = E.getDividend(); 171 Value *Y = E.getDivisor(); 172 Instruction *Mul = BinaryOperator::CreateMul(DivInst, Y); 173 Instruction *Sub = BinaryOperator::CreateSub(X, Mul); 174 175 // If the remainder dominates, then hoist the division up to that block: 176 // 177 // bb1: 178 // %rem = srem %x, %y 179 // bb2: 180 // %div = sdiv %x, %y 181 // --> 182 // bb1: 183 // %div = sdiv %x, %y 184 // %mul = mul %div, %y 185 // %rem = sub %x, %mul 186 // 187 // If the division dominates, it's already in the right place. The mul+sub 188 // will be in a different block because we don't assume that they are 189 // cheap to speculatively execute: 190 // 191 // bb1: 192 // %div = sdiv %x, %y 193 // bb2: 194 // %rem = srem %x, %y 195 // --> 196 // bb1: 197 // %div = sdiv %x, %y 198 // bb2: 199 // %mul = mul %div, %y 200 // %rem = sub %x, %mul 201 // 202 // If the div and rem are in the same block, we do the same transform, 203 // but any code movement would be within the same block. 204 205 if (!DivDominates) 206 DivInst->moveBefore(RemInst); 207 Mul->insertAfter(RemInst); 208 Sub->insertAfter(Mul); 209 210 // Now kill the explicit remainder. We have replaced it with: 211 // (sub X, (mul (div X, Y), Y) 212 Sub->setName(RemInst->getName() + ".decomposed"); 213 Instruction *OrigRemInst = RemInst; 214 // Update AssertingVH<> with new instruction so it doesn't assert. 215 RemInst = Sub; 216 // And replace the original instruction with the new one. 217 OrigRemInst->replaceAllUsesWith(Sub); 218 OrigRemInst->eraseFromParent(); 219 NumDecomposed++; 220 } 221 Changed = true; 222 } 223 224 return Changed; 225 } 226 227 // Pass manager boilerplate below here. 228 229 namespace { 230 struct DivRemPairsLegacyPass : public FunctionPass { 231 static char ID; 232 DivRemPairsLegacyPass() : FunctionPass(ID) { 233 initializeDivRemPairsLegacyPassPass(*PassRegistry::getPassRegistry()); 234 } 235 236 void getAnalysisUsage(AnalysisUsage &AU) const override { 237 AU.addRequired<DominatorTreeWrapperPass>(); 238 AU.addRequired<TargetTransformInfoWrapperPass>(); 239 AU.setPreservesCFG(); 240 AU.addPreserved<DominatorTreeWrapperPass>(); 241 AU.addPreserved<GlobalsAAWrapperPass>(); 242 FunctionPass::getAnalysisUsage(AU); 243 } 244 245 bool runOnFunction(Function &F) override { 246 if (skipFunction(F)) 247 return false; 248 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 249 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 250 return optimizeDivRem(F, TTI, DT); 251 } 252 }; 253 } // namespace 254 255 char DivRemPairsLegacyPass::ID = 0; 256 INITIALIZE_PASS_BEGIN(DivRemPairsLegacyPass, "div-rem-pairs", 257 "Hoist/decompose integer division and remainder", false, 258 false) 259 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 260 INITIALIZE_PASS_END(DivRemPairsLegacyPass, "div-rem-pairs", 261 "Hoist/decompose integer division and remainder", false, 262 false) 263 FunctionPass *llvm::createDivRemPairsPass() { 264 return new DivRemPairsLegacyPass(); 265 } 266 267 PreservedAnalyses DivRemPairsPass::run(Function &F, 268 FunctionAnalysisManager &FAM) { 269 TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F); 270 DominatorTree &DT = FAM.getResult<DominatorTreeAnalysis>(F); 271 if (!optimizeDivRem(F, TTI, DT)) 272 return PreservedAnalyses::all(); 273 // TODO: This pass just hoists/replaces math ops - all analyses are preserved? 274 PreservedAnalyses PA; 275 PA.preserveSet<CFGAnalyses>(); 276 PA.preserve<GlobalsAA>(); 277 return PA; 278 } 279