1 //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass implements IR expansion for reduction intrinsics, allowing targets 10 // to enable the experimental intrinsics until just before codegen. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/CodeGen/ExpandReductions.h" 15 #include "llvm/Analysis/TargetTransformInfo.h" 16 #include "llvm/CodeGen/Passes.h" 17 #include "llvm/IR/Function.h" 18 #include "llvm/IR/IRBuilder.h" 19 #include "llvm/IR/InstIterator.h" 20 #include "llvm/IR/IntrinsicInst.h" 21 #include "llvm/IR/Intrinsics.h" 22 #include "llvm/IR/Module.h" 23 #include "llvm/InitializePasses.h" 24 #include "llvm/Pass.h" 25 #include "llvm/Transforms/Utils/LoopUtils.h" 26 27 using namespace llvm; 28 29 namespace { 30 31 unsigned getOpcode(Intrinsic::ID ID) { 32 switch (ID) { 33 case Intrinsic::experimental_vector_reduce_v2_fadd: 34 return Instruction::FAdd; 35 case Intrinsic::experimental_vector_reduce_v2_fmul: 36 return Instruction::FMul; 37 case Intrinsic::experimental_vector_reduce_add: 38 return Instruction::Add; 39 case Intrinsic::experimental_vector_reduce_mul: 40 return Instruction::Mul; 41 case Intrinsic::experimental_vector_reduce_and: 42 return Instruction::And; 43 case Intrinsic::experimental_vector_reduce_or: 44 return Instruction::Or; 45 case Intrinsic::experimental_vector_reduce_xor: 46 return Instruction::Xor; 47 case Intrinsic::experimental_vector_reduce_smax: 48 case Intrinsic::experimental_vector_reduce_smin: 49 case Intrinsic::experimental_vector_reduce_umax: 50 case Intrinsic::experimental_vector_reduce_umin: 51 return Instruction::ICmp; 52 case Intrinsic::experimental_vector_reduce_fmax: 53 case Intrinsic::experimental_vector_reduce_fmin: 54 return Instruction::FCmp; 55 default: 56 llvm_unreachable("Unexpected ID"); 57 } 58 } 59 60 RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) { 61 switch (ID) { 62 case Intrinsic::experimental_vector_reduce_smax: 63 return RecurrenceDescriptor::MRK_SIntMax; 64 case Intrinsic::experimental_vector_reduce_smin: 65 return RecurrenceDescriptor::MRK_SIntMin; 66 case Intrinsic::experimental_vector_reduce_umax: 67 return RecurrenceDescriptor::MRK_UIntMax; 68 case Intrinsic::experimental_vector_reduce_umin: 69 return RecurrenceDescriptor::MRK_UIntMin; 70 case Intrinsic::experimental_vector_reduce_fmax: 71 return RecurrenceDescriptor::MRK_FloatMax; 72 case Intrinsic::experimental_vector_reduce_fmin: 73 return RecurrenceDescriptor::MRK_FloatMin; 74 default: 75 return RecurrenceDescriptor::MRK_Invalid; 76 } 77 } 78 79 bool expandReductions(Function &F, const TargetTransformInfo *TTI) { 80 bool Changed = false; 81 SmallVector<IntrinsicInst *, 4> Worklist; 82 for (auto &I : instructions(F)) { 83 if (auto *II = dyn_cast<IntrinsicInst>(&I)) { 84 switch (II->getIntrinsicID()) { 85 default: break; 86 case Intrinsic::experimental_vector_reduce_v2_fadd: 87 case Intrinsic::experimental_vector_reduce_v2_fmul: 88 case Intrinsic::experimental_vector_reduce_add: 89 case Intrinsic::experimental_vector_reduce_mul: 90 case Intrinsic::experimental_vector_reduce_and: 91 case Intrinsic::experimental_vector_reduce_or: 92 case Intrinsic::experimental_vector_reduce_xor: 93 case Intrinsic::experimental_vector_reduce_smax: 94 case Intrinsic::experimental_vector_reduce_smin: 95 case Intrinsic::experimental_vector_reduce_umax: 96 case Intrinsic::experimental_vector_reduce_umin: 97 case Intrinsic::experimental_vector_reduce_fmax: 98 case Intrinsic::experimental_vector_reduce_fmin: 99 if (TTI->shouldExpandReduction(II)) 100 Worklist.push_back(II); 101 102 break; 103 } 104 } 105 } 106 107 for (auto *II : Worklist) { 108 FastMathFlags FMF = 109 isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{}; 110 Intrinsic::ID ID = II->getIntrinsicID(); 111 RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID); 112 113 Value *Rdx = nullptr; 114 IRBuilder<> Builder(II); 115 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); 116 Builder.setFastMathFlags(FMF); 117 switch (ID) { 118 default: llvm_unreachable("Unexpected intrinsic!"); 119 case Intrinsic::experimental_vector_reduce_v2_fadd: 120 case Intrinsic::experimental_vector_reduce_v2_fmul: { 121 // FMFs must be attached to the call, otherwise it's an ordered reduction 122 // and it can't be handled by generating a shuffle sequence. 123 Value *Acc = II->getArgOperand(0); 124 Value *Vec = II->getArgOperand(1); 125 if (!FMF.allowReassoc()) 126 Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK); 127 else { 128 if (!isPowerOf2_32( 129 cast<FixedVectorType>(Vec->getType())->getNumElements())) 130 continue; 131 132 Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); 133 Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID), 134 Acc, Rdx, "bin.rdx"); 135 } 136 break; 137 } 138 case Intrinsic::experimental_vector_reduce_add: 139 case Intrinsic::experimental_vector_reduce_mul: 140 case Intrinsic::experimental_vector_reduce_and: 141 case Intrinsic::experimental_vector_reduce_or: 142 case Intrinsic::experimental_vector_reduce_xor: 143 case Intrinsic::experimental_vector_reduce_smax: 144 case Intrinsic::experimental_vector_reduce_smin: 145 case Intrinsic::experimental_vector_reduce_umax: 146 case Intrinsic::experimental_vector_reduce_umin: 147 case Intrinsic::experimental_vector_reduce_fmax: 148 case Intrinsic::experimental_vector_reduce_fmin: { 149 Value *Vec = II->getArgOperand(0); 150 if (!isPowerOf2_32( 151 cast<FixedVectorType>(Vec->getType())->getNumElements())) 152 continue; 153 154 Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); 155 break; 156 } 157 } 158 II->replaceAllUsesWith(Rdx); 159 II->eraseFromParent(); 160 Changed = true; 161 } 162 return Changed; 163 } 164 165 class ExpandReductions : public FunctionPass { 166 public: 167 static char ID; 168 ExpandReductions() : FunctionPass(ID) { 169 initializeExpandReductionsPass(*PassRegistry::getPassRegistry()); 170 } 171 172 bool runOnFunction(Function &F) override { 173 const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 174 return expandReductions(F, TTI); 175 } 176 177 void getAnalysisUsage(AnalysisUsage &AU) const override { 178 AU.addRequired<TargetTransformInfoWrapperPass>(); 179 AU.setPreservesCFG(); 180 } 181 }; 182 } 183 184 char ExpandReductions::ID; 185 INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions", 186 "Expand reduction intrinsics", false, false) 187 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 188 INITIALIZE_PASS_END(ExpandReductions, "expand-reductions", 189 "Expand reduction intrinsics", false, false) 190 191 FunctionPass *llvm::createExpandReductionsPass() { 192 return new ExpandReductions(); 193 } 194 195 PreservedAnalyses ExpandReductionsPass::run(Function &F, 196 FunctionAnalysisManager &AM) { 197 const auto &TTI = AM.getResult<TargetIRAnalysis>(F); 198 if (!expandReductions(F, &TTI)) 199 return PreservedAnalyses::all(); 200 PreservedAnalyses PA; 201 PA.preserveSet<CFGAnalyses>(); 202 return PA; 203 } 204