1 //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass implements IR expansion for reduction intrinsics, allowing targets 10 // to enable the experimental intrinsics until just before codegen. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/CodeGen/ExpandReductions.h" 15 #include "llvm/Analysis/TargetTransformInfo.h" 16 #include "llvm/CodeGen/Passes.h" 17 #include "llvm/IR/Function.h" 18 #include "llvm/IR/IRBuilder.h" 19 #include "llvm/IR/InstIterator.h" 20 #include "llvm/IR/IntrinsicInst.h" 21 #include "llvm/IR/Intrinsics.h" 22 #include "llvm/IR/Module.h" 23 #include "llvm/Pass.h" 24 #include "llvm/Transforms/Utils/LoopUtils.h" 25 26 using namespace llvm; 27 28 namespace { 29 30 unsigned getOpcode(Intrinsic::ID ID) { 31 switch (ID) { 32 case Intrinsic::experimental_vector_reduce_v2_fadd: 33 return Instruction::FAdd; 34 case Intrinsic::experimental_vector_reduce_v2_fmul: 35 return Instruction::FMul; 36 case Intrinsic::experimental_vector_reduce_add: 37 return Instruction::Add; 38 case Intrinsic::experimental_vector_reduce_mul: 39 return Instruction::Mul; 40 case Intrinsic::experimental_vector_reduce_and: 41 return Instruction::And; 42 case Intrinsic::experimental_vector_reduce_or: 43 return Instruction::Or; 44 case Intrinsic::experimental_vector_reduce_xor: 45 return Instruction::Xor; 46 case Intrinsic::experimental_vector_reduce_smax: 47 case Intrinsic::experimental_vector_reduce_smin: 48 case Intrinsic::experimental_vector_reduce_umax: 49 case Intrinsic::experimental_vector_reduce_umin: 50 return Instruction::ICmp; 51 case Intrinsic::experimental_vector_reduce_fmax: 52 case Intrinsic::experimental_vector_reduce_fmin: 53 return Instruction::FCmp; 54 default: 55 llvm_unreachable("Unexpected ID"); 56 } 57 } 58 59 RecurrenceDescriptor::MinMaxRecurrenceKind getMRK(Intrinsic::ID ID) { 60 switch (ID) { 61 case Intrinsic::experimental_vector_reduce_smax: 62 return RecurrenceDescriptor::MRK_SIntMax; 63 case Intrinsic::experimental_vector_reduce_smin: 64 return RecurrenceDescriptor::MRK_SIntMin; 65 case Intrinsic::experimental_vector_reduce_umax: 66 return RecurrenceDescriptor::MRK_UIntMax; 67 case Intrinsic::experimental_vector_reduce_umin: 68 return RecurrenceDescriptor::MRK_UIntMin; 69 case Intrinsic::experimental_vector_reduce_fmax: 70 return RecurrenceDescriptor::MRK_FloatMax; 71 case Intrinsic::experimental_vector_reduce_fmin: 72 return RecurrenceDescriptor::MRK_FloatMin; 73 default: 74 return RecurrenceDescriptor::MRK_Invalid; 75 } 76 } 77 78 bool expandReductions(Function &F, const TargetTransformInfo *TTI) { 79 bool Changed = false; 80 SmallVector<IntrinsicInst *, 4> Worklist; 81 for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) 82 if (auto II = dyn_cast<IntrinsicInst>(&*I)) 83 Worklist.push_back(II); 84 85 for (auto *II : Worklist) { 86 if (!TTI->shouldExpandReduction(II)) 87 continue; 88 89 FastMathFlags FMF = 90 isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{}; 91 Intrinsic::ID ID = II->getIntrinsicID(); 92 RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID); 93 94 Value *Rdx = nullptr; 95 IRBuilder<> Builder(II); 96 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); 97 Builder.setFastMathFlags(FMF); 98 switch (ID) { 99 case Intrinsic::experimental_vector_reduce_v2_fadd: 100 case Intrinsic::experimental_vector_reduce_v2_fmul: { 101 // FMFs must be attached to the call, otherwise it's an ordered reduction 102 // and it can't be handled by generating a shuffle sequence. 103 Value *Acc = II->getArgOperand(0); 104 Value *Vec = II->getArgOperand(1); 105 if (!FMF.allowReassoc()) 106 Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK); 107 else { 108 Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); 109 Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID), 110 Acc, Rdx, "bin.rdx"); 111 } 112 } break; 113 case Intrinsic::experimental_vector_reduce_add: 114 case Intrinsic::experimental_vector_reduce_mul: 115 case Intrinsic::experimental_vector_reduce_and: 116 case Intrinsic::experimental_vector_reduce_or: 117 case Intrinsic::experimental_vector_reduce_xor: 118 case Intrinsic::experimental_vector_reduce_smax: 119 case Intrinsic::experimental_vector_reduce_smin: 120 case Intrinsic::experimental_vector_reduce_umax: 121 case Intrinsic::experimental_vector_reduce_umin: 122 case Intrinsic::experimental_vector_reduce_fmax: 123 case Intrinsic::experimental_vector_reduce_fmin: { 124 Value *Vec = II->getArgOperand(0); 125 Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK); 126 } break; 127 default: 128 continue; 129 } 130 II->replaceAllUsesWith(Rdx); 131 II->eraseFromParent(); 132 Changed = true; 133 } 134 return Changed; 135 } 136 137 class ExpandReductions : public FunctionPass { 138 public: 139 static char ID; 140 ExpandReductions() : FunctionPass(ID) { 141 initializeExpandReductionsPass(*PassRegistry::getPassRegistry()); 142 } 143 144 bool runOnFunction(Function &F) override { 145 const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 146 return expandReductions(F, TTI); 147 } 148 149 void getAnalysisUsage(AnalysisUsage &AU) const override { 150 AU.addRequired<TargetTransformInfoWrapperPass>(); 151 AU.setPreservesCFG(); 152 } 153 }; 154 } 155 156 char ExpandReductions::ID; 157 INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions", 158 "Expand reduction intrinsics", false, false) 159 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 160 INITIALIZE_PASS_END(ExpandReductions, "expand-reductions", 161 "Expand reduction intrinsics", false, false) 162 163 FunctionPass *llvm::createExpandReductionsPass() { 164 return new ExpandReductions(); 165 } 166 167 PreservedAnalyses ExpandReductionsPass::run(Function &F, 168 FunctionAnalysisManager &AM) { 169 const auto &TTI = AM.getResult<TargetIRAnalysis>(F); 170 if (!expandReductions(F, &TTI)) 171 return PreservedAnalyses::all(); 172 PreservedAnalyses PA; 173 PA.preserveSet<CFGAnalyses>(); 174 return PA; 175 } 176