xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
106c3fb27SDimitry Andric //===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass implements IR expansion for reduction intrinsics, allowing targets
10e8d8bef9SDimitry Andric // to enable the intrinsics until just before codegen.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "llvm/CodeGen/ExpandReductions.h"
150b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
160b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h"
170b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
180b57cec5SDimitry Andric #include "llvm/IR/InstIterator.h"
190b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
200b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h"
21480093f4SDimitry Andric #include "llvm/InitializePasses.h"
220b57cec5SDimitry Andric #include "llvm/Pass.h"
230b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric using namespace llvm;
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric namespace {
280b57cec5SDimitry Andric 
expandReductions(Function & F,const TargetTransformInfo * TTI)290b57cec5SDimitry Andric bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
300b57cec5SDimitry Andric   bool Changed = false;
310b57cec5SDimitry Andric   SmallVector<IntrinsicInst *, 4> Worklist;
32480093f4SDimitry Andric   for (auto &I : instructions(F)) {
33480093f4SDimitry Andric     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
34480093f4SDimitry Andric       switch (II->getIntrinsicID()) {
35480093f4SDimitry Andric       default: break;
36e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_fadd:
37e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_fmul:
38e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_add:
39e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_mul:
40e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_and:
41e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_or:
42e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_xor:
43e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_smax:
44e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_smin:
45e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_umax:
46e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_umin:
47e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_fmax:
48e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_fmin:
49480093f4SDimitry Andric         if (TTI->shouldExpandReduction(II))
500b57cec5SDimitry Andric           Worklist.push_back(II);
510b57cec5SDimitry Andric 
52480093f4SDimitry Andric         break;
53480093f4SDimitry Andric       }
54480093f4SDimitry Andric     }
55480093f4SDimitry Andric   }
560b57cec5SDimitry Andric 
57480093f4SDimitry Andric   for (auto *II : Worklist) {
580b57cec5SDimitry Andric     FastMathFlags FMF =
590b57cec5SDimitry Andric         isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
600b57cec5SDimitry Andric     Intrinsic::ID ID = II->getIntrinsicID();
61*0fca6ea1SDimitry Andric     RecurKind RK = getMinMaxReductionRecurKind(ID);
62*0fca6ea1SDimitry Andric     TargetTransformInfo::ReductionShuffle RS =
63*0fca6ea1SDimitry Andric         TTI->getPreferredExpandedReductionShuffle(II);
640b57cec5SDimitry Andric 
650b57cec5SDimitry Andric     Value *Rdx = nullptr;
660b57cec5SDimitry Andric     IRBuilder<> Builder(II);
670b57cec5SDimitry Andric     IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
680b57cec5SDimitry Andric     Builder.setFastMathFlags(FMF);
690b57cec5SDimitry Andric     switch (ID) {
70480093f4SDimitry Andric     default: llvm_unreachable("Unexpected intrinsic!");
71e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_fadd:
72e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_fmul: {
730b57cec5SDimitry Andric       // FMFs must be attached to the call, otherwise it's an ordered reduction
740b57cec5SDimitry Andric       // and it can't be handled by generating a shuffle sequence.
750b57cec5SDimitry Andric       Value *Acc = II->getArgOperand(0);
760b57cec5SDimitry Andric       Value *Vec = II->getArgOperand(1);
77*0fca6ea1SDimitry Andric       unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
780b57cec5SDimitry Andric       if (!FMF.allowReassoc())
79*0fca6ea1SDimitry Andric         Rdx = getOrderedReduction(Builder, Acc, Vec, RdxOpcode, RK);
800b57cec5SDimitry Andric       else {
815ffd83dbSDimitry Andric         if (!isPowerOf2_32(
825ffd83dbSDimitry Andric                 cast<FixedVectorType>(Vec->getType())->getNumElements()))
83480093f4SDimitry Andric           continue;
84*0fca6ea1SDimitry Andric         Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
85*0fca6ea1SDimitry Andric         Rdx = Builder.CreateBinOp((Instruction::BinaryOps)RdxOpcode, Acc, Rdx,
86*0fca6ea1SDimitry Andric                                   "bin.rdx");
870b57cec5SDimitry Andric       }
88480093f4SDimitry Andric       break;
89480093f4SDimitry Andric     }
9006c3fb27SDimitry Andric     case Intrinsic::vector_reduce_and:
9106c3fb27SDimitry Andric     case Intrinsic::vector_reduce_or: {
9206c3fb27SDimitry Andric       // Canonicalize logical or/and reductions:
9306c3fb27SDimitry Andric       // Or reduction for i1 is represented as:
9406c3fb27SDimitry Andric       // %val = bitcast <ReduxWidth x i1> to iReduxWidth
9506c3fb27SDimitry Andric       // %res = cmp ne iReduxWidth %val, 0
9606c3fb27SDimitry Andric       // And reduction for i1 is represented as:
9706c3fb27SDimitry Andric       // %val = bitcast <ReduxWidth x i1> to iReduxWidth
9806c3fb27SDimitry Andric       // %res = cmp eq iReduxWidth %val, 11111
9906c3fb27SDimitry Andric       Value *Vec = II->getArgOperand(0);
10006c3fb27SDimitry Andric       auto *FTy = cast<FixedVectorType>(Vec->getType());
10106c3fb27SDimitry Andric       unsigned NumElts = FTy->getNumElements();
10206c3fb27SDimitry Andric       if (!isPowerOf2_32(NumElts))
10306c3fb27SDimitry Andric         continue;
10406c3fb27SDimitry Andric 
10506c3fb27SDimitry Andric       if (FTy->getElementType() == Builder.getInt1Ty()) {
10606c3fb27SDimitry Andric         Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
10706c3fb27SDimitry Andric         if (ID == Intrinsic::vector_reduce_and) {
10806c3fb27SDimitry Andric           Rdx = Builder.CreateICmpEQ(
10906c3fb27SDimitry Andric               Rdx, ConstantInt::getAllOnesValue(Rdx->getType()));
11006c3fb27SDimitry Andric         } else {
11106c3fb27SDimitry Andric           assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction.");
11206c3fb27SDimitry Andric           Rdx = Builder.CreateIsNotNull(Rdx);
11306c3fb27SDimitry Andric         }
11406c3fb27SDimitry Andric         break;
11506c3fb27SDimitry Andric       }
116*0fca6ea1SDimitry Andric       unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
117*0fca6ea1SDimitry Andric       Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
11806c3fb27SDimitry Andric       break;
11906c3fb27SDimitry Andric     }
120e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_add:
121e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_mul:
122e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_xor:
123e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_smax:
124e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_smin:
125e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_umax:
126e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_umin: {
1270b57cec5SDimitry Andric       Value *Vec = II->getArgOperand(0);
1285ffd83dbSDimitry Andric       if (!isPowerOf2_32(
1295ffd83dbSDimitry Andric               cast<FixedVectorType>(Vec->getType())->getNumElements()))
1300b57cec5SDimitry Andric         continue;
131*0fca6ea1SDimitry Andric       unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
132*0fca6ea1SDimitry Andric       Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
133e8d8bef9SDimitry Andric       break;
134e8d8bef9SDimitry Andric     }
135e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_fmax:
136e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_fmin: {
137fe6060f1SDimitry Andric       // We require "nnan" to use a shuffle reduction; "nsz" is implied by the
138fe6060f1SDimitry Andric       // semantics of the reduction.
139e8d8bef9SDimitry Andric       Value *Vec = II->getArgOperand(0);
140e8d8bef9SDimitry Andric       if (!isPowerOf2_32(
141e8d8bef9SDimitry Andric               cast<FixedVectorType>(Vec->getType())->getNumElements()) ||
142fe6060f1SDimitry Andric           !FMF.noNaNs())
143e8d8bef9SDimitry Andric         continue;
144*0fca6ea1SDimitry Andric       unsigned RdxOpcode = getArithmeticReductionInstruction(ID);
145*0fca6ea1SDimitry Andric       Rdx = getShuffleReduction(Builder, Vec, RdxOpcode, RS, RK);
146480093f4SDimitry Andric       break;
147480093f4SDimitry Andric     }
1480b57cec5SDimitry Andric     }
1490b57cec5SDimitry Andric     II->replaceAllUsesWith(Rdx);
1500b57cec5SDimitry Andric     II->eraseFromParent();
1510b57cec5SDimitry Andric     Changed = true;
1520b57cec5SDimitry Andric   }
1530b57cec5SDimitry Andric   return Changed;
1540b57cec5SDimitry Andric }
1550b57cec5SDimitry Andric 
1560b57cec5SDimitry Andric class ExpandReductions : public FunctionPass {
1570b57cec5SDimitry Andric public:
1580b57cec5SDimitry Andric   static char ID;
ExpandReductions()1590b57cec5SDimitry Andric   ExpandReductions() : FunctionPass(ID) {
1600b57cec5SDimitry Andric     initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
1610b57cec5SDimitry Andric   }
1620b57cec5SDimitry Andric 
runOnFunction(Function & F)1630b57cec5SDimitry Andric   bool runOnFunction(Function &F) override {
1640b57cec5SDimitry Andric     const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1650b57cec5SDimitry Andric     return expandReductions(F, TTI);
1660b57cec5SDimitry Andric   }
1670b57cec5SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const1680b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
1690b57cec5SDimitry Andric     AU.addRequired<TargetTransformInfoWrapperPass>();
1700b57cec5SDimitry Andric     AU.setPreservesCFG();
1710b57cec5SDimitry Andric   }
1720b57cec5SDimitry Andric };
1730b57cec5SDimitry Andric }
1740b57cec5SDimitry Andric 
1750b57cec5SDimitry Andric char ExpandReductions::ID;
1760b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
1770b57cec5SDimitry Andric                       "Expand reduction intrinsics", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)1780b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
1790b57cec5SDimitry Andric INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
1800b57cec5SDimitry Andric                     "Expand reduction intrinsics", false, false)
1810b57cec5SDimitry Andric 
1820b57cec5SDimitry Andric FunctionPass *llvm::createExpandReductionsPass() {
1830b57cec5SDimitry Andric   return new ExpandReductions();
1840b57cec5SDimitry Andric }
1850b57cec5SDimitry Andric 
run(Function & F,FunctionAnalysisManager & AM)1860b57cec5SDimitry Andric PreservedAnalyses ExpandReductionsPass::run(Function &F,
1870b57cec5SDimitry Andric                                             FunctionAnalysisManager &AM) {
1880b57cec5SDimitry Andric   const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1890b57cec5SDimitry Andric   if (!expandReductions(F, &TTI))
1900b57cec5SDimitry Andric     return PreservedAnalyses::all();
1910b57cec5SDimitry Andric   PreservedAnalyses PA;
1920b57cec5SDimitry Andric   PA.preserveSet<CFGAnalyses>();
1930b57cec5SDimitry Andric   return PA;
1940b57cec5SDimitry Andric }
195