xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp (revision e8d8bef961a50d4dc22501cde4fb9fb0be1b2532)
10b57cec5SDimitry Andric //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass implements IR expansion for reduction intrinsics, allowing targets
10*e8d8bef9SDimitry Andric // to enable the intrinsics until just before codegen.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "llvm/CodeGen/ExpandReductions.h"
150b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
160b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h"
170b57cec5SDimitry Andric #include "llvm/IR/Function.h"
180b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
190b57cec5SDimitry Andric #include "llvm/IR/InstIterator.h"
200b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
210b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h"
220b57cec5SDimitry Andric #include "llvm/IR/Module.h"
23480093f4SDimitry Andric #include "llvm/InitializePasses.h"
240b57cec5SDimitry Andric #include "llvm/Pass.h"
250b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric using namespace llvm;
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric namespace {
300b57cec5SDimitry Andric 
310b57cec5SDimitry Andric unsigned getOpcode(Intrinsic::ID ID) {
320b57cec5SDimitry Andric   switch (ID) {
33*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_fadd:
340b57cec5SDimitry Andric     return Instruction::FAdd;
35*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_fmul:
360b57cec5SDimitry Andric     return Instruction::FMul;
37*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_add:
380b57cec5SDimitry Andric     return Instruction::Add;
39*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_mul:
400b57cec5SDimitry Andric     return Instruction::Mul;
41*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_and:
420b57cec5SDimitry Andric     return Instruction::And;
43*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_or:
440b57cec5SDimitry Andric     return Instruction::Or;
45*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_xor:
460b57cec5SDimitry Andric     return Instruction::Xor;
47*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_smax:
48*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_smin:
49*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_umax:
50*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_umin:
510b57cec5SDimitry Andric     return Instruction::ICmp;
52*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_fmax:
53*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_fmin:
540b57cec5SDimitry Andric     return Instruction::FCmp;
550b57cec5SDimitry Andric   default:
560b57cec5SDimitry Andric     llvm_unreachable("Unexpected ID");
570b57cec5SDimitry Andric   }
580b57cec5SDimitry Andric }
590b57cec5SDimitry Andric 
60*e8d8bef9SDimitry Andric RecurKind getRK(Intrinsic::ID ID) {
610b57cec5SDimitry Andric   switch (ID) {
62*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_smax:
63*e8d8bef9SDimitry Andric     return RecurKind::SMax;
64*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_smin:
65*e8d8bef9SDimitry Andric     return RecurKind::SMin;
66*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_umax:
67*e8d8bef9SDimitry Andric     return RecurKind::UMax;
68*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_umin:
69*e8d8bef9SDimitry Andric     return RecurKind::UMin;
70*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_fmax:
71*e8d8bef9SDimitry Andric     return RecurKind::FMax;
72*e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_fmin:
73*e8d8bef9SDimitry Andric     return RecurKind::FMin;
740b57cec5SDimitry Andric   default:
75*e8d8bef9SDimitry Andric     return RecurKind::None;
760b57cec5SDimitry Andric   }
770b57cec5SDimitry Andric }
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
800b57cec5SDimitry Andric   bool Changed = false;
810b57cec5SDimitry Andric   SmallVector<IntrinsicInst *, 4> Worklist;
82480093f4SDimitry Andric   for (auto &I : instructions(F)) {
83480093f4SDimitry Andric     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
84480093f4SDimitry Andric       switch (II->getIntrinsicID()) {
85480093f4SDimitry Andric       default: break;
86*e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_fadd:
87*e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_fmul:
88*e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_add:
89*e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_mul:
90*e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_and:
91*e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_or:
92*e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_xor:
93*e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_smax:
94*e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_smin:
95*e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_umax:
96*e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_umin:
97*e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_fmax:
98*e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_fmin:
99480093f4SDimitry Andric         if (TTI->shouldExpandReduction(II))
1000b57cec5SDimitry Andric           Worklist.push_back(II);
1010b57cec5SDimitry Andric 
102480093f4SDimitry Andric         break;
103480093f4SDimitry Andric       }
104480093f4SDimitry Andric     }
105480093f4SDimitry Andric   }
1060b57cec5SDimitry Andric 
107480093f4SDimitry Andric   for (auto *II : Worklist) {
1080b57cec5SDimitry Andric     FastMathFlags FMF =
1090b57cec5SDimitry Andric         isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
1100b57cec5SDimitry Andric     Intrinsic::ID ID = II->getIntrinsicID();
111*e8d8bef9SDimitry Andric     RecurKind RK = getRK(ID);
1120b57cec5SDimitry Andric 
1130b57cec5SDimitry Andric     Value *Rdx = nullptr;
1140b57cec5SDimitry Andric     IRBuilder<> Builder(II);
1150b57cec5SDimitry Andric     IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
1160b57cec5SDimitry Andric     Builder.setFastMathFlags(FMF);
1170b57cec5SDimitry Andric     switch (ID) {
118480093f4SDimitry Andric     default: llvm_unreachable("Unexpected intrinsic!");
119*e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_fadd:
120*e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_fmul: {
1210b57cec5SDimitry Andric       // FMFs must be attached to the call, otherwise it's an ordered reduction
1220b57cec5SDimitry Andric       // and it can't be handled by generating a shuffle sequence.
1230b57cec5SDimitry Andric       Value *Acc = II->getArgOperand(0);
1240b57cec5SDimitry Andric       Value *Vec = II->getArgOperand(1);
1250b57cec5SDimitry Andric       if (!FMF.allowReassoc())
126*e8d8bef9SDimitry Andric         Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), RK);
1270b57cec5SDimitry Andric       else {
1285ffd83dbSDimitry Andric         if (!isPowerOf2_32(
1295ffd83dbSDimitry Andric                 cast<FixedVectorType>(Vec->getType())->getNumElements()))
130480093f4SDimitry Andric           continue;
131480093f4SDimitry Andric 
132*e8d8bef9SDimitry Andric         Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
1330b57cec5SDimitry Andric         Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
1340b57cec5SDimitry Andric                                   Acc, Rdx, "bin.rdx");
1350b57cec5SDimitry Andric       }
136480093f4SDimitry Andric       break;
137480093f4SDimitry Andric     }
138*e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_add:
139*e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_mul:
140*e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_and:
141*e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_or:
142*e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_xor:
143*e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_smax:
144*e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_smin:
145*e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_umax:
146*e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_umin: {
1470b57cec5SDimitry Andric       Value *Vec = II->getArgOperand(0);
1485ffd83dbSDimitry Andric       if (!isPowerOf2_32(
1495ffd83dbSDimitry Andric               cast<FixedVectorType>(Vec->getType())->getNumElements()))
1500b57cec5SDimitry Andric         continue;
151480093f4SDimitry Andric 
152*e8d8bef9SDimitry Andric       Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
153*e8d8bef9SDimitry Andric       break;
154*e8d8bef9SDimitry Andric     }
155*e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_fmax:
156*e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_fmin: {
157*e8d8bef9SDimitry Andric       // FIXME: We only expand 'fast' reductions here because the underlying
158*e8d8bef9SDimitry Andric       //        code in createMinMaxOp() assumes that comparisons use 'fast'
159*e8d8bef9SDimitry Andric       //        semantics.
160*e8d8bef9SDimitry Andric       Value *Vec = II->getArgOperand(0);
161*e8d8bef9SDimitry Andric       if (!isPowerOf2_32(
162*e8d8bef9SDimitry Andric               cast<FixedVectorType>(Vec->getType())->getNumElements()) ||
163*e8d8bef9SDimitry Andric           !FMF.isFast())
164*e8d8bef9SDimitry Andric         continue;
165*e8d8bef9SDimitry Andric 
166*e8d8bef9SDimitry Andric       Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
167480093f4SDimitry Andric       break;
168480093f4SDimitry Andric     }
1690b57cec5SDimitry Andric     }
1700b57cec5SDimitry Andric     II->replaceAllUsesWith(Rdx);
1710b57cec5SDimitry Andric     II->eraseFromParent();
1720b57cec5SDimitry Andric     Changed = true;
1730b57cec5SDimitry Andric   }
1740b57cec5SDimitry Andric   return Changed;
1750b57cec5SDimitry Andric }
1760b57cec5SDimitry Andric 
1770b57cec5SDimitry Andric class ExpandReductions : public FunctionPass {
1780b57cec5SDimitry Andric public:
1790b57cec5SDimitry Andric   static char ID;
1800b57cec5SDimitry Andric   ExpandReductions() : FunctionPass(ID) {
1810b57cec5SDimitry Andric     initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
1820b57cec5SDimitry Andric   }
1830b57cec5SDimitry Andric 
1840b57cec5SDimitry Andric   bool runOnFunction(Function &F) override {
1850b57cec5SDimitry Andric     const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
1860b57cec5SDimitry Andric     return expandReductions(F, TTI);
1870b57cec5SDimitry Andric   }
1880b57cec5SDimitry Andric 
1890b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
1900b57cec5SDimitry Andric     AU.addRequired<TargetTransformInfoWrapperPass>();
1910b57cec5SDimitry Andric     AU.setPreservesCFG();
1920b57cec5SDimitry Andric   }
1930b57cec5SDimitry Andric };
1940b57cec5SDimitry Andric }
1950b57cec5SDimitry Andric 
1960b57cec5SDimitry Andric char ExpandReductions::ID;
1970b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
1980b57cec5SDimitry Andric                       "Expand reduction intrinsics", false, false)
1990b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
2000b57cec5SDimitry Andric INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
2010b57cec5SDimitry Andric                     "Expand reduction intrinsics", false, false)
2020b57cec5SDimitry Andric 
2030b57cec5SDimitry Andric FunctionPass *llvm::createExpandReductionsPass() {
2040b57cec5SDimitry Andric   return new ExpandReductions();
2050b57cec5SDimitry Andric }
2060b57cec5SDimitry Andric 
2070b57cec5SDimitry Andric PreservedAnalyses ExpandReductionsPass::run(Function &F,
2080b57cec5SDimitry Andric                                             FunctionAnalysisManager &AM) {
2090b57cec5SDimitry Andric   const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
2100b57cec5SDimitry Andric   if (!expandReductions(F, &TTI))
2110b57cec5SDimitry Andric     return PreservedAnalyses::all();
2120b57cec5SDimitry Andric   PreservedAnalyses PA;
2130b57cec5SDimitry Andric   PA.preserveSet<CFGAnalyses>();
2140b57cec5SDimitry Andric   return PA;
2150b57cec5SDimitry Andric }
216