xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/ExpandReductions.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1*06c3fb27SDimitry Andric //===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This pass implements IR expansion for reduction intrinsics, allowing targets
10e8d8bef9SDimitry Andric // to enable the intrinsics until just before codegen.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
130b57cec5SDimitry Andric 
140b57cec5SDimitry Andric #include "llvm/CodeGen/ExpandReductions.h"
150b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
160b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h"
170b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
180b57cec5SDimitry Andric #include "llvm/IR/InstIterator.h"
190b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
200b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h"
21480093f4SDimitry Andric #include "llvm/InitializePasses.h"
220b57cec5SDimitry Andric #include "llvm/Pass.h"
230b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
240b57cec5SDimitry Andric 
250b57cec5SDimitry Andric using namespace llvm;
260b57cec5SDimitry Andric 
270b57cec5SDimitry Andric namespace {
280b57cec5SDimitry Andric 
290b57cec5SDimitry Andric unsigned getOpcode(Intrinsic::ID ID) {
300b57cec5SDimitry Andric   switch (ID) {
31e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_fadd:
320b57cec5SDimitry Andric     return Instruction::FAdd;
33e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_fmul:
340b57cec5SDimitry Andric     return Instruction::FMul;
35e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_add:
360b57cec5SDimitry Andric     return Instruction::Add;
37e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_mul:
380b57cec5SDimitry Andric     return Instruction::Mul;
39e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_and:
400b57cec5SDimitry Andric     return Instruction::And;
41e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_or:
420b57cec5SDimitry Andric     return Instruction::Or;
43e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_xor:
440b57cec5SDimitry Andric     return Instruction::Xor;
45e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_smax:
46e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_smin:
47e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_umax:
48e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_umin:
490b57cec5SDimitry Andric     return Instruction::ICmp;
50e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_fmax:
51e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_fmin:
520b57cec5SDimitry Andric     return Instruction::FCmp;
530b57cec5SDimitry Andric   default:
540b57cec5SDimitry Andric     llvm_unreachable("Unexpected ID");
550b57cec5SDimitry Andric   }
560b57cec5SDimitry Andric }
570b57cec5SDimitry Andric 
58e8d8bef9SDimitry Andric RecurKind getRK(Intrinsic::ID ID) {
590b57cec5SDimitry Andric   switch (ID) {
60e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_smax:
61e8d8bef9SDimitry Andric     return RecurKind::SMax;
62e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_smin:
63e8d8bef9SDimitry Andric     return RecurKind::SMin;
64e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_umax:
65e8d8bef9SDimitry Andric     return RecurKind::UMax;
66e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_umin:
67e8d8bef9SDimitry Andric     return RecurKind::UMin;
68e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_fmax:
69e8d8bef9SDimitry Andric     return RecurKind::FMax;
70e8d8bef9SDimitry Andric   case Intrinsic::vector_reduce_fmin:
71e8d8bef9SDimitry Andric     return RecurKind::FMin;
720b57cec5SDimitry Andric   default:
73e8d8bef9SDimitry Andric     return RecurKind::None;
740b57cec5SDimitry Andric   }
750b57cec5SDimitry Andric }
760b57cec5SDimitry Andric 
770b57cec5SDimitry Andric bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
780b57cec5SDimitry Andric   bool Changed = false;
790b57cec5SDimitry Andric   SmallVector<IntrinsicInst *, 4> Worklist;
80480093f4SDimitry Andric   for (auto &I : instructions(F)) {
81480093f4SDimitry Andric     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
82480093f4SDimitry Andric       switch (II->getIntrinsicID()) {
83480093f4SDimitry Andric       default: break;
84e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_fadd:
85e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_fmul:
86e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_add:
87e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_mul:
88e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_and:
89e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_or:
90e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_xor:
91e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_smax:
92e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_smin:
93e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_umax:
94e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_umin:
95e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_fmax:
96e8d8bef9SDimitry Andric       case Intrinsic::vector_reduce_fmin:
97480093f4SDimitry Andric         if (TTI->shouldExpandReduction(II))
980b57cec5SDimitry Andric           Worklist.push_back(II);
990b57cec5SDimitry Andric 
100480093f4SDimitry Andric         break;
101480093f4SDimitry Andric       }
102480093f4SDimitry Andric     }
103480093f4SDimitry Andric   }
1040b57cec5SDimitry Andric 
105480093f4SDimitry Andric   for (auto *II : Worklist) {
1060b57cec5SDimitry Andric     FastMathFlags FMF =
1070b57cec5SDimitry Andric         isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
1080b57cec5SDimitry Andric     Intrinsic::ID ID = II->getIntrinsicID();
109e8d8bef9SDimitry Andric     RecurKind RK = getRK(ID);
1100b57cec5SDimitry Andric 
1110b57cec5SDimitry Andric     Value *Rdx = nullptr;
1120b57cec5SDimitry Andric     IRBuilder<> Builder(II);
1130b57cec5SDimitry Andric     IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
1140b57cec5SDimitry Andric     Builder.setFastMathFlags(FMF);
1150b57cec5SDimitry Andric     switch (ID) {
116480093f4SDimitry Andric     default: llvm_unreachable("Unexpected intrinsic!");
117e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_fadd:
118e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_fmul: {
1190b57cec5SDimitry Andric       // FMFs must be attached to the call, otherwise it's an ordered reduction
1200b57cec5SDimitry Andric       // and it can't be handled by generating a shuffle sequence.
1210b57cec5SDimitry Andric       Value *Acc = II->getArgOperand(0);
1220b57cec5SDimitry Andric       Value *Vec = II->getArgOperand(1);
1230b57cec5SDimitry Andric       if (!FMF.allowReassoc())
124e8d8bef9SDimitry Andric         Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), RK);
1250b57cec5SDimitry Andric       else {
1265ffd83dbSDimitry Andric         if (!isPowerOf2_32(
1275ffd83dbSDimitry Andric                 cast<FixedVectorType>(Vec->getType())->getNumElements()))
128480093f4SDimitry Andric           continue;
129480093f4SDimitry Andric 
130e8d8bef9SDimitry Andric         Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
1310b57cec5SDimitry Andric         Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
1320b57cec5SDimitry Andric                                   Acc, Rdx, "bin.rdx");
1330b57cec5SDimitry Andric       }
134480093f4SDimitry Andric       break;
135480093f4SDimitry Andric     }
136*06c3fb27SDimitry Andric     case Intrinsic::vector_reduce_and:
137*06c3fb27SDimitry Andric     case Intrinsic::vector_reduce_or: {
138*06c3fb27SDimitry Andric       // Canonicalize logical or/and reductions:
139*06c3fb27SDimitry Andric       // Or reduction for i1 is represented as:
140*06c3fb27SDimitry Andric       // %val = bitcast <ReduxWidth x i1> to iReduxWidth
141*06c3fb27SDimitry Andric       // %res = cmp ne iReduxWidth %val, 0
142*06c3fb27SDimitry Andric       // And reduction for i1 is represented as:
143*06c3fb27SDimitry Andric       // %val = bitcast <ReduxWidth x i1> to iReduxWidth
144*06c3fb27SDimitry Andric       // %res = cmp eq iReduxWidth %val, 11111
145*06c3fb27SDimitry Andric       Value *Vec = II->getArgOperand(0);
146*06c3fb27SDimitry Andric       auto *FTy = cast<FixedVectorType>(Vec->getType());
147*06c3fb27SDimitry Andric       unsigned NumElts = FTy->getNumElements();
148*06c3fb27SDimitry Andric       if (!isPowerOf2_32(NumElts))
149*06c3fb27SDimitry Andric         continue;
150*06c3fb27SDimitry Andric 
151*06c3fb27SDimitry Andric       if (FTy->getElementType() == Builder.getInt1Ty()) {
152*06c3fb27SDimitry Andric         Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts));
153*06c3fb27SDimitry Andric         if (ID == Intrinsic::vector_reduce_and) {
154*06c3fb27SDimitry Andric           Rdx = Builder.CreateICmpEQ(
155*06c3fb27SDimitry Andric               Rdx, ConstantInt::getAllOnesValue(Rdx->getType()));
156*06c3fb27SDimitry Andric         } else {
157*06c3fb27SDimitry Andric           assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction.");
158*06c3fb27SDimitry Andric           Rdx = Builder.CreateIsNotNull(Rdx);
159*06c3fb27SDimitry Andric         }
160*06c3fb27SDimitry Andric         break;
161*06c3fb27SDimitry Andric       }
162*06c3fb27SDimitry Andric 
163*06c3fb27SDimitry Andric       Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
164*06c3fb27SDimitry Andric       break;
165*06c3fb27SDimitry Andric     }
166e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_add:
167e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_mul:
168e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_xor:
169e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_smax:
170e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_smin:
171e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_umax:
172e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_umin: {
1730b57cec5SDimitry Andric       Value *Vec = II->getArgOperand(0);
1745ffd83dbSDimitry Andric       if (!isPowerOf2_32(
1755ffd83dbSDimitry Andric               cast<FixedVectorType>(Vec->getType())->getNumElements()))
1760b57cec5SDimitry Andric         continue;
177480093f4SDimitry Andric 
178e8d8bef9SDimitry Andric       Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
179e8d8bef9SDimitry Andric       break;
180e8d8bef9SDimitry Andric     }
181e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_fmax:
182e8d8bef9SDimitry Andric     case Intrinsic::vector_reduce_fmin: {
183fe6060f1SDimitry Andric       // We require "nnan" to use a shuffle reduction; "nsz" is implied by the
184fe6060f1SDimitry Andric       // semantics of the reduction.
185e8d8bef9SDimitry Andric       Value *Vec = II->getArgOperand(0);
186e8d8bef9SDimitry Andric       if (!isPowerOf2_32(
187e8d8bef9SDimitry Andric               cast<FixedVectorType>(Vec->getType())->getNumElements()) ||
188fe6060f1SDimitry Andric           !FMF.noNaNs())
189e8d8bef9SDimitry Andric         continue;
190e8d8bef9SDimitry Andric 
191e8d8bef9SDimitry Andric       Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK);
192480093f4SDimitry Andric       break;
193480093f4SDimitry Andric     }
1940b57cec5SDimitry Andric     }
1950b57cec5SDimitry Andric     II->replaceAllUsesWith(Rdx);
1960b57cec5SDimitry Andric     II->eraseFromParent();
1970b57cec5SDimitry Andric     Changed = true;
1980b57cec5SDimitry Andric   }
1990b57cec5SDimitry Andric   return Changed;
2000b57cec5SDimitry Andric }
2010b57cec5SDimitry Andric 
2020b57cec5SDimitry Andric class ExpandReductions : public FunctionPass {
2030b57cec5SDimitry Andric public:
2040b57cec5SDimitry Andric   static char ID;
2050b57cec5SDimitry Andric   ExpandReductions() : FunctionPass(ID) {
2060b57cec5SDimitry Andric     initializeExpandReductionsPass(*PassRegistry::getPassRegistry());
2070b57cec5SDimitry Andric   }
2080b57cec5SDimitry Andric 
2090b57cec5SDimitry Andric   bool runOnFunction(Function &F) override {
2100b57cec5SDimitry Andric     const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
2110b57cec5SDimitry Andric     return expandReductions(F, TTI);
2120b57cec5SDimitry Andric   }
2130b57cec5SDimitry Andric 
2140b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
2150b57cec5SDimitry Andric     AU.addRequired<TargetTransformInfoWrapperPass>();
2160b57cec5SDimitry Andric     AU.setPreservesCFG();
2170b57cec5SDimitry Andric   }
2180b57cec5SDimitry Andric };
2190b57cec5SDimitry Andric }
2200b57cec5SDimitry Andric 
2210b57cec5SDimitry Andric char ExpandReductions::ID;
2220b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions",
2230b57cec5SDimitry Andric                       "Expand reduction intrinsics", false, false)
2240b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
2250b57cec5SDimitry Andric INITIALIZE_PASS_END(ExpandReductions, "expand-reductions",
2260b57cec5SDimitry Andric                     "Expand reduction intrinsics", false, false)
2270b57cec5SDimitry Andric 
2280b57cec5SDimitry Andric FunctionPass *llvm::createExpandReductionsPass() {
2290b57cec5SDimitry Andric   return new ExpandReductions();
2300b57cec5SDimitry Andric }
2310b57cec5SDimitry Andric 
2320b57cec5SDimitry Andric PreservedAnalyses ExpandReductionsPass::run(Function &F,
2330b57cec5SDimitry Andric                                             FunctionAnalysisManager &AM) {
2340b57cec5SDimitry Andric   const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
2350b57cec5SDimitry Andric   if (!expandReductions(F, &TTI))
2360b57cec5SDimitry Andric     return PreservedAnalyses::all();
2370b57cec5SDimitry Andric   PreservedAnalyses PA;
2380b57cec5SDimitry Andric   PA.preserveSet<CFGAnalyses>();
2390b57cec5SDimitry Andric   return PA;
2400b57cec5SDimitry Andric }
241