1*06c3fb27SDimitry Andric //===- ExpandReductions.cpp - Expand reduction intrinsics -----------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass implements IR expansion for reduction intrinsics, allowing targets 10e8d8bef9SDimitry Andric // to enable the intrinsics until just before codegen. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "llvm/CodeGen/ExpandReductions.h" 150b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 160b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h" 170b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 180b57cec5SDimitry Andric #include "llvm/IR/InstIterator.h" 190b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 200b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h" 21480093f4SDimitry Andric #include "llvm/InitializePasses.h" 220b57cec5SDimitry Andric #include "llvm/Pass.h" 230b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 240b57cec5SDimitry Andric 250b57cec5SDimitry Andric using namespace llvm; 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric namespace { 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric unsigned getOpcode(Intrinsic::ID ID) { 300b57cec5SDimitry Andric switch (ID) { 31e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fadd: 320b57cec5SDimitry Andric return Instruction::FAdd; 33e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmul: 340b57cec5SDimitry Andric return Instruction::FMul; 35e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_add: 360b57cec5SDimitry Andric return Instruction::Add; 37e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_mul: 380b57cec5SDimitry Andric return Instruction::Mul; 39e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_and: 400b57cec5SDimitry Andric return Instruction::And; 41e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_or: 420b57cec5SDimitry Andric return Instruction::Or; 43e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_xor: 440b57cec5SDimitry Andric return Instruction::Xor; 45e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smax: 46e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smin: 47e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umax: 48e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umin: 490b57cec5SDimitry Andric return Instruction::ICmp; 50e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmax: 51e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmin: 520b57cec5SDimitry Andric return Instruction::FCmp; 530b57cec5SDimitry Andric default: 540b57cec5SDimitry Andric llvm_unreachable("Unexpected ID"); 550b57cec5SDimitry Andric } 560b57cec5SDimitry Andric } 570b57cec5SDimitry Andric 58e8d8bef9SDimitry Andric RecurKind getRK(Intrinsic::ID ID) { 590b57cec5SDimitry Andric switch (ID) { 60e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smax: 61e8d8bef9SDimitry Andric return RecurKind::SMax; 62e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smin: 63e8d8bef9SDimitry Andric return RecurKind::SMin; 64e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umax: 65e8d8bef9SDimitry Andric return RecurKind::UMax; 66e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umin: 67e8d8bef9SDimitry Andric return RecurKind::UMin; 68e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmax: 69e8d8bef9SDimitry Andric return RecurKind::FMax; 70e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmin: 71e8d8bef9SDimitry Andric return RecurKind::FMin; 720b57cec5SDimitry Andric default: 73e8d8bef9SDimitry Andric return RecurKind::None; 740b57cec5SDimitry Andric } 750b57cec5SDimitry Andric } 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric bool expandReductions(Function &F, const TargetTransformInfo *TTI) { 780b57cec5SDimitry Andric bool Changed = false; 790b57cec5SDimitry Andric SmallVector<IntrinsicInst *, 4> Worklist; 80480093f4SDimitry Andric for (auto &I : instructions(F)) { 81480093f4SDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(&I)) { 82480093f4SDimitry Andric switch (II->getIntrinsicID()) { 83480093f4SDimitry Andric default: break; 84e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fadd: 85e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmul: 86e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_add: 87e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_mul: 88e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_and: 89e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_or: 90e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_xor: 91e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smax: 92e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smin: 93e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umax: 94e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umin: 95e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmax: 96e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmin: 97480093f4SDimitry Andric if (TTI->shouldExpandReduction(II)) 980b57cec5SDimitry Andric Worklist.push_back(II); 990b57cec5SDimitry Andric 100480093f4SDimitry Andric break; 101480093f4SDimitry Andric } 102480093f4SDimitry Andric } 103480093f4SDimitry Andric } 1040b57cec5SDimitry Andric 105480093f4SDimitry Andric for (auto *II : Worklist) { 1060b57cec5SDimitry Andric FastMathFlags FMF = 1070b57cec5SDimitry Andric isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{}; 1080b57cec5SDimitry Andric Intrinsic::ID ID = II->getIntrinsicID(); 109e8d8bef9SDimitry Andric RecurKind RK = getRK(ID); 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric Value *Rdx = nullptr; 1120b57cec5SDimitry Andric IRBuilder<> Builder(II); 1130b57cec5SDimitry Andric IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); 1140b57cec5SDimitry Andric Builder.setFastMathFlags(FMF); 1150b57cec5SDimitry Andric switch (ID) { 116480093f4SDimitry Andric default: llvm_unreachable("Unexpected intrinsic!"); 117e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fadd: 118e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmul: { 1190b57cec5SDimitry Andric // FMFs must be attached to the call, otherwise it's an ordered reduction 1200b57cec5SDimitry Andric // and it can't be handled by generating a shuffle sequence. 1210b57cec5SDimitry Andric Value *Acc = II->getArgOperand(0); 1220b57cec5SDimitry Andric Value *Vec = II->getArgOperand(1); 1230b57cec5SDimitry Andric if (!FMF.allowReassoc()) 124e8d8bef9SDimitry Andric Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), RK); 1250b57cec5SDimitry Andric else { 1265ffd83dbSDimitry Andric if (!isPowerOf2_32( 1275ffd83dbSDimitry Andric cast<FixedVectorType>(Vec->getType())->getNumElements())) 128480093f4SDimitry Andric continue; 129480093f4SDimitry Andric 130e8d8bef9SDimitry Andric Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); 1310b57cec5SDimitry Andric Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID), 1320b57cec5SDimitry Andric Acc, Rdx, "bin.rdx"); 1330b57cec5SDimitry Andric } 134480093f4SDimitry Andric break; 135480093f4SDimitry Andric } 136*06c3fb27SDimitry Andric case Intrinsic::vector_reduce_and: 137*06c3fb27SDimitry Andric case Intrinsic::vector_reduce_or: { 138*06c3fb27SDimitry Andric // Canonicalize logical or/and reductions: 139*06c3fb27SDimitry Andric // Or reduction for i1 is represented as: 140*06c3fb27SDimitry Andric // %val = bitcast <ReduxWidth x i1> to iReduxWidth 141*06c3fb27SDimitry Andric // %res = cmp ne iReduxWidth %val, 0 142*06c3fb27SDimitry Andric // And reduction for i1 is represented as: 143*06c3fb27SDimitry Andric // %val = bitcast <ReduxWidth x i1> to iReduxWidth 144*06c3fb27SDimitry Andric // %res = cmp eq iReduxWidth %val, 11111 145*06c3fb27SDimitry Andric Value *Vec = II->getArgOperand(0); 146*06c3fb27SDimitry Andric auto *FTy = cast<FixedVectorType>(Vec->getType()); 147*06c3fb27SDimitry Andric unsigned NumElts = FTy->getNumElements(); 148*06c3fb27SDimitry Andric if (!isPowerOf2_32(NumElts)) 149*06c3fb27SDimitry Andric continue; 150*06c3fb27SDimitry Andric 151*06c3fb27SDimitry Andric if (FTy->getElementType() == Builder.getInt1Ty()) { 152*06c3fb27SDimitry Andric Rdx = Builder.CreateBitCast(Vec, Builder.getIntNTy(NumElts)); 153*06c3fb27SDimitry Andric if (ID == Intrinsic::vector_reduce_and) { 154*06c3fb27SDimitry Andric Rdx = Builder.CreateICmpEQ( 155*06c3fb27SDimitry Andric Rdx, ConstantInt::getAllOnesValue(Rdx->getType())); 156*06c3fb27SDimitry Andric } else { 157*06c3fb27SDimitry Andric assert(ID == Intrinsic::vector_reduce_or && "Expected or reduction."); 158*06c3fb27SDimitry Andric Rdx = Builder.CreateIsNotNull(Rdx); 159*06c3fb27SDimitry Andric } 160*06c3fb27SDimitry Andric break; 161*06c3fb27SDimitry Andric } 162*06c3fb27SDimitry Andric 163*06c3fb27SDimitry Andric Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); 164*06c3fb27SDimitry Andric break; 165*06c3fb27SDimitry Andric } 166e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_add: 167e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_mul: 168e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_xor: 169e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smax: 170e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smin: 171e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umax: 172e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umin: { 1730b57cec5SDimitry Andric Value *Vec = II->getArgOperand(0); 1745ffd83dbSDimitry Andric if (!isPowerOf2_32( 1755ffd83dbSDimitry Andric cast<FixedVectorType>(Vec->getType())->getNumElements())) 1760b57cec5SDimitry Andric continue; 177480093f4SDimitry Andric 178e8d8bef9SDimitry Andric Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); 179e8d8bef9SDimitry Andric break; 180e8d8bef9SDimitry Andric } 181e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmax: 182e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmin: { 183fe6060f1SDimitry Andric // We require "nnan" to use a shuffle reduction; "nsz" is implied by the 184fe6060f1SDimitry Andric // semantics of the reduction. 185e8d8bef9SDimitry Andric Value *Vec = II->getArgOperand(0); 186e8d8bef9SDimitry Andric if (!isPowerOf2_32( 187e8d8bef9SDimitry Andric cast<FixedVectorType>(Vec->getType())->getNumElements()) || 188fe6060f1SDimitry Andric !FMF.noNaNs()) 189e8d8bef9SDimitry Andric continue; 190e8d8bef9SDimitry Andric 191e8d8bef9SDimitry Andric Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); 192480093f4SDimitry Andric break; 193480093f4SDimitry Andric } 1940b57cec5SDimitry Andric } 1950b57cec5SDimitry Andric II->replaceAllUsesWith(Rdx); 1960b57cec5SDimitry Andric II->eraseFromParent(); 1970b57cec5SDimitry Andric Changed = true; 1980b57cec5SDimitry Andric } 1990b57cec5SDimitry Andric return Changed; 2000b57cec5SDimitry Andric } 2010b57cec5SDimitry Andric 2020b57cec5SDimitry Andric class ExpandReductions : public FunctionPass { 2030b57cec5SDimitry Andric public: 2040b57cec5SDimitry Andric static char ID; 2050b57cec5SDimitry Andric ExpandReductions() : FunctionPass(ID) { 2060b57cec5SDimitry Andric initializeExpandReductionsPass(*PassRegistry::getPassRegistry()); 2070b57cec5SDimitry Andric } 2080b57cec5SDimitry Andric 2090b57cec5SDimitry Andric bool runOnFunction(Function &F) override { 2100b57cec5SDimitry Andric const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 2110b57cec5SDimitry Andric return expandReductions(F, TTI); 2120b57cec5SDimitry Andric } 2130b57cec5SDimitry Andric 2140b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 2150b57cec5SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 2160b57cec5SDimitry Andric AU.setPreservesCFG(); 2170b57cec5SDimitry Andric } 2180b57cec5SDimitry Andric }; 2190b57cec5SDimitry Andric } 2200b57cec5SDimitry Andric 2210b57cec5SDimitry Andric char ExpandReductions::ID; 2220b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions", 2230b57cec5SDimitry Andric "Expand reduction intrinsics", false, false) 2240b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 2250b57cec5SDimitry Andric INITIALIZE_PASS_END(ExpandReductions, "expand-reductions", 2260b57cec5SDimitry Andric "Expand reduction intrinsics", false, false) 2270b57cec5SDimitry Andric 2280b57cec5SDimitry Andric FunctionPass *llvm::createExpandReductionsPass() { 2290b57cec5SDimitry Andric return new ExpandReductions(); 2300b57cec5SDimitry Andric } 2310b57cec5SDimitry Andric 2320b57cec5SDimitry Andric PreservedAnalyses ExpandReductionsPass::run(Function &F, 2330b57cec5SDimitry Andric FunctionAnalysisManager &AM) { 2340b57cec5SDimitry Andric const auto &TTI = AM.getResult<TargetIRAnalysis>(F); 2350b57cec5SDimitry Andric if (!expandReductions(F, &TTI)) 2360b57cec5SDimitry Andric return PreservedAnalyses::all(); 2370b57cec5SDimitry Andric PreservedAnalyses PA; 2380b57cec5SDimitry Andric PA.preserveSet<CFGAnalyses>(); 2390b57cec5SDimitry Andric return PA; 2400b57cec5SDimitry Andric } 241