10b57cec5SDimitry Andric //===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This pass implements IR expansion for reduction intrinsics, allowing targets 10*e8d8bef9SDimitry Andric // to enable the intrinsics until just before codegen. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "llvm/CodeGen/ExpandReductions.h" 150b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 160b57cec5SDimitry Andric #include "llvm/CodeGen/Passes.h" 170b57cec5SDimitry Andric #include "llvm/IR/Function.h" 180b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 190b57cec5SDimitry Andric #include "llvm/IR/InstIterator.h" 200b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 210b57cec5SDimitry Andric #include "llvm/IR/Intrinsics.h" 220b57cec5SDimitry Andric #include "llvm/IR/Module.h" 23480093f4SDimitry Andric #include "llvm/InitializePasses.h" 240b57cec5SDimitry Andric #include "llvm/Pass.h" 250b57cec5SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h" 260b57cec5SDimitry Andric 270b57cec5SDimitry Andric using namespace llvm; 280b57cec5SDimitry Andric 290b57cec5SDimitry Andric namespace { 300b57cec5SDimitry Andric 310b57cec5SDimitry Andric unsigned getOpcode(Intrinsic::ID ID) { 320b57cec5SDimitry Andric switch (ID) { 33*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fadd: 340b57cec5SDimitry Andric return Instruction::FAdd; 35*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmul: 360b57cec5SDimitry Andric return Instruction::FMul; 37*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_add: 380b57cec5SDimitry Andric return Instruction::Add; 39*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_mul: 400b57cec5SDimitry Andric return Instruction::Mul; 41*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_and: 420b57cec5SDimitry Andric return Instruction::And; 43*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_or: 440b57cec5SDimitry Andric return Instruction::Or; 45*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_xor: 460b57cec5SDimitry Andric return Instruction::Xor; 47*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smax: 48*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smin: 49*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umax: 50*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umin: 510b57cec5SDimitry Andric return Instruction::ICmp; 52*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmax: 53*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmin: 540b57cec5SDimitry Andric return Instruction::FCmp; 550b57cec5SDimitry Andric default: 560b57cec5SDimitry Andric llvm_unreachable("Unexpected ID"); 570b57cec5SDimitry Andric } 580b57cec5SDimitry Andric } 590b57cec5SDimitry Andric 60*e8d8bef9SDimitry Andric RecurKind getRK(Intrinsic::ID ID) { 610b57cec5SDimitry Andric switch (ID) { 62*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smax: 63*e8d8bef9SDimitry Andric return RecurKind::SMax; 64*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smin: 65*e8d8bef9SDimitry Andric return RecurKind::SMin; 66*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umax: 67*e8d8bef9SDimitry Andric return RecurKind::UMax; 68*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umin: 69*e8d8bef9SDimitry Andric return RecurKind::UMin; 70*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmax: 71*e8d8bef9SDimitry Andric return RecurKind::FMax; 72*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmin: 73*e8d8bef9SDimitry Andric return RecurKind::FMin; 740b57cec5SDimitry Andric default: 75*e8d8bef9SDimitry Andric return RecurKind::None; 760b57cec5SDimitry Andric } 770b57cec5SDimitry Andric } 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric bool expandReductions(Function &F, const TargetTransformInfo *TTI) { 800b57cec5SDimitry Andric bool Changed = false; 810b57cec5SDimitry Andric SmallVector<IntrinsicInst *, 4> Worklist; 82480093f4SDimitry Andric for (auto &I : instructions(F)) { 83480093f4SDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(&I)) { 84480093f4SDimitry Andric switch (II->getIntrinsicID()) { 85480093f4SDimitry Andric default: break; 86*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fadd: 87*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmul: 88*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_add: 89*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_mul: 90*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_and: 91*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_or: 92*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_xor: 93*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smax: 94*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smin: 95*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umax: 96*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umin: 97*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmax: 98*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmin: 99480093f4SDimitry Andric if (TTI->shouldExpandReduction(II)) 1000b57cec5SDimitry Andric Worklist.push_back(II); 1010b57cec5SDimitry Andric 102480093f4SDimitry Andric break; 103480093f4SDimitry Andric } 104480093f4SDimitry Andric } 105480093f4SDimitry Andric } 1060b57cec5SDimitry Andric 107480093f4SDimitry Andric for (auto *II : Worklist) { 1080b57cec5SDimitry Andric FastMathFlags FMF = 1090b57cec5SDimitry Andric isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{}; 1100b57cec5SDimitry Andric Intrinsic::ID ID = II->getIntrinsicID(); 111*e8d8bef9SDimitry Andric RecurKind RK = getRK(ID); 1120b57cec5SDimitry Andric 1130b57cec5SDimitry Andric Value *Rdx = nullptr; 1140b57cec5SDimitry Andric IRBuilder<> Builder(II); 1150b57cec5SDimitry Andric IRBuilder<>::FastMathFlagGuard FMFGuard(Builder); 1160b57cec5SDimitry Andric Builder.setFastMathFlags(FMF); 1170b57cec5SDimitry Andric switch (ID) { 118480093f4SDimitry Andric default: llvm_unreachable("Unexpected intrinsic!"); 119*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fadd: 120*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmul: { 1210b57cec5SDimitry Andric // FMFs must be attached to the call, otherwise it's an ordered reduction 1220b57cec5SDimitry Andric // and it can't be handled by generating a shuffle sequence. 1230b57cec5SDimitry Andric Value *Acc = II->getArgOperand(0); 1240b57cec5SDimitry Andric Value *Vec = II->getArgOperand(1); 1250b57cec5SDimitry Andric if (!FMF.allowReassoc()) 126*e8d8bef9SDimitry Andric Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), RK); 1270b57cec5SDimitry Andric else { 1285ffd83dbSDimitry Andric if (!isPowerOf2_32( 1295ffd83dbSDimitry Andric cast<FixedVectorType>(Vec->getType())->getNumElements())) 130480093f4SDimitry Andric continue; 131480093f4SDimitry Andric 132*e8d8bef9SDimitry Andric Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); 1330b57cec5SDimitry Andric Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID), 1340b57cec5SDimitry Andric Acc, Rdx, "bin.rdx"); 1350b57cec5SDimitry Andric } 136480093f4SDimitry Andric break; 137480093f4SDimitry Andric } 138*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_add: 139*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_mul: 140*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_and: 141*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_or: 142*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_xor: 143*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smax: 144*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_smin: 145*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umax: 146*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_umin: { 1470b57cec5SDimitry Andric Value *Vec = II->getArgOperand(0); 1485ffd83dbSDimitry Andric if (!isPowerOf2_32( 1495ffd83dbSDimitry Andric cast<FixedVectorType>(Vec->getType())->getNumElements())) 1500b57cec5SDimitry Andric continue; 151480093f4SDimitry Andric 152*e8d8bef9SDimitry Andric Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); 153*e8d8bef9SDimitry Andric break; 154*e8d8bef9SDimitry Andric } 155*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmax: 156*e8d8bef9SDimitry Andric case Intrinsic::vector_reduce_fmin: { 157*e8d8bef9SDimitry Andric // FIXME: We only expand 'fast' reductions here because the underlying 158*e8d8bef9SDimitry Andric // code in createMinMaxOp() assumes that comparisons use 'fast' 159*e8d8bef9SDimitry Andric // semantics. 160*e8d8bef9SDimitry Andric Value *Vec = II->getArgOperand(0); 161*e8d8bef9SDimitry Andric if (!isPowerOf2_32( 162*e8d8bef9SDimitry Andric cast<FixedVectorType>(Vec->getType())->getNumElements()) || 163*e8d8bef9SDimitry Andric !FMF.isFast()) 164*e8d8bef9SDimitry Andric continue; 165*e8d8bef9SDimitry Andric 166*e8d8bef9SDimitry Andric Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), RK); 167480093f4SDimitry Andric break; 168480093f4SDimitry Andric } 1690b57cec5SDimitry Andric } 1700b57cec5SDimitry Andric II->replaceAllUsesWith(Rdx); 1710b57cec5SDimitry Andric II->eraseFromParent(); 1720b57cec5SDimitry Andric Changed = true; 1730b57cec5SDimitry Andric } 1740b57cec5SDimitry Andric return Changed; 1750b57cec5SDimitry Andric } 1760b57cec5SDimitry Andric 1770b57cec5SDimitry Andric class ExpandReductions : public FunctionPass { 1780b57cec5SDimitry Andric public: 1790b57cec5SDimitry Andric static char ID; 1800b57cec5SDimitry Andric ExpandReductions() : FunctionPass(ID) { 1810b57cec5SDimitry Andric initializeExpandReductionsPass(*PassRegistry::getPassRegistry()); 1820b57cec5SDimitry Andric } 1830b57cec5SDimitry Andric 1840b57cec5SDimitry Andric bool runOnFunction(Function &F) override { 1850b57cec5SDimitry Andric const auto *TTI =&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 1860b57cec5SDimitry Andric return expandReductions(F, TTI); 1870b57cec5SDimitry Andric } 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 1900b57cec5SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 1910b57cec5SDimitry Andric AU.setPreservesCFG(); 1920b57cec5SDimitry Andric } 1930b57cec5SDimitry Andric }; 1940b57cec5SDimitry Andric } 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric char ExpandReductions::ID; 1970b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(ExpandReductions, "expand-reductions", 1980b57cec5SDimitry Andric "Expand reduction intrinsics", false, false) 1990b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 2000b57cec5SDimitry Andric INITIALIZE_PASS_END(ExpandReductions, "expand-reductions", 2010b57cec5SDimitry Andric "Expand reduction intrinsics", false, false) 2020b57cec5SDimitry Andric 2030b57cec5SDimitry Andric FunctionPass *llvm::createExpandReductionsPass() { 2040b57cec5SDimitry Andric return new ExpandReductions(); 2050b57cec5SDimitry Andric } 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric PreservedAnalyses ExpandReductionsPass::run(Function &F, 2080b57cec5SDimitry Andric FunctionAnalysisManager &AM) { 2090b57cec5SDimitry Andric const auto &TTI = AM.getResult<TargetIRAnalysis>(F); 2100b57cec5SDimitry Andric if (!expandReductions(F, &TTI)) 2110b57cec5SDimitry Andric return PreservedAnalyses::all(); 2120b57cec5SDimitry Andric PreservedAnalyses PA; 2130b57cec5SDimitry Andric PA.preserveSet<CFGAnalyses>(); 2140b57cec5SDimitry Andric return PA; 2150b57cec5SDimitry Andric } 216