//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a RISC-V specific version of CodeGenPrepare.
// It munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-codegenprepare"
#define PASS_NAME "RISC-V CodeGenPrepare"

namespace {

class RISCVCodeGenPrepare : public FunctionPass,
                            public InstVisitor<RISCVCodeGenPrepare, bool> {
  const DataLayout *DL;
  const DominatorTree *DT;
  const RISCVSubtarget *ST;

public:
  static char ID;

  RISCVCodeGenPrepare() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return PASS_NAME; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetPassConfig>();
  }

  bool visitInstruction(Instruction &I) { return false; }
  bool visitAnd(BinaryOperator &BO);
  bool visitIntrinsicInst(IntrinsicInst &I);
  bool expandVPStrideLoad(IntrinsicInst &I);
};

} // end anonymous namespace

// Try to optimize (i64 (and (zext nneg (i32 X)), C1)) if C1 has bit 31 set,
// but bits 63:32 are zero. Since the zext is non-negative, bit 31 of X is
// known to be 0, so we can fill the upper 32 bits of C1 with ones without
// changing the result.
bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
  if (!ST->is64Bit())
    return false;

  if (!BO.getType()->isIntegerTy(64))
    return false;

  using namespace PatternMatch;

  // Left hand side should be a zext nneg.
  Value *LHSSrc;
  if (!match(BO.getOperand(0), m_NNegZExt(m_Value(LHSSrc))))
    return false;

  if (!LHSSrc->getType()->isIntegerTy(32))
    return false;

  // Right hand side should be a constant.
  Value *RHS = BO.getOperand(1);

  auto *CI = dyn_cast<ConstantInt>(RHS);
  if (!CI)
    return false;
  uint64_t C = CI->getZExtValue();

  // Look for constants that fit in 32 bits but not simm12, and can be made
  // into simm12 by sign extending bit 31. This will allow the use of ANDI.
  // TODO: Is it worth doing this for simm32 constants as well?
  if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C)))
    return false;

  // Sign extend the constant and replace the And operand.
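  // For illustration (a hypothetical example, not taken from a test): with
  // C = 0xFFFFFF00, bit 31 is set and bits 63:32 are zero. Sign extending
  // bit 31 gives 0xFFFFFFFFFFFFFF00 = -256, which fits in simm12:
  //
  //   %e = zext nneg i32 %x to i64
  //   %r = and i64 %e, 4294967040    ; 0xFFFFFF00, needs LUI/ADDI to build
  // ->
  //   %e = zext nneg i32 %x to i64
  //   %r = and i64 %e, -256          ; selectable as a single ANDI
  //
  // Because zext nneg guarantees bit 31 of %x is zero, the extra ones in
  // bits 63:32 of the mask cannot change the result.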
  C = SignExtend64<32>(C);
  BO.setOperand(1, ConstantInt::get(RHS->getType(), C));

  return true;
}

// LLVM vector reduction intrinsics return a scalar result, but on RISC-V,
// vector reduction instructions write the result in the first element of a
// vector register. So when a reduction in a loop uses a scalar phi, we end up
// with unnecessary scalar moves:
//
// loop:
// vfmv.s.f v10, fa0
// vfredosum.vs v8, v8, v10
// vfmv.f.s fa0, v8
//
// This mainly affects ordered fadd reductions, since other types of reduction
// typically use element-wise vectorization in the loop body. This tries to
// vectorize any scalar phis that feed into a fadd reduction:
//
// loop:
// %phi = phi float [ ..., %entry ], [ %acc, %loop ]
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi,
//                                                    <vscale x 2 x float> %vec)
//
// ->
//
// loop:
// %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ]
// %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi.scalar,
//                                                    <vscale x 2 x float> %vec)
// %acc.vec = insertelement <vscale x 2 x float> poison, float %acc, i64 0
//
// This eliminates the scalar -> vector -> scalar crossing during instruction
// selection.
bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  if (expandVPStrideLoad(I))
    return true;

  if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd)
    return false;

  auto *PHI = dyn_cast<PHINode>(I.getOperand(0));
  if (!PHI || !PHI->hasOneUse() ||
      !llvm::is_contained(PHI->incoming_values(), &I))
    return false;

  Type *VecTy = I.getOperand(1)->getType();
  IRBuilder<> Builder(PHI);
  auto *VecPHI = Builder.CreatePHI(VecTy, PHI->getNumIncomingValues());

  for (auto *BB : PHI->blocks()) {
    Builder.SetInsertPoint(BB->getTerminator());
    Value *InsertElt = Builder.CreateInsertElement(
        VecTy, PHI->getIncomingValueForBlock(BB), (uint64_t)0);
    VecPHI->addIncoming(InsertElt, BB);
  }

  Builder.SetInsertPoint(&I);
  I.setOperand(0, Builder.CreateExtractElement(VecPHI, (uint64_t)0));

  PHI->eraseFromParent();

  return true;
}

// Always expand zero-strided loads so we match more .vx splat patterns, even
// if we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will
// convert it back to a strided load if the target has optimized zero-stride
// loads.
bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
  Value *BasePtr, *VL;

  using namespace PatternMatch;
  if (!match(&II, m_Intrinsic<Intrinsic::experimental_vp_strided_load>(
                      m_Value(BasePtr), m_Zero(), m_AllOnes(), m_Value(VL))))
    return false;

  // If SEW > XLEN, a splat will get lowered as a zero-strided load anyway, so
  // avoid expanding here.
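  // On RV32, for example, an i64 element has SEW = 64 > XLEN = 32, so the
  // splat could not be formed from a single scalar register anyway.
  //
  // When the remaining checks pass, the expansion below rewrites (a sketch
  // with hypothetical IR; intrinsic name mangling elided):
  //
  //   %v = call <vscale x 2 x i32> @llvm.experimental.vp.strided.load...(
  //            ptr %p, i64 0, <vscale x 2 x i1> splat (i1 true), i32 %vl)
  //
  // into
  //
  //   %s = load i32, ptr %p
  //   %v = call <vscale x 2 x i32> @llvm.experimental.vp.splat...(
  //            i32 %s, <vscale x 2 x i1> splat (i1 true), i32 %vl)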
  if (II.getType()->getScalarSizeInBits() > ST->getXLen())
    return false;

  if (!isKnownNonZero(VL, {*DL, DT, nullptr, &II}))
    return false;

  auto *VTy = cast<VectorType>(II.getType());

  IRBuilder<> Builder(&II);
  Type *STy = VTy->getElementType();
  Value *Val = Builder.CreateLoad(STy, BasePtr);
  Value *Res = Builder.CreateIntrinsic(Intrinsic::experimental_vp_splat, {VTy},
                                       {Val, II.getOperand(2), VL});

  II.replaceAllUsesWith(Res);
  II.eraseFromParent();
  return true;
}

bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto &TPC = getAnalysis<TargetPassConfig>();
  auto &TM = TPC.getTM<RISCVTargetMachine>();
  ST = &TM.getSubtarget<RISCVSubtarget>(F);

  DL = &F.getDataLayout();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  bool MadeChange = false;
  for (auto &BB : F)
    for (Instruction &I : llvm::make_early_inc_range(BB))
      MadeChange |= visit(I);

  return MadeChange;
}

INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)

char RISCVCodeGenPrepare::ID = 0;

FunctionPass *llvm::createRISCVCodeGenPreparePass() {
  return new RISCVCodeGenPrepare();
}
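
// For reference, a sketch of how such a pass is typically wired into the
// backend pipeline (the exact hook in RISCVTargetMachine.cpp is an assumption
// here, not part of this file):
//
//   void RISCVPassConfig::addCodeGenPrepare() {
//     if (getOptLevel() != CodeGenOptLevel::None)
//       addPass(createRISCVCodeGenPreparePass());
//     TargetPassConfig::addCodeGenPrepare();
//   }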