1 //===----- RISCVCodeGenPrepare.cpp ----------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This is a RISC-V specific version of CodeGenPrepare. 10 // It munges the code in the input function to better prepare it for 11 // SelectionDAG-based code generation. This works around limitations in it's 12 // basic-block-at-a-time approach. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "RISCV.h" 17 #include "RISCVTargetMachine.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/Analysis/ValueTracking.h" 20 #include "llvm/CodeGen/TargetPassConfig.h" 21 #include "llvm/IR/IRBuilder.h" 22 #include "llvm/IR/InstVisitor.h" 23 #include "llvm/IR/Intrinsics.h" 24 #include "llvm/IR/PatternMatch.h" 25 #include "llvm/InitializePasses.h" 26 #include "llvm/Pass.h" 27 28 using namespace llvm; 29 30 #define DEBUG_TYPE "riscv-codegenprepare" 31 #define PASS_NAME "RISC-V CodeGenPrepare" 32 33 namespace { 34 35 class RISCVCodeGenPrepare : public FunctionPass, 36 public InstVisitor<RISCVCodeGenPrepare, bool> { 37 const DataLayout *DL; 38 const RISCVSubtarget *ST; 39 40 public: 41 static char ID; 42 43 RISCVCodeGenPrepare() : FunctionPass(ID) {} 44 45 bool runOnFunction(Function &F) override; 46 47 StringRef getPassName() const override { return PASS_NAME; } 48 49 void getAnalysisUsage(AnalysisUsage &AU) const override { 50 AU.setPreservesCFG(); 51 AU.addRequired<TargetPassConfig>(); 52 } 53 54 bool visitInstruction(Instruction &I) { return false; } 55 bool visitAnd(BinaryOperator &BO); 56 bool visitIntrinsicInst(IntrinsicInst &I); 57 }; 58 59 } // end anonymous namespace 60 61 // Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set, 62 // but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill 63 // the upper 32 bits with ones. 64 bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) { 65 if (!ST->is64Bit()) 66 return false; 67 68 if (!BO.getType()->isIntegerTy(64)) 69 return false; 70 71 using namespace PatternMatch; 72 73 // Left hand side should be a zext nneg. 74 Value *LHSSrc; 75 if (!match(BO.getOperand(0), m_NNegZExt(m_Value(LHSSrc)))) 76 return false; 77 78 if (!LHSSrc->getType()->isIntegerTy(32)) 79 return false; 80 81 // Right hand side should be a constant. 82 Value *RHS = BO.getOperand(1); 83 84 auto *CI = dyn_cast<ConstantInt>(RHS); 85 if (!CI) 86 return false; 87 uint64_t C = CI->getZExtValue(); 88 89 // Look for constants that fit in 32 bits but not simm12, and can be made 90 // into simm12 by sign extending bit 31. This will allow use of ANDI. 91 // TODO: Is worth making simm32? 92 if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C))) 93 return false; 94 95 // Sign extend the constant and replace the And operand. 96 C = SignExtend64<32>(C); 97 BO.setOperand(1, ConstantInt::get(RHS->getType(), C)); 98 99 return true; 100 } 101 102 // LLVM vector reduction intrinsics return a scalar result, but on RISC-V vector 103 // reduction instructions write the result in the first element of a vector 104 // register. So when a reduction in a loop uses a scalar phi, we end up with 105 // unnecessary scalar moves: 106 // 107 // loop: 108 // vfmv.s.f v10, fa0 109 // vfredosum.vs v8, v8, v10 110 // vfmv.f.s fa0, v8 111 // 112 // This mainly affects ordered fadd reductions, since other types of reduction 113 // typically use element-wise vectorisation in the loop body. This tries to 114 // vectorize any scalar phis that feed into a fadd reduction: 115 // 116 // loop: 117 // %phi = phi <float> [ ..., %entry ], [ %acc, %loop ] 118 // %acc = call float @llvm.vector.reduce.fadd.nxv4f32(float %phi, <vscale x 2 x float> %vec) 119 // 120 // -> 121 // 122 // loop: 123 // %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ] 124 // %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0 125 // %acc = call float @llvm.vector.reduce.fadd.nxv4f32(float %x, <vscale x 2 x float> %vec) 126 // %acc.vec = insertelement <vscale x 2 x float> poison, float %acc.next, i64 0 127 // 128 // Which eliminates the scalar -> vector -> scalar crossing during instruction 129 // selection. 130 bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) { 131 if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd) 132 return false; 133 134 auto *PHI = dyn_cast<PHINode>(I.getOperand(0)); 135 if (!PHI || !PHI->hasOneUse() || 136 !llvm::is_contained(PHI->incoming_values(), &I)) 137 return false; 138 139 Type *VecTy = I.getOperand(1)->getType(); 140 IRBuilder<> Builder(PHI); 141 auto *VecPHI = Builder.CreatePHI(VecTy, PHI->getNumIncomingValues()); 142 143 for (auto *BB : PHI->blocks()) { 144 Builder.SetInsertPoint(BB->getTerminator()); 145 Value *InsertElt = Builder.CreateInsertElement( 146 VecTy, PHI->getIncomingValueForBlock(BB), (uint64_t)0); 147 VecPHI->addIncoming(InsertElt, BB); 148 } 149 150 Builder.SetInsertPoint(&I); 151 I.setOperand(0, Builder.CreateExtractElement(VecPHI, (uint64_t)0)); 152 153 PHI->eraseFromParent(); 154 155 return true; 156 } 157 158 bool RISCVCodeGenPrepare::runOnFunction(Function &F) { 159 if (skipFunction(F)) 160 return false; 161 162 auto &TPC = getAnalysis<TargetPassConfig>(); 163 auto &TM = TPC.getTM<RISCVTargetMachine>(); 164 ST = &TM.getSubtarget<RISCVSubtarget>(F); 165 166 DL = &F.getParent()->getDataLayout(); 167 168 bool MadeChange = false; 169 for (auto &BB : F) 170 for (Instruction &I : llvm::make_early_inc_range(BB)) 171 MadeChange |= visit(I); 172 173 return MadeChange; 174 } 175 176 INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false) 177 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 178 INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false) 179 180 char RISCVCodeGenPrepare::ID = 0; 181 182 FunctionPass *llvm::createRISCVCodeGenPreparePass() { 183 return new RISCVCodeGenPrepare(); 184 } 185