1 //===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Performs general IR level optimizations on SVE intrinsics. 11 // 12 // The main goal of this pass is to remove unnecessary reinterpret 13 // intrinsics (llvm.aarch64.sve.convert.[to|from].svbool), e.g: 14 // 15 // %1 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a) 16 // %2 = @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1) 17 // 18 // This pass also looks for ptest intrinsics & phi instructions where the 19 // operands are being needlessly converted to and from svbool_t. 20 // 21 //===----------------------------------------------------------------------===// 22 23 #include "Utils/AArch64BaseInfo.h" 24 #include "llvm/ADT/PostOrderIterator.h" 25 #include "llvm/ADT/SetVector.h" 26 #include "llvm/IR/Constants.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/IntrinsicInst.h" 31 #include "llvm/IR/IntrinsicsAArch64.h" 32 #include "llvm/IR/LLVMContext.h" 33 #include "llvm/IR/PatternMatch.h" 34 #include "llvm/InitializePasses.h" 35 #include "llvm/Support/Debug.h" 36 37 using namespace llvm; 38 using namespace llvm::PatternMatch; 39 40 #define DEBUG_TYPE "sve-intrinsic-opts" 41 42 namespace llvm { 43 void initializeSVEIntrinsicOptsPass(PassRegistry &); 44 } 45 46 namespace { 47 struct SVEIntrinsicOpts : public ModulePass { 48 static char ID; // Pass identification, replacement for typeid 49 SVEIntrinsicOpts() : ModulePass(ID) { 50 initializeSVEIntrinsicOptsPass(*PassRegistry::getPassRegistry()); 51 } 52 53 bool runOnModule(Module &M) override; 54 void getAnalysisUsage(AnalysisUsage &AU) const override; 55 56 private: 57 static IntrinsicInst *isReinterpretToSVBool(Value *V); 58 59 static bool optimizeIntrinsic(Instruction *I); 60 61 bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions); 62 63 static bool optimizeConvertFromSVBool(IntrinsicInst *I); 64 static bool optimizePTest(IntrinsicInst *I); 65 66 static bool processPhiNode(IntrinsicInst *I); 67 }; 68 } // end anonymous namespace 69 70 void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const { 71 AU.addRequired<DominatorTreeWrapperPass>(); 72 AU.setPreservesCFG(); 73 } 74 75 char SVEIntrinsicOpts::ID = 0; 76 static const char *name = "SVE intrinsics optimizations"; 77 INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false) 78 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 79 INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false) 80 81 namespace llvm { 82 ModulePass *createSVEIntrinsicOptsPass() { return new SVEIntrinsicOpts(); } 83 } // namespace llvm 84 85 /// Returns V if it's a cast from <n x 16 x i1> (aka svbool_t), nullptr 86 /// otherwise. 87 IntrinsicInst *SVEIntrinsicOpts::isReinterpretToSVBool(Value *V) { 88 IntrinsicInst *I = dyn_cast<IntrinsicInst>(V); 89 if (!I) 90 return nullptr; 91 92 if (I->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool) 93 return nullptr; 94 95 return I; 96 } 97 98 /// The function will remove redundant reinterprets casting in the presence 99 /// of the control flow 100 bool SVEIntrinsicOpts::processPhiNode(IntrinsicInst *X) { 101 102 SmallVector<Instruction *, 32> Worklist; 103 auto RequiredType = X->getType(); 104 105 auto *PN = dyn_cast<PHINode>(X->getArgOperand(0)); 106 assert(PN && "Expected Phi Node!"); 107 108 // Don't create a new Phi unless we can remove the old one. 109 if (!PN->hasOneUse()) 110 return false; 111 112 for (Value *IncValPhi : PN->incoming_values()) { 113 auto *Reinterpret = isReinterpretToSVBool(IncValPhi); 114 if (!Reinterpret || 115 RequiredType != Reinterpret->getArgOperand(0)->getType()) 116 return false; 117 } 118 119 // Create the new Phi 120 LLVMContext &Ctx = PN->getContext(); 121 IRBuilder<> Builder(Ctx); 122 Builder.SetInsertPoint(PN); 123 PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues()); 124 Worklist.push_back(PN); 125 126 for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) { 127 auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I)); 128 NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I)); 129 Worklist.push_back(Reinterpret); 130 } 131 132 // Cleanup Phi Node and reinterprets 133 X->replaceAllUsesWith(NPN); 134 X->eraseFromParent(); 135 136 for (auto &I : Worklist) 137 if (I->use_empty()) 138 I->eraseFromParent(); 139 140 return true; 141 } 142 143 bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) { 144 IntrinsicInst *Op1 = dyn_cast<IntrinsicInst>(I->getArgOperand(0)); 145 IntrinsicInst *Op2 = dyn_cast<IntrinsicInst>(I->getArgOperand(1)); 146 147 if (Op1 && Op2 && 148 Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && 149 Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && 150 Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) { 151 152 Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)}; 153 Type *Tys[] = {Op1->getArgOperand(0)->getType()}; 154 Module *M = I->getParent()->getParent()->getParent(); 155 156 auto Fn = Intrinsic::getDeclaration(M, I->getIntrinsicID(), Tys); 157 auto CI = CallInst::Create(Fn, Ops, I->getName(), I); 158 159 I->replaceAllUsesWith(CI); 160 I->eraseFromParent(); 161 if (Op1->use_empty()) 162 Op1->eraseFromParent(); 163 if (Op1 != Op2 && Op2->use_empty()) 164 Op2->eraseFromParent(); 165 166 return true; 167 } 168 169 return false; 170 } 171 172 bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) { 173 assert(I->getIntrinsicID() == Intrinsic::aarch64_sve_convert_from_svbool && 174 "Unexpected opcode"); 175 176 // If the reinterpret instruction operand is a PHI Node 177 if (isa<PHINode>(I->getArgOperand(0))) 178 return processPhiNode(I); 179 180 // If we have a reinterpret intrinsic I of type A which is converting from 181 // another reinterpret Y of type B, and the source type of Y is A, then we can 182 // elide away both reinterprets if there are no other users of Y. 183 auto *Y = isReinterpretToSVBool(I->getArgOperand(0)); 184 if (!Y) 185 return false; 186 187 Value *SourceVal = Y->getArgOperand(0); 188 if (I->getType() != SourceVal->getType()) 189 return false; 190 191 I->replaceAllUsesWith(SourceVal); 192 I->eraseFromParent(); 193 if (Y->use_empty()) 194 Y->eraseFromParent(); 195 196 return true; 197 } 198 199 bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) { 200 IntrinsicInst *IntrI = dyn_cast<IntrinsicInst>(I); 201 if (!IntrI) 202 return false; 203 204 switch (IntrI->getIntrinsicID()) { 205 case Intrinsic::aarch64_sve_convert_from_svbool: 206 return optimizeConvertFromSVBool(IntrI); 207 case Intrinsic::aarch64_sve_ptest_any: 208 case Intrinsic::aarch64_sve_ptest_first: 209 case Intrinsic::aarch64_sve_ptest_last: 210 return optimizePTest(IntrI); 211 default: 212 return false; 213 } 214 215 return true; 216 } 217 218 bool SVEIntrinsicOpts::optimizeFunctions( 219 SmallSetVector<Function *, 4> &Functions) { 220 bool Changed = false; 221 for (auto *F : Functions) { 222 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree(); 223 224 // Traverse the DT with an rpo walk so we see defs before uses, allowing 225 // simplification to be done incrementally. 226 BasicBlock *Root = DT->getRoot(); 227 ReversePostOrderTraversal<BasicBlock *> RPOT(Root); 228 for (auto *BB : RPOT) 229 for (Instruction &I : make_early_inc_range(*BB)) 230 Changed |= optimizeIntrinsic(&I); 231 } 232 return Changed; 233 } 234 235 bool SVEIntrinsicOpts::runOnModule(Module &M) { 236 bool Changed = false; 237 SmallSetVector<Function *, 4> Functions; 238 239 // Check for SVE intrinsic declarations first so that we only iterate over 240 // relevant functions. Where an appropriate declaration is found, store the 241 // function(s) where it is used so we can target these only. 242 for (auto &F : M.getFunctionList()) { 243 if (!F.isDeclaration()) 244 continue; 245 246 switch (F.getIntrinsicID()) { 247 case Intrinsic::aarch64_sve_convert_from_svbool: 248 case Intrinsic::aarch64_sve_ptest_any: 249 case Intrinsic::aarch64_sve_ptest_first: 250 case Intrinsic::aarch64_sve_ptest_last: 251 for (auto I = F.user_begin(), E = F.user_end(); I != E;) { 252 auto *Inst = dyn_cast<Instruction>(*I++); 253 Functions.insert(Inst->getFunction()); 254 } 255 break; 256 default: 257 break; 258 } 259 } 260 261 if (!Functions.empty()) 262 Changed |= optimizeFunctions(Functions); 263 264 return Changed; 265 } 266