1 //===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Performs general IR level optimizations on SVE intrinsics. 11 // 12 // The main goal of this pass is to remove unnecessary reinterpret 13 // intrinsics (llvm.aarch64.sve.convert.[to|from].svbool), e.g: 14 // 15 // %1 = @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %a) 16 // %2 = @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %1) 17 // 18 // This pass also looks for ptest intrinsics & phi instructions where the 19 // operands are being needlessly converted to and from svbool_t. 20 // 21 //===----------------------------------------------------------------------===// 22 23 #include "Utils/AArch64BaseInfo.h" 24 #include "llvm/ADT/PostOrderIterator.h" 25 #include "llvm/ADT/SetVector.h" 26 #include "llvm/IR/Constants.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/IntrinsicInst.h" 31 #include "llvm/IR/IntrinsicsAArch64.h" 32 #include "llvm/IR/LLVMContext.h" 33 #include "llvm/IR/PatternMatch.h" 34 #include "llvm/InitializePasses.h" 35 #include "llvm/Support/Debug.h" 36 37 using namespace llvm; 38 using namespace llvm::PatternMatch; 39 40 #define DEBUG_TYPE "aarch64-sve-intrinsic-opts" 41 42 namespace llvm { 43 void initializeSVEIntrinsicOptsPass(PassRegistry &); 44 } 45 46 namespace { 47 struct SVEIntrinsicOpts : public ModulePass { 48 static char ID; // Pass identification, replacement for typeid 49 SVEIntrinsicOpts() : ModulePass(ID) { 50 initializeSVEIntrinsicOptsPass(*PassRegistry::getPassRegistry()); 51 } 52 53 bool runOnModule(Module &M) override; 54 void getAnalysisUsage(AnalysisUsage &AU) const override; 55 56 private: 57 static IntrinsicInst *isReinterpretToSVBool(Value *V); 58 59 static bool optimizeIntrinsic(Instruction *I); 60 61 bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions); 62 63 static bool optimizeConvertFromSVBool(IntrinsicInst *I); 64 static bool optimizePTest(IntrinsicInst *I); 65 66 static bool processPhiNode(IntrinsicInst *I); 67 }; 68 } // end anonymous namespace 69 70 void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const { 71 AU.addRequired<DominatorTreeWrapperPass>(); 72 AU.setPreservesCFG(); 73 } 74 75 char SVEIntrinsicOpts::ID = 0; 76 static const char *name = "SVE intrinsics optimizations"; 77 INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false) 78 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass); 79 INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false) 80 81 namespace llvm { 82 ModulePass *createSVEIntrinsicOptsPass() { return new SVEIntrinsicOpts(); } 83 } // namespace llvm 84 85 /// Returns V if it's a cast from <n x 16 x i1> (aka svbool_t), nullptr 86 /// otherwise. 87 IntrinsicInst *SVEIntrinsicOpts::isReinterpretToSVBool(Value *V) { 88 IntrinsicInst *I = dyn_cast<IntrinsicInst>(V); 89 if (!I) 90 return nullptr; 91 92 if (I->getIntrinsicID() != Intrinsic::aarch64_sve_convert_to_svbool) 93 return nullptr; 94 95 return I; 96 } 97 98 /// The function will remove redundant reinterprets casting in the presence 99 /// of the control flow 100 bool SVEIntrinsicOpts::processPhiNode(IntrinsicInst *X) { 101 102 SmallVector<Instruction *, 32> Worklist; 103 auto RequiredType = X->getType(); 104 105 auto *PN = dyn_cast<PHINode>(X->getArgOperand(0)); 106 assert(PN && "Expected Phi Node!"); 107 108 // Don't create a new Phi unless we can remove the old one. 109 if (!PN->hasOneUse()) 110 return false; 111 112 for (Value *IncValPhi : PN->incoming_values()) { 113 auto *Reinterpret = isReinterpretToSVBool(IncValPhi); 114 if (!Reinterpret || 115 RequiredType != Reinterpret->getArgOperand(0)->getType()) 116 return false; 117 } 118 119 // Create the new Phi 120 LLVMContext &Ctx = PN->getContext(); 121 IRBuilder<> Builder(Ctx); 122 Builder.SetInsertPoint(PN); 123 PHINode *NPN = Builder.CreatePHI(RequiredType, PN->getNumIncomingValues()); 124 Worklist.push_back(PN); 125 126 for (unsigned I = 0; I < PN->getNumIncomingValues(); I++) { 127 auto *Reinterpret = cast<Instruction>(PN->getIncomingValue(I)); 128 NPN->addIncoming(Reinterpret->getOperand(0), PN->getIncomingBlock(I)); 129 Worklist.push_back(Reinterpret); 130 } 131 132 // Cleanup Phi Node and reinterprets 133 X->replaceAllUsesWith(NPN); 134 X->eraseFromParent(); 135 136 for (auto &I : Worklist) 137 if (I->use_empty()) 138 I->eraseFromParent(); 139 140 return true; 141 } 142 143 bool SVEIntrinsicOpts::optimizePTest(IntrinsicInst *I) { 144 IntrinsicInst *Op1 = dyn_cast<IntrinsicInst>(I->getArgOperand(0)); 145 IntrinsicInst *Op2 = dyn_cast<IntrinsicInst>(I->getArgOperand(1)); 146 147 if (Op1 && Op2 && 148 Op1->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && 149 Op2->getIntrinsicID() == Intrinsic::aarch64_sve_convert_to_svbool && 150 Op1->getArgOperand(0)->getType() == Op2->getArgOperand(0)->getType()) { 151 152 Value *Ops[] = {Op1->getArgOperand(0), Op2->getArgOperand(0)}; 153 Type *Tys[] = {Op1->getArgOperand(0)->getType()}; 154 Module *M = I->getParent()->getParent()->getParent(); 155 156 auto Fn = Intrinsic::getDeclaration(M, I->getIntrinsicID(), Tys); 157 auto CI = CallInst::Create(Fn, Ops, I->getName(), I); 158 159 I->replaceAllUsesWith(CI); 160 I->eraseFromParent(); 161 if (Op1->use_empty()) 162 Op1->eraseFromParent(); 163 if (Op1 != Op2 && Op2->use_empty()) 164 Op2->eraseFromParent(); 165 166 return true; 167 } 168 169 return false; 170 } 171 172 bool SVEIntrinsicOpts::optimizeConvertFromSVBool(IntrinsicInst *I) { 173 assert(I->getIntrinsicID() == Intrinsic::aarch64_sve_convert_from_svbool && 174 "Unexpected opcode"); 175 176 // If the reinterpret instruction operand is a PHI Node 177 if (isa<PHINode>(I->getArgOperand(0))) 178 return processPhiNode(I); 179 180 SmallVector<Instruction *, 32> CandidatesForRemoval; 181 Value *Cursor = I->getOperand(0), *EarliestReplacement = nullptr; 182 183 const auto *IVTy = cast<VectorType>(I->getType()); 184 185 // Walk the chain of conversions. 186 while (Cursor) { 187 // If the type of the cursor has fewer lanes than the final result, zeroing 188 // must take place, which breaks the equivalence chain. 189 const auto *CursorVTy = cast<VectorType>(Cursor->getType()); 190 if (CursorVTy->getElementCount().getKnownMinValue() < 191 IVTy->getElementCount().getKnownMinValue()) 192 break; 193 194 // If the cursor has the same type as I, it is a viable replacement. 195 if (Cursor->getType() == IVTy) 196 EarliestReplacement = Cursor; 197 198 auto *IntrinsicCursor = dyn_cast<IntrinsicInst>(Cursor); 199 200 // If this is not an SVE conversion intrinsic, this is the end of the chain. 201 if (!IntrinsicCursor || !(IntrinsicCursor->getIntrinsicID() == 202 Intrinsic::aarch64_sve_convert_to_svbool || 203 IntrinsicCursor->getIntrinsicID() == 204 Intrinsic::aarch64_sve_convert_from_svbool)) 205 break; 206 207 CandidatesForRemoval.insert(CandidatesForRemoval.begin(), IntrinsicCursor); 208 Cursor = IntrinsicCursor->getOperand(0); 209 } 210 211 // If no viable replacement in the conversion chain was found, there is 212 // nothing to do. 213 if (!EarliestReplacement) 214 return false; 215 216 I->replaceAllUsesWith(EarliestReplacement); 217 I->eraseFromParent(); 218 219 while (!CandidatesForRemoval.empty()) { 220 Instruction *Candidate = CandidatesForRemoval.pop_back_val(); 221 if (Candidate->use_empty()) 222 Candidate->eraseFromParent(); 223 } 224 return true; 225 } 226 227 bool SVEIntrinsicOpts::optimizeIntrinsic(Instruction *I) { 228 IntrinsicInst *IntrI = dyn_cast<IntrinsicInst>(I); 229 if (!IntrI) 230 return false; 231 232 switch (IntrI->getIntrinsicID()) { 233 case Intrinsic::aarch64_sve_convert_from_svbool: 234 return optimizeConvertFromSVBool(IntrI); 235 case Intrinsic::aarch64_sve_ptest_any: 236 case Intrinsic::aarch64_sve_ptest_first: 237 case Intrinsic::aarch64_sve_ptest_last: 238 return optimizePTest(IntrI); 239 default: 240 return false; 241 } 242 243 return true; 244 } 245 246 bool SVEIntrinsicOpts::optimizeFunctions( 247 SmallSetVector<Function *, 4> &Functions) { 248 bool Changed = false; 249 for (auto *F : Functions) { 250 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree(); 251 252 // Traverse the DT with an rpo walk so we see defs before uses, allowing 253 // simplification to be done incrementally. 254 BasicBlock *Root = DT->getRoot(); 255 ReversePostOrderTraversal<BasicBlock *> RPOT(Root); 256 for (auto *BB : RPOT) 257 for (Instruction &I : make_early_inc_range(*BB)) 258 Changed |= optimizeIntrinsic(&I); 259 } 260 return Changed; 261 } 262 263 bool SVEIntrinsicOpts::runOnModule(Module &M) { 264 bool Changed = false; 265 SmallSetVector<Function *, 4> Functions; 266 267 // Check for SVE intrinsic declarations first so that we only iterate over 268 // relevant functions. Where an appropriate declaration is found, store the 269 // function(s) where it is used so we can target these only. 270 for (auto &F : M.getFunctionList()) { 271 if (!F.isDeclaration()) 272 continue; 273 274 switch (F.getIntrinsicID()) { 275 case Intrinsic::aarch64_sve_convert_from_svbool: 276 case Intrinsic::aarch64_sve_ptest_any: 277 case Intrinsic::aarch64_sve_ptest_first: 278 case Intrinsic::aarch64_sve_ptest_last: 279 for (User *U : F.users()) 280 Functions.insert(cast<Instruction>(U)->getFunction()); 281 break; 282 default: 283 break; 284 } 285 } 286 287 if (!Functions.empty()) 288 Changed |= optimizeFunctions(Functions); 289 290 return Changed; 291 } 292