1 //===- BlockExtractor.cpp - Extracts blocks into their own functions ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass extracts the specified basic blocks from the module into their 10 // own functions. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/ADT/STLExtras.h" 15 #include "llvm/ADT/Statistic.h" 16 #include "llvm/IR/Instructions.h" 17 #include "llvm/IR/Module.h" 18 #include "llvm/InitializePasses.h" 19 #include "llvm/Pass.h" 20 #include "llvm/Support/CommandLine.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Support/MemoryBuffer.h" 23 #include "llvm/Transforms/IPO.h" 24 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 25 #include "llvm/Transforms/Utils/CodeExtractor.h" 26 27 using namespace llvm; 28 29 #define DEBUG_TYPE "block-extractor" 30 31 STATISTIC(NumExtracted, "Number of basic blocks extracted"); 32 33 static cl::opt<std::string> BlockExtractorFile( 34 "extract-blocks-file", cl::value_desc("filename"), 35 cl::desc("A file containing list of basic blocks to extract"), cl::Hidden); 36 37 cl::opt<bool> BlockExtractorEraseFuncs("extract-blocks-erase-funcs", 38 cl::desc("Erase the existing functions"), 39 cl::Hidden); 40 namespace { 41 class BlockExtractor : public ModulePass { 42 SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupsOfBlocks; 43 bool EraseFunctions; 44 /// Map a function name to groups of blocks. 45 SmallVector<std::pair<std::string, SmallVector<std::string, 4>>, 4> 46 BlocksByName; 47 48 void init(const SmallVectorImpl<SmallVector<BasicBlock *, 16>> 49 &GroupsOfBlocksToExtract) { 50 for (const SmallVectorImpl<BasicBlock *> &GroupOfBlocks : 51 GroupsOfBlocksToExtract) { 52 SmallVector<BasicBlock *, 16> NewGroup; 53 NewGroup.append(GroupOfBlocks.begin(), GroupOfBlocks.end()); 54 GroupsOfBlocks.emplace_back(NewGroup); 55 } 56 if (!BlockExtractorFile.empty()) 57 loadFile(); 58 } 59 60 public: 61 static char ID; 62 BlockExtractor(const SmallVectorImpl<BasicBlock *> &BlocksToExtract, 63 bool EraseFunctions) 64 : ModulePass(ID), EraseFunctions(EraseFunctions) { 65 // We want one group per element of the input list. 66 SmallVector<SmallVector<BasicBlock *, 16>, 4> MassagedGroupsOfBlocks; 67 for (BasicBlock *BB : BlocksToExtract) { 68 SmallVector<BasicBlock *, 16> NewGroup; 69 NewGroup.push_back(BB); 70 MassagedGroupsOfBlocks.push_back(NewGroup); 71 } 72 init(MassagedGroupsOfBlocks); 73 } 74 75 BlockExtractor(const SmallVectorImpl<SmallVector<BasicBlock *, 16>> 76 &GroupsOfBlocksToExtract, 77 bool EraseFunctions) 78 : ModulePass(ID), EraseFunctions(EraseFunctions) { 79 init(GroupsOfBlocksToExtract); 80 } 81 82 BlockExtractor() : BlockExtractor(SmallVector<BasicBlock *, 0>(), false) {} 83 bool runOnModule(Module &M) override; 84 85 private: 86 void loadFile(); 87 void splitLandingPadPreds(Function &F); 88 }; 89 } // end anonymous namespace 90 91 char BlockExtractor::ID = 0; 92 INITIALIZE_PASS(BlockExtractor, "extract-blocks", 93 "Extract basic blocks from module", false, false) 94 95 ModulePass *llvm::createBlockExtractorPass() { return new BlockExtractor(); } 96 ModulePass *llvm::createBlockExtractorPass( 97 const SmallVectorImpl<BasicBlock *> &BlocksToExtract, bool EraseFunctions) { 98 return new BlockExtractor(BlocksToExtract, EraseFunctions); 99 } 100 ModulePass *llvm::createBlockExtractorPass( 101 const SmallVectorImpl<SmallVector<BasicBlock *, 16>> 102 &GroupsOfBlocksToExtract, 103 bool EraseFunctions) { 104 return new BlockExtractor(GroupsOfBlocksToExtract, EraseFunctions); 105 } 106 107 /// Gets all of the blocks specified in the input file. 108 void BlockExtractor::loadFile() { 109 auto ErrOrBuf = MemoryBuffer::getFile(BlockExtractorFile); 110 if (ErrOrBuf.getError()) 111 report_fatal_error("BlockExtractor couldn't load the file."); 112 // Read the file. 113 auto &Buf = *ErrOrBuf; 114 SmallVector<StringRef, 16> Lines; 115 Buf->getBuffer().split(Lines, '\n', /*MaxSplit=*/-1, 116 /*KeepEmpty=*/false); 117 for (const auto &Line : Lines) { 118 SmallVector<StringRef, 4> LineSplit; 119 Line.split(LineSplit, ' ', /*MaxSplit=*/-1, 120 /*KeepEmpty=*/false); 121 if (LineSplit.empty()) 122 continue; 123 if (LineSplit.size()!=2) 124 report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'"); 125 SmallVector<StringRef, 4> BBNames; 126 LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1, 127 /*KeepEmpty=*/false); 128 if (BBNames.empty()) 129 report_fatal_error("Missing bbs name"); 130 BlocksByName.push_back({LineSplit[0], {BBNames.begin(), BBNames.end()}}); 131 } 132 } 133 134 /// Extracts the landing pads to make sure all of them have only one 135 /// predecessor. 136 void BlockExtractor::splitLandingPadPreds(Function &F) { 137 for (BasicBlock &BB : F) { 138 for (Instruction &I : BB) { 139 if (!isa<InvokeInst>(&I)) 140 continue; 141 InvokeInst *II = cast<InvokeInst>(&I); 142 BasicBlock *Parent = II->getParent(); 143 BasicBlock *LPad = II->getUnwindDest(); 144 145 // Look through the landing pad's predecessors. If one of them ends in an 146 // 'invoke', then we want to split the landing pad. 147 bool Split = false; 148 for (auto PredBB : predecessors(LPad)) { 149 if (PredBB->isLandingPad() && PredBB != Parent && 150 isa<InvokeInst>(Parent->getTerminator())) { 151 Split = true; 152 break; 153 } 154 } 155 156 if (!Split) 157 continue; 158 159 SmallVector<BasicBlock *, 2> NewBBs; 160 SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs); 161 } 162 } 163 } 164 165 bool BlockExtractor::runOnModule(Module &M) { 166 167 bool Changed = false; 168 169 // Get all the functions. 170 SmallVector<Function *, 4> Functions; 171 for (Function &F : M) { 172 splitLandingPadPreds(F); 173 Functions.push_back(&F); 174 } 175 176 // Get all the blocks specified in the input file. 177 unsigned NextGroupIdx = GroupsOfBlocks.size(); 178 GroupsOfBlocks.resize(NextGroupIdx + BlocksByName.size()); 179 for (const auto &BInfo : BlocksByName) { 180 Function *F = M.getFunction(BInfo.first); 181 if (!F) 182 report_fatal_error("Invalid function name specified in the input file"); 183 for (const auto &BBInfo : BInfo.second) { 184 auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) { 185 return BB.getName().equals(BBInfo); 186 }); 187 if (Res == F->end()) 188 report_fatal_error("Invalid block name specified in the input file"); 189 GroupsOfBlocks[NextGroupIdx].push_back(&*Res); 190 } 191 ++NextGroupIdx; 192 } 193 194 // Extract each group of basic blocks. 195 for (auto &BBs : GroupsOfBlocks) { 196 SmallVector<BasicBlock *, 32> BlocksToExtractVec; 197 for (BasicBlock *BB : BBs) { 198 // Check if the module contains BB. 199 if (BB->getParent()->getParent() != &M) 200 report_fatal_error("Invalid basic block"); 201 LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting " 202 << BB->getParent()->getName() << ":" << BB->getName() 203 << "\n"); 204 BlocksToExtractVec.push_back(BB); 205 if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator())) 206 BlocksToExtractVec.push_back(II->getUnwindDest()); 207 ++NumExtracted; 208 Changed = true; 209 } 210 CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent()); 211 Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC); 212 if (F) 213 LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName() 214 << "' in: " << F->getName() << '\n'); 215 else 216 LLVM_DEBUG(dbgs() << "Failed to extract for group '" 217 << (*BBs.begin())->getName() << "'\n"); 218 } 219 220 // Erase the functions. 221 if (EraseFunctions || BlockExtractorEraseFuncs) { 222 for (Function *F : Functions) { 223 LLVM_DEBUG(dbgs() << "BlockExtractor: Trying to delete " << F->getName() 224 << "\n"); 225 F->deleteBody(); 226 } 227 // Set linkage as ExternalLinkage to avoid erasing unreachable functions. 228 for (Function &F : M) 229 F.setLinkage(GlobalValue::ExternalLinkage); 230 Changed = true; 231 } 232 233 return Changed; 234 } 235