1 //===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This implements feature and label extraction for offline supervised learning 10 // of a IR to native size model. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" 14 15 #ifdef LLVM_HAVE_TF_API 16 #include "llvm/Analysis/Utils/TFUtils.h" 17 #endif 18 #include "llvm/Analysis/LoopInfo.h" 19 #include "llvm/Analysis/TargetLibraryInfo.h" 20 #include "llvm/Analysis/TargetTransformInfo.h" 21 #include "llvm/IR/BasicBlock.h" 22 #include "llvm/IR/Dominators.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/Instructions.h" 25 #include "llvm/IR/PassManager.h" 26 #include "llvm/MC/MCAsmLayout.h" 27 #include "llvm/Support/Casting.h" 28 #include "llvm/Support/CommandLine.h" 29 #include "llvm/Support/raw_ostream.h" 30 31 #include <algorithm> 32 #include <deque> 33 34 using namespace llvm; 35 36 AnalysisKey InlineSizeEstimatorAnalysis::Key; 37 38 #define DEBUG_TYPE "inline-size-estimator" 39 40 #ifdef LLVM_HAVE_TF_API 41 cl::opt<std::string> TFIR2NativeModelPath( 42 "ml-inliner-ir2native-model", cl::Hidden, 43 cl::desc("Path to saved model evaluating native size from IR.")); 44 45 namespace { 46 unsigned getMaxInstructionID() { 47 #define LAST_OTHER_INST(NR) return NR; 48 #include "llvm/IR/Instruction.def" 49 } 50 51 class IRToNativeSizeLearning { 52 public: 53 enum class NamedFeatureIndex : size_t { 54 InitialSize, 55 Blocks, 56 Calls, 57 IsLocal, 58 IsLinkOnceODR, 59 IsLinkOnce, 60 Loops, 61 MaxLoopDepth, 62 MaxDomTreeLevel, 63 64 NumNamedFeatures 65 }; 66 static const size_t NumNamedFeatures = 67 static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures); 68 struct FunctionFeatures { 69 static const size_t FeatureCount; 70 71 std::array<int32_t, NumNamedFeatures> NamedFeatures = {0}; 72 std::vector<int32_t> InstructionHistogram; 73 std::vector<int32_t> InstructionPairHistogram; 74 75 void fillTensor(int32_t *Ptr) const; 76 int32_t &operator[](NamedFeatureIndex Pos) { 77 return NamedFeatures[static_cast<size_t>(Pos)]; 78 } 79 }; 80 IRToNativeSizeLearning() = default; 81 82 static FunctionFeatures getFunctionFeatures(Function &F, 83 FunctionAnalysisManager &FAM); 84 }; 85 86 // This is a point in time - we determined including these pairs of 87 // consecutive instructions (in the IR layout available at inline time) as 88 // features improves the model performance. We want to move away from manual 89 // feature selection. 90 // The array is given in opcode pairs rather than labels because 1) labels 91 // weren't readily available, and 2) the successions were hand - extracted. 92 // 93 // This array must be sorted. 94 static const std::array<std::pair<size_t, size_t>, 137> 95 ImportantInstructionSuccessions{ 96 {{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11}, 97 {1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24}, 98 {1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31}, 99 {1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45}, 100 {2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33}, 101 {2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56}, 102 {13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27}, 103 {28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31}, 104 {31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15}, 105 {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40}, 106 {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32}, 107 {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2}, 108 {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34}, 109 {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56}, 110 {49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39}, 111 {49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53}, 112 {53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56}, 113 {56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34}, 114 {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57}, 115 {64, 1}, {64, 64}, {65, 1}, {65, 65}}}; 116 117 // We have: 9 calculated features (the features here); 1 feature for each 118 // instruction opcode; and 1 feature for each manually-identified sequence. 119 // For the latter 2, we build a histogram: we count the number of 120 // occurrences of each instruction opcode or succession of instructions, 121 // respectively. 122 // Note that instruction opcodes start from 1. For convenience, we also have an 123 // always 0 feature for the '0' opcode, hence the extra 1. 124 const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = 125 ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 + 126 IRToNativeSizeLearning::NumNamedFeatures; 127 128 size_t getSize(Function &F, TargetTransformInfo &TTI) { 129 size_t Ret = 0; 130 for (const auto &BB : F) 131 for (const auto &I : BB) 132 Ret += *(TTI.getInstructionCost( 133 &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize).getValue()); 134 return Ret; 135 } 136 137 size_t getSize(Function &F, FunctionAnalysisManager &FAM) { 138 auto &TTI = FAM.getResult<TargetIRAnalysis>(F); 139 return getSize(F, TTI); 140 } 141 142 unsigned getMaxDominatorTreeDepth(const Function &F, 143 const DominatorTree &Tree) { 144 unsigned Ret = 0; 145 for (const auto &BB : F) 146 if (const auto *TN = Tree.getNode(&BB)) 147 Ret = std::max(Ret, TN->getLevel()); 148 return Ret; 149 } 150 } // namespace 151 152 IRToNativeSizeLearning::FunctionFeatures 153 IRToNativeSizeLearning::getFunctionFeatures(Function &F, 154 FunctionAnalysisManager &FAM) { 155 assert(llvm::is_sorted(ImportantInstructionSuccessions) && 156 "expected function features are sorted"); 157 158 auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F); 159 FunctionFeatures FF; 160 size_t InstrCount = getMaxInstructionID() + 1; 161 FF.InstructionHistogram.resize(InstrCount); 162 163 FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size()); 164 165 int StartID = 0; 166 int LastID = StartID; 167 auto getPairIndex = [](size_t a, size_t b) { 168 auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b)); 169 if (I == ImportantInstructionSuccessions.end()) 170 return -1; 171 return static_cast<int>( 172 std::distance(ImportantInstructionSuccessions.begin(), I)); 173 }; 174 175 // We don't want debug calls, because they'd just add noise. 176 for (const auto &BB : F) { 177 for (const auto &I : BB.instructionsWithoutDebug()) { 178 auto ID = I.getOpcode(); 179 180 ++FF.InstructionHistogram[ID]; 181 int PairIndex = getPairIndex(LastID, ID); 182 if (PairIndex >= 0) 183 ++FF.InstructionPairHistogram[PairIndex]; 184 LastID = ID; 185 if (isa<CallBase>(I)) 186 ++FF[NamedFeatureIndex::Calls]; 187 } 188 } 189 190 FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM); 191 FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage(); 192 FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage(); 193 FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage(); 194 FF[NamedFeatureIndex::Blocks] = 195 std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end()); 196 auto &LI = FAM.getResult<LoopAnalysis>(F); 197 FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end()); 198 for (auto &L : LI) 199 FF[NamedFeatureIndex::MaxLoopDepth] = 200 std::max(FF[NamedFeatureIndex::MaxLoopDepth], 201 static_cast<int32_t>(L->getLoopDepth())); 202 FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree); 203 return FF; 204 } 205 206 void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const { 207 std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr); 208 Ptr += NamedFeatures.size(); 209 std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr); 210 Ptr += InstructionHistogram.size(); 211 std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(), 212 Ptr); 213 } 214 215 bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { 216 return !TFIR2NativeModelPath.empty(); 217 } 218 219 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { 220 if (!isEvaluatorRequested()) { 221 return; 222 } 223 std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>( 224 "serving_default_input_1", 225 {1, static_cast<int64_t>( 226 IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})}; 227 std::vector<TensorSpec> OutputSpecs{ 228 TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})}; 229 Evaluator = std::make_unique<TFModelEvaluator>( 230 TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs); 231 if (!Evaluator || !Evaluator->isValid()) { 232 Evaluator.reset(); 233 return; 234 } 235 } 236 237 InlineSizeEstimatorAnalysis::Result 238 InlineSizeEstimatorAnalysis::run(const Function &F, 239 FunctionAnalysisManager &FAM) { 240 if (!Evaluator) 241 return None; 242 auto Features = IRToNativeSizeLearning::getFunctionFeatures( 243 const_cast<Function &>(F), FAM); 244 int32_t *V = Evaluator->getInput<int32_t>(0); 245 Features.fillTensor(V); 246 auto ER = Evaluator->evaluate(); 247 if (!ER) 248 return None; 249 float Ret = *ER->getTensorValue<float>(0); 250 if (Ret < 0.0) 251 Ret = 0.0; 252 return static_cast<size_t>(Ret); 253 } 254 255 InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} 256 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( 257 InlineSizeEstimatorAnalysis &&Other) 258 : Evaluator(std::move(Other.Evaluator)) {} 259 260 #else 261 namespace llvm { 262 class TFModelEvaluator {}; 263 } // namespace llvm 264 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {} 265 InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis( 266 InlineSizeEstimatorAnalysis &&) {} 267 InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} 268 InlineSizeEstimatorAnalysis::Result 269 InlineSizeEstimatorAnalysis::run(const Function &F, 270 FunctionAnalysisManager &FAM) { 271 return None; 272 } 273 bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; } 274 #endif 275 276 PreservedAnalyses 277 InlineSizeEstimatorAnalysisPrinterPass::run(Function &F, 278 FunctionAnalysisManager &AM) { 279 OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName() 280 << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n"; 281 return PreservedAnalyses::all(); 282 } 283