1 //===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This implements feature and label extraction for offline supervised learning 10 // of a IR to native size model. 11 // 12 //===----------------------------------------------------------------------===// 13 #include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" 14 15 #ifdef LLVM_HAVE_TFLITE 16 #include "llvm/Analysis/Utils/TFUtils.h" 17 #endif 18 #include "llvm/IR/Function.h" 19 #include "llvm/IR/PassManager.h" 20 #include "llvm/Support/raw_ostream.h" 21 22 using namespace llvm; 23 24 AnalysisKey InlineSizeEstimatorAnalysis::Key; 25 26 #ifdef LLVM_HAVE_TFLITE 27 #include "llvm/Analysis/LoopInfo.h" 28 #include "llvm/Analysis/TargetLibraryInfo.h" 29 #include "llvm/Analysis/TargetTransformInfo.h" 30 #include "llvm/IR/BasicBlock.h" 31 #include "llvm/IR/Dominators.h" 32 #include "llvm/IR/Instructions.h" 33 #include "llvm/Support/Casting.h" 34 #include "llvm/Support/CommandLine.h" 35 #include <algorithm> 36 #include <deque> 37 #include <optional> 38 39 cl::opt<std::string> TFIR2NativeModelPath( 40 "ml-inliner-ir2native-model", cl::Hidden, 41 cl::desc("Path to saved model evaluating native size from IR.")); 42 43 #define DEBUG_TYPE "inline-size-estimator" 44 namespace { 45 unsigned getMaxInstructionID() { 46 #define LAST_OTHER_INST(NR) return NR; 47 #include "llvm/IR/Instruction.def" 48 } 49 50 class IRToNativeSizeLearning { 51 public: 52 enum class NamedFeatureIndex : size_t { 53 InitialSize, 54 Blocks, 55 Calls, 56 IsLocal, 57 IsLinkOnceODR, 58 IsLinkOnce, 59 Loops, 60 MaxLoopDepth, 61 MaxDomTreeLevel, 62 63 NumNamedFeatures 64 }; 65 static const size_t NumNamedFeatures = 66 static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures); 67 struct FunctionFeatures { 68 static const size_t FeatureCount; 69 70 std::array<int32_t, NumNamedFeatures> NamedFeatures = {0}; 71 std::vector<int32_t> InstructionHistogram; 72 std::vector<int32_t> InstructionPairHistogram; 73 74 void fillTensor(int32_t *Ptr) const; 75 int32_t &operator[](NamedFeatureIndex Pos) { 76 return NamedFeatures[static_cast<size_t>(Pos)]; 77 } 78 }; 79 IRToNativeSizeLearning() = default; 80 81 static FunctionFeatures getFunctionFeatures(Function &F, 82 FunctionAnalysisManager &FAM); 83 }; 84 85 // This is a point in time - we determined including these pairs of 86 // consecutive instructions (in the IR layout available at inline time) as 87 // features improves the model performance. We want to move away from manual 88 // feature selection. 89 // The array is given in opcode pairs rather than labels because 1) labels 90 // weren't readily available, and 2) the successions were hand - extracted. 91 // 92 // This array must be sorted. 93 static const std::array<std::pair<size_t, size_t>, 137> 94 ImportantInstructionSuccessions{ 95 {{1, 1}, {1, 4}, {1, 5}, {1, 7}, {1, 8}, {1, 9}, {1, 11}, 96 {1, 12}, {1, 13}, {1, 14}, {1, 18}, {1, 20}, {1, 22}, {1, 24}, 97 {1, 25}, {1, 26}, {1, 27}, {1, 28}, {1, 29}, {1, 30}, {1, 31}, 98 {1, 32}, {1, 33}, {1, 34}, {1, 39}, {1, 40}, {1, 42}, {1, 45}, 99 {2, 1}, {2, 2}, {2, 13}, {2, 28}, {2, 29}, {2, 32}, {2, 33}, 100 {2, 34}, {2, 38}, {2, 48}, {2, 49}, {2, 53}, {2, 55}, {2, 56}, 101 {13, 2}, {13, 13}, {13, 26}, {13, 33}, {13, 34}, {13, 56}, {15, 27}, 102 {28, 2}, {28, 48}, {28, 53}, {29, 2}, {29, 33}, {29, 56}, {31, 31}, 103 {31, 33}, {31, 34}, {31, 49}, {32, 1}, {32, 2}, {32, 13}, {32, 15}, 104 {32, 28}, {32, 29}, {32, 32}, {32, 33}, {32, 34}, {32, 39}, {32, 40}, 105 {32, 48}, {32, 49}, {32, 53}, {32, 56}, {33, 1}, {33, 2}, {33, 32}, 106 {33, 33}, {33, 34}, {33, 49}, {33, 53}, {33, 56}, {34, 1}, {34, 2}, 107 {34, 32}, {34, 33}, {34, 34}, {34, 49}, {34, 53}, {34, 56}, {38, 34}, 108 {39, 57}, {40, 34}, {47, 15}, {47, 49}, {48, 2}, {48, 34}, {48, 56}, 109 {49, 1}, {49, 2}, {49, 28}, {49, 32}, {49, 33}, {49, 34}, {49, 39}, 110 {49, 49}, {49, 56}, {53, 1}, {53, 2}, {53, 28}, {53, 34}, {53, 53}, 111 {53, 57}, {55, 1}, {55, 28}, {55, 34}, {55, 53}, {55, 55}, {55, 56}, 112 {56, 1}, {56, 2}, {56, 7}, {56, 13}, {56, 32}, {56, 33}, {56, 34}, 113 {56, 49}, {56, 53}, {56, 56}, {56, 64}, {57, 34}, {57, 56}, {57, 57}, 114 {64, 1}, {64, 64}, {65, 1}, {65, 65}}}; 115 116 // We have: 9 calculated features (the features here); 1 feature for each 117 // instruction opcode; and 1 feature for each manually-identified sequence. 118 // For the latter 2, we build a histogram: we count the number of 119 // occurrences of each instruction opcode or succession of instructions, 120 // respectively. 121 // Note that instruction opcodes start from 1. For convenience, we also have an 122 // always 0 feature for the '0' opcode, hence the extra 1. 123 const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = 124 ImportantInstructionSuccessions.size() + getMaxInstructionID() + 1 + 125 IRToNativeSizeLearning::NumNamedFeatures; 126 127 size_t getSize(Function &F, TargetTransformInfo &TTI) { 128 size_t Ret = 0; 129 for (const auto &BB : F) 130 for (const auto &I : BB) 131 Ret += *(TTI.getInstructionCost( 132 &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize).getValue()); 133 return Ret; 134 } 135 136 size_t getSize(Function &F, FunctionAnalysisManager &FAM) { 137 auto &TTI = FAM.getResult<TargetIRAnalysis>(F); 138 return getSize(F, TTI); 139 } 140 141 unsigned getMaxDominatorTreeDepth(const Function &F, 142 const DominatorTree &Tree) { 143 unsigned Ret = 0; 144 for (const auto &BB : F) 145 if (const auto *TN = Tree.getNode(&BB)) 146 Ret = std::max(Ret, TN->getLevel()); 147 return Ret; 148 } 149 } // namespace 150 151 IRToNativeSizeLearning::FunctionFeatures 152 IRToNativeSizeLearning::getFunctionFeatures(Function &F, 153 FunctionAnalysisManager &FAM) { 154 assert(llvm::is_sorted(ImportantInstructionSuccessions) && 155 "expected function features are sorted"); 156 157 auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F); 158 FunctionFeatures FF; 159 size_t InstrCount = getMaxInstructionID() + 1; 160 FF.InstructionHistogram.resize(InstrCount); 161 162 FF.InstructionPairHistogram.resize(ImportantInstructionSuccessions.size()); 163 164 int StartID = 0; 165 int LastID = StartID; 166 auto getPairIndex = [](size_t a, size_t b) { 167 auto I = llvm::find(ImportantInstructionSuccessions, std::make_pair(a, b)); 168 if (I == ImportantInstructionSuccessions.end()) 169 return -1; 170 return static_cast<int>( 171 std::distance(ImportantInstructionSuccessions.begin(), I)); 172 }; 173 174 // We don't want debug calls, because they'd just add noise. 175 for (const auto &BB : F) { 176 for (const auto &I : BB.instructionsWithoutDebug()) { 177 auto ID = I.getOpcode(); 178 179 ++FF.InstructionHistogram[ID]; 180 int PairIndex = getPairIndex(LastID, ID); 181 if (PairIndex >= 0) 182 ++FF.InstructionPairHistogram[PairIndex]; 183 LastID = ID; 184 if (isa<CallBase>(I)) 185 ++FF[NamedFeatureIndex::Calls]; 186 } 187 } 188 189 FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM); 190 FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage(); 191 FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage(); 192 FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage(); 193 FF[NamedFeatureIndex::Blocks] = F.size(); 194 auto &LI = FAM.getResult<LoopAnalysis>(F); 195 FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end()); 196 for (auto &L : LI) 197 FF[NamedFeatureIndex::MaxLoopDepth] = 198 std::max(FF[NamedFeatureIndex::MaxLoopDepth], 199 static_cast<int32_t>(L->getLoopDepth())); 200 FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree); 201 return FF; 202 } 203 204 void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const { 205 std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr); 206 Ptr += NamedFeatures.size(); 207 std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr); 208 Ptr += InstructionHistogram.size(); 209 std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(), 210 Ptr); 211 } 212 213 bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { 214 return !TFIR2NativeModelPath.empty(); 215 } 216 217 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { 218 if (!isEvaluatorRequested()) { 219 return; 220 } 221 std::vector<TensorSpec> InputSpecs{TensorSpec::createSpec<int32_t>( 222 "serving_default_input_1", 223 {1, static_cast<int64_t>( 224 IRToNativeSizeLearning::FunctionFeatures::FeatureCount)})}; 225 std::vector<TensorSpec> OutputSpecs{ 226 TensorSpec::createSpec<float>("StatefulPartitionedCall", {1})}; 227 Evaluator = std::make_unique<TFModelEvaluator>( 228 TFIR2NativeModelPath.getValue().c_str(), InputSpecs, OutputSpecs); 229 if (!Evaluator || !Evaluator->isValid()) { 230 Evaluator.reset(); 231 return; 232 } 233 } 234 235 InlineSizeEstimatorAnalysis::Result 236 InlineSizeEstimatorAnalysis::run(const Function &F, 237 FunctionAnalysisManager &FAM) { 238 if (!Evaluator) 239 return std::nullopt; 240 auto Features = IRToNativeSizeLearning::getFunctionFeatures( 241 const_cast<Function &>(F), FAM); 242 int32_t *V = Evaluator->getInput<int32_t>(0); 243 Features.fillTensor(V); 244 auto ER = Evaluator->evaluate(); 245 if (!ER) 246 return std::nullopt; 247 float Ret = *ER->getTensorValue<float>(0); 248 if (Ret < 0.0) 249 Ret = 0.0; 250 return static_cast<size_t>(Ret); 251 } 252 253 InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} 254 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( 255 InlineSizeEstimatorAnalysis &&Other) 256 : Evaluator(std::move(Other.Evaluator)) {} 257 258 #else 259 namespace llvm { 260 class TFModelEvaluator {}; 261 } // namespace llvm 262 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() = default; 263 InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis( 264 InlineSizeEstimatorAnalysis &&) {} 265 InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() = default; 266 InlineSizeEstimatorAnalysis::Result 267 InlineSizeEstimatorAnalysis::run(const Function &F, 268 FunctionAnalysisManager &FAM) { 269 return std::nullopt; 270 } 271 bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; } 272 #endif 273 274 PreservedAnalyses 275 InlineSizeEstimatorAnalysisPrinterPass::run(Function &F, 276 FunctionAnalysisManager &AM) { 277 OS << "[InlineSizeEstimatorAnalysis] size estimate for " << F.getName() 278 << ": " << AM.getResult<InlineSizeEstimatorAnalysis>(F) << "\n"; 279 return PreservedAnalyses::all(); 280 } 281