//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements feature and label extraction for offline supervised learning
// of a IR to native size model.
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"

// TFUtils (the TensorFlow C-API wrapper) is only available when LLVM was
// configured with the TF API; everything model-related below is guarded on
// LLVM_HAVE_TF_API, with no-op stubs provided otherwise.
#ifdef LLVM_HAVE_TF_API
#include "llvm/Analysis/Utils/TFUtils.h"
#endif
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <deque>

using namespace llvm;

AnalysisKey InlineSizeEstimatorAnalysis::Key;

#define DEBUG_TYPE "inline-size-estimator"

#ifdef LLVM_HAVE_TF_API
// Path to the saved TF model; when empty, the analysis is disabled (see
// isEvaluatorRequested()).
cl::opt<std::string> TFIR2NativeModelPath(
    "ml-inliner-ir2native-model", cl::Hidden,
    cl::desc("Path to saved model evaluating native size from IR."));

namespace {
// Returns the largest IR instruction opcode number, by expanding the
// LAST_OTHER_INST macro from Instruction.def into a `return` statement.
unsigned getMaxInstructionID() {
#define LAST_OTHER_INST(NR) return NR;
#include "llvm/IR/Instruction.def"
}

/// Feature extraction for offline training of an IR -> native-size model.
/// Produces a fixed-layout feature vector per function: a small set of named
/// scalar features, an opcode histogram, and a histogram of hand-picked
/// consecutive-opcode pairs.
class IRToNativeSizeLearning {
public:
  /// Indices of the named (hand-computed) scalar features inside
  /// FunctionFeatures::NamedFeatures. NumNamedFeatures is a count sentinel,
  /// not a real feature.
  enum class NamedFeatureIndex : size_t {
    InitialSize,
    Blocks,
    Calls,
    IsLocal,
    IsLinkOnceODR,
    IsLinkOnce,
    Loops,
    MaxLoopDepth,
    MaxDomTreeLevel,

    NumNamedFeatures
  };
  static const size_t NumNamedFeatures =
      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
  /// The extracted features for one function. fillTensor() flattens all
  /// three groups into a single int32 buffer of FeatureCount elements.
  struct FunctionFeatures {
    // Hand-selected (opcode, opcode) successions used as pair features;
    // sorted once at startup by SortFeatureTuples (see below).
    static std::vector<std::pair<size_t, size_t>>
        ImportantInstructionSuccessions;
    // Total flattened feature count: named + opcode histogram + pair
    // histogram. Defined after the successions table.
    static const size_t FeatureCount;

    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
    std::vector<int32_t> InstructionHistogram;
    std::vector<int32_t> InstructionPairHistogram;

    // Copies NamedFeatures, then InstructionHistogram, then
    // InstructionPairHistogram into Ptr (caller provides FeatureCount slots).
    void fillTensor(int32_t *Ptr) const;
    int32_t &operator[](NamedFeatureIndex Pos) {
      return NamedFeatures[static_cast<size_t>(Pos)];
    }
  };
  IRToNativeSizeLearning() = default;

  static FunctionFeatures getFunctionFeatures(Function &F,
                                              FunctionAnalysisManager &FAM);

private:
  /// Sort once the feature tuples.
  // Sorting happens lazily, on first dereference of the ManagedStatic
  // below; IsSorted only exists so callers can assert it ran.
  struct SortFeatureTuples {
    bool IsSorted = false;
    SortFeatureTuples() {
      std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(),
                FunctionFeatures::ImportantInstructionSuccessions.end());
      IsSorted = true;
    }
  };

  static llvm::ManagedStatic<SortFeatureTuples> TupleSorter;

  // Dereferencing TupleSorter triggers (once) the sort in the constructor
  // above; afterwards this always returns true.
  static bool ensureSortedTuples() { return TupleSorter->IsSorted; }
};
llvm::ManagedStatic<IRToNativeSizeLearning::SortFeatureTuples>
    IRToNativeSizeLearning::TupleSorter;

// This is a point in time - we determined including these pairs of
// consecutive instructions (in the IR layout available at inline time) as
// features improves the model performance. We want to move away from manual
// feature selection.
// The vector is given in opcode pairs rather than labels because 1) labels
// weren't readily available, and 2) the successions were hand-extracted.
// The order here is irrelevant: SortFeatureTuples sorts the vector before
// it is first searched.
std::vector<std::pair<size_t, size_t>>
    IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions =
        {{1, 34}, {15, 27}, {53, 53}, {53, 34}, {1, 11}, {32, 2}, {2, 48},
         {28, 48}, {1, 45}, {49, 32}, {57, 56}, {55, 53}, {1, 28}, {57, 34},
         {1, 1},  {32, 28}, {32, 15}, {49, 28}, {53, 1}, {2, 53}, {48, 34},
         {28, 53}, {2, 32}, {1, 40}, {32, 48}, {29, 56}, {56, 32}, {55, 56},
         {48, 56}, {1, 31}, {33, 34}, {2, 28}, {1, 12}, {55, 1}, {31, 31},
         {65, 1},  {33, 56}, {32, 32}, {13, 13}, {1, 26}, {13, 26}, {2, 1},
         {1, 33},  {47, 49}, {64, 1}, {2, 38}, {34, 53}, {48, 2}, {55, 34},
         {34, 32}, {1, 5},  {56, 13}, {2, 2}, {2, 49}, {33, 2}, {49, 39},
         {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29},
         {47, 15}, {13, 34}, {2, 33}, {32, 49}, {49, 34}, {56, 33}, {1, 30},
         {33, 33}, {31, 33}, {2, 29}, {56, 7}, {32, 13}, {2, 55}, {56, 56},
         {2, 34},  {1, 42}, {34, 49}, {1, 20}, {32, 33}, {1, 25}, {53, 28},
         {1, 14},  {31, 49}, {28, 2}, {2, 13}, {2, 56}, {1, 32}, {56, 53},
         {65, 65}, {33, 53}, {64, 64}, {13, 2}, {34, 33}, {1, 4}, {49, 2},
         {1, 9},   {56, 1}, {33, 1}, {53, 57}, {32, 53}, {13, 56}, {32, 56},
         {55, 55}, {1, 18}, {49, 56}, {34, 34}, {1, 7}, {56, 64}, {32, 1},
         {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32},
         {32, 40}, {1, 29}, {53, 2}, {34, 1}, {32, 34}, {49, 49}, {1, 24},
         {40, 34}, {1, 13}, {38, 34}, {29, 2}, {34, 2}, {1, 39}, {1, 22},
         {1, 27},  {49, 1}, {1, 8}, {56, 2}};

// We have: 9 calculated features (the features here); 1 feature for each
// instruction opcode; and 1 feature for each manually-identified sequence.
// For the latter 2, we build a histogram: we count the number of
// occurrences of each instruction opcode or succession of instructions,
// respectively.
140 // Note that instruction opcodes start from 1. For convenience, we also have an 141 // always 0 feature for the '0' opcode, hence the extra 1. 142 const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = 143 IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions 144 .size() + 145 getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures; 146 147 size_t getSize(Function &F, TargetTransformInfo &TTI) { 148 size_t Ret = 0; 149 for (auto &BB : F) 150 for (auto &I : BB) 151 Ret += TTI.getInstructionCost( 152 &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize); 153 return Ret; 154 } 155 156 size_t getSize(Function &F, FunctionAnalysisManager &FAM) { 157 auto &TTI = FAM.getResult<TargetIRAnalysis>(F); 158 return getSize(F, TTI); 159 } 160 161 unsigned getMaxDominatorTreeDepth(const Function &F, 162 const DominatorTree &Tree) { 163 unsigned Ret = 0; 164 for (auto &BB : F) 165 if (auto *TN = Tree.getNode(&BB)) 166 Ret = std::max(Ret, TN->getLevel()); 167 return Ret; 168 } 169 } // namespace 170 171 IRToNativeSizeLearning::FunctionFeatures 172 IRToNativeSizeLearning::getFunctionFeatures(Function &F, 173 FunctionAnalysisManager &FAM) { 174 assert(ensureSortedTuples() && "expected lazy initialization"); 175 176 auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F); 177 FunctionFeatures FF; 178 size_t InstrCount = getMaxInstructionID() + 1; 179 FF.InstructionHistogram.resize(InstrCount); 180 181 FF.InstructionPairHistogram.resize( 182 FunctionFeatures::ImportantInstructionSuccessions.size()); 183 184 auto StartID = 0; 185 auto LastID = StartID; 186 auto getPairIndex = [](size_t a, size_t b) { 187 auto I = 188 std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(), 189 FunctionFeatures::ImportantInstructionSuccessions.end(), 190 std::make_pair(a, b)); 191 if (I == FunctionFeatures::ImportantInstructionSuccessions.end()) 192 return -1; 193 return static_cast<int>(std::distance( 194 
FunctionFeatures::ImportantInstructionSuccessions.begin(), I)); 195 }; 196 197 // We don't want debug calls, because they'd just add noise. 198 for (auto &BB : F) { 199 for (auto I = BB.instructionsWithoutDebug().begin(), 200 E = BB.instructionsWithoutDebug().end(); 201 I != E; ++I) { 202 auto ID = I->getOpcode(); 203 204 ++FF.InstructionHistogram[ID]; 205 int PairIndex = getPairIndex(LastID, ID); 206 if (PairIndex >= 0) 207 ++FF.InstructionPairHistogram[PairIndex]; 208 LastID = ID; 209 if (isa<CallBase>(*I)) 210 ++FF[NamedFeatureIndex::Calls]; 211 } 212 } 213 214 FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM); 215 FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage(); 216 FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage(); 217 FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage(); 218 FF[NamedFeatureIndex::Blocks] = 219 std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end()); 220 auto &LI = FAM.getResult<LoopAnalysis>(F); 221 FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end()); 222 for (auto &L : LI) 223 FF[NamedFeatureIndex::MaxLoopDepth] = 224 std::max(FF[NamedFeatureIndex::MaxLoopDepth], 225 static_cast<int32_t>(L->getLoopDepth())); 226 FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree); 227 return FF; 228 } 229 230 void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const { 231 std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr); 232 Ptr += NamedFeatures.size(); 233 std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr); 234 Ptr += InstructionHistogram.size(); 235 std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(), 236 Ptr); 237 } 238 239 bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { 240 return !TFIR2NativeModelPath.empty(); 241 } 242 243 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { 244 if (!isEvaluatorRequested()) { 245 return; 246 } 247 std::vector<std::string> 
InputNames{"serving_default_input_1"}; 248 std::vector<std::string> OutputName{"StatefulPartitionedCall"}; 249 Evaluator = std::make_unique<TFModelEvaluator>( 250 TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName); 251 if (!Evaluator || !Evaluator->isValid()) { 252 Evaluator.reset(); 253 return; 254 } 255 static const std::vector<int64_t> Dim{ 256 1, static_cast<int64_t>( 257 IRToNativeSizeLearning::FunctionFeatures::FeatureCount)}; 258 259 Evaluator->initInput<int32_t>(0, Dim); 260 } 261 262 InlineSizeEstimatorAnalysis::Result 263 InlineSizeEstimatorAnalysis::run(const Function &F, 264 FunctionAnalysisManager &FAM) { 265 if (!Evaluator) 266 return None; 267 auto Features = IRToNativeSizeLearning::getFunctionFeatures( 268 const_cast<Function &>(F), FAM); 269 int32_t *V = Evaluator->getInput<int32_t>(0); 270 Features.fillTensor(V); 271 auto ER = Evaluator->evaluate(); 272 if (!ER) 273 return None; 274 float Ret = *ER->getTensorValue<float>(0); 275 if (Ret < 0.0) 276 Ret = 0.0; 277 return static_cast<size_t>(Ret); 278 } 279 280 InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} 281 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( 282 InlineSizeEstimatorAnalysis &&Other) 283 : Evaluator(std::move(Other.Evaluator)) {} 284 285 #else 286 namespace llvm { 287 class TFModelEvaluator {}; 288 } // namespace llvm 289 InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {} 290 InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis( 291 InlineSizeEstimatorAnalysis &&) {} 292 InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} 293 InlineSizeEstimatorAnalysis::Result 294 InlineSizeEstimatorAnalysis::run(const Function &F, 295 FunctionAnalysisManager &FAM) { 296 return None; 297 } 298 bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; } 299 #endif