//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements feature and label extraction for offline supervised learning
// of an IR to native size model.
//
//===----------------------------------------------------------------------===//
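// A rough usage sketch, for orientation only (nothing below is defined in this
// file, and the exact pass wiring is an assumption): when TensorFlow support
// is built in and a saved model is supplied via -ml-inliner-ir2native-model,
// a pass holding a FunctionAnalysisManager can query the estimate:
//
//   if (auto SizeEstimate = FAM.getResult<InlineSizeEstimatorAnalysis>(F))
//     /* *SizeEstimate is the predicted native size of F */;
//
// Without a model (or without TF support), the result is None.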
#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"

#ifdef LLVM_HAVE_TF_API
#include "llvm/Analysis/Utils/TFUtils.h"
#endif
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/PassManager.h"
#include "llvm/MC/MCAsmLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <deque>

using namespace llvm;

AnalysisKey InlineSizeEstimatorAnalysis::Key;

#define DEBUG_TYPE "inline-size-estimator"

#ifdef LLVM_HAVE_TF_API
cl::opt<std::string> TFIR2NativeModelPath(
    "ml-inliner-ir2native-model", cl::Hidden,
    cl::desc("Path to saved model evaluating native size from IR."));

namespace {
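// Returns the highest instruction opcode number defined in Instruction.def,
// obtained by expanding LAST_OTHER_INST. Used below to size the per-opcode
// histogram.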
unsigned getMaxInstructionID() {
#define LAST_OTHER_INST(NR) return NR;
#include "llvm/IR/Instruction.def"
}

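// Gathers the features fed to the IR-to-native-size model: a small set of
// named per-function features, a histogram of instruction opcodes, and a
// histogram of "important" consecutive-opcode pairs.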
class IRToNativeSizeLearning {
public:
  enum class NamedFeatureIndex : size_t {
    InitialSize,
    Blocks,
    Calls,
    IsLocal,
    IsLinkOnceODR,
    IsLinkOnce,
    Loops,
    MaxLoopDepth,
    MaxDomTreeLevel,

    NumNamedFeatures
  };
  static const size_t NumNamedFeatures =
      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
  struct FunctionFeatures {
    static std::vector<std::pair<size_t, size_t>>
        ImportantInstructionSuccessions;
    static const size_t FeatureCount;

    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
    std::vector<int32_t> InstructionHistogram;
    std::vector<int32_t> InstructionPairHistogram;

    void fillTensor(int32_t *Ptr) const;
    int32_t &operator[](NamedFeatureIndex Pos) {
      return NamedFeatures[static_cast<size_t>(Pos)];
    }
  };
  IRToNativeSizeLearning() = default;

  static FunctionFeatures getFunctionFeatures(Function &F,
                                              FunctionAnalysisManager &FAM);

private:
  /// Sort the feature tuples once (lazily, via the ManagedStatic below).
  struct SortFeatureTuples {
    bool IsSorted = false;
    SortFeatureTuples() {
      std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(),
                FunctionFeatures::ImportantInstructionSuccessions.end());
      IsSorted = true;
    }
  };

  static llvm::ManagedStatic<SortFeatureTuples> TupleSorter;

  static bool ensureSortedTuples() { return TupleSorter->IsSorted; }
};
llvm::ManagedStatic<IRToNativeSizeLearning::SortFeatureTuples>
    IRToNativeSizeLearning::TupleSorter;

// This is a point-in-time choice: we determined that including these pairs of
// consecutive instructions (in the IR layout available at inline time) as
// features improves the model performance. We want to move away from manual
// feature selection.
// The vector is given in opcode pairs rather than labels because 1) labels
// weren't readily available, and 2) the successions were hand-extracted.
// Each pair {A, B} counts how many times opcode B immediately follows opcode A.
std::vector<std::pair<size_t, size_t>>
    IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions =
        {{1, 34},  {15, 27}, {53, 53}, {53, 34}, {1, 11},  {32, 2},  {2, 48},
         {28, 48}, {1, 45},  {49, 32}, {57, 56}, {55, 53}, {1, 28},  {57, 34},
         {1, 1},   {32, 28}, {32, 15}, {49, 28}, {53, 1},  {2, 53},  {48, 34},
         {28, 53}, {2, 32},  {1, 40},  {32, 48}, {29, 56}, {56, 32}, {55, 56},
         {48, 56}, {1, 31},  {33, 34}, {2, 28},  {1, 12},  {55, 1},  {31, 31},
         {65, 1},  {33, 56}, {32, 32}, {13, 13}, {1, 26},  {13, 26}, {2, 1},
         {1, 33},  {47, 49}, {64, 1},  {2, 38},  {34, 53}, {48, 2},  {55, 34},
         {34, 32}, {1, 5},   {56, 13}, {2, 2},   {2, 49},  {33, 2},  {49, 39},
         {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29},
         {47, 15}, {13, 34}, {2, 33},  {32, 49}, {49, 34}, {56, 33}, {1, 30},
         {33, 33}, {31, 33}, {2, 29},  {56, 7},  {32, 13}, {2, 55},  {56, 56},
         {2, 34},  {1, 42},  {34, 49}, {1, 20},  {32, 33}, {1, 25},  {53, 28},
         {1, 14},  {31, 49}, {28, 2},  {2, 13},  {2, 56},  {1, 32},  {56, 53},
         {65, 65}, {33, 53}, {64, 64}, {13, 2},  {34, 33}, {1, 4},   {49, 2},
         {1, 9},   {56, 1},  {33, 1},  {53, 57}, {32, 53}, {13, 56}, {32, 56},
         {55, 55}, {1, 18},  {49, 56}, {34, 34}, {1, 7},   {56, 64}, {32, 1},
         {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32},
         {32, 40}, {1, 29},  {53, 2},  {34, 1},  {32, 34}, {49, 49}, {1, 24},
         {40, 34}, {1, 13},  {38, 34}, {29, 2},  {34, 2},  {1, 39},  {1, 22},
         {1, 27},  {49, 1},  {1, 8},   {56, 2}};

// We have: 9 calculated features (the features here); 1 feature for each
// instruction opcode; and 1 feature for each manually-identified sequence.
// For the latter 2, we build a histogram: we count the number of
// occurrences of each instruction opcode or succession of instructions,
// respectively.
// Note that instruction opcodes start from 1. For convenience, we also have an
// always 0 feature for the '0' opcode, hence the extra 1.
const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
    IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions
        .size() +
    getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures;

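// Estimate the size of F by summing TTI's per-instruction code-size cost
// (TCK_CodeSize) over all instructions.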
size_t getSize(Function &F, TargetTransformInfo &TTI) {
  size_t Ret = 0;
  for (auto &BB : F)
    for (auto &I : BB)
      Ret += TTI.getInstructionCost(
          &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize);
  return Ret;
}

size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
  return getSize(F, TTI);
}

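// Return the deepest dominator tree level reached by any basic block of F.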
unsigned getMaxDominatorTreeDepth(const Function &F,
                                  const DominatorTree &Tree) {
  unsigned Ret = 0;
  for (auto &BB : F)
    if (auto *TN = Tree.getNode(&BB))
      Ret = std::max(Ret, TN->getLevel());
  return Ret;
}
} // namespace

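// Walk F once (skipping debug instructions) to fill the opcode and opcode-pair
// histograms and count calls, then derive the named features from the size
// estimate, linkage, block and loop counts, loop depth, and dominator tree
// depth.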
IRToNativeSizeLearning::FunctionFeatures
IRToNativeSizeLearning::getFunctionFeatures(Function &F,
                                            FunctionAnalysisManager &FAM) {
  assert(ensureSortedTuples() && "expected lazy initialization");

  auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
  FunctionFeatures FF;
  size_t InstrCount = getMaxInstructionID() + 1;
  FF.InstructionHistogram.resize(InstrCount);

  FF.InstructionPairHistogram.resize(
      FunctionFeatures::ImportantInstructionSuccessions.size());

  auto StartID = 0;
  auto LastID = StartID;
  auto getPairIndex = [](size_t a, size_t b) {
    auto I =
        std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(),
                  FunctionFeatures::ImportantInstructionSuccessions.end(),
                  std::make_pair(a, b));
    if (I == FunctionFeatures::ImportantInstructionSuccessions.end())
      return -1;
    return static_cast<int>(std::distance(
        FunctionFeatures::ImportantInstructionSuccessions.begin(), I));
  };

  // We don't want debug calls, because they'd just add noise.
  for (auto &BB : F) {
    for (auto I = BB.instructionsWithoutDebug().begin(),
              E = BB.instructionsWithoutDebug().end();
         I != E; ++I) {
      auto ID = I->getOpcode();

      ++FF.InstructionHistogram[ID];
      int PairIndex = getPairIndex(LastID, ID);
      if (PairIndex >= 0)
        ++FF.InstructionPairHistogram[PairIndex];
      LastID = ID;
      if (isa<CallBase>(*I))
        ++FF[NamedFeatureIndex::Calls];
    }
  }

  FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
  FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
  FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
  FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
  FF[NamedFeatureIndex::Blocks] =
      std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end());
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
  for (auto &L : LI)
    FF[NamedFeatureIndex::MaxLoopDepth] =
        std::max(FF[NamedFeatureIndex::MaxLoopDepth],
                 static_cast<int32_t>(L->getLoopDepth()));
  FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
  return FF;
}

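// Flatten the features into a contiguous int32 buffer, in this order: named
// features, then the opcode histogram, then the opcode-pair histogram.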
void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
  std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
  Ptr += NamedFeatures.size();
  std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
  Ptr += InstructionHistogram.size();
  std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
            Ptr);
}

bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
  return !TFIR2NativeModelPath.empty();
}

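// If a model path was provided, load the TF saved model and pre-allocate its
// single int32 input tensor of shape {1, FeatureCount}. On failure the
// evaluator is reset, and run() will return None.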
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
  if (!isEvaluatorRequested()) {
    return;
  }
  std::vector<std::string> InputNames{"serving_default_input_1"};
  std::vector<std::string> OutputName{"StatefulPartitionedCall"};
  Evaluator = std::make_unique<TFModelEvaluator>(
      TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName);
  if (!Evaluator || !Evaluator->isValid()) {
    Evaluator.reset();
    return;
  }
  static const std::vector<int64_t> Dim{
      1, static_cast<int64_t>(
             IRToNativeSizeLearning::FunctionFeatures::FeatureCount)};

  Evaluator->initInput<int32_t>(0, Dim);
}

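// Evaluate the model on F's features. Returns None if no evaluator is
// available or evaluation fails; otherwise the predicted native size,
// clamped to be non-negative.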
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  if (!Evaluator)
    return None;
  auto Features = IRToNativeSizeLearning::getFunctionFeatures(
      const_cast<Function &>(F), FAM);
  int32_t *V = Evaluator->getInput<int32_t>(0);
  Features.fillTensor(V);
  auto ER = Evaluator->evaluate();
  if (!ER)
    return None;
  float Ret = *ER->getTensorValue<float>(0);
  if (Ret < 0.0)
    Ret = 0.0;
  return static_cast<size_t>(Ret);
}

InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&Other)
    : Evaluator(std::move(Other.Evaluator)) {}

#else
namespace llvm {
class TFModelEvaluator {};
} // namespace llvm
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
    InlineSizeEstimatorAnalysis &&) {}
InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
InlineSizeEstimatorAnalysis::Result
InlineSizeEstimatorAnalysis::run(const Function &F,
                                 FunctionAnalysisManager &FAM) {
  return None;
}
bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
299 #endif