1 //===- CodeLayout.h - Code layout/placement algorithms ---------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Declares methods and data structures for code layout algorithms. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_TRANSFORMS_UTILS_CODELAYOUT_H 15 #define LLVM_TRANSFORMS_UTILS_CODELAYOUT_H 16 17 #include "llvm/ADT/ArrayRef.h" 18 19 #include <utility> 20 #include <vector> 21 22 namespace llvm::codelayout { 23 24 using EdgeT = std::pair<uint64_t, uint64_t>; 25 26 struct EdgeCount { 27 uint64_t src; 28 uint64_t dst; 29 uint64_t count; 30 }; 31 32 /// Find a layout of nodes (basic blocks) of a given CFG optimizing jump 33 /// locality and thus processor I-cache utilization. This is achieved via 34 /// increasing the number of fall-through jumps and co-locating frequently 35 /// executed nodes together. 36 /// The nodes are assumed to be indexed by integers from [0, |V|) so that the 37 /// current order is the identity permutation. 38 /// \p NodeSizes: The sizes of the nodes (in bytes). 39 /// \p NodeCounts: The execution counts of the nodes in the profile. 40 /// \p EdgeCounts: The execution counts of every edge (jump) in the profile. The 41 /// map also defines the edges in CFG and should include 0-count edges. 42 /// \returns The best block order found. 43 std::vector<uint64_t> computeExtTspLayout(ArrayRef<uint64_t> NodeSizes, 44 ArrayRef<uint64_t> NodeCounts, 45 ArrayRef<EdgeCount> EdgeCounts); 46 47 /// Estimate the "quality" of a given node order in CFG. The higher the score, 48 /// the better the order is. The score is designed to reflect the locality of 49 /// the given order, which is anti-correlated with the number of I-cache misses 50 /// in a typical execution of the function. 51 double calcExtTspScore(ArrayRef<uint64_t> Order, ArrayRef<uint64_t> NodeSizes, 52 ArrayRef<uint64_t> NodeCounts, 53 ArrayRef<EdgeCount> EdgeCounts); 54 55 /// Estimate the "quality" of the current node order in CFG. 56 double calcExtTspScore(ArrayRef<uint64_t> NodeSizes, 57 ArrayRef<uint64_t> NodeCounts, 58 ArrayRef<EdgeCount> EdgeCounts); 59 60 /// Algorithm-specific params for Cache-Directed Sort. The values are tuned for 61 /// the best performance of large-scale front-end bound binaries. 62 struct CDSortConfig { 63 /// The size of the cache. 64 unsigned CacheEntries = 16; 65 /// The size of a line in the cache. 66 unsigned CacheSize = 2048; 67 /// The maximum size of a chain to create. 68 unsigned MaxChainSize = 128; 69 /// The power exponent for the distance-based locality. 70 double DistancePower = 0.25; 71 /// The scale factor for the frequency-based locality. 72 double FrequencyScale = 0.25; 73 }; 74 75 /// Apply a Cache-Directed Sort for functions represented by a call graph. 76 /// The placement is done by optimizing the call locality by co-locating 77 /// frequently executed functions. 78 /// \p FuncSizes: The sizes of the nodes (in bytes). 79 /// \p FuncCounts: The execution counts of the nodes in the profile. 80 /// \p CallCounts: The execution counts of every edge (jump) in the profile. The 81 /// map also defines the edges in CFG and should include 0-count edges. 82 /// \p CallOffsets: The offsets of the calls from their source nodes. 83 /// \returns The best function order found. 84 std::vector<uint64_t> computeCacheDirectedLayout( 85 ArrayRef<uint64_t> FuncSizes, ArrayRef<uint64_t> FuncCounts, 86 ArrayRef<EdgeCount> CallCounts, ArrayRef<uint64_t> CallOffsets); 87 88 /// Apply a Cache-Directed Sort with a custom config. 89 std::vector<uint64_t> computeCacheDirectedLayout( 90 const CDSortConfig &Config, ArrayRef<uint64_t> FuncSizes, 91 ArrayRef<uint64_t> FuncCounts, ArrayRef<EdgeCount> CallCounts, 92 ArrayRef<uint64_t> CallOffsets); 93 94 } // namespace llvm::codelayout 95 96 #endif // LLVM_TRANSFORMS_UTILS_CODELAYOUT_H 97