xref: /freebsd/contrib/llvm-project/llvm/lib/CodeGen/BasicBlockSections.cpp (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 //===-- BasicBlockSections.cpp ---=========--------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // BasicBlockSections implementation.
10 //
11 // The purpose of this pass is to assign sections to basic blocks when
12 // -fbasic-block-sections= option is used. Further, with profile information
13 // only the subset of basic blocks with profiles are placed in separate sections
14 // and the rest are grouped in a cold section. The exception handling blocks are
15 // treated specially to ensure they are all in one seciton.
16 //
17 // Basic Block Sections
18 // ====================
19 //
20 // With option, -fbasic-block-sections=list, every function may be split into
21 // clusters of basic blocks. Every cluster will be emitted into a separate
22 // section with its basic blocks sequenced in the given order. To get the
23 // optimized performance, the clusters must form an optimal BB layout for the
24 // function. Every cluster's section is labeled with a symbol to allow the
25 // linker to reorder the sections in any arbitrary sequence. A global order of
26 // these sections would encapsulate the function layout.
27 //
28 // There are a couple of challenges to be addressed:
29 //
30 // 1. The last basic block of every cluster should not have any implicit
31 //    fallthrough to its next basic block, as it can be reordered by the linker.
32 //    The compiler should make these fallthroughs explicit by adding
33 //    unconditional jumps..
34 //
35 // 2. All inter-cluster branch targets would now need to be resolved by the
36 //    linker as they cannot be calculated during compile time. This is done
37 //    using static relocations. Further, the compiler tries to use short branch
38 //    instructions on some ISAs for small branch offsets. This is not possible
39 //    for inter-cluster branches as the offset is not determined at compile
40 //    time, and therefore, long branch instructions have to be used for those.
41 //
42 // 3. Debug Information (DebugInfo) and Call Frame Information (CFI) emission
43 //    needs special handling with basic block sections. DebugInfo needs to be
44 //    emitted with more relocations as basic block sections can break a
45 //    function into potentially several disjoint pieces, and CFI needs to be
46 //    emitted per cluster. This also bloats the object file and binary sizes.
47 //
48 // Basic Block Labels
49 // ==================
50 //
51 // With -fbasic-block-sections=labels, we emit the offsets of BB addresses of
52 // every function into the .llvm_bb_addr_map section. Along with the function
53 // symbols, this allows for mapping of virtual addresses in PMU profiles back to
54 // the corresponding basic blocks. This logic is implemented in AsmPrinter. This
55 // pass only assigns the BBSectionType of every function to ``labels``.
56 //
57 //===----------------------------------------------------------------------===//
58 
59 #include "llvm/ADT/Optional.h"
60 #include "llvm/ADT/SmallSet.h"
61 #include "llvm/ADT/SmallVector.h"
62 #include "llvm/ADT/StringMap.h"
63 #include "llvm/ADT/StringRef.h"
64 #include "llvm/CodeGen/BasicBlockSectionUtils.h"
65 #include "llvm/CodeGen/MachineFunction.h"
66 #include "llvm/CodeGen/MachineFunctionPass.h"
67 #include "llvm/CodeGen/MachineModuleInfo.h"
68 #include "llvm/CodeGen/Passes.h"
69 #include "llvm/CodeGen/TargetInstrInfo.h"
70 #include "llvm/InitializePasses.h"
71 #include "llvm/Support/Error.h"
72 #include "llvm/Support/LineIterator.h"
73 #include "llvm/Support/MemoryBuffer.h"
74 #include "llvm/Target/TargetMachine.h"
75 
76 using llvm::SmallSet;
77 using llvm::SmallVector;
78 using llvm::StringMap;
79 using llvm::StringRef;
80 using namespace llvm;
81 
82 // Placing the cold clusters in a separate section mitigates against poor
83 // profiles and allows optimizations such as hugepage mapping to be applied at a
84 // section granularity. Defaults to ".text.split." which is recognized by lld
85 // via the `-z keep-text-section-prefix` flag.
86 cl::opt<std::string> llvm::BBSectionsColdTextPrefix(
87     "bbsections-cold-text-prefix",
88     cl::desc("The text prefix to use for cold basic block clusters"),
89     cl::init(".text.split."), cl::Hidden);
90 
91 cl::opt<bool> BBSectionsDetectSourceDrift(
92     "bbsections-detect-source-drift",
93     cl::desc("This checks if there is a fdo instr. profile hash "
94              "mismatch for this function"),
95     cl::init(true), cl::Hidden);
96 
97 namespace {
98 
99 // This struct represents the cluster information for a machine basic block.
100 struct BBClusterInfo {
101   // MachineBasicBlock ID.
102   unsigned MBBNumber;
103   // Cluster ID this basic block belongs to.
104   unsigned ClusterID;
105   // Position of basic block within the cluster.
106   unsigned PositionInCluster;
107 };
108 
109 using ProgramBBClusterInfoMapTy = StringMap<SmallVector<BBClusterInfo, 4>>;
110 
111 class BasicBlockSections : public MachineFunctionPass {
112 public:
113   static char ID;
114 
115   // This contains the basic-block-sections profile.
116   const MemoryBuffer *MBuf = nullptr;
117 
118   // This encapsulates the BB cluster information for the whole program.
119   //
120   // For every function name, it contains the cluster information for (all or
121   // some of) its basic blocks. The cluster information for every basic block
122   // includes its cluster ID along with the position of the basic block in that
123   // cluster.
124   ProgramBBClusterInfoMapTy ProgramBBClusterInfo;
125 
126   // Some functions have alias names. We use this map to find the main alias
127   // name for which we have mapping in ProgramBBClusterInfo.
128   StringMap<StringRef> FuncAliasMap;
129 
130   BasicBlockSections(const MemoryBuffer *Buf)
131       : MachineFunctionPass(ID), MBuf(Buf) {
132     initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
133   };
134 
135   BasicBlockSections() : MachineFunctionPass(ID) {
136     initializeBasicBlockSectionsPass(*PassRegistry::getPassRegistry());
137   }
138 
139   StringRef getPassName() const override {
140     return "Basic Block Sections Analysis";
141   }
142 
143   void getAnalysisUsage(AnalysisUsage &AU) const override;
144 
145   /// Read profiles of basic blocks if available here.
146   bool doInitialization(Module &M) override;
147 
148   /// Identify basic blocks that need separate sections and prepare to emit them
149   /// accordingly.
150   bool runOnMachineFunction(MachineFunction &MF) override;
151 };
152 
153 } // end anonymous namespace
154 
155 char BasicBlockSections::ID = 0;
156 INITIALIZE_PASS(BasicBlockSections, "bbsections-prepare",
157                 "Prepares for basic block sections, by splitting functions "
158                 "into clusters of basic blocks.",
159                 false, false)
160 
161 // This function updates and optimizes the branching instructions of every basic
162 // block in a given function to account for changes in the layout.
163 static void updateBranches(
164     MachineFunction &MF,
165     const SmallVector<MachineBasicBlock *, 4> &PreLayoutFallThroughs) {
166   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
167   SmallVector<MachineOperand, 4> Cond;
168   for (auto &MBB : MF) {
169     auto NextMBBI = std::next(MBB.getIterator());
170     auto *FTMBB = PreLayoutFallThroughs[MBB.getNumber()];
171     // If this block had a fallthrough before we need an explicit unconditional
172     // branch to that block if either
173     //     1- the block ends a section, which means its next block may be
174     //        reorderd by the linker, or
175     //     2- the fallthrough block is not adjacent to the block in the new
176     //        order.
177     if (FTMBB && (MBB.isEndSection() || &*NextMBBI != FTMBB))
178       TII->insertUnconditionalBranch(MBB, FTMBB, MBB.findBranchDebugLoc());
179 
180     // We do not optimize branches for machine basic blocks ending sections, as
181     // their adjacent block might be reordered by the linker.
182     if (MBB.isEndSection())
183       continue;
184 
185     // It might be possible to optimize branches by flipping the branch
186     // condition.
187     Cond.clear();
188     MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
189     if (TII->analyzeBranch(MBB, TBB, FBB, Cond))
190       continue;
191     MBB.updateTerminator(FTMBB);
192   }
193 }
194 
195 // This function provides the BBCluster information associated with a function.
196 // Returns true if a valid association exists and false otherwise.
197 static bool getBBClusterInfoForFunction(
198     const MachineFunction &MF, const StringMap<StringRef> FuncAliasMap,
199     const ProgramBBClusterInfoMapTy &ProgramBBClusterInfo,
200     std::vector<Optional<BBClusterInfo>> &V) {
201   // Get the main alias name for the function.
202   auto FuncName = MF.getName();
203   auto R = FuncAliasMap.find(FuncName);
204   StringRef AliasName = R == FuncAliasMap.end() ? FuncName : R->second;
205 
206   // Find the assoicated cluster information.
207   auto P = ProgramBBClusterInfo.find(AliasName);
208   if (P == ProgramBBClusterInfo.end())
209     return false;
210 
211   if (P->second.empty()) {
212     // This indicates that sections are desired for all basic blocks of this
213     // function. We clear the BBClusterInfo vector to denote this.
214     V.clear();
215     return true;
216   }
217 
218   V.resize(MF.getNumBlockIDs());
219   for (auto bbClusterInfo : P->second) {
220     // Bail out if the cluster information contains invalid MBB numbers.
221     if (bbClusterInfo.MBBNumber >= MF.getNumBlockIDs())
222       return false;
223     V[bbClusterInfo.MBBNumber] = bbClusterInfo;
224   }
225   return true;
226 }
227 
228 // This function sorts basic blocks according to the cluster's information.
229 // All explicitly specified clusters of basic blocks will be ordered
230 // accordingly. All non-specified BBs go into a separate "Cold" section.
231 // Additionally, if exception handling landing pads end up in more than one
232 // clusters, they are moved into a single "Exception" section. Eventually,
233 // clusters are ordered in increasing order of their IDs, with the "Exception"
234 // and "Cold" succeeding all other clusters.
235 // FuncBBClusterInfo represent the cluster information for basic blocks. If this
236 // is empty, it means unique sections for all basic blocks in the function.
237 static void
238 assignSections(MachineFunction &MF,
239                const std::vector<Optional<BBClusterInfo>> &FuncBBClusterInfo) {
240   assert(MF.hasBBSections() && "BB Sections is not set for function.");
241   // This variable stores the section ID of the cluster containing eh_pads (if
242   // all eh_pads are one cluster). If more than one cluster contain eh_pads, we
243   // set it equal to ExceptionSectionID.
244   Optional<MBBSectionID> EHPadsSectionID;
245 
246   for (auto &MBB : MF) {
247     // With the 'all' option, every basic block is placed in a unique section.
248     // With the 'list' option, every basic block is placed in a section
249     // associated with its cluster, unless we want individual unique sections
250     // for every basic block in this function (if FuncBBClusterInfo is empty).
251     if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All ||
252         FuncBBClusterInfo.empty()) {
253       // If unique sections are desired for all basic blocks of the function, we
254       // set every basic block's section ID equal to its number (basic block
255       // id). This further ensures that basic blocks are ordered canonically.
256       MBB.setSectionID({static_cast<unsigned int>(MBB.getNumber())});
257     } else if (FuncBBClusterInfo[MBB.getNumber()].hasValue())
258       MBB.setSectionID(FuncBBClusterInfo[MBB.getNumber()]->ClusterID);
259     else {
260       // BB goes into the special cold section if it is not specified in the
261       // cluster info map.
262       MBB.setSectionID(MBBSectionID::ColdSectionID);
263     }
264 
265     if (MBB.isEHPad() && EHPadsSectionID != MBB.getSectionID() &&
266         EHPadsSectionID != MBBSectionID::ExceptionSectionID) {
267       // If we already have one cluster containing eh_pads, this must be updated
268       // to ExceptionSectionID. Otherwise, we set it equal to the current
269       // section ID.
270       EHPadsSectionID = EHPadsSectionID.hasValue()
271                             ? MBBSectionID::ExceptionSectionID
272                             : MBB.getSectionID();
273     }
274   }
275 
276   // If EHPads are in more than one section, this places all of them in the
277   // special exception section.
278   if (EHPadsSectionID == MBBSectionID::ExceptionSectionID)
279     for (auto &MBB : MF)
280       if (MBB.isEHPad())
281         MBB.setSectionID(EHPadsSectionID.getValue());
282 }
283 
284 void llvm::sortBasicBlocksAndUpdateBranches(
285     MachineFunction &MF, MachineBasicBlockComparator MBBCmp) {
286   SmallVector<MachineBasicBlock *, 4> PreLayoutFallThroughs(
287       MF.getNumBlockIDs());
288   for (auto &MBB : MF)
289     PreLayoutFallThroughs[MBB.getNumber()] = MBB.getFallThrough();
290 
291   MF.sort(MBBCmp);
292 
293   // Set IsBeginSection and IsEndSection according to the assigned section IDs.
294   MF.assignBeginEndSections();
295 
296   // After reordering basic blocks, we must update basic block branches to
297   // insert explicit fallthrough branches when required and optimize branches
298   // when possible.
299   updateBranches(MF, PreLayoutFallThroughs);
300 }
301 
302 // If the exception section begins with a landing pad, that landing pad will
303 // assume a zero offset (relative to @LPStart) in the LSDA. However, a value of
304 // zero implies "no landing pad." This function inserts a NOP just before the EH
305 // pad label to ensure a nonzero offset. Returns true if padding is not needed.
306 static bool avoidZeroOffsetLandingPad(MachineFunction &MF) {
307   for (auto &MBB : MF) {
308     if (MBB.isBeginSection() && MBB.isEHPad()) {
309       MachineBasicBlock::iterator MI = MBB.begin();
310       while (!MI->isEHLabel())
311         ++MI;
312       MCInst Nop = MF.getSubtarget().getInstrInfo()->getNop();
313       BuildMI(MBB, MI, DebugLoc(),
314               MF.getSubtarget().getInstrInfo()->get(Nop.getOpcode()));
315       return false;
316     }
317   }
318   return true;
319 }
320 
321 // This checks if the source of this function has drifted since this binary was
322 // profiled previously.  For now, we are piggy backing on what PGO does to
323 // detect this with instrumented profiles.  PGO emits an hash of the IR and
324 // checks if the hash has changed.  Advanced basic block layout is usually done
325 // on top of PGO optimized binaries and hence this check works well in practice.
326 static bool hasInstrProfHashMismatch(MachineFunction &MF) {
327   if (!BBSectionsDetectSourceDrift)
328     return false;
329 
330   const char MetadataName[] = "instr_prof_hash_mismatch";
331   auto *Existing = MF.getFunction().getMetadata(LLVMContext::MD_annotation);
332   if (Existing) {
333     MDTuple *Tuple = cast<MDTuple>(Existing);
334     for (auto &N : Tuple->operands())
335       if (cast<MDString>(N.get())->getString() == MetadataName)
336         return true;
337   }
338 
339   return false;
340 }
341 
342 bool BasicBlockSections::runOnMachineFunction(MachineFunction &MF) {
343   auto BBSectionsType = MF.getTarget().getBBSectionsType();
344   assert(BBSectionsType != BasicBlockSection::None &&
345          "BB Sections not enabled!");
346 
347   // Check for source drift.  If the source has changed since the profiles
348   // were obtained, optimizing basic blocks might be sub-optimal.
349   // This only applies to BasicBlockSection::List as it creates
350   // clusters of basic blocks using basic block ids. Source drift can
351   // invalidate these groupings leading to sub-optimal code generation with
352   // regards to performance.
353   if (BBSectionsType == BasicBlockSection::List &&
354       hasInstrProfHashMismatch(MF))
355     return true;
356 
357   // Renumber blocks before sorting them for basic block sections.  This is
358   // useful during sorting, basic blocks in the same section will retain the
359   // default order.  This renumbering should also be done for basic block
360   // labels to match the profiles with the correct blocks.
361   MF.RenumberBlocks();
362 
363   if (BBSectionsType == BasicBlockSection::Labels) {
364     MF.setBBSectionsType(BBSectionsType);
365     return true;
366   }
367 
368   std::vector<Optional<BBClusterInfo>> FuncBBClusterInfo;
369   if (BBSectionsType == BasicBlockSection::List &&
370       !getBBClusterInfoForFunction(MF, FuncAliasMap, ProgramBBClusterInfo,
371                                    FuncBBClusterInfo))
372     return true;
373   MF.setBBSectionsType(BBSectionsType);
374   assignSections(MF, FuncBBClusterInfo);
375 
376   // We make sure that the cluster including the entry basic block precedes all
377   // other clusters.
378   auto EntryBBSectionID = MF.front().getSectionID();
379 
380   // Helper function for ordering BB sections as follows:
381   //   * Entry section (section including the entry block).
382   //   * Regular sections (in increasing order of their Number).
383   //     ...
384   //   * Exception section
385   //   * Cold section
386   auto MBBSectionOrder = [EntryBBSectionID](const MBBSectionID &LHS,
387                                             const MBBSectionID &RHS) {
388     // We make sure that the section containing the entry block precedes all the
389     // other sections.
390     if (LHS == EntryBBSectionID || RHS == EntryBBSectionID)
391       return LHS == EntryBBSectionID;
392     return LHS.Type == RHS.Type ? LHS.Number < RHS.Number : LHS.Type < RHS.Type;
393   };
394 
395   // We sort all basic blocks to make sure the basic blocks of every cluster are
396   // contiguous and ordered accordingly. Furthermore, clusters are ordered in
397   // increasing order of their section IDs, with the exception and the
398   // cold section placed at the end of the function.
399   auto Comparator = [&](const MachineBasicBlock &X,
400                         const MachineBasicBlock &Y) {
401     auto XSectionID = X.getSectionID();
402     auto YSectionID = Y.getSectionID();
403     if (XSectionID != YSectionID)
404       return MBBSectionOrder(XSectionID, YSectionID);
405     // If the two basic block are in the same section, the order is decided by
406     // their position within the section.
407     if (XSectionID.Type == MBBSectionID::SectionType::Default)
408       return FuncBBClusterInfo[X.getNumber()]->PositionInCluster <
409              FuncBBClusterInfo[Y.getNumber()]->PositionInCluster;
410     return X.getNumber() < Y.getNumber();
411   };
412 
413   sortBasicBlocksAndUpdateBranches(MF, Comparator);
414   avoidZeroOffsetLandingPad(MF);
415   return true;
416 }
417 
418 // Basic Block Sections can be enabled for a subset of machine basic blocks.
419 // This is done by passing a file containing names of functions for which basic
420 // block sections are desired.  Additionally, machine basic block ids of the
421 // functions can also be specified for a finer granularity. Moreover, a cluster
422 // of basic blocks could be assigned to the same section.
423 // A file with basic block sections for all of function main and three blocks
424 // for function foo (of which 1 and 2 are placed in a cluster) looks like this:
425 // ----------------------------
426 // list.txt:
427 // !main
428 // !foo
429 // !!1 2
430 // !!4
431 static Error getBBClusterInfo(const MemoryBuffer *MBuf,
432                               ProgramBBClusterInfoMapTy &ProgramBBClusterInfo,
433                               StringMap<StringRef> &FuncAliasMap) {
434   assert(MBuf);
435   line_iterator LineIt(*MBuf, /*SkipBlanks=*/true, /*CommentMarker=*/'#');
436 
437   auto invalidProfileError = [&](auto Message) {
438     return make_error<StringError>(
439         Twine("Invalid profile " + MBuf->getBufferIdentifier() + " at line " +
440               Twine(LineIt.line_number()) + ": " + Message),
441         inconvertibleErrorCode());
442   };
443 
444   auto FI = ProgramBBClusterInfo.end();
445 
446   // Current cluster ID corresponding to this function.
447   unsigned CurrentCluster = 0;
448   // Current position in the current cluster.
449   unsigned CurrentPosition = 0;
450 
451   // Temporary set to ensure every basic block ID appears once in the clusters
452   // of a function.
453   SmallSet<unsigned, 4> FuncBBIDs;
454 
455   for (; !LineIt.is_at_eof(); ++LineIt) {
456     StringRef S(*LineIt);
457     if (S[0] == '@')
458       continue;
459     // Check for the leading "!"
460     if (!S.consume_front("!") || S.empty())
461       break;
462     // Check for second "!" which indicates a cluster of basic blocks.
463     if (S.consume_front("!")) {
464       if (FI == ProgramBBClusterInfo.end())
465         return invalidProfileError(
466             "Cluster list does not follow a function name specifier.");
467       SmallVector<StringRef, 4> BBIndexes;
468       S.split(BBIndexes, ' ');
469       // Reset current cluster position.
470       CurrentPosition = 0;
471       for (auto BBIndexStr : BBIndexes) {
472         unsigned long long BBIndex;
473         if (getAsUnsignedInteger(BBIndexStr, 10, BBIndex))
474           return invalidProfileError(Twine("Unsigned integer expected: '") +
475                                      BBIndexStr + "'.");
476         if (!FuncBBIDs.insert(BBIndex).second)
477           return invalidProfileError(Twine("Duplicate basic block id found '") +
478                                      BBIndexStr + "'.");
479         if (!BBIndex && CurrentPosition)
480           return invalidProfileError("Entry BB (0) does not begin a cluster.");
481 
482         FI->second.emplace_back(BBClusterInfo{
483             ((unsigned)BBIndex), CurrentCluster, CurrentPosition++});
484       }
485       CurrentCluster++;
486     } else { // This is a function name specifier.
487       // Function aliases are separated using '/'. We use the first function
488       // name for the cluster info mapping and delegate all other aliases to
489       // this one.
490       SmallVector<StringRef, 4> Aliases;
491       S.split(Aliases, '/');
492       for (size_t i = 1; i < Aliases.size(); ++i)
493         FuncAliasMap.try_emplace(Aliases[i], Aliases.front());
494 
495       // Prepare for parsing clusters of this function name.
496       // Start a new cluster map for this function name.
497       FI = ProgramBBClusterInfo.try_emplace(Aliases.front()).first;
498       CurrentCluster = 0;
499       FuncBBIDs.clear();
500     }
501   }
502   return Error::success();
503 }
504 
505 bool BasicBlockSections::doInitialization(Module &M) {
506   if (!MBuf)
507     return false;
508   if (auto Err = getBBClusterInfo(MBuf, ProgramBBClusterInfo, FuncAliasMap))
509     report_fatal_error(std::move(Err));
510   return false;
511 }
512 
513 void BasicBlockSections::getAnalysisUsage(AnalysisUsage &AU) const {
514   AU.setPreservesAll();
515   MachineFunctionPass::getAnalysisUsage(AU);
516 }
517 
518 MachineFunctionPass *
519 llvm::createBasicBlockSectionsPass(const MemoryBuffer *Buf) {
520   return new BasicBlockSections(Buf);
521 }
522