1 //===- UnwindInfoSection.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "UnwindInfoSection.h" 10 #include "ConcatOutputSection.h" 11 #include "Config.h" 12 #include "InputSection.h" 13 #include "OutputSection.h" 14 #include "OutputSegment.h" 15 #include "SymbolTable.h" 16 #include "Symbols.h" 17 #include "SyntheticSections.h" 18 #include "Target.h" 19 20 #include "lld/Common/ErrorHandler.h" 21 #include "lld/Common/Memory.h" 22 #include "llvm/ADT/DenseMap.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/BinaryFormat/MachO.h" 25 #include "llvm/Support/Parallel.h" 26 27 #include <numeric> 28 29 using namespace llvm; 30 using namespace llvm::MachO; 31 using namespace llvm::support::endian; 32 using namespace lld; 33 using namespace lld::macho; 34 35 #define COMMON_ENCODINGS_MAX 127 36 #define COMPACT_ENCODINGS_MAX 256 37 38 #define SECOND_LEVEL_PAGE_BYTES 4096 39 #define SECOND_LEVEL_PAGE_WORDS (SECOND_LEVEL_PAGE_BYTES / sizeof(uint32_t)) 40 #define REGULAR_SECOND_LEVEL_ENTRIES_MAX \ 41 ((SECOND_LEVEL_PAGE_BYTES - \ 42 sizeof(unwind_info_regular_second_level_page_header)) / \ 43 sizeof(unwind_info_regular_second_level_entry)) 44 #define COMPRESSED_SECOND_LEVEL_ENTRIES_MAX \ 45 ((SECOND_LEVEL_PAGE_BYTES - \ 46 sizeof(unwind_info_compressed_second_level_page_header)) / \ 47 sizeof(uint32_t)) 48 49 #define COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24 50 #define COMPRESSED_ENTRY_FUNC_OFFSET_MASK \ 51 UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0) 52 53 // Compact Unwind format is a Mach-O evolution of DWARF Unwind that 54 // optimizes space and exception-time lookup. Most DWARF unwind 55 // entries can be replaced with Compact Unwind entries, but the ones 56 // that cannot are retained in DWARF form. 57 // 58 // This comment will address macro-level organization of the pre-link 59 // and post-link compact unwind tables. For micro-level organization 60 // pertaining to the bitfield layout of the 32-bit compact unwind 61 // entries, see libunwind/include/mach-o/compact_unwind_encoding.h 62 // 63 // Important clarifying factoids: 64 // 65 // * __LD,__compact_unwind is the compact unwind format for compiler 66 // output and linker input. It is never a final output. It could be 67 // an intermediate output with the `-r` option which retains relocs. 68 // 69 // * __TEXT,__unwind_info is the compact unwind format for final 70 // linker output. It is never an input. 71 // 72 // * __TEXT,__eh_frame is the DWARF format for both linker input and output. 73 // 74 // * __TEXT,__unwind_info entries are divided into 4 KiB pages (2nd 75 // level) by ascending address, and the pages are referenced by an 76 // index (1st level) in the section header. 77 // 78 // * Following the headers in __TEXT,__unwind_info, the bulk of the 79 // section contains a vector of compact unwind entries 80 // `{functionOffset, encoding}` sorted by ascending `functionOffset`. 81 // Adjacent entries with the same encoding can be folded to great 82 // advantage, achieving a 3-order-of-magnitude reduction in the 83 // number of entries. 84 // 85 // * The __TEXT,__unwind_info format can accommodate up to 127 unique 86 // encodings for the space-efficient compressed format. In practice, 87 // fewer than a dozen unique encodings are used by C++ programs of 88 // all sizes. Therefore, we don't even bother implementing the regular 89 // non-compressed format. Time will tell if anyone in the field ever 90 // overflows the 127-encodings limit. 91 // 92 // Refer to the definition of unwind_info_section_header in 93 // compact_unwind_encoding.h for an overview of the format we are encoding 94 // here. 95 96 // TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410 97 // TODO(gkm): how do we align the 2nd-level pages? 98 99 // The offsets of various fields in the on-disk representation of each compact 100 // unwind entry. 101 struct CompactUnwindOffsets { 102 uint32_t functionAddress; 103 uint32_t functionLength; 104 uint32_t encoding; 105 uint32_t personality; 106 uint32_t lsda; 107 108 CompactUnwindOffsets(size_t wordSize) { 109 if (wordSize == 8) 110 init<uint64_t>(); 111 else { 112 assert(wordSize == 4); 113 init<uint32_t>(); 114 } 115 } 116 117 private: 118 template <class Ptr> void init() { 119 functionAddress = offsetof(Layout<Ptr>, functionAddress); 120 functionLength = offsetof(Layout<Ptr>, functionLength); 121 encoding = offsetof(Layout<Ptr>, encoding); 122 personality = offsetof(Layout<Ptr>, personality); 123 lsda = offsetof(Layout<Ptr>, lsda); 124 } 125 126 template <class Ptr> struct Layout { 127 Ptr functionAddress; 128 uint32_t functionLength; 129 compact_unwind_encoding_t encoding; 130 Ptr personality; 131 Ptr lsda; 132 }; 133 }; 134 135 // LLD's internal representation of a compact unwind entry. 136 struct CompactUnwindEntry { 137 uint64_t functionAddress; 138 uint32_t functionLength; 139 compact_unwind_encoding_t encoding; 140 Symbol *personality; 141 InputSection *lsda; 142 }; 143 144 using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>; 145 146 struct SecondLevelPage { 147 uint32_t kind; 148 size_t entryIndex; 149 size_t entryCount; 150 size_t byteCount; 151 std::vector<compact_unwind_encoding_t> localEncodings; 152 EncodingMap localEncodingIndexes; 153 }; 154 155 // UnwindInfoSectionImpl allows us to avoid cluttering our header file with a 156 // lengthy definition of UnwindInfoSection. 157 class UnwindInfoSectionImpl final : public UnwindInfoSection { 158 public: 159 UnwindInfoSectionImpl() : cuOffsets(target->wordSize) {} 160 uint64_t getSize() const override { return unwindInfoSize; } 161 void prepare() override; 162 void finalize() override; 163 void writeTo(uint8_t *buf) const override; 164 165 private: 166 void prepareRelocations(ConcatInputSection *); 167 void relocateCompactUnwind(std::vector<CompactUnwindEntry> &); 168 void encodePersonalities(); 169 Symbol *canonicalizePersonality(Symbol *); 170 171 uint64_t unwindInfoSize = 0; 172 std::vector<decltype(symbols)::value_type> symbolsVec; 173 CompactUnwindOffsets cuOffsets; 174 std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings; 175 EncodingMap commonEncodingIndexes; 176 // The entries here will be in the same order as their originating symbols 177 // in symbolsVec. 178 std::vector<CompactUnwindEntry> cuEntries; 179 // Indices into the cuEntries vector. 180 std::vector<size_t> cuIndices; 181 std::vector<Symbol *> personalities; 182 SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *> 183 personalityTable; 184 // Indices into cuEntries for CUEs with a non-null LSDA. 185 std::vector<size_t> entriesWithLsda; 186 // Map of cuEntries index to an index within the LSDA array. 187 DenseMap<size_t, uint32_t> lsdaIndex; 188 std::vector<SecondLevelPage> secondLevelPages; 189 uint64_t level2PagesOffset = 0; 190 }; 191 192 UnwindInfoSection::UnwindInfoSection() 193 : SyntheticSection(segment_names::text, section_names::unwindInfo) { 194 align = 4; 195 } 196 197 // Record function symbols that may need entries emitted in __unwind_info, which 198 // stores unwind data for address ranges. 199 // 200 // Note that if several adjacent functions have the same unwind encoding and 201 // personality function and no LSDA, they share one unwind entry. For this to 202 // work, functions without unwind info need explicit "no unwind info" unwind 203 // entries -- else the unwinder would think they have the unwind info of the 204 // closest function with unwind info right before in the image. Thus, we add 205 // function symbols for each unique address regardless of whether they have 206 // associated unwind info. 207 void UnwindInfoSection::addSymbol(const Defined *d) { 208 if (d->unwindEntry) 209 allEntriesAreOmitted = false; 210 // We don't yet know the final output address of this symbol, but we know that 211 // they are uniquely determined by a combination of the isec and value, so 212 // we use that as the key here. 213 auto p = symbols.insert({{d->isec, d->value}, d}); 214 // If we have multiple symbols at the same address, only one of them can have 215 // an associated unwind entry. 216 if (!p.second && d->unwindEntry) { 217 assert(!p.first->second->unwindEntry); 218 p.first->second = d; 219 } 220 } 221 222 void UnwindInfoSectionImpl::prepare() { 223 // This iteration needs to be deterministic, since prepareRelocations may add 224 // entries to the GOT. Hence the use of a MapVector for 225 // UnwindInfoSection::symbols. 226 for (const Defined *d : make_second_range(symbols)) 227 if (d->unwindEntry) { 228 if (d->unwindEntry->getName() == section_names::compactUnwind) { 229 prepareRelocations(d->unwindEntry); 230 } else { 231 // We don't have to add entries to the GOT here because FDEs have 232 // explicit GOT relocations, so Writer::scanRelocations() will add those 233 // GOT entries. However, we still need to canonicalize the personality 234 // pointers (like prepareRelocations() does for CU entries) in order 235 // to avoid overflowing the 3-personality limit. 236 FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry]; 237 fde.personality = canonicalizePersonality(fde.personality); 238 } 239 } 240 } 241 242 // Compact unwind relocations have different semantics, so we handle them in a 243 // separate code path from regular relocations. First, we do not wish to add 244 // rebase opcodes for __LD,__compact_unwind, because that section doesn't 245 // actually end up in the final binary. Second, personality pointers always 246 // reside in the GOT and must be treated specially. 247 void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { 248 assert(!isec->shouldOmitFromOutput() && 249 "__compact_unwind section should not be omitted"); 250 251 // FIXME: Make this skip relocations for CompactUnwindEntries that 252 // point to dead-stripped functions. That might save some amount of 253 // work. But since there are usually just few personality functions 254 // that are referenced from many places, at least some of them likely 255 // live, it wouldn't reduce number of got entries. 256 for (size_t i = 0; i < isec->relocs.size(); ++i) { 257 Reloc &r = isec->relocs[i]; 258 assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED)); 259 260 // Functions and LSDA entries always reside in the same object file as the 261 // compact unwind entries that references them, and thus appear as section 262 // relocs. There is no need to prepare them. We only prepare relocs for 263 // personality functions. 264 if (r.offset != cuOffsets.personality) 265 continue; 266 267 if (auto *s = r.referent.dyn_cast<Symbol *>()) { 268 // Personality functions are nearly always system-defined (e.g., 269 // ___gxx_personality_v0 for C++) and relocated as dylib symbols. When an 270 // application provides its own personality function, it might be 271 // referenced by an extern Defined symbol reloc, or a local section reloc. 272 if (auto *defined = dyn_cast<Defined>(s)) { 273 // XXX(vyng) This is a a special case for handling duplicate personality 274 // symbols. Note that LD64's behavior is a bit different and it is 275 // inconsistent with how symbol resolution usually work 276 // 277 // So we've decided not to follow it. Instead, simply pick the symbol 278 // with the same name from the symbol table to replace the local one. 279 // 280 // (See discussions/alternatives already considered on D107533) 281 if (!defined->isExternal()) 282 if (Symbol *sym = symtab->find(defined->getName())) 283 if (!sym->isLazy()) 284 r.referent = s = sym; 285 } 286 if (auto *undefined = dyn_cast<Undefined>(s)) { 287 treatUndefinedSymbol(*undefined, isec, r.offset); 288 // treatUndefinedSymbol() can replace s with a DylibSymbol; re-check. 289 if (isa<Undefined>(s)) 290 continue; 291 } 292 293 // Similar to canonicalizePersonality(), but we also register a GOT entry. 294 if (auto *defined = dyn_cast<Defined>(s)) { 295 // Check if we have created a synthetic symbol at the same address. 296 Symbol *&personality = 297 personalityTable[{defined->isec, defined->value}]; 298 if (personality == nullptr) { 299 personality = defined; 300 in.got->addEntry(defined); 301 } else if (personality != defined) { 302 r.referent = personality; 303 } 304 continue; 305 } 306 307 assert(isa<DylibSymbol>(s)); 308 in.got->addEntry(s); 309 continue; 310 } 311 312 if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { 313 assert(!isCoalescedWeak(referentIsec)); 314 // Personality functions can be referenced via section relocations 315 // if they live in the same object file. Create placeholder synthetic 316 // symbols for them in the GOT. 317 Symbol *&s = personalityTable[{referentIsec, r.addend}]; 318 if (s == nullptr) { 319 // This runs after dead stripping, so the noDeadStrip argument does not 320 // matter. 321 s = make<Defined>("<internal>", /*file=*/nullptr, referentIsec, 322 r.addend, /*size=*/0, /*isWeakDef=*/false, 323 /*isExternal=*/false, /*isPrivateExtern=*/false, 324 /*includeInSymtab=*/true, 325 /*isThumb=*/false, /*isReferencedDynamically=*/false, 326 /*noDeadStrip=*/false); 327 s->used = true; 328 in.got->addEntry(s); 329 } 330 r.referent = s; 331 r.addend = 0; 332 } 333 } 334 } 335 336 Symbol *UnwindInfoSectionImpl::canonicalizePersonality(Symbol *personality) { 337 if (auto *defined = dyn_cast_or_null<Defined>(personality)) { 338 // Check if we have created a synthetic symbol at the same address. 339 Symbol *&synth = personalityTable[{defined->isec, defined->value}]; 340 if (synth == nullptr) 341 synth = defined; 342 else if (synth != defined) 343 return synth; 344 } 345 return personality; 346 } 347 348 // We need to apply the relocations to the pre-link compact unwind section 349 // before converting it to post-link form. There should only be absolute 350 // relocations here: since we are not emitting the pre-link CU section, there 351 // is no source address to make a relative location meaningful. 352 void UnwindInfoSectionImpl::relocateCompactUnwind( 353 std::vector<CompactUnwindEntry> &cuEntries) { 354 parallelFor(0, symbolsVec.size(), [&](size_t i) { 355 CompactUnwindEntry &cu = cuEntries[i]; 356 const Defined *d = symbolsVec[i].second; 357 cu.functionAddress = d->getVA(); 358 if (!d->unwindEntry) 359 return; 360 361 // If we have DWARF unwind info, create a CU entry that points to it. 362 if (d->unwindEntry->getName() == section_names::ehFrame) { 363 cu.encoding = target->modeDwarfEncoding | d->unwindEntry->outSecOff; 364 const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry]; 365 cu.functionLength = fde.funcLength; 366 cu.personality = fde.personality; 367 cu.lsda = fde.lsda; 368 return; 369 } 370 371 assert(d->unwindEntry->getName() == section_names::compactUnwind); 372 373 auto buf = reinterpret_cast<const uint8_t *>(d->unwindEntry->data.data()) - 374 target->wordSize; 375 cu.functionLength = 376 support::endian::read32le(buf + cuOffsets.functionLength); 377 cu.encoding = support::endian::read32le(buf + cuOffsets.encoding); 378 for (const Reloc &r : d->unwindEntry->relocs) { 379 if (r.offset == cuOffsets.personality) { 380 cu.personality = r.referent.get<Symbol *>(); 381 } else if (r.offset == cuOffsets.lsda) { 382 if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) 383 cu.lsda = cast<Defined>(referentSym)->isec; 384 else 385 cu.lsda = r.referent.get<InputSection *>(); 386 } 387 } 388 }); 389 } 390 391 // There should only be a handful of unique personality pointers, so we can 392 // encode them as 2-bit indices into a small array. 393 void UnwindInfoSectionImpl::encodePersonalities() { 394 for (size_t idx : cuIndices) { 395 CompactUnwindEntry &cu = cuEntries[idx]; 396 if (cu.personality == nullptr) 397 continue; 398 // Linear search is fast enough for a small array. 399 auto it = find(personalities, cu.personality); 400 uint32_t personalityIndex; // 1-based index 401 if (it != personalities.end()) { 402 personalityIndex = std::distance(personalities.begin(), it) + 1; 403 } else { 404 personalities.push_back(cu.personality); 405 personalityIndex = personalities.size(); 406 } 407 cu.encoding |= 408 personalityIndex << countTrailingZeros( 409 static_cast<compact_unwind_encoding_t>(UNWIND_PERSONALITY_MASK)); 410 } 411 if (personalities.size() > 3) 412 error("too many personalities (" + Twine(personalities.size()) + 413 ") for compact unwind to encode"); 414 } 415 416 static bool canFoldEncoding(compact_unwind_encoding_t encoding) { 417 // From compact_unwind_encoding.h: 418 // UNWIND_X86_64_MODE_STACK_IND: 419 // A "frameless" (RBP not used as frame pointer) function large constant 420 // stack size. This case is like the previous, except the stack size is too 421 // large to encode in the compact unwind encoding. Instead it requires that 422 // the function contains "subq $nnnnnnnn,RSP" in its prolog. The compact 423 // encoding contains the offset to the nnnnnnnn value in the function in 424 // UNWIND_X86_64_FRAMELESS_STACK_SIZE. 425 // Since this means the unwinder has to look at the `subq` in the function 426 // of the unwind info's unwind address, two functions that have identical 427 // unwind info can't be folded if it's using this encoding since both 428 // entries need unique addresses. 429 static_assert(static_cast<uint32_t>(UNWIND_X86_64_MODE_MASK) == 430 static_cast<uint32_t>(UNWIND_X86_MODE_MASK), 431 ""); 432 static_assert(static_cast<uint32_t>(UNWIND_X86_64_MODE_STACK_IND) == 433 static_cast<uint32_t>(UNWIND_X86_MODE_STACK_IND), 434 ""); 435 if ((target->cpuType == CPU_TYPE_X86_64 || target->cpuType == CPU_TYPE_X86) && 436 (encoding & UNWIND_X86_64_MODE_MASK) == UNWIND_X86_64_MODE_STACK_IND) { 437 // FIXME: Consider passing in the two function addresses and getting 438 // their two stack sizes off the `subq` and only returning false if they're 439 // actually different. 440 return false; 441 } 442 return true; 443 } 444 445 // Scan the __LD,__compact_unwind entries and compute the space needs of 446 // __TEXT,__unwind_info and __TEXT,__eh_frame. 447 void UnwindInfoSectionImpl::finalize() { 448 if (symbols.empty()) 449 return; 450 451 // At this point, the address space for __TEXT,__text has been 452 // assigned, so we can relocate the __LD,__compact_unwind entries 453 // into a temporary buffer. Relocation is necessary in order to sort 454 // the CU entries by function address. Sorting is necessary so that 455 // we can fold adjacent CU entries with identical encoding+personality 456 // and without any LSDA. Folding is necessary because it reduces the 457 // number of CU entries by as much as 3 orders of magnitude! 458 cuEntries.resize(symbols.size()); 459 // The "map" part of the symbols MapVector was only needed for deduplication 460 // in addSymbol(). Now that we are done adding, move the contents to a plain 461 // std::vector for indexed access. 462 symbolsVec = symbols.takeVector(); 463 relocateCompactUnwind(cuEntries); 464 465 // Rather than sort & fold the 32-byte entries directly, we create a 466 // vector of indices to entries and sort & fold that instead. 467 cuIndices.resize(cuEntries.size()); 468 std::iota(cuIndices.begin(), cuIndices.end(), 0); 469 llvm::sort(cuIndices, [&](size_t a, size_t b) { 470 return cuEntries[a].functionAddress < cuEntries[b].functionAddress; 471 }); 472 473 // Fold adjacent entries with matching encoding+personality and without LSDA 474 // We use three iterators on the same cuIndices to fold in-situ: 475 // (1) `foldBegin` is the first of a potential sequence of matching entries 476 // (2) `foldEnd` is the first non-matching entry after `foldBegin`. 477 // The semi-open interval [ foldBegin .. foldEnd ) contains a range 478 // entries that can be folded into a single entry and written to ... 479 // (3) `foldWrite` 480 auto foldWrite = cuIndices.begin(); 481 for (auto foldBegin = cuIndices.begin(); foldBegin < cuIndices.end();) { 482 auto foldEnd = foldBegin; 483 // Common LSDA encodings (e.g. for C++ and Objective-C) contain offsets from 484 // a base address. The base address is normally not contained directly in 485 // the LSDA, and in that case, the personality function treats the starting 486 // address of the function (which is computed by the unwinder) as the base 487 // address and interprets the LSDA accordingly. The unwinder computes the 488 // starting address of a function as the address associated with its CU 489 // entry. For this reason, we cannot fold adjacent entries if they have an 490 // LSDA, because folding would make the unwinder compute the wrong starting 491 // address for the functions with the folded entries, which in turn would 492 // cause the personality function to misinterpret the LSDA for those 493 // functions. In the very rare case where the base address is encoded 494 // directly in the LSDA, two functions at different addresses would 495 // necessarily have different LSDAs, so their CU entries would not have been 496 // folded anyway. 497 while (++foldEnd < cuIndices.end() && 498 cuEntries[*foldBegin].encoding == cuEntries[*foldEnd].encoding && 499 !cuEntries[*foldBegin].lsda && !cuEntries[*foldEnd].lsda && 500 // If we've gotten to this point, we don't have an LSDA, which should 501 // also imply that we don't have a personality function, since in all 502 // likelihood a personality function needs the LSDA to do anything 503 // useful. It can be technically valid to have a personality function 504 // and no LSDA though (e.g. the C++ personality __gxx_personality_v0 505 // is just a no-op without LSDA), so we still check for personality 506 // function equivalence to handle that case. 507 cuEntries[*foldBegin].personality == 508 cuEntries[*foldEnd].personality && 509 canFoldEncoding(cuEntries[*foldEnd].encoding)) 510 ; 511 *foldWrite++ = *foldBegin; 512 foldBegin = foldEnd; 513 } 514 cuIndices.erase(foldWrite, cuIndices.end()); 515 516 encodePersonalities(); 517 518 // Count frequencies of the folded encodings 519 EncodingMap encodingFrequencies; 520 for (size_t idx : cuIndices) 521 encodingFrequencies[cuEntries[idx].encoding]++; 522 523 // Make a vector of encodings, sorted by descending frequency 524 for (const auto &frequency : encodingFrequencies) 525 commonEncodings.emplace_back(frequency); 526 llvm::sort(commonEncodings, 527 [](const std::pair<compact_unwind_encoding_t, size_t> &a, 528 const std::pair<compact_unwind_encoding_t, size_t> &b) { 529 if (a.second == b.second) 530 // When frequencies match, secondarily sort on encoding 531 // to maintain parity with validate-unwind-info.py 532 return a.first > b.first; 533 return a.second > b.second; 534 }); 535 536 // Truncate the vector to 127 elements. 537 // Common encoding indexes are limited to 0..126, while encoding 538 // indexes 127..255 are local to each second-level page 539 if (commonEncodings.size() > COMMON_ENCODINGS_MAX) 540 commonEncodings.resize(COMMON_ENCODINGS_MAX); 541 542 // Create a map from encoding to common-encoding-table index 543 for (size_t i = 0; i < commonEncodings.size(); i++) 544 commonEncodingIndexes[commonEncodings[i].first] = i; 545 546 // Split folded encodings into pages, where each page is limited by ... 547 // (a) 4 KiB capacity 548 // (b) 24-bit difference between first & final function address 549 // (c) 8-bit compact-encoding-table index, 550 // for which 0..126 references the global common-encodings table, 551 // and 127..255 references a local per-second-level-page table. 552 // First we try the compact format and determine how many entries fit. 553 // If more entries fit in the regular format, we use that. 554 for (size_t i = 0; i < cuIndices.size();) { 555 size_t idx = cuIndices[i]; 556 secondLevelPages.emplace_back(); 557 SecondLevelPage &page = secondLevelPages.back(); 558 page.entryIndex = i; 559 uint64_t functionAddressMax = 560 cuEntries[idx].functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK; 561 size_t n = commonEncodings.size(); 562 size_t wordsRemaining = 563 SECOND_LEVEL_PAGE_WORDS - 564 sizeof(unwind_info_compressed_second_level_page_header) / 565 sizeof(uint32_t); 566 while (wordsRemaining >= 1 && i < cuIndices.size()) { 567 idx = cuIndices[i]; 568 const CompactUnwindEntry *cuPtr = &cuEntries[idx]; 569 if (cuPtr->functionAddress >= functionAddressMax) { 570 break; 571 } else if (commonEncodingIndexes.count(cuPtr->encoding) || 572 page.localEncodingIndexes.count(cuPtr->encoding)) { 573 i++; 574 wordsRemaining--; 575 } else if (wordsRemaining >= 2 && n < COMPACT_ENCODINGS_MAX) { 576 page.localEncodings.emplace_back(cuPtr->encoding); 577 page.localEncodingIndexes[cuPtr->encoding] = n++; 578 i++; 579 wordsRemaining -= 2; 580 } else { 581 break; 582 } 583 } 584 page.entryCount = i - page.entryIndex; 585 586 // If this is not the final page, see if it's possible to fit more 587 // entries by using the regular format. This can happen when there 588 // are many unique encodings, and we we saturated the local 589 // encoding table early. 590 if (i < cuIndices.size() && 591 page.entryCount < REGULAR_SECOND_LEVEL_ENTRIES_MAX) { 592 page.kind = UNWIND_SECOND_LEVEL_REGULAR; 593 page.entryCount = std::min(REGULAR_SECOND_LEVEL_ENTRIES_MAX, 594 cuIndices.size() - page.entryIndex); 595 i = page.entryIndex + page.entryCount; 596 } else { 597 page.kind = UNWIND_SECOND_LEVEL_COMPRESSED; 598 } 599 } 600 601 for (size_t idx : cuIndices) { 602 lsdaIndex[idx] = entriesWithLsda.size(); 603 if (cuEntries[idx].lsda) 604 entriesWithLsda.push_back(idx); 605 } 606 607 // compute size of __TEXT,__unwind_info section 608 level2PagesOffset = sizeof(unwind_info_section_header) + 609 commonEncodings.size() * sizeof(uint32_t) + 610 personalities.size() * sizeof(uint32_t) + 611 // The extra second-level-page entry is for the sentinel 612 (secondLevelPages.size() + 1) * 613 sizeof(unwind_info_section_header_index_entry) + 614 entriesWithLsda.size() * 615 sizeof(unwind_info_section_header_lsda_index_entry); 616 unwindInfoSize = 617 level2PagesOffset + secondLevelPages.size() * SECOND_LEVEL_PAGE_BYTES; 618 } 619 620 // All inputs are relocated and output addresses are known, so write! 621 622 void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const { 623 assert(!cuIndices.empty() && "call only if there is unwind info"); 624 625 // section header 626 auto *uip = reinterpret_cast<unwind_info_section_header *>(buf); 627 uip->version = 1; 628 uip->commonEncodingsArraySectionOffset = sizeof(unwind_info_section_header); 629 uip->commonEncodingsArrayCount = commonEncodings.size(); 630 uip->personalityArraySectionOffset = 631 uip->commonEncodingsArraySectionOffset + 632 (uip->commonEncodingsArrayCount * sizeof(uint32_t)); 633 uip->personalityArrayCount = personalities.size(); 634 uip->indexSectionOffset = uip->personalityArraySectionOffset + 635 (uip->personalityArrayCount * sizeof(uint32_t)); 636 uip->indexCount = secondLevelPages.size() + 1; 637 638 // Common encodings 639 auto *i32p = reinterpret_cast<uint32_t *>(&uip[1]); 640 for (const auto &encoding : commonEncodings) 641 *i32p++ = encoding.first; 642 643 // Personalities 644 for (const Symbol *personality : personalities) 645 *i32p++ = personality->getGotVA() - in.header->addr; 646 647 // Level-1 index 648 uint32_t lsdaOffset = 649 uip->indexSectionOffset + 650 uip->indexCount * sizeof(unwind_info_section_header_index_entry); 651 uint64_t l2PagesOffset = level2PagesOffset; 652 auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p); 653 for (const SecondLevelPage &page : secondLevelPages) { 654 size_t idx = cuIndices[page.entryIndex]; 655 iep->functionOffset = cuEntries[idx].functionAddress - in.header->addr; 656 iep->secondLevelPagesSectionOffset = l2PagesOffset; 657 iep->lsdaIndexArraySectionOffset = 658 lsdaOffset + lsdaIndex.lookup(idx) * 659 sizeof(unwind_info_section_header_lsda_index_entry); 660 iep++; 661 l2PagesOffset += SECOND_LEVEL_PAGE_BYTES; 662 } 663 // Level-1 sentinel 664 const CompactUnwindEntry &cuEnd = cuEntries[cuIndices.back()]; 665 iep->functionOffset = 666 cuEnd.functionAddress - in.header->addr + cuEnd.functionLength; 667 iep->secondLevelPagesSectionOffset = 0; 668 iep->lsdaIndexArraySectionOffset = 669 lsdaOffset + entriesWithLsda.size() * 670 sizeof(unwind_info_section_header_lsda_index_entry); 671 iep++; 672 673 // LSDAs 674 auto *lep = 675 reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep); 676 for (size_t idx : entriesWithLsda) { 677 const CompactUnwindEntry &cu = cuEntries[idx]; 678 lep->lsdaOffset = cu.lsda->getVA(/*off=*/0) - in.header->addr; 679 lep->functionOffset = cu.functionAddress - in.header->addr; 680 lep++; 681 } 682 683 // Level-2 pages 684 auto *pp = reinterpret_cast<uint32_t *>(lep); 685 for (const SecondLevelPage &page : secondLevelPages) { 686 if (page.kind == UNWIND_SECOND_LEVEL_COMPRESSED) { 687 uintptr_t functionAddressBase = 688 cuEntries[cuIndices[page.entryIndex]].functionAddress; 689 auto *p2p = 690 reinterpret_cast<unwind_info_compressed_second_level_page_header *>( 691 pp); 692 p2p->kind = page.kind; 693 p2p->entryPageOffset = 694 sizeof(unwind_info_compressed_second_level_page_header); 695 p2p->entryCount = page.entryCount; 696 p2p->encodingsPageOffset = 697 p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t); 698 p2p->encodingsCount = page.localEncodings.size(); 699 auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]); 700 for (size_t i = 0; i < page.entryCount; i++) { 701 const CompactUnwindEntry &cue = 702 cuEntries[cuIndices[page.entryIndex + i]]; 703 auto it = commonEncodingIndexes.find(cue.encoding); 704 if (it == commonEncodingIndexes.end()) 705 it = page.localEncodingIndexes.find(cue.encoding); 706 *ep++ = (it->second << COMPRESSED_ENTRY_FUNC_OFFSET_BITS) | 707 (cue.functionAddress - functionAddressBase); 708 } 709 if (!page.localEncodings.empty()) 710 memcpy(ep, page.localEncodings.data(), 711 page.localEncodings.size() * sizeof(uint32_t)); 712 } else { 713 auto *p2p = 714 reinterpret_cast<unwind_info_regular_second_level_page_header *>(pp); 715 p2p->kind = page.kind; 716 p2p->entryPageOffset = 717 sizeof(unwind_info_regular_second_level_page_header); 718 p2p->entryCount = page.entryCount; 719 auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]); 720 for (size_t i = 0; i < page.entryCount; i++) { 721 const CompactUnwindEntry &cue = 722 cuEntries[cuIndices[page.entryIndex + i]]; 723 *ep++ = cue.functionAddress; 724 *ep++ = cue.encoding; 725 } 726 } 727 pp += SECOND_LEVEL_PAGE_WORDS; 728 } 729 } 730 731 UnwindInfoSection *macho::makeUnwindInfoSection() { 732 return make<UnwindInfoSectionImpl>(); 733 } 734