1 //===- UnwindInfoSection.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "UnwindInfoSection.h" 10 #include "InputSection.h" 11 #include "OutputSection.h" 12 #include "OutputSegment.h" 13 #include "SymbolTable.h" 14 #include "Symbols.h" 15 #include "SyntheticSections.h" 16 #include "Target.h" 17 18 #include "lld/Common/ErrorHandler.h" 19 #include "lld/Common/Memory.h" 20 #include "llvm/ADT/DenseMap.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/BinaryFormat/MachO.h" 23 #include "llvm/Support/Parallel.h" 24 25 #include "mach-o/compact_unwind_encoding.h" 26 27 #include <numeric> 28 29 using namespace llvm; 30 using namespace llvm::MachO; 31 using namespace llvm::support::endian; 32 using namespace lld; 33 using namespace lld::macho; 34 35 #define COMMON_ENCODINGS_MAX 127 36 #define COMPACT_ENCODINGS_MAX 256 37 38 #define SECOND_LEVEL_PAGE_BYTES 4096 39 #define SECOND_LEVEL_PAGE_WORDS (SECOND_LEVEL_PAGE_BYTES / sizeof(uint32_t)) 40 #define REGULAR_SECOND_LEVEL_ENTRIES_MAX \ 41 ((SECOND_LEVEL_PAGE_BYTES - \ 42 sizeof(unwind_info_regular_second_level_page_header)) / \ 43 sizeof(unwind_info_regular_second_level_entry)) 44 #define COMPRESSED_SECOND_LEVEL_ENTRIES_MAX \ 45 ((SECOND_LEVEL_PAGE_BYTES - \ 46 sizeof(unwind_info_compressed_second_level_page_header)) / \ 47 sizeof(uint32_t)) 48 49 #define COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24 50 #define COMPRESSED_ENTRY_FUNC_OFFSET_MASK \ 51 UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0) 52 53 // Compact Unwind format is a Mach-O evolution of DWARF Unwind that 54 // optimizes space and exception-time lookup. 
// Most DWARF unwind entries can be replaced with Compact Unwind entries, but
// the ones that cannot are retained in DWARF form.
//
// This comment will address macro-level organization of the pre-link
// and post-link compact unwind tables. For micro-level organization
// pertaining to the bitfield layout of the 32-bit compact unwind
// entries, see libunwind/include/mach-o/compact_unwind_encoding.h
//
// Important clarifying factoids:
//
// * __LD,__compact_unwind is the compact unwind format for compiler
// output and linker input. It is never a final output. It could be
// an intermediate output with the `-r` option which retains relocs.
//
// * __TEXT,__unwind_info is the compact unwind format for final
// linker output. It is never an input.
//
// * __TEXT,__eh_frame is the DWARF format for both linker input and output.
//
// * __TEXT,__unwind_info entries are divided into 4 KiB pages (2nd
// level) by ascending address, and the pages are referenced by an
// index (1st level) in the section header.
//
// * Following the headers in __TEXT,__unwind_info, the bulk of the
// section contains a vector of compact unwind entries
// `{functionOffset, encoding}` sorted by ascending `functionOffset`.
// Adjacent entries with the same encoding can be folded to great
// advantage, achieving a 3-order-of-magnitude reduction in the
// number of entries.
//
// Refer to the definition of unwind_info_section_header in
// compact_unwind_encoding.h for an overview of the format we are encoding
// here.

// TODO(gkm): how do we align the 2nd-level pages?

// The offsets of various fields in the on-disk representation of each compact
// unwind entry.
93 struct CompactUnwindOffsets { 94 uint32_t functionAddress; 95 uint32_t functionLength; 96 uint32_t encoding; 97 uint32_t personality; 98 uint32_t lsda; 99 100 CompactUnwindOffsets(size_t wordSize) { 101 if (wordSize == 8) 102 init<uint64_t>(); 103 else { 104 assert(wordSize == 4); 105 init<uint32_t>(); 106 } 107 } 108 109 private: 110 template <class Ptr> void init() { 111 functionAddress = offsetof(Layout<Ptr>, functionAddress); 112 functionLength = offsetof(Layout<Ptr>, functionLength); 113 encoding = offsetof(Layout<Ptr>, encoding); 114 personality = offsetof(Layout<Ptr>, personality); 115 lsda = offsetof(Layout<Ptr>, lsda); 116 } 117 118 template <class Ptr> struct Layout { 119 Ptr functionAddress; 120 uint32_t functionLength; 121 compact_unwind_encoding_t encoding; 122 Ptr personality; 123 Ptr lsda; 124 }; 125 }; 126 127 // LLD's internal representation of a compact unwind entry. 128 struct CompactUnwindEntry { 129 uint64_t functionAddress; 130 uint32_t functionLength; 131 compact_unwind_encoding_t encoding; 132 Symbol *personality; 133 InputSection *lsda; 134 }; 135 136 using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>; 137 138 struct SecondLevelPage { 139 uint32_t kind; 140 size_t entryIndex; 141 size_t entryCount; 142 size_t byteCount; 143 std::vector<compact_unwind_encoding_t> localEncodings; 144 EncodingMap localEncodingIndexes; 145 }; 146 147 // UnwindInfoSectionImpl allows us to avoid cluttering our header file with a 148 // lengthy definition of UnwindInfoSection. 
149 class UnwindInfoSectionImpl final : public UnwindInfoSection { 150 public: 151 UnwindInfoSectionImpl() : cuOffsets(target->wordSize) {} 152 uint64_t getSize() const override { return unwindInfoSize; } 153 void prepare() override; 154 void finalize() override; 155 void writeTo(uint8_t *buf) const override; 156 157 private: 158 void prepareRelocations(ConcatInputSection *); 159 void relocateCompactUnwind(std::vector<CompactUnwindEntry> &); 160 void encodePersonalities(); 161 Symbol *canonicalizePersonality(Symbol *); 162 163 uint64_t unwindInfoSize = 0; 164 std::vector<decltype(symbols)::value_type> symbolsVec; 165 CompactUnwindOffsets cuOffsets; 166 std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings; 167 EncodingMap commonEncodingIndexes; 168 // The entries here will be in the same order as their originating symbols 169 // in symbolsVec. 170 std::vector<CompactUnwindEntry> cuEntries; 171 // Indices into the cuEntries vector. 172 std::vector<size_t> cuIndices; 173 std::vector<Symbol *> personalities; 174 SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *> 175 personalityTable; 176 // Indices into cuEntries for CUEs with a non-null LSDA. 177 std::vector<size_t> entriesWithLsda; 178 // Map of cuEntries index to an index within the LSDA array. 179 DenseMap<size_t, uint32_t> lsdaIndex; 180 std::vector<SecondLevelPage> secondLevelPages; 181 uint64_t level2PagesOffset = 0; 182 // The highest-address function plus its size. The unwinder needs this to 183 // determine the address range that is covered by unwind info. 184 uint64_t cueEndBoundary = 0; 185 }; 186 187 UnwindInfoSection::UnwindInfoSection() 188 : SyntheticSection(segment_names::text, section_names::unwindInfo) { 189 align = 4; 190 } 191 192 // Record function symbols that may need entries emitted in __unwind_info, which 193 // stores unwind data for address ranges. 
194 // 195 // Note that if several adjacent functions have the same unwind encoding and 196 // personality function and no LSDA, they share one unwind entry. For this to 197 // work, functions without unwind info need explicit "no unwind info" unwind 198 // entries -- else the unwinder would think they have the unwind info of the 199 // closest function with unwind info right before in the image. Thus, we add 200 // function symbols for each unique address regardless of whether they have 201 // associated unwind info. 202 void UnwindInfoSection::addSymbol(const Defined *d) { 203 if (d->unwindEntry) 204 allEntriesAreOmitted = false; 205 // We don't yet know the final output address of this symbol, but we know that 206 // they are uniquely determined by a combination of the isec and value, so 207 // we use that as the key here. 208 auto p = symbols.insert({{d->isec, d->value}, d}); 209 // If we have multiple symbols at the same address, only one of them can have 210 // an associated unwind entry. 211 if (!p.second && d->unwindEntry) { 212 assert(p.first->second == d || !p.first->second->unwindEntry); 213 p.first->second = d; 214 } 215 } 216 217 void UnwindInfoSectionImpl::prepare() { 218 // This iteration needs to be deterministic, since prepareRelocations may add 219 // entries to the GOT. Hence the use of a MapVector for 220 // UnwindInfoSection::symbols. 221 for (const Defined *d : make_second_range(symbols)) 222 if (d->unwindEntry) { 223 if (d->unwindEntry->getName() == section_names::compactUnwind) { 224 prepareRelocations(d->unwindEntry); 225 } else { 226 // We don't have to add entries to the GOT here because FDEs have 227 // explicit GOT relocations, so Writer::scanRelocations() will add those 228 // GOT entries. However, we still need to canonicalize the personality 229 // pointers (like prepareRelocations() does for CU entries) in order 230 // to avoid overflowing the 3-personality limit. 
231 FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry]; 232 fde.personality = canonicalizePersonality(fde.personality); 233 } 234 } 235 } 236 237 // Compact unwind relocations have different semantics, so we handle them in a 238 // separate code path from regular relocations. First, we do not wish to add 239 // rebase opcodes for __LD,__compact_unwind, because that section doesn't 240 // actually end up in the final binary. Second, personality pointers always 241 // reside in the GOT and must be treated specially. 242 void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) { 243 assert(!isec->shouldOmitFromOutput() && 244 "__compact_unwind section should not be omitted"); 245 246 // FIXME: Make this skip relocations for CompactUnwindEntries that 247 // point to dead-stripped functions. That might save some amount of 248 // work. But since there are usually just few personality functions 249 // that are referenced from many places, at least some of them likely 250 // live, it wouldn't reduce number of got entries. 251 for (size_t i = 0; i < isec->relocs.size(); ++i) { 252 Reloc &r = isec->relocs[i]; 253 assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED)); 254 // Since compact unwind sections aren't part of the inputSections vector, 255 // they don't get canonicalized by scanRelocations(), so we have to do the 256 // canonicalization here. 257 if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) 258 r.referent = referentIsec->canonical(); 259 260 // Functions and LSDA entries always reside in the same object file as the 261 // compact unwind entries that references them, and thus appear as section 262 // relocs. There is no need to prepare them. We only prepare relocs for 263 // personality functions. 
264 if (r.offset != cuOffsets.personality) 265 continue; 266 267 if (auto *s = r.referent.dyn_cast<Symbol *>()) { 268 // Personality functions are nearly always system-defined (e.g., 269 // ___gxx_personality_v0 for C++) and relocated as dylib symbols. When an 270 // application provides its own personality function, it might be 271 // referenced by an extern Defined symbol reloc, or a local section reloc. 272 if (auto *defined = dyn_cast<Defined>(s)) { 273 // XXX(vyng) This is a special case for handling duplicate personality 274 // symbols. Note that LD64's behavior is a bit different and it is 275 // inconsistent with how symbol resolution usually work 276 // 277 // So we've decided not to follow it. Instead, simply pick the symbol 278 // with the same name from the symbol table to replace the local one. 279 // 280 // (See discussions/alternatives already considered on D107533) 281 if (!defined->isExternal()) 282 if (Symbol *sym = symtab->find(defined->getName())) 283 if (!sym->isLazy()) 284 r.referent = s = sym; 285 } 286 if (auto *undefined = dyn_cast<Undefined>(s)) { 287 treatUndefinedSymbol(*undefined, isec, r.offset); 288 // treatUndefinedSymbol() can replace s with a DylibSymbol; re-check. 289 if (isa<Undefined>(s)) 290 continue; 291 } 292 293 // Similar to canonicalizePersonality(), but we also register a GOT entry. 294 if (auto *defined = dyn_cast<Defined>(s)) { 295 // Check if we have created a synthetic symbol at the same address. 
296 Symbol *&personality = 297 personalityTable[{defined->isec, defined->value}]; 298 if (personality == nullptr) { 299 personality = defined; 300 in.got->addEntry(defined); 301 } else if (personality != defined) { 302 r.referent = personality; 303 } 304 continue; 305 } 306 307 assert(isa<DylibSymbol>(s)); 308 in.got->addEntry(s); 309 continue; 310 } 311 312 if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { 313 assert(!isCoalescedWeak(referentIsec)); 314 // Personality functions can be referenced via section relocations 315 // if they live in the same object file. Create placeholder synthetic 316 // symbols for them in the GOT. 317 Symbol *&s = personalityTable[{referentIsec, r.addend}]; 318 if (s == nullptr) { 319 // This runs after dead stripping, so the noDeadStrip argument does not 320 // matter. 321 s = make<Defined>("<internal>", /*file=*/nullptr, referentIsec, 322 r.addend, /*size=*/0, /*isWeakDef=*/false, 323 /*isExternal=*/false, /*isPrivateExtern=*/false, 324 /*includeInSymtab=*/true, 325 /*isThumb=*/false, /*isReferencedDynamically=*/false, 326 /*noDeadStrip=*/false); 327 s->used = true; 328 in.got->addEntry(s); 329 } 330 r.referent = s; 331 r.addend = 0; 332 } 333 } 334 } 335 336 Symbol *UnwindInfoSectionImpl::canonicalizePersonality(Symbol *personality) { 337 if (auto *defined = dyn_cast_or_null<Defined>(personality)) { 338 // Check if we have created a synthetic symbol at the same address. 339 Symbol *&synth = personalityTable[{defined->isec, defined->value}]; 340 if (synth == nullptr) 341 synth = defined; 342 else if (synth != defined) 343 return synth; 344 } 345 return personality; 346 } 347 348 // We need to apply the relocations to the pre-link compact unwind section 349 // before converting it to post-link form. There should only be absolute 350 // relocations here: since we are not emitting the pre-link CU section, there 351 // is no source address to make a relative location meaningful. 
352 void UnwindInfoSectionImpl::relocateCompactUnwind( 353 std::vector<CompactUnwindEntry> &cuEntries) { 354 parallelFor(0, symbolsVec.size(), [&](size_t i) { 355 CompactUnwindEntry &cu = cuEntries[i]; 356 const Defined *d = symbolsVec[i].second; 357 cu.functionAddress = d->getVA(); 358 if (!d->unwindEntry) 359 return; 360 361 // If we have DWARF unwind info, create a CU entry that points to it. 362 if (d->unwindEntry->getName() == section_names::ehFrame) { 363 cu.encoding = target->modeDwarfEncoding | d->unwindEntry->outSecOff; 364 const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry]; 365 cu.functionLength = fde.funcLength; 366 cu.personality = fde.personality; 367 cu.lsda = fde.lsda; 368 return; 369 } 370 371 assert(d->unwindEntry->getName() == section_names::compactUnwind); 372 373 auto buf = reinterpret_cast<const uint8_t *>(d->unwindEntry->data.data()) - 374 target->wordSize; 375 cu.functionLength = 376 support::endian::read32le(buf + cuOffsets.functionLength); 377 cu.encoding = support::endian::read32le(buf + cuOffsets.encoding); 378 for (const Reloc &r : d->unwindEntry->relocs) { 379 if (r.offset == cuOffsets.personality) { 380 cu.personality = r.referent.get<Symbol *>(); 381 } else if (r.offset == cuOffsets.lsda) { 382 if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) 383 cu.lsda = cast<Defined>(referentSym)->isec; 384 else 385 cu.lsda = r.referent.get<InputSection *>(); 386 } 387 } 388 }); 389 } 390 391 // There should only be a handful of unique personality pointers, so we can 392 // encode them as 2-bit indices into a small array. 393 void UnwindInfoSectionImpl::encodePersonalities() { 394 for (size_t idx : cuIndices) { 395 CompactUnwindEntry &cu = cuEntries[idx]; 396 if (cu.personality == nullptr) 397 continue; 398 // Linear search is fast enough for a small array. 
399 auto it = find(personalities, cu.personality); 400 uint32_t personalityIndex; // 1-based index 401 if (it != personalities.end()) { 402 personalityIndex = std::distance(personalities.begin(), it) + 1; 403 } else { 404 personalities.push_back(cu.personality); 405 personalityIndex = personalities.size(); 406 } 407 cu.encoding |= 408 personalityIndex << countTrailingZeros( 409 static_cast<compact_unwind_encoding_t>(UNWIND_PERSONALITY_MASK)); 410 } 411 if (personalities.size() > 3) 412 error("too many personalities (" + Twine(personalities.size()) + 413 ") for compact unwind to encode"); 414 } 415 416 static bool canFoldEncoding(compact_unwind_encoding_t encoding) { 417 // From compact_unwind_encoding.h: 418 // UNWIND_X86_64_MODE_STACK_IND: 419 // A "frameless" (RBP not used as frame pointer) function large constant 420 // stack size. This case is like the previous, except the stack size is too 421 // large to encode in the compact unwind encoding. Instead it requires that 422 // the function contains "subq $nnnnnnnn,RSP" in its prolog. The compact 423 // encoding contains the offset to the nnnnnnnn value in the function in 424 // UNWIND_X86_64_FRAMELESS_STACK_SIZE. 425 // Since this means the unwinder has to look at the `subq` in the function 426 // of the unwind info's unwind address, two functions that have identical 427 // unwind info can't be folded if it's using this encoding since both 428 // entries need unique addresses. 429 static_assert(static_cast<uint32_t>(UNWIND_X86_64_MODE_STACK_IND) == 430 static_cast<uint32_t>(UNWIND_X86_MODE_STACK_IND)); 431 if ((target->cpuType == CPU_TYPE_X86_64 || target->cpuType == CPU_TYPE_X86) && 432 (encoding & UNWIND_MODE_MASK) == UNWIND_X86_64_MODE_STACK_IND) { 433 // FIXME: Consider passing in the two function addresses and getting 434 // their two stack sizes off the `subq` and only returning false if they're 435 // actually different. 
436 return false; 437 } 438 return true; 439 } 440 441 // Scan the __LD,__compact_unwind entries and compute the space needs of 442 // __TEXT,__unwind_info and __TEXT,__eh_frame. 443 void UnwindInfoSectionImpl::finalize() { 444 if (symbols.empty()) 445 return; 446 447 // At this point, the address space for __TEXT,__text has been 448 // assigned, so we can relocate the __LD,__compact_unwind entries 449 // into a temporary buffer. Relocation is necessary in order to sort 450 // the CU entries by function address. Sorting is necessary so that 451 // we can fold adjacent CU entries with identical encoding+personality 452 // and without any LSDA. Folding is necessary because it reduces the 453 // number of CU entries by as much as 3 orders of magnitude! 454 cuEntries.resize(symbols.size()); 455 // The "map" part of the symbols MapVector was only needed for deduplication 456 // in addSymbol(). Now that we are done adding, move the contents to a plain 457 // std::vector for indexed access. 458 symbolsVec = symbols.takeVector(); 459 relocateCompactUnwind(cuEntries); 460 461 // Rather than sort & fold the 32-byte entries directly, we create a 462 // vector of indices to entries and sort & fold that instead. 463 cuIndices.resize(cuEntries.size()); 464 std::iota(cuIndices.begin(), cuIndices.end(), 0); 465 llvm::sort(cuIndices, [&](size_t a, size_t b) { 466 return cuEntries[a].functionAddress < cuEntries[b].functionAddress; 467 }); 468 469 // Record the ending boundary before we fold the entries. 470 cueEndBoundary = cuEntries[cuIndices.back()].functionAddress + 471 cuEntries[cuIndices.back()].functionLength; 472 473 // Fold adjacent entries with matching encoding+personality and without LSDA 474 // We use three iterators on the same cuIndices to fold in-situ: 475 // (1) `foldBegin` is the first of a potential sequence of matching entries 476 // (2) `foldEnd` is the first non-matching entry after `foldBegin`. 477 // The semi-open interval [ foldBegin .. 
foldEnd ) contains a range 478 // entries that can be folded into a single entry and written to ... 479 // (3) `foldWrite` 480 auto foldWrite = cuIndices.begin(); 481 for (auto foldBegin = cuIndices.begin(); foldBegin < cuIndices.end();) { 482 auto foldEnd = foldBegin; 483 // Common LSDA encodings (e.g. for C++ and Objective-C) contain offsets from 484 // a base address. The base address is normally not contained directly in 485 // the LSDA, and in that case, the personality function treats the starting 486 // address of the function (which is computed by the unwinder) as the base 487 // address and interprets the LSDA accordingly. The unwinder computes the 488 // starting address of a function as the address associated with its CU 489 // entry. For this reason, we cannot fold adjacent entries if they have an 490 // LSDA, because folding would make the unwinder compute the wrong starting 491 // address for the functions with the folded entries, which in turn would 492 // cause the personality function to misinterpret the LSDA for those 493 // functions. In the very rare case where the base address is encoded 494 // directly in the LSDA, two functions at different addresses would 495 // necessarily have different LSDAs, so their CU entries would not have been 496 // folded anyway. 497 while (++foldEnd < cuIndices.end() && 498 cuEntries[*foldBegin].encoding == cuEntries[*foldEnd].encoding && 499 !cuEntries[*foldBegin].lsda && !cuEntries[*foldEnd].lsda && 500 // If we've gotten to this point, we don't have an LSDA, which should 501 // also imply that we don't have a personality function, since in all 502 // likelihood a personality function needs the LSDA to do anything 503 // useful. It can be technically valid to have a personality function 504 // and no LSDA though (e.g. the C++ personality __gxx_personality_v0 505 // is just a no-op without LSDA), so we still check for personality 506 // function equivalence to handle that case. 
507 cuEntries[*foldBegin].personality == 508 cuEntries[*foldEnd].personality && 509 canFoldEncoding(cuEntries[*foldEnd].encoding)) 510 ; 511 *foldWrite++ = *foldBegin; 512 foldBegin = foldEnd; 513 } 514 cuIndices.erase(foldWrite, cuIndices.end()); 515 516 encodePersonalities(); 517 518 // Count frequencies of the folded encodings 519 EncodingMap encodingFrequencies; 520 for (size_t idx : cuIndices) 521 encodingFrequencies[cuEntries[idx].encoding]++; 522 523 // Make a vector of encodings, sorted by descending frequency 524 for (const auto &frequency : encodingFrequencies) 525 commonEncodings.emplace_back(frequency); 526 llvm::sort(commonEncodings, 527 [](const std::pair<compact_unwind_encoding_t, size_t> &a, 528 const std::pair<compact_unwind_encoding_t, size_t> &b) { 529 if (a.second == b.second) 530 // When frequencies match, secondarily sort on encoding 531 // to maintain parity with validate-unwind-info.py 532 return a.first > b.first; 533 return a.second > b.second; 534 }); 535 536 // Truncate the vector to 127 elements. 537 // Common encoding indexes are limited to 0..126, while encoding 538 // indexes 127..255 are local to each second-level page 539 if (commonEncodings.size() > COMMON_ENCODINGS_MAX) 540 commonEncodings.resize(COMMON_ENCODINGS_MAX); 541 542 // Create a map from encoding to common-encoding-table index 543 for (size_t i = 0; i < commonEncodings.size(); i++) 544 commonEncodingIndexes[commonEncodings[i].first] = i; 545 546 // Split folded encodings into pages, where each page is limited by ... 547 // (a) 4 KiB capacity 548 // (b) 24-bit difference between first & final function address 549 // (c) 8-bit compact-encoding-table index, 550 // for which 0..126 references the global common-encodings table, 551 // and 127..255 references a local per-second-level-page table. 552 // First we try the compact format and determine how many entries fit. 553 // If more entries fit in the regular format, we use that. 
554 for (size_t i = 0; i < cuIndices.size();) { 555 size_t idx = cuIndices[i]; 556 secondLevelPages.emplace_back(); 557 SecondLevelPage &page = secondLevelPages.back(); 558 page.entryIndex = i; 559 uint64_t functionAddressMax = 560 cuEntries[idx].functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK; 561 size_t n = commonEncodings.size(); 562 size_t wordsRemaining = 563 SECOND_LEVEL_PAGE_WORDS - 564 sizeof(unwind_info_compressed_second_level_page_header) / 565 sizeof(uint32_t); 566 while (wordsRemaining >= 1 && i < cuIndices.size()) { 567 idx = cuIndices[i]; 568 const CompactUnwindEntry *cuPtr = &cuEntries[idx]; 569 if (cuPtr->functionAddress >= functionAddressMax) { 570 break; 571 } else if (commonEncodingIndexes.count(cuPtr->encoding) || 572 page.localEncodingIndexes.count(cuPtr->encoding)) { 573 i++; 574 wordsRemaining--; 575 } else if (wordsRemaining >= 2 && n < COMPACT_ENCODINGS_MAX) { 576 page.localEncodings.emplace_back(cuPtr->encoding); 577 page.localEncodingIndexes[cuPtr->encoding] = n++; 578 i++; 579 wordsRemaining -= 2; 580 } else { 581 break; 582 } 583 } 584 page.entryCount = i - page.entryIndex; 585 586 // If this is not the final page, see if it's possible to fit more entries 587 // by using the regular format. This can happen when there are many unique 588 // encodings, and we saturated the local encoding table early. 
589 if (i < cuIndices.size() && 590 page.entryCount < REGULAR_SECOND_LEVEL_ENTRIES_MAX) { 591 page.kind = UNWIND_SECOND_LEVEL_REGULAR; 592 page.entryCount = std::min(REGULAR_SECOND_LEVEL_ENTRIES_MAX, 593 cuIndices.size() - page.entryIndex); 594 i = page.entryIndex + page.entryCount; 595 } else { 596 page.kind = UNWIND_SECOND_LEVEL_COMPRESSED; 597 } 598 } 599 600 for (size_t idx : cuIndices) { 601 lsdaIndex[idx] = entriesWithLsda.size(); 602 if (cuEntries[idx].lsda) 603 entriesWithLsda.push_back(idx); 604 } 605 606 // compute size of __TEXT,__unwind_info section 607 level2PagesOffset = sizeof(unwind_info_section_header) + 608 commonEncodings.size() * sizeof(uint32_t) + 609 personalities.size() * sizeof(uint32_t) + 610 // The extra second-level-page entry is for the sentinel 611 (secondLevelPages.size() + 1) * 612 sizeof(unwind_info_section_header_index_entry) + 613 entriesWithLsda.size() * 614 sizeof(unwind_info_section_header_lsda_index_entry); 615 unwindInfoSize = 616 level2PagesOffset + secondLevelPages.size() * SECOND_LEVEL_PAGE_BYTES; 617 } 618 619 // All inputs are relocated and output addresses are known, so write! 
620 621 void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const { 622 assert(!cuIndices.empty() && "call only if there is unwind info"); 623 624 // section header 625 auto *uip = reinterpret_cast<unwind_info_section_header *>(buf); 626 uip->version = 1; 627 uip->commonEncodingsArraySectionOffset = sizeof(unwind_info_section_header); 628 uip->commonEncodingsArrayCount = commonEncodings.size(); 629 uip->personalityArraySectionOffset = 630 uip->commonEncodingsArraySectionOffset + 631 (uip->commonEncodingsArrayCount * sizeof(uint32_t)); 632 uip->personalityArrayCount = personalities.size(); 633 uip->indexSectionOffset = uip->personalityArraySectionOffset + 634 (uip->personalityArrayCount * sizeof(uint32_t)); 635 uip->indexCount = secondLevelPages.size() + 1; 636 637 // Common encodings 638 auto *i32p = reinterpret_cast<uint32_t *>(&uip[1]); 639 for (const auto &encoding : commonEncodings) 640 *i32p++ = encoding.first; 641 642 // Personalities 643 for (const Symbol *personality : personalities) 644 *i32p++ = personality->getGotVA() - in.header->addr; 645 646 // FIXME: LD64 checks and warns aboutgaps or overlapse in cuEntries address 647 // ranges. We should do the same too 648 649 // Level-1 index 650 uint32_t lsdaOffset = 651 uip->indexSectionOffset + 652 uip->indexCount * sizeof(unwind_info_section_header_index_entry); 653 uint64_t l2PagesOffset = level2PagesOffset; 654 auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p); 655 for (const SecondLevelPage &page : secondLevelPages) { 656 size_t idx = cuIndices[page.entryIndex]; 657 iep->functionOffset = cuEntries[idx].functionAddress - in.header->addr; 658 iep->secondLevelPagesSectionOffset = l2PagesOffset; 659 iep->lsdaIndexArraySectionOffset = 660 lsdaOffset + lsdaIndex.lookup(idx) * 661 sizeof(unwind_info_section_header_lsda_index_entry); 662 iep++; 663 l2PagesOffset += SECOND_LEVEL_PAGE_BYTES; 664 } 665 // Level-1 sentinel 666 // XXX(vyng): Note that LD64 adds +1 here. 
667 // Unsure whether it's a bug or it's their workaround for something else. 668 // See comments from https://reviews.llvm.org/D138320. 669 iep->functionOffset = cueEndBoundary - in.header->addr; 670 iep->secondLevelPagesSectionOffset = 0; 671 iep->lsdaIndexArraySectionOffset = 672 lsdaOffset + entriesWithLsda.size() * 673 sizeof(unwind_info_section_header_lsda_index_entry); 674 iep++; 675 676 // LSDAs 677 auto *lep = 678 reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep); 679 for (size_t idx : entriesWithLsda) { 680 const CompactUnwindEntry &cu = cuEntries[idx]; 681 lep->lsdaOffset = cu.lsda->getVA(/*off=*/0) - in.header->addr; 682 lep->functionOffset = cu.functionAddress - in.header->addr; 683 lep++; 684 } 685 686 // Level-2 pages 687 auto *pp = reinterpret_cast<uint32_t *>(lep); 688 for (const SecondLevelPage &page : secondLevelPages) { 689 if (page.kind == UNWIND_SECOND_LEVEL_COMPRESSED) { 690 uintptr_t functionAddressBase = 691 cuEntries[cuIndices[page.entryIndex]].functionAddress; 692 auto *p2p = 693 reinterpret_cast<unwind_info_compressed_second_level_page_header *>( 694 pp); 695 p2p->kind = page.kind; 696 p2p->entryPageOffset = 697 sizeof(unwind_info_compressed_second_level_page_header); 698 p2p->entryCount = page.entryCount; 699 p2p->encodingsPageOffset = 700 p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t); 701 p2p->encodingsCount = page.localEncodings.size(); 702 auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]); 703 for (size_t i = 0; i < page.entryCount; i++) { 704 const CompactUnwindEntry &cue = 705 cuEntries[cuIndices[page.entryIndex + i]]; 706 auto it = commonEncodingIndexes.find(cue.encoding); 707 if (it == commonEncodingIndexes.end()) 708 it = page.localEncodingIndexes.find(cue.encoding); 709 *ep++ = (it->second << COMPRESSED_ENTRY_FUNC_OFFSET_BITS) | 710 (cue.functionAddress - functionAddressBase); 711 } 712 if (!page.localEncodings.empty()) 713 memcpy(ep, page.localEncodings.data(), 714 page.localEncodings.size() * 
sizeof(uint32_t)); 715 } else { 716 auto *p2p = 717 reinterpret_cast<unwind_info_regular_second_level_page_header *>(pp); 718 p2p->kind = page.kind; 719 p2p->entryPageOffset = 720 sizeof(unwind_info_regular_second_level_page_header); 721 p2p->entryCount = page.entryCount; 722 auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]); 723 for (size_t i = 0; i < page.entryCount; i++) { 724 const CompactUnwindEntry &cue = 725 cuEntries[cuIndices[page.entryIndex + i]]; 726 *ep++ = cue.functionAddress; 727 *ep++ = cue.encoding; 728 } 729 } 730 pp += SECOND_LEVEL_PAGE_WORDS; 731 } 732 } 733 734 UnwindInfoSection *macho::makeUnwindInfoSection() { 735 return make<UnwindInfoSectionImpl>(); 736 } 737