//===- UnwindInfoSection.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "UnwindInfoSection.h"
#include "ConcatOutputSection.h"
#include "Config.h"
#include "InputSection.h"
#include "OutputSection.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"

#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Memory.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/Parallel.h"

#include <numeric>

using namespace llvm;
using namespace llvm::MachO;
using namespace llvm::support::endian;
using namespace lld;
using namespace lld::macho;

// Limits imposed by the __TEXT,__unwind_info on-disk format; see the
// second-level page encoding index split described below.
#define COMMON_ENCODINGS_MAX 127
#define COMPACT_ENCODINGS_MAX 256

#define SECOND_LEVEL_PAGE_BYTES 4096
#define SECOND_LEVEL_PAGE_WORDS (SECOND_LEVEL_PAGE_BYTES / sizeof(uint32_t))
#define REGULAR_SECOND_LEVEL_ENTRIES_MAX                                       \
  ((SECOND_LEVEL_PAGE_BYTES -                                                  \
    sizeof(unwind_info_regular_second_level_page_header)) /                    \
   sizeof(unwind_info_regular_second_level_entry))
#define COMPRESSED_SECOND_LEVEL_ENTRIES_MAX                                    \
  ((SECOND_LEVEL_PAGE_BYTES -                                                  \
    sizeof(unwind_info_compressed_second_level_page_header)) /                 \
   sizeof(uint32_t))

// In a compressed second-level entry, the low 24 bits hold the function
// offset from the page's base address; the high 8 bits hold an encoding index.
#define COMPRESSED_ENTRY_FUNC_OFFSET_BITS 24
#define COMPRESSED_ENTRY_FUNC_OFFSET_MASK                                      \
  UNWIND_INFO_COMPRESSED_ENTRY_FUNC_OFFSET(~0)

// Compact Unwind format is a Mach-O evolution of DWARF Unwind that
// optimizes space and exception-time lookup. Most DWARF unwind
// entries can be replaced with Compact Unwind entries, but the ones
// that cannot are retained in DWARF form.
//
// This comment will address macro-level organization of the pre-link
// and post-link compact unwind tables. For micro-level organization
// pertaining to the bitfield layout of the 32-bit compact unwind
// entries, see libunwind/include/mach-o/compact_unwind_encoding.h
//
// Important clarifying factoids:
//
// * __LD,__compact_unwind is the compact unwind format for compiler
// output and linker input. It is never a final output. It could be
// an intermediate output with the `-r` option which retains relocs.
//
// * __TEXT,__unwind_info is the compact unwind format for final
// linker output. It is never an input.
//
// * __TEXT,__eh_frame is the DWARF format for both linker input and output.
//
// * __TEXT,__unwind_info entries are divided into 4 KiB pages (2nd
// level) by ascending address, and the pages are referenced by an
// index (1st level) in the section header.
//
// * Following the headers in __TEXT,__unwind_info, the bulk of the
// section contains a vector of compact unwind entries
// `{functionOffset, encoding}` sorted by ascending `functionOffset`.
// Adjacent entries with the same encoding can be folded to great
// advantage, achieving a 3-order-of-magnitude reduction in the
// number of entries.
//
// * The __TEXT,__unwind_info format can accommodate up to 127 unique
// encodings for the space-efficient compressed format. In practice,
// fewer than a dozen unique encodings are used by C++ programs of
// all sizes. We prefer the compressed format, and fall back to the
// regular (non-compressed) second-level page format only when a page
// saturates its local encoding table (see finalize()).
//
// Refer to the definition of unwind_info_section_header in
// compact_unwind_encoding.h for an overview of the format we are encoding
// here.

// TODO(gkm): prune __eh_frame entries superseded by __unwind_info, PR50410
// TODO(gkm): how do we align the 2nd-level pages?

// The offsets of various fields in the on-disk representation of each compact
// unwind entry. Pointer-sized fields (functionAddress, personality, lsda)
// vary with the target word size, hence the runtime selection below.
struct CompactUnwindOffsets {
  uint32_t functionAddress;
  uint32_t functionLength;
  uint32_t encoding;
  uint32_t personality;
  uint32_t lsda;

  // wordSize is the target pointer width in bytes (8 or 4).
  CompactUnwindOffsets(size_t wordSize) {
    if (wordSize == 8)
      init<uint64_t>();
    else {
      assert(wordSize == 4);
      init<uint32_t>();
    }
  }

private:
  // Compute field offsets from a mirror of the on-disk layout, so we never
  // hand-maintain magic offset constants.
  template <class Ptr> void init() {
    functionAddress = offsetof(Layout<Ptr>, functionAddress);
    functionLength = offsetof(Layout<Ptr>, functionLength);
    encoding = offsetof(Layout<Ptr>, encoding);
    personality = offsetof(Layout<Ptr>, personality);
    lsda = offsetof(Layout<Ptr>, lsda);
  }

  // Mirrors the on-disk `struct compact_unwind_entry` layout for a given
  // pointer type.
  template <class Ptr> struct Layout {
    Ptr functionAddress;
    uint32_t functionLength;
    compact_unwind_encoding_t encoding;
    Ptr personality;
    Ptr lsda;
  };
};

// LLD's internal representation of a compact unwind entry, with relocations
// already resolved to symbols/sections.
struct CompactUnwindEntry {
  uint64_t functionAddress;
  uint32_t functionLength;
  compact_unwind_encoding_t encoding;
  Symbol *personality; // nullptr if the entry has no personality function
  InputSection *lsda;  // nullptr if the entry has no LSDA
};

using EncodingMap = DenseMap<compact_unwind_encoding_t, size_t>;

// Book-keeping for one 4 KiB second-level page of the output section.
struct SecondLevelPage {
  uint32_t kind; // UNWIND_SECOND_LEVEL_{REGULAR,COMPRESSED}
  size_t entryIndex; // index into cuIndices of the page's first entry
  size_t entryCount;
  size_t byteCount;
  // Encodings referenced by this page that are not in the common table;
  // indexed 127..255 locally.
  std::vector<compact_unwind_encoding_t> localEncodings;
  EncodingMap localEncodingIndexes;
};

// UnwindInfoSectionImpl allows us to avoid cluttering our header file with a
// lengthy definition of UnwindInfoSection.
class UnwindInfoSectionImpl final : public UnwindInfoSection {
public:
  UnwindInfoSectionImpl() : cuOffsets(target->wordSize) {}
  uint64_t getSize() const override { return unwindInfoSize; }
  void prepareRelocations() override;
  void finalize() override;
  void writeTo(uint8_t *buf) const override;

private:
  // Per-input-section pass over __LD,__compact_unwind relocs; adds GOT
  // entries for personality references.
  void prepareRelocations(ConcatInputSection *);
  // Resolves each symbol's pre-link CU data into a CompactUnwindEntry.
  void relocateCompactUnwind(std::vector<CompactUnwindEntry> &);
  // Rewrites each entry's encoding to embed a 1-based personality index.
  void encodePersonalities();

  // Total size of __TEXT,__unwind_info, computed by finalize().
  uint64_t unwindInfoSize = 0;
  // Flattened copy of the symbols MapVector, for indexed/parallel access.
  std::vector<decltype(symbols)::value_type> symbolsVec;
  // Field offsets for the target word size, computed once in the ctor.
  CompactUnwindOffsets cuOffsets;
  // {encoding, frequency} pairs, sorted by descending frequency.
  std::vector<std::pair<compact_unwind_encoding_t, size_t>> commonEncodings;
  EncodingMap commonEncodingIndexes;
  // The entries here will be in the same order as their originating symbols
  // in symbolsVec.
  std::vector<CompactUnwindEntry> cuEntries;
  // Indices into the cuEntries vector.
  std::vector<size_t> cuIndices;
  // At most 3 personality pointers (format limit; see encodePersonalities).
  std::vector<Symbol *> personalities;
  // Deduplicates personality references by (section, offset) address.
  SmallDenseMap<std::pair<InputSection *, uint64_t /* addend */>, Symbol *>
      personalityTable;
  // Indices into cuEntries for CUEs with a non-null LSDA.
  std::vector<size_t> entriesWithLsda;
  // Map of cuEntries index to an index within the LSDA array.
  DenseMap<size_t, uint32_t> lsdaIndex;
  std::vector<SecondLevelPage> secondLevelPages;
  // Offset within the section at which the second-level pages begin.
  uint64_t level2PagesOffset = 0;
};

UnwindInfoSection::UnwindInfoSection()
    : SyntheticSection(segment_names::text, section_names::unwindInfo) {
  align = 4;
}

// Record function symbols that may need entries emitted in __unwind_info, which
// stores unwind data for address ranges.
//
// Note that if several adjacent functions have the same unwind encoding and
// personality function and no LSDA, they share one unwind entry.
For this to 201 // work, functions without unwind info need explicit "no unwind info" unwind 202 // entries -- else the unwinder would think they have the unwind info of the 203 // closest function with unwind info right before in the image. Thus, we add 204 // function symbols for each unique address regardless of whether they have 205 // associated unwind info. 206 void UnwindInfoSection::addSymbol(const Defined *d) { 207 if (d->unwindEntry) 208 allEntriesAreOmitted = false; 209 // We don't yet know the final output address of this symbol, but we know that 210 // they are uniquely determined by a combination of the isec and value, so 211 // we use that as the key here. 212 auto p = symbols.insert({{d->isec, d->value}, d}); 213 // If we have multiple symbols at the same address, only one of them can have 214 // an associated unwind entry. 215 if (!p.second && d->unwindEntry) { 216 assert(!p.first->second->unwindEntry); 217 p.first->second = d; 218 } 219 } 220 221 void UnwindInfoSectionImpl::prepareRelocations() { 222 // This iteration needs to be deterministic, since prepareRelocations may add 223 // entries to the GOT. Hence the use of a MapVector for 224 // UnwindInfoSection::symbols. 225 for (const Defined *d : make_second_range(symbols)) 226 if (d->unwindEntry && 227 d->unwindEntry->getName() == section_names::compactUnwind) 228 prepareRelocations(d->unwindEntry); 229 } 230 231 // Compact unwind relocations have different semantics, so we handle them in a 232 // separate code path from regular relocations. First, we do not wish to add 233 // rebase opcodes for __LD,__compact_unwind, because that section doesn't 234 // actually end up in the final binary. Second, personality pointers always 235 // reside in the GOT and must be treated specially. 
void UnwindInfoSectionImpl::prepareRelocations(ConcatInputSection *isec) {
  assert(!isec->shouldOmitFromOutput() &&
         "__compact_unwind section should not be omitted");

  // FIXME: Make this skip relocations for CompactUnwindEntries that
  // point to dead-stripped functions. That might save some amount of
  // work. But since there are usually just few personality functions
  // that are referenced from many places, at least some of them likely
  // live, it wouldn't reduce the number of got entries.
  for (size_t i = 0; i < isec->relocs.size(); ++i) {
    Reloc &r = isec->relocs[i];
    assert(target->hasAttr(r.type, RelocAttrBits::UNSIGNED));

    // Functions and LSDA entries always reside in the same object file as the
    // compact unwind entries that reference them, and thus appear as section
    // relocs. There is no need to prepare them. We only prepare relocs for
    // personality functions.
    if (r.offset != cuOffsets.personality)
      continue;

    if (auto *s = r.referent.dyn_cast<Symbol *>()) {
      // Personality functions are nearly always system-defined (e.g.,
      // ___gxx_personality_v0 for C++) and relocated as dylib symbols. When an
      // application provides its own personality function, it might be
      // referenced by an extern Defined symbol reloc, or a local section reloc.
      if (auto *defined = dyn_cast<Defined>(s)) {
        // XXX(vyng) This is a special case for handling duplicate personality
        // symbols. Note that LD64's behavior is a bit different and it is
        // inconsistent with how symbol resolution usually works.
        //
        // So we've decided not to follow it. Instead, simply pick the symbol
        // with the same name from the symbol table to replace the local one.
        //
        // (See discussions/alternatives already considered on D107533)
        if (!defined->isExternal())
          if (Symbol *sym = symtab->find(defined->getName()))
            if (!sym->isLazy())
              r.referent = s = sym;
      }
      if (auto *undefined = dyn_cast<Undefined>(s)) {
        treatUndefinedSymbol(*undefined, isec, r.offset);
        // treatUndefinedSymbol() can replace s with a DylibSymbol; re-check.
        if (isa<Undefined>(s))
          continue;
      }

      if (auto *defined = dyn_cast<Defined>(s)) {
        // Check if we have created a synthetic symbol at the same address.
        // If so, canonicalize this reloc onto that symbol so all references
        // to one personality share a single GOT slot.
        Symbol *&personality =
            personalityTable[{defined->isec, defined->value}];
        if (personality == nullptr) {
          personality = defined;
          in.got->addEntry(defined);
        } else if (personality != defined) {
          r.referent = personality;
        }
        continue;
      }
      assert(isa<DylibSymbol>(s));
      in.got->addEntry(s);
      continue;
    }

    if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {
      assert(!isCoalescedWeak(referentIsec));
      // Personality functions can be referenced via section relocations
      // if they live in the same object file. Create placeholder synthetic
      // symbols for them in the GOT.
      Symbol *&s = personalityTable[{referentIsec, r.addend}];
      if (s == nullptr) {
        // This runs after dead stripping, so the noDeadStrip argument does not
        // matter.
        s = make<Defined>("<internal>", /*file=*/nullptr, referentIsec,
                          r.addend, /*size=*/0, /*isWeakDef=*/false,
                          /*isExternal=*/false, /*isPrivateExtern=*/false,
                          /*includeInSymtab=*/true,
                          /*isThumb=*/false, /*isReferencedDynamically=*/false,
                          /*noDeadStrip=*/false);
        s->used = true;
        in.got->addEntry(s);
      }
      // Retarget the reloc at the synthetic symbol; the addend has been
      // folded into the symbol's value, so zero it out.
      r.referent = s;
      r.addend = 0;
    }
  }
}

// We need to apply the relocations to the pre-link compact unwind section
// before converting it to post-link form.
// There should only be absolute
// relocations here: since we are not emitting the pre-link CU section, there
// is no source address to make a relative location meaningful.
void UnwindInfoSectionImpl::relocateCompactUnwind(
    std::vector<CompactUnwindEntry> &cuEntries) {
  // Entries are independent of each other, so resolve them in parallel.
  parallelFor(0, symbolsVec.size(), [&](size_t i) {
    CompactUnwindEntry &cu = cuEntries[i];
    const Defined *d = symbolsVec[i].second;
    cu.functionAddress = d->getVA();
    // Symbols without unwind info still get an entry (functionAddress only),
    // which later becomes an explicit "no unwind info" record.
    if (!d->unwindEntry)
      return;

    // If we have DWARF unwind info, create a CU entry that points to it.
    if (d->unwindEntry->getName() == section_names::ehFrame) {
      // The DWARF-mode encoding carries the FDE's offset within __eh_frame.
      cu.encoding = target->modeDwarfEncoding | d->unwindEntry->outSecOff;
      const FDE &fde = cast<ObjFile>(d->getFile())->fdes[d->unwindEntry];
      cu.functionLength = fde.funcLength;
      cu.personality = fde.personality;
      cu.lsda = fde.lsda;
      return;
    }

    assert(d->unwindEntry->getName() == section_names::compactUnwind);

    // The section data begins just past the entry's functionAddress word;
    // rewind by one word so that cuOffsets (which measure from the start of
    // the on-disk entry) line up with the buffer.
    auto buf = reinterpret_cast<const uint8_t *>(d->unwindEntry->data.data()) -
               target->wordSize;
    cu.functionLength =
        support::endian::read32le(buf + cuOffsets.functionLength);
    cu.encoding = support::endian::read32le(buf + cuOffsets.encoding);
    // Resolve the personality and LSDA references from the entry's relocs.
    for (const Reloc &r : d->unwindEntry->relocs) {
      if (r.offset == cuOffsets.personality) {
        cu.personality = r.referent.get<Symbol *>();
      } else if (r.offset == cuOffsets.lsda) {
        if (auto *referentSym = r.referent.dyn_cast<Symbol *>())
          cu.lsda = cast<Defined>(referentSym)->isec;
        else
          cu.lsda = r.referent.get<InputSection *>();
      }
    }
  });
}

// There should only be a handful of unique personality pointers, so we can
// encode them as 2-bit indices into a small array.
void UnwindInfoSectionImpl::encodePersonalities() {
  for (size_t idx : cuIndices) {
    CompactUnwindEntry &cu = cuEntries[idx];
    if (cu.personality == nullptr)
      continue;
    // Linear search is fast enough for a small array.
    auto it = find(personalities, cu.personality);
    uint32_t personalityIndex; // 1-based index
    if (it != personalities.end()) {
      personalityIndex = std::distance(personalities.begin(), it) + 1;
    } else {
      personalities.push_back(cu.personality);
      personalityIndex = personalities.size();
    }
    // Store the 1-based index in the UNWIND_PERSONALITY_MASK bits of the
    // encoding; 0 means "no personality".
    cu.encoding |=
        personalityIndex << countTrailingZeros(
            static_cast<compact_unwind_encoding_t>(UNWIND_PERSONALITY_MASK));
  }
  // The 2-bit personality field can name at most 3 distinct functions.
  if (personalities.size() > 3)
    error("too many personalities (" + Twine(personalities.size()) +
          ") for compact unwind to encode");
}

static bool canFoldEncoding(compact_unwind_encoding_t encoding) {
  // From compact_unwind_encoding.h:
  //  UNWIND_X86_64_MODE_STACK_IND:
  //  A "frameless" (RBP not used as frame pointer) function large constant
  //  stack size.  This case is like the previous, except the stack size is too
  //  large to encode in the compact unwind encoding. Instead it requires that
  //  the function contains "subq $nnnnnnnn,RSP" in its prolog.  The compact
  //  encoding contains the offset to the nnnnnnnn value in the function in
  //  UNWIND_X86_64_FRAMELESS_STACK_SIZE.
  // Since this means the unwinder has to look at the `subq` in the function
  // of the unwind info's unwind address, two functions that have identical
  // unwind info can't be folded if it's using this encoding since both
  // entries need unique addresses.
  static_assert(static_cast<uint32_t>(UNWIND_X86_64_MODE_MASK) ==
                    static_cast<uint32_t>(UNWIND_X86_MODE_MASK),
                "");
  static_assert(static_cast<uint32_t>(UNWIND_X86_64_MODE_STACK_IND) ==
                    static_cast<uint32_t>(UNWIND_X86_MODE_STACK_IND),
                "");
  if ((target->cpuType == CPU_TYPE_X86_64 || target->cpuType == CPU_TYPE_X86) &&
      (encoding & UNWIND_X86_64_MODE_MASK) == UNWIND_X86_64_MODE_STACK_IND) {
    // FIXME: Consider passing in the two function addresses and getting
    // their two stack sizes off the `subq` and only returning false if they're
    // actually different.
    return false;
  }
  return true;
}

// Scan the __LD,__compact_unwind entries and compute the space needs of
// __TEXT,__unwind_info and __TEXT,__eh_frame.
void UnwindInfoSectionImpl::finalize() {
  if (symbols.empty())
    return;

  // At this point, the address space for __TEXT,__text has been
  // assigned, so we can relocate the __LD,__compact_unwind entries
  // into a temporary buffer. Relocation is necessary in order to sort
  // the CU entries by function address. Sorting is necessary so that
  // we can fold adjacent CU entries with identical encoding+personality
  // and without any LSDA. Folding is necessary because it reduces the
  // number of CU entries by as much as 3 orders of magnitude!
  cuEntries.resize(symbols.size());
  // The "map" part of the symbols MapVector was only needed for deduplication
  // in addSymbol(). Now that we are done adding, move the contents to a plain
  // std::vector for indexed access.
  symbolsVec = symbols.takeVector();
  relocateCompactUnwind(cuEntries);

  // Rather than sort & fold the 32-byte entries directly, we create a
  // vector of indices to entries and sort & fold that instead.
  cuIndices.resize(cuEntries.size());
  std::iota(cuIndices.begin(), cuIndices.end(), 0);
  llvm::sort(cuIndices, [&](size_t a, size_t b) {
    return cuEntries[a].functionAddress < cuEntries[b].functionAddress;
  });

  // Fold adjacent entries with matching encoding+personality and without LSDA
  // We use three iterators on the same cuIndices to fold in-situ:
  // (1) `foldBegin` is the first of a potential sequence of matching entries
  // (2) `foldEnd` is the first non-matching entry after `foldBegin`.
  // The semi-open interval [ foldBegin .. foldEnd ) contains a range
  // entries that can be folded into a single entry and written to ...
  // (3) `foldWrite`
  auto foldWrite = cuIndices.begin();
  for (auto foldBegin = cuIndices.begin(); foldBegin < cuIndices.end();) {
    auto foldEnd = foldBegin;
    // Common LSDA encodings (e.g. for C++ and Objective-C) contain offsets
    // from a base address. The base address is normally not contained
    // directly in the LSDA, and in that case, the personality function
    // treats the starting address of the function (which is computed by the
    // unwinder) as the base address and interprets the LSDA accordingly. The
    // unwinder computes the starting address of a function as the address
    // associated with its CU entry. For this reason, we cannot fold adjacent
    // entries if they have an LSDA, because folding would make the unwinder
    // compute the wrong starting address for the functions with the folded
    // entries, which in turn would cause the personality function to
    // misinterpret the LSDA for those functions. In the very rare case where
    // the base address is encoded directly in the LSDA, two functions at
    // different addresses would necessarily have different LSDAs, so their
    // CU entries would not have been folded anyway.
    while (++foldEnd < cuIndices.end() &&
           cuEntries[*foldBegin].encoding == cuEntries[*foldEnd].encoding &&
           !cuEntries[*foldBegin].lsda && !cuEntries[*foldEnd].lsda &&
           // If we've gotten to this point, we don't have an LSDA, which
           // should also imply that we don't have a personality function,
           // since in all likelihood a personality function needs the LSDA
           // to do anything useful. It can be technically valid to have a
           // personality function and no LSDA though (e.g. the C++
           // personality __gxx_personality_v0 is just a no-op without LSDA),
           // so we still check for personality function equivalence to
           // handle that case.
           cuEntries[*foldBegin].personality ==
               cuEntries[*foldEnd].personality &&
           canFoldEncoding(cuEntries[*foldEnd].encoding))
      ;
    *foldWrite++ = *foldBegin;
    foldBegin = foldEnd;
  }
  cuIndices.erase(foldWrite, cuIndices.end());

  encodePersonalities();

  // Count frequencies of the folded encodings
  EncodingMap encodingFrequencies;
  for (size_t idx : cuIndices)
    encodingFrequencies[cuEntries[idx].encoding]++;

  // Make a vector of encodings, sorted by descending frequency
  for (const auto &frequency : encodingFrequencies)
    commonEncodings.emplace_back(frequency);
  llvm::sort(commonEncodings,
             [](const std::pair<compact_unwind_encoding_t, size_t> &a,
                const std::pair<compact_unwind_encoding_t, size_t> &b) {
               if (a.second == b.second)
                 // When frequencies match, secondarily sort on encoding
                 // to maintain parity with validate-unwind-info.py
                 return a.first > b.first;
               return a.second > b.second;
             });

  // Truncate the vector to 127 elements.
  // Common encoding indexes are limited to 0..126, while encoding
  // indexes 127..255 are local to each second-level page
  if (commonEncodings.size() > COMMON_ENCODINGS_MAX)
    commonEncodings.resize(COMMON_ENCODINGS_MAX);

  // Create a map from encoding to common-encoding-table index
  for (size_t i = 0; i < commonEncodings.size(); i++)
    commonEncodingIndexes[commonEncodings[i].first] = i;

  // Split folded encodings into pages, where each page is limited by ...
  // (a) 4 KiB capacity
  // (b) 24-bit difference between first & final function address
  // (c) 8-bit compact-encoding-table index,
  //     for which 0..126 references the global common-encodings table,
  //     and 127..255 references a local per-second-level-page table.
  // First we try the compact format and determine how many entries fit.
  // If more entries fit in the regular format, we use that.
  for (size_t i = 0; i < cuIndices.size();) {
    size_t idx = cuIndices[i];
    secondLevelPages.emplace_back();
    SecondLevelPage &page = secondLevelPages.back();
    page.entryIndex = i;
    // Limit (b): every entry's 24-bit offset is relative to the page's first
    // function address.
    uint64_t functionAddressMax =
        cuEntries[idx].functionAddress + COMPRESSED_ENTRY_FUNC_OFFSET_MASK;
    size_t n = commonEncodings.size();
    size_t wordsRemaining =
        SECOND_LEVEL_PAGE_WORDS -
        sizeof(unwind_info_compressed_second_level_page_header) /
            sizeof(uint32_t);
    while (wordsRemaining >= 1 && i < cuIndices.size()) {
      idx = cuIndices[i];
      const CompactUnwindEntry *cuPtr = &cuEntries[idx];
      if (cuPtr->functionAddress >= functionAddressMax) {
        break;
      } else if (commonEncodingIndexes.count(cuPtr->encoding) ||
                 page.localEncodingIndexes.count(cuPtr->encoding)) {
        // Known encoding: costs one word for the entry itself.
        i++;
        wordsRemaining--;
      } else if (wordsRemaining >= 2 && n < COMPACT_ENCODINGS_MAX) {
        // New local encoding: costs one word for the entry plus one word in
        // the page's local encodings table.
        page.localEncodings.emplace_back(cuPtr->encoding);
        page.localEncodingIndexes[cuPtr->encoding] = n++;
        i++;
        wordsRemaining -= 2;
      } else {
        break;
      }
    }
    page.entryCount = i - page.entryIndex;

    // If this is not the final page, see if it's possible to fit more
    // entries by using the regular format. This can happen when there
    // are many unique encodings, and we saturated the local
    // encoding table early.
    if (i < cuIndices.size() &&
        page.entryCount < REGULAR_SECOND_LEVEL_ENTRIES_MAX) {
      page.kind = UNWIND_SECOND_LEVEL_REGULAR;
      page.entryCount = std::min(REGULAR_SECOND_LEVEL_ENTRIES_MAX,
                                 cuIndices.size() - page.entryIndex);
      i = page.entryIndex + page.entryCount;
    } else {
      page.kind = UNWIND_SECOND_LEVEL_COMPRESSED;
    }
  }

  // Assign each LSDA-bearing entry its slot in the LSDA index array;
  // lsdaIndex records, for every entry, how many LSDAs precede it.
  for (size_t idx : cuIndices) {
    lsdaIndex[idx] = entriesWithLsda.size();
    if (cuEntries[idx].lsda)
      entriesWithLsda.push_back(idx);
  }

  // compute size of __TEXT,__unwind_info section
  level2PagesOffset = sizeof(unwind_info_section_header) +
                      commonEncodings.size() * sizeof(uint32_t) +
                      personalities.size() * sizeof(uint32_t) +
                      // The extra second-level-page entry is for the sentinel
                      (secondLevelPages.size() + 1) *
                          sizeof(unwind_info_section_header_index_entry) +
                      entriesWithLsda.size() *
                          sizeof(unwind_info_section_header_lsda_index_entry);
  unwindInfoSize =
      level2PagesOffset + secondLevelPages.size() * SECOND_LEVEL_PAGE_BYTES;
}

// All inputs are relocated and output addresses are known, so write!
void UnwindInfoSectionImpl::writeTo(uint8_t *buf) const {
  assert(!cuIndices.empty() && "call only if there is unwind info");

  // section header
  auto *uip = reinterpret_cast<unwind_info_section_header *>(buf);
  uip->version = 1;
  uip->commonEncodingsArraySectionOffset = sizeof(unwind_info_section_header);
  uip->commonEncodingsArrayCount = commonEncodings.size();
  uip->personalityArraySectionOffset =
      uip->commonEncodingsArraySectionOffset +
      (uip->commonEncodingsArrayCount * sizeof(uint32_t));
  uip->personalityArrayCount = personalities.size();
  uip->indexSectionOffset = uip->personalityArraySectionOffset +
                            (uip->personalityArrayCount * sizeof(uint32_t));
  uip->indexCount = secondLevelPages.size() + 1;

  // Common encodings
  auto *i32p = reinterpret_cast<uint32_t *>(&uip[1]);
  for (const auto &encoding : commonEncodings)
    *i32p++ = encoding.first;

  // Personalities (stored as image-relative offsets of their GOT slots)
  for (const Symbol *personality : personalities)
    *i32p++ = personality->getGotVA() - in.header->addr;

  // Level-1 index
  uint32_t lsdaOffset =
      uip->indexSectionOffset +
      uip->indexCount * sizeof(unwind_info_section_header_index_entry);
  uint64_t l2PagesOffset = level2PagesOffset;
  auto *iep = reinterpret_cast<unwind_info_section_header_index_entry *>(i32p);
  for (const SecondLevelPage &page : secondLevelPages) {
    size_t idx = cuIndices[page.entryIndex];
    iep->functionOffset = cuEntries[idx].functionAddress - in.header->addr;
    iep->secondLevelPagesSectionOffset = l2PagesOffset;
    iep->lsdaIndexArraySectionOffset =
        lsdaOffset + lsdaIndex.lookup(idx) *
                         sizeof(unwind_info_section_header_lsda_index_entry);
    iep++;
    l2PagesOffset += SECOND_LEVEL_PAGE_BYTES;
  }
  // Level-1 sentinel: marks the end of the address range covered by the
  // final page; its page offset is 0.
  const CompactUnwindEntry &cuEnd = cuEntries[cuIndices.back()];
  iep->functionOffset =
      cuEnd.functionAddress - in.header->addr + cuEnd.functionLength;
  iep->secondLevelPagesSectionOffset = 0;
  iep->lsdaIndexArraySectionOffset =
      lsdaOffset + entriesWithLsda.size() *
                       sizeof(unwind_info_section_header_lsda_index_entry);
  iep++;

  // LSDAs
  auto *lep =
      reinterpret_cast<unwind_info_section_header_lsda_index_entry *>(iep);
  for (size_t idx : entriesWithLsda) {
    const CompactUnwindEntry &cu = cuEntries[idx];
    lep->lsdaOffset = cu.lsda->getVA(/*off=*/0) - in.header->addr;
    lep->functionOffset = cu.functionAddress - in.header->addr;
    lep++;
  }

  // Level-2 pages
  auto *pp = reinterpret_cast<uint32_t *>(lep);
  for (const SecondLevelPage &page : secondLevelPages) {
    if (page.kind == UNWIND_SECOND_LEVEL_COMPRESSED) {
      // All entry offsets in a compressed page are relative to the page's
      // first function address.
      uintptr_t functionAddressBase =
          cuEntries[cuIndices[page.entryIndex]].functionAddress;
      auto *p2p =
          reinterpret_cast<unwind_info_compressed_second_level_page_header *>(
              pp);
      p2p->kind = page.kind;
      p2p->entryPageOffset =
          sizeof(unwind_info_compressed_second_level_page_header);
      p2p->entryCount = page.entryCount;
      p2p->encodingsPageOffset =
          p2p->entryPageOffset + p2p->entryCount * sizeof(uint32_t);
      p2p->encodingsCount = page.localEncodings.size();
      auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
      for (size_t i = 0; i < page.entryCount; i++) {
        const CompactUnwindEntry &cue =
            cuEntries[cuIndices[page.entryIndex + i]];
        // Prefer the common-encodings table (indexes 0..126); fall back to
        // the page-local table (127..255). finalize() guarantees that one of
        // the two contains the encoding.
        auto it = commonEncodingIndexes.find(cue.encoding);
        if (it == commonEncodingIndexes.end())
          it = page.localEncodingIndexes.find(cue.encoding);
        *ep++ = (it->second << COMPRESSED_ENTRY_FUNC_OFFSET_BITS) |
                (cue.functionAddress - functionAddressBase);
      }
      if (!page.localEncodings.empty())
        memcpy(ep, page.localEncodings.data(),
               page.localEncodings.size() * sizeof(uint32_t));
    } else {
      // Regular page: {functionOffset, encoding} pairs, no compression.
      auto *p2p =
          reinterpret_cast<unwind_info_regular_second_level_page_header *>(pp);
      p2p->kind = page.kind;
      p2p->entryPageOffset =
          sizeof(unwind_info_regular_second_level_page_header);
      p2p->entryCount = page.entryCount;
      auto *ep = reinterpret_cast<uint32_t *>(&p2p[1]);
      for (size_t i = 0; i < page.entryCount; i++) {
        const CompactUnwindEntry &cue =
            cuEntries[cuIndices[page.entryIndex + i]];
        *ep++ = cue.functionAddress;
        *ep++ = cue.encoding;
      }
    }
    pp += SECOND_LEVEL_PAGE_WORDS;
  }
}

UnwindInfoSection *macho::makeUnwindInfoSection() {
  return make<UnwindInfoSectionImpl>();
}