1 //===- ObjC.cpp -----------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ObjC.h" 10 #include "ConcatOutputSection.h" 11 #include "InputFiles.h" 12 #include "InputSection.h" 13 #include "Layout.h" 14 #include "OutputSegment.h" 15 #include "SyntheticSections.h" 16 #include "Target.h" 17 18 #include "lld/Common/ErrorHandler.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/Bitcode/BitcodeReader.h" 21 #include "llvm/Support/TimeProfiler.h" 22 23 using namespace llvm; 24 using namespace llvm::MachO; 25 using namespace lld; 26 using namespace lld::macho; 27 28 template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) { 29 using SectionHeader = typename LP::section; 30 31 auto *hdr = 32 reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart()); 33 if (hdr->magic != LP::magic) 34 return false; 35 36 if (const auto *c = 37 findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) { 38 auto sectionHeaders = ArrayRef<SectionHeader>{ 39 reinterpret_cast<const SectionHeader *>(c + 1), c->nsects}; 40 for (const SectionHeader &secHead : sectionHeaders) { 41 StringRef sectname(secHead.sectname, 42 strnlen(secHead.sectname, sizeof(secHead.sectname))); 43 StringRef segname(secHead.segname, 44 strnlen(secHead.segname, sizeof(secHead.segname))); 45 if ((segname == segment_names::data && 46 sectname == section_names::objcCatList) || 47 (segname == segment_names::text && 48 sectname.starts_with(section_names::swift))) { 49 return true; 50 } 51 } 52 } 53 return false; 54 } 55 56 static bool objectHasObjCSection(MemoryBufferRef mb) { 57 if (target->wordSize == 8) 58 return ::objectHasObjCSection<LP64>(mb); 59 else 60 return ::objectHasObjCSection<ILP32>(mb); 61 } 62 63 bool macho::hasObjCSection(MemoryBufferRef mb) { 64 switch (identify_magic(mb.getBuffer())) { 65 case file_magic::macho_object: 66 return objectHasObjCSection(mb); 67 case file_magic::bitcode: 68 return check(isBitcodeContainingObjCCategory(mb)); 69 default: 70 return false; 71 } 72 } 73 74 namespace { 75 76 #define FOR_EACH_CATEGORY_FIELD(DO) \ 77 DO(Ptr, name) \ 78 DO(Ptr, klass) \ 79 DO(Ptr, instanceMethods) \ 80 DO(Ptr, classMethods) \ 81 DO(Ptr, protocols) \ 82 DO(Ptr, instanceProps) \ 83 DO(Ptr, classProps) \ 84 DO(uint32_t, size) 85 86 CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD); 87 88 #undef FOR_EACH_CATEGORY_FIELD 89 90 #define FOR_EACH_CLASS_FIELD(DO) \ 91 DO(Ptr, metaClass) \ 92 DO(Ptr, superClass) \ 93 DO(Ptr, methodCache) \ 94 DO(Ptr, vtable) \ 95 DO(Ptr, roData) 96 97 CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD); 98 99 #undef FOR_EACH_CLASS_FIELD 100 101 #define FOR_EACH_RO_CLASS_FIELD(DO) \ 102 DO(uint32_t, flags) \ 103 DO(uint32_t, instanceStart) \ 104 DO(Ptr, instanceSize) \ 105 DO(Ptr, ivarLayout) \ 106 DO(Ptr, name) \ 107 DO(Ptr, baseMethods) \ 108 DO(Ptr, baseProtocols) \ 109 DO(Ptr, ivars) \ 110 DO(Ptr, weakIvarLayout) \ 111 DO(Ptr, baseProperties) 112 113 CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD); 114 115 #undef FOR_EACH_RO_CLASS_FIELD 116 117 #define FOR_EACH_LIST_HEADER(DO) \ 118 DO(uint32_t, structSize) \ 119 DO(uint32_t, structCount) 120 121 CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER); 122 123 #undef FOR_EACH_LIST_HEADER 124 125 #define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount) 126 127 CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER); 128 129 #undef FOR_EACH_PROTOCOL_LIST_HEADER 130 131 #define FOR_EACH_METHOD(DO) \ 132 DO(Ptr, name) \ 133 DO(Ptr, type) \ 134 DO(Ptr, impl) 135 136 CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD); 137 138 #undef FOR_EACH_METHOD 139 140 enum MethodContainerKind { 141 MCK_Class, 142 MCK_Category, 143 }; 144 145 struct MethodContainer { 146 MethodContainerKind kind; 147 const ConcatInputSection *isec; 148 }; 149 150 enum MethodKind { 151 MK_Instance, 152 MK_Static, 153 }; 154 155 struct ObjcClass { 156 DenseMap<CachedHashStringRef, MethodContainer> instanceMethods; 157 DenseMap<CachedHashStringRef, MethodContainer> classMethods; 158 }; 159 160 } // namespace 161 162 class ObjcCategoryChecker { 163 public: 164 ObjcCategoryChecker(); 165 void parseCategory(const ConcatInputSection *catListIsec); 166 167 private: 168 void parseClass(const Defined *classSym); 169 void parseMethods(const ConcatInputSection *methodsIsec, 170 const Symbol *methodContainer, 171 const ConcatInputSection *containerIsec, 172 MethodContainerKind, MethodKind); 173 174 CategoryLayout catLayout; 175 ClassLayout classLayout; 176 ROClassLayout roClassLayout; 177 ListHeaderLayout listHeaderLayout; 178 MethodLayout methodLayout; 179 180 DenseMap<const Symbol *, ObjcClass> classMap; 181 }; 182 183 ObjcCategoryChecker::ObjcCategoryChecker() 184 : catLayout(target->wordSize), classLayout(target->wordSize), 185 roClassLayout(target->wordSize), listHeaderLayout(target->wordSize), 186 methodLayout(target->wordSize) {} 187 188 void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec, 189 const Symbol *methodContainerSym, 190 const ConcatInputSection *containerIsec, 191 MethodContainerKind mcKind, 192 MethodKind mKind) { 193 ObjcClass &klass = classMap[methodContainerSym]; 194 for (const Reloc &r : methodsIsec->relocs) { 195 if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize != 196 methodLayout.nameOffset) 197 continue; 198 199 CachedHashStringRef methodName(r.getReferentString()); 200 // +load methods are special: all implementations are called by the runtime 201 // even if they are part of the same class. Thus there is no need to check 202 // for duplicates. 203 // NOTE: Instead of specifically checking for this method name, ld64 simply 204 // checks whether a class / category is present in __objc_nlclslist / 205 // __objc_nlcatlist respectively. This will be the case if the class / 206 // category has a +load method. It skips optimizing the categories if there 207 // are multiple +load methods. Since it does dupe checking as part of the 208 // optimization process, this avoids spurious dupe messages around +load, 209 // but it also means that legit dupe issues for other methods are ignored. 210 if (mKind == MK_Static && methodName.val() == "load") 211 continue; 212 213 auto &methodMap = 214 mKind == MK_Instance ? klass.instanceMethods : klass.classMethods; 215 if (methodMap 216 .try_emplace(methodName, MethodContainer{mcKind, containerIsec}) 217 .second) 218 continue; 219 220 // We have a duplicate; generate a warning message. 221 const auto &mc = methodMap.lookup(methodName); 222 const Reloc *nameReloc = nullptr; 223 if (mc.kind == MCK_Category) { 224 nameReloc = mc.isec->getRelocAt(catLayout.nameOffset); 225 } else { 226 assert(mc.kind == MCK_Class); 227 const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset) 228 ->getReferentInputSection(); 229 nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset); 230 } 231 StringRef containerName = nameReloc->getReferentString(); 232 StringRef methPrefix = mKind == MK_Instance ? "-" : "+"; 233 234 // We should only ever encounter collisions when parsing category methods 235 // (since the Class struct is parsed before any of its categories). 236 assert(mcKind == MCK_Category); 237 StringRef newCatName = 238 containerIsec->getRelocAt(catLayout.nameOffset)->getReferentString(); 239 240 auto formatObjAndSrcFileName = [](const InputSection *section) { 241 lld::macho::InputFile *inputFile = section->getFile(); 242 std::string result = toString(inputFile); 243 244 auto objFile = dyn_cast_or_null<ObjFile>(inputFile); 245 if (objFile && objFile->compileUnit) 246 result += " (" + objFile->sourceFile() + ")"; 247 248 return result; 249 }; 250 251 StringRef containerType = mc.kind == MCK_Category ? "category" : "class"; 252 warn("method '" + methPrefix + methodName.val() + 253 "' has conflicting definitions:\n>>> defined in category " + 254 newCatName + " from " + formatObjAndSrcFileName(containerIsec) + 255 "\n>>> defined in " + containerType + " " + containerName + " from " + 256 formatObjAndSrcFileName(mc.isec)); 257 } 258 } 259 260 void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) { 261 auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset); 262 if (!classReloc) 263 return; 264 265 auto *classSym = cast<Symbol *>(classReloc->referent); 266 if (auto *d = dyn_cast<Defined>(classSym)) 267 if (!classMap.count(d)) 268 parseClass(d); 269 270 if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) { 271 parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()), 272 classSym, catIsec, MCK_Category, MK_Static); 273 } 274 275 if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) { 276 parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()), 277 classSym, catIsec, MCK_Category, MK_Instance); 278 } 279 } 280 281 void ObjcCategoryChecker::parseClass(const Defined *classSym) { 282 // Given a Class struct, get its corresponding Methods struct 283 auto getMethodsIsec = 284 [&](const InputSection *classIsec) -> ConcatInputSection * { 285 if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) { 286 if (const auto *roIsec = 287 cast_or_null<ConcatInputSection>(r->getReferentInputSection())) { 288 if (const auto *r = 289 roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) { 290 if (auto *methodsIsec = cast_or_null<ConcatInputSection>( 291 r->getReferentInputSection())) 292 return methodsIsec; 293 } 294 } 295 } 296 return nullptr; 297 }; 298 299 const auto *classIsec = cast<ConcatInputSection>(classSym->isec()); 300 301 // Parse instance methods. 302 if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec)) 303 parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class, 304 MK_Instance); 305 306 // Class methods are contained in the metaclass. 307 if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset)) 308 if (const auto *classMethodsIsec = getMethodsIsec( 309 cast<ConcatInputSection>(r->getReferentInputSection()))) 310 parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static); 311 } 312 313 void objc::checkCategories() { 314 TimeTraceScope timeScope("ObjcCategoryChecker"); 315 316 ObjcCategoryChecker checker; 317 for (const InputSection *isec : inputSections) { 318 if (isec->getName() == section_names::objcCatList) 319 for (const Reloc &r : isec->relocs) { 320 auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection()); 321 checker.parseCategory(catIsec); 322 } 323 } 324 } 325 326 namespace { 327 328 class ObjcCategoryMerger { 329 // In which language was a particular construct originally defined 330 enum SourceLanguage { Unknown, ObjC, Swift }; 331 332 // Information about an input category 333 struct InfoInputCategory { 334 ConcatInputSection *catListIsec; 335 ConcatInputSection *catBodyIsec; 336 uint32_t offCatListIsec = 0; 337 SourceLanguage sourceLanguage = SourceLanguage::Unknown; 338 339 bool wasMerged = false; 340 }; 341 342 // To write new (merged) categories or classes, we will try make limited 343 // assumptions about the alignment and the sections the various class/category 344 // info are stored in and . So we'll just reuse the same sections and 345 // alignment as already used in existing (input) categories. To do this we 346 // have InfoCategoryWriter which contains the various sections that the 347 // generated categories will be written to. 348 struct InfoWriteSection { 349 bool valid = false; // Data has been successfully collected from input 350 uint32_t align = 0; 351 Section *inputSection; 352 Reloc relocTemplate; 353 OutputSection *outputSection; 354 }; 355 356 struct InfoCategoryWriter { 357 InfoWriteSection catListInfo; 358 InfoWriteSection catBodyInfo; 359 InfoWriteSection catNameInfo; 360 InfoWriteSection catPtrListInfo; 361 }; 362 363 // Information about a pointer list in the original categories or class(method 364 // lists, protocol lists, etc) 365 struct PointerListInfo { 366 PointerListInfo() = default; 367 PointerListInfo(const PointerListInfo &) = default; 368 PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct) 369 : categoryPrefix(_categoryPrefix), 370 pointersPerStruct(_pointersPerStruct) {} 371 372 inline bool operator==(const PointerListInfo &cmp) const { 373 return pointersPerStruct == cmp.pointersPerStruct && 374 structSize == cmp.structSize && structCount == cmp.structCount && 375 allPtrs == cmp.allPtrs; 376 } 377 378 const char *categoryPrefix; 379 380 uint32_t pointersPerStruct = 0; 381 382 uint32_t structSize = 0; 383 uint32_t structCount = 0; 384 385 std::vector<Symbol *> allPtrs; 386 }; 387 388 // Full information describing an ObjC class . This will include all the 389 // additional methods, protocols, and properties that are contained in the 390 // class and all the categories that extend a particular class. 391 struct ClassExtensionInfo { 392 ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){}; 393 394 // Merged names of containers. Ex: base|firstCategory|secondCategory|... 395 std::string mergedContainerName; 396 std::string baseClassName; 397 const Symbol *baseClass = nullptr; 398 SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown; 399 400 CategoryLayout &catLayout; 401 402 // In case we generate new data, mark the new data as belonging to this file 403 ObjFile *objFileForMergeData = nullptr; 404 405 PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods, 406 /*pointersPerStruct=*/3}; 407 PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods, 408 /*pointersPerStruct=*/3}; 409 PointerListInfo protocols = {objc::symbol_names::categoryProtocols, 410 /*pointersPerStruct=*/0}; 411 PointerListInfo instanceProps = {objc::symbol_names::listProprieties, 412 /*pointersPerStruct=*/2}; 413 PointerListInfo classProps = {objc::symbol_names::klassPropList, 414 /*pointersPerStruct=*/2}; 415 }; 416 417 public: 418 ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections); 419 void doMerge(); 420 static void doCleanup(); 421 422 private: 423 DenseSet<const Symbol *> collectNlCategories(); 424 void collectAndValidateCategoriesData(); 425 bool 426 mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories); 427 428 void eraseISec(ConcatInputSection *isec); 429 void eraseMergedCategories(); 430 431 void generateCatListForNonErasedCategories( 432 MapVector<ConcatInputSection *, std::set<uint64_t>> 433 catListToErasedOffsets); 434 void collectSectionWriteInfoFromIsec(const InputSection *isec, 435 InfoWriteSection &catWriteInfo); 436 bool collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo); 437 bool parseCatInfoToExtInfo(const InfoInputCategory &catInfo, 438 ClassExtensionInfo &extInfo); 439 440 void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset, 441 PointerListInfo &ptrList, 442 SourceLanguage sourceLang); 443 444 PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec, 445 uint32_t secOffset, 446 SourceLanguage sourceLang); 447 448 bool parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset, 449 PointerListInfo &ptrList); 450 451 void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset, 452 const ClassExtensionInfo &extInfo, 453 const PointerListInfo &ptrList); 454 455 Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset, 456 const ClassExtensionInfo &extInfo, 457 const PointerListInfo &ptrList); 458 459 Defined *emitCategory(const ClassExtensionInfo &extInfo); 460 Defined *emitCatListEntrySec(const std::string &forCategoryName, 461 const std::string &forBaseClassName, 462 ObjFile *objFile); 463 Defined *emitCategoryBody(const std::string &name, const Defined *nameSym, 464 const Symbol *baseClassSym, 465 const std::string &baseClassName, ObjFile *objFile); 466 Defined *emitCategoryName(const std::string &name, ObjFile *objFile); 467 void createSymbolReference(Defined *refFrom, const Symbol *refTo, 468 uint32_t offset, const Reloc &relocTemplate); 469 Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset); 470 Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec, 471 uint32_t offset); 472 Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, 473 uint32_t offset); 474 Defined *getClassRo(const Defined *classSym, bool getMetaRo); 475 SourceLanguage getClassSymSourceLang(const Defined *classSym); 476 bool mergeCategoriesIntoBaseClass(const Defined *baseClass, 477 std::vector<InfoInputCategory> &categories); 478 void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset); 479 void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec, 480 uint32_t offset); 481 482 // Allocate a null-terminated StringRef backed by generatedSectionData 483 StringRef newStringData(const char *str); 484 // Allocate section data, backed by generatedSectionData 485 SmallVector<uint8_t> &newSectionData(uint32_t size); 486 487 CategoryLayout catLayout; 488 ClassLayout classLayout; 489 ROClassLayout roClassLayout; 490 ListHeaderLayout listHeaderLayout; 491 MethodLayout methodLayout; 492 ProtocolListHeaderLayout protocolListHeaderLayout; 493 494 InfoCategoryWriter infoCategoryWriter; 495 std::vector<ConcatInputSection *> &allInputSections; 496 // Map of base class Symbol to list of InfoInputCategory's for it 497 MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap; 498 499 // Normally, the binary data comes from the input files, but since we're 500 // generating binary data ourselves, we use the below array to store it in. 501 // Need this to be 'static' so the data survives past the ObjcCategoryMerger 502 // object, as the data will be read by the Writer when the final binary is 503 // generated. 504 static SmallVector<std::unique_ptr<SmallVector<uint8_t>>> 505 generatedSectionData; 506 }; 507 508 SmallVector<std::unique_ptr<SmallVector<uint8_t>>> 509 ObjcCategoryMerger::generatedSectionData; 510 511 ObjcCategoryMerger::ObjcCategoryMerger( 512 std::vector<ConcatInputSection *> &_allInputSections) 513 : catLayout(target->wordSize), classLayout(target->wordSize), 514 roClassLayout(target->wordSize), listHeaderLayout(target->wordSize), 515 methodLayout(target->wordSize), 516 protocolListHeaderLayout(target->wordSize), 517 allInputSections(_allInputSections) {} 518 519 void ObjcCategoryMerger::collectSectionWriteInfoFromIsec( 520 const InputSection *isec, InfoWriteSection &catWriteInfo) { 521 522 catWriteInfo.inputSection = const_cast<Section *>(&isec->section); 523 catWriteInfo.align = isec->align; 524 catWriteInfo.outputSection = isec->parent; 525 526 assert(catWriteInfo.outputSection && 527 "outputSection may not be null in collectSectionWriteInfoFromIsec."); 528 529 if (isec->relocs.size()) 530 catWriteInfo.relocTemplate = isec->relocs[0]; 531 532 catWriteInfo.valid = true; 533 } 534 535 Symbol * 536 ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec, 537 uint32_t offset) { 538 if (!isec) 539 return nullptr; 540 const Reloc *reloc = isec->getRelocAt(offset); 541 542 if (!reloc) 543 return nullptr; 544 545 Symbol *sym = dyn_cast_if_present<Symbol *>(reloc->referent); 546 547 if (reloc->addend && sym) { 548 assert(isa<Defined>(sym) && "Expected defined for non-zero addend"); 549 Defined *definedSym = cast<Defined>(sym); 550 sym = tryFindDefinedOnIsec(definedSym->isec(), 551 definedSym->value + reloc->addend); 552 } 553 554 return sym; 555 } 556 557 Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec, 558 uint32_t offset) { 559 for (Defined *sym : isec->symbols) 560 if ((sym->value <= offset) && (sym->value + sym->size > offset)) 561 return sym; 562 563 return nullptr; 564 } 565 566 Defined * 567 ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, 568 uint32_t offset) { 569 Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset); 570 return dyn_cast_or_null<Defined>(sym); 571 } 572 573 // Get the class's ro_data symbol. If getMetaRo is true, then we will return 574 // the meta-class's ro_data symbol. Otherwise, we will return the class 575 // (instance) ro_data symbol. 576 Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym, 577 bool getMetaRo) { 578 ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec()); 579 if (!isec) 580 return nullptr; 581 582 if (!getMetaRo) 583 return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset + 584 classSym->value); 585 586 Defined *metaClass = tryGetDefinedAtIsecOffset( 587 isec, classLayout.metaClassOffset + classSym->value); 588 if (!metaClass) 589 return nullptr; 590 591 return tryGetDefinedAtIsecOffset( 592 dyn_cast<ConcatInputSection>(metaClass->isec()), 593 classLayout.roDataOffset); 594 } 595 596 // Given an ConcatInputSection or CStringInputSection and an offset, if there is 597 // a symbol(Defined) at that offset, then erase the symbol (mark it not live) 598 void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset( 599 const ConcatInputSection *isec, uint32_t offset) { 600 const Reloc *reloc = isec->getRelocAt(offset); 601 602 if (!reloc) 603 return; 604 605 Defined *sym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent)); 606 if (!sym) 607 return; 608 609 if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec())) 610 eraseISec(cisec); 611 else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) { 612 uint32_t totalOffset = sym->value + reloc->addend; 613 StringPiece &piece = csisec->getStringPiece(totalOffset); 614 piece.live = false; 615 } else { 616 llvm_unreachable("erased symbol has to be Defined or CStringInputSection"); 617 } 618 } 619 620 bool ObjcCategoryMerger::collectCategoryWriterInfoFromCategory( 621 const InfoInputCategory &catInfo) { 622 623 if (!infoCategoryWriter.catListInfo.valid) 624 collectSectionWriteInfoFromIsec(catInfo.catListIsec, 625 infoCategoryWriter.catListInfo); 626 if (!infoCategoryWriter.catBodyInfo.valid) 627 collectSectionWriteInfoFromIsec(catInfo.catBodyIsec, 628 infoCategoryWriter.catBodyInfo); 629 630 if (!infoCategoryWriter.catNameInfo.valid) { 631 lld::macho::Defined *catNameSym = 632 tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset); 633 634 if (!catNameSym) { 635 // This is an unhandeled case where the category name is not a symbol but 636 // instead points to an CStringInputSection (that doesn't have any symbol) 637 // TODO: Find a small repro and either fix or add a test case for this 638 // scenario 639 return false; 640 } 641 642 collectSectionWriteInfoFromIsec(catNameSym->isec(), 643 infoCategoryWriter.catNameInfo); 644 } 645 646 // Collect writer info from all the category lists (we're assuming they all 647 // would provide the same info) 648 if (!infoCategoryWriter.catPtrListInfo.valid) { 649 for (uint32_t off = catLayout.instanceMethodsOffset; 650 off <= catLayout.classPropsOffset; off += target->wordSize) { 651 if (Defined *ptrList = 652 tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) { 653 collectSectionWriteInfoFromIsec(ptrList->isec(), 654 infoCategoryWriter.catPtrListInfo); 655 // we've successfully collected data, so we can break 656 break; 657 } 658 } 659 } 660 661 return true; 662 } 663 664 // Parse a protocol list that might be linked to ConcatInputSection at a given 665 // offset. The format of the protocol list is different than other lists (prop 666 // lists, method lists) so we need to parse it differently 667 void ObjcCategoryMerger::parseProtocolListInfo( 668 const ConcatInputSection *isec, uint32_t secOffset, 669 PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) { 670 assert((isec && (secOffset + target->wordSize <= isec->data.size())) && 671 "Tried to read pointer list beyond protocol section end"); 672 673 const Reloc *reloc = isec->getRelocAt(secOffset); 674 if (!reloc) 675 return; 676 677 auto *ptrListSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent)); 678 assert(ptrListSym && "Protocol list reloc does not have a valid Defined"); 679 680 // Theoretically protocol count can be either 32b or 64b, depending on 681 // platform pointer size, but to simplify implementation we always just read 682 // the lower 32b which should be good enough. 683 uint32_t protocolCount = *reinterpret_cast<const uint32_t *>( 684 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset); 685 686 ptrList.structCount += protocolCount; 687 ptrList.structSize = target->wordSize; 688 689 [[maybe_unused]] uint32_t expectedListSize = 690 (protocolCount * target->wordSize) + 691 /*header(count)*/ protocolListHeaderLayout.totalSize + 692 /*extra null value*/ target->wordSize; 693 694 // On Swift, the protocol list does not have the extra (unnecessary) null 695 [[maybe_unused]] uint32_t expectedListSizeSwift = 696 expectedListSize - target->wordSize; 697 698 assert(((expectedListSize == ptrListSym->isec()->data.size() && 699 sourceLang == SourceLanguage::ObjC) || 700 (expectedListSizeSwift == ptrListSym->isec()->data.size() && 701 sourceLang == SourceLanguage::Swift)) && 702 "Protocol list does not match expected size"); 703 704 uint32_t off = protocolListHeaderLayout.totalSize; 705 for (uint32_t inx = 0; inx < protocolCount; ++inx) { 706 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off); 707 assert(reloc && "No reloc found at protocol list offset"); 708 709 auto *listSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent)); 710 assert(listSym && "Protocol list reloc does not have a valid Defined"); 711 712 ptrList.allPtrs.push_back(listSym); 713 off += target->wordSize; 714 } 715 assert((ptrListSym->isec()->getRelocAt(off) == nullptr) && 716 "expected null terminating protocol"); 717 assert(off + /*extra null value*/ target->wordSize == expectedListSize && 718 "Protocol list end offset does not match expected size"); 719 } 720 721 // Parse a protocol list and return the PointerListInfo for it 722 ObjcCategoryMerger::PointerListInfo 723 ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec, 724 uint32_t secOffset, 725 SourceLanguage sourceLang) { 726 PointerListInfo ptrList; 727 parseProtocolListInfo(isec, secOffset, ptrList, sourceLang); 728 return ptrList; 729 } 730 731 // Parse a pointer list that might be linked to ConcatInputSection at a given 732 // offset. This can be used for instance methods, class methods, instance props 733 // and class props since they have the same format. 734 bool ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec, 735 uint32_t secOffset, 736 PointerListInfo &ptrList) { 737 assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3); 738 assert(isec && "Trying to parse pointer list from null isec"); 739 assert(secOffset + target->wordSize <= isec->data.size() && 740 "Trying to read pointer list beyond section end"); 741 742 const Reloc *reloc = isec->getRelocAt(secOffset); 743 // Empty list is a valid case, return true. 744 if (!reloc) 745 return true; 746 747 auto *ptrListSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent)); 748 assert(ptrListSym && "Reloc does not have a valid Defined"); 749 750 uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>( 751 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset); 752 uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>( 753 ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset); 754 assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize); 755 756 assert(!ptrList.structSize || (thisStructSize == ptrList.structSize)); 757 758 ptrList.structCount += thisStructCount; 759 ptrList.structSize = thisStructSize; 760 761 uint32_t expectedListSize = 762 listHeaderLayout.totalSize + (thisStructSize * thisStructCount); 763 assert(expectedListSize == ptrListSym->isec()->data.size() && 764 "Pointer list does not match expected size"); 765 766 for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize; 767 off += target->wordSize) { 768 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off); 769 assert(reloc && "No reloc found at pointer list offset"); 770 771 auto *listSym = 772 dyn_cast_or_null<Defined>(reloc->referent.dyn_cast<Symbol *>()); 773 // Sometimes, the reloc points to a StringPiece (InputSection + addend) 774 // instead of a symbol. 775 // TODO: Skip these cases for now, but we should fix this. 776 if (!listSym) 777 return false; 778 779 ptrList.allPtrs.push_back(listSym); 780 } 781 782 return true; 783 } 784 785 // Here we parse all the information of an input category (catInfo) and 786 // append the parsed info into the structure which will contain all the 787 // information about how a class is extended (extInfo) 788 bool ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo, 789 ClassExtensionInfo &extInfo) { 790 const Reloc *catNameReloc = 791 catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset); 792 793 // Parse name 794 assert(catNameReloc && "Category does not have a reloc at 'nameOffset'"); 795 796 // is this the first category we are parsing? 797 if (extInfo.mergedContainerName.empty()) 798 extInfo.objFileForMergeData = 799 dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile()); 800 else 801 extInfo.mergedContainerName += "|"; 802 803 assert(extInfo.objFileForMergeData && 804 "Expected to already have valid objextInfo.objFileForMergeData"); 805 806 StringRef catName = catNameReloc->getReferentString(); 807 extInfo.mergedContainerName += catName.str(); 808 809 // Parse base class 810 if (!extInfo.baseClass) { 811 Symbol *classSym = 812 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset); 813 assert(extInfo.baseClassName.empty()); 814 extInfo.baseClass = classSym; 815 llvm::StringRef classPrefix(objc::symbol_names::klass); 816 assert(classSym->getName().starts_with(classPrefix) && 817 "Base class symbol does not start with expected prefix"); 818 extInfo.baseClassName = classSym->getName().substr(classPrefix.size()); 819 } else { 820 assert((extInfo.baseClass == 821 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, 822 catLayout.klassOffset)) && 823 "Trying to parse category info into container with different base " 824 "class"); 825 } 826 827 if (!parsePointerListInfo(catInfo.catBodyIsec, 828 catLayout.instanceMethodsOffset, 829 extInfo.instanceMethods)) 830 return false; 831 832 if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset, 833 extInfo.classMethods)) 834 return false; 835 836 parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset, 837 extInfo.protocols, catInfo.sourceLanguage); 838 839 if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset, 840 extInfo.instanceProps)) 841 return false; 842 843 if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset, 844 extInfo.classProps)) 845 return false; 846 847 return true; 848 } 849 850 // Generate a protocol list (including header) and link it into the parent at 851 // the specified offset. 852 Defined *ObjcCategoryMerger::emitAndLinkProtocolList( 853 Defined *parentSym, uint32_t linkAtOffset, 854 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) { 855 if (ptrList.allPtrs.empty()) 856 return nullptr; 857 858 assert(ptrList.allPtrs.size() == ptrList.structCount); 859 860 uint32_t bodySize = (ptrList.structCount * target->wordSize) + 861 /*header(count)*/ protocolListHeaderLayout.totalSize + 862 /*extra null value*/ target->wordSize; 863 llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize); 864 865 // This theoretically can be either 32b or 64b, but writing just the first 32b 866 // is good enough 867 const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>( 868 bodyData.data() + protocolListHeaderLayout.protocolCountOffset); 869 870 *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size(); 871 872 ConcatInputSection *listSec = make<ConcatInputSection>( 873 *infoCategoryWriter.catPtrListInfo.inputSection, bodyData, 874 infoCategoryWriter.catPtrListInfo.align); 875 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 876 listSec->live = true; 877 878 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 879 880 std::string symName = ptrList.categoryPrefix; 881 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")"; 882 883 Defined *ptrListSym = make<Defined>( 884 newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(), 885 listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false, 886 /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 887 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 888 /*isWeakDefCanBeHidden=*/false); 889 890 ptrListSym->used = true; 891 parentSym->getObjectFile()->symbols.push_back(ptrListSym); 892 addInputSection(listSec); 893 894 createSymbolReference(parentSym, ptrListSym, linkAtOffset, 895 infoCategoryWriter.catBodyInfo.relocTemplate); 896 897 uint32_t offset = protocolListHeaderLayout.totalSize; 898 for (Symbol *symbol : ptrList.allPtrs) { 899 createSymbolReference(ptrListSym, symbol, offset, 900 infoCategoryWriter.catPtrListInfo.relocTemplate); 901 offset += target->wordSize; 902 } 903 904 return ptrListSym; 905 } 906 907 // Generate a pointer list (including header) and link it into the parent at the 908 // specified offset. This is used for instance and class methods and 909 // proprieties. 910 void ObjcCategoryMerger::emitAndLinkPointerList( 911 Defined *parentSym, uint32_t linkAtOffset, 912 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) { 913 if (ptrList.allPtrs.empty()) 914 return; 915 916 assert(ptrList.allPtrs.size() * target->wordSize == 917 ptrList.structCount * ptrList.structSize); 918 919 // Generate body 920 uint32_t bodySize = 921 listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount); 922 llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize); 923 924 const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>( 925 bodyData.data() + listHeaderLayout.structSizeOffset); 926 const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>( 927 bodyData.data() + listHeaderLayout.structCountOffset); 928 929 *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize; 930 *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount; 931 932 ConcatInputSection *listSec = make<ConcatInputSection>( 933 *infoCategoryWriter.catPtrListInfo.inputSection, bodyData, 934 infoCategoryWriter.catPtrListInfo.align); 935 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 936 listSec->live = true; 937 938 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 939 940 std::string symName = ptrList.categoryPrefix; 941 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")"; 942 943 Defined *ptrListSym = make<Defined>( 944 newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(), 945 listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false, 946 /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 947 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 948 /*isWeakDefCanBeHidden=*/false); 949 950 ptrListSym->used = true; 951 parentSym->getObjectFile()->symbols.push_back(ptrListSym); 952 addInputSection(listSec); 953 954 createSymbolReference(parentSym, ptrListSym, linkAtOffset, 955 infoCategoryWriter.catBodyInfo.relocTemplate); 956 957 uint32_t offset = listHeaderLayout.totalSize; 958 for (Symbol *symbol : ptrList.allPtrs) { 959 createSymbolReference(ptrListSym, symbol, offset, 960 infoCategoryWriter.catPtrListInfo.relocTemplate); 961 offset += target->wordSize; 962 } 963 } 964 965 // This method creates an __objc_catlist ConcatInputSection with a single slot 966 Defined * 967 ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName, 968 const std::string &forBaseClassName, 969 ObjFile *objFile) { 970 uint32_t sectionSize = target->wordSize; 971 llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize); 972 973 ConcatInputSection *newCatList = 974 make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection, 975 bodyData, infoCategoryWriter.catListInfo.align); 976 newCatList->parent = infoCategoryWriter.catListInfo.outputSection; 977 newCatList->live = true; 978 979 newCatList->parent = infoCategoryWriter.catListInfo.outputSection; 980 981 std::string catSymName = "<__objc_catlist slot for merged category "; 982 catSymName += forBaseClassName + "(" + forCategoryName + ")>"; 983 984 Defined *catListSym = make<Defined>( 985 newStringData(catSymName.c_str()), /*file=*/objFile, newCatList, 986 /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false, 987 /*isPrivateExtern=*/false, /*includeInSymtab=*/false, 988 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 989 /*isWeakDefCanBeHidden=*/false); 990 991 catListSym->used = true; 992 objFile->symbols.push_back(catListSym); 993 addInputSection(newCatList); 994 return catListSym; 995 } 996 997 // Here we generate the main category body and link the name and base class into 998 // it. We don't link any other info yet like the protocol and class/instance 999 // methods/props. 1000 Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name, 1001 const Defined *nameSym, 1002 const Symbol *baseClassSym, 1003 const std::string &baseClassName, 1004 ObjFile *objFile) { 1005 llvm::ArrayRef<uint8_t> bodyData = newSectionData(catLayout.totalSize); 1006 1007 uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) + 1008 catLayout.sizeOffset); 1009 *ptrSize = catLayout.totalSize; 1010 1011 ConcatInputSection *newBodySec = 1012 make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection, 1013 bodyData, infoCategoryWriter.catBodyInfo.align); 1014 newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection; 1015 newBodySec->live = true; 1016 1017 std::string symName = 1018 objc::symbol_names::category + baseClassName + "(" + name + ")"; 1019 Defined *catBodySym = make<Defined>( 1020 newStringData(symName.c_str()), /*file=*/objFile, newBodySec, 1021 /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false, 1022 /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 1023 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 1024 /*isWeakDefCanBeHidden=*/false); 1025 1026 catBodySym->used = true; 1027 objFile->symbols.push_back(catBodySym); 1028 addInputSection(newBodySec); 1029 1030 createSymbolReference(catBodySym, nameSym, catLayout.nameOffset, 1031 infoCategoryWriter.catBodyInfo.relocTemplate); 1032 1033 // Create a reloc to the base class (either external or internal) 1034 createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset, 1035 infoCategoryWriter.catBodyInfo.relocTemplate); 1036 1037 return catBodySym; 1038 } 1039 1040 // This writes the new category name (for the merged category) into the binary 1041 // and returns the sybmol for it. 1042 Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name, 1043 ObjFile *objFile) { 1044 StringRef nameStrData = newStringData(name.c_str()); 1045 // We use +1 below to include the null terminator 1046 llvm::ArrayRef<uint8_t> nameData( 1047 reinterpret_cast<const uint8_t *>(nameStrData.data()), 1048 nameStrData.size() + 1); 1049 1050 auto *parentSection = infoCategoryWriter.catNameInfo.inputSection; 1051 CStringInputSection *newStringSec = make<CStringInputSection>( 1052 *infoCategoryWriter.catNameInfo.inputSection, nameData, 1053 infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/true); 1054 1055 parentSection->subsections.push_back({0, newStringSec}); 1056 1057 newStringSec->splitIntoPieces(); 1058 newStringSec->pieces[0].live = true; 1059 newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection; 1060 in.cStringSection->addInput(newStringSec); 1061 assert(newStringSec->pieces.size() == 1); 1062 1063 Defined *catNameSym = make<Defined>( 1064 "<merged category name>", /*file=*/objFile, newStringSec, 1065 /*value=*/0, nameData.size(), 1066 /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false, 1067 /*includeInSymtab=*/false, /*isReferencedDynamically=*/false, 1068 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false); 1069 1070 catNameSym->used = true; 1071 objFile->symbols.push_back(catNameSym); 1072 return catNameSym; 1073 } 1074 1075 // This method fully creates a new category from the given ClassExtensionInfo. 1076 // It creates the category name, body and method/protocol/prop lists and links 1077 // them all together. Then it creates a new __objc_catlist entry and adds the 1078 // category to it. Calling this method will fully generate a category which will 1079 // be available in the final binary. 1080 Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) { 1081 Defined *catNameSym = emitCategoryName(extInfo.mergedContainerName, 1082 extInfo.objFileForMergeData); 1083 1084 Defined *catBodySym = emitCategoryBody( 1085 extInfo.mergedContainerName, catNameSym, extInfo.baseClass, 1086 extInfo.baseClassName, extInfo.objFileForMergeData); 1087 1088 Defined *catListSym = 1089 emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName, 1090 extInfo.objFileForMergeData); 1091 1092 // Add the single category body to the category list at the offset 0. 1093 createSymbolReference(catListSym, catBodySym, /*offset=*/0, 1094 infoCategoryWriter.catListInfo.relocTemplate); 1095 1096 emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset, extInfo, 1097 extInfo.instanceMethods); 1098 1099 emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo, 1100 extInfo.classMethods); 1101 1102 emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo, 1103 extInfo.protocols); 1104 1105 emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset, extInfo, 1106 extInfo.instanceProps); 1107 1108 emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo, 1109 extInfo.classProps); 1110 1111 return catBodySym; 1112 } 1113 1114 // This method merges all the categories (sharing a base class) into a single 1115 // category. 1116 bool ObjcCategoryMerger::mergeCategoriesIntoSingleCategory( 1117 std::vector<InfoInputCategory> &categories) { 1118 assert(categories.size() > 1 && "Expected at least 2 categories"); 1119 1120 ClassExtensionInfo extInfo(catLayout); 1121 1122 for (auto &catInfo : categories) 1123 if (!parseCatInfoToExtInfo(catInfo, extInfo)) 1124 return false; 1125 1126 Defined *newCatDef = emitCategory(extInfo); 1127 assert(newCatDef && "Failed to create a new category"); 1128 1129 // Suppress unsuded var warning 1130 (void)newCatDef; 1131 1132 for (auto &catInfo : categories) 1133 catInfo.wasMerged = true; 1134 1135 return true; 1136 } 1137 1138 void ObjcCategoryMerger::createSymbolReference(Defined *refFrom, 1139 const Symbol *refTo, 1140 uint32_t offset, 1141 const Reloc &relocTemplate) { 1142 Reloc r = relocTemplate; 1143 r.offset = offset; 1144 r.addend = 0; 1145 r.referent = const_cast<Symbol *>(refTo); 1146 refFrom->isec()->relocs.push_back(r); 1147 } 1148 1149 // Get the list of categories in the '__objc_nlcatlist' section. We can't 1150 // optimize these as they have a '+load' method that has to be called at 1151 // runtime. 1152 DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() { 1153 DenseSet<const Symbol *> nlCategories; 1154 1155 for (InputSection *sec : allInputSections) { 1156 if (sec->getName() != section_names::objcNonLazyCatList) 1157 continue; 1158 1159 for (auto &r : sec->relocs) { 1160 const Symbol *sym = r.referent.dyn_cast<Symbol *>(); 1161 nlCategories.insert(sym); 1162 } 1163 } 1164 return nlCategories; 1165 } 1166 1167 void ObjcCategoryMerger::collectAndValidateCategoriesData() { 1168 auto nlCategories = collectNlCategories(); 1169 1170 for (InputSection *sec : allInputSections) { 1171 if (sec->getName() != section_names::objcCatList) 1172 continue; 1173 ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec); 1174 assert(catListCisec && 1175 "__objc_catList InputSection is not a ConcatInputSection"); 1176 1177 for (uint32_t off = 0; off < catListCisec->getSize(); 1178 off += target->wordSize) { 1179 Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off); 1180 assert(categorySym && 1181 "Failed to get a valid category at __objc_catlit offset"); 1182 1183 if (nlCategories.count(categorySym)) 1184 continue; 1185 1186 auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec()); 1187 assert(catBodyIsec && 1188 "Category data section is not an ConcatInputSection"); 1189 1190 SourceLanguage eLang = SourceLanguage::Unknown; 1191 if (categorySym->getName().starts_with(objc::symbol_names::category)) 1192 eLang = SourceLanguage::ObjC; 1193 else if (categorySym->getName().starts_with( 1194 objc::symbol_names::swift_objc_category)) 1195 eLang = SourceLanguage::Swift; 1196 else 1197 llvm_unreachable("Unexpected category symbol name"); 1198 1199 InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang}; 1200 1201 // Check that the category has a reloc at 'klassOffset' (which is 1202 // a pointer to the class symbol) 1203 1204 Symbol *classSym = 1205 tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset); 1206 assert(classSym && "Category does not have a valid base class"); 1207 1208 if (!collectCategoryWriterInfoFromCategory(catInputInfo)) 1209 continue; 1210 1211 categoryMap[classSym].push_back(catInputInfo); 1212 } 1213 } 1214 } 1215 1216 // In the input we have multiple __objc_catlist InputSection, each of which may 1217 // contain links to multiple categories. Of these categories, we will merge (and 1218 // erase) only some. There will be some categories that will remain untouched 1219 // (not erased). For these not erased categories, we generate new __objc_catlist 1220 // entries since the parent __objc_catlist entry will be erased 1221 void ObjcCategoryMerger::generateCatListForNonErasedCategories( 1222 const MapVector<ConcatInputSection *, std::set<uint64_t>> 1223 catListToErasedOffsets) { 1224 1225 // Go through all offsets of all __objc_catlist's that we process and if there 1226 // are categories that we didn't process - generate a new __objc_catlist for 1227 // each. 1228 for (auto &mapEntry : catListToErasedOffsets) { 1229 ConcatInputSection *catListIsec = mapEntry.first; 1230 for (uint32_t catListIsecOffset = 0; 1231 catListIsecOffset < catListIsec->data.size(); 1232 catListIsecOffset += target->wordSize) { 1233 // This slot was erased, we can just skip it 1234 if (mapEntry.second.count(catListIsecOffset)) 1235 continue; 1236 1237 Defined *nonErasedCatBody = 1238 tryGetDefinedAtIsecOffset(catListIsec, catListIsecOffset); 1239 assert(nonErasedCatBody && "Failed to relocate non-deleted category"); 1240 1241 // Allocate data for the new __objc_catlist slot 1242 llvm::ArrayRef<uint8_t> bodyData = newSectionData(target->wordSize); 1243 1244 // We mark the __objc_catlist slot as belonging to the same file as the 1245 // category 1246 ObjFile *objFile = dyn_cast<ObjFile>(nonErasedCatBody->getFile()); 1247 1248 ConcatInputSection *listSec = make<ConcatInputSection>( 1249 *infoCategoryWriter.catListInfo.inputSection, bodyData, 1250 infoCategoryWriter.catListInfo.align); 1251 listSec->parent = infoCategoryWriter.catListInfo.outputSection; 1252 listSec->live = true; 1253 1254 std::string slotSymName = "<__objc_catlist slot for category "; 1255 slotSymName += nonErasedCatBody->getName(); 1256 slotSymName += ">"; 1257 1258 Defined *catListSlotSym = make<Defined>( 1259 newStringData(slotSymName.c_str()), /*file=*/objFile, listSec, 1260 /*value=*/0, bodyData.size(), 1261 /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false, 1262 /*includeInSymtab=*/false, /*isReferencedDynamically=*/false, 1263 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false); 1264 1265 catListSlotSym->used = true; 1266 objFile->symbols.push_back(catListSlotSym); 1267 addInputSection(listSec); 1268 1269 // Now link the category body into the newly created slot 1270 createSymbolReference(catListSlotSym, nonErasedCatBody, 0, 1271 infoCategoryWriter.catListInfo.relocTemplate); 1272 } 1273 } 1274 } 1275 1276 void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) { 1277 isec->live = false; 1278 for (auto &sym : isec->symbols) 1279 sym->used = false; 1280 } 1281 1282 // This fully erases the merged categories, including their body, their names, 1283 // their method/protocol/prop lists and the __objc_catlist entries that link to 1284 // them. 1285 void ObjcCategoryMerger::eraseMergedCategories() { 1286 // Map of InputSection to a set of offsets of the categories that were merged 1287 MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets; 1288 1289 for (auto &mapEntry : categoryMap) { 1290 for (InfoInputCategory &catInfo : mapEntry.second) { 1291 if (catInfo.wasMerged) { 1292 eraseISec(catInfo.catListIsec); 1293 catListToErasedOffsets[catInfo.catListIsec].insert( 1294 catInfo.offCatListIsec); 1295 } 1296 } 1297 } 1298 1299 // If there were categories that we did not erase, we need to generate a new 1300 // __objc_catList that contains only the un-merged categories, and get rid of 1301 // the references to the ones we merged. 1302 generateCatListForNonErasedCategories(catListToErasedOffsets); 1303 1304 // Erase the old method lists & names of the categories that were merged 1305 for (auto &mapEntry : categoryMap) { 1306 for (InfoInputCategory &catInfo : mapEntry.second) { 1307 if (!catInfo.wasMerged) 1308 continue; 1309 1310 eraseISec(catInfo.catBodyIsec); 1311 1312 // We can't erase 'catLayout.nameOffset' for either Swift or ObjC 1313 // categories because the name will sometimes also be used for other 1314 // purposes. 1315 // For Swift, see usages of 'l_.str.11.SimpleClass' in 1316 // objc-category-merging-swift.s 1317 // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in 1318 // objc-category-merging-erase-objc-name-test.s 1319 // TODO: handle the above in a smarter way 1320 1321 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1322 catLayout.instanceMethodsOffset); 1323 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1324 catLayout.classMethodsOffset); 1325 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1326 catLayout.protocolsOffset); 1327 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1328 catLayout.classPropsOffset); 1329 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1330 catLayout.instancePropsOffset); 1331 } 1332 } 1333 } 1334 1335 void ObjcCategoryMerger::doMerge() { 1336 collectAndValidateCategoriesData(); 1337 1338 for (auto &[baseClass, catInfos] : categoryMap) { 1339 bool merged = false; 1340 if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) { 1341 // Merge all categories into the base class 1342 merged = mergeCategoriesIntoBaseClass(baseClassDef, catInfos); 1343 } else if (catInfos.size() > 1) { 1344 // Merge all categories into a new, single category 1345 merged = mergeCategoriesIntoSingleCategory(catInfos); 1346 } 1347 if (!merged) 1348 warn("ObjC category merging skipped for class symbol' " + 1349 baseClass->getName().str() + "'\n"); 1350 } 1351 1352 // Erase all categories that were merged 1353 eraseMergedCategories(); 1354 } 1355 1356 void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); } 1357 1358 StringRef ObjcCategoryMerger::newStringData(const char *str) { 1359 uint32_t len = strlen(str); 1360 uint32_t bufSize = len + 1; 1361 SmallVector<uint8_t> &data = newSectionData(bufSize); 1362 char *strData = reinterpret_cast<char *>(data.data()); 1363 // Copy the string chars and null-terminator 1364 memcpy(strData, str, bufSize); 1365 return StringRef(strData, len); 1366 } 1367 1368 SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) { 1369 generatedSectionData.push_back( 1370 std::make_unique<SmallVector<uint8_t>>(size, 0)); 1371 return *generatedSectionData.back(); 1372 } 1373 1374 } // namespace 1375 1376 void objc::mergeCategories() { 1377 TimeTraceScope timeScope("ObjcCategoryMerger"); 1378 1379 ObjcCategoryMerger merger(inputSections); 1380 merger.doMerge(); 1381 } 1382 1383 void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); } 1384 1385 ObjcCategoryMerger::SourceLanguage 1386 ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) { 1387 if (classSym->getName().starts_with(objc::symbol_names::swift_objc_klass)) 1388 return SourceLanguage::Swift; 1389 1390 // If the symbol name matches the ObjC prefix, we don't necessarely know this 1391 // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift 1392 // classes. Ex: 1393 // .globl _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass 1394 // .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass 1395 // .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN 1396 // 1397 // So we scan for symbols with the same address and check for the Swift class 1398 if (classSym->getName().starts_with(objc::symbol_names::klass)) { 1399 for (auto &sym : classSym->originalIsec->symbols) 1400 if (sym->value == classSym->value) 1401 if (sym->getName().starts_with(objc::symbol_names::swift_objc_klass)) 1402 return SourceLanguage::Swift; 1403 return SourceLanguage::ObjC; 1404 } 1405 1406 llvm_unreachable("Unexpected class symbol name during category merging"); 1407 } 1408 1409 bool ObjcCategoryMerger::mergeCategoriesIntoBaseClass( 1410 const Defined *baseClass, std::vector<InfoInputCategory> &categories) { 1411 assert(categories.size() >= 1 && "Expected at least one category to merge"); 1412 1413 // Collect all the info from the categories 1414 ClassExtensionInfo extInfo(catLayout); 1415 extInfo.baseClass = baseClass; 1416 extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass); 1417 1418 for (auto &catInfo : categories) 1419 if (!parseCatInfoToExtInfo(catInfo, extInfo)) 1420 return false; 1421 1422 // Get metadata for the base class 1423 Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true); 1424 ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec()); 1425 Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false); 1426 ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec()); 1427 1428 // Now collect the info from the base class from the various lists in the 1429 // class metadata 1430 1431 // Protocol lists are a special case - the same protocol list is in classRo 1432 // and metaRo, so we only need to parse it once 1433 parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset, 1434 extInfo.protocols, extInfo.baseClassSourceLanguage); 1435 1436 // Check that the classRo and metaRo protocol lists are identical 1437 assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset, 1438 extInfo.baseClassSourceLanguage) == 1439 parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset, 1440 extInfo.baseClassSourceLanguage) && 1441 "Category merger expects classRo and metaRo to have the same protocol " 1442 "list"); 1443 1444 parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset, 1445 extInfo.classMethods); 1446 parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset, 1447 extInfo.instanceMethods); 1448 1449 parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset, 1450 extInfo.classProps); 1451 parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset, 1452 extInfo.instanceProps); 1453 1454 // Erase the old lists - these will be generated and replaced 1455 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset); 1456 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset); 1457 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset); 1458 eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset); 1459 eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset); 1460 eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset); 1461 1462 // Emit the newly merged lists - first into the meta RO then into the class RO 1463 // First we emit and link the protocol list into the meta RO. Then we link it 1464 // in the classRo as well (they're supposed to be identical) 1465 if (Defined *protoListSym = 1466 emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset, 1467 extInfo, extInfo.protocols)) { 1468 createSymbolReference(classRo, protoListSym, 1469 roClassLayout.baseProtocolsOffset, 1470 infoCategoryWriter.catBodyInfo.relocTemplate); 1471 } 1472 1473 emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo, 1474 extInfo.classMethods); 1475 emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo, 1476 extInfo.instanceMethods); 1477 1478 emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo, 1479 extInfo.classProps); 1480 1481 emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo, 1482 extInfo.instanceProps); 1483 1484 // Mark all the categories as merged - this will be used to erase them later 1485 for (auto &catInfo : categories) 1486 catInfo.wasMerged = true; 1487 1488 return true; 1489 } 1490 1491 // Erase the symbol at a given offset in an InputSection 1492 void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec, 1493 uint32_t offset) { 1494 Defined *sym = tryGetDefinedAtIsecOffset(isec, offset); 1495 if (!sym) 1496 return; 1497 1498 // Remove the symbol from isec->symbols 1499 assert(isa<Defined>(sym) && "Can only erase a Defined"); 1500 llvm::erase(isec->symbols, sym); 1501 1502 // Remove the relocs that refer to this symbol 1503 auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; }; 1504 llvm::erase_if(isec->relocs, removeAtOff); 1505 1506 // Now, if the symbol fully occupies a ConcatInputSection, we can also erase 1507 // the whole ConcatInputSection 1508 if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec())) 1509 if (cisec->data.size() == sym->size) 1510 eraseISec(cisec); 1511 } 1512