1 //===- ObjC.cpp -----------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ObjC.h" 10 #include "ConcatOutputSection.h" 11 #include "InputFiles.h" 12 #include "InputSection.h" 13 #include "Layout.h" 14 #include "OutputSegment.h" 15 #include "SyntheticSections.h" 16 #include "Target.h" 17 18 #include "lld/Common/ErrorHandler.h" 19 #include "llvm/ADT/DenseMap.h" 20 #include "llvm/BinaryFormat/MachO.h" 21 #include "llvm/Bitcode/BitcodeReader.h" 22 #include "llvm/Support/TimeProfiler.h" 23 24 using namespace llvm; 25 using namespace llvm::MachO; 26 using namespace lld; 27 using namespace lld::macho; 28 29 template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) { 30 using SectionHeader = typename LP::section; 31 32 auto *hdr = 33 reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart()); 34 if (hdr->magic != LP::magic) 35 return false; 36 37 if (const auto *c = 38 findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) { 39 auto sectionHeaders = ArrayRef<SectionHeader>{ 40 reinterpret_cast<const SectionHeader *>(c + 1), c->nsects}; 41 for (const SectionHeader &secHead : sectionHeaders) { 42 StringRef sectname(secHead.sectname, 43 strnlen(secHead.sectname, sizeof(secHead.sectname))); 44 StringRef segname(secHead.segname, 45 strnlen(secHead.segname, sizeof(secHead.segname))); 46 if ((segname == segment_names::data && 47 sectname == section_names::objcCatList) || 48 (segname == segment_names::text && 49 sectname.starts_with(section_names::swift))) { 50 return true; 51 } 52 } 53 } 54 return false; 55 } 56 57 static bool objectHasObjCSection(MemoryBufferRef mb) { 58 if (target->wordSize == 8) 59 return ::objectHasObjCSection<LP64>(mb); 60 else 61 return ::objectHasObjCSection<ILP32>(mb); 62 } 63 64 bool macho::hasObjCSection(MemoryBufferRef mb) { 65 switch (identify_magic(mb.getBuffer())) { 66 case file_magic::macho_object: 67 return objectHasObjCSection(mb); 68 case file_magic::bitcode: 69 return check(isBitcodeContainingObjCCategory(mb)); 70 default: 71 return false; 72 } 73 } 74 75 namespace { 76 77 #define FOR_EACH_CATEGORY_FIELD(DO) \ 78 DO(Ptr, name) \ 79 DO(Ptr, klass) \ 80 DO(Ptr, instanceMethods) \ 81 DO(Ptr, classMethods) \ 82 DO(Ptr, protocols) \ 83 DO(Ptr, instanceProps) \ 84 DO(Ptr, classProps) \ 85 DO(uint32_t, size) 86 87 CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD); 88 89 #undef FOR_EACH_CATEGORY_FIELD 90 91 #define FOR_EACH_CLASS_FIELD(DO) \ 92 DO(Ptr, metaClass) \ 93 DO(Ptr, superClass) \ 94 DO(Ptr, methodCache) \ 95 DO(Ptr, vtable) \ 96 DO(Ptr, roData) 97 98 CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD); 99 100 #undef FOR_EACH_CLASS_FIELD 101 102 #define FOR_EACH_RO_CLASS_FIELD(DO) \ 103 DO(uint32_t, flags) \ 104 DO(uint32_t, instanceStart) \ 105 DO(Ptr, instanceSize) \ 106 DO(Ptr, ivarLayout) \ 107 DO(Ptr, name) \ 108 DO(Ptr, baseMethods) \ 109 DO(Ptr, baseProtocols) \ 110 DO(Ptr, ivars) \ 111 DO(Ptr, weakIvarLayout) \ 112 DO(Ptr, baseProperties) 113 114 CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD); 115 116 #undef FOR_EACH_RO_CLASS_FIELD 117 118 #define FOR_EACH_LIST_HEADER(DO) \ 119 DO(uint32_t, structSize) \ 120 DO(uint32_t, structCount) 121 122 CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER); 123 124 #undef FOR_EACH_LIST_HEADER 125 126 #define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount) 127 128 CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER); 129 130 #undef FOR_EACH_PROTOCOL_LIST_HEADER 131 132 #define FOR_EACH_METHOD(DO) \ 133 DO(Ptr, name) \ 134 DO(Ptr, type) \ 135 DO(Ptr, impl) 136 137 CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD); 138 139 #undef FOR_EACH_METHOD 140 141 enum MethodContainerKind { 142 MCK_Class, 143 MCK_Category, 144 }; 145 146 struct MethodContainer { 147 MethodContainerKind kind; 148 const ConcatInputSection *isec; 149 }; 150 151 enum MethodKind { 152 MK_Instance, 153 MK_Static, 154 }; 155 156 struct ObjcClass { 157 DenseMap<CachedHashStringRef, MethodContainer> instanceMethods; 158 DenseMap<CachedHashStringRef, MethodContainer> classMethods; 159 }; 160 161 } // namespace 162 163 class ObjcCategoryChecker { 164 public: 165 ObjcCategoryChecker(); 166 void parseCategory(const ConcatInputSection *catListIsec); 167 168 private: 169 void parseClass(const Defined *classSym); 170 void parseMethods(const ConcatInputSection *methodsIsec, 171 const Symbol *methodContainer, 172 const ConcatInputSection *containerIsec, 173 MethodContainerKind, MethodKind); 174 175 CategoryLayout catLayout; 176 ClassLayout classLayout; 177 ROClassLayout roClassLayout; 178 ListHeaderLayout listHeaderLayout; 179 MethodLayout methodLayout; 180 181 DenseMap<const Symbol *, ObjcClass> classMap; 182 }; 183 184 ObjcCategoryChecker::ObjcCategoryChecker() 185 : catLayout(target->wordSize), classLayout(target->wordSize), 186 roClassLayout(target->wordSize), listHeaderLayout(target->wordSize), 187 methodLayout(target->wordSize) {} 188 189 // \p r must point to an offset within a CStringInputSection or a 190 // ConcatInputSection 191 static StringRef getReferentString(const Reloc &r) { 192 if (auto *isec = r.referent.dyn_cast<InputSection *>()) 193 return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend); 194 195 auto *sym = cast<Defined>(r.referent.get<Symbol *>()); 196 auto *symIsec = sym->isec(); 197 auto symOffset = sym->value + r.addend; 198 199 if (auto *s = dyn_cast_or_null<CStringInputSection>(symIsec)) 200 return s->getStringRefAtOffset(symOffset); 201 202 if (isa<ConcatInputSection>(symIsec)) { 203 auto strData = symIsec->data.slice(symOffset); 204 const char *pszData = reinterpret_cast<const char *>(strData.data()); 205 return StringRef(pszData, strnlen(pszData, strData.size())); 206 } 207 208 llvm_unreachable("unknown reference section in getReferentString"); 209 } 210 211 void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec, 212 const Symbol *methodContainerSym, 213 const ConcatInputSection *containerIsec, 214 MethodContainerKind mcKind, 215 MethodKind mKind) { 216 ObjcClass &klass = classMap[methodContainerSym]; 217 for (const Reloc &r : methodsIsec->relocs) { 218 if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize != 219 methodLayout.nameOffset) 220 continue; 221 222 CachedHashStringRef methodName(getReferentString(r)); 223 // +load methods are special: all implementations are called by the runtime 224 // even if they are part of the same class. Thus there is no need to check 225 // for duplicates. 226 // NOTE: Instead of specifically checking for this method name, ld64 simply 227 // checks whether a class / category is present in __objc_nlclslist / 228 // __objc_nlcatlist respectively. This will be the case if the class / 229 // category has a +load method. It skips optimizing the categories if there 230 // are multiple +load methods. Since it does dupe checking as part of the 231 // optimization process, this avoids spurious dupe messages around +load, 232 // but it also means that legit dupe issues for other methods are ignored. 233 if (mKind == MK_Static && methodName.val() == "load") 234 continue; 235 236 auto &methodMap = 237 mKind == MK_Instance ? klass.instanceMethods : klass.classMethods; 238 if (methodMap 239 .try_emplace(methodName, MethodContainer{mcKind, containerIsec}) 240 .second) 241 continue; 242 243 // We have a duplicate; generate a warning message. 244 const auto &mc = methodMap.lookup(methodName); 245 const Reloc *nameReloc = nullptr; 246 if (mc.kind == MCK_Category) { 247 nameReloc = mc.isec->getRelocAt(catLayout.nameOffset); 248 } else { 249 assert(mc.kind == MCK_Class); 250 const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset) 251 ->getReferentInputSection(); 252 nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset); 253 } 254 StringRef containerName = getReferentString(*nameReloc); 255 StringRef methPrefix = mKind == MK_Instance ? "-" : "+"; 256 257 // We should only ever encounter collisions when parsing category methods 258 // (since the Class struct is parsed before any of its categories). 259 assert(mcKind == MCK_Category); 260 StringRef newCatName = 261 getReferentString(*containerIsec->getRelocAt(catLayout.nameOffset)); 262 263 auto formatObjAndSrcFileName = [](const InputSection *section) { 264 lld::macho::InputFile *inputFile = section->getFile(); 265 std::string result = toString(inputFile); 266 267 auto objFile = dyn_cast_or_null<ObjFile>(inputFile); 268 if (objFile && objFile->compileUnit) 269 result += " (" + objFile->sourceFile() + ")"; 270 271 return result; 272 }; 273 274 StringRef containerType = mc.kind == MCK_Category ? "category" : "class"; 275 warn("method '" + methPrefix + methodName.val() + 276 "' has conflicting definitions:\n>>> defined in category " + 277 newCatName + " from " + formatObjAndSrcFileName(containerIsec) + 278 "\n>>> defined in " + containerType + " " + containerName + " from " + 279 formatObjAndSrcFileName(mc.isec)); 280 } 281 } 282 283 void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) { 284 auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset); 285 if (!classReloc) 286 return; 287 288 auto *classSym = classReloc->referent.get<Symbol *>(); 289 if (auto *d = dyn_cast<Defined>(classSym)) 290 if (!classMap.count(d)) 291 parseClass(d); 292 293 if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) { 294 parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()), 295 classSym, catIsec, MCK_Category, MK_Static); 296 } 297 298 if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) { 299 parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()), 300 classSym, catIsec, MCK_Category, MK_Instance); 301 } 302 } 303 304 void ObjcCategoryChecker::parseClass(const Defined *classSym) { 305 // Given a Class struct, get its corresponding Methods struct 306 auto getMethodsIsec = 307 [&](const InputSection *classIsec) -> ConcatInputSection * { 308 if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) { 309 if (const auto *roIsec = 310 cast_or_null<ConcatInputSection>(r->getReferentInputSection())) { 311 if (const auto *r = 312 roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) { 313 if (auto *methodsIsec = cast_or_null<ConcatInputSection>( 314 r->getReferentInputSection())) 315 return methodsIsec; 316 } 317 } 318 } 319 return nullptr; 320 }; 321 322 const auto *classIsec = cast<ConcatInputSection>(classSym->isec()); 323 324 // Parse instance methods. 325 if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec)) 326 parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class, 327 MK_Instance); 328 329 // Class methods are contained in the metaclass. 330 if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset)) 331 if (const auto *classMethodsIsec = getMethodsIsec( 332 cast<ConcatInputSection>(r->getReferentInputSection()))) 333 parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static); 334 } 335 336 void objc::checkCategories() { 337 TimeTraceScope timeScope("ObjcCategoryChecker"); 338 339 ObjcCategoryChecker checker; 340 for (const InputSection *isec : inputSections) { 341 if (isec->getName() == section_names::objcCatList) 342 for (const Reloc &r : isec->relocs) { 343 auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection()); 344 checker.parseCategory(catIsec); 345 } 346 } 347 } 348 349 namespace { 350 351 class ObjcCategoryMerger { 352 // In which language was a particular construct originally defined 353 enum SourceLanguage { Unknown, ObjC, Swift }; 354 355 // Information about an input category 356 struct InfoInputCategory { 357 ConcatInputSection *catListIsec; 358 ConcatInputSection *catBodyIsec; 359 uint32_t offCatListIsec = 0; 360 SourceLanguage sourceLanguage = SourceLanguage::Unknown; 361 362 bool wasMerged = false; 363 }; 364 365 // To write new (merged) categories or classes, we will try make limited 366 // assumptions about the alignment and the sections the various class/category 367 // info are stored in and . So we'll just reuse the same sections and 368 // alignment as already used in existing (input) categories. To do this we 369 // have InfoCategoryWriter which contains the various sections that the 370 // generated categories will be written to. 371 struct InfoWriteSection { 372 bool valid = false; // Data has been successfully collected from input 373 uint32_t align = 0; 374 Section *inputSection; 375 Reloc relocTemplate; 376 OutputSection *outputSection; 377 }; 378 379 struct InfoCategoryWriter { 380 InfoWriteSection catListInfo; 381 InfoWriteSection catBodyInfo; 382 InfoWriteSection catNameInfo; 383 InfoWriteSection catPtrListInfo; 384 }; 385 386 // Information about a pointer list in the original categories or class(method 387 // lists, protocol lists, etc) 388 struct PointerListInfo { 389 PointerListInfo() = default; 390 PointerListInfo(const PointerListInfo &) = default; 391 PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct) 392 : categoryPrefix(_categoryPrefix), 393 pointersPerStruct(_pointersPerStruct) {} 394 395 inline bool operator==(const PointerListInfo &cmp) const { 396 return pointersPerStruct == cmp.pointersPerStruct && 397 structSize == cmp.structSize && structCount == cmp.structCount && 398 allPtrs == cmp.allPtrs; 399 } 400 401 const char *categoryPrefix; 402 403 uint32_t pointersPerStruct = 0; 404 405 uint32_t structSize = 0; 406 uint32_t structCount = 0; 407 408 std::vector<Symbol *> allPtrs; 409 }; 410 411 // Full information describing an ObjC class . This will include all the 412 // additional methods, protocols, and properties that are contained in the 413 // class and all the categories that extend a particular class. 414 struct ClassExtensionInfo { 415 ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){}; 416 417 // Merged names of containers. Ex: base|firstCategory|secondCategory|... 418 std::string mergedContainerName; 419 std::string baseClassName; 420 const Symbol *baseClass = nullptr; 421 SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown; 422 423 CategoryLayout &catLayout; 424 425 // In case we generate new data, mark the new data as belonging to this file 426 ObjFile *objFileForMergeData = nullptr; 427 428 PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods, 429 /*pointersPerStruct=*/3}; 430 PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods, 431 /*pointersPerStruct=*/3}; 432 PointerListInfo protocols = {objc::symbol_names::categoryProtocols, 433 /*pointersPerStruct=*/0}; 434 PointerListInfo instanceProps = {objc::symbol_names::listProprieties, 435 /*pointersPerStruct=*/2}; 436 PointerListInfo classProps = {objc::symbol_names::klassPropList, 437 /*pointersPerStruct=*/2}; 438 }; 439 440 public: 441 ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections); 442 void doMerge(); 443 static void doCleanup(); 444 445 private: 446 DenseSet<const Symbol *> collectNlCategories(); 447 void collectAndValidateCategoriesData(); 448 void 449 mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories); 450 451 void eraseISec(ConcatInputSection *isec); 452 void eraseMergedCategories(); 453 454 void generateCatListForNonErasedCategories( 455 MapVector<ConcatInputSection *, std::set<uint64_t>> 456 catListToErasedOffsets); 457 void collectSectionWriteInfoFromIsec(const InputSection *isec, 458 InfoWriteSection &catWriteInfo); 459 void collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo); 460 void parseCatInfoToExtInfo(const InfoInputCategory &catInfo, 461 ClassExtensionInfo &extInfo); 462 463 void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset, 464 PointerListInfo &ptrList, 465 SourceLanguage sourceLang); 466 467 PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec, 468 uint32_t secOffset, 469 SourceLanguage sourceLang); 470 471 void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset, 472 PointerListInfo &ptrList); 473 474 void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset, 475 const ClassExtensionInfo &extInfo, 476 const PointerListInfo &ptrList); 477 478 Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset, 479 const ClassExtensionInfo &extInfo, 480 const PointerListInfo &ptrList); 481 482 Defined *emitCategory(const ClassExtensionInfo &extInfo); 483 Defined *emitCatListEntrySec(const std::string &forCategoryName, 484 const std::string &forBaseClassName, 485 ObjFile *objFile); 486 Defined *emitCategoryBody(const std::string &name, const Defined *nameSym, 487 const Symbol *baseClassSym, 488 const std::string &baseClassName, ObjFile *objFile); 489 Defined *emitCategoryName(const std::string &name, ObjFile *objFile); 490 void createSymbolReference(Defined *refFrom, const Symbol *refTo, 491 uint32_t offset, const Reloc &relocTemplate); 492 Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset); 493 Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec, 494 uint32_t offset); 495 Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, 496 uint32_t offset); 497 Defined *getClassRo(const Defined *classSym, bool getMetaRo); 498 SourceLanguage getClassSymSourceLang(const Defined *classSym); 499 void mergeCategoriesIntoBaseClass(const Defined *baseClass, 500 std::vector<InfoInputCategory> &categories); 501 void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset); 502 void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec, 503 uint32_t offset); 504 505 // Allocate a null-terminated StringRef backed by generatedSectionData 506 StringRef newStringData(const char *str); 507 // Allocate section data, backed by generatedSectionData 508 SmallVector<uint8_t> &newSectionData(uint32_t size); 509 510 CategoryLayout catLayout; 511 ClassLayout classLayout; 512 ROClassLayout roClassLayout; 513 ListHeaderLayout listHeaderLayout; 514 MethodLayout methodLayout; 515 ProtocolListHeaderLayout protocolListHeaderLayout; 516 517 InfoCategoryWriter infoCategoryWriter; 518 std::vector<ConcatInputSection *> &allInputSections; 519 // Map of base class Symbol to list of InfoInputCategory's for it 520 MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap; 521 522 // Normally, the binary data comes from the input files, but since we're 523 // generating binary data ourselves, we use the below array to store it in. 524 // Need this to be 'static' so the data survives past the ObjcCategoryMerger 525 // object, as the data will be read by the Writer when the final binary is 526 // generated. 527 static SmallVector<std::unique_ptr<SmallVector<uint8_t>>> 528 generatedSectionData; 529 }; 530 531 SmallVector<std::unique_ptr<SmallVector<uint8_t>>> 532 ObjcCategoryMerger::generatedSectionData; 533 534 ObjcCategoryMerger::ObjcCategoryMerger( 535 std::vector<ConcatInputSection *> &_allInputSections) 536 : catLayout(target->wordSize), classLayout(target->wordSize), 537 roClassLayout(target->wordSize), listHeaderLayout(target->wordSize), 538 methodLayout(target->wordSize), 539 protocolListHeaderLayout(target->wordSize), 540 allInputSections(_allInputSections) {} 541 542 void ObjcCategoryMerger::collectSectionWriteInfoFromIsec( 543 const InputSection *isec, InfoWriteSection &catWriteInfo) { 544 545 catWriteInfo.inputSection = const_cast<Section *>(&isec->section); 546 catWriteInfo.align = isec->align; 547 catWriteInfo.outputSection = isec->parent; 548 549 assert(catWriteInfo.outputSection && 550 "outputSection may not be null in collectSectionWriteInfoFromIsec."); 551 552 if (isec->relocs.size()) 553 catWriteInfo.relocTemplate = isec->relocs[0]; 554 555 catWriteInfo.valid = true; 556 } 557 558 Symbol * 559 ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec, 560 uint32_t offset) { 561 if (!isec) 562 return nullptr; 563 const Reloc *reloc = isec->getRelocAt(offset); 564 565 if (!reloc) 566 return nullptr; 567 568 Symbol *sym = reloc->referent.get<Symbol *>(); 569 570 if (reloc->addend) { 571 assert(isa<Defined>(sym) && "Expected defined for non-zero addend"); 572 Defined *definedSym = cast<Defined>(sym); 573 sym = tryFindDefinedOnIsec(definedSym->isec(), 574 definedSym->value + reloc->addend); 575 } 576 577 return sym; 578 } 579 580 Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec, 581 uint32_t offset) { 582 for (Defined *sym : isec->symbols) 583 if ((sym->value <= offset) && (sym->value + sym->size > offset)) 584 return sym; 585 586 return nullptr; 587 } 588 589 Defined * 590 ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, 591 uint32_t offset) { 592 Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset); 593 return dyn_cast_or_null<Defined>(sym); 594 } 595 596 // Get the class's ro_data symbol. If getMetaRo is true, then we will return 597 // the meta-class's ro_data symbol. Otherwise, we will return the class 598 // (instance) ro_data symbol. 599 Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym, 600 bool getMetaRo) { 601 ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec()); 602 if (!isec) 603 return nullptr; 604 605 if (!getMetaRo) 606 return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset + 607 classSym->value); 608 609 Defined *metaClass = tryGetDefinedAtIsecOffset( 610 isec, classLayout.metaClassOffset + classSym->value); 611 if (!metaClass) 612 return nullptr; 613 614 return tryGetDefinedAtIsecOffset( 615 dyn_cast<ConcatInputSection>(metaClass->isec()), 616 classLayout.roDataOffset); 617 } 618 619 // Given an ConcatInputSection or CStringInputSection and an offset, if there is 620 // a symbol(Defined) at that offset, then erase the symbol (mark it not live) 621 void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset( 622 const ConcatInputSection *isec, uint32_t offset) { 623 const Reloc *reloc = isec->getRelocAt(offset); 624 625 if (!reloc) 626 return; 627 628 Defined *sym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 629 if (!sym) 630 return; 631 632 if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec())) 633 eraseISec(cisec); 634 else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) { 635 uint32_t totalOffset = sym->value + reloc->addend; 636 StringPiece &piece = csisec->getStringPiece(totalOffset); 637 piece.live = false; 638 } else { 639 llvm_unreachable("erased symbol has to be Defined or CStringInputSection"); 640 } 641 } 642 643 void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory( 644 const InfoInputCategory &catInfo) { 645 646 if (!infoCategoryWriter.catListInfo.valid) 647 collectSectionWriteInfoFromIsec(catInfo.catListIsec, 648 infoCategoryWriter.catListInfo); 649 if (!infoCategoryWriter.catBodyInfo.valid) 650 collectSectionWriteInfoFromIsec(catInfo.catBodyIsec, 651 infoCategoryWriter.catBodyInfo); 652 653 if (!infoCategoryWriter.catNameInfo.valid) { 654 lld::macho::Defined *catNameSym = 655 tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset); 656 assert(catNameSym && "Category does not have a valid name Symbol"); 657 658 collectSectionWriteInfoFromIsec(catNameSym->isec(), 659 infoCategoryWriter.catNameInfo); 660 } 661 662 // Collect writer info from all the category lists (we're assuming they all 663 // would provide the same info) 664 if (!infoCategoryWriter.catPtrListInfo.valid) { 665 for (uint32_t off = catLayout.instanceMethodsOffset; 666 off <= catLayout.classPropsOffset; off += target->wordSize) { 667 if (Defined *ptrList = 668 tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) { 669 collectSectionWriteInfoFromIsec(ptrList->isec(), 670 infoCategoryWriter.catPtrListInfo); 671 // we've successfully collected data, so we can break 672 break; 673 } 674 } 675 } 676 } 677 678 // Parse a protocol list that might be linked to ConcatInputSection at a given 679 // offset. The format of the protocol list is different than other lists (prop 680 // lists, method lists) so we need to parse it differently 681 void ObjcCategoryMerger::parseProtocolListInfo( 682 const ConcatInputSection *isec, uint32_t secOffset, 683 PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) { 684 assert((isec && (secOffset + target->wordSize <= isec->data.size())) && 685 "Tried to read pointer list beyond protocol section end"); 686 687 const Reloc *reloc = isec->getRelocAt(secOffset); 688 if (!reloc) 689 return; 690 691 auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 692 assert(ptrListSym && "Protocol list reloc does not have a valid Defined"); 693 694 // Theoretically protocol count can be either 32b or 64b, depending on 695 // platform pointer size, but to simplify implementation we always just read 696 // the lower 32b which should be good enough. 697 uint32_t protocolCount = *reinterpret_cast<const uint32_t *>( 698 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset); 699 700 ptrList.structCount += protocolCount; 701 ptrList.structSize = target->wordSize; 702 703 [[maybe_unused]] uint32_t expectedListSize = 704 (protocolCount * target->wordSize) + 705 /*header(count)*/ protocolListHeaderLayout.totalSize + 706 /*extra null value*/ target->wordSize; 707 708 // On Swift, the protocol list does not have the extra (unnecessary) null 709 [[maybe_unused]] uint32_t expectedListSizeSwift = 710 expectedListSize - target->wordSize; 711 712 assert(((expectedListSize == ptrListSym->isec()->data.size() && 713 sourceLang == SourceLanguage::ObjC) || 714 (expectedListSizeSwift == ptrListSym->isec()->data.size() && 715 sourceLang == SourceLanguage::Swift)) && 716 "Protocol list does not match expected size"); 717 718 uint32_t off = protocolListHeaderLayout.totalSize; 719 for (uint32_t inx = 0; inx < protocolCount; ++inx) { 720 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off); 721 assert(reloc && "No reloc found at protocol list offset"); 722 723 auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 724 assert(listSym && "Protocol list reloc does not have a valid Defined"); 725 726 ptrList.allPtrs.push_back(listSym); 727 off += target->wordSize; 728 } 729 assert((ptrListSym->isec()->getRelocAt(off) == nullptr) && 730 "expected null terminating protocol"); 731 assert(off + /*extra null value*/ target->wordSize == expectedListSize && 732 "Protocol list end offset does not match expected size"); 733 } 734 735 // Parse a protocol list and return the PointerListInfo for it 736 ObjcCategoryMerger::PointerListInfo 737 ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec, 738 uint32_t secOffset, 739 SourceLanguage sourceLang) { 740 PointerListInfo ptrList; 741 parseProtocolListInfo(isec, secOffset, ptrList, sourceLang); 742 return ptrList; 743 } 744 745 // Parse a pointer list that might be linked to ConcatInputSection at a given 746 // offset. This can be used for instance methods, class methods, instance props 747 // and class props since they have the same format. 748 void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec, 749 uint32_t secOffset, 750 PointerListInfo &ptrList) { 751 assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3); 752 assert(isec && "Trying to parse pointer list from null isec"); 753 assert(secOffset + target->wordSize <= isec->data.size() && 754 "Trying to read pointer list beyond section end"); 755 756 const Reloc *reloc = isec->getRelocAt(secOffset); 757 if (!reloc) 758 return; 759 760 auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 761 assert(ptrListSym && "Reloc does not have a valid Defined"); 762 763 uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>( 764 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset); 765 uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>( 766 ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset); 767 assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize); 768 769 assert(!ptrList.structSize || (thisStructSize == ptrList.structSize)); 770 771 ptrList.structCount += thisStructCount; 772 ptrList.structSize = thisStructSize; 773 774 uint32_t expectedListSize = 775 listHeaderLayout.totalSize + (thisStructSize * thisStructCount); 776 assert(expectedListSize == ptrListSym->isec()->data.size() && 777 "Pointer list does not match expected size"); 778 779 for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize; 780 off += target->wordSize) { 781 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off); 782 assert(reloc && "No reloc found at pointer list offset"); 783 784 auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 785 assert(listSym && "Reloc does not have a valid Defined"); 786 787 ptrList.allPtrs.push_back(listSym); 788 } 789 } 790 791 // Here we parse all the information of an input category (catInfo) and 792 // append the parsed info into the structure which will contain all the 793 // information about how a class is extended (extInfo) 794 void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo, 795 ClassExtensionInfo &extInfo) { 796 const Reloc *catNameReloc = 797 catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset); 798 799 // Parse name 800 assert(catNameReloc && "Category does not have a reloc at 'nameOffset'"); 801 802 // is this the first category we are parsing? 803 if (extInfo.mergedContainerName.empty()) 804 extInfo.objFileForMergeData = 805 dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile()); 806 else 807 extInfo.mergedContainerName += "|"; 808 809 assert(extInfo.objFileForMergeData && 810 "Expected to already have valid objextInfo.objFileForMergeData"); 811 812 StringRef catName = getReferentString(*catNameReloc); 813 extInfo.mergedContainerName += catName.str(); 814 815 // Parse base class 816 if (!extInfo.baseClass) { 817 Symbol *classSym = 818 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset); 819 assert(extInfo.baseClassName.empty()); 820 extInfo.baseClass = classSym; 821 llvm::StringRef classPrefix(objc::symbol_names::klass); 822 assert(classSym->getName().starts_with(classPrefix) && 823 "Base class symbol does not start with expected prefix"); 824 extInfo.baseClassName = classSym->getName().substr(classPrefix.size()); 825 } else { 826 assert((extInfo.baseClass == 827 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, 828 catLayout.klassOffset)) && 829 "Trying to parse category info into container with different base " 830 "class"); 831 } 832 833 parsePointerListInfo(catInfo.catBodyIsec, catLayout.instanceMethodsOffset, 834 extInfo.instanceMethods); 835 836 parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset, 837 extInfo.classMethods); 838 839 parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset, 840 extInfo.protocols, catInfo.sourceLanguage); 841 842 parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset, 843 extInfo.instanceProps); 844 845 parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset, 846 extInfo.classProps); 847 } 848 849 // Generate a protocol list (including header) and link it into the parent at 850 // the specified offset. 851 Defined *ObjcCategoryMerger::emitAndLinkProtocolList( 852 Defined *parentSym, uint32_t linkAtOffset, 853 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) { 854 if (ptrList.allPtrs.empty()) 855 return nullptr; 856 857 assert(ptrList.allPtrs.size() == ptrList.structCount); 858 859 uint32_t bodySize = (ptrList.structCount * target->wordSize) + 860 /*header(count)*/ protocolListHeaderLayout.totalSize + 861 /*extra null value*/ target->wordSize; 862 llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize); 863 864 // This theoretically can be either 32b or 64b, but writing just the first 32b 865 // is good enough 866 const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>( 867 bodyData.data() + protocolListHeaderLayout.protocolCountOffset); 868 869 *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size(); 870 871 ConcatInputSection *listSec = make<ConcatInputSection>( 872 *infoCategoryWriter.catPtrListInfo.inputSection, bodyData, 873 infoCategoryWriter.catPtrListInfo.align); 874 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 875 listSec->live = true; 876 877 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 878 879 std::string symName = ptrList.categoryPrefix; 880 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")"; 881 882 Defined *ptrListSym = make<Defined>( 883 newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(), 884 listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false, 885 /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 886 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 887 /*isWeakDefCanBeHidden=*/false); 888 889 ptrListSym->used = true; 890 parentSym->getObjectFile()->symbols.push_back(ptrListSym); 891 addInputSection(listSec); 892 893 createSymbolReference(parentSym, ptrListSym, linkAtOffset, 894 infoCategoryWriter.catBodyInfo.relocTemplate); 895 896 uint32_t offset = protocolListHeaderLayout.totalSize; 897 for (Symbol *symbol : ptrList.allPtrs) { 898 createSymbolReference(ptrListSym, symbol, offset, 899 infoCategoryWriter.catPtrListInfo.relocTemplate); 900 offset += target->wordSize; 901 } 902 903 return ptrListSym; 904 } 905 906 // Generate a pointer list (including header) and link it into the parent at the 907 // specified offset. This is used for instance and class methods and 908 // proprieties. 909 void ObjcCategoryMerger::emitAndLinkPointerList( 910 Defined *parentSym, uint32_t linkAtOffset, 911 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) { 912 if (ptrList.allPtrs.empty()) 913 return; 914 915 assert(ptrList.allPtrs.size() * target->wordSize == 916 ptrList.structCount * ptrList.structSize); 917 918 // Generate body 919 uint32_t bodySize = 920 listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount); 921 llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize); 922 923 const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>( 924 bodyData.data() + listHeaderLayout.structSizeOffset); 925 const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>( 926 bodyData.data() + listHeaderLayout.structCountOffset); 927 928 *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize; 929 *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount; 930 931 ConcatInputSection *listSec = make<ConcatInputSection>( 932 *infoCategoryWriter.catPtrListInfo.inputSection, bodyData, 933 infoCategoryWriter.catPtrListInfo.align); 934 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 935 listSec->live = true; 936 937 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 938 939 std::string symName = ptrList.categoryPrefix; 940 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")"; 941 942 Defined *ptrListSym = make<Defined>( 943 newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(), 944 listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false, 945 /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 946 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 947 /*isWeakDefCanBeHidden=*/false); 948 949 ptrListSym->used = true; 950 parentSym->getObjectFile()->symbols.push_back(ptrListSym); 951 addInputSection(listSec); 952 953 createSymbolReference(parentSym, ptrListSym, linkAtOffset, 954 infoCategoryWriter.catBodyInfo.relocTemplate); 955 956 uint32_t offset = listHeaderLayout.totalSize; 957 for (Symbol *symbol : ptrList.allPtrs) { 958 createSymbolReference(ptrListSym, symbol, offset, 959 infoCategoryWriter.catPtrListInfo.relocTemplate); 960 offset += target->wordSize; 961 } 962 } 963 964 // This method creates an __objc_catlist ConcatInputSection with a single slot 965 Defined * 966 ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName, 967 const std::string &forBaseClassName, 968 ObjFile *objFile) { 969 uint32_t sectionSize = target->wordSize; 970 llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize); 971 972 ConcatInputSection *newCatList = 973 make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection, 974 bodyData, infoCategoryWriter.catListInfo.align); 975 newCatList->parent = infoCategoryWriter.catListInfo.outputSection; 976 newCatList->live = true; 977 978 newCatList->parent = infoCategoryWriter.catListInfo.outputSection; 979 980 std::string catSymName = "<__objc_catlist slot for merged category "; 981 catSymName += forBaseClassName + "(" + forCategoryName + ")>"; 982 983 Defined *catListSym = make<Defined>( 984 newStringData(catSymName.c_str()), /*file=*/objFile, newCatList, 985 /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false, 986 /*isPrivateExtern=*/false, /*includeInSymtab=*/false, 987 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 988 /*isWeakDefCanBeHidden=*/false); 989 990 catListSym->used = true; 991 objFile->symbols.push_back(catListSym); 992 addInputSection(newCatList); 993 return catListSym; 994 } 995 996 // Here we generate the main category body and link the name and base class into 997 // it. We don't link any other info yet like the protocol and class/instance 998 // methods/props. 999 Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name, 1000 const Defined *nameSym, 1001 const Symbol *baseClassSym, 1002 const std::string &baseClassName, 1003 ObjFile *objFile) { 1004 llvm::ArrayRef<uint8_t> bodyData = newSectionData(catLayout.totalSize); 1005 1006 uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) + 1007 catLayout.sizeOffset); 1008 *ptrSize = catLayout.totalSize; 1009 1010 ConcatInputSection *newBodySec = 1011 make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection, 1012 bodyData, infoCategoryWriter.catBodyInfo.align); 1013 newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection; 1014 newBodySec->live = true; 1015 1016 std::string symName = 1017 objc::symbol_names::category + baseClassName + "(" + name + ")"; 1018 Defined *catBodySym = make<Defined>( 1019 newStringData(symName.c_str()), /*file=*/objFile, newBodySec, 1020 /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false, 1021 /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 1022 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 1023 /*isWeakDefCanBeHidden=*/false); 1024 1025 catBodySym->used = true; 1026 objFile->symbols.push_back(catBodySym); 1027 addInputSection(newBodySec); 1028 1029 createSymbolReference(catBodySym, nameSym, catLayout.nameOffset, 1030 infoCategoryWriter.catBodyInfo.relocTemplate); 1031 1032 // Create a reloc to the base class (either external or internal) 1033 createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset, 1034 infoCategoryWriter.catBodyInfo.relocTemplate); 1035 1036 return catBodySym; 1037 } 1038 1039 // This writes the new category name (for the merged category) into the binary 1040 // and returns the sybmol for it. 1041 Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name, 1042 ObjFile *objFile) { 1043 StringRef nameStrData = newStringData(name.c_str()); 1044 // We use +1 below to include the null terminator 1045 llvm::ArrayRef<uint8_t> nameData( 1046 reinterpret_cast<const uint8_t *>(nameStrData.data()), 1047 nameStrData.size() + 1); 1048 1049 auto *parentSection = infoCategoryWriter.catNameInfo.inputSection; 1050 CStringInputSection *newStringSec = make<CStringInputSection>( 1051 *infoCategoryWriter.catNameInfo.inputSection, nameData, 1052 infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/true); 1053 1054 parentSection->subsections.push_back({0, newStringSec}); 1055 1056 newStringSec->splitIntoPieces(); 1057 newStringSec->pieces[0].live = true; 1058 newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection; 1059 in.cStringSection->addInput(newStringSec); 1060 assert(newStringSec->pieces.size() == 1); 1061 1062 Defined *catNameSym = make<Defined>( 1063 "<merged category name>", /*file=*/objFile, newStringSec, 1064 /*value=*/0, nameData.size(), 1065 /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false, 1066 /*includeInSymtab=*/false, /*isReferencedDynamically=*/false, 1067 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false); 1068 1069 catNameSym->used = true; 1070 objFile->symbols.push_back(catNameSym); 1071 return catNameSym; 1072 } 1073 1074 // This method fully creates a new category from the given ClassExtensionInfo. 1075 // It creates the category name, body and method/protocol/prop lists and links 1076 // them all together. Then it creates a new __objc_catlist entry and adds the 1077 // category to it. Calling this method will fully generate a category which will 1078 // be available in the final binary. 1079 Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) { 1080 Defined *catNameSym = emitCategoryName(extInfo.mergedContainerName, 1081 extInfo.objFileForMergeData); 1082 1083 Defined *catBodySym = emitCategoryBody( 1084 extInfo.mergedContainerName, catNameSym, extInfo.baseClass, 1085 extInfo.baseClassName, extInfo.objFileForMergeData); 1086 1087 Defined *catListSym = 1088 emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName, 1089 extInfo.objFileForMergeData); 1090 1091 // Add the single category body to the category list at the offset 0. 1092 createSymbolReference(catListSym, catBodySym, /*offset=*/0, 1093 infoCategoryWriter.catListInfo.relocTemplate); 1094 1095 emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset, extInfo, 1096 extInfo.instanceMethods); 1097 1098 emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo, 1099 extInfo.classMethods); 1100 1101 emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo, 1102 extInfo.protocols); 1103 1104 emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset, extInfo, 1105 extInfo.instanceProps); 1106 1107 emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo, 1108 extInfo.classProps); 1109 1110 return catBodySym; 1111 } 1112 1113 // This method merges all the categories (sharing a base class) into a single 1114 // category. 1115 void ObjcCategoryMerger::mergeCategoriesIntoSingleCategory( 1116 std::vector<InfoInputCategory> &categories) { 1117 assert(categories.size() > 1 && "Expected at least 2 categories"); 1118 1119 ClassExtensionInfo extInfo(catLayout); 1120 1121 for (auto &catInfo : categories) 1122 parseCatInfoToExtInfo(catInfo, extInfo); 1123 1124 Defined *newCatDef = emitCategory(extInfo); 1125 assert(newCatDef && "Failed to create a new category"); 1126 1127 // Suppress unsuded var warning 1128 (void)newCatDef; 1129 1130 for (auto &catInfo : categories) 1131 catInfo.wasMerged = true; 1132 } 1133 1134 void ObjcCategoryMerger::createSymbolReference(Defined *refFrom, 1135 const Symbol *refTo, 1136 uint32_t offset, 1137 const Reloc &relocTemplate) { 1138 Reloc r = relocTemplate; 1139 r.offset = offset; 1140 r.addend = 0; 1141 r.referent = const_cast<Symbol *>(refTo); 1142 refFrom->isec()->relocs.push_back(r); 1143 } 1144 1145 // Get the list of categories in the '__objc_nlcatlist' section. We can't 1146 // optimize these as they have a '+load' method that has to be called at 1147 // runtime. 1148 DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() { 1149 DenseSet<const Symbol *> nlCategories; 1150 1151 for (InputSection *sec : allInputSections) { 1152 if (sec->getName() != section_names::objcNonLazyCatList) 1153 continue; 1154 1155 for (auto &r : sec->relocs) { 1156 const Symbol *sym = r.referent.dyn_cast<Symbol *>(); 1157 nlCategories.insert(sym); 1158 } 1159 } 1160 return nlCategories; 1161 } 1162 1163 void ObjcCategoryMerger::collectAndValidateCategoriesData() { 1164 auto nlCategories = collectNlCategories(); 1165 1166 for (InputSection *sec : allInputSections) { 1167 if (sec->getName() != section_names::objcCatList) 1168 continue; 1169 ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec); 1170 assert(catListCisec && 1171 "__objc_catList InputSection is not a ConcatInputSection"); 1172 1173 for (uint32_t off = 0; off < catListCisec->getSize(); 1174 off += target->wordSize) { 1175 Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off); 1176 assert(categorySym && 1177 "Failed to get a valid category at __objc_catlit offset"); 1178 1179 if (nlCategories.count(categorySym)) 1180 continue; 1181 1182 auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec()); 1183 assert(catBodyIsec && 1184 "Category data section is not an ConcatInputSection"); 1185 1186 SourceLanguage eLang = SourceLanguage::Unknown; 1187 if (categorySym->getName().starts_with(objc::symbol_names::category)) 1188 eLang = SourceLanguage::ObjC; 1189 else if (categorySym->getName().starts_with( 1190 objc::symbol_names::swift_objc_category)) 1191 eLang = SourceLanguage::Swift; 1192 else 1193 llvm_unreachable("Unexpected category symbol name"); 1194 1195 InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang}; 1196 1197 // Check that the category has a reloc at 'klassOffset' (which is 1198 // a pointer to the class symbol) 1199 1200 Symbol *classSym = 1201 tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset); 1202 assert(classSym && "Category does not have a valid base class"); 1203 1204 categoryMap[classSym].push_back(catInputInfo); 1205 1206 collectCategoryWriterInfoFromCategory(catInputInfo); 1207 } 1208 } 1209 } 1210 1211 // In the input we have multiple __objc_catlist InputSection, each of which may 1212 // contain links to multiple categories. Of these categories, we will merge (and 1213 // erase) only some. There will be some categories that will remain untouched 1214 // (not erased). For these not erased categories, we generate new __objc_catlist 1215 // entries since the parent __objc_catlist entry will be erased 1216 void ObjcCategoryMerger::generateCatListForNonErasedCategories( 1217 const MapVector<ConcatInputSection *, std::set<uint64_t>> 1218 catListToErasedOffsets) { 1219 1220 // Go through all offsets of all __objc_catlist's that we process and if there 1221 // are categories that we didn't process - generate a new __objc_catlist for 1222 // each. 1223 for (auto &mapEntry : catListToErasedOffsets) { 1224 ConcatInputSection *catListIsec = mapEntry.first; 1225 for (uint32_t catListIsecOffset = 0; 1226 catListIsecOffset < catListIsec->data.size(); 1227 catListIsecOffset += target->wordSize) { 1228 // This slot was erased, we can just skip it 1229 if (mapEntry.second.count(catListIsecOffset)) 1230 continue; 1231 1232 Defined *nonErasedCatBody = 1233 tryGetDefinedAtIsecOffset(catListIsec, catListIsecOffset); 1234 assert(nonErasedCatBody && "Failed to relocate non-deleted category"); 1235 1236 // Allocate data for the new __objc_catlist slot 1237 llvm::ArrayRef<uint8_t> bodyData = newSectionData(target->wordSize); 1238 1239 // We mark the __objc_catlist slot as belonging to the same file as the 1240 // category 1241 ObjFile *objFile = dyn_cast<ObjFile>(nonErasedCatBody->getFile()); 1242 1243 ConcatInputSection *listSec = make<ConcatInputSection>( 1244 *infoCategoryWriter.catListInfo.inputSection, bodyData, 1245 infoCategoryWriter.catListInfo.align); 1246 listSec->parent = infoCategoryWriter.catListInfo.outputSection; 1247 listSec->live = true; 1248 1249 std::string slotSymName = "<__objc_catlist slot for category "; 1250 slotSymName += nonErasedCatBody->getName(); 1251 slotSymName += ">"; 1252 1253 Defined *catListSlotSym = make<Defined>( 1254 newStringData(slotSymName.c_str()), /*file=*/objFile, listSec, 1255 /*value=*/0, bodyData.size(), 1256 /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false, 1257 /*includeInSymtab=*/false, /*isReferencedDynamically=*/false, 1258 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false); 1259 1260 catListSlotSym->used = true; 1261 objFile->symbols.push_back(catListSlotSym); 1262 addInputSection(listSec); 1263 1264 // Now link the category body into the newly created slot 1265 createSymbolReference(catListSlotSym, nonErasedCatBody, 0, 1266 infoCategoryWriter.catListInfo.relocTemplate); 1267 } 1268 } 1269 } 1270 1271 void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) { 1272 isec->live = false; 1273 for (auto &sym : isec->symbols) 1274 sym->used = false; 1275 } 1276 1277 // This fully erases the merged categories, including their body, their names, 1278 // their method/protocol/prop lists and the __objc_catlist entries that link to 1279 // them. 1280 void ObjcCategoryMerger::eraseMergedCategories() { 1281 // Map of InputSection to a set of offsets of the categories that were merged 1282 MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets; 1283 1284 for (auto &mapEntry : categoryMap) { 1285 for (InfoInputCategory &catInfo : mapEntry.second) { 1286 if (catInfo.wasMerged) { 1287 eraseISec(catInfo.catListIsec); 1288 catListToErasedOffsets[catInfo.catListIsec].insert( 1289 catInfo.offCatListIsec); 1290 } 1291 } 1292 } 1293 1294 // If there were categories that we did not erase, we need to generate a new 1295 // __objc_catList that contains only the un-merged categories, and get rid of 1296 // the references to the ones we merged. 1297 generateCatListForNonErasedCategories(catListToErasedOffsets); 1298 1299 // Erase the old method lists & names of the categories that were merged 1300 for (auto &mapEntry : categoryMap) { 1301 for (InfoInputCategory &catInfo : mapEntry.second) { 1302 if (!catInfo.wasMerged) 1303 continue; 1304 1305 eraseISec(catInfo.catBodyIsec); 1306 1307 // We can't erase 'catLayout.nameOffset' for either Swift or ObjC 1308 // categories because the name will sometimes also be used for other 1309 // purposes. 1310 // For Swift, see usages of 'l_.str.11.SimpleClass' in 1311 // objc-category-merging-swift.s 1312 // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in 1313 // objc-category-merging-erase-objc-name-test.s 1314 // TODO: handle the above in a smarter way 1315 1316 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1317 catLayout.instanceMethodsOffset); 1318 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1319 catLayout.classMethodsOffset); 1320 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1321 catLayout.protocolsOffset); 1322 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1323 catLayout.classPropsOffset); 1324 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1325 catLayout.instancePropsOffset); 1326 } 1327 } 1328 } 1329 1330 void ObjcCategoryMerger::doMerge() { 1331 collectAndValidateCategoriesData(); 1332 1333 for (auto &[baseClass, catInfos] : categoryMap) { 1334 if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) { 1335 // Merge all categories into the base class 1336 mergeCategoriesIntoBaseClass(baseClassDef, catInfos); 1337 } else if (catInfos.size() > 1) { 1338 // Merge all categories into a new, single category 1339 mergeCategoriesIntoSingleCategory(catInfos); 1340 } 1341 } 1342 1343 // Erase all categories that were merged 1344 eraseMergedCategories(); 1345 } 1346 1347 void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); } 1348 1349 StringRef ObjcCategoryMerger::newStringData(const char *str) { 1350 uint32_t len = strlen(str); 1351 uint32_t bufSize = len + 1; 1352 SmallVector<uint8_t> &data = newSectionData(bufSize); 1353 char *strData = reinterpret_cast<char *>(data.data()); 1354 // Copy the string chars and null-terminator 1355 memcpy(strData, str, bufSize); 1356 return StringRef(strData, len); 1357 } 1358 1359 SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) { 1360 generatedSectionData.push_back( 1361 std::make_unique<SmallVector<uint8_t>>(size, 0)); 1362 return *generatedSectionData.back(); 1363 } 1364 1365 } // namespace 1366 1367 void objc::mergeCategories() { 1368 TimeTraceScope timeScope("ObjcCategoryMerger"); 1369 1370 ObjcCategoryMerger merger(inputSections); 1371 merger.doMerge(); 1372 } 1373 1374 void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); } 1375 1376 ObjcCategoryMerger::SourceLanguage 1377 ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) { 1378 if (classSym->getName().starts_with(objc::symbol_names::swift_objc_klass)) 1379 return SourceLanguage::Swift; 1380 1381 // If the symbol name matches the ObjC prefix, we don't necessarely know this 1382 // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift 1383 // classes. Ex: 1384 // .globl _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass 1385 // .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass 1386 // .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN 1387 // 1388 // So we scan for symbols with the same address and check for the Swift class 1389 if (classSym->getName().starts_with(objc::symbol_names::klass)) { 1390 for (auto &sym : classSym->originalIsec->symbols) 1391 if (sym->value == classSym->value) 1392 if (sym->getName().starts_with(objc::symbol_names::swift_objc_klass)) 1393 return SourceLanguage::Swift; 1394 return SourceLanguage::ObjC; 1395 } 1396 1397 llvm_unreachable("Unexpected class symbol name during category merging"); 1398 } 1399 void ObjcCategoryMerger::mergeCategoriesIntoBaseClass( 1400 const Defined *baseClass, std::vector<InfoInputCategory> &categories) { 1401 assert(categories.size() >= 1 && "Expected at least one category to merge"); 1402 1403 // Collect all the info from the categories 1404 ClassExtensionInfo extInfo(catLayout); 1405 extInfo.baseClass = baseClass; 1406 extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass); 1407 1408 for (auto &catInfo : categories) { 1409 parseCatInfoToExtInfo(catInfo, extInfo); 1410 } 1411 1412 // Get metadata for the base class 1413 Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true); 1414 ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec()); 1415 Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false); 1416 ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec()); 1417 1418 // Now collect the info from the base class from the various lists in the 1419 // class metadata 1420 1421 // Protocol lists are a special case - the same protocol list is in classRo 1422 // and metaRo, so we only need to parse it once 1423 parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset, 1424 extInfo.protocols, extInfo.baseClassSourceLanguage); 1425 1426 // Check that the classRo and metaRo protocol lists are identical 1427 assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset, 1428 extInfo.baseClassSourceLanguage) == 1429 parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset, 1430 extInfo.baseClassSourceLanguage) && 1431 "Category merger expects classRo and metaRo to have the same protocol " 1432 "list"); 1433 1434 parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset, 1435 extInfo.classMethods); 1436 parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset, 1437 extInfo.instanceMethods); 1438 1439 parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset, 1440 extInfo.classProps); 1441 parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset, 1442 extInfo.instanceProps); 1443 1444 // Erase the old lists - these will be generated and replaced 1445 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset); 1446 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset); 1447 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset); 1448 eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset); 1449 eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset); 1450 eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset); 1451 1452 // Emit the newly merged lists - first into the meta RO then into the class RO 1453 // First we emit and link the protocol list into the meta RO. Then we link it 1454 // in the classRo as well (they're supposed to be identical) 1455 if (Defined *protoListSym = 1456 emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset, 1457 extInfo, extInfo.protocols)) { 1458 createSymbolReference(classRo, protoListSym, 1459 roClassLayout.baseProtocolsOffset, 1460 infoCategoryWriter.catBodyInfo.relocTemplate); 1461 } 1462 1463 emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo, 1464 extInfo.classMethods); 1465 emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo, 1466 extInfo.instanceMethods); 1467 1468 emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo, 1469 extInfo.classProps); 1470 1471 emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo, 1472 extInfo.instanceProps); 1473 1474 // Mark all the categories as merged - this will be used to erase them later 1475 for (auto &catInfo : categories) 1476 catInfo.wasMerged = true; 1477 } 1478 1479 // Erase the symbol at a given offset in an InputSection 1480 void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec, 1481 uint32_t offset) { 1482 Defined *sym = tryGetDefinedAtIsecOffset(isec, offset); 1483 if (!sym) 1484 return; 1485 1486 // Remove the symbol from isec->symbols 1487 assert(isa<Defined>(sym) && "Can only erase a Defined"); 1488 llvm::erase(isec->symbols, sym); 1489 1490 // Remove the relocs that refer to this symbol 1491 auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; }; 1492 llvm::erase_if(isec->relocs, removeAtOff); 1493 1494 // Now, if the symbol fully occupies a ConcatInputSection, we can also erase 1495 // the whole ConcatInputSection 1496 if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec())) 1497 if (cisec->data.size() == sym->size) 1498 eraseISec(cisec); 1499 } 1500