1e8d8bef9SDimitry Andric //===- ObjC.cpp -----------------------------------------------------------===// 2e8d8bef9SDimitry Andric // 3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6e8d8bef9SDimitry Andric // 7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===// 8e8d8bef9SDimitry Andric 9e8d8bef9SDimitry Andric #include "ObjC.h" 10*0fca6ea1SDimitry Andric #include "ConcatOutputSection.h" 11e8d8bef9SDimitry Andric #include "InputFiles.h" 12fe6060f1SDimitry Andric #include "InputSection.h" 1306c3fb27SDimitry Andric #include "Layout.h" 14e8d8bef9SDimitry Andric #include "OutputSegment.h" 15*0fca6ea1SDimitry Andric #include "SyntheticSections.h" 16fe6060f1SDimitry Andric #include "Target.h" 17e8d8bef9SDimitry Andric 18bdd1243dSDimitry Andric #include "lld/Common/ErrorHandler.h" 1906c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h" 20e8d8bef9SDimitry Andric #include "llvm/BinaryFormat/MachO.h" 21349cc55cSDimitry Andric #include "llvm/Bitcode/BitcodeReader.h" 22*0fca6ea1SDimitry Andric #include "llvm/Support/TimeProfiler.h" 23e8d8bef9SDimitry Andric 24e8d8bef9SDimitry Andric using namespace llvm; 25e8d8bef9SDimitry Andric using namespace llvm::MachO; 26e8d8bef9SDimitry Andric using namespace lld; 27fe6060f1SDimitry Andric using namespace lld::macho; 28e8d8bef9SDimitry Andric 29349cc55cSDimitry Andric template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) { 30349cc55cSDimitry Andric using SectionHeader = typename LP::section; 31fe6060f1SDimitry Andric 32fe6060f1SDimitry Andric auto *hdr = 33fe6060f1SDimitry Andric reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart()); 34fe6060f1SDimitry Andric if (hdr->magic != LP::magic) 35fe6060f1SDimitry Andric return false; 36fe6060f1SDimitry Andric 37fe6060f1SDimitry Andric if (const auto *c = 38fe6060f1SDimitry Andric findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) { 39349cc55cSDimitry Andric auto sectionHeaders = ArrayRef<SectionHeader>{ 40349cc55cSDimitry Andric reinterpret_cast<const SectionHeader *>(c + 1), c->nsects}; 41349cc55cSDimitry Andric for (const SectionHeader &secHead : sectionHeaders) { 42349cc55cSDimitry Andric StringRef sectname(secHead.sectname, 43349cc55cSDimitry Andric strnlen(secHead.sectname, sizeof(secHead.sectname))); 44349cc55cSDimitry Andric StringRef segname(secHead.segname, 45349cc55cSDimitry Andric strnlen(secHead.segname, sizeof(secHead.segname))); 46fe6060f1SDimitry Andric if ((segname == segment_names::data && 47fe6060f1SDimitry Andric sectname == section_names::objcCatList) || 48fe6060f1SDimitry Andric (segname == segment_names::text && 4906c3fb27SDimitry Andric sectname.starts_with(section_names::swift))) { 50e8d8bef9SDimitry Andric return true; 51e8d8bef9SDimitry Andric } 52e8d8bef9SDimitry Andric } 53e8d8bef9SDimitry Andric } 54e8d8bef9SDimitry Andric return false; 55e8d8bef9SDimitry Andric } 56fe6060f1SDimitry Andric 57349cc55cSDimitry Andric static bool objectHasObjCSection(MemoryBufferRef mb) { 58fe6060f1SDimitry Andric if (target->wordSize == 8) 59349cc55cSDimitry Andric return ::objectHasObjCSection<LP64>(mb); 60fe6060f1SDimitry Andric else 61349cc55cSDimitry Andric return ::objectHasObjCSection<ILP32>(mb); 62349cc55cSDimitry Andric } 63349cc55cSDimitry Andric 64349cc55cSDimitry Andric bool macho::hasObjCSection(MemoryBufferRef mb) { 65349cc55cSDimitry Andric switch (identify_magic(mb.getBuffer())) { 66349cc55cSDimitry Andric case file_magic::macho_object: 67349cc55cSDimitry Andric return objectHasObjCSection(mb); 68349cc55cSDimitry Andric case file_magic::bitcode: 69349cc55cSDimitry Andric return check(isBitcodeContainingObjCCategory(mb)); 70349cc55cSDimitry Andric default: 71349cc55cSDimitry Andric return false; 72349cc55cSDimitry Andric } 73fe6060f1SDimitry Andric } 7406c3fb27SDimitry Andric 7506c3fb27SDimitry Andric namespace { 7606c3fb27SDimitry Andric 7706c3fb27SDimitry Andric #define FOR_EACH_CATEGORY_FIELD(DO) \ 7806c3fb27SDimitry Andric DO(Ptr, name) \ 7906c3fb27SDimitry Andric DO(Ptr, klass) \ 8006c3fb27SDimitry Andric DO(Ptr, instanceMethods) \ 8106c3fb27SDimitry Andric DO(Ptr, classMethods) \ 8206c3fb27SDimitry Andric DO(Ptr, protocols) \ 8306c3fb27SDimitry Andric DO(Ptr, instanceProps) \ 84*0fca6ea1SDimitry Andric DO(Ptr, classProps) \ 85*0fca6ea1SDimitry Andric DO(uint32_t, size) 8606c3fb27SDimitry Andric 8706c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD); 8806c3fb27SDimitry Andric 8906c3fb27SDimitry Andric #undef FOR_EACH_CATEGORY_FIELD 9006c3fb27SDimitry Andric 9106c3fb27SDimitry Andric #define FOR_EACH_CLASS_FIELD(DO) \ 9206c3fb27SDimitry Andric DO(Ptr, metaClass) \ 9306c3fb27SDimitry Andric DO(Ptr, superClass) \ 9406c3fb27SDimitry Andric DO(Ptr, methodCache) \ 9506c3fb27SDimitry Andric DO(Ptr, vtable) \ 9606c3fb27SDimitry Andric DO(Ptr, roData) 9706c3fb27SDimitry Andric 9806c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD); 9906c3fb27SDimitry Andric 10006c3fb27SDimitry Andric #undef FOR_EACH_CLASS_FIELD 10106c3fb27SDimitry Andric 10206c3fb27SDimitry Andric #define FOR_EACH_RO_CLASS_FIELD(DO) \ 10306c3fb27SDimitry Andric DO(uint32_t, flags) \ 10406c3fb27SDimitry Andric DO(uint32_t, instanceStart) \ 10506c3fb27SDimitry Andric DO(Ptr, instanceSize) \ 10606c3fb27SDimitry Andric DO(Ptr, ivarLayout) \ 10706c3fb27SDimitry Andric DO(Ptr, name) \ 10806c3fb27SDimitry Andric DO(Ptr, baseMethods) \ 10906c3fb27SDimitry Andric DO(Ptr, baseProtocols) \ 11006c3fb27SDimitry Andric DO(Ptr, ivars) \ 11106c3fb27SDimitry Andric DO(Ptr, weakIvarLayout) \ 11206c3fb27SDimitry Andric DO(Ptr, baseProperties) 11306c3fb27SDimitry Andric 11406c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD); 11506c3fb27SDimitry Andric 11606c3fb27SDimitry Andric #undef FOR_EACH_RO_CLASS_FIELD 11706c3fb27SDimitry Andric 11806c3fb27SDimitry Andric #define FOR_EACH_LIST_HEADER(DO) \ 119*0fca6ea1SDimitry Andric DO(uint32_t, structSize) \ 120*0fca6ea1SDimitry Andric DO(uint32_t, structCount) 12106c3fb27SDimitry Andric 12206c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER); 12306c3fb27SDimitry Andric 12406c3fb27SDimitry Andric #undef FOR_EACH_LIST_HEADER 12506c3fb27SDimitry Andric 126*0fca6ea1SDimitry Andric #define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount) 127*0fca6ea1SDimitry Andric 128*0fca6ea1SDimitry Andric CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER); 129*0fca6ea1SDimitry Andric 130*0fca6ea1SDimitry Andric #undef FOR_EACH_PROTOCOL_LIST_HEADER 131*0fca6ea1SDimitry Andric 13206c3fb27SDimitry Andric #define FOR_EACH_METHOD(DO) \ 13306c3fb27SDimitry Andric DO(Ptr, name) \ 13406c3fb27SDimitry Andric DO(Ptr, type) \ 13506c3fb27SDimitry Andric DO(Ptr, impl) 13606c3fb27SDimitry Andric 13706c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD); 13806c3fb27SDimitry Andric 13906c3fb27SDimitry Andric #undef FOR_EACH_METHOD 14006c3fb27SDimitry Andric 14106c3fb27SDimitry Andric enum MethodContainerKind { 14206c3fb27SDimitry Andric MCK_Class, 14306c3fb27SDimitry Andric MCK_Category, 14406c3fb27SDimitry Andric }; 14506c3fb27SDimitry Andric 14606c3fb27SDimitry Andric struct MethodContainer { 14706c3fb27SDimitry Andric MethodContainerKind kind; 14806c3fb27SDimitry Andric const ConcatInputSection *isec; 14906c3fb27SDimitry Andric }; 15006c3fb27SDimitry Andric 15106c3fb27SDimitry Andric enum MethodKind { 15206c3fb27SDimitry Andric MK_Instance, 15306c3fb27SDimitry Andric MK_Static, 15406c3fb27SDimitry Andric }; 15506c3fb27SDimitry Andric 15606c3fb27SDimitry Andric struct ObjcClass { 15706c3fb27SDimitry Andric DenseMap<CachedHashStringRef, MethodContainer> instanceMethods; 15806c3fb27SDimitry Andric DenseMap<CachedHashStringRef, MethodContainer> classMethods; 15906c3fb27SDimitry Andric }; 16006c3fb27SDimitry Andric 16106c3fb27SDimitry Andric } // namespace 16206c3fb27SDimitry Andric 16306c3fb27SDimitry Andric class ObjcCategoryChecker { 16406c3fb27SDimitry Andric public: 16506c3fb27SDimitry Andric ObjcCategoryChecker(); 16606c3fb27SDimitry Andric void parseCategory(const ConcatInputSection *catListIsec); 16706c3fb27SDimitry Andric 16806c3fb27SDimitry Andric private: 16906c3fb27SDimitry Andric void parseClass(const Defined *classSym); 17006c3fb27SDimitry Andric void parseMethods(const ConcatInputSection *methodsIsec, 17106c3fb27SDimitry Andric const Symbol *methodContainer, 17206c3fb27SDimitry Andric const ConcatInputSection *containerIsec, 17306c3fb27SDimitry Andric MethodContainerKind, MethodKind); 17406c3fb27SDimitry Andric 17506c3fb27SDimitry Andric CategoryLayout catLayout; 17606c3fb27SDimitry Andric ClassLayout classLayout; 17706c3fb27SDimitry Andric ROClassLayout roClassLayout; 17806c3fb27SDimitry Andric ListHeaderLayout listHeaderLayout; 17906c3fb27SDimitry Andric MethodLayout methodLayout; 18006c3fb27SDimitry Andric 18106c3fb27SDimitry Andric DenseMap<const Symbol *, ObjcClass> classMap; 18206c3fb27SDimitry Andric }; 18306c3fb27SDimitry Andric 18406c3fb27SDimitry Andric ObjcCategoryChecker::ObjcCategoryChecker() 18506c3fb27SDimitry Andric : catLayout(target->wordSize), classLayout(target->wordSize), 18606c3fb27SDimitry Andric roClassLayout(target->wordSize), listHeaderLayout(target->wordSize), 18706c3fb27SDimitry Andric methodLayout(target->wordSize) {} 18806c3fb27SDimitry Andric 189*0fca6ea1SDimitry Andric // \p r must point to an offset within a CStringInputSection or a 190*0fca6ea1SDimitry Andric // ConcatInputSection 19106c3fb27SDimitry Andric static StringRef getReferentString(const Reloc &r) { 19206c3fb27SDimitry Andric if (auto *isec = r.referent.dyn_cast<InputSection *>()) 19306c3fb27SDimitry Andric return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend); 194*0fca6ea1SDimitry Andric 19506c3fb27SDimitry Andric auto *sym = cast<Defined>(r.referent.get<Symbol *>()); 196*0fca6ea1SDimitry Andric auto *symIsec = sym->isec(); 197*0fca6ea1SDimitry Andric auto symOffset = sym->value + r.addend; 198*0fca6ea1SDimitry Andric 199*0fca6ea1SDimitry Andric if (auto *s = dyn_cast_or_null<CStringInputSection>(symIsec)) 200*0fca6ea1SDimitry Andric return s->getStringRefAtOffset(symOffset); 201*0fca6ea1SDimitry Andric 202*0fca6ea1SDimitry Andric if (isa<ConcatInputSection>(symIsec)) { 203*0fca6ea1SDimitry Andric auto strData = symIsec->data.slice(symOffset); 204*0fca6ea1SDimitry Andric const char *pszData = reinterpret_cast<const char *>(strData.data()); 205*0fca6ea1SDimitry Andric return StringRef(pszData, strnlen(pszData, strData.size())); 206*0fca6ea1SDimitry Andric } 207*0fca6ea1SDimitry Andric 208*0fca6ea1SDimitry Andric llvm_unreachable("unknown reference section in getReferentString"); 20906c3fb27SDimitry Andric } 21006c3fb27SDimitry Andric 21106c3fb27SDimitry Andric void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec, 21206c3fb27SDimitry Andric const Symbol *methodContainerSym, 21306c3fb27SDimitry Andric const ConcatInputSection *containerIsec, 21406c3fb27SDimitry Andric MethodContainerKind mcKind, 21506c3fb27SDimitry Andric MethodKind mKind) { 21606c3fb27SDimitry Andric ObjcClass &klass = classMap[methodContainerSym]; 21706c3fb27SDimitry Andric for (const Reloc &r : methodsIsec->relocs) { 21806c3fb27SDimitry Andric if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize != 21906c3fb27SDimitry Andric methodLayout.nameOffset) 22006c3fb27SDimitry Andric continue; 22106c3fb27SDimitry Andric 22206c3fb27SDimitry Andric CachedHashStringRef methodName(getReferentString(r)); 22306c3fb27SDimitry Andric // +load methods are special: all implementations are called by the runtime 22406c3fb27SDimitry Andric // even if they are part of the same class. Thus there is no need to check 22506c3fb27SDimitry Andric // for duplicates. 22606c3fb27SDimitry Andric // NOTE: Instead of specifically checking for this method name, ld64 simply 22706c3fb27SDimitry Andric // checks whether a class / category is present in __objc_nlclslist / 22806c3fb27SDimitry Andric // __objc_nlcatlist respectively. This will be the case if the class / 22906c3fb27SDimitry Andric // category has a +load method. It skips optimizing the categories if there 23006c3fb27SDimitry Andric // are multiple +load methods. Since it does dupe checking as part of the 23106c3fb27SDimitry Andric // optimization process, this avoids spurious dupe messages around +load, 23206c3fb27SDimitry Andric // but it also means that legit dupe issues for other methods are ignored. 23306c3fb27SDimitry Andric if (mKind == MK_Static && methodName.val() == "load") 23406c3fb27SDimitry Andric continue; 23506c3fb27SDimitry Andric 23606c3fb27SDimitry Andric auto &methodMap = 23706c3fb27SDimitry Andric mKind == MK_Instance ? klass.instanceMethods : klass.classMethods; 23806c3fb27SDimitry Andric if (methodMap 23906c3fb27SDimitry Andric .try_emplace(methodName, MethodContainer{mcKind, containerIsec}) 24006c3fb27SDimitry Andric .second) 24106c3fb27SDimitry Andric continue; 24206c3fb27SDimitry Andric 24306c3fb27SDimitry Andric // We have a duplicate; generate a warning message. 24406c3fb27SDimitry Andric const auto &mc = methodMap.lookup(methodName); 24506c3fb27SDimitry Andric const Reloc *nameReloc = nullptr; 24606c3fb27SDimitry Andric if (mc.kind == MCK_Category) { 24706c3fb27SDimitry Andric nameReloc = mc.isec->getRelocAt(catLayout.nameOffset); 24806c3fb27SDimitry Andric } else { 24906c3fb27SDimitry Andric assert(mc.kind == MCK_Class); 25006c3fb27SDimitry Andric const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset) 25106c3fb27SDimitry Andric ->getReferentInputSection(); 25206c3fb27SDimitry Andric nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset); 25306c3fb27SDimitry Andric } 25406c3fb27SDimitry Andric StringRef containerName = getReferentString(*nameReloc); 25506c3fb27SDimitry Andric StringRef methPrefix = mKind == MK_Instance ? "-" : "+"; 25606c3fb27SDimitry Andric 25706c3fb27SDimitry Andric // We should only ever encounter collisions when parsing category methods 25806c3fb27SDimitry Andric // (since the Class struct is parsed before any of its categories). 25906c3fb27SDimitry Andric assert(mcKind == MCK_Category); 26006c3fb27SDimitry Andric StringRef newCatName = 26106c3fb27SDimitry Andric getReferentString(*containerIsec->getRelocAt(catLayout.nameOffset)); 26206c3fb27SDimitry Andric 263*0fca6ea1SDimitry Andric auto formatObjAndSrcFileName = [](const InputSection *section) { 264*0fca6ea1SDimitry Andric lld::macho::InputFile *inputFile = section->getFile(); 265*0fca6ea1SDimitry Andric std::string result = toString(inputFile); 266*0fca6ea1SDimitry Andric 267*0fca6ea1SDimitry Andric auto objFile = dyn_cast_or_null<ObjFile>(inputFile); 268*0fca6ea1SDimitry Andric if (objFile && objFile->compileUnit) 269*0fca6ea1SDimitry Andric result += " (" + objFile->sourceFile() + ")"; 270*0fca6ea1SDimitry Andric 271*0fca6ea1SDimitry Andric return result; 272*0fca6ea1SDimitry Andric }; 273*0fca6ea1SDimitry Andric 27406c3fb27SDimitry Andric StringRef containerType = mc.kind == MCK_Category ? "category" : "class"; 27506c3fb27SDimitry Andric warn("method '" + methPrefix + methodName.val() + 27606c3fb27SDimitry Andric "' has conflicting definitions:\n>>> defined in category " + 277*0fca6ea1SDimitry Andric newCatName + " from " + formatObjAndSrcFileName(containerIsec) + 27806c3fb27SDimitry Andric "\n>>> defined in " + containerType + " " + containerName + " from " + 279*0fca6ea1SDimitry Andric formatObjAndSrcFileName(mc.isec)); 28006c3fb27SDimitry Andric } 28106c3fb27SDimitry Andric } 28206c3fb27SDimitry Andric 28306c3fb27SDimitry Andric void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) { 28406c3fb27SDimitry Andric auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset); 28506c3fb27SDimitry Andric if (!classReloc) 28606c3fb27SDimitry Andric return; 28706c3fb27SDimitry Andric 28806c3fb27SDimitry Andric auto *classSym = classReloc->referent.get<Symbol *>(); 28906c3fb27SDimitry Andric if (auto *d = dyn_cast<Defined>(classSym)) 29006c3fb27SDimitry Andric if (!classMap.count(d)) 29106c3fb27SDimitry Andric parseClass(d); 29206c3fb27SDimitry Andric 29306c3fb27SDimitry Andric if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) { 29406c3fb27SDimitry Andric parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()), 29506c3fb27SDimitry Andric classSym, catIsec, MCK_Category, MK_Static); 29606c3fb27SDimitry Andric } 29706c3fb27SDimitry Andric 29806c3fb27SDimitry Andric if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) { 29906c3fb27SDimitry Andric parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()), 30006c3fb27SDimitry Andric classSym, catIsec, MCK_Category, MK_Instance); 30106c3fb27SDimitry Andric } 30206c3fb27SDimitry Andric } 30306c3fb27SDimitry Andric 30406c3fb27SDimitry Andric void ObjcCategoryChecker::parseClass(const Defined *classSym) { 30506c3fb27SDimitry Andric // Given a Class struct, get its corresponding Methods struct 30606c3fb27SDimitry Andric auto getMethodsIsec = 30706c3fb27SDimitry Andric [&](const InputSection *classIsec) -> ConcatInputSection * { 30806c3fb27SDimitry Andric if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) { 30906c3fb27SDimitry Andric if (const auto *roIsec = 31006c3fb27SDimitry Andric cast_or_null<ConcatInputSection>(r->getReferentInputSection())) { 31106c3fb27SDimitry Andric if (const auto *r = 31206c3fb27SDimitry Andric roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) { 31306c3fb27SDimitry Andric if (auto *methodsIsec = cast_or_null<ConcatInputSection>( 31406c3fb27SDimitry Andric r->getReferentInputSection())) 31506c3fb27SDimitry Andric return methodsIsec; 31606c3fb27SDimitry Andric } 31706c3fb27SDimitry Andric } 31806c3fb27SDimitry Andric } 31906c3fb27SDimitry Andric return nullptr; 32006c3fb27SDimitry Andric }; 32106c3fb27SDimitry Andric 322*0fca6ea1SDimitry Andric const auto *classIsec = cast<ConcatInputSection>(classSym->isec()); 32306c3fb27SDimitry Andric 32406c3fb27SDimitry Andric // Parse instance methods. 32506c3fb27SDimitry Andric if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec)) 32606c3fb27SDimitry Andric parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class, 32706c3fb27SDimitry Andric MK_Instance); 32806c3fb27SDimitry Andric 32906c3fb27SDimitry Andric // Class methods are contained in the metaclass. 330*0fca6ea1SDimitry Andric if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset)) 33106c3fb27SDimitry Andric if (const auto *classMethodsIsec = getMethodsIsec( 33206c3fb27SDimitry Andric cast<ConcatInputSection>(r->getReferentInputSection()))) 33306c3fb27SDimitry Andric parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static); 33406c3fb27SDimitry Andric } 33506c3fb27SDimitry Andric 33606c3fb27SDimitry Andric void objc::checkCategories() { 337*0fca6ea1SDimitry Andric TimeTraceScope timeScope("ObjcCategoryChecker"); 338*0fca6ea1SDimitry Andric 33906c3fb27SDimitry Andric ObjcCategoryChecker checker; 34006c3fb27SDimitry Andric for (const InputSection *isec : inputSections) { 34106c3fb27SDimitry Andric if (isec->getName() == section_names::objcCatList) 34206c3fb27SDimitry Andric for (const Reloc &r : isec->relocs) { 34306c3fb27SDimitry Andric auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection()); 34406c3fb27SDimitry Andric checker.parseCategory(catIsec); 34506c3fb27SDimitry Andric } 34606c3fb27SDimitry Andric } 34706c3fb27SDimitry Andric } 348*0fca6ea1SDimitry Andric 349*0fca6ea1SDimitry Andric namespace { 350*0fca6ea1SDimitry Andric 351*0fca6ea1SDimitry Andric class ObjcCategoryMerger { 352*0fca6ea1SDimitry Andric // In which language was a particular construct originally defined 353*0fca6ea1SDimitry Andric enum SourceLanguage { Unknown, ObjC, Swift }; 354*0fca6ea1SDimitry Andric 355*0fca6ea1SDimitry Andric // Information about an input category 356*0fca6ea1SDimitry Andric struct InfoInputCategory { 357*0fca6ea1SDimitry Andric ConcatInputSection *catListIsec; 358*0fca6ea1SDimitry Andric ConcatInputSection *catBodyIsec; 359*0fca6ea1SDimitry Andric uint32_t offCatListIsec = 0; 360*0fca6ea1SDimitry Andric SourceLanguage sourceLanguage = SourceLanguage::Unknown; 361*0fca6ea1SDimitry Andric 362*0fca6ea1SDimitry Andric bool wasMerged = false; 363*0fca6ea1SDimitry Andric }; 364*0fca6ea1SDimitry Andric 365*0fca6ea1SDimitry Andric // To write new (merged) categories or classes, we will try make limited 366*0fca6ea1SDimitry Andric // assumptions about the alignment and the sections the various class/category 367*0fca6ea1SDimitry Andric // info are stored in and . So we'll just reuse the same sections and 368*0fca6ea1SDimitry Andric // alignment as already used in existing (input) categories. To do this we 369*0fca6ea1SDimitry Andric // have InfoCategoryWriter which contains the various sections that the 370*0fca6ea1SDimitry Andric // generated categories will be written to. 371*0fca6ea1SDimitry Andric struct InfoWriteSection { 372*0fca6ea1SDimitry Andric bool valid = false; // Data has been successfully collected from input 373*0fca6ea1SDimitry Andric uint32_t align = 0; 374*0fca6ea1SDimitry Andric Section *inputSection; 375*0fca6ea1SDimitry Andric Reloc relocTemplate; 376*0fca6ea1SDimitry Andric OutputSection *outputSection; 377*0fca6ea1SDimitry Andric }; 378*0fca6ea1SDimitry Andric 379*0fca6ea1SDimitry Andric struct InfoCategoryWriter { 380*0fca6ea1SDimitry Andric InfoWriteSection catListInfo; 381*0fca6ea1SDimitry Andric InfoWriteSection catBodyInfo; 382*0fca6ea1SDimitry Andric InfoWriteSection catNameInfo; 383*0fca6ea1SDimitry Andric InfoWriteSection catPtrListInfo; 384*0fca6ea1SDimitry Andric }; 385*0fca6ea1SDimitry Andric 386*0fca6ea1SDimitry Andric // Information about a pointer list in the original categories or class(method 387*0fca6ea1SDimitry Andric // lists, protocol lists, etc) 388*0fca6ea1SDimitry Andric struct PointerListInfo { 389*0fca6ea1SDimitry Andric PointerListInfo() = default; 390*0fca6ea1SDimitry Andric PointerListInfo(const PointerListInfo &) = default; 391*0fca6ea1SDimitry Andric PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct) 392*0fca6ea1SDimitry Andric : categoryPrefix(_categoryPrefix), 393*0fca6ea1SDimitry Andric pointersPerStruct(_pointersPerStruct) {} 394*0fca6ea1SDimitry Andric 395*0fca6ea1SDimitry Andric inline bool operator==(const PointerListInfo &cmp) const { 396*0fca6ea1SDimitry Andric return pointersPerStruct == cmp.pointersPerStruct && 397*0fca6ea1SDimitry Andric structSize == cmp.structSize && structCount == cmp.structCount && 398*0fca6ea1SDimitry Andric allPtrs == cmp.allPtrs; 399*0fca6ea1SDimitry Andric } 400*0fca6ea1SDimitry Andric 401*0fca6ea1SDimitry Andric const char *categoryPrefix; 402*0fca6ea1SDimitry Andric 403*0fca6ea1SDimitry Andric uint32_t pointersPerStruct = 0; 404*0fca6ea1SDimitry Andric 405*0fca6ea1SDimitry Andric uint32_t structSize = 0; 406*0fca6ea1SDimitry Andric uint32_t structCount = 0; 407*0fca6ea1SDimitry Andric 408*0fca6ea1SDimitry Andric std::vector<Symbol *> allPtrs; 409*0fca6ea1SDimitry Andric }; 410*0fca6ea1SDimitry Andric 411*0fca6ea1SDimitry Andric // Full information describing an ObjC class . This will include all the 412*0fca6ea1SDimitry Andric // additional methods, protocols, and properties that are contained in the 413*0fca6ea1SDimitry Andric // class and all the categories that extend a particular class. 414*0fca6ea1SDimitry Andric struct ClassExtensionInfo { 415*0fca6ea1SDimitry Andric ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){}; 416*0fca6ea1SDimitry Andric 417*0fca6ea1SDimitry Andric // Merged names of containers. Ex: base|firstCategory|secondCategory|... 418*0fca6ea1SDimitry Andric std::string mergedContainerName; 419*0fca6ea1SDimitry Andric std::string baseClassName; 420*0fca6ea1SDimitry Andric const Symbol *baseClass = nullptr; 421*0fca6ea1SDimitry Andric SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown; 422*0fca6ea1SDimitry Andric 423*0fca6ea1SDimitry Andric CategoryLayout &catLayout; 424*0fca6ea1SDimitry Andric 425*0fca6ea1SDimitry Andric // In case we generate new data, mark the new data as belonging to this file 426*0fca6ea1SDimitry Andric ObjFile *objFileForMergeData = nullptr; 427*0fca6ea1SDimitry Andric 428*0fca6ea1SDimitry Andric PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods, 429*0fca6ea1SDimitry Andric /*pointersPerStruct=*/3}; 430*0fca6ea1SDimitry Andric PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods, 431*0fca6ea1SDimitry Andric /*pointersPerStruct=*/3}; 432*0fca6ea1SDimitry Andric PointerListInfo protocols = {objc::symbol_names::categoryProtocols, 433*0fca6ea1SDimitry Andric /*pointersPerStruct=*/0}; 434*0fca6ea1SDimitry Andric PointerListInfo instanceProps = {objc::symbol_names::listProprieties, 435*0fca6ea1SDimitry Andric /*pointersPerStruct=*/2}; 436*0fca6ea1SDimitry Andric PointerListInfo classProps = {objc::symbol_names::klassPropList, 437*0fca6ea1SDimitry Andric /*pointersPerStruct=*/2}; 438*0fca6ea1SDimitry Andric }; 439*0fca6ea1SDimitry Andric 440*0fca6ea1SDimitry Andric public: 441*0fca6ea1SDimitry Andric ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections); 442*0fca6ea1SDimitry Andric void doMerge(); 443*0fca6ea1SDimitry Andric static void doCleanup(); 444*0fca6ea1SDimitry Andric 445*0fca6ea1SDimitry Andric private: 446*0fca6ea1SDimitry Andric DenseSet<const Symbol *> collectNlCategories(); 447*0fca6ea1SDimitry Andric void collectAndValidateCategoriesData(); 448*0fca6ea1SDimitry Andric void 449*0fca6ea1SDimitry Andric mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories); 450*0fca6ea1SDimitry Andric 451*0fca6ea1SDimitry Andric void eraseISec(ConcatInputSection *isec); 452*0fca6ea1SDimitry Andric void eraseMergedCategories(); 453*0fca6ea1SDimitry Andric 454*0fca6ea1SDimitry Andric void generateCatListForNonErasedCategories( 455*0fca6ea1SDimitry Andric MapVector<ConcatInputSection *, std::set<uint64_t>> 456*0fca6ea1SDimitry Andric catListToErasedOffsets); 457*0fca6ea1SDimitry Andric void collectSectionWriteInfoFromIsec(const InputSection *isec, 458*0fca6ea1SDimitry Andric InfoWriteSection &catWriteInfo); 459*0fca6ea1SDimitry Andric void collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo); 460*0fca6ea1SDimitry Andric void parseCatInfoToExtInfo(const InfoInputCategory &catInfo, 461*0fca6ea1SDimitry Andric ClassExtensionInfo &extInfo); 462*0fca6ea1SDimitry Andric 463*0fca6ea1SDimitry Andric void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset, 464*0fca6ea1SDimitry Andric PointerListInfo &ptrList, 465*0fca6ea1SDimitry Andric SourceLanguage sourceLang); 466*0fca6ea1SDimitry Andric 467*0fca6ea1SDimitry Andric PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec, 468*0fca6ea1SDimitry Andric uint32_t secOffset, 469*0fca6ea1SDimitry Andric SourceLanguage sourceLang); 470*0fca6ea1SDimitry Andric 471*0fca6ea1SDimitry Andric void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset, 472*0fca6ea1SDimitry Andric PointerListInfo &ptrList); 473*0fca6ea1SDimitry Andric 474*0fca6ea1SDimitry Andric void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset, 475*0fca6ea1SDimitry Andric const ClassExtensionInfo &extInfo, 476*0fca6ea1SDimitry Andric const PointerListInfo &ptrList); 477*0fca6ea1SDimitry Andric 478*0fca6ea1SDimitry Andric Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset, 479*0fca6ea1SDimitry Andric const ClassExtensionInfo &extInfo, 480*0fca6ea1SDimitry Andric const PointerListInfo &ptrList); 481*0fca6ea1SDimitry Andric 482*0fca6ea1SDimitry Andric Defined *emitCategory(const ClassExtensionInfo &extInfo); 483*0fca6ea1SDimitry Andric Defined *emitCatListEntrySec(const std::string &forCategoryName, 484*0fca6ea1SDimitry Andric const std::string &forBaseClassName, 485*0fca6ea1SDimitry Andric ObjFile *objFile); 486*0fca6ea1SDimitry Andric Defined *emitCategoryBody(const std::string &name, const Defined *nameSym, 487*0fca6ea1SDimitry Andric const Symbol *baseClassSym, 488*0fca6ea1SDimitry Andric const std::string &baseClassName, ObjFile *objFile); 489*0fca6ea1SDimitry Andric Defined *emitCategoryName(const std::string &name, ObjFile *objFile); 490*0fca6ea1SDimitry Andric void createSymbolReference(Defined *refFrom, const Symbol *refTo, 491*0fca6ea1SDimitry Andric uint32_t offset, const Reloc &relocTemplate); 492*0fca6ea1SDimitry Andric Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset); 493*0fca6ea1SDimitry Andric Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec, 494*0fca6ea1SDimitry Andric uint32_t offset); 495*0fca6ea1SDimitry Andric Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, 496*0fca6ea1SDimitry Andric uint32_t offset); 497*0fca6ea1SDimitry Andric Defined *getClassRo(const Defined *classSym, bool getMetaRo); 498*0fca6ea1SDimitry Andric SourceLanguage getClassSymSourceLang(const Defined *classSym); 499*0fca6ea1SDimitry Andric void mergeCategoriesIntoBaseClass(const Defined *baseClass, 500*0fca6ea1SDimitry Andric std::vector<InfoInputCategory> &categories); 501*0fca6ea1SDimitry Andric void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset); 502*0fca6ea1SDimitry Andric void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec, 503*0fca6ea1SDimitry Andric uint32_t offset); 504*0fca6ea1SDimitry Andric 505*0fca6ea1SDimitry Andric // Allocate a null-terminated StringRef backed by generatedSectionData 506*0fca6ea1SDimitry Andric StringRef newStringData(const char *str); 507*0fca6ea1SDimitry Andric // Allocate section data, backed by generatedSectionData 508*0fca6ea1SDimitry Andric SmallVector<uint8_t> &newSectionData(uint32_t size); 509*0fca6ea1SDimitry Andric 510*0fca6ea1SDimitry Andric CategoryLayout catLayout; 511*0fca6ea1SDimitry Andric ClassLayout classLayout; 512*0fca6ea1SDimitry Andric ROClassLayout roClassLayout; 513*0fca6ea1SDimitry Andric ListHeaderLayout listHeaderLayout; 514*0fca6ea1SDimitry Andric MethodLayout methodLayout; 515*0fca6ea1SDimitry Andric ProtocolListHeaderLayout protocolListHeaderLayout; 516*0fca6ea1SDimitry Andric 517*0fca6ea1SDimitry Andric InfoCategoryWriter infoCategoryWriter; 518*0fca6ea1SDimitry Andric std::vector<ConcatInputSection *> &allInputSections; 519*0fca6ea1SDimitry Andric // Map of base class Symbol to list of InfoInputCategory's for it 520*0fca6ea1SDimitry Andric MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap; 521*0fca6ea1SDimitry Andric 522*0fca6ea1SDimitry Andric // Normally, the binary data comes from the input files, but since we're 523*0fca6ea1SDimitry Andric // generating binary data ourselves, we use the below array to store it in. 524*0fca6ea1SDimitry Andric // Need this to be 'static' so the data survives past the ObjcCategoryMerger 525*0fca6ea1SDimitry Andric // object, as the data will be read by the Writer when the final binary is 526*0fca6ea1SDimitry Andric // generated. 527*0fca6ea1SDimitry Andric static SmallVector<std::unique_ptr<SmallVector<uint8_t>>> 528*0fca6ea1SDimitry Andric generatedSectionData; 529*0fca6ea1SDimitry Andric }; 530*0fca6ea1SDimitry Andric 531*0fca6ea1SDimitry Andric SmallVector<std::unique_ptr<SmallVector<uint8_t>>> 532*0fca6ea1SDimitry Andric ObjcCategoryMerger::generatedSectionData; 533*0fca6ea1SDimitry Andric 534*0fca6ea1SDimitry Andric ObjcCategoryMerger::ObjcCategoryMerger( 535*0fca6ea1SDimitry Andric std::vector<ConcatInputSection *> &_allInputSections) 536*0fca6ea1SDimitry Andric : catLayout(target->wordSize), classLayout(target->wordSize), 537*0fca6ea1SDimitry Andric roClassLayout(target->wordSize), listHeaderLayout(target->wordSize), 538*0fca6ea1SDimitry Andric methodLayout(target->wordSize), 539*0fca6ea1SDimitry Andric protocolListHeaderLayout(target->wordSize), 540*0fca6ea1SDimitry Andric allInputSections(_allInputSections) {} 541*0fca6ea1SDimitry Andric 542*0fca6ea1SDimitry Andric void ObjcCategoryMerger::collectSectionWriteInfoFromIsec( 543*0fca6ea1SDimitry Andric const InputSection *isec, InfoWriteSection &catWriteInfo) { 544*0fca6ea1SDimitry Andric 545*0fca6ea1SDimitry Andric catWriteInfo.inputSection = const_cast<Section *>(&isec->section); 546*0fca6ea1SDimitry Andric catWriteInfo.align = isec->align; 547*0fca6ea1SDimitry Andric catWriteInfo.outputSection = isec->parent; 548*0fca6ea1SDimitry Andric 549*0fca6ea1SDimitry Andric assert(catWriteInfo.outputSection && 550*0fca6ea1SDimitry Andric "outputSection may not be null in collectSectionWriteInfoFromIsec."); 551*0fca6ea1SDimitry Andric 552*0fca6ea1SDimitry Andric if (isec->relocs.size()) 553*0fca6ea1SDimitry Andric catWriteInfo.relocTemplate = isec->relocs[0]; 554*0fca6ea1SDimitry Andric 555*0fca6ea1SDimitry Andric catWriteInfo.valid = true; 556*0fca6ea1SDimitry Andric } 557*0fca6ea1SDimitry Andric 558*0fca6ea1SDimitry Andric Symbol * 559*0fca6ea1SDimitry Andric ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec, 560*0fca6ea1SDimitry Andric uint32_t offset) { 561*0fca6ea1SDimitry Andric if (!isec) 562*0fca6ea1SDimitry Andric return nullptr; 563*0fca6ea1SDimitry Andric const Reloc *reloc = isec->getRelocAt(offset); 564*0fca6ea1SDimitry Andric 565*0fca6ea1SDimitry Andric if (!reloc) 566*0fca6ea1SDimitry Andric return nullptr; 567*0fca6ea1SDimitry Andric 568*0fca6ea1SDimitry Andric Symbol *sym = reloc->referent.get<Symbol *>(); 569*0fca6ea1SDimitry Andric 570*0fca6ea1SDimitry Andric if (reloc->addend) { 571*0fca6ea1SDimitry Andric assert(isa<Defined>(sym) && "Expected defined for non-zero addend"); 572*0fca6ea1SDimitry Andric Defined *definedSym = cast<Defined>(sym); 573*0fca6ea1SDimitry Andric sym = tryFindDefinedOnIsec(definedSym->isec(), 574*0fca6ea1SDimitry Andric definedSym->value + reloc->addend); 575*0fca6ea1SDimitry Andric } 576*0fca6ea1SDimitry Andric 577*0fca6ea1SDimitry Andric return sym; 578*0fca6ea1SDimitry Andric } 579*0fca6ea1SDimitry Andric 580*0fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec, 581*0fca6ea1SDimitry Andric uint32_t offset) { 582*0fca6ea1SDimitry Andric for (Defined *sym : isec->symbols) 583*0fca6ea1SDimitry Andric if ((sym->value <= offset) && (sym->value + sym->size > offset)) 584*0fca6ea1SDimitry Andric return sym; 585*0fca6ea1SDimitry Andric 586*0fca6ea1SDimitry Andric return nullptr; 587*0fca6ea1SDimitry Andric } 588*0fca6ea1SDimitry Andric 589*0fca6ea1SDimitry Andric Defined * 590*0fca6ea1SDimitry Andric ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec, 591*0fca6ea1SDimitry Andric uint32_t offset) { 592*0fca6ea1SDimitry Andric Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset); 593*0fca6ea1SDimitry Andric return dyn_cast_or_null<Defined>(sym); 594*0fca6ea1SDimitry Andric } 595*0fca6ea1SDimitry Andric 596*0fca6ea1SDimitry Andric // Get the class's ro_data symbol. If getMetaRo is true, then we will return 597*0fca6ea1SDimitry Andric // the meta-class's ro_data symbol. Otherwise, we will return the class 598*0fca6ea1SDimitry Andric // (instance) ro_data symbol. 599*0fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym, 600*0fca6ea1SDimitry Andric bool getMetaRo) { 601*0fca6ea1SDimitry Andric ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec()); 602*0fca6ea1SDimitry Andric if (!isec) 603*0fca6ea1SDimitry Andric return nullptr; 604*0fca6ea1SDimitry Andric 605*0fca6ea1SDimitry Andric if (!getMetaRo) 606*0fca6ea1SDimitry Andric return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset + 607*0fca6ea1SDimitry Andric classSym->value); 608*0fca6ea1SDimitry Andric 609*0fca6ea1SDimitry Andric Defined *metaClass = tryGetDefinedAtIsecOffset( 610*0fca6ea1SDimitry Andric isec, classLayout.metaClassOffset + classSym->value); 611*0fca6ea1SDimitry Andric if (!metaClass) 612*0fca6ea1SDimitry Andric return nullptr; 613*0fca6ea1SDimitry Andric 614*0fca6ea1SDimitry Andric return tryGetDefinedAtIsecOffset( 615*0fca6ea1SDimitry Andric dyn_cast<ConcatInputSection>(metaClass->isec()), 616*0fca6ea1SDimitry Andric classLayout.roDataOffset); 617*0fca6ea1SDimitry Andric } 618*0fca6ea1SDimitry Andric 619*0fca6ea1SDimitry Andric // Given an ConcatInputSection or CStringInputSection and an offset, if there is 620*0fca6ea1SDimitry Andric // a symbol(Defined) at that offset, then erase the symbol (mark it not live) 621*0fca6ea1SDimitry Andric void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset( 622*0fca6ea1SDimitry Andric const ConcatInputSection *isec, uint32_t offset) { 623*0fca6ea1SDimitry Andric const Reloc *reloc = isec->getRelocAt(offset); 624*0fca6ea1SDimitry Andric 625*0fca6ea1SDimitry Andric if (!reloc) 626*0fca6ea1SDimitry Andric return; 627*0fca6ea1SDimitry Andric 628*0fca6ea1SDimitry Andric Defined *sym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 629*0fca6ea1SDimitry Andric if (!sym) 630*0fca6ea1SDimitry Andric return; 631*0fca6ea1SDimitry Andric 632*0fca6ea1SDimitry Andric if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec())) 633*0fca6ea1SDimitry Andric eraseISec(cisec); 634*0fca6ea1SDimitry Andric else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) { 635*0fca6ea1SDimitry Andric uint32_t totalOffset = sym->value + reloc->addend; 636*0fca6ea1SDimitry Andric StringPiece &piece = csisec->getStringPiece(totalOffset); 637*0fca6ea1SDimitry Andric piece.live = false; 638*0fca6ea1SDimitry Andric } else { 639*0fca6ea1SDimitry Andric llvm_unreachable("erased symbol has to be Defined or CStringInputSection"); 640*0fca6ea1SDimitry Andric } 641*0fca6ea1SDimitry Andric } 642*0fca6ea1SDimitry Andric 643*0fca6ea1SDimitry Andric void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory( 644*0fca6ea1SDimitry Andric const InfoInputCategory &catInfo) { 645*0fca6ea1SDimitry Andric 646*0fca6ea1SDimitry Andric if (!infoCategoryWriter.catListInfo.valid) 647*0fca6ea1SDimitry Andric collectSectionWriteInfoFromIsec(catInfo.catListIsec, 648*0fca6ea1SDimitry Andric infoCategoryWriter.catListInfo); 649*0fca6ea1SDimitry Andric if (!infoCategoryWriter.catBodyInfo.valid) 650*0fca6ea1SDimitry Andric collectSectionWriteInfoFromIsec(catInfo.catBodyIsec, 651*0fca6ea1SDimitry Andric infoCategoryWriter.catBodyInfo); 652*0fca6ea1SDimitry Andric 653*0fca6ea1SDimitry Andric if (!infoCategoryWriter.catNameInfo.valid) { 654*0fca6ea1SDimitry Andric lld::macho::Defined *catNameSym = 655*0fca6ea1SDimitry Andric tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset); 656*0fca6ea1SDimitry Andric assert(catNameSym && "Category does not have a valid name Symbol"); 657*0fca6ea1SDimitry Andric 658*0fca6ea1SDimitry Andric collectSectionWriteInfoFromIsec(catNameSym->isec(), 659*0fca6ea1SDimitry Andric infoCategoryWriter.catNameInfo); 660*0fca6ea1SDimitry Andric } 661*0fca6ea1SDimitry Andric 662*0fca6ea1SDimitry Andric // Collect writer info from all the category lists (we're assuming they all 663*0fca6ea1SDimitry Andric // would provide the same info) 664*0fca6ea1SDimitry Andric if (!infoCategoryWriter.catPtrListInfo.valid) { 665*0fca6ea1SDimitry Andric for (uint32_t off = catLayout.instanceMethodsOffset; 666*0fca6ea1SDimitry Andric off <= catLayout.classPropsOffset; off += target->wordSize) { 667*0fca6ea1SDimitry Andric if (Defined *ptrList = 668*0fca6ea1SDimitry Andric tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) { 669*0fca6ea1SDimitry Andric collectSectionWriteInfoFromIsec(ptrList->isec(), 670*0fca6ea1SDimitry Andric infoCategoryWriter.catPtrListInfo); 671*0fca6ea1SDimitry Andric // we've successfully collected data, so we can break 672*0fca6ea1SDimitry Andric break; 673*0fca6ea1SDimitry Andric } 674*0fca6ea1SDimitry Andric } 675*0fca6ea1SDimitry Andric } 676*0fca6ea1SDimitry Andric } 677*0fca6ea1SDimitry Andric 678*0fca6ea1SDimitry Andric // Parse a protocol list that might be linked to ConcatInputSection at a given 679*0fca6ea1SDimitry Andric // offset. The format of the protocol list is different than other lists (prop 680*0fca6ea1SDimitry Andric // lists, method lists) so we need to parse it differently 681*0fca6ea1SDimitry Andric void ObjcCategoryMerger::parseProtocolListInfo( 682*0fca6ea1SDimitry Andric const ConcatInputSection *isec, uint32_t secOffset, 683*0fca6ea1SDimitry Andric PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) { 684*0fca6ea1SDimitry Andric assert((isec && (secOffset + target->wordSize <= isec->data.size())) && 685*0fca6ea1SDimitry Andric "Tried to read pointer list beyond protocol section end"); 686*0fca6ea1SDimitry Andric 687*0fca6ea1SDimitry Andric const Reloc *reloc = isec->getRelocAt(secOffset); 688*0fca6ea1SDimitry Andric if (!reloc) 689*0fca6ea1SDimitry Andric return; 690*0fca6ea1SDimitry Andric 691*0fca6ea1SDimitry Andric auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 692*0fca6ea1SDimitry Andric assert(ptrListSym && "Protocol list reloc does not have a valid Defined"); 693*0fca6ea1SDimitry Andric 694*0fca6ea1SDimitry Andric // Theoretically protocol count can be either 32b or 64b, depending on 695*0fca6ea1SDimitry Andric // platform pointer size, but to simplify implementation we always just read 696*0fca6ea1SDimitry Andric // the lower 32b which should be good enough. 697*0fca6ea1SDimitry Andric uint32_t protocolCount = *reinterpret_cast<const uint32_t *>( 698*0fca6ea1SDimitry Andric ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset); 699*0fca6ea1SDimitry Andric 700*0fca6ea1SDimitry Andric ptrList.structCount += protocolCount; 701*0fca6ea1SDimitry Andric ptrList.structSize = target->wordSize; 702*0fca6ea1SDimitry Andric 703*0fca6ea1SDimitry Andric [[maybe_unused]] uint32_t expectedListSize = 704*0fca6ea1SDimitry Andric (protocolCount * target->wordSize) + 705*0fca6ea1SDimitry Andric /*header(count)*/ protocolListHeaderLayout.totalSize + 706*0fca6ea1SDimitry Andric /*extra null value*/ target->wordSize; 707*0fca6ea1SDimitry Andric 708*0fca6ea1SDimitry Andric // On Swift, the protocol list does not have the extra (unnecessary) null 709*0fca6ea1SDimitry Andric [[maybe_unused]] uint32_t expectedListSizeSwift = 710*0fca6ea1SDimitry Andric expectedListSize - target->wordSize; 711*0fca6ea1SDimitry Andric 712*0fca6ea1SDimitry Andric assert(((expectedListSize == ptrListSym->isec()->data.size() && 713*0fca6ea1SDimitry Andric sourceLang == SourceLanguage::ObjC) || 714*0fca6ea1SDimitry Andric (expectedListSizeSwift == ptrListSym->isec()->data.size() && 715*0fca6ea1SDimitry Andric sourceLang == SourceLanguage::Swift)) && 716*0fca6ea1SDimitry Andric "Protocol list does not match expected size"); 717*0fca6ea1SDimitry Andric 718*0fca6ea1SDimitry Andric uint32_t off = protocolListHeaderLayout.totalSize; 719*0fca6ea1SDimitry Andric for (uint32_t inx = 0; inx < protocolCount; ++inx) { 720*0fca6ea1SDimitry Andric const Reloc *reloc = ptrListSym->isec()->getRelocAt(off); 721*0fca6ea1SDimitry Andric assert(reloc && "No reloc found at protocol list offset"); 722*0fca6ea1SDimitry Andric 723*0fca6ea1SDimitry Andric auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 724*0fca6ea1SDimitry Andric assert(listSym && "Protocol list reloc does not have a valid Defined"); 725*0fca6ea1SDimitry Andric 726*0fca6ea1SDimitry Andric ptrList.allPtrs.push_back(listSym); 727*0fca6ea1SDimitry Andric off += target->wordSize; 728*0fca6ea1SDimitry Andric } 729*0fca6ea1SDimitry Andric assert((ptrListSym->isec()->getRelocAt(off) == nullptr) && 730*0fca6ea1SDimitry Andric "expected null terminating protocol"); 731*0fca6ea1SDimitry Andric assert(off + /*extra null value*/ target->wordSize == expectedListSize && 732*0fca6ea1SDimitry Andric "Protocol list end offset does not match expected size"); 733*0fca6ea1SDimitry Andric } 734*0fca6ea1SDimitry Andric 735*0fca6ea1SDimitry Andric // Parse a protocol list and return the PointerListInfo for it 736*0fca6ea1SDimitry Andric ObjcCategoryMerger::PointerListInfo 737*0fca6ea1SDimitry Andric ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec, 738*0fca6ea1SDimitry Andric uint32_t secOffset, 739*0fca6ea1SDimitry Andric SourceLanguage sourceLang) { 740*0fca6ea1SDimitry Andric PointerListInfo ptrList; 741*0fca6ea1SDimitry Andric parseProtocolListInfo(isec, secOffset, ptrList, sourceLang); 742*0fca6ea1SDimitry Andric return ptrList; 743*0fca6ea1SDimitry Andric } 744*0fca6ea1SDimitry Andric 745*0fca6ea1SDimitry Andric // Parse a pointer list that might be linked to ConcatInputSection at a given 746*0fca6ea1SDimitry Andric // offset. This can be used for instance methods, class methods, instance props 747*0fca6ea1SDimitry Andric // and class props since they have the same format. 748*0fca6ea1SDimitry Andric void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec, 749*0fca6ea1SDimitry Andric uint32_t secOffset, 750*0fca6ea1SDimitry Andric PointerListInfo &ptrList) { 751*0fca6ea1SDimitry Andric assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3); 752*0fca6ea1SDimitry Andric assert(isec && "Trying to parse pointer list from null isec"); 753*0fca6ea1SDimitry Andric assert(secOffset + target->wordSize <= isec->data.size() && 754*0fca6ea1SDimitry Andric "Trying to read pointer list beyond section end"); 755*0fca6ea1SDimitry Andric 756*0fca6ea1SDimitry Andric const Reloc *reloc = isec->getRelocAt(secOffset); 757*0fca6ea1SDimitry Andric if (!reloc) 758*0fca6ea1SDimitry Andric return; 759*0fca6ea1SDimitry Andric 760*0fca6ea1SDimitry Andric auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 761*0fca6ea1SDimitry Andric assert(ptrListSym && "Reloc does not have a valid Defined"); 762*0fca6ea1SDimitry Andric 763*0fca6ea1SDimitry Andric uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>( 764*0fca6ea1SDimitry Andric ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset); 765*0fca6ea1SDimitry Andric uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>( 766*0fca6ea1SDimitry Andric ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset); 767*0fca6ea1SDimitry Andric assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize); 768*0fca6ea1SDimitry Andric 769*0fca6ea1SDimitry Andric assert(!ptrList.structSize || (thisStructSize == ptrList.structSize)); 770*0fca6ea1SDimitry Andric 771*0fca6ea1SDimitry Andric ptrList.structCount += thisStructCount; 772*0fca6ea1SDimitry Andric ptrList.structSize = thisStructSize; 773*0fca6ea1SDimitry Andric 774*0fca6ea1SDimitry Andric uint32_t expectedListSize = 775*0fca6ea1SDimitry Andric listHeaderLayout.totalSize + (thisStructSize * thisStructCount); 776*0fca6ea1SDimitry Andric assert(expectedListSize == ptrListSym->isec()->data.size() && 777*0fca6ea1SDimitry Andric "Pointer list does not match expected size"); 778*0fca6ea1SDimitry Andric 779*0fca6ea1SDimitry Andric for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize; 780*0fca6ea1SDimitry Andric off += target->wordSize) { 781*0fca6ea1SDimitry Andric const Reloc *reloc = ptrListSym->isec()->getRelocAt(off); 782*0fca6ea1SDimitry Andric assert(reloc && "No reloc found at pointer list offset"); 783*0fca6ea1SDimitry Andric 784*0fca6ea1SDimitry Andric auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>()); 785*0fca6ea1SDimitry Andric assert(listSym && "Reloc does not have a valid Defined"); 786*0fca6ea1SDimitry Andric 787*0fca6ea1SDimitry Andric ptrList.allPtrs.push_back(listSym); 788*0fca6ea1SDimitry Andric } 789*0fca6ea1SDimitry Andric } 790*0fca6ea1SDimitry Andric 791*0fca6ea1SDimitry Andric // Here we parse all the information of an input category (catInfo) and 792*0fca6ea1SDimitry Andric // append the parsed info into the structure which will contain all the 793*0fca6ea1SDimitry Andric // information about how a class is extended (extInfo) 794*0fca6ea1SDimitry Andric void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo, 795*0fca6ea1SDimitry Andric ClassExtensionInfo &extInfo) { 796*0fca6ea1SDimitry Andric const Reloc *catNameReloc = 797*0fca6ea1SDimitry Andric catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset); 798*0fca6ea1SDimitry Andric 799*0fca6ea1SDimitry Andric // Parse name 800*0fca6ea1SDimitry Andric assert(catNameReloc && "Category does not have a reloc at 'nameOffset'"); 801*0fca6ea1SDimitry Andric 802*0fca6ea1SDimitry Andric // is this the first category we are parsing? 803*0fca6ea1SDimitry Andric if (extInfo.mergedContainerName.empty()) 804*0fca6ea1SDimitry Andric extInfo.objFileForMergeData = 805*0fca6ea1SDimitry Andric dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile()); 806*0fca6ea1SDimitry Andric else 807*0fca6ea1SDimitry Andric extInfo.mergedContainerName += "|"; 808*0fca6ea1SDimitry Andric 809*0fca6ea1SDimitry Andric assert(extInfo.objFileForMergeData && 810*0fca6ea1SDimitry Andric "Expected to already have valid objextInfo.objFileForMergeData"); 811*0fca6ea1SDimitry Andric 812*0fca6ea1SDimitry Andric StringRef catName = getReferentString(*catNameReloc); 813*0fca6ea1SDimitry Andric extInfo.mergedContainerName += catName.str(); 814*0fca6ea1SDimitry Andric 815*0fca6ea1SDimitry Andric // Parse base class 816*0fca6ea1SDimitry Andric if (!extInfo.baseClass) { 817*0fca6ea1SDimitry Andric Symbol *classSym = 818*0fca6ea1SDimitry Andric tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset); 819*0fca6ea1SDimitry Andric assert(extInfo.baseClassName.empty()); 820*0fca6ea1SDimitry Andric extInfo.baseClass = classSym; 821*0fca6ea1SDimitry Andric llvm::StringRef classPrefix(objc::symbol_names::klass); 822*0fca6ea1SDimitry Andric assert(classSym->getName().starts_with(classPrefix) && 823*0fca6ea1SDimitry Andric "Base class symbol does not start with expected prefix"); 824*0fca6ea1SDimitry Andric extInfo.baseClassName = classSym->getName().substr(classPrefix.size()); 825*0fca6ea1SDimitry Andric } else { 826*0fca6ea1SDimitry Andric assert((extInfo.baseClass == 827*0fca6ea1SDimitry Andric tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, 828*0fca6ea1SDimitry Andric catLayout.klassOffset)) && 829*0fca6ea1SDimitry Andric "Trying to parse category info into container with different base " 830*0fca6ea1SDimitry Andric "class"); 831*0fca6ea1SDimitry Andric } 832*0fca6ea1SDimitry Andric 833*0fca6ea1SDimitry Andric parsePointerListInfo(catInfo.catBodyIsec, catLayout.instanceMethodsOffset, 834*0fca6ea1SDimitry Andric extInfo.instanceMethods); 835*0fca6ea1SDimitry Andric 836*0fca6ea1SDimitry Andric parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset, 837*0fca6ea1SDimitry Andric extInfo.classMethods); 838*0fca6ea1SDimitry Andric 839*0fca6ea1SDimitry Andric parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset, 840*0fca6ea1SDimitry Andric extInfo.protocols, catInfo.sourceLanguage); 841*0fca6ea1SDimitry Andric 842*0fca6ea1SDimitry Andric parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset, 843*0fca6ea1SDimitry Andric extInfo.instanceProps); 844*0fca6ea1SDimitry Andric 845*0fca6ea1SDimitry Andric parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset, 846*0fca6ea1SDimitry Andric extInfo.classProps); 847*0fca6ea1SDimitry Andric } 848*0fca6ea1SDimitry Andric 849*0fca6ea1SDimitry Andric // Generate a protocol list (including header) and link it into the parent at 850*0fca6ea1SDimitry Andric // the specified offset. 851*0fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::emitAndLinkProtocolList( 852*0fca6ea1SDimitry Andric Defined *parentSym, uint32_t linkAtOffset, 853*0fca6ea1SDimitry Andric const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) { 854*0fca6ea1SDimitry Andric if (ptrList.allPtrs.empty()) 855*0fca6ea1SDimitry Andric return nullptr; 856*0fca6ea1SDimitry Andric 857*0fca6ea1SDimitry Andric assert(ptrList.allPtrs.size() == ptrList.structCount); 858*0fca6ea1SDimitry Andric 859*0fca6ea1SDimitry Andric uint32_t bodySize = (ptrList.structCount * target->wordSize) + 860*0fca6ea1SDimitry Andric /*header(count)*/ protocolListHeaderLayout.totalSize + 861*0fca6ea1SDimitry Andric /*extra null value*/ target->wordSize; 862*0fca6ea1SDimitry Andric llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize); 863*0fca6ea1SDimitry Andric 864*0fca6ea1SDimitry Andric // This theoretically can be either 32b or 64b, but writing just the first 32b 865*0fca6ea1SDimitry Andric // is good enough 866*0fca6ea1SDimitry Andric const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>( 867*0fca6ea1SDimitry Andric bodyData.data() + protocolListHeaderLayout.protocolCountOffset); 868*0fca6ea1SDimitry Andric 869*0fca6ea1SDimitry Andric *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size(); 870*0fca6ea1SDimitry Andric 871*0fca6ea1SDimitry Andric ConcatInputSection *listSec = make<ConcatInputSection>( 872*0fca6ea1SDimitry Andric *infoCategoryWriter.catPtrListInfo.inputSection, bodyData, 873*0fca6ea1SDimitry Andric infoCategoryWriter.catPtrListInfo.align); 874*0fca6ea1SDimitry Andric listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 875*0fca6ea1SDimitry Andric listSec->live = true; 876*0fca6ea1SDimitry Andric addInputSection(listSec); 877*0fca6ea1SDimitry Andric 878*0fca6ea1SDimitry Andric listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 879*0fca6ea1SDimitry Andric 880*0fca6ea1SDimitry Andric std::string symName = ptrList.categoryPrefix; 881*0fca6ea1SDimitry Andric symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")"; 882*0fca6ea1SDimitry Andric 883*0fca6ea1SDimitry Andric Defined *ptrListSym = make<Defined>( 884*0fca6ea1SDimitry Andric newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(), 885*0fca6ea1SDimitry Andric listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false, 886*0fca6ea1SDimitry Andric /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 887*0fca6ea1SDimitry Andric /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 888*0fca6ea1SDimitry Andric /*isWeakDefCanBeHidden=*/false); 889*0fca6ea1SDimitry Andric 890*0fca6ea1SDimitry Andric ptrListSym->used = true; 891*0fca6ea1SDimitry Andric parentSym->getObjectFile()->symbols.push_back(ptrListSym); 892*0fca6ea1SDimitry Andric 893*0fca6ea1SDimitry Andric createSymbolReference(parentSym, ptrListSym, linkAtOffset, 894*0fca6ea1SDimitry Andric infoCategoryWriter.catBodyInfo.relocTemplate); 895*0fca6ea1SDimitry Andric 896*0fca6ea1SDimitry Andric uint32_t offset = protocolListHeaderLayout.totalSize; 897*0fca6ea1SDimitry Andric for (Symbol *symbol : ptrList.allPtrs) { 898*0fca6ea1SDimitry Andric createSymbolReference(ptrListSym, symbol, offset, 899*0fca6ea1SDimitry Andric infoCategoryWriter.catPtrListInfo.relocTemplate); 900*0fca6ea1SDimitry Andric offset += target->wordSize; 901*0fca6ea1SDimitry Andric } 902*0fca6ea1SDimitry Andric 903*0fca6ea1SDimitry Andric return ptrListSym; 904*0fca6ea1SDimitry Andric } 905*0fca6ea1SDimitry Andric 906*0fca6ea1SDimitry Andric // Generate a pointer list (including header) and link it into the parent at the 907*0fca6ea1SDimitry Andric // specified offset. This is used for instance and class methods and 908*0fca6ea1SDimitry Andric // proprieties. 909*0fca6ea1SDimitry Andric void ObjcCategoryMerger::emitAndLinkPointerList( 910*0fca6ea1SDimitry Andric Defined *parentSym, uint32_t linkAtOffset, 911*0fca6ea1SDimitry Andric const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) { 912*0fca6ea1SDimitry Andric if (ptrList.allPtrs.empty()) 913*0fca6ea1SDimitry Andric return; 914*0fca6ea1SDimitry Andric 915*0fca6ea1SDimitry Andric assert(ptrList.allPtrs.size() * target->wordSize == 916*0fca6ea1SDimitry Andric ptrList.structCount * ptrList.structSize); 917*0fca6ea1SDimitry Andric 918*0fca6ea1SDimitry Andric // Generate body 919*0fca6ea1SDimitry Andric uint32_t bodySize = 920*0fca6ea1SDimitry Andric listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount); 921*0fca6ea1SDimitry Andric llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize); 922*0fca6ea1SDimitry Andric 923*0fca6ea1SDimitry Andric const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>( 924*0fca6ea1SDimitry Andric bodyData.data() + listHeaderLayout.structSizeOffset); 925*0fca6ea1SDimitry Andric const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>( 926*0fca6ea1SDimitry Andric bodyData.data() + listHeaderLayout.structCountOffset); 927*0fca6ea1SDimitry Andric 928*0fca6ea1SDimitry Andric *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize; 929*0fca6ea1SDimitry Andric *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount; 930*0fca6ea1SDimitry Andric 931*0fca6ea1SDimitry Andric ConcatInputSection *listSec = make<ConcatInputSection>( 932*0fca6ea1SDimitry Andric *infoCategoryWriter.catPtrListInfo.inputSection, bodyData, 933*0fca6ea1SDimitry Andric infoCategoryWriter.catPtrListInfo.align); 934*0fca6ea1SDimitry Andric listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 935*0fca6ea1SDimitry Andric listSec->live = true; 936*0fca6ea1SDimitry Andric addInputSection(listSec); 937*0fca6ea1SDimitry Andric 938*0fca6ea1SDimitry Andric listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection; 939*0fca6ea1SDimitry Andric 940*0fca6ea1SDimitry Andric std::string symName = ptrList.categoryPrefix; 941*0fca6ea1SDimitry Andric symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")"; 942*0fca6ea1SDimitry Andric 943*0fca6ea1SDimitry Andric Defined *ptrListSym = make<Defined>( 944*0fca6ea1SDimitry Andric newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(), 945*0fca6ea1SDimitry Andric listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false, 946*0fca6ea1SDimitry Andric /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 947*0fca6ea1SDimitry Andric /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 948*0fca6ea1SDimitry Andric /*isWeakDefCanBeHidden=*/false); 949*0fca6ea1SDimitry Andric 950*0fca6ea1SDimitry Andric ptrListSym->used = true; 951*0fca6ea1SDimitry Andric parentSym->getObjectFile()->symbols.push_back(ptrListSym); 952*0fca6ea1SDimitry Andric 953*0fca6ea1SDimitry Andric createSymbolReference(parentSym, ptrListSym, linkAtOffset, 954*0fca6ea1SDimitry Andric infoCategoryWriter.catBodyInfo.relocTemplate); 955*0fca6ea1SDimitry Andric 956*0fca6ea1SDimitry Andric uint32_t offset = listHeaderLayout.totalSize; 957*0fca6ea1SDimitry Andric for (Symbol *symbol : ptrList.allPtrs) { 958*0fca6ea1SDimitry Andric createSymbolReference(ptrListSym, symbol, offset, 959*0fca6ea1SDimitry Andric infoCategoryWriter.catPtrListInfo.relocTemplate); 960*0fca6ea1SDimitry Andric offset += target->wordSize; 961*0fca6ea1SDimitry Andric } 962*0fca6ea1SDimitry Andric } 963*0fca6ea1SDimitry Andric 964*0fca6ea1SDimitry Andric // This method creates an __objc_catlist ConcatInputSection with a single slot 965*0fca6ea1SDimitry Andric Defined * 966*0fca6ea1SDimitry Andric ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName, 967*0fca6ea1SDimitry Andric const std::string &forBaseClassName, 968*0fca6ea1SDimitry Andric ObjFile *objFile) { 969*0fca6ea1SDimitry Andric uint32_t sectionSize = target->wordSize; 970*0fca6ea1SDimitry Andric llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize); 971*0fca6ea1SDimitry Andric 972*0fca6ea1SDimitry Andric ConcatInputSection *newCatList = 973*0fca6ea1SDimitry Andric make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection, 974*0fca6ea1SDimitry Andric bodyData, infoCategoryWriter.catListInfo.align); 975*0fca6ea1SDimitry Andric newCatList->parent = infoCategoryWriter.catListInfo.outputSection; 976*0fca6ea1SDimitry Andric newCatList->live = true; 977*0fca6ea1SDimitry Andric addInputSection(newCatList); 978*0fca6ea1SDimitry Andric 979*0fca6ea1SDimitry Andric newCatList->parent = infoCategoryWriter.catListInfo.outputSection; 980*0fca6ea1SDimitry Andric 981*0fca6ea1SDimitry Andric std::string catSymName = "<__objc_catlist slot for merged category "; 982*0fca6ea1SDimitry Andric catSymName += forBaseClassName + "(" + forCategoryName + ")>"; 983*0fca6ea1SDimitry Andric 984*0fca6ea1SDimitry Andric Defined *catListSym = make<Defined>( 985*0fca6ea1SDimitry Andric newStringData(catSymName.c_str()), /*file=*/objFile, newCatList, 986*0fca6ea1SDimitry Andric /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false, 987*0fca6ea1SDimitry Andric /*isPrivateExtern=*/false, /*includeInSymtab=*/false, 988*0fca6ea1SDimitry Andric /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 989*0fca6ea1SDimitry Andric /*isWeakDefCanBeHidden=*/false); 990*0fca6ea1SDimitry Andric 991*0fca6ea1SDimitry Andric catListSym->used = true; 992*0fca6ea1SDimitry Andric objFile->symbols.push_back(catListSym); 993*0fca6ea1SDimitry Andric return catListSym; 994*0fca6ea1SDimitry Andric } 995*0fca6ea1SDimitry Andric 996*0fca6ea1SDimitry Andric // Here we generate the main category body and link the name and base class into 997*0fca6ea1SDimitry Andric // it. We don't link any other info yet like the protocol and class/instance 998*0fca6ea1SDimitry Andric // methods/props. 999*0fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name, 1000*0fca6ea1SDimitry Andric const Defined *nameSym, 1001*0fca6ea1SDimitry Andric const Symbol *baseClassSym, 1002*0fca6ea1SDimitry Andric const std::string &baseClassName, 1003*0fca6ea1SDimitry Andric ObjFile *objFile) { 1004*0fca6ea1SDimitry Andric llvm::ArrayRef<uint8_t> bodyData = newSectionData(catLayout.totalSize); 1005*0fca6ea1SDimitry Andric 1006*0fca6ea1SDimitry Andric uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) + 1007*0fca6ea1SDimitry Andric catLayout.sizeOffset); 1008*0fca6ea1SDimitry Andric *ptrSize = catLayout.totalSize; 1009*0fca6ea1SDimitry Andric 1010*0fca6ea1SDimitry Andric ConcatInputSection *newBodySec = 1011*0fca6ea1SDimitry Andric make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection, 1012*0fca6ea1SDimitry Andric bodyData, infoCategoryWriter.catBodyInfo.align); 1013*0fca6ea1SDimitry Andric newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection; 1014*0fca6ea1SDimitry Andric newBodySec->live = true; 1015*0fca6ea1SDimitry Andric addInputSection(newBodySec); 1016*0fca6ea1SDimitry Andric 1017*0fca6ea1SDimitry Andric std::string symName = 1018*0fca6ea1SDimitry Andric objc::symbol_names::category + baseClassName + "(" + name + ")"; 1019*0fca6ea1SDimitry Andric Defined *catBodySym = make<Defined>( 1020*0fca6ea1SDimitry Andric newStringData(symName.c_str()), /*file=*/objFile, newBodySec, 1021*0fca6ea1SDimitry Andric /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false, 1022*0fca6ea1SDimitry Andric /*isPrivateExtern=*/false, /*includeInSymtab=*/true, 1023*0fca6ea1SDimitry Andric /*isReferencedDynamically=*/false, /*noDeadStrip=*/false, 1024*0fca6ea1SDimitry Andric /*isWeakDefCanBeHidden=*/false); 1025*0fca6ea1SDimitry Andric 1026*0fca6ea1SDimitry Andric catBodySym->used = true; 1027*0fca6ea1SDimitry Andric objFile->symbols.push_back(catBodySym); 1028*0fca6ea1SDimitry Andric 1029*0fca6ea1SDimitry Andric createSymbolReference(catBodySym, nameSym, catLayout.nameOffset, 1030*0fca6ea1SDimitry Andric infoCategoryWriter.catBodyInfo.relocTemplate); 1031*0fca6ea1SDimitry Andric 1032*0fca6ea1SDimitry Andric // Create a reloc to the base class (either external or internal) 1033*0fca6ea1SDimitry Andric createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset, 1034*0fca6ea1SDimitry Andric infoCategoryWriter.catBodyInfo.relocTemplate); 1035*0fca6ea1SDimitry Andric 1036*0fca6ea1SDimitry Andric return catBodySym; 1037*0fca6ea1SDimitry Andric } 1038*0fca6ea1SDimitry Andric 1039*0fca6ea1SDimitry Andric // This writes the new category name (for the merged category) into the binary 1040*0fca6ea1SDimitry Andric // and returns the sybmol for it. 1041*0fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name, 1042*0fca6ea1SDimitry Andric ObjFile *objFile) { 1043*0fca6ea1SDimitry Andric StringRef nameStrData = newStringData(name.c_str()); 1044*0fca6ea1SDimitry Andric // We use +1 below to include the null terminator 1045*0fca6ea1SDimitry Andric llvm::ArrayRef<uint8_t> nameData( 1046*0fca6ea1SDimitry Andric reinterpret_cast<const uint8_t *>(nameStrData.data()), 1047*0fca6ea1SDimitry Andric nameStrData.size() + 1); 1048*0fca6ea1SDimitry Andric 1049*0fca6ea1SDimitry Andric auto *parentSection = infoCategoryWriter.catNameInfo.inputSection; 1050*0fca6ea1SDimitry Andric CStringInputSection *newStringSec = make<CStringInputSection>( 1051*0fca6ea1SDimitry Andric *infoCategoryWriter.catNameInfo.inputSection, nameData, 1052*0fca6ea1SDimitry Andric infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/true); 1053*0fca6ea1SDimitry Andric 1054*0fca6ea1SDimitry Andric parentSection->subsections.push_back({0, newStringSec}); 1055*0fca6ea1SDimitry Andric 1056*0fca6ea1SDimitry Andric newStringSec->splitIntoPieces(); 1057*0fca6ea1SDimitry Andric newStringSec->pieces[0].live = true; 1058*0fca6ea1SDimitry Andric newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection; 1059*0fca6ea1SDimitry Andric in.cStringSection->addInput(newStringSec); 1060*0fca6ea1SDimitry Andric assert(newStringSec->pieces.size() == 1); 1061*0fca6ea1SDimitry Andric 1062*0fca6ea1SDimitry Andric Defined *catNameSym = make<Defined>( 1063*0fca6ea1SDimitry Andric "<merged category name>", /*file=*/objFile, newStringSec, 1064*0fca6ea1SDimitry Andric /*value=*/0, nameData.size(), 1065*0fca6ea1SDimitry Andric /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false, 1066*0fca6ea1SDimitry Andric /*includeInSymtab=*/false, /*isReferencedDynamically=*/false, 1067*0fca6ea1SDimitry Andric /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false); 1068*0fca6ea1SDimitry Andric 1069*0fca6ea1SDimitry Andric catNameSym->used = true; 1070*0fca6ea1SDimitry Andric objFile->symbols.push_back(catNameSym); 1071*0fca6ea1SDimitry Andric return catNameSym; 1072*0fca6ea1SDimitry Andric } 1073*0fca6ea1SDimitry Andric 1074*0fca6ea1SDimitry Andric // This method fully creates a new category from the given ClassExtensionInfo. 1075*0fca6ea1SDimitry Andric // It creates the category name, body and method/protocol/prop lists and links 1076*0fca6ea1SDimitry Andric // them all together. Then it creates a new __objc_catlist entry and adds the 1077*0fca6ea1SDimitry Andric // category to it. Calling this method will fully generate a category which will 1078*0fca6ea1SDimitry Andric // be available in the final binary. 1079*0fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) { 1080*0fca6ea1SDimitry Andric Defined *catNameSym = emitCategoryName(extInfo.mergedContainerName, 1081*0fca6ea1SDimitry Andric extInfo.objFileForMergeData); 1082*0fca6ea1SDimitry Andric 1083*0fca6ea1SDimitry Andric Defined *catBodySym = emitCategoryBody( 1084*0fca6ea1SDimitry Andric extInfo.mergedContainerName, catNameSym, extInfo.baseClass, 1085*0fca6ea1SDimitry Andric extInfo.baseClassName, extInfo.objFileForMergeData); 1086*0fca6ea1SDimitry Andric 1087*0fca6ea1SDimitry Andric Defined *catListSym = 1088*0fca6ea1SDimitry Andric emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName, 1089*0fca6ea1SDimitry Andric extInfo.objFileForMergeData); 1090*0fca6ea1SDimitry Andric 1091*0fca6ea1SDimitry Andric // Add the single category body to the category list at the offset 0. 1092*0fca6ea1SDimitry Andric createSymbolReference(catListSym, catBodySym, /*offset=*/0, 1093*0fca6ea1SDimitry Andric infoCategoryWriter.catListInfo.relocTemplate); 1094*0fca6ea1SDimitry Andric 1095*0fca6ea1SDimitry Andric emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset, extInfo, 1096*0fca6ea1SDimitry Andric extInfo.instanceMethods); 1097*0fca6ea1SDimitry Andric 1098*0fca6ea1SDimitry Andric emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo, 1099*0fca6ea1SDimitry Andric extInfo.classMethods); 1100*0fca6ea1SDimitry Andric 1101*0fca6ea1SDimitry Andric emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo, 1102*0fca6ea1SDimitry Andric extInfo.protocols); 1103*0fca6ea1SDimitry Andric 1104*0fca6ea1SDimitry Andric emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset, extInfo, 1105*0fca6ea1SDimitry Andric extInfo.instanceProps); 1106*0fca6ea1SDimitry Andric 1107*0fca6ea1SDimitry Andric emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo, 1108*0fca6ea1SDimitry Andric extInfo.classProps); 1109*0fca6ea1SDimitry Andric 1110*0fca6ea1SDimitry Andric return catBodySym; 1111*0fca6ea1SDimitry Andric } 1112*0fca6ea1SDimitry Andric 1113*0fca6ea1SDimitry Andric // This method merges all the categories (sharing a base class) into a single 1114*0fca6ea1SDimitry Andric // category. 1115*0fca6ea1SDimitry Andric void ObjcCategoryMerger::mergeCategoriesIntoSingleCategory( 1116*0fca6ea1SDimitry Andric std::vector<InfoInputCategory> &categories) { 1117*0fca6ea1SDimitry Andric assert(categories.size() > 1 && "Expected at least 2 categories"); 1118*0fca6ea1SDimitry Andric 1119*0fca6ea1SDimitry Andric ClassExtensionInfo extInfo(catLayout); 1120*0fca6ea1SDimitry Andric 1121*0fca6ea1SDimitry Andric for (auto &catInfo : categories) 1122*0fca6ea1SDimitry Andric parseCatInfoToExtInfo(catInfo, extInfo); 1123*0fca6ea1SDimitry Andric 1124*0fca6ea1SDimitry Andric Defined *newCatDef = emitCategory(extInfo); 1125*0fca6ea1SDimitry Andric assert(newCatDef && "Failed to create a new category"); 1126*0fca6ea1SDimitry Andric 1127*0fca6ea1SDimitry Andric // Suppress unsuded var warning 1128*0fca6ea1SDimitry Andric (void)newCatDef; 1129*0fca6ea1SDimitry Andric 1130*0fca6ea1SDimitry Andric for (auto &catInfo : categories) 1131*0fca6ea1SDimitry Andric catInfo.wasMerged = true; 1132*0fca6ea1SDimitry Andric } 1133*0fca6ea1SDimitry Andric 1134*0fca6ea1SDimitry Andric void ObjcCategoryMerger::createSymbolReference(Defined *refFrom, 1135*0fca6ea1SDimitry Andric const Symbol *refTo, 1136*0fca6ea1SDimitry Andric uint32_t offset, 1137*0fca6ea1SDimitry Andric const Reloc &relocTemplate) { 1138*0fca6ea1SDimitry Andric Reloc r = relocTemplate; 1139*0fca6ea1SDimitry Andric r.offset = offset; 1140*0fca6ea1SDimitry Andric r.addend = 0; 1141*0fca6ea1SDimitry Andric r.referent = const_cast<Symbol *>(refTo); 1142*0fca6ea1SDimitry Andric refFrom->isec()->relocs.push_back(r); 1143*0fca6ea1SDimitry Andric } 1144*0fca6ea1SDimitry Andric 1145*0fca6ea1SDimitry Andric // Get the list of categories in the '__objc_nlcatlist' section. We can't 1146*0fca6ea1SDimitry Andric // optimize these as they have a '+load' method that has to be called at 1147*0fca6ea1SDimitry Andric // runtime. 1148*0fca6ea1SDimitry Andric DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() { 1149*0fca6ea1SDimitry Andric DenseSet<const Symbol *> nlCategories; 1150*0fca6ea1SDimitry Andric 1151*0fca6ea1SDimitry Andric for (InputSection *sec : allInputSections) { 1152*0fca6ea1SDimitry Andric if (sec->getName() != section_names::objcNonLazyCatList) 1153*0fca6ea1SDimitry Andric continue; 1154*0fca6ea1SDimitry Andric 1155*0fca6ea1SDimitry Andric for (auto &r : sec->relocs) { 1156*0fca6ea1SDimitry Andric const Symbol *sym = r.referent.dyn_cast<Symbol *>(); 1157*0fca6ea1SDimitry Andric nlCategories.insert(sym); 1158*0fca6ea1SDimitry Andric } 1159*0fca6ea1SDimitry Andric } 1160*0fca6ea1SDimitry Andric return nlCategories; 1161*0fca6ea1SDimitry Andric } 1162*0fca6ea1SDimitry Andric 1163*0fca6ea1SDimitry Andric void ObjcCategoryMerger::collectAndValidateCategoriesData() { 1164*0fca6ea1SDimitry Andric auto nlCategories = collectNlCategories(); 1165*0fca6ea1SDimitry Andric 1166*0fca6ea1SDimitry Andric for (InputSection *sec : allInputSections) { 1167*0fca6ea1SDimitry Andric if (sec->getName() != section_names::objcCatList) 1168*0fca6ea1SDimitry Andric continue; 1169*0fca6ea1SDimitry Andric ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec); 1170*0fca6ea1SDimitry Andric assert(catListCisec && 1171*0fca6ea1SDimitry Andric "__objc_catList InputSection is not a ConcatInputSection"); 1172*0fca6ea1SDimitry Andric 1173*0fca6ea1SDimitry Andric for (uint32_t off = 0; off < catListCisec->getSize(); 1174*0fca6ea1SDimitry Andric off += target->wordSize) { 1175*0fca6ea1SDimitry Andric Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off); 1176*0fca6ea1SDimitry Andric assert(categorySym && 1177*0fca6ea1SDimitry Andric "Failed to get a valid category at __objc_catlit offset"); 1178*0fca6ea1SDimitry Andric 1179*0fca6ea1SDimitry Andric if (nlCategories.count(categorySym)) 1180*0fca6ea1SDimitry Andric continue; 1181*0fca6ea1SDimitry Andric 1182*0fca6ea1SDimitry Andric auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec()); 1183*0fca6ea1SDimitry Andric assert(catBodyIsec && 1184*0fca6ea1SDimitry Andric "Category data section is not an ConcatInputSection"); 1185*0fca6ea1SDimitry Andric 1186*0fca6ea1SDimitry Andric SourceLanguage eLang = SourceLanguage::Unknown; 1187*0fca6ea1SDimitry Andric if (categorySym->getName().starts_with(objc::symbol_names::category)) 1188*0fca6ea1SDimitry Andric eLang = SourceLanguage::ObjC; 1189*0fca6ea1SDimitry Andric else if (categorySym->getName().starts_with( 1190*0fca6ea1SDimitry Andric objc::symbol_names::swift_objc_category)) 1191*0fca6ea1SDimitry Andric eLang = SourceLanguage::Swift; 1192*0fca6ea1SDimitry Andric else 1193*0fca6ea1SDimitry Andric llvm_unreachable("Unexpected category symbol name"); 1194*0fca6ea1SDimitry Andric 1195*0fca6ea1SDimitry Andric InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang}; 1196*0fca6ea1SDimitry Andric 1197*0fca6ea1SDimitry Andric // Check that the category has a reloc at 'klassOffset' (which is 1198*0fca6ea1SDimitry Andric // a pointer to the class symbol) 1199*0fca6ea1SDimitry Andric 1200*0fca6ea1SDimitry Andric Symbol *classSym = 1201*0fca6ea1SDimitry Andric tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset); 1202*0fca6ea1SDimitry Andric assert(classSym && "Category does not have a valid base class"); 1203*0fca6ea1SDimitry Andric 1204*0fca6ea1SDimitry Andric categoryMap[classSym].push_back(catInputInfo); 1205*0fca6ea1SDimitry Andric 1206*0fca6ea1SDimitry Andric collectCategoryWriterInfoFromCategory(catInputInfo); 1207*0fca6ea1SDimitry Andric } 1208*0fca6ea1SDimitry Andric } 1209*0fca6ea1SDimitry Andric } 1210*0fca6ea1SDimitry Andric 1211*0fca6ea1SDimitry Andric // In the input we have multiple __objc_catlist InputSection, each of which may 1212*0fca6ea1SDimitry Andric // contain links to multiple categories. Of these categories, we will merge (and 1213*0fca6ea1SDimitry Andric // erase) only some. There will be some categories that will remain untouched 1214*0fca6ea1SDimitry Andric // (not erased). For these not erased categories, we generate new __objc_catlist 1215*0fca6ea1SDimitry Andric // entries since the parent __objc_catlist entry will be erased 1216*0fca6ea1SDimitry Andric void ObjcCategoryMerger::generateCatListForNonErasedCategories( 1217*0fca6ea1SDimitry Andric const MapVector<ConcatInputSection *, std::set<uint64_t>> 1218*0fca6ea1SDimitry Andric catListToErasedOffsets) { 1219*0fca6ea1SDimitry Andric 1220*0fca6ea1SDimitry Andric // Go through all offsets of all __objc_catlist's that we process and if there 1221*0fca6ea1SDimitry Andric // are categories that we didn't process - generate a new __objc_catlist for 1222*0fca6ea1SDimitry Andric // each. 1223*0fca6ea1SDimitry Andric for (auto &mapEntry : catListToErasedOffsets) { 1224*0fca6ea1SDimitry Andric ConcatInputSection *catListIsec = mapEntry.first; 1225*0fca6ea1SDimitry Andric for (uint32_t catListIsecOffset = 0; 1226*0fca6ea1SDimitry Andric catListIsecOffset < catListIsec->data.size(); 1227*0fca6ea1SDimitry Andric catListIsecOffset += target->wordSize) { 1228*0fca6ea1SDimitry Andric // This slot was erased, we can just skip it 1229*0fca6ea1SDimitry Andric if (mapEntry.second.count(catListIsecOffset)) 1230*0fca6ea1SDimitry Andric continue; 1231*0fca6ea1SDimitry Andric 1232*0fca6ea1SDimitry Andric Defined *nonErasedCatBody = 1233*0fca6ea1SDimitry Andric tryGetDefinedAtIsecOffset(catListIsec, catListIsecOffset); 1234*0fca6ea1SDimitry Andric assert(nonErasedCatBody && "Failed to relocate non-deleted category"); 1235*0fca6ea1SDimitry Andric 1236*0fca6ea1SDimitry Andric // Allocate data for the new __objc_catlist slot 1237*0fca6ea1SDimitry Andric llvm::ArrayRef<uint8_t> bodyData = newSectionData(target->wordSize); 1238*0fca6ea1SDimitry Andric 1239*0fca6ea1SDimitry Andric // We mark the __objc_catlist slot as belonging to the same file as the 1240*0fca6ea1SDimitry Andric // category 1241*0fca6ea1SDimitry Andric ObjFile *objFile = dyn_cast<ObjFile>(nonErasedCatBody->getFile()); 1242*0fca6ea1SDimitry Andric 1243*0fca6ea1SDimitry Andric ConcatInputSection *listSec = make<ConcatInputSection>( 1244*0fca6ea1SDimitry Andric *infoCategoryWriter.catListInfo.inputSection, bodyData, 1245*0fca6ea1SDimitry Andric infoCategoryWriter.catListInfo.align); 1246*0fca6ea1SDimitry Andric listSec->parent = infoCategoryWriter.catListInfo.outputSection; 1247*0fca6ea1SDimitry Andric listSec->live = true; 1248*0fca6ea1SDimitry Andric addInputSection(listSec); 1249*0fca6ea1SDimitry Andric 1250*0fca6ea1SDimitry Andric std::string slotSymName = "<__objc_catlist slot for category "; 1251*0fca6ea1SDimitry Andric slotSymName += nonErasedCatBody->getName(); 1252*0fca6ea1SDimitry Andric slotSymName += ">"; 1253*0fca6ea1SDimitry Andric 1254*0fca6ea1SDimitry Andric Defined *catListSlotSym = make<Defined>( 1255*0fca6ea1SDimitry Andric newStringData(slotSymName.c_str()), /*file=*/objFile, listSec, 1256*0fca6ea1SDimitry Andric /*value=*/0, bodyData.size(), 1257*0fca6ea1SDimitry Andric /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false, 1258*0fca6ea1SDimitry Andric /*includeInSymtab=*/false, /*isReferencedDynamically=*/false, 1259*0fca6ea1SDimitry Andric /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false); 1260*0fca6ea1SDimitry Andric 1261*0fca6ea1SDimitry Andric catListSlotSym->used = true; 1262*0fca6ea1SDimitry Andric objFile->symbols.push_back(catListSlotSym); 1263*0fca6ea1SDimitry Andric 1264*0fca6ea1SDimitry Andric // Now link the category body into the newly created slot 1265*0fca6ea1SDimitry Andric createSymbolReference(catListSlotSym, nonErasedCatBody, 0, 1266*0fca6ea1SDimitry Andric infoCategoryWriter.catListInfo.relocTemplate); 1267*0fca6ea1SDimitry Andric } 1268*0fca6ea1SDimitry Andric } 1269*0fca6ea1SDimitry Andric } 1270*0fca6ea1SDimitry Andric 1271*0fca6ea1SDimitry Andric void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) { 1272*0fca6ea1SDimitry Andric isec->live = false; 1273*0fca6ea1SDimitry Andric for (auto &sym : isec->symbols) 1274*0fca6ea1SDimitry Andric sym->used = false; 1275*0fca6ea1SDimitry Andric } 1276*0fca6ea1SDimitry Andric 1277*0fca6ea1SDimitry Andric // This fully erases the merged categories, including their body, their names, 1278*0fca6ea1SDimitry Andric // their method/protocol/prop lists and the __objc_catlist entries that link to 1279*0fca6ea1SDimitry Andric // them. 1280*0fca6ea1SDimitry Andric void ObjcCategoryMerger::eraseMergedCategories() { 1281*0fca6ea1SDimitry Andric // Map of InputSection to a set of offsets of the categories that were merged 1282*0fca6ea1SDimitry Andric MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets; 1283*0fca6ea1SDimitry Andric 1284*0fca6ea1SDimitry Andric for (auto &mapEntry : categoryMap) { 1285*0fca6ea1SDimitry Andric for (InfoInputCategory &catInfo : mapEntry.second) { 1286*0fca6ea1SDimitry Andric if (catInfo.wasMerged) { 1287*0fca6ea1SDimitry Andric eraseISec(catInfo.catListIsec); 1288*0fca6ea1SDimitry Andric catListToErasedOffsets[catInfo.catListIsec].insert( 1289*0fca6ea1SDimitry Andric catInfo.offCatListIsec); 1290*0fca6ea1SDimitry Andric } 1291*0fca6ea1SDimitry Andric } 1292*0fca6ea1SDimitry Andric } 1293*0fca6ea1SDimitry Andric 1294*0fca6ea1SDimitry Andric // If there were categories that we did not erase, we need to generate a new 1295*0fca6ea1SDimitry Andric // __objc_catList that contains only the un-merged categories, and get rid of 1296*0fca6ea1SDimitry Andric // the references to the ones we merged. 1297*0fca6ea1SDimitry Andric generateCatListForNonErasedCategories(catListToErasedOffsets); 1298*0fca6ea1SDimitry Andric 1299*0fca6ea1SDimitry Andric // Erase the old method lists & names of the categories that were merged 1300*0fca6ea1SDimitry Andric for (auto &mapEntry : categoryMap) { 1301*0fca6ea1SDimitry Andric for (InfoInputCategory &catInfo : mapEntry.second) { 1302*0fca6ea1SDimitry Andric if (!catInfo.wasMerged) 1303*0fca6ea1SDimitry Andric continue; 1304*0fca6ea1SDimitry Andric 1305*0fca6ea1SDimitry Andric eraseISec(catInfo.catBodyIsec); 1306*0fca6ea1SDimitry Andric 1307*0fca6ea1SDimitry Andric // We can't erase 'catLayout.nameOffset' for either Swift or ObjC 1308*0fca6ea1SDimitry Andric // categories because the name will sometimes also be used for other 1309*0fca6ea1SDimitry Andric // purposes. 1310*0fca6ea1SDimitry Andric // For Swift, see usages of 'l_.str.11.SimpleClass' in 1311*0fca6ea1SDimitry Andric // objc-category-merging-swift.s 1312*0fca6ea1SDimitry Andric // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in 1313*0fca6ea1SDimitry Andric // objc-category-merging-erase-objc-name-test.s 1314*0fca6ea1SDimitry Andric // TODO: handle the above in a smarter way 1315*0fca6ea1SDimitry Andric 1316*0fca6ea1SDimitry Andric tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1317*0fca6ea1SDimitry Andric catLayout.instanceMethodsOffset); 1318*0fca6ea1SDimitry Andric tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1319*0fca6ea1SDimitry Andric catLayout.classMethodsOffset); 1320*0fca6ea1SDimitry Andric tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1321*0fca6ea1SDimitry Andric catLayout.protocolsOffset); 1322*0fca6ea1SDimitry Andric tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1323*0fca6ea1SDimitry Andric catLayout.classPropsOffset); 1324*0fca6ea1SDimitry Andric tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec, 1325*0fca6ea1SDimitry Andric catLayout.instancePropsOffset); 1326*0fca6ea1SDimitry Andric } 1327*0fca6ea1SDimitry Andric } 1328*0fca6ea1SDimitry Andric } 1329*0fca6ea1SDimitry Andric 1330*0fca6ea1SDimitry Andric void ObjcCategoryMerger::doMerge() { 1331*0fca6ea1SDimitry Andric collectAndValidateCategoriesData(); 1332*0fca6ea1SDimitry Andric 1333*0fca6ea1SDimitry Andric for (auto &[baseClass, catInfos] : categoryMap) { 1334*0fca6ea1SDimitry Andric if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) { 1335*0fca6ea1SDimitry Andric // Merge all categories into the base class 1336*0fca6ea1SDimitry Andric mergeCategoriesIntoBaseClass(baseClassDef, catInfos); 1337*0fca6ea1SDimitry Andric } else if (catInfos.size() > 1) { 1338*0fca6ea1SDimitry Andric // Merge all categories into a new, single category 1339*0fca6ea1SDimitry Andric mergeCategoriesIntoSingleCategory(catInfos); 1340*0fca6ea1SDimitry Andric } 1341*0fca6ea1SDimitry Andric } 1342*0fca6ea1SDimitry Andric 1343*0fca6ea1SDimitry Andric // Erase all categories that were merged 1344*0fca6ea1SDimitry Andric eraseMergedCategories(); 1345*0fca6ea1SDimitry Andric } 1346*0fca6ea1SDimitry Andric 1347*0fca6ea1SDimitry Andric void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); } 1348*0fca6ea1SDimitry Andric 1349*0fca6ea1SDimitry Andric StringRef ObjcCategoryMerger::newStringData(const char *str) { 1350*0fca6ea1SDimitry Andric uint32_t len = strlen(str); 1351*0fca6ea1SDimitry Andric uint32_t bufSize = len + 1; 1352*0fca6ea1SDimitry Andric SmallVector<uint8_t> &data = newSectionData(bufSize); 1353*0fca6ea1SDimitry Andric char *strData = reinterpret_cast<char *>(data.data()); 1354*0fca6ea1SDimitry Andric // Copy the string chars and null-terminator 1355*0fca6ea1SDimitry Andric memcpy(strData, str, bufSize); 1356*0fca6ea1SDimitry Andric return StringRef(strData, len); 1357*0fca6ea1SDimitry Andric } 1358*0fca6ea1SDimitry Andric 1359*0fca6ea1SDimitry Andric SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) { 1360*0fca6ea1SDimitry Andric generatedSectionData.push_back( 1361*0fca6ea1SDimitry Andric std::make_unique<SmallVector<uint8_t>>(size, 0)); 1362*0fca6ea1SDimitry Andric return *generatedSectionData.back(); 1363*0fca6ea1SDimitry Andric } 1364*0fca6ea1SDimitry Andric 1365*0fca6ea1SDimitry Andric } // namespace 1366*0fca6ea1SDimitry Andric 1367*0fca6ea1SDimitry Andric void objc::mergeCategories() { 1368*0fca6ea1SDimitry Andric TimeTraceScope timeScope("ObjcCategoryMerger"); 1369*0fca6ea1SDimitry Andric 1370*0fca6ea1SDimitry Andric ObjcCategoryMerger merger(inputSections); 1371*0fca6ea1SDimitry Andric merger.doMerge(); 1372*0fca6ea1SDimitry Andric } 1373*0fca6ea1SDimitry Andric 1374*0fca6ea1SDimitry Andric void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); } 1375*0fca6ea1SDimitry Andric 1376*0fca6ea1SDimitry Andric ObjcCategoryMerger::SourceLanguage 1377*0fca6ea1SDimitry Andric ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) { 1378*0fca6ea1SDimitry Andric if (classSym->getName().starts_with(objc::symbol_names::swift_objc_klass)) 1379*0fca6ea1SDimitry Andric return SourceLanguage::Swift; 1380*0fca6ea1SDimitry Andric 1381*0fca6ea1SDimitry Andric // If the symbol name matches the ObjC prefix, we don't necessarely know this 1382*0fca6ea1SDimitry Andric // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift 1383*0fca6ea1SDimitry Andric // classes. Ex: 1384*0fca6ea1SDimitry Andric // .globl _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass 1385*0fca6ea1SDimitry Andric // .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass 1386*0fca6ea1SDimitry Andric // .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN 1387*0fca6ea1SDimitry Andric // 1388*0fca6ea1SDimitry Andric // So we scan for symbols with the same address and check for the Swift class 1389*0fca6ea1SDimitry Andric if (classSym->getName().starts_with(objc::symbol_names::klass)) { 1390*0fca6ea1SDimitry Andric for (auto &sym : classSym->originalIsec->symbols) 1391*0fca6ea1SDimitry Andric if (sym->value == classSym->value) 1392*0fca6ea1SDimitry Andric if (sym->getName().starts_with(objc::symbol_names::swift_objc_klass)) 1393*0fca6ea1SDimitry Andric return SourceLanguage::Swift; 1394*0fca6ea1SDimitry Andric return SourceLanguage::ObjC; 1395*0fca6ea1SDimitry Andric } 1396*0fca6ea1SDimitry Andric 1397*0fca6ea1SDimitry Andric llvm_unreachable("Unexpected class symbol name during category merging"); 1398*0fca6ea1SDimitry Andric } 1399*0fca6ea1SDimitry Andric void ObjcCategoryMerger::mergeCategoriesIntoBaseClass( 1400*0fca6ea1SDimitry Andric const Defined *baseClass, std::vector<InfoInputCategory> &categories) { 1401*0fca6ea1SDimitry Andric assert(categories.size() >= 1 && "Expected at least one category to merge"); 1402*0fca6ea1SDimitry Andric 1403*0fca6ea1SDimitry Andric // Collect all the info from the categories 1404*0fca6ea1SDimitry Andric ClassExtensionInfo extInfo(catLayout); 1405*0fca6ea1SDimitry Andric extInfo.baseClass = baseClass; 1406*0fca6ea1SDimitry Andric extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass); 1407*0fca6ea1SDimitry Andric 1408*0fca6ea1SDimitry Andric for (auto &catInfo : categories) { 1409*0fca6ea1SDimitry Andric parseCatInfoToExtInfo(catInfo, extInfo); 1410*0fca6ea1SDimitry Andric } 1411*0fca6ea1SDimitry Andric 1412*0fca6ea1SDimitry Andric // Get metadata for the base class 1413*0fca6ea1SDimitry Andric Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true); 1414*0fca6ea1SDimitry Andric ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec()); 1415*0fca6ea1SDimitry Andric Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false); 1416*0fca6ea1SDimitry Andric ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec()); 1417*0fca6ea1SDimitry Andric 1418*0fca6ea1SDimitry Andric // Now collect the info from the base class from the various lists in the 1419*0fca6ea1SDimitry Andric // class metadata 1420*0fca6ea1SDimitry Andric 1421*0fca6ea1SDimitry Andric // Protocol lists are a special case - the same protocol list is in classRo 1422*0fca6ea1SDimitry Andric // and metaRo, so we only need to parse it once 1423*0fca6ea1SDimitry Andric parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset, 1424*0fca6ea1SDimitry Andric extInfo.protocols, extInfo.baseClassSourceLanguage); 1425*0fca6ea1SDimitry Andric 1426*0fca6ea1SDimitry Andric // Check that the classRo and metaRo protocol lists are identical 1427*0fca6ea1SDimitry Andric assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset, 1428*0fca6ea1SDimitry Andric extInfo.baseClassSourceLanguage) == 1429*0fca6ea1SDimitry Andric parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset, 1430*0fca6ea1SDimitry Andric extInfo.baseClassSourceLanguage) && 1431*0fca6ea1SDimitry Andric "Category merger expects classRo and metaRo to have the same protocol " 1432*0fca6ea1SDimitry Andric "list"); 1433*0fca6ea1SDimitry Andric 1434*0fca6ea1SDimitry Andric parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset, 1435*0fca6ea1SDimitry Andric extInfo.classMethods); 1436*0fca6ea1SDimitry Andric parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset, 1437*0fca6ea1SDimitry Andric extInfo.instanceMethods); 1438*0fca6ea1SDimitry Andric 1439*0fca6ea1SDimitry Andric parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset, 1440*0fca6ea1SDimitry Andric extInfo.classProps); 1441*0fca6ea1SDimitry Andric parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset, 1442*0fca6ea1SDimitry Andric extInfo.instanceProps); 1443*0fca6ea1SDimitry Andric 1444*0fca6ea1SDimitry Andric // Erase the old lists - these will be generated and replaced 1445*0fca6ea1SDimitry Andric eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset); 1446*0fca6ea1SDimitry Andric eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset); 1447*0fca6ea1SDimitry Andric eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset); 1448*0fca6ea1SDimitry Andric eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset); 1449*0fca6ea1SDimitry Andric eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset); 1450*0fca6ea1SDimitry Andric eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset); 1451*0fca6ea1SDimitry Andric 1452*0fca6ea1SDimitry Andric // Emit the newly merged lists - first into the meta RO then into the class RO 1453*0fca6ea1SDimitry Andric // First we emit and link the protocol list into the meta RO. Then we link it 1454*0fca6ea1SDimitry Andric // in the classRo as well (they're supposed to be identical) 1455*0fca6ea1SDimitry Andric if (Defined *protoListSym = 1456*0fca6ea1SDimitry Andric emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset, 1457*0fca6ea1SDimitry Andric extInfo, extInfo.protocols)) { 1458*0fca6ea1SDimitry Andric createSymbolReference(classRo, protoListSym, 1459*0fca6ea1SDimitry Andric roClassLayout.baseProtocolsOffset, 1460*0fca6ea1SDimitry Andric infoCategoryWriter.catBodyInfo.relocTemplate); 1461*0fca6ea1SDimitry Andric } 1462*0fca6ea1SDimitry Andric 1463*0fca6ea1SDimitry Andric emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo, 1464*0fca6ea1SDimitry Andric extInfo.classMethods); 1465*0fca6ea1SDimitry Andric emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo, 1466*0fca6ea1SDimitry Andric extInfo.instanceMethods); 1467*0fca6ea1SDimitry Andric 1468*0fca6ea1SDimitry Andric emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo, 1469*0fca6ea1SDimitry Andric extInfo.classProps); 1470*0fca6ea1SDimitry Andric 1471*0fca6ea1SDimitry Andric emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo, 1472*0fca6ea1SDimitry Andric extInfo.instanceProps); 1473*0fca6ea1SDimitry Andric 1474*0fca6ea1SDimitry Andric // Mark all the categories as merged - this will be used to erase them later 1475*0fca6ea1SDimitry Andric for (auto &catInfo : categories) 1476*0fca6ea1SDimitry Andric catInfo.wasMerged = true; 1477*0fca6ea1SDimitry Andric } 1478*0fca6ea1SDimitry Andric 1479*0fca6ea1SDimitry Andric // Erase the symbol at a given offset in an InputSection 1480*0fca6ea1SDimitry Andric void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec, 1481*0fca6ea1SDimitry Andric uint32_t offset) { 1482*0fca6ea1SDimitry Andric Defined *sym = tryGetDefinedAtIsecOffset(isec, offset); 1483*0fca6ea1SDimitry Andric if (!sym) 1484*0fca6ea1SDimitry Andric return; 1485*0fca6ea1SDimitry Andric 1486*0fca6ea1SDimitry Andric // Remove the symbol from isec->symbols 1487*0fca6ea1SDimitry Andric assert(isa<Defined>(sym) && "Can only erase a Defined"); 1488*0fca6ea1SDimitry Andric llvm::erase(isec->symbols, sym); 1489*0fca6ea1SDimitry Andric 1490*0fca6ea1SDimitry Andric // Remove the relocs that refer to this symbol 1491*0fca6ea1SDimitry Andric auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; }; 1492*0fca6ea1SDimitry Andric llvm::erase_if(isec->relocs, removeAtOff); 1493*0fca6ea1SDimitry Andric 1494*0fca6ea1SDimitry Andric // Now, if the symbol fully occupies a ConcatInputSection, we can also erase 1495*0fca6ea1SDimitry Andric // the whole ConcatInputSection 1496*0fca6ea1SDimitry Andric if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec())) 1497*0fca6ea1SDimitry Andric if (cisec->data.size() == sym->size) 1498*0fca6ea1SDimitry Andric eraseISec(cisec); 1499*0fca6ea1SDimitry Andric } 1500