xref: /freebsd/contrib/llvm-project/lld/MachO/ObjC.cpp (revision 6c4b055cfb6bf549e9145dde6454cc6b178c35e4)
1e8d8bef9SDimitry Andric //===- ObjC.cpp -----------------------------------------------------------===//
2e8d8bef9SDimitry Andric //
3e8d8bef9SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e8d8bef9SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5e8d8bef9SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e8d8bef9SDimitry Andric //
7e8d8bef9SDimitry Andric //===----------------------------------------------------------------------===//
8e8d8bef9SDimitry Andric 
9e8d8bef9SDimitry Andric #include "ObjC.h"
100fca6ea1SDimitry Andric #include "ConcatOutputSection.h"
11e8d8bef9SDimitry Andric #include "InputFiles.h"
12fe6060f1SDimitry Andric #include "InputSection.h"
1306c3fb27SDimitry Andric #include "Layout.h"
14e8d8bef9SDimitry Andric #include "OutputSegment.h"
150fca6ea1SDimitry Andric #include "SyntheticSections.h"
16fe6060f1SDimitry Andric #include "Target.h"
17e8d8bef9SDimitry Andric 
18bdd1243dSDimitry Andric #include "lld/Common/ErrorHandler.h"
1906c3fb27SDimitry Andric #include "llvm/ADT/DenseMap.h"
20e8d8bef9SDimitry Andric #include "llvm/BinaryFormat/MachO.h"
21349cc55cSDimitry Andric #include "llvm/Bitcode/BitcodeReader.h"
220fca6ea1SDimitry Andric #include "llvm/Support/TimeProfiler.h"
23e8d8bef9SDimitry Andric 
24e8d8bef9SDimitry Andric using namespace llvm;
25e8d8bef9SDimitry Andric using namespace llvm::MachO;
26e8d8bef9SDimitry Andric using namespace lld;
27fe6060f1SDimitry Andric using namespace lld::macho;
28e8d8bef9SDimitry Andric 
objectHasObjCSection(MemoryBufferRef mb)29349cc55cSDimitry Andric template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {
30349cc55cSDimitry Andric   using SectionHeader = typename LP::section;
31fe6060f1SDimitry Andric 
32fe6060f1SDimitry Andric   auto *hdr =
33fe6060f1SDimitry Andric       reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart());
34fe6060f1SDimitry Andric   if (hdr->magic != LP::magic)
35fe6060f1SDimitry Andric     return false;
36fe6060f1SDimitry Andric 
37fe6060f1SDimitry Andric   if (const auto *c =
38fe6060f1SDimitry Andric           findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) {
39349cc55cSDimitry Andric     auto sectionHeaders = ArrayRef<SectionHeader>{
40349cc55cSDimitry Andric         reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
41349cc55cSDimitry Andric     for (const SectionHeader &secHead : sectionHeaders) {
42349cc55cSDimitry Andric       StringRef sectname(secHead.sectname,
43349cc55cSDimitry Andric                          strnlen(secHead.sectname, sizeof(secHead.sectname)));
44349cc55cSDimitry Andric       StringRef segname(secHead.segname,
45349cc55cSDimitry Andric                         strnlen(secHead.segname, sizeof(secHead.segname)));
46fe6060f1SDimitry Andric       if ((segname == segment_names::data &&
47fe6060f1SDimitry Andric            sectname == section_names::objcCatList) ||
48fe6060f1SDimitry Andric           (segname == segment_names::text &&
4906c3fb27SDimitry Andric            sectname.starts_with(section_names::swift))) {
50e8d8bef9SDimitry Andric         return true;
51e8d8bef9SDimitry Andric       }
52e8d8bef9SDimitry Andric     }
53e8d8bef9SDimitry Andric   }
54e8d8bef9SDimitry Andric   return false;
55e8d8bef9SDimitry Andric }
56fe6060f1SDimitry Andric 
objectHasObjCSection(MemoryBufferRef mb)57349cc55cSDimitry Andric static bool objectHasObjCSection(MemoryBufferRef mb) {
58fe6060f1SDimitry Andric   if (target->wordSize == 8)
59349cc55cSDimitry Andric     return ::objectHasObjCSection<LP64>(mb);
60fe6060f1SDimitry Andric   else
61349cc55cSDimitry Andric     return ::objectHasObjCSection<ILP32>(mb);
62349cc55cSDimitry Andric }
63349cc55cSDimitry Andric 
hasObjCSection(MemoryBufferRef mb)64349cc55cSDimitry Andric bool macho::hasObjCSection(MemoryBufferRef mb) {
65349cc55cSDimitry Andric   switch (identify_magic(mb.getBuffer())) {
66349cc55cSDimitry Andric   case file_magic::macho_object:
67349cc55cSDimitry Andric     return objectHasObjCSection(mb);
68349cc55cSDimitry Andric   case file_magic::bitcode:
69349cc55cSDimitry Andric     return check(isBitcodeContainingObjCCategory(mb));
70349cc55cSDimitry Andric   default:
71349cc55cSDimitry Andric     return false;
72349cc55cSDimitry Andric   }
73fe6060f1SDimitry Andric }
7406c3fb27SDimitry Andric 
7506c3fb27SDimitry Andric namespace {
7606c3fb27SDimitry Andric 
7706c3fb27SDimitry Andric #define FOR_EACH_CATEGORY_FIELD(DO)                                            \
7806c3fb27SDimitry Andric   DO(Ptr, name)                                                                \
7906c3fb27SDimitry Andric   DO(Ptr, klass)                                                               \
8006c3fb27SDimitry Andric   DO(Ptr, instanceMethods)                                                     \
8106c3fb27SDimitry Andric   DO(Ptr, classMethods)                                                        \
8206c3fb27SDimitry Andric   DO(Ptr, protocols)                                                           \
8306c3fb27SDimitry Andric   DO(Ptr, instanceProps)                                                       \
840fca6ea1SDimitry Andric   DO(Ptr, classProps)                                                          \
850fca6ea1SDimitry Andric   DO(uint32_t, size)
8606c3fb27SDimitry Andric 
8706c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD);
8806c3fb27SDimitry Andric 
8906c3fb27SDimitry Andric #undef FOR_EACH_CATEGORY_FIELD
9006c3fb27SDimitry Andric 
9106c3fb27SDimitry Andric #define FOR_EACH_CLASS_FIELD(DO)                                               \
9206c3fb27SDimitry Andric   DO(Ptr, metaClass)                                                           \
9306c3fb27SDimitry Andric   DO(Ptr, superClass)                                                          \
9406c3fb27SDimitry Andric   DO(Ptr, methodCache)                                                         \
9506c3fb27SDimitry Andric   DO(Ptr, vtable)                                                              \
9606c3fb27SDimitry Andric   DO(Ptr, roData)
9706c3fb27SDimitry Andric 
9806c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD);
9906c3fb27SDimitry Andric 
10006c3fb27SDimitry Andric #undef FOR_EACH_CLASS_FIELD
10106c3fb27SDimitry Andric 
10206c3fb27SDimitry Andric #define FOR_EACH_RO_CLASS_FIELD(DO)                                            \
10306c3fb27SDimitry Andric   DO(uint32_t, flags)                                                          \
10406c3fb27SDimitry Andric   DO(uint32_t, instanceStart)                                                  \
10506c3fb27SDimitry Andric   DO(Ptr, instanceSize)                                                        \
10606c3fb27SDimitry Andric   DO(Ptr, ivarLayout)                                                          \
10706c3fb27SDimitry Andric   DO(Ptr, name)                                                                \
10806c3fb27SDimitry Andric   DO(Ptr, baseMethods)                                                         \
10906c3fb27SDimitry Andric   DO(Ptr, baseProtocols)                                                       \
11006c3fb27SDimitry Andric   DO(Ptr, ivars)                                                               \
11106c3fb27SDimitry Andric   DO(Ptr, weakIvarLayout)                                                      \
11206c3fb27SDimitry Andric   DO(Ptr, baseProperties)
11306c3fb27SDimitry Andric 
11406c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD);
11506c3fb27SDimitry Andric 
11606c3fb27SDimitry Andric #undef FOR_EACH_RO_CLASS_FIELD
11706c3fb27SDimitry Andric 
11806c3fb27SDimitry Andric #define FOR_EACH_LIST_HEADER(DO)                                               \
1190fca6ea1SDimitry Andric   DO(uint32_t, structSize)                                                     \
1200fca6ea1SDimitry Andric   DO(uint32_t, structCount)
12106c3fb27SDimitry Andric 
12206c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER);
12306c3fb27SDimitry Andric 
12406c3fb27SDimitry Andric #undef FOR_EACH_LIST_HEADER
12506c3fb27SDimitry Andric 
1260fca6ea1SDimitry Andric #define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount)
1270fca6ea1SDimitry Andric 
1280fca6ea1SDimitry Andric CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER);
1290fca6ea1SDimitry Andric 
1300fca6ea1SDimitry Andric #undef FOR_EACH_PROTOCOL_LIST_HEADER
1310fca6ea1SDimitry Andric 
13206c3fb27SDimitry Andric #define FOR_EACH_METHOD(DO)                                                    \
13306c3fb27SDimitry Andric   DO(Ptr, name)                                                                \
13406c3fb27SDimitry Andric   DO(Ptr, type)                                                                \
13506c3fb27SDimitry Andric   DO(Ptr, impl)
13606c3fb27SDimitry Andric 
13706c3fb27SDimitry Andric CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD);
13806c3fb27SDimitry Andric 
13906c3fb27SDimitry Andric #undef FOR_EACH_METHOD
14006c3fb27SDimitry Andric 
14106c3fb27SDimitry Andric enum MethodContainerKind {
14206c3fb27SDimitry Andric   MCK_Class,
14306c3fb27SDimitry Andric   MCK_Category,
14406c3fb27SDimitry Andric };
14506c3fb27SDimitry Andric 
14606c3fb27SDimitry Andric struct MethodContainer {
14706c3fb27SDimitry Andric   MethodContainerKind kind;
14806c3fb27SDimitry Andric   const ConcatInputSection *isec;
14906c3fb27SDimitry Andric };
15006c3fb27SDimitry Andric 
15106c3fb27SDimitry Andric enum MethodKind {
15206c3fb27SDimitry Andric   MK_Instance,
15306c3fb27SDimitry Andric   MK_Static,
15406c3fb27SDimitry Andric };
15506c3fb27SDimitry Andric 
15606c3fb27SDimitry Andric struct ObjcClass {
15706c3fb27SDimitry Andric   DenseMap<CachedHashStringRef, MethodContainer> instanceMethods;
15806c3fb27SDimitry Andric   DenseMap<CachedHashStringRef, MethodContainer> classMethods;
15906c3fb27SDimitry Andric };
16006c3fb27SDimitry Andric 
16106c3fb27SDimitry Andric } // namespace
16206c3fb27SDimitry Andric 
16306c3fb27SDimitry Andric class ObjcCategoryChecker {
16406c3fb27SDimitry Andric public:
16506c3fb27SDimitry Andric   ObjcCategoryChecker();
16606c3fb27SDimitry Andric   void parseCategory(const ConcatInputSection *catListIsec);
16706c3fb27SDimitry Andric 
16806c3fb27SDimitry Andric private:
16906c3fb27SDimitry Andric   void parseClass(const Defined *classSym);
17006c3fb27SDimitry Andric   void parseMethods(const ConcatInputSection *methodsIsec,
17106c3fb27SDimitry Andric                     const Symbol *methodContainer,
17206c3fb27SDimitry Andric                     const ConcatInputSection *containerIsec,
17306c3fb27SDimitry Andric                     MethodContainerKind, MethodKind);
17406c3fb27SDimitry Andric 
17506c3fb27SDimitry Andric   CategoryLayout catLayout;
17606c3fb27SDimitry Andric   ClassLayout classLayout;
17706c3fb27SDimitry Andric   ROClassLayout roClassLayout;
17806c3fb27SDimitry Andric   ListHeaderLayout listHeaderLayout;
17906c3fb27SDimitry Andric   MethodLayout methodLayout;
18006c3fb27SDimitry Andric 
18106c3fb27SDimitry Andric   DenseMap<const Symbol *, ObjcClass> classMap;
18206c3fb27SDimitry Andric };
18306c3fb27SDimitry Andric 
ObjcCategoryChecker()18406c3fb27SDimitry Andric ObjcCategoryChecker::ObjcCategoryChecker()
18506c3fb27SDimitry Andric     : catLayout(target->wordSize), classLayout(target->wordSize),
18606c3fb27SDimitry Andric       roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
18706c3fb27SDimitry Andric       methodLayout(target->wordSize) {}
18806c3fb27SDimitry Andric 
1890fca6ea1SDimitry Andric // \p r must point to an offset within a CStringInputSection or a
1900fca6ea1SDimitry Andric // ConcatInputSection
getReferentString(const Reloc & r)19106c3fb27SDimitry Andric static StringRef getReferentString(const Reloc &r) {
19206c3fb27SDimitry Andric   if (auto *isec = r.referent.dyn_cast<InputSection *>())
19306c3fb27SDimitry Andric     return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend);
1940fca6ea1SDimitry Andric 
19506c3fb27SDimitry Andric   auto *sym = cast<Defined>(r.referent.get<Symbol *>());
1960fca6ea1SDimitry Andric   auto *symIsec = sym->isec();
1970fca6ea1SDimitry Andric   auto symOffset = sym->value + r.addend;
1980fca6ea1SDimitry Andric 
1990fca6ea1SDimitry Andric   if (auto *s = dyn_cast_or_null<CStringInputSection>(symIsec))
2000fca6ea1SDimitry Andric     return s->getStringRefAtOffset(symOffset);
2010fca6ea1SDimitry Andric 
2020fca6ea1SDimitry Andric   if (isa<ConcatInputSection>(symIsec)) {
2030fca6ea1SDimitry Andric     auto strData = symIsec->data.slice(symOffset);
2040fca6ea1SDimitry Andric     const char *pszData = reinterpret_cast<const char *>(strData.data());
2050fca6ea1SDimitry Andric     return StringRef(pszData, strnlen(pszData, strData.size()));
2060fca6ea1SDimitry Andric   }
2070fca6ea1SDimitry Andric 
2080fca6ea1SDimitry Andric   llvm_unreachable("unknown reference section in getReferentString");
20906c3fb27SDimitry Andric }
21006c3fb27SDimitry Andric 
parseMethods(const ConcatInputSection * methodsIsec,const Symbol * methodContainerSym,const ConcatInputSection * containerIsec,MethodContainerKind mcKind,MethodKind mKind)21106c3fb27SDimitry Andric void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
21206c3fb27SDimitry Andric                                        const Symbol *methodContainerSym,
21306c3fb27SDimitry Andric                                        const ConcatInputSection *containerIsec,
21406c3fb27SDimitry Andric                                        MethodContainerKind mcKind,
21506c3fb27SDimitry Andric                                        MethodKind mKind) {
21606c3fb27SDimitry Andric   ObjcClass &klass = classMap[methodContainerSym];
21706c3fb27SDimitry Andric   for (const Reloc &r : methodsIsec->relocs) {
21806c3fb27SDimitry Andric     if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize !=
21906c3fb27SDimitry Andric         methodLayout.nameOffset)
22006c3fb27SDimitry Andric       continue;
22106c3fb27SDimitry Andric 
22206c3fb27SDimitry Andric     CachedHashStringRef methodName(getReferentString(r));
22306c3fb27SDimitry Andric     // +load methods are special: all implementations are called by the runtime
22406c3fb27SDimitry Andric     // even if they are part of the same class. Thus there is no need to check
22506c3fb27SDimitry Andric     // for duplicates.
22606c3fb27SDimitry Andric     // NOTE: Instead of specifically checking for this method name, ld64 simply
22706c3fb27SDimitry Andric     // checks whether a class / category is present in __objc_nlclslist /
22806c3fb27SDimitry Andric     // __objc_nlcatlist respectively. This will be the case if the class /
22906c3fb27SDimitry Andric     // category has a +load method. It skips optimizing the categories if there
23006c3fb27SDimitry Andric     // are multiple +load methods. Since it does dupe checking as part of the
23106c3fb27SDimitry Andric     // optimization process, this avoids spurious dupe messages around +load,
23206c3fb27SDimitry Andric     // but it also means that legit dupe issues for other methods are ignored.
23306c3fb27SDimitry Andric     if (mKind == MK_Static && methodName.val() == "load")
23406c3fb27SDimitry Andric       continue;
23506c3fb27SDimitry Andric 
23606c3fb27SDimitry Andric     auto &methodMap =
23706c3fb27SDimitry Andric         mKind == MK_Instance ? klass.instanceMethods : klass.classMethods;
23806c3fb27SDimitry Andric     if (methodMap
23906c3fb27SDimitry Andric             .try_emplace(methodName, MethodContainer{mcKind, containerIsec})
24006c3fb27SDimitry Andric             .second)
24106c3fb27SDimitry Andric       continue;
24206c3fb27SDimitry Andric 
24306c3fb27SDimitry Andric     // We have a duplicate; generate a warning message.
24406c3fb27SDimitry Andric     const auto &mc = methodMap.lookup(methodName);
24506c3fb27SDimitry Andric     const Reloc *nameReloc = nullptr;
24606c3fb27SDimitry Andric     if (mc.kind == MCK_Category) {
24706c3fb27SDimitry Andric       nameReloc = mc.isec->getRelocAt(catLayout.nameOffset);
24806c3fb27SDimitry Andric     } else {
24906c3fb27SDimitry Andric       assert(mc.kind == MCK_Class);
25006c3fb27SDimitry Andric       const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset)
25106c3fb27SDimitry Andric                          ->getReferentInputSection();
25206c3fb27SDimitry Andric       nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset);
25306c3fb27SDimitry Andric     }
25406c3fb27SDimitry Andric     StringRef containerName = getReferentString(*nameReloc);
25506c3fb27SDimitry Andric     StringRef methPrefix = mKind == MK_Instance ? "-" : "+";
25606c3fb27SDimitry Andric 
25706c3fb27SDimitry Andric     // We should only ever encounter collisions when parsing category methods
25806c3fb27SDimitry Andric     // (since the Class struct is parsed before any of its categories).
25906c3fb27SDimitry Andric     assert(mcKind == MCK_Category);
26006c3fb27SDimitry Andric     StringRef newCatName =
26106c3fb27SDimitry Andric         getReferentString(*containerIsec->getRelocAt(catLayout.nameOffset));
26206c3fb27SDimitry Andric 
2630fca6ea1SDimitry Andric     auto formatObjAndSrcFileName = [](const InputSection *section) {
2640fca6ea1SDimitry Andric       lld::macho::InputFile *inputFile = section->getFile();
2650fca6ea1SDimitry Andric       std::string result = toString(inputFile);
2660fca6ea1SDimitry Andric 
2670fca6ea1SDimitry Andric       auto objFile = dyn_cast_or_null<ObjFile>(inputFile);
2680fca6ea1SDimitry Andric       if (objFile && objFile->compileUnit)
2690fca6ea1SDimitry Andric         result += " (" + objFile->sourceFile() + ")";
2700fca6ea1SDimitry Andric 
2710fca6ea1SDimitry Andric       return result;
2720fca6ea1SDimitry Andric     };
2730fca6ea1SDimitry Andric 
27406c3fb27SDimitry Andric     StringRef containerType = mc.kind == MCK_Category ? "category" : "class";
27506c3fb27SDimitry Andric     warn("method '" + methPrefix + methodName.val() +
27606c3fb27SDimitry Andric          "' has conflicting definitions:\n>>> defined in category " +
2770fca6ea1SDimitry Andric          newCatName + " from " + formatObjAndSrcFileName(containerIsec) +
27806c3fb27SDimitry Andric          "\n>>> defined in " + containerType + " " + containerName + " from " +
2790fca6ea1SDimitry Andric          formatObjAndSrcFileName(mc.isec));
28006c3fb27SDimitry Andric   }
28106c3fb27SDimitry Andric }
28206c3fb27SDimitry Andric 
parseCategory(const ConcatInputSection * catIsec)28306c3fb27SDimitry Andric void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) {
28406c3fb27SDimitry Andric   auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset);
28506c3fb27SDimitry Andric   if (!classReloc)
28606c3fb27SDimitry Andric     return;
28706c3fb27SDimitry Andric 
28806c3fb27SDimitry Andric   auto *classSym = classReloc->referent.get<Symbol *>();
28906c3fb27SDimitry Andric   if (auto *d = dyn_cast<Defined>(classSym))
29006c3fb27SDimitry Andric     if (!classMap.count(d))
29106c3fb27SDimitry Andric       parseClass(d);
29206c3fb27SDimitry Andric 
29306c3fb27SDimitry Andric   if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) {
29406c3fb27SDimitry Andric     parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
29506c3fb27SDimitry Andric                  classSym, catIsec, MCK_Category, MK_Static);
29606c3fb27SDimitry Andric   }
29706c3fb27SDimitry Andric 
29806c3fb27SDimitry Andric   if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) {
29906c3fb27SDimitry Andric     parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
30006c3fb27SDimitry Andric                  classSym, catIsec, MCK_Category, MK_Instance);
30106c3fb27SDimitry Andric   }
30206c3fb27SDimitry Andric }
30306c3fb27SDimitry Andric 
parseClass(const Defined * classSym)30406c3fb27SDimitry Andric void ObjcCategoryChecker::parseClass(const Defined *classSym) {
30506c3fb27SDimitry Andric   // Given a Class struct, get its corresponding Methods struct
30606c3fb27SDimitry Andric   auto getMethodsIsec =
30706c3fb27SDimitry Andric       [&](const InputSection *classIsec) -> ConcatInputSection * {
30806c3fb27SDimitry Andric     if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) {
30906c3fb27SDimitry Andric       if (const auto *roIsec =
31006c3fb27SDimitry Andric               cast_or_null<ConcatInputSection>(r->getReferentInputSection())) {
31106c3fb27SDimitry Andric         if (const auto *r =
31206c3fb27SDimitry Andric                 roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) {
31306c3fb27SDimitry Andric           if (auto *methodsIsec = cast_or_null<ConcatInputSection>(
31406c3fb27SDimitry Andric                   r->getReferentInputSection()))
31506c3fb27SDimitry Andric             return methodsIsec;
31606c3fb27SDimitry Andric         }
31706c3fb27SDimitry Andric       }
31806c3fb27SDimitry Andric     }
31906c3fb27SDimitry Andric     return nullptr;
32006c3fb27SDimitry Andric   };
32106c3fb27SDimitry Andric 
3220fca6ea1SDimitry Andric   const auto *classIsec = cast<ConcatInputSection>(classSym->isec());
32306c3fb27SDimitry Andric 
32406c3fb27SDimitry Andric   // Parse instance methods.
32506c3fb27SDimitry Andric   if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
32606c3fb27SDimitry Andric     parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class,
32706c3fb27SDimitry Andric                  MK_Instance);
32806c3fb27SDimitry Andric 
32906c3fb27SDimitry Andric   // Class methods are contained in the metaclass.
3300fca6ea1SDimitry Andric   if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset))
33106c3fb27SDimitry Andric     if (const auto *classMethodsIsec = getMethodsIsec(
33206c3fb27SDimitry Andric             cast<ConcatInputSection>(r->getReferentInputSection())))
33306c3fb27SDimitry Andric       parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static);
33406c3fb27SDimitry Andric }
33506c3fb27SDimitry Andric 
checkCategories()33606c3fb27SDimitry Andric void objc::checkCategories() {
3370fca6ea1SDimitry Andric   TimeTraceScope timeScope("ObjcCategoryChecker");
3380fca6ea1SDimitry Andric 
33906c3fb27SDimitry Andric   ObjcCategoryChecker checker;
34006c3fb27SDimitry Andric   for (const InputSection *isec : inputSections) {
34106c3fb27SDimitry Andric     if (isec->getName() == section_names::objcCatList)
34206c3fb27SDimitry Andric       for (const Reloc &r : isec->relocs) {
34306c3fb27SDimitry Andric         auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection());
34406c3fb27SDimitry Andric         checker.parseCategory(catIsec);
34506c3fb27SDimitry Andric       }
34606c3fb27SDimitry Andric   }
34706c3fb27SDimitry Andric }
3480fca6ea1SDimitry Andric 
3490fca6ea1SDimitry Andric namespace {
3500fca6ea1SDimitry Andric 
3510fca6ea1SDimitry Andric class ObjcCategoryMerger {
3520fca6ea1SDimitry Andric   // In which language was a particular construct originally defined
3530fca6ea1SDimitry Andric   enum SourceLanguage { Unknown, ObjC, Swift };
3540fca6ea1SDimitry Andric 
3550fca6ea1SDimitry Andric   // Information about an input category
3560fca6ea1SDimitry Andric   struct InfoInputCategory {
3570fca6ea1SDimitry Andric     ConcatInputSection *catListIsec;
3580fca6ea1SDimitry Andric     ConcatInputSection *catBodyIsec;
3590fca6ea1SDimitry Andric     uint32_t offCatListIsec = 0;
3600fca6ea1SDimitry Andric     SourceLanguage sourceLanguage = SourceLanguage::Unknown;
3610fca6ea1SDimitry Andric 
3620fca6ea1SDimitry Andric     bool wasMerged = false;
3630fca6ea1SDimitry Andric   };
3640fca6ea1SDimitry Andric 
3650fca6ea1SDimitry Andric   // To write new (merged) categories or classes, we will try make limited
3660fca6ea1SDimitry Andric   // assumptions about the alignment and the sections the various class/category
3670fca6ea1SDimitry Andric   // info are stored in and . So we'll just reuse the same sections and
3680fca6ea1SDimitry Andric   // alignment as already used in existing (input) categories. To do this we
3690fca6ea1SDimitry Andric   // have InfoCategoryWriter which contains the various sections that the
3700fca6ea1SDimitry Andric   // generated categories will be written to.
3710fca6ea1SDimitry Andric   struct InfoWriteSection {
3720fca6ea1SDimitry Andric     bool valid = false; // Data has been successfully collected from input
3730fca6ea1SDimitry Andric     uint32_t align = 0;
3740fca6ea1SDimitry Andric     Section *inputSection;
3750fca6ea1SDimitry Andric     Reloc relocTemplate;
3760fca6ea1SDimitry Andric     OutputSection *outputSection;
3770fca6ea1SDimitry Andric   };
3780fca6ea1SDimitry Andric 
3790fca6ea1SDimitry Andric   struct InfoCategoryWriter {
3800fca6ea1SDimitry Andric     InfoWriteSection catListInfo;
3810fca6ea1SDimitry Andric     InfoWriteSection catBodyInfo;
3820fca6ea1SDimitry Andric     InfoWriteSection catNameInfo;
3830fca6ea1SDimitry Andric     InfoWriteSection catPtrListInfo;
3840fca6ea1SDimitry Andric   };
3850fca6ea1SDimitry Andric 
3860fca6ea1SDimitry Andric   // Information about a pointer list in the original categories or class(method
3870fca6ea1SDimitry Andric   // lists, protocol lists, etc)
3880fca6ea1SDimitry Andric   struct PointerListInfo {
3890fca6ea1SDimitry Andric     PointerListInfo() = default;
3900fca6ea1SDimitry Andric     PointerListInfo(const PointerListInfo &) = default;
PointerListInfo__anonff1b1f600411::ObjcCategoryMerger::PointerListInfo3910fca6ea1SDimitry Andric     PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct)
3920fca6ea1SDimitry Andric         : categoryPrefix(_categoryPrefix),
3930fca6ea1SDimitry Andric           pointersPerStruct(_pointersPerStruct) {}
3940fca6ea1SDimitry Andric 
operator ==__anonff1b1f600411::ObjcCategoryMerger::PointerListInfo3950fca6ea1SDimitry Andric     inline bool operator==(const PointerListInfo &cmp) const {
3960fca6ea1SDimitry Andric       return pointersPerStruct == cmp.pointersPerStruct &&
3970fca6ea1SDimitry Andric              structSize == cmp.structSize && structCount == cmp.structCount &&
3980fca6ea1SDimitry Andric              allPtrs == cmp.allPtrs;
3990fca6ea1SDimitry Andric     }
4000fca6ea1SDimitry Andric 
4010fca6ea1SDimitry Andric     const char *categoryPrefix;
4020fca6ea1SDimitry Andric 
4030fca6ea1SDimitry Andric     uint32_t pointersPerStruct = 0;
4040fca6ea1SDimitry Andric 
4050fca6ea1SDimitry Andric     uint32_t structSize = 0;
4060fca6ea1SDimitry Andric     uint32_t structCount = 0;
4070fca6ea1SDimitry Andric 
4080fca6ea1SDimitry Andric     std::vector<Symbol *> allPtrs;
4090fca6ea1SDimitry Andric   };
4100fca6ea1SDimitry Andric 
4110fca6ea1SDimitry Andric   // Full information describing an ObjC class . This will include all the
4120fca6ea1SDimitry Andric   // additional methods, protocols, and properties that are contained in the
4130fca6ea1SDimitry Andric   // class and all the categories that extend a particular class.
4140fca6ea1SDimitry Andric   struct ClassExtensionInfo {
ClassExtensionInfo__anonff1b1f600411::ObjcCategoryMerger::ClassExtensionInfo4150fca6ea1SDimitry Andric     ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};
4160fca6ea1SDimitry Andric 
4170fca6ea1SDimitry Andric     // Merged names of containers. Ex: base|firstCategory|secondCategory|...
4180fca6ea1SDimitry Andric     std::string mergedContainerName;
4190fca6ea1SDimitry Andric     std::string baseClassName;
4200fca6ea1SDimitry Andric     const Symbol *baseClass = nullptr;
4210fca6ea1SDimitry Andric     SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown;
4220fca6ea1SDimitry Andric 
4230fca6ea1SDimitry Andric     CategoryLayout &catLayout;
4240fca6ea1SDimitry Andric 
4250fca6ea1SDimitry Andric     // In case we generate new data, mark the new data as belonging to this file
4260fca6ea1SDimitry Andric     ObjFile *objFileForMergeData = nullptr;
4270fca6ea1SDimitry Andric 
4280fca6ea1SDimitry Andric     PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods,
4290fca6ea1SDimitry Andric                                        /*pointersPerStruct=*/3};
4300fca6ea1SDimitry Andric     PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods,
4310fca6ea1SDimitry Andric                                     /*pointersPerStruct=*/3};
4320fca6ea1SDimitry Andric     PointerListInfo protocols = {objc::symbol_names::categoryProtocols,
4330fca6ea1SDimitry Andric                                  /*pointersPerStruct=*/0};
4340fca6ea1SDimitry Andric     PointerListInfo instanceProps = {objc::symbol_names::listProprieties,
4350fca6ea1SDimitry Andric                                      /*pointersPerStruct=*/2};
4360fca6ea1SDimitry Andric     PointerListInfo classProps = {objc::symbol_names::klassPropList,
4370fca6ea1SDimitry Andric                                   /*pointersPerStruct=*/2};
4380fca6ea1SDimitry Andric   };
4390fca6ea1SDimitry Andric 
4400fca6ea1SDimitry Andric public:
4410fca6ea1SDimitry Andric   ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
4420fca6ea1SDimitry Andric   void doMerge();
4430fca6ea1SDimitry Andric   static void doCleanup();
4440fca6ea1SDimitry Andric 
4450fca6ea1SDimitry Andric private:
4460fca6ea1SDimitry Andric   DenseSet<const Symbol *> collectNlCategories();
4470fca6ea1SDimitry Andric   void collectAndValidateCategoriesData();
4480fca6ea1SDimitry Andric   void
4490fca6ea1SDimitry Andric   mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
4500fca6ea1SDimitry Andric 
4510fca6ea1SDimitry Andric   void eraseISec(ConcatInputSection *isec);
4520fca6ea1SDimitry Andric   void eraseMergedCategories();
4530fca6ea1SDimitry Andric 
4540fca6ea1SDimitry Andric   void generateCatListForNonErasedCategories(
4550fca6ea1SDimitry Andric       MapVector<ConcatInputSection *, std::set<uint64_t>>
4560fca6ea1SDimitry Andric           catListToErasedOffsets);
4570fca6ea1SDimitry Andric   void collectSectionWriteInfoFromIsec(const InputSection *isec,
4580fca6ea1SDimitry Andric                                        InfoWriteSection &catWriteInfo);
4590fca6ea1SDimitry Andric   void collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);
4600fca6ea1SDimitry Andric   void parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
4610fca6ea1SDimitry Andric                              ClassExtensionInfo &extInfo);
4620fca6ea1SDimitry Andric 
4630fca6ea1SDimitry Andric   void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
4640fca6ea1SDimitry Andric                              PointerListInfo &ptrList,
4650fca6ea1SDimitry Andric                              SourceLanguage sourceLang);
4660fca6ea1SDimitry Andric 
4670fca6ea1SDimitry Andric   PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
4680fca6ea1SDimitry Andric                                         uint32_t secOffset,
4690fca6ea1SDimitry Andric                                         SourceLanguage sourceLang);
4700fca6ea1SDimitry Andric 
4710fca6ea1SDimitry Andric   void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
4720fca6ea1SDimitry Andric                             PointerListInfo &ptrList);
4730fca6ea1SDimitry Andric 
4740fca6ea1SDimitry Andric   void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
4750fca6ea1SDimitry Andric                               const ClassExtensionInfo &extInfo,
4760fca6ea1SDimitry Andric                               const PointerListInfo &ptrList);
4770fca6ea1SDimitry Andric 
4780fca6ea1SDimitry Andric   Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
4790fca6ea1SDimitry Andric                                    const ClassExtensionInfo &extInfo,
4800fca6ea1SDimitry Andric                                    const PointerListInfo &ptrList);
4810fca6ea1SDimitry Andric 
4820fca6ea1SDimitry Andric   Defined *emitCategory(const ClassExtensionInfo &extInfo);
4830fca6ea1SDimitry Andric   Defined *emitCatListEntrySec(const std::string &forCategoryName,
4840fca6ea1SDimitry Andric                                const std::string &forBaseClassName,
4850fca6ea1SDimitry Andric                                ObjFile *objFile);
4860fca6ea1SDimitry Andric   Defined *emitCategoryBody(const std::string &name, const Defined *nameSym,
4870fca6ea1SDimitry Andric                             const Symbol *baseClassSym,
4880fca6ea1SDimitry Andric                             const std::string &baseClassName, ObjFile *objFile);
4890fca6ea1SDimitry Andric   Defined *emitCategoryName(const std::string &name, ObjFile *objFile);
4900fca6ea1SDimitry Andric   void createSymbolReference(Defined *refFrom, const Symbol *refTo,
4910fca6ea1SDimitry Andric                              uint32_t offset, const Reloc &relocTemplate);
4920fca6ea1SDimitry Andric   Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset);
4930fca6ea1SDimitry Andric   Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
4940fca6ea1SDimitry Andric                                    uint32_t offset);
4950fca6ea1SDimitry Andric   Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
4960fca6ea1SDimitry Andric                                      uint32_t offset);
4970fca6ea1SDimitry Andric   Defined *getClassRo(const Defined *classSym, bool getMetaRo);
4980fca6ea1SDimitry Andric   SourceLanguage getClassSymSourceLang(const Defined *classSym);
4990fca6ea1SDimitry Andric   void mergeCategoriesIntoBaseClass(const Defined *baseClass,
5000fca6ea1SDimitry Andric                                     std::vector<InfoInputCategory> &categories);
5010fca6ea1SDimitry Andric   void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
5020fca6ea1SDimitry Andric   void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
5030fca6ea1SDimitry Andric                                    uint32_t offset);
5040fca6ea1SDimitry Andric 
5050fca6ea1SDimitry Andric   // Allocate a null-terminated StringRef backed by generatedSectionData
5060fca6ea1SDimitry Andric   StringRef newStringData(const char *str);
5070fca6ea1SDimitry Andric   // Allocate section data, backed by generatedSectionData
5080fca6ea1SDimitry Andric   SmallVector<uint8_t> &newSectionData(uint32_t size);
5090fca6ea1SDimitry Andric 
5100fca6ea1SDimitry Andric   CategoryLayout catLayout;
5110fca6ea1SDimitry Andric   ClassLayout classLayout;
5120fca6ea1SDimitry Andric   ROClassLayout roClassLayout;
5130fca6ea1SDimitry Andric   ListHeaderLayout listHeaderLayout;
5140fca6ea1SDimitry Andric   MethodLayout methodLayout;
5150fca6ea1SDimitry Andric   ProtocolListHeaderLayout protocolListHeaderLayout;
5160fca6ea1SDimitry Andric 
5170fca6ea1SDimitry Andric   InfoCategoryWriter infoCategoryWriter;
5180fca6ea1SDimitry Andric   std::vector<ConcatInputSection *> &allInputSections;
5190fca6ea1SDimitry Andric   // Map of base class Symbol to list of InfoInputCategory's for it
5200fca6ea1SDimitry Andric   MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
5210fca6ea1SDimitry Andric 
5220fca6ea1SDimitry Andric   // Normally, the binary data comes from the input files, but since we're
5230fca6ea1SDimitry Andric   // generating binary data ourselves, we use the below array to store it in.
5240fca6ea1SDimitry Andric   // Need this to be 'static' so the data survives past the ObjcCategoryMerger
5250fca6ea1SDimitry Andric   // object, as the data will be read by the Writer when the final binary is
5260fca6ea1SDimitry Andric   // generated.
5270fca6ea1SDimitry Andric   static SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
5280fca6ea1SDimitry Andric       generatedSectionData;
5290fca6ea1SDimitry Andric };
5300fca6ea1SDimitry Andric 
5310fca6ea1SDimitry Andric SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
5320fca6ea1SDimitry Andric     ObjcCategoryMerger::generatedSectionData;
5330fca6ea1SDimitry Andric 
ObjcCategoryMerger(std::vector<ConcatInputSection * > & _allInputSections)5340fca6ea1SDimitry Andric ObjcCategoryMerger::ObjcCategoryMerger(
5350fca6ea1SDimitry Andric     std::vector<ConcatInputSection *> &_allInputSections)
5360fca6ea1SDimitry Andric     : catLayout(target->wordSize), classLayout(target->wordSize),
5370fca6ea1SDimitry Andric       roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
5380fca6ea1SDimitry Andric       methodLayout(target->wordSize),
5390fca6ea1SDimitry Andric       protocolListHeaderLayout(target->wordSize),
5400fca6ea1SDimitry Andric       allInputSections(_allInputSections) {}
5410fca6ea1SDimitry Andric 
collectSectionWriteInfoFromIsec(const InputSection * isec,InfoWriteSection & catWriteInfo)5420fca6ea1SDimitry Andric void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
5430fca6ea1SDimitry Andric     const InputSection *isec, InfoWriteSection &catWriteInfo) {
5440fca6ea1SDimitry Andric 
5450fca6ea1SDimitry Andric   catWriteInfo.inputSection = const_cast<Section *>(&isec->section);
5460fca6ea1SDimitry Andric   catWriteInfo.align = isec->align;
5470fca6ea1SDimitry Andric   catWriteInfo.outputSection = isec->parent;
5480fca6ea1SDimitry Andric 
5490fca6ea1SDimitry Andric   assert(catWriteInfo.outputSection &&
5500fca6ea1SDimitry Andric          "outputSection may not be null in collectSectionWriteInfoFromIsec.");
5510fca6ea1SDimitry Andric 
5520fca6ea1SDimitry Andric   if (isec->relocs.size())
5530fca6ea1SDimitry Andric     catWriteInfo.relocTemplate = isec->relocs[0];
5540fca6ea1SDimitry Andric 
5550fca6ea1SDimitry Andric   catWriteInfo.valid = true;
5560fca6ea1SDimitry Andric }
5570fca6ea1SDimitry Andric 
5580fca6ea1SDimitry Andric Symbol *
tryGetSymbolAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)5590fca6ea1SDimitry Andric ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
5600fca6ea1SDimitry Andric                                              uint32_t offset) {
5610fca6ea1SDimitry Andric   if (!isec)
5620fca6ea1SDimitry Andric     return nullptr;
5630fca6ea1SDimitry Andric   const Reloc *reloc = isec->getRelocAt(offset);
5640fca6ea1SDimitry Andric 
5650fca6ea1SDimitry Andric   if (!reloc)
5660fca6ea1SDimitry Andric     return nullptr;
5670fca6ea1SDimitry Andric 
5680fca6ea1SDimitry Andric   Symbol *sym = reloc->referent.get<Symbol *>();
5690fca6ea1SDimitry Andric 
5700fca6ea1SDimitry Andric   if (reloc->addend) {
5710fca6ea1SDimitry Andric     assert(isa<Defined>(sym) && "Expected defined for non-zero addend");
5720fca6ea1SDimitry Andric     Defined *definedSym = cast<Defined>(sym);
5730fca6ea1SDimitry Andric     sym = tryFindDefinedOnIsec(definedSym->isec(),
5740fca6ea1SDimitry Andric                                definedSym->value + reloc->addend);
5750fca6ea1SDimitry Andric   }
5760fca6ea1SDimitry Andric 
5770fca6ea1SDimitry Andric   return sym;
5780fca6ea1SDimitry Andric }
5790fca6ea1SDimitry Andric 
tryFindDefinedOnIsec(const InputSection * isec,uint32_t offset)5800fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec,
5810fca6ea1SDimitry Andric                                                   uint32_t offset) {
5820fca6ea1SDimitry Andric   for (Defined *sym : isec->symbols)
5830fca6ea1SDimitry Andric     if ((sym->value <= offset) && (sym->value + sym->size > offset))
5840fca6ea1SDimitry Andric       return sym;
5850fca6ea1SDimitry Andric 
5860fca6ea1SDimitry Andric   return nullptr;
5870fca6ea1SDimitry Andric }
5880fca6ea1SDimitry Andric 
5890fca6ea1SDimitry Andric Defined *
tryGetDefinedAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)5900fca6ea1SDimitry Andric ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
5910fca6ea1SDimitry Andric                                               uint32_t offset) {
5920fca6ea1SDimitry Andric   Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset);
5930fca6ea1SDimitry Andric   return dyn_cast_or_null<Defined>(sym);
5940fca6ea1SDimitry Andric }
5950fca6ea1SDimitry Andric 
5960fca6ea1SDimitry Andric // Get the class's ro_data symbol. If getMetaRo is true, then we will return
5970fca6ea1SDimitry Andric // the meta-class's ro_data symbol. Otherwise, we will return the class
5980fca6ea1SDimitry Andric // (instance) ro_data symbol.
getClassRo(const Defined * classSym,bool getMetaRo)5990fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,
6000fca6ea1SDimitry Andric                                         bool getMetaRo) {
6010fca6ea1SDimitry Andric   ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec());
6020fca6ea1SDimitry Andric   if (!isec)
6030fca6ea1SDimitry Andric     return nullptr;
6040fca6ea1SDimitry Andric 
6050fca6ea1SDimitry Andric   if (!getMetaRo)
6060fca6ea1SDimitry Andric     return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset +
6070fca6ea1SDimitry Andric                                                classSym->value);
6080fca6ea1SDimitry Andric 
6090fca6ea1SDimitry Andric   Defined *metaClass = tryGetDefinedAtIsecOffset(
6100fca6ea1SDimitry Andric       isec, classLayout.metaClassOffset + classSym->value);
6110fca6ea1SDimitry Andric   if (!metaClass)
6120fca6ea1SDimitry Andric     return nullptr;
6130fca6ea1SDimitry Andric 
6140fca6ea1SDimitry Andric   return tryGetDefinedAtIsecOffset(
6150fca6ea1SDimitry Andric       dyn_cast<ConcatInputSection>(metaClass->isec()),
6160fca6ea1SDimitry Andric       classLayout.roDataOffset);
6170fca6ea1SDimitry Andric }
6180fca6ea1SDimitry Andric 
6190fca6ea1SDimitry Andric // Given an ConcatInputSection or CStringInputSection and an offset, if there is
6200fca6ea1SDimitry Andric // a symbol(Defined) at that offset, then erase the symbol (mark it not live)
tryEraseDefinedAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)6210fca6ea1SDimitry Andric void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
6220fca6ea1SDimitry Andric     const ConcatInputSection *isec, uint32_t offset) {
6230fca6ea1SDimitry Andric   const Reloc *reloc = isec->getRelocAt(offset);
6240fca6ea1SDimitry Andric 
6250fca6ea1SDimitry Andric   if (!reloc)
6260fca6ea1SDimitry Andric     return;
6270fca6ea1SDimitry Andric 
6280fca6ea1SDimitry Andric   Defined *sym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
6290fca6ea1SDimitry Andric   if (!sym)
6300fca6ea1SDimitry Andric     return;
6310fca6ea1SDimitry Andric 
6320fca6ea1SDimitry Andric   if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
6330fca6ea1SDimitry Andric     eraseISec(cisec);
6340fca6ea1SDimitry Andric   else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
6350fca6ea1SDimitry Andric     uint32_t totalOffset = sym->value + reloc->addend;
6360fca6ea1SDimitry Andric     StringPiece &piece = csisec->getStringPiece(totalOffset);
6370fca6ea1SDimitry Andric     piece.live = false;
6380fca6ea1SDimitry Andric   } else {
6390fca6ea1SDimitry Andric     llvm_unreachable("erased symbol has to be Defined or CStringInputSection");
6400fca6ea1SDimitry Andric   }
6410fca6ea1SDimitry Andric }
6420fca6ea1SDimitry Andric 
collectCategoryWriterInfoFromCategory(const InfoInputCategory & catInfo)6430fca6ea1SDimitry Andric void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
6440fca6ea1SDimitry Andric     const InfoInputCategory &catInfo) {
6450fca6ea1SDimitry Andric 
6460fca6ea1SDimitry Andric   if (!infoCategoryWriter.catListInfo.valid)
6470fca6ea1SDimitry Andric     collectSectionWriteInfoFromIsec(catInfo.catListIsec,
6480fca6ea1SDimitry Andric                                     infoCategoryWriter.catListInfo);
6490fca6ea1SDimitry Andric   if (!infoCategoryWriter.catBodyInfo.valid)
6500fca6ea1SDimitry Andric     collectSectionWriteInfoFromIsec(catInfo.catBodyIsec,
6510fca6ea1SDimitry Andric                                     infoCategoryWriter.catBodyInfo);
6520fca6ea1SDimitry Andric 
6530fca6ea1SDimitry Andric   if (!infoCategoryWriter.catNameInfo.valid) {
6540fca6ea1SDimitry Andric     lld::macho::Defined *catNameSym =
6550fca6ea1SDimitry Andric         tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
6560fca6ea1SDimitry Andric     assert(catNameSym && "Category does not have a valid name Symbol");
6570fca6ea1SDimitry Andric 
6580fca6ea1SDimitry Andric     collectSectionWriteInfoFromIsec(catNameSym->isec(),
6590fca6ea1SDimitry Andric                                     infoCategoryWriter.catNameInfo);
6600fca6ea1SDimitry Andric   }
6610fca6ea1SDimitry Andric 
6620fca6ea1SDimitry Andric   // Collect writer info from all the category lists (we're assuming they all
6630fca6ea1SDimitry Andric   // would provide the same info)
6640fca6ea1SDimitry Andric   if (!infoCategoryWriter.catPtrListInfo.valid) {
6650fca6ea1SDimitry Andric     for (uint32_t off = catLayout.instanceMethodsOffset;
6660fca6ea1SDimitry Andric          off <= catLayout.classPropsOffset; off += target->wordSize) {
6670fca6ea1SDimitry Andric       if (Defined *ptrList =
6680fca6ea1SDimitry Andric               tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) {
6690fca6ea1SDimitry Andric         collectSectionWriteInfoFromIsec(ptrList->isec(),
6700fca6ea1SDimitry Andric                                         infoCategoryWriter.catPtrListInfo);
6710fca6ea1SDimitry Andric         // we've successfully collected data, so we can break
6720fca6ea1SDimitry Andric         break;
6730fca6ea1SDimitry Andric       }
6740fca6ea1SDimitry Andric     }
6750fca6ea1SDimitry Andric   }
6760fca6ea1SDimitry Andric }
6770fca6ea1SDimitry Andric 
6780fca6ea1SDimitry Andric // Parse a protocol list that might be linked to ConcatInputSection at a given
6790fca6ea1SDimitry Andric // offset. The format of the protocol list is different than other lists (prop
6800fca6ea1SDimitry Andric // lists, method lists) so we need to parse it differently
parseProtocolListInfo(const ConcatInputSection * isec,uint32_t secOffset,PointerListInfo & ptrList,SourceLanguage sourceLang)6810fca6ea1SDimitry Andric void ObjcCategoryMerger::parseProtocolListInfo(
6820fca6ea1SDimitry Andric     const ConcatInputSection *isec, uint32_t secOffset,
6830fca6ea1SDimitry Andric     PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
6840fca6ea1SDimitry Andric   assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
6850fca6ea1SDimitry Andric          "Tried to read pointer list beyond protocol section end");
6860fca6ea1SDimitry Andric 
6870fca6ea1SDimitry Andric   const Reloc *reloc = isec->getRelocAt(secOffset);
6880fca6ea1SDimitry Andric   if (!reloc)
6890fca6ea1SDimitry Andric     return;
6900fca6ea1SDimitry Andric 
6910fca6ea1SDimitry Andric   auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
6920fca6ea1SDimitry Andric   assert(ptrListSym && "Protocol list reloc does not have a valid Defined");
6930fca6ea1SDimitry Andric 
6940fca6ea1SDimitry Andric   // Theoretically protocol count can be either 32b or 64b, depending on
6950fca6ea1SDimitry Andric   // platform pointer size, but to simplify implementation we always just read
6960fca6ea1SDimitry Andric   // the lower 32b which should be good enough.
6970fca6ea1SDimitry Andric   uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
6980fca6ea1SDimitry Andric       ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
6990fca6ea1SDimitry Andric 
7000fca6ea1SDimitry Andric   ptrList.structCount += protocolCount;
7010fca6ea1SDimitry Andric   ptrList.structSize = target->wordSize;
7020fca6ea1SDimitry Andric 
7030fca6ea1SDimitry Andric   [[maybe_unused]] uint32_t expectedListSize =
7040fca6ea1SDimitry Andric       (protocolCount * target->wordSize) +
7050fca6ea1SDimitry Andric       /*header(count)*/ protocolListHeaderLayout.totalSize +
7060fca6ea1SDimitry Andric       /*extra null value*/ target->wordSize;
7070fca6ea1SDimitry Andric 
7080fca6ea1SDimitry Andric   // On Swift, the protocol list does not have the extra (unnecessary) null
7090fca6ea1SDimitry Andric   [[maybe_unused]] uint32_t expectedListSizeSwift =
7100fca6ea1SDimitry Andric       expectedListSize - target->wordSize;
7110fca6ea1SDimitry Andric 
7120fca6ea1SDimitry Andric   assert(((expectedListSize == ptrListSym->isec()->data.size() &&
7130fca6ea1SDimitry Andric            sourceLang == SourceLanguage::ObjC) ||
7140fca6ea1SDimitry Andric           (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
7150fca6ea1SDimitry Andric            sourceLang == SourceLanguage::Swift)) &&
7160fca6ea1SDimitry Andric          "Protocol list does not match expected size");
7170fca6ea1SDimitry Andric 
7180fca6ea1SDimitry Andric   uint32_t off = protocolListHeaderLayout.totalSize;
7190fca6ea1SDimitry Andric   for (uint32_t inx = 0; inx < protocolCount; ++inx) {
7200fca6ea1SDimitry Andric     const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
7210fca6ea1SDimitry Andric     assert(reloc && "No reloc found at protocol list offset");
7220fca6ea1SDimitry Andric 
7230fca6ea1SDimitry Andric     auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
7240fca6ea1SDimitry Andric     assert(listSym && "Protocol list reloc does not have a valid Defined");
7250fca6ea1SDimitry Andric 
7260fca6ea1SDimitry Andric     ptrList.allPtrs.push_back(listSym);
7270fca6ea1SDimitry Andric     off += target->wordSize;
7280fca6ea1SDimitry Andric   }
7290fca6ea1SDimitry Andric   assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
7300fca6ea1SDimitry Andric          "expected null terminating protocol");
7310fca6ea1SDimitry Andric   assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
7320fca6ea1SDimitry Andric          "Protocol list end offset does not match expected size");
7330fca6ea1SDimitry Andric }
7340fca6ea1SDimitry Andric 
7350fca6ea1SDimitry Andric // Parse a protocol list and return the PointerListInfo for it
7360fca6ea1SDimitry Andric ObjcCategoryMerger::PointerListInfo
parseProtocolListInfo(const ConcatInputSection * isec,uint32_t secOffset,SourceLanguage sourceLang)7370fca6ea1SDimitry Andric ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
7380fca6ea1SDimitry Andric                                           uint32_t secOffset,
7390fca6ea1SDimitry Andric                                           SourceLanguage sourceLang) {
7400fca6ea1SDimitry Andric   PointerListInfo ptrList;
7410fca6ea1SDimitry Andric   parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
7420fca6ea1SDimitry Andric   return ptrList;
7430fca6ea1SDimitry Andric }
7440fca6ea1SDimitry Andric 
7450fca6ea1SDimitry Andric // Parse a pointer list that might be linked to ConcatInputSection at a given
7460fca6ea1SDimitry Andric // offset. This can be used for instance methods, class methods, instance props
7470fca6ea1SDimitry Andric // and class props since they have the same format.
parsePointerListInfo(const ConcatInputSection * isec,uint32_t secOffset,PointerListInfo & ptrList)7480fca6ea1SDimitry Andric void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
7490fca6ea1SDimitry Andric                                               uint32_t secOffset,
7500fca6ea1SDimitry Andric                                               PointerListInfo &ptrList) {
7510fca6ea1SDimitry Andric   assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3);
7520fca6ea1SDimitry Andric   assert(isec && "Trying to parse pointer list from null isec");
7530fca6ea1SDimitry Andric   assert(secOffset + target->wordSize <= isec->data.size() &&
7540fca6ea1SDimitry Andric          "Trying to read pointer list beyond section end");
7550fca6ea1SDimitry Andric 
7560fca6ea1SDimitry Andric   const Reloc *reloc = isec->getRelocAt(secOffset);
7570fca6ea1SDimitry Andric   if (!reloc)
7580fca6ea1SDimitry Andric     return;
7590fca6ea1SDimitry Andric 
7600fca6ea1SDimitry Andric   auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
7610fca6ea1SDimitry Andric   assert(ptrListSym && "Reloc does not have a valid Defined");
7620fca6ea1SDimitry Andric 
7630fca6ea1SDimitry Andric   uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
7640fca6ea1SDimitry Andric       ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
7650fca6ea1SDimitry Andric   uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
7660fca6ea1SDimitry Andric       ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
7670fca6ea1SDimitry Andric   assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
7680fca6ea1SDimitry Andric 
7690fca6ea1SDimitry Andric   assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
7700fca6ea1SDimitry Andric 
7710fca6ea1SDimitry Andric   ptrList.structCount += thisStructCount;
7720fca6ea1SDimitry Andric   ptrList.structSize = thisStructSize;
7730fca6ea1SDimitry Andric 
7740fca6ea1SDimitry Andric   uint32_t expectedListSize =
7750fca6ea1SDimitry Andric       listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
7760fca6ea1SDimitry Andric   assert(expectedListSize == ptrListSym->isec()->data.size() &&
7770fca6ea1SDimitry Andric          "Pointer list does not match expected size");
7780fca6ea1SDimitry Andric 
7790fca6ea1SDimitry Andric   for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
7800fca6ea1SDimitry Andric        off += target->wordSize) {
7810fca6ea1SDimitry Andric     const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
7820fca6ea1SDimitry Andric     assert(reloc && "No reloc found at pointer list offset");
7830fca6ea1SDimitry Andric 
7840fca6ea1SDimitry Andric     auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
7850fca6ea1SDimitry Andric     assert(listSym && "Reloc does not have a valid Defined");
7860fca6ea1SDimitry Andric 
7870fca6ea1SDimitry Andric     ptrList.allPtrs.push_back(listSym);
7880fca6ea1SDimitry Andric   }
7890fca6ea1SDimitry Andric }
7900fca6ea1SDimitry Andric 
7910fca6ea1SDimitry Andric // Here we parse all the information of an input category (catInfo) and
7920fca6ea1SDimitry Andric // append the parsed info into the structure which will contain all the
7930fca6ea1SDimitry Andric // information about how a class is extended (extInfo)
parseCatInfoToExtInfo(const InfoInputCategory & catInfo,ClassExtensionInfo & extInfo)7940fca6ea1SDimitry Andric void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
7950fca6ea1SDimitry Andric                                                ClassExtensionInfo &extInfo) {
7960fca6ea1SDimitry Andric   const Reloc *catNameReloc =
7970fca6ea1SDimitry Andric       catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset);
7980fca6ea1SDimitry Andric 
7990fca6ea1SDimitry Andric   // Parse name
8000fca6ea1SDimitry Andric   assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
8010fca6ea1SDimitry Andric 
8020fca6ea1SDimitry Andric   // is this the first category we are parsing?
8030fca6ea1SDimitry Andric   if (extInfo.mergedContainerName.empty())
8040fca6ea1SDimitry Andric     extInfo.objFileForMergeData =
8050fca6ea1SDimitry Andric         dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
8060fca6ea1SDimitry Andric   else
8070fca6ea1SDimitry Andric     extInfo.mergedContainerName += "|";
8080fca6ea1SDimitry Andric 
8090fca6ea1SDimitry Andric   assert(extInfo.objFileForMergeData &&
8100fca6ea1SDimitry Andric          "Expected to already have valid objextInfo.objFileForMergeData");
8110fca6ea1SDimitry Andric 
8120fca6ea1SDimitry Andric   StringRef catName = getReferentString(*catNameReloc);
8130fca6ea1SDimitry Andric   extInfo.mergedContainerName += catName.str();
8140fca6ea1SDimitry Andric 
8150fca6ea1SDimitry Andric   // Parse base class
8160fca6ea1SDimitry Andric   if (!extInfo.baseClass) {
8170fca6ea1SDimitry Andric     Symbol *classSym =
8180fca6ea1SDimitry Andric         tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset);
8190fca6ea1SDimitry Andric     assert(extInfo.baseClassName.empty());
8200fca6ea1SDimitry Andric     extInfo.baseClass = classSym;
8210fca6ea1SDimitry Andric     llvm::StringRef classPrefix(objc::symbol_names::klass);
8220fca6ea1SDimitry Andric     assert(classSym->getName().starts_with(classPrefix) &&
8230fca6ea1SDimitry Andric            "Base class symbol does not start with expected prefix");
8240fca6ea1SDimitry Andric     extInfo.baseClassName = classSym->getName().substr(classPrefix.size());
8250fca6ea1SDimitry Andric   } else {
8260fca6ea1SDimitry Andric     assert((extInfo.baseClass ==
8270fca6ea1SDimitry Andric             tryGetSymbolAtIsecOffset(catInfo.catBodyIsec,
8280fca6ea1SDimitry Andric                                      catLayout.klassOffset)) &&
8290fca6ea1SDimitry Andric            "Trying to parse category info into container with different base "
8300fca6ea1SDimitry Andric            "class");
8310fca6ea1SDimitry Andric   }
8320fca6ea1SDimitry Andric 
8330fca6ea1SDimitry Andric   parsePointerListInfo(catInfo.catBodyIsec, catLayout.instanceMethodsOffset,
8340fca6ea1SDimitry Andric                        extInfo.instanceMethods);
8350fca6ea1SDimitry Andric 
8360fca6ea1SDimitry Andric   parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset,
8370fca6ea1SDimitry Andric                        extInfo.classMethods);
8380fca6ea1SDimitry Andric 
8390fca6ea1SDimitry Andric   parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
8400fca6ea1SDimitry Andric                         extInfo.protocols, catInfo.sourceLanguage);
8410fca6ea1SDimitry Andric 
8420fca6ea1SDimitry Andric   parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
8430fca6ea1SDimitry Andric                        extInfo.instanceProps);
8440fca6ea1SDimitry Andric 
8450fca6ea1SDimitry Andric   parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset,
8460fca6ea1SDimitry Andric                        extInfo.classProps);
8470fca6ea1SDimitry Andric }
8480fca6ea1SDimitry Andric 
8490fca6ea1SDimitry Andric // Generate a protocol list (including header) and link it into the parent at
8500fca6ea1SDimitry Andric // the specified offset.
emitAndLinkProtocolList(Defined * parentSym,uint32_t linkAtOffset,const ClassExtensionInfo & extInfo,const PointerListInfo & ptrList)8510fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
8520fca6ea1SDimitry Andric     Defined *parentSym, uint32_t linkAtOffset,
8530fca6ea1SDimitry Andric     const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
8540fca6ea1SDimitry Andric   if (ptrList.allPtrs.empty())
8550fca6ea1SDimitry Andric     return nullptr;
8560fca6ea1SDimitry Andric 
8570fca6ea1SDimitry Andric   assert(ptrList.allPtrs.size() == ptrList.structCount);
8580fca6ea1SDimitry Andric 
8590fca6ea1SDimitry Andric   uint32_t bodySize = (ptrList.structCount * target->wordSize) +
8600fca6ea1SDimitry Andric                       /*header(count)*/ protocolListHeaderLayout.totalSize +
8610fca6ea1SDimitry Andric                       /*extra null value*/ target->wordSize;
8620fca6ea1SDimitry Andric   llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
8630fca6ea1SDimitry Andric 
8640fca6ea1SDimitry Andric   // This theoretically can be either 32b or 64b, but writing just the first 32b
8650fca6ea1SDimitry Andric   // is good enough
8660fca6ea1SDimitry Andric   const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>(
8670fca6ea1SDimitry Andric       bodyData.data() + protocolListHeaderLayout.protocolCountOffset);
8680fca6ea1SDimitry Andric 
8690fca6ea1SDimitry Andric   *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size();
8700fca6ea1SDimitry Andric 
8710fca6ea1SDimitry Andric   ConcatInputSection *listSec = make<ConcatInputSection>(
8720fca6ea1SDimitry Andric       *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
8730fca6ea1SDimitry Andric       infoCategoryWriter.catPtrListInfo.align);
8740fca6ea1SDimitry Andric   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
8750fca6ea1SDimitry Andric   listSec->live = true;
8760fca6ea1SDimitry Andric 
8770fca6ea1SDimitry Andric   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
8780fca6ea1SDimitry Andric 
8790fca6ea1SDimitry Andric   std::string symName = ptrList.categoryPrefix;
8800fca6ea1SDimitry Andric   symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
8810fca6ea1SDimitry Andric 
8820fca6ea1SDimitry Andric   Defined *ptrListSym = make<Defined>(
8830fca6ea1SDimitry Andric       newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
8840fca6ea1SDimitry Andric       listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
8850fca6ea1SDimitry Andric       /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
8860fca6ea1SDimitry Andric       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
8870fca6ea1SDimitry Andric       /*isWeakDefCanBeHidden=*/false);
8880fca6ea1SDimitry Andric 
8890fca6ea1SDimitry Andric   ptrListSym->used = true;
8900fca6ea1SDimitry Andric   parentSym->getObjectFile()->symbols.push_back(ptrListSym);
891*6c4b055cSDimitry Andric   addInputSection(listSec);
8920fca6ea1SDimitry Andric 
8930fca6ea1SDimitry Andric   createSymbolReference(parentSym, ptrListSym, linkAtOffset,
8940fca6ea1SDimitry Andric                         infoCategoryWriter.catBodyInfo.relocTemplate);
8950fca6ea1SDimitry Andric 
8960fca6ea1SDimitry Andric   uint32_t offset = protocolListHeaderLayout.totalSize;
8970fca6ea1SDimitry Andric   for (Symbol *symbol : ptrList.allPtrs) {
8980fca6ea1SDimitry Andric     createSymbolReference(ptrListSym, symbol, offset,
8990fca6ea1SDimitry Andric                           infoCategoryWriter.catPtrListInfo.relocTemplate);
9000fca6ea1SDimitry Andric     offset += target->wordSize;
9010fca6ea1SDimitry Andric   }
9020fca6ea1SDimitry Andric 
9030fca6ea1SDimitry Andric   return ptrListSym;
9040fca6ea1SDimitry Andric }
9050fca6ea1SDimitry Andric 
9060fca6ea1SDimitry Andric // Generate a pointer list (including header) and link it into the parent at the
9070fca6ea1SDimitry Andric // specified offset. This is used for instance and class methods and
9080fca6ea1SDimitry Andric // properties.
emitAndLinkPointerList(Defined * parentSym,uint32_t linkAtOffset,const ClassExtensionInfo & extInfo,const PointerListInfo & ptrList)9090fca6ea1SDimitry Andric void ObjcCategoryMerger::emitAndLinkPointerList(
9100fca6ea1SDimitry Andric     Defined *parentSym, uint32_t linkAtOffset,
9110fca6ea1SDimitry Andric     const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
9120fca6ea1SDimitry Andric   if (ptrList.allPtrs.empty())
9130fca6ea1SDimitry Andric     return;
9140fca6ea1SDimitry Andric 
9150fca6ea1SDimitry Andric   assert(ptrList.allPtrs.size() * target->wordSize ==
9160fca6ea1SDimitry Andric          ptrList.structCount * ptrList.structSize);
9170fca6ea1SDimitry Andric 
9180fca6ea1SDimitry Andric   // Generate body
9190fca6ea1SDimitry Andric   uint32_t bodySize =
9200fca6ea1SDimitry Andric       listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount);
9210fca6ea1SDimitry Andric   llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
9220fca6ea1SDimitry Andric 
9230fca6ea1SDimitry Andric   const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>(
9240fca6ea1SDimitry Andric       bodyData.data() + listHeaderLayout.structSizeOffset);
9250fca6ea1SDimitry Andric   const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>(
9260fca6ea1SDimitry Andric       bodyData.data() + listHeaderLayout.structCountOffset);
9270fca6ea1SDimitry Andric 
9280fca6ea1SDimitry Andric   *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize;
9290fca6ea1SDimitry Andric   *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount;
9300fca6ea1SDimitry Andric 
9310fca6ea1SDimitry Andric   ConcatInputSection *listSec = make<ConcatInputSection>(
9320fca6ea1SDimitry Andric       *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
9330fca6ea1SDimitry Andric       infoCategoryWriter.catPtrListInfo.align);
9340fca6ea1SDimitry Andric   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
9350fca6ea1SDimitry Andric   listSec->live = true;
9360fca6ea1SDimitry Andric 
9370fca6ea1SDimitry Andric   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
9380fca6ea1SDimitry Andric 
9390fca6ea1SDimitry Andric   std::string symName = ptrList.categoryPrefix;
9400fca6ea1SDimitry Andric   symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
9410fca6ea1SDimitry Andric 
9420fca6ea1SDimitry Andric   Defined *ptrListSym = make<Defined>(
9430fca6ea1SDimitry Andric       newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
9440fca6ea1SDimitry Andric       listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
9450fca6ea1SDimitry Andric       /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
9460fca6ea1SDimitry Andric       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
9470fca6ea1SDimitry Andric       /*isWeakDefCanBeHidden=*/false);
9480fca6ea1SDimitry Andric 
9490fca6ea1SDimitry Andric   ptrListSym->used = true;
9500fca6ea1SDimitry Andric   parentSym->getObjectFile()->symbols.push_back(ptrListSym);
951*6c4b055cSDimitry Andric   addInputSection(listSec);
9520fca6ea1SDimitry Andric 
9530fca6ea1SDimitry Andric   createSymbolReference(parentSym, ptrListSym, linkAtOffset,
9540fca6ea1SDimitry Andric                         infoCategoryWriter.catBodyInfo.relocTemplate);
9550fca6ea1SDimitry Andric 
9560fca6ea1SDimitry Andric   uint32_t offset = listHeaderLayout.totalSize;
9570fca6ea1SDimitry Andric   for (Symbol *symbol : ptrList.allPtrs) {
9580fca6ea1SDimitry Andric     createSymbolReference(ptrListSym, symbol, offset,
9590fca6ea1SDimitry Andric                           infoCategoryWriter.catPtrListInfo.relocTemplate);
9600fca6ea1SDimitry Andric     offset += target->wordSize;
9610fca6ea1SDimitry Andric   }
9620fca6ea1SDimitry Andric }
9630fca6ea1SDimitry Andric 
9640fca6ea1SDimitry Andric // This method creates an __objc_catlist ConcatInputSection with a single slot
9650fca6ea1SDimitry Andric Defined *
emitCatListEntrySec(const std::string & forCategoryName,const std::string & forBaseClassName,ObjFile * objFile)9660fca6ea1SDimitry Andric ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
9670fca6ea1SDimitry Andric                                         const std::string &forBaseClassName,
9680fca6ea1SDimitry Andric                                         ObjFile *objFile) {
9690fca6ea1SDimitry Andric   uint32_t sectionSize = target->wordSize;
9700fca6ea1SDimitry Andric   llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize);
9710fca6ea1SDimitry Andric 
9720fca6ea1SDimitry Andric   ConcatInputSection *newCatList =
9730fca6ea1SDimitry Andric       make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection,
9740fca6ea1SDimitry Andric                                bodyData, infoCategoryWriter.catListInfo.align);
9750fca6ea1SDimitry Andric   newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
9760fca6ea1SDimitry Andric   newCatList->live = true;
9770fca6ea1SDimitry Andric 
9780fca6ea1SDimitry Andric   newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
9790fca6ea1SDimitry Andric 
9800fca6ea1SDimitry Andric   std::string catSymName = "<__objc_catlist slot for merged category ";
9810fca6ea1SDimitry Andric   catSymName += forBaseClassName + "(" + forCategoryName + ")>";
9820fca6ea1SDimitry Andric 
9830fca6ea1SDimitry Andric   Defined *catListSym = make<Defined>(
9840fca6ea1SDimitry Andric       newStringData(catSymName.c_str()), /*file=*/objFile, newCatList,
9850fca6ea1SDimitry Andric       /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
9860fca6ea1SDimitry Andric       /*isPrivateExtern=*/false, /*includeInSymtab=*/false,
9870fca6ea1SDimitry Andric       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
9880fca6ea1SDimitry Andric       /*isWeakDefCanBeHidden=*/false);
9890fca6ea1SDimitry Andric 
9900fca6ea1SDimitry Andric   catListSym->used = true;
9910fca6ea1SDimitry Andric   objFile->symbols.push_back(catListSym);
992*6c4b055cSDimitry Andric   addInputSection(newCatList);
9930fca6ea1SDimitry Andric   return catListSym;
9940fca6ea1SDimitry Andric }
9950fca6ea1SDimitry Andric 
9960fca6ea1SDimitry Andric // Here we generate the main category body and link the name and base class into
9970fca6ea1SDimitry Andric // it. We don't link any other info yet like the protocol and class/instance
9980fca6ea1SDimitry Andric // methods/props.
emitCategoryBody(const std::string & name,const Defined * nameSym,const Symbol * baseClassSym,const std::string & baseClassName,ObjFile * objFile)9990fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
10000fca6ea1SDimitry Andric                                               const Defined *nameSym,
10010fca6ea1SDimitry Andric                                               const Symbol *baseClassSym,
10020fca6ea1SDimitry Andric                                               const std::string &baseClassName,
10030fca6ea1SDimitry Andric                                               ObjFile *objFile) {
10040fca6ea1SDimitry Andric   llvm::ArrayRef<uint8_t> bodyData = newSectionData(catLayout.totalSize);
10050fca6ea1SDimitry Andric 
10060fca6ea1SDimitry Andric   uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) +
10070fca6ea1SDimitry Andric                                    catLayout.sizeOffset);
10080fca6ea1SDimitry Andric   *ptrSize = catLayout.totalSize;
10090fca6ea1SDimitry Andric 
10100fca6ea1SDimitry Andric   ConcatInputSection *newBodySec =
10110fca6ea1SDimitry Andric       make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection,
10120fca6ea1SDimitry Andric                                bodyData, infoCategoryWriter.catBodyInfo.align);
10130fca6ea1SDimitry Andric   newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
10140fca6ea1SDimitry Andric   newBodySec->live = true;
10150fca6ea1SDimitry Andric 
10160fca6ea1SDimitry Andric   std::string symName =
10170fca6ea1SDimitry Andric       objc::symbol_names::category + baseClassName + "(" + name + ")";
10180fca6ea1SDimitry Andric   Defined *catBodySym = make<Defined>(
10190fca6ea1SDimitry Andric       newStringData(symName.c_str()), /*file=*/objFile, newBodySec,
10200fca6ea1SDimitry Andric       /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
10210fca6ea1SDimitry Andric       /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
10220fca6ea1SDimitry Andric       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
10230fca6ea1SDimitry Andric       /*isWeakDefCanBeHidden=*/false);
10240fca6ea1SDimitry Andric 
10250fca6ea1SDimitry Andric   catBodySym->used = true;
10260fca6ea1SDimitry Andric   objFile->symbols.push_back(catBodySym);
1027*6c4b055cSDimitry Andric   addInputSection(newBodySec);
10280fca6ea1SDimitry Andric 
10290fca6ea1SDimitry Andric   createSymbolReference(catBodySym, nameSym, catLayout.nameOffset,
10300fca6ea1SDimitry Andric                         infoCategoryWriter.catBodyInfo.relocTemplate);
10310fca6ea1SDimitry Andric 
10320fca6ea1SDimitry Andric   // Create a reloc to the base class (either external or internal)
10330fca6ea1SDimitry Andric   createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset,
10340fca6ea1SDimitry Andric                         infoCategoryWriter.catBodyInfo.relocTemplate);
10350fca6ea1SDimitry Andric 
10360fca6ea1SDimitry Andric   return catBodySym;
10370fca6ea1SDimitry Andric }
10380fca6ea1SDimitry Andric 
10390fca6ea1SDimitry Andric // This writes the new category name (for the merged category) into the binary
10400fca6ea1SDimitry Andric // and returns the sybmol for it.
emitCategoryName(const std::string & name,ObjFile * objFile)10410fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
10420fca6ea1SDimitry Andric                                               ObjFile *objFile) {
10430fca6ea1SDimitry Andric   StringRef nameStrData = newStringData(name.c_str());
10440fca6ea1SDimitry Andric   // We use +1 below to include the null terminator
10450fca6ea1SDimitry Andric   llvm::ArrayRef<uint8_t> nameData(
10460fca6ea1SDimitry Andric       reinterpret_cast<const uint8_t *>(nameStrData.data()),
10470fca6ea1SDimitry Andric       nameStrData.size() + 1);
10480fca6ea1SDimitry Andric 
10490fca6ea1SDimitry Andric   auto *parentSection = infoCategoryWriter.catNameInfo.inputSection;
10500fca6ea1SDimitry Andric   CStringInputSection *newStringSec = make<CStringInputSection>(
10510fca6ea1SDimitry Andric       *infoCategoryWriter.catNameInfo.inputSection, nameData,
10520fca6ea1SDimitry Andric       infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/true);
10530fca6ea1SDimitry Andric 
10540fca6ea1SDimitry Andric   parentSection->subsections.push_back({0, newStringSec});
10550fca6ea1SDimitry Andric 
10560fca6ea1SDimitry Andric   newStringSec->splitIntoPieces();
10570fca6ea1SDimitry Andric   newStringSec->pieces[0].live = true;
10580fca6ea1SDimitry Andric   newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
10590fca6ea1SDimitry Andric   in.cStringSection->addInput(newStringSec);
10600fca6ea1SDimitry Andric   assert(newStringSec->pieces.size() == 1);
10610fca6ea1SDimitry Andric 
10620fca6ea1SDimitry Andric   Defined *catNameSym = make<Defined>(
10630fca6ea1SDimitry Andric       "<merged category name>", /*file=*/objFile, newStringSec,
10640fca6ea1SDimitry Andric       /*value=*/0, nameData.size(),
10650fca6ea1SDimitry Andric       /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
10660fca6ea1SDimitry Andric       /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
10670fca6ea1SDimitry Andric       /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
10680fca6ea1SDimitry Andric 
10690fca6ea1SDimitry Andric   catNameSym->used = true;
10700fca6ea1SDimitry Andric   objFile->symbols.push_back(catNameSym);
10710fca6ea1SDimitry Andric   return catNameSym;
10720fca6ea1SDimitry Andric }
10730fca6ea1SDimitry Andric 
10740fca6ea1SDimitry Andric // This method fully creates a new category from the given ClassExtensionInfo.
10750fca6ea1SDimitry Andric // It creates the category name, body and method/protocol/prop lists and links
10760fca6ea1SDimitry Andric // them all together. Then it creates a new __objc_catlist entry and adds the
10770fca6ea1SDimitry Andric // category to it. Calling this method will fully generate a category which will
10780fca6ea1SDimitry Andric // be available in the final binary.
emitCategory(const ClassExtensionInfo & extInfo)10790fca6ea1SDimitry Andric Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) {
10800fca6ea1SDimitry Andric   Defined *catNameSym = emitCategoryName(extInfo.mergedContainerName,
10810fca6ea1SDimitry Andric                                          extInfo.objFileForMergeData);
10820fca6ea1SDimitry Andric 
10830fca6ea1SDimitry Andric   Defined *catBodySym = emitCategoryBody(
10840fca6ea1SDimitry Andric       extInfo.mergedContainerName, catNameSym, extInfo.baseClass,
10850fca6ea1SDimitry Andric       extInfo.baseClassName, extInfo.objFileForMergeData);
10860fca6ea1SDimitry Andric 
10870fca6ea1SDimitry Andric   Defined *catListSym =
10880fca6ea1SDimitry Andric       emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName,
10890fca6ea1SDimitry Andric                           extInfo.objFileForMergeData);
10900fca6ea1SDimitry Andric 
10910fca6ea1SDimitry Andric   // Add the single category body to the category list at the offset 0.
10920fca6ea1SDimitry Andric   createSymbolReference(catListSym, catBodySym, /*offset=*/0,
10930fca6ea1SDimitry Andric                         infoCategoryWriter.catListInfo.relocTemplate);
10940fca6ea1SDimitry Andric 
10950fca6ea1SDimitry Andric   emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset, extInfo,
10960fca6ea1SDimitry Andric                          extInfo.instanceMethods);
10970fca6ea1SDimitry Andric 
10980fca6ea1SDimitry Andric   emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo,
10990fca6ea1SDimitry Andric                          extInfo.classMethods);
11000fca6ea1SDimitry Andric 
11010fca6ea1SDimitry Andric   emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo,
11020fca6ea1SDimitry Andric                           extInfo.protocols);
11030fca6ea1SDimitry Andric 
11040fca6ea1SDimitry Andric   emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset, extInfo,
11050fca6ea1SDimitry Andric                          extInfo.instanceProps);
11060fca6ea1SDimitry Andric 
11070fca6ea1SDimitry Andric   emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo,
11080fca6ea1SDimitry Andric                          extInfo.classProps);
11090fca6ea1SDimitry Andric 
11100fca6ea1SDimitry Andric   return catBodySym;
11110fca6ea1SDimitry Andric }
11120fca6ea1SDimitry Andric 
11130fca6ea1SDimitry Andric // This method merges all the categories (sharing a base class) into a single
11140fca6ea1SDimitry Andric // category.
mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> & categories)11150fca6ea1SDimitry Andric void ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
11160fca6ea1SDimitry Andric     std::vector<InfoInputCategory> &categories) {
11170fca6ea1SDimitry Andric   assert(categories.size() > 1 && "Expected at least 2 categories");
11180fca6ea1SDimitry Andric 
11190fca6ea1SDimitry Andric   ClassExtensionInfo extInfo(catLayout);
11200fca6ea1SDimitry Andric 
11210fca6ea1SDimitry Andric   for (auto &catInfo : categories)
11220fca6ea1SDimitry Andric     parseCatInfoToExtInfo(catInfo, extInfo);
11230fca6ea1SDimitry Andric 
11240fca6ea1SDimitry Andric   Defined *newCatDef = emitCategory(extInfo);
11250fca6ea1SDimitry Andric   assert(newCatDef && "Failed to create a new category");
11260fca6ea1SDimitry Andric 
11270fca6ea1SDimitry Andric   // Suppress unsuded var warning
11280fca6ea1SDimitry Andric   (void)newCatDef;
11290fca6ea1SDimitry Andric 
11300fca6ea1SDimitry Andric   for (auto &catInfo : categories)
11310fca6ea1SDimitry Andric     catInfo.wasMerged = true;
11320fca6ea1SDimitry Andric }
11330fca6ea1SDimitry Andric 
createSymbolReference(Defined * refFrom,const Symbol * refTo,uint32_t offset,const Reloc & relocTemplate)11340fca6ea1SDimitry Andric void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
11350fca6ea1SDimitry Andric                                                const Symbol *refTo,
11360fca6ea1SDimitry Andric                                                uint32_t offset,
11370fca6ea1SDimitry Andric                                                const Reloc &relocTemplate) {
11380fca6ea1SDimitry Andric   Reloc r = relocTemplate;
11390fca6ea1SDimitry Andric   r.offset = offset;
11400fca6ea1SDimitry Andric   r.addend = 0;
11410fca6ea1SDimitry Andric   r.referent = const_cast<Symbol *>(refTo);
11420fca6ea1SDimitry Andric   refFrom->isec()->relocs.push_back(r);
11430fca6ea1SDimitry Andric }
11440fca6ea1SDimitry Andric 
11450fca6ea1SDimitry Andric // Get the list of categories in the '__objc_nlcatlist' section. We can't
11460fca6ea1SDimitry Andric // optimize these as they have a '+load' method that has to be called at
11470fca6ea1SDimitry Andric // runtime.
collectNlCategories()11480fca6ea1SDimitry Andric DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() {
11490fca6ea1SDimitry Andric   DenseSet<const Symbol *> nlCategories;
11500fca6ea1SDimitry Andric 
11510fca6ea1SDimitry Andric   for (InputSection *sec : allInputSections) {
11520fca6ea1SDimitry Andric     if (sec->getName() != section_names::objcNonLazyCatList)
11530fca6ea1SDimitry Andric       continue;
11540fca6ea1SDimitry Andric 
11550fca6ea1SDimitry Andric     for (auto &r : sec->relocs) {
11560fca6ea1SDimitry Andric       const Symbol *sym = r.referent.dyn_cast<Symbol *>();
11570fca6ea1SDimitry Andric       nlCategories.insert(sym);
11580fca6ea1SDimitry Andric     }
11590fca6ea1SDimitry Andric   }
11600fca6ea1SDimitry Andric   return nlCategories;
11610fca6ea1SDimitry Andric }
11620fca6ea1SDimitry Andric 
collectAndValidateCategoriesData()11630fca6ea1SDimitry Andric void ObjcCategoryMerger::collectAndValidateCategoriesData() {
11640fca6ea1SDimitry Andric   auto nlCategories = collectNlCategories();
11650fca6ea1SDimitry Andric 
11660fca6ea1SDimitry Andric   for (InputSection *sec : allInputSections) {
11670fca6ea1SDimitry Andric     if (sec->getName() != section_names::objcCatList)
11680fca6ea1SDimitry Andric       continue;
11690fca6ea1SDimitry Andric     ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec);
11700fca6ea1SDimitry Andric     assert(catListCisec &&
11710fca6ea1SDimitry Andric            "__objc_catList InputSection is not a ConcatInputSection");
11720fca6ea1SDimitry Andric 
11730fca6ea1SDimitry Andric     for (uint32_t off = 0; off < catListCisec->getSize();
11740fca6ea1SDimitry Andric          off += target->wordSize) {
11750fca6ea1SDimitry Andric       Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off);
11760fca6ea1SDimitry Andric       assert(categorySym &&
11770fca6ea1SDimitry Andric              "Failed to get a valid category at __objc_catlit offset");
11780fca6ea1SDimitry Andric 
11790fca6ea1SDimitry Andric       if (nlCategories.count(categorySym))
11800fca6ea1SDimitry Andric         continue;
11810fca6ea1SDimitry Andric 
11820fca6ea1SDimitry Andric       auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
11830fca6ea1SDimitry Andric       assert(catBodyIsec &&
11840fca6ea1SDimitry Andric              "Category data section is not an ConcatInputSection");
11850fca6ea1SDimitry Andric 
11860fca6ea1SDimitry Andric       SourceLanguage eLang = SourceLanguage::Unknown;
11870fca6ea1SDimitry Andric       if (categorySym->getName().starts_with(objc::symbol_names::category))
11880fca6ea1SDimitry Andric         eLang = SourceLanguage::ObjC;
11890fca6ea1SDimitry Andric       else if (categorySym->getName().starts_with(
11900fca6ea1SDimitry Andric                    objc::symbol_names::swift_objc_category))
11910fca6ea1SDimitry Andric         eLang = SourceLanguage::Swift;
11920fca6ea1SDimitry Andric       else
11930fca6ea1SDimitry Andric         llvm_unreachable("Unexpected category symbol name");
11940fca6ea1SDimitry Andric 
11950fca6ea1SDimitry Andric       InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang};
11960fca6ea1SDimitry Andric 
11970fca6ea1SDimitry Andric       // Check that the category has a reloc at 'klassOffset' (which is
11980fca6ea1SDimitry Andric       // a pointer to the class symbol)
11990fca6ea1SDimitry Andric 
12000fca6ea1SDimitry Andric       Symbol *classSym =
12010fca6ea1SDimitry Andric           tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
12020fca6ea1SDimitry Andric       assert(classSym && "Category does not have a valid base class");
12030fca6ea1SDimitry Andric 
12040fca6ea1SDimitry Andric       categoryMap[classSym].push_back(catInputInfo);
12050fca6ea1SDimitry Andric 
12060fca6ea1SDimitry Andric       collectCategoryWriterInfoFromCategory(catInputInfo);
12070fca6ea1SDimitry Andric     }
12080fca6ea1SDimitry Andric   }
12090fca6ea1SDimitry Andric }
12100fca6ea1SDimitry Andric 
12110fca6ea1SDimitry Andric // In the input we have multiple __objc_catlist InputSection, each of which may
12120fca6ea1SDimitry Andric // contain links to multiple categories. Of these categories, we will merge (and
12130fca6ea1SDimitry Andric // erase) only some. There will be some categories that will remain untouched
12140fca6ea1SDimitry Andric // (not erased). For these not erased categories, we generate new __objc_catlist
12150fca6ea1SDimitry Andric // entries since the parent __objc_catlist entry will be erased
generateCatListForNonErasedCategories(const MapVector<ConcatInputSection *,std::set<uint64_t>> catListToErasedOffsets)12160fca6ea1SDimitry Andric void ObjcCategoryMerger::generateCatListForNonErasedCategories(
12170fca6ea1SDimitry Andric     const MapVector<ConcatInputSection *, std::set<uint64_t>>
12180fca6ea1SDimitry Andric         catListToErasedOffsets) {
12190fca6ea1SDimitry Andric 
12200fca6ea1SDimitry Andric   // Go through all offsets of all __objc_catlist's that we process and if there
12210fca6ea1SDimitry Andric   // are categories that we didn't process - generate a new __objc_catlist for
12220fca6ea1SDimitry Andric   // each.
12230fca6ea1SDimitry Andric   for (auto &mapEntry : catListToErasedOffsets) {
12240fca6ea1SDimitry Andric     ConcatInputSection *catListIsec = mapEntry.first;
12250fca6ea1SDimitry Andric     for (uint32_t catListIsecOffset = 0;
12260fca6ea1SDimitry Andric          catListIsecOffset < catListIsec->data.size();
12270fca6ea1SDimitry Andric          catListIsecOffset += target->wordSize) {
12280fca6ea1SDimitry Andric       // This slot was erased, we can just skip it
12290fca6ea1SDimitry Andric       if (mapEntry.second.count(catListIsecOffset))
12300fca6ea1SDimitry Andric         continue;
12310fca6ea1SDimitry Andric 
12320fca6ea1SDimitry Andric       Defined *nonErasedCatBody =
12330fca6ea1SDimitry Andric           tryGetDefinedAtIsecOffset(catListIsec, catListIsecOffset);
12340fca6ea1SDimitry Andric       assert(nonErasedCatBody && "Failed to relocate non-deleted category");
12350fca6ea1SDimitry Andric 
12360fca6ea1SDimitry Andric       // Allocate data for the new __objc_catlist slot
12370fca6ea1SDimitry Andric       llvm::ArrayRef<uint8_t> bodyData = newSectionData(target->wordSize);
12380fca6ea1SDimitry Andric 
12390fca6ea1SDimitry Andric       // We mark the __objc_catlist slot as belonging to the same file as the
12400fca6ea1SDimitry Andric       // category
12410fca6ea1SDimitry Andric       ObjFile *objFile = dyn_cast<ObjFile>(nonErasedCatBody->getFile());
12420fca6ea1SDimitry Andric 
12430fca6ea1SDimitry Andric       ConcatInputSection *listSec = make<ConcatInputSection>(
12440fca6ea1SDimitry Andric           *infoCategoryWriter.catListInfo.inputSection, bodyData,
12450fca6ea1SDimitry Andric           infoCategoryWriter.catListInfo.align);
12460fca6ea1SDimitry Andric       listSec->parent = infoCategoryWriter.catListInfo.outputSection;
12470fca6ea1SDimitry Andric       listSec->live = true;
12480fca6ea1SDimitry Andric 
12490fca6ea1SDimitry Andric       std::string slotSymName = "<__objc_catlist slot for category ";
12500fca6ea1SDimitry Andric       slotSymName += nonErasedCatBody->getName();
12510fca6ea1SDimitry Andric       slotSymName += ">";
12520fca6ea1SDimitry Andric 
12530fca6ea1SDimitry Andric       Defined *catListSlotSym = make<Defined>(
12540fca6ea1SDimitry Andric           newStringData(slotSymName.c_str()), /*file=*/objFile, listSec,
12550fca6ea1SDimitry Andric           /*value=*/0, bodyData.size(),
12560fca6ea1SDimitry Andric           /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
12570fca6ea1SDimitry Andric           /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
12580fca6ea1SDimitry Andric           /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
12590fca6ea1SDimitry Andric 
12600fca6ea1SDimitry Andric       catListSlotSym->used = true;
12610fca6ea1SDimitry Andric       objFile->symbols.push_back(catListSlotSym);
1262*6c4b055cSDimitry Andric       addInputSection(listSec);
12630fca6ea1SDimitry Andric 
12640fca6ea1SDimitry Andric       // Now link the category body into the newly created slot
12650fca6ea1SDimitry Andric       createSymbolReference(catListSlotSym, nonErasedCatBody, 0,
12660fca6ea1SDimitry Andric                             infoCategoryWriter.catListInfo.relocTemplate);
12670fca6ea1SDimitry Andric     }
12680fca6ea1SDimitry Andric   }
12690fca6ea1SDimitry Andric }
12700fca6ea1SDimitry Andric 
eraseISec(ConcatInputSection * isec)12710fca6ea1SDimitry Andric void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
12720fca6ea1SDimitry Andric   isec->live = false;
12730fca6ea1SDimitry Andric   for (auto &sym : isec->symbols)
12740fca6ea1SDimitry Andric     sym->used = false;
12750fca6ea1SDimitry Andric }
12760fca6ea1SDimitry Andric 
12770fca6ea1SDimitry Andric // This fully erases the merged categories, including their body, their names,
12780fca6ea1SDimitry Andric // their method/protocol/prop lists and the __objc_catlist entries that link to
12790fca6ea1SDimitry Andric // them.
eraseMergedCategories()12800fca6ea1SDimitry Andric void ObjcCategoryMerger::eraseMergedCategories() {
12810fca6ea1SDimitry Andric   // Map of InputSection to a set of offsets of the categories that were merged
12820fca6ea1SDimitry Andric   MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
12830fca6ea1SDimitry Andric 
12840fca6ea1SDimitry Andric   for (auto &mapEntry : categoryMap) {
12850fca6ea1SDimitry Andric     for (InfoInputCategory &catInfo : mapEntry.second) {
12860fca6ea1SDimitry Andric       if (catInfo.wasMerged) {
12870fca6ea1SDimitry Andric         eraseISec(catInfo.catListIsec);
12880fca6ea1SDimitry Andric         catListToErasedOffsets[catInfo.catListIsec].insert(
12890fca6ea1SDimitry Andric             catInfo.offCatListIsec);
12900fca6ea1SDimitry Andric       }
12910fca6ea1SDimitry Andric     }
12920fca6ea1SDimitry Andric   }
12930fca6ea1SDimitry Andric 
12940fca6ea1SDimitry Andric   // If there were categories that we did not erase, we need to generate a new
12950fca6ea1SDimitry Andric   // __objc_catList that contains only the un-merged categories, and get rid of
12960fca6ea1SDimitry Andric   // the references to the ones we merged.
12970fca6ea1SDimitry Andric   generateCatListForNonErasedCategories(catListToErasedOffsets);
12980fca6ea1SDimitry Andric 
12990fca6ea1SDimitry Andric   // Erase the old method lists & names of the categories that were merged
13000fca6ea1SDimitry Andric   for (auto &mapEntry : categoryMap) {
13010fca6ea1SDimitry Andric     for (InfoInputCategory &catInfo : mapEntry.second) {
13020fca6ea1SDimitry Andric       if (!catInfo.wasMerged)
13030fca6ea1SDimitry Andric         continue;
13040fca6ea1SDimitry Andric 
13050fca6ea1SDimitry Andric       eraseISec(catInfo.catBodyIsec);
13060fca6ea1SDimitry Andric 
13070fca6ea1SDimitry Andric       // We can't erase 'catLayout.nameOffset' for either Swift or ObjC
13080fca6ea1SDimitry Andric       //   categories because the name will sometimes also be used for other
13090fca6ea1SDimitry Andric       //   purposes.
13100fca6ea1SDimitry Andric       // For Swift, see usages of 'l_.str.11.SimpleClass' in
13110fca6ea1SDimitry Andric       //   objc-category-merging-swift.s
13120fca6ea1SDimitry Andric       // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in
13130fca6ea1SDimitry Andric       //   objc-category-merging-erase-objc-name-test.s
13140fca6ea1SDimitry Andric       // TODO: handle the above in a smarter way
13150fca6ea1SDimitry Andric 
13160fca6ea1SDimitry Andric       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
13170fca6ea1SDimitry Andric                                   catLayout.instanceMethodsOffset);
13180fca6ea1SDimitry Andric       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
13190fca6ea1SDimitry Andric                                   catLayout.classMethodsOffset);
13200fca6ea1SDimitry Andric       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
13210fca6ea1SDimitry Andric                                   catLayout.protocolsOffset);
13220fca6ea1SDimitry Andric       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
13230fca6ea1SDimitry Andric                                   catLayout.classPropsOffset);
13240fca6ea1SDimitry Andric       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
13250fca6ea1SDimitry Andric                                   catLayout.instancePropsOffset);
13260fca6ea1SDimitry Andric     }
13270fca6ea1SDimitry Andric   }
13280fca6ea1SDimitry Andric }
13290fca6ea1SDimitry Andric 
doMerge()13300fca6ea1SDimitry Andric void ObjcCategoryMerger::doMerge() {
13310fca6ea1SDimitry Andric   collectAndValidateCategoriesData();
13320fca6ea1SDimitry Andric 
13330fca6ea1SDimitry Andric   for (auto &[baseClass, catInfos] : categoryMap) {
13340fca6ea1SDimitry Andric     if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) {
13350fca6ea1SDimitry Andric       // Merge all categories into the base class
13360fca6ea1SDimitry Andric       mergeCategoriesIntoBaseClass(baseClassDef, catInfos);
13370fca6ea1SDimitry Andric     } else if (catInfos.size() > 1) {
13380fca6ea1SDimitry Andric       // Merge all categories into a new, single category
13390fca6ea1SDimitry Andric       mergeCategoriesIntoSingleCategory(catInfos);
13400fca6ea1SDimitry Andric     }
13410fca6ea1SDimitry Andric   }
13420fca6ea1SDimitry Andric 
13430fca6ea1SDimitry Andric   // Erase all categories that were merged
13440fca6ea1SDimitry Andric   eraseMergedCategories();
13450fca6ea1SDimitry Andric }
13460fca6ea1SDimitry Andric 
doCleanup()13470fca6ea1SDimitry Andric void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }
13480fca6ea1SDimitry Andric 
newStringData(const char * str)13490fca6ea1SDimitry Andric StringRef ObjcCategoryMerger::newStringData(const char *str) {
13500fca6ea1SDimitry Andric   uint32_t len = strlen(str);
13510fca6ea1SDimitry Andric   uint32_t bufSize = len + 1;
13520fca6ea1SDimitry Andric   SmallVector<uint8_t> &data = newSectionData(bufSize);
13530fca6ea1SDimitry Andric   char *strData = reinterpret_cast<char *>(data.data());
13540fca6ea1SDimitry Andric   // Copy the string chars and null-terminator
13550fca6ea1SDimitry Andric   memcpy(strData, str, bufSize);
13560fca6ea1SDimitry Andric   return StringRef(strData, len);
13570fca6ea1SDimitry Andric }
13580fca6ea1SDimitry Andric 
newSectionData(uint32_t size)13590fca6ea1SDimitry Andric SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) {
13600fca6ea1SDimitry Andric   generatedSectionData.push_back(
13610fca6ea1SDimitry Andric       std::make_unique<SmallVector<uint8_t>>(size, 0));
13620fca6ea1SDimitry Andric   return *generatedSectionData.back();
13630fca6ea1SDimitry Andric }
13640fca6ea1SDimitry Andric 
13650fca6ea1SDimitry Andric } // namespace
13660fca6ea1SDimitry Andric 
mergeCategories()13670fca6ea1SDimitry Andric void objc::mergeCategories() {
13680fca6ea1SDimitry Andric   TimeTraceScope timeScope("ObjcCategoryMerger");
13690fca6ea1SDimitry Andric 
13700fca6ea1SDimitry Andric   ObjcCategoryMerger merger(inputSections);
13710fca6ea1SDimitry Andric   merger.doMerge();
13720fca6ea1SDimitry Andric }
13730fca6ea1SDimitry Andric 
doCleanup()13740fca6ea1SDimitry Andric void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); }
13750fca6ea1SDimitry Andric 
13760fca6ea1SDimitry Andric ObjcCategoryMerger::SourceLanguage
getClassSymSourceLang(const Defined * classSym)13770fca6ea1SDimitry Andric ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) {
13780fca6ea1SDimitry Andric   if (classSym->getName().starts_with(objc::symbol_names::swift_objc_klass))
13790fca6ea1SDimitry Andric     return SourceLanguage::Swift;
13800fca6ea1SDimitry Andric 
13810fca6ea1SDimitry Andric   // If the symbol name matches the ObjC prefix, we don't necessarely know this
13820fca6ea1SDimitry Andric   // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift
13830fca6ea1SDimitry Andric   // classes. Ex:
13840fca6ea1SDimitry Andric   //  .globl	_OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
13850fca6ea1SDimitry Andric   //  .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
13860fca6ea1SDimitry Andric   //  .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN
13870fca6ea1SDimitry Andric   //
13880fca6ea1SDimitry Andric   // So we scan for symbols with the same address and check for the Swift class
13890fca6ea1SDimitry Andric   if (classSym->getName().starts_with(objc::symbol_names::klass)) {
13900fca6ea1SDimitry Andric     for (auto &sym : classSym->originalIsec->symbols)
13910fca6ea1SDimitry Andric       if (sym->value == classSym->value)
13920fca6ea1SDimitry Andric         if (sym->getName().starts_with(objc::symbol_names::swift_objc_klass))
13930fca6ea1SDimitry Andric           return SourceLanguage::Swift;
13940fca6ea1SDimitry Andric     return SourceLanguage::ObjC;
13950fca6ea1SDimitry Andric   }
13960fca6ea1SDimitry Andric 
13970fca6ea1SDimitry Andric   llvm_unreachable("Unexpected class symbol name during category merging");
13980fca6ea1SDimitry Andric }
mergeCategoriesIntoBaseClass(const Defined * baseClass,std::vector<InfoInputCategory> & categories)13990fca6ea1SDimitry Andric void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
14000fca6ea1SDimitry Andric     const Defined *baseClass, std::vector<InfoInputCategory> &categories) {
14010fca6ea1SDimitry Andric   assert(categories.size() >= 1 && "Expected at least one category to merge");
14020fca6ea1SDimitry Andric 
14030fca6ea1SDimitry Andric   // Collect all the info from the categories
14040fca6ea1SDimitry Andric   ClassExtensionInfo extInfo(catLayout);
14050fca6ea1SDimitry Andric   extInfo.baseClass = baseClass;
14060fca6ea1SDimitry Andric   extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass);
14070fca6ea1SDimitry Andric 
14080fca6ea1SDimitry Andric   for (auto &catInfo : categories) {
14090fca6ea1SDimitry Andric     parseCatInfoToExtInfo(catInfo, extInfo);
14100fca6ea1SDimitry Andric   }
14110fca6ea1SDimitry Andric 
14120fca6ea1SDimitry Andric   // Get metadata for the base class
14130fca6ea1SDimitry Andric   Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true);
14140fca6ea1SDimitry Andric   ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec());
14150fca6ea1SDimitry Andric   Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false);
14160fca6ea1SDimitry Andric   ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec());
14170fca6ea1SDimitry Andric 
14180fca6ea1SDimitry Andric   // Now collect the info from the base class from the various lists in the
14190fca6ea1SDimitry Andric   // class metadata
14200fca6ea1SDimitry Andric 
14210fca6ea1SDimitry Andric   // Protocol lists are a special case - the same protocol list is in classRo
14220fca6ea1SDimitry Andric   // and metaRo, so we only need to parse it once
14230fca6ea1SDimitry Andric   parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
14240fca6ea1SDimitry Andric                         extInfo.protocols, extInfo.baseClassSourceLanguage);
14250fca6ea1SDimitry Andric 
14260fca6ea1SDimitry Andric   // Check that the classRo and metaRo protocol lists are identical
14270fca6ea1SDimitry Andric   assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
14280fca6ea1SDimitry Andric                                extInfo.baseClassSourceLanguage) ==
14290fca6ea1SDimitry Andric              parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
14300fca6ea1SDimitry Andric                                    extInfo.baseClassSourceLanguage) &&
14310fca6ea1SDimitry Andric          "Category merger expects classRo and metaRo to have the same protocol "
14320fca6ea1SDimitry Andric          "list");
14330fca6ea1SDimitry Andric 
14340fca6ea1SDimitry Andric   parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
14350fca6ea1SDimitry Andric                        extInfo.classMethods);
14360fca6ea1SDimitry Andric   parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset,
14370fca6ea1SDimitry Andric                        extInfo.instanceMethods);
14380fca6ea1SDimitry Andric 
14390fca6ea1SDimitry Andric   parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset,
14400fca6ea1SDimitry Andric                        extInfo.classProps);
14410fca6ea1SDimitry Andric   parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset,
14420fca6ea1SDimitry Andric                        extInfo.instanceProps);
14430fca6ea1SDimitry Andric 
14440fca6ea1SDimitry Andric   // Erase the old lists - these will be generated and replaced
14450fca6ea1SDimitry Andric   eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset);
14460fca6ea1SDimitry Andric   eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset);
14470fca6ea1SDimitry Andric   eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset);
14480fca6ea1SDimitry Andric   eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset);
14490fca6ea1SDimitry Andric   eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset);
14500fca6ea1SDimitry Andric   eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset);
14510fca6ea1SDimitry Andric 
14520fca6ea1SDimitry Andric   // Emit the newly merged lists - first into the meta RO then into the class RO
14530fca6ea1SDimitry Andric   // First we emit and link the protocol list into the meta RO. Then we link it
14540fca6ea1SDimitry Andric   // in the classRo as well (they're supposed to be identical)
14550fca6ea1SDimitry Andric   if (Defined *protoListSym =
14560fca6ea1SDimitry Andric           emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset,
14570fca6ea1SDimitry Andric                                   extInfo, extInfo.protocols)) {
14580fca6ea1SDimitry Andric     createSymbolReference(classRo, protoListSym,
14590fca6ea1SDimitry Andric                           roClassLayout.baseProtocolsOffset,
14600fca6ea1SDimitry Andric                           infoCategoryWriter.catBodyInfo.relocTemplate);
14610fca6ea1SDimitry Andric   }
14620fca6ea1SDimitry Andric 
14630fca6ea1SDimitry Andric   emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo,
14640fca6ea1SDimitry Andric                          extInfo.classMethods);
14650fca6ea1SDimitry Andric   emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo,
14660fca6ea1SDimitry Andric                          extInfo.instanceMethods);
14670fca6ea1SDimitry Andric 
14680fca6ea1SDimitry Andric   emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo,
14690fca6ea1SDimitry Andric                          extInfo.classProps);
14700fca6ea1SDimitry Andric 
14710fca6ea1SDimitry Andric   emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo,
14720fca6ea1SDimitry Andric                          extInfo.instanceProps);
14730fca6ea1SDimitry Andric 
14740fca6ea1SDimitry Andric   // Mark all the categories as merged - this will be used to erase them later
14750fca6ea1SDimitry Andric   for (auto &catInfo : categories)
14760fca6ea1SDimitry Andric     catInfo.wasMerged = true;
14770fca6ea1SDimitry Andric }
14780fca6ea1SDimitry Andric 
14790fca6ea1SDimitry Andric // Erase the symbol at a given offset in an InputSection
eraseSymbolAtIsecOffset(ConcatInputSection * isec,uint32_t offset)14800fca6ea1SDimitry Andric void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,
14810fca6ea1SDimitry Andric                                                  uint32_t offset) {
14820fca6ea1SDimitry Andric   Defined *sym = tryGetDefinedAtIsecOffset(isec, offset);
14830fca6ea1SDimitry Andric   if (!sym)
14840fca6ea1SDimitry Andric     return;
14850fca6ea1SDimitry Andric 
14860fca6ea1SDimitry Andric   // Remove the symbol from isec->symbols
14870fca6ea1SDimitry Andric   assert(isa<Defined>(sym) && "Can only erase a Defined");
14880fca6ea1SDimitry Andric   llvm::erase(isec->symbols, sym);
14890fca6ea1SDimitry Andric 
14900fca6ea1SDimitry Andric   // Remove the relocs that refer to this symbol
14910fca6ea1SDimitry Andric   auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };
14920fca6ea1SDimitry Andric   llvm::erase_if(isec->relocs, removeAtOff);
14930fca6ea1SDimitry Andric 
14940fca6ea1SDimitry Andric   // Now, if the symbol fully occupies a ConcatInputSection, we can also erase
14950fca6ea1SDimitry Andric   // the whole ConcatInputSection
14960fca6ea1SDimitry Andric   if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec()))
14970fca6ea1SDimitry Andric     if (cisec->data.size() == sym->size)
14980fca6ea1SDimitry Andric       eraseISec(cisec);
14990fca6ea1SDimitry Andric }
1500