xref: /freebsd/contrib/llvm-project/lld/MachO/ObjC.cpp (revision 6c4b055cfb6bf549e9145dde6454cc6b178c35e4)
1 //===- ObjC.cpp -----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ObjC.h"
10 #include "ConcatOutputSection.h"
11 #include "InputFiles.h"
12 #include "InputSection.h"
13 #include "Layout.h"
14 #include "OutputSegment.h"
15 #include "SyntheticSections.h"
16 #include "Target.h"
17 
18 #include "lld/Common/ErrorHandler.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/BinaryFormat/MachO.h"
21 #include "llvm/Bitcode/BitcodeReader.h"
22 #include "llvm/Support/TimeProfiler.h"
23 
24 using namespace llvm;
25 using namespace llvm::MachO;
26 using namespace lld;
27 using namespace lld::macho;
28 
objectHasObjCSection(MemoryBufferRef mb)29 template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {
30   using SectionHeader = typename LP::section;
31 
32   auto *hdr =
33       reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart());
34   if (hdr->magic != LP::magic)
35     return false;
36 
37   if (const auto *c =
38           findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) {
39     auto sectionHeaders = ArrayRef<SectionHeader>{
40         reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
41     for (const SectionHeader &secHead : sectionHeaders) {
42       StringRef sectname(secHead.sectname,
43                          strnlen(secHead.sectname, sizeof(secHead.sectname)));
44       StringRef segname(secHead.segname,
45                         strnlen(secHead.segname, sizeof(secHead.segname)));
46       if ((segname == segment_names::data &&
47            sectname == section_names::objcCatList) ||
48           (segname == segment_names::text &&
49            sectname.starts_with(section_names::swift))) {
50         return true;
51       }
52     }
53   }
54   return false;
55 }
56 
objectHasObjCSection(MemoryBufferRef mb)57 static bool objectHasObjCSection(MemoryBufferRef mb) {
58   if (target->wordSize == 8)
59     return ::objectHasObjCSection<LP64>(mb);
60   else
61     return ::objectHasObjCSection<ILP32>(mb);
62 }
63 
hasObjCSection(MemoryBufferRef mb)64 bool macho::hasObjCSection(MemoryBufferRef mb) {
65   switch (identify_magic(mb.getBuffer())) {
66   case file_magic::macho_object:
67     return objectHasObjCSection(mb);
68   case file_magic::bitcode:
69     return check(isBitcodeContainingObjCCategory(mb));
70   default:
71     return false;
72   }
73 }
74 
75 namespace {
76 
77 #define FOR_EACH_CATEGORY_FIELD(DO)                                            \
78   DO(Ptr, name)                                                                \
79   DO(Ptr, klass)                                                               \
80   DO(Ptr, instanceMethods)                                                     \
81   DO(Ptr, classMethods)                                                        \
82   DO(Ptr, protocols)                                                           \
83   DO(Ptr, instanceProps)                                                       \
84   DO(Ptr, classProps)                                                          \
85   DO(uint32_t, size)
86 
87 CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD);
88 
89 #undef FOR_EACH_CATEGORY_FIELD
90 
91 #define FOR_EACH_CLASS_FIELD(DO)                                               \
92   DO(Ptr, metaClass)                                                           \
93   DO(Ptr, superClass)                                                          \
94   DO(Ptr, methodCache)                                                         \
95   DO(Ptr, vtable)                                                              \
96   DO(Ptr, roData)
97 
98 CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD);
99 
100 #undef FOR_EACH_CLASS_FIELD
101 
102 #define FOR_EACH_RO_CLASS_FIELD(DO)                                            \
103   DO(uint32_t, flags)                                                          \
104   DO(uint32_t, instanceStart)                                                  \
105   DO(Ptr, instanceSize)                                                        \
106   DO(Ptr, ivarLayout)                                                          \
107   DO(Ptr, name)                                                                \
108   DO(Ptr, baseMethods)                                                         \
109   DO(Ptr, baseProtocols)                                                       \
110   DO(Ptr, ivars)                                                               \
111   DO(Ptr, weakIvarLayout)                                                      \
112   DO(Ptr, baseProperties)
113 
114 CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD);
115 
116 #undef FOR_EACH_RO_CLASS_FIELD
117 
118 #define FOR_EACH_LIST_HEADER(DO)                                               \
119   DO(uint32_t, structSize)                                                     \
120   DO(uint32_t, structCount)
121 
122 CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER);
123 
124 #undef FOR_EACH_LIST_HEADER
125 
126 #define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount)
127 
128 CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER);
129 
130 #undef FOR_EACH_PROTOCOL_LIST_HEADER
131 
132 #define FOR_EACH_METHOD(DO)                                                    \
133   DO(Ptr, name)                                                                \
134   DO(Ptr, type)                                                                \
135   DO(Ptr, impl)
136 
137 CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD);
138 
139 #undef FOR_EACH_METHOD
140 
141 enum MethodContainerKind {
142   MCK_Class,
143   MCK_Category,
144 };
145 
146 struct MethodContainer {
147   MethodContainerKind kind;
148   const ConcatInputSection *isec;
149 };
150 
151 enum MethodKind {
152   MK_Instance,
153   MK_Static,
154 };
155 
156 struct ObjcClass {
157   DenseMap<CachedHashStringRef, MethodContainer> instanceMethods;
158   DenseMap<CachedHashStringRef, MethodContainer> classMethods;
159 };
160 
161 } // namespace
162 
163 class ObjcCategoryChecker {
164 public:
165   ObjcCategoryChecker();
166   void parseCategory(const ConcatInputSection *catListIsec);
167 
168 private:
169   void parseClass(const Defined *classSym);
170   void parseMethods(const ConcatInputSection *methodsIsec,
171                     const Symbol *methodContainer,
172                     const ConcatInputSection *containerIsec,
173                     MethodContainerKind, MethodKind);
174 
175   CategoryLayout catLayout;
176   ClassLayout classLayout;
177   ROClassLayout roClassLayout;
178   ListHeaderLayout listHeaderLayout;
179   MethodLayout methodLayout;
180 
181   DenseMap<const Symbol *, ObjcClass> classMap;
182 };
183 
ObjcCategoryChecker()184 ObjcCategoryChecker::ObjcCategoryChecker()
185     : catLayout(target->wordSize), classLayout(target->wordSize),
186       roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
187       methodLayout(target->wordSize) {}
188 
189 // \p r must point to an offset within a CStringInputSection or a
190 // ConcatInputSection
getReferentString(const Reloc & r)191 static StringRef getReferentString(const Reloc &r) {
192   if (auto *isec = r.referent.dyn_cast<InputSection *>())
193     return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend);
194 
195   auto *sym = cast<Defined>(r.referent.get<Symbol *>());
196   auto *symIsec = sym->isec();
197   auto symOffset = sym->value + r.addend;
198 
199   if (auto *s = dyn_cast_or_null<CStringInputSection>(symIsec))
200     return s->getStringRefAtOffset(symOffset);
201 
202   if (isa<ConcatInputSection>(symIsec)) {
203     auto strData = symIsec->data.slice(symOffset);
204     const char *pszData = reinterpret_cast<const char *>(strData.data());
205     return StringRef(pszData, strnlen(pszData, strData.size()));
206   }
207 
208   llvm_unreachable("unknown reference section in getReferentString");
209 }
210 
parseMethods(const ConcatInputSection * methodsIsec,const Symbol * methodContainerSym,const ConcatInputSection * containerIsec,MethodContainerKind mcKind,MethodKind mKind)211 void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
212                                        const Symbol *methodContainerSym,
213                                        const ConcatInputSection *containerIsec,
214                                        MethodContainerKind mcKind,
215                                        MethodKind mKind) {
216   ObjcClass &klass = classMap[methodContainerSym];
217   for (const Reloc &r : methodsIsec->relocs) {
218     if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize !=
219         methodLayout.nameOffset)
220       continue;
221 
222     CachedHashStringRef methodName(getReferentString(r));
223     // +load methods are special: all implementations are called by the runtime
224     // even if they are part of the same class. Thus there is no need to check
225     // for duplicates.
226     // NOTE: Instead of specifically checking for this method name, ld64 simply
227     // checks whether a class / category is present in __objc_nlclslist /
228     // __objc_nlcatlist respectively. This will be the case if the class /
229     // category has a +load method. It skips optimizing the categories if there
230     // are multiple +load methods. Since it does dupe checking as part of the
231     // optimization process, this avoids spurious dupe messages around +load,
232     // but it also means that legit dupe issues for other methods are ignored.
233     if (mKind == MK_Static && methodName.val() == "load")
234       continue;
235 
236     auto &methodMap =
237         mKind == MK_Instance ? klass.instanceMethods : klass.classMethods;
238     if (methodMap
239             .try_emplace(methodName, MethodContainer{mcKind, containerIsec})
240             .second)
241       continue;
242 
243     // We have a duplicate; generate a warning message.
244     const auto &mc = methodMap.lookup(methodName);
245     const Reloc *nameReloc = nullptr;
246     if (mc.kind == MCK_Category) {
247       nameReloc = mc.isec->getRelocAt(catLayout.nameOffset);
248     } else {
249       assert(mc.kind == MCK_Class);
250       const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset)
251                          ->getReferentInputSection();
252       nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset);
253     }
254     StringRef containerName = getReferentString(*nameReloc);
255     StringRef methPrefix = mKind == MK_Instance ? "-" : "+";
256 
257     // We should only ever encounter collisions when parsing category methods
258     // (since the Class struct is parsed before any of its categories).
259     assert(mcKind == MCK_Category);
260     StringRef newCatName =
261         getReferentString(*containerIsec->getRelocAt(catLayout.nameOffset));
262 
263     auto formatObjAndSrcFileName = [](const InputSection *section) {
264       lld::macho::InputFile *inputFile = section->getFile();
265       std::string result = toString(inputFile);
266 
267       auto objFile = dyn_cast_or_null<ObjFile>(inputFile);
268       if (objFile && objFile->compileUnit)
269         result += " (" + objFile->sourceFile() + ")";
270 
271       return result;
272     };
273 
274     StringRef containerType = mc.kind == MCK_Category ? "category" : "class";
275     warn("method '" + methPrefix + methodName.val() +
276          "' has conflicting definitions:\n>>> defined in category " +
277          newCatName + " from " + formatObjAndSrcFileName(containerIsec) +
278          "\n>>> defined in " + containerType + " " + containerName + " from " +
279          formatObjAndSrcFileName(mc.isec));
280   }
281 }
282 
parseCategory(const ConcatInputSection * catIsec)283 void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) {
284   auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset);
285   if (!classReloc)
286     return;
287 
288   auto *classSym = classReloc->referent.get<Symbol *>();
289   if (auto *d = dyn_cast<Defined>(classSym))
290     if (!classMap.count(d))
291       parseClass(d);
292 
293   if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) {
294     parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
295                  classSym, catIsec, MCK_Category, MK_Static);
296   }
297 
298   if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) {
299     parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
300                  classSym, catIsec, MCK_Category, MK_Instance);
301   }
302 }
303 
parseClass(const Defined * classSym)304 void ObjcCategoryChecker::parseClass(const Defined *classSym) {
305   // Given a Class struct, get its corresponding Methods struct
306   auto getMethodsIsec =
307       [&](const InputSection *classIsec) -> ConcatInputSection * {
308     if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) {
309       if (const auto *roIsec =
310               cast_or_null<ConcatInputSection>(r->getReferentInputSection())) {
311         if (const auto *r =
312                 roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) {
313           if (auto *methodsIsec = cast_or_null<ConcatInputSection>(
314                   r->getReferentInputSection()))
315             return methodsIsec;
316         }
317       }
318     }
319     return nullptr;
320   };
321 
322   const auto *classIsec = cast<ConcatInputSection>(classSym->isec());
323 
324   // Parse instance methods.
325   if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
326     parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class,
327                  MK_Instance);
328 
329   // Class methods are contained in the metaclass.
330   if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset))
331     if (const auto *classMethodsIsec = getMethodsIsec(
332             cast<ConcatInputSection>(r->getReferentInputSection())))
333       parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static);
334 }
335 
checkCategories()336 void objc::checkCategories() {
337   TimeTraceScope timeScope("ObjcCategoryChecker");
338 
339   ObjcCategoryChecker checker;
340   for (const InputSection *isec : inputSections) {
341     if (isec->getName() == section_names::objcCatList)
342       for (const Reloc &r : isec->relocs) {
343         auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection());
344         checker.parseCategory(catIsec);
345       }
346   }
347 }
348 
349 namespace {
350 
351 class ObjcCategoryMerger {
352   // In which language was a particular construct originally defined
353   enum SourceLanguage { Unknown, ObjC, Swift };
354 
355   // Information about an input category
356   struct InfoInputCategory {
357     ConcatInputSection *catListIsec;
358     ConcatInputSection *catBodyIsec;
359     uint32_t offCatListIsec = 0;
360     SourceLanguage sourceLanguage = SourceLanguage::Unknown;
361 
362     bool wasMerged = false;
363   };
364 
365   // To write new (merged) categories or classes, we will try make limited
366   // assumptions about the alignment and the sections the various class/category
367   // info are stored in and . So we'll just reuse the same sections and
368   // alignment as already used in existing (input) categories. To do this we
369   // have InfoCategoryWriter which contains the various sections that the
370   // generated categories will be written to.
371   struct InfoWriteSection {
372     bool valid = false; // Data has been successfully collected from input
373     uint32_t align = 0;
374     Section *inputSection;
375     Reloc relocTemplate;
376     OutputSection *outputSection;
377   };
378 
379   struct InfoCategoryWriter {
380     InfoWriteSection catListInfo;
381     InfoWriteSection catBodyInfo;
382     InfoWriteSection catNameInfo;
383     InfoWriteSection catPtrListInfo;
384   };
385 
386   // Information about a pointer list in the original categories or class(method
387   // lists, protocol lists, etc)
388   struct PointerListInfo {
389     PointerListInfo() = default;
390     PointerListInfo(const PointerListInfo &) = default;
PointerListInfo__anonff1b1f600411::ObjcCategoryMerger::PointerListInfo391     PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct)
392         : categoryPrefix(_categoryPrefix),
393           pointersPerStruct(_pointersPerStruct) {}
394 
operator ==__anonff1b1f600411::ObjcCategoryMerger::PointerListInfo395     inline bool operator==(const PointerListInfo &cmp) const {
396       return pointersPerStruct == cmp.pointersPerStruct &&
397              structSize == cmp.structSize && structCount == cmp.structCount &&
398              allPtrs == cmp.allPtrs;
399     }
400 
401     const char *categoryPrefix;
402 
403     uint32_t pointersPerStruct = 0;
404 
405     uint32_t structSize = 0;
406     uint32_t structCount = 0;
407 
408     std::vector<Symbol *> allPtrs;
409   };
410 
411   // Full information describing an ObjC class . This will include all the
412   // additional methods, protocols, and properties that are contained in the
413   // class and all the categories that extend a particular class.
414   struct ClassExtensionInfo {
ClassExtensionInfo__anonff1b1f600411::ObjcCategoryMerger::ClassExtensionInfo415     ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};
416 
417     // Merged names of containers. Ex: base|firstCategory|secondCategory|...
418     std::string mergedContainerName;
419     std::string baseClassName;
420     const Symbol *baseClass = nullptr;
421     SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown;
422 
423     CategoryLayout &catLayout;
424 
425     // In case we generate new data, mark the new data as belonging to this file
426     ObjFile *objFileForMergeData = nullptr;
427 
428     PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods,
429                                        /*pointersPerStruct=*/3};
430     PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods,
431                                     /*pointersPerStruct=*/3};
432     PointerListInfo protocols = {objc::symbol_names::categoryProtocols,
433                                  /*pointersPerStruct=*/0};
434     PointerListInfo instanceProps = {objc::symbol_names::listProprieties,
435                                      /*pointersPerStruct=*/2};
436     PointerListInfo classProps = {objc::symbol_names::klassPropList,
437                                   /*pointersPerStruct=*/2};
438   };
439 
440 public:
441   ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
442   void doMerge();
443   static void doCleanup();
444 
445 private:
446   DenseSet<const Symbol *> collectNlCategories();
447   void collectAndValidateCategoriesData();
448   void
449   mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
450 
451   void eraseISec(ConcatInputSection *isec);
452   void eraseMergedCategories();
453 
454   void generateCatListForNonErasedCategories(
455       MapVector<ConcatInputSection *, std::set<uint64_t>>
456           catListToErasedOffsets);
457   void collectSectionWriteInfoFromIsec(const InputSection *isec,
458                                        InfoWriteSection &catWriteInfo);
459   void collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);
460   void parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
461                              ClassExtensionInfo &extInfo);
462 
463   void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
464                              PointerListInfo &ptrList,
465                              SourceLanguage sourceLang);
466 
467   PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
468                                         uint32_t secOffset,
469                                         SourceLanguage sourceLang);
470 
471   void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
472                             PointerListInfo &ptrList);
473 
474   void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
475                               const ClassExtensionInfo &extInfo,
476                               const PointerListInfo &ptrList);
477 
478   Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
479                                    const ClassExtensionInfo &extInfo,
480                                    const PointerListInfo &ptrList);
481 
482   Defined *emitCategory(const ClassExtensionInfo &extInfo);
483   Defined *emitCatListEntrySec(const std::string &forCategoryName,
484                                const std::string &forBaseClassName,
485                                ObjFile *objFile);
486   Defined *emitCategoryBody(const std::string &name, const Defined *nameSym,
487                             const Symbol *baseClassSym,
488                             const std::string &baseClassName, ObjFile *objFile);
489   Defined *emitCategoryName(const std::string &name, ObjFile *objFile);
490   void createSymbolReference(Defined *refFrom, const Symbol *refTo,
491                              uint32_t offset, const Reloc &relocTemplate);
492   Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset);
493   Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
494                                    uint32_t offset);
495   Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
496                                      uint32_t offset);
497   Defined *getClassRo(const Defined *classSym, bool getMetaRo);
498   SourceLanguage getClassSymSourceLang(const Defined *classSym);
499   void mergeCategoriesIntoBaseClass(const Defined *baseClass,
500                                     std::vector<InfoInputCategory> &categories);
501   void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
502   void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
503                                    uint32_t offset);
504 
505   // Allocate a null-terminated StringRef backed by generatedSectionData
506   StringRef newStringData(const char *str);
507   // Allocate section data, backed by generatedSectionData
508   SmallVector<uint8_t> &newSectionData(uint32_t size);
509 
510   CategoryLayout catLayout;
511   ClassLayout classLayout;
512   ROClassLayout roClassLayout;
513   ListHeaderLayout listHeaderLayout;
514   MethodLayout methodLayout;
515   ProtocolListHeaderLayout protocolListHeaderLayout;
516 
517   InfoCategoryWriter infoCategoryWriter;
518   std::vector<ConcatInputSection *> &allInputSections;
519   // Map of base class Symbol to list of InfoInputCategory's for it
520   MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
521 
522   // Normally, the binary data comes from the input files, but since we're
523   // generating binary data ourselves, we use the below array to store it in.
524   // Need this to be 'static' so the data survives past the ObjcCategoryMerger
525   // object, as the data will be read by the Writer when the final binary is
526   // generated.
527   static SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
528       generatedSectionData;
529 };
530 
531 SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
532     ObjcCategoryMerger::generatedSectionData;
533 
ObjcCategoryMerger(std::vector<ConcatInputSection * > & _allInputSections)534 ObjcCategoryMerger::ObjcCategoryMerger(
535     std::vector<ConcatInputSection *> &_allInputSections)
536     : catLayout(target->wordSize), classLayout(target->wordSize),
537       roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
538       methodLayout(target->wordSize),
539       protocolListHeaderLayout(target->wordSize),
540       allInputSections(_allInputSections) {}
541 
collectSectionWriteInfoFromIsec(const InputSection * isec,InfoWriteSection & catWriteInfo)542 void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
543     const InputSection *isec, InfoWriteSection &catWriteInfo) {
544 
545   catWriteInfo.inputSection = const_cast<Section *>(&isec->section);
546   catWriteInfo.align = isec->align;
547   catWriteInfo.outputSection = isec->parent;
548 
549   assert(catWriteInfo.outputSection &&
550          "outputSection may not be null in collectSectionWriteInfoFromIsec.");
551 
552   if (isec->relocs.size())
553     catWriteInfo.relocTemplate = isec->relocs[0];
554 
555   catWriteInfo.valid = true;
556 }
557 
558 Symbol *
tryGetSymbolAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)559 ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
560                                              uint32_t offset) {
561   if (!isec)
562     return nullptr;
563   const Reloc *reloc = isec->getRelocAt(offset);
564 
565   if (!reloc)
566     return nullptr;
567 
568   Symbol *sym = reloc->referent.get<Symbol *>();
569 
570   if (reloc->addend) {
571     assert(isa<Defined>(sym) && "Expected defined for non-zero addend");
572     Defined *definedSym = cast<Defined>(sym);
573     sym = tryFindDefinedOnIsec(definedSym->isec(),
574                                definedSym->value + reloc->addend);
575   }
576 
577   return sym;
578 }
579 
tryFindDefinedOnIsec(const InputSection * isec,uint32_t offset)580 Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec,
581                                                   uint32_t offset) {
582   for (Defined *sym : isec->symbols)
583     if ((sym->value <= offset) && (sym->value + sym->size > offset))
584       return sym;
585 
586   return nullptr;
587 }
588 
589 Defined *
tryGetDefinedAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)590 ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
591                                               uint32_t offset) {
592   Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset);
593   return dyn_cast_or_null<Defined>(sym);
594 }
595 
596 // Get the class's ro_data symbol. If getMetaRo is true, then we will return
597 // the meta-class's ro_data symbol. Otherwise, we will return the class
598 // (instance) ro_data symbol.
getClassRo(const Defined * classSym,bool getMetaRo)599 Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,
600                                         bool getMetaRo) {
601   ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec());
602   if (!isec)
603     return nullptr;
604 
605   if (!getMetaRo)
606     return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset +
607                                                classSym->value);
608 
609   Defined *metaClass = tryGetDefinedAtIsecOffset(
610       isec, classLayout.metaClassOffset + classSym->value);
611   if (!metaClass)
612     return nullptr;
613 
614   return tryGetDefinedAtIsecOffset(
615       dyn_cast<ConcatInputSection>(metaClass->isec()),
616       classLayout.roDataOffset);
617 }
618 
619 // Given an ConcatInputSection or CStringInputSection and an offset, if there is
620 // a symbol(Defined) at that offset, then erase the symbol (mark it not live)
tryEraseDefinedAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)621 void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
622     const ConcatInputSection *isec, uint32_t offset) {
623   const Reloc *reloc = isec->getRelocAt(offset);
624 
625   if (!reloc)
626     return;
627 
628   Defined *sym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
629   if (!sym)
630     return;
631 
632   if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
633     eraseISec(cisec);
634   else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
635     uint32_t totalOffset = sym->value + reloc->addend;
636     StringPiece &piece = csisec->getStringPiece(totalOffset);
637     piece.live = false;
638   } else {
639     llvm_unreachable("erased symbol has to be Defined or CStringInputSection");
640   }
641 }
642 
collectCategoryWriterInfoFromCategory(const InfoInputCategory & catInfo)643 void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
644     const InfoInputCategory &catInfo) {
645 
646   if (!infoCategoryWriter.catListInfo.valid)
647     collectSectionWriteInfoFromIsec(catInfo.catListIsec,
648                                     infoCategoryWriter.catListInfo);
649   if (!infoCategoryWriter.catBodyInfo.valid)
650     collectSectionWriteInfoFromIsec(catInfo.catBodyIsec,
651                                     infoCategoryWriter.catBodyInfo);
652 
653   if (!infoCategoryWriter.catNameInfo.valid) {
654     lld::macho::Defined *catNameSym =
655         tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
656     assert(catNameSym && "Category does not have a valid name Symbol");
657 
658     collectSectionWriteInfoFromIsec(catNameSym->isec(),
659                                     infoCategoryWriter.catNameInfo);
660   }
661 
662   // Collect writer info from all the category lists (we're assuming they all
663   // would provide the same info)
664   if (!infoCategoryWriter.catPtrListInfo.valid) {
665     for (uint32_t off = catLayout.instanceMethodsOffset;
666          off <= catLayout.classPropsOffset; off += target->wordSize) {
667       if (Defined *ptrList =
668               tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) {
669         collectSectionWriteInfoFromIsec(ptrList->isec(),
670                                         infoCategoryWriter.catPtrListInfo);
671         // we've successfully collected data, so we can break
672         break;
673       }
674     }
675   }
676 }
677 
678 // Parse a protocol list that might be linked to ConcatInputSection at a given
679 // offset. The format of the protocol list is different than other lists (prop
680 // lists, method lists) so we need to parse it differently
parseProtocolListInfo(const ConcatInputSection * isec,uint32_t secOffset,PointerListInfo & ptrList,SourceLanguage sourceLang)681 void ObjcCategoryMerger::parseProtocolListInfo(
682     const ConcatInputSection *isec, uint32_t secOffset,
683     PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
684   assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
685          "Tried to read pointer list beyond protocol section end");
686 
687   const Reloc *reloc = isec->getRelocAt(secOffset);
688   if (!reloc)
689     return;
690 
691   auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
692   assert(ptrListSym && "Protocol list reloc does not have a valid Defined");
693 
694   // Theoretically protocol count can be either 32b or 64b, depending on
695   // platform pointer size, but to simplify implementation we always just read
696   // the lower 32b which should be good enough.
697   uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
698       ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
699 
700   ptrList.structCount += protocolCount;
701   ptrList.structSize = target->wordSize;
702 
703   [[maybe_unused]] uint32_t expectedListSize =
704       (protocolCount * target->wordSize) +
705       /*header(count)*/ protocolListHeaderLayout.totalSize +
706       /*extra null value*/ target->wordSize;
707 
708   // On Swift, the protocol list does not have the extra (unnecessary) null
709   [[maybe_unused]] uint32_t expectedListSizeSwift =
710       expectedListSize - target->wordSize;
711 
712   assert(((expectedListSize == ptrListSym->isec()->data.size() &&
713            sourceLang == SourceLanguage::ObjC) ||
714           (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
715            sourceLang == SourceLanguage::Swift)) &&
716          "Protocol list does not match expected size");
717 
718   uint32_t off = protocolListHeaderLayout.totalSize;
719   for (uint32_t inx = 0; inx < protocolCount; ++inx) {
720     const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
721     assert(reloc && "No reloc found at protocol list offset");
722 
723     auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
724     assert(listSym && "Protocol list reloc does not have a valid Defined");
725 
726     ptrList.allPtrs.push_back(listSym);
727     off += target->wordSize;
728   }
729   assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
730          "expected null terminating protocol");
731   assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
732          "Protocol list end offset does not match expected size");
733 }
734 
735 // Parse a protocol list and return the PointerListInfo for it
736 ObjcCategoryMerger::PointerListInfo
parseProtocolListInfo(const ConcatInputSection * isec,uint32_t secOffset,SourceLanguage sourceLang)737 ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
738                                           uint32_t secOffset,
739                                           SourceLanguage sourceLang) {
740   PointerListInfo ptrList;
741   parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
742   return ptrList;
743 }
744 
745 // Parse a pointer list that might be linked to ConcatInputSection at a given
746 // offset. This can be used for instance methods, class methods, instance props
747 // and class props since they have the same format.
parsePointerListInfo(const ConcatInputSection * isec,uint32_t secOffset,PointerListInfo & ptrList)748 void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
749                                               uint32_t secOffset,
750                                               PointerListInfo &ptrList) {
751   assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3);
752   assert(isec && "Trying to parse pointer list from null isec");
753   assert(secOffset + target->wordSize <= isec->data.size() &&
754          "Trying to read pointer list beyond section end");
755 
756   const Reloc *reloc = isec->getRelocAt(secOffset);
757   if (!reloc)
758     return;
759 
760   auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
761   assert(ptrListSym && "Reloc does not have a valid Defined");
762 
763   uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
764       ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
765   uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
766       ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
767   assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
768 
769   assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
770 
771   ptrList.structCount += thisStructCount;
772   ptrList.structSize = thisStructSize;
773 
774   uint32_t expectedListSize =
775       listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
776   assert(expectedListSize == ptrListSym->isec()->data.size() &&
777          "Pointer list does not match expected size");
778 
779   for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
780        off += target->wordSize) {
781     const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
782     assert(reloc && "No reloc found at pointer list offset");
783 
784     auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
785     assert(listSym && "Reloc does not have a valid Defined");
786 
787     ptrList.allPtrs.push_back(listSym);
788   }
789 }
790 
791 // Here we parse all the information of an input category (catInfo) and
792 // append the parsed info into the structure which will contain all the
793 // information about how a class is extended (extInfo)
parseCatInfoToExtInfo(const InfoInputCategory & catInfo,ClassExtensionInfo & extInfo)794 void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
795                                                ClassExtensionInfo &extInfo) {
796   const Reloc *catNameReloc =
797       catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset);
798 
799   // Parse name
800   assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
801 
802   // is this the first category we are parsing?
803   if (extInfo.mergedContainerName.empty())
804     extInfo.objFileForMergeData =
805         dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
806   else
807     extInfo.mergedContainerName += "|";
808 
809   assert(extInfo.objFileForMergeData &&
810          "Expected to already have valid objextInfo.objFileForMergeData");
811 
812   StringRef catName = getReferentString(*catNameReloc);
813   extInfo.mergedContainerName += catName.str();
814 
815   // Parse base class
816   if (!extInfo.baseClass) {
817     Symbol *classSym =
818         tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset);
819     assert(extInfo.baseClassName.empty());
820     extInfo.baseClass = classSym;
821     llvm::StringRef classPrefix(objc::symbol_names::klass);
822     assert(classSym->getName().starts_with(classPrefix) &&
823            "Base class symbol does not start with expected prefix");
824     extInfo.baseClassName = classSym->getName().substr(classPrefix.size());
825   } else {
826     assert((extInfo.baseClass ==
827             tryGetSymbolAtIsecOffset(catInfo.catBodyIsec,
828                                      catLayout.klassOffset)) &&
829            "Trying to parse category info into container with different base "
830            "class");
831   }
832 
833   parsePointerListInfo(catInfo.catBodyIsec, catLayout.instanceMethodsOffset,
834                        extInfo.instanceMethods);
835 
836   parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset,
837                        extInfo.classMethods);
838 
839   parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
840                         extInfo.protocols, catInfo.sourceLanguage);
841 
842   parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
843                        extInfo.instanceProps);
844 
845   parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset,
846                        extInfo.classProps);
847 }
848 
849 // Generate a protocol list (including header) and link it into the parent at
850 // the specified offset.
emitAndLinkProtocolList(Defined * parentSym,uint32_t linkAtOffset,const ClassExtensionInfo & extInfo,const PointerListInfo & ptrList)851 Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
852     Defined *parentSym, uint32_t linkAtOffset,
853     const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
854   if (ptrList.allPtrs.empty())
855     return nullptr;
856 
857   assert(ptrList.allPtrs.size() == ptrList.structCount);
858 
859   uint32_t bodySize = (ptrList.structCount * target->wordSize) +
860                       /*header(count)*/ protocolListHeaderLayout.totalSize +
861                       /*extra null value*/ target->wordSize;
862   llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
863 
864   // This theoretically can be either 32b or 64b, but writing just the first 32b
865   // is good enough
866   const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>(
867       bodyData.data() + protocolListHeaderLayout.protocolCountOffset);
868 
869   *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size();
870 
871   ConcatInputSection *listSec = make<ConcatInputSection>(
872       *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
873       infoCategoryWriter.catPtrListInfo.align);
874   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
875   listSec->live = true;
876 
877   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
878 
879   std::string symName = ptrList.categoryPrefix;
880   symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
881 
882   Defined *ptrListSym = make<Defined>(
883       newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
884       listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
885       /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
886       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
887       /*isWeakDefCanBeHidden=*/false);
888 
889   ptrListSym->used = true;
890   parentSym->getObjectFile()->symbols.push_back(ptrListSym);
891   addInputSection(listSec);
892 
893   createSymbolReference(parentSym, ptrListSym, linkAtOffset,
894                         infoCategoryWriter.catBodyInfo.relocTemplate);
895 
896   uint32_t offset = protocolListHeaderLayout.totalSize;
897   for (Symbol *symbol : ptrList.allPtrs) {
898     createSymbolReference(ptrListSym, symbol, offset,
899                           infoCategoryWriter.catPtrListInfo.relocTemplate);
900     offset += target->wordSize;
901   }
902 
903   return ptrListSym;
904 }
905 
906 // Generate a pointer list (including header) and link it into the parent at the
907 // specified offset. This is used for instance and class methods and
908 // proprieties.
emitAndLinkPointerList(Defined * parentSym,uint32_t linkAtOffset,const ClassExtensionInfo & extInfo,const PointerListInfo & ptrList)909 void ObjcCategoryMerger::emitAndLinkPointerList(
910     Defined *parentSym, uint32_t linkAtOffset,
911     const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
912   if (ptrList.allPtrs.empty())
913     return;
914 
915   assert(ptrList.allPtrs.size() * target->wordSize ==
916          ptrList.structCount * ptrList.structSize);
917 
918   // Generate body
919   uint32_t bodySize =
920       listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount);
921   llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
922 
923   const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>(
924       bodyData.data() + listHeaderLayout.structSizeOffset);
925   const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>(
926       bodyData.data() + listHeaderLayout.structCountOffset);
927 
928   *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize;
929   *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount;
930 
931   ConcatInputSection *listSec = make<ConcatInputSection>(
932       *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
933       infoCategoryWriter.catPtrListInfo.align);
934   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
935   listSec->live = true;
936 
937   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
938 
939   std::string symName = ptrList.categoryPrefix;
940   symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
941 
942   Defined *ptrListSym = make<Defined>(
943       newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
944       listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
945       /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
946       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
947       /*isWeakDefCanBeHidden=*/false);
948 
949   ptrListSym->used = true;
950   parentSym->getObjectFile()->symbols.push_back(ptrListSym);
951   addInputSection(listSec);
952 
953   createSymbolReference(parentSym, ptrListSym, linkAtOffset,
954                         infoCategoryWriter.catBodyInfo.relocTemplate);
955 
956   uint32_t offset = listHeaderLayout.totalSize;
957   for (Symbol *symbol : ptrList.allPtrs) {
958     createSymbolReference(ptrListSym, symbol, offset,
959                           infoCategoryWriter.catPtrListInfo.relocTemplate);
960     offset += target->wordSize;
961   }
962 }
963 
964 // This method creates an __objc_catlist ConcatInputSection with a single slot
965 Defined *
emitCatListEntrySec(const std::string & forCategoryName,const std::string & forBaseClassName,ObjFile * objFile)966 ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
967                                         const std::string &forBaseClassName,
968                                         ObjFile *objFile) {
969   uint32_t sectionSize = target->wordSize;
970   llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize);
971 
972   ConcatInputSection *newCatList =
973       make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection,
974                                bodyData, infoCategoryWriter.catListInfo.align);
975   newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
976   newCatList->live = true;
977 
978   newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
979 
980   std::string catSymName = "<__objc_catlist slot for merged category ";
981   catSymName += forBaseClassName + "(" + forCategoryName + ")>";
982 
983   Defined *catListSym = make<Defined>(
984       newStringData(catSymName.c_str()), /*file=*/objFile, newCatList,
985       /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
986       /*isPrivateExtern=*/false, /*includeInSymtab=*/false,
987       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
988       /*isWeakDefCanBeHidden=*/false);
989 
990   catListSym->used = true;
991   objFile->symbols.push_back(catListSym);
992   addInputSection(newCatList);
993   return catListSym;
994 }
995 
996 // Here we generate the main category body and link the name and base class into
997 // it. We don't link any other info yet like the protocol and class/instance
998 // methods/props.
emitCategoryBody(const std::string & name,const Defined * nameSym,const Symbol * baseClassSym,const std::string & baseClassName,ObjFile * objFile)999 Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
1000                                               const Defined *nameSym,
1001                                               const Symbol *baseClassSym,
1002                                               const std::string &baseClassName,
1003                                               ObjFile *objFile) {
1004   llvm::ArrayRef<uint8_t> bodyData = newSectionData(catLayout.totalSize);
1005 
1006   uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) +
1007                                    catLayout.sizeOffset);
1008   *ptrSize = catLayout.totalSize;
1009 
1010   ConcatInputSection *newBodySec =
1011       make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection,
1012                                bodyData, infoCategoryWriter.catBodyInfo.align);
1013   newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
1014   newBodySec->live = true;
1015 
1016   std::string symName =
1017       objc::symbol_names::category + baseClassName + "(" + name + ")";
1018   Defined *catBodySym = make<Defined>(
1019       newStringData(symName.c_str()), /*file=*/objFile, newBodySec,
1020       /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
1021       /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
1022       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
1023       /*isWeakDefCanBeHidden=*/false);
1024 
1025   catBodySym->used = true;
1026   objFile->symbols.push_back(catBodySym);
1027   addInputSection(newBodySec);
1028 
1029   createSymbolReference(catBodySym, nameSym, catLayout.nameOffset,
1030                         infoCategoryWriter.catBodyInfo.relocTemplate);
1031 
1032   // Create a reloc to the base class (either external or internal)
1033   createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset,
1034                         infoCategoryWriter.catBodyInfo.relocTemplate);
1035 
1036   return catBodySym;
1037 }
1038 
1039 // This writes the new category name (for the merged category) into the binary
1040 // and returns the sybmol for it.
emitCategoryName(const std::string & name,ObjFile * objFile)1041 Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
1042                                               ObjFile *objFile) {
1043   StringRef nameStrData = newStringData(name.c_str());
1044   // We use +1 below to include the null terminator
1045   llvm::ArrayRef<uint8_t> nameData(
1046       reinterpret_cast<const uint8_t *>(nameStrData.data()),
1047       nameStrData.size() + 1);
1048 
1049   auto *parentSection = infoCategoryWriter.catNameInfo.inputSection;
1050   CStringInputSection *newStringSec = make<CStringInputSection>(
1051       *infoCategoryWriter.catNameInfo.inputSection, nameData,
1052       infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/true);
1053 
1054   parentSection->subsections.push_back({0, newStringSec});
1055 
1056   newStringSec->splitIntoPieces();
1057   newStringSec->pieces[0].live = true;
1058   newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
1059   in.cStringSection->addInput(newStringSec);
1060   assert(newStringSec->pieces.size() == 1);
1061 
1062   Defined *catNameSym = make<Defined>(
1063       "<merged category name>", /*file=*/objFile, newStringSec,
1064       /*value=*/0, nameData.size(),
1065       /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1066       /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1067       /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1068 
1069   catNameSym->used = true;
1070   objFile->symbols.push_back(catNameSym);
1071   return catNameSym;
1072 }
1073 
1074 // This method fully creates a new category from the given ClassExtensionInfo.
1075 // It creates the category name, body and method/protocol/prop lists and links
1076 // them all together. Then it creates a new __objc_catlist entry and adds the
1077 // category to it. Calling this method will fully generate a category which will
1078 // be available in the final binary.
emitCategory(const ClassExtensionInfo & extInfo)1079 Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) {
1080   Defined *catNameSym = emitCategoryName(extInfo.mergedContainerName,
1081                                          extInfo.objFileForMergeData);
1082 
1083   Defined *catBodySym = emitCategoryBody(
1084       extInfo.mergedContainerName, catNameSym, extInfo.baseClass,
1085       extInfo.baseClassName, extInfo.objFileForMergeData);
1086 
1087   Defined *catListSym =
1088       emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName,
1089                           extInfo.objFileForMergeData);
1090 
1091   // Add the single category body to the category list at the offset 0.
1092   createSymbolReference(catListSym, catBodySym, /*offset=*/0,
1093                         infoCategoryWriter.catListInfo.relocTemplate);
1094 
1095   emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset, extInfo,
1096                          extInfo.instanceMethods);
1097 
1098   emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo,
1099                          extInfo.classMethods);
1100 
1101   emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo,
1102                           extInfo.protocols);
1103 
1104   emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset, extInfo,
1105                          extInfo.instanceProps);
1106 
1107   emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo,
1108                          extInfo.classProps);
1109 
1110   return catBodySym;
1111 }
1112 
1113 // This method merges all the categories (sharing a base class) into a single
1114 // category.
mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> & categories)1115 void ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
1116     std::vector<InfoInputCategory> &categories) {
1117   assert(categories.size() > 1 && "Expected at least 2 categories");
1118 
1119   ClassExtensionInfo extInfo(catLayout);
1120 
1121   for (auto &catInfo : categories)
1122     parseCatInfoToExtInfo(catInfo, extInfo);
1123 
1124   Defined *newCatDef = emitCategory(extInfo);
1125   assert(newCatDef && "Failed to create a new category");
1126 
1127   // Suppress unsuded var warning
1128   (void)newCatDef;
1129 
1130   for (auto &catInfo : categories)
1131     catInfo.wasMerged = true;
1132 }
1133 
createSymbolReference(Defined * refFrom,const Symbol * refTo,uint32_t offset,const Reloc & relocTemplate)1134 void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
1135                                                const Symbol *refTo,
1136                                                uint32_t offset,
1137                                                const Reloc &relocTemplate) {
1138   Reloc r = relocTemplate;
1139   r.offset = offset;
1140   r.addend = 0;
1141   r.referent = const_cast<Symbol *>(refTo);
1142   refFrom->isec()->relocs.push_back(r);
1143 }
1144 
1145 // Get the list of categories in the '__objc_nlcatlist' section. We can't
1146 // optimize these as they have a '+load' method that has to be called at
1147 // runtime.
collectNlCategories()1148 DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() {
1149   DenseSet<const Symbol *> nlCategories;
1150 
1151   for (InputSection *sec : allInputSections) {
1152     if (sec->getName() != section_names::objcNonLazyCatList)
1153       continue;
1154 
1155     for (auto &r : sec->relocs) {
1156       const Symbol *sym = r.referent.dyn_cast<Symbol *>();
1157       nlCategories.insert(sym);
1158     }
1159   }
1160   return nlCategories;
1161 }
1162 
collectAndValidateCategoriesData()1163 void ObjcCategoryMerger::collectAndValidateCategoriesData() {
1164   auto nlCategories = collectNlCategories();
1165 
1166   for (InputSection *sec : allInputSections) {
1167     if (sec->getName() != section_names::objcCatList)
1168       continue;
1169     ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec);
1170     assert(catListCisec &&
1171            "__objc_catList InputSection is not a ConcatInputSection");
1172 
1173     for (uint32_t off = 0; off < catListCisec->getSize();
1174          off += target->wordSize) {
1175       Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off);
1176       assert(categorySym &&
1177              "Failed to get a valid category at __objc_catlit offset");
1178 
1179       if (nlCategories.count(categorySym))
1180         continue;
1181 
1182       auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
1183       assert(catBodyIsec &&
1184              "Category data section is not an ConcatInputSection");
1185 
1186       SourceLanguage eLang = SourceLanguage::Unknown;
1187       if (categorySym->getName().starts_with(objc::symbol_names::category))
1188         eLang = SourceLanguage::ObjC;
1189       else if (categorySym->getName().starts_with(
1190                    objc::symbol_names::swift_objc_category))
1191         eLang = SourceLanguage::Swift;
1192       else
1193         llvm_unreachable("Unexpected category symbol name");
1194 
1195       InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang};
1196 
1197       // Check that the category has a reloc at 'klassOffset' (which is
1198       // a pointer to the class symbol)
1199 
1200       Symbol *classSym =
1201           tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
1202       assert(classSym && "Category does not have a valid base class");
1203 
1204       categoryMap[classSym].push_back(catInputInfo);
1205 
1206       collectCategoryWriterInfoFromCategory(catInputInfo);
1207     }
1208   }
1209 }
1210 
1211 // In the input we have multiple __objc_catlist InputSection, each of which may
1212 // contain links to multiple categories. Of these categories, we will merge (and
1213 // erase) only some. There will be some categories that will remain untouched
1214 // (not erased). For these not erased categories, we generate new __objc_catlist
1215 // entries since the parent __objc_catlist entry will be erased
generateCatListForNonErasedCategories(const MapVector<ConcatInputSection *,std::set<uint64_t>> catListToErasedOffsets)1216 void ObjcCategoryMerger::generateCatListForNonErasedCategories(
1217     const MapVector<ConcatInputSection *, std::set<uint64_t>>
1218         catListToErasedOffsets) {
1219 
1220   // Go through all offsets of all __objc_catlist's that we process and if there
1221   // are categories that we didn't process - generate a new __objc_catlist for
1222   // each.
1223   for (auto &mapEntry : catListToErasedOffsets) {
1224     ConcatInputSection *catListIsec = mapEntry.first;
1225     for (uint32_t catListIsecOffset = 0;
1226          catListIsecOffset < catListIsec->data.size();
1227          catListIsecOffset += target->wordSize) {
1228       // This slot was erased, we can just skip it
1229       if (mapEntry.second.count(catListIsecOffset))
1230         continue;
1231 
1232       Defined *nonErasedCatBody =
1233           tryGetDefinedAtIsecOffset(catListIsec, catListIsecOffset);
1234       assert(nonErasedCatBody && "Failed to relocate non-deleted category");
1235 
1236       // Allocate data for the new __objc_catlist slot
1237       llvm::ArrayRef<uint8_t> bodyData = newSectionData(target->wordSize);
1238 
1239       // We mark the __objc_catlist slot as belonging to the same file as the
1240       // category
1241       ObjFile *objFile = dyn_cast<ObjFile>(nonErasedCatBody->getFile());
1242 
1243       ConcatInputSection *listSec = make<ConcatInputSection>(
1244           *infoCategoryWriter.catListInfo.inputSection, bodyData,
1245           infoCategoryWriter.catListInfo.align);
1246       listSec->parent = infoCategoryWriter.catListInfo.outputSection;
1247       listSec->live = true;
1248 
1249       std::string slotSymName = "<__objc_catlist slot for category ";
1250       slotSymName += nonErasedCatBody->getName();
1251       slotSymName += ">";
1252 
1253       Defined *catListSlotSym = make<Defined>(
1254           newStringData(slotSymName.c_str()), /*file=*/objFile, listSec,
1255           /*value=*/0, bodyData.size(),
1256           /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1257           /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1258           /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1259 
1260       catListSlotSym->used = true;
1261       objFile->symbols.push_back(catListSlotSym);
1262       addInputSection(listSec);
1263 
1264       // Now link the category body into the newly created slot
1265       createSymbolReference(catListSlotSym, nonErasedCatBody, 0,
1266                             infoCategoryWriter.catListInfo.relocTemplate);
1267     }
1268   }
1269 }
1270 
eraseISec(ConcatInputSection * isec)1271 void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
1272   isec->live = false;
1273   for (auto &sym : isec->symbols)
1274     sym->used = false;
1275 }
1276 
1277 // This fully erases the merged categories, including their body, their names,
1278 // their method/protocol/prop lists and the __objc_catlist entries that link to
1279 // them.
eraseMergedCategories()1280 void ObjcCategoryMerger::eraseMergedCategories() {
1281   // Map of InputSection to a set of offsets of the categories that were merged
1282   MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
1283 
1284   for (auto &mapEntry : categoryMap) {
1285     for (InfoInputCategory &catInfo : mapEntry.second) {
1286       if (catInfo.wasMerged) {
1287         eraseISec(catInfo.catListIsec);
1288         catListToErasedOffsets[catInfo.catListIsec].insert(
1289             catInfo.offCatListIsec);
1290       }
1291     }
1292   }
1293 
1294   // If there were categories that we did not erase, we need to generate a new
1295   // __objc_catList that contains only the un-merged categories, and get rid of
1296   // the references to the ones we merged.
1297   generateCatListForNonErasedCategories(catListToErasedOffsets);
1298 
1299   // Erase the old method lists & names of the categories that were merged
1300   for (auto &mapEntry : categoryMap) {
1301     for (InfoInputCategory &catInfo : mapEntry.second) {
1302       if (!catInfo.wasMerged)
1303         continue;
1304 
1305       eraseISec(catInfo.catBodyIsec);
1306 
1307       // We can't erase 'catLayout.nameOffset' for either Swift or ObjC
1308       //   categories because the name will sometimes also be used for other
1309       //   purposes.
1310       // For Swift, see usages of 'l_.str.11.SimpleClass' in
1311       //   objc-category-merging-swift.s
1312       // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in
1313       //   objc-category-merging-erase-objc-name-test.s
1314       // TODO: handle the above in a smarter way
1315 
1316       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1317                                   catLayout.instanceMethodsOffset);
1318       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1319                                   catLayout.classMethodsOffset);
1320       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1321                                   catLayout.protocolsOffset);
1322       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1323                                   catLayout.classPropsOffset);
1324       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1325                                   catLayout.instancePropsOffset);
1326     }
1327   }
1328 }
1329 
doMerge()1330 void ObjcCategoryMerger::doMerge() {
1331   collectAndValidateCategoriesData();
1332 
1333   for (auto &[baseClass, catInfos] : categoryMap) {
1334     if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) {
1335       // Merge all categories into the base class
1336       mergeCategoriesIntoBaseClass(baseClassDef, catInfos);
1337     } else if (catInfos.size() > 1) {
1338       // Merge all categories into a new, single category
1339       mergeCategoriesIntoSingleCategory(catInfos);
1340     }
1341   }
1342 
1343   // Erase all categories that were merged
1344   eraseMergedCategories();
1345 }
1346 
doCleanup()1347 void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }
1348 
newStringData(const char * str)1349 StringRef ObjcCategoryMerger::newStringData(const char *str) {
1350   uint32_t len = strlen(str);
1351   uint32_t bufSize = len + 1;
1352   SmallVector<uint8_t> &data = newSectionData(bufSize);
1353   char *strData = reinterpret_cast<char *>(data.data());
1354   // Copy the string chars and null-terminator
1355   memcpy(strData, str, bufSize);
1356   return StringRef(strData, len);
1357 }
1358 
newSectionData(uint32_t size)1359 SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) {
1360   generatedSectionData.push_back(
1361       std::make_unique<SmallVector<uint8_t>>(size, 0));
1362   return *generatedSectionData.back();
1363 }
1364 
1365 } // namespace
1366 
mergeCategories()1367 void objc::mergeCategories() {
1368   TimeTraceScope timeScope("ObjcCategoryMerger");
1369 
1370   ObjcCategoryMerger merger(inputSections);
1371   merger.doMerge();
1372 }
1373 
doCleanup()1374 void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); }
1375 
1376 ObjcCategoryMerger::SourceLanguage
getClassSymSourceLang(const Defined * classSym)1377 ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) {
1378   if (classSym->getName().starts_with(objc::symbol_names::swift_objc_klass))
1379     return SourceLanguage::Swift;
1380 
1381   // If the symbol name matches the ObjC prefix, we don't necessarely know this
1382   // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift
1383   // classes. Ex:
1384   //  .globl	_OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1385   //  .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1386   //  .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN
1387   //
1388   // So we scan for symbols with the same address and check for the Swift class
1389   if (classSym->getName().starts_with(objc::symbol_names::klass)) {
1390     for (auto &sym : classSym->originalIsec->symbols)
1391       if (sym->value == classSym->value)
1392         if (sym->getName().starts_with(objc::symbol_names::swift_objc_klass))
1393           return SourceLanguage::Swift;
1394     return SourceLanguage::ObjC;
1395   }
1396 
1397   llvm_unreachable("Unexpected class symbol name during category merging");
1398 }
mergeCategoriesIntoBaseClass(const Defined * baseClass,std::vector<InfoInputCategory> & categories)1399 void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
1400     const Defined *baseClass, std::vector<InfoInputCategory> &categories) {
1401   assert(categories.size() >= 1 && "Expected at least one category to merge");
1402 
1403   // Collect all the info from the categories
1404   ClassExtensionInfo extInfo(catLayout);
1405   extInfo.baseClass = baseClass;
1406   extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass);
1407 
1408   for (auto &catInfo : categories) {
1409     parseCatInfoToExtInfo(catInfo, extInfo);
1410   }
1411 
1412   // Get metadata for the base class
1413   Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true);
1414   ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec());
1415   Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false);
1416   ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec());
1417 
1418   // Now collect the info from the base class from the various lists in the
1419   // class metadata
1420 
1421   // Protocol lists are a special case - the same protocol list is in classRo
1422   // and metaRo, so we only need to parse it once
1423   parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1424                         extInfo.protocols, extInfo.baseClassSourceLanguage);
1425 
1426   // Check that the classRo and metaRo protocol lists are identical
1427   assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1428                                extInfo.baseClassSourceLanguage) ==
1429              parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
1430                                    extInfo.baseClassSourceLanguage) &&
1431          "Category merger expects classRo and metaRo to have the same protocol "
1432          "list");
1433 
1434   parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
1435                        extInfo.classMethods);
1436   parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset,
1437                        extInfo.instanceMethods);
1438 
1439   parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset,
1440                        extInfo.classProps);
1441   parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset,
1442                        extInfo.instanceProps);
1443 
1444   // Erase the old lists - these will be generated and replaced
1445   eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset);
1446   eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset);
1447   eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset);
1448   eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset);
1449   eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset);
1450   eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset);
1451 
1452   // Emit the newly merged lists - first into the meta RO then into the class RO
1453   // First we emit and link the protocol list into the meta RO. Then we link it
1454   // in the classRo as well (they're supposed to be identical)
1455   if (Defined *protoListSym =
1456           emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset,
1457                                   extInfo, extInfo.protocols)) {
1458     createSymbolReference(classRo, protoListSym,
1459                           roClassLayout.baseProtocolsOffset,
1460                           infoCategoryWriter.catBodyInfo.relocTemplate);
1461   }
1462 
1463   emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo,
1464                          extInfo.classMethods);
1465   emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo,
1466                          extInfo.instanceMethods);
1467 
1468   emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo,
1469                          extInfo.classProps);
1470 
1471   emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo,
1472                          extInfo.instanceProps);
1473 
1474   // Mark all the categories as merged - this will be used to erase them later
1475   for (auto &catInfo : categories)
1476     catInfo.wasMerged = true;
1477 }
1478 
1479 // Erase the symbol at a given offset in an InputSection
eraseSymbolAtIsecOffset(ConcatInputSection * isec,uint32_t offset)1480 void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,
1481                                                  uint32_t offset) {
1482   Defined *sym = tryGetDefinedAtIsecOffset(isec, offset);
1483   if (!sym)
1484     return;
1485 
1486   // Remove the symbol from isec->symbols
1487   assert(isa<Defined>(sym) && "Can only erase a Defined");
1488   llvm::erase(isec->symbols, sym);
1489 
1490   // Remove the relocs that refer to this symbol
1491   auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };
1492   llvm::erase_if(isec->relocs, removeAtOff);
1493 
1494   // Now, if the symbol fully occupies a ConcatInputSection, we can also erase
1495   // the whole ConcatInputSection
1496   if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec()))
1497     if (cisec->data.size() == sym->size)
1498       eraseISec(cisec);
1499 }
1500