xref: /freebsd/contrib/llvm-project/lld/MachO/ObjC.cpp (revision 700637cbb5e582861067a11aaca4d053546871d2)
1 //===- ObjC.cpp -----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ObjC.h"
10 #include "ConcatOutputSection.h"
11 #include "InputFiles.h"
12 #include "InputSection.h"
13 #include "Layout.h"
14 #include "OutputSegment.h"
15 #include "SyntheticSections.h"
16 #include "Target.h"
17 
18 #include "lld/Common/ErrorHandler.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/Bitcode/BitcodeReader.h"
21 #include "llvm/Support/TimeProfiler.h"
22 
23 using namespace llvm;
24 using namespace llvm::MachO;
25 using namespace lld;
26 using namespace lld::macho;
27 
objectHasObjCSection(MemoryBufferRef mb)28 template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {
29   using SectionHeader = typename LP::section;
30 
31   auto *hdr =
32       reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart());
33   if (hdr->magic != LP::magic)
34     return false;
35 
36   if (const auto *c =
37           findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) {
38     auto sectionHeaders = ArrayRef<SectionHeader>{
39         reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
40     for (const SectionHeader &secHead : sectionHeaders) {
41       StringRef sectname(secHead.sectname,
42                          strnlen(secHead.sectname, sizeof(secHead.sectname)));
43       StringRef segname(secHead.segname,
44                         strnlen(secHead.segname, sizeof(secHead.segname)));
45       if ((segname == segment_names::data &&
46            sectname == section_names::objcCatList) ||
47           (segname == segment_names::text &&
48            sectname.starts_with(section_names::swift))) {
49         return true;
50       }
51     }
52   }
53   return false;
54 }
55 
objectHasObjCSection(MemoryBufferRef mb)56 static bool objectHasObjCSection(MemoryBufferRef mb) {
57   if (target->wordSize == 8)
58     return ::objectHasObjCSection<LP64>(mb);
59   else
60     return ::objectHasObjCSection<ILP32>(mb);
61 }
62 
hasObjCSection(MemoryBufferRef mb)63 bool macho::hasObjCSection(MemoryBufferRef mb) {
64   switch (identify_magic(mb.getBuffer())) {
65   case file_magic::macho_object:
66     return objectHasObjCSection(mb);
67   case file_magic::bitcode:
68     return check(isBitcodeContainingObjCCategory(mb));
69   default:
70     return false;
71   }
72 }
73 
74 namespace {
75 
76 #define FOR_EACH_CATEGORY_FIELD(DO)                                            \
77   DO(Ptr, name)                                                                \
78   DO(Ptr, klass)                                                               \
79   DO(Ptr, instanceMethods)                                                     \
80   DO(Ptr, classMethods)                                                        \
81   DO(Ptr, protocols)                                                           \
82   DO(Ptr, instanceProps)                                                       \
83   DO(Ptr, classProps)                                                          \
84   DO(uint32_t, size)
85 
86 CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD);
87 
88 #undef FOR_EACH_CATEGORY_FIELD
89 
90 #define FOR_EACH_CLASS_FIELD(DO)                                               \
91   DO(Ptr, metaClass)                                                           \
92   DO(Ptr, superClass)                                                          \
93   DO(Ptr, methodCache)                                                         \
94   DO(Ptr, vtable)                                                              \
95   DO(Ptr, roData)
96 
97 CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD);
98 
99 #undef FOR_EACH_CLASS_FIELD
100 
101 #define FOR_EACH_RO_CLASS_FIELD(DO)                                            \
102   DO(uint32_t, flags)                                                          \
103   DO(uint32_t, instanceStart)                                                  \
104   DO(Ptr, instanceSize)                                                        \
105   DO(Ptr, ivarLayout)                                                          \
106   DO(Ptr, name)                                                                \
107   DO(Ptr, baseMethods)                                                         \
108   DO(Ptr, baseProtocols)                                                       \
109   DO(Ptr, ivars)                                                               \
110   DO(Ptr, weakIvarLayout)                                                      \
111   DO(Ptr, baseProperties)
112 
113 CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD);
114 
115 #undef FOR_EACH_RO_CLASS_FIELD
116 
117 #define FOR_EACH_LIST_HEADER(DO)                                               \
118   DO(uint32_t, structSize)                                                     \
119   DO(uint32_t, structCount)
120 
121 CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER);
122 
123 #undef FOR_EACH_LIST_HEADER
124 
125 #define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount)
126 
127 CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER);
128 
129 #undef FOR_EACH_PROTOCOL_LIST_HEADER
130 
131 #define FOR_EACH_METHOD(DO)                                                    \
132   DO(Ptr, name)                                                                \
133   DO(Ptr, type)                                                                \
134   DO(Ptr, impl)
135 
136 CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD);
137 
138 #undef FOR_EACH_METHOD
139 
// Kind of ObjC construct a method definition was found in.
enum MethodContainerKind {
  MCK_Class = 0,    // Defined directly on the class.
  MCK_Category = 1, // Defined by a category.
};
144 
145 struct MethodContainer {
146   MethodContainerKind kind;
147   const ConcatInputSection *isec;
148 };
149 
// Distinguishes instance ("-") methods from class ("+") methods.
enum MethodKind {
  MK_Instance = 0,
  MK_Static = 1,
};
154 
155 struct ObjcClass {
156   DenseMap<CachedHashStringRef, MethodContainer> instanceMethods;
157   DenseMap<CachedHashStringRef, MethodContainer> classMethods;
158 };
159 
160 } // namespace
161 
162 class ObjcCategoryChecker {
163 public:
164   ObjcCategoryChecker();
165   void parseCategory(const ConcatInputSection *catListIsec);
166 
167 private:
168   void parseClass(const Defined *classSym);
169   void parseMethods(const ConcatInputSection *methodsIsec,
170                     const Symbol *methodContainer,
171                     const ConcatInputSection *containerIsec,
172                     MethodContainerKind, MethodKind);
173 
174   CategoryLayout catLayout;
175   ClassLayout classLayout;
176   ROClassLayout roClassLayout;
177   ListHeaderLayout listHeaderLayout;
178   MethodLayout methodLayout;
179 
180   DenseMap<const Symbol *, ObjcClass> classMap;
181 };
182 
ObjcCategoryChecker()183 ObjcCategoryChecker::ObjcCategoryChecker()
184     : catLayout(target->wordSize), classLayout(target->wordSize),
185       roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
186       methodLayout(target->wordSize) {}
187 
parseMethods(const ConcatInputSection * methodsIsec,const Symbol * methodContainerSym,const ConcatInputSection * containerIsec,MethodContainerKind mcKind,MethodKind mKind)188 void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
189                                        const Symbol *methodContainerSym,
190                                        const ConcatInputSection *containerIsec,
191                                        MethodContainerKind mcKind,
192                                        MethodKind mKind) {
193   ObjcClass &klass = classMap[methodContainerSym];
194   for (const Reloc &r : methodsIsec->relocs) {
195     if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize !=
196         methodLayout.nameOffset)
197       continue;
198 
199     CachedHashStringRef methodName(r.getReferentString());
200     // +load methods are special: all implementations are called by the runtime
201     // even if they are part of the same class. Thus there is no need to check
202     // for duplicates.
203     // NOTE: Instead of specifically checking for this method name, ld64 simply
204     // checks whether a class / category is present in __objc_nlclslist /
205     // __objc_nlcatlist respectively. This will be the case if the class /
206     // category has a +load method. It skips optimizing the categories if there
207     // are multiple +load methods. Since it does dupe checking as part of the
208     // optimization process, this avoids spurious dupe messages around +load,
209     // but it also means that legit dupe issues for other methods are ignored.
210     if (mKind == MK_Static && methodName.val() == "load")
211       continue;
212 
213     auto &methodMap =
214         mKind == MK_Instance ? klass.instanceMethods : klass.classMethods;
215     if (methodMap
216             .try_emplace(methodName, MethodContainer{mcKind, containerIsec})
217             .second)
218       continue;
219 
220     // We have a duplicate; generate a warning message.
221     const auto &mc = methodMap.lookup(methodName);
222     const Reloc *nameReloc = nullptr;
223     if (mc.kind == MCK_Category) {
224       nameReloc = mc.isec->getRelocAt(catLayout.nameOffset);
225     } else {
226       assert(mc.kind == MCK_Class);
227       const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset)
228                          ->getReferentInputSection();
229       nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset);
230     }
231     StringRef containerName = nameReloc->getReferentString();
232     StringRef methPrefix = mKind == MK_Instance ? "-" : "+";
233 
234     // We should only ever encounter collisions when parsing category methods
235     // (since the Class struct is parsed before any of its categories).
236     assert(mcKind == MCK_Category);
237     StringRef newCatName =
238         containerIsec->getRelocAt(catLayout.nameOffset)->getReferentString();
239 
240     auto formatObjAndSrcFileName = [](const InputSection *section) {
241       lld::macho::InputFile *inputFile = section->getFile();
242       std::string result = toString(inputFile);
243 
244       auto objFile = dyn_cast_or_null<ObjFile>(inputFile);
245       if (objFile && objFile->compileUnit)
246         result += " (" + objFile->sourceFile() + ")";
247 
248       return result;
249     };
250 
251     StringRef containerType = mc.kind == MCK_Category ? "category" : "class";
252     warn("method '" + methPrefix + methodName.val() +
253          "' has conflicting definitions:\n>>> defined in category " +
254          newCatName + " from " + formatObjAndSrcFileName(containerIsec) +
255          "\n>>> defined in " + containerType + " " + containerName + " from " +
256          formatObjAndSrcFileName(mc.isec));
257   }
258 }
259 
parseCategory(const ConcatInputSection * catIsec)260 void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) {
261   auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset);
262   if (!classReloc)
263     return;
264 
265   auto *classSym = cast<Symbol *>(classReloc->referent);
266   if (auto *d = dyn_cast<Defined>(classSym))
267     if (!classMap.count(d))
268       parseClass(d);
269 
270   if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) {
271     parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
272                  classSym, catIsec, MCK_Category, MK_Static);
273   }
274 
275   if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) {
276     parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
277                  classSym, catIsec, MCK_Category, MK_Instance);
278   }
279 }
280 
parseClass(const Defined * classSym)281 void ObjcCategoryChecker::parseClass(const Defined *classSym) {
282   // Given a Class struct, get its corresponding Methods struct
283   auto getMethodsIsec =
284       [&](const InputSection *classIsec) -> ConcatInputSection * {
285     if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) {
286       if (const auto *roIsec =
287               cast_or_null<ConcatInputSection>(r->getReferentInputSection())) {
288         if (const auto *r =
289                 roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) {
290           if (auto *methodsIsec = cast_or_null<ConcatInputSection>(
291                   r->getReferentInputSection()))
292             return methodsIsec;
293         }
294       }
295     }
296     return nullptr;
297   };
298 
299   const auto *classIsec = cast<ConcatInputSection>(classSym->isec());
300 
301   // Parse instance methods.
302   if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
303     parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class,
304                  MK_Instance);
305 
306   // Class methods are contained in the metaclass.
307   if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset))
308     if (const auto *classMethodsIsec = getMethodsIsec(
309             cast<ConcatInputSection>(r->getReferentInputSection())))
310       parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static);
311 }
312 
checkCategories()313 void objc::checkCategories() {
314   TimeTraceScope timeScope("ObjcCategoryChecker");
315 
316   ObjcCategoryChecker checker;
317   for (const InputSection *isec : inputSections) {
318     if (isec->getName() == section_names::objcCatList)
319       for (const Reloc &r : isec->relocs) {
320         auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection());
321         checker.parseCategory(catIsec);
322       }
323   }
324 }
325 
326 namespace {
327 
328 class ObjcCategoryMerger {
329   // In which language was a particular construct originally defined
330   enum SourceLanguage { Unknown, ObjC, Swift };
331 
332   // Information about an input category
333   struct InfoInputCategory {
334     ConcatInputSection *catListIsec;
335     ConcatInputSection *catBodyIsec;
336     uint32_t offCatListIsec = 0;
337     SourceLanguage sourceLanguage = SourceLanguage::Unknown;
338 
339     bool wasMerged = false;
340   };
341 
342   // To write new (merged) categories or classes, we will try make limited
343   // assumptions about the alignment and the sections the various class/category
344   // info are stored in and . So we'll just reuse the same sections and
345   // alignment as already used in existing (input) categories. To do this we
346   // have InfoCategoryWriter which contains the various sections that the
347   // generated categories will be written to.
348   struct InfoWriteSection {
349     bool valid = false; // Data has been successfully collected from input
350     uint32_t align = 0;
351     Section *inputSection;
352     Reloc relocTemplate;
353     OutputSection *outputSection;
354   };
355 
356   struct InfoCategoryWriter {
357     InfoWriteSection catListInfo;
358     InfoWriteSection catBodyInfo;
359     InfoWriteSection catNameInfo;
360     InfoWriteSection catPtrListInfo;
361   };
362 
363   // Information about a pointer list in the original categories or class(method
364   // lists, protocol lists, etc)
365   struct PointerListInfo {
366     PointerListInfo() = default;
367     PointerListInfo(const PointerListInfo &) = default;
PointerListInfo__anonff1b1f600411::ObjcCategoryMerger::PointerListInfo368     PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct)
369         : categoryPrefix(_categoryPrefix),
370           pointersPerStruct(_pointersPerStruct) {}
371 
operator ==__anonff1b1f600411::ObjcCategoryMerger::PointerListInfo372     inline bool operator==(const PointerListInfo &cmp) const {
373       return pointersPerStruct == cmp.pointersPerStruct &&
374              structSize == cmp.structSize && structCount == cmp.structCount &&
375              allPtrs == cmp.allPtrs;
376     }
377 
378     const char *categoryPrefix;
379 
380     uint32_t pointersPerStruct = 0;
381 
382     uint32_t structSize = 0;
383     uint32_t structCount = 0;
384 
385     std::vector<Symbol *> allPtrs;
386   };
387 
388   // Full information describing an ObjC class . This will include all the
389   // additional methods, protocols, and properties that are contained in the
390   // class and all the categories that extend a particular class.
391   struct ClassExtensionInfo {
ClassExtensionInfo__anonff1b1f600411::ObjcCategoryMerger::ClassExtensionInfo392     ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};
393 
394     // Merged names of containers. Ex: base|firstCategory|secondCategory|...
395     std::string mergedContainerName;
396     std::string baseClassName;
397     const Symbol *baseClass = nullptr;
398     SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown;
399 
400     CategoryLayout &catLayout;
401 
402     // In case we generate new data, mark the new data as belonging to this file
403     ObjFile *objFileForMergeData = nullptr;
404 
405     PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods,
406                                        /*pointersPerStruct=*/3};
407     PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods,
408                                     /*pointersPerStruct=*/3};
409     PointerListInfo protocols = {objc::symbol_names::categoryProtocols,
410                                  /*pointersPerStruct=*/0};
411     PointerListInfo instanceProps = {objc::symbol_names::listProprieties,
412                                      /*pointersPerStruct=*/2};
413     PointerListInfo classProps = {objc::symbol_names::klassPropList,
414                                   /*pointersPerStruct=*/2};
415   };
416 
417 public:
418   ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
419   void doMerge();
420   static void doCleanup();
421 
422 private:
423   DenseSet<const Symbol *> collectNlCategories();
424   void collectAndValidateCategoriesData();
425   bool
426   mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
427 
428   void eraseISec(ConcatInputSection *isec);
429   void eraseMergedCategories();
430 
431   void generateCatListForNonErasedCategories(
432       MapVector<ConcatInputSection *, std::set<uint64_t>>
433           catListToErasedOffsets);
434   void collectSectionWriteInfoFromIsec(const InputSection *isec,
435                                        InfoWriteSection &catWriteInfo);
436   bool collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);
437   bool parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
438                              ClassExtensionInfo &extInfo);
439 
440   void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
441                              PointerListInfo &ptrList,
442                              SourceLanguage sourceLang);
443 
444   PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
445                                         uint32_t secOffset,
446                                         SourceLanguage sourceLang);
447 
448   bool parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
449                             PointerListInfo &ptrList);
450 
451   void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
452                               const ClassExtensionInfo &extInfo,
453                               const PointerListInfo &ptrList);
454 
455   Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
456                                    const ClassExtensionInfo &extInfo,
457                                    const PointerListInfo &ptrList);
458 
459   Defined *emitCategory(const ClassExtensionInfo &extInfo);
460   Defined *emitCatListEntrySec(const std::string &forCategoryName,
461                                const std::string &forBaseClassName,
462                                ObjFile *objFile);
463   Defined *emitCategoryBody(const std::string &name, const Defined *nameSym,
464                             const Symbol *baseClassSym,
465                             const std::string &baseClassName, ObjFile *objFile);
466   Defined *emitCategoryName(const std::string &name, ObjFile *objFile);
467   void createSymbolReference(Defined *refFrom, const Symbol *refTo,
468                              uint32_t offset, const Reloc &relocTemplate);
469   Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset);
470   Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
471                                    uint32_t offset);
472   Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
473                                      uint32_t offset);
474   Defined *getClassRo(const Defined *classSym, bool getMetaRo);
475   SourceLanguage getClassSymSourceLang(const Defined *classSym);
476   bool mergeCategoriesIntoBaseClass(const Defined *baseClass,
477                                     std::vector<InfoInputCategory> &categories);
478   void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
479   void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
480                                    uint32_t offset);
481 
482   // Allocate a null-terminated StringRef backed by generatedSectionData
483   StringRef newStringData(const char *str);
484   // Allocate section data, backed by generatedSectionData
485   SmallVector<uint8_t> &newSectionData(uint32_t size);
486 
487   CategoryLayout catLayout;
488   ClassLayout classLayout;
489   ROClassLayout roClassLayout;
490   ListHeaderLayout listHeaderLayout;
491   MethodLayout methodLayout;
492   ProtocolListHeaderLayout protocolListHeaderLayout;
493 
494   InfoCategoryWriter infoCategoryWriter;
495   std::vector<ConcatInputSection *> &allInputSections;
496   // Map of base class Symbol to list of InfoInputCategory's for it
497   MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
498 
499   // Normally, the binary data comes from the input files, but since we're
500   // generating binary data ourselves, we use the below array to store it in.
501   // Need this to be 'static' so the data survives past the ObjcCategoryMerger
502   // object, as the data will be read by the Writer when the final binary is
503   // generated.
504   static SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
505       generatedSectionData;
506 };
507 
508 SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
509     ObjcCategoryMerger::generatedSectionData;
510 
ObjcCategoryMerger(std::vector<ConcatInputSection * > & _allInputSections)511 ObjcCategoryMerger::ObjcCategoryMerger(
512     std::vector<ConcatInputSection *> &_allInputSections)
513     : catLayout(target->wordSize), classLayout(target->wordSize),
514       roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
515       methodLayout(target->wordSize),
516       protocolListHeaderLayout(target->wordSize),
517       allInputSections(_allInputSections) {}
518 
collectSectionWriteInfoFromIsec(const InputSection * isec,InfoWriteSection & catWriteInfo)519 void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
520     const InputSection *isec, InfoWriteSection &catWriteInfo) {
521 
522   catWriteInfo.inputSection = const_cast<Section *>(&isec->section);
523   catWriteInfo.align = isec->align;
524   catWriteInfo.outputSection = isec->parent;
525 
526   assert(catWriteInfo.outputSection &&
527          "outputSection may not be null in collectSectionWriteInfoFromIsec.");
528 
529   if (isec->relocs.size())
530     catWriteInfo.relocTemplate = isec->relocs[0];
531 
532   catWriteInfo.valid = true;
533 }
534 
535 Symbol *
tryGetSymbolAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)536 ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
537                                              uint32_t offset) {
538   if (!isec)
539     return nullptr;
540   const Reloc *reloc = isec->getRelocAt(offset);
541 
542   if (!reloc)
543     return nullptr;
544 
545   Symbol *sym = dyn_cast_if_present<Symbol *>(reloc->referent);
546 
547   if (reloc->addend && sym) {
548     assert(isa<Defined>(sym) && "Expected defined for non-zero addend");
549     Defined *definedSym = cast<Defined>(sym);
550     sym = tryFindDefinedOnIsec(definedSym->isec(),
551                                definedSym->value + reloc->addend);
552   }
553 
554   return sym;
555 }
556 
tryFindDefinedOnIsec(const InputSection * isec,uint32_t offset)557 Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec,
558                                                   uint32_t offset) {
559   for (Defined *sym : isec->symbols)
560     if ((sym->value <= offset) && (sym->value + sym->size > offset))
561       return sym;
562 
563   return nullptr;
564 }
565 
566 Defined *
tryGetDefinedAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)567 ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
568                                               uint32_t offset) {
569   Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset);
570   return dyn_cast_or_null<Defined>(sym);
571 }
572 
573 // Get the class's ro_data symbol. If getMetaRo is true, then we will return
574 // the meta-class's ro_data symbol. Otherwise, we will return the class
575 // (instance) ro_data symbol.
getClassRo(const Defined * classSym,bool getMetaRo)576 Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,
577                                         bool getMetaRo) {
578   ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec());
579   if (!isec)
580     return nullptr;
581 
582   if (!getMetaRo)
583     return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset +
584                                                classSym->value);
585 
586   Defined *metaClass = tryGetDefinedAtIsecOffset(
587       isec, classLayout.metaClassOffset + classSym->value);
588   if (!metaClass)
589     return nullptr;
590 
591   return tryGetDefinedAtIsecOffset(
592       dyn_cast<ConcatInputSection>(metaClass->isec()),
593       classLayout.roDataOffset);
594 }
595 
596 // Given an ConcatInputSection or CStringInputSection and an offset, if there is
597 // a symbol(Defined) at that offset, then erase the symbol (mark it not live)
tryEraseDefinedAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)598 void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
599     const ConcatInputSection *isec, uint32_t offset) {
600   const Reloc *reloc = isec->getRelocAt(offset);
601 
602   if (!reloc)
603     return;
604 
605   Defined *sym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent));
606   if (!sym)
607     return;
608 
609   if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
610     eraseISec(cisec);
611   else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
612     uint32_t totalOffset = sym->value + reloc->addend;
613     StringPiece &piece = csisec->getStringPiece(totalOffset);
614     piece.live = false;
615   } else {
616     llvm_unreachable("erased symbol has to be Defined or CStringInputSection");
617   }
618 }
619 
collectCategoryWriterInfoFromCategory(const InfoInputCategory & catInfo)620 bool ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
621     const InfoInputCategory &catInfo) {
622 
623   if (!infoCategoryWriter.catListInfo.valid)
624     collectSectionWriteInfoFromIsec(catInfo.catListIsec,
625                                     infoCategoryWriter.catListInfo);
626   if (!infoCategoryWriter.catBodyInfo.valid)
627     collectSectionWriteInfoFromIsec(catInfo.catBodyIsec,
628                                     infoCategoryWriter.catBodyInfo);
629 
630   if (!infoCategoryWriter.catNameInfo.valid) {
631     lld::macho::Defined *catNameSym =
632         tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
633 
634     if (!catNameSym) {
635       // This is an unhandeled case where the category name is not a symbol but
636       // instead points to an CStringInputSection (that doesn't have any symbol)
637       // TODO: Find a small repro and either fix or add a test case for this
638       // scenario
639       return false;
640     }
641 
642     collectSectionWriteInfoFromIsec(catNameSym->isec(),
643                                     infoCategoryWriter.catNameInfo);
644   }
645 
646   // Collect writer info from all the category lists (we're assuming they all
647   // would provide the same info)
648   if (!infoCategoryWriter.catPtrListInfo.valid) {
649     for (uint32_t off = catLayout.instanceMethodsOffset;
650          off <= catLayout.classPropsOffset; off += target->wordSize) {
651       if (Defined *ptrList =
652               tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) {
653         collectSectionWriteInfoFromIsec(ptrList->isec(),
654                                         infoCategoryWriter.catPtrListInfo);
655         // we've successfully collected data, so we can break
656         break;
657       }
658     }
659   }
660 
661   return true;
662 }
663 
664 // Parse a protocol list that might be linked to ConcatInputSection at a given
665 // offset. The format of the protocol list is different than other lists (prop
666 // lists, method lists) so we need to parse it differently
parseProtocolListInfo(const ConcatInputSection * isec,uint32_t secOffset,PointerListInfo & ptrList,SourceLanguage sourceLang)667 void ObjcCategoryMerger::parseProtocolListInfo(
668     const ConcatInputSection *isec, uint32_t secOffset,
669     PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
670   assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
671          "Tried to read pointer list beyond protocol section end");
672 
673   const Reloc *reloc = isec->getRelocAt(secOffset);
674   if (!reloc)
675     return;
676 
677   auto *ptrListSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent));
678   assert(ptrListSym && "Protocol list reloc does not have a valid Defined");
679 
680   // Theoretically protocol count can be either 32b or 64b, depending on
681   // platform pointer size, but to simplify implementation we always just read
682   // the lower 32b which should be good enough.
683   uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
684       ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
685 
686   ptrList.structCount += protocolCount;
687   ptrList.structSize = target->wordSize;
688 
689   [[maybe_unused]] uint32_t expectedListSize =
690       (protocolCount * target->wordSize) +
691       /*header(count)*/ protocolListHeaderLayout.totalSize +
692       /*extra null value*/ target->wordSize;
693 
694   // On Swift, the protocol list does not have the extra (unnecessary) null
695   [[maybe_unused]] uint32_t expectedListSizeSwift =
696       expectedListSize - target->wordSize;
697 
698   assert(((expectedListSize == ptrListSym->isec()->data.size() &&
699            sourceLang == SourceLanguage::ObjC) ||
700           (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
701            sourceLang == SourceLanguage::Swift)) &&
702          "Protocol list does not match expected size");
703 
704   uint32_t off = protocolListHeaderLayout.totalSize;
705   for (uint32_t inx = 0; inx < protocolCount; ++inx) {
706     const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
707     assert(reloc && "No reloc found at protocol list offset");
708 
709     auto *listSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent));
710     assert(listSym && "Protocol list reloc does not have a valid Defined");
711 
712     ptrList.allPtrs.push_back(listSym);
713     off += target->wordSize;
714   }
715   assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
716          "expected null terminating protocol");
717   assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
718          "Protocol list end offset does not match expected size");
719 }
720 
721 // Parse a protocol list and return the PointerListInfo for it
722 ObjcCategoryMerger::PointerListInfo
parseProtocolListInfo(const ConcatInputSection * isec,uint32_t secOffset,SourceLanguage sourceLang)723 ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
724                                           uint32_t secOffset,
725                                           SourceLanguage sourceLang) {
726   PointerListInfo ptrList;
727   parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
728   return ptrList;
729 }
730 
731 // Parse a pointer list that might be linked to ConcatInputSection at a given
732 // offset. This can be used for instance methods, class methods, instance props
733 // and class props since they have the same format.
parsePointerListInfo(const ConcatInputSection * isec,uint32_t secOffset,PointerListInfo & ptrList)734 bool ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
735                                               uint32_t secOffset,
736                                               PointerListInfo &ptrList) {
737   assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3);
738   assert(isec && "Trying to parse pointer list from null isec");
739   assert(secOffset + target->wordSize <= isec->data.size() &&
740          "Trying to read pointer list beyond section end");
741 
742   const Reloc *reloc = isec->getRelocAt(secOffset);
743   // Empty list is a valid case, return true.
744   if (!reloc)
745     return true;
746 
747   auto *ptrListSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent));
748   assert(ptrListSym && "Reloc does not have a valid Defined");
749 
750   uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
751       ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
752   uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
753       ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
754   assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
755 
756   assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
757 
758   ptrList.structCount += thisStructCount;
759   ptrList.structSize = thisStructSize;
760 
761   uint32_t expectedListSize =
762       listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
763   assert(expectedListSize == ptrListSym->isec()->data.size() &&
764          "Pointer list does not match expected size");
765 
766   for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
767        off += target->wordSize) {
768     const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
769     assert(reloc && "No reloc found at pointer list offset");
770 
771     auto *listSym =
772         dyn_cast_or_null<Defined>(reloc->referent.dyn_cast<Symbol *>());
773     // Sometimes, the reloc points to a StringPiece (InputSection + addend)
774     // instead of a symbol.
775     // TODO: Skip these cases for now, but we should fix this.
776     if (!listSym)
777       return false;
778 
779     ptrList.allPtrs.push_back(listSym);
780   }
781 
782   return true;
783 }
784 
785 // Here we parse all the information of an input category (catInfo) and
786 // append the parsed info into the structure which will contain all the
787 // information about how a class is extended (extInfo)
parseCatInfoToExtInfo(const InfoInputCategory & catInfo,ClassExtensionInfo & extInfo)788 bool ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
789                                                ClassExtensionInfo &extInfo) {
790   const Reloc *catNameReloc =
791       catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset);
792 
793   // Parse name
794   assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
795 
796   // is this the first category we are parsing?
797   if (extInfo.mergedContainerName.empty())
798     extInfo.objFileForMergeData =
799         dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
800   else
801     extInfo.mergedContainerName += "|";
802 
803   assert(extInfo.objFileForMergeData &&
804          "Expected to already have valid objextInfo.objFileForMergeData");
805 
806   StringRef catName = catNameReloc->getReferentString();
807   extInfo.mergedContainerName += catName.str();
808 
809   // Parse base class
810   if (!extInfo.baseClass) {
811     Symbol *classSym =
812         tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset);
813     assert(extInfo.baseClassName.empty());
814     extInfo.baseClass = classSym;
815     llvm::StringRef classPrefix(objc::symbol_names::klass);
816     assert(classSym->getName().starts_with(classPrefix) &&
817            "Base class symbol does not start with expected prefix");
818     extInfo.baseClassName = classSym->getName().substr(classPrefix.size());
819   } else {
820     assert((extInfo.baseClass ==
821             tryGetSymbolAtIsecOffset(catInfo.catBodyIsec,
822                                      catLayout.klassOffset)) &&
823            "Trying to parse category info into container with different base "
824            "class");
825   }
826 
827   if (!parsePointerListInfo(catInfo.catBodyIsec,
828                             catLayout.instanceMethodsOffset,
829                             extInfo.instanceMethods))
830     return false;
831 
832   if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset,
833                             extInfo.classMethods))
834     return false;
835 
836   parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
837                         extInfo.protocols, catInfo.sourceLanguage);
838 
839   if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
840                             extInfo.instanceProps))
841     return false;
842 
843   if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset,
844                             extInfo.classProps))
845     return false;
846 
847   return true;
848 }
849 
850 // Generate a protocol list (including header) and link it into the parent at
851 // the specified offset.
emitAndLinkProtocolList(Defined * parentSym,uint32_t linkAtOffset,const ClassExtensionInfo & extInfo,const PointerListInfo & ptrList)852 Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
853     Defined *parentSym, uint32_t linkAtOffset,
854     const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
855   if (ptrList.allPtrs.empty())
856     return nullptr;
857 
858   assert(ptrList.allPtrs.size() == ptrList.structCount);
859 
860   uint32_t bodySize = (ptrList.structCount * target->wordSize) +
861                       /*header(count)*/ protocolListHeaderLayout.totalSize +
862                       /*extra null value*/ target->wordSize;
863   llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
864 
865   // This theoretically can be either 32b or 64b, but writing just the first 32b
866   // is good enough
867   const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>(
868       bodyData.data() + protocolListHeaderLayout.protocolCountOffset);
869 
870   *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size();
871 
872   ConcatInputSection *listSec = make<ConcatInputSection>(
873       *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
874       infoCategoryWriter.catPtrListInfo.align);
875   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
876   listSec->live = true;
877 
878   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
879 
880   std::string symName = ptrList.categoryPrefix;
881   symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
882 
883   Defined *ptrListSym = make<Defined>(
884       newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
885       listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
886       /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
887       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
888       /*isWeakDefCanBeHidden=*/false);
889 
890   ptrListSym->used = true;
891   parentSym->getObjectFile()->symbols.push_back(ptrListSym);
892   addInputSection(listSec);
893 
894   createSymbolReference(parentSym, ptrListSym, linkAtOffset,
895                         infoCategoryWriter.catBodyInfo.relocTemplate);
896 
897   uint32_t offset = protocolListHeaderLayout.totalSize;
898   for (Symbol *symbol : ptrList.allPtrs) {
899     createSymbolReference(ptrListSym, symbol, offset,
900                           infoCategoryWriter.catPtrListInfo.relocTemplate);
901     offset += target->wordSize;
902   }
903 
904   return ptrListSym;
905 }
906 
907 // Generate a pointer list (including header) and link it into the parent at the
908 // specified offset. This is used for instance and class methods and
909 // proprieties.
emitAndLinkPointerList(Defined * parentSym,uint32_t linkAtOffset,const ClassExtensionInfo & extInfo,const PointerListInfo & ptrList)910 void ObjcCategoryMerger::emitAndLinkPointerList(
911     Defined *parentSym, uint32_t linkAtOffset,
912     const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
913   if (ptrList.allPtrs.empty())
914     return;
915 
916   assert(ptrList.allPtrs.size() * target->wordSize ==
917          ptrList.structCount * ptrList.structSize);
918 
919   // Generate body
920   uint32_t bodySize =
921       listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount);
922   llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
923 
924   const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>(
925       bodyData.data() + listHeaderLayout.structSizeOffset);
926   const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>(
927       bodyData.data() + listHeaderLayout.structCountOffset);
928 
929   *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize;
930   *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount;
931 
932   ConcatInputSection *listSec = make<ConcatInputSection>(
933       *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
934       infoCategoryWriter.catPtrListInfo.align);
935   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
936   listSec->live = true;
937 
938   listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
939 
940   std::string symName = ptrList.categoryPrefix;
941   symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
942 
943   Defined *ptrListSym = make<Defined>(
944       newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
945       listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
946       /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
947       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
948       /*isWeakDefCanBeHidden=*/false);
949 
950   ptrListSym->used = true;
951   parentSym->getObjectFile()->symbols.push_back(ptrListSym);
952   addInputSection(listSec);
953 
954   createSymbolReference(parentSym, ptrListSym, linkAtOffset,
955                         infoCategoryWriter.catBodyInfo.relocTemplate);
956 
957   uint32_t offset = listHeaderLayout.totalSize;
958   for (Symbol *symbol : ptrList.allPtrs) {
959     createSymbolReference(ptrListSym, symbol, offset,
960                           infoCategoryWriter.catPtrListInfo.relocTemplate);
961     offset += target->wordSize;
962   }
963 }
964 
965 // This method creates an __objc_catlist ConcatInputSection with a single slot
966 Defined *
emitCatListEntrySec(const std::string & forCategoryName,const std::string & forBaseClassName,ObjFile * objFile)967 ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
968                                         const std::string &forBaseClassName,
969                                         ObjFile *objFile) {
970   uint32_t sectionSize = target->wordSize;
971   llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize);
972 
973   ConcatInputSection *newCatList =
974       make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection,
975                                bodyData, infoCategoryWriter.catListInfo.align);
976   newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
977   newCatList->live = true;
978 
979   newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
980 
981   std::string catSymName = "<__objc_catlist slot for merged category ";
982   catSymName += forBaseClassName + "(" + forCategoryName + ")>";
983 
984   Defined *catListSym = make<Defined>(
985       newStringData(catSymName.c_str()), /*file=*/objFile, newCatList,
986       /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
987       /*isPrivateExtern=*/false, /*includeInSymtab=*/false,
988       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
989       /*isWeakDefCanBeHidden=*/false);
990 
991   catListSym->used = true;
992   objFile->symbols.push_back(catListSym);
993   addInputSection(newCatList);
994   return catListSym;
995 }
996 
997 // Here we generate the main category body and link the name and base class into
998 // it. We don't link any other info yet like the protocol and class/instance
999 // methods/props.
emitCategoryBody(const std::string & name,const Defined * nameSym,const Symbol * baseClassSym,const std::string & baseClassName,ObjFile * objFile)1000 Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
1001                                               const Defined *nameSym,
1002                                               const Symbol *baseClassSym,
1003                                               const std::string &baseClassName,
1004                                               ObjFile *objFile) {
1005   llvm::ArrayRef<uint8_t> bodyData = newSectionData(catLayout.totalSize);
1006 
1007   uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) +
1008                                    catLayout.sizeOffset);
1009   *ptrSize = catLayout.totalSize;
1010 
1011   ConcatInputSection *newBodySec =
1012       make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection,
1013                                bodyData, infoCategoryWriter.catBodyInfo.align);
1014   newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
1015   newBodySec->live = true;
1016 
1017   std::string symName =
1018       objc::symbol_names::category + baseClassName + "(" + name + ")";
1019   Defined *catBodySym = make<Defined>(
1020       newStringData(symName.c_str()), /*file=*/objFile, newBodySec,
1021       /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
1022       /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
1023       /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
1024       /*isWeakDefCanBeHidden=*/false);
1025 
1026   catBodySym->used = true;
1027   objFile->symbols.push_back(catBodySym);
1028   addInputSection(newBodySec);
1029 
1030   createSymbolReference(catBodySym, nameSym, catLayout.nameOffset,
1031                         infoCategoryWriter.catBodyInfo.relocTemplate);
1032 
1033   // Create a reloc to the base class (either external or internal)
1034   createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset,
1035                         infoCategoryWriter.catBodyInfo.relocTemplate);
1036 
1037   return catBodySym;
1038 }
1039 
1040 // This writes the new category name (for the merged category) into the binary
1041 // and returns the sybmol for it.
emitCategoryName(const std::string & name,ObjFile * objFile)1042 Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
1043                                               ObjFile *objFile) {
1044   StringRef nameStrData = newStringData(name.c_str());
1045   // We use +1 below to include the null terminator
1046   llvm::ArrayRef<uint8_t> nameData(
1047       reinterpret_cast<const uint8_t *>(nameStrData.data()),
1048       nameStrData.size() + 1);
1049 
1050   auto *parentSection = infoCategoryWriter.catNameInfo.inputSection;
1051   CStringInputSection *newStringSec = make<CStringInputSection>(
1052       *infoCategoryWriter.catNameInfo.inputSection, nameData,
1053       infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/true);
1054 
1055   parentSection->subsections.push_back({0, newStringSec});
1056 
1057   newStringSec->splitIntoPieces();
1058   newStringSec->pieces[0].live = true;
1059   newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
1060   in.cStringSection->addInput(newStringSec);
1061   assert(newStringSec->pieces.size() == 1);
1062 
1063   Defined *catNameSym = make<Defined>(
1064       "<merged category name>", /*file=*/objFile, newStringSec,
1065       /*value=*/0, nameData.size(),
1066       /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1067       /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1068       /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1069 
1070   catNameSym->used = true;
1071   objFile->symbols.push_back(catNameSym);
1072   return catNameSym;
1073 }
1074 
1075 // This method fully creates a new category from the given ClassExtensionInfo.
1076 // It creates the category name, body and method/protocol/prop lists and links
1077 // them all together. Then it creates a new __objc_catlist entry and adds the
1078 // category to it. Calling this method will fully generate a category which will
1079 // be available in the final binary.
emitCategory(const ClassExtensionInfo & extInfo)1080 Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) {
1081   Defined *catNameSym = emitCategoryName(extInfo.mergedContainerName,
1082                                          extInfo.objFileForMergeData);
1083 
1084   Defined *catBodySym = emitCategoryBody(
1085       extInfo.mergedContainerName, catNameSym, extInfo.baseClass,
1086       extInfo.baseClassName, extInfo.objFileForMergeData);
1087 
1088   Defined *catListSym =
1089       emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName,
1090                           extInfo.objFileForMergeData);
1091 
1092   // Add the single category body to the category list at the offset 0.
1093   createSymbolReference(catListSym, catBodySym, /*offset=*/0,
1094                         infoCategoryWriter.catListInfo.relocTemplate);
1095 
1096   emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset, extInfo,
1097                          extInfo.instanceMethods);
1098 
1099   emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo,
1100                          extInfo.classMethods);
1101 
1102   emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo,
1103                           extInfo.protocols);
1104 
1105   emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset, extInfo,
1106                          extInfo.instanceProps);
1107 
1108   emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo,
1109                          extInfo.classProps);
1110 
1111   return catBodySym;
1112 }
1113 
1114 // This method merges all the categories (sharing a base class) into a single
1115 // category.
mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> & categories)1116 bool ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
1117     std::vector<InfoInputCategory> &categories) {
1118   assert(categories.size() > 1 && "Expected at least 2 categories");
1119 
1120   ClassExtensionInfo extInfo(catLayout);
1121 
1122   for (auto &catInfo : categories)
1123     if (!parseCatInfoToExtInfo(catInfo, extInfo))
1124       return false;
1125 
1126   Defined *newCatDef = emitCategory(extInfo);
1127   assert(newCatDef && "Failed to create a new category");
1128 
1129   // Suppress unsuded var warning
1130   (void)newCatDef;
1131 
1132   for (auto &catInfo : categories)
1133     catInfo.wasMerged = true;
1134 
1135   return true;
1136 }
1137 
createSymbolReference(Defined * refFrom,const Symbol * refTo,uint32_t offset,const Reloc & relocTemplate)1138 void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
1139                                                const Symbol *refTo,
1140                                                uint32_t offset,
1141                                                const Reloc &relocTemplate) {
1142   Reloc r = relocTemplate;
1143   r.offset = offset;
1144   r.addend = 0;
1145   r.referent = const_cast<Symbol *>(refTo);
1146   refFrom->isec()->relocs.push_back(r);
1147 }
1148 
1149 // Get the list of categories in the '__objc_nlcatlist' section. We can't
1150 // optimize these as they have a '+load' method that has to be called at
1151 // runtime.
collectNlCategories()1152 DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() {
1153   DenseSet<const Symbol *> nlCategories;
1154 
1155   for (InputSection *sec : allInputSections) {
1156     if (sec->getName() != section_names::objcNonLazyCatList)
1157       continue;
1158 
1159     for (auto &r : sec->relocs) {
1160       const Symbol *sym = r.referent.dyn_cast<Symbol *>();
1161       nlCategories.insert(sym);
1162     }
1163   }
1164   return nlCategories;
1165 }
1166 
collectAndValidateCategoriesData()1167 void ObjcCategoryMerger::collectAndValidateCategoriesData() {
1168   auto nlCategories = collectNlCategories();
1169 
1170   for (InputSection *sec : allInputSections) {
1171     if (sec->getName() != section_names::objcCatList)
1172       continue;
1173     ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec);
1174     assert(catListCisec &&
1175            "__objc_catList InputSection is not a ConcatInputSection");
1176 
1177     for (uint32_t off = 0; off < catListCisec->getSize();
1178          off += target->wordSize) {
1179       Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off);
1180       assert(categorySym &&
1181              "Failed to get a valid category at __objc_catlit offset");
1182 
1183       if (nlCategories.count(categorySym))
1184         continue;
1185 
1186       auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
1187       assert(catBodyIsec &&
1188              "Category data section is not an ConcatInputSection");
1189 
1190       SourceLanguage eLang = SourceLanguage::Unknown;
1191       if (categorySym->getName().starts_with(objc::symbol_names::category))
1192         eLang = SourceLanguage::ObjC;
1193       else if (categorySym->getName().starts_with(
1194                    objc::symbol_names::swift_objc_category))
1195         eLang = SourceLanguage::Swift;
1196       else
1197         llvm_unreachable("Unexpected category symbol name");
1198 
1199       InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang};
1200 
1201       // Check that the category has a reloc at 'klassOffset' (which is
1202       // a pointer to the class symbol)
1203 
1204       Symbol *classSym =
1205           tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
1206       assert(classSym && "Category does not have a valid base class");
1207 
1208       if (!collectCategoryWriterInfoFromCategory(catInputInfo))
1209         continue;
1210 
1211       categoryMap[classSym].push_back(catInputInfo);
1212     }
1213   }
1214 }
1215 
1216 // In the input we have multiple __objc_catlist InputSection, each of which may
1217 // contain links to multiple categories. Of these categories, we will merge (and
1218 // erase) only some. There will be some categories that will remain untouched
1219 // (not erased). For these not erased categories, we generate new __objc_catlist
1220 // entries since the parent __objc_catlist entry will be erased
generateCatListForNonErasedCategories(const MapVector<ConcatInputSection *,std::set<uint64_t>> catListToErasedOffsets)1221 void ObjcCategoryMerger::generateCatListForNonErasedCategories(
1222     const MapVector<ConcatInputSection *, std::set<uint64_t>>
1223         catListToErasedOffsets) {
1224 
1225   // Go through all offsets of all __objc_catlist's that we process and if there
1226   // are categories that we didn't process - generate a new __objc_catlist for
1227   // each.
1228   for (auto &mapEntry : catListToErasedOffsets) {
1229     ConcatInputSection *catListIsec = mapEntry.first;
1230     for (uint32_t catListIsecOffset = 0;
1231          catListIsecOffset < catListIsec->data.size();
1232          catListIsecOffset += target->wordSize) {
1233       // This slot was erased, we can just skip it
1234       if (mapEntry.second.count(catListIsecOffset))
1235         continue;
1236 
1237       Defined *nonErasedCatBody =
1238           tryGetDefinedAtIsecOffset(catListIsec, catListIsecOffset);
1239       assert(nonErasedCatBody && "Failed to relocate non-deleted category");
1240 
1241       // Allocate data for the new __objc_catlist slot
1242       llvm::ArrayRef<uint8_t> bodyData = newSectionData(target->wordSize);
1243 
1244       // We mark the __objc_catlist slot as belonging to the same file as the
1245       // category
1246       ObjFile *objFile = dyn_cast<ObjFile>(nonErasedCatBody->getFile());
1247 
1248       ConcatInputSection *listSec = make<ConcatInputSection>(
1249           *infoCategoryWriter.catListInfo.inputSection, bodyData,
1250           infoCategoryWriter.catListInfo.align);
1251       listSec->parent = infoCategoryWriter.catListInfo.outputSection;
1252       listSec->live = true;
1253 
1254       std::string slotSymName = "<__objc_catlist slot for category ";
1255       slotSymName += nonErasedCatBody->getName();
1256       slotSymName += ">";
1257 
1258       Defined *catListSlotSym = make<Defined>(
1259           newStringData(slotSymName.c_str()), /*file=*/objFile, listSec,
1260           /*value=*/0, bodyData.size(),
1261           /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1262           /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1263           /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1264 
1265       catListSlotSym->used = true;
1266       objFile->symbols.push_back(catListSlotSym);
1267       addInputSection(listSec);
1268 
1269       // Now link the category body into the newly created slot
1270       createSymbolReference(catListSlotSym, nonErasedCatBody, 0,
1271                             infoCategoryWriter.catListInfo.relocTemplate);
1272     }
1273   }
1274 }
1275 
eraseISec(ConcatInputSection * isec)1276 void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
1277   isec->live = false;
1278   for (auto &sym : isec->symbols)
1279     sym->used = false;
1280 }
1281 
1282 // This fully erases the merged categories, including their body, their names,
1283 // their method/protocol/prop lists and the __objc_catlist entries that link to
1284 // them.
eraseMergedCategories()1285 void ObjcCategoryMerger::eraseMergedCategories() {
1286   // Map of InputSection to a set of offsets of the categories that were merged
1287   MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
1288 
1289   for (auto &mapEntry : categoryMap) {
1290     for (InfoInputCategory &catInfo : mapEntry.second) {
1291       if (catInfo.wasMerged) {
1292         eraseISec(catInfo.catListIsec);
1293         catListToErasedOffsets[catInfo.catListIsec].insert(
1294             catInfo.offCatListIsec);
1295       }
1296     }
1297   }
1298 
1299   // If there were categories that we did not erase, we need to generate a new
1300   // __objc_catList that contains only the un-merged categories, and get rid of
1301   // the references to the ones we merged.
1302   generateCatListForNonErasedCategories(catListToErasedOffsets);
1303 
1304   // Erase the old method lists & names of the categories that were merged
1305   for (auto &mapEntry : categoryMap) {
1306     for (InfoInputCategory &catInfo : mapEntry.second) {
1307       if (!catInfo.wasMerged)
1308         continue;
1309 
1310       eraseISec(catInfo.catBodyIsec);
1311 
1312       // We can't erase 'catLayout.nameOffset' for either Swift or ObjC
1313       //   categories because the name will sometimes also be used for other
1314       //   purposes.
1315       // For Swift, see usages of 'l_.str.11.SimpleClass' in
1316       //   objc-category-merging-swift.s
1317       // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in
1318       //   objc-category-merging-erase-objc-name-test.s
1319       // TODO: handle the above in a smarter way
1320 
1321       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1322                                   catLayout.instanceMethodsOffset);
1323       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1324                                   catLayout.classMethodsOffset);
1325       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1326                                   catLayout.protocolsOffset);
1327       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1328                                   catLayout.classPropsOffset);
1329       tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1330                                   catLayout.instancePropsOffset);
1331     }
1332   }
1333 }
1334 
doMerge()1335 void ObjcCategoryMerger::doMerge() {
1336   collectAndValidateCategoriesData();
1337 
1338   for (auto &[baseClass, catInfos] : categoryMap) {
1339     bool merged = false;
1340     if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) {
1341       // Merge all categories into the base class
1342       merged = mergeCategoriesIntoBaseClass(baseClassDef, catInfos);
1343     } else if (catInfos.size() > 1) {
1344       // Merge all categories into a new, single category
1345       merged = mergeCategoriesIntoSingleCategory(catInfos);
1346     }
1347     if (!merged)
1348       warn("ObjC category merging skipped for class symbol' " +
1349            baseClass->getName().str() + "'\n");
1350   }
1351 
1352   // Erase all categories that were merged
1353   eraseMergedCategories();
1354 }
1355 
doCleanup()1356 void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }
1357 
newStringData(const char * str)1358 StringRef ObjcCategoryMerger::newStringData(const char *str) {
1359   uint32_t len = strlen(str);
1360   uint32_t bufSize = len + 1;
1361   SmallVector<uint8_t> &data = newSectionData(bufSize);
1362   char *strData = reinterpret_cast<char *>(data.data());
1363   // Copy the string chars and null-terminator
1364   memcpy(strData, str, bufSize);
1365   return StringRef(strData, len);
1366 }
1367 
newSectionData(uint32_t size)1368 SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) {
1369   generatedSectionData.push_back(
1370       std::make_unique<SmallVector<uint8_t>>(size, 0));
1371   return *generatedSectionData.back();
1372 }
1373 
1374 } // namespace
1375 
mergeCategories()1376 void objc::mergeCategories() {
1377   TimeTraceScope timeScope("ObjcCategoryMerger");
1378 
1379   ObjcCategoryMerger merger(inputSections);
1380   merger.doMerge();
1381 }
1382 
doCleanup()1383 void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); }
1384 
1385 ObjcCategoryMerger::SourceLanguage
getClassSymSourceLang(const Defined * classSym)1386 ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) {
1387   if (classSym->getName().starts_with(objc::symbol_names::swift_objc_klass))
1388     return SourceLanguage::Swift;
1389 
1390   // If the symbol name matches the ObjC prefix, we don't necessarely know this
1391   // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift
1392   // classes. Ex:
1393   //  .globl	_OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1394   //  .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1395   //  .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN
1396   //
1397   // So we scan for symbols with the same address and check for the Swift class
1398   if (classSym->getName().starts_with(objc::symbol_names::klass)) {
1399     for (auto &sym : classSym->originalIsec->symbols)
1400       if (sym->value == classSym->value)
1401         if (sym->getName().starts_with(objc::symbol_names::swift_objc_klass))
1402           return SourceLanguage::Swift;
1403     return SourceLanguage::ObjC;
1404   }
1405 
1406   llvm_unreachable("Unexpected class symbol name during category merging");
1407 }
1408 
mergeCategoriesIntoBaseClass(const Defined * baseClass,std::vector<InfoInputCategory> & categories)1409 bool ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
1410     const Defined *baseClass, std::vector<InfoInputCategory> &categories) {
1411   assert(categories.size() >= 1 && "Expected at least one category to merge");
1412 
1413   // Collect all the info from the categories
1414   ClassExtensionInfo extInfo(catLayout);
1415   extInfo.baseClass = baseClass;
1416   extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass);
1417 
1418   for (auto &catInfo : categories)
1419     if (!parseCatInfoToExtInfo(catInfo, extInfo))
1420       return false;
1421 
1422   // Get metadata for the base class
1423   Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true);
1424   ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec());
1425   Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false);
1426   ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec());
1427 
1428   // Now collect the info from the base class from the various lists in the
1429   // class metadata
1430 
1431   // Protocol lists are a special case - the same protocol list is in classRo
1432   // and metaRo, so we only need to parse it once
1433   parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1434                         extInfo.protocols, extInfo.baseClassSourceLanguage);
1435 
1436   // Check that the classRo and metaRo protocol lists are identical
1437   assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1438                                extInfo.baseClassSourceLanguage) ==
1439              parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
1440                                    extInfo.baseClassSourceLanguage) &&
1441          "Category merger expects classRo and metaRo to have the same protocol "
1442          "list");
1443 
1444   parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
1445                        extInfo.classMethods);
1446   parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset,
1447                        extInfo.instanceMethods);
1448 
1449   parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset,
1450                        extInfo.classProps);
1451   parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset,
1452                        extInfo.instanceProps);
1453 
1454   // Erase the old lists - these will be generated and replaced
1455   eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset);
1456   eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset);
1457   eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset);
1458   eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset);
1459   eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset);
1460   eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset);
1461 
1462   // Emit the newly merged lists - first into the meta RO then into the class RO
1463   // First we emit and link the protocol list into the meta RO. Then we link it
1464   // in the classRo as well (they're supposed to be identical)
1465   if (Defined *protoListSym =
1466           emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset,
1467                                   extInfo, extInfo.protocols)) {
1468     createSymbolReference(classRo, protoListSym,
1469                           roClassLayout.baseProtocolsOffset,
1470                           infoCategoryWriter.catBodyInfo.relocTemplate);
1471   }
1472 
1473   emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo,
1474                          extInfo.classMethods);
1475   emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo,
1476                          extInfo.instanceMethods);
1477 
1478   emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo,
1479                          extInfo.classProps);
1480 
1481   emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo,
1482                          extInfo.instanceProps);
1483 
1484   // Mark all the categories as merged - this will be used to erase them later
1485   for (auto &catInfo : categories)
1486     catInfo.wasMerged = true;
1487 
1488   return true;
1489 }
1490 
1491 // Erase the symbol at a given offset in an InputSection
eraseSymbolAtIsecOffset(ConcatInputSection * isec,uint32_t offset)1492 void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,
1493                                                  uint32_t offset) {
1494   Defined *sym = tryGetDefinedAtIsecOffset(isec, offset);
1495   if (!sym)
1496     return;
1497 
1498   // Remove the symbol from isec->symbols
1499   assert(isa<Defined>(sym) && "Can only erase a Defined");
1500   llvm::erase(isec->symbols, sym);
1501 
1502   // Remove the relocs that refer to this symbol
1503   auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };
1504   llvm::erase_if(isec->relocs, removeAtOff);
1505 
1506   // Now, if the symbol fully occupies a ConcatInputSection, we can also erase
1507   // the whole ConcatInputSection
1508   if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec()))
1509     if (cisec->data.size() == sym->size)
1510       eraseISec(cisec);
1511 }
1512