1 //===- ObjC.cpp -----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "ObjC.h"
10 #include "ConcatOutputSection.h"
11 #include "InputFiles.h"
12 #include "InputSection.h"
13 #include "Layout.h"
14 #include "OutputSegment.h"
15 #include "SyntheticSections.h"
16 #include "Target.h"
17
18 #include "lld/Common/ErrorHandler.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/BinaryFormat/MachO.h"
21 #include "llvm/Bitcode/BitcodeReader.h"
22 #include "llvm/Support/TimeProfiler.h"
23
24 using namespace llvm;
25 using namespace llvm::MachO;
26 using namespace lld;
27 using namespace lld::macho;
28
objectHasObjCSection(MemoryBufferRef mb)29 template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {
30 using SectionHeader = typename LP::section;
31
32 auto *hdr =
33 reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart());
34 if (hdr->magic != LP::magic)
35 return false;
36
37 if (const auto *c =
38 findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) {
39 auto sectionHeaders = ArrayRef<SectionHeader>{
40 reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
41 for (const SectionHeader &secHead : sectionHeaders) {
42 StringRef sectname(secHead.sectname,
43 strnlen(secHead.sectname, sizeof(secHead.sectname)));
44 StringRef segname(secHead.segname,
45 strnlen(secHead.segname, sizeof(secHead.segname)));
46 if ((segname == segment_names::data &&
47 sectname == section_names::objcCatList) ||
48 (segname == segment_names::text &&
49 sectname.starts_with(section_names::swift))) {
50 return true;
51 }
52 }
53 }
54 return false;
55 }
56
objectHasObjCSection(MemoryBufferRef mb)57 static bool objectHasObjCSection(MemoryBufferRef mb) {
58 if (target->wordSize == 8)
59 return ::objectHasObjCSection<LP64>(mb);
60 else
61 return ::objectHasObjCSection<ILP32>(mb);
62 }
63
hasObjCSection(MemoryBufferRef mb)64 bool macho::hasObjCSection(MemoryBufferRef mb) {
65 switch (identify_magic(mb.getBuffer())) {
66 case file_magic::macho_object:
67 return objectHasObjCSection(mb);
68 case file_magic::bitcode:
69 return check(isBitcodeContainingObjCCategory(mb));
70 default:
71 return false;
72 }
73 }
74
namespace {

// The layout types below are generated by CREATE_LAYOUT_CLASS (defined outside
// this file) from an X-macro that lists each field's type and name in order.
// As used elsewhere in this file, the resulting `<Name>Layout` objects are
// constructed from the target word size and expose `<field>Offset` members plus
// a `totalSize` member.

// Fields of an ObjC category struct.
#define FOR_EACH_CATEGORY_FIELD(DO)                                            \
  DO(Ptr, name)                                                                \
  DO(Ptr, klass)                                                               \
  DO(Ptr, instanceMethods)                                                     \
  DO(Ptr, classMethods)                                                        \
  DO(Ptr, protocols)                                                           \
  DO(Ptr, instanceProps)                                                       \
  DO(Ptr, classProps)                                                          \
  DO(uint32_t, size)

CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD);

#undef FOR_EACH_CATEGORY_FIELD

// Fields of an ObjC class struct. `roData` refers to the read-only class data
// described by ROClassLayout below.
#define FOR_EACH_CLASS_FIELD(DO)                                               \
  DO(Ptr, metaClass)                                                           \
  DO(Ptr, superClass)                                                          \
  DO(Ptr, methodCache)                                                         \
  DO(Ptr, vtable)                                                              \
  DO(Ptr, roData)

CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD);

#undef FOR_EACH_CLASS_FIELD

// Fields of the read-only class data struct referenced from a class's `roData`
// field.
#define FOR_EACH_RO_CLASS_FIELD(DO)                                            \
  DO(uint32_t, flags)                                                          \
  DO(uint32_t, instanceStart)                                                  \
  DO(Ptr, instanceSize)                                                        \
  DO(Ptr, ivarLayout)                                                          \
  DO(Ptr, name)                                                                \
  DO(Ptr, baseMethods)                                                         \
  DO(Ptr, baseProtocols)                                                       \
  DO(Ptr, ivars)                                                               \
  DO(Ptr, weakIvarLayout)                                                      \
  DO(Ptr, baseProperties)

CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD);

#undef FOR_EACH_RO_CLASS_FIELD

// Header that precedes the entries of a method or property list: the size of
// one entry followed by the number of entries.
#define FOR_EACH_LIST_HEADER(DO)                                               \
  DO(uint32_t, structSize)                                                     \
  DO(uint32_t, structCount)

CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER);

#undef FOR_EACH_LIST_HEADER

// Header that precedes the entries of a protocol list: a single pointer-sized
// protocol count.
#define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount)

CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER);

#undef FOR_EACH_PROTOCOL_LIST_HEADER

// Fields of one method-list entry.
#define FOR_EACH_METHOD(DO)                                                    \
  DO(Ptr, name)                                                                \
  DO(Ptr, type)                                                                \
  DO(Ptr, impl)

CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD);

#undef FOR_EACH_METHOD

// Whether a method was declared directly on a class or in a category.
enum MethodContainerKind {
  MCK_Class,
  MCK_Category,
};

// Identifies where a method definition came from: the kind of container and
// the input section holding that container's struct.
struct MethodContainer {
  MethodContainerKind kind;
  const ConcatInputSection *isec;
};

// Distinguishes instance methods from class ("static") methods.
enum MethodKind {
  MK_Instance,
  MK_Static,
};

// Per-class record of the methods seen so far, keyed by method name, each
// mapped to the container that first defined it.
struct ObjcClass {
  DenseMap<CachedHashStringRef, MethodContainer> instanceMethods;
  DenseMap<CachedHashStringRef, MethodContainer> classMethods;
};

} // namespace
162
// Walks ObjC categories (and the classes they extend) and warns when a
// category method has the same name as a method already recorded for that
// class.
class ObjcCategoryChecker {
public:
  // Initializes all struct layouts from the target's word size.
  ObjcCategoryChecker();
  // Parses one category. Note: the definition reads the section with category
  // struct offsets (catLayout), i.e. the argument is a category body section.
  void parseCategory(const ConcatInputSection *catListIsec);

private:
  // Records the instance and class methods declared directly on \p classSym.
  void parseClass(const Defined *classSym);
  // Records every method named in the method list \p methodsIsec under the
  // class \p methodContainer, warning on duplicates. \p containerIsec is the
  // class or category struct that owns the list.
  void parseMethods(const ConcatInputSection *methodsIsec,
                    const Symbol *methodContainer,
                    const ConcatInputSection *containerIsec,
                    MethodContainerKind, MethodKind);

  // Field offsets for the ObjC structs this checker reads.
  CategoryLayout catLayout;
  ClassLayout classLayout;
  ROClassLayout roClassLayout;
  ListHeaderLayout listHeaderLayout;
  MethodLayout methodLayout;

  // Methods seen so far, grouped by the class symbol they belong to.
  DenseMap<const Symbol *, ObjcClass> classMap;
};
183
ObjcCategoryChecker()184 ObjcCategoryChecker::ObjcCategoryChecker()
185 : catLayout(target->wordSize), classLayout(target->wordSize),
186 roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
187 methodLayout(target->wordSize) {}
188
189 // \p r must point to an offset within a CStringInputSection or a
190 // ConcatInputSection
getReferentString(const Reloc & r)191 static StringRef getReferentString(const Reloc &r) {
192 if (auto *isec = r.referent.dyn_cast<InputSection *>())
193 return cast<CStringInputSection>(isec)->getStringRefAtOffset(r.addend);
194
195 auto *sym = cast<Defined>(r.referent.get<Symbol *>());
196 auto *symIsec = sym->isec();
197 auto symOffset = sym->value + r.addend;
198
199 if (auto *s = dyn_cast_or_null<CStringInputSection>(symIsec))
200 return s->getStringRefAtOffset(symOffset);
201
202 if (isa<ConcatInputSection>(symIsec)) {
203 auto strData = symIsec->data.slice(symOffset);
204 const char *pszData = reinterpret_cast<const char *>(strData.data());
205 return StringRef(pszData, strnlen(pszData, strData.size()));
206 }
207
208 llvm_unreachable("unknown reference section in getReferentString");
209 }
210
parseMethods(const ConcatInputSection * methodsIsec,const Symbol * methodContainerSym,const ConcatInputSection * containerIsec,MethodContainerKind mcKind,MethodKind mKind)211 void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
212 const Symbol *methodContainerSym,
213 const ConcatInputSection *containerIsec,
214 MethodContainerKind mcKind,
215 MethodKind mKind) {
216 ObjcClass &klass = classMap[methodContainerSym];
217 for (const Reloc &r : methodsIsec->relocs) {
218 if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize !=
219 methodLayout.nameOffset)
220 continue;
221
222 CachedHashStringRef methodName(getReferentString(r));
223 // +load methods are special: all implementations are called by the runtime
224 // even if they are part of the same class. Thus there is no need to check
225 // for duplicates.
226 // NOTE: Instead of specifically checking for this method name, ld64 simply
227 // checks whether a class / category is present in __objc_nlclslist /
228 // __objc_nlcatlist respectively. This will be the case if the class /
229 // category has a +load method. It skips optimizing the categories if there
230 // are multiple +load methods. Since it does dupe checking as part of the
231 // optimization process, this avoids spurious dupe messages around +load,
232 // but it also means that legit dupe issues for other methods are ignored.
233 if (mKind == MK_Static && methodName.val() == "load")
234 continue;
235
236 auto &methodMap =
237 mKind == MK_Instance ? klass.instanceMethods : klass.classMethods;
238 if (methodMap
239 .try_emplace(methodName, MethodContainer{mcKind, containerIsec})
240 .second)
241 continue;
242
243 // We have a duplicate; generate a warning message.
244 const auto &mc = methodMap.lookup(methodName);
245 const Reloc *nameReloc = nullptr;
246 if (mc.kind == MCK_Category) {
247 nameReloc = mc.isec->getRelocAt(catLayout.nameOffset);
248 } else {
249 assert(mc.kind == MCK_Class);
250 const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset)
251 ->getReferentInputSection();
252 nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset);
253 }
254 StringRef containerName = getReferentString(*nameReloc);
255 StringRef methPrefix = mKind == MK_Instance ? "-" : "+";
256
257 // We should only ever encounter collisions when parsing category methods
258 // (since the Class struct is parsed before any of its categories).
259 assert(mcKind == MCK_Category);
260 StringRef newCatName =
261 getReferentString(*containerIsec->getRelocAt(catLayout.nameOffset));
262
263 auto formatObjAndSrcFileName = [](const InputSection *section) {
264 lld::macho::InputFile *inputFile = section->getFile();
265 std::string result = toString(inputFile);
266
267 auto objFile = dyn_cast_or_null<ObjFile>(inputFile);
268 if (objFile && objFile->compileUnit)
269 result += " (" + objFile->sourceFile() + ")";
270
271 return result;
272 };
273
274 StringRef containerType = mc.kind == MCK_Category ? "category" : "class";
275 warn("method '" + methPrefix + methodName.val() +
276 "' has conflicting definitions:\n>>> defined in category " +
277 newCatName + " from " + formatObjAndSrcFileName(containerIsec) +
278 "\n>>> defined in " + containerType + " " + containerName + " from " +
279 formatObjAndSrcFileName(mc.isec));
280 }
281 }
282
parseCategory(const ConcatInputSection * catIsec)283 void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) {
284 auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset);
285 if (!classReloc)
286 return;
287
288 auto *classSym = classReloc->referent.get<Symbol *>();
289 if (auto *d = dyn_cast<Defined>(classSym))
290 if (!classMap.count(d))
291 parseClass(d);
292
293 if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) {
294 parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
295 classSym, catIsec, MCK_Category, MK_Static);
296 }
297
298 if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) {
299 parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
300 classSym, catIsec, MCK_Category, MK_Instance);
301 }
302 }
303
parseClass(const Defined * classSym)304 void ObjcCategoryChecker::parseClass(const Defined *classSym) {
305 // Given a Class struct, get its corresponding Methods struct
306 auto getMethodsIsec =
307 [&](const InputSection *classIsec) -> ConcatInputSection * {
308 if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) {
309 if (const auto *roIsec =
310 cast_or_null<ConcatInputSection>(r->getReferentInputSection())) {
311 if (const auto *r =
312 roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) {
313 if (auto *methodsIsec = cast_or_null<ConcatInputSection>(
314 r->getReferentInputSection()))
315 return methodsIsec;
316 }
317 }
318 }
319 return nullptr;
320 };
321
322 const auto *classIsec = cast<ConcatInputSection>(classSym->isec());
323
324 // Parse instance methods.
325 if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
326 parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class,
327 MK_Instance);
328
329 // Class methods are contained in the metaclass.
330 if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset))
331 if (const auto *classMethodsIsec = getMethodsIsec(
332 cast<ConcatInputSection>(r->getReferentInputSection())))
333 parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static);
334 }
335
checkCategories()336 void objc::checkCategories() {
337 TimeTraceScope timeScope("ObjcCategoryChecker");
338
339 ObjcCategoryChecker checker;
340 for (const InputSection *isec : inputSections) {
341 if (isec->getName() == section_names::objcCatList)
342 for (const Reloc &r : isec->relocs) {
343 auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection());
344 checker.parseCategory(catIsec);
345 }
346 }
347 }
348
349 namespace {
350
351 class ObjcCategoryMerger {
352 // In which language was a particular construct originally defined
353 enum SourceLanguage { Unknown, ObjC, Swift };
354
355 // Information about an input category
356 struct InfoInputCategory {
357 ConcatInputSection *catListIsec;
358 ConcatInputSection *catBodyIsec;
359 uint32_t offCatListIsec = 0;
360 SourceLanguage sourceLanguage = SourceLanguage::Unknown;
361
362 bool wasMerged = false;
363 };
364
365 // To write new (merged) categories or classes, we will try make limited
366 // assumptions about the alignment and the sections the various class/category
367 // info are stored in and . So we'll just reuse the same sections and
368 // alignment as already used in existing (input) categories. To do this we
369 // have InfoCategoryWriter which contains the various sections that the
370 // generated categories will be written to.
371 struct InfoWriteSection {
372 bool valid = false; // Data has been successfully collected from input
373 uint32_t align = 0;
374 Section *inputSection;
375 Reloc relocTemplate;
376 OutputSection *outputSection;
377 };
378
379 struct InfoCategoryWriter {
380 InfoWriteSection catListInfo;
381 InfoWriteSection catBodyInfo;
382 InfoWriteSection catNameInfo;
383 InfoWriteSection catPtrListInfo;
384 };
385
386 // Information about a pointer list in the original categories or class(method
387 // lists, protocol lists, etc)
388 struct PointerListInfo {
389 PointerListInfo() = default;
390 PointerListInfo(const PointerListInfo &) = default;
PointerListInfo__anonff1b1f600411::ObjcCategoryMerger::PointerListInfo391 PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct)
392 : categoryPrefix(_categoryPrefix),
393 pointersPerStruct(_pointersPerStruct) {}
394
operator ==__anonff1b1f600411::ObjcCategoryMerger::PointerListInfo395 inline bool operator==(const PointerListInfo &cmp) const {
396 return pointersPerStruct == cmp.pointersPerStruct &&
397 structSize == cmp.structSize && structCount == cmp.structCount &&
398 allPtrs == cmp.allPtrs;
399 }
400
401 const char *categoryPrefix;
402
403 uint32_t pointersPerStruct = 0;
404
405 uint32_t structSize = 0;
406 uint32_t structCount = 0;
407
408 std::vector<Symbol *> allPtrs;
409 };
410
411 // Full information describing an ObjC class . This will include all the
412 // additional methods, protocols, and properties that are contained in the
413 // class and all the categories that extend a particular class.
414 struct ClassExtensionInfo {
ClassExtensionInfo__anonff1b1f600411::ObjcCategoryMerger::ClassExtensionInfo415 ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};
416
417 // Merged names of containers. Ex: base|firstCategory|secondCategory|...
418 std::string mergedContainerName;
419 std::string baseClassName;
420 const Symbol *baseClass = nullptr;
421 SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown;
422
423 CategoryLayout &catLayout;
424
425 // In case we generate new data, mark the new data as belonging to this file
426 ObjFile *objFileForMergeData = nullptr;
427
428 PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods,
429 /*pointersPerStruct=*/3};
430 PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods,
431 /*pointersPerStruct=*/3};
432 PointerListInfo protocols = {objc::symbol_names::categoryProtocols,
433 /*pointersPerStruct=*/0};
434 PointerListInfo instanceProps = {objc::symbol_names::listProprieties,
435 /*pointersPerStruct=*/2};
436 PointerListInfo classProps = {objc::symbol_names::klassPropList,
437 /*pointersPerStruct=*/2};
438 };
439
440 public:
441 ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
442 void doMerge();
443 static void doCleanup();
444
445 private:
446 DenseSet<const Symbol *> collectNlCategories();
447 void collectAndValidateCategoriesData();
448 void
449 mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
450
451 void eraseISec(ConcatInputSection *isec);
452 void eraseMergedCategories();
453
454 void generateCatListForNonErasedCategories(
455 MapVector<ConcatInputSection *, std::set<uint64_t>>
456 catListToErasedOffsets);
457 void collectSectionWriteInfoFromIsec(const InputSection *isec,
458 InfoWriteSection &catWriteInfo);
459 void collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);
460 void parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
461 ClassExtensionInfo &extInfo);
462
463 void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
464 PointerListInfo &ptrList,
465 SourceLanguage sourceLang);
466
467 PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
468 uint32_t secOffset,
469 SourceLanguage sourceLang);
470
471 void parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
472 PointerListInfo &ptrList);
473
474 void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
475 const ClassExtensionInfo &extInfo,
476 const PointerListInfo &ptrList);
477
478 Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
479 const ClassExtensionInfo &extInfo,
480 const PointerListInfo &ptrList);
481
482 Defined *emitCategory(const ClassExtensionInfo &extInfo);
483 Defined *emitCatListEntrySec(const std::string &forCategoryName,
484 const std::string &forBaseClassName,
485 ObjFile *objFile);
486 Defined *emitCategoryBody(const std::string &name, const Defined *nameSym,
487 const Symbol *baseClassSym,
488 const std::string &baseClassName, ObjFile *objFile);
489 Defined *emitCategoryName(const std::string &name, ObjFile *objFile);
490 void createSymbolReference(Defined *refFrom, const Symbol *refTo,
491 uint32_t offset, const Reloc &relocTemplate);
492 Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset);
493 Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
494 uint32_t offset);
495 Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
496 uint32_t offset);
497 Defined *getClassRo(const Defined *classSym, bool getMetaRo);
498 SourceLanguage getClassSymSourceLang(const Defined *classSym);
499 void mergeCategoriesIntoBaseClass(const Defined *baseClass,
500 std::vector<InfoInputCategory> &categories);
501 void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
502 void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
503 uint32_t offset);
504
505 // Allocate a null-terminated StringRef backed by generatedSectionData
506 StringRef newStringData(const char *str);
507 // Allocate section data, backed by generatedSectionData
508 SmallVector<uint8_t> &newSectionData(uint32_t size);
509
510 CategoryLayout catLayout;
511 ClassLayout classLayout;
512 ROClassLayout roClassLayout;
513 ListHeaderLayout listHeaderLayout;
514 MethodLayout methodLayout;
515 ProtocolListHeaderLayout protocolListHeaderLayout;
516
517 InfoCategoryWriter infoCategoryWriter;
518 std::vector<ConcatInputSection *> &allInputSections;
519 // Map of base class Symbol to list of InfoInputCategory's for it
520 MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
521
522 // Normally, the binary data comes from the input files, but since we're
523 // generating binary data ourselves, we use the below array to store it in.
524 // Need this to be 'static' so the data survives past the ObjcCategoryMerger
525 // object, as the data will be read by the Writer when the final binary is
526 // generated.
527 static SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
528 generatedSectionData;
529 };
530
// Out-of-class definition of the static storage that backs the section data
// generated by the merger.
SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
    ObjcCategoryMerger::generatedSectionData;
533
ObjcCategoryMerger(std::vector<ConcatInputSection * > & _allInputSections)534 ObjcCategoryMerger::ObjcCategoryMerger(
535 std::vector<ConcatInputSection *> &_allInputSections)
536 : catLayout(target->wordSize), classLayout(target->wordSize),
537 roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
538 methodLayout(target->wordSize),
539 protocolListHeaderLayout(target->wordSize),
540 allInputSections(_allInputSections) {}
541
collectSectionWriteInfoFromIsec(const InputSection * isec,InfoWriteSection & catWriteInfo)542 void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
543 const InputSection *isec, InfoWriteSection &catWriteInfo) {
544
545 catWriteInfo.inputSection = const_cast<Section *>(&isec->section);
546 catWriteInfo.align = isec->align;
547 catWriteInfo.outputSection = isec->parent;
548
549 assert(catWriteInfo.outputSection &&
550 "outputSection may not be null in collectSectionWriteInfoFromIsec.");
551
552 if (isec->relocs.size())
553 catWriteInfo.relocTemplate = isec->relocs[0];
554
555 catWriteInfo.valid = true;
556 }
557
558 Symbol *
tryGetSymbolAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)559 ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
560 uint32_t offset) {
561 if (!isec)
562 return nullptr;
563 const Reloc *reloc = isec->getRelocAt(offset);
564
565 if (!reloc)
566 return nullptr;
567
568 Symbol *sym = reloc->referent.get<Symbol *>();
569
570 if (reloc->addend) {
571 assert(isa<Defined>(sym) && "Expected defined for non-zero addend");
572 Defined *definedSym = cast<Defined>(sym);
573 sym = tryFindDefinedOnIsec(definedSym->isec(),
574 definedSym->value + reloc->addend);
575 }
576
577 return sym;
578 }
579
tryFindDefinedOnIsec(const InputSection * isec,uint32_t offset)580 Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec,
581 uint32_t offset) {
582 for (Defined *sym : isec->symbols)
583 if ((sym->value <= offset) && (sym->value + sym->size > offset))
584 return sym;
585
586 return nullptr;
587 }
588
589 Defined *
tryGetDefinedAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)590 ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
591 uint32_t offset) {
592 Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset);
593 return dyn_cast_or_null<Defined>(sym);
594 }
595
596 // Get the class's ro_data symbol. If getMetaRo is true, then we will return
597 // the meta-class's ro_data symbol. Otherwise, we will return the class
598 // (instance) ro_data symbol.
getClassRo(const Defined * classSym,bool getMetaRo)599 Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,
600 bool getMetaRo) {
601 ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec());
602 if (!isec)
603 return nullptr;
604
605 if (!getMetaRo)
606 return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset +
607 classSym->value);
608
609 Defined *metaClass = tryGetDefinedAtIsecOffset(
610 isec, classLayout.metaClassOffset + classSym->value);
611 if (!metaClass)
612 return nullptr;
613
614 return tryGetDefinedAtIsecOffset(
615 dyn_cast<ConcatInputSection>(metaClass->isec()),
616 classLayout.roDataOffset);
617 }
618
619 // Given an ConcatInputSection or CStringInputSection and an offset, if there is
620 // a symbol(Defined) at that offset, then erase the symbol (mark it not live)
tryEraseDefinedAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)621 void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
622 const ConcatInputSection *isec, uint32_t offset) {
623 const Reloc *reloc = isec->getRelocAt(offset);
624
625 if (!reloc)
626 return;
627
628 Defined *sym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
629 if (!sym)
630 return;
631
632 if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
633 eraseISec(cisec);
634 else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
635 uint32_t totalOffset = sym->value + reloc->addend;
636 StringPiece &piece = csisec->getStringPiece(totalOffset);
637 piece.live = false;
638 } else {
639 llvm_unreachable("erased symbol has to be Defined or CStringInputSection");
640 }
641 }
642
collectCategoryWriterInfoFromCategory(const InfoInputCategory & catInfo)643 void ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
644 const InfoInputCategory &catInfo) {
645
646 if (!infoCategoryWriter.catListInfo.valid)
647 collectSectionWriteInfoFromIsec(catInfo.catListIsec,
648 infoCategoryWriter.catListInfo);
649 if (!infoCategoryWriter.catBodyInfo.valid)
650 collectSectionWriteInfoFromIsec(catInfo.catBodyIsec,
651 infoCategoryWriter.catBodyInfo);
652
653 if (!infoCategoryWriter.catNameInfo.valid) {
654 lld::macho::Defined *catNameSym =
655 tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
656 assert(catNameSym && "Category does not have a valid name Symbol");
657
658 collectSectionWriteInfoFromIsec(catNameSym->isec(),
659 infoCategoryWriter.catNameInfo);
660 }
661
662 // Collect writer info from all the category lists (we're assuming they all
663 // would provide the same info)
664 if (!infoCategoryWriter.catPtrListInfo.valid) {
665 for (uint32_t off = catLayout.instanceMethodsOffset;
666 off <= catLayout.classPropsOffset; off += target->wordSize) {
667 if (Defined *ptrList =
668 tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) {
669 collectSectionWriteInfoFromIsec(ptrList->isec(),
670 infoCategoryWriter.catPtrListInfo);
671 // we've successfully collected data, so we can break
672 break;
673 }
674 }
675 }
676 }
677
678 // Parse a protocol list that might be linked to ConcatInputSection at a given
679 // offset. The format of the protocol list is different than other lists (prop
680 // lists, method lists) so we need to parse it differently
parseProtocolListInfo(const ConcatInputSection * isec,uint32_t secOffset,PointerListInfo & ptrList,SourceLanguage sourceLang)681 void ObjcCategoryMerger::parseProtocolListInfo(
682 const ConcatInputSection *isec, uint32_t secOffset,
683 PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
684 assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
685 "Tried to read pointer list beyond protocol section end");
686
687 const Reloc *reloc = isec->getRelocAt(secOffset);
688 if (!reloc)
689 return;
690
691 auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
692 assert(ptrListSym && "Protocol list reloc does not have a valid Defined");
693
694 // Theoretically protocol count can be either 32b or 64b, depending on
695 // platform pointer size, but to simplify implementation we always just read
696 // the lower 32b which should be good enough.
697 uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
698 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
699
700 ptrList.structCount += protocolCount;
701 ptrList.structSize = target->wordSize;
702
703 [[maybe_unused]] uint32_t expectedListSize =
704 (protocolCount * target->wordSize) +
705 /*header(count)*/ protocolListHeaderLayout.totalSize +
706 /*extra null value*/ target->wordSize;
707
708 // On Swift, the protocol list does not have the extra (unnecessary) null
709 [[maybe_unused]] uint32_t expectedListSizeSwift =
710 expectedListSize - target->wordSize;
711
712 assert(((expectedListSize == ptrListSym->isec()->data.size() &&
713 sourceLang == SourceLanguage::ObjC) ||
714 (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
715 sourceLang == SourceLanguage::Swift)) &&
716 "Protocol list does not match expected size");
717
718 uint32_t off = protocolListHeaderLayout.totalSize;
719 for (uint32_t inx = 0; inx < protocolCount; ++inx) {
720 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
721 assert(reloc && "No reloc found at protocol list offset");
722
723 auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
724 assert(listSym && "Protocol list reloc does not have a valid Defined");
725
726 ptrList.allPtrs.push_back(listSym);
727 off += target->wordSize;
728 }
729 assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
730 "expected null terminating protocol");
731 assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
732 "Protocol list end offset does not match expected size");
733 }
734
735 // Parse a protocol list and return the PointerListInfo for it
736 ObjcCategoryMerger::PointerListInfo
parseProtocolListInfo(const ConcatInputSection * isec,uint32_t secOffset,SourceLanguage sourceLang)737 ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
738 uint32_t secOffset,
739 SourceLanguage sourceLang) {
740 PointerListInfo ptrList;
741 parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
742 return ptrList;
743 }
744
745 // Parse a pointer list that might be linked to ConcatInputSection at a given
746 // offset. This can be used for instance methods, class methods, instance props
747 // and class props since they have the same format.
parsePointerListInfo(const ConcatInputSection * isec,uint32_t secOffset,PointerListInfo & ptrList)748 void ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
749 uint32_t secOffset,
750 PointerListInfo &ptrList) {
751 assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3);
752 assert(isec && "Trying to parse pointer list from null isec");
753 assert(secOffset + target->wordSize <= isec->data.size() &&
754 "Trying to read pointer list beyond section end");
755
756 const Reloc *reloc = isec->getRelocAt(secOffset);
757 if (!reloc)
758 return;
759
760 auto *ptrListSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
761 assert(ptrListSym && "Reloc does not have a valid Defined");
762
763 uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
764 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
765 uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
766 ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
767 assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
768
769 assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
770
771 ptrList.structCount += thisStructCount;
772 ptrList.structSize = thisStructSize;
773
774 uint32_t expectedListSize =
775 listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
776 assert(expectedListSize == ptrListSym->isec()->data.size() &&
777 "Pointer list does not match expected size");
778
779 for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
780 off += target->wordSize) {
781 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
782 assert(reloc && "No reloc found at pointer list offset");
783
784 auto *listSym = dyn_cast_or_null<Defined>(reloc->referent.get<Symbol *>());
785 assert(listSym && "Reloc does not have a valid Defined");
786
787 ptrList.allPtrs.push_back(listSym);
788 }
789 }
790
791 // Here we parse all the information of an input category (catInfo) and
792 // append the parsed info into the structure which will contain all the
793 // information about how a class is extended (extInfo)
parseCatInfoToExtInfo(const InfoInputCategory & catInfo,ClassExtensionInfo & extInfo)794 void ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
795 ClassExtensionInfo &extInfo) {
796 const Reloc *catNameReloc =
797 catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset);
798
799 // Parse name
800 assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
801
802 // is this the first category we are parsing?
803 if (extInfo.mergedContainerName.empty())
804 extInfo.objFileForMergeData =
805 dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
806 else
807 extInfo.mergedContainerName += "|";
808
809 assert(extInfo.objFileForMergeData &&
810 "Expected to already have valid objextInfo.objFileForMergeData");
811
812 StringRef catName = getReferentString(*catNameReloc);
813 extInfo.mergedContainerName += catName.str();
814
815 // Parse base class
816 if (!extInfo.baseClass) {
817 Symbol *classSym =
818 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset);
819 assert(extInfo.baseClassName.empty());
820 extInfo.baseClass = classSym;
821 llvm::StringRef classPrefix(objc::symbol_names::klass);
822 assert(classSym->getName().starts_with(classPrefix) &&
823 "Base class symbol does not start with expected prefix");
824 extInfo.baseClassName = classSym->getName().substr(classPrefix.size());
825 } else {
826 assert((extInfo.baseClass ==
827 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec,
828 catLayout.klassOffset)) &&
829 "Trying to parse category info into container with different base "
830 "class");
831 }
832
833 parsePointerListInfo(catInfo.catBodyIsec, catLayout.instanceMethodsOffset,
834 extInfo.instanceMethods);
835
836 parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset,
837 extInfo.classMethods);
838
839 parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
840 extInfo.protocols, catInfo.sourceLanguage);
841
842 parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
843 extInfo.instanceProps);
844
845 parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset,
846 extInfo.classProps);
847 }
848
849 // Generate a protocol list (including header) and link it into the parent at
850 // the specified offset.
emitAndLinkProtocolList(Defined * parentSym,uint32_t linkAtOffset,const ClassExtensionInfo & extInfo,const PointerListInfo & ptrList)851 Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
852 Defined *parentSym, uint32_t linkAtOffset,
853 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
854 if (ptrList.allPtrs.empty())
855 return nullptr;
856
857 assert(ptrList.allPtrs.size() == ptrList.structCount);
858
859 uint32_t bodySize = (ptrList.structCount * target->wordSize) +
860 /*header(count)*/ protocolListHeaderLayout.totalSize +
861 /*extra null value*/ target->wordSize;
862 llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
863
864 // This theoretically can be either 32b or 64b, but writing just the first 32b
865 // is good enough
866 const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>(
867 bodyData.data() + protocolListHeaderLayout.protocolCountOffset);
868
869 *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size();
870
871 ConcatInputSection *listSec = make<ConcatInputSection>(
872 *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
873 infoCategoryWriter.catPtrListInfo.align);
874 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
875 listSec->live = true;
876
877 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
878
879 std::string symName = ptrList.categoryPrefix;
880 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
881
882 Defined *ptrListSym = make<Defined>(
883 newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
884 listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
885 /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
886 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
887 /*isWeakDefCanBeHidden=*/false);
888
889 ptrListSym->used = true;
890 parentSym->getObjectFile()->symbols.push_back(ptrListSym);
891 addInputSection(listSec);
892
893 createSymbolReference(parentSym, ptrListSym, linkAtOffset,
894 infoCategoryWriter.catBodyInfo.relocTemplate);
895
896 uint32_t offset = protocolListHeaderLayout.totalSize;
897 for (Symbol *symbol : ptrList.allPtrs) {
898 createSymbolReference(ptrListSym, symbol, offset,
899 infoCategoryWriter.catPtrListInfo.relocTemplate);
900 offset += target->wordSize;
901 }
902
903 return ptrListSym;
904 }
905
906 // Generate a pointer list (including header) and link it into the parent at the
907 // specified offset. This is used for instance and class methods and
908 // proprieties.
emitAndLinkPointerList(Defined * parentSym,uint32_t linkAtOffset,const ClassExtensionInfo & extInfo,const PointerListInfo & ptrList)909 void ObjcCategoryMerger::emitAndLinkPointerList(
910 Defined *parentSym, uint32_t linkAtOffset,
911 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
912 if (ptrList.allPtrs.empty())
913 return;
914
915 assert(ptrList.allPtrs.size() * target->wordSize ==
916 ptrList.structCount * ptrList.structSize);
917
918 // Generate body
919 uint32_t bodySize =
920 listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount);
921 llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
922
923 const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>(
924 bodyData.data() + listHeaderLayout.structSizeOffset);
925 const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>(
926 bodyData.data() + listHeaderLayout.structCountOffset);
927
928 *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize;
929 *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount;
930
931 ConcatInputSection *listSec = make<ConcatInputSection>(
932 *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
933 infoCategoryWriter.catPtrListInfo.align);
934 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
935 listSec->live = true;
936
937 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
938
939 std::string symName = ptrList.categoryPrefix;
940 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
941
942 Defined *ptrListSym = make<Defined>(
943 newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
944 listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
945 /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
946 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
947 /*isWeakDefCanBeHidden=*/false);
948
949 ptrListSym->used = true;
950 parentSym->getObjectFile()->symbols.push_back(ptrListSym);
951 addInputSection(listSec);
952
953 createSymbolReference(parentSym, ptrListSym, linkAtOffset,
954 infoCategoryWriter.catBodyInfo.relocTemplate);
955
956 uint32_t offset = listHeaderLayout.totalSize;
957 for (Symbol *symbol : ptrList.allPtrs) {
958 createSymbolReference(ptrListSym, symbol, offset,
959 infoCategoryWriter.catPtrListInfo.relocTemplate);
960 offset += target->wordSize;
961 }
962 }
963
964 // This method creates an __objc_catlist ConcatInputSection with a single slot
965 Defined *
emitCatListEntrySec(const std::string & forCategoryName,const std::string & forBaseClassName,ObjFile * objFile)966 ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
967 const std::string &forBaseClassName,
968 ObjFile *objFile) {
969 uint32_t sectionSize = target->wordSize;
970 llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize);
971
972 ConcatInputSection *newCatList =
973 make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection,
974 bodyData, infoCategoryWriter.catListInfo.align);
975 newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
976 newCatList->live = true;
977
978 newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
979
980 std::string catSymName = "<__objc_catlist slot for merged category ";
981 catSymName += forBaseClassName + "(" + forCategoryName + ")>";
982
983 Defined *catListSym = make<Defined>(
984 newStringData(catSymName.c_str()), /*file=*/objFile, newCatList,
985 /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
986 /*isPrivateExtern=*/false, /*includeInSymtab=*/false,
987 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
988 /*isWeakDefCanBeHidden=*/false);
989
990 catListSym->used = true;
991 objFile->symbols.push_back(catListSym);
992 addInputSection(newCatList);
993 return catListSym;
994 }
995
996 // Here we generate the main category body and link the name and base class into
997 // it. We don't link any other info yet like the protocol and class/instance
998 // methods/props.
emitCategoryBody(const std::string & name,const Defined * nameSym,const Symbol * baseClassSym,const std::string & baseClassName,ObjFile * objFile)999 Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
1000 const Defined *nameSym,
1001 const Symbol *baseClassSym,
1002 const std::string &baseClassName,
1003 ObjFile *objFile) {
1004 llvm::ArrayRef<uint8_t> bodyData = newSectionData(catLayout.totalSize);
1005
1006 uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) +
1007 catLayout.sizeOffset);
1008 *ptrSize = catLayout.totalSize;
1009
1010 ConcatInputSection *newBodySec =
1011 make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection,
1012 bodyData, infoCategoryWriter.catBodyInfo.align);
1013 newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
1014 newBodySec->live = true;
1015
1016 std::string symName =
1017 objc::symbol_names::category + baseClassName + "(" + name + ")";
1018 Defined *catBodySym = make<Defined>(
1019 newStringData(symName.c_str()), /*file=*/objFile, newBodySec,
1020 /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
1021 /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
1022 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
1023 /*isWeakDefCanBeHidden=*/false);
1024
1025 catBodySym->used = true;
1026 objFile->symbols.push_back(catBodySym);
1027 addInputSection(newBodySec);
1028
1029 createSymbolReference(catBodySym, nameSym, catLayout.nameOffset,
1030 infoCategoryWriter.catBodyInfo.relocTemplate);
1031
1032 // Create a reloc to the base class (either external or internal)
1033 createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset,
1034 infoCategoryWriter.catBodyInfo.relocTemplate);
1035
1036 return catBodySym;
1037 }
1038
1039 // This writes the new category name (for the merged category) into the binary
1040 // and returns the sybmol for it.
emitCategoryName(const std::string & name,ObjFile * objFile)1041 Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
1042 ObjFile *objFile) {
1043 StringRef nameStrData = newStringData(name.c_str());
1044 // We use +1 below to include the null terminator
1045 llvm::ArrayRef<uint8_t> nameData(
1046 reinterpret_cast<const uint8_t *>(nameStrData.data()),
1047 nameStrData.size() + 1);
1048
1049 auto *parentSection = infoCategoryWriter.catNameInfo.inputSection;
1050 CStringInputSection *newStringSec = make<CStringInputSection>(
1051 *infoCategoryWriter.catNameInfo.inputSection, nameData,
1052 infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/true);
1053
1054 parentSection->subsections.push_back({0, newStringSec});
1055
1056 newStringSec->splitIntoPieces();
1057 newStringSec->pieces[0].live = true;
1058 newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
1059 in.cStringSection->addInput(newStringSec);
1060 assert(newStringSec->pieces.size() == 1);
1061
1062 Defined *catNameSym = make<Defined>(
1063 "<merged category name>", /*file=*/objFile, newStringSec,
1064 /*value=*/0, nameData.size(),
1065 /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1066 /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1067 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1068
1069 catNameSym->used = true;
1070 objFile->symbols.push_back(catNameSym);
1071 return catNameSym;
1072 }
1073
1074 // This method fully creates a new category from the given ClassExtensionInfo.
1075 // It creates the category name, body and method/protocol/prop lists and links
1076 // them all together. Then it creates a new __objc_catlist entry and adds the
1077 // category to it. Calling this method will fully generate a category which will
1078 // be available in the final binary.
emitCategory(const ClassExtensionInfo & extInfo)1079 Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) {
1080 Defined *catNameSym = emitCategoryName(extInfo.mergedContainerName,
1081 extInfo.objFileForMergeData);
1082
1083 Defined *catBodySym = emitCategoryBody(
1084 extInfo.mergedContainerName, catNameSym, extInfo.baseClass,
1085 extInfo.baseClassName, extInfo.objFileForMergeData);
1086
1087 Defined *catListSym =
1088 emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName,
1089 extInfo.objFileForMergeData);
1090
1091 // Add the single category body to the category list at the offset 0.
1092 createSymbolReference(catListSym, catBodySym, /*offset=*/0,
1093 infoCategoryWriter.catListInfo.relocTemplate);
1094
1095 emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset, extInfo,
1096 extInfo.instanceMethods);
1097
1098 emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo,
1099 extInfo.classMethods);
1100
1101 emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo,
1102 extInfo.protocols);
1103
1104 emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset, extInfo,
1105 extInfo.instanceProps);
1106
1107 emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo,
1108 extInfo.classProps);
1109
1110 return catBodySym;
1111 }
1112
1113 // This method merges all the categories (sharing a base class) into a single
1114 // category.
mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> & categories)1115 void ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
1116 std::vector<InfoInputCategory> &categories) {
1117 assert(categories.size() > 1 && "Expected at least 2 categories");
1118
1119 ClassExtensionInfo extInfo(catLayout);
1120
1121 for (auto &catInfo : categories)
1122 parseCatInfoToExtInfo(catInfo, extInfo);
1123
1124 Defined *newCatDef = emitCategory(extInfo);
1125 assert(newCatDef && "Failed to create a new category");
1126
1127 // Suppress unsuded var warning
1128 (void)newCatDef;
1129
1130 for (auto &catInfo : categories)
1131 catInfo.wasMerged = true;
1132 }
1133
createSymbolReference(Defined * refFrom,const Symbol * refTo,uint32_t offset,const Reloc & relocTemplate)1134 void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
1135 const Symbol *refTo,
1136 uint32_t offset,
1137 const Reloc &relocTemplate) {
1138 Reloc r = relocTemplate;
1139 r.offset = offset;
1140 r.addend = 0;
1141 r.referent = const_cast<Symbol *>(refTo);
1142 refFrom->isec()->relocs.push_back(r);
1143 }
1144
1145 // Get the list of categories in the '__objc_nlcatlist' section. We can't
1146 // optimize these as they have a '+load' method that has to be called at
1147 // runtime.
collectNlCategories()1148 DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() {
1149 DenseSet<const Symbol *> nlCategories;
1150
1151 for (InputSection *sec : allInputSections) {
1152 if (sec->getName() != section_names::objcNonLazyCatList)
1153 continue;
1154
1155 for (auto &r : sec->relocs) {
1156 const Symbol *sym = r.referent.dyn_cast<Symbol *>();
1157 nlCategories.insert(sym);
1158 }
1159 }
1160 return nlCategories;
1161 }
1162
collectAndValidateCategoriesData()1163 void ObjcCategoryMerger::collectAndValidateCategoriesData() {
1164 auto nlCategories = collectNlCategories();
1165
1166 for (InputSection *sec : allInputSections) {
1167 if (sec->getName() != section_names::objcCatList)
1168 continue;
1169 ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec);
1170 assert(catListCisec &&
1171 "__objc_catList InputSection is not a ConcatInputSection");
1172
1173 for (uint32_t off = 0; off < catListCisec->getSize();
1174 off += target->wordSize) {
1175 Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off);
1176 assert(categorySym &&
1177 "Failed to get a valid category at __objc_catlit offset");
1178
1179 if (nlCategories.count(categorySym))
1180 continue;
1181
1182 auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
1183 assert(catBodyIsec &&
1184 "Category data section is not an ConcatInputSection");
1185
1186 SourceLanguage eLang = SourceLanguage::Unknown;
1187 if (categorySym->getName().starts_with(objc::symbol_names::category))
1188 eLang = SourceLanguage::ObjC;
1189 else if (categorySym->getName().starts_with(
1190 objc::symbol_names::swift_objc_category))
1191 eLang = SourceLanguage::Swift;
1192 else
1193 llvm_unreachable("Unexpected category symbol name");
1194
1195 InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang};
1196
1197 // Check that the category has a reloc at 'klassOffset' (which is
1198 // a pointer to the class symbol)
1199
1200 Symbol *classSym =
1201 tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
1202 assert(classSym && "Category does not have a valid base class");
1203
1204 categoryMap[classSym].push_back(catInputInfo);
1205
1206 collectCategoryWriterInfoFromCategory(catInputInfo);
1207 }
1208 }
1209 }
1210
1211 // In the input we have multiple __objc_catlist InputSection, each of which may
1212 // contain links to multiple categories. Of these categories, we will merge (and
1213 // erase) only some. There will be some categories that will remain untouched
1214 // (not erased). For these not erased categories, we generate new __objc_catlist
1215 // entries since the parent __objc_catlist entry will be erased
generateCatListForNonErasedCategories(const MapVector<ConcatInputSection *,std::set<uint64_t>> catListToErasedOffsets)1216 void ObjcCategoryMerger::generateCatListForNonErasedCategories(
1217 const MapVector<ConcatInputSection *, std::set<uint64_t>>
1218 catListToErasedOffsets) {
1219
1220 // Go through all offsets of all __objc_catlist's that we process and if there
1221 // are categories that we didn't process - generate a new __objc_catlist for
1222 // each.
1223 for (auto &mapEntry : catListToErasedOffsets) {
1224 ConcatInputSection *catListIsec = mapEntry.first;
1225 for (uint32_t catListIsecOffset = 0;
1226 catListIsecOffset < catListIsec->data.size();
1227 catListIsecOffset += target->wordSize) {
1228 // This slot was erased, we can just skip it
1229 if (mapEntry.second.count(catListIsecOffset))
1230 continue;
1231
1232 Defined *nonErasedCatBody =
1233 tryGetDefinedAtIsecOffset(catListIsec, catListIsecOffset);
1234 assert(nonErasedCatBody && "Failed to relocate non-deleted category");
1235
1236 // Allocate data for the new __objc_catlist slot
1237 llvm::ArrayRef<uint8_t> bodyData = newSectionData(target->wordSize);
1238
1239 // We mark the __objc_catlist slot as belonging to the same file as the
1240 // category
1241 ObjFile *objFile = dyn_cast<ObjFile>(nonErasedCatBody->getFile());
1242
1243 ConcatInputSection *listSec = make<ConcatInputSection>(
1244 *infoCategoryWriter.catListInfo.inputSection, bodyData,
1245 infoCategoryWriter.catListInfo.align);
1246 listSec->parent = infoCategoryWriter.catListInfo.outputSection;
1247 listSec->live = true;
1248
1249 std::string slotSymName = "<__objc_catlist slot for category ";
1250 slotSymName += nonErasedCatBody->getName();
1251 slotSymName += ">";
1252
1253 Defined *catListSlotSym = make<Defined>(
1254 newStringData(slotSymName.c_str()), /*file=*/objFile, listSec,
1255 /*value=*/0, bodyData.size(),
1256 /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1257 /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1258 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1259
1260 catListSlotSym->used = true;
1261 objFile->symbols.push_back(catListSlotSym);
1262 addInputSection(listSec);
1263
1264 // Now link the category body into the newly created slot
1265 createSymbolReference(catListSlotSym, nonErasedCatBody, 0,
1266 infoCategoryWriter.catListInfo.relocTemplate);
1267 }
1268 }
1269 }
1270
eraseISec(ConcatInputSection * isec)1271 void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
1272 isec->live = false;
1273 for (auto &sym : isec->symbols)
1274 sym->used = false;
1275 }
1276
1277 // This fully erases the merged categories, including their body, their names,
1278 // their method/protocol/prop lists and the __objc_catlist entries that link to
1279 // them.
eraseMergedCategories()1280 void ObjcCategoryMerger::eraseMergedCategories() {
1281 // Map of InputSection to a set of offsets of the categories that were merged
1282 MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
1283
1284 for (auto &mapEntry : categoryMap) {
1285 for (InfoInputCategory &catInfo : mapEntry.second) {
1286 if (catInfo.wasMerged) {
1287 eraseISec(catInfo.catListIsec);
1288 catListToErasedOffsets[catInfo.catListIsec].insert(
1289 catInfo.offCatListIsec);
1290 }
1291 }
1292 }
1293
1294 // If there were categories that we did not erase, we need to generate a new
1295 // __objc_catList that contains only the un-merged categories, and get rid of
1296 // the references to the ones we merged.
1297 generateCatListForNonErasedCategories(catListToErasedOffsets);
1298
1299 // Erase the old method lists & names of the categories that were merged
1300 for (auto &mapEntry : categoryMap) {
1301 for (InfoInputCategory &catInfo : mapEntry.second) {
1302 if (!catInfo.wasMerged)
1303 continue;
1304
1305 eraseISec(catInfo.catBodyIsec);
1306
1307 // We can't erase 'catLayout.nameOffset' for either Swift or ObjC
1308 // categories because the name will sometimes also be used for other
1309 // purposes.
1310 // For Swift, see usages of 'l_.str.11.SimpleClass' in
1311 // objc-category-merging-swift.s
1312 // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in
1313 // objc-category-merging-erase-objc-name-test.s
1314 // TODO: handle the above in a smarter way
1315
1316 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1317 catLayout.instanceMethodsOffset);
1318 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1319 catLayout.classMethodsOffset);
1320 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1321 catLayout.protocolsOffset);
1322 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1323 catLayout.classPropsOffset);
1324 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1325 catLayout.instancePropsOffset);
1326 }
1327 }
1328 }
1329
doMerge()1330 void ObjcCategoryMerger::doMerge() {
1331 collectAndValidateCategoriesData();
1332
1333 for (auto &[baseClass, catInfos] : categoryMap) {
1334 if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) {
1335 // Merge all categories into the base class
1336 mergeCategoriesIntoBaseClass(baseClassDef, catInfos);
1337 } else if (catInfos.size() > 1) {
1338 // Merge all categories into a new, single category
1339 mergeCategoriesIntoSingleCategory(catInfos);
1340 }
1341 }
1342
1343 // Erase all categories that were merged
1344 eraseMergedCategories();
1345 }
1346
doCleanup()1347 void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }
1348
newStringData(const char * str)1349 StringRef ObjcCategoryMerger::newStringData(const char *str) {
1350 uint32_t len = strlen(str);
1351 uint32_t bufSize = len + 1;
1352 SmallVector<uint8_t> &data = newSectionData(bufSize);
1353 char *strData = reinterpret_cast<char *>(data.data());
1354 // Copy the string chars and null-terminator
1355 memcpy(strData, str, bufSize);
1356 return StringRef(strData, len);
1357 }
1358
newSectionData(uint32_t size)1359 SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) {
1360 generatedSectionData.push_back(
1361 std::make_unique<SmallVector<uint8_t>>(size, 0));
1362 return *generatedSectionData.back();
1363 }
1364
1365 } // namespace
1366
mergeCategories()1367 void objc::mergeCategories() {
1368 TimeTraceScope timeScope("ObjcCategoryMerger");
1369
1370 ObjcCategoryMerger merger(inputSections);
1371 merger.doMerge();
1372 }
1373
doCleanup()1374 void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); }
1375
1376 ObjcCategoryMerger::SourceLanguage
getClassSymSourceLang(const Defined * classSym)1377 ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) {
1378 if (classSym->getName().starts_with(objc::symbol_names::swift_objc_klass))
1379 return SourceLanguage::Swift;
1380
1381 // If the symbol name matches the ObjC prefix, we don't necessarely know this
1382 // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift
1383 // classes. Ex:
1384 // .globl _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1385 // .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1386 // .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN
1387 //
1388 // So we scan for symbols with the same address and check for the Swift class
1389 if (classSym->getName().starts_with(objc::symbol_names::klass)) {
1390 for (auto &sym : classSym->originalIsec->symbols)
1391 if (sym->value == classSym->value)
1392 if (sym->getName().starts_with(objc::symbol_names::swift_objc_klass))
1393 return SourceLanguage::Swift;
1394 return SourceLanguage::ObjC;
1395 }
1396
1397 llvm_unreachable("Unexpected class symbol name during category merging");
1398 }
mergeCategoriesIntoBaseClass(const Defined * baseClass,std::vector<InfoInputCategory> & categories)1399 void ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
1400 const Defined *baseClass, std::vector<InfoInputCategory> &categories) {
1401 assert(categories.size() >= 1 && "Expected at least one category to merge");
1402
1403 // Collect all the info from the categories
1404 ClassExtensionInfo extInfo(catLayout);
1405 extInfo.baseClass = baseClass;
1406 extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass);
1407
1408 for (auto &catInfo : categories) {
1409 parseCatInfoToExtInfo(catInfo, extInfo);
1410 }
1411
1412 // Get metadata for the base class
1413 Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true);
1414 ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec());
1415 Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false);
1416 ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec());
1417
1418 // Now collect the info from the base class from the various lists in the
1419 // class metadata
1420
1421 // Protocol lists are a special case - the same protocol list is in classRo
1422 // and metaRo, so we only need to parse it once
1423 parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1424 extInfo.protocols, extInfo.baseClassSourceLanguage);
1425
1426 // Check that the classRo and metaRo protocol lists are identical
1427 assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1428 extInfo.baseClassSourceLanguage) ==
1429 parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
1430 extInfo.baseClassSourceLanguage) &&
1431 "Category merger expects classRo and metaRo to have the same protocol "
1432 "list");
1433
1434 parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
1435 extInfo.classMethods);
1436 parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset,
1437 extInfo.instanceMethods);
1438
1439 parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset,
1440 extInfo.classProps);
1441 parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset,
1442 extInfo.instanceProps);
1443
1444 // Erase the old lists - these will be generated and replaced
1445 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset);
1446 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset);
1447 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset);
1448 eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset);
1449 eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset);
1450 eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset);
1451
1452 // Emit the newly merged lists - first into the meta RO then into the class RO
1453 // First we emit and link the protocol list into the meta RO. Then we link it
1454 // in the classRo as well (they're supposed to be identical)
1455 if (Defined *protoListSym =
1456 emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset,
1457 extInfo, extInfo.protocols)) {
1458 createSymbolReference(classRo, protoListSym,
1459 roClassLayout.baseProtocolsOffset,
1460 infoCategoryWriter.catBodyInfo.relocTemplate);
1461 }
1462
1463 emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo,
1464 extInfo.classMethods);
1465 emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo,
1466 extInfo.instanceMethods);
1467
1468 emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo,
1469 extInfo.classProps);
1470
1471 emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo,
1472 extInfo.instanceProps);
1473
1474 // Mark all the categories as merged - this will be used to erase them later
1475 for (auto &catInfo : categories)
1476 catInfo.wasMerged = true;
1477 }
1478
1479 // Erase the symbol at a given offset in an InputSection
eraseSymbolAtIsecOffset(ConcatInputSection * isec,uint32_t offset)1480 void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,
1481 uint32_t offset) {
1482 Defined *sym = tryGetDefinedAtIsecOffset(isec, offset);
1483 if (!sym)
1484 return;
1485
1486 // Remove the symbol from isec->symbols
1487 assert(isa<Defined>(sym) && "Can only erase a Defined");
1488 llvm::erase(isec->symbols, sym);
1489
1490 // Remove the relocs that refer to this symbol
1491 auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };
1492 llvm::erase_if(isec->relocs, removeAtOff);
1493
1494 // Now, if the symbol fully occupies a ConcatInputSection, we can also erase
1495 // the whole ConcatInputSection
1496 if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec()))
1497 if (cisec->data.size() == sym->size)
1498 eraseISec(cisec);
1499 }
1500