1 //===- ObjC.cpp -----------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "ObjC.h"
10 #include "ConcatOutputSection.h"
11 #include "InputFiles.h"
12 #include "InputSection.h"
13 #include "Layout.h"
14 #include "OutputSegment.h"
15 #include "SyntheticSections.h"
16 #include "Target.h"
17
18 #include "lld/Common/ErrorHandler.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/Bitcode/BitcodeReader.h"
21 #include "llvm/Support/TimeProfiler.h"
22
23 using namespace llvm;
24 using namespace llvm::MachO;
25 using namespace lld;
26 using namespace lld::macho;
27
objectHasObjCSection(MemoryBufferRef mb)28 template <class LP> static bool objectHasObjCSection(MemoryBufferRef mb) {
29 using SectionHeader = typename LP::section;
30
31 auto *hdr =
32 reinterpret_cast<const typename LP::mach_header *>(mb.getBufferStart());
33 if (hdr->magic != LP::magic)
34 return false;
35
36 if (const auto *c =
37 findCommand<typename LP::segment_command>(hdr, LP::segmentLCType)) {
38 auto sectionHeaders = ArrayRef<SectionHeader>{
39 reinterpret_cast<const SectionHeader *>(c + 1), c->nsects};
40 for (const SectionHeader &secHead : sectionHeaders) {
41 StringRef sectname(secHead.sectname,
42 strnlen(secHead.sectname, sizeof(secHead.sectname)));
43 StringRef segname(secHead.segname,
44 strnlen(secHead.segname, sizeof(secHead.segname)));
45 if ((segname == segment_names::data &&
46 sectname == section_names::objcCatList) ||
47 (segname == segment_names::text &&
48 sectname.starts_with(section_names::swift))) {
49 return true;
50 }
51 }
52 }
53 return false;
54 }
55
objectHasObjCSection(MemoryBufferRef mb)56 static bool objectHasObjCSection(MemoryBufferRef mb) {
57 if (target->wordSize == 8)
58 return ::objectHasObjCSection<LP64>(mb);
59 else
60 return ::objectHasObjCSection<ILP32>(mb);
61 }
62
hasObjCSection(MemoryBufferRef mb)63 bool macho::hasObjCSection(MemoryBufferRef mb) {
64 switch (identify_magic(mb.getBuffer())) {
65 case file_magic::macho_object:
66 return objectHasObjCSection(mb);
67 case file_magic::bitcode:
68 return check(isBitcodeContainingObjCCategory(mb));
69 default:
70 return false;
71 }
72 }
73
namespace {

// Each FOR_EACH_*_FIELD list below is handed to CREATE_LAYOUT_CLASS (defined
// outside this file) to produce a `<Name>Layout` class. As used in this file,
// such a class is constructed from the target word size and exposes one
// `<field>Offset` member per listed field, plus `totalSize`.
// NOTE(review): `Ptr` presumably denotes a target-pointer-sized field; confirm
// against the macro definition.

// Fields of an ObjC category struct, in declaration order.
#define FOR_EACH_CATEGORY_FIELD(DO)                                            \
  DO(Ptr, name)                                                                \
  DO(Ptr, klass)                                                               \
  DO(Ptr, instanceMethods)                                                     \
  DO(Ptr, classMethods)                                                        \
  DO(Ptr, protocols)                                                           \
  DO(Ptr, instanceProps)                                                       \
  DO(Ptr, classProps)                                                          \
  DO(uint32_t, size)

CREATE_LAYOUT_CLASS(Category, FOR_EACH_CATEGORY_FIELD);

#undef FOR_EACH_CATEGORY_FIELD

// Fields of an ObjC class struct. `roData` refers to the read-only class data
// described by the ROClass layout below.
#define FOR_EACH_CLASS_FIELD(DO)                                               \
  DO(Ptr, metaClass)                                                           \
  DO(Ptr, superClass)                                                          \
  DO(Ptr, methodCache)                                                         \
  DO(Ptr, vtable)                                                              \
  DO(Ptr, roData)

CREATE_LAYOUT_CLASS(Class, FOR_EACH_CLASS_FIELD);

#undef FOR_EACH_CLASS_FIELD

// Fields of the read-only class data struct (holds the class name and the
// base method/protocol/property lists).
// NOTE(review): `instanceSize` is declared as `Ptr` here; confirm this width is
// intended for every supported target.
#define FOR_EACH_RO_CLASS_FIELD(DO)                                            \
  DO(uint32_t, flags)                                                          \
  DO(uint32_t, instanceStart)                                                  \
  DO(Ptr, instanceSize)                                                        \
  DO(Ptr, ivarLayout)                                                          \
  DO(Ptr, name)                                                                \
  DO(Ptr, baseMethods)                                                         \
  DO(Ptr, baseProtocols)                                                       \
  DO(Ptr, ivars)                                                               \
  DO(Ptr, weakIvarLayout)                                                      \
  DO(Ptr, baseProperties)

CREATE_LAYOUT_CLASS(ROClass, FOR_EACH_RO_CLASS_FIELD);

#undef FOR_EACH_RO_CLASS_FIELD

// Header that precedes the entries of a method or property list: the size of
// one entry followed by the number of entries.
#define FOR_EACH_LIST_HEADER(DO)                                               \
  DO(uint32_t, structSize)                                                     \
  DO(uint32_t, structCount)

CREATE_LAYOUT_CLASS(ListHeader, FOR_EACH_LIST_HEADER);

#undef FOR_EACH_LIST_HEADER

// Header of a protocol list: a single count field.
#define FOR_EACH_PROTOCOL_LIST_HEADER(DO) DO(Ptr, protocolCount)

CREATE_LAYOUT_CLASS(ProtocolListHeader, FOR_EACH_PROTOCOL_LIST_HEADER);

#undef FOR_EACH_PROTOCOL_LIST_HEADER

// One entry of a method list.
#define FOR_EACH_METHOD(DO)                                                    \
  DO(Ptr, name)                                                                \
  DO(Ptr, type)                                                                \
  DO(Ptr, impl)

CREATE_LAYOUT_CLASS(Method, FOR_EACH_METHOD);

#undef FOR_EACH_METHOD

// Whether a method was found in a class definition or in a category.
enum MethodContainerKind {
  MCK_Class,
  MCK_Category,
};

// Records where a method was first seen: the kind of container and the input
// section holding that container's struct.
struct MethodContainer {
  MethodContainerKind kind;
  const ConcatInputSection *isec;
};

// Instance ("-") versus class ("+") methods.
enum MethodKind {
  MK_Instance,
  MK_Static,
};

// Per-class record of every method name seen so far, keyed by method name and
// kept separately for instance and class methods.
struct ObjcClass {
  DenseMap<CachedHashStringRef, MethodContainer> instanceMethods;
  DenseMap<CachedHashStringRef, MethodContainer> classMethods;
};

} // namespace
161
162 class ObjcCategoryChecker {
163 public:
164 ObjcCategoryChecker();
165 void parseCategory(const ConcatInputSection *catListIsec);
166
167 private:
168 void parseClass(const Defined *classSym);
169 void parseMethods(const ConcatInputSection *methodsIsec,
170 const Symbol *methodContainer,
171 const ConcatInputSection *containerIsec,
172 MethodContainerKind, MethodKind);
173
174 CategoryLayout catLayout;
175 ClassLayout classLayout;
176 ROClassLayout roClassLayout;
177 ListHeaderLayout listHeaderLayout;
178 MethodLayout methodLayout;
179
180 DenseMap<const Symbol *, ObjcClass> classMap;
181 };
182
// Every struct-layout helper is constructed from the current target's word
// size; classMap starts out empty.
ObjcCategoryChecker::ObjcCategoryChecker()
    : catLayout(target->wordSize), classLayout(target->wordSize),
      roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
      methodLayout(target->wordSize) {}
187
parseMethods(const ConcatInputSection * methodsIsec,const Symbol * methodContainerSym,const ConcatInputSection * containerIsec,MethodContainerKind mcKind,MethodKind mKind)188 void ObjcCategoryChecker::parseMethods(const ConcatInputSection *methodsIsec,
189 const Symbol *methodContainerSym,
190 const ConcatInputSection *containerIsec,
191 MethodContainerKind mcKind,
192 MethodKind mKind) {
193 ObjcClass &klass = classMap[methodContainerSym];
194 for (const Reloc &r : methodsIsec->relocs) {
195 if ((r.offset - listHeaderLayout.totalSize) % methodLayout.totalSize !=
196 methodLayout.nameOffset)
197 continue;
198
199 CachedHashStringRef methodName(r.getReferentString());
200 // +load methods are special: all implementations are called by the runtime
201 // even if they are part of the same class. Thus there is no need to check
202 // for duplicates.
203 // NOTE: Instead of specifically checking for this method name, ld64 simply
204 // checks whether a class / category is present in __objc_nlclslist /
205 // __objc_nlcatlist respectively. This will be the case if the class /
206 // category has a +load method. It skips optimizing the categories if there
207 // are multiple +load methods. Since it does dupe checking as part of the
208 // optimization process, this avoids spurious dupe messages around +load,
209 // but it also means that legit dupe issues for other methods are ignored.
210 if (mKind == MK_Static && methodName.val() == "load")
211 continue;
212
213 auto &methodMap =
214 mKind == MK_Instance ? klass.instanceMethods : klass.classMethods;
215 if (methodMap
216 .try_emplace(methodName, MethodContainer{mcKind, containerIsec})
217 .second)
218 continue;
219
220 // We have a duplicate; generate a warning message.
221 const auto &mc = methodMap.lookup(methodName);
222 const Reloc *nameReloc = nullptr;
223 if (mc.kind == MCK_Category) {
224 nameReloc = mc.isec->getRelocAt(catLayout.nameOffset);
225 } else {
226 assert(mc.kind == MCK_Class);
227 const auto *roIsec = mc.isec->getRelocAt(classLayout.roDataOffset)
228 ->getReferentInputSection();
229 nameReloc = roIsec->getRelocAt(roClassLayout.nameOffset);
230 }
231 StringRef containerName = nameReloc->getReferentString();
232 StringRef methPrefix = mKind == MK_Instance ? "-" : "+";
233
234 // We should only ever encounter collisions when parsing category methods
235 // (since the Class struct is parsed before any of its categories).
236 assert(mcKind == MCK_Category);
237 StringRef newCatName =
238 containerIsec->getRelocAt(catLayout.nameOffset)->getReferentString();
239
240 auto formatObjAndSrcFileName = [](const InputSection *section) {
241 lld::macho::InputFile *inputFile = section->getFile();
242 std::string result = toString(inputFile);
243
244 auto objFile = dyn_cast_or_null<ObjFile>(inputFile);
245 if (objFile && objFile->compileUnit)
246 result += " (" + objFile->sourceFile() + ")";
247
248 return result;
249 };
250
251 StringRef containerType = mc.kind == MCK_Category ? "category" : "class";
252 warn("method '" + methPrefix + methodName.val() +
253 "' has conflicting definitions:\n>>> defined in category " +
254 newCatName + " from " + formatObjAndSrcFileName(containerIsec) +
255 "\n>>> defined in " + containerType + " " + containerName + " from " +
256 formatObjAndSrcFileName(mc.isec));
257 }
258 }
259
parseCategory(const ConcatInputSection * catIsec)260 void ObjcCategoryChecker::parseCategory(const ConcatInputSection *catIsec) {
261 auto *classReloc = catIsec->getRelocAt(catLayout.klassOffset);
262 if (!classReloc)
263 return;
264
265 auto *classSym = cast<Symbol *>(classReloc->referent);
266 if (auto *d = dyn_cast<Defined>(classSym))
267 if (!classMap.count(d))
268 parseClass(d);
269
270 if (const auto *r = catIsec->getRelocAt(catLayout.classMethodsOffset)) {
271 parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
272 classSym, catIsec, MCK_Category, MK_Static);
273 }
274
275 if (const auto *r = catIsec->getRelocAt(catLayout.instanceMethodsOffset)) {
276 parseMethods(cast<ConcatInputSection>(r->getReferentInputSection()),
277 classSym, catIsec, MCK_Category, MK_Instance);
278 }
279 }
280
parseClass(const Defined * classSym)281 void ObjcCategoryChecker::parseClass(const Defined *classSym) {
282 // Given a Class struct, get its corresponding Methods struct
283 auto getMethodsIsec =
284 [&](const InputSection *classIsec) -> ConcatInputSection * {
285 if (const auto *r = classIsec->getRelocAt(classLayout.roDataOffset)) {
286 if (const auto *roIsec =
287 cast_or_null<ConcatInputSection>(r->getReferentInputSection())) {
288 if (const auto *r =
289 roIsec->getRelocAt(roClassLayout.baseMethodsOffset)) {
290 if (auto *methodsIsec = cast_or_null<ConcatInputSection>(
291 r->getReferentInputSection()))
292 return methodsIsec;
293 }
294 }
295 }
296 return nullptr;
297 };
298
299 const auto *classIsec = cast<ConcatInputSection>(classSym->isec());
300
301 // Parse instance methods.
302 if (const auto *instanceMethodsIsec = getMethodsIsec(classIsec))
303 parseMethods(instanceMethodsIsec, classSym, classIsec, MCK_Class,
304 MK_Instance);
305
306 // Class methods are contained in the metaclass.
307 if (const auto *r = classSym->isec()->getRelocAt(classLayout.metaClassOffset))
308 if (const auto *classMethodsIsec = getMethodsIsec(
309 cast<ConcatInputSection>(r->getReferentInputSection())))
310 parseMethods(classMethodsIsec, classSym, classIsec, MCK_Class, MK_Static);
311 }
312
checkCategories()313 void objc::checkCategories() {
314 TimeTraceScope timeScope("ObjcCategoryChecker");
315
316 ObjcCategoryChecker checker;
317 for (const InputSection *isec : inputSections) {
318 if (isec->getName() == section_names::objcCatList)
319 for (const Reloc &r : isec->relocs) {
320 auto *catIsec = cast<ConcatInputSection>(r.getReferentInputSection());
321 checker.parseCategory(catIsec);
322 }
323 }
324 }
325
326 namespace {
327
328 class ObjcCategoryMerger {
329 // In which language was a particular construct originally defined
330 enum SourceLanguage { Unknown, ObjC, Swift };
331
332 // Information about an input category
333 struct InfoInputCategory {
334 ConcatInputSection *catListIsec;
335 ConcatInputSection *catBodyIsec;
336 uint32_t offCatListIsec = 0;
337 SourceLanguage sourceLanguage = SourceLanguage::Unknown;
338
339 bool wasMerged = false;
340 };
341
342 // To write new (merged) categories or classes, we will try make limited
343 // assumptions about the alignment and the sections the various class/category
344 // info are stored in and . So we'll just reuse the same sections and
345 // alignment as already used in existing (input) categories. To do this we
346 // have InfoCategoryWriter which contains the various sections that the
347 // generated categories will be written to.
348 struct InfoWriteSection {
349 bool valid = false; // Data has been successfully collected from input
350 uint32_t align = 0;
351 Section *inputSection;
352 Reloc relocTemplate;
353 OutputSection *outputSection;
354 };
355
356 struct InfoCategoryWriter {
357 InfoWriteSection catListInfo;
358 InfoWriteSection catBodyInfo;
359 InfoWriteSection catNameInfo;
360 InfoWriteSection catPtrListInfo;
361 };
362
363 // Information about a pointer list in the original categories or class(method
364 // lists, protocol lists, etc)
365 struct PointerListInfo {
366 PointerListInfo() = default;
367 PointerListInfo(const PointerListInfo &) = default;
PointerListInfo__anonff1b1f600411::ObjcCategoryMerger::PointerListInfo368 PointerListInfo(const char *_categoryPrefix, uint32_t _pointersPerStruct)
369 : categoryPrefix(_categoryPrefix),
370 pointersPerStruct(_pointersPerStruct) {}
371
operator ==__anonff1b1f600411::ObjcCategoryMerger::PointerListInfo372 inline bool operator==(const PointerListInfo &cmp) const {
373 return pointersPerStruct == cmp.pointersPerStruct &&
374 structSize == cmp.structSize && structCount == cmp.structCount &&
375 allPtrs == cmp.allPtrs;
376 }
377
378 const char *categoryPrefix;
379
380 uint32_t pointersPerStruct = 0;
381
382 uint32_t structSize = 0;
383 uint32_t structCount = 0;
384
385 std::vector<Symbol *> allPtrs;
386 };
387
388 // Full information describing an ObjC class . This will include all the
389 // additional methods, protocols, and properties that are contained in the
390 // class and all the categories that extend a particular class.
391 struct ClassExtensionInfo {
ClassExtensionInfo__anonff1b1f600411::ObjcCategoryMerger::ClassExtensionInfo392 ClassExtensionInfo(CategoryLayout &_catLayout) : catLayout(_catLayout){};
393
394 // Merged names of containers. Ex: base|firstCategory|secondCategory|...
395 std::string mergedContainerName;
396 std::string baseClassName;
397 const Symbol *baseClass = nullptr;
398 SourceLanguage baseClassSourceLanguage = SourceLanguage::Unknown;
399
400 CategoryLayout &catLayout;
401
402 // In case we generate new data, mark the new data as belonging to this file
403 ObjFile *objFileForMergeData = nullptr;
404
405 PointerListInfo instanceMethods = {objc::symbol_names::instanceMethods,
406 /*pointersPerStruct=*/3};
407 PointerListInfo classMethods = {objc::symbol_names::categoryClassMethods,
408 /*pointersPerStruct=*/3};
409 PointerListInfo protocols = {objc::symbol_names::categoryProtocols,
410 /*pointersPerStruct=*/0};
411 PointerListInfo instanceProps = {objc::symbol_names::listProprieties,
412 /*pointersPerStruct=*/2};
413 PointerListInfo classProps = {objc::symbol_names::klassPropList,
414 /*pointersPerStruct=*/2};
415 };
416
417 public:
418 ObjcCategoryMerger(std::vector<ConcatInputSection *> &_allInputSections);
419 void doMerge();
420 static void doCleanup();
421
422 private:
423 DenseSet<const Symbol *> collectNlCategories();
424 void collectAndValidateCategoriesData();
425 bool
426 mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> &categories);
427
428 void eraseISec(ConcatInputSection *isec);
429 void eraseMergedCategories();
430
431 void generateCatListForNonErasedCategories(
432 MapVector<ConcatInputSection *, std::set<uint64_t>>
433 catListToErasedOffsets);
434 void collectSectionWriteInfoFromIsec(const InputSection *isec,
435 InfoWriteSection &catWriteInfo);
436 bool collectCategoryWriterInfoFromCategory(const InfoInputCategory &catInfo);
437 bool parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
438 ClassExtensionInfo &extInfo);
439
440 void parseProtocolListInfo(const ConcatInputSection *isec, uint32_t secOffset,
441 PointerListInfo &ptrList,
442 SourceLanguage sourceLang);
443
444 PointerListInfo parseProtocolListInfo(const ConcatInputSection *isec,
445 uint32_t secOffset,
446 SourceLanguage sourceLang);
447
448 bool parsePointerListInfo(const ConcatInputSection *isec, uint32_t secOffset,
449 PointerListInfo &ptrList);
450
451 void emitAndLinkPointerList(Defined *parentSym, uint32_t linkAtOffset,
452 const ClassExtensionInfo &extInfo,
453 const PointerListInfo &ptrList);
454
455 Defined *emitAndLinkProtocolList(Defined *parentSym, uint32_t linkAtOffset,
456 const ClassExtensionInfo &extInfo,
457 const PointerListInfo &ptrList);
458
459 Defined *emitCategory(const ClassExtensionInfo &extInfo);
460 Defined *emitCatListEntrySec(const std::string &forCategoryName,
461 const std::string &forBaseClassName,
462 ObjFile *objFile);
463 Defined *emitCategoryBody(const std::string &name, const Defined *nameSym,
464 const Symbol *baseClassSym,
465 const std::string &baseClassName, ObjFile *objFile);
466 Defined *emitCategoryName(const std::string &name, ObjFile *objFile);
467 void createSymbolReference(Defined *refFrom, const Symbol *refTo,
468 uint32_t offset, const Reloc &relocTemplate);
469 Defined *tryFindDefinedOnIsec(const InputSection *isec, uint32_t offset);
470 Symbol *tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
471 uint32_t offset);
472 Defined *tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
473 uint32_t offset);
474 Defined *getClassRo(const Defined *classSym, bool getMetaRo);
475 SourceLanguage getClassSymSourceLang(const Defined *classSym);
476 bool mergeCategoriesIntoBaseClass(const Defined *baseClass,
477 std::vector<InfoInputCategory> &categories);
478 void eraseSymbolAtIsecOffset(ConcatInputSection *isec, uint32_t offset);
479 void tryEraseDefinedAtIsecOffset(const ConcatInputSection *isec,
480 uint32_t offset);
481
482 // Allocate a null-terminated StringRef backed by generatedSectionData
483 StringRef newStringData(const char *str);
484 // Allocate section data, backed by generatedSectionData
485 SmallVector<uint8_t> &newSectionData(uint32_t size);
486
487 CategoryLayout catLayout;
488 ClassLayout classLayout;
489 ROClassLayout roClassLayout;
490 ListHeaderLayout listHeaderLayout;
491 MethodLayout methodLayout;
492 ProtocolListHeaderLayout protocolListHeaderLayout;
493
494 InfoCategoryWriter infoCategoryWriter;
495 std::vector<ConcatInputSection *> &allInputSections;
496 // Map of base class Symbol to list of InfoInputCategory's for it
497 MapVector<const Symbol *, std::vector<InfoInputCategory>> categoryMap;
498
499 // Normally, the binary data comes from the input files, but since we're
500 // generating binary data ourselves, we use the below array to store it in.
501 // Need this to be 'static' so the data survives past the ObjcCategoryMerger
502 // object, as the data will be read by the Writer when the final binary is
503 // generated.
504 static SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
505 generatedSectionData;
506 };
507
508 SmallVector<std::unique_ptr<SmallVector<uint8_t>>>
509 ObjcCategoryMerger::generatedSectionData;
510
// Every struct-layout helper is constructed from the current target's word
// size. `_allInputSections` is stored by reference (not copied), so it must
// outlive the merger.
ObjcCategoryMerger::ObjcCategoryMerger(
    std::vector<ConcatInputSection *> &_allInputSections)
    : catLayout(target->wordSize), classLayout(target->wordSize),
      roClassLayout(target->wordSize), listHeaderLayout(target->wordSize),
      methodLayout(target->wordSize),
      protocolListHeaderLayout(target->wordSize),
      allInputSections(_allInputSections) {}
518
collectSectionWriteInfoFromIsec(const InputSection * isec,InfoWriteSection & catWriteInfo)519 void ObjcCategoryMerger::collectSectionWriteInfoFromIsec(
520 const InputSection *isec, InfoWriteSection &catWriteInfo) {
521
522 catWriteInfo.inputSection = const_cast<Section *>(&isec->section);
523 catWriteInfo.align = isec->align;
524 catWriteInfo.outputSection = isec->parent;
525
526 assert(catWriteInfo.outputSection &&
527 "outputSection may not be null in collectSectionWriteInfoFromIsec.");
528
529 if (isec->relocs.size())
530 catWriteInfo.relocTemplate = isec->relocs[0];
531
532 catWriteInfo.valid = true;
533 }
534
535 Symbol *
tryGetSymbolAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)536 ObjcCategoryMerger::tryGetSymbolAtIsecOffset(const ConcatInputSection *isec,
537 uint32_t offset) {
538 if (!isec)
539 return nullptr;
540 const Reloc *reloc = isec->getRelocAt(offset);
541
542 if (!reloc)
543 return nullptr;
544
545 Symbol *sym = dyn_cast_if_present<Symbol *>(reloc->referent);
546
547 if (reloc->addend && sym) {
548 assert(isa<Defined>(sym) && "Expected defined for non-zero addend");
549 Defined *definedSym = cast<Defined>(sym);
550 sym = tryFindDefinedOnIsec(definedSym->isec(),
551 definedSym->value + reloc->addend);
552 }
553
554 return sym;
555 }
556
tryFindDefinedOnIsec(const InputSection * isec,uint32_t offset)557 Defined *ObjcCategoryMerger::tryFindDefinedOnIsec(const InputSection *isec,
558 uint32_t offset) {
559 for (Defined *sym : isec->symbols)
560 if ((sym->value <= offset) && (sym->value + sym->size > offset))
561 return sym;
562
563 return nullptr;
564 }
565
566 Defined *
tryGetDefinedAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)567 ObjcCategoryMerger::tryGetDefinedAtIsecOffset(const ConcatInputSection *isec,
568 uint32_t offset) {
569 Symbol *sym = tryGetSymbolAtIsecOffset(isec, offset);
570 return dyn_cast_or_null<Defined>(sym);
571 }
572
573 // Get the class's ro_data symbol. If getMetaRo is true, then we will return
574 // the meta-class's ro_data symbol. Otherwise, we will return the class
575 // (instance) ro_data symbol.
getClassRo(const Defined * classSym,bool getMetaRo)576 Defined *ObjcCategoryMerger::getClassRo(const Defined *classSym,
577 bool getMetaRo) {
578 ConcatInputSection *isec = dyn_cast<ConcatInputSection>(classSym->isec());
579 if (!isec)
580 return nullptr;
581
582 if (!getMetaRo)
583 return tryGetDefinedAtIsecOffset(isec, classLayout.roDataOffset +
584 classSym->value);
585
586 Defined *metaClass = tryGetDefinedAtIsecOffset(
587 isec, classLayout.metaClassOffset + classSym->value);
588 if (!metaClass)
589 return nullptr;
590
591 return tryGetDefinedAtIsecOffset(
592 dyn_cast<ConcatInputSection>(metaClass->isec()),
593 classLayout.roDataOffset);
594 }
595
596 // Given an ConcatInputSection or CStringInputSection and an offset, if there is
597 // a symbol(Defined) at that offset, then erase the symbol (mark it not live)
tryEraseDefinedAtIsecOffset(const ConcatInputSection * isec,uint32_t offset)598 void ObjcCategoryMerger::tryEraseDefinedAtIsecOffset(
599 const ConcatInputSection *isec, uint32_t offset) {
600 const Reloc *reloc = isec->getRelocAt(offset);
601
602 if (!reloc)
603 return;
604
605 Defined *sym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent));
606 if (!sym)
607 return;
608
609 if (auto *cisec = dyn_cast_or_null<ConcatInputSection>(sym->isec()))
610 eraseISec(cisec);
611 else if (auto *csisec = dyn_cast_or_null<CStringInputSection>(sym->isec())) {
612 uint32_t totalOffset = sym->value + reloc->addend;
613 StringPiece &piece = csisec->getStringPiece(totalOffset);
614 piece.live = false;
615 } else {
616 llvm_unreachable("erased symbol has to be Defined or CStringInputSection");
617 }
618 }
619
collectCategoryWriterInfoFromCategory(const InfoInputCategory & catInfo)620 bool ObjcCategoryMerger::collectCategoryWriterInfoFromCategory(
621 const InfoInputCategory &catInfo) {
622
623 if (!infoCategoryWriter.catListInfo.valid)
624 collectSectionWriteInfoFromIsec(catInfo.catListIsec,
625 infoCategoryWriter.catListInfo);
626 if (!infoCategoryWriter.catBodyInfo.valid)
627 collectSectionWriteInfoFromIsec(catInfo.catBodyIsec,
628 infoCategoryWriter.catBodyInfo);
629
630 if (!infoCategoryWriter.catNameInfo.valid) {
631 lld::macho::Defined *catNameSym =
632 tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, catLayout.nameOffset);
633
634 if (!catNameSym) {
635 // This is an unhandeled case where the category name is not a symbol but
636 // instead points to an CStringInputSection (that doesn't have any symbol)
637 // TODO: Find a small repro and either fix or add a test case for this
638 // scenario
639 return false;
640 }
641
642 collectSectionWriteInfoFromIsec(catNameSym->isec(),
643 infoCategoryWriter.catNameInfo);
644 }
645
646 // Collect writer info from all the category lists (we're assuming they all
647 // would provide the same info)
648 if (!infoCategoryWriter.catPtrListInfo.valid) {
649 for (uint32_t off = catLayout.instanceMethodsOffset;
650 off <= catLayout.classPropsOffset; off += target->wordSize) {
651 if (Defined *ptrList =
652 tryGetDefinedAtIsecOffset(catInfo.catBodyIsec, off)) {
653 collectSectionWriteInfoFromIsec(ptrList->isec(),
654 infoCategoryWriter.catPtrListInfo);
655 // we've successfully collected data, so we can break
656 break;
657 }
658 }
659 }
660
661 return true;
662 }
663
664 // Parse a protocol list that might be linked to ConcatInputSection at a given
665 // offset. The format of the protocol list is different than other lists (prop
666 // lists, method lists) so we need to parse it differently
parseProtocolListInfo(const ConcatInputSection * isec,uint32_t secOffset,PointerListInfo & ptrList,SourceLanguage sourceLang)667 void ObjcCategoryMerger::parseProtocolListInfo(
668 const ConcatInputSection *isec, uint32_t secOffset,
669 PointerListInfo &ptrList, [[maybe_unused]] SourceLanguage sourceLang) {
670 assert((isec && (secOffset + target->wordSize <= isec->data.size())) &&
671 "Tried to read pointer list beyond protocol section end");
672
673 const Reloc *reloc = isec->getRelocAt(secOffset);
674 if (!reloc)
675 return;
676
677 auto *ptrListSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent));
678 assert(ptrListSym && "Protocol list reloc does not have a valid Defined");
679
680 // Theoretically protocol count can be either 32b or 64b, depending on
681 // platform pointer size, but to simplify implementation we always just read
682 // the lower 32b which should be good enough.
683 uint32_t protocolCount = *reinterpret_cast<const uint32_t *>(
684 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
685
686 ptrList.structCount += protocolCount;
687 ptrList.structSize = target->wordSize;
688
689 [[maybe_unused]] uint32_t expectedListSize =
690 (protocolCount * target->wordSize) +
691 /*header(count)*/ protocolListHeaderLayout.totalSize +
692 /*extra null value*/ target->wordSize;
693
694 // On Swift, the protocol list does not have the extra (unnecessary) null
695 [[maybe_unused]] uint32_t expectedListSizeSwift =
696 expectedListSize - target->wordSize;
697
698 assert(((expectedListSize == ptrListSym->isec()->data.size() &&
699 sourceLang == SourceLanguage::ObjC) ||
700 (expectedListSizeSwift == ptrListSym->isec()->data.size() &&
701 sourceLang == SourceLanguage::Swift)) &&
702 "Protocol list does not match expected size");
703
704 uint32_t off = protocolListHeaderLayout.totalSize;
705 for (uint32_t inx = 0; inx < protocolCount; ++inx) {
706 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
707 assert(reloc && "No reloc found at protocol list offset");
708
709 auto *listSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent));
710 assert(listSym && "Protocol list reloc does not have a valid Defined");
711
712 ptrList.allPtrs.push_back(listSym);
713 off += target->wordSize;
714 }
715 assert((ptrListSym->isec()->getRelocAt(off) == nullptr) &&
716 "expected null terminating protocol");
717 assert(off + /*extra null value*/ target->wordSize == expectedListSize &&
718 "Protocol list end offset does not match expected size");
719 }
720
721 // Parse a protocol list and return the PointerListInfo for it
722 ObjcCategoryMerger::PointerListInfo
parseProtocolListInfo(const ConcatInputSection * isec,uint32_t secOffset,SourceLanguage sourceLang)723 ObjcCategoryMerger::parseProtocolListInfo(const ConcatInputSection *isec,
724 uint32_t secOffset,
725 SourceLanguage sourceLang) {
726 PointerListInfo ptrList;
727 parseProtocolListInfo(isec, secOffset, ptrList, sourceLang);
728 return ptrList;
729 }
730
731 // Parse a pointer list that might be linked to ConcatInputSection at a given
732 // offset. This can be used for instance methods, class methods, instance props
733 // and class props since they have the same format.
parsePointerListInfo(const ConcatInputSection * isec,uint32_t secOffset,PointerListInfo & ptrList)734 bool ObjcCategoryMerger::parsePointerListInfo(const ConcatInputSection *isec,
735 uint32_t secOffset,
736 PointerListInfo &ptrList) {
737 assert(ptrList.pointersPerStruct == 2 || ptrList.pointersPerStruct == 3);
738 assert(isec && "Trying to parse pointer list from null isec");
739 assert(secOffset + target->wordSize <= isec->data.size() &&
740 "Trying to read pointer list beyond section end");
741
742 const Reloc *reloc = isec->getRelocAt(secOffset);
743 // Empty list is a valid case, return true.
744 if (!reloc)
745 return true;
746
747 auto *ptrListSym = dyn_cast_or_null<Defined>(cast<Symbol *>(reloc->referent));
748 assert(ptrListSym && "Reloc does not have a valid Defined");
749
750 uint32_t thisStructSize = *reinterpret_cast<const uint32_t *>(
751 ptrListSym->isec()->data.data() + listHeaderLayout.structSizeOffset);
752 uint32_t thisStructCount = *reinterpret_cast<const uint32_t *>(
753 ptrListSym->isec()->data.data() + listHeaderLayout.structCountOffset);
754 assert(thisStructSize == ptrList.pointersPerStruct * target->wordSize);
755
756 assert(!ptrList.structSize || (thisStructSize == ptrList.structSize));
757
758 ptrList.structCount += thisStructCount;
759 ptrList.structSize = thisStructSize;
760
761 uint32_t expectedListSize =
762 listHeaderLayout.totalSize + (thisStructSize * thisStructCount);
763 assert(expectedListSize == ptrListSym->isec()->data.size() &&
764 "Pointer list does not match expected size");
765
766 for (uint32_t off = listHeaderLayout.totalSize; off < expectedListSize;
767 off += target->wordSize) {
768 const Reloc *reloc = ptrListSym->isec()->getRelocAt(off);
769 assert(reloc && "No reloc found at pointer list offset");
770
771 auto *listSym =
772 dyn_cast_or_null<Defined>(reloc->referent.dyn_cast<Symbol *>());
773 // Sometimes, the reloc points to a StringPiece (InputSection + addend)
774 // instead of a symbol.
775 // TODO: Skip these cases for now, but we should fix this.
776 if (!listSym)
777 return false;
778
779 ptrList.allPtrs.push_back(listSym);
780 }
781
782 return true;
783 }
784
785 // Here we parse all the information of an input category (catInfo) and
786 // append the parsed info into the structure which will contain all the
787 // information about how a class is extended (extInfo)
parseCatInfoToExtInfo(const InfoInputCategory & catInfo,ClassExtensionInfo & extInfo)788 bool ObjcCategoryMerger::parseCatInfoToExtInfo(const InfoInputCategory &catInfo,
789 ClassExtensionInfo &extInfo) {
790 const Reloc *catNameReloc =
791 catInfo.catBodyIsec->getRelocAt(catLayout.nameOffset);
792
793 // Parse name
794 assert(catNameReloc && "Category does not have a reloc at 'nameOffset'");
795
796 // is this the first category we are parsing?
797 if (extInfo.mergedContainerName.empty())
798 extInfo.objFileForMergeData =
799 dyn_cast_or_null<ObjFile>(catInfo.catBodyIsec->getFile());
800 else
801 extInfo.mergedContainerName += "|";
802
803 assert(extInfo.objFileForMergeData &&
804 "Expected to already have valid objextInfo.objFileForMergeData");
805
806 StringRef catName = catNameReloc->getReferentString();
807 extInfo.mergedContainerName += catName.str();
808
809 // Parse base class
810 if (!extInfo.baseClass) {
811 Symbol *classSym =
812 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec, catLayout.klassOffset);
813 assert(extInfo.baseClassName.empty());
814 extInfo.baseClass = classSym;
815 llvm::StringRef classPrefix(objc::symbol_names::klass);
816 assert(classSym->getName().starts_with(classPrefix) &&
817 "Base class symbol does not start with expected prefix");
818 extInfo.baseClassName = classSym->getName().substr(classPrefix.size());
819 } else {
820 assert((extInfo.baseClass ==
821 tryGetSymbolAtIsecOffset(catInfo.catBodyIsec,
822 catLayout.klassOffset)) &&
823 "Trying to parse category info into container with different base "
824 "class");
825 }
826
827 if (!parsePointerListInfo(catInfo.catBodyIsec,
828 catLayout.instanceMethodsOffset,
829 extInfo.instanceMethods))
830 return false;
831
832 if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classMethodsOffset,
833 extInfo.classMethods))
834 return false;
835
836 parseProtocolListInfo(catInfo.catBodyIsec, catLayout.protocolsOffset,
837 extInfo.protocols, catInfo.sourceLanguage);
838
839 if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.instancePropsOffset,
840 extInfo.instanceProps))
841 return false;
842
843 if (!parsePointerListInfo(catInfo.catBodyIsec, catLayout.classPropsOffset,
844 extInfo.classProps))
845 return false;
846
847 return true;
848 }
849
850 // Generate a protocol list (including header) and link it into the parent at
851 // the specified offset.
emitAndLinkProtocolList(Defined * parentSym,uint32_t linkAtOffset,const ClassExtensionInfo & extInfo,const PointerListInfo & ptrList)852 Defined *ObjcCategoryMerger::emitAndLinkProtocolList(
853 Defined *parentSym, uint32_t linkAtOffset,
854 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
855 if (ptrList.allPtrs.empty())
856 return nullptr;
857
858 assert(ptrList.allPtrs.size() == ptrList.structCount);
859
860 uint32_t bodySize = (ptrList.structCount * target->wordSize) +
861 /*header(count)*/ protocolListHeaderLayout.totalSize +
862 /*extra null value*/ target->wordSize;
863 llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
864
865 // This theoretically can be either 32b or 64b, but writing just the first 32b
866 // is good enough
867 const uint32_t *ptrProtoCount = reinterpret_cast<const uint32_t *>(
868 bodyData.data() + protocolListHeaderLayout.protocolCountOffset);
869
870 *const_cast<uint32_t *>(ptrProtoCount) = ptrList.allPtrs.size();
871
872 ConcatInputSection *listSec = make<ConcatInputSection>(
873 *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
874 infoCategoryWriter.catPtrListInfo.align);
875 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
876 listSec->live = true;
877
878 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
879
880 std::string symName = ptrList.categoryPrefix;
881 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
882
883 Defined *ptrListSym = make<Defined>(
884 newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
885 listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
886 /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
887 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
888 /*isWeakDefCanBeHidden=*/false);
889
890 ptrListSym->used = true;
891 parentSym->getObjectFile()->symbols.push_back(ptrListSym);
892 addInputSection(listSec);
893
894 createSymbolReference(parentSym, ptrListSym, linkAtOffset,
895 infoCategoryWriter.catBodyInfo.relocTemplate);
896
897 uint32_t offset = protocolListHeaderLayout.totalSize;
898 for (Symbol *symbol : ptrList.allPtrs) {
899 createSymbolReference(ptrListSym, symbol, offset,
900 infoCategoryWriter.catPtrListInfo.relocTemplate);
901 offset += target->wordSize;
902 }
903
904 return ptrListSym;
905 }
906
907 // Generate a pointer list (including header) and link it into the parent at the
908 // specified offset. This is used for instance and class methods and
909 // proprieties.
emitAndLinkPointerList(Defined * parentSym,uint32_t linkAtOffset,const ClassExtensionInfo & extInfo,const PointerListInfo & ptrList)910 void ObjcCategoryMerger::emitAndLinkPointerList(
911 Defined *parentSym, uint32_t linkAtOffset,
912 const ClassExtensionInfo &extInfo, const PointerListInfo &ptrList) {
913 if (ptrList.allPtrs.empty())
914 return;
915
916 assert(ptrList.allPtrs.size() * target->wordSize ==
917 ptrList.structCount * ptrList.structSize);
918
919 // Generate body
920 uint32_t bodySize =
921 listHeaderLayout.totalSize + (ptrList.structSize * ptrList.structCount);
922 llvm::ArrayRef<uint8_t> bodyData = newSectionData(bodySize);
923
924 const uint32_t *ptrStructSize = reinterpret_cast<const uint32_t *>(
925 bodyData.data() + listHeaderLayout.structSizeOffset);
926 const uint32_t *ptrStructCount = reinterpret_cast<const uint32_t *>(
927 bodyData.data() + listHeaderLayout.structCountOffset);
928
929 *const_cast<uint32_t *>(ptrStructSize) = ptrList.structSize;
930 *const_cast<uint32_t *>(ptrStructCount) = ptrList.structCount;
931
932 ConcatInputSection *listSec = make<ConcatInputSection>(
933 *infoCategoryWriter.catPtrListInfo.inputSection, bodyData,
934 infoCategoryWriter.catPtrListInfo.align);
935 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
936 listSec->live = true;
937
938 listSec->parent = infoCategoryWriter.catPtrListInfo.outputSection;
939
940 std::string symName = ptrList.categoryPrefix;
941 symName += extInfo.baseClassName + "(" + extInfo.mergedContainerName + ")";
942
943 Defined *ptrListSym = make<Defined>(
944 newStringData(symName.c_str()), /*file=*/parentSym->getObjectFile(),
945 listSec, /*value=*/0, bodyData.size(), /*isWeakDef=*/false,
946 /*isExternal=*/false, /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
947 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
948 /*isWeakDefCanBeHidden=*/false);
949
950 ptrListSym->used = true;
951 parentSym->getObjectFile()->symbols.push_back(ptrListSym);
952 addInputSection(listSec);
953
954 createSymbolReference(parentSym, ptrListSym, linkAtOffset,
955 infoCategoryWriter.catBodyInfo.relocTemplate);
956
957 uint32_t offset = listHeaderLayout.totalSize;
958 for (Symbol *symbol : ptrList.allPtrs) {
959 createSymbolReference(ptrListSym, symbol, offset,
960 infoCategoryWriter.catPtrListInfo.relocTemplate);
961 offset += target->wordSize;
962 }
963 }
964
965 // This method creates an __objc_catlist ConcatInputSection with a single slot
966 Defined *
emitCatListEntrySec(const std::string & forCategoryName,const std::string & forBaseClassName,ObjFile * objFile)967 ObjcCategoryMerger::emitCatListEntrySec(const std::string &forCategoryName,
968 const std::string &forBaseClassName,
969 ObjFile *objFile) {
970 uint32_t sectionSize = target->wordSize;
971 llvm::ArrayRef<uint8_t> bodyData = newSectionData(sectionSize);
972
973 ConcatInputSection *newCatList =
974 make<ConcatInputSection>(*infoCategoryWriter.catListInfo.inputSection,
975 bodyData, infoCategoryWriter.catListInfo.align);
976 newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
977 newCatList->live = true;
978
979 newCatList->parent = infoCategoryWriter.catListInfo.outputSection;
980
981 std::string catSymName = "<__objc_catlist slot for merged category ";
982 catSymName += forBaseClassName + "(" + forCategoryName + ")>";
983
984 Defined *catListSym = make<Defined>(
985 newStringData(catSymName.c_str()), /*file=*/objFile, newCatList,
986 /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
987 /*isPrivateExtern=*/false, /*includeInSymtab=*/false,
988 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
989 /*isWeakDefCanBeHidden=*/false);
990
991 catListSym->used = true;
992 objFile->symbols.push_back(catListSym);
993 addInputSection(newCatList);
994 return catListSym;
995 }
996
997 // Here we generate the main category body and link the name and base class into
998 // it. We don't link any other info yet like the protocol and class/instance
999 // methods/props.
emitCategoryBody(const std::string & name,const Defined * nameSym,const Symbol * baseClassSym,const std::string & baseClassName,ObjFile * objFile)1000 Defined *ObjcCategoryMerger::emitCategoryBody(const std::string &name,
1001 const Defined *nameSym,
1002 const Symbol *baseClassSym,
1003 const std::string &baseClassName,
1004 ObjFile *objFile) {
1005 llvm::ArrayRef<uint8_t> bodyData = newSectionData(catLayout.totalSize);
1006
1007 uint32_t *ptrSize = (uint32_t *)(const_cast<uint8_t *>(bodyData.data()) +
1008 catLayout.sizeOffset);
1009 *ptrSize = catLayout.totalSize;
1010
1011 ConcatInputSection *newBodySec =
1012 make<ConcatInputSection>(*infoCategoryWriter.catBodyInfo.inputSection,
1013 bodyData, infoCategoryWriter.catBodyInfo.align);
1014 newBodySec->parent = infoCategoryWriter.catBodyInfo.outputSection;
1015 newBodySec->live = true;
1016
1017 std::string symName =
1018 objc::symbol_names::category + baseClassName + "(" + name + ")";
1019 Defined *catBodySym = make<Defined>(
1020 newStringData(symName.c_str()), /*file=*/objFile, newBodySec,
1021 /*value=*/0, bodyData.size(), /*isWeakDef=*/false, /*isExternal=*/false,
1022 /*isPrivateExtern=*/false, /*includeInSymtab=*/true,
1023 /*isReferencedDynamically=*/false, /*noDeadStrip=*/false,
1024 /*isWeakDefCanBeHidden=*/false);
1025
1026 catBodySym->used = true;
1027 objFile->symbols.push_back(catBodySym);
1028 addInputSection(newBodySec);
1029
1030 createSymbolReference(catBodySym, nameSym, catLayout.nameOffset,
1031 infoCategoryWriter.catBodyInfo.relocTemplate);
1032
1033 // Create a reloc to the base class (either external or internal)
1034 createSymbolReference(catBodySym, baseClassSym, catLayout.klassOffset,
1035 infoCategoryWriter.catBodyInfo.relocTemplate);
1036
1037 return catBodySym;
1038 }
1039
1040 // This writes the new category name (for the merged category) into the binary
1041 // and returns the sybmol for it.
emitCategoryName(const std::string & name,ObjFile * objFile)1042 Defined *ObjcCategoryMerger::emitCategoryName(const std::string &name,
1043 ObjFile *objFile) {
1044 StringRef nameStrData = newStringData(name.c_str());
1045 // We use +1 below to include the null terminator
1046 llvm::ArrayRef<uint8_t> nameData(
1047 reinterpret_cast<const uint8_t *>(nameStrData.data()),
1048 nameStrData.size() + 1);
1049
1050 auto *parentSection = infoCategoryWriter.catNameInfo.inputSection;
1051 CStringInputSection *newStringSec = make<CStringInputSection>(
1052 *infoCategoryWriter.catNameInfo.inputSection, nameData,
1053 infoCategoryWriter.catNameInfo.align, /*dedupLiterals=*/true);
1054
1055 parentSection->subsections.push_back({0, newStringSec});
1056
1057 newStringSec->splitIntoPieces();
1058 newStringSec->pieces[0].live = true;
1059 newStringSec->parent = infoCategoryWriter.catNameInfo.outputSection;
1060 in.cStringSection->addInput(newStringSec);
1061 assert(newStringSec->pieces.size() == 1);
1062
1063 Defined *catNameSym = make<Defined>(
1064 "<merged category name>", /*file=*/objFile, newStringSec,
1065 /*value=*/0, nameData.size(),
1066 /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1067 /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1068 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1069
1070 catNameSym->used = true;
1071 objFile->symbols.push_back(catNameSym);
1072 return catNameSym;
1073 }
1074
1075 // This method fully creates a new category from the given ClassExtensionInfo.
1076 // It creates the category name, body and method/protocol/prop lists and links
1077 // them all together. Then it creates a new __objc_catlist entry and adds the
1078 // category to it. Calling this method will fully generate a category which will
1079 // be available in the final binary.
emitCategory(const ClassExtensionInfo & extInfo)1080 Defined *ObjcCategoryMerger::emitCategory(const ClassExtensionInfo &extInfo) {
1081 Defined *catNameSym = emitCategoryName(extInfo.mergedContainerName,
1082 extInfo.objFileForMergeData);
1083
1084 Defined *catBodySym = emitCategoryBody(
1085 extInfo.mergedContainerName, catNameSym, extInfo.baseClass,
1086 extInfo.baseClassName, extInfo.objFileForMergeData);
1087
1088 Defined *catListSym =
1089 emitCatListEntrySec(extInfo.mergedContainerName, extInfo.baseClassName,
1090 extInfo.objFileForMergeData);
1091
1092 // Add the single category body to the category list at the offset 0.
1093 createSymbolReference(catListSym, catBodySym, /*offset=*/0,
1094 infoCategoryWriter.catListInfo.relocTemplate);
1095
1096 emitAndLinkPointerList(catBodySym, catLayout.instanceMethodsOffset, extInfo,
1097 extInfo.instanceMethods);
1098
1099 emitAndLinkPointerList(catBodySym, catLayout.classMethodsOffset, extInfo,
1100 extInfo.classMethods);
1101
1102 emitAndLinkProtocolList(catBodySym, catLayout.protocolsOffset, extInfo,
1103 extInfo.protocols);
1104
1105 emitAndLinkPointerList(catBodySym, catLayout.instancePropsOffset, extInfo,
1106 extInfo.instanceProps);
1107
1108 emitAndLinkPointerList(catBodySym, catLayout.classPropsOffset, extInfo,
1109 extInfo.classProps);
1110
1111 return catBodySym;
1112 }
1113
1114 // This method merges all the categories (sharing a base class) into a single
1115 // category.
mergeCategoriesIntoSingleCategory(std::vector<InfoInputCategory> & categories)1116 bool ObjcCategoryMerger::mergeCategoriesIntoSingleCategory(
1117 std::vector<InfoInputCategory> &categories) {
1118 assert(categories.size() > 1 && "Expected at least 2 categories");
1119
1120 ClassExtensionInfo extInfo(catLayout);
1121
1122 for (auto &catInfo : categories)
1123 if (!parseCatInfoToExtInfo(catInfo, extInfo))
1124 return false;
1125
1126 Defined *newCatDef = emitCategory(extInfo);
1127 assert(newCatDef && "Failed to create a new category");
1128
1129 // Suppress unsuded var warning
1130 (void)newCatDef;
1131
1132 for (auto &catInfo : categories)
1133 catInfo.wasMerged = true;
1134
1135 return true;
1136 }
1137
createSymbolReference(Defined * refFrom,const Symbol * refTo,uint32_t offset,const Reloc & relocTemplate)1138 void ObjcCategoryMerger::createSymbolReference(Defined *refFrom,
1139 const Symbol *refTo,
1140 uint32_t offset,
1141 const Reloc &relocTemplate) {
1142 Reloc r = relocTemplate;
1143 r.offset = offset;
1144 r.addend = 0;
1145 r.referent = const_cast<Symbol *>(refTo);
1146 refFrom->isec()->relocs.push_back(r);
1147 }
1148
1149 // Get the list of categories in the '__objc_nlcatlist' section. We can't
1150 // optimize these as they have a '+load' method that has to be called at
1151 // runtime.
collectNlCategories()1152 DenseSet<const Symbol *> ObjcCategoryMerger::collectNlCategories() {
1153 DenseSet<const Symbol *> nlCategories;
1154
1155 for (InputSection *sec : allInputSections) {
1156 if (sec->getName() != section_names::objcNonLazyCatList)
1157 continue;
1158
1159 for (auto &r : sec->relocs) {
1160 const Symbol *sym = r.referent.dyn_cast<Symbol *>();
1161 nlCategories.insert(sym);
1162 }
1163 }
1164 return nlCategories;
1165 }
1166
collectAndValidateCategoriesData()1167 void ObjcCategoryMerger::collectAndValidateCategoriesData() {
1168 auto nlCategories = collectNlCategories();
1169
1170 for (InputSection *sec : allInputSections) {
1171 if (sec->getName() != section_names::objcCatList)
1172 continue;
1173 ConcatInputSection *catListCisec = dyn_cast<ConcatInputSection>(sec);
1174 assert(catListCisec &&
1175 "__objc_catList InputSection is not a ConcatInputSection");
1176
1177 for (uint32_t off = 0; off < catListCisec->getSize();
1178 off += target->wordSize) {
1179 Defined *categorySym = tryGetDefinedAtIsecOffset(catListCisec, off);
1180 assert(categorySym &&
1181 "Failed to get a valid category at __objc_catlit offset");
1182
1183 if (nlCategories.count(categorySym))
1184 continue;
1185
1186 auto *catBodyIsec = dyn_cast<ConcatInputSection>(categorySym->isec());
1187 assert(catBodyIsec &&
1188 "Category data section is not an ConcatInputSection");
1189
1190 SourceLanguage eLang = SourceLanguage::Unknown;
1191 if (categorySym->getName().starts_with(objc::symbol_names::category))
1192 eLang = SourceLanguage::ObjC;
1193 else if (categorySym->getName().starts_with(
1194 objc::symbol_names::swift_objc_category))
1195 eLang = SourceLanguage::Swift;
1196 else
1197 llvm_unreachable("Unexpected category symbol name");
1198
1199 InfoInputCategory catInputInfo{catListCisec, catBodyIsec, off, eLang};
1200
1201 // Check that the category has a reloc at 'klassOffset' (which is
1202 // a pointer to the class symbol)
1203
1204 Symbol *classSym =
1205 tryGetSymbolAtIsecOffset(catBodyIsec, catLayout.klassOffset);
1206 assert(classSym && "Category does not have a valid base class");
1207
1208 if (!collectCategoryWriterInfoFromCategory(catInputInfo))
1209 continue;
1210
1211 categoryMap[classSym].push_back(catInputInfo);
1212 }
1213 }
1214 }
1215
1216 // In the input we have multiple __objc_catlist InputSection, each of which may
1217 // contain links to multiple categories. Of these categories, we will merge (and
1218 // erase) only some. There will be some categories that will remain untouched
1219 // (not erased). For these not erased categories, we generate new __objc_catlist
1220 // entries since the parent __objc_catlist entry will be erased
generateCatListForNonErasedCategories(const MapVector<ConcatInputSection *,std::set<uint64_t>> catListToErasedOffsets)1221 void ObjcCategoryMerger::generateCatListForNonErasedCategories(
1222 const MapVector<ConcatInputSection *, std::set<uint64_t>>
1223 catListToErasedOffsets) {
1224
1225 // Go through all offsets of all __objc_catlist's that we process and if there
1226 // are categories that we didn't process - generate a new __objc_catlist for
1227 // each.
1228 for (auto &mapEntry : catListToErasedOffsets) {
1229 ConcatInputSection *catListIsec = mapEntry.first;
1230 for (uint32_t catListIsecOffset = 0;
1231 catListIsecOffset < catListIsec->data.size();
1232 catListIsecOffset += target->wordSize) {
1233 // This slot was erased, we can just skip it
1234 if (mapEntry.second.count(catListIsecOffset))
1235 continue;
1236
1237 Defined *nonErasedCatBody =
1238 tryGetDefinedAtIsecOffset(catListIsec, catListIsecOffset);
1239 assert(nonErasedCatBody && "Failed to relocate non-deleted category");
1240
1241 // Allocate data for the new __objc_catlist slot
1242 llvm::ArrayRef<uint8_t> bodyData = newSectionData(target->wordSize);
1243
1244 // We mark the __objc_catlist slot as belonging to the same file as the
1245 // category
1246 ObjFile *objFile = dyn_cast<ObjFile>(nonErasedCatBody->getFile());
1247
1248 ConcatInputSection *listSec = make<ConcatInputSection>(
1249 *infoCategoryWriter.catListInfo.inputSection, bodyData,
1250 infoCategoryWriter.catListInfo.align);
1251 listSec->parent = infoCategoryWriter.catListInfo.outputSection;
1252 listSec->live = true;
1253
1254 std::string slotSymName = "<__objc_catlist slot for category ";
1255 slotSymName += nonErasedCatBody->getName();
1256 slotSymName += ">";
1257
1258 Defined *catListSlotSym = make<Defined>(
1259 newStringData(slotSymName.c_str()), /*file=*/objFile, listSec,
1260 /*value=*/0, bodyData.size(),
1261 /*isWeakDef=*/false, /*isExternal=*/false, /*isPrivateExtern=*/false,
1262 /*includeInSymtab=*/false, /*isReferencedDynamically=*/false,
1263 /*noDeadStrip=*/false, /*isWeakDefCanBeHidden=*/false);
1264
1265 catListSlotSym->used = true;
1266 objFile->symbols.push_back(catListSlotSym);
1267 addInputSection(listSec);
1268
1269 // Now link the category body into the newly created slot
1270 createSymbolReference(catListSlotSym, nonErasedCatBody, 0,
1271 infoCategoryWriter.catListInfo.relocTemplate);
1272 }
1273 }
1274 }
1275
eraseISec(ConcatInputSection * isec)1276 void ObjcCategoryMerger::eraseISec(ConcatInputSection *isec) {
1277 isec->live = false;
1278 for (auto &sym : isec->symbols)
1279 sym->used = false;
1280 }
1281
1282 // This fully erases the merged categories, including their body, their names,
1283 // their method/protocol/prop lists and the __objc_catlist entries that link to
1284 // them.
eraseMergedCategories()1285 void ObjcCategoryMerger::eraseMergedCategories() {
1286 // Map of InputSection to a set of offsets of the categories that were merged
1287 MapVector<ConcatInputSection *, std::set<uint64_t>> catListToErasedOffsets;
1288
1289 for (auto &mapEntry : categoryMap) {
1290 for (InfoInputCategory &catInfo : mapEntry.second) {
1291 if (catInfo.wasMerged) {
1292 eraseISec(catInfo.catListIsec);
1293 catListToErasedOffsets[catInfo.catListIsec].insert(
1294 catInfo.offCatListIsec);
1295 }
1296 }
1297 }
1298
1299 // If there were categories that we did not erase, we need to generate a new
1300 // __objc_catList that contains only the un-merged categories, and get rid of
1301 // the references to the ones we merged.
1302 generateCatListForNonErasedCategories(catListToErasedOffsets);
1303
1304 // Erase the old method lists & names of the categories that were merged
1305 for (auto &mapEntry : categoryMap) {
1306 for (InfoInputCategory &catInfo : mapEntry.second) {
1307 if (!catInfo.wasMerged)
1308 continue;
1309
1310 eraseISec(catInfo.catBodyIsec);
1311
1312 // We can't erase 'catLayout.nameOffset' for either Swift or ObjC
1313 // categories because the name will sometimes also be used for other
1314 // purposes.
1315 // For Swift, see usages of 'l_.str.11.SimpleClass' in
1316 // objc-category-merging-swift.s
1317 // For ObjC, see usages of 'l_OBJC_CLASS_NAME_.1' in
1318 // objc-category-merging-erase-objc-name-test.s
1319 // TODO: handle the above in a smarter way
1320
1321 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1322 catLayout.instanceMethodsOffset);
1323 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1324 catLayout.classMethodsOffset);
1325 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1326 catLayout.protocolsOffset);
1327 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1328 catLayout.classPropsOffset);
1329 tryEraseDefinedAtIsecOffset(catInfo.catBodyIsec,
1330 catLayout.instancePropsOffset);
1331 }
1332 }
1333 }
1334
doMerge()1335 void ObjcCategoryMerger::doMerge() {
1336 collectAndValidateCategoriesData();
1337
1338 for (auto &[baseClass, catInfos] : categoryMap) {
1339 bool merged = false;
1340 if (auto *baseClassDef = dyn_cast<Defined>(baseClass)) {
1341 // Merge all categories into the base class
1342 merged = mergeCategoriesIntoBaseClass(baseClassDef, catInfos);
1343 } else if (catInfos.size() > 1) {
1344 // Merge all categories into a new, single category
1345 merged = mergeCategoriesIntoSingleCategory(catInfos);
1346 }
1347 if (!merged)
1348 warn("ObjC category merging skipped for class symbol' " +
1349 baseClass->getName().str() + "'\n");
1350 }
1351
1352 // Erase all categories that were merged
1353 eraseMergedCategories();
1354 }
1355
doCleanup()1356 void ObjcCategoryMerger::doCleanup() { generatedSectionData.clear(); }
1357
newStringData(const char * str)1358 StringRef ObjcCategoryMerger::newStringData(const char *str) {
1359 uint32_t len = strlen(str);
1360 uint32_t bufSize = len + 1;
1361 SmallVector<uint8_t> &data = newSectionData(bufSize);
1362 char *strData = reinterpret_cast<char *>(data.data());
1363 // Copy the string chars and null-terminator
1364 memcpy(strData, str, bufSize);
1365 return StringRef(strData, len);
1366 }
1367
newSectionData(uint32_t size)1368 SmallVector<uint8_t> &ObjcCategoryMerger::newSectionData(uint32_t size) {
1369 generatedSectionData.push_back(
1370 std::make_unique<SmallVector<uint8_t>>(size, 0));
1371 return *generatedSectionData.back();
1372 }
1373
1374 } // namespace
1375
mergeCategories()1376 void objc::mergeCategories() {
1377 TimeTraceScope timeScope("ObjcCategoryMerger");
1378
1379 ObjcCategoryMerger merger(inputSections);
1380 merger.doMerge();
1381 }
1382
doCleanup()1383 void objc::doCleanup() { ObjcCategoryMerger::doCleanup(); }
1384
1385 ObjcCategoryMerger::SourceLanguage
getClassSymSourceLang(const Defined * classSym)1386 ObjcCategoryMerger::getClassSymSourceLang(const Defined *classSym) {
1387 if (classSym->getName().starts_with(objc::symbol_names::swift_objc_klass))
1388 return SourceLanguage::Swift;
1389
1390 // If the symbol name matches the ObjC prefix, we don't necessarely know this
1391 // comes from ObjC, since Swift creates ObjC-like alias symbols for some Swift
1392 // classes. Ex:
1393 // .globl _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1394 // .private_extern _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass
1395 // .set _OBJC_CLASS_$__TtC11MyTestClass11MyTestClass, _$s11MyTestClassAACN
1396 //
1397 // So we scan for symbols with the same address and check for the Swift class
1398 if (classSym->getName().starts_with(objc::symbol_names::klass)) {
1399 for (auto &sym : classSym->originalIsec->symbols)
1400 if (sym->value == classSym->value)
1401 if (sym->getName().starts_with(objc::symbol_names::swift_objc_klass))
1402 return SourceLanguage::Swift;
1403 return SourceLanguage::ObjC;
1404 }
1405
1406 llvm_unreachable("Unexpected class symbol name during category merging");
1407 }
1408
mergeCategoriesIntoBaseClass(const Defined * baseClass,std::vector<InfoInputCategory> & categories)1409 bool ObjcCategoryMerger::mergeCategoriesIntoBaseClass(
1410 const Defined *baseClass, std::vector<InfoInputCategory> &categories) {
1411 assert(categories.size() >= 1 && "Expected at least one category to merge");
1412
1413 // Collect all the info from the categories
1414 ClassExtensionInfo extInfo(catLayout);
1415 extInfo.baseClass = baseClass;
1416 extInfo.baseClassSourceLanguage = getClassSymSourceLang(baseClass);
1417
1418 for (auto &catInfo : categories)
1419 if (!parseCatInfoToExtInfo(catInfo, extInfo))
1420 return false;
1421
1422 // Get metadata for the base class
1423 Defined *metaRo = getClassRo(baseClass, /*getMetaRo=*/true);
1424 ConcatInputSection *metaIsec = dyn_cast<ConcatInputSection>(metaRo->isec());
1425 Defined *classRo = getClassRo(baseClass, /*getMetaRo=*/false);
1426 ConcatInputSection *classIsec = dyn_cast<ConcatInputSection>(classRo->isec());
1427
1428 // Now collect the info from the base class from the various lists in the
1429 // class metadata
1430
1431 // Protocol lists are a special case - the same protocol list is in classRo
1432 // and metaRo, so we only need to parse it once
1433 parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1434 extInfo.protocols, extInfo.baseClassSourceLanguage);
1435
1436 // Check that the classRo and metaRo protocol lists are identical
1437 assert(parseProtocolListInfo(classIsec, roClassLayout.baseProtocolsOffset,
1438 extInfo.baseClassSourceLanguage) ==
1439 parseProtocolListInfo(metaIsec, roClassLayout.baseProtocolsOffset,
1440 extInfo.baseClassSourceLanguage) &&
1441 "Category merger expects classRo and metaRo to have the same protocol "
1442 "list");
1443
1444 parsePointerListInfo(metaIsec, roClassLayout.baseMethodsOffset,
1445 extInfo.classMethods);
1446 parsePointerListInfo(classIsec, roClassLayout.baseMethodsOffset,
1447 extInfo.instanceMethods);
1448
1449 parsePointerListInfo(metaIsec, roClassLayout.basePropertiesOffset,
1450 extInfo.classProps);
1451 parsePointerListInfo(classIsec, roClassLayout.basePropertiesOffset,
1452 extInfo.instanceProps);
1453
1454 // Erase the old lists - these will be generated and replaced
1455 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseMethodsOffset);
1456 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.baseProtocolsOffset);
1457 eraseSymbolAtIsecOffset(metaIsec, roClassLayout.basePropertiesOffset);
1458 eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseMethodsOffset);
1459 eraseSymbolAtIsecOffset(classIsec, roClassLayout.baseProtocolsOffset);
1460 eraseSymbolAtIsecOffset(classIsec, roClassLayout.basePropertiesOffset);
1461
1462 // Emit the newly merged lists - first into the meta RO then into the class RO
1463 // First we emit and link the protocol list into the meta RO. Then we link it
1464 // in the classRo as well (they're supposed to be identical)
1465 if (Defined *protoListSym =
1466 emitAndLinkProtocolList(metaRo, roClassLayout.baseProtocolsOffset,
1467 extInfo, extInfo.protocols)) {
1468 createSymbolReference(classRo, protoListSym,
1469 roClassLayout.baseProtocolsOffset,
1470 infoCategoryWriter.catBodyInfo.relocTemplate);
1471 }
1472
1473 emitAndLinkPointerList(metaRo, roClassLayout.baseMethodsOffset, extInfo,
1474 extInfo.classMethods);
1475 emitAndLinkPointerList(classRo, roClassLayout.baseMethodsOffset, extInfo,
1476 extInfo.instanceMethods);
1477
1478 emitAndLinkPointerList(metaRo, roClassLayout.basePropertiesOffset, extInfo,
1479 extInfo.classProps);
1480
1481 emitAndLinkPointerList(classRo, roClassLayout.basePropertiesOffset, extInfo,
1482 extInfo.instanceProps);
1483
1484 // Mark all the categories as merged - this will be used to erase them later
1485 for (auto &catInfo : categories)
1486 catInfo.wasMerged = true;
1487
1488 return true;
1489 }
1490
1491 // Erase the symbol at a given offset in an InputSection
eraseSymbolAtIsecOffset(ConcatInputSection * isec,uint32_t offset)1492 void ObjcCategoryMerger::eraseSymbolAtIsecOffset(ConcatInputSection *isec,
1493 uint32_t offset) {
1494 Defined *sym = tryGetDefinedAtIsecOffset(isec, offset);
1495 if (!sym)
1496 return;
1497
1498 // Remove the symbol from isec->symbols
1499 assert(isa<Defined>(sym) && "Can only erase a Defined");
1500 llvm::erase(isec->symbols, sym);
1501
1502 // Remove the relocs that refer to this symbol
1503 auto removeAtOff = [offset](Reloc const &r) { return r.offset == offset; };
1504 llvm::erase_if(isec->relocs, removeAtOff);
1505
1506 // Now, if the symbol fully occupies a ConcatInputSection, we can also erase
1507 // the whole ConcatInputSection
1508 if (ConcatInputSection *cisec = dyn_cast<ConcatInputSection>(sym->isec()))
1509 if (cisec->data.size() == sym->size)
1510 eraseISec(cisec);
1511 }
1512