1 //===- InputSection.cpp ---------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputSection.h" 10 #include "ConcatOutputSection.h" 11 #include "Config.h" 12 #include "InputFiles.h" 13 #include "OutputSegment.h" 14 #include "Symbols.h" 15 #include "SyntheticSections.h" 16 #include "Target.h" 17 #include "UnwindInfoSection.h" 18 #include "Writer.h" 19 20 #include "lld/Common/ErrorHandler.h" 21 #include "lld/Common/Memory.h" 22 #include "llvm/Support/Endian.h" 23 #include "llvm/Support/xxhash.h" 24 25 using namespace llvm; 26 using namespace llvm::MachO; 27 using namespace llvm::support; 28 using namespace lld; 29 using namespace lld::macho; 30 31 // Verify ConcatInputSection's size on 64-bit builds. The size of std::vector 32 // can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL), 33 // so account for that. 34 static_assert(sizeof(void *) != 8 || 35 sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 88, 36 "Try to minimize ConcatInputSection's size, we create many " 37 "instances of it"); 38 39 std::vector<ConcatInputSection *> macho::inputSections; 40 41 uint64_t InputSection::getFileSize() const { 42 return isZeroFill(getFlags()) ? 0 : getSize(); 43 } 44 45 uint64_t InputSection::getVA(uint64_t off) const { 46 return parent->addr + getOffset(off); 47 } 48 49 static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) { 50 const RelocAttrs &relocAttrs = target->getRelocAttrs(type); 51 if (relocAttrs.hasAttr(RelocAttrBits::BRANCH)) 52 return sym->resolveBranchVA(); 53 if (relocAttrs.hasAttr(RelocAttrBits::GOT)) 54 return sym->resolveGotVA(); 55 if (relocAttrs.hasAttr(RelocAttrBits::TLV)) 56 return sym->resolveTlvVA(); 57 return sym->getVA(); 58 } 59 60 const Defined *InputSection::getContainingSymbol(uint64_t off) const { 61 auto *nextSym = llvm::upper_bound( 62 symbols, off, [](uint64_t a, const Defined *b) { return a < b->value; }); 63 if (nextSym == symbols.begin()) 64 return nullptr; 65 return *std::prev(nextSym); 66 } 67 68 std::string InputSection::getLocation(uint64_t off) const { 69 // First, try to find a symbol that's near the offset. Use it as a reference 70 // point. 71 if (auto *sym = getContainingSymbol(off)) 72 return (toString(getFile()) + ":(symbol " + toString(*sym) + "+0x" + 73 Twine::utohexstr(off - sym->value) + ")") 74 .str(); 75 76 // If that fails, use the section itself as a reference point. 77 for (const Subsection &subsec : section.subsections) { 78 if (subsec.isec == this) { 79 off += subsec.offset; 80 break; 81 } 82 } 83 84 return (toString(getFile()) + ":(" + getName() + "+0x" + 85 Twine::utohexstr(off) + ")") 86 .str(); 87 } 88 89 std::string InputSection::getSourceLocation(uint64_t off) const { 90 auto *obj = dyn_cast_or_null<ObjFile>(getFile()); 91 if (!obj) 92 return {}; 93 94 DWARFCache *dwarf = obj->getDwarf(); 95 if (!dwarf) 96 return std::string(); 97 98 for (const Subsection &subsec : section.subsections) { 99 if (subsec.isec == this) { 100 off += subsec.offset; 101 break; 102 } 103 } 104 105 auto createMsg = [&](StringRef path, unsigned line) { 106 std::string filename = sys::path::filename(path).str(); 107 std::string lineStr = (":" + Twine(line)).str(); 108 if (filename == path) 109 return filename + lineStr; 110 return (filename + lineStr + " (" + path + lineStr + ")").str(); 111 }; 112 113 // First, look up a function for a given offset. 114 if (std::optional<DILineInfo> li = dwarf->getDILineInfo( 115 section.addr + off, object::SectionedAddress::UndefSection)) 116 return createMsg(li->FileName, li->Line); 117 118 // If it failed, look up again as a variable. 119 if (const Defined *sym = getContainingSymbol(off)) { 120 // Symbols are generally prefixed with an underscore, which is not included 121 // in the debug information. 122 StringRef symName = sym->getName(); 123 if (!symName.empty() && symName[0] == '_') 124 symName = symName.substr(1); 125 126 if (std::optional<std::pair<std::string, unsigned>> fileLine = 127 dwarf->getVariableLoc(symName)) 128 return createMsg(fileLine->first, fileLine->second); 129 } 130 131 // Try to get the source file's name from the DWARF information. 132 if (obj->compileUnit) 133 return obj->sourceFile(); 134 135 return {}; 136 } 137 138 const Reloc *InputSection::getRelocAt(uint32_t off) const { 139 auto it = llvm::find_if( 140 relocs, [=](const macho::Reloc &r) { return r.offset == off; }); 141 if (it == relocs.end()) 142 return nullptr; 143 return &*it; 144 } 145 146 void ConcatInputSection::foldIdentical(ConcatInputSection *copy) { 147 align = std::max(align, copy->align); 148 copy->live = false; 149 copy->wasCoalesced = true; 150 copy->replacement = this; 151 for (auto ©Sym : copy->symbols) { 152 copySym->wasIdenticalCodeFolded = true; 153 copySym->size = 0; 154 } 155 156 symbols.insert(symbols.end(), copy->symbols.begin(), copy->symbols.end()); 157 copy->symbols.clear(); 158 159 // Remove duplicate compact unwind info for symbols at the same address. 160 if (symbols.empty()) 161 return; 162 for (auto it = symbols.begin() + 1; it != symbols.end(); ++it) { 163 assert((*it)->value == 0); 164 (*it)->unwindEntry = nullptr; 165 } 166 } 167 168 void ConcatInputSection::writeTo(uint8_t *buf) { 169 assert(!shouldOmitFromOutput()); 170 171 if (getFileSize() == 0) 172 return; 173 174 memcpy(buf, data.data(), data.size()); 175 176 for (size_t i = 0; i < relocs.size(); i++) { 177 const Reloc &r = relocs[i]; 178 uint8_t *loc = buf + r.offset; 179 uint64_t referentVA = 0; 180 181 const bool needsFixup = config->emitChainedFixups && 182 target->hasAttr(r.type, RelocAttrBits::UNSIGNED); 183 if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) { 184 const Symbol *fromSym = r.referent.get<Symbol *>(); 185 const Reloc &minuend = relocs[++i]; 186 uint64_t minuendVA; 187 if (const Symbol *toSym = minuend.referent.dyn_cast<Symbol *>()) 188 minuendVA = toSym->getVA() + minuend.addend; 189 else { 190 auto *referentIsec = minuend.referent.get<InputSection *>(); 191 assert(!::shouldOmitFromOutput(referentIsec)); 192 minuendVA = referentIsec->getVA(minuend.addend); 193 } 194 referentVA = minuendVA - fromSym->getVA(); 195 } else if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) { 196 if (target->hasAttr(r.type, RelocAttrBits::LOAD) && 197 !referentSym->isInGot()) 198 target->relaxGotLoad(loc, r.type); 199 // For dtrace symbols, do not handle them as normal undefined symbols 200 if (referentSym->getName().starts_with("___dtrace_")) { 201 // Change dtrace call site to pre-defined instructions 202 target->handleDtraceReloc(referentSym, r, loc); 203 continue; 204 } 205 referentVA = resolveSymbolVA(referentSym, r.type) + r.addend; 206 207 if (isThreadLocalVariables(getFlags()) && isa<Defined>(referentSym)) { 208 // References from thread-local variable sections are treated as offsets 209 // relative to the start of the thread-local data memory area, which 210 // is initialized via copying all the TLV data sections (which are all 211 // contiguous). 212 referentVA -= firstTLVDataSection->addr; 213 } else if (needsFixup) { 214 writeChainedFixup(loc, referentSym, r.addend); 215 continue; 216 } 217 } else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) { 218 assert(!::shouldOmitFromOutput(referentIsec)); 219 referentVA = referentIsec->getVA(r.addend); 220 221 if (needsFixup) { 222 writeChainedRebase(loc, referentVA); 223 continue; 224 } 225 } 226 target->relocateOne(loc, r, referentVA, getVA() + r.offset); 227 } 228 } 229 230 ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName, 231 StringRef sectName, 232 uint32_t flags, 233 ArrayRef<uint8_t> data, 234 uint32_t align) { 235 Section §ion = 236 *make<Section>(/*file=*/nullptr, segName, sectName, flags, /*addr=*/0); 237 auto isec = make<ConcatInputSection>(section, data, align); 238 section.subsections.push_back({0, isec}); 239 return isec; 240 } 241 242 void CStringInputSection::splitIntoPieces() { 243 size_t off = 0; 244 StringRef s = toStringRef(data); 245 while (!s.empty()) { 246 size_t end = s.find(0); 247 if (end == StringRef::npos) 248 fatal(getLocation(off) + ": string is not null terminated"); 249 uint32_t hash = deduplicateLiterals ? xxh3_64bits(s.take_front(end)) : 0; 250 pieces.emplace_back(off, hash); 251 size_t size = end + 1; // include null terminator 252 s = s.substr(size); 253 off += size; 254 } 255 } 256 257 StringPiece &CStringInputSection::getStringPiece(uint64_t off) { 258 if (off >= data.size()) 259 fatal(toString(this) + ": offset is outside the section"); 260 261 auto it = 262 partition_point(pieces, [=](StringPiece p) { return p.inSecOff <= off; }); 263 return it[-1]; 264 } 265 266 const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const { 267 return const_cast<CStringInputSection *>(this)->getStringPiece(off); 268 } 269 270 size_t CStringInputSection::getStringPieceIndex(uint64_t off) const { 271 if (off >= data.size()) 272 fatal(toString(this) + ": offset is outside the section"); 273 274 auto it = 275 partition_point(pieces, [=](StringPiece p) { return p.inSecOff <= off; }); 276 return std::distance(pieces.begin(), it) - 1; 277 } 278 279 uint64_t CStringInputSection::getOffset(uint64_t off) const { 280 const StringPiece &piece = getStringPiece(off); 281 uint64_t addend = off - piece.inSecOff; 282 return piece.outSecOff + addend; 283 } 284 285 WordLiteralInputSection::WordLiteralInputSection(const Section §ion, 286 ArrayRef<uint8_t> data, 287 uint32_t align) 288 : InputSection(WordLiteralKind, section, data, align) { 289 switch (sectionType(getFlags())) { 290 case S_4BYTE_LITERALS: 291 power2LiteralSize = 2; 292 break; 293 case S_8BYTE_LITERALS: 294 power2LiteralSize = 3; 295 break; 296 case S_16BYTE_LITERALS: 297 power2LiteralSize = 4; 298 break; 299 default: 300 llvm_unreachable("invalid literal section type"); 301 } 302 303 live.resize(data.size() >> power2LiteralSize, !config->deadStrip); 304 } 305 306 uint64_t WordLiteralInputSection::getOffset(uint64_t off) const { 307 auto *osec = cast<WordLiteralSection>(parent); 308 const uintptr_t buf = reinterpret_cast<uintptr_t>(data.data()); 309 switch (sectionType(getFlags())) { 310 case S_4BYTE_LITERALS: 311 return osec->getLiteral4Offset(buf + (off & ~3LLU)) | (off & 3); 312 case S_8BYTE_LITERALS: 313 return osec->getLiteral8Offset(buf + (off & ~7LLU)) | (off & 7); 314 case S_16BYTE_LITERALS: 315 return osec->getLiteral16Offset(buf + (off & ~15LLU)) | (off & 15); 316 default: 317 llvm_unreachable("invalid literal section type"); 318 } 319 } 320 321 bool macho::isCodeSection(const InputSection *isec) { 322 uint32_t type = sectionType(isec->getFlags()); 323 if (type != S_REGULAR && type != S_COALESCED) 324 return false; 325 326 uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR; 327 if (attr == S_ATTR_PURE_INSTRUCTIONS) 328 return true; 329 330 if (isec->getSegName() == segment_names::text) 331 return StringSwitch<bool>(isec->getName()) 332 .Cases(section_names::textCoalNt, section_names::staticInit, true) 333 .Default(false); 334 335 return false; 336 } 337 338 bool macho::isCfStringSection(const InputSection *isec) { 339 return isec->getName() == section_names::cfString && 340 isec->getSegName() == segment_names::data; 341 } 342 343 bool macho::isClassRefsSection(const InputSection *isec) { 344 return isec->getName() == section_names::objcClassRefs && 345 isec->getSegName() == segment_names::data; 346 } 347 348 bool macho::isSelRefsSection(const InputSection *isec) { 349 return isec->getName() == section_names::objcSelrefs && 350 isec->getSegName() == segment_names::data; 351 } 352 353 bool macho::isEhFrameSection(const InputSection *isec) { 354 return isec->getName() == section_names::ehFrame && 355 isec->getSegName() == segment_names::text; 356 } 357 358 bool macho::isGccExceptTabSection(const InputSection *isec) { 359 return isec->getName() == section_names::gccExceptTab && 360 isec->getSegName() == segment_names::text; 361 } 362 363 std::string lld::toString(const InputSection *isec) { 364 return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str(); 365 } 366