//===- LinkerScript.cpp ---------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the parser/evaluator of the linker script.
//
//===----------------------------------------------------------------------===//

#include "LinkerScript.h"
#include "Config.h"
#include "InputSection.h"
#include "OutputSections.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "Writer.h"
#include "lld/Common/Memory.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Parallel.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TimeProfiler.h"
340b57cec5SDimitry Andric #include <algorithm> 350b57cec5SDimitry Andric #include <cassert> 360b57cec5SDimitry Andric #include <cstddef> 370b57cec5SDimitry Andric #include <cstdint> 380b57cec5SDimitry Andric #include <iterator> 390b57cec5SDimitry Andric #include <limits> 400b57cec5SDimitry Andric #include <string> 410b57cec5SDimitry Andric #include <vector> 420b57cec5SDimitry Andric 430b57cec5SDimitry Andric using namespace llvm; 440b57cec5SDimitry Andric using namespace llvm::ELF; 450b57cec5SDimitry Andric using namespace llvm::object; 460b57cec5SDimitry Andric using namespace llvm::support::endian; 475ffd83dbSDimitry Andric using namespace lld; 485ffd83dbSDimitry Andric using namespace lld::elf; 490b57cec5SDimitry Andric 505ffd83dbSDimitry Andric LinkerScript *elf::script; 510b57cec5SDimitry Andric 5285868e8aSDimitry Andric static uint64_t getOutputSectionVA(SectionBase *sec) { 5385868e8aSDimitry Andric OutputSection *os = sec->getOutputSection(); 5485868e8aSDimitry Andric assert(os && "input section has no output section assigned"); 5585868e8aSDimitry Andric return os ? os->addr : 0; 560b57cec5SDimitry Andric } 570b57cec5SDimitry Andric 580b57cec5SDimitry Andric uint64_t ExprValue::getValue() const { 590b57cec5SDimitry Andric if (sec) 6085868e8aSDimitry Andric return alignTo(sec->getOffset(val) + getOutputSectionVA(sec), 610b57cec5SDimitry Andric alignment); 620b57cec5SDimitry Andric return alignTo(val, alignment); 630b57cec5SDimitry Andric } 640b57cec5SDimitry Andric 650b57cec5SDimitry Andric uint64_t ExprValue::getSecAddr() const { 660b57cec5SDimitry Andric if (sec) 6785868e8aSDimitry Andric return sec->getOffset(0) + getOutputSectionVA(sec); 680b57cec5SDimitry Andric return 0; 690b57cec5SDimitry Andric } 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric uint64_t ExprValue::getSectionOffset() const { 720b57cec5SDimitry Andric // If the alignment is trivial, we don't have to compute the full 730b57cec5SDimitry Andric // value to know the offset. 
This allows this function to succeed in 740b57cec5SDimitry Andric // cases where the output section is not yet known. 7585868e8aSDimitry Andric if (alignment == 1 && !sec) 760b57cec5SDimitry Andric return val; 770b57cec5SDimitry Andric return getValue() - getSecAddr(); 780b57cec5SDimitry Andric } 790b57cec5SDimitry Andric 800b57cec5SDimitry Andric OutputSection *LinkerScript::createOutputSection(StringRef name, 810b57cec5SDimitry Andric StringRef location) { 820b57cec5SDimitry Andric OutputSection *&secRef = nameToOutputSection[name]; 830b57cec5SDimitry Andric OutputSection *sec; 840b57cec5SDimitry Andric if (secRef && secRef->location.empty()) { 850b57cec5SDimitry Andric // There was a forward reference. 860b57cec5SDimitry Andric sec = secRef; 870b57cec5SDimitry Andric } else { 880b57cec5SDimitry Andric sec = make<OutputSection>(name, SHT_PROGBITS, 0); 890b57cec5SDimitry Andric if (!secRef) 900b57cec5SDimitry Andric secRef = sec; 910b57cec5SDimitry Andric } 925ffd83dbSDimitry Andric sec->location = std::string(location); 930b57cec5SDimitry Andric return sec; 940b57cec5SDimitry Andric } 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric OutputSection *LinkerScript::getOrCreateOutputSection(StringRef name) { 970b57cec5SDimitry Andric OutputSection *&cmdRef = nameToOutputSection[name]; 980b57cec5SDimitry Andric if (!cmdRef) 990b57cec5SDimitry Andric cmdRef = make<OutputSection>(name, SHT_PROGBITS, 0); 1000b57cec5SDimitry Andric return cmdRef; 1010b57cec5SDimitry Andric } 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric // Expands the memory region by the specified size. 
1040b57cec5SDimitry Andric static void expandMemoryRegion(MemoryRegion *memRegion, uint64_t size, 1050b57cec5SDimitry Andric StringRef regionName, StringRef secName) { 1060b57cec5SDimitry Andric memRegion->curPos += size; 1075ffd83dbSDimitry Andric uint64_t newSize = memRegion->curPos - (memRegion->origin)().getValue(); 1085ffd83dbSDimitry Andric uint64_t length = (memRegion->length)().getValue(); 1095ffd83dbSDimitry Andric if (newSize > length) 1100b57cec5SDimitry Andric error("section '" + secName + "' will not fit in region '" + regionName + 1115ffd83dbSDimitry Andric "': overflowed by " + Twine(newSize - length) + " bytes"); 1120b57cec5SDimitry Andric } 1130b57cec5SDimitry Andric 1140b57cec5SDimitry Andric void LinkerScript::expandMemoryRegions(uint64_t size) { 1150b57cec5SDimitry Andric if (ctx->memRegion) 1160b57cec5SDimitry Andric expandMemoryRegion(ctx->memRegion, size, ctx->memRegion->name, 1170b57cec5SDimitry Andric ctx->outSec->name); 1180b57cec5SDimitry Andric // Only expand the LMARegion if it is different from memRegion. 
1190b57cec5SDimitry Andric if (ctx->lmaRegion && ctx->memRegion != ctx->lmaRegion) 1200b57cec5SDimitry Andric expandMemoryRegion(ctx->lmaRegion, size, ctx->lmaRegion->name, 1210b57cec5SDimitry Andric ctx->outSec->name); 1220b57cec5SDimitry Andric } 1230b57cec5SDimitry Andric 1240b57cec5SDimitry Andric void LinkerScript::expandOutputSection(uint64_t size) { 1250b57cec5SDimitry Andric ctx->outSec->size += size; 1260b57cec5SDimitry Andric expandMemoryRegions(size); 1270b57cec5SDimitry Andric } 1280b57cec5SDimitry Andric 1290b57cec5SDimitry Andric void LinkerScript::setDot(Expr e, const Twine &loc, bool inSec) { 1300b57cec5SDimitry Andric uint64_t val = e().getValue(); 1310b57cec5SDimitry Andric if (val < dot && inSec) 1320b57cec5SDimitry Andric error(loc + ": unable to move location counter backward for: " + 1330b57cec5SDimitry Andric ctx->outSec->name); 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric // Update to location counter means update to section size. 1360b57cec5SDimitry Andric if (inSec) 1370b57cec5SDimitry Andric expandOutputSection(val - dot); 1380b57cec5SDimitry Andric 1390b57cec5SDimitry Andric dot = val; 1400b57cec5SDimitry Andric } 1410b57cec5SDimitry Andric 1420b57cec5SDimitry Andric // Used for handling linker symbol assignments, for both finalizing 1430b57cec5SDimitry Andric // their values and doing early declarations. Returns true if symbol 1440b57cec5SDimitry Andric // should be defined from linker script. 1450b57cec5SDimitry Andric static bool shouldDefineSym(SymbolAssignment *cmd) { 1460b57cec5SDimitry Andric if (cmd->name == ".") 1470b57cec5SDimitry Andric return false; 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric if (!cmd->provide) 1500b57cec5SDimitry Andric return true; 1510b57cec5SDimitry Andric 1520b57cec5SDimitry Andric // If a symbol was in PROVIDE(), we need to define it only 1530b57cec5SDimitry Andric // when it is a referenced undefined symbol. 
1540b57cec5SDimitry Andric Symbol *b = symtab->find(cmd->name); 1550b57cec5SDimitry Andric if (b && !b->isDefined()) 1560b57cec5SDimitry Andric return true; 1570b57cec5SDimitry Andric return false; 1580b57cec5SDimitry Andric } 1590b57cec5SDimitry Andric 16085868e8aSDimitry Andric // Called by processSymbolAssignments() to assign definitions to 16185868e8aSDimitry Andric // linker-script-defined symbols. 1620b57cec5SDimitry Andric void LinkerScript::addSymbol(SymbolAssignment *cmd) { 1630b57cec5SDimitry Andric if (!shouldDefineSym(cmd)) 1640b57cec5SDimitry Andric return; 1650b57cec5SDimitry Andric 1660b57cec5SDimitry Andric // Define a symbol. 1670b57cec5SDimitry Andric ExprValue value = cmd->expression(); 1680b57cec5SDimitry Andric SectionBase *sec = value.isAbsolute() ? nullptr : value.sec; 1690b57cec5SDimitry Andric uint8_t visibility = cmd->hidden ? STV_HIDDEN : STV_DEFAULT; 1700b57cec5SDimitry Andric 1710b57cec5SDimitry Andric // When this function is called, section addresses have not been 1720b57cec5SDimitry Andric // fixed yet. So, we may or may not know the value of the RHS 1730b57cec5SDimitry Andric // expression. 1740b57cec5SDimitry Andric // 1750b57cec5SDimitry Andric // For example, if an expression is `x = 42`, we know x is always 42. 1760b57cec5SDimitry Andric // However, if an expression is `x = .`, there's no way to know its 1770b57cec5SDimitry Andric // value at the moment. 1780b57cec5SDimitry Andric // 1790b57cec5SDimitry Andric // We want to set symbol values early if we can. This allows us to 1800b57cec5SDimitry Andric // use symbols as variables in linker scripts. Doing so allows us to 1810b57cec5SDimitry Andric // write expressions like this: `alignment = 16; . = ALIGN(., alignment)`. 1820b57cec5SDimitry Andric uint64_t symValue = value.sec ? 
0 : value.getValue(); 1830b57cec5SDimitry Andric 18416d6b3b3SDimitry Andric Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, value.type, 18585868e8aSDimitry Andric symValue, 0, sec); 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric Symbol *sym = symtab->insert(cmd->name); 18885868e8aSDimitry Andric sym->mergeProperties(newSym); 18985868e8aSDimitry Andric sym->replace(newSym); 1900b57cec5SDimitry Andric cmd->sym = cast<Defined>(sym); 1910b57cec5SDimitry Andric } 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric // This function is called from LinkerScript::declareSymbols. 1940b57cec5SDimitry Andric // It creates a placeholder symbol if needed. 1950b57cec5SDimitry Andric static void declareSymbol(SymbolAssignment *cmd) { 1960b57cec5SDimitry Andric if (!shouldDefineSym(cmd)) 1970b57cec5SDimitry Andric return; 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric uint8_t visibility = cmd->hidden ? STV_HIDDEN : STV_DEFAULT; 20085868e8aSDimitry Andric Defined newSym(nullptr, cmd->name, STB_GLOBAL, visibility, STT_NOTYPE, 0, 0, 2010b57cec5SDimitry Andric nullptr); 2020b57cec5SDimitry Andric 2030b57cec5SDimitry Andric // We can't calculate final value right now. 2040b57cec5SDimitry Andric Symbol *sym = symtab->insert(cmd->name); 20585868e8aSDimitry Andric sym->mergeProperties(newSym); 20685868e8aSDimitry Andric sym->replace(newSym); 2070b57cec5SDimitry Andric 2080b57cec5SDimitry Andric cmd->sym = cast<Defined>(sym); 2090b57cec5SDimitry Andric cmd->provide = false; 2100b57cec5SDimitry Andric sym->scriptDefined = true; 2110b57cec5SDimitry Andric } 2120b57cec5SDimitry Andric 21385868e8aSDimitry Andric using SymbolAssignmentMap = 21485868e8aSDimitry Andric DenseMap<const Defined *, std::pair<SectionBase *, uint64_t>>; 21585868e8aSDimitry Andric 21685868e8aSDimitry Andric // Collect section/value pairs of linker-script-defined symbols. This is used to 21785868e8aSDimitry Andric // check whether symbol values converge. 
21885868e8aSDimitry Andric static SymbolAssignmentMap 21985868e8aSDimitry Andric getSymbolAssignmentValues(const std::vector<BaseCommand *> §ionCommands) { 22085868e8aSDimitry Andric SymbolAssignmentMap ret; 22185868e8aSDimitry Andric for (BaseCommand *base : sectionCommands) { 22285868e8aSDimitry Andric if (auto *cmd = dyn_cast<SymbolAssignment>(base)) { 22385868e8aSDimitry Andric if (cmd->sym) // sym is nullptr for dot. 22485868e8aSDimitry Andric ret.try_emplace(cmd->sym, 22585868e8aSDimitry Andric std::make_pair(cmd->sym->section, cmd->sym->value)); 22685868e8aSDimitry Andric continue; 22785868e8aSDimitry Andric } 22885868e8aSDimitry Andric for (BaseCommand *sub_base : cast<OutputSection>(base)->sectionCommands) 22985868e8aSDimitry Andric if (auto *cmd = dyn_cast<SymbolAssignment>(sub_base)) 23085868e8aSDimitry Andric if (cmd->sym) 23185868e8aSDimitry Andric ret.try_emplace(cmd->sym, 23285868e8aSDimitry Andric std::make_pair(cmd->sym->section, cmd->sym->value)); 23385868e8aSDimitry Andric } 23485868e8aSDimitry Andric return ret; 23585868e8aSDimitry Andric } 23685868e8aSDimitry Andric 23785868e8aSDimitry Andric // Returns the lexicographical smallest (for determinism) Defined whose 23885868e8aSDimitry Andric // section/value has changed. 23985868e8aSDimitry Andric static const Defined * 24085868e8aSDimitry Andric getChangedSymbolAssignment(const SymbolAssignmentMap &oldValues) { 24185868e8aSDimitry Andric const Defined *changed = nullptr; 24285868e8aSDimitry Andric for (auto &it : oldValues) { 24385868e8aSDimitry Andric const Defined *sym = it.first; 24485868e8aSDimitry Andric if (std::make_pair(sym->section, sym->value) != it.second && 24585868e8aSDimitry Andric (!changed || sym->getName() < changed->getName())) 24685868e8aSDimitry Andric changed = sym; 24785868e8aSDimitry Andric } 24885868e8aSDimitry Andric return changed; 24985868e8aSDimitry Andric } 25085868e8aSDimitry Andric 2515ffd83dbSDimitry Andric // Process INSERT [AFTER|BEFORE] commands. 
For each command, we move the 2525ffd83dbSDimitry Andric // specified output section to the designated place. 2530b57cec5SDimitry Andric void LinkerScript::processInsertCommands() { 254*fe6060f1SDimitry Andric std::vector<OutputSection *> moves; 2555ffd83dbSDimitry Andric for (const InsertCommand &cmd : insertCommands) { 256*fe6060f1SDimitry Andric for (StringRef name : cmd.names) { 257*fe6060f1SDimitry Andric // If base is empty, it may have been discarded by 2585ffd83dbSDimitry Andric // adjustSectionsBeforeSorting(). We do not handle such output sections. 259*fe6060f1SDimitry Andric auto from = llvm::find_if(sectionCommands, [&](BaseCommand *base) { 260*fe6060f1SDimitry Andric return isa<OutputSection>(base) && 261*fe6060f1SDimitry Andric cast<OutputSection>(base)->name == name; 262*fe6060f1SDimitry Andric }); 2635ffd83dbSDimitry Andric if (from == sectionCommands.end()) 2640b57cec5SDimitry Andric continue; 265*fe6060f1SDimitry Andric moves.push_back(cast<OutputSection>(*from)); 2665ffd83dbSDimitry Andric sectionCommands.erase(from); 267*fe6060f1SDimitry Andric } 2680b57cec5SDimitry Andric 2695ffd83dbSDimitry Andric auto insertPos = llvm::find_if(sectionCommands, [&cmd](BaseCommand *base) { 2705ffd83dbSDimitry Andric auto *to = dyn_cast<OutputSection>(base); 2715ffd83dbSDimitry Andric return to != nullptr && to->name == cmd.where; 2725ffd83dbSDimitry Andric }); 2735ffd83dbSDimitry Andric if (insertPos == sectionCommands.end()) { 274*fe6060f1SDimitry Andric error("unable to insert " + cmd.names[0] + 2755ffd83dbSDimitry Andric (cmd.isAfter ? 
" after " : " before ") + cmd.where); 2765ffd83dbSDimitry Andric } else { 2775ffd83dbSDimitry Andric if (cmd.isAfter) 2785ffd83dbSDimitry Andric ++insertPos; 279*fe6060f1SDimitry Andric sectionCommands.insert(insertPos, moves.begin(), moves.end()); 2805ffd83dbSDimitry Andric } 281*fe6060f1SDimitry Andric moves.clear(); 2825ffd83dbSDimitry Andric } 2830b57cec5SDimitry Andric } 2840b57cec5SDimitry Andric 2850b57cec5SDimitry Andric // Symbols defined in script should not be inlined by LTO. At the same time 2860b57cec5SDimitry Andric // we don't know their final values until late stages of link. Here we scan 2870b57cec5SDimitry Andric // over symbol assignment commands and create placeholder symbols if needed. 2880b57cec5SDimitry Andric void LinkerScript::declareSymbols() { 2890b57cec5SDimitry Andric assert(!ctx); 2900b57cec5SDimitry Andric for (BaseCommand *base : sectionCommands) { 2910b57cec5SDimitry Andric if (auto *cmd = dyn_cast<SymbolAssignment>(base)) { 2920b57cec5SDimitry Andric declareSymbol(cmd); 2930b57cec5SDimitry Andric continue; 2940b57cec5SDimitry Andric } 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric // If the output section directive has constraints, 2970b57cec5SDimitry Andric // we can't say for sure if it is going to be included or not. 2980b57cec5SDimitry Andric // Skip such sections for now. Improve the checks if we ever 2990b57cec5SDimitry Andric // need symbols from that sections to be declared early. 
3000b57cec5SDimitry Andric auto *sec = cast<OutputSection>(base); 3010b57cec5SDimitry Andric if (sec->constraint != ConstraintKind::NoConstraint) 3020b57cec5SDimitry Andric continue; 3030b57cec5SDimitry Andric for (BaseCommand *base2 : sec->sectionCommands) 3040b57cec5SDimitry Andric if (auto *cmd = dyn_cast<SymbolAssignment>(base2)) 3050b57cec5SDimitry Andric declareSymbol(cmd); 3060b57cec5SDimitry Andric } 3070b57cec5SDimitry Andric } 3080b57cec5SDimitry Andric 3090b57cec5SDimitry Andric // This function is called from assignAddresses, while we are 3100b57cec5SDimitry Andric // fixing the output section addresses. This function is supposed 3110b57cec5SDimitry Andric // to set the final value for a given symbol assignment. 3120b57cec5SDimitry Andric void LinkerScript::assignSymbol(SymbolAssignment *cmd, bool inSec) { 3130b57cec5SDimitry Andric if (cmd->name == ".") { 3140b57cec5SDimitry Andric setDot(cmd->expression, cmd->location, inSec); 3150b57cec5SDimitry Andric return; 3160b57cec5SDimitry Andric } 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric if (!cmd->sym) 3190b57cec5SDimitry Andric return; 3200b57cec5SDimitry Andric 3210b57cec5SDimitry Andric ExprValue v = cmd->expression(); 3220b57cec5SDimitry Andric if (v.isAbsolute()) { 3230b57cec5SDimitry Andric cmd->sym->section = nullptr; 3240b57cec5SDimitry Andric cmd->sym->value = v.getValue(); 3250b57cec5SDimitry Andric } else { 3260b57cec5SDimitry Andric cmd->sym->section = v.sec; 3270b57cec5SDimitry Andric cmd->sym->value = v.getSectionOffset(); 3280b57cec5SDimitry Andric } 32916d6b3b3SDimitry Andric cmd->sym->type = v.type; 3300b57cec5SDimitry Andric } 3310b57cec5SDimitry Andric 332e8d8bef9SDimitry Andric static inline StringRef getFilename(const InputFile *file) { 333e8d8bef9SDimitry Andric return file ? 
file->getNameForScript() : StringRef(); 334e8d8bef9SDimitry Andric } 335e8d8bef9SDimitry Andric 336e8d8bef9SDimitry Andric bool InputSectionDescription::matchesFile(const InputFile *file) const { 337e8d8bef9SDimitry Andric if (filePat.isTrivialMatchAll()) 338e8d8bef9SDimitry Andric return true; 339e8d8bef9SDimitry Andric 340e8d8bef9SDimitry Andric if (!matchesFileCache || matchesFileCache->first != file) 341e8d8bef9SDimitry Andric matchesFileCache.emplace(file, filePat.match(getFilename(file))); 342e8d8bef9SDimitry Andric 343e8d8bef9SDimitry Andric return matchesFileCache->second; 344e8d8bef9SDimitry Andric } 345e8d8bef9SDimitry Andric 346e8d8bef9SDimitry Andric bool SectionPattern::excludesFile(const InputFile *file) const { 347e8d8bef9SDimitry Andric if (excludedFilePat.empty()) 348e8d8bef9SDimitry Andric return false; 349e8d8bef9SDimitry Andric 350e8d8bef9SDimitry Andric if (!excludesFileCache || excludesFileCache->first != file) 351e8d8bef9SDimitry Andric excludesFileCache.emplace(file, excludedFilePat.match(getFilename(file))); 352e8d8bef9SDimitry Andric 353e8d8bef9SDimitry Andric return excludesFileCache->second; 3540b57cec5SDimitry Andric } 3550b57cec5SDimitry Andric 3560b57cec5SDimitry Andric bool LinkerScript::shouldKeep(InputSectionBase *s) { 3570b57cec5SDimitry Andric for (InputSectionDescription *id : keptSections) 358e8d8bef9SDimitry Andric if (id->matchesFile(s->file)) 3590b57cec5SDimitry Andric for (SectionPattern &p : id->sectionPatterns) 3605ffd83dbSDimitry Andric if (p.sectionPat.match(s->name) && 3615ffd83dbSDimitry Andric (s->flags & id->withFlags) == id->withFlags && 3625ffd83dbSDimitry Andric (s->flags & id->withoutFlags) == 0) 3630b57cec5SDimitry Andric return true; 3640b57cec5SDimitry Andric return false; 3650b57cec5SDimitry Andric } 3660b57cec5SDimitry Andric 3670b57cec5SDimitry Andric // A helper function for the SORT() command. 
36885868e8aSDimitry Andric static bool matchConstraints(ArrayRef<InputSectionBase *> sections, 3690b57cec5SDimitry Andric ConstraintKind kind) { 3700b57cec5SDimitry Andric if (kind == ConstraintKind::NoConstraint) 3710b57cec5SDimitry Andric return true; 3720b57cec5SDimitry Andric 3730b57cec5SDimitry Andric bool isRW = llvm::any_of( 37485868e8aSDimitry Andric sections, [](InputSectionBase *sec) { return sec->flags & SHF_WRITE; }); 3750b57cec5SDimitry Andric 3760b57cec5SDimitry Andric return (isRW && kind == ConstraintKind::ReadWrite) || 3770b57cec5SDimitry Andric (!isRW && kind == ConstraintKind::ReadOnly); 3780b57cec5SDimitry Andric } 3790b57cec5SDimitry Andric 38085868e8aSDimitry Andric static void sortSections(MutableArrayRef<InputSectionBase *> vec, 3810b57cec5SDimitry Andric SortSectionPolicy k) { 38285868e8aSDimitry Andric auto alignmentComparator = [](InputSectionBase *a, InputSectionBase *b) { 38385868e8aSDimitry Andric // ">" is not a mistake. Sections with larger alignments are placed 38485868e8aSDimitry Andric // before sections with smaller alignments in order to reduce the 38585868e8aSDimitry Andric // amount of padding necessary. This is compatible with GNU. 
38685868e8aSDimitry Andric return a->alignment > b->alignment; 38785868e8aSDimitry Andric }; 38885868e8aSDimitry Andric auto nameComparator = [](InputSectionBase *a, InputSectionBase *b) { 38985868e8aSDimitry Andric return a->name < b->name; 39085868e8aSDimitry Andric }; 39185868e8aSDimitry Andric auto priorityComparator = [](InputSectionBase *a, InputSectionBase *b) { 39285868e8aSDimitry Andric return getPriority(a->name) < getPriority(b->name); 39385868e8aSDimitry Andric }; 39485868e8aSDimitry Andric 39585868e8aSDimitry Andric switch (k) { 39685868e8aSDimitry Andric case SortSectionPolicy::Default: 39785868e8aSDimitry Andric case SortSectionPolicy::None: 39885868e8aSDimitry Andric return; 39985868e8aSDimitry Andric case SortSectionPolicy::Alignment: 40085868e8aSDimitry Andric return llvm::stable_sort(vec, alignmentComparator); 40185868e8aSDimitry Andric case SortSectionPolicy::Name: 40285868e8aSDimitry Andric return llvm::stable_sort(vec, nameComparator); 40385868e8aSDimitry Andric case SortSectionPolicy::Priority: 40485868e8aSDimitry Andric return llvm::stable_sort(vec, priorityComparator); 40585868e8aSDimitry Andric } 4060b57cec5SDimitry Andric } 4070b57cec5SDimitry Andric 4080b57cec5SDimitry Andric // Sort sections as instructed by SORT-family commands and --sort-section 4090b57cec5SDimitry Andric // option. Because SORT-family commands can be nested at most two depth 4100b57cec5SDimitry Andric // (e.g. SORT_BY_NAME(SORT_BY_ALIGNMENT(.text.*))) and because the command 4110b57cec5SDimitry Andric // line option is respected even if a SORT command is given, the exact 4120b57cec5SDimitry Andric // behavior we have here is a bit complicated. Here are the rules. 4130b57cec5SDimitry Andric // 4140b57cec5SDimitry Andric // 1. If two SORT commands are given, --sort-section is ignored. 4150b57cec5SDimitry Andric // 2. If one SORT command is given, and if it is not SORT_NONE, 4160b57cec5SDimitry Andric // --sort-section is handled as an inner SORT command. 
4170b57cec5SDimitry Andric // 3. If one SORT command is given, and if it is SORT_NONE, don't sort. 4180b57cec5SDimitry Andric // 4. If no SORT command is given, sort according to --sort-section. 41985868e8aSDimitry Andric static void sortInputSections(MutableArrayRef<InputSectionBase *> vec, 420e8d8bef9SDimitry Andric SortSectionPolicy outer, 421e8d8bef9SDimitry Andric SortSectionPolicy inner) { 422e8d8bef9SDimitry Andric if (outer == SortSectionPolicy::None) 4230b57cec5SDimitry Andric return; 4240b57cec5SDimitry Andric 425e8d8bef9SDimitry Andric if (inner == SortSectionPolicy::Default) 4260b57cec5SDimitry Andric sortSections(vec, config->sortSection); 4270b57cec5SDimitry Andric else 428e8d8bef9SDimitry Andric sortSections(vec, inner); 429e8d8bef9SDimitry Andric sortSections(vec, outer); 4300b57cec5SDimitry Andric } 4310b57cec5SDimitry Andric 4320b57cec5SDimitry Andric // Compute and remember which sections the InputSectionDescription matches. 43385868e8aSDimitry Andric std::vector<InputSectionBase *> 4345ffd83dbSDimitry Andric LinkerScript::computeInputSections(const InputSectionDescription *cmd, 4355ffd83dbSDimitry Andric ArrayRef<InputSectionBase *> sections) { 43685868e8aSDimitry Andric std::vector<InputSectionBase *> ret; 437e8d8bef9SDimitry Andric std::vector<size_t> indexes; 438e8d8bef9SDimitry Andric DenseSet<size_t> seen; 439e8d8bef9SDimitry Andric auto sortByPositionThenCommandLine = [&](size_t begin, size_t end) { 440e8d8bef9SDimitry Andric llvm::sort(MutableArrayRef<size_t>(indexes).slice(begin, end - begin)); 441e8d8bef9SDimitry Andric for (size_t i = begin; i != end; ++i) 442e8d8bef9SDimitry Andric ret[i] = sections[indexes[i]]; 443e8d8bef9SDimitry Andric sortInputSections( 444e8d8bef9SDimitry Andric MutableArrayRef<InputSectionBase *>(ret).slice(begin, end - begin), 445e8d8bef9SDimitry Andric config->sortSection, SortSectionPolicy::None); 446e8d8bef9SDimitry Andric }; 4470b57cec5SDimitry Andric 4480b57cec5SDimitry Andric // Collects all sections that 
satisfy constraints of Cmd. 449e8d8bef9SDimitry Andric size_t sizeAfterPrevSort = 0; 4500b57cec5SDimitry Andric for (const SectionPattern &pat : cmd->sectionPatterns) { 451e8d8bef9SDimitry Andric size_t sizeBeforeCurrPat = ret.size(); 4520b57cec5SDimitry Andric 453e8d8bef9SDimitry Andric for (size_t i = 0, e = sections.size(); i != e; ++i) { 454e8d8bef9SDimitry Andric // Skip if the section is dead or has been matched by a previous input 455e8d8bef9SDimitry Andric // section description or a previous pattern. 456e8d8bef9SDimitry Andric InputSectionBase *sec = sections[i]; 457e8d8bef9SDimitry Andric if (!sec->isLive() || sec->parent || seen.contains(i)) 4580b57cec5SDimitry Andric continue; 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andric // For -emit-relocs we have to ignore entries like 4610b57cec5SDimitry Andric // .rela.dyn : { *(.rela.data) } 4620b57cec5SDimitry Andric // which are common because they are in the default bfd script. 4630b57cec5SDimitry Andric // We do not ignore SHT_REL[A] linker-synthesized sections here because 4640b57cec5SDimitry Andric // want to support scripts that do custom layout for them. 46585868e8aSDimitry Andric if (isa<InputSection>(sec) && 46685868e8aSDimitry Andric cast<InputSection>(sec)->getRelocatedSection()) 4670b57cec5SDimitry Andric continue; 4680b57cec5SDimitry Andric 4695ffd83dbSDimitry Andric // Check the name early to improve performance in the common case. 
4705ffd83dbSDimitry Andric if (!pat.sectionPat.match(sec->name)) 4715ffd83dbSDimitry Andric continue; 4725ffd83dbSDimitry Andric 473e8d8bef9SDimitry Andric if (!cmd->matchesFile(sec->file) || pat.excludesFile(sec->file) || 4745ffd83dbSDimitry Andric (sec->flags & cmd->withFlags) != cmd->withFlags || 4755ffd83dbSDimitry Andric (sec->flags & cmd->withoutFlags) != 0) 4760b57cec5SDimitry Andric continue; 4770b57cec5SDimitry Andric 47885868e8aSDimitry Andric ret.push_back(sec); 479e8d8bef9SDimitry Andric indexes.push_back(i); 480e8d8bef9SDimitry Andric seen.insert(i); 4810b57cec5SDimitry Andric } 4820b57cec5SDimitry Andric 483e8d8bef9SDimitry Andric if (pat.sortOuter == SortSectionPolicy::Default) 484e8d8bef9SDimitry Andric continue; 485e8d8bef9SDimitry Andric 486e8d8bef9SDimitry Andric // Matched sections are ordered by radix sort with the keys being (SORT*, 487e8d8bef9SDimitry Andric // --sort-section, input order), where SORT* (if present) is most 488e8d8bef9SDimitry Andric // significant. 489e8d8bef9SDimitry Andric // 490e8d8bef9SDimitry Andric // Matched sections between the previous SORT* and this SORT* are sorted by 491e8d8bef9SDimitry Andric // (--sort-alignment, input order). 492e8d8bef9SDimitry Andric sortByPositionThenCommandLine(sizeAfterPrevSort, sizeBeforeCurrPat); 493e8d8bef9SDimitry Andric // Matched sections by this SORT* pattern are sorted using all 3 keys. 494e8d8bef9SDimitry Andric // ret[sizeBeforeCurrPat,ret.size()) are already in the input order, so we 495e8d8bef9SDimitry Andric // just sort by sortOuter and sortInner. 49685868e8aSDimitry Andric sortInputSections( 497e8d8bef9SDimitry Andric MutableArrayRef<InputSectionBase *>(ret).slice(sizeBeforeCurrPat), 498e8d8bef9SDimitry Andric pat.sortOuter, pat.sortInner); 499e8d8bef9SDimitry Andric sizeAfterPrevSort = ret.size(); 5000b57cec5SDimitry Andric } 501e8d8bef9SDimitry Andric // Matched sections after the last SORT* are sorted by (--sort-alignment, 502e8d8bef9SDimitry Andric // input order). 
503e8d8bef9SDimitry Andric sortByPositionThenCommandLine(sizeAfterPrevSort, ret.size()); 5040b57cec5SDimitry Andric return ret; 5050b57cec5SDimitry Andric } 5060b57cec5SDimitry Andric 50785868e8aSDimitry Andric void LinkerScript::discard(InputSectionBase *s) { 508480093f4SDimitry Andric if (s == in.shStrTab || s == mainPart->relrDyn) 5090b57cec5SDimitry Andric error("discarding " + s->name + " section is not allowed"); 5100b57cec5SDimitry Andric 5110b57cec5SDimitry Andric // You can discard .hash and .gnu.hash sections by linker scripts. Since 5120b57cec5SDimitry Andric // they are synthesized sections, we need to handle them differently than 5130b57cec5SDimitry Andric // other regular sections. 5140b57cec5SDimitry Andric if (s == mainPart->gnuHashTab) 5150b57cec5SDimitry Andric mainPart->gnuHashTab = nullptr; 5160b57cec5SDimitry Andric if (s == mainPart->hashTab) 5170b57cec5SDimitry Andric mainPart->hashTab = nullptr; 5180b57cec5SDimitry Andric 5190b57cec5SDimitry Andric s->markDead(); 52085868e8aSDimitry Andric s->parent = nullptr; 52185868e8aSDimitry Andric for (InputSection *ds : s->dependentSections) 52285868e8aSDimitry Andric discard(ds); 5230b57cec5SDimitry Andric } 5240b57cec5SDimitry Andric 5255ffd83dbSDimitry Andric void LinkerScript::discardSynthetic(OutputSection &outCmd) { 5265ffd83dbSDimitry Andric for (Partition &part : partitions) { 5275ffd83dbSDimitry Andric if (!part.armExidx || !part.armExidx->isLive()) 5285ffd83dbSDimitry Andric continue; 5295ffd83dbSDimitry Andric std::vector<InputSectionBase *> secs(part.armExidx->exidxSections.begin(), 5305ffd83dbSDimitry Andric part.armExidx->exidxSections.end()); 5315ffd83dbSDimitry Andric for (BaseCommand *base : outCmd.sectionCommands) 5325ffd83dbSDimitry Andric if (auto *cmd = dyn_cast<InputSectionDescription>(base)) { 5335ffd83dbSDimitry Andric std::vector<InputSectionBase *> matches = 5345ffd83dbSDimitry Andric computeInputSections(cmd, secs); 5355ffd83dbSDimitry Andric for (InputSectionBase *s : 
matches) 5365ffd83dbSDimitry Andric discard(s); 5375ffd83dbSDimitry Andric } 5385ffd83dbSDimitry Andric } 5395ffd83dbSDimitry Andric } 5405ffd83dbSDimitry Andric 54185868e8aSDimitry Andric std::vector<InputSectionBase *> 5420b57cec5SDimitry Andric LinkerScript::createInputSectionList(OutputSection &outCmd) { 54385868e8aSDimitry Andric std::vector<InputSectionBase *> ret; 5440b57cec5SDimitry Andric 5450b57cec5SDimitry Andric for (BaseCommand *base : outCmd.sectionCommands) { 5460b57cec5SDimitry Andric if (auto *cmd = dyn_cast<InputSectionDescription>(base)) { 5475ffd83dbSDimitry Andric cmd->sectionBases = computeInputSections(cmd, inputSections); 54885868e8aSDimitry Andric for (InputSectionBase *s : cmd->sectionBases) 54985868e8aSDimitry Andric s->parent = &outCmd; 55085868e8aSDimitry Andric ret.insert(ret.end(), cmd->sectionBases.begin(), cmd->sectionBases.end()); 5510b57cec5SDimitry Andric } 5520b57cec5SDimitry Andric } 5530b57cec5SDimitry Andric return ret; 5540b57cec5SDimitry Andric } 5550b57cec5SDimitry Andric 55685868e8aSDimitry Andric // Create output sections described by SECTIONS commands. 5570b57cec5SDimitry Andric void LinkerScript::processSectionCommands() { 558*fe6060f1SDimitry Andric auto process = [this](OutputSection *osec) { 559*fe6060f1SDimitry Andric std::vector<InputSectionBase *> v = createInputSectionList(*osec); 5600b57cec5SDimitry Andric 5610b57cec5SDimitry Andric // The output section name `/DISCARD/' is special. 5620b57cec5SDimitry Andric // Any input section assigned to it is discarded. 563*fe6060f1SDimitry Andric if (osec->name == "/DISCARD/") { 56485868e8aSDimitry Andric for (InputSectionBase *s : v) 56585868e8aSDimitry Andric discard(s); 566*fe6060f1SDimitry Andric discardSynthetic(*osec); 567*fe6060f1SDimitry Andric osec->sectionCommands.clear(); 568*fe6060f1SDimitry Andric return false; 5690b57cec5SDimitry Andric } 5700b57cec5SDimitry Andric 5710b57cec5SDimitry Andric // This is for ONLY_IF_RO and ONLY_IF_RW. 
An output section directive 5720b57cec5SDimitry Andric // ".foo : ONLY_IF_R[OW] { ... }" is handled only if all member input 5730b57cec5SDimitry Andric // sections satisfy a given constraint. If not, a directive is handled 5740b57cec5SDimitry Andric // as if it wasn't present from the beginning. 5750b57cec5SDimitry Andric // 5760b57cec5SDimitry Andric // Because we'll iterate over SectionCommands many more times, the easy 5770b57cec5SDimitry Andric // way to "make it as if it wasn't present" is to make it empty. 578*fe6060f1SDimitry Andric if (!matchConstraints(v, osec->constraint)) { 5790b57cec5SDimitry Andric for (InputSectionBase *s : v) 58085868e8aSDimitry Andric s->parent = nullptr; 581*fe6060f1SDimitry Andric osec->sectionCommands.clear(); 582*fe6060f1SDimitry Andric return false; 5830b57cec5SDimitry Andric } 5840b57cec5SDimitry Andric 5850b57cec5SDimitry Andric // Handle subalign (e.g. ".foo : SUBALIGN(32) { ... }"). If subalign 5860b57cec5SDimitry Andric // is given, input sections are aligned to that value, whether the 5870b57cec5SDimitry Andric // given value is larger or smaller than the original section alignment. 588*fe6060f1SDimitry Andric if (osec->subalignExpr) { 589*fe6060f1SDimitry Andric uint32_t subalign = osec->subalignExpr().getValue(); 5900b57cec5SDimitry Andric for (InputSectionBase *s : v) 5910b57cec5SDimitry Andric s->alignment = subalign; 5920b57cec5SDimitry Andric } 5930b57cec5SDimitry Andric 59485868e8aSDimitry Andric // Set the partition field the same way OutputSection::recordSection() 59585868e8aSDimitry Andric // does. Partitions cannot be used with the SECTIONS command, so this is 59685868e8aSDimitry Andric // always 1. 597*fe6060f1SDimitry Andric osec->partition = 1; 598*fe6060f1SDimitry Andric return true; 599*fe6060f1SDimitry Andric }; 6000b57cec5SDimitry Andric 601*fe6060f1SDimitry Andric // Process OVERWRITE_SECTIONS first so that it can overwrite the main script 602*fe6060f1SDimitry Andric // or orphans. 
603*fe6060f1SDimitry Andric DenseMap<StringRef, OutputSection *> map; 604*fe6060f1SDimitry Andric size_t i = 0; 605*fe6060f1SDimitry Andric for (OutputSection *osec : overwriteSections) 606*fe6060f1SDimitry Andric if (process(osec) && !map.try_emplace(osec->name, osec).second) 607*fe6060f1SDimitry Andric warn("OVERWRITE_SECTIONS specifies duplicate " + osec->name); 608*fe6060f1SDimitry Andric for (BaseCommand *&base : sectionCommands) 609*fe6060f1SDimitry Andric if (auto *osec = dyn_cast<OutputSection>(base)) { 610*fe6060f1SDimitry Andric if (OutputSection *overwrite = map.lookup(osec->name)) { 611*fe6060f1SDimitry Andric log(overwrite->location + " overwrites " + osec->name); 612*fe6060f1SDimitry Andric overwrite->sectionIndex = i++; 613*fe6060f1SDimitry Andric base = overwrite; 614*fe6060f1SDimitry Andric } else if (process(osec)) { 615*fe6060f1SDimitry Andric osec->sectionIndex = i++; 6160b57cec5SDimitry Andric } 6170b57cec5SDimitry Andric } 618*fe6060f1SDimitry Andric 619*fe6060f1SDimitry Andric // If an OVERWRITE_SECTIONS specified output section is not in 620*fe6060f1SDimitry Andric // sectionCommands, append it to the end. The section will be inserted by 621*fe6060f1SDimitry Andric // orphan placement. 622*fe6060f1SDimitry Andric for (OutputSection *osec : overwriteSections) 623*fe6060f1SDimitry Andric if (osec->partition == 1 && osec->sectionIndex == UINT32_MAX) 624*fe6060f1SDimitry Andric sectionCommands.push_back(osec); 62585868e8aSDimitry Andric } 62685868e8aSDimitry Andric 62785868e8aSDimitry Andric void LinkerScript::processSymbolAssignments() { 62885868e8aSDimitry Andric // Dot outside an output section still represents a relative address, whose 62985868e8aSDimitry Andric // sh_shndx should not be SHN_UNDEF or SHN_ABS. Create a dummy aether section 63085868e8aSDimitry Andric // that fills the void outside a section. It has an index of one, which is 63185868e8aSDimitry Andric // indistinguishable from any other regular section index. 
63285868e8aSDimitry Andric aether = make<OutputSection>("", 0, SHF_ALLOC); 63385868e8aSDimitry Andric aether->sectionIndex = 1; 63485868e8aSDimitry Andric 63585868e8aSDimitry Andric // ctx captures the local AddressState and makes it accessible deliberately. 63685868e8aSDimitry Andric // This is needed as there are some cases where we cannot just thread the 63785868e8aSDimitry Andric // current state through to a lambda function created by the script parser. 63885868e8aSDimitry Andric AddressState state; 63985868e8aSDimitry Andric ctx = &state; 64085868e8aSDimitry Andric ctx->outSec = aether; 64185868e8aSDimitry Andric 64285868e8aSDimitry Andric for (BaseCommand *base : sectionCommands) { 64385868e8aSDimitry Andric if (auto *cmd = dyn_cast<SymbolAssignment>(base)) 64485868e8aSDimitry Andric addSymbol(cmd); 64585868e8aSDimitry Andric else 64685868e8aSDimitry Andric for (BaseCommand *sub_base : cast<OutputSection>(base)->sectionCommands) 64785868e8aSDimitry Andric if (auto *cmd = dyn_cast<SymbolAssignment>(sub_base)) 64885868e8aSDimitry Andric addSymbol(cmd); 64985868e8aSDimitry Andric } 65085868e8aSDimitry Andric 6510b57cec5SDimitry Andric ctx = nullptr; 6520b57cec5SDimitry Andric } 6530b57cec5SDimitry Andric 6540b57cec5SDimitry Andric static OutputSection *findByName(ArrayRef<BaseCommand *> vec, 6550b57cec5SDimitry Andric StringRef name) { 6560b57cec5SDimitry Andric for (BaseCommand *base : vec) 6570b57cec5SDimitry Andric if (auto *sec = dyn_cast<OutputSection>(base)) 6580b57cec5SDimitry Andric if (sec->name == name) 6590b57cec5SDimitry Andric return sec; 6600b57cec5SDimitry Andric return nullptr; 6610b57cec5SDimitry Andric } 6620b57cec5SDimitry Andric 6630b57cec5SDimitry Andric static OutputSection *createSection(InputSectionBase *isec, 6640b57cec5SDimitry Andric StringRef outsecName) { 6650b57cec5SDimitry Andric OutputSection *sec = script->createOutputSection(outsecName, "<internal>"); 66685868e8aSDimitry Andric sec->recordSection(isec); 6670b57cec5SDimitry 
Andric return sec; 6680b57cec5SDimitry Andric } 6690b57cec5SDimitry Andric 6700b57cec5SDimitry Andric static OutputSection * 6710b57cec5SDimitry Andric addInputSec(StringMap<TinyPtrVector<OutputSection *>> &map, 6720b57cec5SDimitry Andric InputSectionBase *isec, StringRef outsecName) { 6730b57cec5SDimitry Andric // Sections with SHT_GROUP or SHF_GROUP attributes reach here only when the -r 6740b57cec5SDimitry Andric // option is given. A section with SHT_GROUP defines a "section group", and 6750b57cec5SDimitry Andric // its members have SHF_GROUP attribute. Usually these flags have already been 6760b57cec5SDimitry Andric // stripped by InputFiles.cpp as section groups are processed and uniquified. 6770b57cec5SDimitry Andric // However, for the -r option, we want to pass through all section groups 6780b57cec5SDimitry Andric // as-is because adding/removing members or merging them with other groups 6790b57cec5SDimitry Andric // change their semantics. 6800b57cec5SDimitry Andric if (isec->type == SHT_GROUP || (isec->flags & SHF_GROUP)) 6810b57cec5SDimitry Andric return createSection(isec, outsecName); 6820b57cec5SDimitry Andric 6830b57cec5SDimitry Andric // Imagine .zed : { *(.foo) *(.bar) } script. Both foo and bar may have 6840b57cec5SDimitry Andric // relocation sections .rela.foo and .rela.bar for example. Most tools do 6850b57cec5SDimitry Andric // not allow multiple REL[A] sections for output section. Hence we 6860b57cec5SDimitry Andric // should combine these relocation sections into single output. 6870b57cec5SDimitry Andric // We skip synthetic sections because it can be .rela.dyn/.rela.plt or any 6880b57cec5SDimitry Andric // other REL[A] sections created by linker itself. 
6890b57cec5SDimitry Andric if (!isa<SyntheticSection>(isec) && 6900b57cec5SDimitry Andric (isec->type == SHT_REL || isec->type == SHT_RELA)) { 6910b57cec5SDimitry Andric auto *sec = cast<InputSection>(isec); 6920b57cec5SDimitry Andric OutputSection *out = sec->getRelocatedSection()->getOutputSection(); 6930b57cec5SDimitry Andric 6940b57cec5SDimitry Andric if (out->relocationSection) { 69585868e8aSDimitry Andric out->relocationSection->recordSection(sec); 6960b57cec5SDimitry Andric return nullptr; 6970b57cec5SDimitry Andric } 6980b57cec5SDimitry Andric 6990b57cec5SDimitry Andric out->relocationSection = createSection(isec, outsecName); 7000b57cec5SDimitry Andric return out->relocationSection; 7010b57cec5SDimitry Andric } 7020b57cec5SDimitry Andric 7030b57cec5SDimitry Andric // The ELF spec just says 7040b57cec5SDimitry Andric // ---------------------------------------------------------------- 7050b57cec5SDimitry Andric // In the first phase, input sections that match in name, type and 7060b57cec5SDimitry Andric // attribute flags should be concatenated into single sections. 7070b57cec5SDimitry Andric // ---------------------------------------------------------------- 7080b57cec5SDimitry Andric // 7090b57cec5SDimitry Andric // However, it is clear that at least some flags have to be ignored for 7100b57cec5SDimitry Andric // section merging. At the very least SHF_GROUP and SHF_COMPRESSED have to be 7110b57cec5SDimitry Andric // ignored. We should not have two output .text sections just because one was 7120b57cec5SDimitry Andric // in a group and another was not for example. 7130b57cec5SDimitry Andric // 7140b57cec5SDimitry Andric // It also seems that wording was a late addition and didn't get the 7150b57cec5SDimitry Andric // necessary scrutiny. 7160b57cec5SDimitry Andric // 7170b57cec5SDimitry Andric // Merging sections with different flags is expected by some users. 
One 7180b57cec5SDimitry Andric // reason is that if one file has 7190b57cec5SDimitry Andric // 7200b57cec5SDimitry Andric // int *const bar __attribute__((section(".foo"))) = (int *)0; 7210b57cec5SDimitry Andric // 7220b57cec5SDimitry Andric // gcc with -fPIC will produce a read only .foo section. But if another 7230b57cec5SDimitry Andric // file has 7240b57cec5SDimitry Andric // 7250b57cec5SDimitry Andric // int zed; 7260b57cec5SDimitry Andric // int *const bar __attribute__((section(".foo"))) = (int *)&zed; 7270b57cec5SDimitry Andric // 7280b57cec5SDimitry Andric // gcc with -fPIC will produce a read write section. 7290b57cec5SDimitry Andric // 7300b57cec5SDimitry Andric // Last but not least, when using linker script the merge rules are forced by 7310b57cec5SDimitry Andric // the script. Unfortunately, linker scripts are name based. This means that 7320b57cec5SDimitry Andric // expressions like *(.foo*) can refer to multiple input sections with 7330b57cec5SDimitry Andric // different flags. We cannot put them in different output sections or we 7340b57cec5SDimitry Andric // would produce wrong results for 7350b57cec5SDimitry Andric // 7360b57cec5SDimitry Andric // start = .; *(.foo.*) end = .; *(.bar) 7370b57cec5SDimitry Andric // 7380b57cec5SDimitry Andric // and a mapping of .foo1 and .bar1 to one section and .foo2 and .bar2 to 7390b57cec5SDimitry Andric // another. The problem is that there is no way to layout those output 7400b57cec5SDimitry Andric // sections such that the .foo sections are the only thing between the start 7410b57cec5SDimitry Andric // and end symbols. 7420b57cec5SDimitry Andric // 7430b57cec5SDimitry Andric // Given the above issues, we instead merge sections by name and error on 7440b57cec5SDimitry Andric // incompatible types and flags. 
7450b57cec5SDimitry Andric TinyPtrVector<OutputSection *> &v = map[outsecName]; 7460b57cec5SDimitry Andric for (OutputSection *sec : v) { 7470b57cec5SDimitry Andric if (sec->partition != isec->partition) 7480b57cec5SDimitry Andric continue; 74985868e8aSDimitry Andric 75085868e8aSDimitry Andric if (config->relocatable && (isec->flags & SHF_LINK_ORDER)) { 75185868e8aSDimitry Andric // Merging two SHF_LINK_ORDER sections with different sh_link fields will 75285868e8aSDimitry Andric // change their semantics, so we only merge them in -r links if they will 75385868e8aSDimitry Andric // end up being linked to the same output section. The casts are fine 75485868e8aSDimitry Andric // because everything in the map was created by the orphan placement code. 75585868e8aSDimitry Andric auto *firstIsec = cast<InputSectionBase>( 75685868e8aSDimitry Andric cast<InputSectionDescription>(sec->sectionCommands[0]) 75785868e8aSDimitry Andric ->sectionBases[0]); 758eaeb601bSDimitry Andric OutputSection *firstIsecOut = 759eaeb601bSDimitry Andric firstIsec->flags & SHF_LINK_ORDER 760eaeb601bSDimitry Andric ? firstIsec->getLinkOrderDep()->getOutputSection() 761eaeb601bSDimitry Andric : nullptr; 762eaeb601bSDimitry Andric if (firstIsecOut != isec->getLinkOrderDep()->getOutputSection()) 76385868e8aSDimitry Andric continue; 76485868e8aSDimitry Andric } 76585868e8aSDimitry Andric 76685868e8aSDimitry Andric sec->recordSection(isec); 7670b57cec5SDimitry Andric return nullptr; 7680b57cec5SDimitry Andric } 7690b57cec5SDimitry Andric 7700b57cec5SDimitry Andric OutputSection *sec = createSection(isec, outsecName); 7710b57cec5SDimitry Andric v.push_back(sec); 7720b57cec5SDimitry Andric return sec; 7730b57cec5SDimitry Andric } 7740b57cec5SDimitry Andric 7750b57cec5SDimitry Andric // Add sections that didn't match any sections command. 
7760b57cec5SDimitry Andric void LinkerScript::addOrphanSections() { 7770b57cec5SDimitry Andric StringMap<TinyPtrVector<OutputSection *>> map; 7780b57cec5SDimitry Andric std::vector<OutputSection *> v; 7790b57cec5SDimitry Andric 78085868e8aSDimitry Andric std::function<void(InputSectionBase *)> add; 78185868e8aSDimitry Andric add = [&](InputSectionBase *s) { 78285868e8aSDimitry Andric if (s->isLive() && !s->parent) { 7835ffd83dbSDimitry Andric orphanSections.push_back(s); 7845ffd83dbSDimitry Andric 7850b57cec5SDimitry Andric StringRef name = getOutputSectionName(s); 7865ffd83dbSDimitry Andric if (config->unique) { 7875ffd83dbSDimitry Andric v.push_back(createSection(s, name)); 7885ffd83dbSDimitry Andric } else if (OutputSection *sec = findByName(sectionCommands, name)) { 78985868e8aSDimitry Andric sec->recordSection(s); 79085868e8aSDimitry Andric } else { 7910b57cec5SDimitry Andric if (OutputSection *os = addInputSec(map, s, name)) 7920b57cec5SDimitry Andric v.push_back(os); 79385868e8aSDimitry Andric assert(isa<MergeInputSection>(s) || 79485868e8aSDimitry Andric s->getOutputSection()->sectionIndex == UINT32_MAX); 79585868e8aSDimitry Andric } 79685868e8aSDimitry Andric } 79785868e8aSDimitry Andric 79885868e8aSDimitry Andric if (config->relocatable) 79985868e8aSDimitry Andric for (InputSectionBase *depSec : s->dependentSections) 80085868e8aSDimitry Andric if (depSec->flags & SHF_LINK_ORDER) 80185868e8aSDimitry Andric add(depSec); 8020b57cec5SDimitry Andric }; 8030b57cec5SDimitry Andric 804*fe6060f1SDimitry Andric // For further --emit-reloc handling code we need target output section 8050b57cec5SDimitry Andric // to be created before we create relocation output section, so we want 8060b57cec5SDimitry Andric // to create target sections first. We do not want priority handling 8070b57cec5SDimitry Andric // for synthetic sections because them are special. 
8080b57cec5SDimitry Andric for (InputSectionBase *isec : inputSections) { 80985868e8aSDimitry Andric // In -r links, SHF_LINK_ORDER sections are added while adding their parent 81085868e8aSDimitry Andric // sections because we need to know the parent's output section before we 81185868e8aSDimitry Andric // can select an output section for the SHF_LINK_ORDER section. 81285868e8aSDimitry Andric if (config->relocatable && (isec->flags & SHF_LINK_ORDER)) 81385868e8aSDimitry Andric continue; 81485868e8aSDimitry Andric 8150b57cec5SDimitry Andric if (auto *sec = dyn_cast<InputSection>(isec)) 8160b57cec5SDimitry Andric if (InputSectionBase *rel = sec->getRelocatedSection()) 8170b57cec5SDimitry Andric if (auto *relIS = dyn_cast_or_null<InputSectionBase>(rel->parent)) 8180b57cec5SDimitry Andric add(relIS); 8190b57cec5SDimitry Andric add(isec); 8200b57cec5SDimitry Andric } 8210b57cec5SDimitry Andric 8220b57cec5SDimitry Andric // If no SECTIONS command was given, we should insert sections commands 8230b57cec5SDimitry Andric // before others, so that we can handle scripts which refers them, 8240b57cec5SDimitry Andric // for example: "foo = ABSOLUTE(ADDR(.text)));". 8250b57cec5SDimitry Andric // When SECTIONS command is present we just add all orphans to the end. 
8260b57cec5SDimitry Andric if (hasSectionsCommand) 8270b57cec5SDimitry Andric sectionCommands.insert(sectionCommands.end(), v.begin(), v.end()); 8280b57cec5SDimitry Andric else 8290b57cec5SDimitry Andric sectionCommands.insert(sectionCommands.begin(), v.begin(), v.end()); 8300b57cec5SDimitry Andric } 8310b57cec5SDimitry Andric 8325ffd83dbSDimitry Andric void LinkerScript::diagnoseOrphanHandling() const { 833e8d8bef9SDimitry Andric llvm::TimeTraceScope timeScope("Diagnose orphan sections"); 834e8d8bef9SDimitry Andric if (config->orphanHandling == OrphanHandlingPolicy::Place) 835e8d8bef9SDimitry Andric return; 8365ffd83dbSDimitry Andric for (const InputSectionBase *sec : orphanSections) { 8375ffd83dbSDimitry Andric // Input SHT_REL[A] retained by --emit-relocs are ignored by 8385ffd83dbSDimitry Andric // computeInputSections(). Don't warn/error. 8395ffd83dbSDimitry Andric if (isa<InputSection>(sec) && 8405ffd83dbSDimitry Andric cast<InputSection>(sec)->getRelocatedSection()) 8415ffd83dbSDimitry Andric continue; 8425ffd83dbSDimitry Andric 8435ffd83dbSDimitry Andric StringRef name = getOutputSectionName(sec); 8445ffd83dbSDimitry Andric if (config->orphanHandling == OrphanHandlingPolicy::Error) 8455ffd83dbSDimitry Andric error(toString(sec) + " is being placed in '" + name + "'"); 846e8d8bef9SDimitry Andric else 8475ffd83dbSDimitry Andric warn(toString(sec) + " is being placed in '" + name + "'"); 8485ffd83dbSDimitry Andric } 8495ffd83dbSDimitry Andric } 8505ffd83dbSDimitry Andric 8510b57cec5SDimitry Andric uint64_t LinkerScript::advance(uint64_t size, unsigned alignment) { 8520b57cec5SDimitry Andric bool isTbss = 8530b57cec5SDimitry Andric (ctx->outSec->flags & SHF_TLS) && ctx->outSec->type == SHT_NOBITS; 8540b57cec5SDimitry Andric uint64_t start = isTbss ? 
dot + ctx->threadBssOffset : dot; 8550b57cec5SDimitry Andric start = alignTo(start, alignment); 8560b57cec5SDimitry Andric uint64_t end = start + size; 8570b57cec5SDimitry Andric 8580b57cec5SDimitry Andric if (isTbss) 8590b57cec5SDimitry Andric ctx->threadBssOffset = end - dot; 8600b57cec5SDimitry Andric else 8610b57cec5SDimitry Andric dot = end; 8620b57cec5SDimitry Andric return end; 8630b57cec5SDimitry Andric } 8640b57cec5SDimitry Andric 8650b57cec5SDimitry Andric void LinkerScript::output(InputSection *s) { 8660b57cec5SDimitry Andric assert(ctx->outSec == s->getParent()); 8670b57cec5SDimitry Andric uint64_t before = advance(0, 1); 8680b57cec5SDimitry Andric uint64_t pos = advance(s->getSize(), s->alignment); 8690b57cec5SDimitry Andric s->outSecOff = pos - s->getSize() - ctx->outSec->addr; 8700b57cec5SDimitry Andric 8710b57cec5SDimitry Andric // Update output section size after adding each section. This is so that 8720b57cec5SDimitry Andric // SIZEOF works correctly in the case below: 8730b57cec5SDimitry Andric // .foo { *(.aaa) a = SIZEOF(.foo); *(.bbb) } 8740b57cec5SDimitry Andric expandOutputSection(pos - before); 8750b57cec5SDimitry Andric } 8760b57cec5SDimitry Andric 8770b57cec5SDimitry Andric void LinkerScript::switchTo(OutputSection *sec) { 8780b57cec5SDimitry Andric ctx->outSec = sec; 8790b57cec5SDimitry Andric 8805ffd83dbSDimitry Andric uint64_t pos = advance(0, 1); 8815ffd83dbSDimitry Andric if (sec->addrExpr && script->hasSectionsCommand) { 8825ffd83dbSDimitry Andric // The alignment is ignored. 8835ffd83dbSDimitry Andric ctx->outSec->addr = pos; 8845ffd83dbSDimitry Andric } else { 8855ffd83dbSDimitry Andric // ctx->outSec->alignment is the max of ALIGN and the maximum of input 8865ffd83dbSDimitry Andric // section alignments. 
8870b57cec5SDimitry Andric ctx->outSec->addr = advance(0, ctx->outSec->alignment); 8885ffd83dbSDimitry Andric expandMemoryRegions(ctx->outSec->addr - pos); 8895ffd83dbSDimitry Andric } 8900b57cec5SDimitry Andric } 8910b57cec5SDimitry Andric 8920b57cec5SDimitry Andric // This function searches for a memory region to place the given output 8930b57cec5SDimitry Andric // section in. If found, a pointer to the appropriate memory region is 8940b57cec5SDimitry Andric // returned. Otherwise, a nullptr is returned. 8950b57cec5SDimitry Andric MemoryRegion *LinkerScript::findMemoryRegion(OutputSection *sec) { 8960b57cec5SDimitry Andric // If a memory region name was specified in the output section command, 8970b57cec5SDimitry Andric // then try to find that region first. 8980b57cec5SDimitry Andric if (!sec->memoryRegionName.empty()) { 8990b57cec5SDimitry Andric if (MemoryRegion *m = memoryRegions.lookup(sec->memoryRegionName)) 9000b57cec5SDimitry Andric return m; 9010b57cec5SDimitry Andric error("memory region '" + sec->memoryRegionName + "' not declared"); 9020b57cec5SDimitry Andric return nullptr; 9030b57cec5SDimitry Andric } 9040b57cec5SDimitry Andric 9050b57cec5SDimitry Andric // If at least one memory region is defined, all sections must 9060b57cec5SDimitry Andric // belong to some memory region. Otherwise, we don't need to do 9070b57cec5SDimitry Andric // anything for memory regions. 9080b57cec5SDimitry Andric if (memoryRegions.empty()) 9090b57cec5SDimitry Andric return nullptr; 9100b57cec5SDimitry Andric 9110b57cec5SDimitry Andric // See if a region can be found by matching section flags. 9120b57cec5SDimitry Andric for (auto &pair : memoryRegions) { 9130b57cec5SDimitry Andric MemoryRegion *m = pair.second; 9140b57cec5SDimitry Andric if ((m->flags & sec->flags) && (m->negFlags & sec->flags) == 0) 9150b57cec5SDimitry Andric return m; 9160b57cec5SDimitry Andric } 9170b57cec5SDimitry Andric 9180b57cec5SDimitry Andric // Otherwise, no suitable region was found. 
9190b57cec5SDimitry Andric if (sec->flags & SHF_ALLOC) 9200b57cec5SDimitry Andric error("no memory region specified for section '" + sec->name + "'"); 9210b57cec5SDimitry Andric return nullptr; 9220b57cec5SDimitry Andric } 9230b57cec5SDimitry Andric 9240b57cec5SDimitry Andric static OutputSection *findFirstSection(PhdrEntry *load) { 9250b57cec5SDimitry Andric for (OutputSection *sec : outputSections) 9260b57cec5SDimitry Andric if (sec->ptLoad == load) 9270b57cec5SDimitry Andric return sec; 9280b57cec5SDimitry Andric return nullptr; 9290b57cec5SDimitry Andric } 9300b57cec5SDimitry Andric 9310b57cec5SDimitry Andric // This function assigns offsets to input sections and an output section 9320b57cec5SDimitry Andric // for a single sections command (e.g. ".text { *(.text); }"). 9330b57cec5SDimitry Andric void LinkerScript::assignOffsets(OutputSection *sec) { 9345ffd83dbSDimitry Andric const bool sameMemRegion = ctx->memRegion == sec->memRegion; 9355ffd83dbSDimitry Andric const bool prevLMARegionIsDefault = ctx->lmaRegion == nullptr; 936e8d8bef9SDimitry Andric const uint64_t savedDot = dot; 9370b57cec5SDimitry Andric ctx->memRegion = sec->memRegion; 9380b57cec5SDimitry Andric ctx->lmaRegion = sec->lmaRegion; 939e8d8bef9SDimitry Andric 940e8d8bef9SDimitry Andric if (sec->flags & SHF_ALLOC) { 9410b57cec5SDimitry Andric if (ctx->memRegion) 9420b57cec5SDimitry Andric dot = ctx->memRegion->curPos; 943e8d8bef9SDimitry Andric if (sec->addrExpr) 9440b57cec5SDimitry Andric setDot(sec->addrExpr, sec->location, false); 9450b57cec5SDimitry Andric 94685868e8aSDimitry Andric // If the address of the section has been moved forward by an explicit 94785868e8aSDimitry Andric // expression so that it now starts past the current curPos of the enclosing 94885868e8aSDimitry Andric // region, we need to expand the current region to account for the space 94985868e8aSDimitry Andric // between the previous section, if any, and the start of this section. 
95085868e8aSDimitry Andric if (ctx->memRegion && ctx->memRegion->curPos < dot) 95185868e8aSDimitry Andric expandMemoryRegion(ctx->memRegion, dot - ctx->memRegion->curPos, 95285868e8aSDimitry Andric ctx->memRegion->name, sec->name); 953e8d8bef9SDimitry Andric } else { 954e8d8bef9SDimitry Andric // Non-SHF_ALLOC sections have zero addresses. 955e8d8bef9SDimitry Andric dot = 0; 956e8d8bef9SDimitry Andric } 95785868e8aSDimitry Andric 9580b57cec5SDimitry Andric switchTo(sec); 9590b57cec5SDimitry Andric 9605ffd83dbSDimitry Andric // ctx->lmaOffset is LMA minus VMA. If LMA is explicitly specified via AT() or 9615ffd83dbSDimitry Andric // AT>, recompute ctx->lmaOffset; otherwise, if both previous/current LMA 9625ffd83dbSDimitry Andric // region is the default, and the two sections are in the same memory region, 9635ffd83dbSDimitry Andric // reuse previous lmaOffset; otherwise, reset lmaOffset to 0. This emulates 9645ffd83dbSDimitry Andric // heuristics described in 9655ffd83dbSDimitry Andric // https://sourceware.org/binutils/docs/ld/Output-Section-LMA.html 9660b57cec5SDimitry Andric if (sec->lmaExpr) 9670b57cec5SDimitry Andric ctx->lmaOffset = sec->lmaExpr().getValue() - dot; 9685ffd83dbSDimitry Andric else if (MemoryRegion *mr = sec->lmaRegion) 9695ffd83dbSDimitry Andric ctx->lmaOffset = alignTo(mr->curPos, sec->alignment) - dot; 9705ffd83dbSDimitry Andric else if (!sameMemRegion || !prevLMARegionIsDefault) 9715ffd83dbSDimitry Andric ctx->lmaOffset = 0; 9720b57cec5SDimitry Andric 9735ffd83dbSDimitry Andric // Propagate ctx->lmaOffset to the first "non-header" section. 9740b57cec5SDimitry Andric if (PhdrEntry *l = ctx->outSec->ptLoad) 9750b57cec5SDimitry Andric if (sec == findFirstSection(l)) 9760b57cec5SDimitry Andric l->lmaOffset = ctx->lmaOffset; 9770b57cec5SDimitry Andric 9780b57cec5SDimitry Andric // We can call this method multiple times during the creation of 9790b57cec5SDimitry Andric // thunks and want to start over calculation each time. 
9800b57cec5SDimitry Andric sec->size = 0; 9810b57cec5SDimitry Andric 9820b57cec5SDimitry Andric // We visited SectionsCommands from processSectionCommands to 9830b57cec5SDimitry Andric // layout sections. Now, we visit SectionsCommands again to fix 9840b57cec5SDimitry Andric // section offsets. 9850b57cec5SDimitry Andric for (BaseCommand *base : sec->sectionCommands) { 9860b57cec5SDimitry Andric // This handles the assignments to symbol or to the dot. 9870b57cec5SDimitry Andric if (auto *cmd = dyn_cast<SymbolAssignment>(base)) { 9880b57cec5SDimitry Andric cmd->addr = dot; 9890b57cec5SDimitry Andric assignSymbol(cmd, true); 9900b57cec5SDimitry Andric cmd->size = dot - cmd->addr; 9910b57cec5SDimitry Andric continue; 9920b57cec5SDimitry Andric } 9930b57cec5SDimitry Andric 9940b57cec5SDimitry Andric // Handle BYTE(), SHORT(), LONG(), or QUAD(). 9950b57cec5SDimitry Andric if (auto *cmd = dyn_cast<ByteCommand>(base)) { 9960b57cec5SDimitry Andric cmd->offset = dot - ctx->outSec->addr; 9970b57cec5SDimitry Andric dot += cmd->size; 9980b57cec5SDimitry Andric expandOutputSection(cmd->size); 9990b57cec5SDimitry Andric continue; 10000b57cec5SDimitry Andric } 10010b57cec5SDimitry Andric 10020b57cec5SDimitry Andric // Handle a single input section description command. 10030b57cec5SDimitry Andric // It calculates and assigns the offsets for each section and also 10040b57cec5SDimitry Andric // updates the output section size. 10050b57cec5SDimitry Andric for (InputSection *sec : cast<InputSectionDescription>(base)->sections) 10060b57cec5SDimitry Andric output(sec); 10070b57cec5SDimitry Andric } 1008e8d8bef9SDimitry Andric 1009e8d8bef9SDimitry Andric // Non-SHF_ALLOC sections do not affect the addresses of other OutputSections 1010e8d8bef9SDimitry Andric // as they are not part of the process image. 
1011e8d8bef9SDimitry Andric if (!(sec->flags & SHF_ALLOC)) 1012e8d8bef9SDimitry Andric dot = savedDot; 10130b57cec5SDimitry Andric } 10140b57cec5SDimitry Andric 10150b57cec5SDimitry Andric static bool isDiscardable(OutputSection &sec) { 10160b57cec5SDimitry Andric if (sec.name == "/DISCARD/") 10170b57cec5SDimitry Andric return true; 10180b57cec5SDimitry Andric 10190b57cec5SDimitry Andric // We do not want to remove OutputSections with expressions that reference 10200b57cec5SDimitry Andric // symbols even if the OutputSection is empty. We want to ensure that the 10210b57cec5SDimitry Andric // expressions can be evaluated and report an error if they cannot. 10220b57cec5SDimitry Andric if (sec.expressionsUseSymbols) 10230b57cec5SDimitry Andric return false; 10240b57cec5SDimitry Andric 10250b57cec5SDimitry Andric // OutputSections may be referenced by name in ADDR and LOADADDR expressions, 10260b57cec5SDimitry Andric // as an empty Section can has a valid VMA and LMA we keep the OutputSection 10270b57cec5SDimitry Andric // to maintain the integrity of the other Expression. 10280b57cec5SDimitry Andric if (sec.usedInExpression) 10290b57cec5SDimitry Andric return false; 10300b57cec5SDimitry Andric 10310b57cec5SDimitry Andric for (BaseCommand *base : sec.sectionCommands) { 10320b57cec5SDimitry Andric if (auto cmd = dyn_cast<SymbolAssignment>(base)) 10330b57cec5SDimitry Andric // Don't create empty output sections just for unreferenced PROVIDE 10340b57cec5SDimitry Andric // symbols. 10350b57cec5SDimitry Andric if (cmd->name != "." 
&& !cmd->sym)
        continue;

    if (!isa<InputSectionDescription>(*base))
      return false;
  }
  return true;
}

// Propagates program headers from one output section to the next.
//
// `phdrs` is the running list carried over from earlier sections. A section
// that names no program headers of its own inherits that list (only when it
// is allocated); a section that does name program headers replaces the
// running list, so that the sections after it inherit from it instead.
static void maybePropagatePhdrs(OutputSection &sec,
                                std::vector<StringRef> &phdrs) {
  if (sec.phdrs.empty()) {
    // To match the bfd linker script behaviour, only propagate program
    // headers to sections that are allocated.
    if (sec.flags & SHF_ALLOC)
      sec.phdrs = phdrs;
  } else {
    phdrs = sec.phdrs;
  }
}

// Fixes up output section descriptions before they are sorted: applies
// ALIGN expressions, picks flags for empty sections, propagates program
// headers, and drops empty sections that are discardable.
void LinkerScript::adjustSectionsBeforeSorting() {
  // If the output section contains only symbol assignments, create a
  // corresponding output section. The issue is what to do with linker script
  // like ".foo : { symbol = 42; }". One option would be to convert it to
  // "symbol = 42;". That is, move the symbol out of the empty section
  // description. That seems to be what bfd does for this simple case. The
  // problem is that this is not completely general. bfd will give up and
  // create a dummy section too if there is a ". = . + 1" inside the section
  // for example.
  // Given that we want to create the section, we have to worry what impact
  // it will have on the link. For example, if we just create a section with
  // 0 for flags, it would change which PT_LOADs are created.
  // We could remember that particular section is dummy and ignore it in
  // other parts of the linker, but unfortunately there are quite a few places
  // that would need to change:
  //   * The program header creation.
  //   * The orphan section placement.
  //   * The address assignment.
  // The other option is to pick flags that minimize the impact the section
  // will have on the rest of the linker. That is why we copy the flags from
  // the previous sections. Only a few flags are needed to keep the impact low.
  uint64_t flags = SHF_ALLOC;

  // Running list of program headers inherited from the most recent section
  // that specified any.
  std::vector<StringRef> defPhdrs;
  // Iterate by reference to the pointer so that discarded sections can be
  // nulled out in place and erased after the loop.
  for (BaseCommand *&cmd : sectionCommands) {
    auto *sec = dyn_cast<OutputSection>(cmd);
    if (!sec)
      continue;

    // Handle align (e.g. ".foo : ALIGN(16) { ... }").
    if (sec->alignExpr)
      sec->alignment =
          std::max<uint32_t>(sec->alignment, sec->alignExpr().getValue());

    // The input section might have been removed (if it was an empty synthetic
    // section), but we at least know the flags.
    if (sec->hasInputSections)
      flags = sec->flags;

    // We do not want to keep any special flags for output section
    // in case it is empty. Only SHF_ALLOC (unless the section is marked
    // nonAlloc), SHF_WRITE and SHF_EXECINSTR survive from the inherited flags.
    bool isEmpty = (getFirstInputSection(sec) == nullptr);
    if (isEmpty)
      sec->flags = flags & ((sec->nonAlloc ? 0 : (uint64_t)SHF_ALLOC) |
                            SHF_WRITE | SHF_EXECINSTR);

    // The code below may remove empty output sections. We should save the
    // specified program headers (if exist) and propagate them to subsequent
    // sections which do not specify program headers.
    // An example of such a linker script is:
    // SECTIONS { .empty : { *(.empty) } :rw
    //            .foo : { *(.foo) } }
    // Note: at this point the order of output sections has not been finalized,
    // because orphans have not been inserted into their expected positions. We
    // will handle them in adjustSectionsAfterSorting().
    // NOTE(review): sectionIndex == UINT32_MAX presumably identifies sections
    // that were not declared in the script (orphans) -- confirm.
    if (sec->sectionIndex != UINT32_MAX)
      maybePropagatePhdrs(*sec, defPhdrs);

    if (isEmpty && isDiscardable(*sec)) {
      sec->markDead();
      cmd = nullptr;
    }
  }

  // It is common practice to use very generic linker scripts. So for any
  // given run some of the output sections in the script will be empty.
  // We could create corresponding empty output sections, but that would
  // clutter the output.
  // We instead remove trivially empty sections. The bfd linker seems even
  // more aggressive at removing them.
  llvm::erase_if(sectionCommands, [&](BaseCommand *base) { return !base; });
}

// Resolves memory regions for every output section and then propagates
// program headers across the section commands in their current order.
void LinkerScript::adjustSectionsAfterSorting() {
  // Try and find an appropriate memory region to assign offsets in.
  for (BaseCommand *base : sectionCommands) {
    if (auto *sec = dyn_cast<OutputSection>(base)) {
      if (!sec->lmaRegionName.empty()) {
        if (MemoryRegion *m = memoryRegions.lookup(sec->lmaRegionName))
          sec->lmaRegion = m;
        else
          error("memory region '" + sec->lmaRegionName + "' not declared");
      }
      sec->memRegion = findMemoryRegion(sec);
    }
  }

  // If output section command doesn't specify any segments,
  // and we haven't previously assigned any section to segment,
  // then we simply assign section to the very first load segment.
  // Below is an example of such linker script:
  // PHDRS { seg PT_LOAD; }
  // SECTIONS { .aaa : { *(.aaa) } }
  std::vector<StringRef> defPhdrs;
  auto firstPtLoad = llvm::find_if(phdrsCommands, [](const PhdrsCommand &cmd) {
    return cmd.type == PT_LOAD;
  });
  if (firstPtLoad != phdrsCommands.end())
    defPhdrs.push_back(firstPtLoad->name);

  // Walk the commands and propagate the program headers to commands that don't
  // explicitly specify them.
  for (BaseCommand *base : sectionCommands)
    if (auto *sec = dyn_cast<OutputSection>(base))
      maybePropagatePhdrs(*sec, defPhdrs);
}

// Returns the lowest address the headers may occupy, given `min`, the lowest
// address of an allocated section. The caller compares the header size with
// `min` minus this value.
static uint64_t computeBase(uint64_t min, bool allocateHeaders) {
  // If there is no SECTIONS or if the linkerscript is explicit about program
  // headers, do our best to allocate them.
  if (!script->hasSectionsCommand || allocateHeaders)
    return 0;
  // Otherwise only allocate program headers if that would not add a page.
  return alignDown(min, config->maxPageSize);
}

// When the SECTIONS command is used, try to find an address for the file and
// program headers output sections, which can be added to the first PT_LOAD
// segment when program headers are created.
//
// We check if the headers fit below the first allocated section. If there isn't
// enough space for these sections, we'll remove them from the PT_LOAD segment,
// and we'll also remove the PT_PHDR segment.
void LinkerScript::allocateHeaders(std::vector<PhdrEntry *> &phdrs) {
  // Lowest address among the allocated output sections; stays at the maximum
  // uint64_t value if there are none.
  uint64_t min = std::numeric_limits<uint64_t>::max();
  for (OutputSection *sec : outputSections)
    if (sec->flags & SHF_ALLOC)
      min = std::min<uint64_t>(min, sec->addr);

  // Without a PT_LOAD segment there is nothing to place the headers in.
  auto it = llvm::find_if(
      phdrs, [](const PhdrEntry *e) { return e->p_type == PT_LOAD; });
  if (it == phdrs.end())
    return;
  PhdrEntry *firstPTLoad = *it;

  // True if any PHDRS entry uses the PHDRS or FILEHDR keyword.
  bool hasExplicitHeaders =
      llvm::any_of(phdrsCommands, [](const PhdrsCommand &cmd) {
        return cmd.hasPhdrs || cmd.hasFilehdr;
      });
  bool paged = !config->omagic && !config->nmagic;
  uint64_t headerSize = getHeaderSize();
  if ((paged || hasExplicitHeaders) &&
      headerSize <= min - computeBase(min, hasExplicitHeaders)) {
    // The headers fit: place them at the page boundary at or below
    // min - headerSize, with the program headers right after the ELF header.
    min = alignDown(min - headerSize, config->maxPageSize);
    Out::elfHeader->addr = min;
    Out::programHeaders->addr = min + Out::elfHeader->size;
    return;
  }

  // Error if we were explicitly asked to allocate headers.
  if (hasExplicitHeaders)
    error("could not allocate headers");

  // The headers do not fit: detach them from their PT_LOAD, recompute the
  // first section of that segment, and drop every PT_PHDR entry.
  Out::elfHeader->ptLoad = nullptr;
  Out::programHeaders->ptLoad = nullptr;
  firstPTLoad->firstSec = findFirstSection(firstPTLoad);

  llvm::erase_if(phdrs,
                 [](const PhdrEntry *e) { return e->p_type == PT_PHDR; });
}

// Resets the current position of every memory region to the value of its
// origin expression.
LinkerScript::AddressState::AddressState() {
  for (auto &mri : script->memoryRegions) {
    MemoryRegion *mr = mri.second;
    mr->curPos = (mr->origin)().getValue();
  }
}

// Here we assign addresses as instructed by linker script SECTIONS
// sub-commands. Doing that allows us to use final VA values, so here
// we also handle the remaining commands like symbol assignments and ASSERTs.
// Returns a symbol that has changed its section or value, or nullptr if no
// symbol has changed.
const Defined *LinkerScript::assignAddresses() {
  if (script->hasSectionsCommand) {
    // With a linker script, assignment of addresses to headers is covered by
    // allocateHeaders().
    dot = config->imageBase.getValueOr(0);
  } else {
    // Assign addresses to headers right now.
    dot = target->getImageBase();
    Out::elfHeader->addr = dot;
    Out::programHeaders->addr = dot + Out::elfHeader->size;
    dot += getHeaderSize();
  }

  // `deleter` owns the address state for the duration of this call; `ctx`
  // points at it and is cleared again before returning.
  auto deleter = std::make_unique<AddressState>();
  ctx = deleter.get();
  errorOnMissingSection = true;
  switchTo(aether);

  // Snapshot the symbol assignment values so that changes can be detected
  // after the commands have been processed.
  SymbolAssignmentMap oldValues = getSymbolAssignmentValues(sectionCommands);
  for (BaseCommand *base : sectionCommands) {
    if (auto *cmd = dyn_cast<SymbolAssignment>(base)) {
      // Record where the assignment starts and how far it moved dot.
      cmd->addr = dot;
      assignSymbol(cmd, false);
      cmd->size = dot - cmd->addr;
      continue;
    }
    assignOffsets(cast<OutputSection>(base));
  }

  ctx = nullptr;
  return getChangedSymbolAssignment(oldValues);
}

// Creates program headers as instructed by PHDRS linker script command.
// The returned vector has one entry per PHDRS command, in the same order.
std::vector<PhdrEntry *> LinkerScript::createPhdrs() {
  std::vector<PhdrEntry *> ret;

  // Process PHDRS and FILEHDR keywords because they are not
  // real output sections and cannot be added in the following loop.
  for (const PhdrsCommand &cmd : phdrsCommands) {
    // Segments without explicit FLAGS start out as PF_R.
    PhdrEntry *phdr = make<PhdrEntry>(cmd.type, cmd.flags ? *cmd.flags : PF_R);

    if (cmd.hasFilehdr)
      phdr->add(Out::elfHeader);
    if (cmd.hasPhdrs)
      phdr->add(Out::programHeaders);

    if (cmd.lmaExpr) {
      phdr->p_paddr = cmd.lmaExpr().getValue();
      phdr->hasLMA = true;
    }
    ret.push_back(phdr);
  }

  // Add output sections to program headers.
  for (OutputSection *sec : outputSections) {
    // Assign headers specified by linker script
    for (size_t id : getPhdrIndices(sec)) {
      ret[id]->add(sec);
      // Without explicit FLAGS, the segment accumulates the flags of the
      // sections it contains.
      if (!phdrsCommands[id].flags.hasValue())
        ret[id]->p_flags |= sec->getPhdrFlags();
    }
  }
  return ret;
}

// Returns true if we should emit an .interp section.
//
// We usually do. But if PHDRS commands are given, and
// no PT_INTERP is there, there's no place to emit an
// .interp, so we don't do that in that case.
13000b57cec5SDimitry Andric bool LinkerScript::needsInterpSection() { 13010b57cec5SDimitry Andric if (phdrsCommands.empty()) 13020b57cec5SDimitry Andric return true; 13030b57cec5SDimitry Andric for (PhdrsCommand &cmd : phdrsCommands) 13040b57cec5SDimitry Andric if (cmd.type == PT_INTERP) 13050b57cec5SDimitry Andric return true; 13060b57cec5SDimitry Andric return false; 13070b57cec5SDimitry Andric } 13080b57cec5SDimitry Andric 13090b57cec5SDimitry Andric ExprValue LinkerScript::getSymbolValue(StringRef name, const Twine &loc) { 13100b57cec5SDimitry Andric if (name == ".") { 13110b57cec5SDimitry Andric if (ctx) 13120b57cec5SDimitry Andric return {ctx->outSec, false, dot - ctx->outSec->addr, loc}; 13130b57cec5SDimitry Andric error(loc + ": unable to get location counter value"); 13140b57cec5SDimitry Andric return 0; 13150b57cec5SDimitry Andric } 13160b57cec5SDimitry Andric 13170b57cec5SDimitry Andric if (Symbol *sym = symtab->find(name)) { 131816d6b3b3SDimitry Andric if (auto *ds = dyn_cast<Defined>(sym)) { 131916d6b3b3SDimitry Andric ExprValue v{ds->section, false, ds->value, loc}; 132016d6b3b3SDimitry Andric // Retain the original st_type, so that the alias will get the same 132116d6b3b3SDimitry Andric // behavior in relocation processing. Any operation will reset st_type to 132216d6b3b3SDimitry Andric // STT_NOTYPE. 132316d6b3b3SDimitry Andric v.type = ds->type; 132416d6b3b3SDimitry Andric return v; 132516d6b3b3SDimitry Andric } 13260b57cec5SDimitry Andric if (isa<SharedSymbol>(sym)) 13270b57cec5SDimitry Andric if (!errorOnMissingSection) 13280b57cec5SDimitry Andric return {nullptr, false, 0, loc}; 13290b57cec5SDimitry Andric } 13300b57cec5SDimitry Andric 13310b57cec5SDimitry Andric error(loc + ": symbol not found: " + name); 13320b57cec5SDimitry Andric return 0; 13330b57cec5SDimitry Andric } 13340b57cec5SDimitry Andric 13350b57cec5SDimitry Andric // Returns the index of the segment named Name. 
13360b57cec5SDimitry Andric static Optional<size_t> getPhdrIndex(ArrayRef<PhdrsCommand> vec, 13370b57cec5SDimitry Andric StringRef name) { 13380b57cec5SDimitry Andric for (size_t i = 0; i < vec.size(); ++i) 13390b57cec5SDimitry Andric if (vec[i].name == name) 13400b57cec5SDimitry Andric return i; 13410b57cec5SDimitry Andric return None; 13420b57cec5SDimitry Andric } 13430b57cec5SDimitry Andric 13440b57cec5SDimitry Andric // Returns indices of ELF headers containing specific section. Each index is a 13450b57cec5SDimitry Andric // zero based number of ELF header listed within PHDRS {} script block. 13460b57cec5SDimitry Andric std::vector<size_t> LinkerScript::getPhdrIndices(OutputSection *cmd) { 13470b57cec5SDimitry Andric std::vector<size_t> ret; 13480b57cec5SDimitry Andric 13490b57cec5SDimitry Andric for (StringRef s : cmd->phdrs) { 13500b57cec5SDimitry Andric if (Optional<size_t> idx = getPhdrIndex(phdrsCommands, s)) 13510b57cec5SDimitry Andric ret.push_back(*idx); 13520b57cec5SDimitry Andric else if (s != "NONE") 13535ffd83dbSDimitry Andric error(cmd->location + ": program header '" + s + 13540b57cec5SDimitry Andric "' is not listed in PHDRS"); 13550b57cec5SDimitry Andric } 13560b57cec5SDimitry Andric return ret; 13570b57cec5SDimitry Andric } 1358