1 //===- ScriptParser.cpp ---------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a recursive-descendent parser for linker scripts. 10 // Parsed results are stored to Config and Script global objects. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "ScriptParser.h" 15 #include "Config.h" 16 #include "Driver.h" 17 #include "InputFiles.h" 18 #include "LinkerScript.h" 19 #include "OutputSections.h" 20 #include "ScriptLexer.h" 21 #include "SymbolTable.h" 22 #include "Symbols.h" 23 #include "Target.h" 24 #include "lld/Common/CommonLinkerContext.h" 25 #include "llvm/ADT/SmallString.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/StringSet.h" 28 #include "llvm/ADT/StringSwitch.h" 29 #include "llvm/BinaryFormat/ELF.h" 30 #include "llvm/Support/Casting.h" 31 #include "llvm/Support/ErrorHandling.h" 32 #include "llvm/Support/FileSystem.h" 33 #include "llvm/Support/MathExtras.h" 34 #include "llvm/Support/Path.h" 35 #include "llvm/Support/SaveAndRestore.h" 36 #include "llvm/Support/TimeProfiler.h" 37 #include <cassert> 38 #include <limits> 39 #include <optional> 40 #include <vector> 41 42 using namespace llvm; 43 using namespace llvm::ELF; 44 using namespace llvm::support::endian; 45 using namespace lld; 46 using namespace lld::elf; 47 48 namespace { 49 class ScriptParser final : ScriptLexer { 50 public: 51 ScriptParser(MemoryBufferRef mb) : ScriptLexer(mb) { 52 // Initialize IsUnderSysroot 53 if (config->sysroot == "") 54 return; 55 StringRef path = mb.getBufferIdentifier(); 56 for (; !path.empty(); path = sys::path::parent_path(path)) { 57 if (!sys::fs::equivalent(config->sysroot, path)) 58 continue; 59 isUnderSysroot = true; 60 return; 61 } 62 } 63 64 void readLinkerScript(); 65 void readVersionScript(); 66 void readDynamicList(); 67 void readDefsym(StringRef name); 68 69 private: 70 void addFile(StringRef path); 71 72 void readAsNeeded(); 73 void readEntry(); 74 void readExtern(); 75 void readGroup(); 76 void readInclude(); 77 void readInput(); 78 void readMemory(); 79 void readOutput(); 80 void readOutputArch(); 81 void readOutputFormat(); 82 void readOverwriteSections(); 83 void readPhdrs(); 84 void readRegionAlias(); 85 void readSearchDir(); 86 void readSections(); 87 void readTarget(); 88 void readVersion(); 89 void readVersionScriptCommand(); 90 void readNoCrossRefs(bool to); 91 92 SymbolAssignment *readSymbolAssignment(StringRef name); 93 ByteCommand *readByteCommand(StringRef tok); 94 std::array<uint8_t, 4> readFill(); 95 bool readSectionDirective(OutputSection *cmd, StringRef tok); 96 void readSectionAddressType(OutputSection *cmd); 97 OutputDesc *readOverlaySectionDescription(); 98 OutputDesc *readOutputSectionDescription(StringRef outSec); 99 SmallVector<SectionCommand *, 0> readOverlay(); 100 SmallVector<StringRef, 0> readOutputSectionPhdrs(); 101 std::pair<uint64_t, uint64_t> readInputSectionFlags(); 102 InputSectionDescription *readInputSectionDescription(StringRef tok); 103 StringMatcher readFilePatterns(); 104 SmallVector<SectionPattern, 0> readInputSectionsList(); 105 InputSectionDescription *readInputSectionRules(StringRef filePattern, 106 uint64_t withFlags, 107 uint64_t withoutFlags); 108 unsigned readPhdrType(); 109 SortSectionPolicy peekSortKind(); 110 SortSectionPolicy readSortKind(); 111 SymbolAssignment *readProvideHidden(bool provide, bool hidden); 112 SymbolAssignment *readAssignment(StringRef tok); 113 void readSort(); 114 Expr readAssert(); 115 Expr readConstant(); 116 Expr getPageSize(); 117 118 Expr readMemoryAssignment(StringRef, StringRef, StringRef); 119 void readMemoryAttributes(uint32_t &flags, uint32_t &invFlags, 120 uint32_t &negFlags, uint32_t &negInvFlags); 121 122 Expr combine(StringRef op, Expr l, Expr r); 123 Expr readExpr(); 124 Expr readExpr1(Expr lhs, int minPrec); 125 StringRef readParenLiteral(); 126 Expr readPrimary(); 127 Expr readTernary(Expr cond); 128 Expr readParenExpr(); 129 130 // For parsing version script. 131 SmallVector<SymbolVersion, 0> readVersionExtern(); 132 void readAnonymousDeclaration(); 133 void readVersionDeclaration(StringRef verStr); 134 135 std::pair<SmallVector<SymbolVersion, 0>, SmallVector<SymbolVersion, 0>> 136 readSymbols(); 137 138 // True if a script being read is in the --sysroot directory. 139 bool isUnderSysroot = false; 140 141 // A set to detect an INCLUDE() cycle. 142 StringSet<> seen; 143 144 // If we are currently parsing a PROVIDE|PROVIDE_HIDDEN command, 145 // then this member is set to the PROVIDE symbol name. 146 std::optional<llvm::StringRef> activeProvideSym; 147 }; 148 } // namespace 149 150 static StringRef unquote(StringRef s) { 151 if (s.starts_with("\"")) 152 return s.substr(1, s.size() - 2); 153 return s; 154 } 155 156 // Some operations only support one non absolute value. Move the 157 // absolute one to the right hand side for convenience. 158 static void moveAbsRight(ExprValue &a, ExprValue &b) { 159 if (a.sec == nullptr || (a.forceAbsolute && !b.isAbsolute())) 160 std::swap(a, b); 161 if (!b.isAbsolute()) 162 script->recordError( 163 a.loc + ": at least one side of the expression must be absolute"); 164 } 165 166 static ExprValue add(ExprValue a, ExprValue b) { 167 moveAbsRight(a, b); 168 return {a.sec, a.forceAbsolute, a.getSectionOffset() + b.getValue(), a.loc}; 169 } 170 171 static ExprValue sub(ExprValue a, ExprValue b) { 172 // The distance between two symbols in sections is absolute. 173 if (!a.isAbsolute() && !b.isAbsolute()) 174 return a.getValue() - b.getValue(); 175 return {a.sec, false, a.getSectionOffset() - b.getValue(), a.loc}; 176 } 177 178 static ExprValue bitAnd(ExprValue a, ExprValue b) { 179 moveAbsRight(a, b); 180 return {a.sec, a.forceAbsolute, 181 (a.getValue() & b.getValue()) - a.getSecAddr(), a.loc}; 182 } 183 184 static ExprValue bitXor(ExprValue a, ExprValue b) { 185 moveAbsRight(a, b); 186 return {a.sec, a.forceAbsolute, 187 (a.getValue() ^ b.getValue()) - a.getSecAddr(), a.loc}; 188 } 189 190 static ExprValue bitOr(ExprValue a, ExprValue b) { 191 moveAbsRight(a, b); 192 return {a.sec, a.forceAbsolute, 193 (a.getValue() | b.getValue()) - a.getSecAddr(), a.loc}; 194 } 195 196 void ScriptParser::readDynamicList() { 197 expect("{"); 198 SmallVector<SymbolVersion, 0> locals; 199 SmallVector<SymbolVersion, 0> globals; 200 std::tie(locals, globals) = readSymbols(); 201 expect(";"); 202 203 if (!atEOF()) { 204 setError("EOF expected, but got " + next()); 205 return; 206 } 207 if (!locals.empty()) { 208 setError("\"local:\" scope not supported in --dynamic-list"); 209 return; 210 } 211 212 for (SymbolVersion v : globals) 213 config->dynamicList.push_back(v); 214 } 215 216 void ScriptParser::readVersionScript() { 217 readVersionScriptCommand(); 218 if (!atEOF()) 219 setError("EOF expected, but got " + next()); 220 } 221 222 void ScriptParser::readVersionScriptCommand() { 223 if (consume("{")) { 224 readAnonymousDeclaration(); 225 return; 226 } 227 228 while (!atEOF() && !errorCount() && peek() != "}") { 229 StringRef verStr = next(); 230 if (verStr == "{") { 231 setError("anonymous version definition is used in " 232 "combination with other version definitions"); 233 return; 234 } 235 expect("{"); 236 readVersionDeclaration(verStr); 237 } 238 } 239 240 void ScriptParser::readVersion() { 241 expect("{"); 242 readVersionScriptCommand(); 243 expect("}"); 244 } 245 246 void ScriptParser::readLinkerScript() { 247 while (!atEOF()) { 248 StringRef tok = next(); 249 if (tok == ";") 250 continue; 251 252 if (tok == "ENTRY") { 253 readEntry(); 254 } else if (tok == "EXTERN") { 255 readExtern(); 256 } else if (tok == "GROUP") { 257 readGroup(); 258 } else if (tok == "INCLUDE") { 259 readInclude(); 260 } else if (tok == "INPUT") { 261 readInput(); 262 } else if (tok == "MEMORY") { 263 readMemory(); 264 } else if (tok == "OUTPUT") { 265 readOutput(); 266 } else if (tok == "OUTPUT_ARCH") { 267 readOutputArch(); 268 } else if (tok == "OUTPUT_FORMAT") { 269 readOutputFormat(); 270 } else if (tok == "OVERWRITE_SECTIONS") { 271 readOverwriteSections(); 272 } else if (tok == "PHDRS") { 273 readPhdrs(); 274 } else if (tok == "REGION_ALIAS") { 275 readRegionAlias(); 276 } else if (tok == "SEARCH_DIR") { 277 readSearchDir(); 278 } else if (tok == "SECTIONS") { 279 readSections(); 280 } else if (tok == "TARGET") { 281 readTarget(); 282 } else if (tok == "VERSION") { 283 readVersion(); 284 } else if (tok == "NOCROSSREFS") { 285 readNoCrossRefs(/*to=*/false); 286 } else if (tok == "NOCROSSREFS_TO") { 287 readNoCrossRefs(/*to=*/true); 288 } else if (SymbolAssignment *cmd = readAssignment(tok)) { 289 script->sectionCommands.push_back(cmd); 290 } else { 291 setError("unknown directive: " + tok); 292 } 293 } 294 } 295 296 void ScriptParser::readDefsym(StringRef name) { 297 if (errorCount()) 298 return; 299 Expr e = readExpr(); 300 if (!atEOF()) 301 setError("EOF expected, but got " + next()); 302 auto *cmd = make<SymbolAssignment>( 303 name, e, 0, getCurrentMB().getBufferIdentifier().str()); 304 script->sectionCommands.push_back(cmd); 305 } 306 307 void ScriptParser::readNoCrossRefs(bool to) { 308 expect("("); 309 NoCrossRefCommand cmd{{}, to}; 310 while (!errorCount() && !consume(")")) 311 cmd.outputSections.push_back(unquote(next())); 312 if (cmd.outputSections.size() < 2) 313 warn(getCurrentLocation() + ": ignored with fewer than 2 output sections"); 314 else 315 script->noCrossRefs.push_back(std::move(cmd)); 316 } 317 318 void ScriptParser::addFile(StringRef s) { 319 if (isUnderSysroot && s.starts_with("/")) { 320 SmallString<128> pathData; 321 StringRef path = (config->sysroot + s).toStringRef(pathData); 322 if (sys::fs::exists(path)) 323 ctx.driver.addFile(saver().save(path), /*withLOption=*/false); 324 else 325 setError("cannot find " + s + " inside " + config->sysroot); 326 return; 327 } 328 329 if (s.starts_with("/")) { 330 // Case 1: s is an absolute path. Just open it. 331 ctx.driver.addFile(s, /*withLOption=*/false); 332 } else if (s.starts_with("=")) { 333 // Case 2: relative to the sysroot. 334 if (config->sysroot.empty()) 335 ctx.driver.addFile(s.substr(1), /*withLOption=*/false); 336 else 337 ctx.driver.addFile(saver().save(config->sysroot + "/" + s.substr(1)), 338 /*withLOption=*/false); 339 } else if (s.starts_with("-l")) { 340 // Case 3: search in the list of library paths. 341 ctx.driver.addLibrary(s.substr(2)); 342 } else { 343 // Case 4: s is a relative path. Search in the directory of the script file. 344 std::string filename = std::string(getCurrentMB().getBufferIdentifier()); 345 StringRef directory = sys::path::parent_path(filename); 346 if (!directory.empty()) { 347 SmallString<0> path(directory); 348 sys::path::append(path, s); 349 if (sys::fs::exists(path)) { 350 ctx.driver.addFile(path, /*withLOption=*/false); 351 return; 352 } 353 } 354 // Then search in the current working directory. 355 if (sys::fs::exists(s)) { 356 ctx.driver.addFile(s, /*withLOption=*/false); 357 } else { 358 // Finally, search in the list of library paths. 359 if (std::optional<std::string> path = findFromSearchPaths(s)) 360 ctx.driver.addFile(saver().save(*path), /*withLOption=*/true); 361 else 362 setError("unable to find " + s); 363 } 364 } 365 } 366 367 void ScriptParser::readAsNeeded() { 368 expect("("); 369 bool orig = config->asNeeded; 370 config->asNeeded = true; 371 while (!errorCount() && !consume(")")) 372 addFile(unquote(next())); 373 config->asNeeded = orig; 374 } 375 376 void ScriptParser::readEntry() { 377 // -e <symbol> takes predecence over ENTRY(<symbol>). 378 expect("("); 379 StringRef tok = next(); 380 if (config->entry.empty()) 381 config->entry = unquote(tok); 382 expect(")"); 383 } 384 385 void ScriptParser::readExtern() { 386 expect("("); 387 while (!errorCount() && !consume(")")) 388 config->undefined.push_back(unquote(next())); 389 } 390 391 void ScriptParser::readGroup() { 392 bool orig = InputFile::isInGroup; 393 InputFile::isInGroup = true; 394 readInput(); 395 InputFile::isInGroup = orig; 396 if (!orig) 397 ++InputFile::nextGroupId; 398 } 399 400 void ScriptParser::readInclude() { 401 StringRef tok = unquote(next()); 402 403 if (!seen.insert(tok).second) { 404 setError("there is a cycle in linker script INCLUDEs"); 405 return; 406 } 407 408 if (std::optional<std::string> path = searchScript(tok)) { 409 if (std::optional<MemoryBufferRef> mb = readFile(*path)) 410 tokenize(*mb); 411 return; 412 } 413 setError("cannot find linker script " + tok); 414 } 415 416 void ScriptParser::readInput() { 417 expect("("); 418 while (!errorCount() && !consume(")")) { 419 if (consume("AS_NEEDED")) 420 readAsNeeded(); 421 else 422 addFile(unquote(next())); 423 } 424 } 425 426 void ScriptParser::readOutput() { 427 // -o <file> takes predecence over OUTPUT(<file>). 428 expect("("); 429 StringRef tok = next(); 430 if (config->outputFile.empty()) 431 config->outputFile = unquote(tok); 432 expect(")"); 433 } 434 435 void ScriptParser::readOutputArch() { 436 // OUTPUT_ARCH is ignored for now. 437 expect("("); 438 while (!errorCount() && !consume(")")) 439 skip(); 440 } 441 442 static std::pair<ELFKind, uint16_t> parseBfdName(StringRef s) { 443 return StringSwitch<std::pair<ELFKind, uint16_t>>(s) 444 .Case("elf32-i386", {ELF32LEKind, EM_386}) 445 .Case("elf32-avr", {ELF32LEKind, EM_AVR}) 446 .Case("elf32-iamcu", {ELF32LEKind, EM_IAMCU}) 447 .Case("elf32-littlearm", {ELF32LEKind, EM_ARM}) 448 .Case("elf32-bigarm", {ELF32BEKind, EM_ARM}) 449 .Case("elf32-x86-64", {ELF32LEKind, EM_X86_64}) 450 .Case("elf64-aarch64", {ELF64LEKind, EM_AARCH64}) 451 .Case("elf64-littleaarch64", {ELF64LEKind, EM_AARCH64}) 452 .Case("elf64-bigaarch64", {ELF64BEKind, EM_AARCH64}) 453 .Case("elf32-powerpc", {ELF32BEKind, EM_PPC}) 454 .Case("elf32-powerpcle", {ELF32LEKind, EM_PPC}) 455 .Case("elf64-powerpc", {ELF64BEKind, EM_PPC64}) 456 .Case("elf64-powerpcle", {ELF64LEKind, EM_PPC64}) 457 .Case("elf64-x86-64", {ELF64LEKind, EM_X86_64}) 458 .Cases("elf32-tradbigmips", "elf32-bigmips", {ELF32BEKind, EM_MIPS}) 459 .Case("elf32-ntradbigmips", {ELF32BEKind, EM_MIPS}) 460 .Case("elf32-tradlittlemips", {ELF32LEKind, EM_MIPS}) 461 .Case("elf32-ntradlittlemips", {ELF32LEKind, EM_MIPS}) 462 .Case("elf64-tradbigmips", {ELF64BEKind, EM_MIPS}) 463 .Case("elf64-tradlittlemips", {ELF64LEKind, EM_MIPS}) 464 .Case("elf32-littleriscv", {ELF32LEKind, EM_RISCV}) 465 .Case("elf64-littleriscv", {ELF64LEKind, EM_RISCV}) 466 .Case("elf64-sparc", {ELF64BEKind, EM_SPARCV9}) 467 .Case("elf32-msp430", {ELF32LEKind, EM_MSP430}) 468 .Case("elf32-loongarch", {ELF32LEKind, EM_LOONGARCH}) 469 .Case("elf64-loongarch", {ELF64LEKind, EM_LOONGARCH}) 470 .Case("elf64-s390", {ELF64BEKind, EM_S390}) 471 .Cases("elf32-hexagon", "elf32-littlehexagon", {ELF32LEKind, EM_HEXAGON}) 472 .Default({ELFNoneKind, EM_NONE}); 473 } 474 475 // Parse OUTPUT_FORMAT(bfdname) or OUTPUT_FORMAT(default, big, little). Choose 476 // big if -EB is specified, little if -EL is specified, or default if neither is 477 // specified. 478 void ScriptParser::readOutputFormat() { 479 expect("("); 480 481 StringRef s = unquote(next()); 482 if (!consume(")")) { 483 expect(","); 484 StringRef tmp = unquote(next()); 485 if (config->optEB) 486 s = tmp; 487 expect(","); 488 tmp = unquote(next()); 489 if (config->optEL) 490 s = tmp; 491 consume(")"); 492 } 493 // If more than one OUTPUT_FORMAT is specified, only the first is checked. 494 if (!config->bfdname.empty()) 495 return; 496 config->bfdname = s; 497 498 if (s == "binary") { 499 config->oFormatBinary = true; 500 return; 501 } 502 503 if (s.consume_back("-freebsd")) 504 config->osabi = ELFOSABI_FREEBSD; 505 506 std::tie(config->ekind, config->emachine) = parseBfdName(s); 507 if (config->emachine == EM_NONE) 508 setError("unknown output format name: " + config->bfdname); 509 if (s == "elf32-ntradlittlemips" || s == "elf32-ntradbigmips") 510 config->mipsN32Abi = true; 511 if (config->emachine == EM_MSP430) 512 config->osabi = ELFOSABI_STANDALONE; 513 } 514 515 void ScriptParser::readPhdrs() { 516 expect("{"); 517 518 while (!errorCount() && !consume("}")) { 519 PhdrsCommand cmd; 520 cmd.name = next(); 521 cmd.type = readPhdrType(); 522 523 while (!errorCount() && !consume(";")) { 524 if (consume("FILEHDR")) 525 cmd.hasFilehdr = true; 526 else if (consume("PHDRS")) 527 cmd.hasPhdrs = true; 528 else if (consume("AT")) 529 cmd.lmaExpr = readParenExpr(); 530 else if (consume("FLAGS")) 531 cmd.flags = readParenExpr()().getValue(); 532 else 533 setError("unexpected header attribute: " + next()); 534 } 535 536 script->phdrsCommands.push_back(cmd); 537 } 538 } 539 540 void ScriptParser::readRegionAlias() { 541 expect("("); 542 StringRef alias = unquote(next()); 543 expect(","); 544 StringRef name = next(); 545 expect(")"); 546 547 if (script->memoryRegions.count(alias)) 548 setError("redefinition of memory region '" + alias + "'"); 549 if (!script->memoryRegions.count(name)) 550 setError("memory region '" + name + "' is not defined"); 551 script->memoryRegions.insert({alias, script->memoryRegions[name]}); 552 } 553 554 void ScriptParser::readSearchDir() { 555 expect("("); 556 StringRef tok = next(); 557 if (!config->nostdlib) 558 config->searchPaths.push_back(unquote(tok)); 559 expect(")"); 560 } 561 562 // This reads an overlay description. Overlays are used to describe output 563 // sections that use the same virtual memory range and normally would trigger 564 // linker's sections sanity check failures. 565 // https://sourceware.org/binutils/docs/ld/Overlay-Description.html#Overlay-Description 566 SmallVector<SectionCommand *, 0> ScriptParser::readOverlay() { 567 Expr addrExpr; 568 if (consume(":")) { 569 addrExpr = [] { return script->getDot(); }; 570 } else { 571 addrExpr = readExpr(); 572 expect(":"); 573 } 574 // When AT is omitted, LMA should equal VMA. script->getDot() when evaluating 575 // lmaExpr will ensure this, even if the start address is specified. 576 Expr lmaExpr = 577 consume("AT") ? readParenExpr() : [] { return script->getDot(); }; 578 expect("{"); 579 580 SmallVector<SectionCommand *, 0> v; 581 OutputSection *prev = nullptr; 582 while (!errorCount() && !consume("}")) { 583 // VA is the same for all sections. The LMAs are consecutive in memory 584 // starting from the base load address specified. 585 OutputDesc *osd = readOverlaySectionDescription(); 586 osd->osec.addrExpr = addrExpr; 587 if (prev) { 588 osd->osec.lmaExpr = [=] { return prev->getLMA() + prev->size; }; 589 } else { 590 osd->osec.lmaExpr = lmaExpr; 591 // Use first section address for subsequent sections as initial addrExpr 592 // can be DOT. Ensure the first section, even if empty, is not discarded. 593 osd->osec.usedInExpression = true; 594 addrExpr = [=]() -> ExprValue { return {&osd->osec, false, 0, ""}; }; 595 } 596 v.push_back(osd); 597 prev = &osd->osec; 598 } 599 600 // According to the specification, at the end of the overlay, the location 601 // counter should be equal to the overlay base address plus size of the 602 // largest section seen in the overlay. 603 // Here we want to create the Dot assignment command to achieve that. 604 Expr moveDot = [=] { 605 uint64_t max = 0; 606 for (SectionCommand *cmd : v) 607 max = std::max(max, cast<OutputDesc>(cmd)->osec.size); 608 return addrExpr().getValue() + max; 609 }; 610 v.push_back(make<SymbolAssignment>(".", moveDot, 0, getCurrentLocation())); 611 return v; 612 } 613 614 void ScriptParser::readOverwriteSections() { 615 expect("{"); 616 while (!errorCount() && !consume("}")) 617 script->overwriteSections.push_back(readOutputSectionDescription(next())); 618 } 619 620 void ScriptParser::readSections() { 621 expect("{"); 622 SmallVector<SectionCommand *, 0> v; 623 while (!errorCount() && !consume("}")) { 624 StringRef tok = next(); 625 if (tok == "OVERLAY") { 626 for (SectionCommand *cmd : readOverlay()) 627 v.push_back(cmd); 628 continue; 629 } else if (tok == "INCLUDE") { 630 readInclude(); 631 continue; 632 } 633 634 if (SectionCommand *cmd = readAssignment(tok)) 635 v.push_back(cmd); 636 else 637 v.push_back(readOutputSectionDescription(tok)); 638 } 639 640 // If DATA_SEGMENT_RELRO_END is absent, for sections after DATA_SEGMENT_ALIGN, 641 // the relro fields should be cleared. 642 if (!script->seenRelroEnd) 643 for (SectionCommand *cmd : v) 644 if (auto *osd = dyn_cast<OutputDesc>(cmd)) 645 osd->osec.relro = false; 646 647 script->sectionCommands.insert(script->sectionCommands.end(), v.begin(), 648 v.end()); 649 650 if (atEOF() || !consume("INSERT")) { 651 script->hasSectionsCommand = true; 652 return; 653 } 654 655 bool isAfter = false; 656 if (consume("AFTER")) 657 isAfter = true; 658 else if (!consume("BEFORE")) 659 setError("expected AFTER/BEFORE, but got '" + next() + "'"); 660 StringRef where = next(); 661 SmallVector<StringRef, 0> names; 662 for (SectionCommand *cmd : v) 663 if (auto *os = dyn_cast<OutputDesc>(cmd)) 664 names.push_back(os->osec.name); 665 if (!names.empty()) 666 script->insertCommands.push_back({std::move(names), isAfter, where}); 667 } 668 669 void ScriptParser::readTarget() { 670 // TARGET(foo) is an alias for "--format foo". Unlike GNU linkers, 671 // we accept only a limited set of BFD names (i.e. "elf" or "binary") 672 // for --format. We recognize only /^elf/ and "binary" in the linker 673 // script as well. 674 expect("("); 675 StringRef tok = unquote(next()); 676 expect(")"); 677 678 if (tok.starts_with("elf")) 679 config->formatBinary = false; 680 else if (tok == "binary") 681 config->formatBinary = true; 682 else 683 setError("unknown target: " + tok); 684 } 685 686 static int precedence(StringRef op) { 687 return StringSwitch<int>(op) 688 .Cases("*", "/", "%", 11) 689 .Cases("+", "-", 10) 690 .Cases("<<", ">>", 9) 691 .Cases("<", "<=", ">", ">=", 8) 692 .Cases("==", "!=", 7) 693 .Case("&", 6) 694 .Case("^", 5) 695 .Case("|", 4) 696 .Case("&&", 3) 697 .Case("||", 2) 698 .Case("?", 1) 699 .Default(-1); 700 } 701 702 StringMatcher ScriptParser::readFilePatterns() { 703 StringMatcher Matcher; 704 705 while (!errorCount() && !consume(")")) 706 Matcher.addPattern(SingleStringMatcher(next())); 707 return Matcher; 708 } 709 710 SortSectionPolicy ScriptParser::peekSortKind() { 711 return StringSwitch<SortSectionPolicy>(peek()) 712 .Case("REVERSE", SortSectionPolicy::Reverse) 713 .Cases("SORT", "SORT_BY_NAME", SortSectionPolicy::Name) 714 .Case("SORT_BY_ALIGNMENT", SortSectionPolicy::Alignment) 715 .Case("SORT_BY_INIT_PRIORITY", SortSectionPolicy::Priority) 716 .Case("SORT_NONE", SortSectionPolicy::None) 717 .Default(SortSectionPolicy::Default); 718 } 719 720 SortSectionPolicy ScriptParser::readSortKind() { 721 SortSectionPolicy ret = peekSortKind(); 722 if (ret != SortSectionPolicy::Default) 723 skip(); 724 return ret; 725 } 726 727 // Reads SECTIONS command contents in the following form: 728 // 729 // <contents> ::= <elem>* 730 // <elem> ::= <exclude>? <glob-pattern> 731 // <exclude> ::= "EXCLUDE_FILE" "(" <glob-pattern>+ ")" 732 // 733 // For example, 734 // 735 // *(.foo EXCLUDE_FILE (a.o) .bar EXCLUDE_FILE (b.o) .baz) 736 // 737 // is parsed as ".foo", ".bar" with "a.o", and ".baz" with "b.o". 738 // The semantics of that is section .foo in any file, section .bar in 739 // any file but a.o, and section .baz in any file but b.o. 740 SmallVector<SectionPattern, 0> ScriptParser::readInputSectionsList() { 741 SmallVector<SectionPattern, 0> ret; 742 while (!errorCount() && peek() != ")") { 743 StringMatcher excludeFilePat; 744 if (consume("EXCLUDE_FILE")) { 745 expect("("); 746 excludeFilePat = readFilePatterns(); 747 } 748 749 StringMatcher SectionMatcher; 750 // Break if the next token is ), EXCLUDE_FILE, or SORT*. 751 while (!errorCount() && peekSortKind() == SortSectionPolicy::Default) { 752 StringRef s = peek(); 753 if (s == ")" || s == "EXCLUDE_FILE") 754 break; 755 // Detect common mistakes when certain non-wildcard meta characters are 756 // used without a closing ')'. 757 if (!s.empty() && strchr("(){}", s[0])) { 758 skip(); 759 setError("section pattern is expected"); 760 break; 761 } 762 SectionMatcher.addPattern(unquote(next())); 763 } 764 765 if (!SectionMatcher.empty()) 766 ret.push_back({std::move(excludeFilePat), std::move(SectionMatcher)}); 767 else if (excludeFilePat.empty()) 768 break; 769 else 770 setError("section pattern is expected"); 771 } 772 return ret; 773 } 774 775 // Reads contents of "SECTIONS" directive. That directive contains a 776 // list of glob patterns for input sections. The grammar is as follows. 777 // 778 // <patterns> ::= <section-list> 779 // | <sort> "(" <section-list> ")" 780 // | <sort> "(" <sort> "(" <section-list> ")" ")" 781 // 782 // <sort> ::= "SORT" | "SORT_BY_NAME" | "SORT_BY_ALIGNMENT" 783 // | "SORT_BY_INIT_PRIORITY" | "SORT_NONE" 784 // 785 // <section-list> is parsed by readInputSectionsList(). 786 InputSectionDescription * 787 ScriptParser::readInputSectionRules(StringRef filePattern, uint64_t withFlags, 788 uint64_t withoutFlags) { 789 auto *cmd = 790 make<InputSectionDescription>(filePattern, withFlags, withoutFlags); 791 expect("("); 792 793 while (!errorCount() && !consume(")")) { 794 SortSectionPolicy outer = readSortKind(); 795 SortSectionPolicy inner = SortSectionPolicy::Default; 796 SmallVector<SectionPattern, 0> v; 797 if (outer != SortSectionPolicy::Default) { 798 expect("("); 799 inner = readSortKind(); 800 if (inner != SortSectionPolicy::Default) { 801 expect("("); 802 v = readInputSectionsList(); 803 expect(")"); 804 } else { 805 v = readInputSectionsList(); 806 } 807 expect(")"); 808 } else { 809 v = readInputSectionsList(); 810 } 811 812 for (SectionPattern &pat : v) { 813 pat.sortInner = inner; 814 pat.sortOuter = outer; 815 } 816 817 std::move(v.begin(), v.end(), std::back_inserter(cmd->sectionPatterns)); 818 } 819 return cmd; 820 } 821 822 InputSectionDescription * 823 ScriptParser::readInputSectionDescription(StringRef tok) { 824 // Input section wildcard can be surrounded by KEEP. 825 // https://sourceware.org/binutils/docs/ld/Input-Section-Keep.html#Input-Section-Keep 826 uint64_t withFlags = 0; 827 uint64_t withoutFlags = 0; 828 if (tok == "KEEP") { 829 expect("("); 830 if (consume("INPUT_SECTION_FLAGS")) 831 std::tie(withFlags, withoutFlags) = readInputSectionFlags(); 832 InputSectionDescription *cmd = 833 readInputSectionRules(next(), withFlags, withoutFlags); 834 expect(")"); 835 script->keptSections.push_back(cmd); 836 return cmd; 837 } 838 if (tok == "INPUT_SECTION_FLAGS") { 839 std::tie(withFlags, withoutFlags) = readInputSectionFlags(); 840 tok = next(); 841 } 842 return readInputSectionRules(tok, withFlags, withoutFlags); 843 } 844 845 void ScriptParser::readSort() { 846 expect("("); 847 expect("CONSTRUCTORS"); 848 expect(")"); 849 } 850 851 Expr ScriptParser::readAssert() { 852 expect("("); 853 Expr e = readExpr(); 854 expect(","); 855 StringRef msg = unquote(next()); 856 expect(")"); 857 858 return [=] { 859 if (!e().getValue()) 860 errorOrWarn(msg); 861 return script->getDot(); 862 }; 863 } 864 865 #define ECase(X) \ 866 { #X, X } 867 constexpr std::pair<const char *, unsigned> typeMap[] = { 868 ECase(SHT_PROGBITS), ECase(SHT_NOTE), ECase(SHT_NOBITS), 869 ECase(SHT_INIT_ARRAY), ECase(SHT_FINI_ARRAY), ECase(SHT_PREINIT_ARRAY), 870 }; 871 #undef ECase 872 873 // Tries to read the special directive for an output section definition which 874 // can be one of following: "(NOLOAD)", "(COPY)", "(INFO)", "(OVERLAY)", and 875 // "(TYPE=<value>)". 876 bool ScriptParser::readSectionDirective(OutputSection *cmd, StringRef tok) { 877 if (tok != "NOLOAD" && tok != "COPY" && tok != "INFO" && tok != "OVERLAY" && 878 tok != "TYPE") 879 return false; 880 881 if (consume("NOLOAD")) { 882 cmd->type = SHT_NOBITS; 883 cmd->typeIsSet = true; 884 } else if (consume("TYPE")) { 885 expect("="); 886 StringRef value = peek(); 887 auto it = llvm::find_if(typeMap, [=](auto e) { return e.first == value; }); 888 if (it != std::end(typeMap)) { 889 // The value is a recognized literal SHT_*. 890 cmd->type = it->second; 891 skip(); 892 } else if (value.starts_with("SHT_")) { 893 setError("unknown section type " + value); 894 } else { 895 // Otherwise, read an expression. 896 cmd->type = readExpr()().getValue(); 897 } 898 cmd->typeIsSet = true; 899 } else { 900 skip(); // This is "COPY", "INFO" or "OVERLAY". 901 cmd->nonAlloc = true; 902 } 903 expect(")"); 904 return true; 905 } 906 907 // Reads an expression and/or the special directive for an output 908 // section definition. Directive is one of following: "(NOLOAD)", 909 // "(COPY)", "(INFO)" or "(OVERLAY)". 910 // 911 // An output section name can be followed by an address expression 912 // and/or directive. This grammar is not LL(1) because "(" can be 913 // interpreted as either the beginning of some expression or beginning 914 // of directive. 915 // 916 // https://sourceware.org/binutils/docs/ld/Output-Section-Address.html 917 // https://sourceware.org/binutils/docs/ld/Output-Section-Type.html 918 void ScriptParser::readSectionAddressType(OutputSection *cmd) { 919 if (consume("(")) { 920 // Temporarily set inExpr to support TYPE=<value> without spaces. 921 SaveAndRestore saved(inExpr, true); 922 if (readSectionDirective(cmd, peek())) 923 return; 924 cmd->addrExpr = readExpr(); 925 expect(")"); 926 } else { 927 cmd->addrExpr = readExpr(); 928 } 929 930 if (consume("(")) { 931 SaveAndRestore saved(inExpr, true); 932 StringRef tok = peek(); 933 if (!readSectionDirective(cmd, tok)) 934 setError("unknown section directive: " + tok); 935 } 936 } 937 938 static Expr checkAlignment(Expr e, std::string &loc) { 939 return [=] { 940 uint64_t alignment = std::max((uint64_t)1, e().getValue()); 941 if (!isPowerOf2_64(alignment)) { 942 error(loc + ": alignment must be power of 2"); 943 return (uint64_t)1; // Return a dummy value. 944 } 945 return alignment; 946 }; 947 } 948 949 OutputDesc *ScriptParser::readOverlaySectionDescription() { 950 OutputDesc *osd = script->createOutputSection(next(), getCurrentLocation()); 951 osd->osec.inOverlay = true; 952 expect("{"); 953 while (!errorCount() && !consume("}")) { 954 uint64_t withFlags = 0; 955 uint64_t withoutFlags = 0; 956 if (consume("INPUT_SECTION_FLAGS")) 957 std::tie(withFlags, withoutFlags) = readInputSectionFlags(); 958 osd->osec.commands.push_back( 959 readInputSectionRules(next(), withFlags, withoutFlags)); 960 } 961 osd->osec.phdrs = readOutputSectionPhdrs(); 962 return osd; 963 } 964 965 OutputDesc *ScriptParser::readOutputSectionDescription(StringRef outSec) { 966 OutputDesc *cmd = 967 script->createOutputSection(unquote(outSec), getCurrentLocation()); 968 OutputSection *osec = &cmd->osec; 969 // Maybe relro. Will reset to false if DATA_SEGMENT_RELRO_END is absent. 970 osec->relro = script->seenDataAlign && !script->seenRelroEnd; 971 972 size_t symbolsReferenced = script->referencedSymbols.size(); 973 974 if (peek() != ":") 975 readSectionAddressType(osec); 976 expect(":"); 977 978 std::string location = getCurrentLocation(); 979 if (consume("AT")) 980 osec->lmaExpr = readParenExpr(); 981 if (consume("ALIGN")) 982 osec->alignExpr = checkAlignment(readParenExpr(), location); 983 if (consume("SUBALIGN")) 984 osec->subalignExpr = checkAlignment(readParenExpr(), location); 985 986 // Parse constraints. 987 if (consume("ONLY_IF_RO")) 988 osec->constraint = ConstraintKind::ReadOnly; 989 if (consume("ONLY_IF_RW")) 990 osec->constraint = ConstraintKind::ReadWrite; 991 expect("{"); 992 993 while (!errorCount() && !consume("}")) { 994 StringRef tok = next(); 995 if (tok == ";") { 996 // Empty commands are allowed. Do nothing here. 997 } else if (SymbolAssignment *assign = readAssignment(tok)) { 998 osec->commands.push_back(assign); 999 } else if (ByteCommand *data = readByteCommand(tok)) { 1000 osec->commands.push_back(data); 1001 } else if (tok == "CONSTRUCTORS") { 1002 // CONSTRUCTORS is a keyword to make the linker recognize C++ ctors/dtors 1003 // by name. This is for very old file formats such as ECOFF/XCOFF. 1004 // For ELF, we should ignore. 1005 } else if (tok == "FILL") { 1006 // We handle the FILL command as an alias for =fillexp section attribute, 1007 // which is different from what GNU linkers do. 1008 // https://sourceware.org/binutils/docs/ld/Output-Section-Data.html 1009 if (peek() != "(") 1010 setError("( expected, but got " + peek()); 1011 osec->filler = readFill(); 1012 } else if (tok == "SORT") { 1013 readSort(); 1014 } else if (tok == "INCLUDE") { 1015 readInclude(); 1016 } else if (tok == "(" || tok == ")") { 1017 setError("expected filename pattern"); 1018 } else if (peek() == "(") { 1019 osec->commands.push_back(readInputSectionDescription(tok)); 1020 } else { 1021 // We have a file name and no input sections description. It is not a 1022 // commonly used syntax, but still acceptable. In that case, all sections 1023 // from the file will be included. 1024 // FIXME: GNU ld permits INPUT_SECTION_FLAGS to be used here. We do not 1025 // handle this case here as it will already have been matched by the 1026 // case above. 1027 auto *isd = make<InputSectionDescription>(tok); 1028 isd->sectionPatterns.push_back({{}, StringMatcher("*")}); 1029 osec->commands.push_back(isd); 1030 } 1031 } 1032 1033 if (consume(">")) 1034 osec->memoryRegionName = std::string(next()); 1035 1036 if (consume("AT")) { 1037 expect(">"); 1038 osec->lmaRegionName = std::string(next()); 1039 } 1040 1041 if (osec->lmaExpr && !osec->lmaRegionName.empty()) 1042 error("section can't have both LMA and a load region"); 1043 1044 osec->phdrs = readOutputSectionPhdrs(); 1045 1046 if (peek() == "=" || peek().starts_with("=")) { 1047 inExpr = true; 1048 consume("="); 1049 osec->filler = readFill(); 1050 inExpr = false; 1051 } 1052 1053 // Consume optional comma following output section command. 1054 consume(","); 1055 1056 if (script->referencedSymbols.size() > symbolsReferenced) 1057 osec->expressionsUseSymbols = true; 1058 return cmd; 1059 } 1060 1061 // Reads a `=<fillexp>` expression and returns its value as a big-endian number. 1062 // https://sourceware.org/binutils/docs/ld/Output-Section-Fill.html 1063 // We do not support using symbols in such expressions. 1064 // 1065 // When reading a hexstring, ld.bfd handles it as a blob of arbitrary 1066 // size, while ld.gold always handles it as a 32-bit big-endian number. 1067 // We are compatible with ld.gold because it's easier to implement. 1068 // Also, we require that expressions with operators must be wrapped into 1069 // round brackets. We did it to resolve the ambiguity when parsing scripts like: 1070 // SECTIONS { .foo : { ... } =120+3 /DISCARD/ : { ... } } 1071 std::array<uint8_t, 4> ScriptParser::readFill() { 1072 uint64_t value = readPrimary()().val; 1073 if (value > UINT32_MAX) 1074 setError("filler expression result does not fit 32-bit: 0x" + 1075 Twine::utohexstr(value)); 1076 1077 std::array<uint8_t, 4> buf; 1078 write32be(buf.data(), (uint32_t)value); 1079 return buf; 1080 } 1081 1082 SymbolAssignment *ScriptParser::readProvideHidden(bool provide, bool hidden) { 1083 expect("("); 1084 StringRef name = next(), eq = peek(); 1085 if (eq != "=") { 1086 setError("= expected, but got " + next()); 1087 while (!atEOF() && next() != ")") 1088 ; 1089 return nullptr; 1090 } 1091 llvm::SaveAndRestore saveActiveProvideSym(activeProvideSym); 1092 if (provide) 1093 activeProvideSym = name; 1094 SymbolAssignment *cmd = readSymbolAssignment(name); 1095 cmd->provide = provide; 1096 cmd->hidden = hidden; 1097 expect(")"); 1098 return cmd; 1099 } 1100 1101 SymbolAssignment *ScriptParser::readAssignment(StringRef tok) { 1102 // Assert expression returns Dot, so this is equal to ".=." 1103 if (tok == "ASSERT") 1104 return make<SymbolAssignment>(".", readAssert(), 0, getCurrentLocation()); 1105 1106 size_t oldPos = pos; 1107 SymbolAssignment *cmd = nullptr; 1108 bool savedSeenRelroEnd = script->seenRelroEnd; 1109 const StringRef op = peek(); 1110 if (op.starts_with("=")) { 1111 // Support = followed by an expression without whitespace. 1112 SaveAndRestore saved(inExpr, true); 1113 cmd = readSymbolAssignment(tok); 1114 } else if ((op.size() == 2 && op[1] == '=' && strchr("*/+-&^|", op[0])) || 1115 op == "<<=" || op == ">>=") { 1116 cmd = readSymbolAssignment(tok); 1117 } else if (tok == "PROVIDE") { 1118 SaveAndRestore saved(inExpr, true); 1119 cmd = readProvideHidden(true, false); 1120 } else if (tok == "HIDDEN") { 1121 SaveAndRestore saved(inExpr, true); 1122 cmd = readProvideHidden(false, true); 1123 } else if (tok == "PROVIDE_HIDDEN") { 1124 SaveAndRestore saved(inExpr, true); 1125 cmd = readProvideHidden(true, true); 1126 } 1127 1128 if (cmd) { 1129 cmd->dataSegmentRelroEnd = !savedSeenRelroEnd && script->seenRelroEnd; 1130 cmd->commandString = 1131 tok.str() + " " + 1132 llvm::join(tokens.begin() + oldPos, tokens.begin() + pos, " "); 1133 expect(";"); 1134 } 1135 return cmd; 1136 } 1137 1138 SymbolAssignment *ScriptParser::readSymbolAssignment(StringRef name) { 1139 name = unquote(name); 1140 StringRef op = next(); 1141 assert(op == "=" || op == "*=" || op == "/=" || op == "+=" || op == "-=" || 1142 op == "&=" || op == "^=" || op == "|=" || op == "<<=" || op == ">>="); 1143 // Note: GNU ld does not support %=. 1144 Expr e = readExpr(); 1145 if (op != "=") { 1146 std::string loc = getCurrentLocation(); 1147 e = [=, c = op[0]]() -> ExprValue { 1148 ExprValue lhs = script->getSymbolValue(name, loc); 1149 switch (c) { 1150 case '*': 1151 return lhs.getValue() * e().getValue(); 1152 case '/': 1153 if (uint64_t rv = e().getValue()) 1154 return lhs.getValue() / rv; 1155 error(loc + ": division by zero"); 1156 return 0; 1157 case '+': 1158 return add(lhs, e()); 1159 case '-': 1160 return sub(lhs, e()); 1161 case '<': 1162 return lhs.getValue() << e().getValue() % 64; 1163 case '>': 1164 return lhs.getValue() >> e().getValue() % 64; 1165 case '&': 1166 return lhs.getValue() & e().getValue(); 1167 case '^': 1168 return lhs.getValue() ^ e().getValue(); 1169 case '|': 1170 return lhs.getValue() | e().getValue(); 1171 default: 1172 llvm_unreachable(""); 1173 } 1174 }; 1175 } 1176 return make<SymbolAssignment>(name, e, ctx.scriptSymOrderCounter++, 1177 getCurrentLocation()); 1178 } 1179 1180 // This is an operator-precedence parser to parse a linker 1181 // script expression. 1182 Expr ScriptParser::readExpr() { 1183 // Our lexer is context-aware. Set the in-expression bit so that 1184 // they apply different tokenization rules. 1185 SaveAndRestore saved(inExpr, true); 1186 Expr e = readExpr1(readPrimary(), 0); 1187 return e; 1188 } 1189 1190 Expr ScriptParser::combine(StringRef op, Expr l, Expr r) { 1191 if (op == "+") 1192 return [=] { return add(l(), r()); }; 1193 if (op == "-") 1194 return [=] { return sub(l(), r()); }; 1195 if (op == "*") 1196 return [=] { return l().getValue() * r().getValue(); }; 1197 if (op == "/") { 1198 std::string loc = getCurrentLocation(); 1199 return [=]() -> uint64_t { 1200 if (uint64_t rv = r().getValue()) 1201 return l().getValue() / rv; 1202 error(loc + ": division by zero"); 1203 return 0; 1204 }; 1205 } 1206 if (op == "%") { 1207 std::string loc = getCurrentLocation(); 1208 return [=]() -> uint64_t { 1209 if (uint64_t rv = r().getValue()) 1210 return l().getValue() % rv; 1211 error(loc + ": modulo by zero"); 1212 return 0; 1213 }; 1214 } 1215 if (op == "<<") 1216 return [=] { return l().getValue() << r().getValue() % 64; }; 1217 if (op == ">>") 1218 return [=] { return l().getValue() >> r().getValue() % 64; }; 1219 if (op == "<") 1220 return [=] { return l().getValue() < r().getValue(); }; 1221 if (op == ">") 1222 return [=] { return l().getValue() > r().getValue(); }; 1223 if (op == ">=") 1224 return [=] { return l().getValue() >= r().getValue(); }; 1225 if (op == "<=") 1226 return [=] { return l().getValue() <= r().getValue(); }; 1227 if (op == "==") 1228 return [=] { return l().getValue() == r().getValue(); }; 1229 if (op == "!=") 1230 return [=] { return l().getValue() != r().getValue(); }; 1231 if (op == "||") 1232 return [=] { return l().getValue() || r().getValue(); }; 1233 if (op == "&&") 1234 return [=] { return l().getValue() && r().getValue(); }; 1235 if (op == "&") 1236 return [=] { return bitAnd(l(), r()); }; 1237 if (op == "^") 1238 return [=] { return bitXor(l(), r()); }; 1239 if (op == "|") 1240 return [=] { return bitOr(l(), r()); }; 1241 llvm_unreachable("invalid operator"); 1242 } 1243 1244 // This is a part of the operator-precedence parser. This function 1245 // assumes that the remaining token stream starts with an operator. 1246 Expr ScriptParser::readExpr1(Expr lhs, int minPrec) { 1247 while (!atEOF() && !errorCount()) { 1248 // Read an operator and an expression. 1249 StringRef op1 = peek(); 1250 if (precedence(op1) < minPrec) 1251 break; 1252 skip(); 1253 if (op1 == "?") 1254 return readTernary(lhs); 1255 Expr rhs = readPrimary(); 1256 1257 // Evaluate the remaining part of the expression first if the 1258 // next operator has greater precedence than the previous one. 1259 // For example, if we have read "+" and "3", and if the next 1260 // operator is "*", then we'll evaluate 3 * ... part first. 1261 while (!atEOF()) { 1262 StringRef op2 = peek(); 1263 if (precedence(op2) <= precedence(op1)) 1264 break; 1265 rhs = readExpr1(rhs, precedence(op2)); 1266 } 1267 1268 lhs = combine(op1, lhs, rhs); 1269 } 1270 return lhs; 1271 } 1272 1273 Expr ScriptParser::getPageSize() { 1274 std::string location = getCurrentLocation(); 1275 return [=]() -> uint64_t { 1276 if (target) 1277 return config->commonPageSize; 1278 error(location + ": unable to calculate page size"); 1279 return 4096; // Return a dummy value. 1280 }; 1281 } 1282 1283 Expr ScriptParser::readConstant() { 1284 StringRef s = readParenLiteral(); 1285 if (s == "COMMONPAGESIZE") 1286 return getPageSize(); 1287 if (s == "MAXPAGESIZE") 1288 return [] { return config->maxPageSize; }; 1289 setError("unknown constant: " + s); 1290 return [] { return 0; }; 1291 } 1292 1293 // Parses Tok as an integer. It recognizes hexadecimal (prefixed with 1294 // "0x" or suffixed with "H") and decimal numbers. Decimal numbers may 1295 // have "K" (Ki) or "M" (Mi) suffixes. 1296 static std::optional<uint64_t> parseInt(StringRef tok) { 1297 // Hexadecimal 1298 uint64_t val; 1299 if (tok.starts_with_insensitive("0x")) { 1300 if (!to_integer(tok.substr(2), val, 16)) 1301 return std::nullopt; 1302 return val; 1303 } 1304 if (tok.ends_with_insensitive("H")) { 1305 if (!to_integer(tok.drop_back(), val, 16)) 1306 return std::nullopt; 1307 return val; 1308 } 1309 1310 // Decimal 1311 if (tok.ends_with_insensitive("K")) { 1312 if (!to_integer(tok.drop_back(), val, 10)) 1313 return std::nullopt; 1314 return val * 1024; 1315 } 1316 if (tok.ends_with_insensitive("M")) { 1317 if (!to_integer(tok.drop_back(), val, 10)) 1318 return std::nullopt; 1319 return val * 1024 * 1024; 1320 } 1321 if (!to_integer(tok, val, 10)) 1322 return std::nullopt; 1323 return val; 1324 } 1325 1326 ByteCommand *ScriptParser::readByteCommand(StringRef tok) { 1327 int size = StringSwitch<int>(tok) 1328 .Case("BYTE", 1) 1329 .Case("SHORT", 2) 1330 .Case("LONG", 4) 1331 .Case("QUAD", 8) 1332 .Default(-1); 1333 if (size == -1) 1334 return nullptr; 1335 1336 size_t oldPos = pos; 1337 Expr e = readParenExpr(); 1338 std::string commandString = 1339 tok.str() + " " + 1340 llvm::join(tokens.begin() + oldPos, tokens.begin() + pos, " "); 1341 return make<ByteCommand>(e, size, commandString); 1342 } 1343 1344 static std::optional<uint64_t> parseFlag(StringRef tok) { 1345 if (std::optional<uint64_t> asInt = parseInt(tok)) 1346 return asInt; 1347 #define CASE_ENT(enum) #enum, ELF::enum 1348 return StringSwitch<std::optional<uint64_t>>(tok) 1349 .Case(CASE_ENT(SHF_WRITE)) 1350 .Case(CASE_ENT(SHF_ALLOC)) 1351 .Case(CASE_ENT(SHF_EXECINSTR)) 1352 .Case(CASE_ENT(SHF_MERGE)) 1353 .Case(CASE_ENT(SHF_STRINGS)) 1354 .Case(CASE_ENT(SHF_INFO_LINK)) 1355 .Case(CASE_ENT(SHF_LINK_ORDER)) 1356 .Case(CASE_ENT(SHF_OS_NONCONFORMING)) 1357 .Case(CASE_ENT(SHF_GROUP)) 1358 .Case(CASE_ENT(SHF_TLS)) 1359 .Case(CASE_ENT(SHF_COMPRESSED)) 1360 .Case(CASE_ENT(SHF_EXCLUDE)) 1361 .Case(CASE_ENT(SHF_ARM_PURECODE)) 1362 .Default(std::nullopt); 1363 #undef CASE_ENT 1364 } 1365 1366 // Reads the '(' <flags> ')' list of section flags in 1367 // INPUT_SECTION_FLAGS '(' <flags> ')' in the 1368 // following form: 1369 // <flags> ::= <flag> 1370 // | <flags> & flag 1371 // <flag> ::= Recognized Flag Name, or Integer value of flag. 1372 // If the first character of <flag> is a ! then this means without flag, 1373 // otherwise with flag. 1374 // Example: SHF_EXECINSTR & !SHF_WRITE means with flag SHF_EXECINSTR and 1375 // without flag SHF_WRITE. 1376 std::pair<uint64_t, uint64_t> ScriptParser::readInputSectionFlags() { 1377 uint64_t withFlags = 0; 1378 uint64_t withoutFlags = 0; 1379 expect("("); 1380 while (!errorCount()) { 1381 StringRef tok = unquote(next()); 1382 bool without = tok.consume_front("!"); 1383 if (std::optional<uint64_t> flag = parseFlag(tok)) { 1384 if (without) 1385 withoutFlags |= *flag; 1386 else 1387 withFlags |= *flag; 1388 } else { 1389 setError("unrecognised flag: " + tok); 1390 } 1391 if (consume(")")) 1392 break; 1393 if (!consume("&")) { 1394 next(); 1395 setError("expected & or )"); 1396 } 1397 } 1398 return std::make_pair(withFlags, withoutFlags); 1399 } 1400 1401 StringRef ScriptParser::readParenLiteral() { 1402 expect("("); 1403 bool orig = inExpr; 1404 inExpr = false; 1405 StringRef tok = next(); 1406 inExpr = orig; 1407 expect(")"); 1408 return tok; 1409 } 1410 1411 static void checkIfExists(const OutputSection &osec, StringRef location) { 1412 if (osec.location.empty() && script->errorOnMissingSection) 1413 script->recordError(location + ": undefined section " + osec.name); 1414 } 1415 1416 static bool isValidSymbolName(StringRef s) { 1417 auto valid = [](char c) { 1418 return isAlnum(c) || c == '$' || c == '.' || c == '_'; 1419 }; 1420 return !s.empty() && !isDigit(s[0]) && llvm::all_of(s, valid); 1421 } 1422 1423 Expr ScriptParser::readPrimary() { 1424 if (peek() == "(") 1425 return readParenExpr(); 1426 1427 if (consume("~")) { 1428 Expr e = readPrimary(); 1429 return [=] { return ~e().getValue(); }; 1430 } 1431 if (consume("!")) { 1432 Expr e = readPrimary(); 1433 return [=] { return !e().getValue(); }; 1434 } 1435 if (consume("-")) { 1436 Expr e = readPrimary(); 1437 return [=] { return -e().getValue(); }; 1438 } 1439 1440 StringRef tok = next(); 1441 std::string location = getCurrentLocation(); 1442 1443 // Built-in functions are parsed here. 1444 // https://sourceware.org/binutils/docs/ld/Builtin-Functions.html. 1445 if (tok == "ABSOLUTE") { 1446 Expr inner = readParenExpr(); 1447 return [=] { 1448 ExprValue i = inner(); 1449 i.forceAbsolute = true; 1450 return i; 1451 }; 1452 } 1453 if (tok == "ADDR") { 1454 StringRef name = unquote(readParenLiteral()); 1455 OutputSection *osec = &script->getOrCreateOutputSection(name)->osec; 1456 osec->usedInExpression = true; 1457 return [=]() -> ExprValue { 1458 checkIfExists(*osec, location); 1459 return {osec, false, 0, location}; 1460 }; 1461 } 1462 if (tok == "ALIGN") { 1463 expect("("); 1464 Expr e = readExpr(); 1465 if (consume(")")) { 1466 e = checkAlignment(e, location); 1467 return [=] { return alignToPowerOf2(script->getDot(), e().getValue()); }; 1468 } 1469 expect(","); 1470 Expr e2 = checkAlignment(readExpr(), location); 1471 expect(")"); 1472 return [=] { 1473 ExprValue v = e(); 1474 v.alignment = e2().getValue(); 1475 return v; 1476 }; 1477 } 1478 if (tok == "ALIGNOF") { 1479 StringRef name = unquote(readParenLiteral()); 1480 OutputSection *osec = &script->getOrCreateOutputSection(name)->osec; 1481 return [=] { 1482 checkIfExists(*osec, location); 1483 return osec->addralign; 1484 }; 1485 } 1486 if (tok == "ASSERT") 1487 return readAssert(); 1488 if (tok == "CONSTANT") 1489 return readConstant(); 1490 if (tok == "DATA_SEGMENT_ALIGN") { 1491 expect("("); 1492 Expr e = readExpr(); 1493 expect(","); 1494 readExpr(); 1495 expect(")"); 1496 script->seenDataAlign = true; 1497 return [=] { 1498 uint64_t align = std::max(uint64_t(1), e().getValue()); 1499 return (script->getDot() + align - 1) & -align; 1500 }; 1501 } 1502 if (tok == "DATA_SEGMENT_END") { 1503 expect("("); 1504 expect("."); 1505 expect(")"); 1506 return [] { return script->getDot(); }; 1507 } 1508 if (tok == "DATA_SEGMENT_RELRO_END") { 1509 // GNU linkers implements more complicated logic to handle 1510 // DATA_SEGMENT_RELRO_END. We instead ignore the arguments and 1511 // just align to the next page boundary for simplicity. 1512 expect("("); 1513 readExpr(); 1514 expect(","); 1515 readExpr(); 1516 expect(")"); 1517 script->seenRelroEnd = true; 1518 return [=] { return alignToPowerOf2(script->getDot(), config->maxPageSize); }; 1519 } 1520 if (tok == "DEFINED") { 1521 StringRef name = unquote(readParenLiteral()); 1522 // Return 1 if s is defined. If the definition is only found in a linker 1523 // script, it must happen before this DEFINED. 1524 auto order = ctx.scriptSymOrderCounter++; 1525 return [=] { 1526 Symbol *s = symtab.find(name); 1527 return s && s->isDefined() && ctx.scriptSymOrder.lookup(s) < order ? 1 1528 : 0; 1529 }; 1530 } 1531 if (tok == "LENGTH") { 1532 StringRef name = readParenLiteral(); 1533 if (script->memoryRegions.count(name) == 0) { 1534 setError("memory region not defined: " + name); 1535 return [] { return 0; }; 1536 } 1537 return script->memoryRegions[name]->length; 1538 } 1539 if (tok == "LOADADDR") { 1540 StringRef name = unquote(readParenLiteral()); 1541 OutputSection *osec = &script->getOrCreateOutputSection(name)->osec; 1542 osec->usedInExpression = true; 1543 return [=] { 1544 checkIfExists(*osec, location); 1545 return osec->getLMA(); 1546 }; 1547 } 1548 if (tok == "LOG2CEIL") { 1549 expect("("); 1550 Expr a = readExpr(); 1551 expect(")"); 1552 return [=] { 1553 // LOG2CEIL(0) is defined to be 0. 1554 return llvm::Log2_64_Ceil(std::max(a().getValue(), UINT64_C(1))); 1555 }; 1556 } 1557 if (tok == "MAX" || tok == "MIN") { 1558 expect("("); 1559 Expr a = readExpr(); 1560 expect(","); 1561 Expr b = readExpr(); 1562 expect(")"); 1563 if (tok == "MIN") 1564 return [=] { return std::min(a().getValue(), b().getValue()); }; 1565 return [=] { return std::max(a().getValue(), b().getValue()); }; 1566 } 1567 if (tok == "ORIGIN") { 1568 StringRef name = readParenLiteral(); 1569 if (script->memoryRegions.count(name) == 0) { 1570 setError("memory region not defined: " + name); 1571 return [] { return 0; }; 1572 } 1573 return script->memoryRegions[name]->origin; 1574 } 1575 if (tok == "SEGMENT_START") { 1576 expect("("); 1577 skip(); 1578 expect(","); 1579 Expr e = readExpr(); 1580 expect(")"); 1581 return [=] { return e(); }; 1582 } 1583 if (tok == "SIZEOF") { 1584 StringRef name = unquote(readParenLiteral()); 1585 OutputSection *cmd = &script->getOrCreateOutputSection(name)->osec; 1586 // Linker script does not create an output section if its content is empty. 1587 // We want to allow SIZEOF(.foo) where .foo is a section which happened to 1588 // be empty. 1589 return [=] { return cmd->size; }; 1590 } 1591 if (tok == "SIZEOF_HEADERS") 1592 return [=] { return elf::getHeaderSize(); }; 1593 1594 // Tok is the dot. 1595 if (tok == ".") 1596 return [=] { return script->getSymbolValue(tok, location); }; 1597 1598 // Tok is a literal number. 1599 if (std::optional<uint64_t> val = parseInt(tok)) 1600 return [=] { return *val; }; 1601 1602 // Tok is a symbol name. 1603 if (tok.starts_with("\"")) 1604 tok = unquote(tok); 1605 else if (!isValidSymbolName(tok)) 1606 setError("malformed number: " + tok); 1607 if (activeProvideSym) 1608 script->provideMap[*activeProvideSym].push_back(tok); 1609 else 1610 script->referencedSymbols.push_back(tok); 1611 return [=] { return script->getSymbolValue(tok, location); }; 1612 } 1613 1614 Expr ScriptParser::readTernary(Expr cond) { 1615 Expr l = readExpr(); 1616 expect(":"); 1617 Expr r = readExpr(); 1618 return [=] { return cond().getValue() ? l() : r(); }; 1619 } 1620 1621 Expr ScriptParser::readParenExpr() { 1622 expect("("); 1623 Expr e = readExpr(); 1624 expect(")"); 1625 return e; 1626 } 1627 1628 SmallVector<StringRef, 0> ScriptParser::readOutputSectionPhdrs() { 1629 SmallVector<StringRef, 0> phdrs; 1630 while (!errorCount() && peek().starts_with(":")) { 1631 StringRef tok = next(); 1632 phdrs.push_back((tok.size() == 1) ? next() : tok.substr(1)); 1633 } 1634 return phdrs; 1635 } 1636 1637 // Read a program header type name. The next token must be a 1638 // name of a program header type or a constant (e.g. "0x3"). 1639 unsigned ScriptParser::readPhdrType() { 1640 StringRef tok = next(); 1641 if (std::optional<uint64_t> val = parseInt(tok)) 1642 return *val; 1643 1644 unsigned ret = StringSwitch<unsigned>(tok) 1645 .Case("PT_NULL", PT_NULL) 1646 .Case("PT_LOAD", PT_LOAD) 1647 .Case("PT_DYNAMIC", PT_DYNAMIC) 1648 .Case("PT_INTERP", PT_INTERP) 1649 .Case("PT_NOTE", PT_NOTE) 1650 .Case("PT_SHLIB", PT_SHLIB) 1651 .Case("PT_PHDR", PT_PHDR) 1652 .Case("PT_TLS", PT_TLS) 1653 .Case("PT_GNU_EH_FRAME", PT_GNU_EH_FRAME) 1654 .Case("PT_GNU_STACK", PT_GNU_STACK) 1655 .Case("PT_GNU_RELRO", PT_GNU_RELRO) 1656 .Case("PT_OPENBSD_MUTABLE", PT_OPENBSD_MUTABLE) 1657 .Case("PT_OPENBSD_RANDOMIZE", PT_OPENBSD_RANDOMIZE) 1658 .Case("PT_OPENBSD_SYSCALLS", PT_OPENBSD_SYSCALLS) 1659 .Case("PT_OPENBSD_WXNEEDED", PT_OPENBSD_WXNEEDED) 1660 .Case("PT_OPENBSD_BOOTDATA", PT_OPENBSD_BOOTDATA) 1661 .Default(-1); 1662 1663 if (ret == (unsigned)-1) { 1664 setError("invalid program header type: " + tok); 1665 return PT_NULL; 1666 } 1667 return ret; 1668 } 1669 1670 // Reads an anonymous version declaration. 1671 void ScriptParser::readAnonymousDeclaration() { 1672 SmallVector<SymbolVersion, 0> locals; 1673 SmallVector<SymbolVersion, 0> globals; 1674 std::tie(locals, globals) = readSymbols(); 1675 for (const SymbolVersion &pat : locals) 1676 config->versionDefinitions[VER_NDX_LOCAL].localPatterns.push_back(pat); 1677 for (const SymbolVersion &pat : globals) 1678 config->versionDefinitions[VER_NDX_GLOBAL].nonLocalPatterns.push_back(pat); 1679 1680 expect(";"); 1681 } 1682 1683 // Reads a non-anonymous version definition, 1684 // e.g. "VerStr { global: foo; bar; local: *; };". 1685 void ScriptParser::readVersionDeclaration(StringRef verStr) { 1686 // Read a symbol list. 1687 SmallVector<SymbolVersion, 0> locals; 1688 SmallVector<SymbolVersion, 0> globals; 1689 std::tie(locals, globals) = readSymbols(); 1690 1691 // Create a new version definition and add that to the global symbols. 1692 VersionDefinition ver; 1693 ver.name = verStr; 1694 ver.nonLocalPatterns = std::move(globals); 1695 ver.localPatterns = std::move(locals); 1696 ver.id = config->versionDefinitions.size(); 1697 config->versionDefinitions.push_back(ver); 1698 1699 // Each version may have a parent version. For example, "Ver2" 1700 // defined as "Ver2 { global: foo; local: *; } Ver1;" has "Ver1" 1701 // as a parent. This version hierarchy is, probably against your 1702 // instinct, purely for hint; the runtime doesn't care about it 1703 // at all. In LLD, we simply ignore it. 1704 if (next() != ";") 1705 expect(";"); 1706 } 1707 1708 bool elf::hasWildcard(StringRef s) { 1709 return s.find_first_of("?*[") != StringRef::npos; 1710 } 1711 1712 // Reads a list of symbols, e.g. "{ global: foo; bar; local: *; };". 1713 std::pair<SmallVector<SymbolVersion, 0>, SmallVector<SymbolVersion, 0>> 1714 ScriptParser::readSymbols() { 1715 SmallVector<SymbolVersion, 0> locals; 1716 SmallVector<SymbolVersion, 0> globals; 1717 SmallVector<SymbolVersion, 0> *v = &globals; 1718 1719 while (!errorCount()) { 1720 if (consume("}")) 1721 break; 1722 if (consumeLabel("local")) { 1723 v = &locals; 1724 continue; 1725 } 1726 if (consumeLabel("global")) { 1727 v = &globals; 1728 continue; 1729 } 1730 1731 if (consume("extern")) { 1732 SmallVector<SymbolVersion, 0> ext = readVersionExtern(); 1733 v->insert(v->end(), ext.begin(), ext.end()); 1734 } else { 1735 StringRef tok = next(); 1736 v->push_back({unquote(tok), false, hasWildcard(tok)}); 1737 } 1738 expect(";"); 1739 } 1740 return {locals, globals}; 1741 } 1742 1743 // Reads an "extern C++" directive, e.g., 1744 // "extern "C++" { ns::*; "f(int, double)"; };" 1745 // 1746 // The last semicolon is optional. E.g. this is OK: 1747 // "extern "C++" { ns::*; "f(int, double)" };" 1748 SmallVector<SymbolVersion, 0> ScriptParser::readVersionExtern() { 1749 StringRef tok = next(); 1750 bool isCXX = tok == "\"C++\""; 1751 if (!isCXX && tok != "\"C\"") 1752 setError("Unknown language"); 1753 expect("{"); 1754 1755 SmallVector<SymbolVersion, 0> ret; 1756 while (!errorCount() && peek() != "}") { 1757 StringRef tok = next(); 1758 ret.push_back( 1759 {unquote(tok), isCXX, !tok.starts_with("\"") && hasWildcard(tok)}); 1760 if (consume("}")) 1761 return ret; 1762 expect(";"); 1763 } 1764 1765 expect("}"); 1766 return ret; 1767 } 1768 1769 Expr ScriptParser::readMemoryAssignment(StringRef s1, StringRef s2, 1770 StringRef s3) { 1771 if (!consume(s1) && !consume(s2) && !consume(s3)) { 1772 setError("expected one of: " + s1 + ", " + s2 + ", or " + s3); 1773 return [] { return 0; }; 1774 } 1775 expect("="); 1776 return readExpr(); 1777 } 1778 1779 // Parse the MEMORY command as specified in: 1780 // https://sourceware.org/binutils/docs/ld/MEMORY.html 1781 // 1782 // MEMORY { name [(attr)] : ORIGIN = origin, LENGTH = len ... } 1783 void ScriptParser::readMemory() { 1784 expect("{"); 1785 while (!errorCount() && !consume("}")) { 1786 StringRef tok = next(); 1787 if (tok == "INCLUDE") { 1788 readInclude(); 1789 continue; 1790 } 1791 1792 uint32_t flags = 0; 1793 uint32_t invFlags = 0; 1794 uint32_t negFlags = 0; 1795 uint32_t negInvFlags = 0; 1796 if (consume("(")) { 1797 readMemoryAttributes(flags, invFlags, negFlags, negInvFlags); 1798 expect(")"); 1799 } 1800 expect(":"); 1801 1802 Expr origin = readMemoryAssignment("ORIGIN", "org", "o"); 1803 expect(","); 1804 Expr length = readMemoryAssignment("LENGTH", "len", "l"); 1805 1806 // Add the memory region to the region map. 1807 MemoryRegion *mr = make<MemoryRegion>(tok, origin, length, flags, invFlags, 1808 negFlags, negInvFlags); 1809 if (!script->memoryRegions.insert({tok, mr}).second) 1810 setError("region '" + tok + "' already defined"); 1811 } 1812 } 1813 1814 // This function parses the attributes used to match against section 1815 // flags when placing output sections in a memory region. These flags 1816 // are only used when an explicit memory region name is not used. 1817 void ScriptParser::readMemoryAttributes(uint32_t &flags, uint32_t &invFlags, 1818 uint32_t &negFlags, 1819 uint32_t &negInvFlags) { 1820 bool invert = false; 1821 1822 for (char c : next().lower()) { 1823 if (c == '!') { 1824 invert = !invert; 1825 std::swap(flags, negFlags); 1826 std::swap(invFlags, negInvFlags); 1827 continue; 1828 } 1829 if (c == 'w') 1830 flags |= SHF_WRITE; 1831 else if (c == 'x') 1832 flags |= SHF_EXECINSTR; 1833 else if (c == 'a') 1834 flags |= SHF_ALLOC; 1835 else if (c == 'r') 1836 invFlags |= SHF_WRITE; 1837 else 1838 setError("invalid memory region attribute"); 1839 } 1840 1841 if (invert) { 1842 std::swap(flags, negFlags); 1843 std::swap(invFlags, negInvFlags); 1844 } 1845 } 1846 1847 void elf::readLinkerScript(MemoryBufferRef mb) { 1848 llvm::TimeTraceScope timeScope("Read linker script", 1849 mb.getBufferIdentifier()); 1850 ScriptParser(mb).readLinkerScript(); 1851 } 1852 1853 void elf::readVersionScript(MemoryBufferRef mb) { 1854 llvm::TimeTraceScope timeScope("Read version script", 1855 mb.getBufferIdentifier()); 1856 ScriptParser(mb).readVersionScript(); 1857 } 1858 1859 void elf::readDynamicList(MemoryBufferRef mb) { 1860 llvm::TimeTraceScope timeScope("Read dynamic list", mb.getBufferIdentifier()); 1861 ScriptParser(mb).readDynamicList(); 1862 } 1863 1864 void elf::readDefsym(StringRef name, MemoryBufferRef mb) { 1865 llvm::TimeTraceScope timeScope("Read defsym input", name); 1866 ScriptParser(mb).readDefsym(name); 1867 } 1868