1 //===- ARMErrataFix.cpp ---------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // This file implements Section Patching for the purpose of working around the 9 // Cortex-a8 erratum 657417 "A 32bit branch instruction that spans 2 4K regions 10 // can result in an incorrect instruction fetch or processor deadlock." The 11 // erratum affects all but r1p7, r2p5, r2p6, r3p1 and r3p2 revisions of the 12 // Cortex-A8. A high level description of the patching technique is given in 13 // the opening comment of AArch64ErrataFix.cpp. 14 //===----------------------------------------------------------------------===// 15 16 #include "ARMErrataFix.h" 17 18 #include "Config.h" 19 #include "LinkerScript.h" 20 #include "OutputSections.h" 21 #include "Relocations.h" 22 #include "Symbols.h" 23 #include "SyntheticSections.h" 24 #include "Target.h" 25 #include "lld/Common/Memory.h" 26 #include "lld/Common/Strings.h" 27 #include "llvm/Support/Endian.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <algorithm> 30 31 using namespace llvm; 32 using namespace llvm::ELF; 33 using namespace llvm::object; 34 using namespace llvm::support; 35 using namespace llvm::support::endian; 36 37 namespace lld { 38 namespace elf { 39 40 // The documented title for Erratum 657417 is: 41 // "A 32bit branch instruction that spans two 4K regions can result in an 42 // incorrect instruction fetch or processor deadlock". Graphically using a 43 // 32-bit B.w instruction encoded as a pair of halfwords 0xf7fe 0xbfff 44 // xxxxxx000 // Memory region 1 start 45 // target: 46 // ... 47 // xxxxxxffe f7fe // First halfword of branch to target: 48 // xxxxxx000 // Memory region 2 start 49 // xxxxxx002 bfff // Second halfword of branch to target: 50 // 51 // The specific trigger conditions that can be detected at link time are: 52 // - There is a 32-bit Thumb-2 branch instruction with an address of the form 53 // xxxxxxFFE. The first 2 bytes of the instruction are in 4KiB region 1, the 54 // second 2 bytes are in region 2. 55 // - The branch instruction is one of BLX, BL, B.w BCC.w 56 // - The instruction preceding the branch is a 32-bit non-branch instruction. 57 // - The target of the branch is in region 1. 58 // 59 // The linker mitigation for the fix is to redirect any branch that meets the 60 // erratum conditions to a patch section containing a branch to the target. 61 // 62 // As adding patch sections may move branches onto region boundaries the patch 63 // must iterate until no more patches are added. 64 // 65 // Example, before: 66 // 00000FFA func: NOP.w // 32-bit Thumb function 67 // 00000FFE B.W func // 32-bit branch spanning 2 regions, dest in 1st. 68 // Example, after: 69 // 00000FFA func: NOP.w // 32-bit Thumb function 70 // 00000FFE B.w __CortexA8657417_00000FFE 71 // 00001002 2 - bytes padding 72 // 00001004 __CortexA8657417_00000FFE: B.w func 73 74 class Patch657417Section : public SyntheticSection { 75 public: 76 Patch657417Section(InputSection *p, uint64_t off, uint32_t instr, bool isARM); 77 78 void writeTo(uint8_t *buf) override; 79 80 size_t getSize() const override { return 4; } 81 82 // Get the virtual address of the branch instruction at patcheeOffset. 83 uint64_t getBranchAddr() const; 84 85 static bool classof(const SectionBase *d) { 86 return d->kind() == InputSectionBase::Synthetic && d->name ==".text.patch"; 87 } 88 89 // The Section we are patching. 90 const InputSection *patchee; 91 // The offset of the instruction in the Patchee section we are patching. 92 uint64_t patcheeOffset; 93 // A label for the start of the Patch that we can use as a relocation target. 94 Symbol *patchSym; 95 // A decoding of the branch instruction at patcheeOffset. 96 uint32_t instr; 97 // True If the patch is to be written in ARM state, otherwise the patch will 98 // be written in Thumb state. 99 bool isARM; 100 }; 101 102 // Return true if the half-word, when taken as the first of a pair of halfwords 103 // is the first half of a 32-bit instruction. 104 // Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition 105 // section A6.3: 32-bit Thumb instruction encoding 106 // | HW1 | HW2 | 107 // | 1 1 1 | op1 (2) | op2 (7) | x (4) |op| x (15) | 108 // With op1 == 0b00, a 16-bit instruction is encoded. 109 // 110 // We test only the first halfword, looking for op != 0b00. 111 static bool is32bitInstruction(uint16_t hw) { 112 return (hw & 0xe000) == 0xe000 && (hw & 0x1800) != 0x0000; 113 } 114 115 // Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition 116 // section A6.3.4 Branches and miscellaneous control. 117 // | HW1 | HW2 | 118 // | 1 1 1 | 1 0 | op (7) | x (4) | 1 | op1 (3) | op2 (4) | imm8 (8) | 119 // op1 == 0x0 op != x111xxx | Conditional branch (Bcc.W) 120 // op1 == 0x1 | Branch (B.W) 121 // op1 == 1x0 | Branch with Link and Exchange (BLX.w) 122 // op1 == 1x1 | Branch with Link (BL.W) 123 124 static bool isBcc(uint32_t instr) { 125 return (instr & 0xf800d000) == 0xf0008000 && 126 (instr & 0x03800000) != 0x03800000; 127 } 128 129 static bool isB(uint32_t instr) { return (instr & 0xf800d000) == 0xf0009000; } 130 131 static bool isBLX(uint32_t instr) { return (instr & 0xf800d000) == 0xf000c000; } 132 133 static bool isBL(uint32_t instr) { return (instr & 0xf800d000) == 0xf000d000; } 134 135 static bool is32bitBranch(uint32_t instr) { 136 return isBcc(instr) || isB(instr) || isBL(instr) || isBLX(instr); 137 } 138 139 Patch657417Section::Patch657417Section(InputSection *p, uint64_t off, 140 uint32_t instr, bool isARM) 141 : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4, 142 ".text.patch"), 143 patchee(p), patcheeOffset(off), instr(instr), isARM(isARM) { 144 parent = p->getParent(); 145 patchSym = addSyntheticLocal( 146 saver.save("__CortexA8657417_" + utohexstr(getBranchAddr())), STT_FUNC, 147 isARM ? 0 : 1, getSize(), *this); 148 addSyntheticLocal(saver.save(isARM ? "$a" : "$t"), STT_NOTYPE, 0, 0, *this); 149 } 150 151 uint64_t Patch657417Section::getBranchAddr() const { 152 return patchee->getVA(patcheeOffset); 153 } 154 155 // Given a branch instruction instr at sourceAddr work out its destination 156 // address. This is only used when the branch instruction has no relocation. 157 static uint64_t getThumbDestAddr(uint64_t sourceAddr, uint32_t instr) { 158 uint8_t buf[4]; 159 write16le(buf, instr >> 16); 160 write16le(buf + 2, instr & 0x0000ffff); 161 int64_t offset; 162 if (isBcc(instr)) 163 offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP19); 164 else if (isB(instr)) 165 offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP24); 166 else 167 offset = target->getImplicitAddend(buf, R_ARM_THM_CALL); 168 return sourceAddr + offset + 4; 169 } 170 171 void Patch657417Section::writeTo(uint8_t *buf) { 172 // The base instruction of the patch is always a 32-bit unconditional branch. 173 if (isARM) 174 write32le(buf, 0xea000000); 175 else 176 write32le(buf, 0x9000f000); 177 // If we have a relocation then apply it. For a SyntheticSection buf already 178 // has outSecOff added, but relocateAlloc also adds outSecOff so we need to 179 // subtract to avoid double counting. 180 if (!relocations.empty()) { 181 relocateAlloc(buf - outSecOff, buf - outSecOff + getSize()); 182 return; 183 } 184 185 // If we don't have a relocation then we must calculate and write the offset 186 // ourselves. 187 // Get the destination offset from the addend in the branch instruction. 188 // We cannot use the instruction in the patchee section as this will have 189 // been altered to point to us! 190 uint64_t s = getThumbDestAddr(getBranchAddr(), instr); 191 uint64_t p = getVA(4); 192 target->relocateOne(buf, isARM ? R_ARM_JUMP24 : R_ARM_THM_JUMP24, s - p); 193 } 194 195 // Given a branch instruction spanning two 4KiB regions, at offset off from the 196 // start of isec, return true if the destination of the branch is within the 197 // first of the two 4Kib regions. 198 static bool branchDestInFirstRegion(const InputSection *isec, uint64_t off, 199 uint32_t instr, const Relocation *r) { 200 uint64_t sourceAddr = isec->getVA(0) + off; 201 assert((sourceAddr & 0xfff) == 0xffe); 202 uint64_t destAddr = sourceAddr; 203 // If there is a branch relocation at the same offset we must use this to 204 // find the destination address as the branch could be indirected via a thunk 205 // or the PLT. 206 if (r) { 207 uint64_t dst = (r->expr == R_PLT_PC) ? r->sym->getPltVA() : r->sym->getVA(); 208 // Account for Thumb PC bias, usually cancelled to 0 by addend of -4. 209 destAddr = dst + r->addend + 4; 210 } else { 211 // If there is no relocation, we must have an intra-section branch 212 // We must extract the offset from the addend manually. 213 destAddr = getThumbDestAddr(sourceAddr, instr); 214 } 215 216 return (destAddr & 0xfffff000) == (sourceAddr & 0xfffff000); 217 } 218 219 // Return true if a branch can reach a patch section placed after isec. 220 // The Bcc.w instruction has a range of 1 MiB, all others have 16 MiB. 221 static bool patchInRange(const InputSection *isec, uint64_t off, 222 uint32_t instr) { 223 224 // We need the branch at source to reach a patch section placed immediately 225 // after isec. As there can be more than one patch in the patch section we 226 // add 0x100 as contingency to account for worst case of 1 branch every 4KiB 227 // for a 1 MiB range. 228 return target->inBranchRange( 229 isBcc(instr) ? R_ARM_THM_JUMP19 : R_ARM_THM_JUMP24, isec->getVA(off), 230 isec->getVA() + isec->getSize() + 0x100); 231 } 232 233 struct ScanResult { 234 // Offset of branch within its InputSection. 235 uint64_t off; 236 // Cached decoding of the branch instruction. 237 uint32_t instr; 238 // Branch relocation at off. Will be nullptr if no relocation exists. 239 Relocation *rel; 240 }; 241 242 // Detect the erratum sequence, returning the offset of the branch instruction 243 // and a decoding of the branch. If the erratum sequence is not found then 244 // return an offset of 0 for the branch. 0 is a safe value to use for no patch 245 // as there must be at least one 32-bit non-branch instruction before the 246 // branch so the minimum offset for a patch is 4. 247 static ScanResult scanCortexA8Errata657417(InputSection *isec, uint64_t &off, 248 uint64_t limit) { 249 uint64_t isecAddr = isec->getVA(0); 250 // Advance Off so that (isecAddr + off) modulo 0x1000 is at least 0xffa. We 251 // need to check for a 32-bit instruction immediately before a 32-bit branch 252 // at 0xffe modulo 0x1000. 253 off = alignTo(isecAddr + off, 0x1000, 0xffa) - isecAddr; 254 if (off >= limit || limit - off < 8) { 255 // Need at least 2 4-byte sized instructions to trigger erratum. 256 off = limit; 257 return {0, 0, nullptr}; 258 } 259 260 ScanResult scanRes = {0, 0, nullptr}; 261 const uint8_t *buf = isec->data().begin(); 262 // ARMv7-A Thumb 32-bit instructions are encoded 2 consecutive 263 // little-endian halfwords. 264 const ulittle16_t *instBuf = reinterpret_cast<const ulittle16_t *>(buf + off); 265 uint16_t hw11 = *instBuf++; 266 uint16_t hw12 = *instBuf++; 267 uint16_t hw21 = *instBuf++; 268 uint16_t hw22 = *instBuf++; 269 if (is32bitInstruction(hw11) && is32bitInstruction(hw21)) { 270 uint32_t instr1 = (hw11 << 16) | hw12; 271 uint32_t instr2 = (hw21 << 16) | hw22; 272 if (!is32bitBranch(instr1) && is32bitBranch(instr2)) { 273 // Find a relocation for the branch if it exists. This will be used 274 // to determine the target. 275 uint64_t branchOff = off + 4; 276 auto relIt = llvm::find_if(isec->relocations, [=](const Relocation &r) { 277 return r.offset == branchOff && 278 (r.type == R_ARM_THM_JUMP19 || r.type == R_ARM_THM_JUMP24 || 279 r.type == R_ARM_THM_CALL); 280 }); 281 if (relIt != isec->relocations.end()) 282 scanRes.rel = &(*relIt); 283 if (branchDestInFirstRegion(isec, branchOff, instr2, scanRes.rel)) { 284 if (patchInRange(isec, branchOff, instr2)) { 285 scanRes.off = branchOff; 286 scanRes.instr = instr2; 287 } else { 288 warn(toString(isec->file) + 289 ": skipping cortex-a8 657417 erratum sequence, section " + 290 isec->name + " is too large to patch"); 291 } 292 } 293 } 294 } 295 off += 0x1000; 296 return scanRes; 297 } 298 299 void ARMErr657417Patcher::init() { 300 // The Arm ABI permits a mix of ARM, Thumb and Data in the same 301 // InputSection. We must only scan Thumb instructions to avoid false 302 // matches. We use the mapping symbols in the InputObjects to identify this 303 // data, caching the results in sectionMap so we don't have to recalculate 304 // it each pass. 305 306 // The ABI Section 4.5.5 Mapping symbols; defines local symbols that describe 307 // half open intervals [Symbol Value, Next Symbol Value) of code and data 308 // within sections. If there is no next symbol then the half open interval is 309 // [Symbol Value, End of section). The type, code or data, is determined by 310 // the mapping symbol name, $a for Arm code, $t for Thumb code, $d for data. 311 auto isArmMapSymbol = [](const Symbol *s) { 312 return s->getName() == "$a" || s->getName().startswith("$a."); 313 }; 314 auto isThumbMapSymbol = [](const Symbol *s) { 315 return s->getName() == "$t" || s->getName().startswith("$t."); 316 }; 317 auto isDataMapSymbol = [](const Symbol *s) { 318 return s->getName() == "$d" || s->getName().startswith("$d."); 319 }; 320 321 // Collect mapping symbols for every executable InputSection. 322 for (InputFile *file : objectFiles) { 323 auto *f = cast<ObjFile<ELF32LE>>(file); 324 for (Symbol *s : f->getLocalSymbols()) { 325 auto *def = dyn_cast<Defined>(s); 326 if (!def) 327 continue; 328 if (!isArmMapSymbol(def) && !isThumbMapSymbol(def) && 329 !isDataMapSymbol(def)) 330 continue; 331 if (auto *sec = dyn_cast_or_null<InputSection>(def->section)) 332 if (sec->flags & SHF_EXECINSTR) 333 sectionMap[sec].push_back(def); 334 } 335 } 336 // For each InputSection make sure the mapping symbols are in sorted in 337 // ascending order and are in alternating Thumb, non-Thumb order. 338 for (auto &kv : sectionMap) { 339 std::vector<const Defined *> &mapSyms = kv.second; 340 llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) { 341 return a->value < b->value; 342 }); 343 mapSyms.erase(std::unique(mapSyms.begin(), mapSyms.end(), 344 [=](const Defined *a, const Defined *b) { 345 return (isThumbMapSymbol(a) == 346 isThumbMapSymbol(b)); 347 }), 348 mapSyms.end()); 349 // Always start with a Thumb Mapping Symbol 350 if (!mapSyms.empty() && !isThumbMapSymbol(mapSyms.front())) 351 mapSyms.erase(mapSyms.begin()); 352 } 353 initialized = true; 354 } 355 356 void ARMErr657417Patcher::insertPatches( 357 InputSectionDescription &isd, std::vector<Patch657417Section *> &patches) { 358 uint64_t spacing = 0x100000 - 0x7500; 359 uint64_t isecLimit; 360 uint64_t prevIsecLimit = isd.sections.front()->outSecOff; 361 uint64_t patchUpperBound = prevIsecLimit + spacing; 362 uint64_t outSecAddr = isd.sections.front()->getParent()->addr; 363 364 // Set the outSecOff of patches to the place where we want to insert them. 365 // We use a similar strategy to initial thunk placement, using 1 MiB as the 366 // range of the Thumb-2 conditional branch with a contingency accounting for 367 // thunk generation. 368 auto patchIt = patches.begin(); 369 auto patchEnd = patches.end(); 370 for (const InputSection *isec : isd.sections) { 371 isecLimit = isec->outSecOff + isec->getSize(); 372 if (isecLimit > patchUpperBound) { 373 for (; patchIt != patchEnd; ++patchIt) { 374 if ((*patchIt)->getBranchAddr() - outSecAddr >= prevIsecLimit) 375 break; 376 (*patchIt)->outSecOff = prevIsecLimit; 377 } 378 patchUpperBound = prevIsecLimit + spacing; 379 } 380 prevIsecLimit = isecLimit; 381 } 382 for (; patchIt != patchEnd; ++patchIt) 383 (*patchIt)->outSecOff = isecLimit; 384 385 // Merge all patch sections. We use the outSecOff assigned above to 386 // determine the insertion point. This is ok as we only merge into an 387 // InputSectionDescription once per pass, and at the end of the pass 388 // assignAddresses() will recalculate all the outSecOff values. 389 std::vector<InputSection *> tmp; 390 tmp.reserve(isd.sections.size() + patches.size()); 391 auto mergeCmp = [](const InputSection *a, const InputSection *b) { 392 if (a->outSecOff != b->outSecOff) 393 return a->outSecOff < b->outSecOff; 394 return isa<Patch657417Section>(a) && !isa<Patch657417Section>(b); 395 }; 396 std::merge(isd.sections.begin(), isd.sections.end(), patches.begin(), 397 patches.end(), std::back_inserter(tmp), mergeCmp); 398 isd.sections = std::move(tmp); 399 } 400 401 // Given a branch instruction described by ScanRes redirect it to a patch 402 // section containing an unconditional branch instruction to the target. 403 // Ensure that this patch section is 4-byte aligned so that the branch cannot 404 // span two 4 KiB regions. Place the patch section so that it is always after 405 // isec so the branch we are patching always goes forwards. 406 static void implementPatch(ScanResult sr, InputSection *isec, 407 std::vector<Patch657417Section *> &patches) { 408 409 log("detected cortex-a8-657419 erratum sequence starting at " + 410 utohexstr(isec->getVA(sr.off)) + " in unpatched output."); 411 Patch657417Section *psec; 412 // We have two cases to deal with. 413 // Case 1. There is a relocation at patcheeOffset to a symbol. The 414 // unconditional branch in the patch must have a relocation so that any 415 // further redirection via the PLT or a Thunk happens as normal. At 416 // patcheeOffset we redirect the existing relocation to a Symbol defined at 417 // the start of the patch section. 418 // 419 // Case 2. There is no relocation at patcheeOffset. We are unlikely to have 420 // a symbol that we can use as a target for a relocation in the patch section. 421 // Luckily we know that the destination cannot be indirected via the PLT or 422 // a Thunk so we can just write the destination directly. 423 if (sr.rel) { 424 // Case 1. We have an existing relocation to redirect to patch and a 425 // Symbol target. 426 427 // Create a branch relocation for the unconditional branch in the patch. 428 // This can be redirected via the PLT or Thunks. 429 RelType patchRelType = R_ARM_THM_JUMP24; 430 int64_t patchRelAddend = sr.rel->addend; 431 bool destIsARM = false; 432 if (isBL(sr.instr) || isBLX(sr.instr)) { 433 // The final target of the branch may be ARM or Thumb, if the target 434 // is ARM then we write the patch in ARM state to avoid a state change 435 // Thunk from the patch to the target. 436 uint64_t dstSymAddr = (sr.rel->expr == R_PLT_PC) ? sr.rel->sym->getPltVA() 437 : sr.rel->sym->getVA(); 438 destIsARM = (dstSymAddr & 1) == 0; 439 } 440 psec = make<Patch657417Section>(isec, sr.off, sr.instr, destIsARM); 441 if (destIsARM) { 442 // The patch will be in ARM state. Use an ARM relocation and account for 443 // the larger ARM PC-bias of 8 rather than Thumb's 4. 444 patchRelType = R_ARM_JUMP24; 445 patchRelAddend -= 4; 446 } 447 psec->relocations.push_back( 448 Relocation{sr.rel->expr, patchRelType, 0, patchRelAddend, sr.rel->sym}); 449 // Redirect the existing branch relocation to the patch. 450 sr.rel->expr = R_PC; 451 sr.rel->addend = -4; 452 sr.rel->sym = psec->patchSym; 453 } else { 454 // Case 2. We do not have a relocation to the patch. Add a relocation of the 455 // appropriate type to the patch at patcheeOffset. 456 457 // The destination is ARM if we have a BLX. 458 psec = make<Patch657417Section>(isec, sr.off, sr.instr, isBLX(sr.instr)); 459 RelType type; 460 if (isBcc(sr.instr)) 461 type = R_ARM_THM_JUMP19; 462 else if (isB(sr.instr)) 463 type = R_ARM_THM_JUMP24; 464 else 465 type = R_ARM_THM_CALL; 466 isec->relocations.push_back( 467 Relocation{R_PC, type, sr.off, -4, psec->patchSym}); 468 } 469 patches.push_back(psec); 470 } 471 472 // Scan all the instructions in InputSectionDescription, for each instance of 473 // the erratum sequence create a Patch657417Section. We return the list of 474 // Patch657417Sections that need to be applied to the InputSectionDescription. 475 std::vector<Patch657417Section *> 476 ARMErr657417Patcher::patchInputSectionDescription( 477 InputSectionDescription &isd) { 478 std::vector<Patch657417Section *> patches; 479 for (InputSection *isec : isd.sections) { 480 // LLD doesn't use the erratum sequence in SyntheticSections. 481 if (isa<SyntheticSection>(isec)) 482 continue; 483 // Use sectionMap to make sure we only scan Thumb code and not Arm or inline 484 // data. We have already sorted mapSyms in ascending order and removed 485 // consecutive mapping symbols of the same type. Our range of executable 486 // instructions to scan is therefore [thumbSym->value, nonThumbSym->value) 487 // or [thumbSym->value, section size). 488 std::vector<const Defined *> &mapSyms = sectionMap[isec]; 489 490 auto thumbSym = mapSyms.begin(); 491 while (thumbSym != mapSyms.end()) { 492 auto nonThumbSym = std::next(thumbSym); 493 uint64_t off = (*thumbSym)->value; 494 uint64_t limit = (nonThumbSym == mapSyms.end()) ? isec->data().size() 495 : (*nonThumbSym)->value; 496 497 while (off < limit) { 498 ScanResult sr = scanCortexA8Errata657417(isec, off, limit); 499 if (sr.off) 500 implementPatch(sr, isec, patches); 501 } 502 if (nonThumbSym == mapSyms.end()) 503 break; 504 thumbSym = std::next(nonThumbSym); 505 } 506 } 507 return patches; 508 } 509 510 bool ARMErr657417Patcher::createFixes() { 511 if (!initialized) 512 init(); 513 514 bool addressesChanged = false; 515 for (OutputSection *os : outputSections) { 516 if (!(os->flags & SHF_ALLOC) || !(os->flags & SHF_EXECINSTR)) 517 continue; 518 for (BaseCommand *bc : os->sectionCommands) 519 if (auto *isd = dyn_cast<InputSectionDescription>(bc)) { 520 std::vector<Patch657417Section *> patches = 521 patchInputSectionDescription(*isd); 522 if (!patches.empty()) { 523 insertPatches(*isd, patches); 524 addressesChanged = true; 525 } 526 } 527 } 528 return addressesChanged; 529 } 530 531 } // namespace elf 532 } // namespace lld 533