xref: /freebsd/contrib/llvm-project/lld/ELF/ARMErrataFix.cpp (revision dd41de95a84d979615a2ef11df6850622bf6184e)
1 //===- ARMErrataFix.cpp ---------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // This file implements Section Patching for the purpose of working around the
9 // Cortex-a8 erratum 657417 "A 32bit branch instruction that spans 2 4K regions
10 // can result in an incorrect instruction fetch or processor deadlock." The
11 // erratum affects all but r1p7, r2p5, r2p6, r3p1 and r3p2 revisions of the
12 // Cortex-A8. A high level description of the patching technique is given in
13 // the opening comment of AArch64ErrataFix.cpp.
14 //===----------------------------------------------------------------------===//
15 
16 #include "ARMErrataFix.h"
17 
18 #include "Config.h"
19 #include "LinkerScript.h"
20 #include "OutputSections.h"
21 #include "Relocations.h"
22 #include "Symbols.h"
23 #include "SyntheticSections.h"
24 #include "Target.h"
25 #include "lld/Common/Memory.h"
26 #include "lld/Common/Strings.h"
27 #include "llvm/Support/Endian.h"
28 #include "llvm/Support/raw_ostream.h"
29 #include <algorithm>
30 
31 using namespace llvm;
32 using namespace llvm::ELF;
33 using namespace llvm::object;
34 using namespace llvm::support;
35 using namespace llvm::support::endian;
36 using namespace lld;
37 using namespace lld::elf;
38 
39 // The documented title for Erratum 657417 is:
40 // "A 32bit branch instruction that spans two 4K regions can result in an
41 // incorrect instruction fetch or processor deadlock". Graphically using a
42 // 32-bit B.w instruction encoded as a pair of halfwords 0xf7fe 0xbfff
43 // xxxxxx000 // Memory region 1 start
44 // target:
45 // ...
46 // xxxxxxffe f7fe // First halfword of branch to target:
47 // xxxxxx000 // Memory region 2 start
48 // xxxxxx002 bfff // Second halfword of branch to target:
49 //
50 // The specific trigger conditions that can be detected at link time are:
51 // - There is a 32-bit Thumb-2 branch instruction with an address of the form
52 //   xxxxxxFFE. The first 2 bytes of the instruction are in 4KiB region 1, the
53 //   second 2 bytes are in region 2.
54 // - The branch instruction is one of BLX, BL, B.w BCC.w
55 // - The instruction preceding the branch is a 32-bit non-branch instruction.
56 // - The target of the branch is in region 1.
57 //
58 // The linker mitigation for the fix is to redirect any branch that meets the
59 // erratum conditions to a patch section containing a branch to the target.
60 //
61 // As adding patch sections may move branches onto region boundaries the patch
62 // must iterate until no more patches are added.
63 //
64 // Example, before:
65 // 00000FFA func: NOP.w      // 32-bit Thumb function
66 // 00000FFE       B.W func   // 32-bit branch spanning 2 regions, dest in 1st.
67 // Example, after:
68 // 00000FFA func: NOP.w      // 32-bit Thumb function
69 // 00000FFE       B.w __CortexA8657417_00000FFE
70 // 00001002       2 - bytes padding
71 // 00001004 __CortexA8657417_00000FFE: B.w func
72 
73 class elf::Patch657417Section : public SyntheticSection {
74 public:
75   Patch657417Section(InputSection *p, uint64_t off, uint32_t instr, bool isARM);
76 
77   void writeTo(uint8_t *buf) override;
78 
79   size_t getSize() const override { return 4; }
80 
81   // Get the virtual address of the branch instruction at patcheeOffset.
82   uint64_t getBranchAddr() const;
83 
84   static bool classof(const SectionBase *d) {
85     return d->kind() == InputSectionBase::Synthetic && d->name ==".text.patch";
86   }
87 
88   // The Section we are patching.
89   const InputSection *patchee;
90   // The offset of the instruction in the Patchee section we are patching.
91   uint64_t patcheeOffset;
92   // A label for the start of the Patch that we can use as a relocation target.
93   Symbol *patchSym;
94   // A decoding of the branch instruction at patcheeOffset.
95   uint32_t instr;
96   // True If the patch is to be written in ARM state, otherwise the patch will
97   // be written in Thumb state.
98   bool isARM;
99 };
100 
101 // Return true if the half-word, when taken as the first of a pair of halfwords
102 // is the first half of a 32-bit instruction.
103 // Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition
104 // section A6.3: 32-bit Thumb instruction encoding
105 // |             HW1                   |               HW2                |
106 // | 1 1 1 | op1 (2) | op2 (7) | x (4) |op|           x (15)              |
107 // With op1 == 0b00, a 16-bit instruction is encoded.
108 //
109 // We test only the first halfword, looking for op != 0b00.
110 static bool is32bitInstruction(uint16_t hw) {
111   return (hw & 0xe000) == 0xe000 && (hw & 0x1800) != 0x0000;
112 }
113 
114 // Reference from ARM Architecture Reference Manual ARMv7-A and ARMv7-R edition
115 // section A6.3.4 Branches and miscellaneous control.
116 // |             HW1              |               HW2                |
117 // | 1 1 1 | 1 0 | op (7) | x (4) | 1 | op1 (3) | op2 (4) | imm8 (8) |
118 // op1 == 0x0 op != x111xxx | Conditional branch (Bcc.W)
119 // op1 == 0x1               | Branch (B.W)
120 // op1 == 1x0               | Branch with Link and Exchange (BLX.w)
121 // op1 == 1x1               | Branch with Link (BL.W)
122 
123 static bool isBcc(uint32_t instr) {
124   return (instr & 0xf800d000) == 0xf0008000 &&
125          (instr & 0x03800000) != 0x03800000;
126 }
127 
128 static bool isB(uint32_t instr) { return (instr & 0xf800d000) == 0xf0009000; }
129 
130 static bool isBLX(uint32_t instr) { return (instr & 0xf800d000) == 0xf000c000; }
131 
132 static bool isBL(uint32_t instr) { return (instr & 0xf800d000) == 0xf000d000; }
133 
134 static bool is32bitBranch(uint32_t instr) {
135   return isBcc(instr) || isB(instr) || isBL(instr) || isBLX(instr);
136 }
137 
138 Patch657417Section::Patch657417Section(InputSection *p, uint64_t off,
139                                        uint32_t instr, bool isARM)
140     : SyntheticSection(SHF_ALLOC | SHF_EXECINSTR, SHT_PROGBITS, 4,
141                        ".text.patch"),
142       patchee(p), patcheeOffset(off), instr(instr), isARM(isARM) {
143   parent = p->getParent();
144   patchSym = addSyntheticLocal(
145       saver.save("__CortexA8657417_" + utohexstr(getBranchAddr())), STT_FUNC,
146       isARM ? 0 : 1, getSize(), *this);
147   addSyntheticLocal(saver.save(isARM ? "$a" : "$t"), STT_NOTYPE, 0, 0, *this);
148 }
149 
150 uint64_t Patch657417Section::getBranchAddr() const {
151   return patchee->getVA(patcheeOffset);
152 }
153 
154 // Given a branch instruction instr at sourceAddr work out its destination
155 // address. This is only used when the branch instruction has no relocation.
156 static uint64_t getThumbDestAddr(uint64_t sourceAddr, uint32_t instr) {
157   uint8_t buf[4];
158   write16le(buf, instr >> 16);
159   write16le(buf + 2, instr & 0x0000ffff);
160   int64_t offset;
161   if (isBcc(instr))
162     offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP19);
163   else if (isB(instr))
164     offset = target->getImplicitAddend(buf, R_ARM_THM_JUMP24);
165   else
166     offset = target->getImplicitAddend(buf, R_ARM_THM_CALL);
167   return sourceAddr + offset + 4;
168 }
169 
170 void Patch657417Section::writeTo(uint8_t *buf) {
171   // The base instruction of the patch is always a 32-bit unconditional branch.
172   if (isARM)
173     write32le(buf, 0xea000000);
174   else
175     write32le(buf, 0x9000f000);
176   // If we have a relocation then apply it. For a SyntheticSection buf already
177   // has outSecOff added, but relocateAlloc also adds outSecOff so we need to
178   // subtract to avoid double counting.
179   if (!relocations.empty()) {
180     relocateAlloc(buf - outSecOff, buf - outSecOff + getSize());
181     return;
182   }
183 
184   // If we don't have a relocation then we must calculate and write the offset
185   // ourselves.
186   // Get the destination offset from the addend in the branch instruction.
187   // We cannot use the instruction in the patchee section as this will have
188   // been altered to point to us!
189   uint64_t s = getThumbDestAddr(getBranchAddr(), instr);
190   uint64_t p = getVA(4);
191   target->relocateNoSym(buf, isARM ? R_ARM_JUMP24 : R_ARM_THM_JUMP24, s - p);
192 }
193 
194 // Given a branch instruction spanning two 4KiB regions, at offset off from the
195 // start of isec, return true if the destination of the branch is within the
196 // first of the two 4Kib regions.
197 static bool branchDestInFirstRegion(const InputSection *isec, uint64_t off,
198                                     uint32_t instr, const Relocation *r) {
199   uint64_t sourceAddr = isec->getVA(0) + off;
200   assert((sourceAddr & 0xfff) == 0xffe);
201   uint64_t destAddr = sourceAddr;
202   // If there is a branch relocation at the same offset we must use this to
203   // find the destination address as the branch could be indirected via a thunk
204   // or the PLT.
205   if (r) {
206     uint64_t dst = (r->expr == R_PLT_PC) ? r->sym->getPltVA() : r->sym->getVA();
207     // Account for Thumb PC bias, usually cancelled to 0 by addend of -4.
208     destAddr = dst + r->addend + 4;
209   } else {
210     // If there is no relocation, we must have an intra-section branch
211     // We must extract the offset from the addend manually.
212     destAddr = getThumbDestAddr(sourceAddr, instr);
213   }
214 
215   return (destAddr & 0xfffff000) == (sourceAddr & 0xfffff000);
216 }
217 
218 // Return true if a branch can reach a patch section placed after isec.
219 // The Bcc.w instruction has a range of 1 MiB, all others have 16 MiB.
220 static bool patchInRange(const InputSection *isec, uint64_t off,
221                          uint32_t instr) {
222 
223   // We need the branch at source to reach a patch section placed immediately
224   // after isec. As there can be more than one patch in the patch section we
225   // add 0x100 as contingency to account for worst case of 1 branch every 4KiB
226   // for a 1 MiB range.
227   return target->inBranchRange(
228       isBcc(instr) ? R_ARM_THM_JUMP19 : R_ARM_THM_JUMP24, isec->getVA(off),
229       isec->getVA() + isec->getSize() + 0x100);
230 }
231 
232 struct ScanResult {
233   // Offset of branch within its InputSection.
234   uint64_t off;
235   // Cached decoding of the branch instruction.
236   uint32_t instr;
237   // Branch relocation at off. Will be nullptr if no relocation exists.
238   Relocation *rel;
239 };
240 
241 // Detect the erratum sequence, returning the offset of the branch instruction
242 // and a decoding of the branch. If the erratum sequence is not found then
243 // return an offset of 0 for the branch. 0 is a safe value to use for no patch
244 // as there must be at least one 32-bit non-branch instruction before the
245 // branch so the minimum offset for a patch is 4.
246 static ScanResult scanCortexA8Errata657417(InputSection *isec, uint64_t &off,
247                                            uint64_t limit) {
248   uint64_t isecAddr = isec->getVA(0);
249   // Advance Off so that (isecAddr + off) modulo 0x1000 is at least 0xffa. We
250   // need to check for a 32-bit instruction immediately before a 32-bit branch
251   // at 0xffe modulo 0x1000.
252   off = alignTo(isecAddr + off, 0x1000, 0xffa) - isecAddr;
253   if (off >= limit || limit - off < 8) {
254     // Need at least 2 4-byte sized instructions to trigger erratum.
255     off = limit;
256     return {0, 0, nullptr};
257   }
258 
259   ScanResult scanRes = {0, 0, nullptr};
260   const uint8_t *buf = isec->data().begin();
261   // ARMv7-A Thumb 32-bit instructions are encoded 2 consecutive
262   // little-endian halfwords.
263   const ulittle16_t *instBuf = reinterpret_cast<const ulittle16_t *>(buf + off);
264   uint16_t hw11 = *instBuf++;
265   uint16_t hw12 = *instBuf++;
266   uint16_t hw21 = *instBuf++;
267   uint16_t hw22 = *instBuf++;
268   if (is32bitInstruction(hw11) && is32bitInstruction(hw21)) {
269     uint32_t instr1 = (hw11 << 16) | hw12;
270     uint32_t instr2 = (hw21 << 16) | hw22;
271     if (!is32bitBranch(instr1) && is32bitBranch(instr2)) {
272       // Find a relocation for the branch if it exists. This will be used
273       // to determine the target.
274       uint64_t branchOff = off + 4;
275       auto relIt = llvm::find_if(isec->relocations, [=](const Relocation &r) {
276         return r.offset == branchOff &&
277                (r.type == R_ARM_THM_JUMP19 || r.type == R_ARM_THM_JUMP24 ||
278                 r.type == R_ARM_THM_CALL);
279       });
280       if (relIt != isec->relocations.end())
281         scanRes.rel = &(*relIt);
282       if (branchDestInFirstRegion(isec, branchOff, instr2, scanRes.rel)) {
283         if (patchInRange(isec, branchOff, instr2)) {
284           scanRes.off = branchOff;
285           scanRes.instr = instr2;
286         } else {
287           warn(toString(isec->file) +
288                ": skipping cortex-a8 657417 erratum sequence, section " +
289                isec->name + " is too large to patch");
290         }
291       }
292     }
293   }
294   off += 0x1000;
295   return scanRes;
296 }
297 
298 void ARMErr657417Patcher::init() {
299   // The Arm ABI permits a mix of ARM, Thumb and Data in the same
300   // InputSection. We must only scan Thumb instructions to avoid false
301   // matches. We use the mapping symbols in the InputObjects to identify this
302   // data, caching the results in sectionMap so we don't have to recalculate
303   // it each pass.
304 
305   // The ABI Section 4.5.5 Mapping symbols; defines local symbols that describe
306   // half open intervals [Symbol Value, Next Symbol Value) of code and data
307   // within sections. If there is no next symbol then the half open interval is
308   // [Symbol Value, End of section). The type, code or data, is determined by
309   // the mapping symbol name, $a for Arm code, $t for Thumb code, $d for data.
310   auto isArmMapSymbol = [](const Symbol *s) {
311     return s->getName() == "$a" || s->getName().startswith("$a.");
312   };
313   auto isThumbMapSymbol = [](const Symbol *s) {
314     return s->getName() == "$t" || s->getName().startswith("$t.");
315   };
316   auto isDataMapSymbol = [](const Symbol *s) {
317     return s->getName() == "$d" || s->getName().startswith("$d.");
318   };
319 
320   // Collect mapping symbols for every executable InputSection.
321   for (InputFile *file : objectFiles) {
322     auto *f = cast<ObjFile<ELF32LE>>(file);
323     for (Symbol *s : f->getLocalSymbols()) {
324       auto *def = dyn_cast<Defined>(s);
325       if (!def)
326         continue;
327       if (!isArmMapSymbol(def) && !isThumbMapSymbol(def) &&
328           !isDataMapSymbol(def))
329         continue;
330       if (auto *sec = dyn_cast_or_null<InputSection>(def->section))
331         if (sec->flags & SHF_EXECINSTR)
332           sectionMap[sec].push_back(def);
333     }
334   }
335   // For each InputSection make sure the mapping symbols are in sorted in
336   // ascending order and are in alternating Thumb, non-Thumb order.
337   for (auto &kv : sectionMap) {
338     std::vector<const Defined *> &mapSyms = kv.second;
339     llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) {
340       return a->value < b->value;
341     });
342     mapSyms.erase(std::unique(mapSyms.begin(), mapSyms.end(),
343                               [=](const Defined *a, const Defined *b) {
344                                 return (isThumbMapSymbol(a) ==
345                                         isThumbMapSymbol(b));
346                               }),
347                   mapSyms.end());
348     // Always start with a Thumb Mapping Symbol
349     if (!mapSyms.empty() && !isThumbMapSymbol(mapSyms.front()))
350       mapSyms.erase(mapSyms.begin());
351   }
352   initialized = true;
353 }
354 
355 void ARMErr657417Patcher::insertPatches(
356     InputSectionDescription &isd, std::vector<Patch657417Section *> &patches) {
357   uint64_t spacing = 0x100000 - 0x7500;
358   uint64_t isecLimit;
359   uint64_t prevIsecLimit = isd.sections.front()->outSecOff;
360   uint64_t patchUpperBound = prevIsecLimit + spacing;
361   uint64_t outSecAddr = isd.sections.front()->getParent()->addr;
362 
363   // Set the outSecOff of patches to the place where we want to insert them.
364   // We use a similar strategy to initial thunk placement, using 1 MiB as the
365   // range of the Thumb-2 conditional branch with a contingency accounting for
366   // thunk generation.
367   auto patchIt = patches.begin();
368   auto patchEnd = patches.end();
369   for (const InputSection *isec : isd.sections) {
370     isecLimit = isec->outSecOff + isec->getSize();
371     if (isecLimit > patchUpperBound) {
372       for (; patchIt != patchEnd; ++patchIt) {
373         if ((*patchIt)->getBranchAddr() - outSecAddr >= prevIsecLimit)
374           break;
375         (*patchIt)->outSecOff = prevIsecLimit;
376       }
377       patchUpperBound = prevIsecLimit + spacing;
378     }
379     prevIsecLimit = isecLimit;
380   }
381   for (; patchIt != patchEnd; ++patchIt)
382     (*patchIt)->outSecOff = isecLimit;
383 
384   // Merge all patch sections. We use the outSecOff assigned above to
385   // determine the insertion point. This is ok as we only merge into an
386   // InputSectionDescription once per pass, and at the end of the pass
387   // assignAddresses() will recalculate all the outSecOff values.
388   std::vector<InputSection *> tmp;
389   tmp.reserve(isd.sections.size() + patches.size());
390   auto mergeCmp = [](const InputSection *a, const InputSection *b) {
391     if (a->outSecOff != b->outSecOff)
392       return a->outSecOff < b->outSecOff;
393     return isa<Patch657417Section>(a) && !isa<Patch657417Section>(b);
394   };
395   std::merge(isd.sections.begin(), isd.sections.end(), patches.begin(),
396              patches.end(), std::back_inserter(tmp), mergeCmp);
397   isd.sections = std::move(tmp);
398 }
399 
400 // Given a branch instruction described by ScanRes redirect it to a patch
401 // section containing an unconditional branch instruction to the target.
402 // Ensure that this patch section is 4-byte aligned so that the branch cannot
403 // span two 4 KiB regions. Place the patch section so that it is always after
404 // isec so the branch we are patching always goes forwards.
405 static void implementPatch(ScanResult sr, InputSection *isec,
406                            std::vector<Patch657417Section *> &patches) {
407 
408   log("detected cortex-a8-657419 erratum sequence starting at " +
409       utohexstr(isec->getVA(sr.off)) + " in unpatched output.");
410   Patch657417Section *psec;
411   // We have two cases to deal with.
412   // Case 1. There is a relocation at patcheeOffset to a symbol. The
413   // unconditional branch in the patch must have a relocation so that any
414   // further redirection via the PLT or a Thunk happens as normal. At
415   // patcheeOffset we redirect the existing relocation to a Symbol defined at
416   // the start of the patch section.
417   //
418   // Case 2. There is no relocation at patcheeOffset. We are unlikely to have
419   // a symbol that we can use as a target for a relocation in the patch section.
420   // Luckily we know that the destination cannot be indirected via the PLT or
421   // a Thunk so we can just write the destination directly.
422   if (sr.rel) {
423     // Case 1. We have an existing relocation to redirect to patch and a
424     // Symbol target.
425 
426     // Create a branch relocation for the unconditional branch in the patch.
427     // This can be redirected via the PLT or Thunks.
428     RelType patchRelType = R_ARM_THM_JUMP24;
429     int64_t patchRelAddend = sr.rel->addend;
430     bool destIsARM = false;
431     if (isBL(sr.instr) || isBLX(sr.instr)) {
432       // The final target of the branch may be ARM or Thumb, if the target
433       // is ARM then we write the patch in ARM state to avoid a state change
434       // Thunk from the patch to the target.
435       uint64_t dstSymAddr = (sr.rel->expr == R_PLT_PC) ? sr.rel->sym->getPltVA()
436                                                        : sr.rel->sym->getVA();
437       destIsARM = (dstSymAddr & 1) == 0;
438     }
439     psec = make<Patch657417Section>(isec, sr.off, sr.instr, destIsARM);
440     if (destIsARM) {
441       // The patch will be in ARM state. Use an ARM relocation and account for
442       // the larger ARM PC-bias of 8 rather than Thumb's 4.
443       patchRelType = R_ARM_JUMP24;
444       patchRelAddend -= 4;
445     }
446     psec->relocations.push_back(
447         Relocation{sr.rel->expr, patchRelType, 0, patchRelAddend, sr.rel->sym});
448     // Redirect the existing branch relocation to the patch.
449     sr.rel->expr = R_PC;
450     sr.rel->addend = -4;
451     sr.rel->sym = psec->patchSym;
452   } else {
453     // Case 2. We do not have a relocation to the patch. Add a relocation of the
454     // appropriate type to the patch at patcheeOffset.
455 
456     // The destination is ARM if we have a BLX.
457     psec = make<Patch657417Section>(isec, sr.off, sr.instr, isBLX(sr.instr));
458     RelType type;
459     if (isBcc(sr.instr))
460       type = R_ARM_THM_JUMP19;
461     else if (isB(sr.instr))
462       type = R_ARM_THM_JUMP24;
463     else
464       type = R_ARM_THM_CALL;
465     isec->relocations.push_back(
466         Relocation{R_PC, type, sr.off, -4, psec->patchSym});
467   }
468   patches.push_back(psec);
469 }
470 
471 // Scan all the instructions in InputSectionDescription, for each instance of
472 // the erratum sequence create a Patch657417Section. We return the list of
473 // Patch657417Sections that need to be applied to the InputSectionDescription.
474 std::vector<Patch657417Section *>
475 ARMErr657417Patcher::patchInputSectionDescription(
476     InputSectionDescription &isd) {
477   std::vector<Patch657417Section *> patches;
478   for (InputSection *isec : isd.sections) {
479     // LLD doesn't use the erratum sequence in SyntheticSections.
480     if (isa<SyntheticSection>(isec))
481       continue;
482     // Use sectionMap to make sure we only scan Thumb code and not Arm or inline
483     // data. We have already sorted mapSyms in ascending order and removed
484     // consecutive mapping symbols of the same type. Our range of executable
485     // instructions to scan is therefore [thumbSym->value, nonThumbSym->value)
486     // or [thumbSym->value, section size).
487     std::vector<const Defined *> &mapSyms = sectionMap[isec];
488 
489     auto thumbSym = mapSyms.begin();
490     while (thumbSym != mapSyms.end()) {
491       auto nonThumbSym = std::next(thumbSym);
492       uint64_t off = (*thumbSym)->value;
493       uint64_t limit = (nonThumbSym == mapSyms.end()) ? isec->data().size()
494                                                       : (*nonThumbSym)->value;
495 
496       while (off < limit) {
497         ScanResult sr = scanCortexA8Errata657417(isec, off, limit);
498         if (sr.off)
499           implementPatch(sr, isec, patches);
500       }
501       if (nonThumbSym == mapSyms.end())
502         break;
503       thumbSym = std::next(nonThumbSym);
504     }
505   }
506   return patches;
507 }
508 
509 bool ARMErr657417Patcher::createFixes() {
510   if (!initialized)
511     init();
512 
513   bool addressesChanged = false;
514   for (OutputSection *os : outputSections) {
515     if (!(os->flags & SHF_ALLOC) || !(os->flags & SHF_EXECINSTR))
516       continue;
517     for (BaseCommand *bc : os->sectionCommands)
518       if (auto *isd = dyn_cast<InputSectionDescription>(bc)) {
519         std::vector<Patch657417Section *> patches =
520             patchInputSectionDescription(*isd);
521         if (!patches.empty()) {
522           insertPatches(*isd, patches);
523           addressesChanged = true;
524         }
525       }
526   }
527   return addressesChanged;
528 }
529