//===- AArch64.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InputFiles.h"
#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "TargetImpl.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;

// Page(Expr) is the page address of the expression Expr, defined
// as (Expr & ~0xFFF). (This applies even if the machine page size
// supported by the platform has a different value.)
uint64_t elf::getAArch64Page(uint64_t expr) {
  return expr & ~static_cast<uint64_t>(0xFFF);
}

// A BTI landing pad is a valid target for an indirect branch when the Branch
// Target Identification has been enabled. As linker generated branches are
// via x16 the BTI landing pads are defined as: BTI C, BTI J, BTI JC, PACIASP,
// PACIBSP.
bool elf::isAArch64BTILandingPad(Ctx &ctx, Symbol &s, int64_t a) {
  // PLT entries accessed indirectly have a BTI c.
  if (s.isInPlt(ctx))
    return true;
  Defined *d = dyn_cast<Defined>(&s);
  if (!isa_and_nonnull<InputSection>(d->section))
    // All places that we cannot disassemble are responsible for making
    // the target a BTI landing pad.
    return true;
  InputSection *isec = cast<InputSection>(d->section);
  uint64_t off = d->value + a;
  // Likely user error, but protect ourselves against out of bounds
  // access.
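  // Conservatively treat the target as a landing pad, consistent with the
  // cases above that we cannot disassemble.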
  if (off >= isec->getSize())
    return true;
  const uint8_t *buf = isec->content().begin();
  const uint32_t instr = read32le(buf + off);
  // All BTI instructions are HINT instructions, which all have the same
  // encoding apart from bits [11:5].
  if ((instr & 0xd503201f) == 0xd503201f &&
      is_contained({/*PACIASP*/ 0xd503233f, /*PACIBSP*/ 0xd503237f,
                    /*BTI C*/ 0xd503245f, /*BTI J*/ 0xd503249f,
                    /*BTI JC*/ 0xd50324df},
                   instr))
    return true;
  return false;
}

namespace {
class AArch64 : public TargetInfo {
public:
  AArch64(Ctx &);
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
                  uint64_t branchAddr, const Symbol &s,
                  int64_t a) const override;
  uint32_t getThunkSectionSpacing() const override;
  bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
  bool usesOnlyLowPageBits(RelType type) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
  void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;
  void applyBranchToBranchOpt() const override;

private:
  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
};

struct AArch64Relaxer {
  Ctx &ctx;
  bool safeToRelaxAdrpLdr = false;

  AArch64Relaxer(Ctx &ctx, ArrayRef<Relocation> relocs);
  bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel,
                       uint64_t secAddr, uint8_t *buf) const;
  bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
                       uint64_t secAddr, uint8_t *buf) const;
};
} // namespace

// Return the bits [Start, End] from Val shifted right by Start bits.
// For instance, getBits(0xF0, 4, 8) returns 0xF.
static uint64_t getBits(uint64_t val, int start, int end) {
  uint64_t mask = ((uint64_t)1 << (end + 1 - start)) - 1;
  return (val >> start) & mask;
}

AArch64::AArch64(Ctx &ctx) : TargetInfo(ctx) {
  copyRel = R_AARCH64_COPY;
  relativeRel = R_AARCH64_RELATIVE;
  iRelativeRel = R_AARCH64_IRELATIVE;
  gotRel = R_AARCH64_GLOB_DAT;
  pltRel = R_AARCH64_JUMP_SLOT;
  symbolicRel = R_AARCH64_ABS64;
  tlsDescRel = R_AARCH64_TLSDESC;
  tlsGotRel = R_AARCH64_TLS_TPREL64;
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  defaultMaxPageSize = 65536;

  // Align to the 2 MiB page size (known as a superpage or huge page).
  // FreeBSD automatically promotes 2 MiB-aligned allocations.
  defaultImageBase = 0x200000;

  needsThunks = true;
}

RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
                            const uint8_t *loc) const {
  switch (type) {
  case R_AARCH64_ABS16:
  case R_AARCH64_ABS32:
  case R_AARCH64_ABS64:
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_MOVW_SABS_G0:
  case R_AARCH64_MOVW_SABS_G1:
  case R_AARCH64_MOVW_SABS_G2:
  case R_AARCH64_MOVW_UABS_G0:
  case R_AARCH64_MOVW_UABS_G0_NC:
  case R_AARCH64_MOVW_UABS_G1:
  case R_AARCH64_MOVW_UABS_G1_NC:
  case R_AARCH64_MOVW_UABS_G2:
  case R_AARCH64_MOVW_UABS_G2_NC:
  case R_AARCH64_MOVW_UABS_G3:
    return R_ABS;
  case R_AARCH64_AUTH_ABS64:
    return RE_AARCH64_AUTH;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    return RE_AARCH64_TLSDESC_PAGE;
  case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
    return RE_AARCH64_AUTH_TLSDESC_PAGE;
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_TLSDESC_ADD_LO12:
    return R_TLSDESC;
  case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
  case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
    return RE_AARCH64_AUTH_TLSDESC;
  case R_AARCH64_TLSDESC_CALL:
    return R_TLSDESC_CALL;
  case R_AARCH64_TLSLE_ADD_TPREL_HI12:
  case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    return R_TPREL;
  case R_AARCH64_CALL26:
  case R_AARCH64_CONDBR19:
  case R_AARCH64_JUMP26:
  case R_AARCH64_TSTBR14:
    return R_PLT_PC;
  case R_AARCH64_PLT32:
    const_cast<Symbol &>(s).thunkAccessed = true;
    return R_PLT_PC;
  case R_AARCH64_PREL16:
  case R_AARCH64_PREL32:
  case R_AARCH64_PREL64:
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_LD_PREL_LO19:
  case R_AARCH64_MOVW_PREL_G0:
  case R_AARCH64_MOVW_PREL_G0_NC:
  case R_AARCH64_MOVW_PREL_G1:
  case R_AARCH64_MOVW_PREL_G1_NC:
  case R_AARCH64_MOVW_PREL_G2:
  case R_AARCH64_MOVW_PREL_G2_NC:
  case R_AARCH64_MOVW_PREL_G3:
    return R_PC;
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    return RE_AARCH64_PAGE_PC;
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
    return R_GOT;
  case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
  case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
    return RE_AARCH64_AUTH_GOT;
  case R_AARCH64_AUTH_GOT_LD_PREL19:
  case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
    return RE_AARCH64_AUTH_GOT_PC;
  case R_AARCH64_LD64_GOTPAGE_LO15:
    return RE_AARCH64_GOT_PAGE;
  case R_AARCH64_ADR_GOT_PAGE:
  case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
    return RE_AARCH64_GOT_PAGE_PC;
  case R_AARCH64_AUTH_ADR_GOT_PAGE:
    return RE_AARCH64_AUTH_GOT_PAGE_PC;
  case R_AARCH64_GOTPCREL32:
  case R_AARCH64_GOT_LD_PREL19:
    return R_GOT_PC;
  case R_AARCH64_NONE:
    return R_NONE;
  default:
    Err(ctx) << getErrorLoc(ctx, loc) << "unknown relocation (" << type.v
             << ") against symbol " << &s;
    return R_NONE;
  }
}

RelExpr
AArch64::adjustTlsExpr(RelType type, RelExpr expr) const {
  if (expr == R_RELAX_TLS_GD_TO_IE) {
    if (type == R_AARCH64_TLSDESC_ADR_PAGE21)
      return RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC;
    return R_RELAX_TLS_GD_TO_IE_ABS;
  }
  return expr;
}

bool AArch64::usesOnlyLowPageBits(RelType type) const {
  switch (type) {
  default:
    return false;
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
    return true;
  }
}

RelType AArch64::getDynRel(RelType type) const {
  if (type == R_AARCH64_ABS64 || type == R_AARCH64_AUTH_ABS64)
    return type;
  return R_AARCH64_NONE;
}

int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  case R_AARCH64_TLSDESC:
    return read64(ctx, buf + 8);
  case R_AARCH64_NONE:
  case R_AARCH64_GLOB_DAT:
  case R_AARCH64_AUTH_GLOB_DAT:
  case R_AARCH64_JUMP_SLOT:
    return 0;
  case R_AARCH64_ABS16:
  case R_AARCH64_PREL16:
    return SignExtend64<16>(read16(ctx, buf));
  case R_AARCH64_ABS32:
  case R_AARCH64_PREL32:
    return SignExtend64<32>(read32(ctx, buf));
  case R_AARCH64_ABS64:
  case R_AARCH64_PREL64:
  case R_AARCH64_RELATIVE:
  case R_AARCH64_IRELATIVE:
  case R_AARCH64_TLS_TPREL64:
    return read64(ctx, buf);

  // The following relocation types all point at instructions, and
  // relocate an immediate field in the instruction.
  //
  // The general rule, from AAELF64 §5.7.2 "Addends and PC-bias",
  // says: "If the relocation relocates an instruction the immediate
  // field of the instruction is extracted, scaled as required by
  // the instruction field encoding, and sign-extended to 64 bits".

  // The R_AARCH64_MOVW family operates on wide MOV/MOVK/MOVZ
  // instructions, which have a 16-bit immediate field with its low
  // bit in bit 5 of the instruction encoding. When the immediate
  // field is used as an implicit addend for REL-type relocations,
  // it is treated as added to the low bits of the output value, not
  // shifted depending on the relocation type.
  //
  // This allows REL relocations to express the requirement 'please
  // add 12345 to this symbol value and give me the four 16-bit
  // chunks of the result', by putting the same addend 12345 in all
  // four instructions. Carries between the 16-bit chunks are
  // handled correctly, because the whole 64-bit addition is done
  // once per relocation.
  case R_AARCH64_MOVW_UABS_G0:
  case R_AARCH64_MOVW_UABS_G0_NC:
  case R_AARCH64_MOVW_UABS_G1:
  case R_AARCH64_MOVW_UABS_G1_NC:
  case R_AARCH64_MOVW_UABS_G2:
  case R_AARCH64_MOVW_UABS_G2_NC:
  case R_AARCH64_MOVW_UABS_G3:
    return SignExtend64<16>(getBits(read32le(buf), 5, 20));

  // R_AARCH64_TSTBR14 points at a TBZ or TBNZ instruction, which
  // has a 14-bit offset measured in instructions, i.e. shifted left
  // by 2.
  case R_AARCH64_TSTBR14:
    return SignExtend64<16>(getBits(read32le(buf), 5, 18) << 2);

  // R_AARCH64_CONDBR19 operates on the ordinary B.cond instruction,
  // which has a 19-bit offset measured in instructions.
  //
  // R_AARCH64_LD_PREL_LO19 operates on the LDR (literal)
  // instruction, which also has a 19-bit offset, measured in 4-byte
  // chunks. So the calculation is the same as for
  // R_AARCH64_CONDBR19.
  case R_AARCH64_CONDBR19:
  case R_AARCH64_LD_PREL_LO19:
    return SignExtend64<21>(getBits(read32le(buf), 5, 23) << 2);

  // R_AARCH64_ADD_ABS_LO12_NC operates on ADD (immediate). The
  // immediate can optionally be shifted left by 12 bits, but this
  // relocation is intended for the case where it is not.
  case R_AARCH64_ADD_ABS_LO12_NC:
    return SignExtend64<12>(getBits(read32le(buf), 10, 21));

  // R_AARCH64_ADR_PREL_LO21 operates on an ADR instruction, whose
  // 21-bit immediate is split between two bits high up in the word
  // (in fact the two _lowest_ order bits of the value) and 19 bits
  // lower down.
  //
  // R_AARCH64_ADR_PREL_PG_HI21[_NC] operate on an ADRP instruction,
  // which encodes the immediate in the same way, but will shift it
  // left by 12 bits when the instruction executes. For the same
  // reason as the MOVW family, we don't apply that left shift here.
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    return SignExtend64<21>((getBits(read32le(buf), 5, 23) << 2) |
                            getBits(read32le(buf), 29, 30));

  // R_AARCH64_{JUMP,CALL}26 operate on B and BL, which have a
  // 26-bit offset measured in instructions.
  case R_AARCH64_JUMP26:
  case R_AARCH64_CALL26:
    return SignExtend64<28>(getBits(read32le(buf), 0, 25) << 2);

  default:
    InternalErr(ctx, buf) << "cannot read addend for relocation " << type;
    return 0;
  }
}

void AArch64::writeGotPlt(uint8_t *buf, const Symbol &) const {
  write64(ctx, buf, ctx.in.plt->getVA());
}

void AArch64::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
  if (ctx.arg.writeAddends)
    write64(ctx, buf, s.getVA(ctx));
}

void AArch64::writePltHeader(uint8_t *buf) const {
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp  x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.got.plt[2]))]
      0x10, 0x02, 0x00, 0x91, // add  x16, x16, Offset(&(.got.plt[2]))
      0x20, 0x02, 0x1f, 0xd6, // br   x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  memcpy(buf, pltData, sizeof(pltData));

  uint64_t got = ctx.in.gotPlt->getVA();
  uint64_t plt = ctx.in.plt->getVA();
  relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(got + 16) - getAArch64Page(plt + 4));
  relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16);
  relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16);
}

void AArch64::writePlt(uint8_t *buf, const Symbol &sym,
                       uint64_t pltEntryAddr) const {
  const uint8_t inst[] = {
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.got.plt[n]))]
      0x10, 0x02, 0x00, 0x91, // add  x16, x16, Offset(&(.got.plt[n]))
      0x20, 0x02, 0x1f, 0xd6  // br   x17
  };
  memcpy(buf, inst, sizeof(inst));

  uint64_t gotPltEntryAddr = sym.getGotPltVA(ctx);
  relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr));
  relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr);
  relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr);
}

bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
                         uint64_t branchAddr, const Symbol &s,
                         int64_t a) const {
  // If s is an undefined weak symbol and does not have a PLT entry then it
  // will be resolved as a branch to the next instruction. If it is hidden, its
  // binding has been converted to local, so we just check isUndefined() here.
  // An undefined non-weak symbol will have been errored.
  if (s.isUndefined() && !s.isInPlt(ctx))
    return false;
  // ELF for the ARM 64-bit architecture, section "Call and Jump relocations",
  // only permits range extension thunks for the R_AARCH64_CALL26 and
  // R_AARCH64_JUMP26 relocation types.
  if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
      type != R_AARCH64_PLT32)
    return false;
  uint64_t dst = expr == R_PLT_PC ? s.getPltVA(ctx) : s.getVA(ctx, a);
  return !inBranchRange(type, branchAddr, dst);
}

uint32_t AArch64::getThunkSectionSpacing() const {
  // See comment in Arch/ARM.cpp for a more detailed explanation of
  // getThunkSectionSpacing(). For AArch64 the only branches we are permitted
  // to thunk have a range of +/- 128 MiB.
  return (128 * 1024 * 1024) - 0x30000;
}

bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
  if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
      type != R_AARCH64_PLT32)
    return true;
  // The AArch64 call and unconditional branch instructions have a range of
  // +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB.
  uint64_t range =
      type == R_AARCH64_PLT32 ? (UINT64_C(1) << 31) : (128 * 1024 * 1024);
  if (dst > src) {
    // Immediate of branch is signed.
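    // The forward range is 4 bytes shorter because the immediate is signed:
    // e.g. a 26-bit branch immediate scaled by 4 covers [-0x8000000,
    // 0x7FFFFFC].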
    range -= 4;
    return dst - src <= range;
  }
  return src - dst <= range;
}

// Write the immediate of an ADR or ADRP instruction, whose 21-bit value is
// split into immlo (bits 30:29) and immhi (bits 23:5) of the encoding.
static void write32AArch64Addr(uint8_t *l, uint64_t imm) {
  uint32_t immLo = (imm & 0x3) << 29;
  uint32_t immHi = (imm & 0x1FFFFC) << 3;
  uint64_t mask = (0x3 << 29) | (0x1FFFFC << 3);
  write32le(l, (read32le(l) & ~mask) | immLo | immHi);
}

static void writeMaskedBits32le(uint8_t *p, int32_t v, uint32_t mask) {
  write32le(p, (read32le(p) & ~mask) | v);
}

// Update the immediate field in an AArch64 ldr, str, or add instruction.
static void write32Imm12(uint8_t *l, uint64_t imm) {
  writeMaskedBits32le(l, (imm & 0xFFF) << 10, 0xFFF << 10);
}

// Update the immediate field in an AArch64 movk, movn or movz instruction
// for a signed relocation, and update the opcode of a movn or movz instruction
// to match the sign of the operand.
static void writeSMovWImm(uint8_t *loc, uint32_t imm) {
  uint32_t inst = read32le(loc);
  // Opcode field is bits 30, 29, with 10 = movz, 00 = movn and 11 = movk.
  if (!(inst & (1 << 29))) {
    // movn or movz.
    if (imm & 0x10000) {
      // Change opcode to movn, which takes an inverted operand.
      imm ^= 0xFFFF;
      inst &= ~(1 << 30);
    } else {
      // Change opcode to movz.
      inst |= 1 << 30;
    }
  }
  write32le(loc, inst | ((imm & 0xFFFF) << 5));
}

void AArch64::relocate(uint8_t *loc, const Relocation &rel,
                       uint64_t val) const {
  switch (rel.type) {
  case R_AARCH64_ABS16:
  case R_AARCH64_PREL16:
    checkIntUInt(ctx, loc, val, 16, rel);
    write16(ctx, loc, val);
    break;
  case R_AARCH64_ABS32:
  case R_AARCH64_PREL32:
    checkIntUInt(ctx, loc, val, 32, rel);
    write32(ctx, loc, val);
    break;
  case R_AARCH64_PLT32:
  case R_AARCH64_GOTPCREL32:
    checkInt(ctx, loc, val, 32, rel);
    write32(ctx, loc, val);
    break;
  case R_AARCH64_ABS64:
    // AArch64 relocations to tagged symbols have extended semantics, as
    // described here:
    // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#841extended-semantics-of-r_aarch64_relative.
    // tl;dr: encode the symbol's special addend in the place, which is an
    // offset to the point where the logical tag is derived from. Quick hack:
    // if the addend is within the symbol's bounds, no need to encode the tag
    // derivation offset.
    if (rel.sym && rel.sym->isTagged() &&
        (rel.addend < 0 ||
         rel.addend >= static_cast<int64_t>(rel.sym->getSize())))
      write64(ctx, loc, -rel.addend);
    else
      write64(ctx, loc, val);
    break;
  case R_AARCH64_PREL64:
    write64(ctx, loc, val);
    break;
  case R_AARCH64_AUTH_ABS64:
    // If val is wider than 32 bits, the relocation must have been moved from
    // .relr.auth.dyn to .rela.dyn, and the addend write is not needed.
    //
    // If val fits in 32 bits, we have two potential scenarios:
    // * True RELR: Write the 32-bit `val`.
    // * RELA: Even if the value now fits in 32 bits, it might have been
    //   converted from RELR during an iteration in
    //   finalizeAddressDependentContent(). Writing the value is harmless
    //   because dynamic linking ignores it.
    if (isInt<32>(val))
      write32(ctx, loc, val);
    break;
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_AUTH_GOT_ADD_LO12_NC:
    write32Imm12(loc, val);
    break;
  case R_AARCH64_ADR_GOT_PAGE:
  case R_AARCH64_AUTH_ADR_GOT_PAGE:
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
  case R_AARCH64_TLSDESC_ADR_PAGE21:
  case R_AARCH64_AUTH_TLSDESC_ADR_PAGE21:
    checkInt(ctx, loc, val, 33, rel);
    [[fallthrough]];
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    write32AArch64Addr(loc, val >> 12);
    break;
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_AUTH_GOT_ADR_PREL_LO21:
    checkInt(ctx, loc, val, 21, rel);
    write32AArch64Addr(loc, val);
    break;
  case R_AARCH64_JUMP26:
    // Normally we would just write the bits of the immediate field, however
    // when patching instructions for the cpu errata fix -fix-cortex-a53-843419
    // we want to replace a non-branch instruction with a branch immediate
    // instruction. By writing all the bits of the instruction including the
    // opcode and the immediate (0 001 | 01 imm26) we can do this
    // transformation by placing a R_AARCH64_JUMP26 relocation at the offset of
    // the instruction we want to patch.
    write32le(loc, 0x14000000);
    [[fallthrough]];
  case R_AARCH64_CALL26:
    checkInt(ctx, loc, val, 28, rel);
    writeMaskedBits32le(loc, (val & 0x0FFFFFFC) >> 2, 0x0FFFFFFC >> 2);
    break;
  case R_AARCH64_CONDBR19:
  case R_AARCH64_LD_PREL_LO19:
  case R_AARCH64_GOT_LD_PREL19:
  case R_AARCH64_AUTH_GOT_LD_PREL19:
    checkAlignment(ctx, loc, val, 4, rel);
    checkInt(ctx, loc, val, 21, rel);
    writeMaskedBits32le(loc, (val & 0x1FFFFC) << 3, 0x1FFFFC << 3);
    break;
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
    write32Imm12(loc, getBits(val, 0, 11));
    break;
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
    checkAlignment(ctx, loc, val, 2, rel);
    write32Imm12(loc, getBits(val, 1, 11));
    break;
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
    checkAlignment(ctx, loc, val, 4, rel);
    write32Imm12(loc, getBits(val, 2, 11));
    break;
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_AUTH_LD64_GOT_LO12_NC:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_AUTH_TLSDESC_LD64_LO12:
    checkAlignment(ctx, loc, val, 8, rel);
    write32Imm12(loc, getBits(val, 3, 11));
    break;
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
    checkAlignment(ctx, loc, val, 16, rel);
    write32Imm12(loc, getBits(val, 4, 11));
    break;
  case R_AARCH64_LD64_GOTPAGE_LO15:
    checkAlignment(ctx, loc, val, 8, rel);
    write32Imm12(loc, getBits(val, 3, 14));
    break;
  case R_AARCH64_MOVW_UABS_G0:
    checkUInt(ctx, loc, val, 16, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G0_NC:
    writeMaskedBits32le(loc, (val & 0xFFFF) << 5, 0xFFFF << 5);
    break;
  case R_AARCH64_MOVW_UABS_G1:
    checkUInt(ctx, loc, val, 32, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G1_NC:
    writeMaskedBits32le(loc, (val & 0xFFFF0000) >> 11, 0xFFFF0000 >> 11);
    break;
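  // The remaining MOVW_UABS cases write the higher 16-bit chunks of the value
  // in the same way; the source shift (16*n for G<n>) is folded with the
  // left-shift by 5 into the immediate field, e.g. ">> 27" below is
  // ">> 32 << 5".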
  case R_AARCH64_MOVW_UABS_G2:
    checkUInt(ctx, loc, val, 48, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G2_NC:
    writeMaskedBits32le(loc, (val & 0xFFFF00000000) >> 27,
                        0xFFFF00000000 >> 27);
    break;
  case R_AARCH64_MOVW_UABS_G3:
    writeMaskedBits32le(loc, (val & 0xFFFF000000000000) >> 43,
                        0xFFFF000000000000 >> 43);
    break;
  case R_AARCH64_MOVW_PREL_G0:
  case R_AARCH64_MOVW_SABS_G0:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0:
    checkInt(ctx, loc, val, 17, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G0_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
    writeSMovWImm(loc, val);
    break;
  case R_AARCH64_MOVW_PREL_G1:
  case R_AARCH64_MOVW_SABS_G1:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1:
    checkInt(ctx, loc, val, 33, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G1_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
    writeSMovWImm(loc, val >> 16);
    break;
  case R_AARCH64_MOVW_PREL_G2:
  case R_AARCH64_MOVW_SABS_G2:
  case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    checkInt(ctx, loc, val, 49, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G2_NC:
    writeSMovWImm(loc, val >> 32);
    break;
  case R_AARCH64_MOVW_PREL_G3:
    writeSMovWImm(loc, val >> 48);
    break;
  case R_AARCH64_TSTBR14:
    checkInt(ctx, loc, val, 16, rel);
    writeMaskedBits32le(loc, (val & 0xFFFC) << 3, 0xFFFC << 3);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_HI12:
    checkUInt(ctx, loc, val, 24, rel);
    write32Imm12(loc, val >> 12);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_AUTH_TLSDESC_ADD_LO12:
    write32Imm12(loc, val);
    break;
  case R_AARCH64_TLSDESC:
    // For R_AARCH64_TLSDESC the addend is stored in the second 64-bit word.
    write64(ctx, loc + 8, val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}

void AArch64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // TLSDESC Global-Dynamic relocations are in the form:
  //   adrp x0, :tlsdesc:v            [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr  x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12]
  //   add  x0, x0, :tlsdesc_lo12:v   [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                   [R_AARCH64_TLSDESC_CALL]
  //   blr  x1
  // And it can be optimized to:
  //   movz x0, #0x0, lsl #16
  //   movk x0, #0x10
  //   nop
  //   nop
  checkUInt(ctx, loc, val, 32, rel);

  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(loc, 0xd503201f); // nop
    return;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(loc, 0xd2a00000 | (((val >> 16) & 0xffff) << 5)); // movz
    return;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(loc, 0xf2800000 | ((val & 0xffff) << 5)); // movk
    return;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
  }
}

void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // TLSDESC Global-Dynamic relocations are in the form:
  //   adrp x0, :tlsdesc:v            [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr  x1, [x0, #:tlsdesc_lo12:v [R_AARCH64_TLSDESC_LD64_LO12]
  //   add  x0, x0, :tlsdesc_lo12:v   [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                   [R_AARCH64_TLSDESC_CALL]
  //   blr  x1
  // And it can be optimized to:
  //   adrp x0, :gottprel:v
  //   ldr  x0, [x0, :gottprel_lo12:v]
  //   nop
  //   nop

  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(loc, 0xd503201f); // nop
    break;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(loc, 0x90000000); // adrp
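    // Encode the page-relative offset to the IE GOT entry into the new adrp.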
    relocateNoSym(loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val);
    break;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(loc, 0xf9400000); // ldr
    relocateNoSym(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val);
    break;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
  }
}

void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  checkUInt(ctx, loc, val, 32, rel);

  if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) {
    // Generate MOVZ.
    uint32_t regNo = read32le(loc) & 0x1f;
    write32le(loc, (0xd2a00000 | regNo) | (((val >> 16) & 0xffff) << 5));
    return;
  }
  if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) {
    // Generate MOVK.
    uint32_t regNo = read32le(loc) & 0x1f;
    write32le(loc, (0xf2800000 | regNo) | ((val & 0xffff) << 5));
    return;
  }
  llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
}

AArch64Relaxer::AArch64Relaxer(Ctx &ctx, ArrayRef<Relocation> relocs)
    : ctx(ctx) {
  if (!ctx.arg.relax)
    return;
  // Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC
  // always appear in pairs.
  size_t i = 0;
  const size_t size = relocs.size();
  for (; i != size; ++i) {
    if (relocs[i].type == R_AARCH64_ADR_GOT_PAGE) {
      if (i + 1 < size && relocs[i + 1].type == R_AARCH64_LD64_GOT_LO12_NC) {
        ++i;
        continue;
      }
      break;
    } else if (relocs[i].type == R_AARCH64_LD64_GOT_LO12_NC) {
      break;
    }
  }
  safeToRelaxAdrpLdr = i == size;
}

bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
                                     const Relocation &addRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  // When the address of sym is within the range of ADR then
  // we may relax
  //   ADRP xn, sym
  //   ADD  xn, xn, :lo12: sym
  // to
  //   NOP
  //   ADR  xn, sym
  if (!ctx.arg.relax || adrpRel.type != R_AARCH64_ADR_PREL_PG_HI21 ||
      addRel.type != R_AARCH64_ADD_ABS_LO12_NC)
    return false;
  // Check if the relocations apply to consecutive instructions.
  if (adrpRel.offset + 4 != addRel.offset)
    return false;
  if (adrpRel.sym != addRel.sym)
    return false;
  if (adrpRel.addend != 0 || addRel.addend != 0)
    return false;

  uint32_t adrpInstr = read32le(buf + adrpRel.offset);
  uint32_t addInstr = read32le(buf + addRel.offset);
  // Check if the first instruction is ADRP and the second instruction is ADD.
  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
      (addInstr & 0xffc00000) != 0x91000000)
    return false;
  uint32_t adrpDestReg = adrpInstr & 0x1f;
  uint32_t addDestReg = addInstr & 0x1f;
  uint32_t addSrcReg = (addInstr >> 5) & 0x1f;
  if (adrpDestReg != addDestReg || adrpDestReg != addSrcReg)
    return false;

  Symbol &sym = *adrpRel.sym;
  // Check if the address difference is within the 1 MiB range.
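  // (ADR has a signed 21-bit byte offset, i.e. [-0x100000, 0xFFFFF].)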
  int64_t val = sym.getVA(ctx) - (secAddr + addRel.offset);
  if (val < -1024 * 1024 || val >= 1024 * 1024)
    return false;

  Relocation adrRel = {R_ABS, R_AARCH64_ADR_PREL_LO21, addRel.offset,
                       /*addend=*/0, &sym};
  // nop
  write32le(buf + adrpRel.offset, 0xd503201f);
  // adr x_<dest_reg>
  write32le(buf + adrRel.offset, 0x10000000 | adrpDestReg);
  ctx.target->relocate(buf + adrRel.offset, adrRel, val);
  return true;
}

bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
                                     const Relocation &ldrRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  if (!safeToRelaxAdrpLdr)
    return false;

  // When the definition of sym is not preemptible then we may
  // be able to relax
  //   ADRP xn, :got: sym
  //   LDR  xn, [xn, :got_lo12: sym]
  // to
  //   ADRP xn, sym
  //   ADD  xn, xn, :lo12: sym

  if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
      ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
    return false;
  // Check if the relocations apply to consecutive instructions.
  if (adrpRel.offset + 4 != ldrRel.offset)
    return false;
  // Check if the relocations reference the same symbol and
  // skip undefined, preemptible and STT_GNU_IFUNC symbols.
  if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
      adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
    return false;
  // Check if the addends of both relocations are zero.
  if (adrpRel.addend != 0 || ldrRel.addend != 0)
    return false;
  uint32_t adrpInstr = read32le(buf + adrpRel.offset);
  uint32_t ldrInstr = read32le(buf + ldrRel.offset);
  // Check if the first instruction is ADRP and the second instruction is LDR.
  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
      (ldrInstr & 0x3b000000) != 0x39000000)
    return false;
  // Check the value of the sf bit.
  if (!(ldrInstr >> 31))
    return false;
  uint32_t adrpDestReg = adrpInstr & 0x1f;
  uint32_t ldrDestReg = ldrInstr & 0x1f;
  uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
  // Check if ADRP and LDR use the same register.
  if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
    return false;

  Symbol &sym = *adrpRel.sym;
  // GOT references to absolute symbols can't be relaxed to use ADRP/ADD in
  // position-independent code because these instructions produce a relative
  // address.
  if (ctx.arg.isPic && !cast<Defined>(sym).section)
    return false;
  // Check if the address difference is within the 4 GiB range.
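  // (ADRP has a signed 21-bit page offset, so the page difference must fit in
  // a signed 33-bit value.)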
  int64_t val =
      getAArch64Page(sym.getVA(ctx)) - getAArch64Page(secAddr + adrpRel.offset);
  if (val != llvm::SignExtend64(val, 33))
    return false;

  Relocation adrpSymRel = {RE_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21,
                           adrpRel.offset, /*addend=*/0, &sym};
  Relocation addRel = {R_ABS, R_AARCH64_ADD_ABS_LO12_NC, ldrRel.offset,
                       /*addend=*/0, &sym};

  // adrp x_<dest_reg>
  write32le(buf + adrpSymRel.offset, 0x90000000 | adrpDestReg);
  // add x_<dest reg>, x_<dest reg>
  write32le(buf + addRel.offset, 0x91000000 | adrpDestReg | (adrpDestReg << 5));

  ctx.target->relocate(
      buf + adrpSymRel.offset, adrpSymRel,
      SignExtend64(getAArch64Page(sym.getVA(ctx)) -
                       getAArch64Page(secAddr + adrpSymRel.offset),
                   64));
  ctx.target->relocate(buf + addRel.offset, addRel,
                       SignExtend64(sym.getVA(ctx), 64));
  tryRelaxAdrpAdd(adrpSymRel, addRel, secAddr, buf);
  return true;
}

// Tagged symbols have upper address bits that are added by the dynamic loader,
// and thus need the full 64-bit GOT entry. Do not relax such symbols.
static bool needsGotForMemtag(const Relocation &rel) {
  return rel.sym->isTagged() && needsGot(rel.expr);
}

void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
  uint64_t secAddr = sec.getOutputSection()->addr;
  if (auto *s = dyn_cast<InputSection>(&sec))
    secAddr += s->outSecOff;
  else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))
    secAddr += ehIn->getParent()->outSecOff;
  AArch64Relaxer relaxer(ctx, sec.relocs());
  for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) {
    const Relocation &rel = sec.relocs()[i];
    uint8_t *loc = buf + rel.offset;
    const uint64_t val = sec.getRelocTargetVA(ctx, rel, secAddr + rel.offset);

    if (needsGotForMemtag(rel)) {
      relocate(loc, rel, val);
      continue;
    }

    switch (rel.expr) {
    case RE_AARCH64_GOT_PAGE_PC:
      if (i + 1 < size &&
          relaxer.tryRelaxAdrpLdr(rel, sec.relocs()[i + 1], secAddr, buf)) {
        ++i;
        continue;
      }
      break;
    case RE_AARCH64_PAGE_PC:
      if (i + 1 < size &&
          relaxer.tryRelaxAdrpAdd(rel, sec.relocs()[i + 1], secAddr, buf)) {
        ++i;
        continue;
      }
      break;
    case RE_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC:
    case R_RELAX_TLS_GD_TO_IE_ABS:
      relaxTlsGdToIe(loc, rel, val);
      continue;
    case R_RELAX_TLS_GD_TO_LE:
      relaxTlsGdToLe(loc, rel, val);
      continue;
    case R_RELAX_TLS_IE_TO_LE:
      relaxTlsIeToLe(loc, rel, val);
      continue;
    default:
      break;
    }
    relocate(loc, rel, val);
  }
}

static std::optional<uint64_t> getControlTransferAddend(InputSection &is,
                                                        Relocation &r) {
  // Identify a control transfer relocation for the branch-to-branch
  // optimization. A "control transfer relocation" is usually a direct B or BL
  // target, but it also includes, for example, relative vtable relocations.
  //
  // We require the relocation type to be JUMP26, CALL26 or PLT32. With a
  // relocation type of PLT32 the value may be assumed to be used for branching
  // directly to the symbol and the addend is only used to produce the
  // relocated value (hence the effective addend is always 0). This is because
  // if a PLT is needed the addend will be added to the address of the PLT,
  // and it doesn't make sense to branch into the middle of a PLT.
  // For example, relative vtable relocations use PLT32 and 0 or a positive
  // value as the addend but still are used to branch to the symbol.
  //
  // With JUMP26 or CALL26 the only reasonable interpretation of a non-zero
  // addend is that we are branching to symbol+addend so that becomes the
  // effective addend.
  if (r.type == R_AARCH64_PLT32)
    return 0;
  if (r.type == R_AARCH64_JUMP26 || r.type == R_AARCH64_CALL26)
    return r.addend;
  return std::nullopt;
}

static std::pair<Relocation *, uint64_t>
getBranchInfoAtTarget(InputSection &is, uint64_t offset) {
  auto *i = llvm::partition_point(
      is.relocations, [&](Relocation &r) { return r.offset < offset; });
  if (i != is.relocations.end() && i->offset == offset &&
      i->type == R_AARCH64_JUMP26) {
    return {i, i->addend};
  }
  return {nullptr, 0};
}

static void redirectControlTransferRelocations(Relocation &r1,
                                               const Relocation &r2) {
  r1.expr = r2.expr;
  r1.sym = r2.sym;
  // With PLT32 we must respect the original addend as that affects the value's
  // interpretation. With the other relocation types the original addend is
  // irrelevant because it referred to an offset within the original target
  // section so we overwrite it.
  if (r1.type == R_AARCH64_PLT32)
    r1.addend += r2.addend;
  else
    r1.addend = r2.addend;
}

void AArch64::applyBranchToBranchOpt() const {
  applyBranchToBranchOptImpl(ctx, getControlTransferAddend,
                             getBranchInfoAtTarget,
                             redirectControlTransferRelocations);
}

// AArch64 may use security features in variant PLT sequences. These are:
// Pointer Authentication (PAC), introduced in armv8.3-a, and Branch Target
// Indicator (BTI), introduced in armv8.5-a. The additional instructions used
// in the variant PLT sequences are encoded in the HINT space so they can be
// deployed on older architectures, which treat the instructions as a NOP.
// PAC and BTI can be combined leading to the following combinations:
//   writePltHeader
//   writePltHeaderBti (no PAC Header needed)
//   writePlt
//   writePltBti (BTI only)
//   writePltPac (PAC only)
//   writePltBtiPac (BTI and PAC)
//
// When PAC is enabled the dynamic loader signs the address that it places
// in the .got.plt using the pacia1716 instruction, which signs the value in
// x17 using the modifier in x16. The static linker places autia1716 before the
// indirect branch to x17 to authenticate the address in x17 with the modifier
// in x16. This makes it more difficult for an attacker to modify the value in
// the .got.plt.
//
// When BTI is enabled all indirect branches must land on a bti instruction.
// The static linker must place a bti instruction at the start of any PLT entry
// that may be the target of an indirect branch. As the PLT entries call the
// lazy resolver indirectly this must have a bti instruction at start. In
// general a bti instruction is not needed for a PLT entry as indirect calls
// are resolved to the function address and not the PLT entry for the function.
// There are a small number of cases where the PLT address can escape, such as
// taking the address of a function or ifunc via a non-GOT-generating
// relocation, and a shared library refers to that symbol.
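// (Such canonical PLT entries are identified in writePlt below via the
// NEEDS_COPY flag, isInIplt and thunkAccessed.)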
//
// We use the bti c variant of the instruction, which permits indirect branches
// (br) via x16/x17 and indirect function calls (blr) via any register. The ABI
// guarantees that all indirect branches from code requiring BTI protection
// will go via x16/x17.

namespace {
class AArch64BtiPac final : public AArch64 {
public:
  AArch64BtiPac(Ctx &);
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;

private:
  bool btiHeader; // bti instruction needed in PLT Header and Entry
  enum {
    PEK_NoAuth,
    PEK_AuthHint, // use autia1716 instr for authenticated branch in PLT entry
    PEK_Auth,     // use braa instr for authenticated branch in PLT entry
  } pacEntryKind;
};
} // namespace

AArch64BtiPac::AArch64BtiPac(Ctx &ctx) : AArch64(ctx) {
  btiHeader = (ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
  // A BTI (Branch Target Indicator) PLT entry is only required if the
  // address of the PLT entry can be taken by the program, which permits an
  // indirect jump to the PLT entry. This can happen when the address
  // of the PLT entry for a function is canonicalised due to the address of
  // the function in an executable being taken by a shared library, or a
  // non-preemptible ifunc is referenced by non-GOT-generating,
  // non-PLT-generating relocations.
  // The PAC PLT entries require dynamic loader support and this isn't known
  // from properties in the objects, so we use the command line flag.
  // By default we only use hint-space instructions, but if we detect the
  // PAuthABI, which requires v8.3-A, we can use the non-hint space
  // instructions.

  if (ctx.arg.zPacPlt) {
    if (ctx.aarch64PauthAbiCoreInfo && ctx.aarch64PauthAbiCoreInfo->isValid())
      pacEntryKind = PEK_Auth;
    else
      pacEntryKind = PEK_AuthHint;
  } else {
    pacEntryKind = PEK_NoAuth;
  }

  if (btiHeader || (pacEntryKind != PEK_NoAuth)) {
    pltEntrySize = 24;
    ipltEntrySize = 24;
  }
}

void AArch64BtiPac::writePltHeader(uint8_t *buf) const {
  const uint8_t btiData[] = {0x5f, 0x24, 0x03, 0xd5}; // bti c
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp  x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.got.plt[2]))]
      0x10, 0x02, 0x00, 0x91, // add  x16, x16, Offset(&(.got.plt[2]))
      0x20, 0x02, 0x1f, 0xd6, // br   x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t nopData[] = {0x1f, 0x20, 0x03, 0xd5}; // nop

  uint64_t got = ctx.in.gotPlt->getVA();
  uint64_t plt = ctx.in.plt->getVA();

  if (btiHeader) {
    // PltHeader is called indirectly by plt[N]. Prefix pltData with a BTI C
    // instruction.
    memcpy(buf, btiData, sizeof(btiData));
    buf += sizeof(btiData);
    plt += sizeof(btiData);
  }
  memcpy(buf, pltData, sizeof(pltData));

  relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(got + 16) - getAArch64Page(plt + 4));
  relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16);
  relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16);
  if (!btiHeader)
    // We didn't add the BTI c instruction so round out size with NOP.
    memcpy(buf + sizeof(pltData), nopData, sizeof(nopData));
}

void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym,
                             uint64_t pltEntryAddr) const {
  // The PLT entry is of the form:
  // [btiData] addrInst (pacBr | stdBr) [nopData]
  const uint8_t btiData[] = {0x5f, 0x24, 0x03, 0xd5}; // bti c
  const uint8_t addrInst[] = {
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
      0x11, 0x02, 0x40, 0xf9, // ldr  x17, [x16, Offset(&(.got.plt[n]))]
      0x10, 0x02, 0x00, 0x91  // add  x16, x16, Offset(&(.got.plt[n]))
  };
  const uint8_t pacHintBr[] = {
      0x9f, 0x21, 0x03, 0xd5, // autia1716
      0x20, 0x02, 0x1f, 0xd6  // br   x17
  };
  const uint8_t pacBr[] = {
      0x30, 0x0a, 0x1f, 0xd7, // braa x17, x16
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t stdBr[] = {
      0x20, 0x02, 0x1f, 0xd6, // br   x17
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t nopData[] = {0x1f, 0x20, 0x03, 0xd5}; // nop

  // NEEDS_COPY indicates a non-ifunc canonical PLT entry whose address may
  // escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its
  // address may escape if referenced by a direct relocation. If relative
  // vtables are used then if the vtable is in a shared object the offsets will
  // be to the PLT entry. The condition is conservative.
  bool hasBti = btiHeader &&
                (sym.hasFlag(NEEDS_COPY) || sym.isInIplt || sym.thunkAccessed);
  if (hasBti) {
    memcpy(buf, btiData, sizeof(btiData));
    buf += sizeof(btiData);
    pltEntryAddr += sizeof(btiData);
  }

  uint64_t gotPltEntryAddr = sym.getGotPltVA(ctx);
  memcpy(buf, addrInst, sizeof(addrInst));
  relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr));
  relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr);
  relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr);

  if (pacEntryKind != PEK_NoAuth)
    memcpy(buf + sizeof(addrInst),
           pacEntryKind == PEK_AuthHint ? pacHintBr : pacBr,
           sizeof(pacEntryKind == PEK_AuthHint ? pacHintBr : pacBr));
  else
    memcpy(buf + sizeof(addrInst), stdBr, sizeof(stdBr));
  if (!hasBti)
    // We didn't add the BTI c instruction so round out size with NOP.
    memcpy(buf + sizeof(addrInst) + sizeof(stdBr), nopData, sizeof(nopData));
}

template <class ELFT>
static void
addTaggedSymbolReferences(Ctx &ctx, InputSectionBase &sec,
                          DenseMap<Symbol *, unsigned> &referenceCount) {
  assert(sec.type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC);

  const RelsOrRelas<ELFT> rels = sec.relsOrRelas<ELFT>();
  if (rels.areRelocsRel())
    ErrAlways(ctx)
        << "non-RELA relocations are not allowed with memtag globals";

  for (const typename ELFT::Rela &rel : rels.relas) {
    Symbol &sym = sec.file->getRelocTargetSym(rel);
    // Linker-synthesized symbols such as __executable_start may be referenced
    // as tagged in input objfiles, and we don't want them to be tagged. A
    // cheap way to exclude them is the type check, but their type is
    // STT_NOTYPE. In addition, this saves us from checking untaggable symbols,
    // like functions or TLS symbols.
    if (sym.type != STT_OBJECT)
      continue;
    // STB_LOCAL symbols can't be referenced from outside the object file, and
    // thus don't need to be checked for references from other object files.
    if (sym.binding == STB_LOCAL) {
      sym.setIsTagged(true);
      continue;
    }
    ++referenceCount[&sym];
  }
  sec.markDead();
}

// A tagged symbol must be denoted as being tagged by all references and the
// chosen definition. For simplicity, here, it must also be denoted as tagged
// for all definitions. Otherwise:
//
//  1. A tagged definition can be used by an untagged declaration, in which
//     case the untagged access may be PC-relative, causing a tag mismatch at
//     runtime.
//  2. An untagged definition can be used by a tagged declaration, where the
//     compiler has taken advantage of the increased alignment of the tagged
//     declaration, but the alignment at runtime is wrong, causing a fault.
//
// Ideally, this isn't a problem, as any TU that imports or exports tagged
// symbols should also be built with tagging. But, to handle these cases, we
// demote the symbol to be untagged.
void elf::createTaggedSymbols(Ctx &ctx) {
  assert(hasMemtag(ctx));

  // First, collect all symbols that are marked as tagged, and count how many
  // times they're marked as tagged.
  DenseMap<Symbol *, unsigned> taggedSymbolReferenceCount;
  for (InputFile *file : ctx.objectFiles) {
    if (file->kind() != InputFile::ObjKind)
      continue;
    for (InputSectionBase *section : file->getSections()) {
      if (!section || section->type != SHT_AARCH64_MEMTAG_GLOBALS_STATIC ||
          section == &InputSection::discarded)
        continue;
      invokeELFT(addTaggedSymbolReferences, ctx, *section,
                 taggedSymbolReferenceCount);
    }
  }

  // Now, go through all the symbols. If the number of declarations +
  // definitions of a symbol exceeds the number of times it's marked as
  // tagged, it means we have an objfile that uses the untagged variant of the
  // symbol.
  for (InputFile *file : ctx.objectFiles) {
    if (file->kind() != InputFile::BinaryKind &&
        file->kind() != InputFile::ObjKind)
      continue;

    for (Symbol *symbol : file->getSymbols()) {
      // See `addTaggedSymbolReferences` for more details.
      if (symbol->type != STT_OBJECT || symbol->binding == STB_LOCAL)
        continue;
      auto it = taggedSymbolReferenceCount.find(symbol);
      if (it == taggedSymbolReferenceCount.end())
        continue;
      unsigned &remainingAllowedTaggedRefs = it->second;
      if (remainingAllowedTaggedRefs == 0) {
        taggedSymbolReferenceCount.erase(it);
        continue;
      }
      --remainingAllowedTaggedRefs;
    }
  }

  // `addTaggedSymbolReferences` has already checked that we have RELA
  // relocations; the only other way to get written addends is with
  // --apply-dynamic-relocs.
  if (!taggedSymbolReferenceCount.empty() && ctx.arg.writeAddends)
    ErrAlways(ctx) << "--apply-dynamic-relocs cannot be used with MTE globals";

  // Now, `taggedSymbolReferenceCount` should only contain symbols that are
  // defined as tagged exactly the same number of times as they're referenced,
  // meaning all uses are tagged.
  for (auto &[symbol, remainingTaggedRefs] : taggedSymbolReferenceCount) {
    assert(remainingTaggedRefs == 0 &&
           "Symbol is defined as tagged more times than it's used");
    symbol->setIsTagged(true);
  }
}

void elf::setAArch64TargetInfo(Ctx &ctx) {
  if ((ctx.arg.andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) ||
      ctx.arg.zPacPlt)
    ctx.target.reset(new AArch64BtiPac(ctx));
  else
    ctx.target.reset(new AArch64(ctx));
}