1 //===- LoongArch.cpp ------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputFiles.h" 10 #include "OutputSections.h" 11 #include "Symbols.h" 12 #include "SyntheticSections.h" 13 #include "Target.h" 14 15 using namespace llvm; 16 using namespace llvm::object; 17 using namespace llvm::support::endian; 18 using namespace llvm::ELF; 19 using namespace lld; 20 using namespace lld::elf; 21 22 namespace { 23 class LoongArch final : public TargetInfo { 24 public: 25 LoongArch(); 26 uint32_t calcEFlags() const override; 27 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; 28 void writeGotPlt(uint8_t *buf, const Symbol &s) const override; 29 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; 30 void writePltHeader(uint8_t *buf) const override; 31 void writePlt(uint8_t *buf, const Symbol &sym, 32 uint64_t pltEntryAddr) const override; 33 RelType getDynRel(RelType type) const override; 34 RelExpr getRelExpr(RelType type, const Symbol &s, 35 const uint8_t *loc) const override; 36 bool usesOnlyLowPageBits(RelType type) const override; 37 void relocate(uint8_t *loc, const Relocation &rel, 38 uint64_t val) const override; 39 }; 40 } // end anonymous namespace 41 42 enum Op { 43 SUB_W = 0x00110000, 44 SUB_D = 0x00118000, 45 BREAK = 0x002a0000, 46 SRLI_W = 0x00448000, 47 SRLI_D = 0x00450000, 48 ADDI_W = 0x02800000, 49 ADDI_D = 0x02c00000, 50 ANDI = 0x03400000, 51 PCADDU12I = 0x1c000000, 52 LD_W = 0x28800000, 53 LD_D = 0x28c00000, 54 JIRL = 0x4c000000, 55 }; 56 57 enum Reg { 58 R_ZERO = 0, 59 R_RA = 1, 60 R_TP = 2, 61 R_T0 = 12, 62 R_T1 = 13, 63 R_T2 = 14, 64 R_T3 = 15, 65 }; 66 67 // Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences 68 // like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i` 69 // produces a PC-relative intermediate value with the lowest 12 bits zeroed (the 70 // "page") for the next instruction to add in the "page offset". (`pcalau12i` 71 // stands for something like "PC ALigned Add Upper that starts from the 12th 72 // bit, Immediate".) 73 // 74 // Here a "page" is in fact just another way to refer to the 12-bit range 75 // allowed by the immediate field of the addi/ld/st instructions, and not 76 // related to the system or the kernel's actual page size. The sematics happens 77 // to match the AArch64 `adrp`, so the concept of "page" is borrowed here. 78 static uint64_t getLoongArchPage(uint64_t p) { 79 return p & ~static_cast<uint64_t>(0xfff); 80 } 81 82 static uint32_t lo12(uint32_t val) { return val & 0xfff; } 83 84 // Calculate the adjusted page delta between dest and PC. 85 uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc) { 86 // Consider the large code model access pattern, of which the smaller code 87 // models' access patterns are a subset: 88 // 89 // pcalau12i U, %foo_hi20(sym) ; b in [-0x80000, 0x7ffff] 90 // addi.d T, zero, %foo_lo12(sym) ; a in [-0x800, 0x7ff] 91 // lu32i.d T, %foo64_lo20(sym) ; c in [-0x80000, 0x7ffff] 92 // lu52i.d T, T, %foo64_hi12(sym) ; d in [-0x800, 0x7ff] 93 // {ldx,stx,add}.* dest, U, T 94 // 95 // Let page(pc) = 0xRRR'QQQQQ'PPPPP'000 and dest = 0xZZZ'YYYYY'XXXXX'AAA, 96 // with RQ, P, ZY, X and A representing the respective bitfields as unsigned 97 // integers. We have: 98 // 99 // page(dest) = 0xZZZ'YYYYY'XXXXX'000 100 // - page(pc) = 0xRRR'QQQQQ'PPPPP'000 101 // ---------------------------------- 102 // 0xddd'ccccc'bbbbb'000 103 // 104 // Now consider the above pattern's actual effects: 105 // 106 // page(pc) 0xRRR'QQQQQ'PPPPP'000 107 // pcalau12i + 0xiii'iiiii'bbbbb'000 108 // addi + 0xjjj'jjjjj'kkkkk'AAA 109 // lu32i.d & lu52i.d + 0xddd'ccccc'00000'000 110 // -------------------------------------------------- 111 // dest = U + T 112 // = ((RQ<<32) + (P<<12) + i + (b<<12)) + (j + k + A + (cd<<32)) 113 // = (((RQ+cd)<<32) + i + j) + (((P+b)<<12) + k) + A 114 // = (ZY<<32) + (X<<12) + A 115 // 116 // ZY<<32 = (RQ<<32)+(cd<<32)+i+j, X<<12 = (P<<12)+(b<<12)+k 117 // cd<<32 = (ZY<<32)-(RQ<<32)-i-j, b<<12 = (X<<12)-(P<<12)-k 118 // 119 // where i and k are terms representing the effect of b's and A's sign 120 // extension respectively. 121 // 122 // i = signed b < 0 ? -0x10000'0000 : 0 123 // k = signed A < 0 ? -0x1000 : 0 124 // 125 // The j term is a bit complex: it represents the higher half of 126 // sign-extended bits from A that are effectively lost if i == 0 but k != 0, 127 // due to overwriting by lu32i.d & lu52i.d. 128 // 129 // j = signed A < 0 && signed b >= 0 ? 0x10000'0000 : 0 130 // 131 // The actual effect of the instruction sequence before the final addition, 132 // i.e. our desired result value, is thus: 133 // 134 // result = (cd<<32) + (b<<12) 135 // = (ZY<<32)-(RQ<<32)-i-j + (X<<12)-(P<<12)-k 136 // = ((ZY<<32)+(X<<12)) - ((RQ<<32)+(P<<12)) - i - j - k 137 // = page(dest) - page(pc) - i - j - k 138 // 139 // when signed A >= 0 && signed b >= 0: 140 // 141 // i = j = k = 0 142 // result = page(dest) - page(pc) 143 // 144 // when signed A >= 0 && signed b < 0: 145 // 146 // i = -0x10000'0000, j = k = 0 147 // result = page(dest) - page(pc) + 0x10000'0000 148 // 149 // when signed A < 0 && signed b >= 0: 150 // 151 // i = 0, j = 0x10000'0000, k = -0x1000 152 // result = page(dest) - page(pc) - 0x10000'0000 + 0x1000 153 // 154 // when signed A < 0 && signed b < 0: 155 // 156 // i = -0x10000'0000, j = 0, k = -0x1000 157 // result = page(dest) - page(pc) + 0x1000 158 uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pc); 159 bool negativeA = lo12(dest) > 0x7ff; 160 bool negativeB = (result & 0x8000'0000) != 0; 161 162 if (negativeA) 163 result += 0x1000; 164 if (negativeA && !negativeB) 165 result -= 0x10000'0000; 166 else if (!negativeA && negativeB) 167 result += 0x10000'0000; 168 return result; 169 } 170 171 static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; } 172 173 static uint32_t insn(uint32_t op, uint32_t d, uint32_t j, uint32_t k) { 174 return op | d | (j << 5) | (k << 10); 175 } 176 177 // Extract bits v[begin:end], where range is inclusive. 178 static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { 179 return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end; 180 } 181 182 static uint32_t setD5k16(uint32_t insn, uint32_t imm) { 183 uint32_t immLo = extractBits(imm, 15, 0); 184 uint32_t immHi = extractBits(imm, 20, 16); 185 return (insn & 0xfc0003e0) | (immLo << 10) | immHi; 186 } 187 188 static uint32_t setD10k16(uint32_t insn, uint32_t imm) { 189 uint32_t immLo = extractBits(imm, 15, 0); 190 uint32_t immHi = extractBits(imm, 25, 16); 191 return (insn & 0xfc000000) | (immLo << 10) | immHi; 192 } 193 194 static uint32_t setJ20(uint32_t insn, uint32_t imm) { 195 return (insn & 0xfe00001f) | (extractBits(imm, 19, 0) << 5); 196 } 197 198 static uint32_t setK12(uint32_t insn, uint32_t imm) { 199 return (insn & 0xffc003ff) | (extractBits(imm, 11, 0) << 10); 200 } 201 202 static uint32_t setK16(uint32_t insn, uint32_t imm) { 203 return (insn & 0xfc0003ff) | (extractBits(imm, 15, 0) << 10); 204 } 205 206 static bool isJirl(uint32_t insn) { 207 return (insn & 0xfc000000) == JIRL; 208 } 209 210 LoongArch::LoongArch() { 211 // The LoongArch ISA itself does not have a limit on page sizes. According to 212 // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is 213 // 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to 214 // "unlimited". 215 // However, practically the maximum usable page size is constrained by the 216 // kernel implementation, and 64KiB is the biggest non-huge page size 217 // supported by Linux as of v6.4. The most widespread page size in use, 218 // though, is 16KiB. 219 defaultCommonPageSize = 16384; 220 defaultMaxPageSize = 65536; 221 write32le(trapInstr.data(), BREAK); // break 0 222 223 copyRel = R_LARCH_COPY; 224 pltRel = R_LARCH_JUMP_SLOT; 225 relativeRel = R_LARCH_RELATIVE; 226 iRelativeRel = R_LARCH_IRELATIVE; 227 228 if (config->is64) { 229 symbolicRel = R_LARCH_64; 230 tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64; 231 tlsOffsetRel = R_LARCH_TLS_DTPREL64; 232 tlsGotRel = R_LARCH_TLS_TPREL64; 233 } else { 234 symbolicRel = R_LARCH_32; 235 tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32; 236 tlsOffsetRel = R_LARCH_TLS_DTPREL32; 237 tlsGotRel = R_LARCH_TLS_TPREL32; 238 } 239 240 gotRel = symbolicRel; 241 242 // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map 243 gotPltHeaderEntriesNum = 2; 244 245 pltHeaderSize = 32; 246 pltEntrySize = 16; 247 ipltEntrySize = 16; 248 } 249 250 static uint32_t getEFlags(const InputFile *f) { 251 if (config->is64) 252 return cast<ObjFile<ELF64LE>>(f)->getObj().getHeader().e_flags; 253 return cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags; 254 } 255 256 static bool inputFileHasCode(const InputFile *f) { 257 for (const auto *sec : f->getSections()) 258 if (sec && sec->flags & SHF_EXECINSTR) 259 return true; 260 261 return false; 262 } 263 264 uint32_t LoongArch::calcEFlags() const { 265 // If there are only binary input files (from -b binary), use a 266 // value of 0 for the ELF header flags. 267 if (ctx.objectFiles.empty()) 268 return 0; 269 270 uint32_t target = 0; 271 const InputFile *targetFile; 272 for (const InputFile *f : ctx.objectFiles) { 273 // Do not enforce ABI compatibility if the input file does not contain code. 274 // This is useful for allowing linkage with data-only object files produced 275 // with tools like objcopy, that have zero e_flags. 276 if (!inputFileHasCode(f)) 277 continue; 278 279 // Take the first non-zero e_flags as the reference. 280 uint32_t flags = getEFlags(f); 281 if (target == 0 && flags != 0) { 282 target = flags; 283 targetFile = f; 284 } 285 286 if ((flags & EF_LOONGARCH_ABI_MODIFIER_MASK) != 287 (target & EF_LOONGARCH_ABI_MODIFIER_MASK)) 288 error(toString(f) + 289 ": cannot link object files with different ABI from " + 290 toString(targetFile)); 291 292 // We cannot process psABI v1.x / object ABI v0 files (containing stack 293 // relocations), unlike ld.bfd. 294 // 295 // Instead of blindly accepting every v0 object and only failing at 296 // relocation processing time, just disallow interlink altogether. We 297 // don't expect significant usage of object ABI v0 in the wild (the old 298 // world may continue using object ABI v0 for a while, but as it's not 299 // binary-compatible with the upstream i.e. new-world ecosystem, it's not 300 // being considered here). 301 // 302 // There are briefly some new-world systems with object ABI v0 binaries too. 303 // It is because these systems were built before the new ABI was finalized. 304 // These are not supported either due to the extremely small number of them, 305 // and the few impacted users are advised to simply rebuild world or 306 // reinstall a recent system. 307 if ((flags & EF_LOONGARCH_OBJABI_MASK) != EF_LOONGARCH_OBJABI_V1) 308 error(toString(f) + ": unsupported object file ABI version"); 309 } 310 311 return target; 312 } 313 314 int64_t LoongArch::getImplicitAddend(const uint8_t *buf, RelType type) const { 315 switch (type) { 316 default: 317 internalLinkerError(getErrorLocation(buf), 318 "cannot read addend for relocation " + toString(type)); 319 return 0; 320 case R_LARCH_32: 321 case R_LARCH_TLS_DTPMOD32: 322 case R_LARCH_TLS_DTPREL32: 323 case R_LARCH_TLS_TPREL32: 324 return SignExtend64<32>(read32le(buf)); 325 case R_LARCH_64: 326 case R_LARCH_TLS_DTPMOD64: 327 case R_LARCH_TLS_DTPREL64: 328 case R_LARCH_TLS_TPREL64: 329 return read64le(buf); 330 case R_LARCH_RELATIVE: 331 case R_LARCH_IRELATIVE: 332 return config->is64 ? read64le(buf) : read32le(buf); 333 case R_LARCH_NONE: 334 case R_LARCH_JUMP_SLOT: 335 // These relocations are defined as not having an implicit addend. 336 return 0; 337 } 338 } 339 340 void LoongArch::writeGotPlt(uint8_t *buf, const Symbol &s) const { 341 if (config->is64) 342 write64le(buf, in.plt->getVA()); 343 else 344 write32le(buf, in.plt->getVA()); 345 } 346 347 void LoongArch::writeIgotPlt(uint8_t *buf, const Symbol &s) const { 348 if (config->writeAddends) { 349 if (config->is64) 350 write64le(buf, s.getVA()); 351 else 352 write32le(buf, s.getVA()); 353 } 354 } 355 356 void LoongArch::writePltHeader(uint8_t *buf) const { 357 // The LoongArch PLT is currently structured just like that of RISCV. 358 // Annoyingly, this means the PLT is still using `pcaddu12i` to perform 359 // PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`), 360 // in contrast to the AArch64-like page-offset scheme with `pcalau12i` that 361 // is used everywhere else involving PC-relative operations in the LoongArch 362 // ELF psABI v2.00. 363 // 364 // The `pcrel_{hi20,lo12}` operators are illustrative only and not really 365 // supported by LoongArch assemblers. 366 // 367 // pcaddu12i $t2, %pcrel_hi20(.got.plt) 368 // sub.[wd] $t1, $t1, $t3 369 // ld.[wd] $t3, $t2, %pcrel_lo12(.got.plt) ; t3 = _dl_runtime_resolve 370 // addi.[wd] $t1, $t1, -pltHeaderSize-12 ; t1 = &.plt[i] - &.plt[0] 371 // addi.[wd] $t0, $t2, %pcrel_lo12(.got.plt) 372 // srli.[wd] $t1, $t1, (is64?1:2) ; t1 = &.got.plt[i] - &.got.plt[0] 373 // ld.[wd] $t0, $t0, Wordsize ; t0 = link_map 374 // jr $t3 375 uint32_t offset = in.gotPlt->getVA() - in.plt->getVA(); 376 uint32_t sub = config->is64 ? SUB_D : SUB_W; 377 uint32_t ld = config->is64 ? LD_D : LD_W; 378 uint32_t addi = config->is64 ? ADDI_D : ADDI_W; 379 uint32_t srli = config->is64 ? SRLI_D : SRLI_W; 380 write32le(buf + 0, insn(PCADDU12I, R_T2, hi20(offset), 0)); 381 write32le(buf + 4, insn(sub, R_T1, R_T1, R_T3)); 382 write32le(buf + 8, insn(ld, R_T3, R_T2, lo12(offset))); 383 write32le(buf + 12, insn(addi, R_T1, R_T1, lo12(-target->pltHeaderSize - 12))); 384 write32le(buf + 16, insn(addi, R_T0, R_T2, lo12(offset))); 385 write32le(buf + 20, insn(srli, R_T1, R_T1, config->is64 ? 1 : 2)); 386 write32le(buf + 24, insn(ld, R_T0, R_T0, config->wordsize)); 387 write32le(buf + 28, insn(JIRL, R_ZERO, R_T3, 0)); 388 } 389 390 void LoongArch::writePlt(uint8_t *buf, const Symbol &sym, 391 uint64_t pltEntryAddr) const { 392 // See the comment in writePltHeader for reason why pcaddu12i is used instead 393 // of the pcalau12i that's more commonly seen in the ELF psABI v2.0 days. 394 // 395 // pcaddu12i $t3, %pcrel_hi20(f@.got.plt) 396 // ld.[wd] $t3, $t3, %pcrel_lo12(f@.got.plt) 397 // jirl $t1, $t3, 0 398 // nop 399 uint32_t offset = sym.getGotPltVA() - pltEntryAddr; 400 write32le(buf + 0, insn(PCADDU12I, R_T3, hi20(offset), 0)); 401 write32le(buf + 4, 402 insn(config->is64 ? LD_D : LD_W, R_T3, R_T3, lo12(offset))); 403 write32le(buf + 8, insn(JIRL, R_T1, R_T3, 0)); 404 write32le(buf + 12, insn(ANDI, R_ZERO, R_ZERO, 0)); 405 } 406 407 RelType LoongArch::getDynRel(RelType type) const { 408 return type == target->symbolicRel ? type 409 : static_cast<RelType>(R_LARCH_NONE); 410 } 411 412 RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, 413 const uint8_t *loc) const { 414 switch (type) { 415 case R_LARCH_NONE: 416 case R_LARCH_MARK_LA: 417 case R_LARCH_MARK_PCREL: 418 return R_NONE; 419 case R_LARCH_32: 420 case R_LARCH_64: 421 case R_LARCH_ABS_HI20: 422 case R_LARCH_ABS_LO12: 423 case R_LARCH_ABS64_LO20: 424 case R_LARCH_ABS64_HI12: 425 return R_ABS; 426 case R_LARCH_PCALA_LO12: 427 // We could just R_ABS, but the JIRL instruction reuses the relocation type 428 // for a different purpose. The questionable usage is part of glibc 2.37 429 // libc_nonshared.a [1], which is linked into user programs, so we have to 430 // work around it for a while, even if a new relocation type may be 431 // introduced in the future [2]. 432 // 433 // [1]: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=9f482b73f41a9a1bbfb173aad0733d1c824c788a 434 // [2]: https://github.com/loongson/la-abi-specs/pull/3 435 return isJirl(read32le(loc)) ? R_PLT : R_ABS; 436 case R_LARCH_TLS_DTPREL32: 437 case R_LARCH_TLS_DTPREL64: 438 return R_DTPREL; 439 case R_LARCH_TLS_TPREL32: 440 case R_LARCH_TLS_TPREL64: 441 case R_LARCH_TLS_LE_HI20: 442 case R_LARCH_TLS_LE_LO12: 443 case R_LARCH_TLS_LE64_LO20: 444 case R_LARCH_TLS_LE64_HI12: 445 return R_TPREL; 446 case R_LARCH_ADD6: 447 case R_LARCH_ADD8: 448 case R_LARCH_ADD16: 449 case R_LARCH_ADD32: 450 case R_LARCH_ADD64: 451 case R_LARCH_SUB6: 452 case R_LARCH_SUB8: 453 case R_LARCH_SUB16: 454 case R_LARCH_SUB32: 455 case R_LARCH_SUB64: 456 // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse 457 // the RelExpr to avoid code duplication. 458 return R_RISCV_ADD; 459 case R_LARCH_32_PCREL: 460 case R_LARCH_64_PCREL: 461 case R_LARCH_PCREL20_S2: 462 return R_PC; 463 case R_LARCH_B16: 464 case R_LARCH_B21: 465 case R_LARCH_B26: 466 case R_LARCH_CALL36: 467 return R_PLT_PC; 468 case R_LARCH_GOT_PC_HI20: 469 case R_LARCH_GOT64_PC_LO20: 470 case R_LARCH_GOT64_PC_HI12: 471 case R_LARCH_TLS_IE_PC_HI20: 472 case R_LARCH_TLS_IE64_PC_LO20: 473 case R_LARCH_TLS_IE64_PC_HI12: 474 return R_LOONGARCH_GOT_PAGE_PC; 475 case R_LARCH_GOT_PC_LO12: 476 case R_LARCH_TLS_IE_PC_LO12: 477 return R_LOONGARCH_GOT; 478 case R_LARCH_TLS_LD_PC_HI20: 479 case R_LARCH_TLS_GD_PC_HI20: 480 return R_LOONGARCH_TLSGD_PAGE_PC; 481 case R_LARCH_PCALA_HI20: 482 // Why not R_LOONGARCH_PAGE_PC, majority of references don't go through PLT 483 // anyway so why waste time checking only to get everything relaxed back to 484 // it? 485 // 486 // This is again due to the R_LARCH_PCALA_LO12 on JIRL case, where we want 487 // both the HI20 and LO12 to potentially refer to the PLT. But in reality 488 // the HI20 reloc appears earlier, and the relocs don't contain enough 489 // information to let us properly resolve semantics per symbol. 490 // Unlike RISCV, our LO12 relocs *do not* point to their corresponding HI20 491 // relocs, hence it is nearly impossible to 100% accurately determine each 492 // HI20's "flavor" without taking big performance hits, in the presence of 493 // edge cases (e.g. HI20 without pairing LO12; paired LO12 placed so far 494 // apart that relationship is not certain anymore), and programmer mistakes 495 // (e.g. as outlined in https://github.com/loongson/la-abi-specs/pull/3). 496 // 497 // Ideally we would scan in an extra pass for all LO12s on JIRL, then mark 498 // every HI20 reloc referring to the same symbol differently; this is not 499 // feasible with the current function signature of getRelExpr that doesn't 500 // allow for such inter-pass state. 501 // 502 // So, unfortunately we have to again workaround this quirk the same way as 503 // BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT-needing, only 504 // relaxing back to R_LOONGARCH_PAGE_PC if it's known not so at a later 505 // stage. 506 return R_LOONGARCH_PLT_PAGE_PC; 507 case R_LARCH_PCALA64_LO20: 508 case R_LARCH_PCALA64_HI12: 509 return R_LOONGARCH_PAGE_PC; 510 case R_LARCH_GOT_HI20: 511 case R_LARCH_GOT_LO12: 512 case R_LARCH_GOT64_LO20: 513 case R_LARCH_GOT64_HI12: 514 case R_LARCH_TLS_IE_HI20: 515 case R_LARCH_TLS_IE_LO12: 516 case R_LARCH_TLS_IE64_LO20: 517 case R_LARCH_TLS_IE64_HI12: 518 return R_GOT; 519 case R_LARCH_TLS_LD_HI20: 520 return R_TLSLD_GOT; 521 case R_LARCH_TLS_GD_HI20: 522 return R_TLSGD_GOT; 523 case R_LARCH_RELAX: 524 // LoongArch linker relaxation is not implemented yet. 525 return R_NONE; 526 527 // Other known relocs that are explicitly unimplemented: 528 // 529 // - psABI v1 relocs that need a stateful stack machine to work, and not 530 // required when implementing psABI v2; 531 // - relocs that are not used anywhere (R_LARCH_{ADD,SUB}_24 [1], and the 532 // two GNU vtable-related relocs). 533 // 534 // [1]: https://web.archive.org/web/20230709064026/https://github.com/loongson/LoongArch-Documentation/issues/51 535 default: 536 error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + 537 ") against symbol " + toString(s)); 538 return R_NONE; 539 } 540 } 541 542 bool LoongArch::usesOnlyLowPageBits(RelType type) const { 543 switch (type) { 544 default: 545 return false; 546 case R_LARCH_PCALA_LO12: 547 case R_LARCH_GOT_LO12: 548 case R_LARCH_GOT_PC_LO12: 549 case R_LARCH_TLS_IE_PC_LO12: 550 return true; 551 } 552 } 553 554 void LoongArch::relocate(uint8_t *loc, const Relocation &rel, 555 uint64_t val) const { 556 switch (rel.type) { 557 case R_LARCH_32_PCREL: 558 checkInt(loc, val, 32, rel); 559 [[fallthrough]]; 560 case R_LARCH_32: 561 case R_LARCH_TLS_DTPREL32: 562 write32le(loc, val); 563 return; 564 case R_LARCH_64: 565 case R_LARCH_TLS_DTPREL64: 566 case R_LARCH_64_PCREL: 567 write64le(loc, val); 568 return; 569 570 case R_LARCH_PCREL20_S2: 571 checkInt(loc, val, 22, rel); 572 checkAlignment(loc, val, 4, rel); 573 write32le(loc, setJ20(read32le(loc), val >> 2)); 574 return; 575 576 case R_LARCH_B16: 577 checkInt(loc, val, 18, rel); 578 checkAlignment(loc, val, 4, rel); 579 write32le(loc, setK16(read32le(loc), val >> 2)); 580 return; 581 582 case R_LARCH_B21: 583 checkInt(loc, val, 23, rel); 584 checkAlignment(loc, val, 4, rel); 585 write32le(loc, setD5k16(read32le(loc), val >> 2)); 586 return; 587 588 case R_LARCH_B26: 589 checkInt(loc, val, 28, rel); 590 checkAlignment(loc, val, 4, rel); 591 write32le(loc, setD10k16(read32le(loc), val >> 2)); 592 return; 593 594 case R_LARCH_CALL36: { 595 // This relocation is designed for adjancent pcaddu18i+jirl pairs that 596 // are patched in one time. Because of sign extension of these insns' 597 // immediate fields, the relocation range is [-128G - 0x20000, +128G - 598 // 0x20000) (of course must be 4-byte aligned). 599 if (((int64_t)val + 0x20000) != llvm::SignExtend64(val + 0x20000, 38)) 600 reportRangeError(loc, rel, Twine(val), llvm::minIntN(38) - 0x20000, 601 llvm::maxIntN(38) - 0x20000); 602 checkAlignment(loc, val, 4, rel); 603 // Since jirl performs sign extension on the offset immediate, adds (1<<17) 604 // to original val to get the correct hi20. 605 uint32_t hi20 = extractBits(val + (1 << 17), 37, 18); 606 // Despite the name, the lower part is actually 18 bits with 4-byte aligned. 607 uint32_t lo16 = extractBits(val, 17, 2); 608 write32le(loc, setJ20(read32le(loc), hi20)); 609 write32le(loc + 4, setK16(read32le(loc + 4), lo16)); 610 return; 611 } 612 613 // Relocs intended for `addi`, `ld` or `st`. 614 case R_LARCH_PCALA_LO12: 615 // We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12 616 // on JIRL case: firstly JIRL wants its immediate's 2 lowest zeroes 617 // removed by us (in contrast to regular R_LARCH_PCALA_LO12), secondly 618 // its immediate slot width is different too (16, not 12). 619 // In this case, process like an R_LARCH_B16, but without overflow checking 620 // and only taking the value's lowest 12 bits. 621 if (isJirl(read32le(loc))) { 622 checkAlignment(loc, val, 4, rel); 623 val = SignExtend64<12>(val); 624 write32le(loc, setK16(read32le(loc), val >> 2)); 625 return; 626 } 627 [[fallthrough]]; 628 case R_LARCH_ABS_LO12: 629 case R_LARCH_GOT_PC_LO12: 630 case R_LARCH_GOT_LO12: 631 case R_LARCH_TLS_LE_LO12: 632 case R_LARCH_TLS_IE_PC_LO12: 633 case R_LARCH_TLS_IE_LO12: 634 write32le(loc, setK12(read32le(loc), extractBits(val, 11, 0))); 635 return; 636 637 // Relocs intended for `lu12i.w` or `pcalau12i`. 638 case R_LARCH_ABS_HI20: 639 case R_LARCH_PCALA_HI20: 640 case R_LARCH_GOT_PC_HI20: 641 case R_LARCH_GOT_HI20: 642 case R_LARCH_TLS_LE_HI20: 643 case R_LARCH_TLS_IE_PC_HI20: 644 case R_LARCH_TLS_IE_HI20: 645 case R_LARCH_TLS_LD_PC_HI20: 646 case R_LARCH_TLS_LD_HI20: 647 case R_LARCH_TLS_GD_PC_HI20: 648 case R_LARCH_TLS_GD_HI20: 649 write32le(loc, setJ20(read32le(loc), extractBits(val, 31, 12))); 650 return; 651 652 // Relocs intended for `lu32i.d`. 653 case R_LARCH_ABS64_LO20: 654 case R_LARCH_PCALA64_LO20: 655 case R_LARCH_GOT64_PC_LO20: 656 case R_LARCH_GOT64_LO20: 657 case R_LARCH_TLS_LE64_LO20: 658 case R_LARCH_TLS_IE64_PC_LO20: 659 case R_LARCH_TLS_IE64_LO20: 660 write32le(loc, setJ20(read32le(loc), extractBits(val, 51, 32))); 661 return; 662 663 // Relocs intended for `lu52i.d`. 664 case R_LARCH_ABS64_HI12: 665 case R_LARCH_PCALA64_HI12: 666 case R_LARCH_GOT64_PC_HI12: 667 case R_LARCH_GOT64_HI12: 668 case R_LARCH_TLS_LE64_HI12: 669 case R_LARCH_TLS_IE64_PC_HI12: 670 case R_LARCH_TLS_IE64_HI12: 671 write32le(loc, setK12(read32le(loc), extractBits(val, 63, 52))); 672 return; 673 674 case R_LARCH_ADD6: 675 *loc = (*loc & 0xc0) | ((*loc + val) & 0x3f); 676 return; 677 case R_LARCH_ADD8: 678 *loc += val; 679 return; 680 case R_LARCH_ADD16: 681 write16le(loc, read16le(loc) + val); 682 return; 683 case R_LARCH_ADD32: 684 write32le(loc, read32le(loc) + val); 685 return; 686 case R_LARCH_ADD64: 687 write64le(loc, read64le(loc) + val); 688 return; 689 case R_LARCH_SUB6: 690 *loc = (*loc & 0xc0) | ((*loc - val) & 0x3f); 691 return; 692 case R_LARCH_SUB8: 693 *loc -= val; 694 return; 695 case R_LARCH_SUB16: 696 write16le(loc, read16le(loc) - val); 697 return; 698 case R_LARCH_SUB32: 699 write32le(loc, read32le(loc) - val); 700 return; 701 case R_LARCH_SUB64: 702 write64le(loc, read64le(loc) - val); 703 return; 704 705 case R_LARCH_MARK_LA: 706 case R_LARCH_MARK_PCREL: 707 // no-op 708 return; 709 710 case R_LARCH_RELAX: 711 return; // Ignored (for now) 712 713 default: 714 llvm_unreachable("unknown relocation"); 715 } 716 } 717 718 TargetInfo *elf::getLoongArchTargetInfo() { 719 static LoongArch target; 720 return ⌖ 721 } 722