1 //===- LoongArch.cpp ------------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "InputFiles.h" 10 #include "OutputSections.h" 11 #include "Symbols.h" 12 #include "SyntheticSections.h" 13 #include "Target.h" 14 15 using namespace llvm; 16 using namespace llvm::object; 17 using namespace llvm::support::endian; 18 using namespace llvm::ELF; 19 using namespace lld; 20 using namespace lld::elf; 21 22 namespace { 23 class LoongArch final : public TargetInfo { 24 public: 25 LoongArch(); 26 uint32_t calcEFlags() const override; 27 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override; 28 void writeGotPlt(uint8_t *buf, const Symbol &s) const override; 29 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override; 30 void writePltHeader(uint8_t *buf) const override; 31 void writePlt(uint8_t *buf, const Symbol &sym, 32 uint64_t pltEntryAddr) const override; 33 RelType getDynRel(RelType type) const override; 34 RelExpr getRelExpr(RelType type, const Symbol &s, 35 const uint8_t *loc) const override; 36 bool usesOnlyLowPageBits(RelType type) const override; 37 void relocate(uint8_t *loc, const Relocation &rel, 38 uint64_t val) const override; 39 }; 40 } // end anonymous namespace 41 42 enum Op { 43 SUB_W = 0x00110000, 44 SUB_D = 0x00118000, 45 BREAK = 0x002a0000, 46 SRLI_W = 0x00448000, 47 SRLI_D = 0x00450000, 48 ADDI_W = 0x02800000, 49 ADDI_D = 0x02c00000, 50 ANDI = 0x03400000, 51 PCADDU12I = 0x1c000000, 52 LD_W = 0x28800000, 53 LD_D = 0x28c00000, 54 JIRL = 0x4c000000, 55 }; 56 57 enum Reg { 58 R_ZERO = 0, 59 R_RA = 1, 60 R_TP = 2, 61 R_T0 = 12, 62 R_T1 = 13, 63 R_T2 = 14, 64 R_T3 = 15, 65 }; 66 67 // Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences 68 // like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i` 69 // produces a PC-relative intermediate value with the lowest 12 bits zeroed (the 70 // "page") for the next instruction to add in the "page offset". (`pcalau12i` 71 // stands for something like "PC ALigned Add Upper that starts from the 12th 72 // bit, Immediate".) 73 // 74 // Here a "page" is in fact just another way to refer to the 12-bit range 75 // allowed by the immediate field of the addi/ld/st instructions, and not 76 // related to the system or the kernel's actual page size. The sematics happens 77 // to match the AArch64 `adrp`, so the concept of "page" is borrowed here. 78 static uint64_t getLoongArchPage(uint64_t p) { 79 return p & ~static_cast<uint64_t>(0xfff); 80 } 81 82 static uint32_t lo12(uint32_t val) { return val & 0xfff; } 83 84 // Calculate the adjusted page delta between dest and PC. 85 uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc) { 86 // Consider the large code model access pattern, of which the smaller code 87 // models' access patterns are a subset: 88 // 89 // pcalau12i U, %foo_hi20(sym) ; b in [-0x80000, 0x7ffff] 90 // addi.d T, zero, %foo_lo12(sym) ; a in [-0x800, 0x7ff] 91 // lu32i.d T, %foo64_lo20(sym) ; c in [-0x80000, 0x7ffff] 92 // lu52i.d T, T, %foo64_hi12(sym) ; d in [-0x800, 0x7ff] 93 // {ldx,stx,add}.* dest, U, T 94 // 95 // Let page(pc) = 0xRRR'QQQQQ'PPPPP'000 and dest = 0xZZZ'YYYYY'XXXXX'AAA, 96 // with RQ, P, ZY, X and A representing the respective bitfields as unsigned 97 // integers. We have: 98 // 99 // page(dest) = 0xZZZ'YYYYY'XXXXX'000 100 // - page(pc) = 0xRRR'QQQQQ'PPPPP'000 101 // ---------------------------------- 102 // 0xddd'ccccc'bbbbb'000 103 // 104 // Now consider the above pattern's actual effects: 105 // 106 // page(pc) 0xRRR'QQQQQ'PPPPP'000 107 // pcalau12i + 0xiii'iiiii'bbbbb'000 108 // addi + 0xjjj'jjjjj'kkkkk'AAA 109 // lu32i.d & lu52i.d + 0xddd'ccccc'00000'000 110 // -------------------------------------------------- 111 // dest = U + T 112 // = ((RQ<<32) + (P<<12) + i + (b<<12)) + (j + k + A + (cd<<32)) 113 // = (((RQ+cd)<<32) + i + j) + (((P+b)<<12) + k) + A 114 // = (ZY<<32) + (X<<12) + A 115 // 116 // ZY<<32 = (RQ<<32)+(cd<<32)+i+j, X<<12 = (P<<12)+(b<<12)+k 117 // cd<<32 = (ZY<<32)-(RQ<<32)-i-j, b<<12 = (X<<12)-(P<<12)-k 118 // 119 // where i and k are terms representing the effect of b's and A's sign 120 // extension respectively. 121 // 122 // i = signed b < 0 ? -0x10000'0000 : 0 123 // k = signed A < 0 ? -0x1000 : 0 124 // 125 // The j term is a bit complex: it represents the higher half of 126 // sign-extended bits from A that are effectively lost if i == 0 but k != 0, 127 // due to overwriting by lu32i.d & lu52i.d. 128 // 129 // j = signed A < 0 && signed b >= 0 ? 0x10000'0000 : 0 130 // 131 // The actual effect of the instruction sequence before the final addition, 132 // i.e. our desired result value, is thus: 133 // 134 // result = (cd<<32) + (b<<12) 135 // = (ZY<<32)-(RQ<<32)-i-j + (X<<12)-(P<<12)-k 136 // = ((ZY<<32)+(X<<12)) - ((RQ<<32)+(P<<12)) - i - j - k 137 // = page(dest) - page(pc) - i - j - k 138 // 139 // when signed A >= 0 && signed b >= 0: 140 // 141 // i = j = k = 0 142 // result = page(dest) - page(pc) 143 // 144 // when signed A >= 0 && signed b < 0: 145 // 146 // i = -0x10000'0000, j = k = 0 147 // result = page(dest) - page(pc) + 0x10000'0000 148 // 149 // when signed A < 0 && signed b >= 0: 150 // 151 // i = 0, j = 0x10000'0000, k = -0x1000 152 // result = page(dest) - page(pc) - 0x10000'0000 + 0x1000 153 // 154 // when signed A < 0 && signed b < 0: 155 // 156 // i = -0x10000'0000, j = 0, k = -0x1000 157 // result = page(dest) - page(pc) + 0x1000 158 uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pc); 159 bool negativeA = lo12(dest) > 0x7ff; 160 bool negativeB = (result & 0x8000'0000) != 0; 161 162 if (negativeA) 163 result += 0x1000; 164 if (negativeA && !negativeB) 165 result -= 0x10000'0000; 166 else if (!negativeA && negativeB) 167 result += 0x10000'0000; 168 169 return result; 170 } 171 172 static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; } 173 174 static uint32_t insn(uint32_t op, uint32_t d, uint32_t j, uint32_t k) { 175 return op | d | (j << 5) | (k << 10); 176 } 177 178 // Extract bits v[begin:end], where range is inclusive. 179 static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) { 180 return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end; 181 } 182 183 static uint32_t setD5k16(uint32_t insn, uint32_t imm) { 184 uint32_t immLo = extractBits(imm, 15, 0); 185 uint32_t immHi = extractBits(imm, 20, 16); 186 return (insn & 0xfc0003e0) | (immLo << 10) | immHi; 187 } 188 189 static uint32_t setD10k16(uint32_t insn, uint32_t imm) { 190 uint32_t immLo = extractBits(imm, 15, 0); 191 uint32_t immHi = extractBits(imm, 25, 16); 192 return (insn & 0xfc000000) | (immLo << 10) | immHi; 193 } 194 195 static uint32_t setJ20(uint32_t insn, uint32_t imm) { 196 return (insn & 0xfe00001f) | (extractBits(imm, 19, 0) << 5); 197 } 198 199 static uint32_t setK12(uint32_t insn, uint32_t imm) { 200 return (insn & 0xffc003ff) | (extractBits(imm, 11, 0) << 10); 201 } 202 203 static uint32_t setK16(uint32_t insn, uint32_t imm) { 204 return (insn & 0xfc0003ff) | (extractBits(imm, 15, 0) << 10); 205 } 206 207 static bool isJirl(uint32_t insn) { 208 return (insn & 0xfc000000) == JIRL; 209 } 210 211 LoongArch::LoongArch() { 212 // The LoongArch ISA itself does not have a limit on page sizes. According to 213 // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is 214 // 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to 215 // "unlimited". 216 // However, practically the maximum usable page size is constrained by the 217 // kernel implementation, and 64KiB is the biggest non-huge page size 218 // supported by Linux as of v6.4. The most widespread page size in use, 219 // though, is 16KiB. 220 defaultCommonPageSize = 16384; 221 defaultMaxPageSize = 65536; 222 write32le(trapInstr.data(), BREAK); // break 0 223 224 copyRel = R_LARCH_COPY; 225 pltRel = R_LARCH_JUMP_SLOT; 226 relativeRel = R_LARCH_RELATIVE; 227 iRelativeRel = R_LARCH_IRELATIVE; 228 229 if (config->is64) { 230 symbolicRel = R_LARCH_64; 231 tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64; 232 tlsOffsetRel = R_LARCH_TLS_DTPREL64; 233 tlsGotRel = R_LARCH_TLS_TPREL64; 234 } else { 235 symbolicRel = R_LARCH_32; 236 tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32; 237 tlsOffsetRel = R_LARCH_TLS_DTPREL32; 238 tlsGotRel = R_LARCH_TLS_TPREL32; 239 } 240 241 gotRel = symbolicRel; 242 243 // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map 244 gotPltHeaderEntriesNum = 2; 245 246 pltHeaderSize = 32; 247 pltEntrySize = 16; 248 ipltEntrySize = 16; 249 } 250 251 static uint32_t getEFlags(const InputFile *f) { 252 if (config->is64) 253 return cast<ObjFile<ELF64LE>>(f)->getObj().getHeader().e_flags; 254 return cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags; 255 } 256 257 static bool inputFileHasCode(const InputFile *f) { 258 for (const auto *sec : f->getSections()) 259 if (sec && sec->flags & SHF_EXECINSTR) 260 return true; 261 262 return false; 263 } 264 265 uint32_t LoongArch::calcEFlags() const { 266 // If there are only binary input files (from -b binary), use a 267 // value of 0 for the ELF header flags. 268 if (ctx.objectFiles.empty()) 269 return 0; 270 271 uint32_t target = 0; 272 const InputFile *targetFile; 273 for (const InputFile *f : ctx.objectFiles) { 274 // Do not enforce ABI compatibility if the input file does not contain code. 275 // This is useful for allowing linkage with data-only object files produced 276 // with tools like objcopy, that have zero e_flags. 277 if (!inputFileHasCode(f)) 278 continue; 279 280 // Take the first non-zero e_flags as the reference. 281 uint32_t flags = getEFlags(f); 282 if (target == 0 && flags != 0) { 283 target = flags; 284 targetFile = f; 285 } 286 287 if ((flags & EF_LOONGARCH_ABI_MODIFIER_MASK) != 288 (target & EF_LOONGARCH_ABI_MODIFIER_MASK)) 289 error(toString(f) + 290 ": cannot link object files with different ABI from " + 291 toString(targetFile)); 292 293 // We cannot process psABI v1.x / object ABI v0 files (containing stack 294 // relocations), unlike ld.bfd. 295 // 296 // Instead of blindly accepting every v0 object and only failing at 297 // relocation processing time, just disallow interlink altogether. We 298 // don't expect significant usage of object ABI v0 in the wild (the old 299 // world may continue using object ABI v0 for a while, but as it's not 300 // binary-compatible with the upstream i.e. new-world ecosystem, it's not 301 // being considered here). 302 // 303 // There are briefly some new-world systems with object ABI v0 binaries too. 304 // It is because these systems were built before the new ABI was finalized. 305 // These are not supported either due to the extremely small number of them, 306 // and the few impacted users are advised to simply rebuild world or 307 // reinstall a recent system. 308 if ((flags & EF_LOONGARCH_OBJABI_MASK) != EF_LOONGARCH_OBJABI_V1) 309 error(toString(f) + ": unsupported object file ABI version"); 310 } 311 312 return target; 313 } 314 315 int64_t LoongArch::getImplicitAddend(const uint8_t *buf, RelType type) const { 316 switch (type) { 317 default: 318 internalLinkerError(getErrorLocation(buf), 319 "cannot read addend for relocation " + toString(type)); 320 return 0; 321 case R_LARCH_32: 322 case R_LARCH_TLS_DTPMOD32: 323 case R_LARCH_TLS_DTPREL32: 324 case R_LARCH_TLS_TPREL32: 325 return SignExtend64<32>(read32le(buf)); 326 case R_LARCH_64: 327 case R_LARCH_TLS_DTPMOD64: 328 case R_LARCH_TLS_DTPREL64: 329 case R_LARCH_TLS_TPREL64: 330 return read64le(buf); 331 case R_LARCH_RELATIVE: 332 case R_LARCH_IRELATIVE: 333 return config->is64 ? read64le(buf) : read32le(buf); 334 case R_LARCH_NONE: 335 case R_LARCH_JUMP_SLOT: 336 // These relocations are defined as not having an implicit addend. 337 return 0; 338 } 339 } 340 341 void LoongArch::writeGotPlt(uint8_t *buf, const Symbol &s) const { 342 if (config->is64) 343 write64le(buf, in.plt->getVA()); 344 else 345 write32le(buf, in.plt->getVA()); 346 } 347 348 void LoongArch::writeIgotPlt(uint8_t *buf, const Symbol &s) const { 349 if (config->writeAddends) { 350 if (config->is64) 351 write64le(buf, s.getVA()); 352 else 353 write32le(buf, s.getVA()); 354 } 355 } 356 357 void LoongArch::writePltHeader(uint8_t *buf) const { 358 // The LoongArch PLT is currently structured just like that of RISCV. 359 // Annoyingly, this means the PLT is still using `pcaddu12i` to perform 360 // PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`), 361 // in contrast to the AArch64-like page-offset scheme with `pcalau12i` that 362 // is used everywhere else involving PC-relative operations in the LoongArch 363 // ELF psABI v2.00. 364 // 365 // The `pcrel_{hi20,lo12}` operators are illustrative only and not really 366 // supported by LoongArch assemblers. 367 // 368 // pcaddu12i $t2, %pcrel_hi20(.got.plt) 369 // sub.[wd] $t1, $t1, $t3 370 // ld.[wd] $t3, $t2, %pcrel_lo12(.got.plt) ; t3 = _dl_runtime_resolve 371 // addi.[wd] $t1, $t1, -pltHeaderSize-12 ; t1 = &.plt[i] - &.plt[0] 372 // addi.[wd] $t0, $t2, %pcrel_lo12(.got.plt) 373 // srli.[wd] $t1, $t1, (is64?1:2) ; t1 = &.got.plt[i] - &.got.plt[0] 374 // ld.[wd] $t0, $t0, Wordsize ; t0 = link_map 375 // jr $t3 376 uint32_t offset = in.gotPlt->getVA() - in.plt->getVA(); 377 uint32_t sub = config->is64 ? SUB_D : SUB_W; 378 uint32_t ld = config->is64 ? LD_D : LD_W; 379 uint32_t addi = config->is64 ? ADDI_D : ADDI_W; 380 uint32_t srli = config->is64 ? SRLI_D : SRLI_W; 381 write32le(buf + 0, insn(PCADDU12I, R_T2, hi20(offset), 0)); 382 write32le(buf + 4, insn(sub, R_T1, R_T1, R_T3)); 383 write32le(buf + 8, insn(ld, R_T3, R_T2, lo12(offset))); 384 write32le(buf + 12, insn(addi, R_T1, R_T1, lo12(-target->pltHeaderSize - 12))); 385 write32le(buf + 16, insn(addi, R_T0, R_T2, lo12(offset))); 386 write32le(buf + 20, insn(srli, R_T1, R_T1, config->is64 ? 1 : 2)); 387 write32le(buf + 24, insn(ld, R_T0, R_T0, config->wordsize)); 388 write32le(buf + 28, insn(JIRL, R_ZERO, R_T3, 0)); 389 } 390 391 void LoongArch::writePlt(uint8_t *buf, const Symbol &sym, 392 uint64_t pltEntryAddr) const { 393 // See the comment in writePltHeader for reason why pcaddu12i is used instead 394 // of the pcalau12i that's more commonly seen in the ELF psABI v2.0 days. 395 // 396 // pcaddu12i $t3, %pcrel_hi20(f@.got.plt) 397 // ld.[wd] $t3, $t3, %pcrel_lo12(f@.got.plt) 398 // jirl $t1, $t3, 0 399 // nop 400 uint32_t offset = sym.getGotPltVA() - pltEntryAddr; 401 write32le(buf + 0, insn(PCADDU12I, R_T3, hi20(offset), 0)); 402 write32le(buf + 4, 403 insn(config->is64 ? LD_D : LD_W, R_T3, R_T3, lo12(offset))); 404 write32le(buf + 8, insn(JIRL, R_T1, R_T3, 0)); 405 write32le(buf + 12, insn(ANDI, R_ZERO, R_ZERO, 0)); 406 } 407 408 RelType LoongArch::getDynRel(RelType type) const { 409 return type == target->symbolicRel ? type 410 : static_cast<RelType>(R_LARCH_NONE); 411 } 412 413 RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s, 414 const uint8_t *loc) const { 415 switch (type) { 416 case R_LARCH_NONE: 417 case R_LARCH_MARK_LA: 418 case R_LARCH_MARK_PCREL: 419 return R_NONE; 420 case R_LARCH_32: 421 case R_LARCH_64: 422 case R_LARCH_ABS_HI20: 423 case R_LARCH_ABS_LO12: 424 case R_LARCH_ABS64_LO20: 425 case R_LARCH_ABS64_HI12: 426 return R_ABS; 427 case R_LARCH_PCALA_LO12: 428 // We could just R_ABS, but the JIRL instruction reuses the relocation type 429 // for a different purpose. The questionable usage is part of glibc 2.37 430 // libc_nonshared.a [1], which is linked into user programs, so we have to 431 // work around it for a while, even if a new relocation type may be 432 // introduced in the future [2]. 433 // 434 // [1]: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=9f482b73f41a9a1bbfb173aad0733d1c824c788a 435 // [2]: https://github.com/loongson/la-abi-specs/pull/3 436 return isJirl(read32le(loc)) ? R_PLT : R_ABS; 437 case R_LARCH_TLS_DTPREL32: 438 case R_LARCH_TLS_DTPREL64: 439 return R_DTPREL; 440 case R_LARCH_TLS_TPREL32: 441 case R_LARCH_TLS_TPREL64: 442 case R_LARCH_TLS_LE_HI20: 443 case R_LARCH_TLS_LE_LO12: 444 case R_LARCH_TLS_LE64_LO20: 445 case R_LARCH_TLS_LE64_HI12: 446 return R_TPREL; 447 case R_LARCH_ADD8: 448 case R_LARCH_ADD16: 449 case R_LARCH_ADD32: 450 case R_LARCH_ADD64: 451 case R_LARCH_SUB8: 452 case R_LARCH_SUB16: 453 case R_LARCH_SUB32: 454 case R_LARCH_SUB64: 455 // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse 456 // the RelExpr to avoid code duplication. 457 return R_RISCV_ADD; 458 case R_LARCH_32_PCREL: 459 case R_LARCH_64_PCREL: 460 case R_LARCH_PCREL20_S2: 461 return R_PC; 462 case R_LARCH_B16: 463 case R_LARCH_B21: 464 case R_LARCH_B26: 465 return R_PLT_PC; 466 case R_LARCH_GOT_PC_HI20: 467 case R_LARCH_GOT64_PC_LO20: 468 case R_LARCH_GOT64_PC_HI12: 469 case R_LARCH_TLS_IE_PC_HI20: 470 case R_LARCH_TLS_IE64_PC_LO20: 471 case R_LARCH_TLS_IE64_PC_HI12: 472 return R_LOONGARCH_GOT_PAGE_PC; 473 case R_LARCH_GOT_PC_LO12: 474 case R_LARCH_TLS_IE_PC_LO12: 475 return R_LOONGARCH_GOT; 476 case R_LARCH_TLS_LD_PC_HI20: 477 case R_LARCH_TLS_GD_PC_HI20: 478 return R_LOONGARCH_TLSGD_PAGE_PC; 479 case R_LARCH_PCALA_HI20: 480 // Why not R_LOONGARCH_PAGE_PC, majority of references don't go through PLT 481 // anyway so why waste time checking only to get everything relaxed back to 482 // it? 483 // 484 // This is again due to the R_LARCH_PCALA_LO12 on JIRL case, where we want 485 // both the HI20 and LO12 to potentially refer to the PLT. But in reality 486 // the HI20 reloc appears earlier, and the relocs don't contain enough 487 // information to let us properly resolve semantics per symbol. 488 // Unlike RISCV, our LO12 relocs *do not* point to their corresponding HI20 489 // relocs, hence it is nearly impossible to 100% accurately determine each 490 // HI20's "flavor" without taking big performance hits, in the presence of 491 // edge cases (e.g. HI20 without pairing LO12; paired LO12 placed so far 492 // apart that relationship is not certain anymore), and programmer mistakes 493 // (e.g. as outlined in https://github.com/loongson/la-abi-specs/pull/3). 494 // 495 // Ideally we would scan in an extra pass for all LO12s on JIRL, then mark 496 // every HI20 reloc referring to the same symbol differently; this is not 497 // feasible with the current function signature of getRelExpr that doesn't 498 // allow for such inter-pass state. 499 // 500 // So, unfortunately we have to again workaround this quirk the same way as 501 // BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT-needing, only 502 // relaxing back to R_LOONGARCH_PAGE_PC if it's known not so at a later 503 // stage. 504 return R_LOONGARCH_PLT_PAGE_PC; 505 case R_LARCH_PCALA64_LO20: 506 case R_LARCH_PCALA64_HI12: 507 return R_LOONGARCH_PAGE_PC; 508 case R_LARCH_GOT_HI20: 509 case R_LARCH_GOT_LO12: 510 case R_LARCH_GOT64_LO20: 511 case R_LARCH_GOT64_HI12: 512 case R_LARCH_TLS_IE_HI20: 513 case R_LARCH_TLS_IE_LO12: 514 case R_LARCH_TLS_IE64_LO20: 515 case R_LARCH_TLS_IE64_HI12: 516 return R_GOT; 517 case R_LARCH_TLS_LD_HI20: 518 return R_TLSLD_GOT; 519 case R_LARCH_TLS_GD_HI20: 520 return R_TLSGD_GOT; 521 case R_LARCH_RELAX: 522 // LoongArch linker relaxation is not implemented yet. 523 return R_NONE; 524 525 // Other known relocs that are explicitly unimplemented: 526 // 527 // - psABI v1 relocs that need a stateful stack machine to work, and not 528 // required when implementing psABI v2; 529 // - relocs that are not used anywhere (R_LARCH_{ADD,SUB}_24 [1], and the 530 // two GNU vtable-related relocs). 531 // 532 // [1]: https://web.archive.org/web/20230709064026/https://github.com/loongson/LoongArch-Documentation/issues/51 533 default: 534 error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + 535 ") against symbol " + toString(s)); 536 return R_NONE; 537 } 538 } 539 540 bool LoongArch::usesOnlyLowPageBits(RelType type) const { 541 switch (type) { 542 default: 543 return false; 544 case R_LARCH_PCALA_LO12: 545 case R_LARCH_GOT_LO12: 546 case R_LARCH_GOT_PC_LO12: 547 case R_LARCH_TLS_IE_PC_LO12: 548 return true; 549 } 550 } 551 552 void LoongArch::relocate(uint8_t *loc, const Relocation &rel, 553 uint64_t val) const { 554 switch (rel.type) { 555 case R_LARCH_32_PCREL: 556 checkInt(loc, val, 32, rel); 557 [[fallthrough]]; 558 case R_LARCH_32: 559 case R_LARCH_TLS_DTPREL32: 560 write32le(loc, val); 561 return; 562 case R_LARCH_64: 563 case R_LARCH_TLS_DTPREL64: 564 case R_LARCH_64_PCREL: 565 write64le(loc, val); 566 return; 567 568 case R_LARCH_PCREL20_S2: 569 checkInt(loc, val, 22, rel); 570 checkAlignment(loc, val, 4, rel); 571 write32le(loc, setJ20(read32le(loc), val >> 2)); 572 return; 573 574 case R_LARCH_B16: 575 checkInt(loc, val, 18, rel); 576 checkAlignment(loc, val, 4, rel); 577 write32le(loc, setK16(read32le(loc), val >> 2)); 578 return; 579 580 case R_LARCH_B21: 581 checkInt(loc, val, 23, rel); 582 checkAlignment(loc, val, 4, rel); 583 write32le(loc, setD5k16(read32le(loc), val >> 2)); 584 return; 585 586 case R_LARCH_B26: 587 checkInt(loc, val, 28, rel); 588 checkAlignment(loc, val, 4, rel); 589 write32le(loc, setD10k16(read32le(loc), val >> 2)); 590 return; 591 592 // Relocs intended for `addi`, `ld` or `st`. 593 case R_LARCH_PCALA_LO12: 594 // We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12 595 // on JIRL case: firstly JIRL wants its immediate's 2 lowest zeroes 596 // removed by us (in contrast to regular R_LARCH_PCALA_LO12), secondly 597 // its immediate slot width is different too (16, not 12). 598 // In this case, process like an R_LARCH_B16, but without overflow checking 599 // and only taking the value's lowest 12 bits. 600 if (isJirl(read32le(loc))) { 601 checkAlignment(loc, val, 4, rel); 602 val = SignExtend64<12>(val); 603 write32le(loc, setK16(read32le(loc), val >> 2)); 604 return; 605 } 606 [[fallthrough]]; 607 case R_LARCH_ABS_LO12: 608 case R_LARCH_GOT_PC_LO12: 609 case R_LARCH_GOT_LO12: 610 case R_LARCH_TLS_LE_LO12: 611 case R_LARCH_TLS_IE_PC_LO12: 612 case R_LARCH_TLS_IE_LO12: 613 write32le(loc, setK12(read32le(loc), extractBits(val, 11, 0))); 614 return; 615 616 // Relocs intended for `lu12i.w` or `pcalau12i`. 617 case R_LARCH_ABS_HI20: 618 case R_LARCH_PCALA_HI20: 619 case R_LARCH_GOT_PC_HI20: 620 case R_LARCH_GOT_HI20: 621 case R_LARCH_TLS_LE_HI20: 622 case R_LARCH_TLS_IE_PC_HI20: 623 case R_LARCH_TLS_IE_HI20: 624 case R_LARCH_TLS_LD_PC_HI20: 625 case R_LARCH_TLS_LD_HI20: 626 case R_LARCH_TLS_GD_PC_HI20: 627 case R_LARCH_TLS_GD_HI20: 628 write32le(loc, setJ20(read32le(loc), extractBits(val, 31, 12))); 629 return; 630 631 // Relocs intended for `lu32i.d`. 632 case R_LARCH_ABS64_LO20: 633 case R_LARCH_PCALA64_LO20: 634 case R_LARCH_GOT64_PC_LO20: 635 case R_LARCH_GOT64_LO20: 636 case R_LARCH_TLS_LE64_LO20: 637 case R_LARCH_TLS_IE64_PC_LO20: 638 case R_LARCH_TLS_IE64_LO20: 639 write32le(loc, setJ20(read32le(loc), extractBits(val, 51, 32))); 640 return; 641 642 // Relocs intended for `lu52i.d`. 643 case R_LARCH_ABS64_HI12: 644 case R_LARCH_PCALA64_HI12: 645 case R_LARCH_GOT64_PC_HI12: 646 case R_LARCH_GOT64_HI12: 647 case R_LARCH_TLS_LE64_HI12: 648 case R_LARCH_TLS_IE64_PC_HI12: 649 case R_LARCH_TLS_IE64_HI12: 650 write32le(loc, setK12(read32le(loc), extractBits(val, 63, 52))); 651 return; 652 653 case R_LARCH_ADD8: 654 *loc += val; 655 return; 656 case R_LARCH_ADD16: 657 write16le(loc, read16le(loc) + val); 658 return; 659 case R_LARCH_ADD32: 660 write32le(loc, read32le(loc) + val); 661 return; 662 case R_LARCH_ADD64: 663 write64le(loc, read64le(loc) + val); 664 return; 665 case R_LARCH_SUB8: 666 *loc -= val; 667 return; 668 case R_LARCH_SUB16: 669 write16le(loc, read16le(loc) - val); 670 return; 671 case R_LARCH_SUB32: 672 write32le(loc, read32le(loc) - val); 673 return; 674 case R_LARCH_SUB64: 675 write64le(loc, read64le(loc) - val); 676 return; 677 678 case R_LARCH_MARK_LA: 679 case R_LARCH_MARK_PCREL: 680 // no-op 681 return; 682 683 case R_LARCH_RELAX: 684 return; // Ignored (for now) 685 686 default: 687 llvm_unreachable("unknown relocation"); 688 } 689 } 690 691 TargetInfo *elf::getLoongArchTargetInfo() { 692 static LoongArch target; 693 return ⌖ 694 } 695