181ad6265SDimitry Andric //===- EhFrame.h ------------------------------------------------*- C++ -*-===// 281ad6265SDimitry Andric // 381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 681ad6265SDimitry Andric // 781ad6265SDimitry Andric //===----------------------------------------------------------------------===// 881ad6265SDimitry Andric 981ad6265SDimitry Andric #ifndef LLD_MACHO_EH_FRAME_H 1081ad6265SDimitry Andric #define LLD_MACHO_EH_FRAME_H 1181ad6265SDimitry Andric 1281ad6265SDimitry Andric #include "InputSection.h" 1381ad6265SDimitry Andric #include "Relocations.h" 1481ad6265SDimitry Andric 1581ad6265SDimitry Andric #include "lld/Common/LLVM.h" 1681ad6265SDimitry Andric #include "llvm/ADT/ArrayRef.h" 1781ad6265SDimitry Andric #include "llvm/ADT/PointerUnion.h" 1881ad6265SDimitry Andric #include "llvm/ADT/SmallVector.h" 1981ad6265SDimitry Andric 2081ad6265SDimitry Andric /* 2181ad6265SDimitry Andric * NOTE: The main bulk of the EH frame parsing logic is in InputFiles.cpp as it 2281ad6265SDimitry Andric * is closely coupled with other file parsing logic; EhFrame.h just contains a 2381ad6265SDimitry Andric * few helpers. 2481ad6265SDimitry Andric */ 2581ad6265SDimitry Andric 2681ad6265SDimitry Andric /* 2781ad6265SDimitry Andric * === The EH frame format === 2881ad6265SDimitry Andric * 2981ad6265SDimitry Andric * EH frames can either be Common Information Entries (CIEs) or Frame 3081ad6265SDimitry Andric * Description Entries (FDEs). CIEs contain information that is common amongst 3181ad6265SDimitry Andric * several FDEs. Each FDE contains a pointer to its CIE. Thus all the EH frame 3281ad6265SDimitry Andric * entries together form a forest of two-level trees, with CIEs as the roots 3381ad6265SDimitry Andric * and FDEs as the leaves. Note that a CIE must precede the FDEs which point 3481ad6265SDimitry Andric * to it. 3581ad6265SDimitry Andric * 3681ad6265SDimitry Andric * A CIE comprises the following fields in order: 3781ad6265SDimitry Andric * 1. Length of the entry (4 or 12 bytes) 3881ad6265SDimitry Andric * 2. CIE offset (4 bytes; always 0 for CIEs) 3981ad6265SDimitry Andric * 3. CIE version (byte) 4081ad6265SDimitry Andric * 4. Null-terminated augmentation string 4181ad6265SDimitry Andric * 5-8. LEB128 values that we don't care about 4281ad6265SDimitry Andric * 9. Augmentation data, to be interpreted using the aug string 4381ad6265SDimitry Andric * 10. DWARF instructions (ignored by LLD) 4481ad6265SDimitry Andric * 4581ad6265SDimitry Andric * An FDE comprises of the following: 4681ad6265SDimitry Andric * 1. Length of the entry (4 or 12 bytes) 4781ad6265SDimitry Andric * 2. CIE offset (4 bytes pcrel offset that points backwards to this FDE's CIE) 4881ad6265SDimitry Andric * 3. Function address (pointer-sized pcrel offset) 49*bdd1243dSDimitry Andric * 4. (std::optional) Augmentation data length 50*bdd1243dSDimitry Andric * 5. (std::optional) LSDA address (pointer-sized pcrel offset) 5181ad6265SDimitry Andric * 6. DWARF instructions (ignored by LLD) 5281ad6265SDimitry Andric */ 53*bdd1243dSDimitry Andric namespace lld::macho { 5481ad6265SDimitry Andric 5581ad6265SDimitry Andric class EhReader { 5681ad6265SDimitry Andric public: EhReader(const ObjFile * file,ArrayRef<uint8_t> data,size_t dataOff)5761cfbce3SDimitry Andric EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff) 5861cfbce3SDimitry Andric : file(file), data(data), dataOff(dataOff) {} size()5981ad6265SDimitry Andric size_t size() const { return data.size(); } 6081ad6265SDimitry Andric // Read and validate the length field. 6181ad6265SDimitry Andric uint64_t readLength(size_t *off) const; 6281ad6265SDimitry Andric // Skip the length field without doing validation. 6381ad6265SDimitry Andric void skipValidLength(size_t *off) const; 6481ad6265SDimitry Andric uint8_t readByte(size_t *off) const; 6581ad6265SDimitry Andric uint32_t readU32(size_t *off) const; 6661cfbce3SDimitry Andric uint64_t readPointer(size_t *off, uint8_t size) const; 6781ad6265SDimitry Andric StringRef readString(size_t *off) const; 6881ad6265SDimitry Andric void skipLeb128(size_t *off) const; 6981ad6265SDimitry Andric void failOn(size_t errOff, const Twine &msg) const; 7081ad6265SDimitry Andric 7181ad6265SDimitry Andric private: 7281ad6265SDimitry Andric const ObjFile *file; 7381ad6265SDimitry Andric ArrayRef<uint8_t> data; 7481ad6265SDimitry Andric // The offset of the data array within its section. Used only for error 7581ad6265SDimitry Andric // reporting. 7681ad6265SDimitry Andric const size_t dataOff; 7781ad6265SDimitry Andric }; 7881ad6265SDimitry Andric 7981ad6265SDimitry Andric // The EH frame format, when emitted by llvm-mc, consists of a number of 8081ad6265SDimitry Andric // "abs-ified" relocations, i.e. relocations that are implicitly encoded as 8181ad6265SDimitry Andric // pcrel offsets in the section data. The offsets refer to the locations of 8281ad6265SDimitry Andric // symbols in the input object file. When we ingest these EH frames, we convert 8381ad6265SDimitry Andric // these implicit relocations into explicit Relocs. 8481ad6265SDimitry Andric // 8581ad6265SDimitry Andric // These pcrel relocations are semantically similar to X86_64_RELOC_SIGNED_4. 8681ad6265SDimitry Andric // However, we need this operation to be cross-platform, and ARM does not have a 8781ad6265SDimitry Andric // similar relocation that is applicable. We therefore use the more verbose (but 8881ad6265SDimitry Andric // more generic) subtractor relocation to encode these pcrel values. ld64 8981ad6265SDimitry Andric // appears to do something similar -- its `-r` output contains these explicit 9081ad6265SDimitry Andric // subtractor relocations. 9181ad6265SDimitry Andric class EhRelocator { 9281ad6265SDimitry Andric public: EhRelocator(InputSection * isec)9381ad6265SDimitry Andric EhRelocator(InputSection *isec) : isec(isec) {} 9481ad6265SDimitry Andric 9581ad6265SDimitry Andric // For the next two methods, let `PC` denote `isec address + off`. 9681ad6265SDimitry Andric // Create relocs writing the value of target - PC to PC. 9781ad6265SDimitry Andric void makePcRel(uint64_t off, 9881ad6265SDimitry Andric llvm::PointerUnion<Symbol *, InputSection *> target, 9981ad6265SDimitry Andric uint8_t length); 10081ad6265SDimitry Andric // Create relocs writing the value of PC - target to PC. 10181ad6265SDimitry Andric void makeNegativePcRel(uint64_t off, 10281ad6265SDimitry Andric llvm::PointerUnion<Symbol *, InputSection *> target, 10381ad6265SDimitry Andric uint8_t length); 10481ad6265SDimitry Andric // Insert the new relocations into isec->relocs. 10581ad6265SDimitry Andric void commit(); 10681ad6265SDimitry Andric 10781ad6265SDimitry Andric private: 10881ad6265SDimitry Andric InputSection *isec; 10981ad6265SDimitry Andric // Insert new relocs here so that we don't invalidate iterators into the 11081ad6265SDimitry Andric // existing relocs vector. 11181ad6265SDimitry Andric SmallVector<Reloc, 6> newRelocs; 11281ad6265SDimitry Andric }; 11381ad6265SDimitry Andric 114*bdd1243dSDimitry Andric } // namespace lld::macho 11581ad6265SDimitry Andric 11681ad6265SDimitry Andric #endif 117