xref: /freebsd/contrib/llvm-project/lld/MachO/EhFrame.h (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
181ad6265SDimitry Andric //===- EhFrame.h ------------------------------------------------*- C++ -*-===//
281ad6265SDimitry Andric //
381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
681ad6265SDimitry Andric //
781ad6265SDimitry Andric //===----------------------------------------------------------------------===//
881ad6265SDimitry Andric 
981ad6265SDimitry Andric #ifndef LLD_MACHO_EH_FRAME_H
1081ad6265SDimitry Andric #define LLD_MACHO_EH_FRAME_H
1181ad6265SDimitry Andric 
1281ad6265SDimitry Andric #include "InputSection.h"
1381ad6265SDimitry Andric #include "Relocations.h"
1481ad6265SDimitry Andric 
1581ad6265SDimitry Andric #include "lld/Common/LLVM.h"
1681ad6265SDimitry Andric #include "llvm/ADT/ArrayRef.h"
1781ad6265SDimitry Andric #include "llvm/ADT/PointerUnion.h"
1881ad6265SDimitry Andric #include "llvm/ADT/SmallVector.h"
1981ad6265SDimitry Andric 
/*
 * NOTE: The main bulk of the EH frame parsing logic is in InputFiles.cpp as it
 * is closely coupled with other file parsing logic; EhFrame.h just contains a
 * few helpers.
 */
2581ad6265SDimitry Andric 
/*
 * === The EH frame format ===
 *
 * EH frames can either be Common Information Entries (CIEs) or Frame
 * Description Entries (FDEs). CIEs contain information that is common amongst
 * several FDEs. Each FDE contains a pointer to its CIE. Thus all the EH frame
 * entries together form a forest of two-level trees, with CIEs as the roots
 * and FDEs as the leaves. Note that a CIE must precede the FDEs which point
 * to it.
 *
 * A CIE comprises the following fields in order:
 * 1.   Length of the entry (4 or 12 bytes)
 * 2.   CIE offset (4 bytes; always 0 for CIEs)
 * 3.   CIE version (byte)
 * 4.   Null-terminated augmentation string
 * 5-8. LEB128 values that we don't care about
 * 9.   Augmentation data, to be interpreted using the aug string
 * 10.  DWARF instructions (ignored by LLD)
 *
 * An FDE comprises the following fields in order:
 * 1. Length of the entry (4 or 12 bytes)
 * 2. CIE offset (4 bytes pcrel offset that points backwards to this FDE's CIE)
 * 3. Function address (pointer-sized pcrel offset)
 * 4. (Optional) Augmentation data length
 * 5. (Optional) LSDA address (pointer-sized pcrel offset)
 * 6. DWARF instructions (ignored by LLD)
 */
53*bdd1243dSDimitry Andric namespace lld::macho {
5481ad6265SDimitry Andric 
5581ad6265SDimitry Andric class EhReader {
5681ad6265SDimitry Andric public:
EhReader(const ObjFile * file,ArrayRef<uint8_t> data,size_t dataOff)5761cfbce3SDimitry Andric   EhReader(const ObjFile *file, ArrayRef<uint8_t> data, size_t dataOff)
5861cfbce3SDimitry Andric       : file(file), data(data), dataOff(dataOff) {}
size()5981ad6265SDimitry Andric   size_t size() const { return data.size(); }
6081ad6265SDimitry Andric   // Read and validate the length field.
6181ad6265SDimitry Andric   uint64_t readLength(size_t *off) const;
6281ad6265SDimitry Andric   // Skip the length field without doing validation.
6381ad6265SDimitry Andric   void skipValidLength(size_t *off) const;
6481ad6265SDimitry Andric   uint8_t readByte(size_t *off) const;
6581ad6265SDimitry Andric   uint32_t readU32(size_t *off) const;
6661cfbce3SDimitry Andric   uint64_t readPointer(size_t *off, uint8_t size) const;
6781ad6265SDimitry Andric   StringRef readString(size_t *off) const;
6881ad6265SDimitry Andric   void skipLeb128(size_t *off) const;
6981ad6265SDimitry Andric   void failOn(size_t errOff, const Twine &msg) const;
7081ad6265SDimitry Andric 
7181ad6265SDimitry Andric private:
7281ad6265SDimitry Andric   const ObjFile *file;
7381ad6265SDimitry Andric   ArrayRef<uint8_t> data;
7481ad6265SDimitry Andric   // The offset of the data array within its section. Used only for error
7581ad6265SDimitry Andric   // reporting.
7681ad6265SDimitry Andric   const size_t dataOff;
7781ad6265SDimitry Andric };
7881ad6265SDimitry Andric 
// The EH frame format, when emitted by llvm-mc, consists of a number of
// "abs-ified" relocations, i.e. relocations that are implicitly encoded as
// pcrel offsets in the section data. The offsets refer to the locations of
// symbols in the input object file. When we ingest these EH frames, we convert
// these implicit relocations into explicit Relocs.
//
// These pcrel relocations are semantically similar to X86_64_RELOC_SIGNED_4.
// However, we need this operation to be cross-platform, and ARM does not have a
// similar relocation that is applicable. We therefore use the more verbose (but
// more generic) subtractor relocation to encode these pcrel values. ld64
// appears to do something similar -- its `-r` output contains these explicit
// subtractor relocations.
9181ad6265SDimitry Andric class EhRelocator {
9281ad6265SDimitry Andric public:
EhRelocator(InputSection * isec)9381ad6265SDimitry Andric   EhRelocator(InputSection *isec) : isec(isec) {}
9481ad6265SDimitry Andric 
9581ad6265SDimitry Andric   // For the next two methods, let `PC` denote `isec address + off`.
9681ad6265SDimitry Andric   // Create relocs writing the value of target - PC to PC.
9781ad6265SDimitry Andric   void makePcRel(uint64_t off,
9881ad6265SDimitry Andric                  llvm::PointerUnion<Symbol *, InputSection *> target,
9981ad6265SDimitry Andric                  uint8_t length);
10081ad6265SDimitry Andric   // Create relocs writing the value of PC - target to PC.
10181ad6265SDimitry Andric   void makeNegativePcRel(uint64_t off,
10281ad6265SDimitry Andric                          llvm::PointerUnion<Symbol *, InputSection *> target,
10381ad6265SDimitry Andric                          uint8_t length);
10481ad6265SDimitry Andric   // Insert the new relocations into isec->relocs.
10581ad6265SDimitry Andric   void commit();
10681ad6265SDimitry Andric 
10781ad6265SDimitry Andric private:
10881ad6265SDimitry Andric   InputSection *isec;
10981ad6265SDimitry Andric   // Insert new relocs here so that we don't invalidate iterators into the
11081ad6265SDimitry Andric   // existing relocs vector.
11181ad6265SDimitry Andric   SmallVector<Reloc, 6> newRelocs;
11281ad6265SDimitry Andric };
11381ad6265SDimitry Andric 
114*bdd1243dSDimitry Andric } // namespace lld::macho
11581ad6265SDimitry Andric 
11681ad6265SDimitry Andric #endif
117