xref: /linux/include/linux/kho/abi/kexec_handover.h (revision b734412619821f3ed63ba63533f539672cb7a76d)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 
3 /*
4  * Copyright (C) 2023 Alexander Graf <graf@amazon.com>
5  * Copyright (C) 2025 Microsoft Corporation, Mike Rapoport <rppt@kernel.org>
6  * Copyright (C) 2025 Google LLC, Changyuan Lyu <changyuanl@google.com>
7  * Copyright (C) 2025 Google LLC, Jason Miu <jasonmiu@google.com>
8  */
9 
10 #ifndef _LINUX_KHO_ABI_KEXEC_HANDOVER_H
11 #define _LINUX_KHO_ABI_KEXEC_HANDOVER_H
12 
13 #include <linux/bits.h>
14 #include <linux/log2.h>
15 #include <linux/math.h>
16 #include <linux/types.h>
17 
18 #include <asm/page.h>
19 
20 /**
21  * DOC: Kexec Handover ABI
22  *
23  * Kexec Handover uses the ABI defined below for passing preserved data from
24  * one kernel to the next.
25  * The ABI uses Flattened Device Tree (FDT) format. The first kernel creates an
26  * FDT which is then passed to the next kernel during a kexec handover.
27  *
28  * This interface is a contract. Any modification to the FDT structure, node
29  * properties, compatible string, or the layout of the data structures
30  * referenced here constitutes a breaking change. Such changes require
31  * incrementing the version number in KHO_FDT_COMPATIBLE to prevent a new kernel
32  * from misinterpreting data from an older kernel. Changes are allowed provided
33  * the compatibility version is incremented. However, backward/forward
34  * compatibility is only guaranteed for kernels supporting the same ABI version.
35  *
36  * FDT Structure Overview:
37  *   The FDT serves as a central registry for physical addresses of preserved
38  *   data structures. The first kernel populates this FDT with references to
39  *   memory regions and other metadata that need to persist across the kexec
40  *   transition. The subsequent kernel then parses this FDT to locate and
41  *   restore the preserved data.::
42  *
43  *     / {
44  *         compatible = "kho-v3";
45  *
46  *         preserved-memory-map = <0x...>;
47  *
48  *         <subnode-name-1> {
49  *             preserved-data = <0x...>;
50  *             blob-size = <0x...>;
51  *         };
52  *
53  *         <subnode-name-2> {
54  *             preserved-data = <0x...>;
55  *             blob-size = <0x...>;
56  *         };
57  *               ... ...
58  *         <subnode-name-N> {
59  *             preserved-data = <0x...>;
60  *             blob-size = <0x...>;
61  *         };
62  *     };
63  *
64  *   Root KHO Node (/):
65  *     - compatible: "kho-v3"
66  *
 *       Identifies the overall KHO ABI version.
68  *
69  *     - preserved-memory-map: u64
70  *
71  *       Physical memory address pointing to the root of the
72  *       preserved memory map data structure.
73  *
74  *   Subnodes (<subnode-name-N>):
75  *     Subnodes can also be added to the root node to
76  *     describe other preserved data blobs. The <subnode-name-N>
77  *     is provided by the subsystem that uses KHO for preserving its
78  *     data.
79  *
80  *     - preserved-data: u64
81  *
82  *       Physical address pointing to a subnode data blob that is also
83  *       being preserved.
84  *
85  *     - blob-size: u64
86  *
87  *       Size in bytes of the preserved data blob. This is needed because
88  *       blobs may use arbitrary formats (not just FDT), so the size
89  *       cannot be determined from the blob content alone.
90  */
91 
/*
 * The compatible string for the KHO FDT root node. It carries the KHO ABI
 * version and has to be bumped on any breaking change to the ABI.
 */
#define KHO_FDT_COMPATIBLE "kho-v3"

/*
 * The FDT property in the root node that holds the physical address (u64) of
 * the root of the preserved memory map.
 */
#define KHO_FDT_MEMORY_MAP_PROP_NAME "preserved-memory-map"

/*
 * The FDT property in a subnode that holds the physical address (u64) of the
 * preserved data blob described by that subnode.
 */
#define KHO_SUB_TREE_PROP_NAME "preserved-data"

/*
 * The FDT property in a subnode that holds the size in bytes (u64) of the
 * preserved data blob described by that subnode.
 */
#define KHO_SUB_TREE_SIZE_PROP_NAME "blob-size"
103 
104 /**
105  * DOC: Kexec Handover ABI for vmalloc Preservation
106  *
107  * The Kexec Handover ABI for preserving vmalloc'ed memory is defined by
108  * a set of structures and helper macros. The layout of these structures is a
109  * stable contract between kernels and is versioned by the KHO_FDT_COMPATIBLE
110  * string.
111  *
112  * The preservation is managed through a main descriptor &struct kho_vmalloc,
113  * which points to a linked list of &struct kho_vmalloc_chunk structures. These
114  * chunks contain the physical addresses of the preserved pages, allowing the
115  * next kernel to reconstruct the vmalloc area with the same content and layout.
116  * Helper macros are also defined for storing and loading pointers within
117  * these structures.
118  */
119 
/*
 * Declare a serializable pointer member called @name.
 *
 * The member is a union that overlays a pointer of type @type (->ptr) with a
 * u64 physical address (->phys), so the same storage can be used through
 * either view.
 */
#define DECLARE_KHOSER_PTR(name, type) \
	union { u64 phys; type ptr; } name
126 
/*
 * Stores the physical address of a serializable pointer.
 *
 * @dest: a member declared with DECLARE_KHOSER_PTR().
 * @val:  the pointer to store; its type must match the type of @dest.ptr
 *        (enforced at compile time by typecheck()).
 *
 * A NULL @val is stored as physical address 0, which is the value that
 * KHOSER_LOAD_PTR() maps back to NULL. Any other pointer is translated with
 * virt_to_phys(). The expression evaluates to the stored physical address.
 *
 * @val is evaluated exactly once. The temporary uses a prefixed name so that
 * it cannot shadow identifiers that appear in @dest or @val (a caller passing
 * a variable named 'v' would otherwise self-initialize the temporary).
 */
#define KHOSER_STORE_PTR(dest, val)					\
	({								\
		typeof(val) __khoser_val = (val);			\
		typecheck(typeof((dest).ptr), __khoser_val);		\
		(dest).phys = __khoser_val ?				\
			virt_to_phys(__khoser_val) : 0;			\
	})
134 
/*
 * Loads the stored physical address back to a pointer.
 *
 * @src: a member declared with DECLARE_KHOSER_PTR().
 *
 * Evaluates to a pointer of the type of @src.ptr: NULL when the stored
 * physical address is 0, otherwise the result of phys_to_virt() on it.
 *
 * @src is evaluated exactly once. The temporary uses a prefixed name so that
 * it cannot shadow identifiers that appear in @src (an argument such as
 * 's' or 's->first' would otherwise refer to the temporary itself).
 */
#define KHOSER_LOAD_PTR(src)						\
	({								\
		typeof(src) __khoser_src = (src);			\
		(typeof(__khoser_src.ptr))(__khoser_src.phys ?		\
			phys_to_virt(__khoser_src.phys) : NULL);	\
	})
141 
/*
 * This header is embedded at the beginning of each `kho_vmalloc_chunk`
 * and contains a pointer to the next chunk in the linked list,
 * stored as a physical address for handover.
 *
 * @next: link to the following chunk. Declared with DECLARE_KHOSER_PTR(), so
 *        it can be accessed as a pointer (->ptr) or as a u64 physical
 *        address (->phys).
 */
struct kho_vmalloc_hdr {
	DECLARE_KHOSER_PTR(next, struct kho_vmalloc_chunk *);
};
150 
/*
 * Number of u64 slots that fit in one page after a struct kho_vmalloc_hdr
 * has been accounted for.
 */
#define KHO_VMALLOC_SIZE \
	((PAGE_SIZE - sizeof(struct kho_vmalloc_hdr)) / sizeof(u64))
154 
/*
 * Each chunk is a single page and is part of a linked list that describes
 * a preserved vmalloc area. It contains the header with the link to the next
 * chunk and a zero terminated array of physical addresses of the pages that
 * make up the preserved vmalloc area.
 *
 * @hdr:  chunk header holding the link to the next chunk.
 * @phys: physical addresses of the preserved pages; its KHO_VMALLOC_SIZE
 *        entries fill the remainder of the page after @hdr.
 */
struct kho_vmalloc_chunk {
	struct kho_vmalloc_hdr hdr;
	u64 phys[KHO_VMALLOC_SIZE];
};

/* Enforce at build time that a chunk occupies exactly one page. */
static_assert(sizeof(struct kho_vmalloc_chunk) == PAGE_SIZE);
167 
/*
 * Describes a preserved vmalloc memory area, including the
 * total number of pages, allocation flags, page order, and a pointer to the
 * first chunk of physical page addresses.
 *
 * @first:       first chunk of the list of physical page addresses, declared
 *               with DECLARE_KHOSER_PTR().
 * @total_pages: total number of pages in the preserved area.
 * @flags:       allocation flags of the preserved area.
 * @order:       page order of the preserved area.
 *
 * NOTE(review): the integer fields use plain 'unsigned int' / 'unsigned short'
 * rather than fixed-width u32 / u16; since this layout is part of the ABI,
 * consider whether fixed-width types would state the intent more clearly.
 */
struct kho_vmalloc {
	DECLARE_KHOSER_PTR(first, struct kho_vmalloc_chunk *);
	unsigned int total_pages;
	unsigned short flags;
	unsigned short order;
};
179 
180 /**
181  * DOC: KHO persistent memory tracker
182  *
183  * KHO tracks preserved memory using a radix tree data structure. Each node of
184  * the tree is exactly a single page. The leaf nodes are bitmaps where each set
185  * bit is a preserved page of any order. The intermediate nodes are tables of
186  * physical addresses that point to a lower level node.
187  *
188  * The tree hierarchy is shown below::
189  *
190  *   root
191  *   +-------------------+
192  *   |     Level 5       | (struct kho_radix_node)
193  *   +-------------------+
194  *     |
195  *     v
196  *   +-------------------+
197  *   |     Level 4       | (struct kho_radix_node)
198  *   +-------------------+
199  *     |
200  *     | ... (intermediate levels)
201  *     |
202  *     v
203  *   +-------------------+
204  *   |      Level 0      | (struct kho_radix_leaf)
205  *   +-------------------+
206  *
207  * The tree is traversed using a key that encodes the page's physical address
208  * (pa) and its order into a single unsigned long value. The encoded key value
209  * is composed of two parts: the 'order bit' in the upper part and the
210  * 'shifted physical address' in the lower part.::
211  *
212  *   +------------+-----------------------------+--------------------------+
213  *   | Page Order | Order Bit                   | Shifted Physical Address |
214  *   +------------+-----------------------------+--------------------------+
215  *   | 0          | ...000100 ... (at bit 52)   | pa >> (PAGE_SHIFT + 0)   |
216  *   | 1          | ...000010 ... (at bit 51)   | pa >> (PAGE_SHIFT + 1)   |
217  *   | 2          | ...000001 ... (at bit 50)   | pa >> (PAGE_SHIFT + 2)   |
218  *   | ...        | ...                         | ...                      |
219  *   +------------+-----------------------------+--------------------------+
220  *
221  * Shifted Physical Address:
222  * The 'shifted physical address' is the physical address normalized for its
223  * order. It effectively represents the PFN shifted right by the order.
224  *
225  * Order Bit:
226  * The 'order bit' encodes the page order by setting a single bit at a
227  * specific position. The position of this bit itself represents the order.
228  *
229  * For instance, on a 64-bit system with 4KB pages (PAGE_SHIFT = 12), the
230  * maximum range for the shifted physical address (for order 0) is 52 bits
231  * (64 - 12). This address occupies bits [0-51]. For order 0, the order bit is
232  * set at position 52.
233  *
234  * The following diagram illustrates how the encoded key value is split into
235  * indices for the tree levels, with PAGE_SIZE of 4KB::
236  *
237  *        63:60   59:51    50:42    41:33    32:24    23:15         14:0
238  *   +---------+--------+--------+--------+--------+--------+-----------------+
239  *   |    0    |  Lv 5  |  Lv 4  |  Lv 3  |  Lv 2  |  Lv 1  |  Lv 0 (bitmap)  |
240  *   +---------+--------+--------+--------+--------+--------+-----------------+
241  *
242  * The radix tree stores pages of all orders in a single 6-level hierarchy. It
243  * efficiently shares higher tree levels, especially due to common zero top
244  * address bits, allowing a single, efficient algorithm to manage all
245  * pages. This bitmap approach also offers memory efficiency; for example, a
246  * 512KB bitmap can cover a 16GB memory range for 0-order pages with PAGE_SIZE =
247  * 4KB.
248  *
 * The data structures defined here are part of the KHO ABI. Any modification
 * to these structures that breaks backward compatibility must be accompanied by
 * an update to the "compatible" string (KHO_FDT_COMPATIBLE). This ensures that
 * a newer kernel recognizes data passed by an incompatible older kernel
 * instead of misinterpreting it.
253  */
254 
255 /*
256  * Defines constants for the KHO radix tree structure, used to track preserved
257  * memory. These constants govern the indexing, sizing, and depth of the tree.
258  */
259 enum kho_radix_consts {
260 	/*
261 	 * The bit position of the order bit (and also the length of the
262 	 * shifted physical address) for an order-0 page.
263 	 */
264 	KHO_ORDER_0_LOG2 = 64 - PAGE_SHIFT,
265 
266 	/* Size of the table in kho_radix_node, in log2 */
267 	KHO_TABLE_SIZE_LOG2 = const_ilog2(PAGE_SIZE / sizeof(phys_addr_t)),
268 
269 	/* Number of bits in the kho_radix_leaf bitmap, in log2 */
270 	KHO_BITMAP_SIZE_LOG2 = PAGE_SHIFT + const_ilog2(BITS_PER_BYTE),
271 
272 	/*
273 	 * The total tree depth is the number of intermediate levels
274 	 * and 1 bitmap level.
275 	 */
276 	KHO_TREE_MAX_DEPTH =
277 		DIV_ROUND_UP(KHO_ORDER_0_LOG2 - KHO_BITMAP_SIZE_LOG2 + 1,
278 			     KHO_TABLE_SIZE_LOG2) + 1,
279 };
280 
/*
 * Intermediate node of the KHO radix tree.
 *
 * @table: 2^KHO_TABLE_SIZE_LOG2 u64 entries. Per the tracker description,
 *         intermediate nodes are tables of physical addresses that point to
 *         a lower level node.
 */
struct kho_radix_node {
	u64 table[1 << KHO_TABLE_SIZE_LOG2];
};
284 
/*
 * Leaf (level 0) node of the KHO radix tree.
 *
 * @bitmap: 2^KHO_BITMAP_SIZE_LOG2 bits, i.e. as many bits as there are in
 *          one page. Per the tracker description, each set bit is a
 *          preserved page.
 */
struct kho_radix_leaf {
	DECLARE_BITMAP(bitmap, 1 << KHO_BITMAP_SIZE_LOG2);
};
288 
289 #endif	/* _LINUX_KHO_ABI_KEXEC_HANDOVER_H */
290