xref: /linux/kernel/liveupdate/luo_core.c (revision bf45794244ca1fb1c135754f36ff765eea01f9e6)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright (c) 2025, Google LLC.
5  * Pasha Tatashin <pasha.tatashin@soleen.com>
6  */
7 
8 /**
9  * DOC: Live Update Orchestrator (LUO)
10  *
11  * Live Update is a specialized, kexec-based reboot process that allows a
12  * running kernel to be updated from one version to another while preserving
13  * the state of selected resources and keeping designated hardware devices
14  * operational. For these devices, DMA activity may continue throughout the
15  * kernel transition.
16  *
17  * While the primary use case driving this work is supporting live updates of
18  * the Linux kernel when it is used as a hypervisor in cloud environments, the
19  * LUO framework itself is designed to be workload-agnostic. Live Update
20  * facilitates a full kernel version upgrade for any type of system.
21  *
22  * For example, a non-hypervisor system running an in-memory cache like
23  * memcached with many gigabytes of data can use LUO. The userspace service
24  * can place its cache into a memfd, have its state preserved by LUO, and
25  * restore it immediately after the kernel kexec.
26  *
27  * Whether the system is running virtual machines, containers, a
28  * high-performance database, or networking services, LUO's primary goal is to
29  * enable a full kernel update by preserving critical userspace state and
30  * keeping essential devices operational.
31  *
32  * The core of LUO is a mechanism that tracks the progress of a live update,
33  * along with a callback API that allows other kernel subsystems to participate
34  * in the process. Example subsystems that can hook into LUO include: kvm,
35  * iommu, interrupts, vfio, participating filesystems, and memory management.
36  *
37  * LUO uses Kexec Handover to transfer memory state from the current kernel to
38  * the next kernel. For more details see Documentation/core-api/kho/index.rst.
39  */
40 
41 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
42 
43 #include <linux/atomic.h>
44 #include <linux/errno.h>
45 #include <linux/file.h>
46 #include <linux/fs.h>
47 #include <linux/init.h>
48 #include <linux/io.h>
49 #include <linux/kernel.h>
50 #include <linux/kexec_handover.h>
51 #include <linux/kho/abi/luo.h>
52 #include <linux/kobject.h>
53 #include <linux/libfdt.h>
54 #include <linux/liveupdate.h>
55 #include <linux/miscdevice.h>
56 #include <linux/mm.h>
57 #include <linux/sizes.h>
58 #include <linux/string.h>
59 #include <linux/unaligned.h>
60 
61 #include "kexec_handover_internal.h"
62 #include "luo_internal.h"
63 
/*
 * File-local Live Update Orchestrator state, shared by the boot-time setup
 * code and the runtime entry points in this file.
 */
64 static struct {
65 	bool enabled;		/* "liveupdate" boot parameter; cleared when setup fails */
66 	void *fdt_out;		/* outgoing LUO FDT, stored by luo_fdt_setup() */
67 	void *fdt_in;		/* incoming LUO FDT, stored by luo_early_startup() */
68 	u64 liveupdate_num;	/* live update number read from the incoming FDT */
69 } luo_global;
70 
71 static int __init early_liveupdate_param(char *buf)
72 {
73 	return kstrtobool(buf, &luo_global.enabled);
74 }
75 early_param("liveupdate", early_liveupdate_param);
76 
77 static int __init luo_early_startup(void)
78 {
79 	phys_addr_t fdt_phys;
80 	int err, ln_size;
81 	const void *ptr;
82 
83 	if (!kho_is_enabled()) {
84 		if (liveupdate_enabled())
85 			pr_warn("Disabling liveupdate because KHO is disabled\n");
86 		luo_global.enabled = false;
87 		return 0;
88 	}
89 
90 	/* Retrieve LUO subtree, and verify its format. */
91 	err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys);
92 	if (err) {
93 		if (err != -ENOENT) {
94 			pr_err("failed to retrieve FDT '%s' from KHO: %pe\n",
95 			       LUO_FDT_KHO_ENTRY_NAME, ERR_PTR(err));
96 			return err;
97 		}
98 
99 		return 0;
100 	}
101 
102 	luo_global.fdt_in = phys_to_virt(fdt_phys);
103 	err = fdt_node_check_compatible(luo_global.fdt_in, 0,
104 					LUO_FDT_COMPATIBLE);
105 	if (err) {
106 		pr_err("FDT '%s' is incompatible with '%s' [%d]\n",
107 		       LUO_FDT_KHO_ENTRY_NAME, LUO_FDT_COMPATIBLE, err);
108 
109 		return -EINVAL;
110 	}
111 
112 	ln_size = 0;
113 	ptr = fdt_getprop(luo_global.fdt_in, 0, LUO_FDT_LIVEUPDATE_NUM,
114 			  &ln_size);
115 	if (!ptr || ln_size != sizeof(luo_global.liveupdate_num)) {
116 		pr_err("Unable to get live update number '%s' [%d]\n",
117 		       LUO_FDT_LIVEUPDATE_NUM, ln_size);
118 
119 		return -EINVAL;
120 	}
121 
122 	luo_global.liveupdate_num = get_unaligned((u64 *)ptr);
123 	pr_info("Retrieved live update data, liveupdate number: %lld\n",
124 		luo_global.liveupdate_num);
125 
126 	err = luo_session_setup_incoming(luo_global.fdt_in);
127 	if (err)
128 		return err;
129 
130 	return 0;
131 }
132 
133 static int __init liveupdate_early_init(void)
134 {
135 	int err;
136 
137 	err = luo_early_startup();
138 	if (err) {
139 		luo_global.enabled = false;
140 		luo_restore_fail("The incoming tree failed to initialize properly [%pe], disabling live update\n",
141 				 ERR_PTR(err));
142 	}
143 
144 	return err;
145 }
146 early_initcall(liveupdate_early_init);
147 
148 /* Called during boot to create outgoing LUO fdt tree */
149 static int __init luo_fdt_setup(void)
150 {
151 	const u64 ln = luo_global.liveupdate_num + 1;
152 	void *fdt_out;
153 	int err;
154 
155 	fdt_out = kho_alloc_preserve(LUO_FDT_SIZE);
156 	if (IS_ERR(fdt_out)) {
157 		pr_err("failed to allocate/preserve FDT memory\n");
158 		return PTR_ERR(fdt_out);
159 	}
160 
161 	err = fdt_create(fdt_out, LUO_FDT_SIZE);
162 	err |= fdt_finish_reservemap(fdt_out);
163 	err |= fdt_begin_node(fdt_out, "");
164 	err |= fdt_property_string(fdt_out, "compatible", LUO_FDT_COMPATIBLE);
165 	err |= fdt_property(fdt_out, LUO_FDT_LIVEUPDATE_NUM, &ln, sizeof(ln));
166 	err |= luo_session_setup_outgoing(fdt_out);
167 	err |= fdt_end_node(fdt_out);
168 	err |= fdt_finish(fdt_out);
169 	if (err)
170 		goto exit_free;
171 
172 	err = kho_add_subtree(LUO_FDT_KHO_ENTRY_NAME, fdt_out);
173 	if (err)
174 		goto exit_free;
175 	luo_global.fdt_out = fdt_out;
176 
177 	return 0;
178 
179 exit_free:
180 	kho_unpreserve_free(fdt_out);
181 	pr_err("failed to prepare LUO FDT: %d\n", err);
182 
183 	return err;
184 }
185 
186 /*
187  * late initcall because it initializes the outgoing tree that is needed only
188  * once userspace starts using /dev/liveupdate.
189  */
190 static int __init luo_late_startup(void)
191 {
192 	int err;
193 
194 	if (!liveupdate_enabled())
195 		return 0;
196 
197 	err = luo_fdt_setup();
198 	if (err)
199 		luo_global.enabled = false;
200 
201 	return err;
202 }
203 late_initcall(luo_late_startup);
204 
205 /* Public Functions */
206 
207 /**
208  * liveupdate_reboot() - Kernel reboot notifier for live update final
209  * serialization.
210  *
211  * This function is invoked directly from the reboot() syscall pathway
212  * if kexec is in progress.
213  *
214  * If any callback fails, this function aborts KHO, undoes the freeze()
215  * callbacks, and returns an error.
216  */
217 int liveupdate_reboot(void)
218 {
219 	int err;
220 
221 	if (!liveupdate_enabled())
222 		return 0;
223 
224 	err = luo_session_serialize();
225 	if (err)
226 		return err;
227 
228 	err = kho_finalize();
229 	if (err) {
230 		pr_err("kho_finalize failed %d\n", err);
231 		/*
232 		 * kho_finalize() may return libfdt errors, to aboid passing to
233 		 * userspace unknown errors, change this to EAGAIN.
234 		 */
235 		err = -EAGAIN;
236 	}
237 
238 	return err;
239 }
240 
241 /**
242  * liveupdate_enabled - Check if the live update feature is enabled.
243  *
244  * Reports the live update feature flag, which is set via the ``liveupdate``
245  * kernel command-line parameter and cleared if KHO is disabled or LUO setup fails.
246  *
247  * Return: true if live update is enabled, false otherwise.
248  */
249 bool liveupdate_enabled(void)
250 {
251 	return luo_global.enabled;
252 }
253 
254 /**
255  * DOC: LUO ioctl Interface
256  *
257  * The IOCTL user-space control interface for the LUO subsystem.
258  * It registers a character device, typically found at ``/dev/liveupdate``,
259  * which allows a userspace agent to manage the LUO state machine and its
260  * associated resources, such as preservable file descriptors.
261  *
262  * To ensure that the state machine is controlled by a single entity, access
263  * to this device is exclusive: only one process is permitted to have
264  * ``/dev/liveupdate`` open at any given time. Subsequent open attempts will
265  * fail with -EBUSY until the first process closes its file descriptor.
266  * This singleton model simplifies state management by preventing conflicting
267  * commands from multiple userspace agents.
268  */
269 
/* State backing the "liveupdate" misc character device. */
270 struct luo_device_state {
271 	struct miscdevice miscdev;	/* registered by liveupdate_ioctl_init() */
272 	atomic_t in_use;		/* 1 while the device is held open, otherwise 0 */
273 };
274 
275 static int luo_ioctl_create_session(struct luo_ucmd *ucmd)
276 {
277 	struct liveupdate_ioctl_create_session *argp = ucmd->cmd;
278 	struct file *file;
279 	int err;
280 
281 	argp->fd = get_unused_fd_flags(O_CLOEXEC);
282 	if (argp->fd < 0)
283 		return argp->fd;
284 
285 	err = luo_session_create(argp->name, &file);
286 	if (err)
287 		goto err_put_fd;
288 
289 	err = luo_ucmd_respond(ucmd, sizeof(*argp));
290 	if (err)
291 		goto err_put_file;
292 
293 	fd_install(argp->fd, file);
294 
295 	return 0;
296 
297 err_put_file:
298 	fput(file);
299 err_put_fd:
300 	put_unused_fd(argp->fd);
301 
302 	return err;
303 }
304 
305 static int luo_ioctl_retrieve_session(struct luo_ucmd *ucmd)
306 {
307 	struct liveupdate_ioctl_retrieve_session *argp = ucmd->cmd;
308 	struct file *file;
309 	int err;
310 
311 	argp->fd = get_unused_fd_flags(O_CLOEXEC);
312 	if (argp->fd < 0)
313 		return argp->fd;
314 
315 	err = luo_session_retrieve(argp->name, &file);
316 	if (err < 0)
317 		goto err_put_fd;
318 
319 	err = luo_ucmd_respond(ucmd, sizeof(*argp));
320 	if (err)
321 		goto err_put_file;
322 
323 	fd_install(argp->fd, file);
324 
325 	return 0;
326 
327 err_put_file:
328 	fput(file);
329 err_put_fd:
330 	put_unused_fd(argp->fd);
331 
332 	return err;
333 }
334 
335 static int luo_open(struct inode *inodep, struct file *filep)
336 {
337 	struct luo_device_state *ldev = container_of(filep->private_data,
338 						     struct luo_device_state,
339 						     miscdev);
340 
341 	if (atomic_cmpxchg(&ldev->in_use, 0, 1))
342 		return -EBUSY;
343 
344 	/* Always return -EIO to user if deserialization fail */
345 	if (luo_session_deserialize()) {
346 		atomic_set(&ldev->in_use, 0);
347 		return -EIO;
348 	}
349 
350 	return 0;
351 }
352 
353 static int luo_release(struct inode *inodep, struct file *filep)
354 {
355 	struct luo_device_state *ldev = container_of(filep->private_data,
356 						     struct luo_device_state,
357 						     miscdev);
358 	atomic_set(&ldev->in_use, 0);
359 
360 	return 0;
361 }
362 
/*
 * Kernel-side landing buffer for ioctl arguments. IOCTL_OP() checks at build
 * time that every registered argument struct fits in this union, so each
 * supported ioctl needs a member here.
 */
362 union ucmd_buffer {
363 	struct liveupdate_ioctl_create_session create;
364 	struct liveupdate_ioctl_retrieve_session retrieve;
365 };
367 
/* Dispatch-table entry describing one ioctl handled by luo_ioctl(). */
367 struct luo_ioctl_op {
368 	unsigned int size;		/* sizeof() the kernel-side argument struct */
369 	unsigned int min_size;		/* smallest user_size that luo_ioctl() accepts */
370 	unsigned int ioctl_num;		/* full ioctl number, compared against cmd */
371 	int (*execute)(struct luo_ucmd *ucmd);	/* handler run once the argument is copied in */
372 };
374 
/*
 * Build one luo_ioctl_ops[] entry, placed at index
 * _IOC_NR(_ioctl) - LIVEUPDATE_CMD_BASE.
 *
 * @_ioctl:  full ioctl command number
 * @_fn:     handler stored in .execute
 * @_struct: argument struct type; the build fails if it is larger than
 *           union ucmd_buffer
 * @_last:   last member userspace must supply; .min_size is the offset of
 *           the end of that member
 */
374 #define IOCTL_OP(_ioctl, _fn, _struct, _last)                                  \
375 	[_IOC_NR(_ioctl) - LIVEUPDATE_CMD_BASE] = {                            \
376 		.size = sizeof(_struct) +                                      \
377 			BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) <          \
378 					  sizeof(_struct)),                    \
379 		.min_size = offsetofend(_struct, _last),                       \
380 		.ioctl_num = _ioctl,                                           \
381 		.execute = _fn,                                                \
382 	}
384 
385 static const struct luo_ioctl_op luo_ioctl_ops[] = {
386 	IOCTL_OP(LIVEUPDATE_IOCTL_CREATE_SESSION, luo_ioctl_create_session,
387 		 struct liveupdate_ioctl_create_session, name),
388 	IOCTL_OP(LIVEUPDATE_IOCTL_RETRIEVE_SESSION, luo_ioctl_retrieve_session,
389 		 struct liveupdate_ioctl_retrieve_session, name),
390 };
391 
392 static long luo_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
393 {
394 	const struct luo_ioctl_op *op;
395 	struct luo_ucmd ucmd = {};
396 	union ucmd_buffer buf;
397 	unsigned int nr;
398 	int err;
399 
400 	nr = _IOC_NR(cmd);
401 	if (nr - LIVEUPDATE_CMD_BASE >= ARRAY_SIZE(luo_ioctl_ops))
402 		return -EINVAL;
403 
404 	ucmd.ubuffer = (void __user *)arg;
405 	err = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
406 	if (err)
407 		return err;
408 
409 	op = &luo_ioctl_ops[nr - LIVEUPDATE_CMD_BASE];
410 	if (op->ioctl_num != cmd)
411 		return -ENOIOCTLCMD;
412 	if (ucmd.user_size < op->min_size)
413 		return -EINVAL;
414 
415 	ucmd.cmd = &buf;
416 	err = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
417 				    ucmd.user_size);
418 	if (err)
419 		return err;
420 
421 	return op->execute(&ucmd);
422 }
423 
424 static const struct file_operations luo_fops = {
425 	.owner		= THIS_MODULE,
426 	.open		= luo_open,
427 	.release	= luo_release,
428 	.unlocked_ioctl	= luo_ioctl,
429 };
430 
431 static struct luo_device_state luo_dev = {
432 	.miscdev = {
433 		.minor = MISC_DYNAMIC_MINOR,
434 		.name  = "liveupdate",
435 		.fops  = &luo_fops,
436 	},
437 	.in_use = ATOMIC_INIT(0),
438 };
439 
440 static int __init liveupdate_ioctl_init(void)
441 {
442 	if (!liveupdate_enabled())
443 		return 0;
444 
445 	return misc_register(&luo_dev.miscdev);
446 }
447 late_initcall(liveupdate_ioctl_init);
448