xref: /linux/kernel/liveupdate/luo_core.c (revision 9e1e18584548e8ef8b37a2a7f5eb84b91e35a160)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright (c) 2025, Google LLC.
5  * Pasha Tatashin <pasha.tatashin@soleen.com>
6  */
7 
8 /**
9  * DOC: Live Update Orchestrator (LUO)
10  *
11  * Live Update is a specialized, kexec-based reboot process that allows a
12  * running kernel to be updated from one version to another while preserving
13  * the state of selected resources and keeping designated hardware devices
14  * operational. For these devices, DMA activity may continue throughout the
15  * kernel transition.
16  *
17  * While the primary use case driving this work is supporting live updates of
18  * the Linux kernel when it is used as a hypervisor in cloud environments, the
19  * LUO framework itself is designed to be workload-agnostic. Live Update
20  * facilitates a full kernel version upgrade for any type of system.
21  *
22  * For example, a non-hypervisor system running an in-memory cache like
23  * memcached with many gigabytes of data can use LUO. The userspace service
24  * can place its cache into a memfd, have its state preserved by LUO, and
25  * restore it immediately after the kernel kexec.
26  *
27  * Whether the system is running virtual machines, containers, a
28  * high-performance database, or networking services, LUO's primary goal is to
29  * enable a full kernel update by preserving critical userspace state and
30  * keeping essential devices operational.
31  *
32  * The core of LUO is a mechanism that tracks the progress of a live update,
33  * along with a callback API that allows other kernel subsystems to participate
34  * in the process. Example subsystems that can hook into LUO include: kvm,
35  * iommu, interrupts, vfio, participating filesystems, and memory management.
36  *
37  * LUO uses Kexec Handover to transfer memory state from the current kernel to
38  * the next kernel. For more details see Documentation/core-api/kho/index.rst.
39  */
40 
41 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
42 
43 #include <linux/atomic.h>
44 #include <linux/errno.h>
45 #include <linux/file.h>
46 #include <linux/fs.h>
47 #include <linux/init.h>
48 #include <linux/io.h>
49 #include <linux/kernel.h>
50 #include <linux/kexec_handover.h>
51 #include <linux/kho/abi/luo.h>
52 #include <linux/kobject.h>
53 #include <linux/libfdt.h>
54 #include <linux/liveupdate.h>
55 #include <linux/miscdevice.h>
56 #include <linux/mm.h>
57 #include <linux/rwsem.h>
58 #include <linux/sizes.h>
59 #include <linux/string.h>
60 #include <linux/unaligned.h>
61 
62 #include "kexec_handover_internal.h"
63 #include "luo_internal.h"
64 
/*
 * luo_global - File-local state of the Live Update Orchestrator core.
 */
static struct {
	/*
	 * Feature switch. Parsed from the "liveupdate" early parameter and
	 * cleared again whenever KHO is disabled or one of the setup steps in
	 * this file fails. Reported by liveupdate_enabled().
	 */
	bool enabled;
	/* Outgoing FDT built by luo_fdt_setup() and registered with KHO. */
	void *fdt_out;
	/* Incoming FDT retrieved from KHO by luo_early_startup(), if any. */
	void *fdt_in;
	/*
	 * Live update number read from the incoming FDT. The outgoing FDT
	 * carries this value plus one.
	 */
	u64 liveupdate_num;
} luo_global;

/*
 * luo_register_rwlock - Protects registration of file handlers and FLBs.
 *
 * Defined without "static", so it is visible outside this file; it is not
 * taken anywhere in the code visible here.
 */
DECLARE_RWSEM(luo_register_rwlock);
76 
77 static int __init early_liveupdate_param(char *buf)
78 {
79 	return kstrtobool(buf, &luo_global.enabled);
80 }
81 early_param("liveupdate", early_liveupdate_param);
82 
83 static int __init luo_early_startup(void)
84 {
85 	phys_addr_t fdt_phys;
86 	int err, ln_size;
87 	const void *ptr;
88 
89 	if (!kho_is_enabled()) {
90 		if (liveupdate_enabled())
91 			pr_warn("Disabling liveupdate because KHO is disabled\n");
92 		luo_global.enabled = false;
93 		return 0;
94 	}
95 
96 	/* Retrieve LUO subtree, and verify its format. */
97 	err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys, NULL);
98 	if (err) {
99 		if (err != -ENOENT) {
100 			pr_err("failed to retrieve FDT '%s' from KHO: %pe\n",
101 			       LUO_FDT_KHO_ENTRY_NAME, ERR_PTR(err));
102 			return err;
103 		}
104 
105 		return 0;
106 	}
107 
108 	luo_global.fdt_in = phys_to_virt(fdt_phys);
109 	err = fdt_node_check_compatible(luo_global.fdt_in, 0,
110 					LUO_FDT_COMPATIBLE);
111 	if (err) {
112 		pr_err("FDT '%s' is incompatible with '%s' [%d]\n",
113 		       LUO_FDT_KHO_ENTRY_NAME, LUO_FDT_COMPATIBLE, err);
114 
115 		return -EINVAL;
116 	}
117 
118 	ln_size = 0;
119 	ptr = fdt_getprop(luo_global.fdt_in, 0, LUO_FDT_LIVEUPDATE_NUM,
120 			  &ln_size);
121 	if (!ptr || ln_size != sizeof(luo_global.liveupdate_num)) {
122 		pr_err("Unable to get live update number '%s' [%d]\n",
123 		       LUO_FDT_LIVEUPDATE_NUM, ln_size);
124 
125 		return -EINVAL;
126 	}
127 
128 	luo_global.liveupdate_num = get_unaligned((u64 *)ptr);
129 	pr_info("Retrieved live update data, liveupdate number: %lld\n",
130 		luo_global.liveupdate_num);
131 
132 	err = luo_session_setup_incoming(luo_global.fdt_in);
133 	if (err)
134 		return err;
135 
136 	err = luo_flb_setup_incoming(luo_global.fdt_in);
137 
138 	return err;
139 }
140 
141 static int __init liveupdate_early_init(void)
142 {
143 	int err;
144 
145 	err = luo_early_startup();
146 	if (err) {
147 		luo_global.enabled = false;
148 		luo_restore_fail("The incoming tree failed to initialize properly [%pe], disabling live update\n",
149 				 ERR_PTR(err));
150 	}
151 
152 	return err;
153 }
154 early_initcall(liveupdate_early_init);
155 
156 /* Called during boot to create outgoing LUO fdt tree */
157 static int __init luo_fdt_setup(void)
158 {
159 	const u64 ln = luo_global.liveupdate_num + 1;
160 	void *fdt_out;
161 	int err;
162 
163 	fdt_out = kho_alloc_preserve(LUO_FDT_SIZE);
164 	if (IS_ERR(fdt_out)) {
165 		pr_err("failed to allocate/preserve FDT memory\n");
166 		return PTR_ERR(fdt_out);
167 	}
168 
169 	err = fdt_create(fdt_out, LUO_FDT_SIZE);
170 	err |= fdt_finish_reservemap(fdt_out);
171 	err |= fdt_begin_node(fdt_out, "");
172 	err |= fdt_property_string(fdt_out, "compatible", LUO_FDT_COMPATIBLE);
173 	err |= fdt_property(fdt_out, LUO_FDT_LIVEUPDATE_NUM, &ln, sizeof(ln));
174 	err |= luo_session_setup_outgoing(fdt_out);
175 	err |= luo_flb_setup_outgoing(fdt_out);
176 	err |= fdt_end_node(fdt_out);
177 	err |= fdt_finish(fdt_out);
178 	if (err)
179 		goto exit_free;
180 
181 	err = kho_add_subtree(LUO_FDT_KHO_ENTRY_NAME, fdt_out,
182 			      fdt_totalsize(fdt_out));
183 	if (err)
184 		goto exit_free;
185 	luo_global.fdt_out = fdt_out;
186 
187 	return 0;
188 
189 exit_free:
190 	kho_unpreserve_free(fdt_out);
191 	pr_err("failed to prepare LUO FDT: %d\n", err);
192 
193 	return err;
194 }
195 
196 /*
197  * late initcall because it initializes the outgoing tree that is needed only
198  * once userspace starts using /dev/liveupdate.
199  */
200 static int __init luo_late_startup(void)
201 {
202 	int err;
203 
204 	if (!liveupdate_enabled())
205 		return 0;
206 
207 	err = luo_fdt_setup();
208 	if (err)
209 		luo_global.enabled = false;
210 
211 	return err;
212 }
213 late_initcall(luo_late_startup);
214 
215 /* Public Functions */
216 
/**
 * liveupdate_reboot() - Final live update serialization before kexec.
 *
 * This function is invoked directly from the reboot() syscall pathway
 * if kexec is in progress.
 *
 * It does nothing and reports success when live update is disabled.
 * Otherwise the sessions are serialized first; if that step fails, its error
 * is returned and FLB serialization is skipped. After a successful session
 * serialization the FLB state is serialized as well.
 *
 * NOTE(review): any unwinding done on failure happens inside
 * luo_session_serialize(), which is not visible from this file - confirm
 * there.
 *
 * Return: 0 on success, or the error reported by luo_session_serialize().
 */
int liveupdate_reboot(void)
{
	int ret = 0;

	if (liveupdate_enabled()) {
		ret = luo_session_serialize();
		if (!ret)
			luo_flb_serialize();
	}

	return ret;
}
242 
243 /**
244  * liveupdate_enabled - Check if the live update feature is enabled.
245  *
246  * This function returns the state of the live update feature flag, which
247  * can be controlled via the ``liveupdate`` kernel command-line parameter.
248  *
249  * @return true if live update is enabled, false otherwise.
250  */
251 bool liveupdate_enabled(void)
252 {
253 	return luo_global.enabled;
254 }
255 
256 /**
257  * DOC: LUO ioctl Interface
258  *
259  * The IOCTL user-space control interface for the LUO subsystem.
260  * It registers a character device, typically found at ``/dev/liveupdate``,
261  * which allows a userspace agent to manage the LUO state machine and its
262  * associated resources, such as preservable file descriptors.
263  *
264  * To ensure that the state machine is controlled by a single entity, access
265  * to this device is exclusive: only one process is permitted to have
266  * ``/dev/liveupdate`` open at any given time. Subsequent open attempts will
267  * fail with -EBUSY until the first process closes its file descriptor.
268  * This singleton model simplifies state management by preventing conflicting
269  * commands from multiple userspace agents.
270  */
271 
/*
 * struct luo_device_state - State behind the "liveupdate" misc device.
 */
struct luo_device_state {
	/* Misc device registered by liveupdate_ioctl_init(). */
	struct miscdevice miscdev;
	/*
	 * Exclusive-open flag: moved 0 -> 1 by luo_open() with a cmpxchg and
	 * reset to 0 by luo_release() or by a failed open.
	 */
	atomic_t in_use;
};
276 
277 static int luo_ioctl_create_session(struct luo_ucmd *ucmd)
278 {
279 	struct liveupdate_ioctl_create_session *argp = ucmd->cmd;
280 	struct file *file;
281 	int err;
282 
283 	argp->fd = get_unused_fd_flags(O_CLOEXEC);
284 	if (argp->fd < 0)
285 		return argp->fd;
286 
287 	err = luo_session_create(argp->name, &file);
288 	if (err)
289 		goto err_put_fd;
290 
291 	err = luo_ucmd_respond(ucmd, sizeof(*argp));
292 	if (err)
293 		goto err_put_file;
294 
295 	fd_install(argp->fd, file);
296 
297 	return 0;
298 
299 err_put_file:
300 	fput(file);
301 err_put_fd:
302 	put_unused_fd(argp->fd);
303 
304 	return err;
305 }
306 
307 static int luo_ioctl_retrieve_session(struct luo_ucmd *ucmd)
308 {
309 	struct liveupdate_ioctl_retrieve_session *argp = ucmd->cmd;
310 	struct file *file;
311 	int err;
312 
313 	argp->fd = get_unused_fd_flags(O_CLOEXEC);
314 	if (argp->fd < 0)
315 		return argp->fd;
316 
317 	err = luo_session_retrieve(argp->name, &file);
318 	if (err < 0)
319 		goto err_put_fd;
320 
321 	err = luo_ucmd_respond(ucmd, sizeof(*argp));
322 	if (err)
323 		goto err_put_file;
324 
325 	fd_install(argp->fd, file);
326 
327 	return 0;
328 
329 err_put_file:
330 	fput(file);
331 err_put_fd:
332 	put_unused_fd(argp->fd);
333 
334 	return err;
335 }
336 
337 static int luo_open(struct inode *inodep, struct file *filep)
338 {
339 	struct luo_device_state *ldev = container_of(filep->private_data,
340 						     struct luo_device_state,
341 						     miscdev);
342 
343 	if (atomic_cmpxchg(&ldev->in_use, 0, 1))
344 		return -EBUSY;
345 
346 	/* Always return -EIO to user if deserialization fail */
347 	if (luo_session_deserialize()) {
348 		atomic_set(&ldev->in_use, 0);
349 		return -EIO;
350 	}
351 
352 	return 0;
353 }
354 
355 static int luo_release(struct inode *inodep, struct file *filep)
356 {
357 	struct luo_device_state *ldev = container_of(filep->private_data,
358 						     struct luo_device_state,
359 						     miscdev);
360 	atomic_set(&ldev->in_use, 0);
361 
362 	return 0;
363 }
364 
/*
 * union ucmd_buffer - Kernel-side buffer for any ioctl argument struct.
 *
 * luo_ioctl() keeps one of these on its stack and copies the user argument
 * into it. IOCTL_OP() breaks the build if an argument struct listed in the
 * ops table is larger than this union.
 */
union ucmd_buffer {
	struct liveupdate_ioctl_create_session create;
	struct liveupdate_ioctl_retrieve_session retrieve;
};
369 
/*
 * struct luo_ioctl_op - Dispatch table entry for one ioctl command.
 */
struct luo_ioctl_op {
	/* sizeof() the argument struct; bytes copied into the kernel buffer. */
	unsigned int size;
	/* Smallest user-supplied size accepted by luo_ioctl(). */
	unsigned int min_size;
	/* Full ioctl number; must equal the cmd passed to luo_ioctl(). */
	unsigned int ioctl_num;
	/* Handler invoked once the argument has been copied in. */
	int (*execute)(struct luo_ucmd *ucmd);
};
376 
/*
 * IOCTL_OP - Build one luo_ioctl_ops[] entry.
 * @_ioctl:  full ioctl number; its _IOC_NR() relative to LIVEUPDATE_CMD_BASE
 *           is the array index of the entry.
 * @_fn:     handler stored in .execute.
 * @_struct: argument struct type; .size is its sizeof().
 * @_last:   member of @_struct up to whose end userspace must supply data;
 *           .min_size is offsetofend() of that member.
 *
 * The BUILD_BUG_ON_ZERO() term contributes 0 to .size and exists only to fail
 * the build when @_struct does not fit into union ucmd_buffer.
 */
#define IOCTL_OP(_ioctl, _fn, _struct, _last)                                  \
	[_IOC_NR(_ioctl) - LIVEUPDATE_CMD_BASE] = {                            \
		.size = sizeof(_struct) +                                      \
			BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) <          \
					  sizeof(_struct)),                    \
		.min_size = offsetofend(_struct, _last),                       \
		.ioctl_num = _ioctl,                                           \
		.execute = _fn,                                                \
	}
386 
387 static const struct luo_ioctl_op luo_ioctl_ops[] = {
388 	IOCTL_OP(LIVEUPDATE_IOCTL_CREATE_SESSION, luo_ioctl_create_session,
389 		 struct liveupdate_ioctl_create_session, name),
390 	IOCTL_OP(LIVEUPDATE_IOCTL_RETRIEVE_SESSION, luo_ioctl_retrieve_session,
391 		 struct liveupdate_ioctl_retrieve_session, name),
392 };
393 
394 static long luo_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
395 {
396 	const struct luo_ioctl_op *op;
397 	struct luo_ucmd ucmd = {};
398 	union ucmd_buffer buf;
399 	unsigned int nr;
400 	int err;
401 
402 	nr = _IOC_NR(cmd);
403 	if (nr - LIVEUPDATE_CMD_BASE >= ARRAY_SIZE(luo_ioctl_ops))
404 		return -EINVAL;
405 
406 	ucmd.ubuffer = (void __user *)arg;
407 	err = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
408 	if (err)
409 		return err;
410 
411 	op = &luo_ioctl_ops[nr - LIVEUPDATE_CMD_BASE];
412 	if (op->ioctl_num != cmd)
413 		return -ENOIOCTLCMD;
414 	if (ucmd.user_size < op->min_size)
415 		return -EINVAL;
416 
417 	ucmd.cmd = &buf;
418 	err = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
419 				    ucmd.user_size);
420 	if (err)
421 		return err;
422 
423 	return op->execute(&ucmd);
424 }
425 
426 static const struct file_operations luo_fops = {
427 	.owner		= THIS_MODULE,
428 	.open		= luo_open,
429 	.release	= luo_release,
430 	.unlocked_ioctl	= luo_ioctl,
431 };
432 
433 static struct luo_device_state luo_dev = {
434 	.miscdev = {
435 		.minor = MISC_DYNAMIC_MINOR,
436 		.name  = "liveupdate",
437 		.fops  = &luo_fops,
438 	},
439 	.in_use = ATOMIC_INIT(0),
440 };
441 
442 static int __init liveupdate_ioctl_init(void)
443 {
444 	if (!liveupdate_enabled())
445 		return 0;
446 
447 	return misc_register(&luo_dev.miscdev);
448 }
449 late_initcall(liveupdate_ioctl_init);
450