1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright (c) 2025, Google LLC.
5 * Pasha Tatashin <pasha.tatashin@soleen.com>
6 */
7
8 /**
9 * DOC: Live Update Orchestrator (LUO)
10 *
11 * Live Update is a specialized, kexec-based reboot process that allows a
12 * running kernel to be updated from one version to another while preserving
13 * the state of selected resources and keeping designated hardware devices
14 * operational. For these devices, DMA activity may continue throughout the
15 * kernel transition.
16 *
17 * While the primary use case driving this work is supporting live updates of
18 * the Linux kernel when it is used as a hypervisor in cloud environments, the
19 * LUO framework itself is designed to be workload-agnostic. Live Update
20 * facilitates a full kernel version upgrade for any type of system.
21 *
22 * For example, a non-hypervisor system running an in-memory cache like
23 * memcached with many gigabytes of data can use LUO. The userspace service
24 * can place its cache into a memfd, have its state preserved by LUO, and
25 * restore it immediately after the kernel kexec.
26 *
27 * Whether the system is running virtual machines, containers, a
28 * high-performance database, or networking services, LUO's primary goal is to
29 * enable a full kernel update by preserving critical userspace state and
30 * keeping essential devices operational.
31 *
32 * The core of LUO is a mechanism that tracks the progress of a live update,
33 * along with a callback API that allows other kernel subsystems to participate
34 * in the process. Example subsystems that can hook into LUO include: kvm,
35 * iommu, interrupts, vfio, participating filesystems, and memory management.
36 *
37 * LUO uses Kexec Handover to transfer memory state from the current kernel to
38 * the next kernel. For more details see
39 * Documentation/core-api/kho/concepts.rst.
40 */
41
42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
43
44 #include <linux/atomic.h>
45 #include <linux/errno.h>
46 #include <linux/file.h>
47 #include <linux/fs.h>
48 #include <linux/init.h>
49 #include <linux/io.h>
50 #include <linux/kernel.h>
51 #include <linux/kexec_handover.h>
52 #include <linux/kho/abi/luo.h>
53 #include <linux/kobject.h>
54 #include <linux/libfdt.h>
55 #include <linux/liveupdate.h>
56 #include <linux/miscdevice.h>
57 #include <linux/mm.h>
58 #include <linux/sizes.h>
59 #include <linux/string.h>
60 #include <linux/unaligned.h>
61
62 #include "kexec_handover_internal.h"
63 #include "luo_internal.h"
64
/* File-scope state of the Live Update Orchestrator. */
static struct {
	/*
	 * Feature flag reported by liveupdate_enabled(). Parsed from the
	 * "liveupdate" early parameter and cleared again by the init paths
	 * in this file when KHO is disabled or setup fails.
	 */
	bool enabled;
	/* Outgoing LUO FDT; set once it has been added as a KHO subtree. */
	void *fdt_out;
	/* Incoming LUO FDT retrieved from KHO, if one was handed over. */
	void *fdt_in;
	/*
	 * Live update number read from the incoming FDT; the outgoing FDT
	 * records this value plus one.
	 */
	u64 liveupdate_num;
} luo_global;
71
early_liveupdate_param(char * buf)72 static int __init early_liveupdate_param(char *buf)
73 {
74 return kstrtobool(buf, &luo_global.enabled);
75 }
76 early_param("liveupdate", early_liveupdate_param);
77
luo_early_startup(void)78 static int __init luo_early_startup(void)
79 {
80 phys_addr_t fdt_phys;
81 int err, ln_size;
82 const void *ptr;
83
84 if (!kho_is_enabled()) {
85 if (liveupdate_enabled())
86 pr_warn("Disabling liveupdate because KHO is disabled\n");
87 luo_global.enabled = false;
88 return 0;
89 }
90
91 /* Retrieve LUO subtree, and verify its format. */
92 err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys);
93 if (err) {
94 if (err != -ENOENT) {
95 pr_err("failed to retrieve FDT '%s' from KHO: %pe\n",
96 LUO_FDT_KHO_ENTRY_NAME, ERR_PTR(err));
97 return err;
98 }
99
100 return 0;
101 }
102
103 luo_global.fdt_in = phys_to_virt(fdt_phys);
104 err = fdt_node_check_compatible(luo_global.fdt_in, 0,
105 LUO_FDT_COMPATIBLE);
106 if (err) {
107 pr_err("FDT '%s' is incompatible with '%s' [%d]\n",
108 LUO_FDT_KHO_ENTRY_NAME, LUO_FDT_COMPATIBLE, err);
109
110 return -EINVAL;
111 }
112
113 ln_size = 0;
114 ptr = fdt_getprop(luo_global.fdt_in, 0, LUO_FDT_LIVEUPDATE_NUM,
115 &ln_size);
116 if (!ptr || ln_size != sizeof(luo_global.liveupdate_num)) {
117 pr_err("Unable to get live update number '%s' [%d]\n",
118 LUO_FDT_LIVEUPDATE_NUM, ln_size);
119
120 return -EINVAL;
121 }
122
123 luo_global.liveupdate_num = get_unaligned((u64 *)ptr);
124 pr_info("Retrieved live update data, liveupdate number: %lld\n",
125 luo_global.liveupdate_num);
126
127 err = luo_session_setup_incoming(luo_global.fdt_in);
128 if (err)
129 return err;
130
131 return 0;
132 }
133
liveupdate_early_init(void)134 static int __init liveupdate_early_init(void)
135 {
136 int err;
137
138 err = luo_early_startup();
139 if (err) {
140 luo_global.enabled = false;
141 luo_restore_fail("The incoming tree failed to initialize properly [%pe], disabling live update\n",
142 ERR_PTR(err));
143 }
144
145 return err;
146 }
147 early_initcall(liveupdate_early_init);
148
149 /* Called during boot to create outgoing LUO fdt tree */
luo_fdt_setup(void)150 static int __init luo_fdt_setup(void)
151 {
152 const u64 ln = luo_global.liveupdate_num + 1;
153 void *fdt_out;
154 int err;
155
156 fdt_out = kho_alloc_preserve(LUO_FDT_SIZE);
157 if (IS_ERR(fdt_out)) {
158 pr_err("failed to allocate/preserve FDT memory\n");
159 return PTR_ERR(fdt_out);
160 }
161
162 err = fdt_create(fdt_out, LUO_FDT_SIZE);
163 err |= fdt_finish_reservemap(fdt_out);
164 err |= fdt_begin_node(fdt_out, "");
165 err |= fdt_property_string(fdt_out, "compatible", LUO_FDT_COMPATIBLE);
166 err |= fdt_property(fdt_out, LUO_FDT_LIVEUPDATE_NUM, &ln, sizeof(ln));
167 err |= luo_session_setup_outgoing(fdt_out);
168 err |= fdt_end_node(fdt_out);
169 err |= fdt_finish(fdt_out);
170 if (err)
171 goto exit_free;
172
173 err = kho_add_subtree(LUO_FDT_KHO_ENTRY_NAME, fdt_out);
174 if (err)
175 goto exit_free;
176 luo_global.fdt_out = fdt_out;
177
178 return 0;
179
180 exit_free:
181 kho_unpreserve_free(fdt_out);
182 pr_err("failed to prepare LUO FDT: %d\n", err);
183
184 return err;
185 }
186
187 /*
188 * late initcall because it initializes the outgoing tree that is needed only
189 * once userspace starts using /dev/liveupdate.
190 */
luo_late_startup(void)191 static int __init luo_late_startup(void)
192 {
193 int err;
194
195 if (!liveupdate_enabled())
196 return 0;
197
198 err = luo_fdt_setup();
199 if (err)
200 luo_global.enabled = false;
201
202 return err;
203 }
204 late_initcall(luo_late_startup);
205
206 /* Public Functions */
207
208 /**
209 * liveupdate_reboot() - Kernel reboot notifier for live update final
210 * serialization.
211 *
212 * This function is invoked directly from the reboot() syscall pathway
213 * if kexec is in progress.
214 *
215 * If any callback fails, this function aborts KHO, undoes the freeze()
216 * callbacks, and returns an error.
217 */
liveupdate_reboot(void)218 int liveupdate_reboot(void)
219 {
220 int err;
221
222 if (!liveupdate_enabled())
223 return 0;
224
225 err = luo_session_serialize();
226 if (err)
227 return err;
228
229 err = kho_finalize();
230 if (err) {
231 pr_err("kho_finalize failed %d\n", err);
232 /*
233 * kho_finalize() may return libfdt errors, to aboid passing to
234 * userspace unknown errors, change this to EAGAIN.
235 */
236 err = -EAGAIN;
237 }
238
239 return err;
240 }
241
/**
 * liveupdate_enabled() - Check if the live update feature is enabled.
 *
 * This function returns the state of the live update feature flag, which
 * can be controlled via the ``liveupdate`` kernel command-line parameter.
 * The init paths in this file also clear the flag when KHO is disabled or
 * when setting up the incoming or outgoing tree fails.
 *
 * Return: true if live update is enabled, false otherwise.
 */
bool liveupdate_enabled(void)
{
	return luo_global.enabled;
}
254
255 /**
256 * DOC: LUO ioctl Interface
257 *
258 * The IOCTL user-space control interface for the LUO subsystem.
259 * It registers a character device, typically found at ``/dev/liveupdate``,
260 * which allows a userspace agent to manage the LUO state machine and its
261 * associated resources, such as preservable file descriptors.
262 *
263 * To ensure that the state machine is controlled by a single entity, access
264 * to this device is exclusive: only one process is permitted to have
265 * ``/dev/liveupdate`` open at any given time. Subsequent open attempts will
266 * fail with -EBUSY until the first process closes its file descriptor.
267 * This singleton model simplifies state management by preventing conflicting
268 * commands from multiple userspace agents.
269 */
270
/* State behind the LUO misc character device. */
struct luo_device_state {
	/* The misc device itself; open/release map back from it via container_of(). */
	struct miscdevice miscdev;
	/* 0 when the device is free, 1 while a single opener holds it. */
	atomic_t in_use;
};
275
luo_ioctl_create_session(struct luo_ucmd * ucmd)276 static int luo_ioctl_create_session(struct luo_ucmd *ucmd)
277 {
278 struct liveupdate_ioctl_create_session *argp = ucmd->cmd;
279 struct file *file;
280 int err;
281
282 argp->fd = get_unused_fd_flags(O_CLOEXEC);
283 if (argp->fd < 0)
284 return argp->fd;
285
286 err = luo_session_create(argp->name, &file);
287 if (err)
288 goto err_put_fd;
289
290 err = luo_ucmd_respond(ucmd, sizeof(*argp));
291 if (err)
292 goto err_put_file;
293
294 fd_install(argp->fd, file);
295
296 return 0;
297
298 err_put_file:
299 fput(file);
300 err_put_fd:
301 put_unused_fd(argp->fd);
302
303 return err;
304 }
305
luo_ioctl_retrieve_session(struct luo_ucmd * ucmd)306 static int luo_ioctl_retrieve_session(struct luo_ucmd *ucmd)
307 {
308 struct liveupdate_ioctl_retrieve_session *argp = ucmd->cmd;
309 struct file *file;
310 int err;
311
312 argp->fd = get_unused_fd_flags(O_CLOEXEC);
313 if (argp->fd < 0)
314 return argp->fd;
315
316 err = luo_session_retrieve(argp->name, &file);
317 if (err < 0)
318 goto err_put_fd;
319
320 err = luo_ucmd_respond(ucmd, sizeof(*argp));
321 if (err)
322 goto err_put_file;
323
324 fd_install(argp->fd, file);
325
326 return 0;
327
328 err_put_file:
329 fput(file);
330 err_put_fd:
331 put_unused_fd(argp->fd);
332
333 return err;
334 }
335
luo_open(struct inode * inodep,struct file * filep)336 static int luo_open(struct inode *inodep, struct file *filep)
337 {
338 struct luo_device_state *ldev = container_of(filep->private_data,
339 struct luo_device_state,
340 miscdev);
341
342 if (atomic_cmpxchg(&ldev->in_use, 0, 1))
343 return -EBUSY;
344
345 /* Always return -EIO to user if deserialization fail */
346 if (luo_session_deserialize()) {
347 atomic_set(&ldev->in_use, 0);
348 return -EIO;
349 }
350
351 return 0;
352 }
353
luo_release(struct inode * inodep,struct file * filep)354 static int luo_release(struct inode *inodep, struct file *filep)
355 {
356 struct luo_device_state *ldev = container_of(filep->private_data,
357 struct luo_device_state,
358 miscdev);
359 atomic_set(&ldev->in_use, 0);
360
361 return 0;
362 }
363
/*
 * Kernel-side buffer that the ioctl argument is copied into. It has one
 * member per supported command; IOCTL_OP() rejects at build time any command
 * struct that is larger than this union.
 */
union ucmd_buffer {
	struct liveupdate_ioctl_create_session create;
	struct liveupdate_ioctl_retrieve_session retrieve;
};
368
/* Descriptor of one supported ioctl command. */
struct luo_ioctl_op {
	/* Size of the kernel's command struct; used as the copy-in size. */
	unsigned int size;
	/* Smallest user-supplied size that luo_ioctl() accepts. */
	unsigned int min_size;
	/* Full ioctl command number; must match the cmd passed by the caller. */
	unsigned int ioctl_num;
	/* Handler invoked with the copied-in command. */
	int (*execute)(struct luo_ucmd *ucmd);
};
375
/*
 * IOCTL_OP() - build one luo_ioctl_ops[] entry.
 * @_ioctl:  full ioctl command number
 * @_fn:     handler to execute
 * @_struct: command struct type for this ioctl
 * @_last:   member of @_struct that bounds the minimum accepted size
 *
 * The entry is placed at index _IOC_NR(@_ioctl) - LIVEUPDATE_CMD_BASE.
 * .size is sizeof(@_struct); the BUILD_BUG_ON_ZERO() term adds nothing to the
 * value and exists to break the build if union ucmd_buffer is smaller than
 * @_struct. .min_size is the offset of the end of @_last within @_struct.
 */
#define IOCTL_OP(_ioctl, _fn, _struct, _last)                                  \
	[_IOC_NR(_ioctl) - LIVEUPDATE_CMD_BASE] = {                            \
		.size = sizeof(_struct) +                                      \
			BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) <          \
					  sizeof(_struct)),                    \
		.min_size = offsetofend(_struct, _last),                       \
		.ioctl_num = _ioctl,                                           \
		.execute = _fn,                                                \
	}
385
/*
 * Dispatch table for luo_ioctl(), indexed by ioctl number relative to
 * LIVEUPDATE_CMD_BASE. For both commands the minimum accepted size runs
 * through the end of the 'name' member.
 */
static const struct luo_ioctl_op luo_ioctl_ops[] = {
	IOCTL_OP(LIVEUPDATE_IOCTL_CREATE_SESSION, luo_ioctl_create_session,
		 struct liveupdate_ioctl_create_session, name),
	IOCTL_OP(LIVEUPDATE_IOCTL_RETRIEVE_SESSION, luo_ioctl_retrieve_session,
		 struct liveupdate_ioctl_retrieve_session, name),
};
392
luo_ioctl(struct file * filep,unsigned int cmd,unsigned long arg)393 static long luo_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
394 {
395 const struct luo_ioctl_op *op;
396 struct luo_ucmd ucmd = {};
397 union ucmd_buffer buf;
398 unsigned int nr;
399 int err;
400
401 nr = _IOC_NR(cmd);
402 if (nr < LIVEUPDATE_CMD_BASE ||
403 (nr - LIVEUPDATE_CMD_BASE) >= ARRAY_SIZE(luo_ioctl_ops)) {
404 return -EINVAL;
405 }
406
407 ucmd.ubuffer = (void __user *)arg;
408 err = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
409 if (err)
410 return err;
411
412 op = &luo_ioctl_ops[nr - LIVEUPDATE_CMD_BASE];
413 if (op->ioctl_num != cmd)
414 return -ENOIOCTLCMD;
415 if (ucmd.user_size < op->min_size)
416 return -EINVAL;
417
418 ucmd.cmd = &buf;
419 err = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
420 ucmd.user_size);
421 if (err)
422 return err;
423
424 return op->execute(&ucmd);
425 }
426
/* File operations of the LUO misc device. */
static const struct file_operations luo_fops = {
	.owner = THIS_MODULE,
	.open = luo_open,
	.release = luo_release,
	.unlocked_ioctl = luo_ioctl,
};
433
/*
 * The single LUO device instance: a misc device named "liveupdate" with a
 * dynamically assigned minor, initially not in use.
 */
static struct luo_device_state luo_dev = {
	.miscdev = {
		.minor = MISC_DYNAMIC_MINOR,
		.name = "liveupdate",
		.fops = &luo_fops,
	},
	.in_use = ATOMIC_INIT(0),
};
442
liveupdate_ioctl_init(void)443 static int __init liveupdate_ioctl_init(void)
444 {
445 if (!liveupdate_enabled())
446 return 0;
447
448 return misc_register(&luo_dev.miscdev);
449 }
450 late_initcall(liveupdate_ioctl_init);
451