1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Copyright (c) 2025, Google LLC. 5 * Pasha Tatashin <pasha.tatashin@soleen.com> 6 */ 7 8 /** 9 * DOC: Live Update Orchestrator (LUO) 10 * 11 * Live Update is a specialized, kexec-based reboot process that allows a 12 * running kernel to be updated from one version to another while preserving 13 * the state of selected resources and keeping designated hardware devices 14 * operational. For these devices, DMA activity may continue throughout the 15 * kernel transition. 16 * 17 * While the primary use case driving this work is supporting live updates of 18 * the Linux kernel when it is used as a hypervisor in cloud environments, the 19 * LUO framework itself is designed to be workload-agnostic. Live Update 20 * facilitates a full kernel version upgrade for any type of system. 21 * 22 * For example, a non-hypervisor system running an in-memory cache like 23 * memcached with many gigabytes of data can use LUO. The userspace service 24 * can place its cache into a memfd, have its state preserved by LUO, and 25 * restore it immediately after the kernel kexec. 26 * 27 * Whether the system is running virtual machines, containers, a 28 * high-performance database, or networking services, LUO's primary goal is to 29 * enable a full kernel update by preserving critical userspace state and 30 * keeping essential devices operational. 31 * 32 * The core of LUO is a mechanism that tracks the progress of a live update, 33 * along with a callback API that allows other kernel subsystems to participate 34 * in the process. Example subsystems that can hook into LUO include: kvm, 35 * iommu, interrupts, vfio, participating filesystems, and memory management. 36 * 37 * LUO uses Kexec Handover to transfer memory state from the current kernel to 38 * the next kernel. For more details see 39 * Documentation/core-api/kho/concepts.rst. 40 */ 41 42 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 43 44 #include <linux/atomic.h> 45 #include <linux/errno.h> 46 #include <linux/file.h> 47 #include <linux/fs.h> 48 #include <linux/init.h> 49 #include <linux/io.h> 50 #include <linux/kernel.h> 51 #include <linux/kexec_handover.h> 52 #include <linux/kho/abi/luo.h> 53 #include <linux/kobject.h> 54 #include <linux/libfdt.h> 55 #include <linux/liveupdate.h> 56 #include <linux/miscdevice.h> 57 #include <linux/mm.h> 58 #include <linux/sizes.h> 59 #include <linux/string.h> 60 #include <linux/unaligned.h> 61 62 #include "kexec_handover_internal.h" 63 #include "luo_internal.h" 64 65 static struct { 66 bool enabled; 67 void *fdt_out; 68 void *fdt_in; 69 u64 liveupdate_num; 70 } luo_global; 71 72 static int __init early_liveupdate_param(char *buf) 73 { 74 return kstrtobool(buf, &luo_global.enabled); 75 } 76 early_param("liveupdate", early_liveupdate_param); 77 78 static int __init luo_early_startup(void) 79 { 80 phys_addr_t fdt_phys; 81 int err, ln_size; 82 const void *ptr; 83 84 if (!kho_is_enabled()) { 85 if (liveupdate_enabled()) 86 pr_warn("Disabling liveupdate because KHO is disabled\n"); 87 luo_global.enabled = false; 88 return 0; 89 } 90 91 /* Retrieve LUO subtree, and verify its format. */ 92 err = kho_retrieve_subtree(LUO_FDT_KHO_ENTRY_NAME, &fdt_phys); 93 if (err) { 94 if (err != -ENOENT) { 95 pr_err("failed to retrieve FDT '%s' from KHO: %pe\n", 96 LUO_FDT_KHO_ENTRY_NAME, ERR_PTR(err)); 97 return err; 98 } 99 100 return 0; 101 } 102 103 luo_global.fdt_in = phys_to_virt(fdt_phys); 104 err = fdt_node_check_compatible(luo_global.fdt_in, 0, 105 LUO_FDT_COMPATIBLE); 106 if (err) { 107 pr_err("FDT '%s' is incompatible with '%s' [%d]\n", 108 LUO_FDT_KHO_ENTRY_NAME, LUO_FDT_COMPATIBLE, err); 109 110 return -EINVAL; 111 } 112 113 ln_size = 0; 114 ptr = fdt_getprop(luo_global.fdt_in, 0, LUO_FDT_LIVEUPDATE_NUM, 115 &ln_size); 116 if (!ptr || ln_size != sizeof(luo_global.liveupdate_num)) { 117 pr_err("Unable to get live update number '%s' [%d]\n", 118 LUO_FDT_LIVEUPDATE_NUM, ln_size); 119 120 return -EINVAL; 121 } 122 123 luo_global.liveupdate_num = get_unaligned((u64 *)ptr); 124 pr_info("Retrieved live update data, liveupdate number: %lld\n", 125 luo_global.liveupdate_num); 126 127 err = luo_session_setup_incoming(luo_global.fdt_in); 128 if (err) 129 return err; 130 131 return 0; 132 } 133 134 static int __init liveupdate_early_init(void) 135 { 136 int err; 137 138 err = luo_early_startup(); 139 if (err) { 140 luo_global.enabled = false; 141 luo_restore_fail("The incoming tree failed to initialize properly [%pe], disabling live update\n", 142 ERR_PTR(err)); 143 } 144 145 return err; 146 } 147 early_initcall(liveupdate_early_init); 148 149 /* Called during boot to create outgoing LUO fdt tree */ 150 static int __init luo_fdt_setup(void) 151 { 152 const u64 ln = luo_global.liveupdate_num + 1; 153 void *fdt_out; 154 int err; 155 156 fdt_out = kho_alloc_preserve(LUO_FDT_SIZE); 157 if (IS_ERR(fdt_out)) { 158 pr_err("failed to allocate/preserve FDT memory\n"); 159 return PTR_ERR(fdt_out); 160 } 161 162 err = fdt_create(fdt_out, LUO_FDT_SIZE); 163 err |= fdt_finish_reservemap(fdt_out); 164 err |= fdt_begin_node(fdt_out, ""); 165 err |= fdt_property_string(fdt_out, "compatible", LUO_FDT_COMPATIBLE); 166 err |= fdt_property(fdt_out, LUO_FDT_LIVEUPDATE_NUM, &ln, sizeof(ln)); 167 err |= luo_session_setup_outgoing(fdt_out); 168 err |= fdt_end_node(fdt_out); 169 err |= fdt_finish(fdt_out); 170 if (err) 171 goto exit_free; 172 173 err = kho_add_subtree(LUO_FDT_KHO_ENTRY_NAME, fdt_out); 174 if (err) 175 goto exit_free; 176 luo_global.fdt_out = fdt_out; 177 178 return 0; 179 180 exit_free: 181 kho_unpreserve_free(fdt_out); 182 pr_err("failed to prepare LUO FDT: %d\n", err); 183 184 return err; 185 } 186 187 /* 188 * late initcall because it initializes the outgoing tree that is needed only 189 * once userspace starts using /dev/liveupdate. 190 */ 191 static int __init luo_late_startup(void) 192 { 193 int err; 194 195 if (!liveupdate_enabled()) 196 return 0; 197 198 err = luo_fdt_setup(); 199 if (err) 200 luo_global.enabled = false; 201 202 return err; 203 } 204 late_initcall(luo_late_startup); 205 206 /* Public Functions */ 207 208 /** 209 * liveupdate_reboot() - Kernel reboot notifier for live update final 210 * serialization. 211 * 212 * This function is invoked directly from the reboot() syscall pathway 213 * if kexec is in progress. 214 * 215 * If any callback fails, this function aborts KHO, undoes the freeze() 216 * callbacks, and returns an error. 217 */ 218 int liveupdate_reboot(void) 219 { 220 int err; 221 222 if (!liveupdate_enabled()) 223 return 0; 224 225 err = luo_session_serialize(); 226 if (err) 227 return err; 228 229 err = kho_finalize(); 230 if (err) { 231 pr_err("kho_finalize failed %d\n", err); 232 /* 233 * kho_finalize() may return libfdt errors, to aboid passing to 234 * userspace unknown errors, change this to EAGAIN. 235 */ 236 err = -EAGAIN; 237 } 238 239 return err; 240 } 241 242 /** 243 * liveupdate_enabled - Check if the live update feature is enabled. 244 * 245 * This function returns the state of the live update feature flag, which 246 * can be controlled via the ``liveupdate`` kernel command-line parameter. 247 * 248 * @return true if live update is enabled, false otherwise. 249 */ 250 bool liveupdate_enabled(void) 251 { 252 return luo_global.enabled; 253 } 254 255 /** 256 * DOC: LUO ioctl Interface 257 * 258 * The IOCTL user-space control interface for the LUO subsystem. 259 * It registers a character device, typically found at ``/dev/liveupdate``, 260 * which allows a userspace agent to manage the LUO state machine and its 261 * associated resources, such as preservable file descriptors. 262 * 263 * To ensure that the state machine is controlled by a single entity, access 264 * to this device is exclusive: only one process is permitted to have 265 * ``/dev/liveupdate`` open at any given time. Subsequent open attempts will 266 * fail with -EBUSY until the first process closes its file descriptor. 267 * This singleton model simplifies state management by preventing conflicting 268 * commands from multiple userspace agents. 269 */ 270 271 struct luo_device_state { 272 struct miscdevice miscdev; 273 atomic_t in_use; 274 }; 275 276 static int luo_ioctl_create_session(struct luo_ucmd *ucmd) 277 { 278 struct liveupdate_ioctl_create_session *argp = ucmd->cmd; 279 struct file *file; 280 int err; 281 282 argp->fd = get_unused_fd_flags(O_CLOEXEC); 283 if (argp->fd < 0) 284 return argp->fd; 285 286 err = luo_session_create(argp->name, &file); 287 if (err) 288 goto err_put_fd; 289 290 err = luo_ucmd_respond(ucmd, sizeof(*argp)); 291 if (err) 292 goto err_put_file; 293 294 fd_install(argp->fd, file); 295 296 return 0; 297 298 err_put_file: 299 fput(file); 300 err_put_fd: 301 put_unused_fd(argp->fd); 302 303 return err; 304 } 305 306 static int luo_ioctl_retrieve_session(struct luo_ucmd *ucmd) 307 { 308 struct liveupdate_ioctl_retrieve_session *argp = ucmd->cmd; 309 struct file *file; 310 int err; 311 312 argp->fd = get_unused_fd_flags(O_CLOEXEC); 313 if (argp->fd < 0) 314 return argp->fd; 315 316 err = luo_session_retrieve(argp->name, &file); 317 if (err < 0) 318 goto err_put_fd; 319 320 err = luo_ucmd_respond(ucmd, sizeof(*argp)); 321 if (err) 322 goto err_put_file; 323 324 fd_install(argp->fd, file); 325 326 return 0; 327 328 err_put_file: 329 fput(file); 330 err_put_fd: 331 put_unused_fd(argp->fd); 332 333 return err; 334 } 335 336 static int luo_open(struct inode *inodep, struct file *filep) 337 { 338 struct luo_device_state *ldev = container_of(filep->private_data, 339 struct luo_device_state, 340 miscdev); 341 342 if (atomic_cmpxchg(&ldev->in_use, 0, 1)) 343 return -EBUSY; 344 345 /* Always return -EIO to user if deserialization fail */ 346 if (luo_session_deserialize()) { 347 atomic_set(&ldev->in_use, 0); 348 return -EIO; 349 } 350 351 return 0; 352 } 353 354 static int luo_release(struct inode *inodep, struct file *filep) 355 { 356 struct luo_device_state *ldev = container_of(filep->private_data, 357 struct luo_device_state, 358 miscdev); 359 atomic_set(&ldev->in_use, 0); 360 361 return 0; 362 } 363 364 union ucmd_buffer { 365 struct liveupdate_ioctl_create_session create; 366 struct liveupdate_ioctl_retrieve_session retrieve; 367 }; 368 369 struct luo_ioctl_op { 370 unsigned int size; 371 unsigned int min_size; 372 unsigned int ioctl_num; 373 int (*execute)(struct luo_ucmd *ucmd); 374 }; 375 376 #define IOCTL_OP(_ioctl, _fn, _struct, _last) \ 377 [_IOC_NR(_ioctl) - LIVEUPDATE_CMD_BASE] = { \ 378 .size = sizeof(_struct) + \ 379 BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) < \ 380 sizeof(_struct)), \ 381 .min_size = offsetofend(_struct, _last), \ 382 .ioctl_num = _ioctl, \ 383 .execute = _fn, \ 384 } 385 386 static const struct luo_ioctl_op luo_ioctl_ops[] = { 387 IOCTL_OP(LIVEUPDATE_IOCTL_CREATE_SESSION, luo_ioctl_create_session, 388 struct liveupdate_ioctl_create_session, name), 389 IOCTL_OP(LIVEUPDATE_IOCTL_RETRIEVE_SESSION, luo_ioctl_retrieve_session, 390 struct liveupdate_ioctl_retrieve_session, name), 391 }; 392 393 static long luo_ioctl(struct file *filep, unsigned int cmd, unsigned long arg) 394 { 395 const struct luo_ioctl_op *op; 396 struct luo_ucmd ucmd = {}; 397 union ucmd_buffer buf; 398 unsigned int nr; 399 int err; 400 401 nr = _IOC_NR(cmd); 402 if (nr - LIVEUPDATE_CMD_BASE >= ARRAY_SIZE(luo_ioctl_ops)) 403 return -EINVAL; 404 405 ucmd.ubuffer = (void __user *)arg; 406 err = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer); 407 if (err) 408 return err; 409 410 op = &luo_ioctl_ops[nr - LIVEUPDATE_CMD_BASE]; 411 if (op->ioctl_num != cmd) 412 return -ENOIOCTLCMD; 413 if (ucmd.user_size < op->min_size) 414 return -EINVAL; 415 416 ucmd.cmd = &buf; 417 err = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer, 418 ucmd.user_size); 419 if (err) 420 return err; 421 422 return op->execute(&ucmd); 423 } 424 425 static const struct file_operations luo_fops = { 426 .owner = THIS_MODULE, 427 .open = luo_open, 428 .release = luo_release, 429 .unlocked_ioctl = luo_ioctl, 430 }; 431 432 static struct luo_device_state luo_dev = { 433 .miscdev = { 434 .minor = MISC_DYNAMIC_MINOR, 435 .name = "liveupdate", 436 .fops = &luo_fops, 437 }, 438 .in_use = ATOMIC_INIT(0), 439 }; 440 441 static int __init liveupdate_ioctl_init(void) 442 { 443 if (!liveupdate_enabled()) 444 return 0; 445 446 return misc_register(&luo_dev.miscdev); 447 } 448 late_initcall(liveupdate_ioctl_init); 449