1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Copyright (c) 2025, Google LLC.
5 * Pasha Tatashin <pasha.tatashin@soleen.com>
6 */
7
8 /**
9 * DOC: LUO Sessions
10 *
11 * LUO Sessions provide the core mechanism for grouping and managing `struct
12 * file *` instances that need to be preserved across a kexec-based live
13 * update. Each session acts as a named container for a set of file objects,
14 * allowing a userspace agent to manage the lifecycle of resources critical to a
15 * workload.
16 *
17 * Core Concepts:
18 *
19 * - Named Containers: Sessions are identified by a unique, user-provided name,
20 * which is used for both creation in the current kernel and retrieval in the
21 * next kernel.
22 *
23 * - Userspace Interface: Session management is driven from userspace via
24 * ioctls on /dev/liveupdate.
25 *
 * - Serialization: Session metadata is preserved using the KHO framework. When
 *   a live update is triggered via kexec, an array of `struct luo_session_ser`
 *   is populated in a preserved memory region, preceded by a
 *   `struct luo_session_header_ser` that records the session count. An FDT
 *   node is also created, containing the physical address of that header.
31 *
32 * Session Lifecycle:
33 *
34 * 1. Creation: A userspace agent calls `luo_session_create()` to create a
35 * new, empty session and receives a file descriptor for it.
36 *
37 * 2. Serialization: When the `reboot(LINUX_REBOOT_CMD_KEXEC)` syscall is
38 * made, `luo_session_serialize()` is called. It iterates through all
39 * active sessions and writes their metadata into a memory area preserved
40 * by KHO.
41 *
42 * 3. Deserialization (in new kernel): After kexec, `luo_session_deserialize()`
43 * runs, reading the serialized data and creating a list of `struct
44 * luo_session` objects representing the preserved sessions.
45 *
46 * 4. Retrieval: A userspace agent in the new kernel can then call
47 * `luo_session_retrieve()` with a session name to get a new file
48 * descriptor and access the preserved state.
49 */
50
51 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
52
53 #include <linux/anon_inodes.h>
54 #include <linux/cleanup.h>
55 #include <linux/err.h>
56 #include <linux/errno.h>
57 #include <linux/file.h>
58 #include <linux/fs.h>
59 #include <linux/io.h>
60 #include <linux/kexec_handover.h>
61 #include <linux/kho/abi/luo.h>
62 #include <linux/libfdt.h>
63 #include <linux/list.h>
64 #include <linux/liveupdate.h>
65 #include <linux/mutex.h>
66 #include <linux/rwsem.h>
67 #include <linux/slab.h>
68 #include <linux/unaligned.h>
69 #include <uapi/linux/liveupdate.h>
70 #include "luo_internal.h"
71
/*
 * The serialization area is LUO_SESSION_PGCNT pages long. It starts with one
 * struct luo_session_header_ser, and the remainder is used as an array of
 * struct luo_session_ser. LUO_SESSION_MAX is the number of array entries
 * that fit after the header.
 *
 * 16 4K pages, give space for 744 sessions
 */
#define LUO_SESSION_PGCNT	16ul
#define LUO_SESSION_MAX		(((LUO_SESSION_PGCNT << PAGE_SHIFT) -	\
		sizeof(struct luo_session_header_ser)) /		\
		sizeof(struct luo_session_ser))
77
/**
 * struct luo_session_header - Header struct for managing LUO sessions.
 * @count:      The number of sessions currently tracked in the @list.
 *              Incremented on insert and decremented on remove, both under
 *              @rwsem held for write.
 * @list:       The head of the linked list of `struct luo_session` instances.
 * @rwsem:      A read-write semaphore providing synchronized access to the
 *              session list and other fields in this structure.
 * @header_ser: The header data of serialization array. Points at the start
 *              of the preserved memory region.
 * @ser:        The serialized session data (an array of
 *              `struct luo_session_ser`). Set to the address immediately
 *              following @header_ser.
 * @active:     Set to true once the corresponding setup function has
 *              succeeded. If previous kernel did not send session data,
 *              active stays false for incoming.
 */
struct luo_session_header {
	long count;
	struct list_head list;
	struct rw_semaphore rwsem;
	struct luo_session_header_ser *header_ser;
	struct luo_session_ser *ser;
	bool active;
};
98
/**
 * struct luo_session_global - Global container for managing LUO sessions.
 * @incoming: The sessions passed from the previous kernel. Populated by
 *            luo_session_deserialize().
 * @outgoing: The sessions that are going to be passed to the next kernel.
 *            Populated by luo_session_create().
 */
struct luo_session_global {
	struct luo_session_header incoming;
	struct luo_session_header outgoing;
};
108
/*
 * The single global instance. Only the list heads and semaphores need
 * explicit static initializers; count, header_ser, ser and active start out
 * zero/NULL/false.
 */
static struct luo_session_global luo_session_global = {
	.incoming = {
		.list = LIST_HEAD_INIT(luo_session_global.incoming.list),
		.rwsem = __RWSEM_INITIALIZER(luo_session_global.incoming.rwsem),
	},
	.outgoing = {
		.list = LIST_HEAD_INIT(luo_session_global.outgoing.list),
		.rwsem = __RWSEM_INITIALIZER(luo_session_global.outgoing.rwsem),
	},
};
119
luo_session_alloc(const char * name)120 static struct luo_session *luo_session_alloc(const char *name)
121 {
122 struct luo_session *session = kzalloc(sizeof(*session), GFP_KERNEL);
123
124 if (!session)
125 return ERR_PTR(-ENOMEM);
126
127 strscpy(session->name, name, sizeof(session->name));
128 INIT_LIST_HEAD(&session->file_set.files_list);
129 luo_file_set_init(&session->file_set);
130 INIT_LIST_HEAD(&session->list);
131 mutex_init(&session->mutex);
132
133 return session;
134 }
135
luo_session_free(struct luo_session * session)136 static void luo_session_free(struct luo_session *session)
137 {
138 luo_file_set_destroy(&session->file_set);
139 mutex_destroy(&session->mutex);
140 kfree(session);
141 }
142
luo_session_insert(struct luo_session_header * sh,struct luo_session * session)143 static int luo_session_insert(struct luo_session_header *sh,
144 struct luo_session *session)
145 {
146 struct luo_session *it;
147
148 guard(rwsem_write)(&sh->rwsem);
149
150 /*
151 * For outgoing we should make sure there is room in serialization array
152 * for new session.
153 */
154 if (sh == &luo_session_global.outgoing) {
155 if (sh->count == LUO_SESSION_MAX)
156 return -ENOMEM;
157 }
158
159 /*
160 * For small number of sessions this loop won't hurt performance
161 * but if we ever start using a lot of sessions, this might
162 * become a bottle neck during deserialization time, as it would
163 * cause O(n*n) complexity.
164 */
165 list_for_each_entry(it, &sh->list, list) {
166 if (!strncmp(it->name, session->name, sizeof(it->name)))
167 return -EEXIST;
168 }
169 list_add_tail(&session->list, &sh->list);
170 sh->count++;
171
172 return 0;
173 }
174
luo_session_remove(struct luo_session_header * sh,struct luo_session * session)175 static void luo_session_remove(struct luo_session_header *sh,
176 struct luo_session *session)
177 {
178 guard(rwsem_write)(&sh->rwsem);
179 list_del(&session->list);
180 sh->count--;
181 }
182
luo_session_finish_one(struct luo_session * session)183 static int luo_session_finish_one(struct luo_session *session)
184 {
185 guard(mutex)(&session->mutex);
186 return luo_file_finish(&session->file_set);
187 }
188
luo_session_unfreeze_one(struct luo_session * session,struct luo_session_ser * ser)189 static void luo_session_unfreeze_one(struct luo_session *session,
190 struct luo_session_ser *ser)
191 {
192 guard(mutex)(&session->mutex);
193 luo_file_unfreeze(&session->file_set, &ser->file_set_ser);
194 }
195
luo_session_freeze_one(struct luo_session * session,struct luo_session_ser * ser)196 static int luo_session_freeze_one(struct luo_session *session,
197 struct luo_session_ser *ser)
198 {
199 guard(mutex)(&session->mutex);
200 return luo_file_freeze(&session->file_set, &ser->file_set_ser);
201 }
202
luo_session_release(struct inode * inodep,struct file * filep)203 static int luo_session_release(struct inode *inodep, struct file *filep)
204 {
205 struct luo_session *session = filep->private_data;
206 struct luo_session_header *sh;
207
208 /* If retrieved is set, it means this session is from incoming list */
209 if (session->retrieved) {
210 int err = luo_session_finish_one(session);
211
212 if (err) {
213 pr_warn("Unable to finish session [%s] on release\n",
214 session->name);
215 return err;
216 }
217 sh = &luo_session_global.incoming;
218 } else {
219 scoped_guard(mutex, &session->mutex)
220 luo_file_unpreserve_files(&session->file_set);
221 sh = &luo_session_global.outgoing;
222 }
223
224 luo_session_remove(sh, session);
225 luo_session_free(session);
226
227 return 0;
228 }
229
luo_session_preserve_fd(struct luo_session * session,struct luo_ucmd * ucmd)230 static int luo_session_preserve_fd(struct luo_session *session,
231 struct luo_ucmd *ucmd)
232 {
233 struct liveupdate_session_preserve_fd *argp = ucmd->cmd;
234 int err;
235
236 guard(mutex)(&session->mutex);
237 err = luo_preserve_file(&session->file_set, argp->token, argp->fd);
238 if (err)
239 return err;
240
241 err = luo_ucmd_respond(ucmd, sizeof(*argp));
242 if (err)
243 pr_warn("The file was successfully preserved, but response to user failed\n");
244
245 return err;
246 }
247
luo_session_retrieve_fd(struct luo_session * session,struct luo_ucmd * ucmd)248 static int luo_session_retrieve_fd(struct luo_session *session,
249 struct luo_ucmd *ucmd)
250 {
251 struct liveupdate_session_retrieve_fd *argp = ucmd->cmd;
252 struct file *file;
253 int err;
254
255 argp->fd = get_unused_fd_flags(O_CLOEXEC);
256 if (argp->fd < 0)
257 return argp->fd;
258
259 guard(mutex)(&session->mutex);
260 err = luo_retrieve_file(&session->file_set, argp->token, &file);
261 if (err < 0)
262 goto err_put_fd;
263
264 err = luo_ucmd_respond(ucmd, sizeof(*argp));
265 if (err)
266 goto err_put_file;
267
268 fd_install(argp->fd, file);
269
270 return 0;
271
272 err_put_file:
273 fput(file);
274 err_put_fd:
275 put_unused_fd(argp->fd);
276
277 return err;
278 }
279
luo_session_finish(struct luo_session * session,struct luo_ucmd * ucmd)280 static int luo_session_finish(struct luo_session *session,
281 struct luo_ucmd *ucmd)
282 {
283 struct liveupdate_session_finish *argp = ucmd->cmd;
284 int err = luo_session_finish_one(session);
285
286 if (err)
287 return err;
288
289 return luo_ucmd_respond(ucmd, sizeof(*argp));
290 }
291
/*
 * Kernel-side buffer large enough for any session ioctl argument struct.
 * luo_session_ioctl() copies the user argument into one of these on its
 * stack; IOCTL_OP() checks at build time that each argument struct fits.
 */
union ucmd_buffer {
	struct liveupdate_session_finish finish;
	struct liveupdate_session_preserve_fd preserve;
	struct liveupdate_session_retrieve_fd retrieve;
};
297
/**
 * struct luo_ioctl_op - Descriptor for one session ioctl.
 * @size:      Size of the kernel's argument struct for this ioctl.
 * @min_size:  Smallest user-supplied size that is accepted; smaller sizes
 *             are rejected with -EINVAL by luo_session_ioctl().
 * @ioctl_num: The full ioctl command number, compared against the incoming
 *             command.
 * @execute:   Handler invoked with the session and the copied-in command.
 */
struct luo_ioctl_op {
	unsigned int size;
	unsigned int min_size;
	unsigned int ioctl_num;
	int (*execute)(struct luo_session *session, struct luo_ucmd *ucmd);
};
304
/*
 * IOCTL_OP - Build one luo_session_ioctl_ops[] entry.
 * @_ioctl:  The ioctl command; its _IOC_NR() minus
 *           LIVEUPDATE_CMD_SESSION_BASE is the table index.
 * @_fn:     The handler stored in ->execute.
 * @_struct: The argument struct type; sizeof() becomes ->size.
 * @_last:   The last mandatory member of @_struct; offsetofend() of it
 *           becomes ->min_size.
 *
 * The BUILD_BUG_ON_ZERO() term adds zero to ->size and breaks the build if
 * @_struct does not fit into union ucmd_buffer.
 */
#define IOCTL_OP(_ioctl, _fn, _struct, _last)				\
	[_IOC_NR(_ioctl) - LIVEUPDATE_CMD_SESSION_BASE] = {		\
		.size = sizeof(_struct) +				\
			BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) <	\
					  sizeof(_struct)),		\
		.min_size = offsetofend(_struct, _last),		\
		.ioctl_num = _ioctl,					\
		.execute = _fn,						\
	}
314
/*
 * Dispatch table for session ioctls, indexed by
 * _IOC_NR(cmd) - LIVEUPDATE_CMD_SESSION_BASE. The last macro argument names
 * the final member userspace must provide.
 */
static const struct luo_ioctl_op luo_session_ioctl_ops[] = {
	IOCTL_OP(LIVEUPDATE_SESSION_FINISH, luo_session_finish,
		 struct liveupdate_session_finish, reserved),
	IOCTL_OP(LIVEUPDATE_SESSION_PRESERVE_FD, luo_session_preserve_fd,
		 struct liveupdate_session_preserve_fd, token),
	IOCTL_OP(LIVEUPDATE_SESSION_RETRIEVE_FD, luo_session_retrieve_fd,
		 struct liveupdate_session_retrieve_fd, token),
};
323
luo_session_ioctl(struct file * filep,unsigned int cmd,unsigned long arg)324 static long luo_session_ioctl(struct file *filep, unsigned int cmd,
325 unsigned long arg)
326 {
327 struct luo_session *session = filep->private_data;
328 const struct luo_ioctl_op *op;
329 struct luo_ucmd ucmd = {};
330 union ucmd_buffer buf;
331 unsigned int nr;
332 int ret;
333
334 nr = _IOC_NR(cmd);
335 if (nr < LIVEUPDATE_CMD_SESSION_BASE || (nr - LIVEUPDATE_CMD_SESSION_BASE) >=
336 ARRAY_SIZE(luo_session_ioctl_ops)) {
337 return -EINVAL;
338 }
339
340 ucmd.ubuffer = (void __user *)arg;
341 ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
342 if (ret)
343 return ret;
344
345 op = &luo_session_ioctl_ops[nr - LIVEUPDATE_CMD_SESSION_BASE];
346 if (op->ioctl_num != cmd)
347 return -ENOIOCTLCMD;
348 if (ucmd.user_size < op->min_size)
349 return -EINVAL;
350
351 ucmd.cmd = &buf;
352 ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
353 ucmd.user_size);
354 if (ret)
355 return ret;
356
357 return op->execute(session, &ucmd);
358 }
359
/*
 * File operations for the anonymous-inode file created per session by
 * luo_session_getfile(). Closing the last reference runs
 * luo_session_release().
 */
static const struct file_operations luo_session_fops = {
	.owner = THIS_MODULE,
	.release = luo_session_release,
	.unlocked_ioctl = luo_session_ioctl,
};
365
366 /* Create a "struct file" for session */
luo_session_getfile(struct luo_session * session,struct file ** filep)367 static int luo_session_getfile(struct luo_session *session, struct file **filep)
368 {
369 char name_buf[128];
370 struct file *file;
371
372 lockdep_assert_held(&session->mutex);
373 snprintf(name_buf, sizeof(name_buf), "[luo_session] %s", session->name);
374 file = anon_inode_getfile(name_buf, &luo_session_fops, session, O_RDWR);
375 if (IS_ERR(file))
376 return PTR_ERR(file);
377
378 *filep = file;
379
380 return 0;
381 }
382
luo_session_create(const char * name,struct file ** filep)383 int luo_session_create(const char *name, struct file **filep)
384 {
385 struct luo_session *session;
386 int err;
387
388 session = luo_session_alloc(name);
389 if (IS_ERR(session))
390 return PTR_ERR(session);
391
392 err = luo_session_insert(&luo_session_global.outgoing, session);
393 if (err)
394 goto err_free;
395
396 scoped_guard(mutex, &session->mutex)
397 err = luo_session_getfile(session, filep);
398 if (err)
399 goto err_remove;
400
401 return 0;
402
403 err_remove:
404 luo_session_remove(&luo_session_global.outgoing, session);
405 err_free:
406 luo_session_free(session);
407
408 return err;
409 }
410
luo_session_retrieve(const char * name,struct file ** filep)411 int luo_session_retrieve(const char *name, struct file **filep)
412 {
413 struct luo_session_header *sh = &luo_session_global.incoming;
414 struct luo_session *session = NULL;
415 struct luo_session *it;
416 int err;
417
418 scoped_guard(rwsem_read, &sh->rwsem) {
419 list_for_each_entry(it, &sh->list, list) {
420 if (!strncmp(it->name, name, sizeof(it->name))) {
421 session = it;
422 break;
423 }
424 }
425 }
426
427 if (!session)
428 return -ENOENT;
429
430 guard(mutex)(&session->mutex);
431 if (session->retrieved)
432 return -EINVAL;
433
434 err = luo_session_getfile(session, filep);
435 if (!err)
436 session->retrieved = true;
437
438 return err;
439 }
440
luo_session_setup_outgoing(void * fdt_out)441 int __init luo_session_setup_outgoing(void *fdt_out)
442 {
443 struct luo_session_header_ser *header_ser;
444 u64 header_ser_pa;
445 int err;
446
447 header_ser = kho_alloc_preserve(LUO_SESSION_PGCNT << PAGE_SHIFT);
448 if (IS_ERR(header_ser))
449 return PTR_ERR(header_ser);
450 header_ser_pa = virt_to_phys(header_ser);
451
452 err = fdt_begin_node(fdt_out, LUO_FDT_SESSION_NODE_NAME);
453 err |= fdt_property_string(fdt_out, "compatible",
454 LUO_FDT_SESSION_COMPATIBLE);
455 err |= fdt_property(fdt_out, LUO_FDT_SESSION_HEADER, &header_ser_pa,
456 sizeof(header_ser_pa));
457 err |= fdt_end_node(fdt_out);
458
459 if (err)
460 goto err_unpreserve;
461
462 luo_session_global.outgoing.header_ser = header_ser;
463 luo_session_global.outgoing.ser = (void *)(header_ser + 1);
464 luo_session_global.outgoing.active = true;
465
466 return 0;
467
468 err_unpreserve:
469 kho_unpreserve_free(header_ser);
470 return err;
471 }
472
luo_session_setup_incoming(void * fdt_in)473 int __init luo_session_setup_incoming(void *fdt_in)
474 {
475 struct luo_session_header_ser *header_ser;
476 int err, header_size, offset;
477 u64 header_ser_pa;
478 const void *ptr;
479
480 offset = fdt_subnode_offset(fdt_in, 0, LUO_FDT_SESSION_NODE_NAME);
481 if (offset < 0) {
482 pr_err("Unable to get session node: [%s]\n",
483 LUO_FDT_SESSION_NODE_NAME);
484 return -EINVAL;
485 }
486
487 err = fdt_node_check_compatible(fdt_in, offset,
488 LUO_FDT_SESSION_COMPATIBLE);
489 if (err) {
490 pr_err("Session node incompatible [%s]\n",
491 LUO_FDT_SESSION_COMPATIBLE);
492 return -EINVAL;
493 }
494
495 header_size = 0;
496 ptr = fdt_getprop(fdt_in, offset, LUO_FDT_SESSION_HEADER, &header_size);
497 if (!ptr || header_size != sizeof(u64)) {
498 pr_err("Unable to get session header '%s' [%d]\n",
499 LUO_FDT_SESSION_HEADER, header_size);
500 return -EINVAL;
501 }
502
503 header_ser_pa = get_unaligned((u64 *)ptr);
504 header_ser = phys_to_virt(header_ser_pa);
505
506 luo_session_global.incoming.header_ser = header_ser;
507 luo_session_global.incoming.ser = (void *)(header_ser + 1);
508 luo_session_global.incoming.active = true;
509
510 return 0;
511 }
512
luo_session_deserialize(void)513 int luo_session_deserialize(void)
514 {
515 struct luo_session_header *sh = &luo_session_global.incoming;
516 static bool is_deserialized;
517 static int err;
518
519 /* If has been deserialized, always return the same error code */
520 if (is_deserialized)
521 return err;
522
523 is_deserialized = true;
524 if (!sh->active)
525 return 0;
526
527 /*
528 * Note on error handling:
529 *
530 * If deserialization fails (e.g., allocation failure or corrupt data),
531 * we intentionally skip cleanup of sessions that were already restored.
532 *
533 * A partial failure leaves the preserved state inconsistent.
534 * Implementing a safe "undo" to unwind complex dependencies (sessions,
535 * files, hardware state) is error-prone and provides little value, as
536 * the system is effectively in a broken state.
537 *
538 * We treat these resources as leaked. The expected recovery path is for
539 * userspace to detect the failure and trigger a reboot, which will
540 * reliably reset devices and reclaim memory.
541 */
542 for (int i = 0; i < sh->header_ser->count; i++) {
543 struct luo_session *session;
544
545 session = luo_session_alloc(sh->ser[i].name);
546 if (IS_ERR(session)) {
547 pr_warn("Failed to allocate session [%s] during deserialization %pe\n",
548 sh->ser[i].name, session);
549 return PTR_ERR(session);
550 }
551
552 err = luo_session_insert(sh, session);
553 if (err) {
554 pr_warn("Failed to insert session [%s] %pe\n",
555 session->name, ERR_PTR(err));
556 luo_session_free(session);
557 return err;
558 }
559
560 scoped_guard(mutex, &session->mutex) {
561 luo_file_deserialize(&session->file_set,
562 &sh->ser[i].file_set_ser);
563 }
564 }
565
566 kho_restore_free(sh->header_ser);
567 sh->header_ser = NULL;
568 sh->ser = NULL;
569
570 return 0;
571 }
572
luo_session_serialize(void)573 int luo_session_serialize(void)
574 {
575 struct luo_session_header *sh = &luo_session_global.outgoing;
576 struct luo_session *session;
577 int i = 0;
578 int err;
579
580 guard(rwsem_write)(&sh->rwsem);
581 list_for_each_entry(session, &sh->list, list) {
582 err = luo_session_freeze_one(session, &sh->ser[i]);
583 if (err)
584 goto err_undo;
585
586 strscpy(sh->ser[i].name, session->name,
587 sizeof(sh->ser[i].name));
588 i++;
589 }
590 sh->header_ser->count = sh->count;
591
592 return 0;
593
594 err_undo:
595 list_for_each_entry_continue_reverse(session, &sh->list, list) {
596 i--;
597 luo_session_unfreeze_one(session, &sh->ser[i]);
598 memset(sh->ser[i].name, 0, sizeof(sh->ser[i].name));
599 }
600
601 return err;
602 }
603
604 /**
605 * luo_session_quiesce - Ensure no active sessions exist and lock session lists.
606 *
607 * Acquires exclusive write locks on both incoming and outgoing session lists.
608 * It then validates no sessions exist in either list.
609 *
610 * This mechanism is used during file handler un/registration to ensure that no
611 * sessions are currently using the handler, and no new sessions can be created
612 * while un/registration is in progress.
613 *
614 * This prevents registering new handlers while sessions are active or
615 * while deserialization is in progress.
616 *
617 * Return:
618 * true - System is quiescent (0 sessions) and locked.
619 * false - Active sessions exist. The locks are released internally.
620 */
luo_session_quiesce(void)621 bool luo_session_quiesce(void)
622 {
623 down_write(&luo_session_global.incoming.rwsem);
624 down_write(&luo_session_global.outgoing.rwsem);
625
626 if (luo_session_global.incoming.count ||
627 luo_session_global.outgoing.count) {
628 up_write(&luo_session_global.outgoing.rwsem);
629 up_write(&luo_session_global.incoming.rwsem);
630 return false;
631 }
632
633 return true;
634 }
635
636 /**
637 * luo_session_resume - Unlock session lists and resume normal activity.
638 *
639 * Releases the exclusive locks acquired by a successful call to
640 * luo_session_quiesce().
641 */
luo_session_resume(void)642 void luo_session_resume(void)
643 {
644 up_write(&luo_session_global.outgoing.rwsem);
645 up_write(&luo_session_global.incoming.rwsem);
646 }
647