xref: /linux/kernel/liveupdate/luo_session.c (revision 509d3f45847627f4c5cdce004c3ec79262b5239c)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Copyright (c) 2025, Google LLC.
5  * Pasha Tatashin <pasha.tatashin@soleen.com>
6  */
7 
8 /**
9  * DOC: LUO Sessions
10  *
11  * LUO Sessions provide the core mechanism for grouping and managing `struct
12  * file *` instances that need to be preserved across a kexec-based live
13  * update. Each session acts as a named container for a set of file objects,
14  * allowing a userspace agent to manage the lifecycle of resources critical to a
15  * workload.
16  *
17  * Core Concepts:
18  *
19  * - Named Containers: Sessions are identified by a unique, user-provided name,
20  *   which is used for both creation in the current kernel and retrieval in the
21  *   next kernel.
22  *
23  * - Userspace Interface: Session management is driven from userspace via
24  *   ioctls on /dev/liveupdate.
25  *
26  * - Serialization: Session metadata is preserved using the KHO framework. When
27  *   a live update is triggered via kexec, an array of `struct luo_session_ser`
28  *   is populated and placed in a preserved memory region. An FDT node is also
29  *   created, containing the count of sessions and the physical address of this
30  *   array.
31  *
32  * Session Lifecycle:
33  *
34  * 1.  Creation: A userspace agent calls `luo_session_create()` to create a
35  *     new, empty session and receives a file descriptor for it.
36  *
37  * 2.  Serialization: When the `reboot(LINUX_REBOOT_CMD_KEXEC)` syscall is
38  *     made, `luo_session_serialize()` is called. It iterates through all
39  *     active sessions and writes their metadata into a memory area preserved
40  *     by KHO.
41  *
42  * 3.  Deserialization (in new kernel): After kexec, `luo_session_deserialize()`
43  *     runs, reading the serialized data and creating a list of `struct
44  *     luo_session` objects representing the preserved sessions.
45  *
46  * 4.  Retrieval: A userspace agent in the new kernel can then call
47  *     `luo_session_retrieve()` with a session name to get a new file
48  *     descriptor and access the preserved state.
49  */
50 
51 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
52 
53 #include <linux/anon_inodes.h>
54 #include <linux/cleanup.h>
55 #include <linux/err.h>
56 #include <linux/errno.h>
57 #include <linux/file.h>
58 #include <linux/fs.h>
59 #include <linux/io.h>
60 #include <linux/kexec_handover.h>
61 #include <linux/kho/abi/luo.h>
62 #include <linux/libfdt.h>
63 #include <linux/list.h>
64 #include <linux/liveupdate.h>
65 #include <linux/mutex.h>
66 #include <linux/rwsem.h>
67 #include <linux/slab.h>
68 #include <linux/unaligned.h>
69 #include <uapi/linux/liveupdate.h>
70 #include "luo_internal.h"
71 
/*
 * Size of the preserved serialization area, in pages. With 4K pages the
 * original sizing note for this area is: 16 pages, room for 744 sessions.
 */
#define LUO_SESSION_PGCNT	16ul

/*
 * Number of `struct luo_session_ser` entries that fit in the area once the
 * leading `struct luo_session_header_ser` has been accounted for.
 */
#define LUO_SESSION_MAX							\
	(((LUO_SESSION_PGCNT << PAGE_SHIFT) -				\
	  sizeof(struct luo_session_header_ser)) /			\
	 sizeof(struct luo_session_ser))
77 
78 /**
79  * struct luo_session_header - Header struct for managing LUO sessions.
80  * @count:      The number of sessions currently tracked in the @list.
81  * @list:       The head of the linked list of `struct luo_session` instances.
82  * @rwsem:      A read-write semaphore providing synchronized access to the
83  *              session list and other fields in this structure.
84  * @header_ser: The header data of serialization array.
85  * @ser:        The serialized session data (an array of
86  *              `struct luo_session_ser`).
87  * @active:     Set to true when first initialized. If previous kernel did not
88  *              send session data, active stays false for incoming.
89  */
90 struct luo_session_header {
91 	long count;
92 	struct list_head list;
93 	struct rw_semaphore rwsem;
94 	struct luo_session_header_ser *header_ser;
95 	struct luo_session_ser *ser;
96 	bool active;
97 };
98 
99 /**
100  * struct luo_session_global - Global container for managing LUO sessions.
101  * @incoming:     The sessions passed from the previous kernel.
102  * @outgoing:     The sessions that are going to be passed to the next kernel.
103  */
104 struct luo_session_global {
105 	struct luo_session_header incoming;
106 	struct luo_session_header outgoing;
107 };
108 
109 static struct luo_session_global luo_session_global = {
110 	.incoming = {
111 		.list = LIST_HEAD_INIT(luo_session_global.incoming.list),
112 		.rwsem = __RWSEM_INITIALIZER(luo_session_global.incoming.rwsem),
113 	},
114 	.outgoing = {
115 		.list = LIST_HEAD_INIT(luo_session_global.outgoing.list),
116 		.rwsem = __RWSEM_INITIALIZER(luo_session_global.outgoing.rwsem),
117 	},
118 };
119 
luo_session_alloc(const char * name)120 static struct luo_session *luo_session_alloc(const char *name)
121 {
122 	struct luo_session *session = kzalloc(sizeof(*session), GFP_KERNEL);
123 
124 	if (!session)
125 		return ERR_PTR(-ENOMEM);
126 
127 	strscpy(session->name, name, sizeof(session->name));
128 	INIT_LIST_HEAD(&session->file_set.files_list);
129 	luo_file_set_init(&session->file_set);
130 	INIT_LIST_HEAD(&session->list);
131 	mutex_init(&session->mutex);
132 
133 	return session;
134 }
135 
luo_session_free(struct luo_session * session)136 static void luo_session_free(struct luo_session *session)
137 {
138 	luo_file_set_destroy(&session->file_set);
139 	mutex_destroy(&session->mutex);
140 	kfree(session);
141 }
142 
luo_session_insert(struct luo_session_header * sh,struct luo_session * session)143 static int luo_session_insert(struct luo_session_header *sh,
144 			      struct luo_session *session)
145 {
146 	struct luo_session *it;
147 
148 	guard(rwsem_write)(&sh->rwsem);
149 
150 	/*
151 	 * For outgoing we should make sure there is room in serialization array
152 	 * for new session.
153 	 */
154 	if (sh == &luo_session_global.outgoing) {
155 		if (sh->count == LUO_SESSION_MAX)
156 			return -ENOMEM;
157 	}
158 
159 	/*
160 	 * For small number of sessions this loop won't hurt performance
161 	 * but if we ever start using a lot of sessions, this might
162 	 * become a bottle neck during deserialization time, as it would
163 	 * cause O(n*n) complexity.
164 	 */
165 	list_for_each_entry(it, &sh->list, list) {
166 		if (!strncmp(it->name, session->name, sizeof(it->name)))
167 			return -EEXIST;
168 	}
169 	list_add_tail(&session->list, &sh->list);
170 	sh->count++;
171 
172 	return 0;
173 }
174 
luo_session_remove(struct luo_session_header * sh,struct luo_session * session)175 static void luo_session_remove(struct luo_session_header *sh,
176 			       struct luo_session *session)
177 {
178 	guard(rwsem_write)(&sh->rwsem);
179 	list_del(&session->list);
180 	sh->count--;
181 }
182 
luo_session_finish_one(struct luo_session * session)183 static int luo_session_finish_one(struct luo_session *session)
184 {
185 	guard(mutex)(&session->mutex);
186 	return luo_file_finish(&session->file_set);
187 }
188 
luo_session_unfreeze_one(struct luo_session * session,struct luo_session_ser * ser)189 static void luo_session_unfreeze_one(struct luo_session *session,
190 				     struct luo_session_ser *ser)
191 {
192 	guard(mutex)(&session->mutex);
193 	luo_file_unfreeze(&session->file_set, &ser->file_set_ser);
194 }
195 
luo_session_freeze_one(struct luo_session * session,struct luo_session_ser * ser)196 static int luo_session_freeze_one(struct luo_session *session,
197 				  struct luo_session_ser *ser)
198 {
199 	guard(mutex)(&session->mutex);
200 	return luo_file_freeze(&session->file_set, &ser->file_set_ser);
201 }
202 
luo_session_release(struct inode * inodep,struct file * filep)203 static int luo_session_release(struct inode *inodep, struct file *filep)
204 {
205 	struct luo_session *session = filep->private_data;
206 	struct luo_session_header *sh;
207 
208 	/* If retrieved is set, it means this session is from incoming list */
209 	if (session->retrieved) {
210 		int err = luo_session_finish_one(session);
211 
212 		if (err) {
213 			pr_warn("Unable to finish session [%s] on release\n",
214 				session->name);
215 			return err;
216 		}
217 		sh = &luo_session_global.incoming;
218 	} else {
219 		scoped_guard(mutex, &session->mutex)
220 			luo_file_unpreserve_files(&session->file_set);
221 		sh = &luo_session_global.outgoing;
222 	}
223 
224 	luo_session_remove(sh, session);
225 	luo_session_free(session);
226 
227 	return 0;
228 }
229 
luo_session_preserve_fd(struct luo_session * session,struct luo_ucmd * ucmd)230 static int luo_session_preserve_fd(struct luo_session *session,
231 				   struct luo_ucmd *ucmd)
232 {
233 	struct liveupdate_session_preserve_fd *argp = ucmd->cmd;
234 	int err;
235 
236 	guard(mutex)(&session->mutex);
237 	err = luo_preserve_file(&session->file_set, argp->token, argp->fd);
238 	if (err)
239 		return err;
240 
241 	err = luo_ucmd_respond(ucmd, sizeof(*argp));
242 	if (err)
243 		pr_warn("The file was successfully preserved, but response to user failed\n");
244 
245 	return err;
246 }
247 
luo_session_retrieve_fd(struct luo_session * session,struct luo_ucmd * ucmd)248 static int luo_session_retrieve_fd(struct luo_session *session,
249 				   struct luo_ucmd *ucmd)
250 {
251 	struct liveupdate_session_retrieve_fd *argp = ucmd->cmd;
252 	struct file *file;
253 	int err;
254 
255 	argp->fd = get_unused_fd_flags(O_CLOEXEC);
256 	if (argp->fd < 0)
257 		return argp->fd;
258 
259 	guard(mutex)(&session->mutex);
260 	err = luo_retrieve_file(&session->file_set, argp->token, &file);
261 	if (err < 0)
262 		goto  err_put_fd;
263 
264 	err = luo_ucmd_respond(ucmd, sizeof(*argp));
265 	if (err)
266 		goto err_put_file;
267 
268 	fd_install(argp->fd, file);
269 
270 	return 0;
271 
272 err_put_file:
273 	fput(file);
274 err_put_fd:
275 	put_unused_fd(argp->fd);
276 
277 	return err;
278 }
279 
luo_session_finish(struct luo_session * session,struct luo_ucmd * ucmd)280 static int luo_session_finish(struct luo_session *session,
281 			      struct luo_ucmd *ucmd)
282 {
283 	struct liveupdate_session_finish *argp = ucmd->cmd;
284 	int err = luo_session_finish_one(session);
285 
286 	if (err)
287 		return err;
288 
289 	return luo_ucmd_respond(ucmd, sizeof(*argp));
290 }
291 
292 union ucmd_buffer {
293 	struct liveupdate_session_finish finish;
294 	struct liveupdate_session_preserve_fd preserve;
295 	struct liveupdate_session_retrieve_fd retrieve;
296 };
297 
/*
 * One entry of the session ioctl dispatch table.
 *
 * size:      size of the kernel's argument structure for the command.
 * min_size:  smallest user-supplied size that is accepted.
 * ioctl_num: full ioctl command number, compared against the caller's cmd.
 * execute:   handler invoked once the argument has been copied in.
 */
struct luo_ioctl_op {
	unsigned int size;
	unsigned int min_size;
	unsigned int ioctl_num;
	int (*execute)(struct luo_session *session, struct luo_ucmd *ucmd);
};
304 
/*
 * Build one dispatch table entry, indexed by the command number relative to
 * LIVEUPDATE_CMD_SESSION_BASE.
 *
 * _ioctl:  ioctl command number.
 * _fn:     handler for the command.
 * _struct: argument structure type; must fit in union ucmd_buffer, which is
 *          enforced at build time.
 * _last:   last member of _struct that userspace is required to supply.
 */
#define IOCTL_OP(_ioctl, _fn, _struct, _last)				\
	[_IOC_NR(_ioctl) - LIVEUPDATE_CMD_SESSION_BASE] = {		\
		.ioctl_num = _ioctl,					\
		.execute = _fn,						\
		.min_size = offsetofend(_struct, _last),		\
		.size = sizeof(_struct) +				\
			BUILD_BUG_ON_ZERO(sizeof(union ucmd_buffer) <	\
					  sizeof(_struct)),		\
	}
314 
315 static const struct luo_ioctl_op luo_session_ioctl_ops[] = {
316 	IOCTL_OP(LIVEUPDATE_SESSION_FINISH, luo_session_finish,
317 		 struct liveupdate_session_finish, reserved),
318 	IOCTL_OP(LIVEUPDATE_SESSION_PRESERVE_FD, luo_session_preserve_fd,
319 		 struct liveupdate_session_preserve_fd, token),
320 	IOCTL_OP(LIVEUPDATE_SESSION_RETRIEVE_FD, luo_session_retrieve_fd,
321 		 struct liveupdate_session_retrieve_fd, token),
322 };
323 
luo_session_ioctl(struct file * filep,unsigned int cmd,unsigned long arg)324 static long luo_session_ioctl(struct file *filep, unsigned int cmd,
325 			      unsigned long arg)
326 {
327 	struct luo_session *session = filep->private_data;
328 	const struct luo_ioctl_op *op;
329 	struct luo_ucmd ucmd = {};
330 	union ucmd_buffer buf;
331 	unsigned int nr;
332 	int ret;
333 
334 	nr = _IOC_NR(cmd);
335 	if (nr < LIVEUPDATE_CMD_SESSION_BASE || (nr - LIVEUPDATE_CMD_SESSION_BASE) >=
336 	    ARRAY_SIZE(luo_session_ioctl_ops)) {
337 		return -EINVAL;
338 	}
339 
340 	ucmd.ubuffer = (void __user *)arg;
341 	ret = get_user(ucmd.user_size, (u32 __user *)ucmd.ubuffer);
342 	if (ret)
343 		return ret;
344 
345 	op = &luo_session_ioctl_ops[nr - LIVEUPDATE_CMD_SESSION_BASE];
346 	if (op->ioctl_num != cmd)
347 		return -ENOIOCTLCMD;
348 	if (ucmd.user_size < op->min_size)
349 		return -EINVAL;
350 
351 	ucmd.cmd = &buf;
352 	ret = copy_struct_from_user(ucmd.cmd, op->size, ucmd.ubuffer,
353 				    ucmd.user_size);
354 	if (ret)
355 		return ret;
356 
357 	return op->execute(session, &ucmd);
358 }
359 
360 static const struct file_operations luo_session_fops = {
361 	.owner = THIS_MODULE,
362 	.release = luo_session_release,
363 	.unlocked_ioctl = luo_session_ioctl,
364 };
365 
366 /* Create a "struct file" for session */
luo_session_getfile(struct luo_session * session,struct file ** filep)367 static int luo_session_getfile(struct luo_session *session, struct file **filep)
368 {
369 	char name_buf[128];
370 	struct file *file;
371 
372 	lockdep_assert_held(&session->mutex);
373 	snprintf(name_buf, sizeof(name_buf), "[luo_session] %s", session->name);
374 	file = anon_inode_getfile(name_buf, &luo_session_fops, session, O_RDWR);
375 	if (IS_ERR(file))
376 		return PTR_ERR(file);
377 
378 	*filep = file;
379 
380 	return 0;
381 }
382 
luo_session_create(const char * name,struct file ** filep)383 int luo_session_create(const char *name, struct file **filep)
384 {
385 	struct luo_session *session;
386 	int err;
387 
388 	session = luo_session_alloc(name);
389 	if (IS_ERR(session))
390 		return PTR_ERR(session);
391 
392 	err = luo_session_insert(&luo_session_global.outgoing, session);
393 	if (err)
394 		goto err_free;
395 
396 	scoped_guard(mutex, &session->mutex)
397 		err = luo_session_getfile(session, filep);
398 	if (err)
399 		goto err_remove;
400 
401 	return 0;
402 
403 err_remove:
404 	luo_session_remove(&luo_session_global.outgoing, session);
405 err_free:
406 	luo_session_free(session);
407 
408 	return err;
409 }
410 
luo_session_retrieve(const char * name,struct file ** filep)411 int luo_session_retrieve(const char *name, struct file **filep)
412 {
413 	struct luo_session_header *sh = &luo_session_global.incoming;
414 	struct luo_session *session = NULL;
415 	struct luo_session *it;
416 	int err;
417 
418 	scoped_guard(rwsem_read, &sh->rwsem) {
419 		list_for_each_entry(it, &sh->list, list) {
420 			if (!strncmp(it->name, name, sizeof(it->name))) {
421 				session = it;
422 				break;
423 			}
424 		}
425 	}
426 
427 	if (!session)
428 		return -ENOENT;
429 
430 	guard(mutex)(&session->mutex);
431 	if (session->retrieved)
432 		return -EINVAL;
433 
434 	err = luo_session_getfile(session, filep);
435 	if (!err)
436 		session->retrieved = true;
437 
438 	return err;
439 }
440 
luo_session_setup_outgoing(void * fdt_out)441 int __init luo_session_setup_outgoing(void *fdt_out)
442 {
443 	struct luo_session_header_ser *header_ser;
444 	u64 header_ser_pa;
445 	int err;
446 
447 	header_ser = kho_alloc_preserve(LUO_SESSION_PGCNT << PAGE_SHIFT);
448 	if (IS_ERR(header_ser))
449 		return PTR_ERR(header_ser);
450 	header_ser_pa = virt_to_phys(header_ser);
451 
452 	err = fdt_begin_node(fdt_out, LUO_FDT_SESSION_NODE_NAME);
453 	err |= fdt_property_string(fdt_out, "compatible",
454 				   LUO_FDT_SESSION_COMPATIBLE);
455 	err |= fdt_property(fdt_out, LUO_FDT_SESSION_HEADER, &header_ser_pa,
456 			    sizeof(header_ser_pa));
457 	err |= fdt_end_node(fdt_out);
458 
459 	if (err)
460 		goto err_unpreserve;
461 
462 	luo_session_global.outgoing.header_ser = header_ser;
463 	luo_session_global.outgoing.ser = (void *)(header_ser + 1);
464 	luo_session_global.outgoing.active = true;
465 
466 	return 0;
467 
468 err_unpreserve:
469 	kho_unpreserve_free(header_ser);
470 	return err;
471 }
472 
luo_session_setup_incoming(void * fdt_in)473 int __init luo_session_setup_incoming(void *fdt_in)
474 {
475 	struct luo_session_header_ser *header_ser;
476 	int err, header_size, offset;
477 	u64 header_ser_pa;
478 	const void *ptr;
479 
480 	offset = fdt_subnode_offset(fdt_in, 0, LUO_FDT_SESSION_NODE_NAME);
481 	if (offset < 0) {
482 		pr_err("Unable to get session node: [%s]\n",
483 		       LUO_FDT_SESSION_NODE_NAME);
484 		return -EINVAL;
485 	}
486 
487 	err = fdt_node_check_compatible(fdt_in, offset,
488 					LUO_FDT_SESSION_COMPATIBLE);
489 	if (err) {
490 		pr_err("Session node incompatible [%s]\n",
491 		       LUO_FDT_SESSION_COMPATIBLE);
492 		return -EINVAL;
493 	}
494 
495 	header_size = 0;
496 	ptr = fdt_getprop(fdt_in, offset, LUO_FDT_SESSION_HEADER, &header_size);
497 	if (!ptr || header_size != sizeof(u64)) {
498 		pr_err("Unable to get session header '%s' [%d]\n",
499 		       LUO_FDT_SESSION_HEADER, header_size);
500 		return -EINVAL;
501 	}
502 
503 	header_ser_pa = get_unaligned((u64 *)ptr);
504 	header_ser = phys_to_virt(header_ser_pa);
505 
506 	luo_session_global.incoming.header_ser = header_ser;
507 	luo_session_global.incoming.ser = (void *)(header_ser + 1);
508 	luo_session_global.incoming.active = true;
509 
510 	return 0;
511 }
512 
luo_session_deserialize(void)513 int luo_session_deserialize(void)
514 {
515 	struct luo_session_header *sh = &luo_session_global.incoming;
516 	static bool is_deserialized;
517 	static int err;
518 
519 	/* If has been deserialized, always return the same error code */
520 	if (is_deserialized)
521 		return err;
522 
523 	is_deserialized = true;
524 	if (!sh->active)
525 		return 0;
526 
527 	/*
528 	 * Note on error handling:
529 	 *
530 	 * If deserialization fails (e.g., allocation failure or corrupt data),
531 	 * we intentionally skip cleanup of sessions that were already restored.
532 	 *
533 	 * A partial failure leaves the preserved state inconsistent.
534 	 * Implementing a safe "undo" to unwind complex dependencies (sessions,
535 	 * files, hardware state) is error-prone and provides little value, as
536 	 * the system is effectively in a broken state.
537 	 *
538 	 * We treat these resources as leaked. The expected recovery path is for
539 	 * userspace to detect the failure and trigger a reboot, which will
540 	 * reliably reset devices and reclaim memory.
541 	 */
542 	for (int i = 0; i < sh->header_ser->count; i++) {
543 		struct luo_session *session;
544 
545 		session = luo_session_alloc(sh->ser[i].name);
546 		if (IS_ERR(session)) {
547 			pr_warn("Failed to allocate session [%s] during deserialization %pe\n",
548 				sh->ser[i].name, session);
549 			return PTR_ERR(session);
550 		}
551 
552 		err = luo_session_insert(sh, session);
553 		if (err) {
554 			pr_warn("Failed to insert session [%s] %pe\n",
555 				session->name, ERR_PTR(err));
556 			luo_session_free(session);
557 			return err;
558 		}
559 
560 		scoped_guard(mutex, &session->mutex) {
561 			luo_file_deserialize(&session->file_set,
562 					     &sh->ser[i].file_set_ser);
563 		}
564 	}
565 
566 	kho_restore_free(sh->header_ser);
567 	sh->header_ser = NULL;
568 	sh->ser = NULL;
569 
570 	return 0;
571 }
572 
luo_session_serialize(void)573 int luo_session_serialize(void)
574 {
575 	struct luo_session_header *sh = &luo_session_global.outgoing;
576 	struct luo_session *session;
577 	int i = 0;
578 	int err;
579 
580 	guard(rwsem_write)(&sh->rwsem);
581 	list_for_each_entry(session, &sh->list, list) {
582 		err = luo_session_freeze_one(session, &sh->ser[i]);
583 		if (err)
584 			goto err_undo;
585 
586 		strscpy(sh->ser[i].name, session->name,
587 			sizeof(sh->ser[i].name));
588 		i++;
589 	}
590 	sh->header_ser->count = sh->count;
591 
592 	return 0;
593 
594 err_undo:
595 	list_for_each_entry_continue_reverse(session, &sh->list, list) {
596 		i--;
597 		luo_session_unfreeze_one(session, &sh->ser[i]);
598 		memset(sh->ser[i].name, 0, sizeof(sh->ser[i].name));
599 	}
600 
601 	return err;
602 }
603 
604 /**
605  * luo_session_quiesce - Ensure no active sessions exist and lock session lists.
606  *
607  * Acquires exclusive write locks on both incoming and outgoing session lists.
608  * It then validates no sessions exist in either list.
609  *
610  * This mechanism is used during file handler un/registration to ensure that no
611  * sessions are currently using the handler, and no new sessions can be created
612  * while un/registration is in progress.
613  *
614  * This prevents registering new handlers while sessions are active or
615  * while deserialization is in progress.
616  *
617  * Return:
618  * true  - System is quiescent (0 sessions) and locked.
619  * false - Active sessions exist. The locks are released internally.
620  */
luo_session_quiesce(void)621 bool luo_session_quiesce(void)
622 {
623 	down_write(&luo_session_global.incoming.rwsem);
624 	down_write(&luo_session_global.outgoing.rwsem);
625 
626 	if (luo_session_global.incoming.count ||
627 	    luo_session_global.outgoing.count) {
628 		up_write(&luo_session_global.outgoing.rwsem);
629 		up_write(&luo_session_global.incoming.rwsem);
630 		return false;
631 	}
632 
633 	return true;
634 }
635 
636 /**
637  * luo_session_resume - Unlock session lists and resume normal activity.
638  *
639  * Releases the exclusive locks acquired by a successful call to
640  * luo_session_quiesce().
641  */
luo_session_resume(void)642 void luo_session_resume(void)
643 {
644 	up_write(&luo_session_global.outgoing.rwsem);
645 	up_write(&luo_session_global.incoming.rwsem);
646 }
647