xref: /linux/kernel/nstree.c (revision 0b3bb205808195159be633a8cefb602670e856fb)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
3 
4 #include <linux/nstree.h>
5 #include <linux/proc_ns.h>
6 #include <linux/rculist.h>
7 #include <linux/vfsdebug.h>
8 #include <linux/syscalls.h>
9 #include <linux/user_namespace.h>
10 
11 static __cacheline_aligned_in_smp DEFINE_SEQLOCK(ns_tree_lock);
12 
13 DEFINE_LOCK_GUARD_0(ns_tree_writer,
14 		    write_seqlock(&ns_tree_lock),
15 		    write_sequnlock(&ns_tree_lock))
16 
17 DEFINE_LOCK_GUARD_0(ns_tree_locked_reader,
18 		    read_seqlock_excl(&ns_tree_lock),
19 		    read_sequnlock_excl(&ns_tree_lock))
20 
21 static struct ns_tree_root ns_unified_root = { /* protected by ns_tree_lock */
22 	.ns_rb = RB_ROOT,
23 	.ns_list_head = LIST_HEAD_INIT(ns_unified_root.ns_list_head),
24 };
25 
26 struct ns_tree_root mnt_ns_tree = {
27 	.ns_rb = RB_ROOT,
28 	.ns_list_head = LIST_HEAD_INIT(mnt_ns_tree.ns_list_head),
29 };
30 
31 struct ns_tree_root net_ns_tree = {
32 	.ns_rb = RB_ROOT,
33 	.ns_list_head = LIST_HEAD_INIT(net_ns_tree.ns_list_head),
34 };
35 EXPORT_SYMBOL_GPL(net_ns_tree);
36 
37 struct ns_tree_root uts_ns_tree = {
38 	.ns_rb = RB_ROOT,
39 	.ns_list_head = LIST_HEAD_INIT(uts_ns_tree.ns_list_head),
40 };
41 
42 struct ns_tree_root user_ns_tree = {
43 	.ns_rb = RB_ROOT,
44 	.ns_list_head = LIST_HEAD_INIT(user_ns_tree.ns_list_head),
45 };
46 
47 struct ns_tree_root ipc_ns_tree = {
48 	.ns_rb = RB_ROOT,
49 	.ns_list_head = LIST_HEAD_INIT(ipc_ns_tree.ns_list_head),
50 };
51 
52 struct ns_tree_root pid_ns_tree = {
53 	.ns_rb = RB_ROOT,
54 	.ns_list_head = LIST_HEAD_INIT(pid_ns_tree.ns_list_head),
55 };
56 
57 struct ns_tree_root cgroup_ns_tree = {
58 	.ns_rb = RB_ROOT,
59 	.ns_list_head = LIST_HEAD_INIT(cgroup_ns_tree.ns_list_head),
60 };
61 
62 struct ns_tree_root time_ns_tree = {
63 	.ns_rb = RB_ROOT,
64 	.ns_list_head = LIST_HEAD_INIT(time_ns_tree.ns_list_head),
65 };
66 
67 /**
68  * ns_tree_node_init - Initialize a namespace tree node
69  * @node: The node to initialize
70  *
71  * Initializes both the rbtree node and list entry.
72  */
ns_tree_node_init(struct ns_tree_node * node)73 void ns_tree_node_init(struct ns_tree_node *node)
74 {
75 	RB_CLEAR_NODE(&node->ns_node);
76 	INIT_LIST_HEAD(&node->ns_list_entry);
77 }
78 
79 /**
80  * ns_tree_root_init - Initialize a namespace tree root
81  * @root: The root to initialize
82  *
83  * Initializes both the rbtree root and list head.
84  */
ns_tree_root_init(struct ns_tree_root * root)85 void ns_tree_root_init(struct ns_tree_root *root)
86 {
87 	root->ns_rb = RB_ROOT;
88 	INIT_LIST_HEAD(&root->ns_list_head);
89 }
90 
91 /**
92  * ns_tree_node_empty - Check if a namespace tree node is empty
93  * @node: The node to check
94  *
95  * Returns true if the node is not in any tree.
96  */
ns_tree_node_empty(const struct ns_tree_node * node)97 bool ns_tree_node_empty(const struct ns_tree_node *node)
98 {
99 	return RB_EMPTY_NODE(&node->ns_node);
100 }
101 
102 /**
103  * ns_tree_node_add - Add a node to a namespace tree
104  * @node: The node to add
105  * @root: The tree root to add to
106  * @cmp: Comparison function for rbtree insertion
107  *
108  * Adds the node to both the rbtree and the list, maintaining sorted order.
109  * The list is maintained in the same order as the rbtree to enable efficient
110  * iteration.
111  *
112  * Returns: NULL if insertion succeeded, existing node if duplicate found
113  */
ns_tree_node_add(struct ns_tree_node * node,struct ns_tree_root * root,int (* cmp)(struct rb_node *,const struct rb_node *))114 struct rb_node *ns_tree_node_add(struct ns_tree_node *node,
115 				  struct ns_tree_root *root,
116 				  int (*cmp)(struct rb_node *, const struct rb_node *))
117 {
118 	struct rb_node *ret, *prev;
119 
120 	/* Add to rbtree */
121 	ret = rb_find_add_rcu(&node->ns_node, &root->ns_rb, cmp);
122 
123 	/* Add to list in sorted order */
124 	prev = rb_prev(&node->ns_node);
125 	if (!prev) {
126 		/* No previous node, add at head */
127 		list_add_rcu(&node->ns_list_entry, &root->ns_list_head);
128 	} else {
129 		/* Add after previous node */
130 		struct ns_tree_node *prev_node;
131 		prev_node = rb_entry(prev, struct ns_tree_node, ns_node);
132 		list_add_rcu(&node->ns_list_entry, &prev_node->ns_list_entry);
133 	}
134 
135 	return ret;
136 }
137 
138 /**
139  * ns_tree_node_del - Remove a node from a namespace tree
140  * @node: The node to remove
141  * @root: The tree root to remove from
142  *
143  * Removes the node from both the rbtree and the list atomically.
144  */
ns_tree_node_del(struct ns_tree_node * node,struct ns_tree_root * root)145 void ns_tree_node_del(struct ns_tree_node *node, struct ns_tree_root *root)
146 {
147 	rb_erase(&node->ns_node, &root->ns_rb);
148 	RB_CLEAR_NODE(&node->ns_node);
149 	list_bidir_del_rcu(&node->ns_list_entry);
150 }
151 
node_to_ns(const struct rb_node * node)152 static inline struct ns_common *node_to_ns(const struct rb_node *node)
153 {
154 	if (!node)
155 		return NULL;
156 	return rb_entry(node, struct ns_common, ns_tree_node.ns_node);
157 }
158 
node_to_ns_unified(const struct rb_node * node)159 static inline struct ns_common *node_to_ns_unified(const struct rb_node *node)
160 {
161 	if (!node)
162 		return NULL;
163 	return rb_entry(node, struct ns_common, ns_unified_node.ns_node);
164 }
165 
node_to_ns_owner(const struct rb_node * node)166 static inline struct ns_common *node_to_ns_owner(const struct rb_node *node)
167 {
168 	if (!node)
169 		return NULL;
170 	return rb_entry(node, struct ns_common, ns_owner_node.ns_node);
171 }
172 
ns_id_cmp(u64 id_a,u64 id_b)173 static int ns_id_cmp(u64 id_a, u64 id_b)
174 {
175 	if (id_a < id_b)
176 		return -1;
177 	if (id_a > id_b)
178 		return 1;
179 	return 0;
180 }
181 
ns_cmp(struct rb_node * a,const struct rb_node * b)182 static int ns_cmp(struct rb_node *a, const struct rb_node *b)
183 {
184 	return ns_id_cmp(node_to_ns(a)->ns_id, node_to_ns(b)->ns_id);
185 }
186 
ns_cmp_unified(struct rb_node * a,const struct rb_node * b)187 static int ns_cmp_unified(struct rb_node *a, const struct rb_node *b)
188 {
189 	return ns_id_cmp(node_to_ns_unified(a)->ns_id, node_to_ns_unified(b)->ns_id);
190 }
191 
ns_cmp_owner(struct rb_node * a,const struct rb_node * b)192 static int ns_cmp_owner(struct rb_node *a, const struct rb_node *b)
193 {
194 	return ns_id_cmp(node_to_ns_owner(a)->ns_id, node_to_ns_owner(b)->ns_id);
195 }
196 
__ns_tree_add_raw(struct ns_common * ns,struct ns_tree_root * ns_tree)197 void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree_root *ns_tree)
198 {
199 	struct rb_node *node;
200 	const struct proc_ns_operations *ops = ns->ops;
201 
202 	VFS_WARN_ON_ONCE(!ns->ns_id);
203 
204 	guard(ns_tree_writer)();
205 
206 	/* Add to per-type tree and list */
207 	node = ns_tree_node_add(&ns->ns_tree_node, ns_tree, ns_cmp);
208 
209 	/* Add to unified tree and list */
210 	ns_tree_node_add(&ns->ns_unified_node, &ns_unified_root, ns_cmp_unified);
211 
212 	/* Add to owner's tree if applicable */
213 	if (ops) {
214 		struct user_namespace *user_ns;
215 
216 		VFS_WARN_ON_ONCE(!ops->owner);
217 		user_ns = ops->owner(ns);
218 		if (user_ns) {
219 			struct ns_common *owner = &user_ns->ns;
220 			VFS_WARN_ON_ONCE(owner->ns_type != CLONE_NEWUSER);
221 
222 			/* Insert into owner's tree and list */
223 			ns_tree_node_add(&ns->ns_owner_node, &owner->ns_owner_root, ns_cmp_owner);
224 		} else {
225 			/* Only the initial user namespace doesn't have an owner. */
226 			VFS_WARN_ON_ONCE(ns != to_ns_common(&init_user_ns));
227 		}
228 	}
229 
230 	VFS_WARN_ON_ONCE(node);
231 }
232 
__ns_tree_remove(struct ns_common * ns,struct ns_tree_root * ns_tree)233 void __ns_tree_remove(struct ns_common *ns, struct ns_tree_root *ns_tree)
234 {
235 	const struct proc_ns_operations *ops = ns->ops;
236 	struct user_namespace *user_ns;
237 
238 	VFS_WARN_ON_ONCE(ns_tree_node_empty(&ns->ns_tree_node));
239 	VFS_WARN_ON_ONCE(list_empty(&ns->ns_tree_node.ns_list_entry));
240 
241 	write_seqlock(&ns_tree_lock);
242 
243 	/* Remove from per-type tree and list */
244 	ns_tree_node_del(&ns->ns_tree_node, ns_tree);
245 
246 	/* Remove from unified tree and list */
247 	ns_tree_node_del(&ns->ns_unified_node, &ns_unified_root);
248 
249 	/* Remove from owner's tree if applicable */
250 	if (ops) {
251 		user_ns = ops->owner(ns);
252 		if (user_ns) {
253 			struct ns_common *owner = &user_ns->ns;
254 			ns_tree_node_del(&ns->ns_owner_node, &owner->ns_owner_root);
255 		}
256 	}
257 
258 	write_sequnlock(&ns_tree_lock);
259 }
260 EXPORT_SYMBOL_GPL(__ns_tree_remove);
261 
ns_find(const void * key,const struct rb_node * node)262 static int ns_find(const void *key, const struct rb_node *node)
263 {
264 	const u64 ns_id = *(u64 *)key;
265 	const struct ns_common *ns = node_to_ns(node);
266 
267 	if (ns_id < ns->ns_id)
268 		return -1;
269 	if (ns_id > ns->ns_id)
270 		return 1;
271 	return 0;
272 }
273 
ns_find_unified(const void * key,const struct rb_node * node)274 static int ns_find_unified(const void *key, const struct rb_node *node)
275 {
276 	const u64 ns_id = *(u64 *)key;
277 	const struct ns_common *ns = node_to_ns_unified(node);
278 
279 	if (ns_id < ns->ns_id)
280 		return -1;
281 	if (ns_id > ns->ns_id)
282 		return 1;
283 	return 0;
284 }
285 
ns_tree_from_type(int ns_type)286 static struct ns_tree_root *ns_tree_from_type(int ns_type)
287 {
288 	switch (ns_type) {
289 	case CLONE_NEWCGROUP:
290 		return &cgroup_ns_tree;
291 	case CLONE_NEWIPC:
292 		return &ipc_ns_tree;
293 	case CLONE_NEWNS:
294 		return &mnt_ns_tree;
295 	case CLONE_NEWNET:
296 		return &net_ns_tree;
297 	case CLONE_NEWPID:
298 		return &pid_ns_tree;
299 	case CLONE_NEWUSER:
300 		return &user_ns_tree;
301 	case CLONE_NEWUTS:
302 		return &uts_ns_tree;
303 	case CLONE_NEWTIME:
304 		return &time_ns_tree;
305 	}
306 
307 	return NULL;
308 }
309 
__ns_unified_tree_lookup_rcu(u64 ns_id)310 static struct ns_common *__ns_unified_tree_lookup_rcu(u64 ns_id)
311 {
312 	struct rb_node *node;
313 	unsigned int seq;
314 
315 	do {
316 		seq = read_seqbegin(&ns_tree_lock);
317 		node = rb_find_rcu(&ns_id, &ns_unified_root.ns_rb, ns_find_unified);
318 		if (node)
319 			break;
320 	} while (read_seqretry(&ns_tree_lock, seq));
321 
322 	return node_to_ns_unified(node);
323 }
324 
__ns_tree_lookup_rcu(u64 ns_id,int ns_type)325 static struct ns_common *__ns_tree_lookup_rcu(u64 ns_id, int ns_type)
326 {
327 	struct ns_tree_root *ns_tree;
328 	struct rb_node *node;
329 	unsigned int seq;
330 
331 	ns_tree = ns_tree_from_type(ns_type);
332 	if (!ns_tree)
333 		return NULL;
334 
335 	do {
336 		seq = read_seqbegin(&ns_tree_lock);
337 		node = rb_find_rcu(&ns_id, &ns_tree->ns_rb, ns_find);
338 		if (node)
339 			break;
340 	} while (read_seqretry(&ns_tree_lock, seq));
341 
342 	return node_to_ns(node);
343 }
344 
ns_tree_lookup_rcu(u64 ns_id,int ns_type)345 struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type)
346 {
347 	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_lookup_rcu() usage");
348 
349 	if (ns_type)
350 		return __ns_tree_lookup_rcu(ns_id, ns_type);
351 
352 	return __ns_unified_tree_lookup_rcu(ns_id);
353 }
354 
355 /**
356  * __ns_tree_adjoined_rcu - find the next/previous namespace in the same
357  * tree
358  * @ns: namespace to start from
359  * @ns_tree: namespace tree to search in
360  * @previous: if true find the previous namespace, otherwise the next
361  *
362  * Find the next or previous namespace in the same tree as @ns. If
363  * there is no next/previous namespace, -ENOENT is returned.
364  */
__ns_tree_adjoined_rcu(struct ns_common * ns,struct ns_tree_root * ns_tree,bool previous)365 struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns,
366 					 struct ns_tree_root *ns_tree, bool previous)
367 {
368 	struct list_head *list;
369 
370 	RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_adjoined_rcu() usage");
371 
372 	if (previous)
373 		list = rcu_dereference(list_bidir_prev_rcu(&ns->ns_tree_node.ns_list_entry));
374 	else
375 		list = rcu_dereference(list_next_rcu(&ns->ns_tree_node.ns_list_entry));
376 	if (list_is_head(list, &ns_tree->ns_list_head))
377 		return ERR_PTR(-ENOENT);
378 
379 	return list_entry_rcu(list, struct ns_common, ns_tree_node.ns_list_entry);
380 }
381 
382 /**
383  * __ns_tree_gen_id - generate a new namespace id
384  * @ns: namespace to generate id for
385  * @id: if non-zero, this is the initial namespace and this is a fixed id
386  *
387  * Generates a new namespace id and assigns it to the namespace. All
388  * namespaces types share the same id space and thus can be compared
389  * directly. IOW, when two ids of two namespace are equal, they are
390  * identical.
391  */
__ns_tree_gen_id(struct ns_common * ns,u64 id)392 u64 __ns_tree_gen_id(struct ns_common *ns, u64 id)
393 {
394 	static atomic64_t namespace_cookie = ATOMIC64_INIT(NS_LAST_INIT_ID + 1);
395 
396 	if (id)
397 		ns->ns_id = id;
398 	else
399 		ns->ns_id = atomic64_inc_return(&namespace_cookie);
400 	return ns->ns_id;
401 }
402 
403 struct klistns {
404 	u64 __user *uns_ids;
405 	u32 nr_ns_ids;
406 	u64 last_ns_id;
407 	u64 user_ns_id;
408 	u32 ns_type;
409 	struct user_namespace *user_ns;
410 	bool userns_capable;
411 	struct ns_common *first_ns;
412 };
413 
__free_klistns_free(const struct klistns * kls)414 static void __free_klistns_free(const struct klistns *kls)
415 {
416 	if (kls->user_ns_id != LISTNS_CURRENT_USER)
417 		put_user_ns(kls->user_ns);
418 	if (kls->first_ns && kls->first_ns->ops)
419 		kls->first_ns->ops->put(kls->first_ns);
420 }
421 
422 #define NS_ALL (PID_NS | USER_NS | MNT_NS | UTS_NS | IPC_NS | NET_NS | CGROUP_NS | TIME_NS)
423 
copy_ns_id_req(const struct ns_id_req __user * req,struct ns_id_req * kreq)424 static int copy_ns_id_req(const struct ns_id_req __user *req,
425 			  struct ns_id_req *kreq)
426 {
427 	int ret;
428 	size_t usize;
429 
430 	BUILD_BUG_ON(sizeof(struct ns_id_req) != NS_ID_REQ_SIZE_VER0);
431 
432 	ret = get_user(usize, &req->size);
433 	if (ret)
434 		return -EFAULT;
435 	if (unlikely(usize > PAGE_SIZE))
436 		return -E2BIG;
437 	if (unlikely(usize < NS_ID_REQ_SIZE_VER0))
438 		return -EINVAL;
439 	memset(kreq, 0, sizeof(*kreq));
440 	ret = copy_struct_from_user(kreq, sizeof(*kreq), req, usize);
441 	if (ret)
442 		return ret;
443 	if (kreq->spare != 0)
444 		return -EINVAL;
445 	if (kreq->ns_type & ~NS_ALL)
446 		return -EOPNOTSUPP;
447 	return 0;
448 }
449 
prepare_klistns(struct klistns * kls,struct ns_id_req * kreq,u64 __user * ns_ids,size_t nr_ns_ids)450 static inline int prepare_klistns(struct klistns *kls, struct ns_id_req *kreq,
451 				  u64 __user *ns_ids, size_t nr_ns_ids)
452 {
453 	kls->last_ns_id = kreq->ns_id;
454 	kls->user_ns_id = kreq->user_ns_id;
455 	kls->nr_ns_ids	= nr_ns_ids;
456 	kls->ns_type	= kreq->ns_type;
457 	kls->uns_ids	= ns_ids;
458 	return 0;
459 }
460 
461 /*
462  * Lookup a namespace owned by owner with id >= ns_id.
463  * Returns the namespace with the smallest id that is >= ns_id.
464  */
lookup_ns_owner_at(u64 ns_id,struct ns_common * owner)465 static struct ns_common *lookup_ns_owner_at(u64 ns_id, struct ns_common *owner)
466 {
467 	struct ns_common *ret = NULL;
468 	struct rb_node *node;
469 
470 	VFS_WARN_ON_ONCE(owner->ns_type != CLONE_NEWUSER);
471 
472 	guard(ns_tree_locked_reader)();
473 
474 	node = owner->ns_owner_root.ns_rb.rb_node;
475 	while (node) {
476 		struct ns_common *ns;
477 
478 		ns = node_to_ns_owner(node);
479 		if (ns_id <= ns->ns_id) {
480 			ret = ns;
481 			if (ns_id == ns->ns_id)
482 				break;
483 			node = node->rb_left;
484 		} else {
485 			node = node->rb_right;
486 		}
487 	}
488 
489 	if (ret)
490 		ret = ns_get_unless_inactive(ret);
491 	return ret;
492 }
493 
lookup_ns_id(u64 mnt_ns_id,int ns_type)494 static struct ns_common *lookup_ns_id(u64 mnt_ns_id, int ns_type)
495 {
496 	struct ns_common *ns;
497 
498 	guard(rcu)();
499 	ns = ns_tree_lookup_rcu(mnt_ns_id, ns_type);
500 	if (!ns)
501 		return NULL;
502 
503 	if (!ns_get_unless_inactive(ns))
504 		return NULL;
505 
506 	return ns;
507 }
508 
ns_requested(const struct klistns * kls,const struct ns_common * ns)509 static inline bool __must_check ns_requested(const struct klistns *kls,
510 					     const struct ns_common *ns)
511 {
512 	return !kls->ns_type || (kls->ns_type & ns->ns_type);
513 }
514 
may_list_ns(const struct klistns * kls,struct ns_common * ns)515 static inline bool __must_check may_list_ns(const struct klistns *kls,
516 					    struct ns_common *ns)
517 {
518 	if (kls->user_ns && kls->userns_capable)
519 		return true;
520 	if (is_current_namespace(ns))
521 		return true;
522 	return may_see_all_namespaces();
523 }
524 
ns_put(struct ns_common * ns)525 static inline void ns_put(struct ns_common *ns)
526 {
527 	if (ns && ns->ops)
528 		ns->ops->put(ns);
529 }
530 
531 DEFINE_FREE(ns_put, struct ns_common *, if (!IS_ERR_OR_NULL(_T)) ns_put(_T))
532 
legitimize_ns(const struct klistns * kls,struct ns_common * candidate)533 static inline struct ns_common *__must_check legitimize_ns(const struct klistns *kls,
534 							   struct ns_common *candidate)
535 {
536 	struct ns_common *ns __free(ns_put) = NULL;
537 
538 	if (!ns_requested(kls, candidate))
539 		return NULL;
540 
541 	ns = ns_get_unless_inactive(candidate);
542 	if (!ns)
543 		return NULL;
544 
545 	if (!may_list_ns(kls, ns))
546 		return NULL;
547 
548 	return no_free_ptr(ns);
549 }
550 
do_listns_userns(struct klistns * kls)551 static ssize_t do_listns_userns(struct klistns *kls)
552 {
553 	u64 __user *ns_ids = kls->uns_ids;
554 	size_t nr_ns_ids = kls->nr_ns_ids;
555 	struct ns_common *ns = NULL, *first_ns = NULL, *prev = NULL;
556 	const struct list_head *head;
557 	ssize_t ret;
558 
559 	VFS_WARN_ON_ONCE(!kls->user_ns_id);
560 
561 	if (kls->user_ns_id == LISTNS_CURRENT_USER)
562 		ns = to_ns_common(current_user_ns());
563 	else if (kls->user_ns_id)
564 		ns = lookup_ns_id(kls->user_ns_id, CLONE_NEWUSER);
565 	if (!ns)
566 		return -EINVAL;
567 	kls->user_ns = to_user_ns(ns);
568 
569 	/*
570 	 * Use the rbtree to find the first namespace we care about and
571 	 * then use it's list entry to iterate from there.
572 	 */
573 	if (kls->last_ns_id) {
574 		kls->first_ns = lookup_ns_owner_at(kls->last_ns_id + 1, ns);
575 		if (!kls->first_ns)
576 			return -ENOENT;
577 		first_ns = kls->first_ns;
578 	}
579 
580 	ret = 0;
581 	head = &to_ns_common(kls->user_ns)->ns_owner_root.ns_list_head;
582 	kls->userns_capable = may_see_all_namespaces();
583 
584 	rcu_read_lock();
585 
586 	if (!first_ns)
587 		first_ns = list_entry_rcu(head->next, typeof(*first_ns), ns_owner_node.ns_list_entry);
588 
589 	ns = first_ns;
590 	list_for_each_entry_from_rcu(ns, head, ns_owner_node.ns_list_entry) {
591 		struct ns_common *valid;
592 
593 		if (!nr_ns_ids)
594 			break;
595 
596 		valid = legitimize_ns(kls, ns);
597 		if (!valid)
598 			continue;
599 
600 		rcu_read_unlock();
601 
602 		ns_put(prev);
603 		prev = valid;
604 
605 		if (put_user(valid->ns_id, ns_ids + ret)) {
606 			ns_put(prev);
607 			return -EFAULT;
608 		}
609 
610 		nr_ns_ids--;
611 		ret++;
612 
613 		rcu_read_lock();
614 	}
615 
616 	rcu_read_unlock();
617 	ns_put(prev);
618 	return ret;
619 }
620 
621 /*
622  * Lookup a namespace with id >= ns_id in either the unified tree or a type-specific tree.
623  * Returns the namespace with the smallest id that is >= ns_id.
624  */
lookup_ns_id_at(u64 ns_id,int ns_type)625 static struct ns_common *lookup_ns_id_at(u64 ns_id, int ns_type)
626 {
627 	struct ns_common *ret = NULL;
628 	struct ns_tree_root *ns_tree = NULL;
629 	struct rb_node *node;
630 
631 	if (ns_type) {
632 		ns_tree = ns_tree_from_type(ns_type);
633 		if (!ns_tree)
634 			return NULL;
635 	}
636 
637 	guard(ns_tree_locked_reader)();
638 
639 	if (ns_tree)
640 		node = ns_tree->ns_rb.rb_node;
641 	else
642 		node = ns_unified_root.ns_rb.rb_node;
643 
644 	while (node) {
645 		struct ns_common *ns;
646 
647 		if (ns_type)
648 			ns = node_to_ns(node);
649 		else
650 			ns = node_to_ns_unified(node);
651 
652 		if (ns_id <= ns->ns_id) {
653 			if (ns_type)
654 				ret = node_to_ns(node);
655 			else
656 				ret = node_to_ns_unified(node);
657 			if (ns_id == ns->ns_id)
658 				break;
659 			node = node->rb_left;
660 		} else {
661 			node = node->rb_right;
662 		}
663 	}
664 
665 	if (ret)
666 		ret = ns_get_unless_inactive(ret);
667 	return ret;
668 }
669 
first_ns_common(const struct list_head * head,struct ns_tree_root * ns_tree)670 static inline struct ns_common *first_ns_common(const struct list_head *head,
671 						struct ns_tree_root *ns_tree)
672 {
673 	if (ns_tree)
674 		return list_entry_rcu(head->next, struct ns_common, ns_tree_node.ns_list_entry);
675 	return list_entry_rcu(head->next, struct ns_common, ns_unified_node.ns_list_entry);
676 }
677 
next_ns_common(struct ns_common * ns,struct ns_tree_root * ns_tree)678 static inline struct ns_common *next_ns_common(struct ns_common *ns,
679 					       struct ns_tree_root *ns_tree)
680 {
681 	if (ns_tree)
682 		return list_entry_rcu(ns->ns_tree_node.ns_list_entry.next, struct ns_common, ns_tree_node.ns_list_entry);
683 	return list_entry_rcu(ns->ns_unified_node.ns_list_entry.next, struct ns_common, ns_unified_node.ns_list_entry);
684 }
685 
ns_common_is_head(struct ns_common * ns,const struct list_head * head,struct ns_tree_root * ns_tree)686 static inline bool ns_common_is_head(struct ns_common *ns,
687 				     const struct list_head *head,
688 				     struct ns_tree_root *ns_tree)
689 {
690 	if (ns_tree)
691 		return &ns->ns_tree_node.ns_list_entry == head;
692 	return &ns->ns_unified_node.ns_list_entry == head;
693 }
694 
do_listns(struct klistns * kls)695 static ssize_t do_listns(struct klistns *kls)
696 {
697 	u64 __user *ns_ids = kls->uns_ids;
698 	size_t nr_ns_ids = kls->nr_ns_ids;
699 	struct ns_common *ns, *first_ns = NULL, *prev = NULL;
700 	struct ns_tree_root *ns_tree = NULL;
701 	const struct list_head *head;
702 	u32 ns_type;
703 	ssize_t ret;
704 
705 	if (hweight32(kls->ns_type) == 1)
706 		ns_type = kls->ns_type;
707 	else
708 		ns_type = 0;
709 
710 	if (ns_type) {
711 		ns_tree = ns_tree_from_type(ns_type);
712 		if (!ns_tree)
713 			return -EINVAL;
714 	}
715 
716 	if (kls->last_ns_id) {
717 		kls->first_ns = lookup_ns_id_at(kls->last_ns_id + 1, ns_type);
718 		if (!kls->first_ns)
719 			return -ENOENT;
720 		first_ns = kls->first_ns;
721 	}
722 
723 	ret = 0;
724 	if (ns_tree)
725 		head = &ns_tree->ns_list_head;
726 	else
727 		head = &ns_unified_root.ns_list_head;
728 
729 	rcu_read_lock();
730 
731 	if (!first_ns)
732 		first_ns = first_ns_common(head, ns_tree);
733 
734 	for (ns = first_ns; !ns_common_is_head(ns, head, ns_tree) && nr_ns_ids;
735 	     ns = next_ns_common(ns, ns_tree)) {
736 		struct ns_common *valid;
737 
738 		valid = legitimize_ns(kls, ns);
739 		if (!valid)
740 			continue;
741 
742 		rcu_read_unlock();
743 
744 		ns_put(prev);
745 		prev = valid;
746 
747 		if (put_user(valid->ns_id, ns_ids + ret)) {
748 			ns_put(prev);
749 			return -EFAULT;
750 		}
751 
752 		nr_ns_ids--;
753 		ret++;
754 
755 		rcu_read_lock();
756 	}
757 
758 	rcu_read_unlock();
759 	ns_put(prev);
760 	return ret;
761 }
762 
SYSCALL_DEFINE4(listns,const struct ns_id_req __user *,req,u64 __user *,ns_ids,size_t,nr_ns_ids,unsigned int,flags)763 SYSCALL_DEFINE4(listns, const struct ns_id_req __user *, req,
764 		u64 __user *, ns_ids, size_t, nr_ns_ids, unsigned int, flags)
765 {
766 	struct klistns klns __free(klistns_free) = {};
767 	const size_t maxcount = 1000000;
768 	struct ns_id_req kreq;
769 	ssize_t ret;
770 
771 	if (flags)
772 		return -EINVAL;
773 
774 	if (unlikely(nr_ns_ids > maxcount))
775 		return -EOVERFLOW;
776 
777 	if (!access_ok(ns_ids, nr_ns_ids * sizeof(*ns_ids)))
778 		return -EFAULT;
779 
780 	ret = copy_ns_id_req(req, &kreq);
781 	if (ret)
782 		return ret;
783 
784 	ret = prepare_klistns(&klns, &kreq, ns_ids, nr_ns_ids);
785 	if (ret)
786 		return ret;
787 
788 	if (kreq.user_ns_id)
789 		return do_listns_userns(&klns);
790 
791 	return do_listns(&klns);
792 }
793