xref: /freebsd/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c (revision d8fbbd371ca11d9ad4b29b9d3a316885a5da0b15)
1 // SPDX-License-Identifier: BSD-2-Clause
2 /*
3  * Copyright (c) 2021 Klara Systems, Inc.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 /*
29  * Copyright (c) 2025, Rob Norris <robn@despairlabs.com>
30  */
31 
32 #include <sys/types.h>
33 #include <sys/sysmacros.h>
34 #include <sys/kmem.h>
35 #include <linux/file.h>
36 #include <linux/magic.h>
37 #include <sys/zone.h>
38 #include <sys/string.h>
39 
40 #if defined(CONFIG_USER_NS)
41 #include <linux/statfs.h>
42 #include <linux/proc_ns.h>
43 #endif
44 
45 #include <sys/mutex.h>
46 
/*
 * zone_datasets_lock is held by the attach, detach and visibility paths in
 * this file while they walk or modify the delegation lists.  zone_datasets
 * is the list head for the per-user-namespace entries (zone_datasets_t,
 * linked through zds_list).
 */
47 static kmutex_t zone_datasets_lock;
48 static struct list_head zone_datasets;
49 
/*
 * One entry per user namespace that has datasets attached to it.  The entry
 * is created by the first attach for a namespace and removed again when its
 * last dataset is detached.
 */
50 typedef struct zone_datasets {
51 	struct list_head zds_list;	/* zone_datasets linkage */
52 	struct user_namespace *zds_userns; /* namespace reference */
53 	struct list_head zds_datasets;	/* datasets for the namespace */
54 } zone_datasets_t;
55 
/*
 * A single delegated dataset name.  The structure is allocated with
 * zd_dsnamelen + 1 extra bytes so that the NUL-terminated name is stored
 * inline in the flexible array member; the same size must be passed when
 * the entry is freed.
 */
56 typedef struct zone_dataset {
57 	struct list_head zd_list;	/* zone_dataset linkage */
58 	size_t zd_dsnamelen;		/* length of name */
59 	char zd_dsname[];		/* name of the member dataset */
60 } zone_dataset_t;
61 
62 /*
63  * UID-based dataset zoning: allows delegating datasets to all user
64  * namespaces owned by a specific UID, enabling rootless container support.
65  */
/*
 * One entry per owner UID; its zuds_datasets list holds zone_dataset_t
 * members, the same element type used by the per-namespace lists.
 */
66 typedef struct zone_uid_datasets {
67 	struct list_head zuds_list;	/* zone_uid_datasets linkage */
68 	kuid_t zuds_owner;		/* owner UID */
69 	struct list_head zuds_datasets;	/* datasets for this UID */
70 } zone_uid_datasets_t;
71 
/*
 * List head for the per-UID entries (zone_uid_datasets_t, linked through
 * zuds_list).  Accessed under zone_datasets_lock by the attach, detach and
 * visibility paths.
 */
72 static struct list_head zone_uid_datasets;
73 
74 #ifdef CONFIG_USER_NS
75 
76 /*
77  * Linux 6.18 moved the generic namespace type away from ns->ops->type onto
78  * ns_common itself.
79  */
/*
 * ns_is_newuser(ns) is true when the given struct ns_common describes a
 * user namespace, i.e. its type is CLONE_NEWUSER.  The fallback variant
 * additionally treats a NULL ops pointer as "not a user namespace".
 */
80 #ifdef HAVE_NS_COMMON_TYPE
81 #define	ns_is_newuser(ns)	\
82 	((ns)->ns_type == CLONE_NEWUSER)
83 #else
84 #define	ns_is_newuser(ns)	\
85 	((ns)->ops != NULL && (ns)->ops->type == CLONE_NEWUSER)
86 #endif
87 
88 /*
89  * Returns:
90  * - 0 on success
91  * - EBADF if it cannot open the provided file descriptor
92  * - ENOTTY if the file itself is a not a user namespace file. We want to
93  *   intercept this error in the ZFS layer. We cannot just return one of the
94  *   ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS
95  *   and the SPL layers.
96  */
97 static int
user_ns_get(int fd,struct user_namespace ** userns)98 user_ns_get(int fd, struct user_namespace **userns)
99 {
100 	struct kstatfs st;
101 	struct file *nsfile;
102 	struct ns_common *ns;
103 	int error;
104 
105 	if ((nsfile = fget(fd)) == NULL)
106 		return (EBADF);
107 	if (vfs_statfs(&nsfile->f_path, &st) != 0) {
108 		error = ENOTTY;
109 		goto done;
110 	}
111 	if (st.f_type != NSFS_MAGIC) {
112 		error = ENOTTY;
113 		goto done;
114 	}
115 	ns = get_proc_ns(file_inode(nsfile));
116 	if (!ns_is_newuser(ns)) {
117 		error = ENOTTY;
118 		goto done;
119 	}
120 	*userns = container_of(ns, struct user_namespace, ns);
121 
122 	error = 0;
123 done:
124 	fput(nsfile);
125 
126 	return (error);
127 }
128 #endif /* CONFIG_USER_NS */
129 
130 static unsigned int
user_ns_zoneid(struct user_namespace * user_ns)131 user_ns_zoneid(struct user_namespace *user_ns)
132 {
133 	unsigned int r;
134 
135 	r = user_ns->ns.inum;
136 
137 	return (r);
138 }
139 
140 static struct zone_datasets *
zone_datasets_lookup(unsigned int nsinum)141 zone_datasets_lookup(unsigned int nsinum)
142 {
143 	zone_datasets_t *zds;
144 
145 	list_for_each_entry(zds, &zone_datasets, zds_list) {
146 		if (user_ns_zoneid(zds->zds_userns) == nsinum)
147 			return (zds);
148 	}
149 	return (NULL);
150 }
151 
152 #ifdef CONFIG_USER_NS
153 static zone_uid_datasets_t *
zone_uid_datasets_lookup(kuid_t owner)154 zone_uid_datasets_lookup(kuid_t owner)
155 {
156 	zone_uid_datasets_t *zuds;
157 
158 	list_for_each_entry(zuds, &zone_uid_datasets, zuds_list) {
159 		if (uid_eq(zuds->zuds_owner, owner))
160 			return (zuds);
161 	}
162 	return (NULL);
163 }
164 
165 static struct zone_dataset *
zone_dataset_lookup(zone_datasets_t * zds,const char * dataset,size_t dsnamelen)166 zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
167 {
168 	zone_dataset_t *zd;
169 
170 	list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
171 		if (zd->zd_dsnamelen != dsnamelen)
172 			continue;
173 		if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0)
174 			return (zd);
175 	}
176 
177 	return (NULL);
178 }
179 
180 static int
zone_dataset_cred_check(cred_t * cred)181 zone_dataset_cred_check(cred_t *cred)
182 {
183 
184 	if (!uid_eq(cred->uid, GLOBAL_ROOT_UID))
185 		return (EPERM);
186 
187 	return (0);
188 }
189 #endif /* CONFIG_USER_NS */
190 
/*
 * Validate a dataset name and compute its effective length.
 *
 * An empty name or one that begins with '/' is rejected with ENOENT and
 * *dsnamelen is left untouched.  Otherwise *dsnamelen is set to the string
 * length, not counting a single trailing '/' if one was supplied, and 0 is
 * returned.
 */
static int
zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
{
	size_t len;

	switch (dataset[0]) {
	case '\0':
	case '/':
		return (ENOENT);
	default:
		break;
	}

	/* The name is non-empty here, so len - 1 is a valid index. */
	len = strlen(dataset);
	if (dataset[len - 1] == '/')
		len--;
	*dsnamelen = len;

	return (0);
}
205 
206 int
zone_dataset_attach(cred_t * cred,const char * dataset,int userns_fd)207 zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
208 {
209 #ifdef CONFIG_USER_NS
210 	struct user_namespace *userns;
211 	zone_datasets_t *zds;
212 	zone_dataset_t *zd;
213 	int error;
214 	size_t dsnamelen;
215 
216 	if ((error = zone_dataset_cred_check(cred)) != 0)
217 		return (error);
218 	if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
219 		return (error);
220 	if ((error = user_ns_get(userns_fd, &userns)) != 0)
221 		return (error);
222 
223 	mutex_enter(&zone_datasets_lock);
224 	zds = zone_datasets_lookup(user_ns_zoneid(userns));
225 	if (zds == NULL) {
226 		zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP);
227 		INIT_LIST_HEAD(&zds->zds_list);
228 		INIT_LIST_HEAD(&zds->zds_datasets);
229 		zds->zds_userns = userns;
230 		/*
231 		 * Lock the namespace by incresing its refcount to prevent
232 		 * the namespace ID from being reused.
233 		 */
234 		get_user_ns(userns);
235 		list_add_tail(&zds->zds_list, &zone_datasets);
236 	} else {
237 		zd = zone_dataset_lookup(zds, dataset, dsnamelen);
238 		if (zd != NULL) {
239 			mutex_exit(&zone_datasets_lock);
240 			return (EEXIST);
241 		}
242 	}
243 
244 	zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
245 	zd->zd_dsnamelen = dsnamelen;
246 	strlcpy(zd->zd_dsname, dataset, dsnamelen + 1);
247 	INIT_LIST_HEAD(&zd->zd_list);
248 	list_add_tail(&zd->zd_list, &zds->zds_datasets);
249 
250 	mutex_exit(&zone_datasets_lock);
251 	return (0);
252 #else
253 	return (ENXIO);
254 #endif /* CONFIG_USER_NS */
255 }
256 EXPORT_SYMBOL(zone_dataset_attach);
257 
258 int
zone_dataset_attach_uid(cred_t * cred,const char * dataset,uid_t owner_uid)259 zone_dataset_attach_uid(cred_t *cred, const char *dataset, uid_t owner_uid)
260 {
261 #ifdef CONFIG_USER_NS
262 	zone_uid_datasets_t *zuds;
263 	zone_dataset_t *zd;
264 	int error;
265 	size_t dsnamelen;
266 	kuid_t kowner;
267 
268 	/* Only root can attach datasets to UIDs */
269 	if ((error = zone_dataset_cred_check(cred)) != 0)
270 		return (error);
271 	if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
272 		return (error);
273 
274 	kowner = make_kuid(current_user_ns(), owner_uid);
275 	if (!uid_valid(kowner))
276 		return (EINVAL);
277 
278 	mutex_enter(&zone_datasets_lock);
279 
280 	/* Find or create UID entry */
281 	zuds = zone_uid_datasets_lookup(kowner);
282 	if (zuds == NULL) {
283 		zuds = kmem_alloc(sizeof (zone_uid_datasets_t), KM_SLEEP);
284 		INIT_LIST_HEAD(&zuds->zuds_list);
285 		INIT_LIST_HEAD(&zuds->zuds_datasets);
286 		zuds->zuds_owner = kowner;
287 		list_add_tail(&zuds->zuds_list, &zone_uid_datasets);
288 	} else {
289 		/* Check if dataset already attached */
290 		list_for_each_entry(zd, &zuds->zuds_datasets, zd_list) {
291 			if (zd->zd_dsnamelen == dsnamelen &&
292 			    strncmp(zd->zd_dsname, dataset, dsnamelen) == 0) {
293 				mutex_exit(&zone_datasets_lock);
294 				return (EEXIST);
295 			}
296 		}
297 	}
298 
299 	/* Add dataset to UID's list */
300 	zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
301 	zd->zd_dsnamelen = dsnamelen;
302 	strlcpy(zd->zd_dsname, dataset, dsnamelen + 1);
303 	INIT_LIST_HEAD(&zd->zd_list);
304 	list_add_tail(&zd->zd_list, &zuds->zuds_datasets);
305 
306 	mutex_exit(&zone_datasets_lock);
307 	return (0);
308 #else
309 	return (ENXIO);
310 #endif /* CONFIG_USER_NS */
311 }
312 EXPORT_SYMBOL(zone_dataset_attach_uid);
313 
314 int
zone_dataset_detach(cred_t * cred,const char * dataset,int userns_fd)315 zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
316 {
317 #ifdef CONFIG_USER_NS
318 	struct user_namespace *userns;
319 	zone_datasets_t *zds;
320 	zone_dataset_t *zd;
321 	int error;
322 	size_t dsnamelen;
323 
324 	if ((error = zone_dataset_cred_check(cred)) != 0)
325 		return (error);
326 	if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
327 		return (error);
328 	if ((error = user_ns_get(userns_fd, &userns)) != 0)
329 		return (error);
330 
331 	mutex_enter(&zone_datasets_lock);
332 	zds = zone_datasets_lookup(user_ns_zoneid(userns));
333 	if (zds != NULL)
334 		zd = zone_dataset_lookup(zds, dataset, dsnamelen);
335 	if (zds == NULL || zd == NULL) {
336 		mutex_exit(&zone_datasets_lock);
337 		return (ENOENT);
338 	}
339 
340 	list_del(&zd->zd_list);
341 	kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
342 
343 	/* Prune the namespace entry if it has no more delegations. */
344 	if (list_empty(&zds->zds_datasets)) {
345 		/*
346 		 * Decrease the refcount now that the namespace is no longer
347 		 * used. It is no longer necessary to prevent the namespace ID
348 		 * from being reused.
349 		 */
350 		put_user_ns(userns);
351 		list_del(&zds->zds_list);
352 		kmem_free(zds, sizeof (*zds));
353 	}
354 
355 	mutex_exit(&zone_datasets_lock);
356 	return (0);
357 #else
358 	return (ENXIO);
359 #endif /* CONFIG_USER_NS */
360 }
361 EXPORT_SYMBOL(zone_dataset_detach);
362 
363 int
zone_dataset_detach_uid(cred_t * cred,const char * dataset,uid_t owner_uid)364 zone_dataset_detach_uid(cred_t *cred, const char *dataset, uid_t owner_uid)
365 {
366 #ifdef CONFIG_USER_NS
367 	zone_uid_datasets_t *zuds;
368 	zone_dataset_t *zd;
369 	int error;
370 	size_t dsnamelen;
371 	kuid_t kowner;
372 
373 	if ((error = zone_dataset_cred_check(cred)) != 0)
374 		return (error);
375 	if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
376 		return (error);
377 
378 	kowner = make_kuid(current_user_ns(), owner_uid);
379 	if (!uid_valid(kowner))
380 		return (EINVAL);
381 
382 	mutex_enter(&zone_datasets_lock);
383 
384 	zuds = zone_uid_datasets_lookup(kowner);
385 	if (zuds == NULL) {
386 		mutex_exit(&zone_datasets_lock);
387 		return (ENOENT);
388 	}
389 
390 	/* Find and remove dataset */
391 	list_for_each_entry(zd, &zuds->zuds_datasets, zd_list) {
392 		if (zd->zd_dsnamelen == dsnamelen &&
393 		    strncmp(zd->zd_dsname, dataset, dsnamelen) == 0) {
394 			list_del(&zd->zd_list);
395 			kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
396 
397 			/* Remove UID entry if no more datasets */
398 			if (list_empty(&zuds->zuds_datasets)) {
399 				list_del(&zuds->zuds_list);
400 				kmem_free(zuds, sizeof (*zuds));
401 			}
402 
403 			mutex_exit(&zone_datasets_lock);
404 			return (0);
405 		}
406 	}
407 
408 	mutex_exit(&zone_datasets_lock);
409 	return (ENOENT);
410 #else
411 	return (ENXIO);
412 #endif /* CONFIG_USER_NS */
413 }
414 EXPORT_SYMBOL(zone_dataset_detach_uid);
415 
416 /*
417  * Callback for looking up zoned_uid property (registered by ZFS module).
418  */
/*
 * NULL while no callback is registered.  As used in this file, the callback
 * takes a dataset name plus an output buffer and its size, and returns a
 * uid_t; a return of 0 is treated as "no delegation found".
 */
419 static zone_get_zoned_uid_fn_t zone_get_zoned_uid_fn = NULL;
420 
421 void
zone_register_zoned_uid_callback(zone_get_zoned_uid_fn_t fn)422 zone_register_zoned_uid_callback(zone_get_zoned_uid_fn_t fn)
423 {
424 	zone_get_zoned_uid_fn = fn;
425 }
426 EXPORT_SYMBOL(zone_register_zoned_uid_callback);
427 
428 void
zone_unregister_zoned_uid_callback(void)429 zone_unregister_zoned_uid_callback(void)
430 {
431 	zone_get_zoned_uid_fn = NULL;
432 }
433 EXPORT_SYMBOL(zone_unregister_zoned_uid_callback);
434 
435 #ifdef CONFIG_USER_NS
436 /*
437  * Check if a dataset is the delegation root (has zoned_uid set locally).
438  */
439 static boolean_t
zone_dataset_is_zoned_uid_root(const char * dataset,uid_t zoned_uid)440 zone_dataset_is_zoned_uid_root(const char *dataset, uid_t zoned_uid)
441 {
442 	char *root;
443 	uid_t found_uid;
444 	boolean_t is_root;
445 
446 	if (zone_get_zoned_uid_fn == NULL)
447 		return (B_FALSE);
448 
449 	root = kmem_alloc(MAXPATHLEN, KM_SLEEP);
450 	found_uid = zone_get_zoned_uid_fn(dataset, root, MAXPATHLEN);
451 	is_root = (found_uid == zoned_uid && strcmp(root, dataset) == 0);
452 	kmem_free(root, MAXPATHLEN);
453 	return (is_root);
454 }
455 #endif /* CONFIG_USER_NS */
456 
457 /*
458  * Core authorization check for zoned_uid write delegation.
459  */
460 zone_admin_result_t
zone_dataset_admin_check(const char * dataset,zone_uid_op_t op,const char * aux_dataset)461 zone_dataset_admin_check(const char *dataset, zone_uid_op_t op,
462     const char *aux_dataset)
463 {
464 #ifdef CONFIG_USER_NS
465 	struct user_namespace *user_ns;
466 	char *delegation_root;
467 	uid_t zoned_uid, ns_owner_uid;
468 	int write_unused;
469 	zone_admin_result_t result = ZONE_ADMIN_NOT_APPLICABLE;
470 
471 	/* Step 1: If in global zone, not applicable */
472 	if (INGLOBALZONE(curproc))
473 		return (ZONE_ADMIN_NOT_APPLICABLE);
474 
475 	/* Step 2: Need callback to be registered */
476 	if (zone_get_zoned_uid_fn == NULL)
477 		return (ZONE_ADMIN_NOT_APPLICABLE);
478 
479 	delegation_root = kmem_alloc(MAXPATHLEN, KM_SLEEP);
480 
481 	/* Step 3: Find delegation root */
482 	zoned_uid = zone_get_zoned_uid_fn(dataset, delegation_root,
483 	    MAXPATHLEN);
484 	if (zoned_uid == 0)
485 		goto out;
486 
487 	/* Step 4: Verify namespace owner matches */
488 	user_ns = current_user_ns();
489 	ns_owner_uid = from_kuid(&init_user_ns, user_ns->owner);
490 	if (ns_owner_uid != zoned_uid)
491 		goto out;
492 
493 	/* Step 5: Tiered capability check based on operation class */
494 	{
495 		int required_cap;
496 		switch (op) {
497 		case ZONE_OP_DESTROY:
498 		case ZONE_OP_RENAME:
499 		case ZONE_OP_CLONE:
500 			required_cap = CAP_SYS_ADMIN;
501 			break;
502 		case ZONE_OP_CREATE:
503 		case ZONE_OP_SNAPSHOT:
504 		case ZONE_OP_SETPROP:
505 			required_cap = CAP_FOWNER;
506 			break;
507 		default:
508 			required_cap = CAP_SYS_ADMIN;
509 			break;
510 		}
511 		if (!ns_capable(user_ns, required_cap)) {
512 			result = ZONE_ADMIN_DENIED;
513 			goto out;
514 		}
515 	}
516 
517 	/* Step 6: Operation-specific constraints */
518 	switch (op) {
519 	case ZONE_OP_DESTROY:
520 		/* Cannot destroy the delegation root itself */
521 		if (zone_dataset_is_zoned_uid_root(dataset, zoned_uid)) {
522 			result = ZONE_ADMIN_DENIED;
523 			goto out;
524 		}
525 		break;
526 
527 	case ZONE_OP_RENAME:
528 		/* Cannot rename outside delegation subtree */
529 		if (aux_dataset != NULL) {
530 			char *dst_root;
531 			uid_t dst_uid;
532 
533 			dst_root = kmem_alloc(MAXPATHLEN, KM_SLEEP);
534 			dst_uid = zone_get_zoned_uid_fn(aux_dataset,
535 			    dst_root, MAXPATHLEN);
536 			if (dst_uid != zoned_uid ||
537 			    strcmp(dst_root, delegation_root) != 0) {
538 				kmem_free(dst_root, MAXPATHLEN);
539 				result = ZONE_ADMIN_DENIED;
540 				goto out;
541 			}
542 			kmem_free(dst_root, MAXPATHLEN);
543 		}
544 		break;
545 
546 	case ZONE_OP_CLONE:
547 		/* Clone source must be visible */
548 		if (aux_dataset != NULL) {
549 			if (!zone_dataset_visible(aux_dataset, &write_unused)) {
550 				result = ZONE_ADMIN_DENIED;
551 				goto out;
552 			}
553 		}
554 		break;
555 
556 	case ZONE_OP_CREATE:
557 	case ZONE_OP_SNAPSHOT:
558 	case ZONE_OP_SETPROP:
559 		/* No additional constraints */
560 		break;
561 	}
562 
563 	result = ZONE_ADMIN_ALLOWED;
564 out:
565 	kmem_free(delegation_root, MAXPATHLEN);
566 	return (result);
567 #else
568 	(void) dataset, (void) op, (void) aux_dataset;
569 	return (ZONE_ADMIN_NOT_APPLICABLE);
570 #endif
571 }
572 EXPORT_SYMBOL(zone_dataset_admin_check);
573 
574 /*
575  * A dataset is visible if:
576  * - It is a parent of a namespace entry.
577  * - It is one of the namespace entries.
578  * - It is a child of a namespace entry.
579  *
580  * A dataset is writable if:
581  * - It is one of the namespace entries.
582  * - It is a child of a namespace entry.
583  *
584  * The parent datasets of namespace entries are visible and
585  * read-only to provide a path back to the root of the pool.
586  */
587 /*
588  * Helper function to check if a dataset matches against a list of
589  * delegated datasets. Returns visibility and sets write permission.
590  */
591 static int
zone_dataset_check_list(struct list_head * datasets,const char * dataset,size_t dsnamelen,int * write)592 zone_dataset_check_list(struct list_head *datasets, const char *dataset,
593     size_t dsnamelen, int *write)
594 {
595 	zone_dataset_t *zd;
596 	size_t zd_len;
597 	int visible = 0;
598 
599 	list_for_each_entry(zd, datasets, zd_list) {
600 		zd_len = strlen(zd->zd_dsname);
601 		if (zd_len > dsnamelen) {
602 			/*
603 			 * The name of the namespace entry is longer than that
604 			 * of the dataset, so it could be that the dataset is a
605 			 * parent of the namespace entry.
606 			 */
607 			visible = memcmp(zd->zd_dsname, dataset,
608 			    dsnamelen) == 0 &&
609 			    zd->zd_dsname[dsnamelen] == '/';
610 			if (visible)
611 				break;
612 		} else if (zd_len == dsnamelen) {
613 			/*
614 			 * The name of the namespace entry is as long as that
615 			 * of the dataset, so perhaps the dataset itself is the
616 			 * namespace entry.
617 			 */
618 			visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0;
619 			if (visible) {
620 				if (write != NULL)
621 					*write = 1;
622 				break;
623 			}
624 		} else {
625 			/*
626 			 * The name of the namespace entry is shorter than that
627 			 * of the dataset, so perhaps the dataset is a child of
628 			 * the namespace entry.
629 			 */
630 			visible = memcmp(zd->zd_dsname, dataset,
631 			    zd_len) == 0 && (dataset[zd_len] == '/' ||
632 			    dataset[zd_len] == '@' || dataset[zd_len] == '#');
633 			if (visible) {
634 				if (write != NULL)
635 					*write = 1;
636 				break;
637 			}
638 		}
639 	}
640 
641 	return (visible);
642 }
643 
644 #if defined(CONFIG_USER_NS)
645 /*
646  * Check UID-based zoning visibility for the current process.
647  * Must be called with zone_datasets_lock held.
648  */
649 static int
zone_dataset_visible_uid(const char * dataset,size_t dsnamelen,int * write)650 zone_dataset_visible_uid(const char *dataset, size_t dsnamelen, int *write)
651 {
652 	zone_uid_datasets_t *zuds;
653 
654 	zuds = zone_uid_datasets_lookup(curproc->cred->user_ns->owner);
655 	if (zuds != NULL)
656 		return (zone_dataset_check_list(&zuds->zuds_datasets, dataset,
657 		    dsnamelen, write));
658 	return (0);
659 }
660 #endif
661 
662 int
zone_dataset_visible(const char * dataset,int * write)663 zone_dataset_visible(const char *dataset, int *write)
664 {
665 	zone_datasets_t *zds;
666 	size_t dsnamelen;
667 	int visible;
668 
669 	/* Default to read-only, in case visible is returned. */
670 	if (write != NULL)
671 		*write = 0;
672 	if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
673 		return (0);
674 	if (INGLOBALZONE(curproc)) {
675 		if (write != NULL)
676 			*write = 1;
677 		return (1);
678 	}
679 
680 	mutex_enter(&zone_datasets_lock);
681 
682 	/* First, check namespace-specific zoning (existing behavior) */
683 	zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
684 	if (zds != NULL) {
685 		visible = zone_dataset_check_list(&zds->zds_datasets, dataset,
686 		    dsnamelen, write);
687 		if (visible) {
688 			mutex_exit(&zone_datasets_lock);
689 			return (visible);
690 		}
691 	}
692 
693 	/* Second, check UID-based zoning */
694 #if defined(CONFIG_USER_NS)
695 	visible = zone_dataset_visible_uid(dataset, dsnamelen, write);
696 	if (visible) {
697 		mutex_exit(&zone_datasets_lock);
698 		return (visible);
699 	}
700 #endif
701 
702 	mutex_exit(&zone_datasets_lock);
703 	return (0);
704 }
705 EXPORT_SYMBOL(zone_dataset_visible);
706 
/*
 * Zone ID of the global zone: the ID of init_user_ns when user namespaces
 * are configured, 0 otherwise.
 */
unsigned int
global_zoneid(void)
{
#if defined(CONFIG_USER_NS)
	return (user_ns_zoneid(&init_user_ns));
#else
	return (0);
#endif
}
EXPORT_SYMBOL(global_zoneid);
719 
720 unsigned int
crgetzoneid(const cred_t * cr)721 crgetzoneid(const cred_t *cr)
722 {
723 	unsigned int r = 0;
724 
725 #if defined(CONFIG_USER_NS)
726 	r = user_ns_zoneid(cr->user_ns);
727 #endif
728 
729 	return (r);
730 }
731 EXPORT_SYMBOL(crgetzoneid);
732 
733 boolean_t
inglobalzone(proc_t * proc)734 inglobalzone(proc_t *proc)
735 {
736 	(void) proc;
737 #if defined(CONFIG_USER_NS)
738 	return (current_user_ns() == &init_user_ns);
739 #else
740 	return (B_TRUE);
741 #endif
742 }
743 EXPORT_SYMBOL(inglobalzone);
744 
745 int
spl_zone_init(void)746 spl_zone_init(void)
747 {
748 	mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
749 	INIT_LIST_HEAD(&zone_datasets);
750 	INIT_LIST_HEAD(&zone_uid_datasets);
751 	return (0);
752 }
753 
754 void
spl_zone_fini(void)755 spl_zone_fini(void)
756 {
757 	zone_datasets_t *zds;
758 	zone_uid_datasets_t *zuds;
759 	zone_dataset_t *zd;
760 
761 	/*
762 	 * It would be better to assert an empty zone_datasets, but since
763 	 * there's no automatic mechanism for cleaning them up if the user
764 	 * namespace is destroyed, just do it here, since spl is about to go
765 	 * out of context.
766 	 */
767 
768 	/* Clean up UID-based delegations */
769 	while (!list_empty(&zone_uid_datasets)) {
770 		zuds = list_entry(zone_uid_datasets.next,
771 		    zone_uid_datasets_t, zuds_list);
772 		while (!list_empty(&zuds->zuds_datasets)) {
773 			zd = list_entry(zuds->zuds_datasets.next,
774 			    zone_dataset_t, zd_list);
775 			list_del(&zd->zd_list);
776 			kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
777 		}
778 		list_del(&zuds->zuds_list);
779 		kmem_free(zuds, sizeof (*zuds));
780 	}
781 
782 	/* Clean up namespace-based delegations */
783 	while (!list_empty(&zone_datasets)) {
784 		zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
785 		while (!list_empty(&zds->zds_datasets)) {
786 			zd = list_entry(zds->zds_datasets.next,
787 			    zone_dataset_t, zd_list);
788 			list_del(&zd->zd_list);
789 			kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
790 		}
791 		put_user_ns(zds->zds_userns);
792 		list_del(&zds->zds_list);
793 		kmem_free(zds, sizeof (*zds));
794 	}
795 	mutex_destroy(&zone_datasets_lock);
796 }
797