xref: /freebsd/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c (revision d5b0e70f7e04d971691517ce1304d86a1e367e2e)
1 /*
2  * Copyright (c) 2021 Klara Systems, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/mutex.h>
29 #include <sys/sysmacros.h>
30 #include <sys/kmem.h>
31 #include <linux/file.h>
32 #include <linux/magic.h>
33 #include <sys/zone.h>
34 
35 #if defined(CONFIG_USER_NS)
36 #include <linux/statfs.h>
37 #include <linux/proc_ns.h>
38 #endif
39 
40 static kmutex_t zone_datasets_lock;
41 static struct list_head zone_datasets;
42 
43 typedef struct zone_datasets {
44 	struct list_head zds_list;	/* zone_datasets linkage */
45 	struct user_namespace *zds_userns; /* namespace reference */
46 	struct list_head zds_datasets;	/* datasets for the namespace */
47 } zone_datasets_t;
48 
49 typedef struct zone_dataset {
50 	struct list_head zd_list;	/* zone_dataset linkage */
51 	size_t zd_dsnamelen;		/* length of name */
52 	char zd_dsname[0];		/* name of the member dataset */
53 } zone_dataset_t;
54 
55 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
56 /*
57  * Returns:
58  * - 0 on success
59  * - EBADF if it cannot open the provided file descriptor
60  * - ENOTTY if the file itself is a not a user namespace file. We want to
61  *   intercept this error in the ZFS layer. We cannot just return one of the
62  *   ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS
63  *   and the SPL layers.
64  */
65 static int
66 user_ns_get(int fd, struct user_namespace **userns)
67 {
68 	struct kstatfs st;
69 	struct file *nsfile;
70 	struct ns_common *ns;
71 	int error;
72 
73 	if ((nsfile = fget(fd)) == NULL)
74 		return (EBADF);
75 	if (vfs_statfs(&nsfile->f_path, &st) != 0) {
76 		error = ENOTTY;
77 		goto done;
78 	}
79 	if (st.f_type != NSFS_MAGIC) {
80 		error = ENOTTY;
81 		goto done;
82 	}
83 	ns = get_proc_ns(file_inode(nsfile));
84 	if (ns->ops->type != CLONE_NEWUSER) {
85 		error = ENOTTY;
86 		goto done;
87 	}
88 	*userns = container_of(ns, struct user_namespace, ns);
89 
90 	error = 0;
91 done:
92 	fput(nsfile);
93 
94 	return (error);
95 }
96 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
97 
98 static unsigned int
99 user_ns_zoneid(struct user_namespace *user_ns)
100 {
101 	unsigned int r;
102 
103 #if defined(HAVE_USER_NS_COMMON_INUM)
104 	r = user_ns->ns.inum;
105 #else
106 	r = user_ns->proc_inum;
107 #endif
108 
109 	return (r);
110 }
111 
112 static struct zone_datasets *
113 zone_datasets_lookup(unsigned int nsinum)
114 {
115 	zone_datasets_t *zds;
116 
117 	list_for_each_entry(zds, &zone_datasets, zds_list) {
118 		if (user_ns_zoneid(zds->zds_userns) == nsinum)
119 			return (zds);
120 	}
121 	return (NULL);
122 }
123 
124 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
125 static struct zone_dataset *
126 zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
127 {
128 	zone_dataset_t *zd;
129 
130 	list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
131 		if (zd->zd_dsnamelen != dsnamelen)
132 			continue;
133 		if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0)
134 			return (zd);
135 	}
136 
137 	return (NULL);
138 }
139 
140 static int
141 zone_dataset_cred_check(cred_t *cred)
142 {
143 
144 	if (!uid_eq(cred->uid, GLOBAL_ROOT_UID))
145 		return (EPERM);
146 
147 	return (0);
148 }
149 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
150 
/*
 * Validate a dataset name and compute its effective length.
 *
 * Returns ENOENT, leaving *dsnamelen untouched, if the name is empty or
 * begins with a slash.  Otherwise returns 0 and stores in *dsnamelen the
 * length of the name, not counting one trailing slash if present.
 */
static int
zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
{
	size_t len;

	switch (dataset[0]) {
	case '\0':
	case '/':
		return (ENOENT);
	default:
		break;
	}

	/* The name is non-empty here, so len - 1 is a valid index. */
	len = strlen(dataset);
	if (dataset[len - 1] == '/')
		len--;
	*dsnamelen = len;

	return (0);
}
165 
166 int
167 zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
168 {
169 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
170 	struct user_namespace *userns;
171 	zone_datasets_t *zds;
172 	zone_dataset_t *zd;
173 	int error;
174 	size_t dsnamelen;
175 
176 	if ((error = zone_dataset_cred_check(cred)) != 0)
177 		return (error);
178 	if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
179 		return (error);
180 	if ((error = user_ns_get(userns_fd, &userns)) != 0)
181 		return (error);
182 
183 	mutex_enter(&zone_datasets_lock);
184 	zds = zone_datasets_lookup(user_ns_zoneid(userns));
185 	if (zds == NULL) {
186 		zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP);
187 		INIT_LIST_HEAD(&zds->zds_list);
188 		INIT_LIST_HEAD(&zds->zds_datasets);
189 		zds->zds_userns = userns;
190 		/*
191 		 * Lock the namespace by incresing its refcount to prevent
192 		 * the namespace ID from being reused.
193 		 */
194 		get_user_ns(userns);
195 		list_add_tail(&zds->zds_list, &zone_datasets);
196 	} else {
197 		zd = zone_dataset_lookup(zds, dataset, dsnamelen);
198 		if (zd != NULL) {
199 			mutex_exit(&zone_datasets_lock);
200 			return (EEXIST);
201 		}
202 	}
203 
204 	zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
205 	zd->zd_dsnamelen = dsnamelen;
206 	strlcpy(zd->zd_dsname, dataset, dsnamelen + 1);
207 	INIT_LIST_HEAD(&zd->zd_list);
208 	list_add_tail(&zd->zd_list, &zds->zds_datasets);
209 
210 	mutex_exit(&zone_datasets_lock);
211 	return (0);
212 #else
213 	return (ENXIO);
214 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
215 }
216 EXPORT_SYMBOL(zone_dataset_attach);
217 
218 int
219 zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
220 {
221 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
222 	struct user_namespace *userns;
223 	zone_datasets_t *zds;
224 	zone_dataset_t *zd;
225 	int error;
226 	size_t dsnamelen;
227 
228 	if ((error = zone_dataset_cred_check(cred)) != 0)
229 		return (error);
230 	if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
231 		return (error);
232 	if ((error = user_ns_get(userns_fd, &userns)) != 0)
233 		return (error);
234 
235 	mutex_enter(&zone_datasets_lock);
236 	zds = zone_datasets_lookup(user_ns_zoneid(userns));
237 	if (zds != NULL)
238 		zd = zone_dataset_lookup(zds, dataset, dsnamelen);
239 	if (zds == NULL || zd == NULL) {
240 		mutex_exit(&zone_datasets_lock);
241 		return (ENOENT);
242 	}
243 
244 	list_del(&zd->zd_list);
245 	kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
246 
247 	/* Prune the namespace entry if it has no more delegations. */
248 	if (list_empty(&zds->zds_datasets)) {
249 		/*
250 		 * Decrease the refcount now that the namespace is no longer
251 		 * used. It is no longer necessary to prevent the namespace ID
252 		 * from being reused.
253 		 */
254 		put_user_ns(userns);
255 		list_del(&zds->zds_list);
256 		kmem_free(zds, sizeof (*zds));
257 	}
258 
259 	mutex_exit(&zone_datasets_lock);
260 	return (0);
261 #else
262 	return (ENXIO);
263 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
264 }
265 EXPORT_SYMBOL(zone_dataset_detach);
266 
267 /*
268  * A dataset is visible if:
269  * - It is a parent of a namespace entry.
270  * - It is one of the namespace entries.
271  * - It is a child of a namespace entry.
272  *
273  * A dataset is writable if:
274  * - It is one of the namespace entries.
275  * - It is a child of a namespace entry.
276  *
277  * The parent datasets of namespace entries are visible and
278  * read-only to provide a path back to the root of the pool.
279  */
280 int
281 zone_dataset_visible(const char *dataset, int *write)
282 {
283 	zone_datasets_t *zds;
284 	zone_dataset_t *zd;
285 	size_t dsnamelen, zd_len;
286 	int visible;
287 
288 	/* Default to read-only, in case visible is returned. */
289 	if (write != NULL)
290 		*write = 0;
291 	if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
292 		return (0);
293 	if (INGLOBALZONE(curproc)) {
294 		if (write != NULL)
295 			*write = 1;
296 		return (1);
297 	}
298 
299 	mutex_enter(&zone_datasets_lock);
300 	zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
301 	if (zds == NULL) {
302 		mutex_exit(&zone_datasets_lock);
303 		return (0);
304 	}
305 
306 	visible = 0;
307 	list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
308 		zd_len = strlen(zd->zd_dsname);
309 		if (zd_len > dsnamelen) {
310 			/*
311 			 * The name of the namespace entry is longer than that
312 			 * of the dataset, so it could be that the dataset is a
313 			 * parent of the namespace entry.
314 			 */
315 			visible = memcmp(zd->zd_dsname, dataset,
316 			    dsnamelen) == 0 &&
317 			    zd->zd_dsname[dsnamelen] == '/';
318 			if (visible)
319 				break;
320 		} else if (zd_len == dsnamelen) {
321 			/*
322 			 * The name of the namespace entry is as long as that
323 			 * of the dataset, so perhaps the dataset itself is the
324 			 * namespace entry.
325 			 */
326 			visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0;
327 			if (visible) {
328 				if (write != NULL)
329 					*write = 1;
330 				break;
331 			}
332 		} else {
333 			/*
334 			 * The name of the namespace entry is shorter than that
335 			 * of the dataset, so perhaps the dataset is a child of
336 			 * the namespace entry.
337 			 */
338 			visible = memcmp(zd->zd_dsname, dataset,
339 			    zd_len) == 0 && dataset[zd_len] == '/';
340 			if (visible) {
341 				if (write != NULL)
342 					*write = 1;
343 				break;
344 			}
345 		}
346 	}
347 
348 	mutex_exit(&zone_datasets_lock);
349 	return (visible);
350 }
351 EXPORT_SYMBOL(zone_dataset_visible);
352 
/*
 * Return the zone ID of the global zone: the zone ID of init_user_ns when
 * built with CONFIG_USER_NS, 0 otherwise.
 */
unsigned int
global_zoneid(void)
{
#if defined(CONFIG_USER_NS)
	return (user_ns_zoneid(&init_user_ns));
#else
	return (0);
#endif
}
EXPORT_SYMBOL(global_zoneid);
365 
366 unsigned int
367 crgetzoneid(const cred_t *cr)
368 {
369 	unsigned int r = 0;
370 
371 #if defined(CONFIG_USER_NS)
372 	r = user_ns_zoneid(cr->user_ns);
373 #endif
374 
375 	return (r);
376 }
377 EXPORT_SYMBOL(crgetzoneid);
378 
379 boolean_t
380 inglobalzone(proc_t *proc)
381 {
382 #if defined(CONFIG_USER_NS)
383 	return (proc->cred->user_ns == &init_user_ns);
384 #else
385 	return (B_TRUE);
386 #endif
387 }
388 EXPORT_SYMBOL(inglobalzone);
389 
390 int
391 spl_zone_init(void)
392 {
393 	mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
394 	INIT_LIST_HEAD(&zone_datasets);
395 	return (0);
396 }
397 
398 void
399 spl_zone_fini(void)
400 {
401 	zone_datasets_t *zds;
402 	zone_dataset_t *zd;
403 
404 	/*
405 	 * It would be better to assert an empty zone_datasets, but since
406 	 * there's no automatic mechanism for cleaning them up if the user
407 	 * namespace is destroyed, just do it here, since spl is about to go
408 	 * out of context.
409 	 */
410 	while (!list_empty(&zone_datasets)) {
411 		zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
412 		while (!list_empty(&zds->zds_datasets)) {
413 			zd = list_entry(zds->zds_datasets.next,
414 			    zone_dataset_t, zd_list);
415 			list_del(&zd->zd_list);
416 			kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
417 		}
418 		put_user_ns(zds->zds_userns);
419 		list_del(&zds->zds_list);
420 		kmem_free(zds, sizeof (*zds));
421 	}
422 	mutex_destroy(&zone_datasets_lock);
423 }
424