xref: /freebsd/sys/contrib/openzfs/module/os/linux/spl/spl-zone.c (revision 87b759f0fa1f7554d50ce640c40138512bbded44)
1 /*
2  * Copyright (c) 2021 Klara Systems, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/sysmacros.h>
29 #include <sys/kmem.h>
30 #include <linux/file.h>
31 #include <linux/magic.h>
32 #include <sys/zone.h>
33 #include <sys/string.h>
34 
35 #if defined(CONFIG_USER_NS)
36 #include <linux/statfs.h>
37 #include <linux/proc_ns.h>
38 #endif
39 
40 #include <sys/mutex.h>
41 
42 static kmutex_t zone_datasets_lock;
43 static struct list_head zone_datasets;
44 
45 typedef struct zone_datasets {
46 	struct list_head zds_list;	/* zone_datasets linkage */
47 	struct user_namespace *zds_userns; /* namespace reference */
48 	struct list_head zds_datasets;	/* datasets for the namespace */
49 } zone_datasets_t;
50 
51 typedef struct zone_dataset {
52 	struct list_head zd_list;	/* zone_dataset linkage */
53 	size_t zd_dsnamelen;		/* length of name */
54 	char zd_dsname[];		/* name of the member dataset */
55 } zone_dataset_t;
56 
57 #ifdef CONFIG_USER_NS
58 /*
59  * Returns:
60  * - 0 on success
61  * - EBADF if it cannot open the provided file descriptor
62  * - ENOTTY if the file itself is a not a user namespace file. We want to
63  *   intercept this error in the ZFS layer. We cannot just return one of the
64  *   ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS
65  *   and the SPL layers.
66  */
67 static int
68 user_ns_get(int fd, struct user_namespace **userns)
69 {
70 	struct kstatfs st;
71 	struct file *nsfile;
72 	struct ns_common *ns;
73 	int error;
74 
75 	if ((nsfile = fget(fd)) == NULL)
76 		return (EBADF);
77 	if (vfs_statfs(&nsfile->f_path, &st) != 0) {
78 		error = ENOTTY;
79 		goto done;
80 	}
81 	if (st.f_type != NSFS_MAGIC) {
82 		error = ENOTTY;
83 		goto done;
84 	}
85 	ns = get_proc_ns(file_inode(nsfile));
86 	if (ns->ops->type != CLONE_NEWUSER) {
87 		error = ENOTTY;
88 		goto done;
89 	}
90 	*userns = container_of(ns, struct user_namespace, ns);
91 
92 	error = 0;
93 done:
94 	fput(nsfile);
95 
96 	return (error);
97 }
98 #endif /* CONFIG_USER_NS */
99 
100 static unsigned int
101 user_ns_zoneid(struct user_namespace *user_ns)
102 {
103 	unsigned int r;
104 
105 	r = user_ns->ns.inum;
106 
107 	return (r);
108 }
109 
110 static struct zone_datasets *
111 zone_datasets_lookup(unsigned int nsinum)
112 {
113 	zone_datasets_t *zds;
114 
115 	list_for_each_entry(zds, &zone_datasets, zds_list) {
116 		if (user_ns_zoneid(zds->zds_userns) == nsinum)
117 			return (zds);
118 	}
119 	return (NULL);
120 }
121 
122 #ifdef CONFIG_USER_NS
123 static struct zone_dataset *
124 zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
125 {
126 	zone_dataset_t *zd;
127 
128 	list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
129 		if (zd->zd_dsnamelen != dsnamelen)
130 			continue;
131 		if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0)
132 			return (zd);
133 	}
134 
135 	return (NULL);
136 }
137 
138 static int
139 zone_dataset_cred_check(cred_t *cred)
140 {
141 
142 	if (!uid_eq(cred->uid, GLOBAL_ROOT_UID))
143 		return (EPERM);
144 
145 	return (0);
146 }
147 #endif /* CONFIG_USER_NS */
148 
/*
 * Validate a dataset name and compute its effective length.
 *
 * An empty name or a name starting with '/' is rejected with ENOENT and
 * *dsnamelen is left untouched. Otherwise 0 is returned and *dsnamelen is
 * set to the length of the name, not counting one trailing '/' if the
 * caller supplied one.
 */
static int
zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
{
	size_t len;

	switch (dataset[0]) {
	case '\0':
	case '/':
		return (ENOENT);
	default:
		break;
	}

	/* The name is non-empty here, so len - 1 is a valid index. */
	len = strlen(dataset);
	if (dataset[len - 1] == '/')
		len--;
	*dsnamelen = len;

	return (0);
}
163 
164 int
165 zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
166 {
167 #ifdef CONFIG_USER_NS
168 	struct user_namespace *userns;
169 	zone_datasets_t *zds;
170 	zone_dataset_t *zd;
171 	int error;
172 	size_t dsnamelen;
173 
174 	if ((error = zone_dataset_cred_check(cred)) != 0)
175 		return (error);
176 	if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
177 		return (error);
178 	if ((error = user_ns_get(userns_fd, &userns)) != 0)
179 		return (error);
180 
181 	mutex_enter(&zone_datasets_lock);
182 	zds = zone_datasets_lookup(user_ns_zoneid(userns));
183 	if (zds == NULL) {
184 		zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP);
185 		INIT_LIST_HEAD(&zds->zds_list);
186 		INIT_LIST_HEAD(&zds->zds_datasets);
187 		zds->zds_userns = userns;
188 		/*
189 		 * Lock the namespace by incresing its refcount to prevent
190 		 * the namespace ID from being reused.
191 		 */
192 		get_user_ns(userns);
193 		list_add_tail(&zds->zds_list, &zone_datasets);
194 	} else {
195 		zd = zone_dataset_lookup(zds, dataset, dsnamelen);
196 		if (zd != NULL) {
197 			mutex_exit(&zone_datasets_lock);
198 			return (EEXIST);
199 		}
200 	}
201 
202 	zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
203 	zd->zd_dsnamelen = dsnamelen;
204 	strlcpy(zd->zd_dsname, dataset, dsnamelen + 1);
205 	INIT_LIST_HEAD(&zd->zd_list);
206 	list_add_tail(&zd->zd_list, &zds->zds_datasets);
207 
208 	mutex_exit(&zone_datasets_lock);
209 	return (0);
210 #else
211 	return (ENXIO);
212 #endif /* CONFIG_USER_NS */
213 }
214 EXPORT_SYMBOL(zone_dataset_attach);
215 
216 int
217 zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
218 {
219 #ifdef CONFIG_USER_NS
220 	struct user_namespace *userns;
221 	zone_datasets_t *zds;
222 	zone_dataset_t *zd;
223 	int error;
224 	size_t dsnamelen;
225 
226 	if ((error = zone_dataset_cred_check(cred)) != 0)
227 		return (error);
228 	if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
229 		return (error);
230 	if ((error = user_ns_get(userns_fd, &userns)) != 0)
231 		return (error);
232 
233 	mutex_enter(&zone_datasets_lock);
234 	zds = zone_datasets_lookup(user_ns_zoneid(userns));
235 	if (zds != NULL)
236 		zd = zone_dataset_lookup(zds, dataset, dsnamelen);
237 	if (zds == NULL || zd == NULL) {
238 		mutex_exit(&zone_datasets_lock);
239 		return (ENOENT);
240 	}
241 
242 	list_del(&zd->zd_list);
243 	kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
244 
245 	/* Prune the namespace entry if it has no more delegations. */
246 	if (list_empty(&zds->zds_datasets)) {
247 		/*
248 		 * Decrease the refcount now that the namespace is no longer
249 		 * used. It is no longer necessary to prevent the namespace ID
250 		 * from being reused.
251 		 */
252 		put_user_ns(userns);
253 		list_del(&zds->zds_list);
254 		kmem_free(zds, sizeof (*zds));
255 	}
256 
257 	mutex_exit(&zone_datasets_lock);
258 	return (0);
259 #else
260 	return (ENXIO);
261 #endif /* CONFIG_USER_NS */
262 }
263 EXPORT_SYMBOL(zone_dataset_detach);
264 
265 /*
266  * A dataset is visible if:
267  * - It is a parent of a namespace entry.
268  * - It is one of the namespace entries.
269  * - It is a child of a namespace entry.
270  *
271  * A dataset is writable if:
272  * - It is one of the namespace entries.
273  * - It is a child of a namespace entry.
274  *
275  * The parent datasets of namespace entries are visible and
276  * read-only to provide a path back to the root of the pool.
277  */
278 int
279 zone_dataset_visible(const char *dataset, int *write)
280 {
281 	zone_datasets_t *zds;
282 	zone_dataset_t *zd;
283 	size_t dsnamelen, zd_len;
284 	int visible;
285 
286 	/* Default to read-only, in case visible is returned. */
287 	if (write != NULL)
288 		*write = 0;
289 	if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
290 		return (0);
291 	if (INGLOBALZONE(curproc)) {
292 		if (write != NULL)
293 			*write = 1;
294 		return (1);
295 	}
296 
297 	mutex_enter(&zone_datasets_lock);
298 	zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
299 	if (zds == NULL) {
300 		mutex_exit(&zone_datasets_lock);
301 		return (0);
302 	}
303 
304 	visible = 0;
305 	list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
306 		zd_len = strlen(zd->zd_dsname);
307 		if (zd_len > dsnamelen) {
308 			/*
309 			 * The name of the namespace entry is longer than that
310 			 * of the dataset, so it could be that the dataset is a
311 			 * parent of the namespace entry.
312 			 */
313 			visible = memcmp(zd->zd_dsname, dataset,
314 			    dsnamelen) == 0 &&
315 			    zd->zd_dsname[dsnamelen] == '/';
316 			if (visible)
317 				break;
318 		} else if (zd_len == dsnamelen) {
319 			/*
320 			 * The name of the namespace entry is as long as that
321 			 * of the dataset, so perhaps the dataset itself is the
322 			 * namespace entry.
323 			 */
324 			visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0;
325 			if (visible) {
326 				if (write != NULL)
327 					*write = 1;
328 				break;
329 			}
330 		} else {
331 			/*
332 			 * The name of the namespace entry is shorter than that
333 			 * of the dataset, so perhaps the dataset is a child of
334 			 * the namespace entry.
335 			 */
336 			visible = memcmp(zd->zd_dsname, dataset,
337 			    zd_len) == 0 && dataset[zd_len] == '/';
338 			if (visible) {
339 				if (write != NULL)
340 					*write = 1;
341 				break;
342 			}
343 		}
344 	}
345 
346 	mutex_exit(&zone_datasets_lock);
347 	return (visible);
348 }
349 EXPORT_SYMBOL(zone_dataset_visible);
350 
/*
 * Return the zone ID of the global zone: the ID of init_user_ns, or 0 when
 * the kernel was built without CONFIG_USER_NS.
 */
unsigned int
global_zoneid(void)
{
#if defined(CONFIG_USER_NS)
	return (user_ns_zoneid(&init_user_ns));
#else
	return (0);
#endif
}
EXPORT_SYMBOL(global_zoneid);
363 
364 unsigned int
365 crgetzoneid(const cred_t *cr)
366 {
367 	unsigned int r = 0;
368 
369 #if defined(CONFIG_USER_NS)
370 	r = user_ns_zoneid(cr->user_ns);
371 #endif
372 
373 	return (r);
374 }
375 EXPORT_SYMBOL(crgetzoneid);
376 
377 boolean_t
378 inglobalzone(proc_t *proc)
379 {
380 #if defined(CONFIG_USER_NS)
381 	return (proc->cred->user_ns == &init_user_ns);
382 #else
383 	return (B_TRUE);
384 #endif
385 }
386 EXPORT_SYMBOL(inglobalzone);
387 
388 int
389 spl_zone_init(void)
390 {
391 	mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
392 	INIT_LIST_HEAD(&zone_datasets);
393 	return (0);
394 }
395 
396 void
397 spl_zone_fini(void)
398 {
399 	zone_datasets_t *zds;
400 	zone_dataset_t *zd;
401 
402 	/*
403 	 * It would be better to assert an empty zone_datasets, but since
404 	 * there's no automatic mechanism for cleaning them up if the user
405 	 * namespace is destroyed, just do it here, since spl is about to go
406 	 * out of context.
407 	 */
408 	while (!list_empty(&zone_datasets)) {
409 		zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
410 		while (!list_empty(&zds->zds_datasets)) {
411 			zd = list_entry(zds->zds_datasets.next,
412 			    zone_dataset_t, zd_list);
413 			list_del(&zd->zd_list);
414 			kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
415 		}
416 		put_user_ns(zds->zds_userns);
417 		list_del(&zds->zds_list);
418 		kmem_free(zds, sizeof (*zds));
419 	}
420 	mutex_destroy(&zone_datasets_lock);
421 }
422