1 // SPDX-License-Identifier: BSD-2-Clause 2 /* 3 * Copyright (c) 2021 Klara Systems, Inc. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/types.h> 29 #include <sys/sysmacros.h> 30 #include <sys/kmem.h> 31 #include <linux/file.h> 32 #include <linux/magic.h> 33 #include <sys/zone.h> 34 #include <sys/string.h> 35 36 #if defined(CONFIG_USER_NS) 37 #include <linux/statfs.h> 38 #include <linux/proc_ns.h> 39 #endif 40 41 #include <sys/mutex.h> 42 43 static kmutex_t zone_datasets_lock; 44 static struct list_head zone_datasets; 45 46 typedef struct zone_datasets { 47 struct list_head zds_list; /* zone_datasets linkage */ 48 struct user_namespace *zds_userns; /* namespace reference */ 49 struct list_head zds_datasets; /* datasets for the namespace */ 50 } zone_datasets_t; 51 52 typedef struct zone_dataset { 53 struct list_head zd_list; /* zone_dataset linkage */ 54 size_t zd_dsnamelen; /* length of name */ 55 char zd_dsname[]; /* name of the member dataset */ 56 } zone_dataset_t; 57 58 #ifdef CONFIG_USER_NS 59 /* 60 * Returns: 61 * - 0 on success 62 * - EBADF if it cannot open the provided file descriptor 63 * - ENOTTY if the file itself is a not a user namespace file. We want to 64 * intercept this error in the ZFS layer. We cannot just return one of the 65 * ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS 66 * and the SPL layers. 67 */ 68 static int 69 user_ns_get(int fd, struct user_namespace **userns) 70 { 71 struct kstatfs st; 72 struct file *nsfile; 73 struct ns_common *ns; 74 int error; 75 76 if ((nsfile = fget(fd)) == NULL) 77 return (EBADF); 78 if (vfs_statfs(&nsfile->f_path, &st) != 0) { 79 error = ENOTTY; 80 goto done; 81 } 82 if (st.f_type != NSFS_MAGIC) { 83 error = ENOTTY; 84 goto done; 85 } 86 ns = get_proc_ns(file_inode(nsfile)); 87 if (ns->ops->type != CLONE_NEWUSER) { 88 error = ENOTTY; 89 goto done; 90 } 91 *userns = container_of(ns, struct user_namespace, ns); 92 93 error = 0; 94 done: 95 fput(nsfile); 96 97 return (error); 98 } 99 #endif /* CONFIG_USER_NS */ 100 101 static unsigned int 102 user_ns_zoneid(struct user_namespace *user_ns) 103 { 104 unsigned int r; 105 106 r = user_ns->ns.inum; 107 108 return (r); 109 } 110 111 static struct zone_datasets * 112 zone_datasets_lookup(unsigned int nsinum) 113 { 114 zone_datasets_t *zds; 115 116 list_for_each_entry(zds, &zone_datasets, zds_list) { 117 if (user_ns_zoneid(zds->zds_userns) == nsinum) 118 return (zds); 119 } 120 return (NULL); 121 } 122 123 #ifdef CONFIG_USER_NS 124 static struct zone_dataset * 125 zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen) 126 { 127 zone_dataset_t *zd; 128 129 list_for_each_entry(zd, &zds->zds_datasets, zd_list) { 130 if (zd->zd_dsnamelen != dsnamelen) 131 continue; 132 if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0) 133 return (zd); 134 } 135 136 return (NULL); 137 } 138 139 static int 140 zone_dataset_cred_check(cred_t *cred) 141 { 142 143 if (!uid_eq(cred->uid, GLOBAL_ROOT_UID)) 144 return (EPERM); 145 146 return (0); 147 } 148 #endif /* CONFIG_USER_NS */ 149 150 static int 151 zone_dataset_name_check(const char *dataset, size_t *dsnamelen) 152 { 153 154 if (dataset[0] == '\0' || dataset[0] == '/') 155 return (ENOENT); 156 157 *dsnamelen = strlen(dataset); 158 /* Ignore trailing slash, if supplied. */ 159 if (dataset[*dsnamelen - 1] == '/') 160 (*dsnamelen)--; 161 162 return (0); 163 } 164 165 int 166 zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd) 167 { 168 #ifdef CONFIG_USER_NS 169 struct user_namespace *userns; 170 zone_datasets_t *zds; 171 zone_dataset_t *zd; 172 int error; 173 size_t dsnamelen; 174 175 if ((error = zone_dataset_cred_check(cred)) != 0) 176 return (error); 177 if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0) 178 return (error); 179 if ((error = user_ns_get(userns_fd, &userns)) != 0) 180 return (error); 181 182 mutex_enter(&zone_datasets_lock); 183 zds = zone_datasets_lookup(user_ns_zoneid(userns)); 184 if (zds == NULL) { 185 zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP); 186 INIT_LIST_HEAD(&zds->zds_list); 187 INIT_LIST_HEAD(&zds->zds_datasets); 188 zds->zds_userns = userns; 189 /* 190 * Lock the namespace by incresing its refcount to prevent 191 * the namespace ID from being reused. 192 */ 193 get_user_ns(userns); 194 list_add_tail(&zds->zds_list, &zone_datasets); 195 } else { 196 zd = zone_dataset_lookup(zds, dataset, dsnamelen); 197 if (zd != NULL) { 198 mutex_exit(&zone_datasets_lock); 199 return (EEXIST); 200 } 201 } 202 203 zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP); 204 zd->zd_dsnamelen = dsnamelen; 205 strlcpy(zd->zd_dsname, dataset, dsnamelen + 1); 206 INIT_LIST_HEAD(&zd->zd_list); 207 list_add_tail(&zd->zd_list, &zds->zds_datasets); 208 209 mutex_exit(&zone_datasets_lock); 210 return (0); 211 #else 212 return (ENXIO); 213 #endif /* CONFIG_USER_NS */ 214 } 215 EXPORT_SYMBOL(zone_dataset_attach); 216 217 int 218 zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd) 219 { 220 #ifdef CONFIG_USER_NS 221 struct user_namespace *userns; 222 zone_datasets_t *zds; 223 zone_dataset_t *zd; 224 int error; 225 size_t dsnamelen; 226 227 if ((error = zone_dataset_cred_check(cred)) != 0) 228 return (error); 229 if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0) 230 return (error); 231 if ((error = user_ns_get(userns_fd, &userns)) != 0) 232 return (error); 233 234 mutex_enter(&zone_datasets_lock); 235 zds = zone_datasets_lookup(user_ns_zoneid(userns)); 236 if (zds != NULL) 237 zd = zone_dataset_lookup(zds, dataset, dsnamelen); 238 if (zds == NULL || zd == NULL) { 239 mutex_exit(&zone_datasets_lock); 240 return (ENOENT); 241 } 242 243 list_del(&zd->zd_list); 244 kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); 245 246 /* Prune the namespace entry if it has no more delegations. */ 247 if (list_empty(&zds->zds_datasets)) { 248 /* 249 * Decrease the refcount now that the namespace is no longer 250 * used. It is no longer necessary to prevent the namespace ID 251 * from being reused. 252 */ 253 put_user_ns(userns); 254 list_del(&zds->zds_list); 255 kmem_free(zds, sizeof (*zds)); 256 } 257 258 mutex_exit(&zone_datasets_lock); 259 return (0); 260 #else 261 return (ENXIO); 262 #endif /* CONFIG_USER_NS */ 263 } 264 EXPORT_SYMBOL(zone_dataset_detach); 265 266 /* 267 * A dataset is visible if: 268 * - It is a parent of a namespace entry. 269 * - It is one of the namespace entries. 270 * - It is a child of a namespace entry. 271 * 272 * A dataset is writable if: 273 * - It is one of the namespace entries. 274 * - It is a child of a namespace entry. 275 * 276 * The parent datasets of namespace entries are visible and 277 * read-only to provide a path back to the root of the pool. 278 */ 279 int 280 zone_dataset_visible(const char *dataset, int *write) 281 { 282 zone_datasets_t *zds; 283 zone_dataset_t *zd; 284 size_t dsnamelen, zd_len; 285 int visible; 286 287 /* Default to read-only, in case visible is returned. */ 288 if (write != NULL) 289 *write = 0; 290 if (zone_dataset_name_check(dataset, &dsnamelen) != 0) 291 return (0); 292 if (INGLOBALZONE(curproc)) { 293 if (write != NULL) 294 *write = 1; 295 return (1); 296 } 297 298 mutex_enter(&zone_datasets_lock); 299 zds = zone_datasets_lookup(crgetzoneid(curproc->cred)); 300 if (zds == NULL) { 301 mutex_exit(&zone_datasets_lock); 302 return (0); 303 } 304 305 visible = 0; 306 list_for_each_entry(zd, &zds->zds_datasets, zd_list) { 307 zd_len = strlen(zd->zd_dsname); 308 if (zd_len > dsnamelen) { 309 /* 310 * The name of the namespace entry is longer than that 311 * of the dataset, so it could be that the dataset is a 312 * parent of the namespace entry. 313 */ 314 visible = memcmp(zd->zd_dsname, dataset, 315 dsnamelen) == 0 && 316 zd->zd_dsname[dsnamelen] == '/'; 317 if (visible) 318 break; 319 } else if (zd_len == dsnamelen) { 320 /* 321 * The name of the namespace entry is as long as that 322 * of the dataset, so perhaps the dataset itself is the 323 * namespace entry. 324 */ 325 visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0; 326 if (visible) { 327 if (write != NULL) 328 *write = 1; 329 break; 330 } 331 } else { 332 /* 333 * The name of the namespace entry is shorter than that 334 * of the dataset, so perhaps the dataset is a child of 335 * the namespace entry. 336 */ 337 visible = memcmp(zd->zd_dsname, dataset, 338 zd_len) == 0 && dataset[zd_len] == '/'; 339 if (visible) { 340 if (write != NULL) 341 *write = 1; 342 break; 343 } 344 } 345 } 346 347 mutex_exit(&zone_datasets_lock); 348 return (visible); 349 } 350 EXPORT_SYMBOL(zone_dataset_visible); 351 352 unsigned int 353 global_zoneid(void) 354 { 355 unsigned int z = 0; 356 357 #if defined(CONFIG_USER_NS) 358 z = user_ns_zoneid(&init_user_ns); 359 #endif 360 361 return (z); 362 } 363 EXPORT_SYMBOL(global_zoneid); 364 365 unsigned int 366 crgetzoneid(const cred_t *cr) 367 { 368 unsigned int r = 0; 369 370 #if defined(CONFIG_USER_NS) 371 r = user_ns_zoneid(cr->user_ns); 372 #endif 373 374 return (r); 375 } 376 EXPORT_SYMBOL(crgetzoneid); 377 378 boolean_t 379 inglobalzone(proc_t *proc) 380 { 381 #if defined(CONFIG_USER_NS) 382 return (proc->cred->user_ns == &init_user_ns); 383 #else 384 return (B_TRUE); 385 #endif 386 } 387 EXPORT_SYMBOL(inglobalzone); 388 389 int 390 spl_zone_init(void) 391 { 392 mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL); 393 INIT_LIST_HEAD(&zone_datasets); 394 return (0); 395 } 396 397 void 398 spl_zone_fini(void) 399 { 400 zone_datasets_t *zds; 401 zone_dataset_t *zd; 402 403 /* 404 * It would be better to assert an empty zone_datasets, but since 405 * there's no automatic mechanism for cleaning them up if the user 406 * namespace is destroyed, just do it here, since spl is about to go 407 * out of context. 408 */ 409 while (!list_empty(&zone_datasets)) { 410 zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list); 411 while (!list_empty(&zds->zds_datasets)) { 412 zd = list_entry(zds->zds_datasets.next, 413 zone_dataset_t, zd_list); 414 list_del(&zd->zd_list); 415 kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); 416 } 417 put_user_ns(zds->zds_userns); 418 list_del(&zds->zds_list); 419 kmem_free(zds, sizeof (*zds)); 420 } 421 mutex_destroy(&zone_datasets_lock); 422 } 423