1 /* 2 * Copyright (c) 2021 Klara Systems, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/sysmacros.h> 29 #include <sys/kmem.h> 30 #include <linux/file.h> 31 #include <linux/magic.h> 32 #include <sys/zone.h> 33 #include <sys/string.h> 34 35 #if defined(CONFIG_USER_NS) 36 #include <linux/statfs.h> 37 #include <linux/proc_ns.h> 38 #endif 39 40 #include <sys/mutex.h> 41 42 static kmutex_t zone_datasets_lock; 43 static struct list_head zone_datasets; 44 45 typedef struct zone_datasets { 46 struct list_head zds_list; /* zone_datasets linkage */ 47 struct user_namespace *zds_userns; /* namespace reference */ 48 struct list_head zds_datasets; /* datasets for the namespace */ 49 } zone_datasets_t; 50 51 typedef struct zone_dataset { 52 struct list_head zd_list; /* zone_dataset linkage */ 53 size_t zd_dsnamelen; /* length of name */ 54 char zd_dsname[]; /* name of the member dataset */ 55 } zone_dataset_t; 56 57 #ifdef CONFIG_USER_NS 58 /* 59 * Returns: 60 * - 0 on success 61 * - EBADF if it cannot open the provided file descriptor 62 * - ENOTTY if the file itself is a not a user namespace file. We want to 63 * intercept this error in the ZFS layer. We cannot just return one of the 64 * ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS 65 * and the SPL layers. 66 */ 67 static int 68 user_ns_get(int fd, struct user_namespace **userns) 69 { 70 struct kstatfs st; 71 struct file *nsfile; 72 struct ns_common *ns; 73 int error; 74 75 if ((nsfile = fget(fd)) == NULL) 76 return (EBADF); 77 if (vfs_statfs(&nsfile->f_path, &st) != 0) { 78 error = ENOTTY; 79 goto done; 80 } 81 if (st.f_type != NSFS_MAGIC) { 82 error = ENOTTY; 83 goto done; 84 } 85 ns = get_proc_ns(file_inode(nsfile)); 86 if (ns->ops->type != CLONE_NEWUSER) { 87 error = ENOTTY; 88 goto done; 89 } 90 *userns = container_of(ns, struct user_namespace, ns); 91 92 error = 0; 93 done: 94 fput(nsfile); 95 96 return (error); 97 } 98 #endif /* CONFIG_USER_NS */ 99 100 static unsigned int 101 user_ns_zoneid(struct user_namespace *user_ns) 102 { 103 unsigned int r; 104 105 r = user_ns->ns.inum; 106 107 return (r); 108 } 109 110 static struct zone_datasets * 111 zone_datasets_lookup(unsigned int nsinum) 112 { 113 zone_datasets_t *zds; 114 115 list_for_each_entry(zds, &zone_datasets, zds_list) { 116 if (user_ns_zoneid(zds->zds_userns) == nsinum) 117 return (zds); 118 } 119 return (NULL); 120 } 121 122 #ifdef CONFIG_USER_NS 123 static struct zone_dataset * 124 zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen) 125 { 126 zone_dataset_t *zd; 127 128 list_for_each_entry(zd, &zds->zds_datasets, zd_list) { 129 if (zd->zd_dsnamelen != dsnamelen) 130 continue; 131 if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0) 132 return (zd); 133 } 134 135 return (NULL); 136 } 137 138 static int 139 zone_dataset_cred_check(cred_t *cred) 140 { 141 142 if (!uid_eq(cred->uid, GLOBAL_ROOT_UID)) 143 return (EPERM); 144 145 return (0); 146 } 147 #endif /* CONFIG_USER_NS */ 148 149 static int 150 zone_dataset_name_check(const char *dataset, size_t *dsnamelen) 151 { 152 153 if (dataset[0] == '\0' || dataset[0] == '/') 154 return (ENOENT); 155 156 *dsnamelen = strlen(dataset); 157 /* Ignore trailing slash, if supplied. */ 158 if (dataset[*dsnamelen - 1] == '/') 159 (*dsnamelen)--; 160 161 return (0); 162 } 163 164 int 165 zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd) 166 { 167 #ifdef CONFIG_USER_NS 168 struct user_namespace *userns; 169 zone_datasets_t *zds; 170 zone_dataset_t *zd; 171 int error; 172 size_t dsnamelen; 173 174 if ((error = zone_dataset_cred_check(cred)) != 0) 175 return (error); 176 if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0) 177 return (error); 178 if ((error = user_ns_get(userns_fd, &userns)) != 0) 179 return (error); 180 181 mutex_enter(&zone_datasets_lock); 182 zds = zone_datasets_lookup(user_ns_zoneid(userns)); 183 if (zds == NULL) { 184 zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP); 185 INIT_LIST_HEAD(&zds->zds_list); 186 INIT_LIST_HEAD(&zds->zds_datasets); 187 zds->zds_userns = userns; 188 /* 189 * Lock the namespace by incresing its refcount to prevent 190 * the namespace ID from being reused. 191 */ 192 get_user_ns(userns); 193 list_add_tail(&zds->zds_list, &zone_datasets); 194 } else { 195 zd = zone_dataset_lookup(zds, dataset, dsnamelen); 196 if (zd != NULL) { 197 mutex_exit(&zone_datasets_lock); 198 return (EEXIST); 199 } 200 } 201 202 zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP); 203 zd->zd_dsnamelen = dsnamelen; 204 strlcpy(zd->zd_dsname, dataset, dsnamelen + 1); 205 INIT_LIST_HEAD(&zd->zd_list); 206 list_add_tail(&zd->zd_list, &zds->zds_datasets); 207 208 mutex_exit(&zone_datasets_lock); 209 return (0); 210 #else 211 return (ENXIO); 212 #endif /* CONFIG_USER_NS */ 213 } 214 EXPORT_SYMBOL(zone_dataset_attach); 215 216 int 217 zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd) 218 { 219 #ifdef CONFIG_USER_NS 220 struct user_namespace *userns; 221 zone_datasets_t *zds; 222 zone_dataset_t *zd; 223 int error; 224 size_t dsnamelen; 225 226 if ((error = zone_dataset_cred_check(cred)) != 0) 227 return (error); 228 if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0) 229 return (error); 230 if ((error = user_ns_get(userns_fd, &userns)) != 0) 231 return (error); 232 233 mutex_enter(&zone_datasets_lock); 234 zds = zone_datasets_lookup(user_ns_zoneid(userns)); 235 if (zds != NULL) 236 zd = zone_dataset_lookup(zds, dataset, dsnamelen); 237 if (zds == NULL || zd == NULL) { 238 mutex_exit(&zone_datasets_lock); 239 return (ENOENT); 240 } 241 242 list_del(&zd->zd_list); 243 kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); 244 245 /* Prune the namespace entry if it has no more delegations. */ 246 if (list_empty(&zds->zds_datasets)) { 247 /* 248 * Decrease the refcount now that the namespace is no longer 249 * used. It is no longer necessary to prevent the namespace ID 250 * from being reused. 251 */ 252 put_user_ns(userns); 253 list_del(&zds->zds_list); 254 kmem_free(zds, sizeof (*zds)); 255 } 256 257 mutex_exit(&zone_datasets_lock); 258 return (0); 259 #else 260 return (ENXIO); 261 #endif /* CONFIG_USER_NS */ 262 } 263 EXPORT_SYMBOL(zone_dataset_detach); 264 265 /* 266 * A dataset is visible if: 267 * - It is a parent of a namespace entry. 268 * - It is one of the namespace entries. 269 * - It is a child of a namespace entry. 270 * 271 * A dataset is writable if: 272 * - It is one of the namespace entries. 273 * - It is a child of a namespace entry. 274 * 275 * The parent datasets of namespace entries are visible and 276 * read-only to provide a path back to the root of the pool. 277 */ 278 int 279 zone_dataset_visible(const char *dataset, int *write) 280 { 281 zone_datasets_t *zds; 282 zone_dataset_t *zd; 283 size_t dsnamelen, zd_len; 284 int visible; 285 286 /* Default to read-only, in case visible is returned. */ 287 if (write != NULL) 288 *write = 0; 289 if (zone_dataset_name_check(dataset, &dsnamelen) != 0) 290 return (0); 291 if (INGLOBALZONE(curproc)) { 292 if (write != NULL) 293 *write = 1; 294 return (1); 295 } 296 297 mutex_enter(&zone_datasets_lock); 298 zds = zone_datasets_lookup(crgetzoneid(curproc->cred)); 299 if (zds == NULL) { 300 mutex_exit(&zone_datasets_lock); 301 return (0); 302 } 303 304 visible = 0; 305 list_for_each_entry(zd, &zds->zds_datasets, zd_list) { 306 zd_len = strlen(zd->zd_dsname); 307 if (zd_len > dsnamelen) { 308 /* 309 * The name of the namespace entry is longer than that 310 * of the dataset, so it could be that the dataset is a 311 * parent of the namespace entry. 312 */ 313 visible = memcmp(zd->zd_dsname, dataset, 314 dsnamelen) == 0 && 315 zd->zd_dsname[dsnamelen] == '/'; 316 if (visible) 317 break; 318 } else if (zd_len == dsnamelen) { 319 /* 320 * The name of the namespace entry is as long as that 321 * of the dataset, so perhaps the dataset itself is the 322 * namespace entry. 323 */ 324 visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0; 325 if (visible) { 326 if (write != NULL) 327 *write = 1; 328 break; 329 } 330 } else { 331 /* 332 * The name of the namespace entry is shorter than that 333 * of the dataset, so perhaps the dataset is a child of 334 * the namespace entry. 335 */ 336 visible = memcmp(zd->zd_dsname, dataset, 337 zd_len) == 0 && dataset[zd_len] == '/'; 338 if (visible) { 339 if (write != NULL) 340 *write = 1; 341 break; 342 } 343 } 344 } 345 346 mutex_exit(&zone_datasets_lock); 347 return (visible); 348 } 349 EXPORT_SYMBOL(zone_dataset_visible); 350 351 unsigned int 352 global_zoneid(void) 353 { 354 unsigned int z = 0; 355 356 #if defined(CONFIG_USER_NS) 357 z = user_ns_zoneid(&init_user_ns); 358 #endif 359 360 return (z); 361 } 362 EXPORT_SYMBOL(global_zoneid); 363 364 unsigned int 365 crgetzoneid(const cred_t *cr) 366 { 367 unsigned int r = 0; 368 369 #if defined(CONFIG_USER_NS) 370 r = user_ns_zoneid(cr->user_ns); 371 #endif 372 373 return (r); 374 } 375 EXPORT_SYMBOL(crgetzoneid); 376 377 boolean_t 378 inglobalzone(proc_t *proc) 379 { 380 #if defined(CONFIG_USER_NS) 381 return (proc->cred->user_ns == &init_user_ns); 382 #else 383 return (B_TRUE); 384 #endif 385 } 386 EXPORT_SYMBOL(inglobalzone); 387 388 int 389 spl_zone_init(void) 390 { 391 mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL); 392 INIT_LIST_HEAD(&zone_datasets); 393 return (0); 394 } 395 396 void 397 spl_zone_fini(void) 398 { 399 zone_datasets_t *zds; 400 zone_dataset_t *zd; 401 402 /* 403 * It would be better to assert an empty zone_datasets, but since 404 * there's no automatic mechanism for cleaning them up if the user 405 * namespace is destroyed, just do it here, since spl is about to go 406 * out of context. 407 */ 408 while (!list_empty(&zone_datasets)) { 409 zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list); 410 while (!list_empty(&zds->zds_datasets)) { 411 zd = list_entry(zds->zds_datasets.next, 412 zone_dataset_t, zd_list); 413 list_del(&zd->zd_list); 414 kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); 415 } 416 put_user_ns(zds->zds_userns); 417 list_del(&zds->zds_list); 418 kmem_free(zds, sizeof (*zds)); 419 } 420 mutex_destroy(&zone_datasets_lock); 421 } 422