1 /* 2 * Copyright (c) 2021 Klara Systems, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/mutex.h> 29 #include <sys/sysmacros.h> 30 #include <sys/kmem.h> 31 #include <linux/file.h> 32 #include <linux/magic.h> 33 #include <sys/zone.h> 34 35 #if defined(CONFIG_USER_NS) 36 #include <linux/statfs.h> 37 #include <linux/proc_ns.h> 38 #endif 39 40 static kmutex_t zone_datasets_lock; 41 static struct list_head zone_datasets; 42 43 typedef struct zone_datasets { 44 struct list_head zds_list; /* zone_datasets linkage */ 45 struct user_namespace *zds_userns; /* namespace reference */ 46 struct list_head zds_datasets; /* datasets for the namespace */ 47 } zone_datasets_t; 48 49 typedef struct zone_dataset { 50 struct list_head zd_list; /* zone_dataset linkage */ 51 size_t zd_dsnamelen; /* length of name */ 52 char zd_dsname[0]; /* name of the member dataset */ 53 } zone_dataset_t; 54 55 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) 56 /* 57 * Returns: 58 * - 0 on success 59 * - EBADF if it cannot open the provided file descriptor 60 * - ENOTTY if the file itself is a not a user namespace file. We want to 61 * intercept this error in the ZFS layer. We cannot just return one of the 62 * ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS 63 * and the SPL layers. 64 */ 65 static int 66 user_ns_get(int fd, struct user_namespace **userns) 67 { 68 struct kstatfs st; 69 struct file *nsfile; 70 struct ns_common *ns; 71 int error; 72 73 if ((nsfile = fget(fd)) == NULL) 74 return (EBADF); 75 if (vfs_statfs(&nsfile->f_path, &st) != 0) { 76 error = ENOTTY; 77 goto done; 78 } 79 if (st.f_type != NSFS_MAGIC) { 80 error = ENOTTY; 81 goto done; 82 } 83 ns = get_proc_ns(file_inode(nsfile)); 84 if (ns->ops->type != CLONE_NEWUSER) { 85 error = ENOTTY; 86 goto done; 87 } 88 *userns = container_of(ns, struct user_namespace, ns); 89 90 error = 0; 91 done: 92 fput(nsfile); 93 94 return (error); 95 } 96 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ 97 98 static unsigned int 99 user_ns_zoneid(struct user_namespace *user_ns) 100 { 101 unsigned int r; 102 103 #if defined(HAVE_USER_NS_COMMON_INUM) 104 r = user_ns->ns.inum; 105 #else 106 r = user_ns->proc_inum; 107 #endif 108 109 return (r); 110 } 111 112 static struct zone_datasets * 113 zone_datasets_lookup(unsigned int nsinum) 114 { 115 zone_datasets_t *zds; 116 117 list_for_each_entry(zds, &zone_datasets, zds_list) { 118 if (user_ns_zoneid(zds->zds_userns) == nsinum) 119 return (zds); 120 } 121 return (NULL); 122 } 123 124 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) 125 static struct zone_dataset * 126 zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen) 127 { 128 zone_dataset_t *zd; 129 130 list_for_each_entry(zd, &zds->zds_datasets, zd_list) { 131 if (zd->zd_dsnamelen != dsnamelen) 132 continue; 133 if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0) 134 return (zd); 135 } 136 137 return (NULL); 138 } 139 140 static int 141 zone_dataset_cred_check(cred_t *cred) 142 { 143 144 if (!uid_eq(cred->uid, GLOBAL_ROOT_UID)) 145 return (EPERM); 146 147 return (0); 148 } 149 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ 150 151 static int 152 zone_dataset_name_check(const char *dataset, size_t *dsnamelen) 153 { 154 155 if (dataset[0] == '\0' || dataset[0] == '/') 156 return (ENOENT); 157 158 *dsnamelen = strlen(dataset); 159 /* Ignore trailing slash, if supplied. */ 160 if (dataset[*dsnamelen - 1] == '/') 161 (*dsnamelen)--; 162 163 return (0); 164 } 165 166 int 167 zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd) 168 { 169 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) 170 struct user_namespace *userns; 171 zone_datasets_t *zds; 172 zone_dataset_t *zd; 173 int error; 174 size_t dsnamelen; 175 176 if ((error = zone_dataset_cred_check(cred)) != 0) 177 return (error); 178 if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0) 179 return (error); 180 if ((error = user_ns_get(userns_fd, &userns)) != 0) 181 return (error); 182 183 mutex_enter(&zone_datasets_lock); 184 zds = zone_datasets_lookup(user_ns_zoneid(userns)); 185 if (zds == NULL) { 186 zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP); 187 INIT_LIST_HEAD(&zds->zds_list); 188 INIT_LIST_HEAD(&zds->zds_datasets); 189 zds->zds_userns = userns; 190 /* 191 * Lock the namespace by incresing its refcount to prevent 192 * the namespace ID from being reused. 193 */ 194 get_user_ns(userns); 195 list_add_tail(&zds->zds_list, &zone_datasets); 196 } else { 197 zd = zone_dataset_lookup(zds, dataset, dsnamelen); 198 if (zd != NULL) { 199 mutex_exit(&zone_datasets_lock); 200 return (EEXIST); 201 } 202 } 203 204 zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP); 205 zd->zd_dsnamelen = dsnamelen; 206 strlcpy(zd->zd_dsname, dataset, dsnamelen + 1); 207 INIT_LIST_HEAD(&zd->zd_list); 208 list_add_tail(&zd->zd_list, &zds->zds_datasets); 209 210 mutex_exit(&zone_datasets_lock); 211 return (0); 212 #else 213 return (ENXIO); 214 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ 215 } 216 EXPORT_SYMBOL(zone_dataset_attach); 217 218 int 219 zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd) 220 { 221 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) 222 struct user_namespace *userns; 223 zone_datasets_t *zds; 224 zone_dataset_t *zd; 225 int error; 226 size_t dsnamelen; 227 228 if ((error = zone_dataset_cred_check(cred)) != 0) 229 return (error); 230 if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0) 231 return (error); 232 if ((error = user_ns_get(userns_fd, &userns)) != 0) 233 return (error); 234 235 mutex_enter(&zone_datasets_lock); 236 zds = zone_datasets_lookup(user_ns_zoneid(userns)); 237 if (zds != NULL) 238 zd = zone_dataset_lookup(zds, dataset, dsnamelen); 239 if (zds == NULL || zd == NULL) { 240 mutex_exit(&zone_datasets_lock); 241 return (ENOENT); 242 } 243 244 list_del(&zd->zd_list); 245 kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); 246 247 /* Prune the namespace entry if it has no more delegations. */ 248 if (list_empty(&zds->zds_datasets)) { 249 /* 250 * Decrease the refcount now that the namespace is no longer 251 * used. It is no longer necessary to prevent the namespace ID 252 * from being reused. 253 */ 254 put_user_ns(userns); 255 list_del(&zds->zds_list); 256 kmem_free(zds, sizeof (*zds)); 257 } 258 259 mutex_exit(&zone_datasets_lock); 260 return (0); 261 #else 262 return (ENXIO); 263 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ 264 } 265 EXPORT_SYMBOL(zone_dataset_detach); 266 267 /* 268 * A dataset is visible if: 269 * - It is a parent of a namespace entry. 270 * - It is one of the namespace entries. 271 * - It is a child of a namespace entry. 272 * 273 * A dataset is writable if: 274 * - It is one of the namespace entries. 275 * - It is a child of a namespace entry. 276 * 277 * The parent datasets of namespace entries are visible and 278 * read-only to provide a path back to the root of the pool. 279 */ 280 int 281 zone_dataset_visible(const char *dataset, int *write) 282 { 283 zone_datasets_t *zds; 284 zone_dataset_t *zd; 285 size_t dsnamelen, zd_len; 286 int visible; 287 288 /* Default to read-only, in case visible is returned. */ 289 if (write != NULL) 290 *write = 0; 291 if (zone_dataset_name_check(dataset, &dsnamelen) != 0) 292 return (0); 293 if (INGLOBALZONE(curproc)) { 294 if (write != NULL) 295 *write = 1; 296 return (1); 297 } 298 299 mutex_enter(&zone_datasets_lock); 300 zds = zone_datasets_lookup(crgetzoneid(curproc->cred)); 301 if (zds == NULL) { 302 mutex_exit(&zone_datasets_lock); 303 return (0); 304 } 305 306 visible = 0; 307 list_for_each_entry(zd, &zds->zds_datasets, zd_list) { 308 zd_len = strlen(zd->zd_dsname); 309 if (zd_len > dsnamelen) { 310 /* 311 * The name of the namespace entry is longer than that 312 * of the dataset, so it could be that the dataset is a 313 * parent of the namespace entry. 314 */ 315 visible = memcmp(zd->zd_dsname, dataset, 316 dsnamelen) == 0 && 317 zd->zd_dsname[dsnamelen] == '/'; 318 if (visible) 319 break; 320 } else if (zd_len == dsnamelen) { 321 /* 322 * The name of the namespace entry is as long as that 323 * of the dataset, so perhaps the dataset itself is the 324 * namespace entry. 325 */ 326 visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0; 327 if (visible) { 328 if (write != NULL) 329 *write = 1; 330 break; 331 } 332 } else { 333 /* 334 * The name of the namespace entry is shorter than that 335 * of the dataset, so perhaps the dataset is a child of 336 * the namespace entry. 337 */ 338 visible = memcmp(zd->zd_dsname, dataset, 339 zd_len) == 0 && dataset[zd_len] == '/'; 340 if (visible) { 341 if (write != NULL) 342 *write = 1; 343 break; 344 } 345 } 346 } 347 348 mutex_exit(&zone_datasets_lock); 349 return (visible); 350 } 351 EXPORT_SYMBOL(zone_dataset_visible); 352 353 unsigned int 354 global_zoneid(void) 355 { 356 unsigned int z = 0; 357 358 #if defined(CONFIG_USER_NS) 359 z = user_ns_zoneid(&init_user_ns); 360 #endif 361 362 return (z); 363 } 364 EXPORT_SYMBOL(global_zoneid); 365 366 unsigned int 367 crgetzoneid(const cred_t *cr) 368 { 369 unsigned int r = 0; 370 371 #if defined(CONFIG_USER_NS) 372 r = user_ns_zoneid(cr->user_ns); 373 #endif 374 375 return (r); 376 } 377 EXPORT_SYMBOL(crgetzoneid); 378 379 boolean_t 380 inglobalzone(proc_t *proc) 381 { 382 #if defined(CONFIG_USER_NS) 383 return (proc->cred->user_ns == &init_user_ns); 384 #else 385 return (B_TRUE); 386 #endif 387 } 388 EXPORT_SYMBOL(inglobalzone); 389 390 int 391 spl_zone_init(void) 392 { 393 mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL); 394 INIT_LIST_HEAD(&zone_datasets); 395 return (0); 396 } 397 398 void 399 spl_zone_fini(void) 400 { 401 zone_datasets_t *zds; 402 zone_dataset_t *zd; 403 404 /* 405 * It would be better to assert an empty zone_datasets, but since 406 * there's no automatic mechanism for cleaning them up if the user 407 * namespace is destroyed, just do it here, since spl is about to go 408 * out of context. 409 */ 410 while (!list_empty(&zone_datasets)) { 411 zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list); 412 while (!list_empty(&zds->zds_datasets)) { 413 zd = list_entry(zds->zds_datasets.next, 414 zone_dataset_t, zd_list); 415 list_del(&zd->zd_list); 416 kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); 417 } 418 put_user_ns(zds->zds_userns); 419 list_del(&zds->zds_list); 420 kmem_free(zds, sizeof (*zds)); 421 } 422 mutex_destroy(&zone_datasets_lock); 423 } 424