1 /* 2 * Copyright (c) 2021 Klara Systems, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/sysmacros.h> 29 #include <sys/kmem.h> 30 #include <linux/file.h> 31 #include <linux/magic.h> 32 #include <sys/zone.h> 33 34 #if defined(CONFIG_USER_NS) 35 #include <linux/statfs.h> 36 #include <linux/proc_ns.h> 37 #endif 38 39 #include <sys/mutex.h> 40 41 static kmutex_t zone_datasets_lock; 42 static struct list_head zone_datasets; 43 44 typedef struct zone_datasets { 45 struct list_head zds_list; /* zone_datasets linkage */ 46 struct user_namespace *zds_userns; /* namespace reference */ 47 struct list_head zds_datasets; /* datasets for the namespace */ 48 } zone_datasets_t; 49 50 typedef struct zone_dataset { 51 struct list_head zd_list; /* zone_dataset linkage */ 52 size_t zd_dsnamelen; /* length of name */ 53 char zd_dsname[]; /* name of the member dataset */ 54 } zone_dataset_t; 55 56 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) 57 /* 58 * Returns: 59 * - 0 on success 60 * - EBADF if it cannot open the provided file descriptor 61 * - ENOTTY if the file itself is a not a user namespace file. We want to 62 * intercept this error in the ZFS layer. We cannot just return one of the 63 * ZFS_ERR_* errors here as we want to preserve the seperation of the ZFS 64 * and the SPL layers. 65 */ 66 static int 67 user_ns_get(int fd, struct user_namespace **userns) 68 { 69 struct kstatfs st; 70 struct file *nsfile; 71 struct ns_common *ns; 72 int error; 73 74 if ((nsfile = fget(fd)) == NULL) 75 return (EBADF); 76 if (vfs_statfs(&nsfile->f_path, &st) != 0) { 77 error = ENOTTY; 78 goto done; 79 } 80 if (st.f_type != NSFS_MAGIC) { 81 error = ENOTTY; 82 goto done; 83 } 84 ns = get_proc_ns(file_inode(nsfile)); 85 if (ns->ops->type != CLONE_NEWUSER) { 86 error = ENOTTY; 87 goto done; 88 } 89 *userns = container_of(ns, struct user_namespace, ns); 90 91 error = 0; 92 done: 93 fput(nsfile); 94 95 return (error); 96 } 97 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ 98 99 static unsigned int 100 user_ns_zoneid(struct user_namespace *user_ns) 101 { 102 unsigned int r; 103 104 #if defined(HAVE_USER_NS_COMMON_INUM) 105 r = user_ns->ns.inum; 106 #else 107 r = user_ns->proc_inum; 108 #endif 109 110 return (r); 111 } 112 113 static struct zone_datasets * 114 zone_datasets_lookup(unsigned int nsinum) 115 { 116 zone_datasets_t *zds; 117 118 list_for_each_entry(zds, &zone_datasets, zds_list) { 119 if (user_ns_zoneid(zds->zds_userns) == nsinum) 120 return (zds); 121 } 122 return (NULL); 123 } 124 125 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) 126 static struct zone_dataset * 127 zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen) 128 { 129 zone_dataset_t *zd; 130 131 list_for_each_entry(zd, &zds->zds_datasets, zd_list) { 132 if (zd->zd_dsnamelen != dsnamelen) 133 continue; 134 if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0) 135 return (zd); 136 } 137 138 return (NULL); 139 } 140 141 static int 142 zone_dataset_cred_check(cred_t *cred) 143 { 144 145 if (!uid_eq(cred->uid, GLOBAL_ROOT_UID)) 146 return (EPERM); 147 148 return (0); 149 } 150 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ 151 152 static int 153 zone_dataset_name_check(const char *dataset, size_t *dsnamelen) 154 { 155 156 if (dataset[0] == '\0' || dataset[0] == '/') 157 return (ENOENT); 158 159 *dsnamelen = strlen(dataset); 160 /* Ignore trailing slash, if supplied. */ 161 if (dataset[*dsnamelen - 1] == '/') 162 (*dsnamelen)--; 163 164 return (0); 165 } 166 167 int 168 zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd) 169 { 170 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) 171 struct user_namespace *userns; 172 zone_datasets_t *zds; 173 zone_dataset_t *zd; 174 int error; 175 size_t dsnamelen; 176 177 if ((error = zone_dataset_cred_check(cred)) != 0) 178 return (error); 179 if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0) 180 return (error); 181 if ((error = user_ns_get(userns_fd, &userns)) != 0) 182 return (error); 183 184 mutex_enter(&zone_datasets_lock); 185 zds = zone_datasets_lookup(user_ns_zoneid(userns)); 186 if (zds == NULL) { 187 zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP); 188 INIT_LIST_HEAD(&zds->zds_list); 189 INIT_LIST_HEAD(&zds->zds_datasets); 190 zds->zds_userns = userns; 191 /* 192 * Lock the namespace by incresing its refcount to prevent 193 * the namespace ID from being reused. 194 */ 195 get_user_ns(userns); 196 list_add_tail(&zds->zds_list, &zone_datasets); 197 } else { 198 zd = zone_dataset_lookup(zds, dataset, dsnamelen); 199 if (zd != NULL) { 200 mutex_exit(&zone_datasets_lock); 201 return (EEXIST); 202 } 203 } 204 205 zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP); 206 zd->zd_dsnamelen = dsnamelen; 207 strlcpy(zd->zd_dsname, dataset, dsnamelen + 1); 208 INIT_LIST_HEAD(&zd->zd_list); 209 list_add_tail(&zd->zd_list, &zds->zds_datasets); 210 211 mutex_exit(&zone_datasets_lock); 212 return (0); 213 #else 214 return (ENXIO); 215 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ 216 } 217 EXPORT_SYMBOL(zone_dataset_attach); 218 219 int 220 zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd) 221 { 222 #if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) 223 struct user_namespace *userns; 224 zone_datasets_t *zds; 225 zone_dataset_t *zd; 226 int error; 227 size_t dsnamelen; 228 229 if ((error = zone_dataset_cred_check(cred)) != 0) 230 return (error); 231 if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0) 232 return (error); 233 if ((error = user_ns_get(userns_fd, &userns)) != 0) 234 return (error); 235 236 mutex_enter(&zone_datasets_lock); 237 zds = zone_datasets_lookup(user_ns_zoneid(userns)); 238 if (zds != NULL) 239 zd = zone_dataset_lookup(zds, dataset, dsnamelen); 240 if (zds == NULL || zd == NULL) { 241 mutex_exit(&zone_datasets_lock); 242 return (ENOENT); 243 } 244 245 list_del(&zd->zd_list); 246 kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); 247 248 /* Prune the namespace entry if it has no more delegations. */ 249 if (list_empty(&zds->zds_datasets)) { 250 /* 251 * Decrease the refcount now that the namespace is no longer 252 * used. It is no longer necessary to prevent the namespace ID 253 * from being reused. 254 */ 255 put_user_ns(userns); 256 list_del(&zds->zds_list); 257 kmem_free(zds, sizeof (*zds)); 258 } 259 260 mutex_exit(&zone_datasets_lock); 261 return (0); 262 #else 263 return (ENXIO); 264 #endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */ 265 } 266 EXPORT_SYMBOL(zone_dataset_detach); 267 268 /* 269 * A dataset is visible if: 270 * - It is a parent of a namespace entry. 271 * - It is one of the namespace entries. 272 * - It is a child of a namespace entry. 273 * 274 * A dataset is writable if: 275 * - It is one of the namespace entries. 276 * - It is a child of a namespace entry. 277 * 278 * The parent datasets of namespace entries are visible and 279 * read-only to provide a path back to the root of the pool. 280 */ 281 int 282 zone_dataset_visible(const char *dataset, int *write) 283 { 284 zone_datasets_t *zds; 285 zone_dataset_t *zd; 286 size_t dsnamelen, zd_len; 287 int visible; 288 289 /* Default to read-only, in case visible is returned. */ 290 if (write != NULL) 291 *write = 0; 292 if (zone_dataset_name_check(dataset, &dsnamelen) != 0) 293 return (0); 294 if (INGLOBALZONE(curproc)) { 295 if (write != NULL) 296 *write = 1; 297 return (1); 298 } 299 300 mutex_enter(&zone_datasets_lock); 301 zds = zone_datasets_lookup(crgetzoneid(curproc->cred)); 302 if (zds == NULL) { 303 mutex_exit(&zone_datasets_lock); 304 return (0); 305 } 306 307 visible = 0; 308 list_for_each_entry(zd, &zds->zds_datasets, zd_list) { 309 zd_len = strlen(zd->zd_dsname); 310 if (zd_len > dsnamelen) { 311 /* 312 * The name of the namespace entry is longer than that 313 * of the dataset, so it could be that the dataset is a 314 * parent of the namespace entry. 315 */ 316 visible = memcmp(zd->zd_dsname, dataset, 317 dsnamelen) == 0 && 318 zd->zd_dsname[dsnamelen] == '/'; 319 if (visible) 320 break; 321 } else if (zd_len == dsnamelen) { 322 /* 323 * The name of the namespace entry is as long as that 324 * of the dataset, so perhaps the dataset itself is the 325 * namespace entry. 326 */ 327 visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0; 328 if (visible) { 329 if (write != NULL) 330 *write = 1; 331 break; 332 } 333 } else { 334 /* 335 * The name of the namespace entry is shorter than that 336 * of the dataset, so perhaps the dataset is a child of 337 * the namespace entry. 338 */ 339 visible = memcmp(zd->zd_dsname, dataset, 340 zd_len) == 0 && dataset[zd_len] == '/'; 341 if (visible) { 342 if (write != NULL) 343 *write = 1; 344 break; 345 } 346 } 347 } 348 349 mutex_exit(&zone_datasets_lock); 350 return (visible); 351 } 352 EXPORT_SYMBOL(zone_dataset_visible); 353 354 unsigned int 355 global_zoneid(void) 356 { 357 unsigned int z = 0; 358 359 #if defined(CONFIG_USER_NS) 360 z = user_ns_zoneid(&init_user_ns); 361 #endif 362 363 return (z); 364 } 365 EXPORT_SYMBOL(global_zoneid); 366 367 unsigned int 368 crgetzoneid(const cred_t *cr) 369 { 370 unsigned int r = 0; 371 372 #if defined(CONFIG_USER_NS) 373 r = user_ns_zoneid(cr->user_ns); 374 #endif 375 376 return (r); 377 } 378 EXPORT_SYMBOL(crgetzoneid); 379 380 boolean_t 381 inglobalzone(proc_t *proc) 382 { 383 #if defined(CONFIG_USER_NS) 384 return (proc->cred->user_ns == &init_user_ns); 385 #else 386 return (B_TRUE); 387 #endif 388 } 389 EXPORT_SYMBOL(inglobalzone); 390 391 int 392 spl_zone_init(void) 393 { 394 mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL); 395 INIT_LIST_HEAD(&zone_datasets); 396 return (0); 397 } 398 399 void 400 spl_zone_fini(void) 401 { 402 zone_datasets_t *zds; 403 zone_dataset_t *zd; 404 405 /* 406 * It would be better to assert an empty zone_datasets, but since 407 * there's no automatic mechanism for cleaning them up if the user 408 * namespace is destroyed, just do it here, since spl is about to go 409 * out of context. 410 */ 411 while (!list_empty(&zone_datasets)) { 412 zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list); 413 while (!list_empty(&zds->zds_datasets)) { 414 zd = list_entry(zds->zds_datasets.next, 415 zone_dataset_t, zd_list); 416 list_del(&zd->zd_list); 417 kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1); 418 } 419 put_user_ns(zds->zds_userns); 420 list_del(&zds->zds_list); 421 kmem_free(zds, sizeof (*zds)); 422 } 423 mutex_destroy(&zone_datasets_lock); 424 } 425