1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2013 Joyent, Inc. All rights reserved. 
25 */ 26 27 /* vnode ops for the /dev/zvol directory */ 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/sysmacros.h> 32 #include <sys/ddi.h> 33 #include <sys/sunndi.h> 34 #include <sys/sunldi.h> 35 #include <fs/fs_subr.h> 36 #include <sys/fs/dv_node.h> 37 #include <sys/fs/sdev_impl.h> 38 #include <sys/zfs_ioctl.h> 39 #include <sys/policy.h> 40 #include <sys/stat.h> 41 #include <sys/vfs_opreg.h> 42 43 struct vnodeops *devzvol_vnodeops; 44 static major_t devzvol_major; 45 static taskq_ent_t devzvol_zclist_task; 46 47 static kmutex_t devzvol_mtx; 48 /* Below are protected by devzvol_mtx */ 49 static boolean_t devzvol_isopen; 50 static boolean_t devzvol_zclist_task_running = B_FALSE; 51 static uint64_t devzvol_gen = 0; 52 static uint64_t devzvol_zclist; 53 static size_t devzvol_zclist_size; 54 static ldi_ident_t devzvol_li; 55 static ldi_handle_t devzvol_lh; 56 57 /* 58 * we need to use ddi_mod* since fs/dev gets loaded early on in 59 * startup(), and linking fs/dev to fs/zfs would drag in a lot of 60 * other stuff (like drv/random) before the rest of the system is 61 * ready to go 62 */ 63 ddi_modhandle_t zfs_mod; 64 int (*szcm)(char *); 65 int (*szn2m)(char *, minor_t *); 66 67 int 68 sdev_zvol_create_minor(char *dsname) 69 { 70 if (szcm == NULL) 71 return (-1); 72 return ((*szcm)(dsname)); 73 } 74 75 int 76 sdev_zvol_name2minor(char *dsname, minor_t *minor) 77 { 78 if (szn2m == NULL) 79 return (-1); 80 return ((*szn2m)(dsname, minor)); 81 } 82 83 int 84 devzvol_open_zfs() 85 { 86 int rc; 87 dev_t dv; 88 89 devzvol_li = ldi_ident_from_anon(); 90 if (ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred, 91 &devzvol_lh, devzvol_li)) 92 return (-1); 93 if (zfs_mod == NULL && ((zfs_mod = ddi_modopen("fs/zfs", 94 KRTLD_MODE_FIRST, &rc)) == NULL)) { 95 return (rc); 96 } 97 ASSERT(szcm == NULL && szn2m == NULL); 98 if ((szcm = (int (*)(char *)) 99 ddi_modsym(zfs_mod, "zvol_create_minor", &rc)) == NULL) { 100 cmn_err(CE_WARN, "couldn't resolve 
zvol_create_minor"); 101 return (rc); 102 } 103 if ((szn2m = (int(*)(char *, minor_t *)) 104 ddi_modsym(zfs_mod, "zvol_name2minor", &rc)) == NULL) { 105 cmn_err(CE_WARN, "couldn't resolve zvol_name2minor"); 106 return (rc); 107 } 108 if (ldi_get_dev(devzvol_lh, &dv)) 109 return (-1); 110 devzvol_major = getmajor(dv); 111 return (0); 112 } 113 114 void 115 devzvol_close_zfs() 116 { 117 szcm = NULL; 118 szn2m = NULL; 119 (void) ldi_close(devzvol_lh, FREAD|FWRITE, kcred); 120 ldi_ident_release(devzvol_li); 121 if (zfs_mod != NULL) { 122 (void) ddi_modclose(zfs_mod); 123 zfs_mod = NULL; 124 } 125 } 126 127 int 128 devzvol_handle_ioctl(int cmd, zfs_cmd_t *zc, size_t *alloc_size) 129 { 130 uint64_t cookie; 131 int size = 8000; 132 int unused; 133 int rc; 134 135 if (cmd != ZFS_IOC_POOL_CONFIGS) 136 mutex_enter(&devzvol_mtx); 137 if (!devzvol_isopen) { 138 if ((rc = devzvol_open_zfs()) == 0) { 139 devzvol_isopen = B_TRUE; 140 } else { 141 if (cmd != ZFS_IOC_POOL_CONFIGS) 142 mutex_exit(&devzvol_mtx); 143 return (ENXIO); 144 } 145 } 146 cookie = zc->zc_cookie; 147 again: 148 zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(size, 149 KM_SLEEP); 150 zc->zc_nvlist_dst_size = size; 151 rc = ldi_ioctl(devzvol_lh, cmd, (intptr_t)zc, FKIOCTL, kcred, 152 &unused); 153 if (rc == ENOMEM) { 154 int newsize; 155 newsize = zc->zc_nvlist_dst_size; 156 ASSERT(newsize > size); 157 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size); 158 size = newsize; 159 zc->zc_cookie = cookie; 160 goto again; 161 } 162 if (alloc_size == NULL) 163 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size); 164 else 165 *alloc_size = size; 166 if (cmd != ZFS_IOC_POOL_CONFIGS) 167 mutex_exit(&devzvol_mtx); 168 return (rc); 169 } 170 171 /* figures out if the objset exists and returns its type */ 172 int 173 devzvol_objset_check(char *dsname, dmu_objset_type_t *type) 174 { 175 boolean_t ispool; 176 zfs_cmd_t *zc; 177 int rc; 178 179 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); 180 (void) 
strlcpy(zc->zc_name, dsname, MAXPATHLEN); 181 182 ispool = (strchr(dsname, '/') == NULL) ? B_TRUE : B_FALSE; 183 if (!ispool && sdev_zvol_name2minor(dsname, NULL) == 0) { 184 sdcmn_err13(("found cached minor node")); 185 if (type) 186 *type = DMU_OST_ZVOL; 187 kmem_free(zc, sizeof (zfs_cmd_t)); 188 return (0); 189 } 190 rc = devzvol_handle_ioctl(ispool ? ZFS_IOC_POOL_STATS : 191 ZFS_IOC_OBJSET_STATS, zc, NULL); 192 if (type && rc == 0) 193 *type = (ispool) ? DMU_OST_ZFS : 194 zc->zc_objset_stats.dds_type; 195 kmem_free(zc, sizeof (zfs_cmd_t)); 196 return (rc); 197 } 198 199 /* 200 * Returns what the zfs dataset name should be, given the /dev/zvol 201 * path and an optional name (can be NULL). 202 * 203 * Note that if the name param is NULL, then path must be an 204 * actual dataset's directory and not one of the top-level 205 * /dev/zvol/{dsk,rdsk} dirs, as these do not correspond to a 206 * specific dataset. 207 */ 208 char * 209 devzvol_make_dsname(const char *path, const char *name) 210 { 211 char *dsname; 212 const char *ptr; 213 int dslen; 214 215 if (strcmp(path, ZVOL_DIR) == 0) 216 return (NULL); 217 if (name && (strcmp(name, ".") == 0 || strcmp(name, "..") == 0)) 218 return (NULL); 219 ptr = path + strlen(ZVOL_DIR); 220 if (strncmp(ptr, "/dsk", 4) == 0) 221 ptr += strlen("/dsk"); 222 else if (strncmp(ptr, "/rdsk", 5) == 0) 223 ptr += strlen("/rdsk"); 224 else 225 return (NULL); 226 227 if (*ptr == '/') 228 ptr++; 229 else if (name == NULL) 230 return (NULL); 231 232 dslen = strlen(ptr); 233 if (dslen) 234 dslen++; /* plus null */ 235 if (name) 236 dslen += strlen(name) + 1; /* plus slash */ 237 dsname = kmem_zalloc(dslen, KM_SLEEP); 238 if (*ptr) { 239 (void) strlcpy(dsname, ptr, dslen); 240 if (name) 241 (void) strlcat(dsname, "/", dslen); 242 } 243 if (name) 244 (void) strlcat(dsname, name, dslen); 245 return (dsname); 246 } 247 248 /* 249 * check if the zvol's sdev_node is still valid, which means make 250 * sure the zvol is still valid. 
zvol minors aren't proactively 251 * destroyed when the zvol is destroyed, so we use a validator to clean 252 * these up (in other words, when such nodes are encountered during 253 * subsequent lookup() and readdir() operations) so that only valid 254 * nodes are returned. The ordering between devname_lookup_func and 255 * devzvol_validate is a little inefficient in the case of invalid 256 * or stale nodes because devname_lookup_func calls 257 * devzvol_create_{dir, link}, then the validator says it's invalid, 258 * and then the node gets cleaned up. 259 */ 260 int 261 devzvol_validate(struct sdev_node *dv) 262 { 263 dmu_objset_type_t do_type; 264 char *dsname; 265 char *nm = dv->sdev_name; 266 int rc; 267 268 sdcmn_err13(("validating ('%s' '%s')", dv->sdev_path, nm)); 269 /* 270 * validate only READY nodes; if someone is sitting on the 271 * directory of a dataset that just got destroyed we could 272 * get a zombie node which we just skip. 273 */ 274 if (dv->sdev_state != SDEV_READY) { 275 sdcmn_err13(("skipping '%s'", nm)); 276 return (SDEV_VTOR_SKIP); 277 } 278 279 if ((strcmp(dv->sdev_path, ZVOL_DIR "/dsk") == 0) || 280 (strcmp(dv->sdev_path, ZVOL_DIR "/rdsk") == 0)) 281 return (SDEV_VTOR_VALID); 282 dsname = devzvol_make_dsname(dv->sdev_path, NULL); 283 if (dsname == NULL) 284 return (SDEV_VTOR_INVALID); 285 286 rc = devzvol_objset_check(dsname, &do_type); 287 sdcmn_err13((" '%s' rc %d", dsname, rc)); 288 if (rc != 0) { 289 kmem_free(dsname, strlen(dsname) + 1); 290 return (SDEV_VTOR_INVALID); 291 } 292 sdcmn_err13((" v_type %d do_type %d", 293 SDEVTOV(dv)->v_type, do_type)); 294 if ((SDEVTOV(dv)->v_type == VLNK && do_type != DMU_OST_ZVOL) || 295 ((SDEVTOV(dv)->v_type == VBLK || SDEVTOV(dv)->v_type == VCHR) && 296 do_type != DMU_OST_ZVOL) || 297 (SDEVTOV(dv)->v_type == VDIR && do_type == DMU_OST_ZVOL)) { 298 kmem_free(dsname, strlen(dsname) + 1); 299 return (SDEV_VTOR_STALE); 300 } 301 if (SDEVTOV(dv)->v_type == VLNK) { 302 char *ptr, *link; 303 long val = 0; 
304 minor_t lminor, ominor; 305 306 rc = sdev_getlink(SDEVTOV(dv), &link); 307 ASSERT(rc == 0); 308 309 ptr = strrchr(link, ':') + 1; 310 rc = ddi_strtol(ptr, NULL, 10, &val); 311 kmem_free(link, strlen(link) + 1); 312 ASSERT(rc == 0 && val != 0); 313 lminor = (minor_t)val; 314 if (sdev_zvol_name2minor(dsname, &ominor) < 0 || 315 ominor != lminor) { 316 kmem_free(dsname, strlen(dsname) + 1); 317 return (SDEV_VTOR_STALE); 318 } 319 } 320 kmem_free(dsname, strlen(dsname) + 1); 321 return (SDEV_VTOR_VALID); 322 } 323 324 /* 325 * Taskq callback to update the devzvol_zclist. 326 * 327 * We need to defer this to the taskq to avoid it running with a user 328 * context that might be associated with some non-global zone, and thus 329 * not being able to list all of the pools on the entire system. 330 */ 331 /*ARGSUSED*/ 332 static void 333 devzvol_update_zclist_cb(void *arg) 334 { 335 zfs_cmd_t *zc; 336 int rc; 337 size_t size; 338 339 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); 340 mutex_enter(&devzvol_mtx); 341 zc->zc_cookie = devzvol_gen; 342 343 rc = devzvol_handle_ioctl(ZFS_IOC_POOL_CONFIGS, zc, &size); 344 switch (rc) { 345 case 0: 346 /* new generation */ 347 ASSERT(devzvol_gen != zc->zc_cookie); 348 devzvol_gen = zc->zc_cookie; 349 if (devzvol_zclist) 350 kmem_free((void *)(uintptr_t)devzvol_zclist, 351 devzvol_zclist_size); 352 devzvol_zclist = zc->zc_nvlist_dst; 353 /* Keep the alloc'd size, not the nvlist size. */ 354 devzvol_zclist_size = size; 355 break; 356 default: 357 /* 358 * Either there was no change in pool configuration 359 * since we last asked (rc == EEXIST) or we got a 360 * catastrophic error. 361 * 362 * Give up memory and exit. 
363 */ 364 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, 365 size); 366 break; 367 } 368 369 VERIFY(devzvol_zclist_task_running == B_TRUE); 370 devzvol_zclist_task_running = B_FALSE; 371 mutex_exit(&devzvol_mtx); 372 373 kmem_free(zc, sizeof (zfs_cmd_t)); 374 } 375 376 static void 377 devzvol_update_zclist(void) 378 { 379 mutex_enter(&devzvol_mtx); 380 if (devzvol_zclist_task_running == B_TRUE) { 381 mutex_exit(&devzvol_mtx); 382 goto wait; 383 } 384 385 devzvol_zclist_task_running = B_TRUE; 386 387 taskq_dispatch_ent(sdev_taskq, devzvol_update_zclist_cb, NULL, 0, 388 &devzvol_zclist_task); 389 390 mutex_exit(&devzvol_mtx); 391 392 wait: 393 taskq_wait(sdev_taskq); 394 } 395 396 /* 397 * Creates sub-directories for each zpool as needed in response to a 398 * readdir on one of the /dev/zvol/{dsk,rdsk} directories. 399 */ 400 void 401 devzvol_create_pool_dirs(struct vnode *dvp) 402 { 403 nvlist_t *nv = NULL; 404 nvpair_t *elem = NULL; 405 int pools = 0; 406 int rc; 407 408 sdcmn_err13(("devzvol_create_pool_dirs")); 409 410 devzvol_update_zclist(); 411 412 mutex_enter(&devzvol_mtx); 413 414 rc = nvlist_unpack((char *)(uintptr_t)devzvol_zclist, 415 devzvol_zclist_size, &nv, 0); 416 if (rc) { 417 ASSERT(rc == 0); 418 kmem_free((void *)(uintptr_t)devzvol_zclist, 419 devzvol_zclist_size); 420 devzvol_gen = 0; 421 devzvol_zclist = NULL; 422 devzvol_zclist_size = 0; 423 goto out; 424 } 425 mutex_exit(&devzvol_mtx); 426 while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) { 427 struct vnode *vp; 428 ASSERT(dvp->v_count > 0); 429 rc = VOP_LOOKUP(dvp, nvpair_name(elem), &vp, NULL, 0, 430 NULL, kcred, NULL, 0, NULL); 431 /* should either work, or not be visible from a zone */ 432 ASSERT(rc == 0 || rc == ENOENT); 433 if (rc == 0) 434 VN_RELE(vp); 435 pools++; 436 } 437 nvlist_free(nv); 438 mutex_enter(&devzvol_mtx); 439 if (devzvol_isopen && pools == 0) { 440 /* clean up so zfs can be unloaded */ 441 devzvol_close_zfs(); 442 devzvol_isopen = B_FALSE; 443 } 444 out: 445 
mutex_exit(&devzvol_mtx); 446 } 447 448 /*ARGSUSED3*/ 449 static int 450 devzvol_create_dir(struct sdev_node *ddv, char *nm, void **arg, 451 cred_t *cred, void *whatever, char *whichever) 452 { 453 timestruc_t now; 454 struct vattr *vap = (struct vattr *)arg; 455 456 sdcmn_err13(("create_dir (%s) (%s) '%s'", ddv->sdev_name, 457 ddv->sdev_path, nm)); 458 ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR, 459 strlen(ZVOL_DIR)) == 0); 460 *vap = *sdev_getdefault_attr(VDIR); 461 gethrestime(&now); 462 vap->va_atime = now; 463 vap->va_mtime = now; 464 vap->va_ctime = now; 465 return (0); 466 } 467 468 /*ARGSUSED3*/ 469 static int 470 devzvol_create_link(struct sdev_node *ddv, char *nm, 471 void **arg, cred_t *cred, void *whatever, char *whichever) 472 { 473 minor_t minor; 474 char *pathname = (char *)*arg; 475 int rc; 476 char *dsname; 477 char *x; 478 char str[MAXNAMELEN]; 479 sdcmn_err13(("create_link (%s) (%s) '%s'", ddv->sdev_name, 480 ddv->sdev_path, nm)); 481 dsname = devzvol_make_dsname(ddv->sdev_path, nm); 482 rc = sdev_zvol_create_minor(dsname); 483 if ((rc != 0 && rc != EEXIST && rc != EBUSY) || 484 sdev_zvol_name2minor(dsname, &minor)) { 485 sdcmn_err13(("devzvol_create_link %d", rc)); 486 kmem_free(dsname, strlen(dsname) + 1); 487 return (-1); 488 } 489 kmem_free(dsname, strlen(dsname) + 1); 490 491 /* 492 * This is a valid zvol; create a symlink that points to the 493 * minor which was created under /devices/pseudo/zfs@0 494 */ 495 *pathname = '\0'; 496 for (x = ddv->sdev_path; x = strchr(x, '/'); x++) 497 (void) strcat(pathname, "../"); 498 (void) snprintf(str, sizeof (str), ZVOL_PSEUDO_DEV "%u", minor); 499 (void) strncat(pathname, str, MAXPATHLEN); 500 if (strncmp(ddv->sdev_path, ZVOL_FULL_RDEV_DIR, 501 strlen(ZVOL_FULL_RDEV_DIR)) == 0) 502 (void) strcat(pathname, ",raw"); 503 return (0); 504 } 505 506 /* Clean zvol sdev_nodes that are no longer valid. 
*/
static void
devzvol_prunedir(struct sdev_node *ddv)
{
	struct sdev_node *node;

	/* the caller hands us the directory with its contents read-locked */
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));

	sdcmn_err13(("prunedir '%s'", ddv->sdev_name));
	ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR, strlen(ZVOL_DIR)) == 0);

	/*
	 * Become the writer.  If the in-place upgrade fails, drop the
	 * lock and take it again as writer.
	 */
	if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
		rw_exit(&ddv->sdev_contents);
		rw_enter(&ddv->sdev_contents, RW_WRITER);
	}

	node = SDEV_FIRST_ENTRY(ddv);
	while (node != NULL) {
		int verdict;

		sdcmn_err13(("sdev_name '%s'", node->sdev_name));

		verdict = devzvol_validate(node);
		if (verdict == SDEV_VTOR_VALID || verdict == SDEV_VTOR_SKIP) {
			/* keep this one; move along */
			node = SDEV_NEXT_ENTRY(ddv, node);
			continue;
		}
		if (verdict == SDEV_VTOR_INVALID) {
			sdcmn_err7(("prunedir: destroy invalid "
			    "node: %s\n", node->sdev_name));
		}

		/* a directory that cannot be emptied is left in place */
		if ((SDEVTOV(node)->v_type == VDIR) &&
		    (sdev_cleandir(node, NULL, 0) != 0)) {
			node = SDEV_NEXT_ENTRY(ddv, node);
			continue;
		}

		SDEV_HOLD(node);
		/* remove the cache node */
		sdev_cache_update(ddv, &node, node->sdev_name,
		    SDEV_CACHE_DELETE);
		SDEV_RELE(node);

		/* after a removal the scan starts over from the first entry */
		node = SDEV_FIRST_ENTRY(ddv);
	}

	/* return to the caller holding the lock as reader again */
	rw_downgrade(&ddv->sdev_contents);
}

/*
 * This function is used to create a dir or dev inside a zone's /dev when the
 * zone has a zvol that is dynamically created within the zone (i.e. inside
 * of a delegated dataset).  Since there is no /devices tree within a zone,
 * we create the chr/blk devices directly inside the zone's /dev instead of
 * making symlinks.
557 */ 558 static int 559 devzvol_mk_ngz_node(struct sdev_node *parent, char *nm) 560 { 561 struct vattr vattr; 562 timestruc_t now; 563 enum vtype expected_type = VDIR; 564 dmu_objset_type_t do_type; 565 struct sdev_node *dv = NULL; 566 int res; 567 char *dsname; 568 569 bzero(&vattr, sizeof (vattr)); 570 gethrestime(&now); 571 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 572 vattr.va_uid = SDEV_UID_DEFAULT; 573 vattr.va_gid = SDEV_GID_DEFAULT; 574 vattr.va_type = VNON; 575 vattr.va_atime = now; 576 vattr.va_mtime = now; 577 vattr.va_ctime = now; 578 579 if ((dsname = devzvol_make_dsname(parent->sdev_path, nm)) == NULL) 580 return (ENOENT); 581 582 if (devzvol_objset_check(dsname, &do_type) != 0) { 583 kmem_free(dsname, strlen(dsname) + 1); 584 return (ENOENT); 585 } 586 if (do_type == DMU_OST_ZVOL) 587 expected_type = VBLK; 588 589 if (expected_type == VDIR) { 590 vattr.va_type = VDIR; 591 vattr.va_mode = SDEV_DIRMODE_DEFAULT; 592 } else { 593 minor_t minor; 594 dev_t devnum; 595 int rc; 596 597 rc = sdev_zvol_create_minor(dsname); 598 if ((rc != 0 && rc != EEXIST && rc != EBUSY) || 599 sdev_zvol_name2minor(dsname, &minor)) { 600 kmem_free(dsname, strlen(dsname) + 1); 601 return (ENOENT); 602 } 603 604 devnum = makedevice(devzvol_major, minor); 605 vattr.va_rdev = devnum; 606 607 if (strstr(parent->sdev_path, "/rdsk/") != NULL) 608 vattr.va_type = VCHR; 609 else 610 vattr.va_type = VBLK; 611 vattr.va_mode = SDEV_DEVMODE_DEFAULT; 612 } 613 kmem_free(dsname, strlen(dsname) + 1); 614 615 rw_enter(&parent->sdev_contents, RW_WRITER); 616 617 res = sdev_mknode(parent, nm, &dv, &vattr, 618 NULL, NULL, kcred, SDEV_READY); 619 rw_exit(&parent->sdev_contents); 620 if (res != 0) 621 return (ENOENT); 622 623 SDEV_RELE(dv); 624 return (0); 625 } 626 627 /*ARGSUSED*/ 628 static int 629 devzvol_lookup(struct vnode *dvp, char *nm, struct vnode **vpp, 630 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred, 631 caller_context_t *ct, int *direntflags, 
pathname_t *realpnp) 632 { 633 enum vtype expected_type = VDIR; 634 struct sdev_node *parent = VTOSDEV(dvp); 635 char *dsname; 636 dmu_objset_type_t do_type; 637 int error; 638 639 sdcmn_err13(("devzvol_lookup '%s' '%s'", parent->sdev_path, nm)); 640 *vpp = NULL; 641 /* execute access is required to search the directory */ 642 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0) 643 return (error); 644 645 rw_enter(&parent->sdev_contents, RW_READER); 646 if (SDEV_IS_GLOBAL(parent)) { 647 /* 648 * During iter_datasets, don't create GZ dev when running in 649 * NGZ. We can't return ENOENT here since that could 650 * incorrectly trigger the creation of the dev from the 651 * recursive call through prof_filldir during iter_datasets. 652 */ 653 if (getzoneid() != GLOBAL_ZONEID) { 654 rw_exit(&parent->sdev_contents); 655 return (EPERM); 656 } 657 } else { 658 int res; 659 660 rw_exit(&parent->sdev_contents); 661 662 /* 663 * If we're in the global zone and reach down into a non-global 664 * zone's /dev/zvol then this action could trigger the creation 665 * of all of the zvol devices for every zone into the non-global 666 * zone's /dev tree. This could be a big security hole. To 667 * prevent this, disallow the global zone from looking inside 668 * a non-global zones /dev/zvol. This behavior is similar to 669 * delegated datasets, which cannot be used by the global zone. 670 */ 671 if (getzoneid() == GLOBAL_ZONEID) 672 return (EPERM); 673 674 res = prof_lookup(dvp, nm, vpp, cred); 675 676 /* 677 * We won't find a zvol that was dynamically created inside 678 * a NGZ, within a delegated dataset, in the zone's dev profile 679 * but prof_lookup will also find it via sdev_cache_lookup. 680 */ 681 if (res == ENOENT) { 682 /* 683 * We have to create the sdev node for the dymamically 684 * created zvol. 
685 */ 686 if (devzvol_mk_ngz_node(parent, nm) != 0) 687 return (ENOENT); 688 res = prof_lookup(dvp, nm, vpp, cred); 689 } 690 691 return (res); 692 } 693 694 dsname = devzvol_make_dsname(parent->sdev_path, nm); 695 rw_exit(&parent->sdev_contents); 696 sdcmn_err13(("rvp dsname %s", dsname ? dsname : "(null)")); 697 if (dsname) { 698 error = devzvol_objset_check(dsname, &do_type); 699 if (error != 0) { 700 error = ENOENT; 701 goto out; 702 } 703 if (do_type == DMU_OST_ZVOL) 704 expected_type = VLNK; 705 } 706 /* 707 * the callbacks expect: 708 * 709 * parent->sdev_path nm 710 * /dev/zvol {r}dsk 711 * /dev/zvol/{r}dsk <pool name> 712 * /dev/zvol/{r}dsk/<dataset name> <last ds component> 713 * 714 * sdev_name is always last path component of sdev_path 715 */ 716 if (expected_type == VDIR) { 717 error = devname_lookup_func(parent, nm, vpp, cred, 718 devzvol_create_dir, SDEV_VATTR); 719 } else { 720 error = devname_lookup_func(parent, nm, vpp, cred, 721 devzvol_create_link, SDEV_VLINK); 722 } 723 sdcmn_err13(("devzvol_lookup %d %d", expected_type, error)); 724 ASSERT(error || ((*vpp)->v_type == expected_type)); 725 out: 726 if (dsname) 727 kmem_free(dsname, strlen(dsname) + 1); 728 sdcmn_err13(("devzvol_lookup %d", error)); 729 return (error); 730 } 731 732 /* 733 * We allow create to find existing nodes 734 * - if the node doesn't exist - EROFS 735 * - creating an existing dir read-only succeeds, otherwise EISDIR 736 * - exclusive creates fail - EEXIST 737 */ 738 /*ARGSUSED2*/ 739 static int 740 devzvol_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl, 741 int mode, struct vnode **vpp, struct cred *cred, int flag, 742 caller_context_t *ct, vsecattr_t *vsecp) 743 { 744 int error; 745 struct vnode *vp; 746 747 *vpp = NULL; 748 749 error = devzvol_lookup(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL, 750 NULL); 751 if (error == 0) { 752 if (excl == EXCL) 753 error = EEXIST; 754 else if (vp->v_type == VDIR && (mode & VWRITE)) 755 error = EISDIR; 756 else 
757 error = VOP_ACCESS(vp, mode, 0, cred, ct); 758 759 if (error) { 760 VN_RELE(vp); 761 } else 762 *vpp = vp; 763 } else if (error == ENOENT) { 764 error = EROFS; 765 } 766 767 return (error); 768 } 769 770 void sdev_iter_snapshots(struct vnode *dvp, char *name); 771 772 void 773 sdev_iter_datasets(struct vnode *dvp, int arg, char *name) 774 { 775 zfs_cmd_t *zc; 776 int rc; 777 778 sdcmn_err13(("iter name is '%s' (arg %x)", name, arg)); 779 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); 780 (void) strcpy(zc->zc_name, name); 781 782 while ((rc = devzvol_handle_ioctl(arg, zc, B_FALSE)) == 0) { 783 struct vnode *vpp; 784 char *ptr; 785 786 sdcmn_err13((" name %s", zc->zc_name)); 787 if (strchr(zc->zc_name, '$') || strchr(zc->zc_name, '%')) 788 goto skip; 789 ptr = strrchr(zc->zc_name, '/') + 1; 790 rc = devzvol_lookup(dvp, ptr, &vpp, NULL, 0, NULL, 791 kcred, NULL, NULL, NULL); 792 if (rc == 0) { 793 VN_RELE(vpp); 794 } else if (rc == ENOENT) { 795 goto skip; 796 } else { 797 /* 798 * EBUSY == problem with zvols's dmu holds? 799 * EPERM when in a NGZ and traversing up and out. 
800 */ 801 goto skip; 802 } 803 if (arg == ZFS_IOC_DATASET_LIST_NEXT && 804 zc->zc_objset_stats.dds_type != DMU_OST_ZFS) 805 sdev_iter_snapshots(dvp, zc->zc_name); 806 skip: 807 (void) strcpy(zc->zc_name, name); 808 } 809 kmem_free(zc, sizeof (zfs_cmd_t)); 810 } 811 812 void 813 sdev_iter_snapshots(struct vnode *dvp, char *name) 814 { 815 sdev_iter_datasets(dvp, ZFS_IOC_SNAPSHOT_LIST_NEXT, name); 816 } 817 818 /*ARGSUSED4*/ 819 static int 820 devzvol_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred, 821 int *eofp, caller_context_t *ct_unused, int flags_unused) 822 { 823 struct sdev_node *sdvp = VTOSDEV(dvp); 824 char *ptr; 825 826 sdcmn_err13(("zv readdir of '%s' %s'", sdvp->sdev_path, 827 sdvp->sdev_name)); 828 829 if (strcmp(sdvp->sdev_path, ZVOL_DIR) == 0) { 830 struct vnode *vp; 831 832 rw_exit(&sdvp->sdev_contents); 833 (void) devname_lookup_func(sdvp, "dsk", &vp, cred, 834 devzvol_create_dir, SDEV_VATTR); 835 VN_RELE(vp); 836 (void) devname_lookup_func(sdvp, "rdsk", &vp, cred, 837 devzvol_create_dir, SDEV_VATTR); 838 VN_RELE(vp); 839 rw_enter(&sdvp->sdev_contents, RW_READER); 840 return (devname_readdir_func(dvp, uiop, cred, eofp, 0)); 841 } 842 if (uiop->uio_offset == 0) 843 devzvol_prunedir(sdvp); 844 ptr = sdvp->sdev_path + strlen(ZVOL_DIR); 845 if ((strcmp(ptr, "/dsk") == 0) || (strcmp(ptr, "/rdsk") == 0)) { 846 rw_exit(&sdvp->sdev_contents); 847 devzvol_create_pool_dirs(dvp); 848 rw_enter(&sdvp->sdev_contents, RW_READER); 849 return (devname_readdir_func(dvp, uiop, cred, eofp, 0)); 850 } 851 852 ptr = strchr(ptr + 1, '/'); 853 if (ptr == NULL) 854 return (ENOENT); 855 ptr++; 856 rw_exit(&sdvp->sdev_contents); 857 sdev_iter_datasets(dvp, ZFS_IOC_DATASET_LIST_NEXT, ptr); 858 rw_enter(&sdvp->sdev_contents, RW_READER); 859 return (devname_readdir_func(dvp, uiop, cred, eofp, 0)); 860 } 861 862 const fs_operation_def_t devzvol_vnodeops_tbl[] = { 863 VOPNAME_READDIR, { .vop_readdir = devzvol_readdir }, 864 VOPNAME_LOOKUP, { .vop_lookup = 
devzvol_lookup }, 865 VOPNAME_CREATE, { .vop_create = devzvol_create }, 866 VOPNAME_RENAME, { .error = fs_nosys }, 867 VOPNAME_MKDIR, { .error = fs_nosys }, 868 VOPNAME_RMDIR, { .error = fs_nosys }, 869 VOPNAME_REMOVE, { .error = fs_nosys }, 870 VOPNAME_SYMLINK, { .error = fs_nosys }, 871 NULL, NULL 872 }; 873