1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2013, 2016 Joyent, Inc. All rights reserved. 24 * Copyright (c) 2017 by Delphix. All rights reserved. 25 */ 26 27 /* 28 * utility routines for the /dev fs 29 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/t_lock.h> 34 #include <sys/systm.h> 35 #include <sys/sysmacros.h> 36 #include <sys/user.h> 37 #include <sys/time.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/file.h> 41 #include <sys/fcntl.h> 42 #include <sys/flock.h> 43 #include <sys/kmem.h> 44 #include <sys/uio.h> 45 #include <sys/errno.h> 46 #include <sys/stat.h> 47 #include <sys/cred.h> 48 #include <sys/dirent.h> 49 #include <sys/pathname.h> 50 #include <sys/cmn_err.h> 51 #include <sys/debug.h> 52 #include <sys/mode.h> 53 #include <sys/policy.h> 54 #include <fs/fs_subr.h> 55 #include <sys/mount.h> 56 #include <sys/fs/snode.h> 57 #include <sys/fs/dv_node.h> 58 #include <sys/fs/sdev_impl.h> 59 #include <sys/sunndi.h> 60 #include <sys/sunmdi.h> 61 #include <sys/conf.h> 62 #include <sys/proc.h> 63 #include <sys/user.h> 64 #include <sys/modctl.h> 65 66 #ifdef DEBUG 67 int sdev_debug = 0x00000001; 68 int sdev_debug_cache_flags = 0; 69 #endif 70 71 /* 72 * globals 73 */ 74 /* prototype memory vattrs */ 75 vattr_t sdev_vattr_dir = { 76 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 77 VDIR, /* va_type */ 78 SDEV_DIRMODE_DEFAULT, /* va_mode */ 79 SDEV_UID_DEFAULT, /* va_uid */ 80 SDEV_GID_DEFAULT, /* va_gid */ 81 0, /* va_fsid */ 82 0, /* va_nodeid */ 83 0, /* va_nlink */ 84 0, /* va_size */ 85 0, /* va_atime */ 86 0, /* va_mtime */ 87 0, /* va_ctime */ 88 0, /* va_rdev */ 89 0, /* va_blksize */ 90 0, /* va_nblocks */ 91 0 /* va_vcode */ 92 }; 93 94 vattr_t sdev_vattr_lnk = { 95 AT_TYPE|AT_MODE, /* va_mask */ 96 VLNK, /* va_type */ 97 SDEV_LNKMODE_DEFAULT, /* va_mode */ 98 SDEV_UID_DEFAULT, /* va_uid */ 99 SDEV_GID_DEFAULT, /* va_gid */ 100 0, /* va_fsid */ 101 0, /* va_nodeid */ 102 0, /* va_nlink */ 103 0, /* va_size */ 104 0, /* va_atime */ 105 0, /* va_mtime */ 106 0, /* va_ctime */ 107 0, /* va_rdev */ 108 0, /* va_blksize */ 109 0, /* va_nblocks */ 110 0 /* va_vcode */ 111 }; 112 113 vattr_t sdev_vattr_blk = { 114 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 115 VBLK, /* va_type */ 116 S_IFBLK | SDEV_DEVMODE_DEFAULT, /* va_mode */ 117 SDEV_UID_DEFAULT, /* va_uid */ 118 SDEV_GID_DEFAULT, /* va_gid */ 119 0, /* va_fsid */ 120 0, /* va_nodeid */ 121 0, /* va_nlink */ 122 0, /* va_size */ 123 0, /* va_atime */ 124 0, /* va_mtime */ 125 0, /* va_ctime */ 126 0, /* va_rdev */ 127 0, /* va_blksize */ 128 0, /* va_nblocks */ 129 0 /* va_vcode */ 130 }; 131 132 vattr_t sdev_vattr_chr = { 133 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 134 VCHR, /* va_type */ 135 S_IFCHR | SDEV_DEVMODE_DEFAULT, /* va_mode */ 136 SDEV_UID_DEFAULT, /* va_uid */ 137 SDEV_GID_DEFAULT, /* va_gid */ 138 0, /* va_fsid */ 139 0, /* va_nodeid */ 140 0, /* va_nlink */ 141 0, /* va_size */ 142 0, /* va_atime */ 143 0, /* va_mtime */ 144 0, /* va_ctime */ 145 0, /* va_rdev */ 146 0, /* va_blksize */ 147 0, /* va_nblocks */ 148 0 /* va_vcode */ 149 }; 150 151 kmem_cache_t *sdev_node_cache; /* sdev_node cache */ 152 int devtype; /* fstype */ 153 154 static void 155 sdev_prof_free(struct sdev_node *dv) 156 { 157 ASSERT(!SDEV_IS_GLOBAL(dv)); 158 nvlist_free(dv->sdev_prof.dev_name); 159 nvlist_free(dv->sdev_prof.dev_map); 160 nvlist_free(dv->sdev_prof.dev_symlink); 161 nvlist_free(dv->sdev_prof.dev_glob_incdir); 162 nvlist_free(dv->sdev_prof.dev_glob_excdir); 163 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 164 } 165 166 /* sdev_node cache constructor */ 167 /*ARGSUSED1*/ 168 static int 169 i_sdev_node_ctor(void *buf, void *cfarg, int flag) 170 { 171 struct sdev_node *dv = (struct sdev_node *)buf; 172 struct vnode *vp; 173 174 bzero(buf, sizeof (struct sdev_node)); 175 vp = dv->sdev_vnode = vn_alloc(flag); 176 if (vp == NULL) { 177 return (-1); 178 } 179 vp->v_data = dv; 180 rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL); 181 return (0); 182 } 183 184 /* sdev_node cache destructor */ 185 /*ARGSUSED1*/ 186 static void 187 i_sdev_node_dtor(void *buf, void *arg) 188 { 189 struct sdev_node *dv = (struct sdev_node *)buf; 190 struct vnode *vp = SDEVTOV(dv); 191 192 rw_destroy(&dv->sdev_contents); 193 vn_free(vp); 194 } 195 196 /* initialize sdev_node cache */ 197 void 198 sdev_node_cache_init() 199 { 200 int flags = 0; 201 202 #ifdef DEBUG 203 flags = sdev_debug_cache_flags; 204 if (flags) 205 sdcmn_err(("cache debug flags 0x%x\n", flags)); 206 #endif /* DEBUG */ 207 208 ASSERT(sdev_node_cache == NULL); 209 sdev_node_cache = kmem_cache_create("sdev_node_cache", 210 sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor, 211 NULL, NULL, NULL, flags); 212 } 213 214 /* destroy sdev_node cache */ 215 void 216 sdev_node_cache_fini() 217 { 218 ASSERT(sdev_node_cache != NULL); 219 kmem_cache_destroy(sdev_node_cache); 220 sdev_node_cache = NULL; 221 } 222 223 /* 224 * Compare two nodes lexographically to balance avl tree 225 */ 226 static int 227 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2) 228 { 229 int rv; 230 if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0) 231 return (0); 232 return ((rv < 0) ? -1 : 1); 233 } 234 235 void 236 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state) 237 { 238 ASSERT(dv); 239 ASSERT(RW_WRITE_HELD(&dv->sdev_contents)); 240 dv->sdev_state = state; 241 } 242 243 static void 244 sdev_attr_update(struct sdev_node *dv, vattr_t *vap) 245 { 246 timestruc_t now; 247 struct vattr *attrp; 248 uint_t mask; 249 250 ASSERT(dv->sdev_attr); 251 ASSERT(vap); 252 253 attrp = dv->sdev_attr; 254 mask = vap->va_mask; 255 if (mask & AT_TYPE) 256 attrp->va_type = vap->va_type; 257 if (mask & AT_MODE) 258 attrp->va_mode = vap->va_mode; 259 if (mask & AT_UID) 260 attrp->va_uid = vap->va_uid; 261 if (mask & AT_GID) 262 attrp->va_gid = vap->va_gid; 263 if (mask & AT_RDEV) 264 attrp->va_rdev = vap->va_rdev; 265 266 gethrestime(&now); 267 attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now; 268 attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now; 269 attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now; 270 } 271 272 static void 273 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap) 274 { 275 ASSERT(dv->sdev_attr == NULL); 276 ASSERT(vap->va_mask & AT_TYPE); 277 ASSERT(vap->va_mask & AT_MODE); 278 279 dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP); 280 sdev_attr_update(dv, vap); 281 } 282 283 /* alloc and initialize a sdev_node */ 284 int 285 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, 286 vattr_t *vap) 287 { 288 struct sdev_node *dv = NULL; 289 struct vnode *vp; 290 size_t nmlen, len; 291 devname_handle_t *dhl; 292 293 nmlen = strlen(nm) + 1; 294 if (nmlen > MAXNAMELEN) { 295 sdcmn_err9(("sdev_nodeinit: node name %s" 296 " too long\n", nm)); 297 *newdv = NULL; 298 return (ENAMETOOLONG); 299 } 300 301 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); 302 303 dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP); 304 bcopy(nm, dv->sdev_name, nmlen); 305 dv->sdev_namelen = nmlen - 1; /* '\0' not included */ 306 len = strlen(ddv->sdev_path) + strlen(nm) + 2; 307 dv->sdev_path = kmem_alloc(len, KM_SLEEP); 308 (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm); 309 /* overwritten for VLNK nodes */ 310 dv->sdev_symlink = NULL; 311 list_link_init(&dv->sdev_plist); 312 313 vp = SDEVTOV(dv); 314 vn_reinit(vp); 315 vp->v_vfsp = SDEVTOV(ddv)->v_vfsp; 316 if (vap) 317 vp->v_type = vap->va_type; 318 319 /* 320 * initialized to the parent's vnodeops. 321 * maybe overwriten for a VDIR 322 */ 323 vn_setops(vp, vn_getops(SDEVTOV(ddv))); 324 vn_exists(vp); 325 326 dv->sdev_dotdot = NULL; 327 dv->sdev_attrvp = NULL; 328 if (vap) { 329 sdev_attr_alloc(dv, vap); 330 } else { 331 dv->sdev_attr = NULL; 332 } 333 334 dv->sdev_ino = sdev_mkino(dv); 335 dv->sdev_nlink = 0; /* updated on insert */ 336 dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */ 337 dv->sdev_flags |= SDEV_BUILD; 338 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); 339 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); 340 if (SDEV_IS_GLOBAL(ddv)) { 341 dv->sdev_flags |= SDEV_GLOBAL; 342 dhl = &(dv->sdev_handle); 343 dhl->dh_data = dv; 344 dhl->dh_args = NULL; 345 sdev_set_no_negcache(dv); 346 dv->sdev_gdir_gen = 0; 347 } else { 348 dv->sdev_flags &= ~SDEV_GLOBAL; 349 dv->sdev_origin = NULL; /* set later */ 350 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 351 dv->sdev_ldir_gen = 0; 352 dv->sdev_devtree_gen = 0; 353 } 354 355 rw_enter(&dv->sdev_contents, RW_WRITER); 356 sdev_set_nodestate(dv, SDEV_INIT); 357 rw_exit(&dv->sdev_contents); 358 *newdv = dv; 359 360 return (0); 361 } 362 363 /* 364 * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the 365 * caller to transition the node to the SDEV_ZOMBIE state. 366 */ 367 int 368 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp, 369 void *args, struct cred *cred) 370 { 371 int error = 0; 372 struct vnode *vp = SDEVTOV(dv); 373 vtype_t type; 374 375 ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap); 376 377 type = vap->va_type; 378 vp->v_type = type; 379 vp->v_rdev = vap->va_rdev; 380 rw_enter(&dv->sdev_contents, RW_WRITER); 381 if (type == VDIR) { 382 dv->sdev_nlink = 2; 383 dv->sdev_flags &= ~SDEV_PERSIST; 384 dv->sdev_flags &= ~SDEV_DYNAMIC; 385 vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */ 386 ASSERT(dv->sdev_dotdot); 387 ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR); 388 vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev; 389 avl_create(&dv->sdev_entries, 390 (int (*)(const void *, const void *))sdev_compare_nodes, 391 sizeof (struct sdev_node), 392 offsetof(struct sdev_node, sdev_avllink)); 393 } else if (type == VLNK) { 394 ASSERT(args); 395 dv->sdev_nlink = 1; 396 dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP); 397 } else { 398 dv->sdev_nlink = 1; 399 } 400 sdev_plugin_nodeready(dv); 401 402 if (!(SDEV_IS_GLOBAL(dv))) { 403 dv->sdev_origin = (struct sdev_node *)args; 404 dv->sdev_flags &= ~SDEV_PERSIST; 405 } 406 407 /* 408 * shadow node is created here OR 409 * if failed (indicated by dv->sdev_attrvp == NULL), 410 * created later in sdev_setattr 411 */ 412 if (avp) { 413 dv->sdev_attrvp = avp; 414 } else { 415 if (dv->sdev_attr == NULL) { 416 sdev_attr_alloc(dv, vap); 417 } else { 418 sdev_attr_update(dv, vap); 419 } 420 421 if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv)) 422 error = sdev_shadow_node(dv, cred); 423 } 424 425 if (error == 0) { 426 /* transition to READY state */ 427 sdev_set_nodestate(dv, SDEV_READY); 428 sdev_nc_node_exists(dv); 429 } 430 rw_exit(&dv->sdev_contents); 431 return (error); 432 } 433 434 /* 435 * Build the VROOT sdev_node. 436 */ 437 /*ARGSUSED*/ 438 struct sdev_node * 439 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp, 440 struct vnode *avp, struct cred *cred) 441 { 442 struct sdev_node *dv; 443 struct vnode *vp; 444 char devdir[] = "/dev"; 445 446 ASSERT(sdev_node_cache != NULL); 447 ASSERT(avp); 448 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); 449 vp = SDEVTOV(dv); 450 vn_reinit(vp); 451 vp->v_flag |= VROOT; 452 vp->v_vfsp = vfsp; 453 vp->v_type = VDIR; 454 vp->v_rdev = devdev; 455 vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */ 456 vn_exists(vp); 457 458 if (vfsp->vfs_mntpt) 459 dv->sdev_name = i_ddi_strdup( 460 (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP); 461 else 462 /* vfs_mountdev1 set mount point later */ 463 dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP); 464 dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */ 465 dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP); 466 dv->sdev_ino = SDEV_ROOTINO; 467 dv->sdev_nlink = 2; /* name + . (no sdev_insert) */ 468 dv->sdev_dotdot = dv; /* .. == self */ 469 dv->sdev_attrvp = avp; 470 dv->sdev_attr = NULL; 471 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); 472 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); 473 if (strcmp(dv->sdev_name, "/dev") == 0) { 474 dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST; 475 bzero(&dv->sdev_handle, sizeof (dv->sdev_handle)); 476 dv->sdev_gdir_gen = 0; 477 } else { 478 dv->sdev_flags = SDEV_BUILD; 479 dv->sdev_flags &= ~SDEV_PERSIST; 480 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 481 dv->sdev_ldir_gen = 0; 482 dv->sdev_devtree_gen = 0; 483 } 484 485 avl_create(&dv->sdev_entries, 486 (int (*)(const void *, const void *))sdev_compare_nodes, 487 sizeof (struct sdev_node), 488 offsetof(struct sdev_node, sdev_avllink)); 489 490 rw_enter(&dv->sdev_contents, RW_WRITER); 491 sdev_set_nodestate(dv, SDEV_READY); 492 rw_exit(&dv->sdev_contents); 493 sdev_nc_node_exists(dv); 494 return (dv); 495 } 496 497 struct sdev_vop_table vtab[] = { 498 { "pts", devpts_vnodeops_tbl, &devpts_vnodeops, devpts_validate, 499 SDEV_DYNAMIC | SDEV_VTOR }, 500 501 { "vt", devvt_vnodeops_tbl, &devvt_vnodeops, devvt_validate, 502 SDEV_DYNAMIC | SDEV_VTOR }, 503 504 { "zvol", devzvol_vnodeops_tbl, &devzvol_vnodeops, 505 devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR }, 506 507 { "zcons", NULL, NULL, NULL, SDEV_NO_NCACHE }, 508 509 { "net", devnet_vnodeops_tbl, &devnet_vnodeops, devnet_validate, 510 SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR }, 511 512 { "ipnet", devipnet_vnodeops_tbl, &devipnet_vnodeops, 513 devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE }, 514 515 /* 516 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the 517 * lofi driver controls child nodes. 518 * 519 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted 520 * stale nodes (e.g. from devfsadm -R). 521 * 522 * In addition, devfsadm knows not to attempt a rmdir: a zone 523 * may hold a reference, which would zombify the node, 524 * preventing a mkdir. 525 */ 526 527 { "lofi", NULL, NULL, NULL, 528 SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST }, 529 { "rlofi", NULL, NULL, NULL, 530 SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST }, 531 532 { NULL, NULL, NULL, NULL, 0} 533 }; 534 535 536 /* 537 * Build the base root inode 538 */ 539 ino_t 540 sdev_mkino(struct sdev_node *dv) 541 { 542 ino_t ino; 543 544 /* 545 * for now, follow the lead of tmpfs here 546 * need to someday understand the requirements here 547 */ 548 ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3); 549 ino += SDEV_ROOTINO + 1; 550 551 return (ino); 552 } 553 554 int 555 sdev_getlink(struct vnode *linkvp, char **link) 556 { 557 int err; 558 char *buf; 559 struct uio uio = {0}; 560 struct iovec iov = {0}; 561 562 if (linkvp == NULL) 563 return (ENOENT); 564 ASSERT(linkvp->v_type == VLNK); 565 566 buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 567 iov.iov_base = buf; 568 iov.iov_len = MAXPATHLEN; 569 uio.uio_iov = &iov; 570 uio.uio_iovcnt = 1; 571 uio.uio_resid = MAXPATHLEN; 572 uio.uio_segflg = UIO_SYSSPACE; 573 uio.uio_llimit = MAXOFFSET_T; 574 575 err = VOP_READLINK(linkvp, &uio, kcred, NULL); 576 if (err) { 577 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf); 578 kmem_free(buf, MAXPATHLEN); 579 return (ENOENT); 580 } 581 582 /* mission complete */ 583 *link = i_ddi_strdup(buf, KM_SLEEP); 584 kmem_free(buf, MAXPATHLEN); 585 return (0); 586 } 587 588 /* 589 * A convenient wrapper to get the devfs node vnode for a device 590 * minor functionality: readlink() of a /dev symlink 591 * Place the link into dv->sdev_symlink 592 */ 593 static int 594 sdev_follow_link(struct sdev_node *dv) 595 { 596 int err; 597 struct vnode *linkvp; 598 char *link = NULL; 599 600 linkvp = SDEVTOV(dv); 601 if (linkvp == NULL) 602 return (ENOENT); 603 ASSERT(linkvp->v_type == VLNK); 604 err = sdev_getlink(linkvp, &link); 605 if (err) { 606 dv->sdev_symlink = NULL; 607 return (ENOENT); 608 } 609 610 ASSERT(link != NULL); 611 dv->sdev_symlink = link; 612 return (0); 613 } 614 615 static int 616 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs) 617 { 618 vtype_t otype = SDEVTOV(dv)->v_type; 619 620 /* 621 * existing sdev_node has a different type. 622 */ 623 if (otype != nvap->va_type) { 624 sdcmn_err9(("sdev_node_check: existing node " 625 " %s type %d does not match new node type %d\n", 626 dv->sdev_name, otype, nvap->va_type)); 627 return (EEXIST); 628 } 629 630 /* 631 * For a symlink, the target should be the same. 632 */ 633 if (otype == VLNK) { 634 ASSERT(nargs != NULL); 635 ASSERT(dv->sdev_symlink != NULL); 636 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) { 637 sdcmn_err9(("sdev_node_check: existing node " 638 " %s has different symlink %s as new node " 639 " %s\n", dv->sdev_name, dv->sdev_symlink, 640 (char *)nargs)); 641 return (EEXIST); 642 } 643 } 644 645 return (0); 646 } 647 648 /* 649 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready() 650 * 651 * arguments: 652 * - ddv (parent) 653 * - nm (child name) 654 * - newdv (sdev_node for nm is returned here) 655 * - vap (vattr for the node to be created, va_type should be set. 656 * - avp (attribute vnode) 657 * the defaults should be used if unknown) 658 * - cred 659 * - args 660 * . tnm (for VLNK) 661 * . global sdev_node (for !SDEV_GLOBAL) 662 * - state: SDEV_INIT, SDEV_READY 663 * 664 * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT) 665 * 666 * NOTE: directory contents writers lock needs to be held before 667 * calling this routine. 668 */ 669 int 670 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, 671 struct vattr *vap, struct vnode *avp, void *args, struct cred *cred, 672 sdev_node_state_t state) 673 { 674 int error = 0; 675 sdev_node_state_t node_state; 676 struct sdev_node *dv = NULL; 677 678 ASSERT(state != SDEV_ZOMBIE); 679 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 680 681 if (*newdv) { 682 dv = *newdv; 683 } else { 684 /* allocate and initialize a sdev_node */ 685 if (ddv->sdev_state == SDEV_ZOMBIE) { 686 sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n", 687 ddv->sdev_path)); 688 return (ENOENT); 689 } 690 691 error = sdev_nodeinit(ddv, nm, &dv, vap); 692 if (error != 0) { 693 sdcmn_err9(("sdev_mknode: error %d," 694 " name %s can not be initialized\n", 695 error, nm)); 696 return (error); 697 } 698 ASSERT(dv); 699 700 /* insert into the directory cache */ 701 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD); 702 } 703 704 ASSERT(dv); 705 node_state = dv->sdev_state; 706 ASSERT(node_state != SDEV_ZOMBIE); 707 708 if (state == SDEV_READY) { 709 switch (node_state) { 710 case SDEV_INIT: 711 error = sdev_nodeready(dv, vap, avp, args, cred); 712 if (error) { 713 sdcmn_err9(("sdev_mknode: node %s can NOT" 714 " be transitioned into READY state, " 715 "error %d\n", nm, error)); 716 } 717 break; 718 case SDEV_READY: 719 /* 720 * Do some sanity checking to make sure 721 * the existing sdev_node is what has been 722 * asked for. 723 */ 724 error = sdev_node_check(dv, vap, args); 725 break; 726 default: 727 break; 728 } 729 } 730 731 if (!error) { 732 *newdv = dv; 733 ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE); 734 } else { 735 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE); 736 /* 737 * We created this node, it wasn't passed into us. Therefore it 738 * is up to us to delete it. 739 */ 740 if (*newdv == NULL) 741 SDEV_SIMPLE_RELE(dv); 742 *newdv = NULL; 743 } 744 745 return (error); 746 } 747 748 /* 749 * convenient wrapper to change vp's ATIME, CTIME and MTIME 750 */ 751 void 752 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask) 753 { 754 struct vattr attr; 755 timestruc_t now; 756 int err; 757 758 ASSERT(vp); 759 gethrestime(&now); 760 if (mask & AT_CTIME) 761 attr.va_ctime = now; 762 if (mask & AT_MTIME) 763 attr.va_mtime = now; 764 if (mask & AT_ATIME) 765 attr.va_atime = now; 766 767 attr.va_mask = (mask & AT_TIMES); 768 err = VOP_SETATTR(vp, &attr, 0, cred, NULL); 769 if (err && (err != EROFS)) { 770 sdcmn_err(("update timestamps error %d\n", err)); 771 } 772 } 773 774 /* 775 * the backing store vnode is released here 776 */ 777 /*ARGSUSED1*/ 778 void 779 sdev_nodedestroy(struct sdev_node *dv, uint_t flags) 780 { 781 /* no references */ 782 ASSERT(dv->sdev_nlink == 0); 783 784 if (dv->sdev_attrvp != NULLVP) { 785 VN_RELE(dv->sdev_attrvp); 786 /* 787 * reset the attrvp so that no more 788 * references can be made on this already 789 * vn_rele() vnode 790 */ 791 dv->sdev_attrvp = NULLVP; 792 } 793 794 if (dv->sdev_attr != NULL) { 795 kmem_free(dv->sdev_attr, sizeof (struct vattr)); 796 dv->sdev_attr = NULL; 797 } 798 799 if (dv->sdev_name != NULL) { 800 kmem_free(dv->sdev_name, dv->sdev_namelen + 1); 801 dv->sdev_name = NULL; 802 } 803 804 if (dv->sdev_symlink != NULL) { 805 kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1); 806 dv->sdev_symlink = NULL; 807 } 808 809 if (dv->sdev_path) { 810 kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1); 811 dv->sdev_path = NULL; 812 } 813 814 if (!SDEV_IS_GLOBAL(dv)) { 815 sdev_prof_free(dv); 816 if (dv->sdev_vnode->v_type != VLNK && dv->sdev_origin != NULL) 817 SDEV_RELE(dv->sdev_origin); 818 } 819 820 if (SDEVTOV(dv)->v_type == VDIR) { 821 ASSERT(SDEV_FIRST_ENTRY(dv) == NULL); 822 avl_destroy(&dv->sdev_entries); 823 } 824 825 mutex_destroy(&dv->sdev_lookup_lock); 826 cv_destroy(&dv->sdev_lookup_cv); 827 828 /* return node to initial state as per constructor */ 829 (void) memset((void *)&dv->sdev_instance_data, 0, 830 sizeof (dv->sdev_instance_data)); 831 vn_invalid(SDEVTOV(dv)); 832 dv->sdev_private = NULL; 833 kmem_cache_free(sdev_node_cache, dv); 834 } 835 836 /* 837 * DIRECTORY CACHE lookup 838 */ 839 struct sdev_node * 840 sdev_findbyname(struct sdev_node *ddv, char *nm) 841 { 842 struct sdev_node *dv; 843 struct sdev_node dvtmp; 844 avl_index_t where; 845 846 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); 847 848 dvtmp.sdev_name = nm; 849 dv = avl_find(&ddv->sdev_entries, &dvtmp, &where); 850 if (dv) { 851 ASSERT(dv->sdev_dotdot == ddv); 852 ASSERT(strcmp(dv->sdev_name, nm) == 0); 853 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 854 SDEV_HOLD(dv); 855 return (dv); 856 } 857 return (NULL); 858 } 859 860 /* 861 * Inserts a new sdev_node in a parent directory 862 */ 863 void 864 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv) 865 { 866 avl_index_t where; 867 868 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 869 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 870 ASSERT(ddv->sdev_nlink >= 2); 871 ASSERT(dv->sdev_nlink == 0); 872 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 873 874 dv->sdev_dotdot = ddv; 875 VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL); 876 avl_insert(&ddv->sdev_entries, dv, where); 877 ddv->sdev_nlink++; 878 } 879 880 /* 881 * The following check is needed because while sdev_nodes are linked 882 * in SDEV_INIT state, they have their link counts incremented only 883 * in SDEV_READY state. 884 */ 885 static void 886 decr_link(struct sdev_node *dv) 887 { 888 VERIFY(RW_WRITE_HELD(&dv->sdev_contents)); 889 if (dv->sdev_state != SDEV_INIT) { 890 VERIFY(dv->sdev_nlink >= 1); 891 dv->sdev_nlink--; 892 } else { 893 VERIFY(dv->sdev_nlink == 0); 894 } 895 } 896 897 /* 898 * Delete an existing dv from directory cache 899 * 900 * In the case of a node is still held by non-zero reference count, the node is 901 * put into ZOMBIE state. The node is always unlinked from its parent, but it is 902 * not destroyed via sdev_inactive until its reference count reaches "0". 903 */ 904 static void 905 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv) 906 { 907 struct vnode *vp; 908 sdev_node_state_t os; 909 910 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 911 912 vp = SDEVTOV(dv); 913 mutex_enter(&vp->v_lock); 914 rw_enter(&dv->sdev_contents, RW_WRITER); 915 os = dv->sdev_state; 916 ASSERT(os != SDEV_ZOMBIE); 917 dv->sdev_state = SDEV_ZOMBIE; 918 919 /* 920 * unlink ourselves from the parent directory now to take care of the .. 921 * link. However, if we're a directory, we don't remove our reference to 922 * ourself eg. '.' until we are torn down in the inactive callback. 923 */ 924 decr_link(ddv); 925 avl_remove(&ddv->sdev_entries, dv); 926 /* 927 * sdev_inactive expects nodes to have a link to themselves when we're 928 * tearing them down. If we're transitioning from the initial state to 929 * zombie and not via ready, then we're not going to have this link that 930 * comes from the node being ready. As a result, we need to increment 931 * our link count by one to account for this. 932 */ 933 if (os == SDEV_INIT && dv->sdev_nlink == 0) 934 dv->sdev_nlink++; 935 rw_exit(&dv->sdev_contents); 936 mutex_exit(&vp->v_lock); 937 } 938 939 /* 940 * check if the source is in the path of the target 941 * 942 * source and target are different 943 */ 944 /*ARGSUSED2*/ 945 static int 946 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred) 947 { 948 int error = 0; 949 struct sdev_node *dotdot, *dir; 950 951 dotdot = tdv->sdev_dotdot; 952 ASSERT(dotdot); 953 954 /* fs root */ 955 if (dotdot == tdv) { 956 return (0); 957 } 958 959 for (;;) { 960 /* 961 * avoid error cases like 962 * mv a a/b 963 * mv a a/b/c 964 * etc. 965 */ 966 if (dotdot == sdv) { 967 error = EINVAL; 968 break; 969 } 970 971 dir = dotdot; 972 dotdot = dir->sdev_dotdot; 973 974 /* done checking because root is reached */ 975 if (dir == dotdot) { 976 break; 977 } 978 } 979 return (error); 980 } 981 982 int 983 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv, 984 struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm, 985 struct cred *cred) 986 { 987 int error = 0; 988 struct vnode *ovp = SDEVTOV(odv); 989 struct vnode *nvp; 990 struct vattr vattr; 991 int doingdir = (ovp->v_type == VDIR); 992 char *link = NULL; 993 int samedir = (oddv == nddv) ? 1 : 0; 994 int bkstore = 0; 995 struct sdev_node *idv = NULL; 996 struct sdev_node *ndv = NULL; 997 timestruc_t now; 998 999 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 1000 error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL); 1001 if (error) 1002 return (error); 1003 1004 if (!samedir) 1005 rw_enter(&oddv->sdev_contents, RW_WRITER); 1006 rw_enter(&nddv->sdev_contents, RW_WRITER); 1007 1008 /* 1009 * the source may have been deleted by another thread before 1010 * we gets here. 1011 */ 1012 if (odv->sdev_state != SDEV_READY) { 1013 error = ENOENT; 1014 goto err_out; 1015 } 1016 1017 if (doingdir && (odv == nddv)) { 1018 error = EINVAL; 1019 goto err_out; 1020 } 1021 1022 /* 1023 * If renaming a directory, and the parents are different (".." must be 1024 * changed) then the source dir must not be in the dir hierarchy above 1025 * the target since it would orphan everything below the source dir. 1026 */ 1027 if (doingdir && (oddv != nddv)) { 1028 error = sdev_checkpath(odv, nddv, cred); 1029 if (error) 1030 goto err_out; 1031 } 1032 1033 /* fix the source for a symlink */ 1034 if (vattr.va_type == VLNK) { 1035 if (odv->sdev_symlink == NULL) { 1036 error = sdev_follow_link(odv); 1037 if (error) { 1038 /* 1039 * The underlying symlink doesn't exist. This 1040 * node probably shouldn't even exist. While 1041 * it's a bit jarring to consumers, we're going 1042 * to remove the node from /dev. 1043 */ 1044 if (SDEV_IS_PERSIST((*ndvp))) 1045 bkstore = 1; 1046 sdev_dirdelete(oddv, odv); 1047 if (bkstore) { 1048 ASSERT(nddv->sdev_attrvp); 1049 error = VOP_REMOVE(nddv->sdev_attrvp, 1050 nnm, cred, NULL, 0); 1051 if (error) 1052 goto err_out; 1053 } 1054 error = ENOENT; 1055 goto err_out; 1056 } 1057 } 1058 ASSERT(odv->sdev_symlink); 1059 link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP); 1060 } 1061 1062 /* destination existing */ 1063 if (*ndvp) { 1064 nvp = SDEVTOV(*ndvp); 1065 ASSERT(nvp); 1066 1067 /* handling renaming to itself */ 1068 if (odv == *ndvp) { 1069 error = 0; 1070 goto err_out; 1071 } 1072 1073 if (nvp->v_type == VDIR) { 1074 if (!doingdir) { 1075 error = EISDIR; 1076 goto err_out; 1077 } 1078 1079 if (vn_vfswlock(nvp)) { 1080 error = EBUSY; 1081 goto err_out; 1082 } 1083 1084 if (vn_mountedvfs(nvp) != NULL) { 1085 vn_vfsunlock(nvp); 1086 error = EBUSY; 1087 goto err_out; 1088 } 1089 1090 /* in case dir1 exists in dir2 and "mv dir1 dir2" */ 1091 if ((*ndvp)->sdev_nlink > 2) { 1092 vn_vfsunlock(nvp); 1093 error = EEXIST; 1094 goto err_out; 1095 } 1096 vn_vfsunlock(nvp); 1097 1098 /* 1099 * We did not place the hold on *ndvp, so even though 1100 * we're deleting the node, we should not get rid of our 1101 * reference. 1102 */ 1103 sdev_dirdelete(nddv, *ndvp); 1104 *ndvp = NULL; 1105 ASSERT(nddv->sdev_attrvp); 1106 error = VOP_RMDIR(nddv->sdev_attrvp, nnm, 1107 nddv->sdev_attrvp, cred, NULL, 0); 1108 if (error) 1109 goto err_out; 1110 } else { 1111 if (doingdir) { 1112 error = ENOTDIR; 1113 goto err_out; 1114 } 1115 1116 if (SDEV_IS_PERSIST((*ndvp))) { 1117 bkstore = 1; 1118 } 1119 1120 /* 1121 * Get rid of the node from the directory cache note. 1122 * Don't forget that it's not up to us to remove the vn 1123 * ref on the sdev node, as we did not place it. 1124 */ 1125 sdev_dirdelete(nddv, *ndvp); 1126 *ndvp = NULL; 1127 if (bkstore) { 1128 ASSERT(nddv->sdev_attrvp); 1129 error = VOP_REMOVE(nddv->sdev_attrvp, 1130 nnm, cred, NULL, 0); 1131 if (error) 1132 goto err_out; 1133 } 1134 } 1135 } 1136 1137 /* 1138 * make a fresh node from the source attrs 1139 */ 1140 ASSERT(RW_WRITE_HELD(&nddv->sdev_contents)); 1141 error = sdev_mknode(nddv, nnm, ndvp, &vattr, 1142 NULL, (void *)link, cred, SDEV_READY); 1143 1144 if (link != NULL) { 1145 kmem_free(link, strlen(link) + 1); 1146 link = NULL; 1147 } 1148 1149 if (error) 1150 goto err_out; 1151 ASSERT(*ndvp); 1152 ASSERT((*ndvp)->sdev_state == SDEV_READY); 1153 1154 /* move dir contents */ 1155 if (doingdir) { 1156 for (idv = SDEV_FIRST_ENTRY(odv); idv; 1157 idv = SDEV_NEXT_ENTRY(odv, idv)) { 1158 SDEV_HOLD(idv); 1159 error = sdev_rnmnode(odv, idv, 1160 (struct sdev_node *)(*ndvp), &ndv, 1161 idv->sdev_name, cred); 1162 SDEV_RELE(idv); 1163 if (error) 1164 goto err_out; 1165 ndv = NULL; 1166 } 1167 } 1168 1169 if ((*ndvp)->sdev_attrvp) { 1170 sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred, 1171 AT_CTIME|AT_ATIME); 1172 } else { 1173 ASSERT((*ndvp)->sdev_attr); 1174 gethrestime(&now); 1175 (*ndvp)->sdev_attr->va_ctime = now; 1176 (*ndvp)->sdev_attr->va_atime = now; 1177 } 1178 1179 if (nddv->sdev_attrvp) { 1180 sdev_update_timestamps(nddv->sdev_attrvp, kcred, 1181 AT_MTIME|AT_ATIME); 1182 } else { 1183 ASSERT(nddv->sdev_attr); 1184 gethrestime(&now); 1185 nddv->sdev_attr->va_mtime = now; 1186 nddv->sdev_attr->va_atime = now; 1187 } 1188 rw_exit(&nddv->sdev_contents); 1189 if (!samedir) 1190 rw_exit(&oddv->sdev_contents); 1191 1192 SDEV_RELE(*ndvp); 1193 return (error); 1194 1195 err_out: 1196 if (link != NULL) { 1197 kmem_free(link, strlen(link) + 1); 1198 link = NULL; 1199 } 1200 1201 rw_exit(&nddv->sdev_contents); 1202 if (!samedir) 1203 rw_exit(&oddv->sdev_contents); 1204 return (error); 1205 } 1206 1207 /* 1208 * Merge sdev_node specific information into an attribute structure. 1209 * 1210 * note: sdev_node is not locked here 1211 */ 1212 void 1213 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap) 1214 { 1215 struct vnode *vp = SDEVTOV(dv); 1216 1217 vap->va_nlink = dv->sdev_nlink; 1218 vap->va_nodeid = dv->sdev_ino; 1219 vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev; 1220 vap->va_type = vp->v_type; 1221 1222 if (vp->v_type == VDIR) { 1223 vap->va_rdev = 0; 1224 vap->va_fsid = vp->v_rdev; 1225 } else if (vp->v_type == VLNK) { 1226 vap->va_rdev = 0; 1227 vap->va_mode &= ~S_IFMT; 1228 vap->va_mode |= S_IFLNK; 1229 } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) { 1230 vap->va_rdev = vp->v_rdev; 1231 vap->va_mode &= ~S_IFMT; 1232 if (vap->va_type == VCHR) 1233 vap->va_mode |= S_IFCHR; 1234 else 1235 vap->va_mode |= S_IFBLK; 1236 } else { 1237 vap->va_rdev = 0; 1238 } 1239 } 1240 1241 struct vattr * 1242 sdev_getdefault_attr(enum vtype type) 1243 { 1244 if (type == VDIR) 1245 return (&sdev_vattr_dir); 1246 else if (type == VCHR) 1247 return (&sdev_vattr_chr); 1248 else if (type == VBLK) 1249 return (&sdev_vattr_blk); 1250 else if (type == VLNK) 1251 return (&sdev_vattr_lnk); 1252 else 1253 return (NULL); 1254 } 1255 int 1256 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp) 1257 { 1258 int rv = 0; 1259 struct vnode *vp = SDEVTOV(dv); 1260 1261 switch (vp->v_type) { 1262 case VCHR: 1263 case VBLK: 1264 /* 1265 * If vnode is a device, return special vnode instead 1266 * (though it knows all about -us- via sp->s_realvp) 1267 */ 1268 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred); 1269 VN_RELE(vp); 1270 if (*vpp == NULLVP) 1271 rv = ENOSYS; 1272 break; 1273 default: /* most types are returned as is */ 1274 *vpp = vp; 1275 break; 1276 } 1277 return (rv); 1278 } 1279 1280 /* 1281 * junction between devname and root file system, e.g. ufs 1282 */ 1283 int 1284 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp) 1285 { 1286 struct vnode *rdvp = ddv->sdev_attrvp; 1287 int rval = 0; 1288 1289 ASSERT(rdvp); 1290 1291 rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL, 1292 NULL); 1293 return (rval); 1294 } 1295 1296 static int 1297 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred) 1298 { 1299 struct sdev_node *dv = NULL; 1300 char *nm; 1301 struct vnode *dirvp; 1302 int error; 1303 vnode_t *vp; 1304 int eof; 1305 struct iovec iov; 1306 struct uio uio; 1307 struct dirent64 *dp; 1308 dirent64_t *dbuf; 1309 size_t dbuflen; 1310 struct vattr vattr; 1311 char *link = NULL; 1312 1313 if (ddv->sdev_attrvp == NULL) 1314 return (0); 1315 if (!(ddv->sdev_flags & SDEV_BUILD)) 1316 return (0); 1317 1318 dirvp = ddv->sdev_attrvp; 1319 VN_HOLD(dirvp); 1320 dbuf = kmem_zalloc(dlen, KM_SLEEP); 1321 1322 uio.uio_iov = &iov; 1323 uio.uio_iovcnt = 1; 1324 uio.uio_segflg = UIO_SYSSPACE; 1325 uio.uio_fmode = 0; 1326 uio.uio_extflg = UIO_COPY_CACHED; 1327 uio.uio_loffset = 0; 1328 uio.uio_llimit = MAXOFFSET_T; 1329 1330 eof = 0; 1331 error = 0; 1332 while (!error && !eof) { 1333 uio.uio_resid = dlen; 1334 iov.iov_base = (char *)dbuf; 1335 iov.iov_len = dlen; 1336 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1337 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1338 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1339 1340 dbuflen = dlen - uio.uio_resid; 1341 if (error || dbuflen == 0) 1342 break; 1343 1344 if (!(ddv->sdev_flags & SDEV_BUILD)) 1345 break; 1346 1347 for (dp = dbuf; ((intptr_t)dp < 1348 (intptr_t)dbuf + dbuflen); 1349 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1350 nm = dp->d_name; 1351 1352 if (strcmp(nm, ".") == 0 || 1353 strcmp(nm, "..") == 0) 1354 continue; 1355 1356 vp = NULLVP; 1357 dv = sdev_cache_lookup(ddv, nm); 1358 if (dv) { 1359 VERIFY(dv->sdev_state != SDEV_ZOMBIE); 1360 SDEV_SIMPLE_RELE(dv); 1361 continue; 1362 } 1363 1364 /* refill the cache if not already */ 1365 error = devname_backstore_lookup(ddv, nm, &vp); 1366 if (error) 1367 continue; 1368 1369 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 1370 error = VOP_GETATTR(vp, &vattr, 0, cred, NULL); 1371 if (error) 1372 continue; 1373 1374 if (vattr.va_type == VLNK) { 1375 error = sdev_getlink(vp, &link); 1376 if (error) { 1377 continue; 1378 } 1379 ASSERT(link != NULL); 1380 } 1381 1382 if (!rw_tryupgrade(&ddv->sdev_contents)) { 1383 rw_exit(&ddv->sdev_contents); 1384 rw_enter(&ddv->sdev_contents, RW_WRITER); 1385 } 1386 error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link, 1387 cred, SDEV_READY); 1388 rw_downgrade(&ddv->sdev_contents); 1389 1390 if (link != NULL) { 1391 kmem_free(link, strlen(link) + 1); 1392 link = NULL; 1393 } 1394 1395 if (!error) { 1396 ASSERT(dv); 1397 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1398 SDEV_SIMPLE_RELE(dv); 1399 } 1400 vp = NULL; 1401 dv = NULL; 1402 } 1403 } 1404 1405 done: 1406 VN_RELE(dirvp); 1407 kmem_free(dbuf, dlen); 1408 1409 return (error); 1410 } 1411 1412 void 1413 sdev_filldir_dynamic(struct sdev_node *ddv) 1414 { 1415 int error; 1416 int i; 1417 struct vattr vattr; 1418 struct vattr *vap = &vattr; 1419 char *nm = NULL; 1420 struct sdev_node *dv = NULL; 1421 1422 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1423 ASSERT((ddv->sdev_flags & SDEV_BUILD)); 1424 1425 *vap = *sdev_getdefault_attr(VDIR); /* note structure copy here */ 1426 gethrestime(&vap->va_atime); 1427 vap->va_mtime = vap->va_atime; 1428 vap->va_ctime = vap->va_atime; 1429 for (i = 0; vtab[i].vt_name != NULL; i++) { 1430 /* 1431 * This early, we may be in a read-only /dev environment: leave 1432 * the creation of any nodes we'd attempt to persist to 1433 * devfsadm. Because /dev itself is normally persistent, any 1434 * node which is not marked dynamic will end up being marked 1435 * persistent. However, some nodes are both dynamic and 1436 * persistent, mostly lofi and rlofi, so we need to be careful 1437 * in our check. 1438 */ 1439 if ((vtab[i].vt_flags & SDEV_PERSIST) || 1440 !(vtab[i].vt_flags & SDEV_DYNAMIC)) 1441 continue; 1442 nm = vtab[i].vt_name; 1443 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1444 dv = NULL; 1445 error = sdev_mknode(ddv, nm, &dv, vap, NULL, 1446 NULL, kcred, SDEV_READY); 1447 if (error) { 1448 cmn_err(CE_WARN, "%s/%s: error %d\n", 1449 ddv->sdev_name, nm, error); 1450 } else { 1451 ASSERT(dv); 1452 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1453 SDEV_SIMPLE_RELE(dv); 1454 } 1455 } 1456 } 1457 1458 /* 1459 * Creating a backing store entry based on sdev_attr. 1460 * This is called either as part of node creation in a persistent directory 1461 * or from setattr/setsecattr to persist access attributes across reboot. 1462 */ 1463 int 1464 sdev_shadow_node(struct sdev_node *dv, struct cred *cred) 1465 { 1466 int error = 0; 1467 struct vnode *dvp = SDEVTOV(dv->sdev_dotdot); 1468 struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp; 1469 struct vattr *vap = dv->sdev_attr; 1470 char *nm = dv->sdev_name; 1471 struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL; 1472 1473 ASSERT(dv && dv->sdev_name && rdvp); 1474 ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL); 1475 1476 lookup: 1477 /* try to find it in the backing store */ 1478 error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL, 1479 NULL); 1480 if (error == 0) { 1481 if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) { 1482 VN_HOLD(rrvp); 1483 VN_RELE(*rvp); 1484 *rvp = rrvp; 1485 } 1486 1487 kmem_free(dv->sdev_attr, sizeof (vattr_t)); 1488 dv->sdev_attr = NULL; 1489 dv->sdev_attrvp = *rvp; 1490 return (0); 1491 } 1492 1493 /* let's try to persist the node */ 1494 gethrestime(&vap->va_atime); 1495 vap->va_mtime = vap->va_atime; 1496 vap->va_ctime = vap->va_atime; 1497 vap->va_mask |= AT_TYPE|AT_MODE; 1498 switch (vap->va_type) { 1499 case VDIR: 1500 error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL); 1501 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n", 1502 (void *)(*rvp), error)); 1503 if (!error) 1504 VN_RELE(*rvp); 1505 break; 1506 case VCHR: 1507 case VBLK: 1508 case VREG: 1509 case VDOOR: 1510 error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE, 1511 rvp, cred, 0, NULL, NULL); 1512 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n", 1513 (void *)(*rvp), error)); 1514 if (!error) 1515 VN_RELE(*rvp); 1516 break; 1517 case VLNK: 1518 ASSERT(dv->sdev_symlink); 1519 error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred, 1520 NULL, 0); 1521 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n", 1522 error)); 1523 break; 1524 default: 1525 cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node " 1526 "create\n", nm); 1527 /*NOTREACHED*/ 1528 } 1529 1530 /* go back to lookup to factor out spec node and set attrvp */ 1531 if (error == 0) 1532 goto lookup; 1533 1534 sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error)); 1535 return (error); 1536 } 1537 1538 static void 1539 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm) 1540 { 1541 struct sdev_node *dup = NULL; 1542 1543 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1544 if ((dup = sdev_findbyname(ddv, nm)) == NULL) { 1545 sdev_direnter(ddv, *dv); 1546 } else { 1547 VERIFY(dup->sdev_state != SDEV_ZOMBIE); 1548 SDEV_SIMPLE_RELE(*dv); 1549 sdev_nodedestroy(*dv, 0); 1550 *dv = dup; 1551 } 1552 } 1553 1554 static void 1555 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv) 1556 { 1557 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1558 sdev_dirdelete(ddv, *dv); 1559 } 1560 1561 /* 1562 * update the in-core directory cache 1563 */ 1564 void 1565 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm, 1566 sdev_cache_ops_t ops) 1567 { 1568 ASSERT((SDEV_HELD(*dv))); 1569 1570 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1571 switch (ops) { 1572 case SDEV_CACHE_ADD: 1573 sdev_cache_add(ddv, dv, nm); 1574 break; 1575 case SDEV_CACHE_DELETE: 1576 sdev_cache_delete(ddv, dv); 1577 break; 1578 default: 1579 break; 1580 } 1581 } 1582 1583 /* 1584 * retrieve the named entry from the directory cache 1585 */ 1586 struct sdev_node * 1587 sdev_cache_lookup(struct sdev_node *ddv, char *nm) 1588 { 1589 struct sdev_node *dv = NULL; 1590 1591 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); 1592 dv = sdev_findbyname(ddv, nm); 1593 1594 return (dv); 1595 } 1596 1597 /* 1598 * Implicit reconfig for nodes constructed by a link generator 1599 * Start devfsadm if needed, or if devfsadm is in progress, 1600 * prepare to block on devfsadm either completing or 1601 * constructing the desired node. As devfsadmd is global 1602 * in scope, constructing all necessary nodes, we only 1603 * need to initiate it once. 1604 */ 1605 static int 1606 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm) 1607 { 1608 int error = 0; 1609 1610 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { 1611 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n", 1612 ddv->sdev_name, nm, devfsadm_state)); 1613 mutex_enter(&dv->sdev_lookup_lock); 1614 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING)); 1615 mutex_exit(&dv->sdev_lookup_lock); 1616 error = 0; 1617 } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) { 1618 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n", 1619 ddv->sdev_name, nm, devfsadm_state)); 1620 1621 sdev_devfsadmd_thread(ddv, dv, kcred); 1622 mutex_enter(&dv->sdev_lookup_lock); 1623 SDEV_BLOCK_OTHERS(dv, 1624 (SDEV_LOOKUP | SDEV_LGWAITING)); 1625 mutex_exit(&dv->sdev_lookup_lock); 1626 error = 0; 1627 } else { 1628 error = -1; 1629 } 1630 1631 return (error); 1632 } 1633 1634 /* 1635 * Support for specialized device naming construction mechanisms 1636 */ 1637 static int 1638 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm, 1639 int (*callback)(struct sdev_node *, char *, void **, struct cred *, 1640 void *, char *), int flags, struct cred *cred) 1641 { 1642 int rv = 0; 1643 char *physpath = NULL; 1644 struct vattr vattr; 1645 struct vattr *vap = &vattr; 1646 struct sdev_node *dv = NULL; 1647 1648 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1649 if (flags & SDEV_VLINK) { 1650 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1651 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL, 1652 NULL); 1653 if (rv) { 1654 kmem_free(physpath, MAXPATHLEN); 1655 return (-1); 1656 } 1657 1658 *vap = *sdev_getdefault_attr(VLNK); /* structure copy */ 1659 vap->va_size = strlen(physpath); 1660 gethrestime(&vap->va_atime); 1661 vap->va_mtime = vap->va_atime; 1662 vap->va_ctime = vap->va_atime; 1663 1664 rv = sdev_mknode(ddv, nm, &dv, vap, NULL, 1665 (void *)physpath, cred, SDEV_READY); 1666 kmem_free(physpath, MAXPATHLEN); 1667 if (rv) 1668 return (rv); 1669 } else if (flags & SDEV_VATTR) { 1670 /* 1671 * /dev/pts 1672 * 1673 * callback is responsible to set the basic attributes, 1674 * e.g. va_type/va_uid/va_gid/ 1675 * dev_t if VCHR or VBLK/ 1676 */ 1677 ASSERT(callback); 1678 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL); 1679 if (rv) { 1680 sdcmn_err3(("devname_lookup_func: SDEV_NONE " 1681 "callback failed \n")); 1682 return (-1); 1683 } 1684 1685 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL, 1686 cred, SDEV_READY); 1687 1688 if (rv) 1689 return (rv); 1690 1691 } else { 1692 impossible(("lookup: %s/%s by %s not supported (%d)\n", 1693 SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm, 1694 __LINE__)); 1695 rv = -1; 1696 } 1697 1698 *dvp = dv; 1699 return (rv); 1700 } 1701 1702 static int 1703 is_devfsadm_thread(char *exec_name) 1704 { 1705 /* 1706 * note: because devfsadmd -> /usr/sbin/devfsadm 1707 * it is safe to use "devfsadm" to capture the lookups 1708 * from devfsadm and its daemon version. 1709 */ 1710 if (strcmp(exec_name, "devfsadm") == 0) 1711 return (1); 1712 return (0); 1713 } 1714 1715 /* 1716 * Lookup Order: 1717 * sdev_node cache; 1718 * backing store (SDEV_PERSIST); 1719 * DBNR: a. dir_ops implemented in the loadable modules; 1720 * b. vnode ops in vtab. 1721 */ 1722 int 1723 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp, 1724 struct cred *cred, int (*callback)(struct sdev_node *, char *, void **, 1725 struct cred *, void *, char *), int flags) 1726 { 1727 int rv = 0, nmlen; 1728 struct vnode *rvp = NULL; 1729 struct sdev_node *dv = NULL; 1730 int retried = 0; 1731 int error = 0; 1732 struct vattr vattr; 1733 char *lookup_thread = curproc->p_user.u_comm; 1734 int failed_flags = 0; 1735 int (*vtor)(struct sdev_node *) = NULL; 1736 int state; 1737 int parent_state; 1738 char *link = NULL; 1739 1740 if (SDEVTOV(ddv)->v_type != VDIR) 1741 return (ENOTDIR); 1742 1743 /* 1744 * Empty name or ., return node itself. 1745 */ 1746 nmlen = strlen(nm); 1747 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { 1748 *vpp = SDEVTOV(ddv); 1749 VN_HOLD(*vpp); 1750 return (0); 1751 } 1752 1753 /* 1754 * .., return the parent directory 1755 */ 1756 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { 1757 *vpp = SDEVTOV(ddv->sdev_dotdot); 1758 VN_HOLD(*vpp); 1759 return (0); 1760 } 1761 1762 rw_enter(&ddv->sdev_contents, RW_READER); 1763 if (ddv->sdev_flags & SDEV_VTOR) { 1764 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); 1765 ASSERT(vtor); 1766 } 1767 1768 tryagain: 1769 /* 1770 * (a) directory cache lookup: 1771 */ 1772 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1773 parent_state = ddv->sdev_state; 1774 dv = sdev_cache_lookup(ddv, nm); 1775 if (dv) { 1776 state = dv->sdev_state; 1777 switch (state) { 1778 case SDEV_INIT: 1779 if (is_devfsadm_thread(lookup_thread)) 1780 break; 1781 1782 /* ZOMBIED parent won't allow node creation */ 1783 if (parent_state == SDEV_ZOMBIE) { 1784 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1785 retried); 1786 goto nolock_notfound; 1787 } 1788 1789 mutex_enter(&dv->sdev_lookup_lock); 1790 /* compensate the threads started after devfsadm */ 1791 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && 1792 !(SDEV_IS_LOOKUP(dv))) 1793 SDEV_BLOCK_OTHERS(dv, 1794 (SDEV_LOOKUP | SDEV_LGWAITING)); 1795 1796 if (SDEV_IS_LOOKUP(dv)) { 1797 failed_flags |= SLF_REBUILT; 1798 rw_exit(&ddv->sdev_contents); 1799 error = sdev_wait4lookup(dv, SDEV_LOOKUP); 1800 mutex_exit(&dv->sdev_lookup_lock); 1801 rw_enter(&ddv->sdev_contents, RW_READER); 1802 1803 if (error != 0) { 1804 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1805 retried); 1806 goto nolock_notfound; 1807 } 1808 1809 state = dv->sdev_state; 1810 if (state == SDEV_INIT) { 1811 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1812 retried); 1813 goto nolock_notfound; 1814 } else if (state == SDEV_READY) { 1815 goto found; 1816 } else if (state == SDEV_ZOMBIE) { 1817 rw_exit(&ddv->sdev_contents); 1818 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1819 retried); 1820 SDEV_RELE(dv); 1821 goto lookup_failed; 1822 } 1823 } else { 1824 mutex_exit(&dv->sdev_lookup_lock); 1825 } 1826 break; 1827 case SDEV_READY: 1828 goto found; 1829 case SDEV_ZOMBIE: 1830 rw_exit(&ddv->sdev_contents); 1831 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1832 SDEV_RELE(dv); 1833 goto lookup_failed; 1834 default: 1835 rw_exit(&ddv->sdev_contents); 1836 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1837 sdev_lookup_failed(ddv, nm, failed_flags); 1838 *vpp = NULLVP; 1839 return (ENOENT); 1840 } 1841 } 1842 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1843 1844 /* 1845 * ZOMBIED parent does not allow new node creation. 1846 * bail out early 1847 */ 1848 if (parent_state == SDEV_ZOMBIE) { 1849 rw_exit(&ddv->sdev_contents); 1850 *vpp = NULLVP; 1851 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1852 return (ENOENT); 1853 } 1854 1855 /* 1856 * (b0): backing store lookup 1857 * SDEV_PERSIST is default except: 1858 * 1) pts nodes 1859 * 2) non-chmod'ed local nodes 1860 * 3) zvol nodes 1861 */ 1862 if (SDEV_IS_PERSIST(ddv)) { 1863 error = devname_backstore_lookup(ddv, nm, &rvp); 1864 1865 if (!error) { 1866 1867 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 1868 error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL); 1869 if (error) { 1870 rw_exit(&ddv->sdev_contents); 1871 if (dv) 1872 SDEV_RELE(dv); 1873 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1874 sdev_lookup_failed(ddv, nm, failed_flags); 1875 *vpp = NULLVP; 1876 return (ENOENT); 1877 } 1878 1879 if (vattr.va_type == VLNK) { 1880 error = sdev_getlink(rvp, &link); 1881 if (error) { 1882 rw_exit(&ddv->sdev_contents); 1883 if (dv) 1884 SDEV_RELE(dv); 1885 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1886 retried); 1887 sdev_lookup_failed(ddv, nm, 1888 failed_flags); 1889 *vpp = NULLVP; 1890 return (ENOENT); 1891 } 1892 ASSERT(link != NULL); 1893 } 1894 1895 if (!rw_tryupgrade(&ddv->sdev_contents)) { 1896 rw_exit(&ddv->sdev_contents); 1897 rw_enter(&ddv->sdev_contents, RW_WRITER); 1898 } 1899 error = sdev_mknode(ddv, nm, &dv, &vattr, 1900 rvp, link, cred, SDEV_READY); 1901 rw_downgrade(&ddv->sdev_contents); 1902 1903 if (link != NULL) { 1904 kmem_free(link, strlen(link) + 1); 1905 link = NULL; 1906 } 1907 1908 if (error) { 1909 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1910 rw_exit(&ddv->sdev_contents); 1911 if (dv) 1912 SDEV_RELE(dv); 1913 goto lookup_failed; 1914 } else { 1915 goto found; 1916 } 1917 } else if (retried) { 1918 rw_exit(&ddv->sdev_contents); 1919 sdcmn_err3(("retry of lookup of %s/%s: failed\n", 1920 ddv->sdev_name, nm)); 1921 if (dv) 1922 SDEV_RELE(dv); 1923 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1924 sdev_lookup_failed(ddv, nm, failed_flags); 1925 *vpp = NULLVP; 1926 return (ENOENT); 1927 } 1928 } 1929 1930 lookup_create_node: 1931 /* first thread that is doing the lookup on this node */ 1932 if (callback) { 1933 ASSERT(dv == NULL); 1934 if (!rw_tryupgrade(&ddv->sdev_contents)) { 1935 rw_exit(&ddv->sdev_contents); 1936 rw_enter(&ddv->sdev_contents, RW_WRITER); 1937 } 1938 error = sdev_call_dircallback(ddv, &dv, nm, callback, 1939 flags, cred); 1940 rw_downgrade(&ddv->sdev_contents); 1941 if (error == 0) { 1942 goto found; 1943 } else { 1944 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1945 rw_exit(&ddv->sdev_contents); 1946 goto lookup_failed; 1947 } 1948 } 1949 if (!dv) { 1950 if (!rw_tryupgrade(&ddv->sdev_contents)) { 1951 rw_exit(&ddv->sdev_contents); 1952 rw_enter(&ddv->sdev_contents, RW_WRITER); 1953 } 1954 error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL, 1955 cred, SDEV_INIT); 1956 if (!dv) { 1957 rw_exit(&ddv->sdev_contents); 1958 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1959 sdev_lookup_failed(ddv, nm, failed_flags); 1960 *vpp = NULLVP; 1961 return (ENOENT); 1962 } 1963 rw_downgrade(&ddv->sdev_contents); 1964 } 1965 1966 /* 1967 * (b1) invoking devfsadm once per life time for devfsadm nodes 1968 */ 1969 ASSERT(SDEV_HELD(dv)); 1970 1971 if (SDEV_IS_NO_NCACHE(dv)) 1972 failed_flags |= SLF_NO_NCACHE; 1973 if (sdev_reconfig_boot || !i_ddi_io_initialized() || 1974 SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) || 1975 ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) { 1976 ASSERT(SDEV_HELD(dv)); 1977 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1978 goto nolock_notfound; 1979 } 1980 1981 /* 1982 * filter out known non-existent devices recorded 1983 * during initial reconfiguration boot for which 1984 * reconfig should not be done and lookup may 1985 * be short-circuited now. 1986 */ 1987 if (sdev_lookup_filter(ddv, nm)) { 1988 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1989 goto nolock_notfound; 1990 } 1991 1992 /* bypassing devfsadm internal nodes */ 1993 if (is_devfsadm_thread(lookup_thread)) { 1994 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1995 goto nolock_notfound; 1996 } 1997 1998 if (sdev_reconfig_disable) { 1999 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2000 goto nolock_notfound; 2001 } 2002 2003 error = sdev_call_devfsadmd(ddv, dv, nm); 2004 if (error == 0) { 2005 sdcmn_err8(("lookup of %s/%s by %s: reconfig\n", 2006 ddv->sdev_name, nm, curproc->p_user.u_comm)); 2007 if (sdev_reconfig_verbose) { 2008 cmn_err(CE_CONT, 2009 "?lookup of %s/%s by %s: reconfig\n", 2010 ddv->sdev_name, nm, curproc->p_user.u_comm); 2011 } 2012 retried = 1; 2013 failed_flags |= SLF_REBUILT; 2014 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 2015 SDEV_SIMPLE_RELE(dv); 2016 goto tryagain; 2017 } else { 2018 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2019 goto nolock_notfound; 2020 } 2021 2022 found: 2023 ASSERT(dv->sdev_state == SDEV_READY); 2024 if (vtor) { 2025 /* 2026 * Check validity of returned node 2027 */ 2028 switch (vtor(dv)) { 2029 case SDEV_VTOR_VALID: 2030 break; 2031 case SDEV_VTOR_STALE: 2032 /* 2033 * The name exists, but the cache entry is 2034 * stale and needs to be re-created. 2035 */ 2036 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2037 if (rw_tryupgrade(&ddv->sdev_contents) == 0) { 2038 rw_exit(&ddv->sdev_contents); 2039 rw_enter(&ddv->sdev_contents, RW_WRITER); 2040 } 2041 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE); 2042 rw_downgrade(&ddv->sdev_contents); 2043 SDEV_RELE(dv); 2044 dv = NULL; 2045 goto lookup_create_node; 2046 /* FALLTHRU */ 2047 case SDEV_VTOR_INVALID: 2048 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2049 sdcmn_err7(("lookup: destroy invalid " 2050 "node: %s(%p)\n", dv->sdev_name, (void *)dv)); 2051 goto nolock_notfound; 2052 case SDEV_VTOR_SKIP: 2053 sdcmn_err7(("lookup: node not applicable - " 2054 "skipping: %s(%p)\n", dv->sdev_name, (void *)dv)); 2055 rw_exit(&ddv->sdev_contents); 2056 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2057 SDEV_RELE(dv); 2058 goto lookup_failed; 2059 default: 2060 cmn_err(CE_PANIC, 2061 "dev fs: validator failed: %s(%p)\n", 2062 dv->sdev_name, (void *)dv); 2063 break; 2064 } 2065 } 2066 2067 rw_exit(&ddv->sdev_contents); 2068 rv = sdev_to_vp(dv, vpp); 2069 sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d " 2070 "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count, 2071 dv->sdev_state, nm, rv)); 2072 return (rv); 2073 2074 nolock_notfound: 2075 /* 2076 * Destroy the node that is created for synchronization purposes. 2077 */ 2078 sdcmn_err3(("devname_lookup_func: %s with state %d\n", 2079 nm, dv->sdev_state)); 2080 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2081 if (dv->sdev_state == SDEV_INIT) { 2082 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2083 rw_exit(&ddv->sdev_contents); 2084 rw_enter(&ddv->sdev_contents, RW_WRITER); 2085 } 2086 2087 /* 2088 * Node state may have changed during the lock 2089 * changes. Re-check. 2090 */ 2091 if (dv->sdev_state == SDEV_INIT) { 2092 sdev_dirdelete(ddv, dv); 2093 rw_exit(&ddv->sdev_contents); 2094 sdev_lookup_failed(ddv, nm, failed_flags); 2095 SDEV_RELE(dv); 2096 *vpp = NULL; 2097 return (ENOENT); 2098 } 2099 } 2100 2101 rw_exit(&ddv->sdev_contents); 2102 SDEV_RELE(dv); 2103 2104 lookup_failed: 2105 sdev_lookup_failed(ddv, nm, failed_flags); 2106 *vpp = NULL; 2107 return (ENOENT); 2108 } 2109 2110 /* 2111 * Given a directory node, mark all nodes beneath as 2112 * STALE, i.e. nodes that don't exist as far as new 2113 * consumers are concerned. Remove them from the 2114 * list of directory entries so that no lookup or 2115 * directory traversal will find them. The node 2116 * not deallocated so existing holds are not affected. 2117 */ 2118 void 2119 sdev_stale(struct sdev_node *ddv) 2120 { 2121 struct sdev_node *dv; 2122 struct vnode *vp; 2123 2124 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 2125 2126 rw_enter(&ddv->sdev_contents, RW_WRITER); 2127 while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) { 2128 vp = SDEVTOV(dv); 2129 SDEV_HOLD(dv); 2130 if (vp->v_type == VDIR) 2131 sdev_stale(dv); 2132 2133 sdev_dirdelete(ddv, dv); 2134 SDEV_RELE(dv); 2135 } 2136 ddv->sdev_flags |= SDEV_BUILD; 2137 rw_exit(&ddv->sdev_contents); 2138 } 2139 2140 /* 2141 * Given a directory node, clean out all the nodes beneath. 2142 * If expr is specified, clean node with names matching expr. 2143 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale, 2144 * so they are excluded from future lookups. 2145 */ 2146 int 2147 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags) 2148 { 2149 int error = 0; 2150 int busy = 0; 2151 struct vnode *vp; 2152 struct sdev_node *dv, *next; 2153 int bkstore = 0; 2154 int len = 0; 2155 char *bks_name = NULL; 2156 2157 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 2158 2159 /* 2160 * We try our best to destroy all unused sdev_node's 2161 */ 2162 rw_enter(&ddv->sdev_contents, RW_WRITER); 2163 for (dv = SDEV_FIRST_ENTRY(ddv); dv != NULL; dv = next) { 2164 next = SDEV_NEXT_ENTRY(ddv, dv); 2165 vp = SDEVTOV(dv); 2166 2167 if (expr && gmatch(dv->sdev_name, expr) == 0) 2168 continue; 2169 2170 if (vp->v_type == VDIR && 2171 sdev_cleandir(dv, NULL, flags) != 0) { 2172 sdcmn_err9(("sdev_cleandir: dir %s busy\n", 2173 dv->sdev_name)); 2174 busy++; 2175 continue; 2176 } 2177 2178 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) { 2179 sdcmn_err9(("sdev_cleandir: dir %s busy\n", 2180 dv->sdev_name)); 2181 busy++; 2182 continue; 2183 } 2184 2185 /* 2186 * at this point, either dv is not held or SDEV_ENFORCE 2187 * is specified. In either case, dv needs to be deleted 2188 */ 2189 SDEV_HOLD(dv); 2190 2191 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0; 2192 if (bkstore && (vp->v_type == VDIR)) 2193 bkstore += 1; 2194 2195 if (bkstore) { 2196 len = strlen(dv->sdev_name) + 1; 2197 bks_name = kmem_alloc(len, KM_SLEEP); 2198 bcopy(dv->sdev_name, bks_name, len); 2199 } 2200 2201 sdev_dirdelete(ddv, dv); 2202 2203 /* take care the backing store clean up */ 2204 if (bkstore) { 2205 ASSERT(bks_name); 2206 ASSERT(ddv->sdev_attrvp); 2207 2208 if (bkstore == 1) { 2209 error = VOP_REMOVE(ddv->sdev_attrvp, 2210 bks_name, kcred, NULL, 0); 2211 } else if (bkstore == 2) { 2212 error = VOP_RMDIR(ddv->sdev_attrvp, 2213 bks_name, ddv->sdev_attrvp, kcred, NULL, 0); 2214 } 2215 2216 /* do not propagate the backing store errors */ 2217 if (error) { 2218 sdcmn_err9(("sdev_cleandir: backing store" 2219 "not cleaned\n")); 2220 error = 0; 2221 } 2222 2223 bkstore = 0; 2224 kmem_free(bks_name, len); 2225 bks_name = NULL; 2226 len = 0; 2227 } 2228 2229 ddv->sdev_flags |= SDEV_BUILD; 2230 SDEV_RELE(dv); 2231 } 2232 2233 ddv->sdev_flags |= SDEV_BUILD; 2234 rw_exit(&ddv->sdev_contents); 2235 2236 if (busy) { 2237 error = EBUSY; 2238 } 2239 2240 return (error); 2241 } 2242 2243 /* 2244 * a convenient wrapper for readdir() funcs 2245 */ 2246 size_t 2247 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off) 2248 { 2249 size_t reclen = DIRENT64_RECLEN(strlen(nm)); 2250 if (reclen > size) 2251 return (0); 2252 2253 de->d_ino = (ino64_t)ino; 2254 de->d_off = (off64_t)off + 1; 2255 de->d_reclen = (ushort_t)reclen; 2256 (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen)); 2257 return (reclen); 2258 } 2259 2260 /* 2261 * sdev_mount service routines 2262 */ 2263 int 2264 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args) 2265 { 2266 int error; 2267 2268 if (uap->datalen != sizeof (*args)) 2269 return (EINVAL); 2270 2271 if (error = copyin(uap->dataptr, args, sizeof (*args))) { 2272 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not" 2273 "get user data. error %d\n", error); 2274 return (EFAULT); 2275 } 2276 2277 return (0); 2278 } 2279 2280 #ifdef nextdp 2281 #undef nextdp 2282 #endif 2283 #define nextdp(dp) ((struct dirent64 *) \ 2284 (intptr_t)((char *)(dp) + (dp)->d_reclen)) 2285 2286 /* 2287 * readdir helper func 2288 */ 2289 int 2290 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp, 2291 int flags) 2292 { 2293 struct sdev_node *ddv = VTOSDEV(vp); 2294 struct sdev_node *dv; 2295 dirent64_t *dp; 2296 ulong_t outcount = 0; 2297 size_t namelen; 2298 ulong_t alloc_count; 2299 void *outbuf; 2300 struct iovec *iovp; 2301 int error = 0; 2302 size_t reclen; 2303 offset_t diroff; 2304 offset_t soff; 2305 int this_reclen; 2306 int (*vtor)(struct sdev_node *) = NULL; 2307 struct vattr attr; 2308 timestruc_t now; 2309 2310 ASSERT(ddv->sdev_attr || ddv->sdev_attrvp); 2311 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2312 2313 if (uiop->uio_loffset >= MAXOFF_T) { 2314 if (eofp) 2315 *eofp = 1; 2316 return (0); 2317 } 2318 2319 if (uiop->uio_iovcnt != 1) 2320 return (EINVAL); 2321 2322 if (vp->v_type != VDIR) 2323 return (ENOTDIR); 2324 2325 if (ddv->sdev_flags & SDEV_VTOR) { 2326 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); 2327 ASSERT(vtor); 2328 } 2329 2330 if (eofp != NULL) 2331 *eofp = 0; 2332 2333 soff = uiop->uio_loffset; 2334 iovp = uiop->uio_iov; 2335 alloc_count = iovp->iov_len; 2336 dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP); 2337 outcount = 0; 2338 2339 if (ddv->sdev_state == SDEV_ZOMBIE) 2340 goto get_cache; 2341 2342 if (SDEV_IS_GLOBAL(ddv)) { 2343 2344 if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) && 2345 !sdev_reconfig_boot && (flags & SDEV_BROWSE) && 2346 !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) && 2347 ((moddebug & MODDEBUG_FINI_EBUSY) == 0) && 2348 !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) && 2349 !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && 2350 !sdev_reconfig_disable) { 2351 /* 2352 * invoking "devfsadm" to do system device reconfig 2353 */ 2354 mutex_enter(&ddv->sdev_lookup_lock); 2355 SDEV_BLOCK_OTHERS(ddv, 2356 (SDEV_READDIR|SDEV_LGWAITING)); 2357 mutex_exit(&ddv->sdev_lookup_lock); 2358 2359 sdcmn_err8(("readdir of %s by %s: reconfig\n", 2360 ddv->sdev_path, curproc->p_user.u_comm)); 2361 if (sdev_reconfig_verbose) { 2362 cmn_err(CE_CONT, 2363 "?readdir of %s by %s: reconfig\n", 2364 ddv->sdev_path, curproc->p_user.u_comm); 2365 } 2366 2367 sdev_devfsadmd_thread(ddv, NULL, kcred); 2368 } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { 2369 /* 2370 * compensate the "ls" started later than "devfsadm" 2371 */ 2372 mutex_enter(&ddv->sdev_lookup_lock); 2373 SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING)); 2374 mutex_exit(&ddv->sdev_lookup_lock); 2375 } 2376 2377 /* 2378 * release the contents lock so that 2379 * the cache may be updated by devfsadmd 2380 */ 2381 rw_exit(&ddv->sdev_contents); 2382 mutex_enter(&ddv->sdev_lookup_lock); 2383 if (SDEV_IS_READDIR(ddv)) 2384 (void) sdev_wait4lookup(ddv, SDEV_READDIR); 2385 mutex_exit(&ddv->sdev_lookup_lock); 2386 rw_enter(&ddv->sdev_contents, RW_READER); 2387 2388 sdcmn_err4(("readdir of directory %s by %s\n", 2389 ddv->sdev_name, curproc->p_user.u_comm)); 2390 if (ddv->sdev_flags & SDEV_BUILD) { 2391 if (SDEV_IS_PERSIST(ddv)) { 2392 error = sdev_filldir_from_store(ddv, 2393 alloc_count, cred); 2394 } 2395 ddv->sdev_flags &= ~SDEV_BUILD; 2396 } 2397 } 2398 2399 get_cache: 2400 /* handle "." and ".." */ 2401 diroff = 0; 2402 if (soff == 0) { 2403 /* first time */ 2404 this_reclen = DIRENT64_RECLEN(1); 2405 if (alloc_count < this_reclen) { 2406 error = EINVAL; 2407 goto done; 2408 } 2409 2410 dp->d_ino = (ino64_t)ddv->sdev_ino; 2411 dp->d_off = (off64_t)1; 2412 dp->d_reclen = (ushort_t)this_reclen; 2413 2414 (void) strncpy(dp->d_name, ".", 2415 DIRENT64_NAMELEN(this_reclen)); 2416 outcount += dp->d_reclen; 2417 dp = nextdp(dp); 2418 } 2419 2420 diroff++; 2421 if (soff <= 1) { 2422 this_reclen = DIRENT64_RECLEN(2); 2423 if (alloc_count < outcount + this_reclen) { 2424 error = EINVAL; 2425 goto done; 2426 } 2427 2428 dp->d_reclen = (ushort_t)this_reclen; 2429 dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino; 2430 dp->d_off = (off64_t)2; 2431 2432 (void) strncpy(dp->d_name, "..", 2433 DIRENT64_NAMELEN(this_reclen)); 2434 outcount += dp->d_reclen; 2435 2436 dp = nextdp(dp); 2437 } 2438 2439 2440 /* gets the cache */ 2441 diroff++; 2442 for (dv = SDEV_FIRST_ENTRY(ddv); dv; 2443 dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) { 2444 sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n", 2445 diroff, soff, dv->sdev_name)); 2446 2447 /* bypassing pre-matured nodes */ 2448 if (diroff < soff || (dv->sdev_state != SDEV_READY)) { 2449 sdcmn_err3(("sdev_readdir: pre-mature node " 2450 "%s %d\n", dv->sdev_name, dv->sdev_state)); 2451 continue; 2452 } 2453 2454 /* 2455 * Check validity of node 2456 * Drop invalid and nodes to be skipped. 2457 * A node the validator indicates as stale needs 2458 * to be returned as presumably the node name itself 2459 * is valid and the node data itself will be refreshed 2460 * on lookup. An application performing a readdir then 2461 * stat on each entry should thus always see consistent 2462 * data. In any case, it is not possible to synchronize 2463 * with dynamic kernel state, and any view we return can 2464 * never be anything more than a snapshot at a point in time. 2465 */ 2466 if (vtor) { 2467 switch (vtor(dv)) { 2468 case SDEV_VTOR_VALID: 2469 break; 2470 case SDEV_VTOR_INVALID: 2471 case SDEV_VTOR_SKIP: 2472 continue; 2473 case SDEV_VTOR_STALE: 2474 sdcmn_err3(("sdev_readir: %s stale\n", 2475 dv->sdev_name)); 2476 break; 2477 default: 2478 cmn_err(CE_PANIC, 2479 "dev fs: validator failed: %s(%p)\n", 2480 dv->sdev_name, (void *)dv); 2481 break; 2482 /*NOTREACHED*/ 2483 } 2484 } 2485 2486 namelen = strlen(dv->sdev_name); 2487 reclen = DIRENT64_RECLEN(namelen); 2488 if (outcount + reclen > alloc_count) { 2489 goto full; 2490 } 2491 dp->d_reclen = (ushort_t)reclen; 2492 dp->d_ino = (ino64_t)dv->sdev_ino; 2493 dp->d_off = (off64_t)diroff + 1; 2494 (void) strncpy(dp->d_name, dv->sdev_name, 2495 DIRENT64_NAMELEN(reclen)); 2496 outcount += reclen; 2497 dp = nextdp(dp); 2498 } 2499 2500 full: 2501 sdcmn_err4(("sdev_readdir: moving %lu bytes: " 2502 "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff, 2503 (void *)dv)); 2504 2505 if (outcount) 2506 error = uiomove(outbuf, outcount, UIO_READ, uiop); 2507 2508 if (!error) { 2509 uiop->uio_loffset = diroff; 2510 if (eofp) 2511 *eofp = dv ? 0 : 1; 2512 } 2513 2514 2515 if (ddv->sdev_attrvp) { 2516 gethrestime(&now); 2517 attr.va_ctime = now; 2518 attr.va_atime = now; 2519 attr.va_mask = AT_CTIME|AT_ATIME; 2520 2521 (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL); 2522 } 2523 done: 2524 kmem_free(outbuf, alloc_count); 2525 return (error); 2526 } 2527 2528 static int 2529 sdev_modctl_lookup(const char *path, vnode_t **r_vp) 2530 { 2531 vnode_t *vp; 2532 vnode_t *cvp; 2533 struct sdev_node *svp; 2534 char *nm; 2535 struct pathname pn; 2536 int error; 2537 int persisted = 0; 2538 2539 ASSERT(INGLOBALZONE(curproc)); 2540 2541 if (error = pn_get((char *)path, UIO_SYSSPACE, &pn)) 2542 return (error); 2543 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 2544 2545 vp = rootdir; 2546 VN_HOLD(vp); 2547 2548 while (pn_pathleft(&pn)) { 2549 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK); 2550 (void) pn_getcomponent(&pn, nm); 2551 2552 /* 2553 * Deal with the .. special case where we may be 2554 * traversing up across a mount point, to the 2555 * root of this filesystem or global root. 2556 */ 2557 if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) { 2558 checkforroot: 2559 if (VN_CMP(vp, rootdir)) { 2560 nm[1] = 0; 2561 } else if (vp->v_flag & VROOT) { 2562 vfs_t *vfsp; 2563 cvp = vp; 2564 vfsp = cvp->v_vfsp; 2565 vfs_rlock_wait(vfsp); 2566 vp = cvp->v_vfsp->vfs_vnodecovered; 2567 if (vp == NULL || 2568 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) { 2569 vfs_unlock(vfsp); 2570 VN_RELE(cvp); 2571 error = EIO; 2572 break; 2573 } 2574 VN_HOLD(vp); 2575 vfs_unlock(vfsp); 2576 VN_RELE(cvp); 2577 cvp = NULL; 2578 goto checkforroot; 2579 } 2580 } 2581 2582 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL, 2583 NULL, NULL); 2584 if (error) { 2585 VN_RELE(vp); 2586 break; 2587 } 2588 2589 /* traverse mount points encountered on our journey */ 2590 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) { 2591 VN_RELE(vp); 2592 VN_RELE(cvp); 2593 break; 2594 } 2595 2596 /* 2597 * symbolic link, can be either relative and absolute 2598 */ 2599 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) { 2600 struct pathname linkpath; 2601 pn_alloc(&linkpath); 2602 if (error = pn_getsymlink(cvp, &linkpath, kcred)) { 2603 pn_free(&linkpath); 2604 break; 2605 } 2606 if (pn_pathleft(&linkpath) == 0) 2607 (void) pn_set(&linkpath, "."); 2608 error = pn_insert(&pn, &linkpath, strlen(nm)); 2609 pn_free(&linkpath); 2610 if (pn.pn_pathlen == 0) { 2611 VN_RELE(vp); 2612 return (ENOENT); 2613 } 2614 if (pn.pn_path[0] == '/') { 2615 pn_skipslash(&pn); 2616 VN_RELE(vp); 2617 VN_RELE(cvp); 2618 vp = rootdir; 2619 VN_HOLD(vp); 2620 } else { 2621 VN_RELE(cvp); 2622 } 2623 continue; 2624 } 2625 2626 VN_RELE(vp); 2627 2628 /* 2629 * Direct the operation to the persisting filesystem 2630 * underlying /dev. Bail if we encounter a 2631 * non-persistent dev entity here. 2632 */ 2633 if (cvp->v_vfsp->vfs_fstype == devtype) { 2634 2635 if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) { 2636 error = ENOENT; 2637 VN_RELE(cvp); 2638 break; 2639 } 2640 2641 if (VTOSDEV(cvp) == NULL) { 2642 error = ENOENT; 2643 VN_RELE(cvp); 2644 break; 2645 } 2646 svp = VTOSDEV(cvp); 2647 if ((vp = svp->sdev_attrvp) == NULL) { 2648 error = ENOENT; 2649 VN_RELE(cvp); 2650 break; 2651 } 2652 persisted = 1; 2653 VN_HOLD(vp); 2654 VN_RELE(cvp); 2655 cvp = vp; 2656 } 2657 2658 vp = cvp; 2659 pn_skipslash(&pn); 2660 } 2661 2662 kmem_free(nm, MAXNAMELEN); 2663 pn_free(&pn); 2664 2665 if (error) 2666 return (error); 2667 2668 /* 2669 * Only return persisted nodes in the filesystem underlying /dev. 2670 */ 2671 if (!persisted) { 2672 VN_RELE(vp); 2673 return (ENOENT); 2674 } 2675 2676 *r_vp = vp; 2677 return (0); 2678 } 2679 2680 int 2681 sdev_modctl_readdir(const char *dir, char ***dirlistp, int *npathsp, 2682 int *npathsp_alloc, int checking_empty) 2683 { 2684 char **pathlist = NULL; 2685 char **newlist = NULL; 2686 int npaths = 0; 2687 int npaths_alloc = 0; 2688 dirent64_t *dbuf = NULL; 2689 int n; 2690 char *s; 2691 int error; 2692 vnode_t *vp; 2693 int eof; 2694 struct iovec iov; 2695 struct uio uio; 2696 struct dirent64 *dp; 2697 size_t dlen; 2698 size_t dbuflen; 2699 int ndirents = 64; 2700 char *nm; 2701 2702 error = sdev_modctl_lookup(dir, &vp); 2703 sdcmn_err11(("modctl readdir: %s by %s: %s\n", 2704 dir, curproc->p_user.u_comm, 2705 (error == 0) ? "ok" : "failed")); 2706 if (error) 2707 return (error); 2708 2709 dlen = ndirents * (sizeof (*dbuf)); 2710 dbuf = kmem_alloc(dlen, KM_SLEEP); 2711 2712 uio.uio_iov = &iov; 2713 uio.uio_iovcnt = 1; 2714 uio.uio_segflg = UIO_SYSSPACE; 2715 uio.uio_fmode = 0; 2716 uio.uio_extflg = UIO_COPY_CACHED; 2717 uio.uio_loffset = 0; 2718 uio.uio_llimit = MAXOFFSET_T; 2719 2720 eof = 0; 2721 error = 0; 2722 while (!error && !eof) { 2723 uio.uio_resid = dlen; 2724 iov.iov_base = (char *)dbuf; 2725 iov.iov_len = dlen; 2726 2727 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 2728 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0); 2729 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2730 2731 dbuflen = dlen - uio.uio_resid; 2732 2733 if (error || dbuflen == 0) 2734 break; 2735 2736 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 2737 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 2738 2739 nm = dp->d_name; 2740 2741 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 2742 continue; 2743 if (npaths == npaths_alloc) { 2744 npaths_alloc += 64; 2745 newlist = (char **) 2746 kmem_zalloc((npaths_alloc + 1) * 2747 sizeof (char *), KM_SLEEP); 2748 if (pathlist) { 2749 bcopy(pathlist, newlist, 2750 npaths * sizeof (char *)); 2751 kmem_free(pathlist, 2752 (npaths + 1) * sizeof (char *)); 2753 } 2754 pathlist = newlist; 2755 } 2756 n = strlen(nm) + 1; 2757 s = kmem_alloc(n, KM_SLEEP); 2758 bcopy(nm, s, n); 2759 pathlist[npaths++] = s; 2760 sdcmn_err11((" %s/%s\n", dir, s)); 2761 2762 /* if checking empty, one entry is as good as many */ 2763 if (checking_empty) { 2764 eof = 1; 2765 break; 2766 } 2767 } 2768 } 2769 2770 exit: 2771 VN_RELE(vp); 2772 2773 if (dbuf) 2774 kmem_free(dbuf, dlen); 2775 2776 if (error) 2777 return (error); 2778 2779 *dirlistp = pathlist; 2780 *npathsp = npaths; 2781 *npathsp_alloc = npaths_alloc; 2782 2783 return (0); 2784 } 2785 2786 void 2787 sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc) 2788 { 2789 int i, n; 2790 2791 for (i = 0; i < npaths; i++) { 2792 n = strlen(pathlist[i]) + 1; 2793 kmem_free(pathlist[i], n); 2794 } 2795 2796 kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *)); 2797 } 2798 2799 int 2800 sdev_modctl_devexists(const char *path) 2801 { 2802 vnode_t *vp; 2803 int error; 2804 2805 error = sdev_modctl_lookup(path, &vp); 2806 sdcmn_err11(("modctl dev exists: %s by %s: %s\n", 2807 path, curproc->p_user.u_comm, 2808 (error == 0) ? "ok" : "failed")); 2809 if (error == 0) 2810 VN_RELE(vp); 2811 2812 return (error); 2813 } 2814 2815 /* 2816 * a generic setattr() function 2817 * 2818 * note: flags only supports AT_UID and AT_GID. 2819 * Future enhancements can be done for other types, e.g. AT_MODE 2820 */ 2821 int 2822 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags, 2823 struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *, 2824 int), int protocol) 2825 { 2826 struct sdev_node *dv = VTOSDEV(vp); 2827 struct sdev_node *parent = dv->sdev_dotdot; 2828 struct vattr *get; 2829 uint_t mask = vap->va_mask; 2830 int error; 2831 2832 /* some sanity checks */ 2833 if (vap->va_mask & AT_NOSET) 2834 return (EINVAL); 2835 2836 if (vap->va_mask & AT_SIZE) { 2837 if (vp->v_type == VDIR) { 2838 return (EISDIR); 2839 } 2840 } 2841 2842 /* no need to set attribute, but do not fail either */ 2843 ASSERT(parent); 2844 rw_enter(&parent->sdev_contents, RW_READER); 2845 if (dv->sdev_state == SDEV_ZOMBIE) { 2846 rw_exit(&parent->sdev_contents); 2847 return (0); 2848 } 2849 2850 /* If backing store exists, just set it. */ 2851 if (dv->sdev_attrvp) { 2852 rw_exit(&parent->sdev_contents); 2853 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 2854 } 2855 2856 /* 2857 * Otherwise, for nodes with the persistence attribute, create it. 2858 */ 2859 ASSERT(dv->sdev_attr); 2860 if (SDEV_IS_PERSIST(dv) || 2861 ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) { 2862 sdev_vattr_merge(dv, vap); 2863 rw_enter(&dv->sdev_contents, RW_WRITER); 2864 error = sdev_shadow_node(dv, cred); 2865 rw_exit(&dv->sdev_contents); 2866 rw_exit(&parent->sdev_contents); 2867 2868 if (error) 2869 return (error); 2870 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 2871 } 2872 2873 2874 /* 2875 * sdev_attr was allocated in sdev_mknode 2876 */ 2877 rw_enter(&dv->sdev_contents, RW_WRITER); 2878 error = secpolicy_vnode_setattr(cred, vp, vap, 2879 dv->sdev_attr, flags, sdev_unlocked_access, dv); 2880 if (error) { 2881 rw_exit(&dv->sdev_contents); 2882 rw_exit(&parent->sdev_contents); 2883 return (error); 2884 } 2885 2886 get = dv->sdev_attr; 2887 if (mask & AT_MODE) { 2888 get->va_mode &= S_IFMT; 2889 get->va_mode |= vap->va_mode & ~S_IFMT; 2890 } 2891 2892 if ((mask & AT_UID) || (mask & AT_GID)) { 2893 if (mask & AT_UID) 2894 get->va_uid = vap->va_uid; 2895 if (mask & AT_GID) 2896 get->va_gid = vap->va_gid; 2897 /* 2898 * a callback must be provided if the protocol is set 2899 */ 2900 if ((protocol & AT_UID) || (protocol & AT_GID)) { 2901 ASSERT(callback); 2902 error = callback(dv, get, protocol); 2903 if (error) { 2904 rw_exit(&dv->sdev_contents); 2905 rw_exit(&parent->sdev_contents); 2906 return (error); 2907 } 2908 } 2909 } 2910 2911 if (mask & AT_ATIME) 2912 get->va_atime = vap->va_atime; 2913 if (mask & AT_MTIME) 2914 get->va_mtime = vap->va_mtime; 2915 if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) { 2916 gethrestime(&get->va_ctime); 2917 } 2918 2919 sdev_vattr_merge(dv, get); 2920 rw_exit(&dv->sdev_contents); 2921 rw_exit(&parent->sdev_contents); 2922 return (0); 2923 } 2924 2925 /* 2926 * a generic inactive() function 2927 */ 2928 /*ARGSUSED*/ 2929 void 2930 devname_inactive_func(struct vnode *vp, struct cred *cred, 2931 void (*callback)(struct vnode *)) 2932 { 2933 int clean; 2934 struct sdev_node *dv = VTOSDEV(vp); 2935 int state; 2936 2937 mutex_enter(&vp->v_lock); 2938 ASSERT(vp->v_count >= 1); 2939 2940 2941 if (vp->v_count == 1 && callback != NULL) 2942 callback(vp); 2943 2944 rw_enter(&dv->sdev_contents, RW_WRITER); 2945 state = dv->sdev_state; 2946 2947 clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE); 2948 2949 /* 2950 * sdev is a rather bad public citizen. It violates the general 2951 * agreement that in memory nodes should always have a valid reference 2952 * count on their vnode. But that's not the case here. This means that 2953 * we do actually have to distinguish between getting inactive callbacks 2954 * for zombies and otherwise. This should probably be fixed. 2955 */ 2956 if (clean) { 2957 /* Remove the . entry to ourselves */ 2958 if (vp->v_type == VDIR) { 2959 decr_link(dv); 2960 } 2961 VERIFY(dv->sdev_nlink == 1); 2962 decr_link(dv); 2963 VN_RELE_LOCKED(vp); 2964 rw_exit(&dv->sdev_contents); 2965 mutex_exit(&vp->v_lock); 2966 sdev_nodedestroy(dv, 0); 2967 } else { 2968 VN_RELE_LOCKED(vp); 2969 rw_exit(&dv->sdev_contents); 2970 mutex_exit(&vp->v_lock); 2971 } 2972 } 2973