1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2013, 2016 Joyent, Inc. All rights reserved. 24 */ 25 26 /* 27 * utility routines for the /dev fs 28 */ 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/t_lock.h> 33 #include <sys/systm.h> 34 #include <sys/sysmacros.h> 35 #include <sys/user.h> 36 #include <sys/time.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/file.h> 40 #include <sys/fcntl.h> 41 #include <sys/flock.h> 42 #include <sys/kmem.h> 43 #include <sys/uio.h> 44 #include <sys/errno.h> 45 #include <sys/stat.h> 46 #include <sys/cred.h> 47 #include <sys/dirent.h> 48 #include <sys/pathname.h> 49 #include <sys/cmn_err.h> 50 #include <sys/debug.h> 51 #include <sys/mode.h> 52 #include <sys/policy.h> 53 #include <fs/fs_subr.h> 54 #include <sys/mount.h> 55 #include <sys/fs/snode.h> 56 #include <sys/fs/dv_node.h> 57 #include <sys/fs/sdev_impl.h> 58 #include <sys/sunndi.h> 59 #include <sys/sunmdi.h> 60 #include <sys/conf.h> 61 #include <sys/proc.h> 62 #include <sys/user.h> 63 #include <sys/modctl.h> 64 65 #ifdef DEBUG 66 int sdev_debug = 0x00000001; 67 int sdev_debug_cache_flags = 0; 68 #endif 69 70 /* 71 * globals 72 */ 73 /* prototype memory vattrs */ 74 vattr_t sdev_vattr_dir = { 75 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 76 VDIR, /* va_type */ 77 SDEV_DIRMODE_DEFAULT, /* va_mode */ 78 SDEV_UID_DEFAULT, /* va_uid */ 79 SDEV_GID_DEFAULT, /* va_gid */ 80 0, /* va_fsid */ 81 0, /* va_nodeid */ 82 0, /* va_nlink */ 83 0, /* va_size */ 84 0, /* va_atime */ 85 0, /* va_mtime */ 86 0, /* va_ctime */ 87 0, /* va_rdev */ 88 0, /* va_blksize */ 89 0, /* va_nblocks */ 90 0 /* va_vcode */ 91 }; 92 93 vattr_t sdev_vattr_lnk = { 94 AT_TYPE|AT_MODE, /* va_mask */ 95 VLNK, /* va_type */ 96 SDEV_LNKMODE_DEFAULT, /* va_mode */ 97 SDEV_UID_DEFAULT, /* va_uid */ 98 SDEV_GID_DEFAULT, /* va_gid */ 99 0, /* va_fsid */ 100 0, /* va_nodeid */ 101 0, /* va_nlink */ 102 0, /* va_size */ 103 0, /* va_atime */ 104 0, /* va_mtime */ 105 0, /* va_ctime */ 106 0, /* va_rdev */ 107 0, /* va_blksize */ 108 0, /* va_nblocks */ 109 0 /* va_vcode */ 110 }; 111 112 vattr_t sdev_vattr_blk = { 113 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 114 VBLK, /* va_type */ 115 S_IFBLK | SDEV_DEVMODE_DEFAULT, /* va_mode */ 116 SDEV_UID_DEFAULT, /* va_uid */ 117 SDEV_GID_DEFAULT, /* va_gid */ 118 0, /* va_fsid */ 119 0, /* va_nodeid */ 120 0, /* va_nlink */ 121 0, /* va_size */ 122 0, /* va_atime */ 123 0, /* va_mtime */ 124 0, /* va_ctime */ 125 0, /* va_rdev */ 126 0, /* va_blksize */ 127 0, /* va_nblocks */ 128 0 /* va_vcode */ 129 }; 130 131 vattr_t sdev_vattr_chr = { 132 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 133 VCHR, /* va_type */ 134 S_IFCHR | SDEV_DEVMODE_DEFAULT, /* va_mode */ 135 SDEV_UID_DEFAULT, /* va_uid */ 136 SDEV_GID_DEFAULT, /* va_gid */ 137 0, /* va_fsid */ 138 0, /* va_nodeid */ 139 0, /* va_nlink */ 140 0, /* va_size */ 141 0, /* va_atime */ 142 0, /* va_mtime */ 143 0, /* va_ctime */ 144 0, /* va_rdev */ 145 0, /* va_blksize */ 146 0, /* va_nblocks */ 147 0 /* va_vcode */ 148 }; 149 150 kmem_cache_t *sdev_node_cache; /* sdev_node cache */ 151 int devtype; /* fstype */ 152 153 /* static */ 154 static struct vnodeops *sdev_get_vop(struct sdev_node *); 155 static void sdev_set_no_negcache(struct sdev_node *); 156 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []); 157 static void sdev_free_vtab(fs_operation_def_t *); 158 159 static void 160 sdev_prof_free(struct sdev_node *dv) 161 { 162 ASSERT(!SDEV_IS_GLOBAL(dv)); 163 nvlist_free(dv->sdev_prof.dev_name); 164 nvlist_free(dv->sdev_prof.dev_map); 165 nvlist_free(dv->sdev_prof.dev_symlink); 166 nvlist_free(dv->sdev_prof.dev_glob_incdir); 167 nvlist_free(dv->sdev_prof.dev_glob_excdir); 168 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 169 } 170 171 /* sdev_node cache constructor */ 172 /*ARGSUSED1*/ 173 static int 174 i_sdev_node_ctor(void *buf, void *cfarg, int flag) 175 { 176 struct sdev_node *dv = (struct sdev_node *)buf; 177 struct vnode *vp; 178 179 bzero(buf, sizeof (struct sdev_node)); 180 vp = dv->sdev_vnode = vn_alloc(flag); 181 if (vp == NULL) { 182 return (-1); 183 } 184 vp->v_data = dv; 185 rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL); 186 return (0); 187 } 188 189 /* sdev_node cache destructor */ 190 /*ARGSUSED1*/ 191 static void 192 i_sdev_node_dtor(void *buf, void *arg) 193 { 194 struct sdev_node *dv = (struct sdev_node *)buf; 195 struct vnode *vp = SDEVTOV(dv); 196 197 rw_destroy(&dv->sdev_contents); 198 vn_free(vp); 199 } 200 201 /* initialize sdev_node cache */ 202 void 203 sdev_node_cache_init() 204 { 205 int flags = 0; 206 207 #ifdef DEBUG 208 flags = sdev_debug_cache_flags; 209 if (flags) 210 sdcmn_err(("cache debug flags 0x%x\n", flags)); 211 #endif /* DEBUG */ 212 213 ASSERT(sdev_node_cache == NULL); 214 sdev_node_cache = kmem_cache_create("sdev_node_cache", 215 sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor, 216 NULL, NULL, NULL, flags); 217 } 218 219 /* destroy sdev_node cache */ 220 void 221 sdev_node_cache_fini() 222 { 223 ASSERT(sdev_node_cache != NULL); 224 kmem_cache_destroy(sdev_node_cache); 225 sdev_node_cache = NULL; 226 } 227 228 /* 229 * Compare two nodes lexographically to balance avl tree 230 */ 231 static int 232 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2) 233 { 234 int rv; 235 if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0) 236 return (0); 237 return ((rv < 0) ? -1 : 1); 238 } 239 240 void 241 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state) 242 { 243 ASSERT(dv); 244 ASSERT(RW_WRITE_HELD(&dv->sdev_contents)); 245 dv->sdev_state = state; 246 } 247 248 static void 249 sdev_attr_update(struct sdev_node *dv, vattr_t *vap) 250 { 251 timestruc_t now; 252 struct vattr *attrp; 253 uint_t mask; 254 255 ASSERT(dv->sdev_attr); 256 ASSERT(vap); 257 258 attrp = dv->sdev_attr; 259 mask = vap->va_mask; 260 if (mask & AT_TYPE) 261 attrp->va_type = vap->va_type; 262 if (mask & AT_MODE) 263 attrp->va_mode = vap->va_mode; 264 if (mask & AT_UID) 265 attrp->va_uid = vap->va_uid; 266 if (mask & AT_GID) 267 attrp->va_gid = vap->va_gid; 268 if (mask & AT_RDEV) 269 attrp->va_rdev = vap->va_rdev; 270 271 gethrestime(&now); 272 attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now; 273 attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now; 274 attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now; 275 } 276 277 static void 278 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap) 279 { 280 ASSERT(dv->sdev_attr == NULL); 281 ASSERT(vap->va_mask & AT_TYPE); 282 ASSERT(vap->va_mask & AT_MODE); 283 284 dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP); 285 sdev_attr_update(dv, vap); 286 } 287 288 /* alloc and initialize a sdev_node */ 289 int 290 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, 291 vattr_t *vap) 292 { 293 struct sdev_node *dv = NULL; 294 struct vnode *vp; 295 size_t nmlen, len; 296 devname_handle_t *dhl; 297 298 nmlen = strlen(nm) + 1; 299 if (nmlen > MAXNAMELEN) { 300 sdcmn_err9(("sdev_nodeinit: node name %s" 301 " too long\n", nm)); 302 *newdv = NULL; 303 return (ENAMETOOLONG); 304 } 305 306 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); 307 308 dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP); 309 bcopy(nm, dv->sdev_name, nmlen); 310 dv->sdev_namelen = nmlen - 1; /* '\0' not included */ 311 len = strlen(ddv->sdev_path) + strlen(nm) + 2; 312 dv->sdev_path = kmem_alloc(len, KM_SLEEP); 313 (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm); 314 /* overwritten for VLNK nodes */ 315 dv->sdev_symlink = NULL; 316 317 vp = SDEVTOV(dv); 318 vn_reinit(vp); 319 vp->v_vfsp = SDEVTOV(ddv)->v_vfsp; 320 if (vap) 321 vp->v_type = vap->va_type; 322 323 /* 324 * initialized to the parent's vnodeops. 325 * maybe overwriten for a VDIR 326 */ 327 vn_setops(vp, vn_getops(SDEVTOV(ddv))); 328 vn_exists(vp); 329 330 dv->sdev_dotdot = NULL; 331 dv->sdev_attrvp = NULL; 332 if (vap) { 333 sdev_attr_alloc(dv, vap); 334 } else { 335 dv->sdev_attr = NULL; 336 } 337 338 dv->sdev_ino = sdev_mkino(dv); 339 dv->sdev_nlink = 0; /* updated on insert */ 340 dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */ 341 dv->sdev_flags |= SDEV_BUILD; 342 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); 343 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); 344 if (SDEV_IS_GLOBAL(ddv)) { 345 dv->sdev_flags |= SDEV_GLOBAL; 346 dhl = &(dv->sdev_handle); 347 dhl->dh_data = dv; 348 dhl->dh_args = NULL; 349 sdev_set_no_negcache(dv); 350 dv->sdev_gdir_gen = 0; 351 } else { 352 dv->sdev_flags &= ~SDEV_GLOBAL; 353 dv->sdev_origin = NULL; /* set later */ 354 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 355 dv->sdev_ldir_gen = 0; 356 dv->sdev_devtree_gen = 0; 357 } 358 359 rw_enter(&dv->sdev_contents, RW_WRITER); 360 sdev_set_nodestate(dv, SDEV_INIT); 361 rw_exit(&dv->sdev_contents); 362 *newdv = dv; 363 364 return (0); 365 } 366 367 /* 368 * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the 369 * caller to transition the node to the SDEV_ZOMBIE state. 370 */ 371 int 372 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp, 373 void *args, struct cred *cred) 374 { 375 int error = 0; 376 struct vnode *vp = SDEVTOV(dv); 377 vtype_t type; 378 379 ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap); 380 381 type = vap->va_type; 382 vp->v_type = type; 383 vp->v_rdev = vap->va_rdev; 384 rw_enter(&dv->sdev_contents, RW_WRITER); 385 if (type == VDIR) { 386 dv->sdev_nlink = 2; 387 dv->sdev_flags &= ~SDEV_PERSIST; 388 dv->sdev_flags &= ~SDEV_DYNAMIC; 389 vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */ 390 ASSERT(dv->sdev_dotdot); 391 ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR); 392 vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev; 393 avl_create(&dv->sdev_entries, 394 (int (*)(const void *, const void *))sdev_compare_nodes, 395 sizeof (struct sdev_node), 396 offsetof(struct sdev_node, sdev_avllink)); 397 } else if (type == VLNK) { 398 ASSERT(args); 399 dv->sdev_nlink = 1; 400 dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP); 401 } else { 402 dv->sdev_nlink = 1; 403 } 404 405 if (!(SDEV_IS_GLOBAL(dv))) { 406 dv->sdev_origin = (struct sdev_node *)args; 407 dv->sdev_flags &= ~SDEV_PERSIST; 408 } 409 410 /* 411 * shadow node is created here OR 412 * if failed (indicated by dv->sdev_attrvp == NULL), 413 * created later in sdev_setattr 414 */ 415 if (avp) { 416 dv->sdev_attrvp = avp; 417 } else { 418 if (dv->sdev_attr == NULL) { 419 sdev_attr_alloc(dv, vap); 420 } else { 421 sdev_attr_update(dv, vap); 422 } 423 424 if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv)) 425 error = sdev_shadow_node(dv, cred); 426 } 427 428 if (error == 0) { 429 /* transition to READY state */ 430 sdev_set_nodestate(dv, SDEV_READY); 431 sdev_nc_node_exists(dv); 432 } 433 rw_exit(&dv->sdev_contents); 434 return (error); 435 } 436 437 /* 438 * Build the VROOT sdev_node. 439 */ 440 /*ARGSUSED*/ 441 struct sdev_node * 442 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp, 443 struct vnode *avp, struct cred *cred) 444 { 445 struct sdev_node *dv; 446 struct vnode *vp; 447 char devdir[] = "/dev"; 448 449 ASSERT(sdev_node_cache != NULL); 450 ASSERT(avp); 451 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); 452 vp = SDEVTOV(dv); 453 vn_reinit(vp); 454 vp->v_flag |= VROOT; 455 vp->v_vfsp = vfsp; 456 vp->v_type = VDIR; 457 vp->v_rdev = devdev; 458 vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */ 459 vn_exists(vp); 460 461 if (vfsp->vfs_mntpt) 462 dv->sdev_name = i_ddi_strdup( 463 (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP); 464 else 465 /* vfs_mountdev1 set mount point later */ 466 dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP); 467 dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */ 468 dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP); 469 dv->sdev_ino = SDEV_ROOTINO; 470 dv->sdev_nlink = 2; /* name + . (no sdev_insert) */ 471 dv->sdev_dotdot = dv; /* .. == self */ 472 dv->sdev_attrvp = avp; 473 dv->sdev_attr = NULL; 474 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); 475 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); 476 if (strcmp(dv->sdev_name, "/dev") == 0) { 477 dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST; 478 bzero(&dv->sdev_handle, sizeof (dv->sdev_handle)); 479 dv->sdev_gdir_gen = 0; 480 } else { 481 dv->sdev_flags = SDEV_BUILD; 482 dv->sdev_flags &= ~SDEV_PERSIST; 483 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 484 dv->sdev_ldir_gen = 0; 485 dv->sdev_devtree_gen = 0; 486 } 487 488 avl_create(&dv->sdev_entries, 489 (int (*)(const void *, const void *))sdev_compare_nodes, 490 sizeof (struct sdev_node), 491 offsetof(struct sdev_node, sdev_avllink)); 492 493 rw_enter(&dv->sdev_contents, RW_WRITER); 494 sdev_set_nodestate(dv, SDEV_READY); 495 rw_exit(&dv->sdev_contents); 496 sdev_nc_node_exists(dv); 497 return (dv); 498 } 499 500 /* directory dependent vop table */ 501 struct sdev_vop_table { 502 char *vt_name; /* subdirectory name */ 503 const fs_operation_def_t *vt_service; /* vnodeops table */ 504 struct vnodeops *vt_vops; /* constructed vop */ 505 struct vnodeops **vt_global_vops; /* global container for vop */ 506 int (*vt_vtor)(struct sdev_node *); /* validate sdev_node */ 507 int vt_flags; 508 }; 509 510 /* 511 * A nice improvement would be to provide a plug-in mechanism 512 * for this table instead of a const table. 513 */ 514 static struct sdev_vop_table vtab[] = 515 { 516 { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate, 517 SDEV_DYNAMIC | SDEV_VTOR }, 518 519 { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate, 520 SDEV_DYNAMIC | SDEV_VTOR }, 521 522 { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops, 523 devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR }, 524 525 { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE }, 526 527 { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate, 528 SDEV_DYNAMIC | SDEV_VTOR }, 529 530 { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops, 531 devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE }, 532 533 /* 534 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the 535 * lofi driver controls child nodes. 536 * 537 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted 538 * stale nodes (e.g. from devfsadm -R). 539 * 540 * In addition, devfsadm knows not to attempt a rmdir: a zone 541 * may hold a reference, which would zombify the node, 542 * preventing a mkdir. 543 */ 544 545 { "lofi", NULL, NULL, NULL, NULL, 546 SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST }, 547 { "rlofi", NULL, NULL, NULL, NULL, 548 SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST }, 549 550 { NULL, NULL, NULL, NULL, NULL, 0} 551 }; 552 553 /* 554 * We need to match off of the sdev_path, not the sdev_name. We are only allowed 555 * to exist directly under /dev. 556 */ 557 struct sdev_vop_table * 558 sdev_match(struct sdev_node *dv) 559 { 560 int vlen; 561 int i; 562 const char *path; 563 564 if (strlen(dv->sdev_path) <= 5) 565 return (NULL); 566 567 if (strncmp(dv->sdev_path, "/dev/", 5) != 0) 568 return (NULL); 569 path = dv->sdev_path + 5; 570 571 for (i = 0; vtab[i].vt_name; i++) { 572 if (strcmp(vtab[i].vt_name, path) == 0) 573 return (&vtab[i]); 574 if (vtab[i].vt_flags & SDEV_SUBDIR) { 575 vlen = strlen(vtab[i].vt_name); 576 if ((strncmp(vtab[i].vt_name, path, 577 vlen - 1) == 0) && path[vlen] == '/') 578 return (&vtab[i]); 579 } 580 581 } 582 return (NULL); 583 } 584 585 /* 586 * sets a directory's vnodeops if the directory is in the vtab; 587 */ 588 static struct vnodeops * 589 sdev_get_vop(struct sdev_node *dv) 590 { 591 struct sdev_vop_table *vtp; 592 char *path; 593 594 path = dv->sdev_path; 595 ASSERT(path); 596 597 /* gets the relative path to /dev/ */ 598 path += 5; 599 600 /* gets the vtab entry it matches */ 601 if ((vtp = sdev_match(dv)) != NULL) { 602 dv->sdev_flags |= vtp->vt_flags; 603 if (SDEV_IS_PERSIST(dv->sdev_dotdot) && 604 (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv))) 605 dv->sdev_flags |= SDEV_PERSIST; 606 607 if (vtp->vt_vops) { 608 if (vtp->vt_global_vops) 609 *(vtp->vt_global_vops) = vtp->vt_vops; 610 611 return (vtp->vt_vops); 612 } 613 614 if (vtp->vt_service) { 615 fs_operation_def_t *templ; 616 templ = sdev_merge_vtab(vtp->vt_service); 617 if (vn_make_ops(vtp->vt_name, 618 (const fs_operation_def_t *)templ, 619 &vtp->vt_vops) != 0) { 620 cmn_err(CE_PANIC, "%s: malformed vnode ops\n", 621 vtp->vt_name); 622 /*NOTREACHED*/ 623 } 624 if (vtp->vt_global_vops) { 625 *(vtp->vt_global_vops) = vtp->vt_vops; 626 } 627 sdev_free_vtab(templ); 628 629 return (vtp->vt_vops); 630 } 631 632 return (sdev_vnodeops); 633 } 634 635 /* child inherits the persistence of the parent */ 636 if (SDEV_IS_PERSIST(dv->sdev_dotdot)) 637 dv->sdev_flags |= SDEV_PERSIST; 638 639 return (sdev_vnodeops); 640 } 641 642 static void 643 sdev_set_no_negcache(struct sdev_node *dv) 644 { 645 int i; 646 char *path; 647 648 ASSERT(dv->sdev_path); 649 path = dv->sdev_path + strlen("/dev/"); 650 651 for (i = 0; vtab[i].vt_name; i++) { 652 if (strcmp(vtab[i].vt_name, path) == 0) { 653 if (vtab[i].vt_flags & SDEV_NO_NCACHE) 654 dv->sdev_flags |= SDEV_NO_NCACHE; 655 break; 656 } 657 } 658 } 659 660 void * 661 sdev_get_vtor(struct sdev_node *dv) 662 { 663 struct sdev_vop_table *vtp; 664 665 vtp = sdev_match(dv); 666 if (vtp) 667 return ((void *)vtp->vt_vtor); 668 else 669 return (NULL); 670 } 671 672 /* 673 * Build the base root inode 674 */ 675 ino_t 676 sdev_mkino(struct sdev_node *dv) 677 { 678 ino_t ino; 679 680 /* 681 * for now, follow the lead of tmpfs here 682 * need to someday understand the requirements here 683 */ 684 ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3); 685 ino += SDEV_ROOTINO + 1; 686 687 return (ino); 688 } 689 690 int 691 sdev_getlink(struct vnode *linkvp, char **link) 692 { 693 int err; 694 char *buf; 695 struct uio uio = {0}; 696 struct iovec iov = {0}; 697 698 if (linkvp == NULL) 699 return (ENOENT); 700 ASSERT(linkvp->v_type == VLNK); 701 702 buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 703 iov.iov_base = buf; 704 iov.iov_len = MAXPATHLEN; 705 uio.uio_iov = &iov; 706 uio.uio_iovcnt = 1; 707 uio.uio_resid = MAXPATHLEN; 708 uio.uio_segflg = UIO_SYSSPACE; 709 uio.uio_llimit = MAXOFFSET_T; 710 711 err = VOP_READLINK(linkvp, &uio, kcred, NULL); 712 if (err) { 713 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf); 714 kmem_free(buf, MAXPATHLEN); 715 return (ENOENT); 716 } 717 718 /* mission complete */ 719 *link = i_ddi_strdup(buf, KM_SLEEP); 720 kmem_free(buf, MAXPATHLEN); 721 return (0); 722 } 723 724 /* 725 * A convenient wrapper to get the devfs node vnode for a device 726 * minor functionality: readlink() of a /dev symlink 727 * Place the link into dv->sdev_symlink 728 */ 729 static int 730 sdev_follow_link(struct sdev_node *dv) 731 { 732 int err; 733 struct vnode *linkvp; 734 char *link = NULL; 735 736 linkvp = SDEVTOV(dv); 737 if (linkvp == NULL) 738 return (ENOENT); 739 ASSERT(linkvp->v_type == VLNK); 740 err = sdev_getlink(linkvp, &link); 741 if (err) { 742 dv->sdev_symlink = NULL; 743 return (ENOENT); 744 } 745 746 ASSERT(link != NULL); 747 dv->sdev_symlink = link; 748 return (0); 749 } 750 751 static int 752 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs) 753 { 754 vtype_t otype = SDEVTOV(dv)->v_type; 755 756 /* 757 * existing sdev_node has a different type. 758 */ 759 if (otype != nvap->va_type) { 760 sdcmn_err9(("sdev_node_check: existing node " 761 " %s type %d does not match new node type %d\n", 762 dv->sdev_name, otype, nvap->va_type)); 763 return (EEXIST); 764 } 765 766 /* 767 * For a symlink, the target should be the same. 768 */ 769 if (otype == VLNK) { 770 ASSERT(nargs != NULL); 771 ASSERT(dv->sdev_symlink != NULL); 772 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) { 773 sdcmn_err9(("sdev_node_check: existing node " 774 " %s has different symlink %s as new node " 775 " %s\n", dv->sdev_name, dv->sdev_symlink, 776 (char *)nargs)); 777 return (EEXIST); 778 } 779 } 780 781 return (0); 782 } 783 784 /* 785 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready() 786 * 787 * arguments: 788 * - ddv (parent) 789 * - nm (child name) 790 * - newdv (sdev_node for nm is returned here) 791 * - vap (vattr for the node to be created, va_type should be set. 792 * - avp (attribute vnode) 793 * the defaults should be used if unknown) 794 * - cred 795 * - args 796 * . tnm (for VLNK) 797 * . global sdev_node (for !SDEV_GLOBAL) 798 * - state: SDEV_INIT, SDEV_READY 799 * 800 * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT) 801 * 802 * NOTE: directory contents writers lock needs to be held before 803 * calling this routine. 804 */ 805 int 806 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, 807 struct vattr *vap, struct vnode *avp, void *args, struct cred *cred, 808 sdev_node_state_t state) 809 { 810 int error = 0; 811 sdev_node_state_t node_state; 812 struct sdev_node *dv = NULL; 813 814 ASSERT(state != SDEV_ZOMBIE); 815 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 816 817 if (*newdv) { 818 dv = *newdv; 819 } else { 820 /* allocate and initialize a sdev_node */ 821 if (ddv->sdev_state == SDEV_ZOMBIE) { 822 sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n", 823 ddv->sdev_path)); 824 return (ENOENT); 825 } 826 827 error = sdev_nodeinit(ddv, nm, &dv, vap); 828 if (error != 0) { 829 sdcmn_err9(("sdev_mknode: error %d," 830 " name %s can not be initialized\n", 831 error, nm)); 832 return (error); 833 } 834 ASSERT(dv); 835 836 /* insert into the directory cache */ 837 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD); 838 } 839 840 ASSERT(dv); 841 node_state = dv->sdev_state; 842 ASSERT(node_state != SDEV_ZOMBIE); 843 844 if (state == SDEV_READY) { 845 switch (node_state) { 846 case SDEV_INIT: 847 error = sdev_nodeready(dv, vap, avp, args, cred); 848 if (error) { 849 sdcmn_err9(("sdev_mknode: node %s can NOT" 850 " be transitioned into READY state, " 851 "error %d\n", nm, error)); 852 } 853 break; 854 case SDEV_READY: 855 /* 856 * Do some sanity checking to make sure 857 * the existing sdev_node is what has been 858 * asked for. 859 */ 860 error = sdev_node_check(dv, vap, args); 861 break; 862 default: 863 break; 864 } 865 } 866 867 if (!error) { 868 *newdv = dv; 869 ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE); 870 } else { 871 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE); 872 /* 873 * We created this node, it wasn't passed into us. Therefore it 874 * is up to us to delete it. 875 */ 876 if (*newdv == NULL) 877 SDEV_SIMPLE_RELE(dv); 878 *newdv = NULL; 879 } 880 881 return (error); 882 } 883 884 /* 885 * convenient wrapper to change vp's ATIME, CTIME and MTIME 886 */ 887 void 888 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask) 889 { 890 struct vattr attr; 891 timestruc_t now; 892 int err; 893 894 ASSERT(vp); 895 gethrestime(&now); 896 if (mask & AT_CTIME) 897 attr.va_ctime = now; 898 if (mask & AT_MTIME) 899 attr.va_mtime = now; 900 if (mask & AT_ATIME) 901 attr.va_atime = now; 902 903 attr.va_mask = (mask & AT_TIMES); 904 err = VOP_SETATTR(vp, &attr, 0, cred, NULL); 905 if (err && (err != EROFS)) { 906 sdcmn_err(("update timestamps error %d\n", err)); 907 } 908 } 909 910 /* 911 * the backing store vnode is released here 912 */ 913 /*ARGSUSED1*/ 914 void 915 sdev_nodedestroy(struct sdev_node *dv, uint_t flags) 916 { 917 /* no references */ 918 ASSERT(dv->sdev_nlink == 0); 919 920 if (dv->sdev_attrvp != NULLVP) { 921 VN_RELE(dv->sdev_attrvp); 922 /* 923 * reset the attrvp so that no more 924 * references can be made on this already 925 * vn_rele() vnode 926 */ 927 dv->sdev_attrvp = NULLVP; 928 } 929 930 if (dv->sdev_attr != NULL) { 931 kmem_free(dv->sdev_attr, sizeof (struct vattr)); 932 dv->sdev_attr = NULL; 933 } 934 935 if (dv->sdev_name != NULL) { 936 kmem_free(dv->sdev_name, dv->sdev_namelen + 1); 937 dv->sdev_name = NULL; 938 } 939 940 if (dv->sdev_symlink != NULL) { 941 kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1); 942 dv->sdev_symlink = NULL; 943 } 944 945 if (dv->sdev_path) { 946 kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1); 947 dv->sdev_path = NULL; 948 } 949 950 if (!SDEV_IS_GLOBAL(dv)) 951 sdev_prof_free(dv); 952 953 if (SDEVTOV(dv)->v_type == VDIR) { 954 ASSERT(SDEV_FIRST_ENTRY(dv) == NULL); 955 avl_destroy(&dv->sdev_entries); 956 } 957 958 mutex_destroy(&dv->sdev_lookup_lock); 959 cv_destroy(&dv->sdev_lookup_cv); 960 961 /* return node to initial state as per constructor */ 962 (void) memset((void *)&dv->sdev_instance_data, 0, 963 sizeof (dv->sdev_instance_data)); 964 vn_invalid(SDEVTOV(dv)); 965 kmem_cache_free(sdev_node_cache, dv); 966 } 967 968 /* 969 * DIRECTORY CACHE lookup 970 */ 971 struct sdev_node * 972 sdev_findbyname(struct sdev_node *ddv, char *nm) 973 { 974 struct sdev_node *dv; 975 struct sdev_node dvtmp; 976 avl_index_t where; 977 978 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); 979 980 dvtmp.sdev_name = nm; 981 dv = avl_find(&ddv->sdev_entries, &dvtmp, &where); 982 if (dv) { 983 ASSERT(dv->sdev_dotdot == ddv); 984 ASSERT(strcmp(dv->sdev_name, nm) == 0); 985 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 986 SDEV_HOLD(dv); 987 return (dv); 988 } 989 return (NULL); 990 } 991 992 /* 993 * Inserts a new sdev_node in a parent directory 994 */ 995 void 996 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv) 997 { 998 avl_index_t where; 999 1000 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1001 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 1002 ASSERT(ddv->sdev_nlink >= 2); 1003 ASSERT(dv->sdev_nlink == 0); 1004 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1005 1006 dv->sdev_dotdot = ddv; 1007 VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL); 1008 avl_insert(&ddv->sdev_entries, dv, where); 1009 ddv->sdev_nlink++; 1010 } 1011 1012 /* 1013 * The following check is needed because while sdev_nodes are linked 1014 * in SDEV_INIT state, they have their link counts incremented only 1015 * in SDEV_READY state. 1016 */ 1017 static void 1018 decr_link(struct sdev_node *dv) 1019 { 1020 VERIFY(RW_WRITE_HELD(&dv->sdev_contents)); 1021 if (dv->sdev_state != SDEV_INIT) { 1022 VERIFY(dv->sdev_nlink >= 1); 1023 dv->sdev_nlink--; 1024 } else { 1025 VERIFY(dv->sdev_nlink == 0); 1026 } 1027 } 1028 1029 /* 1030 * Delete an existing dv from directory cache 1031 * 1032 * In the case of a node is still held by non-zero reference count, the node is 1033 * put into ZOMBIE state. The node is always unlinked from its parent, but it is 1034 * not destroyed via sdev_inactive until its reference count reaches "0". 1035 */ 1036 static void 1037 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv) 1038 { 1039 struct vnode *vp; 1040 sdev_node_state_t os; 1041 1042 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1043 1044 vp = SDEVTOV(dv); 1045 mutex_enter(&vp->v_lock); 1046 rw_enter(&dv->sdev_contents, RW_WRITER); 1047 os = dv->sdev_state; 1048 ASSERT(os != SDEV_ZOMBIE); 1049 dv->sdev_state = SDEV_ZOMBIE; 1050 1051 /* 1052 * unlink ourselves from the parent directory now to take care of the .. 1053 * link. However, if we're a directory, we don't remove our reference to 1054 * ourself eg. '.' until we are torn down in the inactive callback. 1055 */ 1056 decr_link(ddv); 1057 avl_remove(&ddv->sdev_entries, dv); 1058 /* 1059 * sdev_inactive expects nodes to have a link to themselves when we're 1060 * tearing them down. If we're transitioning from the initial state to 1061 * zombie and not via ready, then we're not going to have this link that 1062 * comes from the node being ready. As a result, we need to increment 1063 * our link count by one to account for this. 1064 */ 1065 if (os == SDEV_INIT && dv->sdev_nlink == 0) 1066 dv->sdev_nlink++; 1067 rw_exit(&dv->sdev_contents); 1068 mutex_exit(&vp->v_lock); 1069 } 1070 1071 /* 1072 * check if the source is in the path of the target 1073 * 1074 * source and target are different 1075 */ 1076 /*ARGSUSED2*/ 1077 static int 1078 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred) 1079 { 1080 int error = 0; 1081 struct sdev_node *dotdot, *dir; 1082 1083 dotdot = tdv->sdev_dotdot; 1084 ASSERT(dotdot); 1085 1086 /* fs root */ 1087 if (dotdot == tdv) { 1088 return (0); 1089 } 1090 1091 for (;;) { 1092 /* 1093 * avoid error cases like 1094 * mv a a/b 1095 * mv a a/b/c 1096 * etc. 1097 */ 1098 if (dotdot == sdv) { 1099 error = EINVAL; 1100 break; 1101 } 1102 1103 dir = dotdot; 1104 dotdot = dir->sdev_dotdot; 1105 1106 /* done checking because root is reached */ 1107 if (dir == dotdot) { 1108 break; 1109 } 1110 } 1111 return (error); 1112 } 1113 1114 int 1115 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv, 1116 struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm, 1117 struct cred *cred) 1118 { 1119 int error = 0; 1120 struct vnode *ovp = SDEVTOV(odv); 1121 struct vnode *nvp; 1122 struct vattr vattr; 1123 int doingdir = (ovp->v_type == VDIR); 1124 char *link = NULL; 1125 int samedir = (oddv == nddv) ? 1 : 0; 1126 int bkstore = 0; 1127 struct sdev_node *idv = NULL; 1128 struct sdev_node *ndv = NULL; 1129 timestruc_t now; 1130 1131 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 1132 error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL); 1133 if (error) 1134 return (error); 1135 1136 if (!samedir) 1137 rw_enter(&oddv->sdev_contents, RW_WRITER); 1138 rw_enter(&nddv->sdev_contents, RW_WRITER); 1139 1140 /* 1141 * the source may have been deleted by another thread before 1142 * we gets here. 1143 */ 1144 if (odv->sdev_state != SDEV_READY) { 1145 error = ENOENT; 1146 goto err_out; 1147 } 1148 1149 if (doingdir && (odv == nddv)) { 1150 error = EINVAL; 1151 goto err_out; 1152 } 1153 1154 /* 1155 * If renaming a directory, and the parents are different (".." must be 1156 * changed) then the source dir must not be in the dir hierarchy above 1157 * the target since it would orphan everything below the source dir. 1158 */ 1159 if (doingdir && (oddv != nddv)) { 1160 error = sdev_checkpath(odv, nddv, cred); 1161 if (error) 1162 goto err_out; 1163 } 1164 1165 /* fix the source for a symlink */ 1166 if (vattr.va_type == VLNK) { 1167 if (odv->sdev_symlink == NULL) { 1168 error = sdev_follow_link(odv); 1169 if (error) { 1170 /* 1171 * The underlying symlink doesn't exist. This 1172 * node probably shouldn't even exist. While 1173 * it's a bit jarring to consumers, we're going 1174 * to remove the node from /dev. 1175 */ 1176 if (SDEV_IS_PERSIST((*ndvp))) 1177 bkstore = 1; 1178 sdev_dirdelete(oddv, odv); 1179 if (bkstore) { 1180 ASSERT(nddv->sdev_attrvp); 1181 error = VOP_REMOVE(nddv->sdev_attrvp, 1182 nnm, cred, NULL, 0); 1183 if (error) 1184 goto err_out; 1185 } 1186 error = ENOENT; 1187 goto err_out; 1188 } 1189 } 1190 ASSERT(odv->sdev_symlink); 1191 link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP); 1192 } 1193 1194 /* destination existing */ 1195 if (*ndvp) { 1196 nvp = SDEVTOV(*ndvp); 1197 ASSERT(nvp); 1198 1199 /* handling renaming to itself */ 1200 if (odv == *ndvp) { 1201 error = 0; 1202 goto err_out; 1203 } 1204 1205 if (nvp->v_type == VDIR) { 1206 if (!doingdir) { 1207 error = EISDIR; 1208 goto err_out; 1209 } 1210 1211 if (vn_vfswlock(nvp)) { 1212 error = EBUSY; 1213 goto err_out; 1214 } 1215 1216 if (vn_mountedvfs(nvp) != NULL) { 1217 vn_vfsunlock(nvp); 1218 error = EBUSY; 1219 goto err_out; 1220 } 1221 1222 /* in case dir1 exists in dir2 and "mv dir1 dir2" */ 1223 if ((*ndvp)->sdev_nlink > 2) { 1224 vn_vfsunlock(nvp); 1225 error = EEXIST; 1226 goto err_out; 1227 } 1228 vn_vfsunlock(nvp); 1229 1230 /* 1231 * We did not place the hold on *ndvp, so even though 1232 * we're deleting the node, we should not get rid of our 1233 * reference. 1234 */ 1235 sdev_dirdelete(nddv, *ndvp); 1236 *ndvp = NULL; 1237 ASSERT(nddv->sdev_attrvp); 1238 error = VOP_RMDIR(nddv->sdev_attrvp, nnm, 1239 nddv->sdev_attrvp, cred, NULL, 0); 1240 if (error) 1241 goto err_out; 1242 } else { 1243 if (doingdir) { 1244 error = ENOTDIR; 1245 goto err_out; 1246 } 1247 1248 if (SDEV_IS_PERSIST((*ndvp))) { 1249 bkstore = 1; 1250 } 1251 1252 /* 1253 * Get rid of the node from the directory cache note. 1254 * Don't forget that it's not up to us to remove the vn 1255 * ref on the sdev node, as we did not place it. 1256 */ 1257 sdev_dirdelete(nddv, *ndvp); 1258 *ndvp = NULL; 1259 if (bkstore) { 1260 ASSERT(nddv->sdev_attrvp); 1261 error = VOP_REMOVE(nddv->sdev_attrvp, 1262 nnm, cred, NULL, 0); 1263 if (error) 1264 goto err_out; 1265 } 1266 } 1267 } 1268 1269 /* 1270 * make a fresh node from the source attrs 1271 */ 1272 ASSERT(RW_WRITE_HELD(&nddv->sdev_contents)); 1273 error = sdev_mknode(nddv, nnm, ndvp, &vattr, 1274 NULL, (void *)link, cred, SDEV_READY); 1275 1276 if (link != NULL) { 1277 kmem_free(link, strlen(link) + 1); 1278 link = NULL; 1279 } 1280 1281 if (error) 1282 goto err_out; 1283 ASSERT(*ndvp); 1284 ASSERT((*ndvp)->sdev_state == SDEV_READY); 1285 1286 /* move dir contents */ 1287 if (doingdir) { 1288 for (idv = SDEV_FIRST_ENTRY(odv); idv; 1289 idv = SDEV_NEXT_ENTRY(odv, idv)) { 1290 SDEV_HOLD(idv); 1291 error = sdev_rnmnode(odv, idv, 1292 (struct sdev_node *)(*ndvp), &ndv, 1293 idv->sdev_name, cred); 1294 SDEV_RELE(idv); 1295 if (error) 1296 goto err_out; 1297 ndv = NULL; 1298 } 1299 } 1300 1301 if ((*ndvp)->sdev_attrvp) { 1302 sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred, 1303 AT_CTIME|AT_ATIME); 1304 } else { 1305 ASSERT((*ndvp)->sdev_attr); 1306 gethrestime(&now); 1307 (*ndvp)->sdev_attr->va_ctime = now; 1308 (*ndvp)->sdev_attr->va_atime = now; 1309 } 1310 1311 if (nddv->sdev_attrvp) { 1312 sdev_update_timestamps(nddv->sdev_attrvp, kcred, 1313 AT_MTIME|AT_ATIME); 1314 } else { 1315 ASSERT(nddv->sdev_attr); 1316 gethrestime(&now); 1317 nddv->sdev_attr->va_mtime = now; 1318 nddv->sdev_attr->va_atime = now; 1319 } 1320 rw_exit(&nddv->sdev_contents); 1321 if (!samedir) 1322 rw_exit(&oddv->sdev_contents); 1323 1324 SDEV_RELE(*ndvp); 1325 return (error); 1326 1327 err_out: 1328 if (link != NULL) { 1329 kmem_free(link, strlen(link) + 1); 1330 link = NULL; 1331 } 1332 1333 rw_exit(&nddv->sdev_contents); 1334 if (!samedir) 1335 rw_exit(&oddv->sdev_contents); 1336 return (error); 1337 } 1338 1339 /* 1340 * Merge sdev_node specific information into an attribute structure. 1341 * 1342 * note: sdev_node is not locked here 1343 */ 1344 void 1345 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap) 1346 { 1347 struct vnode *vp = SDEVTOV(dv); 1348 1349 vap->va_nlink = dv->sdev_nlink; 1350 vap->va_nodeid = dv->sdev_ino; 1351 vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev; 1352 vap->va_type = vp->v_type; 1353 1354 if (vp->v_type == VDIR) { 1355 vap->va_rdev = 0; 1356 vap->va_fsid = vp->v_rdev; 1357 } else if (vp->v_type == VLNK) { 1358 vap->va_rdev = 0; 1359 vap->va_mode &= ~S_IFMT; 1360 vap->va_mode |= S_IFLNK; 1361 } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) { 1362 vap->va_rdev = vp->v_rdev; 1363 vap->va_mode &= ~S_IFMT; 1364 if (vap->va_type == VCHR) 1365 vap->va_mode |= S_IFCHR; 1366 else 1367 vap->va_mode |= S_IFBLK; 1368 } else { 1369 vap->va_rdev = 0; 1370 } 1371 } 1372 1373 struct vattr * 1374 sdev_getdefault_attr(enum vtype type) 1375 { 1376 if (type == VDIR) 1377 return (&sdev_vattr_dir); 1378 else if (type == VCHR) 1379 return (&sdev_vattr_chr); 1380 else if (type == VBLK) 1381 return (&sdev_vattr_blk); 1382 else if (type == VLNK) 1383 return (&sdev_vattr_lnk); 1384 else 1385 return (NULL); 1386 } 1387 int 1388 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp) 1389 { 1390 int rv = 0; 1391 struct vnode *vp = SDEVTOV(dv); 1392 1393 switch (vp->v_type) { 1394 case VCHR: 1395 case VBLK: 1396 /* 1397 * If vnode is a device, return special vnode instead 1398 * (though it knows all about -us- via sp->s_realvp) 1399 */ 1400 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred); 1401 VN_RELE(vp); 1402 if (*vpp == NULLVP) 1403 rv = ENOSYS; 1404 break; 1405 default: /* most types are returned as is */ 1406 *vpp = vp; 1407 break; 1408 } 1409 return (rv); 1410 } 1411 1412 /* 1413 * junction between devname and root file system, e.g. ufs 1414 */ 1415 int 1416 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp) 1417 { 1418 struct vnode *rdvp = ddv->sdev_attrvp; 1419 int rval = 0; 1420 1421 ASSERT(rdvp); 1422 1423 rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL, 1424 NULL); 1425 return (rval); 1426 } 1427 1428 static int 1429 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred) 1430 { 1431 struct sdev_node *dv = NULL; 1432 char *nm; 1433 struct vnode *dirvp; 1434 int error; 1435 vnode_t *vp; 1436 int eof; 1437 struct iovec iov; 1438 struct uio uio; 1439 struct dirent64 *dp; 1440 dirent64_t *dbuf; 1441 size_t dbuflen; 1442 struct vattr vattr; 1443 char *link = NULL; 1444 1445 if (ddv->sdev_attrvp == NULL) 1446 return (0); 1447 if (!(ddv->sdev_flags & SDEV_BUILD)) 1448 return (0); 1449 1450 dirvp = ddv->sdev_attrvp; 1451 VN_HOLD(dirvp); 1452 dbuf = kmem_zalloc(dlen, KM_SLEEP); 1453 1454 uio.uio_iov = &iov; 1455 uio.uio_iovcnt = 1; 1456 uio.uio_segflg = UIO_SYSSPACE; 1457 uio.uio_fmode = 0; 1458 uio.uio_extflg = UIO_COPY_CACHED; 1459 uio.uio_loffset = 0; 1460 uio.uio_llimit = MAXOFFSET_T; 1461 1462 eof = 0; 1463 error = 0; 1464 while (!error && !eof) { 1465 uio.uio_resid = dlen; 1466 iov.iov_base = (char *)dbuf; 1467 iov.iov_len = dlen; 1468 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1469 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1470 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1471 1472 dbuflen = dlen - uio.uio_resid; 1473 if (error || dbuflen == 0) 1474 break; 1475 1476 if (!(ddv->sdev_flags & SDEV_BUILD)) 1477 break; 1478 1479 for (dp = dbuf; ((intptr_t)dp < 1480 (intptr_t)dbuf + dbuflen); 1481 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1482 nm = dp->d_name; 1483 1484 if (strcmp(nm, ".") == 0 || 1485 strcmp(nm, "..") == 0) 1486 continue; 1487 1488 vp = NULLVP; 1489 dv = sdev_cache_lookup(ddv, nm); 1490 if (dv) { 1491 VERIFY(dv->sdev_state != SDEV_ZOMBIE); 1492 SDEV_SIMPLE_RELE(dv); 1493 continue; 1494 } 1495 1496 /* refill the cache if not already */ 1497 error = devname_backstore_lookup(ddv, nm, &vp); 1498 if (error) 1499 continue; 1500 1501 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 1502 error = VOP_GETATTR(vp, &vattr, 0, cred, NULL); 1503 if (error) 1504 continue; 1505 1506 if (vattr.va_type == VLNK) { 1507 error = sdev_getlink(vp, &link); 1508 if (error) { 1509 continue; 1510 } 1511 ASSERT(link != NULL); 1512 } 1513 1514 if (!rw_tryupgrade(&ddv->sdev_contents)) { 1515 rw_exit(&ddv->sdev_contents); 1516 rw_enter(&ddv->sdev_contents, RW_WRITER); 1517 } 1518 error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link, 1519 cred, SDEV_READY); 1520 rw_downgrade(&ddv->sdev_contents); 1521 1522 if (link != NULL) { 1523 kmem_free(link, strlen(link) + 1); 1524 link = NULL; 1525 } 1526 1527 if (!error) { 1528 ASSERT(dv); 1529 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1530 SDEV_SIMPLE_RELE(dv); 1531 } 1532 vp = NULL; 1533 dv = NULL; 1534 } 1535 } 1536 1537 done: 1538 VN_RELE(dirvp); 1539 kmem_free(dbuf, dlen); 1540 1541 return (error); 1542 } 1543 1544 void 1545 sdev_filldir_dynamic(struct sdev_node *ddv) 1546 { 1547 int error; 1548 int i; 1549 struct vattr vattr; 1550 struct vattr *vap = &vattr; 1551 char *nm = NULL; 1552 struct sdev_node *dv = NULL; 1553 1554 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1555 ASSERT((ddv->sdev_flags & SDEV_BUILD)); 1556 1557 *vap = *sdev_getdefault_attr(VDIR); /* note structure copy here */ 1558 gethrestime(&vap->va_atime); 1559 vap->va_mtime = vap->va_atime; 1560 vap->va_ctime = vap->va_atime; 1561 for (i = 0; vtab[i].vt_name != NULL; i++) { 1562 /* 1563 * This early, we may be in a read-only /dev environment: leave 1564 * the creation of any nodes we'd attempt to persist to 1565 * devfsadm. Because /dev itself is normally persistent, any 1566 * node which is not marked dynamic will end up being marked 1567 * persistent. However, some nodes are both dynamic and 1568 * persistent, mostly lofi and rlofi, so we need to be careful 1569 * in our check. 1570 */ 1571 if ((vtab[i].vt_flags & SDEV_PERSIST) || 1572 !(vtab[i].vt_flags & SDEV_DYNAMIC)) 1573 continue; 1574 nm = vtab[i].vt_name; 1575 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1576 dv = NULL; 1577 error = sdev_mknode(ddv, nm, &dv, vap, NULL, 1578 NULL, kcred, SDEV_READY); 1579 if (error) { 1580 cmn_err(CE_WARN, "%s/%s: error %d\n", 1581 ddv->sdev_name, nm, error); 1582 } else { 1583 ASSERT(dv); 1584 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1585 SDEV_SIMPLE_RELE(dv); 1586 } 1587 } 1588 } 1589 1590 /* 1591 * Creating a backing store entry based on sdev_attr. 1592 * This is called either as part of node creation in a persistent directory 1593 * or from setattr/setsecattr to persist access attributes across reboot. 1594 */ 1595 int 1596 sdev_shadow_node(struct sdev_node *dv, struct cred *cred) 1597 { 1598 int error = 0; 1599 struct vnode *dvp = SDEVTOV(dv->sdev_dotdot); 1600 struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp; 1601 struct vattr *vap = dv->sdev_attr; 1602 char *nm = dv->sdev_name; 1603 struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL; 1604 1605 ASSERT(dv && dv->sdev_name && rdvp); 1606 ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL); 1607 1608 lookup: 1609 /* try to find it in the backing store */ 1610 error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL, 1611 NULL); 1612 if (error == 0) { 1613 if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) { 1614 VN_HOLD(rrvp); 1615 VN_RELE(*rvp); 1616 *rvp = rrvp; 1617 } 1618 1619 kmem_free(dv->sdev_attr, sizeof (vattr_t)); 1620 dv->sdev_attr = NULL; 1621 dv->sdev_attrvp = *rvp; 1622 return (0); 1623 } 1624 1625 /* let's try to persist the node */ 1626 gethrestime(&vap->va_atime); 1627 vap->va_mtime = vap->va_atime; 1628 vap->va_ctime = vap->va_atime; 1629 vap->va_mask |= AT_TYPE|AT_MODE; 1630 switch (vap->va_type) { 1631 case VDIR: 1632 error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL); 1633 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n", 1634 (void *)(*rvp), error)); 1635 if (!error) 1636 VN_RELE(*rvp); 1637 break; 1638 case VCHR: 1639 case VBLK: 1640 case VREG: 1641 case VDOOR: 1642 error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE, 1643 rvp, cred, 0, NULL, NULL); 1644 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n", 1645 (void *)(*rvp), error)); 1646 if (!error) 1647 VN_RELE(*rvp); 1648 break; 1649 case VLNK: 1650 ASSERT(dv->sdev_symlink); 1651 error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred, 1652 NULL, 0); 1653 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n", 1654 error)); 1655 break; 1656 default: 1657 cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node " 1658 "create\n", nm); 1659 /*NOTREACHED*/ 1660 } 1661 1662 /* go back to lookup to factor out spec node and set attrvp */ 1663 if (error == 0) 1664 goto lookup; 1665 1666 sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error)); 1667 return (error); 1668 } 1669 1670 static void 1671 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm) 1672 { 1673 struct sdev_node *dup = NULL; 1674 1675 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1676 if ((dup = sdev_findbyname(ddv, nm)) == NULL) { 1677 sdev_direnter(ddv, *dv); 1678 } else { 1679 VERIFY(dup->sdev_state != SDEV_ZOMBIE); 1680 SDEV_SIMPLE_RELE(*dv); 1681 sdev_nodedestroy(*dv, 0); 1682 *dv = dup; 1683 } 1684 } 1685 1686 static void 1687 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv) 1688 { 1689 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1690 sdev_dirdelete(ddv, *dv); 1691 } 1692 1693 /* 1694 * update the in-core directory cache 1695 */ 1696 void 1697 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm, 1698 sdev_cache_ops_t ops) 1699 { 1700 ASSERT((SDEV_HELD(*dv))); 1701 1702 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1703 switch (ops) { 1704 case SDEV_CACHE_ADD: 1705 sdev_cache_add(ddv, dv, nm); 1706 break; 1707 case SDEV_CACHE_DELETE: 1708 sdev_cache_delete(ddv, dv); 1709 break; 1710 default: 1711 break; 1712 } 1713 } 1714 1715 /* 1716 * retrieve the named entry from the directory cache 1717 */ 1718 struct sdev_node * 1719 sdev_cache_lookup(struct sdev_node *ddv, char *nm) 1720 { 1721 struct sdev_node *dv = NULL; 1722 1723 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); 1724 dv = sdev_findbyname(ddv, nm); 1725 1726 return (dv); 1727 } 1728 1729 /* 1730 * Implicit reconfig for nodes constructed by a link generator 1731 * Start devfsadm if needed, or if devfsadm is in progress, 1732 * prepare to block on devfsadm either completing or 1733 * constructing the desired node. As devfsadmd is global 1734 * in scope, constructing all necessary nodes, we only 1735 * need to initiate it once. 1736 */ 1737 static int 1738 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm) 1739 { 1740 int error = 0; 1741 1742 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { 1743 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n", 1744 ddv->sdev_name, nm, devfsadm_state)); 1745 mutex_enter(&dv->sdev_lookup_lock); 1746 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING)); 1747 mutex_exit(&dv->sdev_lookup_lock); 1748 error = 0; 1749 } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) { 1750 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n", 1751 ddv->sdev_name, nm, devfsadm_state)); 1752 1753 sdev_devfsadmd_thread(ddv, dv, kcred); 1754 mutex_enter(&dv->sdev_lookup_lock); 1755 SDEV_BLOCK_OTHERS(dv, 1756 (SDEV_LOOKUP | SDEV_LGWAITING)); 1757 mutex_exit(&dv->sdev_lookup_lock); 1758 error = 0; 1759 } else { 1760 error = -1; 1761 } 1762 1763 return (error); 1764 } 1765 1766 /* 1767 * Support for specialized device naming construction mechanisms 1768 */ 1769 static int 1770 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm, 1771 int (*callback)(struct sdev_node *, char *, void **, struct cred *, 1772 void *, char *), int flags, struct cred *cred) 1773 { 1774 int rv = 0; 1775 char *physpath = NULL; 1776 struct vattr vattr; 1777 struct vattr *vap = &vattr; 1778 struct sdev_node *dv = NULL; 1779 1780 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1781 if (flags & SDEV_VLINK) { 1782 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1783 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL, 1784 NULL); 1785 if (rv) { 1786 kmem_free(physpath, MAXPATHLEN); 1787 return (-1); 1788 } 1789 1790 *vap = *sdev_getdefault_attr(VLNK); /* structure copy */ 1791 vap->va_size = strlen(physpath); 1792 gethrestime(&vap->va_atime); 1793 vap->va_mtime = vap->va_atime; 1794 vap->va_ctime = vap->va_atime; 1795 1796 rv = sdev_mknode(ddv, nm, &dv, vap, NULL, 1797 (void *)physpath, cred, SDEV_READY); 1798 kmem_free(physpath, MAXPATHLEN); 1799 if (rv) 1800 return (rv); 1801 } else if (flags & SDEV_VATTR) { 1802 /* 1803 * /dev/pts 1804 * 1805 * callback is responsible to set the basic attributes, 1806 * e.g. va_type/va_uid/va_gid/ 1807 * dev_t if VCHR or VBLK/ 1808 */ 1809 ASSERT(callback); 1810 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL); 1811 if (rv) { 1812 sdcmn_err3(("devname_lookup_func: SDEV_NONE " 1813 "callback failed \n")); 1814 return (-1); 1815 } 1816 1817 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL, 1818 cred, SDEV_READY); 1819 1820 if (rv) 1821 return (rv); 1822 1823 } else { 1824 impossible(("lookup: %s/%s by %s not supported (%d)\n", 1825 SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm, 1826 __LINE__)); 1827 rv = -1; 1828 } 1829 1830 *dvp = dv; 1831 return (rv); 1832 } 1833 1834 static int 1835 is_devfsadm_thread(char *exec_name) 1836 { 1837 /* 1838 * note: because devfsadmd -> /usr/sbin/devfsadm 1839 * it is safe to use "devfsadm" to capture the lookups 1840 * from devfsadm and its daemon version. 1841 */ 1842 if (strcmp(exec_name, "devfsadm") == 0) 1843 return (1); 1844 return (0); 1845 } 1846 1847 /* 1848 * Lookup Order: 1849 * sdev_node cache; 1850 * backing store (SDEV_PERSIST); 1851 * DBNR: a. dir_ops implemented in the loadable modules; 1852 * b. vnode ops in vtab. 1853 */ 1854 int 1855 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp, 1856 struct cred *cred, int (*callback)(struct sdev_node *, char *, void **, 1857 struct cred *, void *, char *), int flags) 1858 { 1859 int rv = 0, nmlen; 1860 struct vnode *rvp = NULL; 1861 struct sdev_node *dv = NULL; 1862 int retried = 0; 1863 int error = 0; 1864 struct vattr vattr; 1865 char *lookup_thread = curproc->p_user.u_comm; 1866 int failed_flags = 0; 1867 int (*vtor)(struct sdev_node *) = NULL; 1868 int state; 1869 int parent_state; 1870 char *link = NULL; 1871 1872 if (SDEVTOV(ddv)->v_type != VDIR) 1873 return (ENOTDIR); 1874 1875 /* 1876 * Empty name or ., return node itself. 1877 */ 1878 nmlen = strlen(nm); 1879 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { 1880 *vpp = SDEVTOV(ddv); 1881 VN_HOLD(*vpp); 1882 return (0); 1883 } 1884 1885 /* 1886 * .., return the parent directory 1887 */ 1888 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { 1889 *vpp = SDEVTOV(ddv->sdev_dotdot); 1890 VN_HOLD(*vpp); 1891 return (0); 1892 } 1893 1894 rw_enter(&ddv->sdev_contents, RW_READER); 1895 if (ddv->sdev_flags & SDEV_VTOR) { 1896 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); 1897 ASSERT(vtor); 1898 } 1899 1900 tryagain: 1901 /* 1902 * (a) directory cache lookup: 1903 */ 1904 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1905 parent_state = ddv->sdev_state; 1906 dv = sdev_cache_lookup(ddv, nm); 1907 if (dv) { 1908 state = dv->sdev_state; 1909 switch (state) { 1910 case SDEV_INIT: 1911 if (is_devfsadm_thread(lookup_thread)) 1912 break; 1913 1914 /* ZOMBIED parent won't allow node creation */ 1915 if (parent_state == SDEV_ZOMBIE) { 1916 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1917 retried); 1918 goto nolock_notfound; 1919 } 1920 1921 mutex_enter(&dv->sdev_lookup_lock); 1922 /* compensate the threads started after devfsadm */ 1923 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && 1924 !(SDEV_IS_LOOKUP(dv))) 1925 SDEV_BLOCK_OTHERS(dv, 1926 (SDEV_LOOKUP | SDEV_LGWAITING)); 1927 1928 if (SDEV_IS_LOOKUP(dv)) { 1929 failed_flags |= SLF_REBUILT; 1930 rw_exit(&ddv->sdev_contents); 1931 error = sdev_wait4lookup(dv, SDEV_LOOKUP); 1932 mutex_exit(&dv->sdev_lookup_lock); 1933 rw_enter(&ddv->sdev_contents, RW_READER); 1934 1935 if (error != 0) { 1936 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1937 retried); 1938 goto nolock_notfound; 1939 } 1940 1941 state = dv->sdev_state; 1942 if (state == SDEV_INIT) { 1943 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1944 retried); 1945 goto nolock_notfound; 1946 } else if (state == SDEV_READY) { 1947 goto found; 1948 } else if (state == SDEV_ZOMBIE) { 1949 rw_exit(&ddv->sdev_contents); 1950 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1951 retried); 1952 SDEV_RELE(dv); 1953 goto lookup_failed; 1954 } 1955 } else { 1956 mutex_exit(&dv->sdev_lookup_lock); 1957 } 1958 break; 1959 case SDEV_READY: 1960 goto found; 1961 case SDEV_ZOMBIE: 1962 rw_exit(&ddv->sdev_contents); 1963 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1964 SDEV_RELE(dv); 1965 goto lookup_failed; 1966 default: 1967 rw_exit(&ddv->sdev_contents); 1968 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1969 sdev_lookup_failed(ddv, nm, failed_flags); 1970 *vpp = NULLVP; 1971 return (ENOENT); 1972 } 1973 } 1974 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1975 1976 /* 1977 * ZOMBIED parent does not allow new node creation. 1978 * bail out early 1979 */ 1980 if (parent_state == SDEV_ZOMBIE) { 1981 rw_exit(&ddv->sdev_contents); 1982 *vpp = NULLVP; 1983 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1984 return (ENOENT); 1985 } 1986 1987 /* 1988 * (b0): backing store lookup 1989 * SDEV_PERSIST is default except: 1990 * 1) pts nodes 1991 * 2) non-chmod'ed local nodes 1992 * 3) zvol nodes 1993 */ 1994 if (SDEV_IS_PERSIST(ddv)) { 1995 error = devname_backstore_lookup(ddv, nm, &rvp); 1996 1997 if (!error) { 1998 1999 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 2000 error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL); 2001 if (error) { 2002 rw_exit(&ddv->sdev_contents); 2003 if (dv) 2004 SDEV_RELE(dv); 2005 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2006 sdev_lookup_failed(ddv, nm, failed_flags); 2007 *vpp = NULLVP; 2008 return (ENOENT); 2009 } 2010 2011 if (vattr.va_type == VLNK) { 2012 error = sdev_getlink(rvp, &link); 2013 if (error) { 2014 rw_exit(&ddv->sdev_contents); 2015 if (dv) 2016 SDEV_RELE(dv); 2017 SD_TRACE_FAILED_LOOKUP(ddv, nm, 2018 retried); 2019 sdev_lookup_failed(ddv, nm, 2020 failed_flags); 2021 *vpp = NULLVP; 2022 return (ENOENT); 2023 } 2024 ASSERT(link != NULL); 2025 } 2026 2027 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2028 rw_exit(&ddv->sdev_contents); 2029 rw_enter(&ddv->sdev_contents, RW_WRITER); 2030 } 2031 error = sdev_mknode(ddv, nm, &dv, &vattr, 2032 rvp, link, cred, SDEV_READY); 2033 rw_downgrade(&ddv->sdev_contents); 2034 2035 if (link != NULL) { 2036 kmem_free(link, strlen(link) + 1); 2037 link = NULL; 2038 } 2039 2040 if (error) { 2041 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2042 rw_exit(&ddv->sdev_contents); 2043 if (dv) 2044 SDEV_RELE(dv); 2045 goto lookup_failed; 2046 } else { 2047 goto found; 2048 } 2049 } else if (retried) { 2050 rw_exit(&ddv->sdev_contents); 2051 sdcmn_err3(("retry of lookup of %s/%s: failed\n", 2052 ddv->sdev_name, nm)); 2053 if (dv) 2054 SDEV_RELE(dv); 2055 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2056 sdev_lookup_failed(ddv, nm, failed_flags); 2057 *vpp = NULLVP; 2058 return (ENOENT); 2059 } 2060 } 2061 2062 lookup_create_node: 2063 /* first thread that is doing the lookup on this node */ 2064 if (callback) { 2065 ASSERT(dv == NULL); 2066 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2067 rw_exit(&ddv->sdev_contents); 2068 rw_enter(&ddv->sdev_contents, RW_WRITER); 2069 } 2070 error = sdev_call_dircallback(ddv, &dv, nm, callback, 2071 flags, cred); 2072 rw_downgrade(&ddv->sdev_contents); 2073 if (error == 0) { 2074 goto found; 2075 } else { 2076 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2077 rw_exit(&ddv->sdev_contents); 2078 goto lookup_failed; 2079 } 2080 } 2081 if (!dv) { 2082 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2083 rw_exit(&ddv->sdev_contents); 2084 rw_enter(&ddv->sdev_contents, RW_WRITER); 2085 } 2086 error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL, 2087 cred, SDEV_INIT); 2088 if (!dv) { 2089 rw_exit(&ddv->sdev_contents); 2090 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2091 sdev_lookup_failed(ddv, nm, failed_flags); 2092 *vpp = NULLVP; 2093 return (ENOENT); 2094 } 2095 rw_downgrade(&ddv->sdev_contents); 2096 } 2097 2098 /* 2099 * (b1) invoking devfsadm once per life time for devfsadm nodes 2100 */ 2101 ASSERT(SDEV_HELD(dv)); 2102 2103 if (SDEV_IS_NO_NCACHE(dv)) 2104 failed_flags |= SLF_NO_NCACHE; 2105 if (sdev_reconfig_boot || !i_ddi_io_initialized() || 2106 SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) || 2107 ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) { 2108 ASSERT(SDEV_HELD(dv)); 2109 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2110 goto nolock_notfound; 2111 } 2112 2113 /* 2114 * filter out known non-existent devices recorded 2115 * during initial reconfiguration boot for which 2116 * reconfig should not be done and lookup may 2117 * be short-circuited now. 2118 */ 2119 if (sdev_lookup_filter(ddv, nm)) { 2120 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2121 goto nolock_notfound; 2122 } 2123 2124 /* bypassing devfsadm internal nodes */ 2125 if (is_devfsadm_thread(lookup_thread)) { 2126 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2127 goto nolock_notfound; 2128 } 2129 2130 if (sdev_reconfig_disable) { 2131 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2132 goto nolock_notfound; 2133 } 2134 2135 error = sdev_call_devfsadmd(ddv, dv, nm); 2136 if (error == 0) { 2137 sdcmn_err8(("lookup of %s/%s by %s: reconfig\n", 2138 ddv->sdev_name, nm, curproc->p_user.u_comm)); 2139 if (sdev_reconfig_verbose) { 2140 cmn_err(CE_CONT, 2141 "?lookup of %s/%s by %s: reconfig\n", 2142 ddv->sdev_name, nm, curproc->p_user.u_comm); 2143 } 2144 retried = 1; 2145 failed_flags |= SLF_REBUILT; 2146 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 2147 SDEV_SIMPLE_RELE(dv); 2148 goto tryagain; 2149 } else { 2150 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2151 goto nolock_notfound; 2152 } 2153 2154 found: 2155 ASSERT(dv->sdev_state == SDEV_READY); 2156 if (vtor) { 2157 /* 2158 * Check validity of returned node 2159 */ 2160 switch (vtor(dv)) { 2161 case SDEV_VTOR_VALID: 2162 break; 2163 case SDEV_VTOR_STALE: 2164 /* 2165 * The name exists, but the cache entry is 2166 * stale and needs to be re-created. 2167 */ 2168 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2169 if (rw_tryupgrade(&ddv->sdev_contents) == 0) { 2170 rw_exit(&ddv->sdev_contents); 2171 rw_enter(&ddv->sdev_contents, RW_WRITER); 2172 } 2173 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE); 2174 rw_downgrade(&ddv->sdev_contents); 2175 SDEV_RELE(dv); 2176 dv = NULL; 2177 goto lookup_create_node; 2178 /* FALLTHRU */ 2179 case SDEV_VTOR_INVALID: 2180 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2181 sdcmn_err7(("lookup: destroy invalid " 2182 "node: %s(%p)\n", dv->sdev_name, (void *)dv)); 2183 goto nolock_notfound; 2184 case SDEV_VTOR_SKIP: 2185 sdcmn_err7(("lookup: node not applicable - " 2186 "skipping: %s(%p)\n", dv->sdev_name, (void *)dv)); 2187 rw_exit(&ddv->sdev_contents); 2188 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2189 SDEV_RELE(dv); 2190 goto lookup_failed; 2191 default: 2192 cmn_err(CE_PANIC, 2193 "dev fs: validator failed: %s(%p)\n", 2194 dv->sdev_name, (void *)dv); 2195 break; 2196 } 2197 } 2198 2199 rw_exit(&ddv->sdev_contents); 2200 rv = sdev_to_vp(dv, vpp); 2201 sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d " 2202 "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count, 2203 dv->sdev_state, nm, rv)); 2204 return (rv); 2205 2206 nolock_notfound: 2207 /* 2208 * Destroy the node that is created for synchronization purposes. 2209 */ 2210 sdcmn_err3(("devname_lookup_func: %s with state %d\n", 2211 nm, dv->sdev_state)); 2212 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2213 if (dv->sdev_state == SDEV_INIT) { 2214 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2215 rw_exit(&ddv->sdev_contents); 2216 rw_enter(&ddv->sdev_contents, RW_WRITER); 2217 } 2218 2219 /* 2220 * Node state may have changed during the lock 2221 * changes. Re-check. 2222 */ 2223 if (dv->sdev_state == SDEV_INIT) { 2224 sdev_dirdelete(ddv, dv); 2225 rw_exit(&ddv->sdev_contents); 2226 sdev_lookup_failed(ddv, nm, failed_flags); 2227 SDEV_RELE(dv); 2228 *vpp = NULL; 2229 return (ENOENT); 2230 } 2231 } 2232 2233 rw_exit(&ddv->sdev_contents); 2234 SDEV_RELE(dv); 2235 2236 lookup_failed: 2237 sdev_lookup_failed(ddv, nm, failed_flags); 2238 *vpp = NULL; 2239 return (ENOENT); 2240 } 2241 2242 /* 2243 * Given a directory node, mark all nodes beneath as 2244 * STALE, i.e. nodes that don't exist as far as new 2245 * consumers are concerned. Remove them from the 2246 * list of directory entries so that no lookup or 2247 * directory traversal will find them. The node 2248 * not deallocated so existing holds are not affected. 2249 */ 2250 void 2251 sdev_stale(struct sdev_node *ddv) 2252 { 2253 struct sdev_node *dv; 2254 struct vnode *vp; 2255 2256 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 2257 2258 rw_enter(&ddv->sdev_contents, RW_WRITER); 2259 while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) { 2260 vp = SDEVTOV(dv); 2261 SDEV_HOLD(dv); 2262 if (vp->v_type == VDIR) 2263 sdev_stale(dv); 2264 2265 sdev_dirdelete(ddv, dv); 2266 SDEV_RELE(dv); 2267 } 2268 ddv->sdev_flags |= SDEV_BUILD; 2269 rw_exit(&ddv->sdev_contents); 2270 } 2271 2272 /* 2273 * Given a directory node, clean out all the nodes beneath. 2274 * If expr is specified, clean node with names matching expr. 2275 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale, 2276 * so they are excluded from future lookups. 2277 */ 2278 int 2279 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags) 2280 { 2281 int error = 0; 2282 int busy = 0; 2283 struct vnode *vp; 2284 struct sdev_node *dv, *next; 2285 int bkstore = 0; 2286 int len = 0; 2287 char *bks_name = NULL; 2288 2289 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 2290 2291 /* 2292 * We try our best to destroy all unused sdev_node's 2293 */ 2294 rw_enter(&ddv->sdev_contents, RW_WRITER); 2295 for (dv = SDEV_FIRST_ENTRY(ddv); dv != NULL; dv = next) { 2296 next = SDEV_NEXT_ENTRY(ddv, dv); 2297 vp = SDEVTOV(dv); 2298 2299 if (expr && gmatch(dv->sdev_name, expr) == 0) 2300 continue; 2301 2302 if (vp->v_type == VDIR && 2303 sdev_cleandir(dv, NULL, flags) != 0) { 2304 sdcmn_err9(("sdev_cleandir: dir %s busy\n", 2305 dv->sdev_name)); 2306 busy++; 2307 continue; 2308 } 2309 2310 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) { 2311 sdcmn_err9(("sdev_cleandir: dir %s busy\n", 2312 dv->sdev_name)); 2313 busy++; 2314 continue; 2315 } 2316 2317 /* 2318 * at this point, either dv is not held or SDEV_ENFORCE 2319 * is specified. In either case, dv needs to be deleted 2320 */ 2321 SDEV_HOLD(dv); 2322 2323 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0; 2324 if (bkstore && (vp->v_type == VDIR)) 2325 bkstore += 1; 2326 2327 if (bkstore) { 2328 len = strlen(dv->sdev_name) + 1; 2329 bks_name = kmem_alloc(len, KM_SLEEP); 2330 bcopy(dv->sdev_name, bks_name, len); 2331 } 2332 2333 sdev_dirdelete(ddv, dv); 2334 2335 /* take care the backing store clean up */ 2336 if (bkstore) { 2337 ASSERT(bks_name); 2338 ASSERT(ddv->sdev_attrvp); 2339 2340 if (bkstore == 1) { 2341 error = VOP_REMOVE(ddv->sdev_attrvp, 2342 bks_name, kcred, NULL, 0); 2343 } else if (bkstore == 2) { 2344 error = VOP_RMDIR(ddv->sdev_attrvp, 2345 bks_name, ddv->sdev_attrvp, kcred, NULL, 0); 2346 } 2347 2348 /* do not propagate the backing store errors */ 2349 if (error) { 2350 sdcmn_err9(("sdev_cleandir: backing store" 2351 "not cleaned\n")); 2352 error = 0; 2353 } 2354 2355 bkstore = 0; 2356 kmem_free(bks_name, len); 2357 bks_name = NULL; 2358 len = 0; 2359 } 2360 2361 ddv->sdev_flags |= SDEV_BUILD; 2362 SDEV_RELE(dv); 2363 } 2364 2365 ddv->sdev_flags |= SDEV_BUILD; 2366 rw_exit(&ddv->sdev_contents); 2367 2368 if (busy) { 2369 error = EBUSY; 2370 } 2371 2372 return (error); 2373 } 2374 2375 /* 2376 * a convenient wrapper for readdir() funcs 2377 */ 2378 size_t 2379 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off) 2380 { 2381 size_t reclen = DIRENT64_RECLEN(strlen(nm)); 2382 if (reclen > size) 2383 return (0); 2384 2385 de->d_ino = (ino64_t)ino; 2386 de->d_off = (off64_t)off + 1; 2387 de->d_reclen = (ushort_t)reclen; 2388 (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen)); 2389 return (reclen); 2390 } 2391 2392 /* 2393 * sdev_mount service routines 2394 */ 2395 int 2396 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args) 2397 { 2398 int error; 2399 2400 if (uap->datalen != sizeof (*args)) 2401 return (EINVAL); 2402 2403 if (error = copyin(uap->dataptr, args, sizeof (*args))) { 2404 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not" 2405 "get user data. error %d\n", error); 2406 return (EFAULT); 2407 } 2408 2409 return (0); 2410 } 2411 2412 #ifdef nextdp 2413 #undef nextdp 2414 #endif 2415 #define nextdp(dp) ((struct dirent64 *) \ 2416 (intptr_t)((char *)(dp) + (dp)->d_reclen)) 2417 2418 /* 2419 * readdir helper func 2420 */ 2421 int 2422 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp, 2423 int flags) 2424 { 2425 struct sdev_node *ddv = VTOSDEV(vp); 2426 struct sdev_node *dv; 2427 dirent64_t *dp; 2428 ulong_t outcount = 0; 2429 size_t namelen; 2430 ulong_t alloc_count; 2431 void *outbuf; 2432 struct iovec *iovp; 2433 int error = 0; 2434 size_t reclen; 2435 offset_t diroff; 2436 offset_t soff; 2437 int this_reclen; 2438 int (*vtor)(struct sdev_node *) = NULL; 2439 struct vattr attr; 2440 timestruc_t now; 2441 2442 ASSERT(ddv->sdev_attr || ddv->sdev_attrvp); 2443 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2444 2445 if (uiop->uio_loffset >= MAXOFF_T) { 2446 if (eofp) 2447 *eofp = 1; 2448 return (0); 2449 } 2450 2451 if (uiop->uio_iovcnt != 1) 2452 return (EINVAL); 2453 2454 if (vp->v_type != VDIR) 2455 return (ENOTDIR); 2456 2457 if (ddv->sdev_flags & SDEV_VTOR) { 2458 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); 2459 ASSERT(vtor); 2460 } 2461 2462 if (eofp != NULL) 2463 *eofp = 0; 2464 2465 soff = uiop->uio_loffset; 2466 iovp = uiop->uio_iov; 2467 alloc_count = iovp->iov_len; 2468 dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP); 2469 outcount = 0; 2470 2471 if (ddv->sdev_state == SDEV_ZOMBIE) 2472 goto get_cache; 2473 2474 if (SDEV_IS_GLOBAL(ddv)) { 2475 2476 if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) && 2477 !sdev_reconfig_boot && (flags & SDEV_BROWSE) && 2478 !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) && 2479 ((moddebug & MODDEBUG_FINI_EBUSY) == 0) && 2480 !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) && 2481 !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && 2482 !sdev_reconfig_disable) { 2483 /* 2484 * invoking "devfsadm" to do system device reconfig 2485 */ 2486 mutex_enter(&ddv->sdev_lookup_lock); 2487 SDEV_BLOCK_OTHERS(ddv, 2488 (SDEV_READDIR|SDEV_LGWAITING)); 2489 mutex_exit(&ddv->sdev_lookup_lock); 2490 2491 sdcmn_err8(("readdir of %s by %s: reconfig\n", 2492 ddv->sdev_path, curproc->p_user.u_comm)); 2493 if (sdev_reconfig_verbose) { 2494 cmn_err(CE_CONT, 2495 "?readdir of %s by %s: reconfig\n", 2496 ddv->sdev_path, curproc->p_user.u_comm); 2497 } 2498 2499 sdev_devfsadmd_thread(ddv, NULL, kcred); 2500 } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { 2501 /* 2502 * compensate the "ls" started later than "devfsadm" 2503 */ 2504 mutex_enter(&ddv->sdev_lookup_lock); 2505 SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING)); 2506 mutex_exit(&ddv->sdev_lookup_lock); 2507 } 2508 2509 /* 2510 * release the contents lock so that 2511 * the cache may be updated by devfsadmd 2512 */ 2513 rw_exit(&ddv->sdev_contents); 2514 mutex_enter(&ddv->sdev_lookup_lock); 2515 if (SDEV_IS_READDIR(ddv)) 2516 (void) sdev_wait4lookup(ddv, SDEV_READDIR); 2517 mutex_exit(&ddv->sdev_lookup_lock); 2518 rw_enter(&ddv->sdev_contents, RW_READER); 2519 2520 sdcmn_err4(("readdir of directory %s by %s\n", 2521 ddv->sdev_name, curproc->p_user.u_comm)); 2522 if (ddv->sdev_flags & SDEV_BUILD) { 2523 if (SDEV_IS_PERSIST(ddv)) { 2524 error = sdev_filldir_from_store(ddv, 2525 alloc_count, cred); 2526 } 2527 ddv->sdev_flags &= ~SDEV_BUILD; 2528 } 2529 } 2530 2531 get_cache: 2532 /* handle "." and ".." */ 2533 diroff = 0; 2534 if (soff == 0) { 2535 /* first time */ 2536 this_reclen = DIRENT64_RECLEN(1); 2537 if (alloc_count < this_reclen) { 2538 error = EINVAL; 2539 goto done; 2540 } 2541 2542 dp->d_ino = (ino64_t)ddv->sdev_ino; 2543 dp->d_off = (off64_t)1; 2544 dp->d_reclen = (ushort_t)this_reclen; 2545 2546 (void) strncpy(dp->d_name, ".", 2547 DIRENT64_NAMELEN(this_reclen)); 2548 outcount += dp->d_reclen; 2549 dp = nextdp(dp); 2550 } 2551 2552 diroff++; 2553 if (soff <= 1) { 2554 this_reclen = DIRENT64_RECLEN(2); 2555 if (alloc_count < outcount + this_reclen) { 2556 error = EINVAL; 2557 goto done; 2558 } 2559 2560 dp->d_reclen = (ushort_t)this_reclen; 2561 dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino; 2562 dp->d_off = (off64_t)2; 2563 2564 (void) strncpy(dp->d_name, "..", 2565 DIRENT64_NAMELEN(this_reclen)); 2566 outcount += dp->d_reclen; 2567 2568 dp = nextdp(dp); 2569 } 2570 2571 2572 /* gets the cache */ 2573 diroff++; 2574 for (dv = SDEV_FIRST_ENTRY(ddv); dv; 2575 dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) { 2576 sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n", 2577 diroff, soff, dv->sdev_name)); 2578 2579 /* bypassing pre-matured nodes */ 2580 if (diroff < soff || (dv->sdev_state != SDEV_READY)) { 2581 sdcmn_err3(("sdev_readdir: pre-mature node " 2582 "%s %d\n", dv->sdev_name, dv->sdev_state)); 2583 continue; 2584 } 2585 2586 /* 2587 * Check validity of node 2588 * Drop invalid and nodes to be skipped. 2589 * A node the validator indicates as stale needs 2590 * to be returned as presumably the node name itself 2591 * is valid and the node data itself will be refreshed 2592 * on lookup. An application performing a readdir then 2593 * stat on each entry should thus always see consistent 2594 * data. In any case, it is not possible to synchronize 2595 * with dynamic kernel state, and any view we return can 2596 * never be anything more than a snapshot at a point in time. 2597 */ 2598 if (vtor) { 2599 switch (vtor(dv)) { 2600 case SDEV_VTOR_VALID: 2601 break; 2602 case SDEV_VTOR_INVALID: 2603 case SDEV_VTOR_SKIP: 2604 continue; 2605 case SDEV_VTOR_STALE: 2606 sdcmn_err3(("sdev_readir: %s stale\n", 2607 dv->sdev_name)); 2608 break; 2609 default: 2610 cmn_err(CE_PANIC, 2611 "dev fs: validator failed: %s(%p)\n", 2612 dv->sdev_name, (void *)dv); 2613 break; 2614 /*NOTREACHED*/ 2615 } 2616 } 2617 2618 namelen = strlen(dv->sdev_name); 2619 reclen = DIRENT64_RECLEN(namelen); 2620 if (outcount + reclen > alloc_count) { 2621 goto full; 2622 } 2623 dp->d_reclen = (ushort_t)reclen; 2624 dp->d_ino = (ino64_t)dv->sdev_ino; 2625 dp->d_off = (off64_t)diroff + 1; 2626 (void) strncpy(dp->d_name, dv->sdev_name, 2627 DIRENT64_NAMELEN(reclen)); 2628 outcount += reclen; 2629 dp = nextdp(dp); 2630 } 2631 2632 full: 2633 sdcmn_err4(("sdev_readdir: moving %lu bytes: " 2634 "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff, 2635 (void *)dv)); 2636 2637 if (outcount) 2638 error = uiomove(outbuf, outcount, UIO_READ, uiop); 2639 2640 if (!error) { 2641 uiop->uio_loffset = diroff; 2642 if (eofp) 2643 *eofp = dv ? 0 : 1; 2644 } 2645 2646 2647 if (ddv->sdev_attrvp) { 2648 gethrestime(&now); 2649 attr.va_ctime = now; 2650 attr.va_atime = now; 2651 attr.va_mask = AT_CTIME|AT_ATIME; 2652 2653 (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL); 2654 } 2655 done: 2656 kmem_free(outbuf, alloc_count); 2657 return (error); 2658 } 2659 2660 static int 2661 sdev_modctl_lookup(const char *path, vnode_t **r_vp) 2662 { 2663 vnode_t *vp; 2664 vnode_t *cvp; 2665 struct sdev_node *svp; 2666 char *nm; 2667 struct pathname pn; 2668 int error; 2669 int persisted = 0; 2670 2671 ASSERT(INGLOBALZONE(curproc)); 2672 2673 if (error = pn_get((char *)path, UIO_SYSSPACE, &pn)) 2674 return (error); 2675 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 2676 2677 vp = rootdir; 2678 VN_HOLD(vp); 2679 2680 while (pn_pathleft(&pn)) { 2681 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK); 2682 (void) pn_getcomponent(&pn, nm); 2683 2684 /* 2685 * Deal with the .. special case where we may be 2686 * traversing up across a mount point, to the 2687 * root of this filesystem or global root. 2688 */ 2689 if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) { 2690 checkforroot: 2691 if (VN_CMP(vp, rootdir)) { 2692 nm[1] = 0; 2693 } else if (vp->v_flag & VROOT) { 2694 vfs_t *vfsp; 2695 cvp = vp; 2696 vfsp = cvp->v_vfsp; 2697 vfs_rlock_wait(vfsp); 2698 vp = cvp->v_vfsp->vfs_vnodecovered; 2699 if (vp == NULL || 2700 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) { 2701 vfs_unlock(vfsp); 2702 VN_RELE(cvp); 2703 error = EIO; 2704 break; 2705 } 2706 VN_HOLD(vp); 2707 vfs_unlock(vfsp); 2708 VN_RELE(cvp); 2709 cvp = NULL; 2710 goto checkforroot; 2711 } 2712 } 2713 2714 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL, 2715 NULL, NULL); 2716 if (error) { 2717 VN_RELE(vp); 2718 break; 2719 } 2720 2721 /* traverse mount points encountered on our journey */ 2722 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) { 2723 VN_RELE(vp); 2724 VN_RELE(cvp); 2725 break; 2726 } 2727 2728 /* 2729 * symbolic link, can be either relative and absolute 2730 */ 2731 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) { 2732 struct pathname linkpath; 2733 pn_alloc(&linkpath); 2734 if (error = pn_getsymlink(cvp, &linkpath, kcred)) { 2735 pn_free(&linkpath); 2736 break; 2737 } 2738 if (pn_pathleft(&linkpath) == 0) 2739 (void) pn_set(&linkpath, "."); 2740 error = pn_insert(&pn, &linkpath, strlen(nm)); 2741 pn_free(&linkpath); 2742 if (pn.pn_pathlen == 0) { 2743 VN_RELE(vp); 2744 return (ENOENT); 2745 } 2746 if (pn.pn_path[0] == '/') { 2747 pn_skipslash(&pn); 2748 VN_RELE(vp); 2749 VN_RELE(cvp); 2750 vp = rootdir; 2751 VN_HOLD(vp); 2752 } else { 2753 VN_RELE(cvp); 2754 } 2755 continue; 2756 } 2757 2758 VN_RELE(vp); 2759 2760 /* 2761 * Direct the operation to the persisting filesystem 2762 * underlying /dev. Bail if we encounter a 2763 * non-persistent dev entity here. 2764 */ 2765 if (cvp->v_vfsp->vfs_fstype == devtype) { 2766 2767 if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) { 2768 error = ENOENT; 2769 VN_RELE(cvp); 2770 break; 2771 } 2772 2773 if (VTOSDEV(cvp) == NULL) { 2774 error = ENOENT; 2775 VN_RELE(cvp); 2776 break; 2777 } 2778 svp = VTOSDEV(cvp); 2779 if ((vp = svp->sdev_attrvp) == NULL) { 2780 error = ENOENT; 2781 VN_RELE(cvp); 2782 break; 2783 } 2784 persisted = 1; 2785 VN_HOLD(vp); 2786 VN_RELE(cvp); 2787 cvp = vp; 2788 } 2789 2790 vp = cvp; 2791 pn_skipslash(&pn); 2792 } 2793 2794 kmem_free(nm, MAXNAMELEN); 2795 pn_free(&pn); 2796 2797 if (error) 2798 return (error); 2799 2800 /* 2801 * Only return persisted nodes in the filesystem underlying /dev. 2802 */ 2803 if (!persisted) { 2804 VN_RELE(vp); 2805 return (ENOENT); 2806 } 2807 2808 *r_vp = vp; 2809 return (0); 2810 } 2811 2812 int 2813 sdev_modctl_readdir(const char *dir, char ***dirlistp, 2814 int *npathsp, int *npathsp_alloc, int checking_empty) 2815 { 2816 char **pathlist = NULL; 2817 char **newlist = NULL; 2818 int npaths = 0; 2819 int npaths_alloc = 0; 2820 dirent64_t *dbuf = NULL; 2821 int n; 2822 char *s; 2823 int error; 2824 vnode_t *vp; 2825 int eof; 2826 struct iovec iov; 2827 struct uio uio; 2828 struct dirent64 *dp; 2829 size_t dlen; 2830 size_t dbuflen; 2831 int ndirents = 64; 2832 char *nm; 2833 2834 error = sdev_modctl_lookup(dir, &vp); 2835 sdcmn_err11(("modctl readdir: %s by %s: %s\n", 2836 dir, curproc->p_user.u_comm, 2837 (error == 0) ? "ok" : "failed")); 2838 if (error) 2839 return (error); 2840 2841 dlen = ndirents * (sizeof (*dbuf)); 2842 dbuf = kmem_alloc(dlen, KM_SLEEP); 2843 2844 uio.uio_iov = &iov; 2845 uio.uio_iovcnt = 1; 2846 uio.uio_segflg = UIO_SYSSPACE; 2847 uio.uio_fmode = 0; 2848 uio.uio_extflg = UIO_COPY_CACHED; 2849 uio.uio_loffset = 0; 2850 uio.uio_llimit = MAXOFFSET_T; 2851 2852 eof = 0; 2853 error = 0; 2854 while (!error && !eof) { 2855 uio.uio_resid = dlen; 2856 iov.iov_base = (char *)dbuf; 2857 iov.iov_len = dlen; 2858 2859 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 2860 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0); 2861 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2862 2863 dbuflen = dlen - uio.uio_resid; 2864 2865 if (error || dbuflen == 0) 2866 break; 2867 2868 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 2869 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 2870 2871 nm = dp->d_name; 2872 2873 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 2874 continue; 2875 if (npaths == npaths_alloc) { 2876 npaths_alloc += 64; 2877 newlist = (char **) 2878 kmem_zalloc((npaths_alloc + 1) * 2879 sizeof (char *), KM_SLEEP); 2880 if (pathlist) { 2881 bcopy(pathlist, newlist, 2882 npaths * sizeof (char *)); 2883 kmem_free(pathlist, 2884 (npaths + 1) * sizeof (char *)); 2885 } 2886 pathlist = newlist; 2887 } 2888 n = strlen(nm) + 1; 2889 s = kmem_alloc(n, KM_SLEEP); 2890 bcopy(nm, s, n); 2891 pathlist[npaths++] = s; 2892 sdcmn_err11((" %s/%s\n", dir, s)); 2893 2894 /* if checking empty, one entry is as good as many */ 2895 if (checking_empty) { 2896 eof = 1; 2897 break; 2898 } 2899 } 2900 } 2901 2902 exit: 2903 VN_RELE(vp); 2904 2905 if (dbuf) 2906 kmem_free(dbuf, dlen); 2907 2908 if (error) 2909 return (error); 2910 2911 *dirlistp = pathlist; 2912 *npathsp = npaths; 2913 *npathsp_alloc = npaths_alloc; 2914 2915 return (0); 2916 } 2917 2918 void 2919 sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc) 2920 { 2921 int i, n; 2922 2923 for (i = 0; i < npaths; i++) { 2924 n = strlen(pathlist[i]) + 1; 2925 kmem_free(pathlist[i], n); 2926 } 2927 2928 kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *)); 2929 } 2930 2931 int 2932 sdev_modctl_devexists(const char *path) 2933 { 2934 vnode_t *vp; 2935 int error; 2936 2937 error = sdev_modctl_lookup(path, &vp); 2938 sdcmn_err11(("modctl dev exists: %s by %s: %s\n", 2939 path, curproc->p_user.u_comm, 2940 (error == 0) ? "ok" : "failed")); 2941 if (error == 0) 2942 VN_RELE(vp); 2943 2944 return (error); 2945 } 2946 2947 extern int sdev_vnodeops_tbl_size; 2948 2949 /* 2950 * construct a new template with overrides from vtab 2951 */ 2952 static fs_operation_def_t * 2953 sdev_merge_vtab(const fs_operation_def_t tab[]) 2954 { 2955 fs_operation_def_t *new; 2956 const fs_operation_def_t *tab_entry; 2957 2958 /* make a copy of standard vnode ops table */ 2959 new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP); 2960 bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size); 2961 2962 /* replace the overrides from tab */ 2963 for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) { 2964 fs_operation_def_t *std_entry = new; 2965 while (std_entry->name) { 2966 if (strcmp(tab_entry->name, std_entry->name) == 0) { 2967 std_entry->func = tab_entry->func; 2968 break; 2969 } 2970 std_entry++; 2971 } 2972 if (std_entry->name == NULL) 2973 cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.", 2974 tab_entry->name); 2975 } 2976 2977 return (new); 2978 } 2979 2980 /* free memory allocated by sdev_merge_vtab */ 2981 static void 2982 sdev_free_vtab(fs_operation_def_t *new) 2983 { 2984 kmem_free(new, sdev_vnodeops_tbl_size); 2985 } 2986 2987 /* 2988 * a generic setattr() function 2989 * 2990 * note: flags only supports AT_UID and AT_GID. 2991 * Future enhancements can be done for other types, e.g. AT_MODE 2992 */ 2993 int 2994 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags, 2995 struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *, 2996 int), int protocol) 2997 { 2998 struct sdev_node *dv = VTOSDEV(vp); 2999 struct sdev_node *parent = dv->sdev_dotdot; 3000 struct vattr *get; 3001 uint_t mask = vap->va_mask; 3002 int error; 3003 3004 /* some sanity checks */ 3005 if (vap->va_mask & AT_NOSET) 3006 return (EINVAL); 3007 3008 if (vap->va_mask & AT_SIZE) { 3009 if (vp->v_type == VDIR) { 3010 return (EISDIR); 3011 } 3012 } 3013 3014 /* no need to set attribute, but do not fail either */ 3015 ASSERT(parent); 3016 rw_enter(&parent->sdev_contents, RW_READER); 3017 if (dv->sdev_state == SDEV_ZOMBIE) { 3018 rw_exit(&parent->sdev_contents); 3019 return (0); 3020 } 3021 3022 /* If backing store exists, just set it. */ 3023 if (dv->sdev_attrvp) { 3024 rw_exit(&parent->sdev_contents); 3025 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 3026 } 3027 3028 /* 3029 * Otherwise, for nodes with the persistence attribute, create it. 3030 */ 3031 ASSERT(dv->sdev_attr); 3032 if (SDEV_IS_PERSIST(dv) || 3033 ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) { 3034 sdev_vattr_merge(dv, vap); 3035 rw_enter(&dv->sdev_contents, RW_WRITER); 3036 error = sdev_shadow_node(dv, cred); 3037 rw_exit(&dv->sdev_contents); 3038 rw_exit(&parent->sdev_contents); 3039 3040 if (error) 3041 return (error); 3042 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 3043 } 3044 3045 3046 /* 3047 * sdev_attr was allocated in sdev_mknode 3048 */ 3049 rw_enter(&dv->sdev_contents, RW_WRITER); 3050 error = secpolicy_vnode_setattr(cred, vp, vap, 3051 dv->sdev_attr, flags, sdev_unlocked_access, dv); 3052 if (error) { 3053 rw_exit(&dv->sdev_contents); 3054 rw_exit(&parent->sdev_contents); 3055 return (error); 3056 } 3057 3058 get = dv->sdev_attr; 3059 if (mask & AT_MODE) { 3060 get->va_mode &= S_IFMT; 3061 get->va_mode |= vap->va_mode & ~S_IFMT; 3062 } 3063 3064 if ((mask & AT_UID) || (mask & AT_GID)) { 3065 if (mask & AT_UID) 3066 get->va_uid = vap->va_uid; 3067 if (mask & AT_GID) 3068 get->va_gid = vap->va_gid; 3069 /* 3070 * a callback must be provided if the protocol is set 3071 */ 3072 if ((protocol & AT_UID) || (protocol & AT_GID)) { 3073 ASSERT(callback); 3074 error = callback(dv, get, protocol); 3075 if (error) { 3076 rw_exit(&dv->sdev_contents); 3077 rw_exit(&parent->sdev_contents); 3078 return (error); 3079 } 3080 } 3081 } 3082 3083 if (mask & AT_ATIME) 3084 get->va_atime = vap->va_atime; 3085 if (mask & AT_MTIME) 3086 get->va_mtime = vap->va_mtime; 3087 if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) { 3088 gethrestime(&get->va_ctime); 3089 } 3090 3091 sdev_vattr_merge(dv, get); 3092 rw_exit(&dv->sdev_contents); 3093 rw_exit(&parent->sdev_contents); 3094 return (0); 3095 } 3096 3097 /* 3098 * a generic inactive() function 3099 */ 3100 /*ARGSUSED*/ 3101 void 3102 devname_inactive_func(struct vnode *vp, struct cred *cred, 3103 void (*callback)(struct vnode *)) 3104 { 3105 int clean; 3106 struct sdev_node *dv = VTOSDEV(vp); 3107 int state; 3108 3109 mutex_enter(&vp->v_lock); 3110 ASSERT(vp->v_count >= 1); 3111 3112 3113 if (vp->v_count == 1 && callback != NULL) 3114 callback(vp); 3115 3116 rw_enter(&dv->sdev_contents, RW_WRITER); 3117 state = dv->sdev_state; 3118 3119 clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE); 3120 3121 /* 3122 * sdev is a rather bad public citizen. It violates the general 3123 * agreement that in memory nodes should always have a valid reference 3124 * count on their vnode. But that's not the case here. This means that 3125 * we do actually have to distinguish between getting inactive callbacks 3126 * for zombies and otherwise. This should probably be fixed. 3127 */ 3128 if (clean) { 3129 /* Remove the . entry to ourselves */ 3130 if (vp->v_type == VDIR) { 3131 decr_link(dv); 3132 } 3133 VERIFY(dv->sdev_nlink == 1); 3134 decr_link(dv); 3135 --vp->v_count; 3136 rw_exit(&dv->sdev_contents); 3137 mutex_exit(&vp->v_lock); 3138 sdev_nodedestroy(dv, 0); 3139 } else { 3140 --vp->v_count; 3141 rw_exit(&dv->sdev_contents); 3142 mutex_exit(&vp->v_lock); 3143 } 3144 } 3145