1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * utility routines for the /dev fs 27 */ 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/t_lock.h> 32 #include <sys/systm.h> 33 #include <sys/sysmacros.h> 34 #include <sys/user.h> 35 #include <sys/time.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/file.h> 39 #include <sys/fcntl.h> 40 #include <sys/flock.h> 41 #include <sys/kmem.h> 42 #include <sys/uio.h> 43 #include <sys/errno.h> 44 #include <sys/stat.h> 45 #include <sys/cred.h> 46 #include <sys/dirent.h> 47 #include <sys/pathname.h> 48 #include <sys/cmn_err.h> 49 #include <sys/debug.h> 50 #include <sys/mode.h> 51 #include <sys/policy.h> 52 #include <fs/fs_subr.h> 53 #include <sys/mount.h> 54 #include <sys/fs/snode.h> 55 #include <sys/fs/dv_node.h> 56 #include <sys/fs/sdev_impl.h> 57 #include <sys/sunndi.h> 58 #include <sys/sunmdi.h> 59 #include <sys/conf.h> 60 #include <sys/proc.h> 61 #include <sys/user.h> 62 #include <sys/modctl.h> 63 64 #ifdef DEBUG 65 int sdev_debug = 0x00000001; 66 int sdev_debug_cache_flags = 0; 67 #endif 68 69 /* 70 * globals 71 */ 72 /* prototype memory vattrs */ 73 vattr_t sdev_vattr_dir = { 74 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 75 VDIR, /* va_type */ 76 SDEV_DIRMODE_DEFAULT, /* va_mode */ 77 SDEV_UID_DEFAULT, /* va_uid */ 78 SDEV_GID_DEFAULT, /* va_gid */ 79 0, /* va_fsid */ 80 0, /* va_nodeid */ 81 0, /* va_nlink */ 82 0, /* va_size */ 83 0, /* va_atime */ 84 0, /* va_mtime */ 85 0, /* va_ctime */ 86 0, /* va_rdev */ 87 0, /* va_blksize */ 88 0, /* va_nblocks */ 89 0 /* va_vcode */ 90 }; 91 92 vattr_t sdev_vattr_lnk = { 93 AT_TYPE|AT_MODE, /* va_mask */ 94 VLNK, /* va_type */ 95 SDEV_LNKMODE_DEFAULT, /* va_mode */ 96 SDEV_UID_DEFAULT, /* va_uid */ 97 SDEV_GID_DEFAULT, /* va_gid */ 98 0, /* va_fsid */ 99 0, /* va_nodeid */ 100 0, /* va_nlink */ 101 0, /* va_size */ 102 0, /* va_atime */ 103 0, /* va_mtime */ 104 0, /* va_ctime */ 105 0, /* va_rdev */ 106 0, /* va_blksize */ 107 0, /* va_nblocks */ 108 0 /* va_vcode */ 109 }; 110 111 vattr_t sdev_vattr_blk = { 112 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 113 VBLK, /* va_type */ 114 S_IFBLK | SDEV_DEVMODE_DEFAULT, /* va_mode */ 115 SDEV_UID_DEFAULT, /* va_uid */ 116 SDEV_GID_DEFAULT, /* va_gid */ 117 0, /* va_fsid */ 118 0, /* va_nodeid */ 119 0, /* va_nlink */ 120 0, /* va_size */ 121 0, /* va_atime */ 122 0, /* va_mtime */ 123 0, /* va_ctime */ 124 0, /* va_rdev */ 125 0, /* va_blksize */ 126 0, /* va_nblocks */ 127 0 /* va_vcode */ 128 }; 129 130 vattr_t sdev_vattr_chr = { 131 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 132 VCHR, /* va_type */ 133 S_IFCHR | 
SDEV_DEVMODE_DEFAULT,	/* va_mode */
	SDEV_UID_DEFAULT,	/* va_uid */
	SDEV_GID_DEFAULT,	/* va_gid */
	0,			/* va_fsid */
	0,			/* va_nodeid */
	0,			/* va_nlink */
	0,			/* va_size */
	0,			/* va_atime */
	0,			/* va_mtime */
	0,			/* va_ctime */
	0,			/* va_rdev */
	0,			/* va_blksize */
	0,			/* va_nblocks */
	0			/* va_vcode */
};

kmem_cache_t	*sdev_node_cache;	/* sdev_node cache */
int		devtype;		/* fstype */

/* static */
static struct vnodeops *sdev_get_vop(struct sdev_node *);
static void sdev_set_no_negcache(struct sdev_node *);
static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []);
static void sdev_free_vtab(fs_operation_def_t *);

static void
sdev_prof_free(struct sdev_node *dv)
{
	ASSERT(!SDEV_IS_GLOBAL(dv));
	if (dv->sdev_prof.dev_name)
		nvlist_free(dv->sdev_prof.dev_name);
	if (dv->sdev_prof.dev_map)
		nvlist_free(dv->sdev_prof.dev_map);
	if (dv->sdev_prof.dev_symlink)
		nvlist_free(dv->sdev_prof.dev_symlink);
	if (dv->sdev_prof.dev_glob_incdir)
		nvlist_free(dv->sdev_prof.dev_glob_incdir);
	if (dv->sdev_prof.dev_glob_excdir)
		nvlist_free(dv->sdev_prof.dev_glob_excdir);
	bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
}

/* sdev_node cache constructor */
/*ARGSUSED1*/
static int
i_sdev_node_ctor(void *buf, void *cfarg, int flag)
{
	struct sdev_node *dv = (struct sdev_node *)buf;
	struct vnode *vp;

	bzero(buf, sizeof (struct sdev_node));
	vp = dv->sdev_vnode = vn_alloc(flag);
	if (vp == NULL) {
		return (-1);
	}
	vp->v_data = dv;
	rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL);
	return (0);
}

/* sdev_node cache destructor */
/*ARGSUSED1*/
static void
i_sdev_node_dtor(void *buf, void *arg)
{
	struct sdev_node *dv = (struct sdev_node *)buf;
	struct vnode *vp = SDEVTOV(dv);

	rw_destroy(&dv->sdev_contents);
	vn_free(vp);
}

/* initialize sdev_node cache */
void
sdev_node_cache_init()
{
	int flags = 0;

#ifdef	DEBUG
	flags = sdev_debug_cache_flags;
	if (flags)
		sdcmn_err(("cache debug flags 0x%x\n", flags));
#endif	/* DEBUG */

	ASSERT(sdev_node_cache == NULL);
	sdev_node_cache = kmem_cache_create("sdev_node_cache",
	    sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor,
	    NULL, NULL, NULL, flags);
}

/* destroy sdev_node cache */
void
sdev_node_cache_fini()
{
	ASSERT(sdev_node_cache != NULL);
	kmem_cache_destroy(sdev_node_cache);
	sdev_node_cache = NULL;
}

/*
 * Compare two nodes lexicographically to balance the avl tree
 */
static int
sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
{
	int rv;

	if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
		return (0);
	return ((rv < 0) ?
	    -1 : 1);
}

void
sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
{
	ASSERT(dv);
	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
	dv->sdev_state = state;
}

static void
sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
{
	timestruc_t	now;
	struct vattr	*attrp;
	uint_t		mask;

	ASSERT(dv->sdev_attr);
	ASSERT(vap);

	attrp = dv->sdev_attr;
	mask = vap->va_mask;
	if (mask & AT_TYPE)
		attrp->va_type = vap->va_type;
	if (mask & AT_MODE)
		attrp->va_mode = vap->va_mode;
	if (mask & AT_UID)
		attrp->va_uid = vap->va_uid;
	if (mask & AT_GID)
		attrp->va_gid = vap->va_gid;
	if (mask & AT_RDEV)
		attrp->va_rdev = vap->va_rdev;

	gethrestime(&now);
	attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
	attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
	attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
}

static void
sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
{
	ASSERT(dv->sdev_attr == NULL);
	ASSERT(vap->va_mask & AT_TYPE);
	ASSERT(vap->va_mask & AT_MODE);

	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
	sdev_attr_update(dv, vap);
}

/* alloc and initialize a sdev_node */
int
sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    vattr_t *vap)
{
	struct sdev_node *dv = NULL;
	struct vnode *vp;
	size_t nmlen, len;
	devname_handle_t *dhl;

	nmlen = strlen(nm) + 1;
	if (nmlen > MAXNAMELEN) {
		sdcmn_err9(("sdev_nodeinit: node name %s"
		    " too long\n", nm));
		*newdv = NULL;
		return (ENAMETOOLONG);
	}

	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);

	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
	bcopy(nm, dv->sdev_name, nmlen);
	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
	/* overwritten for VLNK nodes */
	dv->sdev_symlink = NULL;

	vp = SDEVTOV(dv);
	vn_reinit(vp);
	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
	if (vap)
		vp->v_type = vap->va_type;

	/*
	 * initialized to the parent's vnodeops;
	 * may be overwritten for a VDIR
	 */
	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
	vn_exists(vp);

	dv->sdev_dotdot = NULL;
	dv->sdev_attrvp = NULL;
	if (vap) {
		sdev_attr_alloc(dv, vap);
	} else {
		dv->sdev_attr = NULL;
	}

	dv->sdev_ino = sdev_mkino(dv);
	dv->sdev_nlink = 0;		/* updated on insert */
	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
	dv->sdev_flags |= SDEV_BUILD;
	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
	if (SDEV_IS_GLOBAL(ddv)) {
		dv->sdev_flags |= SDEV_GLOBAL;
		dhl = &(dv->sdev_handle);
		dhl->dh_data = dv;
		dhl->dh_args = NULL;
		sdev_set_no_negcache(dv);
		dv->sdev_gdir_gen = 0;
	} else {
		dv->sdev_flags &= ~SDEV_GLOBAL;
		dv->sdev_origin = NULL;	/* set later */
		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
		dv->sdev_ldir_gen = 0;
		dv->sdev_devtree_gen = 0;
	}

	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_INIT);
	rw_exit(&dv->sdev_contents);
	*newdv = dv;

	return (0);
}

/*
 * transition a sdev_node into SDEV_READY state
 */
int
sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
    void *args, struct cred *cred)
{
	int error = 0;
	struct vnode *vp = SDEVTOV(dv);
	vtype_t type;

	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);

	type = vap->va_type;
	vp->v_type = type;
	vp->v_rdev = vap->va_rdev;
	rw_enter(&dv->sdev_contents, RW_WRITER);
	if (type == VDIR) {
		dv->sdev_nlink = 2;
		dv->sdev_flags &= ~SDEV_PERSIST;
		dv->sdev_flags &= ~SDEV_DYNAMIC;
		vn_setops(vp, sdev_get_vop(dv));	/* from internal vtab */
		ASSERT(dv->sdev_dotdot);
		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
		avl_create(&dv->sdev_entries,
		    (int (*)(const void *, const void *))sdev_compare_nodes,
		    sizeof (struct sdev_node),
		    offsetof(struct sdev_node, sdev_avllink));
	} else if (type == VLNK) {
		ASSERT(args);
		dv->sdev_nlink = 1;
		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
	} else {
		dv->sdev_nlink = 1;
	}

	if (!(SDEV_IS_GLOBAL(dv))) {
		dv->sdev_origin = (struct sdev_node *)args;
		dv->sdev_flags &= ~SDEV_PERSIST;
	}

	/*
	 * the shadow node is created here OR,
	 * if that failed (indicated by dv->sdev_attrvp == NULL),
	 * it is created later in sdev_setattr
	 */
	if (avp) {
		dv->sdev_attrvp = avp;
	} else {
		if (dv->sdev_attr == NULL) {
			sdev_attr_alloc(dv, vap);
		} else {
			sdev_attr_update(dv, vap);
		}

		if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
			error = sdev_shadow_node(dv, cred);
	}

	if (error == 0) {
		/* transition to READY state */
		sdev_set_nodestate(dv, SDEV_READY);
		sdev_nc_node_exists(dv);
	} else {
		sdev_set_nodestate(dv, SDEV_ZOMBIE);
	}
	rw_exit(&dv->sdev_contents);
	return (error);
}

/*
 * setting ZOMBIE state
 */
static int
sdev_nodezombied(struct sdev_node *dv)
{
	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_ZOMBIE);
	rw_exit(&dv->sdev_contents);
	return (0);
}

/*
 * Build the VROOT sdev_node.
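 * The mount point name determines whether this becomes the global
 * /dev instance (SDEV_GLOBAL|SDEV_PERSIST) or a non-global clone.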
456 */ 457 /*ARGSUSED*/ 458 struct sdev_node * 459 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp, 460 struct vnode *avp, struct cred *cred) 461 { 462 struct sdev_node *dv; 463 struct vnode *vp; 464 char devdir[] = "/dev"; 465 466 ASSERT(sdev_node_cache != NULL); 467 ASSERT(avp); 468 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); 469 vp = SDEVTOV(dv); 470 vn_reinit(vp); 471 vp->v_flag |= VROOT; 472 vp->v_vfsp = vfsp; 473 vp->v_type = VDIR; 474 vp->v_rdev = devdev; 475 vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */ 476 vn_exists(vp); 477 478 if (vfsp->vfs_mntpt) 479 dv->sdev_name = i_ddi_strdup( 480 (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP); 481 else 482 /* vfs_mountdev1 set mount point later */ 483 dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP); 484 dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */ 485 dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP); 486 dv->sdev_ino = SDEV_ROOTINO; 487 dv->sdev_nlink = 2; /* name + . (no sdev_insert) */ 488 dv->sdev_dotdot = dv; /* .. == self */ 489 dv->sdev_attrvp = avp; 490 dv->sdev_attr = NULL; 491 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); 492 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); 493 if (strcmp(dv->sdev_name, "/dev") == 0) { 494 dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST; 495 bzero(&dv->sdev_handle, sizeof (dv->sdev_handle)); 496 dv->sdev_gdir_gen = 0; 497 } else { 498 dv->sdev_flags = SDEV_BUILD; 499 dv->sdev_flags &= ~SDEV_PERSIST; 500 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 501 dv->sdev_ldir_gen = 0; 502 dv->sdev_devtree_gen = 0; 503 } 504 505 avl_create(&dv->sdev_entries, 506 (int (*)(const void *, const void *))sdev_compare_nodes, 507 sizeof (struct sdev_node), 508 offsetof(struct sdev_node, sdev_avllink)); 509 510 rw_enter(&dv->sdev_contents, RW_WRITER); 511 sdev_set_nodestate(dv, SDEV_READY); 512 rw_exit(&dv->sdev_contents); 513 sdev_nc_node_exists(dv); 514 return (dv); 515 } 516 517 /* directory dependent vop table */ 518 struct sdev_vop_table { 519 char *vt_name; /* subdirectory name */ 520 const fs_operation_def_t *vt_service; /* vnodeops table */ 521 struct vnodeops *vt_vops; /* constructed vop */ 522 struct vnodeops **vt_global_vops; /* global container for vop */ 523 int (*vt_vtor)(struct sdev_node *); /* validate sdev_node */ 524 int vt_flags; 525 }; 526 527 /* 528 * A nice improvement would be to provide a plug-in mechanism 529 * for this table instead of a const table. 530 */ 531 static struct sdev_vop_table vtab[] = 532 { 533 { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate, 534 SDEV_DYNAMIC | SDEV_VTOR }, 535 536 { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate, 537 SDEV_DYNAMIC | SDEV_VTOR }, 538 539 { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops, 540 devzvol_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR }, 541 542 { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE }, 543 544 { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate, 545 SDEV_DYNAMIC | SDEV_VTOR }, 546 547 { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops, 548 devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE }, 549 550 /* 551 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the 552 * lofi driver controls child nodes. 553 * 554 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted 555 * stale nodes (e.g. from devfsadm -R). 
 *
 * In addition, devfsadm knows not to attempt a rmdir: a zone
 * may hold a reference, which would zombify the node,
 * preventing a mkdir.
 */

	{ "lofi", NULL, NULL, NULL, NULL,
	    SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },
	{ "rlofi", NULL, NULL, NULL, NULL,
	    SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST },

	{ NULL, NULL, NULL, NULL, NULL, 0}
};

struct sdev_vop_table *
sdev_match(struct sdev_node *dv)
{
	int vlen;
	int i;

	for (i = 0; vtab[i].vt_name; i++) {
		if (strcmp(vtab[i].vt_name, dv->sdev_name) == 0)
			return (&vtab[i]);
		if (vtab[i].vt_flags & SDEV_SUBDIR) {
			char *ptr;

			ASSERT(strlen(dv->sdev_path) > 5);
			ptr = dv->sdev_path + 5;
			vlen = strlen(vtab[i].vt_name);
			if ((strncmp(vtab[i].vt_name, ptr,
			    vlen - 1) == 0) && ptr[vlen] == '/')
				return (&vtab[i]);
		}

	}
	return (NULL);
}

/*
 * sets a directory's vnodeops if the directory is in the vtab;
 */
static struct vnodeops *
sdev_get_vop(struct sdev_node *dv)
{
	struct sdev_vop_table *vtp;
	char *path;

	path = dv->sdev_path;
	ASSERT(path);

	/* gets the relative path to /dev/ */
	path += 5;

	/* gets the vtab entry it matches */
	if ((vtp = sdev_match(dv)) != NULL) {
		dv->sdev_flags |= vtp->vt_flags;

		if (vtp->vt_vops) {
			if (vtp->vt_global_vops)
				*(vtp->vt_global_vops) = vtp->vt_vops;
			return (vtp->vt_vops);
		}

		if (vtp->vt_service) {
			fs_operation_def_t *templ;
			templ = sdev_merge_vtab(vtp->vt_service);
			if (vn_make_ops(vtp->vt_name,
			    (const fs_operation_def_t *)templ,
			    &vtp->vt_vops) != 0) {
				cmn_err(CE_PANIC, "%s: malformed vnode ops\n",
				    vtp->vt_name);
				/*NOTREACHED*/
			}
			if (vtp->vt_global_vops) {
				*(vtp->vt_global_vops) = vtp->vt_vops;
			}
			sdev_free_vtab(templ);
			return (vtp->vt_vops);
		}
		return (sdev_vnodeops);
	}

	/* child inherits the persistence of the parent */
	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
		dv->sdev_flags |= SDEV_PERSIST;

	return (sdev_vnodeops);
}

static void
sdev_set_no_negcache(struct sdev_node *dv)
{
	int i;
	char *path;

	ASSERT(dv->sdev_path);
	path = dv->sdev_path + strlen("/dev/");

	for (i = 0; vtab[i].vt_name; i++) {
		if (strcmp(vtab[i].vt_name, path) == 0) {
			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
				dv->sdev_flags |= SDEV_NO_NCACHE;
			break;
		}
	}
}

void *
sdev_get_vtor(struct sdev_node *dv)
{
	struct sdev_vop_table *vtp;

	vtp = sdev_match(dv);
	if (vtp)
		return ((void *)vtp->vt_vtor);
	else
		return (NULL);
}

/*
 * Build the base root inode
 */
ino_t
sdev_mkino(struct sdev_node *dv)
{
	ino_t	ino;

	/*
	 * for now, follow the lead of tmpfs here
	 * need to someday understand the requirements here
	 */
	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
	ino += SDEV_ROOTINO + 1;

	return (ino);
}

int
sdev_getlink(struct vnode *linkvp, char **link)
{
	int err;
	char *buf;
	struct uio uio = {0};
	struct iovec iov = {0};

	if (linkvp == NULL)
		return (ENOENT);
	ASSERT(linkvp->v_type == VLNK);

	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	iov.iov_base = buf;
	iov.iov_len = MAXPATHLEN;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_resid = MAXPATHLEN;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_llimit = MAXOFFSET_T;
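
	/* read the link target into buf through the uio/iovec set up above */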
713 714 err = VOP_READLINK(linkvp, &uio, kcred, NULL); 715 if (err) { 716 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf); 717 kmem_free(buf, MAXPATHLEN); 718 return (ENOENT); 719 } 720 721 /* mission complete */ 722 *link = i_ddi_strdup(buf, KM_SLEEP); 723 kmem_free(buf, MAXPATHLEN); 724 return (0); 725 } 726 727 /* 728 * A convenient wrapper to get the devfs node vnode for a device 729 * minor functionality: readlink() of a /dev symlink 730 * Place the link into dv->sdev_symlink 731 */ 732 static int 733 sdev_follow_link(struct sdev_node *dv) 734 { 735 int err; 736 struct vnode *linkvp; 737 char *link = NULL; 738 739 linkvp = SDEVTOV(dv); 740 if (linkvp == NULL) 741 return (ENOENT); 742 ASSERT(linkvp->v_type == VLNK); 743 err = sdev_getlink(linkvp, &link); 744 if (err) { 745 (void) sdev_nodezombied(dv); 746 dv->sdev_symlink = NULL; 747 return (ENOENT); 748 } 749 750 ASSERT(link != NULL); 751 dv->sdev_symlink = link; 752 return (0); 753 } 754 755 static int 756 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs) 757 { 758 vtype_t otype = SDEVTOV(dv)->v_type; 759 760 /* 761 * existing sdev_node has a different type. 762 */ 763 if (otype != nvap->va_type) { 764 sdcmn_err9(("sdev_node_check: existing node " 765 " %s type %d does not match new node type %d\n", 766 dv->sdev_name, otype, nvap->va_type)); 767 return (EEXIST); 768 } 769 770 /* 771 * For a symlink, the target should be the same. 772 */ 773 if (otype == VLNK) { 774 ASSERT(nargs != NULL); 775 ASSERT(dv->sdev_symlink != NULL); 776 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) { 777 sdcmn_err9(("sdev_node_check: existing node " 778 " %s has different symlink %s as new node " 779 " %s\n", dv->sdev_name, dv->sdev_symlink, 780 (char *)nargs)); 781 return (EEXIST); 782 } 783 } 784 785 return (0); 786 } 787 788 /* 789 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready() 790 * 791 * arguments: 792 * - ddv (parent) 793 * - nm (child name) 794 * - newdv (sdev_node for nm is returned here) 795 * - vap (vattr for the node to be created, va_type should be set. 796 * - avp (attribute vnode) 797 * the defaults should be used if unknown) 798 * - cred 799 * - args 800 * . tnm (for VLNK) 801 * . global sdev_node (for !SDEV_GLOBAL) 802 * - state: SDEV_INIT, SDEV_READY 803 * 804 * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT) 805 * 806 * NOTE: directory contents writers lock needs to be held before 807 * calling this routine. 
808 */ 809 int 810 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, 811 struct vattr *vap, struct vnode *avp, void *args, struct cred *cred, 812 sdev_node_state_t state) 813 { 814 int error = 0; 815 sdev_node_state_t node_state; 816 struct sdev_node *dv = NULL; 817 818 ASSERT(state != SDEV_ZOMBIE); 819 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 820 821 if (*newdv) { 822 dv = *newdv; 823 } else { 824 /* allocate and initialize a sdev_node */ 825 if (ddv->sdev_state == SDEV_ZOMBIE) { 826 sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n", 827 ddv->sdev_path)); 828 return (ENOENT); 829 } 830 831 error = sdev_nodeinit(ddv, nm, &dv, vap); 832 if (error != 0) { 833 sdcmn_err9(("sdev_mknode: error %d," 834 " name %s can not be initialized\n", 835 error, nm)); 836 return (error); 837 } 838 ASSERT(dv); 839 840 /* insert into the directory cache */ 841 error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD); 842 if (error) { 843 sdcmn_err9(("sdev_mknode: node %s can not" 844 " be added into directory cache\n", nm)); 845 return (ENOENT); 846 } 847 } 848 849 ASSERT(dv); 850 node_state = dv->sdev_state; 851 ASSERT(node_state != SDEV_ZOMBIE); 852 853 if (state == SDEV_READY) { 854 switch (node_state) { 855 case SDEV_INIT: 856 error = sdev_nodeready(dv, vap, avp, args, cred); 857 if (error) { 858 sdcmn_err9(("sdev_mknode: node %s can NOT" 859 " be transitioned into READY state, " 860 "error %d\n", nm, error)); 861 } 862 break; 863 case SDEV_READY: 864 /* 865 * Do some sanity checking to make sure 866 * the existing sdev_node is what has been 867 * asked for. 868 */ 869 error = sdev_node_check(dv, vap, args); 870 break; 871 default: 872 break; 873 } 874 } 875 876 if (!error) { 877 *newdv = dv; 878 ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE); 879 } else { 880 SDEV_SIMPLE_RELE(dv); 881 *newdv = NULL; 882 } 883 884 return (error); 885 } 886 887 /* 888 * convenient wrapper to change vp's ATIME, CTIME and MTIME 889 */ 890 void 891 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask) 892 { 893 struct vattr attr; 894 timestruc_t now; 895 int err; 896 897 ASSERT(vp); 898 gethrestime(&now); 899 if (mask & AT_CTIME) 900 attr.va_ctime = now; 901 if (mask & AT_MTIME) 902 attr.va_mtime = now; 903 if (mask & AT_ATIME) 904 attr.va_atime = now; 905 906 attr.va_mask = (mask & AT_TIMES); 907 err = VOP_SETATTR(vp, &attr, 0, cred, NULL); 908 if (err && (err != EROFS)) { 909 sdcmn_err(("update timestamps error %d\n", err)); 910 } 911 } 912 913 /* 914 * the backing store vnode is released here 915 */ 916 /*ARGSUSED1*/ 917 void 918 sdev_nodedestroy(struct sdev_node *dv, uint_t flags) 919 { 920 /* no references */ 921 ASSERT(dv->sdev_nlink == 0); 922 923 if (dv->sdev_attrvp != NULLVP) { 924 VN_RELE(dv->sdev_attrvp); 925 /* 926 * reset the attrvp so that no more 927 * references can be made on this already 928 * vn_rele() vnode 929 */ 930 dv->sdev_attrvp = NULLVP; 931 } 932 933 if (dv->sdev_attr != NULL) { 934 kmem_free(dv->sdev_attr, sizeof (struct vattr)); 935 dv->sdev_attr = NULL; 936 } 937 938 if (dv->sdev_name != NULL) { 939 kmem_free(dv->sdev_name, dv->sdev_namelen + 1); 940 dv->sdev_name = NULL; 941 } 942 943 if (dv->sdev_symlink != NULL) { 944 kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1); 945 dv->sdev_symlink = NULL; 946 } 947 948 if (dv->sdev_path) { 949 kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1); 950 dv->sdev_path = NULL; 951 } 952 953 if (!SDEV_IS_GLOBAL(dv)) 954 sdev_prof_free(dv); 955 956 if (SDEVTOV(dv)->v_type == VDIR) { 957 ASSERT(SDEV_FIRST_ENTRY(dv) == 
NULL);
		avl_destroy(&dv->sdev_entries);
	}

	mutex_destroy(&dv->sdev_lookup_lock);
	cv_destroy(&dv->sdev_lookup_cv);

	/* return node to initial state as per constructor */
	(void) memset((void *)&dv->sdev_instance_data, 0,
	    sizeof (dv->sdev_instance_data));
	vn_invalid(SDEVTOV(dv));
	kmem_cache_free(sdev_node_cache, dv);
}

/*
 * DIRECTORY CACHE lookup
 */
struct sdev_node *
sdev_findbyname(struct sdev_node *ddv, char *nm)
{
	struct sdev_node *dv;
	struct sdev_node dvtmp;
	avl_index_t	where;

	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));

	dvtmp.sdev_name = nm;
	dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
	if (dv) {
		ASSERT(dv->sdev_dotdot == ddv);
		ASSERT(strcmp(dv->sdev_name, nm) == 0);
		SDEV_HOLD(dv);
		return (dv);
	}
	return (NULL);
}

/*
 * Inserts a new sdev_node in a parent directory
 */
void
sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
{
	avl_index_t where;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
	ASSERT(ddv->sdev_nlink >= 2);
	ASSERT(dv->sdev_nlink == 0);

	dv->sdev_dotdot = ddv;
	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
	avl_insert(&ddv->sdev_entries, dv, where);
	ddv->sdev_nlink++;
}

/*
 * The following check is needed because while sdev_nodes are linked
 * in SDEV_INIT state, they have their link counts incremented only
 * in SDEV_READY state.
 */
static void
decr_link(struct sdev_node *dv)
{
	if (dv->sdev_state != SDEV_INIT)
		dv->sdev_nlink--;
	else
		ASSERT(dv->sdev_nlink == 0);
}

/*
 * Delete an existing dv from directory cache
 *
 * If the node is still held by a non-zero reference count, it is
 * put into ZOMBIE state. Once the reference count reaches "0",
 * the node is unlinked and destroyed, in sdev_inactive().
 */
static int
sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
{
	struct vnode *vp;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	vp = SDEVTOV(dv);
	mutex_enter(&vp->v_lock);

	/* dv is held still */
	if (vp->v_count > 1) {
		rw_enter(&dv->sdev_contents, RW_WRITER);
		if (dv->sdev_state == SDEV_READY) {
			sdcmn_err9((
			    "sdev_dirdelete: node %s busy with count %d\n",
			    dv->sdev_name, vp->v_count));
			dv->sdev_state = SDEV_ZOMBIE;
		}
		rw_exit(&dv->sdev_contents);
		--vp->v_count;
		mutex_exit(&vp->v_lock);
		return (EBUSY);
	}
	ASSERT(vp->v_count == 1);

	/* unlink from the memory cache */
	ddv->sdev_nlink--;	/* .. to above */
	if (vp->v_type == VDIR) {
		decr_link(dv);	/* .
to self */
	}

	avl_remove(&ddv->sdev_entries, dv);
	decr_link(dv);	/* name, back to zero */
	vp->v_count--;
	mutex_exit(&vp->v_lock);

	/* destroy the node */
	sdev_nodedestroy(dv, 0);
	return (0);
}

/*
 * check if the source is in the path of the target
 *
 * source and target are different
 */
/*ARGSUSED2*/
static int
sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
{
	int error = 0;
	struct sdev_node *dotdot, *dir;

	dotdot = tdv->sdev_dotdot;
	ASSERT(dotdot);

	/* fs root */
	if (dotdot == tdv) {
		return (0);
	}

	for (;;) {
		/*
		 * avoid error cases like
		 *	mv a a/b
		 *	mv a a/b/c
		 *	etc.
		 */
		if (dotdot == sdv) {
			error = EINVAL;
			break;
		}

		dir = dotdot;
		dotdot = dir->sdev_dotdot;

		/* done checking because root is reached */
		if (dir == dotdot) {
			break;
		}
	}
	return (error);
}

int
sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
    struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
    struct cred *cred)
{
	int error = 0;
	struct vnode *ovp = SDEVTOV(odv);
	struct vnode *nvp;
	struct vattr vattr;
	int doingdir = (ovp->v_type == VDIR);
	char *link = NULL;
	int samedir = (oddv == nddv) ? 1 : 0;
	int bkstore = 0;
	struct sdev_node *idv = NULL;
	struct sdev_node *ndv = NULL;
	timestruc_t now;

	vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
	if (error)
		return (error);

	if (!samedir)
		rw_enter(&oddv->sdev_contents, RW_WRITER);
	rw_enter(&nddv->sdev_contents, RW_WRITER);

	/*
	 * the source may have been deleted by another thread before
	 * we get here.
	 */
	if (odv->sdev_state != SDEV_READY) {
		error = ENOENT;
		goto err_out;
	}

	if (doingdir && (odv == nddv)) {
		error = EINVAL;
		goto err_out;
	}

	/*
	 * If renaming a directory, and the parents are different (".." must be
	 * changed) then the source dir must not be in the dir hierarchy above
	 * the target since it would orphan everything below the source dir.
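	 * (For example, renaming /dev/a to a location under /dev/a itself
	 * must fail with EINVAL; sdev_checkpath() detects this.)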
	 */
	if (doingdir && (oddv != nddv)) {
		error = sdev_checkpath(odv, nddv, cred);
		if (error)
			goto err_out;
	}

	/* destination existing */
	if (*ndvp) {
		nvp = SDEVTOV(*ndvp);
		ASSERT(nvp);

		/* handling renaming to itself */
		if (odv == *ndvp) {
			error = 0;
			goto err_out;
		}

		if (nvp->v_type == VDIR) {
			if (!doingdir) {
				error = EISDIR;
				goto err_out;
			}

			if (vn_vfswlock(nvp)) {
				error = EBUSY;
				goto err_out;
			}

			if (vn_mountedvfs(nvp) != NULL) {
				vn_vfsunlock(nvp);
				error = EBUSY;
				goto err_out;
			}

			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
			if ((*ndvp)->sdev_nlink > 2) {
				vn_vfsunlock(nvp);
				error = EEXIST;
				goto err_out;
			}
			vn_vfsunlock(nvp);

			(void) sdev_dirdelete(nddv, *ndvp);
			*ndvp = NULL;
			ASSERT(nddv->sdev_attrvp);
			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
			    nddv->sdev_attrvp, cred, NULL, 0);
			if (error)
				goto err_out;
		} else {
			if (doingdir) {
				error = ENOTDIR;
				goto err_out;
			}

			if (SDEV_IS_PERSIST((*ndvp))) {
				bkstore = 1;
			}

			/*
			 * get rid of the node from the directory cache;
			 * note that, in case EBUSY is returned, the ZOMBIE
			 * node is taken care of in sdev_mknode.
			 */
			(void) sdev_dirdelete(nddv, *ndvp);
			*ndvp = NULL;
			if (bkstore) {
				ASSERT(nddv->sdev_attrvp);
				error = VOP_REMOVE(nddv->sdev_attrvp,
				    nnm, cred, NULL, 0);
				if (error)
					goto err_out;
			}
		}
	}

	/* fix the source for a symlink */
	if (vattr.va_type == VLNK) {
		if (odv->sdev_symlink == NULL) {
			error = sdev_follow_link(odv);
			if (error) {
				error = ENOENT;
				goto err_out;
			}
		}
		ASSERT(odv->sdev_symlink);
		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
	}

	/*
	 * make a fresh node from the source attrs
	 */
	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
	    NULL, (void *)link, cred, SDEV_READY);

	if (link)
		kmem_free(link, strlen(link) + 1);

	if (error)
		goto err_out;
	ASSERT(*ndvp);
	ASSERT((*ndvp)->sdev_state == SDEV_READY);

	/* move dir contents */
	if (doingdir) {
		for (idv = SDEV_FIRST_ENTRY(odv); idv;
		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
			error = sdev_rnmnode(odv, idv,
			    (struct sdev_node *)(*ndvp), &ndv,
			    idv->sdev_name, cred);
			if (error)
				goto err_out;
			ndv = NULL;
		}
	}

	if ((*ndvp)->sdev_attrvp) {
		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
		    AT_CTIME|AT_ATIME);
	} else {
		ASSERT((*ndvp)->sdev_attr);
		gethrestime(&now);
		(*ndvp)->sdev_attr->va_ctime = now;
		(*ndvp)->sdev_attr->va_atime = now;
	}

	if (nddv->sdev_attrvp) {
		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
		    AT_MTIME|AT_ATIME);
	} else {
		ASSERT(nddv->sdev_attr);
		gethrestime(&now);
		nddv->sdev_attr->va_mtime = now;
		nddv->sdev_attr->va_atime = now;
	}
	rw_exit(&nddv->sdev_contents);
	if (!samedir)
		rw_exit(&oddv->sdev_contents);

	SDEV_RELE(*ndvp);
	return (error);

err_out:
	rw_exit(&nddv->sdev_contents);
	if (!samedir)
		rw_exit(&oddv->sdev_contents);
	return (error);
}

/*
 * Merge sdev_node specific information into an attribute structure.
1317 * 1318 * note: sdev_node is not locked here 1319 */ 1320 void 1321 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap) 1322 { 1323 struct vnode *vp = SDEVTOV(dv); 1324 1325 vap->va_nlink = dv->sdev_nlink; 1326 vap->va_nodeid = dv->sdev_ino; 1327 vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev; 1328 vap->va_type = vp->v_type; 1329 1330 if (vp->v_type == VDIR) { 1331 vap->va_rdev = 0; 1332 vap->va_fsid = vp->v_rdev; 1333 } else if (vp->v_type == VLNK) { 1334 vap->va_rdev = 0; 1335 vap->va_mode &= ~S_IFMT; 1336 vap->va_mode |= S_IFLNK; 1337 } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) { 1338 vap->va_rdev = vp->v_rdev; 1339 vap->va_mode &= ~S_IFMT; 1340 if (vap->va_type == VCHR) 1341 vap->va_mode |= S_IFCHR; 1342 else 1343 vap->va_mode |= S_IFBLK; 1344 } else { 1345 vap->va_rdev = 0; 1346 } 1347 } 1348 1349 struct vattr * 1350 sdev_getdefault_attr(enum vtype type) 1351 { 1352 if (type == VDIR) 1353 return (&sdev_vattr_dir); 1354 else if (type == VCHR) 1355 return (&sdev_vattr_chr); 1356 else if (type == VBLK) 1357 return (&sdev_vattr_blk); 1358 else if (type == VLNK) 1359 return (&sdev_vattr_lnk); 1360 else 1361 return (NULL); 1362 } 1363 int 1364 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp) 1365 { 1366 int rv = 0; 1367 struct vnode *vp = SDEVTOV(dv); 1368 1369 switch (vp->v_type) { 1370 case VCHR: 1371 case VBLK: 1372 /* 1373 * If vnode is a device, return special vnode instead 1374 * (though it knows all about -us- via sp->s_realvp) 1375 */ 1376 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred); 1377 VN_RELE(vp); 1378 if (*vpp == NULLVP) 1379 rv = ENOSYS; 1380 break; 1381 default: /* most types are returned as is */ 1382 *vpp = vp; 1383 break; 1384 } 1385 return (rv); 1386 } 1387 1388 /* 1389 * junction between devname and root file system, e.g. 
ufs 1390 */ 1391 int 1392 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp) 1393 { 1394 struct vnode *rdvp = ddv->sdev_attrvp; 1395 int rval = 0; 1396 1397 ASSERT(rdvp); 1398 1399 rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL, 1400 NULL); 1401 return (rval); 1402 } 1403 1404 static int 1405 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred) 1406 { 1407 struct sdev_node *dv = NULL; 1408 char *nm; 1409 struct vnode *dirvp; 1410 int error; 1411 vnode_t *vp; 1412 int eof; 1413 struct iovec iov; 1414 struct uio uio; 1415 struct dirent64 *dp; 1416 dirent64_t *dbuf; 1417 size_t dbuflen; 1418 struct vattr vattr; 1419 char *link = NULL; 1420 1421 if (ddv->sdev_attrvp == NULL) 1422 return (0); 1423 if (!(ddv->sdev_flags & SDEV_BUILD)) 1424 return (0); 1425 1426 dirvp = ddv->sdev_attrvp; 1427 VN_HOLD(dirvp); 1428 dbuf = kmem_zalloc(dlen, KM_SLEEP); 1429 1430 uio.uio_iov = &iov; 1431 uio.uio_iovcnt = 1; 1432 uio.uio_segflg = UIO_SYSSPACE; 1433 uio.uio_fmode = 0; 1434 uio.uio_extflg = UIO_COPY_CACHED; 1435 uio.uio_loffset = 0; 1436 uio.uio_llimit = MAXOFFSET_T; 1437 1438 eof = 0; 1439 error = 0; 1440 while (!error && !eof) { 1441 uio.uio_resid = dlen; 1442 iov.iov_base = (char *)dbuf; 1443 iov.iov_len = dlen; 1444 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1445 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1446 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1447 1448 dbuflen = dlen - uio.uio_resid; 1449 if (error || dbuflen == 0) 1450 break; 1451 1452 if (!(ddv->sdev_flags & SDEV_BUILD)) 1453 break; 1454 1455 for (dp = dbuf; ((intptr_t)dp < 1456 (intptr_t)dbuf + dbuflen); 1457 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1458 nm = dp->d_name; 1459 1460 if (strcmp(nm, ".") == 0 || 1461 strcmp(nm, "..") == 0) 1462 continue; 1463 1464 vp = NULLVP; 1465 dv = sdev_cache_lookup(ddv, nm); 1466 if (dv) { 1467 if (dv->sdev_state != SDEV_ZOMBIE) { 1468 SDEV_SIMPLE_RELE(dv); 1469 } else { 1470 /* 1471 * A ZOMBIE node may not have been 1472 * cleaned up from the backing store, 1473 * bypass this entry in this case, 1474 * and clean it up from the directory 1475 * cache if this is the last call. 
1476 */ 1477 (void) sdev_dirdelete(ddv, dv); 1478 } 1479 continue; 1480 } 1481 1482 /* refill the cache if not already */ 1483 error = devname_backstore_lookup(ddv, nm, &vp); 1484 if (error) 1485 continue; 1486 1487 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 1488 error = VOP_GETATTR(vp, &vattr, 0, cred, NULL); 1489 if (error) 1490 continue; 1491 1492 if (vattr.va_type == VLNK) { 1493 error = sdev_getlink(vp, &link); 1494 if (error) { 1495 continue; 1496 } 1497 ASSERT(link != NULL); 1498 } 1499 1500 if (!rw_tryupgrade(&ddv->sdev_contents)) { 1501 rw_exit(&ddv->sdev_contents); 1502 rw_enter(&ddv->sdev_contents, RW_WRITER); 1503 } 1504 error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link, 1505 cred, SDEV_READY); 1506 rw_downgrade(&ddv->sdev_contents); 1507 1508 if (link != NULL) { 1509 kmem_free(link, strlen(link) + 1); 1510 link = NULL; 1511 } 1512 1513 if (!error) { 1514 ASSERT(dv); 1515 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1516 SDEV_SIMPLE_RELE(dv); 1517 } 1518 vp = NULL; 1519 dv = NULL; 1520 } 1521 } 1522 1523 done: 1524 VN_RELE(dirvp); 1525 kmem_free(dbuf, dlen); 1526 1527 return (error); 1528 } 1529 1530 void 1531 sdev_filldir_dynamic(struct sdev_node *ddv) 1532 { 1533 int error; 1534 int i; 1535 struct vattr vattr; 1536 struct vattr *vap = &vattr; 1537 char *nm = NULL; 1538 struct sdev_node *dv = NULL; 1539 1540 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1541 ASSERT((ddv->sdev_flags & SDEV_BUILD)); 1542 1543 *vap = *sdev_getdefault_attr(VDIR); /* note structure copy here */ 1544 gethrestime(&vap->va_atime); 1545 vap->va_mtime = vap->va_atime; 1546 vap->va_ctime = vap->va_atime; 1547 for (i = 0; vtab[i].vt_name != NULL; i++) { 1548 /* 1549 * This early, we may be in a read-only /dev 1550 * environment: leave the creation of any nodes we'd 1551 * attempt to persist to devfsadm. 1552 */ 1553 if (vtab[i].vt_flags & SDEV_PERSIST) 1554 continue; 1555 nm = vtab[i].vt_name; 1556 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1557 dv = NULL; 1558 error = sdev_mknode(ddv, nm, &dv, vap, NULL, 1559 NULL, kcred, SDEV_READY); 1560 if (error) { 1561 cmn_err(CE_WARN, "%s/%s: error %d\n", 1562 ddv->sdev_name, nm, error); 1563 } else { 1564 ASSERT(dv); 1565 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1566 SDEV_SIMPLE_RELE(dv); 1567 } 1568 } 1569 } 1570 1571 /* 1572 * Creating a backing store entry based on sdev_attr. 1573 * This is called either as part of node creation in a persistent directory 1574 * or from setattr/setsecattr to persist access attributes across reboot. 
 */
int
sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
{
	int error = 0;
	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
	struct vattr *vap = dv->sdev_attr;
	char *nm = dv->sdev_name;
	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;

	ASSERT(dv && dv->sdev_name && rdvp);
	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);

lookup:
	/* try to find it in the backing store */
	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
	    NULL);
	if (error == 0) {
		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
			VN_HOLD(rrvp);
			VN_RELE(*rvp);
			*rvp = rrvp;
		}

		kmem_free(dv->sdev_attr, sizeof (vattr_t));
		dv->sdev_attr = NULL;
		dv->sdev_attrvp = *rvp;
		return (0);
	}

	/* let's try to persist the node */
	gethrestime(&vap->va_atime);
	vap->va_mtime = vap->va_atime;
	vap->va_ctime = vap->va_atime;
	vap->va_mask |= AT_TYPE|AT_MODE;
	switch (vap->va_type) {
	case VDIR:
		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
		    (void *)(*rvp), error));
		break;
	case VCHR:
	case VBLK:
	case VREG:
	case VDOOR:
		error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE,
		    rvp, cred, 0, NULL, NULL);
		sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n",
		    (void *)(*rvp), error));
		if (!error)
			VN_RELE(*rvp);
		break;
	case VLNK:
		ASSERT(dv->sdev_symlink);
		error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred,
		    NULL, 0);
		sdcmn_err9(("sdev_shadow_node: create symlink error %d\n",
		    error));
		break;
	default:
		cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node "
		    "create\n", nm);
		/*NOTREACHED*/
	}

	/* go back to lookup to factor out spec node and set attrvp */
	if (error == 0)
		goto lookup;

	sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error));
	return (error);
}

static int
sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm)
{
	int error = 0;
	struct sdev_node *dup = NULL;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	if ((dup = sdev_findbyname(ddv, nm)) == NULL) {
		sdev_direnter(ddv, *dv);
	} else {
		if (dup->sdev_state == SDEV_ZOMBIE) {
			error = sdev_dirdelete(ddv, dup);
			/*
			 * The ZOMBIE node is still hanging
			 * around with more than one reference count.
			 * Fail the new node creation so that
			 * the directory cache won't have
			 * duplicate entries for the same named node.
			 */
			if (error == EBUSY) {
				SDEV_SIMPLE_RELE(*dv);
				sdev_nodedestroy(*dv, 0);
				*dv = NULL;
				return (error);
			}
			sdev_direnter(ddv, *dv);
		} else {
			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
			SDEV_SIMPLE_RELE(*dv);
			sdev_nodedestroy(*dv, 0);
			*dv = dup;
		}
	}

	return (0);
}

static int
sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
{
	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	return (sdev_dirdelete(ddv, *dv));
}

/*
 * update the in-core directory cache
 */
int
sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
    sdev_cache_ops_t ops)
{
	int error = 0;

	ASSERT((SDEV_HELD(*dv)));

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	switch (ops) {
	case SDEV_CACHE_ADD:
		error = sdev_cache_add(ddv, dv, nm);
		break;
	case SDEV_CACHE_DELETE:
		error = sdev_cache_delete(ddv, dv);
		break;
	default:
		break;
	}

	return (error);
}

/*
 * retrieve the named entry from the directory cache
 */
struct sdev_node *
sdev_cache_lookup(struct sdev_node *ddv, char *nm)
{
	struct sdev_node *dv = NULL;

	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
	dv = sdev_findbyname(ddv, nm);

	return (dv);
}

/*
 * Implicit reconfig for nodes constructed by a link generator.
 * Start devfsadm if needed, or if devfsadm is in progress,
 * prepare to block on devfsadm either completing or
 * constructing the desired node. As devfsadmd is global
 * in scope, constructing all necessary nodes, we only
 * need to initiate it once.
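 * (sdev_call_devfsadmd() below implements this start-once/wait logic.)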
1740 */ 1741 static int 1742 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm) 1743 { 1744 int error = 0; 1745 1746 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { 1747 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n", 1748 ddv->sdev_name, nm, devfsadm_state)); 1749 mutex_enter(&dv->sdev_lookup_lock); 1750 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING)); 1751 mutex_exit(&dv->sdev_lookup_lock); 1752 error = 0; 1753 } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) { 1754 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n", 1755 ddv->sdev_name, nm, devfsadm_state)); 1756 1757 sdev_devfsadmd_thread(ddv, dv, kcred); 1758 mutex_enter(&dv->sdev_lookup_lock); 1759 SDEV_BLOCK_OTHERS(dv, 1760 (SDEV_LOOKUP | SDEV_LGWAITING)); 1761 mutex_exit(&dv->sdev_lookup_lock); 1762 error = 0; 1763 } else { 1764 error = -1; 1765 } 1766 1767 return (error); 1768 } 1769 1770 /* 1771 * Support for specialized device naming construction mechanisms 1772 */ 1773 static int 1774 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm, 1775 int (*callback)(struct sdev_node *, char *, void **, struct cred *, 1776 void *, char *), int flags, struct cred *cred) 1777 { 1778 int rv = 0; 1779 char *physpath = NULL; 1780 struct vattr vattr; 1781 struct vattr *vap = &vattr; 1782 struct sdev_node *dv = NULL; 1783 1784 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1785 if (flags & SDEV_VLINK) { 1786 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1787 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL, 1788 NULL); 1789 if (rv) { 1790 kmem_free(physpath, MAXPATHLEN); 1791 return (-1); 1792 } 1793 1794 *vap = *sdev_getdefault_attr(VLNK); /* structure copy */ 1795 vap->va_size = strlen(physpath); 1796 gethrestime(&vap->va_atime); 1797 vap->va_mtime = vap->va_atime; 1798 vap->va_ctime = vap->va_atime; 1799 1800 rv = sdev_mknode(ddv, nm, &dv, vap, NULL, 1801 (void *)physpath, cred, SDEV_READY); 1802 kmem_free(physpath, MAXPATHLEN); 1803 if (rv) 1804 return (rv); 1805 } else if (flags & SDEV_VATTR) { 1806 /* 1807 * /dev/pts 1808 * 1809 * callback is responsible to set the basic attributes, 1810 * e.g. va_type/va_uid/va_gid/ 1811 * dev_t if VCHR or VBLK/ 1812 */ 1813 ASSERT(callback); 1814 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL); 1815 if (rv) { 1816 sdcmn_err3(("devname_lookup_func: SDEV_NONE " 1817 "callback failed \n")); 1818 return (-1); 1819 } 1820 1821 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL, 1822 cred, SDEV_READY); 1823 1824 if (rv) 1825 return (rv); 1826 1827 } else { 1828 impossible(("lookup: %s/%s by %s not supported (%d)\n", 1829 SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm, 1830 __LINE__)); 1831 rv = -1; 1832 } 1833 1834 *dvp = dv; 1835 return (rv); 1836 } 1837 1838 static int 1839 is_devfsadm_thread(char *exec_name) 1840 { 1841 /* 1842 * note: because devfsadmd -> /usr/sbin/devfsadm 1843 * it is safe to use "devfsadm" to capture the lookups 1844 * from devfsadm and its daemon version. 1845 */ 1846 if (strcmp(exec_name, "devfsadm") == 0) 1847 return (1); 1848 return (0); 1849 } 1850 1851 /* 1852 * Lookup Order: 1853 * sdev_node cache; 1854 * backing store (SDEV_PERSIST); 1855 * DBNR: a. dir_ops implemented in the loadable modules; 1856 * b. vnode ops in vtab. 
1857 */ 1858 int 1859 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp, 1860 struct cred *cred, int (*callback)(struct sdev_node *, char *, void **, 1861 struct cred *, void *, char *), int flags) 1862 { 1863 int rv = 0, nmlen; 1864 struct vnode *rvp = NULL; 1865 struct sdev_node *dv = NULL; 1866 int retried = 0; 1867 int error = 0; 1868 struct vattr vattr; 1869 char *lookup_thread = curproc->p_user.u_comm; 1870 int failed_flags = 0; 1871 int (*vtor)(struct sdev_node *) = NULL; 1872 int state; 1873 int parent_state; 1874 char *link = NULL; 1875 1876 if (SDEVTOV(ddv)->v_type != VDIR) 1877 return (ENOTDIR); 1878 1879 /* 1880 * Empty name or ., return node itself. 1881 */ 1882 nmlen = strlen(nm); 1883 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { 1884 *vpp = SDEVTOV(ddv); 1885 VN_HOLD(*vpp); 1886 return (0); 1887 } 1888 1889 /* 1890 * .., return the parent directory 1891 */ 1892 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { 1893 *vpp = SDEVTOV(ddv->sdev_dotdot); 1894 VN_HOLD(*vpp); 1895 return (0); 1896 } 1897 1898 rw_enter(&ddv->sdev_contents, RW_READER); 1899 if (ddv->sdev_flags & SDEV_VTOR) { 1900 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); 1901 ASSERT(vtor); 1902 } 1903 1904 tryagain: 1905 /* 1906 * (a) directory cache lookup: 1907 */ 1908 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1909 parent_state = ddv->sdev_state; 1910 dv = sdev_cache_lookup(ddv, nm); 1911 if (dv) { 1912 state = dv->sdev_state; 1913 switch (state) { 1914 case SDEV_INIT: 1915 if (is_devfsadm_thread(lookup_thread)) 1916 break; 1917 1918 /* ZOMBIED parent won't allow node creation */ 1919 if (parent_state == SDEV_ZOMBIE) { 1920 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1921 retried); 1922 goto nolock_notfound; 1923 } 1924 1925 mutex_enter(&dv->sdev_lookup_lock); 1926 /* compensate the threads started after devfsadm */ 1927 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && 1928 !(SDEV_IS_LOOKUP(dv))) 1929 SDEV_BLOCK_OTHERS(dv, 1930 (SDEV_LOOKUP | SDEV_LGWAITING)); 1931 1932 if (SDEV_IS_LOOKUP(dv)) { 1933 failed_flags |= SLF_REBUILT; 1934 rw_exit(&ddv->sdev_contents); 1935 error = sdev_wait4lookup(dv, SDEV_LOOKUP); 1936 mutex_exit(&dv->sdev_lookup_lock); 1937 rw_enter(&ddv->sdev_contents, RW_READER); 1938 1939 if (error != 0) { 1940 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1941 retried); 1942 goto nolock_notfound; 1943 } 1944 1945 state = dv->sdev_state; 1946 if (state == SDEV_INIT) { 1947 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1948 retried); 1949 goto nolock_notfound; 1950 } else if (state == SDEV_READY) { 1951 goto found; 1952 } else if (state == SDEV_ZOMBIE) { 1953 rw_exit(&ddv->sdev_contents); 1954 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1955 retried); 1956 SDEV_RELE(dv); 1957 goto lookup_failed; 1958 } 1959 } else { 1960 mutex_exit(&dv->sdev_lookup_lock); 1961 } 1962 break; 1963 case SDEV_READY: 1964 goto found; 1965 case SDEV_ZOMBIE: 1966 rw_exit(&ddv->sdev_contents); 1967 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1968 SDEV_RELE(dv); 1969 goto lookup_failed; 1970 default: 1971 rw_exit(&ddv->sdev_contents); 1972 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1973 sdev_lookup_failed(ddv, nm, failed_flags); 1974 *vpp = NULLVP; 1975 return (ENOENT); 1976 } 1977 } 1978 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1979 1980 /* 1981 * ZOMBIED parent does not allow new node creation. 
1982 * bail out early 1983 */ 1984 if (parent_state == SDEV_ZOMBIE) { 1985 rw_exit(&ddv->sdev_contents); 1986 *vpp = NULLVP; 1987 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1988 return (ENOENT); 1989 } 1990 1991 /* 1992 * (b0): backing store lookup 1993 * SDEV_PERSIST is default except: 1994 * 1) pts nodes 1995 * 2) non-chmod'ed local nodes 1996 * 3) zvol nodes 1997 */ 1998 if (SDEV_IS_PERSIST(ddv)) { 1999 error = devname_backstore_lookup(ddv, nm, &rvp); 2000 2001 if (!error) { 2002 2003 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 2004 error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL); 2005 if (error) { 2006 rw_exit(&ddv->sdev_contents); 2007 if (dv) 2008 SDEV_RELE(dv); 2009 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2010 sdev_lookup_failed(ddv, nm, failed_flags); 2011 *vpp = NULLVP; 2012 return (ENOENT); 2013 } 2014 2015 if (vattr.va_type == VLNK) { 2016 error = sdev_getlink(rvp, &link); 2017 if (error) { 2018 rw_exit(&ddv->sdev_contents); 2019 if (dv) 2020 SDEV_RELE(dv); 2021 SD_TRACE_FAILED_LOOKUP(ddv, nm, 2022 retried); 2023 sdev_lookup_failed(ddv, nm, 2024 failed_flags); 2025 *vpp = NULLVP; 2026 return (ENOENT); 2027 } 2028 ASSERT(link != NULL); 2029 } 2030 2031 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2032 rw_exit(&ddv->sdev_contents); 2033 rw_enter(&ddv->sdev_contents, RW_WRITER); 2034 } 2035 error = sdev_mknode(ddv, nm, &dv, &vattr, 2036 rvp, link, cred, SDEV_READY); 2037 rw_downgrade(&ddv->sdev_contents); 2038 2039 if (link != NULL) { 2040 kmem_free(link, strlen(link) + 1); 2041 link = NULL; 2042 } 2043 2044 if (error) { 2045 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2046 rw_exit(&ddv->sdev_contents); 2047 if (dv) 2048 SDEV_RELE(dv); 2049 goto lookup_failed; 2050 } else { 2051 goto found; 2052 } 2053 } else if (retried) { 2054 rw_exit(&ddv->sdev_contents); 2055 sdcmn_err3(("retry of lookup of %s/%s: failed\n", 2056 ddv->sdev_name, nm)); 2057 if (dv) 2058 SDEV_RELE(dv); 2059 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2060 sdev_lookup_failed(ddv, nm, failed_flags); 2061 *vpp = NULLVP; 2062 return (ENOENT); 2063 } 2064 } 2065 2066 lookup_create_node: 2067 /* first thread that is doing the lookup on this node */ 2068 if (callback) { 2069 ASSERT(dv == NULL); 2070 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2071 rw_exit(&ddv->sdev_contents); 2072 rw_enter(&ddv->sdev_contents, RW_WRITER); 2073 } 2074 error = sdev_call_dircallback(ddv, &dv, nm, callback, 2075 flags, cred); 2076 rw_downgrade(&ddv->sdev_contents); 2077 if (error == 0) { 2078 goto found; 2079 } else { 2080 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2081 rw_exit(&ddv->sdev_contents); 2082 goto lookup_failed; 2083 } 2084 } 2085 if (!dv) { 2086 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2087 rw_exit(&ddv->sdev_contents); 2088 rw_enter(&ddv->sdev_contents, RW_WRITER); 2089 } 2090 error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL, 2091 cred, SDEV_INIT); 2092 if (!dv) { 2093 rw_exit(&ddv->sdev_contents); 2094 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2095 sdev_lookup_failed(ddv, nm, failed_flags); 2096 *vpp = NULLVP; 2097 return (ENOENT); 2098 } 2099 rw_downgrade(&ddv->sdev_contents); 2100 } 2101 2102 /* 2103 * (b1) invoking devfsadm once per life time for devfsadm nodes 2104 */ 2105 ASSERT(SDEV_HELD(dv)); 2106 2107 if (SDEV_IS_NO_NCACHE(dv)) 2108 failed_flags |= SLF_NO_NCACHE; 2109 if (sdev_reconfig_boot || !i_ddi_io_initialized() || 2110 SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) || 2111 ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) { 2112 ASSERT(SDEV_HELD(dv)); 2113 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2114 goto 
nolock_notfound; 2115 } 2116 2117 /* 2118 * filter out known non-existent devices recorded 2119 * during initial reconfiguration boot for which 2120 * reconfig should not be done and lookup may 2121 * be short-circuited now. 2122 */ 2123 if (sdev_lookup_filter(ddv, nm)) { 2124 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2125 goto nolock_notfound; 2126 } 2127 2128 /* bypassing devfsadm internal nodes */ 2129 if (is_devfsadm_thread(lookup_thread)) { 2130 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2131 goto nolock_notfound; 2132 } 2133 2134 if (sdev_reconfig_disable) { 2135 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2136 goto nolock_notfound; 2137 } 2138 2139 error = sdev_call_devfsadmd(ddv, dv, nm); 2140 if (error == 0) { 2141 sdcmn_err8(("lookup of %s/%s by %s: reconfig\n", 2142 ddv->sdev_name, nm, curproc->p_user.u_comm)); 2143 if (sdev_reconfig_verbose) { 2144 cmn_err(CE_CONT, 2145 "?lookup of %s/%s by %s: reconfig\n", 2146 ddv->sdev_name, nm, curproc->p_user.u_comm); 2147 } 2148 retried = 1; 2149 failed_flags |= SLF_REBUILT; 2150 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 2151 SDEV_SIMPLE_RELE(dv); 2152 goto tryagain; 2153 } else { 2154 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2155 goto nolock_notfound; 2156 } 2157 2158 found: 2159 ASSERT(!(dv->sdev_flags & SDEV_STALE)); 2160 ASSERT(dv->sdev_state == SDEV_READY); 2161 if (vtor) { 2162 /* 2163 * Check validity of returned node 2164 */ 2165 switch (vtor(dv)) { 2166 case SDEV_VTOR_VALID: 2167 break; 2168 case SDEV_VTOR_STALE: 2169 /* 2170 * The name exists, but the cache entry is 2171 * stale and needs to be re-created. 2172 */ 2173 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2174 if (rw_tryupgrade(&ddv->sdev_contents) == 0) { 2175 rw_exit(&ddv->sdev_contents); 2176 rw_enter(&ddv->sdev_contents, RW_WRITER); 2177 } 2178 error = sdev_cache_update(ddv, &dv, nm, 2179 SDEV_CACHE_DELETE); 2180 rw_downgrade(&ddv->sdev_contents); 2181 if (error == 0) { 2182 dv = NULL; 2183 goto lookup_create_node; 2184 } 2185 /* FALLTHRU */ 2186 case SDEV_VTOR_INVALID: 2187 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2188 sdcmn_err7(("lookup: destroy invalid " 2189 "node: %s(%p)\n", dv->sdev_name, (void *)dv)); 2190 goto nolock_notfound; 2191 case SDEV_VTOR_SKIP: 2192 sdcmn_err7(("lookup: node not applicable - " 2193 "skipping: %s(%p)\n", dv->sdev_name, (void *)dv)); 2194 rw_exit(&ddv->sdev_contents); 2195 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2196 SDEV_RELE(dv); 2197 goto lookup_failed; 2198 default: 2199 cmn_err(CE_PANIC, 2200 "dev fs: validator failed: %s(%p)\n", 2201 dv->sdev_name, (void *)dv); 2202 break; 2203 } 2204 } 2205 2206 rw_exit(&ddv->sdev_contents); 2207 rv = sdev_to_vp(dv, vpp); 2208 sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d " 2209 "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count, 2210 dv->sdev_state, nm, rv)); 2211 return (rv); 2212 2213 nolock_notfound: 2214 /* 2215 * Destroy the node that is created for synchronization purposes. 2216 */ 2217 sdcmn_err3(("devname_lookup_func: %s with state %d\n", 2218 nm, dv->sdev_state)); 2219 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2220 if (dv->sdev_state == SDEV_INIT) { 2221 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2222 rw_exit(&ddv->sdev_contents); 2223 rw_enter(&ddv->sdev_contents, RW_WRITER); 2224 } 2225 2226 /* 2227 * Node state may have changed during the lock 2228 * changes. Re-check. 
		 */
		if (dv->sdev_state == SDEV_INIT) {
			(void) sdev_dirdelete(ddv, dv);
			rw_exit(&ddv->sdev_contents);
			sdev_lookup_failed(ddv, nm, failed_flags);
			*vpp = NULL;
			return (ENOENT);
		}
	}

	rw_exit(&ddv->sdev_contents);
	SDEV_RELE(dv);

lookup_failed:
	sdev_lookup_failed(ddv, nm, failed_flags);
	*vpp = NULL;
	return (ENOENT);
}

/*
 * Given a directory node, mark all nodes beneath as
 * STALE, i.e. nodes that don't exist as far as new
 * consumers are concerned.  Remove them from the
 * list of directory entries so that no lookup or
 * directory traversal will find them.  The nodes are
 * not deallocated, so existing holds are not affected.
 */
void
sdev_stale(struct sdev_node *ddv)
{
	struct sdev_node *dv;
	struct vnode *vp;

	ASSERT(SDEVTOV(ddv)->v_type == VDIR);

	rw_enter(&ddv->sdev_contents, RW_WRITER);
	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) {
		vp = SDEVTOV(dv);
		if (vp->v_type == VDIR)
			sdev_stale(dv);

		sdcmn_err9(("sdev_stale: setting stale %s\n",
		    dv->sdev_path));
		dv->sdev_flags |= SDEV_STALE;
		avl_remove(&ddv->sdev_entries, dv);
	}
	ddv->sdev_flags |= SDEV_BUILD;
	rw_exit(&ddv->sdev_contents);
}

/*
 * Given a directory node, clean out all the nodes beneath.
 * If expr is specified, clean only the nodes whose names match expr.
 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
 * so they are excluded from future lookups.
 */
int
sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags)
{
	int error = 0;
	int busy = 0;
	struct vnode *vp;
	struct sdev_node *dv, *next = NULL;
	int bkstore = 0;
	int len = 0;
	char *bks_name = NULL;

	ASSERT(SDEVTOV(ddv)->v_type == VDIR);

	/*
	 * We try our best to destroy all unused sdev_node's.
	 */
	rw_enter(&ddv->sdev_contents, RW_WRITER);
	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) {
		next = SDEV_NEXT_ENTRY(ddv, dv);
		vp = SDEVTOV(dv);

		if (expr && gmatch(dv->sdev_name, expr) == 0)
			continue;

		if (vp->v_type == VDIR &&
		    sdev_cleandir(dv, NULL, flags) != 0) {
			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
			    dv->sdev_name));
			busy++;
			continue;
		}

		if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) {
			sdcmn_err9(("sdev_cleandir: dir %s busy\n",
			    dv->sdev_name));
			busy++;
			continue;
		}

		/*
		 * At this point, either dv is not held or SDEV_ENFORCE
		 * is specified.  In either case, dv needs to be deleted.
		 */
		SDEV_HOLD(dv);

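		/*
		 * bkstore encodes what, if anything, must be removed
		 * from the backing store once the node is deleted:
		 * 0 - nothing, 1 - a non-directory entry (VOP_REMOVE),
		 * 2 - a directory (VOP_RMDIR).
		 */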
		bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0;
		if (bkstore && (vp->v_type == VDIR))
			bkstore += 1;

		if (bkstore) {
			len = strlen(dv->sdev_name) + 1;
			bks_name = kmem_alloc(len, KM_SLEEP);
			bcopy(dv->sdev_name, bks_name, len);
		}

		error = sdev_dirdelete(ddv, dv);

		if (error == EBUSY) {
			sdcmn_err9(("sdev_cleandir: dir busy\n"));
			busy++;
		}

		/* take care of the backing store cleanup */
		if (bkstore && (error == 0)) {
			ASSERT(bks_name);
			ASSERT(ddv->sdev_attrvp);

			if (bkstore == 1) {
				error = VOP_REMOVE(ddv->sdev_attrvp,
				    bks_name, kcred, NULL, 0);
			} else if (bkstore == 2) {
				error = VOP_RMDIR(ddv->sdev_attrvp,
				    bks_name, ddv->sdev_attrvp, kcred, NULL, 0);
			}

			/* do not propagate the backing store errors */
			if (error) {
				sdcmn_err9(("sdev_cleandir: backing store "
				    "not cleaned\n"));
				error = 0;
			}

			bkstore = 0;
			kmem_free(bks_name, len);
			bks_name = NULL;
			len = 0;
		}
	}

	ddv->sdev_flags |= SDEV_BUILD;
	rw_exit(&ddv->sdev_contents);

	if (busy) {
		error = EBUSY;
	}

	return (error);
}

/*
 * A convenient wrapper for readdir() functions: fill in one dirent64
 * entry, returning its record length, or 0 if the entry does not fit
 * in the remaining size bytes.
 */
size_t
add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off)
{
	size_t reclen = DIRENT64_RECLEN(strlen(nm));

	if (reclen > size)
		return (0);

	de->d_ino = (ino64_t)ino;
	de->d_off = (off64_t)off + 1;
	de->d_reclen = (ushort_t)reclen;
	(void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen));
	return (reclen);
}
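/*
 * Illustrative (not compiled) sketch of how a readdir implementation
 * can pack entries with add_dir_entry(); the names, inode numbers and
 * offsets below are hypothetical, and nextdp() is the macro defined
 * further down in this file.
 *
 *	dirent64_t *dp = outbuf;
 *	size_t used = 0, n;
 *
 *	for (i = 0; i < nnames; i++, diroff++) {
 *		n = add_dir_entry(dp, names[i], bufsize - used,
 *		    inos[i], diroff);
 *		if (n == 0)
 *			break;		(entry did not fit)
 *		used += n;
 *		dp = nextdp(dp);
 *	}
 */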
/*
 * sdev_mount service routines
 */
int
sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args)
{
	int error;

	if (uap->datalen != sizeof (*args))
		return (EINVAL);

	if (error = copyin(uap->dataptr, args, sizeof (*args))) {
		cmn_err(CE_WARN, "sdev_copyin_mountargs: cannot get user "
		    "data, error %d\n", error);
		return (EFAULT);
	}

	return (0);
}

#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *) \
	(intptr_t)((char *)(dp) + (dp)->d_reclen))

/*
 * readdir helper function
 */
int
devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
    int flags)
{
	struct sdev_node *ddv = VTOSDEV(vp);
	struct sdev_node *dv;
	dirent64_t *dp;
	ulong_t outcount = 0;
	size_t namelen;
	ulong_t alloc_count;
	void *outbuf;
	struct iovec *iovp;
	int error = 0;
	size_t reclen;
	offset_t diroff;
	offset_t soff;
	int this_reclen;
	int (*vtor)(struct sdev_node *) = NULL;
	struct vattr attr;
	timestruc_t now;

	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));

	if (uiop->uio_loffset >= MAXOFF_T) {
		if (eofp)
			*eofp = 1;
		return (0);
	}

	if (uiop->uio_iovcnt != 1)
		return (EINVAL);

	if (vp->v_type != VDIR)
		return (ENOTDIR);

	if (ddv->sdev_flags & SDEV_VTOR) {
		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
		ASSERT(vtor);
	}

	if (eofp != NULL)
		*eofp = 0;

	soff = uiop->uio_loffset;
	iovp = uiop->uio_iov;
	alloc_count = iovp->iov_len;
	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
	outcount = 0;

	if (ddv->sdev_state == SDEV_ZOMBIE)
		goto get_cache;

	if (SDEV_IS_GLOBAL(ddv)) {

		if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
		    !sdev_reconfig_disable) {
			/*
			 * invoking "devfsadm" to do system device reconfig
			 */
			mutex_enter(&ddv->sdev_lookup_lock);
			SDEV_BLOCK_OTHERS(ddv,
			    (SDEV_READDIR|SDEV_LGWAITING));
			mutex_exit(&ddv->sdev_lookup_lock);

			sdcmn_err8(("readdir of %s by %s: reconfig\n",
			    ddv->sdev_path, curproc->p_user.u_comm));
			if (sdev_reconfig_verbose) {
				cmn_err(CE_CONT,
				    "?readdir of %s by %s: reconfig\n",
				    ddv->sdev_path, curproc->p_user.u_comm);
			}

			sdev_devfsadmd_thread(ddv, NULL, kcred);
		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
			/*
			 * compensate for an "ls" that started after
			 * "devfsadm" was already running
			 */
			mutex_enter(&ddv->sdev_lookup_lock);
			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
			mutex_exit(&ddv->sdev_lookup_lock);
		}

		/*
		 * release the contents lock so that
		 * the cache may be updated by devfsadmd
		 */
		rw_exit(&ddv->sdev_contents);
		mutex_enter(&ddv->sdev_lookup_lock);
		if (SDEV_IS_READDIR(ddv))
			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
		mutex_exit(&ddv->sdev_lookup_lock);
		rw_enter(&ddv->sdev_contents, RW_READER);

		sdcmn_err4(("readdir of directory %s by %s\n",
		    ddv->sdev_name, curproc->p_user.u_comm));
		if (ddv->sdev_flags & SDEV_BUILD) {
			if (SDEV_IS_PERSIST(ddv)) {
				error = sdev_filldir_from_store(ddv,
				    alloc_count, cred);
			}
			ddv->sdev_flags &= ~SDEV_BUILD;
		}
	}

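	/*
	 * Offsets used below: "." is returned at offset 0 and ".." at
	 * offset 1; the cached entries follow starting at offset 2, and
	 * each dirent's d_off holds the offset of the next entry.
	 */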
get_cache:
	/* handle "." and ".." */
	diroff = 0;
	if (soff == 0) {
		/* first time */
		this_reclen = DIRENT64_RECLEN(1);
		if (alloc_count < this_reclen) {
			error = EINVAL;
			goto done;
		}

		dp->d_ino = (ino64_t)ddv->sdev_ino;
		dp->d_off = (off64_t)1;
		dp->d_reclen = (ushort_t)this_reclen;

		(void) strncpy(dp->d_name, ".",
		    DIRENT64_NAMELEN(this_reclen));
		outcount += dp->d_reclen;
		dp = nextdp(dp);
	}

	diroff++;
	if (soff <= 1) {
		this_reclen = DIRENT64_RECLEN(2);
		if (alloc_count < outcount + this_reclen) {
			error = EINVAL;
			goto done;
		}

		dp->d_reclen = (ushort_t)this_reclen;
		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
		dp->d_off = (off64_t)2;

		(void) strncpy(dp->d_name, "..",
		    DIRENT64_NAMELEN(this_reclen));
		outcount += dp->d_reclen;

		dp = nextdp(dp);
	}

	/* now walk the cached entries */
	diroff++;
	for (dv = SDEV_FIRST_ENTRY(ddv); dv;
	    dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s'\n",
		    diroff, soff, dv->sdev_name));

		/* bypass nodes that are not yet ready */
		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
			sdcmn_err3(("sdev_readdir: premature node "
			    "%s %d\n", dv->sdev_name, dv->sdev_state));
			continue;
		}

		/*
		 * Check the validity of the node.
		 * Drop invalid nodes and nodes to be skipped.
		 * A node the validator indicates as stale needs
		 * to be returned as presumably the node name itself
		 * is valid and the node data itself will be refreshed
		 * on lookup.  An application performing a readdir then
		 * stat on each entry should thus always see consistent
		 * data.  In any case, it is not possible to synchronize
		 * with dynamic kernel state, and any view we return can
		 * never be anything more than a snapshot at a point in time.
		 */
		if (vtor) {
			switch (vtor(dv)) {
			case SDEV_VTOR_VALID:
				break;
			case SDEV_VTOR_INVALID:
			case SDEV_VTOR_SKIP:
				continue;
			case SDEV_VTOR_STALE:
				sdcmn_err3(("sdev_readdir: %s stale\n",
				    dv->sdev_name));
				break;
			default:
				cmn_err(CE_PANIC,
				    "dev fs: validator failed: %s(%p)\n",
				    dv->sdev_name, (void *)dv);
				break;
				/*NOTREACHED*/
			}
		}

		namelen = strlen(dv->sdev_name);
		reclen = DIRENT64_RECLEN(namelen);
		if (outcount + reclen > alloc_count) {
			goto full;
		}
		dp->d_reclen = (ushort_t)reclen;
		dp->d_ino = (ino64_t)dv->sdev_ino;
		dp->d_off = (off64_t)diroff + 1;
		(void) strncpy(dp->d_name, dv->sdev_name,
		    DIRENT64_NAMELEN(reclen));
		outcount += reclen;
		dp = nextdp(dp);
	}

full:
	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
	    (void *)dv));

	if (outcount)
		error = uiomove(outbuf, outcount, UIO_READ, uiop);

	if (!error) {
		uiop->uio_loffset = diroff;
		if (eofp)
			*eofp = dv ? 0 : 1;
	}

	if (ddv->sdev_attrvp) {
		gethrestime(&now);
		attr.va_ctime = now;
		attr.va_atime = now;
		attr.va_mask = AT_CTIME|AT_ATIME;

		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
	}
done:
	kmem_free(outbuf, alloc_count);
	return (error);
}
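/*
 * Resolve "path" component by component starting at rootdir, following
 * mount points and symbolic links along the way.  On success, *r_vp is
 * the held attribute vnode in the backing store that persists the named
 * /dev entity; anything that does not resolve through a persisted /dev
 * node is reported as ENOENT.
 */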
static int
sdev_modctl_lookup(const char *path, vnode_t **r_vp)
{
	vnode_t *vp;
	vnode_t *cvp;
	struct sdev_node *svp;
	char *nm;
	struct pathname pn;
	int error;
	int persisted = 0;

	ASSERT(INGLOBALZONE(curproc));

	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
		return (error);
	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);

	vp = rootdir;
	VN_HOLD(vp);

	while (pn_pathleft(&pn)) {
		ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
		(void) pn_getcomponent(&pn, nm);

		/*
		 * Deal with the .. special case where we may be
		 * traversing up across a mount point, to the
		 * root of this filesystem or global root.
		 */
		if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) {
checkforroot:
			if (VN_CMP(vp, rootdir)) {
				nm[1] = 0;
			} else if (vp->v_flag & VROOT) {
				vfs_t *vfsp;
				cvp = vp;
				vfsp = cvp->v_vfsp;
				vfs_rlock_wait(vfsp);
				vp = cvp->v_vfsp->vfs_vnodecovered;
				if (vp == NULL ||
				    (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) {
					vfs_unlock(vfsp);
					VN_RELE(cvp);
					error = EIO;
					break;
				}
				VN_HOLD(vp);
				vfs_unlock(vfsp);
				VN_RELE(cvp);
				cvp = NULL;
				goto checkforroot;
			}
		}

		error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL,
		    NULL, NULL);
		if (error) {
			VN_RELE(vp);
			break;
		}

		/* traverse mount points encountered on our journey */
		if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) {
			VN_RELE(vp);
			VN_RELE(cvp);
			break;
		}

		/*
		 * A symbolic link can be either relative or absolute.
		 */
		if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) {
			struct pathname linkpath;

			pn_alloc(&linkpath);
			if (error = pn_getsymlink(cvp, &linkpath, kcred)) {
				pn_free(&linkpath);
				break;
			}
			if (pn_pathleft(&linkpath) == 0)
				(void) pn_set(&linkpath, ".");
			error = pn_insert(&pn, &linkpath, strlen(nm));
			pn_free(&linkpath);
			if (pn.pn_pathlen == 0) {
				VN_RELE(vp);
				return (ENOENT);
			}
			if (pn.pn_path[0] == '/') {
				pn_skipslash(&pn);
				VN_RELE(vp);
				VN_RELE(cvp);
				vp = rootdir;
				VN_HOLD(vp);
			} else {
				VN_RELE(cvp);
			}
			continue;
		}

		VN_RELE(vp);

		/*
		 * Direct the operation to the persisting filesystem
		 * underlying /dev.  Bail if we encounter a
		 * non-persistent dev entity here.
		 */
		if (cvp->v_vfsp->vfs_fstype == devtype) {

			svp = VTOSDEV(cvp);
			if (svp == NULL) {
				error = ENOENT;
				VN_RELE(cvp);
				break;
			}

			if ((svp->sdev_flags & SDEV_PERSIST) == 0) {
				error = ENOENT;
				VN_RELE(cvp);
				break;
			}

			if ((vp = svp->sdev_attrvp) == NULL) {
				error = ENOENT;
				VN_RELE(cvp);
				break;
			}
			persisted = 1;
			VN_HOLD(vp);
			VN_RELE(cvp);
			cvp = vp;
		}

		vp = cvp;
		pn_skipslash(&pn);
	}

	kmem_free(nm, MAXNAMELEN);
	pn_free(&pn);

	if (error)
		return (error);

	/*
	 * Only return persisted nodes in the filesystem underlying /dev.
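	 * (persisted is set only once the walk has resolved a /dev entity
	 * with SDEV_PERSIST set and switched to its backing attrvp.)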
2811 */ 2812 if (!persisted) { 2813 VN_RELE(vp); 2814 return (ENOENT); 2815 } 2816 2817 *r_vp = vp; 2818 return (0); 2819 } 2820 2821 int 2822 sdev_modctl_readdir(const char *dir, char ***dirlistp, 2823 int *npathsp, int *npathsp_alloc, int checking_empty) 2824 { 2825 char **pathlist = NULL; 2826 char **newlist = NULL; 2827 int npaths = 0; 2828 int npaths_alloc = 0; 2829 dirent64_t *dbuf = NULL; 2830 int n; 2831 char *s; 2832 int error; 2833 vnode_t *vp; 2834 int eof; 2835 struct iovec iov; 2836 struct uio uio; 2837 struct dirent64 *dp; 2838 size_t dlen; 2839 size_t dbuflen; 2840 int ndirents = 64; 2841 char *nm; 2842 2843 error = sdev_modctl_lookup(dir, &vp); 2844 sdcmn_err11(("modctl readdir: %s by %s: %s\n", 2845 dir, curproc->p_user.u_comm, 2846 (error == 0) ? "ok" : "failed")); 2847 if (error) 2848 return (error); 2849 2850 dlen = ndirents * (sizeof (*dbuf)); 2851 dbuf = kmem_alloc(dlen, KM_SLEEP); 2852 2853 uio.uio_iov = &iov; 2854 uio.uio_iovcnt = 1; 2855 uio.uio_segflg = UIO_SYSSPACE; 2856 uio.uio_fmode = 0; 2857 uio.uio_extflg = UIO_COPY_CACHED; 2858 uio.uio_loffset = 0; 2859 uio.uio_llimit = MAXOFFSET_T; 2860 2861 eof = 0; 2862 error = 0; 2863 while (!error && !eof) { 2864 uio.uio_resid = dlen; 2865 iov.iov_base = (char *)dbuf; 2866 iov.iov_len = dlen; 2867 2868 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 2869 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0); 2870 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2871 2872 dbuflen = dlen - uio.uio_resid; 2873 2874 if (error || dbuflen == 0) 2875 break; 2876 2877 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 2878 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 2879 2880 nm = dp->d_name; 2881 2882 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 2883 continue; 2884 if (npaths == npaths_alloc) { 2885 npaths_alloc += 64; 2886 newlist = (char **) 2887 kmem_zalloc((npaths_alloc + 1) * 2888 sizeof (char *), KM_SLEEP); 2889 if (pathlist) { 2890 bcopy(pathlist, newlist, 2891 npaths * sizeof (char *)); 2892 kmem_free(pathlist, 2893 (npaths + 1) * sizeof (char *)); 2894 } 2895 pathlist = newlist; 2896 } 2897 n = strlen(nm) + 1; 2898 s = kmem_alloc(n, KM_SLEEP); 2899 bcopy(nm, s, n); 2900 pathlist[npaths++] = s; 2901 sdcmn_err11((" %s/%s\n", dir, s)); 2902 2903 /* if checking empty, one entry is as good as many */ 2904 if (checking_empty) { 2905 eof = 1; 2906 break; 2907 } 2908 } 2909 } 2910 2911 exit: 2912 VN_RELE(vp); 2913 2914 if (dbuf) 2915 kmem_free(dbuf, dlen); 2916 2917 if (error) 2918 return (error); 2919 2920 *dirlistp = pathlist; 2921 *npathsp = npaths; 2922 *npathsp_alloc = npaths_alloc; 2923 2924 return (0); 2925 } 2926 2927 void 2928 sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc) 2929 { 2930 int i, n; 2931 2932 for (i = 0; i < npaths; i++) { 2933 n = strlen(pathlist[i]) + 1; 2934 kmem_free(pathlist[i], n); 2935 } 2936 2937 kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *)); 2938 } 2939 2940 int 2941 sdev_modctl_devexists(const char *path) 2942 { 2943 vnode_t *vp; 2944 int error; 2945 2946 error = sdev_modctl_lookup(path, &vp); 2947 sdcmn_err11(("modctl dev exists: %s by %s: %s\n", 2948 path, curproc->p_user.u_comm, 2949 (error == 0) ? 
"ok" : "failed")); 2950 if (error == 0) 2951 VN_RELE(vp); 2952 2953 return (error); 2954 } 2955 2956 extern int sdev_vnodeops_tbl_size; 2957 2958 /* 2959 * construct a new template with overrides from vtab 2960 */ 2961 static fs_operation_def_t * 2962 sdev_merge_vtab(const fs_operation_def_t tab[]) 2963 { 2964 fs_operation_def_t *new; 2965 const fs_operation_def_t *tab_entry; 2966 2967 /* make a copy of standard vnode ops table */ 2968 new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP); 2969 bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size); 2970 2971 /* replace the overrides from tab */ 2972 for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) { 2973 fs_operation_def_t *std_entry = new; 2974 while (std_entry->name) { 2975 if (strcmp(tab_entry->name, std_entry->name) == 0) { 2976 std_entry->func = tab_entry->func; 2977 break; 2978 } 2979 std_entry++; 2980 } 2981 if (std_entry->name == NULL) 2982 cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.", 2983 tab_entry->name); 2984 } 2985 2986 return (new); 2987 } 2988 2989 /* free memory allocated by sdev_merge_vtab */ 2990 static void 2991 sdev_free_vtab(fs_operation_def_t *new) 2992 { 2993 kmem_free(new, sdev_vnodeops_tbl_size); 2994 } 2995 2996 /* 2997 * a generic setattr() function 2998 * 2999 * note: flags only supports AT_UID and AT_GID. 3000 * Future enhancements can be done for other types, e.g. AT_MODE 3001 */ 3002 int 3003 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags, 3004 struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *, 3005 int), int protocol) 3006 { 3007 struct sdev_node *dv = VTOSDEV(vp); 3008 struct sdev_node *parent = dv->sdev_dotdot; 3009 struct vattr *get; 3010 uint_t mask = vap->va_mask; 3011 int error; 3012 3013 /* some sanity checks */ 3014 if (vap->va_mask & AT_NOSET) 3015 return (EINVAL); 3016 3017 if (vap->va_mask & AT_SIZE) { 3018 if (vp->v_type == VDIR) { 3019 return (EISDIR); 3020 } 3021 } 3022 3023 /* no need to set attribute, but do not fail either */ 3024 ASSERT(parent); 3025 rw_enter(&parent->sdev_contents, RW_READER); 3026 if (dv->sdev_state == SDEV_ZOMBIE) { 3027 rw_exit(&parent->sdev_contents); 3028 return (0); 3029 } 3030 3031 /* If backing store exists, just set it. */ 3032 if (dv->sdev_attrvp) { 3033 rw_exit(&parent->sdev_contents); 3034 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 3035 } 3036 3037 /* 3038 * Otherwise, for nodes with the persistence attribute, create it. 
	 * The same applies when non-timestamp attributes are being set
	 * on a node that is not dynamic.
	 */
	ASSERT(dv->sdev_attr);
	if (SDEV_IS_PERSIST(dv) ||
	    ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) {
		sdev_vattr_merge(dv, vap);
		rw_enter(&dv->sdev_contents, RW_WRITER);
		error = sdev_shadow_node(dv, cred);
		rw_exit(&dv->sdev_contents);
		rw_exit(&parent->sdev_contents);

		if (error)
			return (error);
		return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL));
	}

	/*
	 * sdev_attr was allocated in sdev_mknode
	 */
	rw_enter(&dv->sdev_contents, RW_WRITER);
	error = secpolicy_vnode_setattr(cred, vp, vap,
	    dv->sdev_attr, flags, sdev_unlocked_access, dv);
	if (error) {
		rw_exit(&dv->sdev_contents);
		rw_exit(&parent->sdev_contents);
		return (error);
	}

	get = dv->sdev_attr;
	if (mask & AT_MODE) {
		get->va_mode &= S_IFMT;
		get->va_mode |= vap->va_mode & ~S_IFMT;
	}

	if ((mask & AT_UID) || (mask & AT_GID)) {
		if (mask & AT_UID)
			get->va_uid = vap->va_uid;
		if (mask & AT_GID)
			get->va_gid = vap->va_gid;
		/*
		 * a callback must be provided if the protocol is set
		 */
		if ((protocol & AT_UID) || (protocol & AT_GID)) {
			ASSERT(callback);
			error = callback(dv, get, protocol);
			if (error) {
				rw_exit(&dv->sdev_contents);
				rw_exit(&parent->sdev_contents);
				return (error);
			}
		}
	}

	if (mask & AT_ATIME)
		get->va_atime = vap->va_atime;
	if (mask & AT_MTIME)
		get->va_mtime = vap->va_mtime;
	if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) {
		gethrestime(&get->va_ctime);
	}

	sdev_vattr_merge(dv, get);
	rw_exit(&dv->sdev_contents);
	rw_exit(&parent->sdev_contents);
	return (0);
}

/*
 * a generic inactive() function
 */
/*ARGSUSED*/
void
devname_inactive_func(struct vnode *vp, struct cred *cred,
    void (*callback)(struct vnode *))
{
	int clean;
	struct sdev_node *dv = VTOSDEV(vp);
	struct sdev_node *ddv = dv->sdev_dotdot;
	int state;

	rw_enter(&ddv->sdev_contents, RW_WRITER);
	state = dv->sdev_state;

	mutex_enter(&vp->v_lock);
	ASSERT(vp->v_count >= 1);

	if (vp->v_count == 1 && callback != NULL)
		callback(vp);

	clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE);

	/*
	 * When the last reference to a ZOMBIE node is released,
	 * clean up the sdev_node and release the hold on the backing
	 * store node, so that the ZOMBIE's backing store is also
	 * cleaned out.
	 */
	if (clean) {
		ASSERT(ddv);

		ddv->sdev_nlink--;
		if (vp->v_type == VDIR) {
			dv->sdev_nlink--;
		}
		if ((dv->sdev_flags & SDEV_STALE) == 0)
			avl_remove(&ddv->sdev_entries, dv);
		dv->sdev_nlink--;
		--vp->v_count;
		mutex_exit(&vp->v_lock);
		sdev_nodedestroy(dv, 0);
	} else {
		--vp->v_count;
		mutex_exit(&vp->v_lock);
	}
	rw_exit(&ddv->sdev_contents);
}