1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * utility routines for the /dev fs 27 */ 28 29 #include <sys/types.h> 30 #include <sys/param.h> 31 #include <sys/t_lock.h> 32 #include <sys/systm.h> 33 #include <sys/sysmacros.h> 34 #include <sys/user.h> 35 #include <sys/time.h> 36 #include <sys/vfs.h> 37 #include <sys/vnode.h> 38 #include <sys/file.h> 39 #include <sys/fcntl.h> 40 #include <sys/flock.h> 41 #include <sys/kmem.h> 42 #include <sys/uio.h> 43 #include <sys/errno.h> 44 #include <sys/stat.h> 45 #include <sys/cred.h> 46 #include <sys/dirent.h> 47 #include <sys/pathname.h> 48 #include <sys/cmn_err.h> 49 #include <sys/debug.h> 50 #include <sys/mode.h> 51 #include <sys/policy.h> 52 #include <fs/fs_subr.h> 53 #include <sys/mount.h> 54 #include <sys/fs/snode.h> 55 #include <sys/fs/dv_node.h> 56 #include <sys/fs/sdev_impl.h> 57 #include <sys/sunndi.h> 58 #include <sys/sunmdi.h> 59 #include <sys/conf.h> 60 #include <sys/proc.h> 61 #include <sys/user.h> 62 #include <sys/modctl.h> 63 64 #ifdef DEBUG 65 int sdev_debug = 0x00000001; 66 int sdev_debug_cache_flags = 0; 67 #endif 
68 69 /* 70 * globals 71 */ 72 /* prototype memory vattrs */ 73 vattr_t sdev_vattr_dir = { 74 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 75 VDIR, /* va_type */ 76 SDEV_DIRMODE_DEFAULT, /* va_mode */ 77 SDEV_UID_DEFAULT, /* va_uid */ 78 SDEV_GID_DEFAULT, /* va_gid */ 79 0, /* va_fsid */ 80 0, /* va_nodeid */ 81 0, /* va_nlink */ 82 0, /* va_size */ 83 0, /* va_atime */ 84 0, /* va_mtime */ 85 0, /* va_ctime */ 86 0, /* va_rdev */ 87 0, /* va_blksize */ 88 0, /* va_nblocks */ 89 0 /* va_vcode */ 90 }; 91 92 vattr_t sdev_vattr_lnk = { 93 AT_TYPE|AT_MODE, /* va_mask */ 94 VLNK, /* va_type */ 95 SDEV_LNKMODE_DEFAULT, /* va_mode */ 96 SDEV_UID_DEFAULT, /* va_uid */ 97 SDEV_GID_DEFAULT, /* va_gid */ 98 0, /* va_fsid */ 99 0, /* va_nodeid */ 100 0, /* va_nlink */ 101 0, /* va_size */ 102 0, /* va_atime */ 103 0, /* va_mtime */ 104 0, /* va_ctime */ 105 0, /* va_rdev */ 106 0, /* va_blksize */ 107 0, /* va_nblocks */ 108 0 /* va_vcode */ 109 }; 110 111 vattr_t sdev_vattr_blk = { 112 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 113 VBLK, /* va_type */ 114 S_IFBLK | SDEV_DEVMODE_DEFAULT, /* va_mode */ 115 SDEV_UID_DEFAULT, /* va_uid */ 116 SDEV_GID_DEFAULT, /* va_gid */ 117 0, /* va_fsid */ 118 0, /* va_nodeid */ 119 0, /* va_nlink */ 120 0, /* va_size */ 121 0, /* va_atime */ 122 0, /* va_mtime */ 123 0, /* va_ctime */ 124 0, /* va_rdev */ 125 0, /* va_blksize */ 126 0, /* va_nblocks */ 127 0 /* va_vcode */ 128 }; 129 130 vattr_t sdev_vattr_chr = { 131 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 132 VCHR, /* va_type */ 133 S_IFCHR | SDEV_DEVMODE_DEFAULT, /* va_mode */ 134 SDEV_UID_DEFAULT, /* va_uid */ 135 SDEV_GID_DEFAULT, /* va_gid */ 136 0, /* va_fsid */ 137 0, /* va_nodeid */ 138 0, /* va_nlink */ 139 0, /* va_size */ 140 0, /* va_atime */ 141 0, /* va_mtime */ 142 0, /* va_ctime */ 143 0, /* va_rdev */ 144 0, /* va_blksize */ 145 0, /* va_nblocks */ 146 0 /* va_vcode */ 147 }; 148 149 kmem_cache_t *sdev_node_cache; /* sdev_node cache */ 150 int devtype; /* fstype */ 
151 152 /* static */ 153 static struct vnodeops *sdev_get_vop(struct sdev_node *); 154 static void sdev_set_no_negcache(struct sdev_node *); 155 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []); 156 static void sdev_free_vtab(fs_operation_def_t *); 157 158 static void 159 sdev_prof_free(struct sdev_node *dv) 160 { 161 ASSERT(!SDEV_IS_GLOBAL(dv)); 162 if (dv->sdev_prof.dev_name) 163 nvlist_free(dv->sdev_prof.dev_name); 164 if (dv->sdev_prof.dev_map) 165 nvlist_free(dv->sdev_prof.dev_map); 166 if (dv->sdev_prof.dev_symlink) 167 nvlist_free(dv->sdev_prof.dev_symlink); 168 if (dv->sdev_prof.dev_glob_incdir) 169 nvlist_free(dv->sdev_prof.dev_glob_incdir); 170 if (dv->sdev_prof.dev_glob_excdir) 171 nvlist_free(dv->sdev_prof.dev_glob_excdir); 172 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 173 } 174 175 /* sdev_node cache constructor */ 176 /*ARGSUSED1*/ 177 static int 178 i_sdev_node_ctor(void *buf, void *cfarg, int flag) 179 { 180 struct sdev_node *dv = (struct sdev_node *)buf; 181 struct vnode *vp; 182 183 bzero(buf, sizeof (struct sdev_node)); 184 vp = dv->sdev_vnode = vn_alloc(flag); 185 if (vp == NULL) { 186 return (-1); 187 } 188 vp->v_data = dv; 189 rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL); 190 return (0); 191 } 192 193 /* sdev_node cache destructor */ 194 /*ARGSUSED1*/ 195 static void 196 i_sdev_node_dtor(void *buf, void *arg) 197 { 198 struct sdev_node *dv = (struct sdev_node *)buf; 199 struct vnode *vp = SDEVTOV(dv); 200 201 rw_destroy(&dv->sdev_contents); 202 vn_free(vp); 203 } 204 205 /* initialize sdev_node cache */ 206 void 207 sdev_node_cache_init() 208 { 209 int flags = 0; 210 211 #ifdef DEBUG 212 flags = sdev_debug_cache_flags; 213 if (flags) 214 sdcmn_err(("cache debug flags 0x%x\n", flags)); 215 #endif /* DEBUG */ 216 217 ASSERT(sdev_node_cache == NULL); 218 sdev_node_cache = kmem_cache_create("sdev_node_cache", 219 sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor, 220 NULL, NULL, NULL, flags); 221 } 
222 223 /* destroy sdev_node cache */ 224 void 225 sdev_node_cache_fini() 226 { 227 ASSERT(sdev_node_cache != NULL); 228 kmem_cache_destroy(sdev_node_cache); 229 sdev_node_cache = NULL; 230 } 231 232 /* 233 * Compare two nodes lexographically to balance avl tree 234 */ 235 static int 236 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2) 237 { 238 int rv; 239 if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0) 240 return (0); 241 return ((rv < 0) ? -1 : 1); 242 } 243 244 void 245 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state) 246 { 247 ASSERT(dv); 248 ASSERT(RW_WRITE_HELD(&dv->sdev_contents)); 249 dv->sdev_state = state; 250 } 251 252 static void 253 sdev_attr_update(struct sdev_node *dv, vattr_t *vap) 254 { 255 timestruc_t now; 256 struct vattr *attrp; 257 uint_t mask; 258 259 ASSERT(dv->sdev_attr); 260 ASSERT(vap); 261 262 attrp = dv->sdev_attr; 263 mask = vap->va_mask; 264 if (mask & AT_TYPE) 265 attrp->va_type = vap->va_type; 266 if (mask & AT_MODE) 267 attrp->va_mode = vap->va_mode; 268 if (mask & AT_UID) 269 attrp->va_uid = vap->va_uid; 270 if (mask & AT_GID) 271 attrp->va_gid = vap->va_gid; 272 if (mask & AT_RDEV) 273 attrp->va_rdev = vap->va_rdev; 274 275 gethrestime(&now); 276 attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now; 277 attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now; 278 attrp->va_ctime = (mask & AT_CTIME) ? 
vap->va_ctime : now; 279 } 280 281 static void 282 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap) 283 { 284 ASSERT(dv->sdev_attr == NULL); 285 ASSERT(vap->va_mask & AT_TYPE); 286 ASSERT(vap->va_mask & AT_MODE); 287 288 dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP); 289 sdev_attr_update(dv, vap); 290 } 291 292 /* alloc and initialize a sdev_node */ 293 int 294 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, 295 vattr_t *vap) 296 { 297 struct sdev_node *dv = NULL; 298 struct vnode *vp; 299 size_t nmlen, len; 300 devname_handle_t *dhl; 301 302 nmlen = strlen(nm) + 1; 303 if (nmlen > MAXNAMELEN) { 304 sdcmn_err9(("sdev_nodeinit: node name %s" 305 " too long\n", nm)); 306 *newdv = NULL; 307 return (ENAMETOOLONG); 308 } 309 310 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); 311 312 dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP); 313 bcopy(nm, dv->sdev_name, nmlen); 314 dv->sdev_namelen = nmlen - 1; /* '\0' not included */ 315 len = strlen(ddv->sdev_path) + strlen(nm) + 2; 316 dv->sdev_path = kmem_alloc(len, KM_SLEEP); 317 (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm); 318 /* overwritten for VLNK nodes */ 319 dv->sdev_symlink = NULL; 320 321 vp = SDEVTOV(dv); 322 vn_reinit(vp); 323 vp->v_vfsp = SDEVTOV(ddv)->v_vfsp; 324 if (vap) 325 vp->v_type = vap->va_type; 326 327 /* 328 * initialized to the parent's vnodeops. 
329 * maybe overwriten for a VDIR 330 */ 331 vn_setops(vp, vn_getops(SDEVTOV(ddv))); 332 vn_exists(vp); 333 334 dv->sdev_dotdot = NULL; 335 dv->sdev_attrvp = NULL; 336 if (vap) { 337 sdev_attr_alloc(dv, vap); 338 } else { 339 dv->sdev_attr = NULL; 340 } 341 342 dv->sdev_ino = sdev_mkino(dv); 343 dv->sdev_nlink = 0; /* updated on insert */ 344 dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */ 345 dv->sdev_flags |= SDEV_BUILD; 346 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); 347 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); 348 if (SDEV_IS_GLOBAL(ddv)) { 349 dv->sdev_flags |= SDEV_GLOBAL; 350 dhl = &(dv->sdev_handle); 351 dhl->dh_data = dv; 352 dhl->dh_args = NULL; 353 sdev_set_no_negcache(dv); 354 dv->sdev_gdir_gen = 0; 355 } else { 356 dv->sdev_flags &= ~SDEV_GLOBAL; 357 dv->sdev_origin = NULL; /* set later */ 358 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 359 dv->sdev_ldir_gen = 0; 360 dv->sdev_devtree_gen = 0; 361 } 362 363 rw_enter(&dv->sdev_contents, RW_WRITER); 364 sdev_set_nodestate(dv, SDEV_INIT); 365 rw_exit(&dv->sdev_contents); 366 *newdv = dv; 367 368 return (0); 369 } 370 371 /* 372 * transition a sdev_node into SDEV_READY state 373 */ 374 int 375 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp, 376 void *args, struct cred *cred) 377 { 378 int error = 0; 379 struct vnode *vp = SDEVTOV(dv); 380 vtype_t type; 381 382 ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap); 383 384 type = vap->va_type; 385 vp->v_type = type; 386 vp->v_rdev = vap->va_rdev; 387 rw_enter(&dv->sdev_contents, RW_WRITER); 388 if (type == VDIR) { 389 dv->sdev_nlink = 2; 390 dv->sdev_flags &= ~SDEV_PERSIST; 391 dv->sdev_flags &= ~SDEV_DYNAMIC; 392 vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */ 393 ASSERT(dv->sdev_dotdot); 394 ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR); 395 vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev; 396 avl_create(&dv->sdev_entries, 397 (int (*)(const void *, const void 
*))sdev_compare_nodes, 398 sizeof (struct sdev_node), 399 offsetof(struct sdev_node, sdev_avllink)); 400 } else if (type == VLNK) { 401 ASSERT(args); 402 dv->sdev_nlink = 1; 403 dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP); 404 } else { 405 dv->sdev_nlink = 1; 406 } 407 408 if (!(SDEV_IS_GLOBAL(dv))) { 409 dv->sdev_origin = (struct sdev_node *)args; 410 dv->sdev_flags &= ~SDEV_PERSIST; 411 } 412 413 /* 414 * shadow node is created here OR 415 * if failed (indicated by dv->sdev_attrvp == NULL), 416 * created later in sdev_setattr 417 */ 418 if (avp) { 419 dv->sdev_attrvp = avp; 420 } else { 421 if (dv->sdev_attr == NULL) { 422 sdev_attr_alloc(dv, vap); 423 } else { 424 sdev_attr_update(dv, vap); 425 } 426 427 if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv)) 428 error = sdev_shadow_node(dv, cred); 429 } 430 431 if (error == 0) { 432 /* transition to READY state */ 433 sdev_set_nodestate(dv, SDEV_READY); 434 sdev_nc_node_exists(dv); 435 } else { 436 sdev_set_nodestate(dv, SDEV_ZOMBIE); 437 } 438 rw_exit(&dv->sdev_contents); 439 return (error); 440 } 441 442 /* 443 * setting ZOMBIE state 444 */ 445 static int 446 sdev_nodezombied(struct sdev_node *dv) 447 { 448 rw_enter(&dv->sdev_contents, RW_WRITER); 449 sdev_set_nodestate(dv, SDEV_ZOMBIE); 450 rw_exit(&dv->sdev_contents); 451 return (0); 452 } 453 454 /* 455 * Build the VROOT sdev_node. 
456 */ 457 /*ARGSUSED*/ 458 struct sdev_node * 459 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp, 460 struct vnode *avp, struct cred *cred) 461 { 462 struct sdev_node *dv; 463 struct vnode *vp; 464 char devdir[] = "/dev"; 465 466 ASSERT(sdev_node_cache != NULL); 467 ASSERT(avp); 468 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); 469 vp = SDEVTOV(dv); 470 vn_reinit(vp); 471 vp->v_flag |= VROOT; 472 vp->v_vfsp = vfsp; 473 vp->v_type = VDIR; 474 vp->v_rdev = devdev; 475 vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */ 476 vn_exists(vp); 477 478 if (vfsp->vfs_mntpt) 479 dv->sdev_name = i_ddi_strdup( 480 (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP); 481 else 482 /* vfs_mountdev1 set mount point later */ 483 dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP); 484 dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */ 485 dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP); 486 dv->sdev_ino = SDEV_ROOTINO; 487 dv->sdev_nlink = 2; /* name + . (no sdev_insert) */ 488 dv->sdev_dotdot = dv; /* .. 
== self */ 489 dv->sdev_attrvp = avp; 490 dv->sdev_attr = NULL; 491 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); 492 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); 493 if (strcmp(dv->sdev_name, "/dev") == 0) { 494 dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST; 495 bzero(&dv->sdev_handle, sizeof (dv->sdev_handle)); 496 dv->sdev_gdir_gen = 0; 497 } else { 498 dv->sdev_flags = SDEV_BUILD; 499 dv->sdev_flags &= ~SDEV_PERSIST; 500 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 501 dv->sdev_ldir_gen = 0; 502 dv->sdev_devtree_gen = 0; 503 } 504 505 avl_create(&dv->sdev_entries, 506 (int (*)(const void *, const void *))sdev_compare_nodes, 507 sizeof (struct sdev_node), 508 offsetof(struct sdev_node, sdev_avllink)); 509 510 rw_enter(&dv->sdev_contents, RW_WRITER); 511 sdev_set_nodestate(dv, SDEV_READY); 512 rw_exit(&dv->sdev_contents); 513 sdev_nc_node_exists(dv); 514 return (dv); 515 } 516 517 /* directory dependent vop table */ 518 struct sdev_vop_table { 519 char *vt_name; /* subdirectory name */ 520 const fs_operation_def_t *vt_service; /* vnodeops table */ 521 struct vnodeops *vt_vops; /* constructed vop */ 522 struct vnodeops **vt_global_vops; /* global container for vop */ 523 int (*vt_vtor)(struct sdev_node *); /* validate sdev_node */ 524 int vt_flags; 525 }; 526 527 /* 528 * A nice improvement would be to provide a plug-in mechanism 529 * for this table instead of a const table. 
530 */ 531 static struct sdev_vop_table vtab[] = 532 { 533 { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate, 534 SDEV_DYNAMIC | SDEV_VTOR }, 535 536 { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate, 537 SDEV_DYNAMIC | SDEV_VTOR }, 538 539 { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops, 540 devzvol_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR }, 541 542 { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE }, 543 544 { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate, 545 SDEV_DYNAMIC | SDEV_VTOR }, 546 547 { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops, 548 devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE }, 549 550 { "lofi", NULL, NULL, NULL, NULL, SDEV_ZONED }, 551 { "rlofi", NULL, NULL, NULL, NULL, SDEV_ZONED }, 552 553 { NULL, NULL, NULL, NULL, NULL, 0} 554 }; 555 556 struct sdev_vop_table * 557 sdev_match(struct sdev_node *dv) 558 { 559 int vlen; 560 int i; 561 562 for (i = 0; vtab[i].vt_name; i++) { 563 if (strcmp(vtab[i].vt_name, dv->sdev_name) == 0) 564 return (&vtab[i]); 565 if (vtab[i].vt_flags & SDEV_SUBDIR) { 566 char *ptr; 567 568 ASSERT(strlen(dv->sdev_path) > 5); 569 ptr = dv->sdev_path + 5; 570 vlen = strlen(vtab[i].vt_name); 571 if ((strncmp(vtab[i].vt_name, ptr, 572 vlen - 1) == 0) && ptr[vlen] == '/') 573 return (&vtab[i]); 574 } 575 576 } 577 return (NULL); 578 } 579 580 /* 581 * sets a directory's vnodeops if the directory is in the vtab; 582 */ 583 static struct vnodeops * 584 sdev_get_vop(struct sdev_node *dv) 585 { 586 struct sdev_vop_table *vtp; 587 char *path; 588 589 path = dv->sdev_path; 590 ASSERT(path); 591 592 /* gets the relative path to /dev/ */ 593 path += 5; 594 595 /* gets the vtab entry it matches */ 596 if ((vtp = sdev_match(dv)) != NULL) { 597 dv->sdev_flags |= vtp->vt_flags; 598 599 if (vtp->vt_vops) { 600 if (vtp->vt_global_vops) 601 *(vtp->vt_global_vops) = vtp->vt_vops; 602 return (vtp->vt_vops); 603 } 604 605 if (vtp->vt_service) { 
606 fs_operation_def_t *templ; 607 templ = sdev_merge_vtab(vtp->vt_service); 608 if (vn_make_ops(vtp->vt_name, 609 (const fs_operation_def_t *)templ, 610 &vtp->vt_vops) != 0) { 611 cmn_err(CE_PANIC, "%s: malformed vnode ops\n", 612 vtp->vt_name); 613 /*NOTREACHED*/ 614 } 615 if (vtp->vt_global_vops) { 616 *(vtp->vt_global_vops) = vtp->vt_vops; 617 } 618 sdev_free_vtab(templ); 619 return (vtp->vt_vops); 620 } 621 return (sdev_vnodeops); 622 } 623 624 /* child inherits the persistence of the parent */ 625 if (SDEV_IS_PERSIST(dv->sdev_dotdot)) 626 dv->sdev_flags |= SDEV_PERSIST; 627 628 return (sdev_vnodeops); 629 } 630 631 static void 632 sdev_set_no_negcache(struct sdev_node *dv) 633 { 634 int i; 635 char *path; 636 637 ASSERT(dv->sdev_path); 638 path = dv->sdev_path + strlen("/dev/"); 639 640 for (i = 0; vtab[i].vt_name; i++) { 641 if (strcmp(vtab[i].vt_name, path) == 0) { 642 if (vtab[i].vt_flags & SDEV_NO_NCACHE) 643 dv->sdev_flags |= SDEV_NO_NCACHE; 644 break; 645 } 646 } 647 } 648 649 void * 650 sdev_get_vtor(struct sdev_node *dv) 651 { 652 struct sdev_vop_table *vtp; 653 654 vtp = sdev_match(dv); 655 if (vtp) 656 return ((void *)vtp->vt_vtor); 657 else 658 return (NULL); 659 } 660 661 /* 662 * Build the base root inode 663 */ 664 ino_t 665 sdev_mkino(struct sdev_node *dv) 666 { 667 ino_t ino; 668 669 /* 670 * for now, follow the lead of tmpfs here 671 * need to someday understand the requirements here 672 */ 673 ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3); 674 ino += SDEV_ROOTINO + 1; 675 676 return (ino); 677 } 678 679 int 680 sdev_getlink(struct vnode *linkvp, char **link) 681 { 682 int err; 683 char *buf; 684 struct uio uio = {0}; 685 struct iovec iov = {0}; 686 687 if (linkvp == NULL) 688 return (ENOENT); 689 ASSERT(linkvp->v_type == VLNK); 690 691 buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 692 iov.iov_base = buf; 693 iov.iov_len = MAXPATHLEN; 694 uio.uio_iov = &iov; 695 uio.uio_iovcnt = 1; 696 uio.uio_resid = MAXPATHLEN; 697 uio.uio_segflg = UIO_SYSSPACE; 
698 uio.uio_llimit = MAXOFFSET_T; 699 700 err = VOP_READLINK(linkvp, &uio, kcred, NULL); 701 if (err) { 702 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf); 703 kmem_free(buf, MAXPATHLEN); 704 return (ENOENT); 705 } 706 707 /* mission complete */ 708 *link = i_ddi_strdup(buf, KM_SLEEP); 709 kmem_free(buf, MAXPATHLEN); 710 return (0); 711 } 712 713 /* 714 * A convenient wrapper to get the devfs node vnode for a device 715 * minor functionality: readlink() of a /dev symlink 716 * Place the link into dv->sdev_symlink 717 */ 718 static int 719 sdev_follow_link(struct sdev_node *dv) 720 { 721 int err; 722 struct vnode *linkvp; 723 char *link = NULL; 724 725 linkvp = SDEVTOV(dv); 726 if (linkvp == NULL) 727 return (ENOENT); 728 ASSERT(linkvp->v_type == VLNK); 729 err = sdev_getlink(linkvp, &link); 730 if (err) { 731 (void) sdev_nodezombied(dv); 732 dv->sdev_symlink = NULL; 733 return (ENOENT); 734 } 735 736 ASSERT(link != NULL); 737 dv->sdev_symlink = link; 738 return (0); 739 } 740 741 static int 742 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs) 743 { 744 vtype_t otype = SDEVTOV(dv)->v_type; 745 746 /* 747 * existing sdev_node has a different type. 748 */ 749 if (otype != nvap->va_type) { 750 sdcmn_err9(("sdev_node_check: existing node " 751 " %s type %d does not match new node type %d\n", 752 dv->sdev_name, otype, nvap->va_type)); 753 return (EEXIST); 754 } 755 756 /* 757 * For a symlink, the target should be the same. 
758 */ 759 if (otype == VLNK) { 760 ASSERT(nargs != NULL); 761 ASSERT(dv->sdev_symlink != NULL); 762 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) { 763 sdcmn_err9(("sdev_node_check: existing node " 764 " %s has different symlink %s as new node " 765 " %s\n", dv->sdev_name, dv->sdev_symlink, 766 (char *)nargs)); 767 return (EEXIST); 768 } 769 } 770 771 return (0); 772 } 773 774 /* 775 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready() 776 * 777 * arguments: 778 * - ddv (parent) 779 * - nm (child name) 780 * - newdv (sdev_node for nm is returned here) 781 * - vap (vattr for the node to be created, va_type should be set. 782 * - avp (attribute vnode) 783 * the defaults should be used if unknown) 784 * - cred 785 * - args 786 * . tnm (for VLNK) 787 * . global sdev_node (for !SDEV_GLOBAL) 788 * - state: SDEV_INIT, SDEV_READY 789 * 790 * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT) 791 * 792 * NOTE: directory contents writers lock needs to be held before 793 * calling this routine. 
794 */ 795 int 796 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, 797 struct vattr *vap, struct vnode *avp, void *args, struct cred *cred, 798 sdev_node_state_t state) 799 { 800 int error = 0; 801 sdev_node_state_t node_state; 802 struct sdev_node *dv = NULL; 803 804 ASSERT(state != SDEV_ZOMBIE); 805 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 806 807 if (*newdv) { 808 dv = *newdv; 809 } else { 810 /* allocate and initialize a sdev_node */ 811 if (ddv->sdev_state == SDEV_ZOMBIE) { 812 sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n", 813 ddv->sdev_path)); 814 return (ENOENT); 815 } 816 817 error = sdev_nodeinit(ddv, nm, &dv, vap); 818 if (error != 0) { 819 sdcmn_err9(("sdev_mknode: error %d," 820 " name %s can not be initialized\n", 821 error, nm)); 822 return (error); 823 } 824 ASSERT(dv); 825 826 /* insert into the directory cache */ 827 error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD); 828 if (error) { 829 sdcmn_err9(("sdev_mknode: node %s can not" 830 " be added into directory cache\n", nm)); 831 return (ENOENT); 832 } 833 } 834 835 ASSERT(dv); 836 node_state = dv->sdev_state; 837 ASSERT(node_state != SDEV_ZOMBIE); 838 839 if (state == SDEV_READY) { 840 switch (node_state) { 841 case SDEV_INIT: 842 error = sdev_nodeready(dv, vap, avp, args, cred); 843 if (error) { 844 sdcmn_err9(("sdev_mknode: node %s can NOT" 845 " be transitioned into READY state, " 846 "error %d\n", nm, error)); 847 } 848 break; 849 case SDEV_READY: 850 /* 851 * Do some sanity checking to make sure 852 * the existing sdev_node is what has been 853 * asked for. 
854 */ 855 error = sdev_node_check(dv, vap, args); 856 break; 857 default: 858 break; 859 } 860 } 861 862 if (!error) { 863 *newdv = dv; 864 ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE); 865 } else { 866 SDEV_SIMPLE_RELE(dv); 867 *newdv = NULL; 868 } 869 870 return (error); 871 } 872 873 /* 874 * convenient wrapper to change vp's ATIME, CTIME and MTIME 875 */ 876 void 877 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask) 878 { 879 struct vattr attr; 880 timestruc_t now; 881 int err; 882 883 ASSERT(vp); 884 gethrestime(&now); 885 if (mask & AT_CTIME) 886 attr.va_ctime = now; 887 if (mask & AT_MTIME) 888 attr.va_mtime = now; 889 if (mask & AT_ATIME) 890 attr.va_atime = now; 891 892 attr.va_mask = (mask & AT_TIMES); 893 err = VOP_SETATTR(vp, &attr, 0, cred, NULL); 894 if (err && (err != EROFS)) { 895 sdcmn_err(("update timestamps error %d\n", err)); 896 } 897 } 898 899 /* 900 * the backing store vnode is released here 901 */ 902 /*ARGSUSED1*/ 903 void 904 sdev_nodedestroy(struct sdev_node *dv, uint_t flags) 905 { 906 /* no references */ 907 ASSERT(dv->sdev_nlink == 0); 908 909 if (dv->sdev_attrvp != NULLVP) { 910 VN_RELE(dv->sdev_attrvp); 911 /* 912 * reset the attrvp so that no more 913 * references can be made on this already 914 * vn_rele() vnode 915 */ 916 dv->sdev_attrvp = NULLVP; 917 } 918 919 if (dv->sdev_attr != NULL) { 920 kmem_free(dv->sdev_attr, sizeof (struct vattr)); 921 dv->sdev_attr = NULL; 922 } 923 924 if (dv->sdev_name != NULL) { 925 kmem_free(dv->sdev_name, dv->sdev_namelen + 1); 926 dv->sdev_name = NULL; 927 } 928 929 if (dv->sdev_symlink != NULL) { 930 kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1); 931 dv->sdev_symlink = NULL; 932 } 933 934 if (dv->sdev_path) { 935 kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1); 936 dv->sdev_path = NULL; 937 } 938 939 if (!SDEV_IS_GLOBAL(dv)) 940 sdev_prof_free(dv); 941 942 if (SDEVTOV(dv)->v_type == VDIR) { 943 ASSERT(SDEV_FIRST_ENTRY(dv) == NULL); 944 
avl_destroy(&dv->sdev_entries); 945 } 946 947 mutex_destroy(&dv->sdev_lookup_lock); 948 cv_destroy(&dv->sdev_lookup_cv); 949 950 /* return node to initial state as per constructor */ 951 (void) memset((void *)&dv->sdev_instance_data, 0, 952 sizeof (dv->sdev_instance_data)); 953 vn_invalid(SDEVTOV(dv)); 954 kmem_cache_free(sdev_node_cache, dv); 955 } 956 957 /* 958 * DIRECTORY CACHE lookup 959 */ 960 struct sdev_node * 961 sdev_findbyname(struct sdev_node *ddv, char *nm) 962 { 963 struct sdev_node *dv; 964 struct sdev_node dvtmp; 965 avl_index_t where; 966 967 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); 968 969 dvtmp.sdev_name = nm; 970 dv = avl_find(&ddv->sdev_entries, &dvtmp, &where); 971 if (dv) { 972 ASSERT(dv->sdev_dotdot == ddv); 973 ASSERT(strcmp(dv->sdev_name, nm) == 0); 974 SDEV_HOLD(dv); 975 return (dv); 976 } 977 return (NULL); 978 } 979 980 /* 981 * Inserts a new sdev_node in a parent directory 982 */ 983 void 984 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv) 985 { 986 avl_index_t where; 987 988 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 989 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 990 ASSERT(ddv->sdev_nlink >= 2); 991 ASSERT(dv->sdev_nlink == 0); 992 993 dv->sdev_dotdot = ddv; 994 VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL); 995 avl_insert(&ddv->sdev_entries, dv, where); 996 ddv->sdev_nlink++; 997 } 998 999 /* 1000 * The following check is needed because while sdev_nodes are linked 1001 * in SDEV_INIT state, they have their link counts incremented only 1002 * in SDEV_READY state. 1003 */ 1004 static void 1005 decr_link(struct sdev_node *dv) 1006 { 1007 if (dv->sdev_state != SDEV_INIT) 1008 dv->sdev_nlink--; 1009 else 1010 ASSERT(dv->sdev_nlink == 0); 1011 } 1012 1013 /* 1014 * Delete an existing dv from directory cache 1015 * 1016 * In the case of a node is still held by non-zero reference count, 1017 * the node is put into ZOMBIE state. 
/*
 * Delete an existing dv from directory cache
 *
 * In the case where a node is still held (its vnode reference count is
 * greater than one), the node is put into ZOMBIE state if it was READY,
 * one reference is dropped, and EBUSY is returned.  Once the reference
 * count reaches "0", the node is unlinked and destroyed,
 * in sdev_inactive().
 *
 * Otherwise the node is unlinked from ddv, its link counts are brought
 * back to zero, it is destroyed, and 0 is returned.
 *
 * The caller must hold ddv's contents lock as writer.
 */
static int
sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
{
	struct vnode *vp;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	vp = SDEVTOV(dv);
	/* v_lock is held across the v_count test and the decrement */
	mutex_enter(&vp->v_lock);

	/* dv is held still */
	if (vp->v_count > 1) {
		rw_enter(&dv->sdev_contents, RW_WRITER);
		if (dv->sdev_state == SDEV_READY) {
			sdcmn_err9((
			    "sdev_dirdelete: node %s busy with count %d\n",
			    dv->sdev_name, vp->v_count));
			dv->sdev_state = SDEV_ZOMBIE;
		}
		rw_exit(&dv->sdev_contents);
		/* drop one reference; the node itself stays in the cache */
		--vp->v_count;
		mutex_exit(&vp->v_lock);
		return (EBUSY);
	}
	ASSERT(vp->v_count == 1);

	/* unlink from the memory cache */
	ddv->sdev_nlink--;	/* .. to above */
	if (vp->v_type == VDIR) {
		decr_link(dv);		/* . to self */
	}

	avl_remove(&ddv->sdev_entries, dv);
	decr_link(dv);	/* name, back to zero */
	vp->v_count--;
	mutex_exit(&vp->v_lock);

	/* destroy the node */
	sdev_nodedestroy(dv, 0);
	return (0);
}

/*
 * check if the source is in the path of the target
 *
 * source and target are different
 *
 * Walks the ".." chain upwards from tdv until the root (a node that is
 * its own "..") is reached.  Returns EINVAL if sdv is met on the way,
 * 0 otherwise.
 */
/*ARGSUSED2*/
static int
sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
{
	int error = 0;
	struct sdev_node *dotdot, *dir;

	dotdot = tdv->sdev_dotdot;
	ASSERT(dotdot);

	/* fs root */
	if (dotdot == tdv) {
		return (0);
	}

	for (;;) {
		/*
		 * avoid error cases like
		 *	mv a a/b
		 *	mv a a/b/c
		 *	etc.
		 */
		if (dotdot == sdv) {
			error = EINVAL;
			break;
		}

		/* step one level up */
		dir = dotdot;
		dotdot = dir->sdev_dotdot;

		/* done checking because root is reached */
		if (dir == dotdot) {
			break;
		}
	}
	return (error);
}

/*
 * Rename node odv (in directory oddv) to name nnm in directory nddv.
 *
 * *ndvp is the existing destination node, or NULL if there is none.
 * An existing destination is removed first (from the directory cache
 * and, where applicable, from the backing store); a fresh node is then
 * made from the source's attributes, and for a directory the contents
 * are renamed into it recursively.
 *
 * Both directories' contents locks are taken as writer here (only one
 * when oddv == nddv) and dropped before returning.
 *
 * Returns 0 on success or when the source and destination are the same
 * node; otherwise an errno value such as ENOENT, EINVAL, EISDIR,
 * ENOTDIR, EBUSY or EEXIST, or an error from the underlying VOP calls.
 * On the success path the hold on *ndvp is released with SDEV_RELE.
 */
int
sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
    struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
    struct cred *cred)
{
	int error = 0;
	struct vnode *ovp = SDEVTOV(odv);
	struct vnode *nvp;
	struct vattr vattr;
	int doingdir = (ovp->v_type == VDIR);
	char *link = NULL;
	int samedir = (oddv == nddv) ? 1 : 0;
	int bkstore = 0;
	struct sdev_node *idv = NULL;
	struct sdev_node *ndv = NULL;
	timestruc_t now;

	/* fetch the source attributes before any lock is taken */
	vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
	if (error)
		return (error);

	/* when both directories are the same, lock it only once */
	if (!samedir)
		rw_enter(&oddv->sdev_contents, RW_WRITER);
	rw_enter(&nddv->sdev_contents, RW_WRITER);

	/*
	 * the source may have been deleted by another thread before
	 * we get here.
	 */
	if (odv->sdev_state != SDEV_READY) {
		error = ENOENT;
		goto err_out;
	}

	/* a directory cannot be moved into itself */
	if (doingdir && (odv == nddv)) {
		error = EINVAL;
		goto err_out;
	}

	/*
	 * If renaming a directory, and the parents are different (".." must be
	 * changed) then the source dir must not be in the dir hierarchy above
	 * the target since it would orphan everything below the source dir.
	 */
	if (doingdir && (oddv != nddv)) {
		error = sdev_checkpath(odv, nddv, cred);
		if (error)
			goto err_out;
	}

	/* destination existing */
	if (*ndvp) {
		nvp = SDEVTOV(*ndvp);
		ASSERT(nvp);

		/* handling renaming to itself */
		if (odv == *ndvp) {
			error = 0;
			goto err_out;
		}

		if (nvp->v_type == VDIR) {
			if (!doingdir) {
				error = EISDIR;
				goto err_out;
			}

			if (vn_vfswlock(nvp)) {
				error = EBUSY;
				goto err_out;
			}

			/* refuse to replace a directory that is mounted on */
			if (vn_mountedvfs(nvp) != NULL) {
				vn_vfsunlock(nvp);
				error = EBUSY;
				goto err_out;
			}

			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
			if ((*ndvp)->sdev_nlink > 2) {
				vn_vfsunlock(nvp);
				error = EEXIST;
				goto err_out;
			}
			vn_vfsunlock(nvp);

			/*
			 * drop the destination from the cache, then remove
			 * it from the backing store
			 */
			(void) sdev_dirdelete(nddv, *ndvp);
			*ndvp = NULL;
			ASSERT(nddv->sdev_attrvp);
			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
			    nddv->sdev_attrvp, cred, NULL, 0);
			if (error)
				goto err_out;
		} else {
			if (doingdir) {
				error = ENOTDIR;
				goto err_out;
			}

			/*
			 * remember persistence now; *ndvp is cleared
			 * below before the backing store is touched
			 */
			if (SDEV_IS_PERSIST((*ndvp))) {
				bkstore = 1;
			}

			/*
			 * get rid of the node from the directory cache
			 * note, in case EBUSY is returned, the ZOMBIE
			 * node is taken care in sdev_mknode.
			 */
			(void) sdev_dirdelete(nddv, *ndvp);
			*ndvp = NULL;
			if (bkstore) {
				ASSERT(nddv->sdev_attrvp);
				error = VOP_REMOVE(nddv->sdev_attrvp,
				    nnm, cred, NULL, 0);
				if (error)
					goto err_out;
			}
		}
	}

	/* fix the source for a symlink */
	if (vattr.va_type == VLNK) {
		if (odv->sdev_symlink == NULL) {
			error = sdev_follow_link(odv);
			if (error) {
				error = ENOENT;
				goto err_out;
			}
		}
		ASSERT(odv->sdev_symlink);
		/* private copy, freed right after sdev_mknode() below */
		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
	}

	/*
	 * make a fresh node from the source attrs
	 */
	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
	    NULL, (void *)link, cred, SDEV_READY);

	if (link)
		kmem_free(link, strlen(link) + 1);

	if (error)
		goto err_out;
	ASSERT(*ndvp);
	ASSERT((*ndvp)->sdev_state == SDEV_READY);

	/* move dir contents */
	if (doingdir) {
		for (idv = SDEV_FIRST_ENTRY(odv); idv;
		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
			/* each child is renamed under its own name */
			error = sdev_rnmnode(odv, idv,
			    (struct sdev_node *)(*ndvp), &ndv,
			    idv->sdev_name, cred);
			if (error)
				goto err_out;
			/* reset so the next child starts with no target */
			ndv = NULL;
		}
	}

	/*
	 * stamp the new node: through its attribute vnode when it has
	 * one, else directly in the in-memory attributes
	 */
	if ((*ndvp)->sdev_attrvp) {
		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
		    AT_CTIME|AT_ATIME);
	} else {
		ASSERT((*ndvp)->sdev_attr);
		gethrestime(&now);
		(*ndvp)->sdev_attr->va_ctime = now;
		(*ndvp)->sdev_attr->va_atime = now;
	}

	/* likewise stamp the destination directory */
	if (nddv->sdev_attrvp) {
		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
		    AT_MTIME|AT_ATIME);
	} else {
		ASSERT(nddv->sdev_attr);
		gethrestime(&now);
		nddv->sdev_attr->va_mtime = now;
		nddv->sdev_attr->va_atime = now;
	}
	rw_exit(&nddv->sdev_contents);
	if (!samedir)
		rw_exit(&oddv->sdev_contents);

	SDEV_RELE(*ndvp);
	return (error);

err_out:
	/* common exit for failures and for the rename-to-self case */
	rw_exit(&nddv->sdev_contents);
	if (!samedir)
		rw_exit(&oddv->sdev_contents);
	return (error);
}

/*
 * Merge sdev_node specific information into an attribute structure.
 *
 * Link count, node id, fsid and type always come from the node; va_rdev
 * is the vnode's device number for VCHR/VBLK and 0 for everything else.
 * For symlinks and devices the file-type bits of va_mode are replaced
 * to match the vnode type.
 *
 * note: sdev_node is not locked here
 */
void
sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
{
	struct vnode *vp = SDEVTOV(dv);

	vap->va_nlink = dv->sdev_nlink;
	vap->va_nodeid = dv->sdev_ino;
	/* default fsid is the parent's v_rdev; overridden for VDIR below */
	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
	vap->va_type = vp->v_type;

	if (vp->v_type == VDIR) {
		vap->va_rdev = 0;
		vap->va_fsid = vp->v_rdev;
	} else if (vp->v_type == VLNK) {
		vap->va_rdev = 0;
		vap->va_mode &= ~S_IFMT;
		vap->va_mode |= S_IFLNK;
	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
		vap->va_rdev = vp->v_rdev;
		vap->va_mode &= ~S_IFMT;
		if (vap->va_type == VCHR)
			vap->va_mode |= S_IFCHR;
		else
			vap->va_mode |= S_IFBLK;
	} else {
		vap->va_rdev = 0;
	}
}

/*
 * Return the prototype vattr for the given vnode type, or NULL for a
 * type other than VDIR, VCHR, VBLK and VLNK.  The returned pointer
 * refers to a file-global structure, not to a copy.
 */
struct vattr *
sdev_getdefault_attr(enum vtype type)
{
	if (type == VDIR)
		return (&sdev_vattr_dir);
	else if (type == VCHR)
		return (&sdev_vattr_chr);
	else if (type == VBLK)
		return (&sdev_vattr_blk);
	else if (type == VLNK)
		return (&sdev_vattr_lnk);
	else
		return (NULL);
}

/*
 * Hand out the vnode that represents dv in *vpp.
 *
 * For VCHR and VBLK the vnode obtained from specvp() is returned and
 * the node's own vnode is released with VN_RELE; ENOSYS is returned if
 * specvp() yields NULLVP.  Every other type is returned as is, with 0.
 */
int
sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
{
	int rv = 0;
	struct vnode *vp = SDEVTOV(dv);

	switch (vp->v_type) {
	case VCHR:
	case VBLK:
		/*
		 * If vnode is a device, return special vnode instead
		 * (though it knows all about -us- via sp->s_realvp)
		 */
		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
		VN_RELE(vp);
		if (*vpp == NULLVP)
			rv = ENOSYS;
		break;
	default:	/* most types are returned as is */
		*vpp = vp;
		break;
	}
	return (rv);
}
ufs 1376 */ 1377 int 1378 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp) 1379 { 1380 struct vnode *rdvp = ddv->sdev_attrvp; 1381 int rval = 0; 1382 1383 ASSERT(rdvp); 1384 1385 rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL, 1386 NULL); 1387 return (rval); 1388 } 1389 1390 static int 1391 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred) 1392 { 1393 struct sdev_node *dv = NULL; 1394 char *nm; 1395 struct vnode *dirvp; 1396 int error; 1397 vnode_t *vp; 1398 int eof; 1399 struct iovec iov; 1400 struct uio uio; 1401 struct dirent64 *dp; 1402 dirent64_t *dbuf; 1403 size_t dbuflen; 1404 struct vattr vattr; 1405 char *link = NULL; 1406 1407 if (ddv->sdev_attrvp == NULL) 1408 return (0); 1409 if (!(ddv->sdev_flags & SDEV_BUILD)) 1410 return (0); 1411 1412 dirvp = ddv->sdev_attrvp; 1413 VN_HOLD(dirvp); 1414 dbuf = kmem_zalloc(dlen, KM_SLEEP); 1415 1416 uio.uio_iov = &iov; 1417 uio.uio_iovcnt = 1; 1418 uio.uio_segflg = UIO_SYSSPACE; 1419 uio.uio_fmode = 0; 1420 uio.uio_extflg = UIO_COPY_CACHED; 1421 uio.uio_loffset = 0; 1422 uio.uio_llimit = MAXOFFSET_T; 1423 1424 eof = 0; 1425 error = 0; 1426 while (!error && !eof) { 1427 uio.uio_resid = dlen; 1428 iov.iov_base = (char *)dbuf; 1429 iov.iov_len = dlen; 1430 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1431 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1432 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1433 1434 dbuflen = dlen - uio.uio_resid; 1435 if (error || dbuflen == 0) 1436 break; 1437 1438 if (!(ddv->sdev_flags & SDEV_BUILD)) 1439 break; 1440 1441 for (dp = dbuf; ((intptr_t)dp < 1442 (intptr_t)dbuf + dbuflen); 1443 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1444 nm = dp->d_name; 1445 1446 if (strcmp(nm, ".") == 0 || 1447 strcmp(nm, "..") == 0) 1448 continue; 1449 1450 vp = NULLVP; 1451 dv = sdev_cache_lookup(ddv, nm); 1452 if (dv) { 1453 if (dv->sdev_state != SDEV_ZOMBIE) { 1454 SDEV_SIMPLE_RELE(dv); 1455 } else { 1456 
/* 1457 * A ZOMBIE node may not have been 1458 * cleaned up from the backing store, 1459 * bypass this entry in this case, 1460 * and clean it up from the directory 1461 * cache if this is the last call. 1462 */ 1463 (void) sdev_dirdelete(ddv, dv); 1464 } 1465 continue; 1466 } 1467 1468 /* refill the cache if not already */ 1469 error = devname_backstore_lookup(ddv, nm, &vp); 1470 if (error) 1471 continue; 1472 1473 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 1474 error = VOP_GETATTR(vp, &vattr, 0, cred, NULL); 1475 if (error) 1476 continue; 1477 1478 if (vattr.va_type == VLNK) { 1479 error = sdev_getlink(vp, &link); 1480 if (error) { 1481 continue; 1482 } 1483 ASSERT(link != NULL); 1484 } 1485 1486 if (!rw_tryupgrade(&ddv->sdev_contents)) { 1487 rw_exit(&ddv->sdev_contents); 1488 rw_enter(&ddv->sdev_contents, RW_WRITER); 1489 } 1490 error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link, 1491 cred, SDEV_READY); 1492 rw_downgrade(&ddv->sdev_contents); 1493 1494 if (link != NULL) { 1495 kmem_free(link, strlen(link) + 1); 1496 link = NULL; 1497 } 1498 1499 if (!error) { 1500 ASSERT(dv); 1501 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1502 SDEV_SIMPLE_RELE(dv); 1503 } 1504 vp = NULL; 1505 dv = NULL; 1506 } 1507 } 1508 1509 done: 1510 VN_RELE(dirvp); 1511 kmem_free(dbuf, dlen); 1512 1513 return (error); 1514 } 1515 1516 void 1517 sdev_filldir_dynamic(struct sdev_node *ddv) 1518 { 1519 int error; 1520 int i; 1521 struct vattr vattr; 1522 struct vattr *vap = &vattr; 1523 char *nm = NULL; 1524 struct sdev_node *dv = NULL; 1525 1526 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1527 ASSERT((ddv->sdev_flags & SDEV_BUILD)); 1528 1529 *vap = *sdev_getdefault_attr(VDIR); /* note structure copy here */ 1530 gethrestime(&vap->va_atime); 1531 vap->va_mtime = vap->va_atime; 1532 vap->va_ctime = vap->va_atime; 1533 for (i = 0; vtab[i].vt_name != NULL; i++) { 1534 nm = vtab[i].vt_name; 1535 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1536 dv = NULL; 1537 error = sdev_mknode(ddv, nm, &dv, 
vap, NULL, 1538 NULL, kcred, SDEV_READY); 1539 if (error) { 1540 cmn_err(CE_WARN, "%s/%s: error %d\n", 1541 ddv->sdev_name, nm, error); 1542 } else { 1543 ASSERT(dv); 1544 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1545 SDEV_SIMPLE_RELE(dv); 1546 } 1547 } 1548 } 1549 1550 /* 1551 * Creating a backing store entry based on sdev_attr. 1552 * This is called either as part of node creation in a persistent directory 1553 * or from setattr/setsecattr to persist access attributes across reboot. 1554 */ 1555 int 1556 sdev_shadow_node(struct sdev_node *dv, struct cred *cred) 1557 { 1558 int error = 0; 1559 struct vnode *dvp = SDEVTOV(dv->sdev_dotdot); 1560 struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp; 1561 struct vattr *vap = dv->sdev_attr; 1562 char *nm = dv->sdev_name; 1563 struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL; 1564 1565 ASSERT(dv && dv->sdev_name && rdvp); 1566 ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL); 1567 1568 lookup: 1569 /* try to find it in the backing store */ 1570 error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL, 1571 NULL); 1572 if (error == 0) { 1573 if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) { 1574 VN_HOLD(rrvp); 1575 VN_RELE(*rvp); 1576 *rvp = rrvp; 1577 } 1578 1579 kmem_free(dv->sdev_attr, sizeof (vattr_t)); 1580 dv->sdev_attr = NULL; 1581 dv->sdev_attrvp = *rvp; 1582 return (0); 1583 } 1584 1585 /* let's try to persist the node */ 1586 gethrestime(&vap->va_atime); 1587 vap->va_mtime = vap->va_atime; 1588 vap->va_ctime = vap->va_atime; 1589 vap->va_mask |= AT_TYPE|AT_MODE; 1590 switch (vap->va_type) { 1591 case VDIR: 1592 error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL); 1593 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n", 1594 (void *)(*rvp), error)); 1595 break; 1596 case VCHR: 1597 case VBLK: 1598 case VREG: 1599 case VDOOR: 1600 error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE, 1601 rvp, cred, 0, NULL, NULL); 1602 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n", 1603 
(void *)(*rvp), error)); 1604 if (!error) 1605 VN_RELE(*rvp); 1606 break; 1607 case VLNK: 1608 ASSERT(dv->sdev_symlink); 1609 error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred, 1610 NULL, 0); 1611 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n", 1612 error)); 1613 break; 1614 default: 1615 cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node " 1616 "create\n", nm); 1617 /*NOTREACHED*/ 1618 } 1619 1620 /* go back to lookup to factor out spec node and set attrvp */ 1621 if (error == 0) 1622 goto lookup; 1623 1624 sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error)); 1625 return (error); 1626 } 1627 1628 static int 1629 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm) 1630 { 1631 int error = 0; 1632 struct sdev_node *dup = NULL; 1633 1634 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1635 if ((dup = sdev_findbyname(ddv, nm)) == NULL) { 1636 sdev_direnter(ddv, *dv); 1637 } else { 1638 if (dup->sdev_state == SDEV_ZOMBIE) { 1639 error = sdev_dirdelete(ddv, dup); 1640 /* 1641 * The ZOMBIE node is still hanging 1642 * around with more than one reference counts. 
1643 * Fail the new node creation so that 1644 * the directory cache won't have 1645 * duplicate entries for the same named node 1646 */ 1647 if (error == EBUSY) { 1648 SDEV_SIMPLE_RELE(*dv); 1649 sdev_nodedestroy(*dv, 0); 1650 *dv = NULL; 1651 return (error); 1652 } 1653 sdev_direnter(ddv, *dv); 1654 } else { 1655 ASSERT((*dv)->sdev_state != SDEV_ZOMBIE); 1656 SDEV_SIMPLE_RELE(*dv); 1657 sdev_nodedestroy(*dv, 0); 1658 *dv = dup; 1659 } 1660 } 1661 1662 return (0); 1663 } 1664 1665 static int 1666 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv) 1667 { 1668 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1669 return (sdev_dirdelete(ddv, *dv)); 1670 } 1671 1672 /* 1673 * update the in-core directory cache 1674 */ 1675 int 1676 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm, 1677 sdev_cache_ops_t ops) 1678 { 1679 int error = 0; 1680 1681 ASSERT((SDEV_HELD(*dv))); 1682 1683 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1684 switch (ops) { 1685 case SDEV_CACHE_ADD: 1686 error = sdev_cache_add(ddv, dv, nm); 1687 break; 1688 case SDEV_CACHE_DELETE: 1689 error = sdev_cache_delete(ddv, dv); 1690 break; 1691 default: 1692 break; 1693 } 1694 1695 return (error); 1696 } 1697 1698 /* 1699 * retrieve the named entry from the directory cache 1700 */ 1701 struct sdev_node * 1702 sdev_cache_lookup(struct sdev_node *ddv, char *nm) 1703 { 1704 struct sdev_node *dv = NULL; 1705 1706 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); 1707 dv = sdev_findbyname(ddv, nm); 1708 1709 return (dv); 1710 } 1711 1712 /* 1713 * Implicit reconfig for nodes constructed by a link generator 1714 * Start devfsadm if needed, or if devfsadm is in progress, 1715 * prepare to block on devfsadm either completing or 1716 * constructing the desired node. As devfsadmd is global 1717 * in scope, constructing all necessary nodes, we only 1718 * need to initiate it once. 
1719 */ 1720 static int 1721 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm) 1722 { 1723 int error = 0; 1724 1725 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { 1726 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n", 1727 ddv->sdev_name, nm, devfsadm_state)); 1728 mutex_enter(&dv->sdev_lookup_lock); 1729 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING)); 1730 mutex_exit(&dv->sdev_lookup_lock); 1731 error = 0; 1732 } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) { 1733 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n", 1734 ddv->sdev_name, nm, devfsadm_state)); 1735 1736 sdev_devfsadmd_thread(ddv, dv, kcred); 1737 mutex_enter(&dv->sdev_lookup_lock); 1738 SDEV_BLOCK_OTHERS(dv, 1739 (SDEV_LOOKUP | SDEV_LGWAITING)); 1740 mutex_exit(&dv->sdev_lookup_lock); 1741 error = 0; 1742 } else { 1743 error = -1; 1744 } 1745 1746 return (error); 1747 } 1748 1749 /* 1750 * Support for specialized device naming construction mechanisms 1751 */ 1752 static int 1753 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm, 1754 int (*callback)(struct sdev_node *, char *, void **, struct cred *, 1755 void *, char *), int flags, struct cred *cred) 1756 { 1757 int rv = 0; 1758 char *physpath = NULL; 1759 struct vattr vattr; 1760 struct vattr *vap = &vattr; 1761 struct sdev_node *dv = NULL; 1762 1763 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1764 if (flags & SDEV_VLINK) { 1765 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1766 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL, 1767 NULL); 1768 if (rv) { 1769 kmem_free(physpath, MAXPATHLEN); 1770 return (-1); 1771 } 1772 1773 *vap = *sdev_getdefault_attr(VLNK); /* structure copy */ 1774 vap->va_size = strlen(physpath); 1775 gethrestime(&vap->va_atime); 1776 vap->va_mtime = vap->va_atime; 1777 vap->va_ctime = vap->va_atime; 1778 1779 rv = sdev_mknode(ddv, nm, &dv, vap, NULL, 1780 (void *)physpath, cred, SDEV_READY); 1781 kmem_free(physpath, MAXPATHLEN); 1782 if (rv) 1783 
return (rv); 1784 } else if (flags & SDEV_VATTR) { 1785 /* 1786 * /dev/pts 1787 * 1788 * callback is responsible to set the basic attributes, 1789 * e.g. va_type/va_uid/va_gid/ 1790 * dev_t if VCHR or VBLK/ 1791 */ 1792 ASSERT(callback); 1793 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL); 1794 if (rv) { 1795 sdcmn_err3(("devname_lookup_func: SDEV_NONE " 1796 "callback failed \n")); 1797 return (-1); 1798 } 1799 1800 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL, 1801 cred, SDEV_READY); 1802 1803 if (rv) 1804 return (rv); 1805 1806 } else { 1807 impossible(("lookup: %s/%s by %s not supported (%d)\n", 1808 SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm, 1809 __LINE__)); 1810 rv = -1; 1811 } 1812 1813 *dvp = dv; 1814 return (rv); 1815 } 1816 1817 static int 1818 is_devfsadm_thread(char *exec_name) 1819 { 1820 /* 1821 * note: because devfsadmd -> /usr/sbin/devfsadm 1822 * it is safe to use "devfsadm" to capture the lookups 1823 * from devfsadm and its daemon version. 1824 */ 1825 if (strcmp(exec_name, "devfsadm") == 0) 1826 return (1); 1827 return (0); 1828 } 1829 1830 /* 1831 * Lookup Order: 1832 * sdev_node cache; 1833 * backing store (SDEV_PERSIST); 1834 * DBNR: a. dir_ops implemented in the loadable modules; 1835 * b. vnode ops in vtab. 1836 */ 1837 int 1838 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp, 1839 struct cred *cred, int (*callback)(struct sdev_node *, char *, void **, 1840 struct cred *, void *, char *), int flags) 1841 { 1842 int rv = 0, nmlen; 1843 struct vnode *rvp = NULL; 1844 struct sdev_node *dv = NULL; 1845 int retried = 0; 1846 int error = 0; 1847 struct vattr vattr; 1848 char *lookup_thread = curproc->p_user.u_comm; 1849 int failed_flags = 0; 1850 int (*vtor)(struct sdev_node *) = NULL; 1851 int state; 1852 int parent_state; 1853 char *link = NULL; 1854 1855 if (SDEVTOV(ddv)->v_type != VDIR) 1856 return (ENOTDIR); 1857 1858 /* 1859 * Empty name or ., return node itself. 
1860 */ 1861 nmlen = strlen(nm); 1862 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { 1863 *vpp = SDEVTOV(ddv); 1864 VN_HOLD(*vpp); 1865 return (0); 1866 } 1867 1868 /* 1869 * .., return the parent directory 1870 */ 1871 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { 1872 *vpp = SDEVTOV(ddv->sdev_dotdot); 1873 VN_HOLD(*vpp); 1874 return (0); 1875 } 1876 1877 rw_enter(&ddv->sdev_contents, RW_READER); 1878 if (ddv->sdev_flags & SDEV_VTOR) { 1879 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); 1880 ASSERT(vtor); 1881 } 1882 1883 tryagain: 1884 /* 1885 * (a) directory cache lookup: 1886 */ 1887 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1888 parent_state = ddv->sdev_state; 1889 dv = sdev_cache_lookup(ddv, nm); 1890 if (dv) { 1891 state = dv->sdev_state; 1892 switch (state) { 1893 case SDEV_INIT: 1894 if (is_devfsadm_thread(lookup_thread)) 1895 break; 1896 1897 /* ZOMBIED parent won't allow node creation */ 1898 if (parent_state == SDEV_ZOMBIE) { 1899 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1900 retried); 1901 goto nolock_notfound; 1902 } 1903 1904 mutex_enter(&dv->sdev_lookup_lock); 1905 /* compensate the threads started after devfsadm */ 1906 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && 1907 !(SDEV_IS_LOOKUP(dv))) 1908 SDEV_BLOCK_OTHERS(dv, 1909 (SDEV_LOOKUP | SDEV_LGWAITING)); 1910 1911 if (SDEV_IS_LOOKUP(dv)) { 1912 failed_flags |= SLF_REBUILT; 1913 rw_exit(&ddv->sdev_contents); 1914 error = sdev_wait4lookup(dv, SDEV_LOOKUP); 1915 mutex_exit(&dv->sdev_lookup_lock); 1916 rw_enter(&ddv->sdev_contents, RW_READER); 1917 1918 if (error != 0) { 1919 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1920 retried); 1921 goto nolock_notfound; 1922 } 1923 1924 state = dv->sdev_state; 1925 if (state == SDEV_INIT) { 1926 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1927 retried); 1928 goto nolock_notfound; 1929 } else if (state == SDEV_READY) { 1930 goto found; 1931 } else if (state == SDEV_ZOMBIE) { 1932 rw_exit(&ddv->sdev_contents); 1933 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1934 retried); 1935 
SDEV_RELE(dv); 1936 goto lookup_failed; 1937 } 1938 } else { 1939 mutex_exit(&dv->sdev_lookup_lock); 1940 } 1941 break; 1942 case SDEV_READY: 1943 goto found; 1944 case SDEV_ZOMBIE: 1945 rw_exit(&ddv->sdev_contents); 1946 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1947 SDEV_RELE(dv); 1948 goto lookup_failed; 1949 default: 1950 rw_exit(&ddv->sdev_contents); 1951 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1952 sdev_lookup_failed(ddv, nm, failed_flags); 1953 *vpp = NULLVP; 1954 return (ENOENT); 1955 } 1956 } 1957 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1958 1959 /* 1960 * ZOMBIED parent does not allow new node creation. 1961 * bail out early 1962 */ 1963 if (parent_state == SDEV_ZOMBIE) { 1964 rw_exit(&ddv->sdev_contents); 1965 *vpp = NULLVP; 1966 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1967 return (ENOENT); 1968 } 1969 1970 /* 1971 * (b0): backing store lookup 1972 * SDEV_PERSIST is default except: 1973 * 1) pts nodes 1974 * 2) non-chmod'ed local nodes 1975 * 3) zvol nodes 1976 */ 1977 if (SDEV_IS_PERSIST(ddv)) { 1978 error = devname_backstore_lookup(ddv, nm, &rvp); 1979 1980 if (!error) { 1981 1982 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 1983 error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL); 1984 if (error) { 1985 rw_exit(&ddv->sdev_contents); 1986 if (dv) 1987 SDEV_RELE(dv); 1988 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1989 sdev_lookup_failed(ddv, nm, failed_flags); 1990 *vpp = NULLVP; 1991 return (ENOENT); 1992 } 1993 1994 if (vattr.va_type == VLNK) { 1995 error = sdev_getlink(rvp, &link); 1996 if (error) { 1997 rw_exit(&ddv->sdev_contents); 1998 if (dv) 1999 SDEV_RELE(dv); 2000 SD_TRACE_FAILED_LOOKUP(ddv, nm, 2001 retried); 2002 sdev_lookup_failed(ddv, nm, 2003 failed_flags); 2004 *vpp = NULLVP; 2005 return (ENOENT); 2006 } 2007 ASSERT(link != NULL); 2008 } 2009 2010 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2011 rw_exit(&ddv->sdev_contents); 2012 rw_enter(&ddv->sdev_contents, RW_WRITER); 2013 } 2014 error = sdev_mknode(ddv, nm, &dv, &vattr, 2015 rvp, 
link, cred, SDEV_READY); 2016 rw_downgrade(&ddv->sdev_contents); 2017 2018 if (link != NULL) { 2019 kmem_free(link, strlen(link) + 1); 2020 link = NULL; 2021 } 2022 2023 if (error) { 2024 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2025 rw_exit(&ddv->sdev_contents); 2026 if (dv) 2027 SDEV_RELE(dv); 2028 goto lookup_failed; 2029 } else { 2030 goto found; 2031 } 2032 } else if (retried) { 2033 rw_exit(&ddv->sdev_contents); 2034 sdcmn_err3(("retry of lookup of %s/%s: failed\n", 2035 ddv->sdev_name, nm)); 2036 if (dv) 2037 SDEV_RELE(dv); 2038 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2039 sdev_lookup_failed(ddv, nm, failed_flags); 2040 *vpp = NULLVP; 2041 return (ENOENT); 2042 } 2043 } 2044 2045 lookup_create_node: 2046 /* first thread that is doing the lookup on this node */ 2047 if (callback) { 2048 ASSERT(dv == NULL); 2049 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2050 rw_exit(&ddv->sdev_contents); 2051 rw_enter(&ddv->sdev_contents, RW_WRITER); 2052 } 2053 error = sdev_call_dircallback(ddv, &dv, nm, callback, 2054 flags, cred); 2055 rw_downgrade(&ddv->sdev_contents); 2056 if (error == 0) { 2057 goto found; 2058 } else { 2059 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2060 rw_exit(&ddv->sdev_contents); 2061 goto lookup_failed; 2062 } 2063 } 2064 if (!dv) { 2065 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2066 rw_exit(&ddv->sdev_contents); 2067 rw_enter(&ddv->sdev_contents, RW_WRITER); 2068 } 2069 error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL, 2070 cred, SDEV_INIT); 2071 if (!dv) { 2072 rw_exit(&ddv->sdev_contents); 2073 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2074 sdev_lookup_failed(ddv, nm, failed_flags); 2075 *vpp = NULLVP; 2076 return (ENOENT); 2077 } 2078 rw_downgrade(&ddv->sdev_contents); 2079 } 2080 2081 /* 2082 * (b1) invoking devfsadm once per life time for devfsadm nodes 2083 */ 2084 ASSERT(SDEV_HELD(dv)); 2085 2086 if (SDEV_IS_NO_NCACHE(dv)) 2087 failed_flags |= SLF_NO_NCACHE; 2088 if (sdev_reconfig_boot || !i_ddi_io_initialized() || 2089 
SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) || 2090 ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) { 2091 ASSERT(SDEV_HELD(dv)); 2092 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2093 goto nolock_notfound; 2094 } 2095 2096 /* 2097 * filter out known non-existent devices recorded 2098 * during initial reconfiguration boot for which 2099 * reconfig should not be done and lookup may 2100 * be short-circuited now. 2101 */ 2102 if (sdev_lookup_filter(ddv, nm)) { 2103 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2104 goto nolock_notfound; 2105 } 2106 2107 /* bypassing devfsadm internal nodes */ 2108 if (is_devfsadm_thread(lookup_thread)) { 2109 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2110 goto nolock_notfound; 2111 } 2112 2113 if (sdev_reconfig_disable) { 2114 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2115 goto nolock_notfound; 2116 } 2117 2118 error = sdev_call_devfsadmd(ddv, dv, nm); 2119 if (error == 0) { 2120 sdcmn_err8(("lookup of %s/%s by %s: reconfig\n", 2121 ddv->sdev_name, nm, curproc->p_user.u_comm)); 2122 if (sdev_reconfig_verbose) { 2123 cmn_err(CE_CONT, 2124 "?lookup of %s/%s by %s: reconfig\n", 2125 ddv->sdev_name, nm, curproc->p_user.u_comm); 2126 } 2127 retried = 1; 2128 failed_flags |= SLF_REBUILT; 2129 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 2130 SDEV_SIMPLE_RELE(dv); 2131 goto tryagain; 2132 } else { 2133 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2134 goto nolock_notfound; 2135 } 2136 2137 found: 2138 ASSERT(!(dv->sdev_flags & SDEV_STALE)); 2139 ASSERT(dv->sdev_state == SDEV_READY); 2140 if (vtor) { 2141 /* 2142 * Check validity of returned node 2143 */ 2144 switch (vtor(dv)) { 2145 case SDEV_VTOR_VALID: 2146 break; 2147 case SDEV_VTOR_STALE: 2148 /* 2149 * The name exists, but the cache entry is 2150 * stale and needs to be re-created. 
2151 */ 2152 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2153 if (rw_tryupgrade(&ddv->sdev_contents) == 0) { 2154 rw_exit(&ddv->sdev_contents); 2155 rw_enter(&ddv->sdev_contents, RW_WRITER); 2156 } 2157 error = sdev_cache_update(ddv, &dv, nm, 2158 SDEV_CACHE_DELETE); 2159 rw_downgrade(&ddv->sdev_contents); 2160 if (error == 0) { 2161 dv = NULL; 2162 goto lookup_create_node; 2163 } 2164 /* FALLTHRU */ 2165 case SDEV_VTOR_INVALID: 2166 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2167 sdcmn_err7(("lookup: destroy invalid " 2168 "node: %s(%p)\n", dv->sdev_name, (void *)dv)); 2169 goto nolock_notfound; 2170 case SDEV_VTOR_SKIP: 2171 sdcmn_err7(("lookup: node not applicable - " 2172 "skipping: %s(%p)\n", dv->sdev_name, (void *)dv)); 2173 rw_exit(&ddv->sdev_contents); 2174 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2175 SDEV_RELE(dv); 2176 goto lookup_failed; 2177 default: 2178 cmn_err(CE_PANIC, 2179 "dev fs: validator failed: %s(%p)\n", 2180 dv->sdev_name, (void *)dv); 2181 break; 2182 } 2183 } 2184 2185 rw_exit(&ddv->sdev_contents); 2186 rv = sdev_to_vp(dv, vpp); 2187 sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d " 2188 "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count, 2189 dv->sdev_state, nm, rv)); 2190 return (rv); 2191 2192 nolock_notfound: 2193 /* 2194 * Destroy the node that is created for synchronization purposes. 2195 */ 2196 sdcmn_err3(("devname_lookup_func: %s with state %d\n", 2197 nm, dv->sdev_state)); 2198 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2199 if (dv->sdev_state == SDEV_INIT) { 2200 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2201 rw_exit(&ddv->sdev_contents); 2202 rw_enter(&ddv->sdev_contents, RW_WRITER); 2203 } 2204 2205 /* 2206 * Node state may have changed during the lock 2207 * changes. Re-check. 
2208 */ 2209 if (dv->sdev_state == SDEV_INIT) { 2210 (void) sdev_dirdelete(ddv, dv); 2211 rw_exit(&ddv->sdev_contents); 2212 sdev_lookup_failed(ddv, nm, failed_flags); 2213 *vpp = NULL; 2214 return (ENOENT); 2215 } 2216 } 2217 2218 rw_exit(&ddv->sdev_contents); 2219 SDEV_RELE(dv); 2220 2221 lookup_failed: 2222 sdev_lookup_failed(ddv, nm, failed_flags); 2223 *vpp = NULL; 2224 return (ENOENT); 2225 } 2226 2227 /* 2228 * Given a directory node, mark all nodes beneath as 2229 * STALE, i.e. nodes that don't exist as far as new 2230 * consumers are concerned. Remove them from the 2231 * list of directory entries so that no lookup or 2232 * directory traversal will find them. The node 2233 * not deallocated so existing holds are not affected. 2234 */ 2235 void 2236 sdev_stale(struct sdev_node *ddv) 2237 { 2238 struct sdev_node *dv; 2239 struct vnode *vp; 2240 2241 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 2242 2243 rw_enter(&ddv->sdev_contents, RW_WRITER); 2244 for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) { 2245 vp = SDEVTOV(dv); 2246 if (vp->v_type == VDIR) 2247 sdev_stale(dv); 2248 2249 sdcmn_err9(("sdev_stale: setting stale %s\n", 2250 dv->sdev_path)); 2251 dv->sdev_flags |= SDEV_STALE; 2252 avl_remove(&ddv->sdev_entries, dv); 2253 } 2254 ddv->sdev_flags |= SDEV_BUILD; 2255 rw_exit(&ddv->sdev_contents); 2256 } 2257 2258 /* 2259 * Given a directory node, clean out all the nodes beneath. 2260 * If expr is specified, clean node with names matching expr. 2261 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale, 2262 * so they are excluded from future lookups. 
2263 */ 2264 int 2265 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags) 2266 { 2267 int error = 0; 2268 int busy = 0; 2269 struct vnode *vp; 2270 struct sdev_node *dv, *next = NULL; 2271 int bkstore = 0; 2272 int len = 0; 2273 char *bks_name = NULL; 2274 2275 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 2276 2277 /* 2278 * We try our best to destroy all unused sdev_node's 2279 */ 2280 rw_enter(&ddv->sdev_contents, RW_WRITER); 2281 for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) { 2282 next = SDEV_NEXT_ENTRY(ddv, dv); 2283 vp = SDEVTOV(dv); 2284 2285 if (expr && gmatch(dv->sdev_name, expr) == 0) 2286 continue; 2287 2288 if (vp->v_type == VDIR && 2289 sdev_cleandir(dv, NULL, flags) != 0) { 2290 sdcmn_err9(("sdev_cleandir: dir %s busy\n", 2291 dv->sdev_name)); 2292 busy++; 2293 continue; 2294 } 2295 2296 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) { 2297 sdcmn_err9(("sdev_cleandir: dir %s busy\n", 2298 dv->sdev_name)); 2299 busy++; 2300 continue; 2301 } 2302 2303 /* 2304 * at this point, either dv is not held or SDEV_ENFORCE 2305 * is specified. In either case, dv needs to be deleted 2306 */ 2307 SDEV_HOLD(dv); 2308 2309 bkstore = SDEV_IS_PERSIST(dv) ? 
1 : 0; 2310 if (bkstore && (vp->v_type == VDIR)) 2311 bkstore += 1; 2312 2313 if (bkstore) { 2314 len = strlen(dv->sdev_name) + 1; 2315 bks_name = kmem_alloc(len, KM_SLEEP); 2316 bcopy(dv->sdev_name, bks_name, len); 2317 } 2318 2319 error = sdev_dirdelete(ddv, dv); 2320 2321 if (error == EBUSY) { 2322 sdcmn_err9(("sdev_cleandir: dir busy\n")); 2323 busy++; 2324 } 2325 2326 /* take care the backing store clean up */ 2327 if (bkstore && (error == 0)) { 2328 ASSERT(bks_name); 2329 ASSERT(ddv->sdev_attrvp); 2330 2331 if (bkstore == 1) { 2332 error = VOP_REMOVE(ddv->sdev_attrvp, 2333 bks_name, kcred, NULL, 0); 2334 } else if (bkstore == 2) { 2335 error = VOP_RMDIR(ddv->sdev_attrvp, 2336 bks_name, ddv->sdev_attrvp, kcred, NULL, 0); 2337 } 2338 2339 /* do not propagate the backing store errors */ 2340 if (error) { 2341 sdcmn_err9(("sdev_cleandir: backing store" 2342 "not cleaned\n")); 2343 error = 0; 2344 } 2345 2346 bkstore = 0; 2347 kmem_free(bks_name, len); 2348 bks_name = NULL; 2349 len = 0; 2350 } 2351 } 2352 2353 ddv->sdev_flags |= SDEV_BUILD; 2354 rw_exit(&ddv->sdev_contents); 2355 2356 if (busy) { 2357 error = EBUSY; 2358 } 2359 2360 return (error); 2361 } 2362 2363 /* 2364 * a convenient wrapper for readdir() funcs 2365 */ 2366 size_t 2367 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off) 2368 { 2369 size_t reclen = DIRENT64_RECLEN(strlen(nm)); 2370 if (reclen > size) 2371 return (0); 2372 2373 de->d_ino = (ino64_t)ino; 2374 de->d_off = (off64_t)off + 1; 2375 de->d_reclen = (ushort_t)reclen; 2376 (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen)); 2377 return (reclen); 2378 } 2379 2380 /* 2381 * sdev_mount service routines 2382 */ 2383 int 2384 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args) 2385 { 2386 int error; 2387 2388 if (uap->datalen != sizeof (*args)) 2389 return (EINVAL); 2390 2391 if (error = copyin(uap->dataptr, args, sizeof (*args))) { 2392 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not" 2393 
"get user data. error %d\n", error); 2394 return (EFAULT); 2395 } 2396 2397 return (0); 2398 } 2399 2400 #ifdef nextdp 2401 #undef nextdp 2402 #endif 2403 #define nextdp(dp) ((struct dirent64 *) \ 2404 (intptr_t)((char *)(dp) + (dp)->d_reclen)) 2405 2406 /* 2407 * readdir helper func 2408 */ 2409 int 2410 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp, 2411 int flags) 2412 { 2413 struct sdev_node *ddv = VTOSDEV(vp); 2414 struct sdev_node *dv; 2415 dirent64_t *dp; 2416 ulong_t outcount = 0; 2417 size_t namelen; 2418 ulong_t alloc_count; 2419 void *outbuf; 2420 struct iovec *iovp; 2421 int error = 0; 2422 size_t reclen; 2423 offset_t diroff; 2424 offset_t soff; 2425 int this_reclen; 2426 int (*vtor)(struct sdev_node *) = NULL; 2427 struct vattr attr; 2428 timestruc_t now; 2429 2430 ASSERT(ddv->sdev_attr || ddv->sdev_attrvp); 2431 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2432 2433 if (uiop->uio_loffset >= MAXOFF_T) { 2434 if (eofp) 2435 *eofp = 1; 2436 return (0); 2437 } 2438 2439 if (uiop->uio_iovcnt != 1) 2440 return (EINVAL); 2441 2442 if (vp->v_type != VDIR) 2443 return (ENOTDIR); 2444 2445 if (ddv->sdev_flags & SDEV_VTOR) { 2446 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); 2447 ASSERT(vtor); 2448 } 2449 2450 if (eofp != NULL) 2451 *eofp = 0; 2452 2453 soff = uiop->uio_loffset; 2454 iovp = uiop->uio_iov; 2455 alloc_count = iovp->iov_len; 2456 dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP); 2457 outcount = 0; 2458 2459 if (ddv->sdev_state == SDEV_ZOMBIE) 2460 goto get_cache; 2461 2462 if (SDEV_IS_GLOBAL(ddv)) { 2463 2464 if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) && 2465 !sdev_reconfig_boot && (flags & SDEV_BROWSE) && 2466 !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) && 2467 ((moddebug & MODDEBUG_FINI_EBUSY) == 0) && 2468 !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) && 2469 !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && 2470 !sdev_reconfig_disable) { 2471 /* 2472 * invoking "devfsadm" to do system device reconfig 2473 
*/ 2474 mutex_enter(&ddv->sdev_lookup_lock); 2475 SDEV_BLOCK_OTHERS(ddv, 2476 (SDEV_READDIR|SDEV_LGWAITING)); 2477 mutex_exit(&ddv->sdev_lookup_lock); 2478 2479 sdcmn_err8(("readdir of %s by %s: reconfig\n", 2480 ddv->sdev_path, curproc->p_user.u_comm)); 2481 if (sdev_reconfig_verbose) { 2482 cmn_err(CE_CONT, 2483 "?readdir of %s by %s: reconfig\n", 2484 ddv->sdev_path, curproc->p_user.u_comm); 2485 } 2486 2487 sdev_devfsadmd_thread(ddv, NULL, kcred); 2488 } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { 2489 /* 2490 * compensate the "ls" started later than "devfsadm" 2491 */ 2492 mutex_enter(&ddv->sdev_lookup_lock); 2493 SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING)); 2494 mutex_exit(&ddv->sdev_lookup_lock); 2495 } 2496 2497 /* 2498 * release the contents lock so that 2499 * the cache may be updated by devfsadmd 2500 */ 2501 rw_exit(&ddv->sdev_contents); 2502 mutex_enter(&ddv->sdev_lookup_lock); 2503 if (SDEV_IS_READDIR(ddv)) 2504 (void) sdev_wait4lookup(ddv, SDEV_READDIR); 2505 mutex_exit(&ddv->sdev_lookup_lock); 2506 rw_enter(&ddv->sdev_contents, RW_READER); 2507 2508 sdcmn_err4(("readdir of directory %s by %s\n", 2509 ddv->sdev_name, curproc->p_user.u_comm)); 2510 if (ddv->sdev_flags & SDEV_BUILD) { 2511 if (SDEV_IS_PERSIST(ddv)) { 2512 error = sdev_filldir_from_store(ddv, 2513 alloc_count, cred); 2514 } 2515 ddv->sdev_flags &= ~SDEV_BUILD; 2516 } 2517 } 2518 2519 get_cache: 2520 /* handle "." and ".." 
*/ 2521 diroff = 0; 2522 if (soff == 0) { 2523 /* first time */ 2524 this_reclen = DIRENT64_RECLEN(1); 2525 if (alloc_count < this_reclen) { 2526 error = EINVAL; 2527 goto done; 2528 } 2529 2530 dp->d_ino = (ino64_t)ddv->sdev_ino; 2531 dp->d_off = (off64_t)1; 2532 dp->d_reclen = (ushort_t)this_reclen; 2533 2534 (void) strncpy(dp->d_name, ".", 2535 DIRENT64_NAMELEN(this_reclen)); 2536 outcount += dp->d_reclen; 2537 dp = nextdp(dp); 2538 } 2539 2540 diroff++; 2541 if (soff <= 1) { 2542 this_reclen = DIRENT64_RECLEN(2); 2543 if (alloc_count < outcount + this_reclen) { 2544 error = EINVAL; 2545 goto done; 2546 } 2547 2548 dp->d_reclen = (ushort_t)this_reclen; 2549 dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino; 2550 dp->d_off = (off64_t)2; 2551 2552 (void) strncpy(dp->d_name, "..", 2553 DIRENT64_NAMELEN(this_reclen)); 2554 outcount += dp->d_reclen; 2555 2556 dp = nextdp(dp); 2557 } 2558 2559 2560 /* gets the cache */ 2561 diroff++; 2562 for (dv = SDEV_FIRST_ENTRY(ddv); dv; 2563 dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) { 2564 sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n", 2565 diroff, soff, dv->sdev_name)); 2566 2567 /* bypassing pre-matured nodes */ 2568 if (diroff < soff || (dv->sdev_state != SDEV_READY)) { 2569 sdcmn_err3(("sdev_readdir: pre-mature node " 2570 "%s %d\n", dv->sdev_name, dv->sdev_state)); 2571 continue; 2572 } 2573 2574 /* 2575 * Check validity of node 2576 * Drop invalid and nodes to be skipped. 2577 * A node the validator indicates as stale needs 2578 * to be returned as presumably the node name itself 2579 * is valid and the node data itself will be refreshed 2580 * on lookup. An application performing a readdir then 2581 * stat on each entry should thus always see consistent 2582 * data. In any case, it is not possible to synchronize 2583 * with dynamic kernel state, and any view we return can 2584 * never be anything more than a snapshot at a point in time. 
2585 */ 2586 if (vtor) { 2587 switch (vtor(dv)) { 2588 case SDEV_VTOR_VALID: 2589 break; 2590 case SDEV_VTOR_INVALID: 2591 case SDEV_VTOR_SKIP: 2592 continue; 2593 case SDEV_VTOR_STALE: 2594 sdcmn_err3(("sdev_readir: %s stale\n", 2595 dv->sdev_name)); 2596 break; 2597 default: 2598 cmn_err(CE_PANIC, 2599 "dev fs: validator failed: %s(%p)\n", 2600 dv->sdev_name, (void *)dv); 2601 break; 2602 /*NOTREACHED*/ 2603 } 2604 } 2605 2606 namelen = strlen(dv->sdev_name); 2607 reclen = DIRENT64_RECLEN(namelen); 2608 if (outcount + reclen > alloc_count) { 2609 goto full; 2610 } 2611 dp->d_reclen = (ushort_t)reclen; 2612 dp->d_ino = (ino64_t)dv->sdev_ino; 2613 dp->d_off = (off64_t)diroff + 1; 2614 (void) strncpy(dp->d_name, dv->sdev_name, 2615 DIRENT64_NAMELEN(reclen)); 2616 outcount += reclen; 2617 dp = nextdp(dp); 2618 } 2619 2620 full: 2621 sdcmn_err4(("sdev_readdir: moving %lu bytes: " 2622 "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff, 2623 (void *)dv)); 2624 2625 if (outcount) 2626 error = uiomove(outbuf, outcount, UIO_READ, uiop); 2627 2628 if (!error) { 2629 uiop->uio_loffset = diroff; 2630 if (eofp) 2631 *eofp = dv ? 
0 : 1; 2632 } 2633 2634 2635 if (ddv->sdev_attrvp) { 2636 gethrestime(&now); 2637 attr.va_ctime = now; 2638 attr.va_atime = now; 2639 attr.va_mask = AT_CTIME|AT_ATIME; 2640 2641 (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL); 2642 } 2643 done: 2644 kmem_free(outbuf, alloc_count); 2645 return (error); 2646 } 2647 2648 static int 2649 sdev_modctl_lookup(const char *path, vnode_t **r_vp) 2650 { 2651 vnode_t *vp; 2652 vnode_t *cvp; 2653 struct sdev_node *svp; 2654 char *nm; 2655 struct pathname pn; 2656 int error; 2657 int persisted = 0; 2658 2659 ASSERT(INGLOBALZONE(curproc)); 2660 2661 if (error = pn_get((char *)path, UIO_SYSSPACE, &pn)) 2662 return (error); 2663 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 2664 2665 vp = rootdir; 2666 VN_HOLD(vp); 2667 2668 while (pn_pathleft(&pn)) { 2669 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK); 2670 (void) pn_getcomponent(&pn, nm); 2671 2672 /* 2673 * Deal with the .. special case where we may be 2674 * traversing up across a mount point, to the 2675 * root of this filesystem or global root. 2676 */ 2677 if (nm[0] == '.' && nm[1] == '.' 
&& nm[2] == 0) { 2678 checkforroot: 2679 if (VN_CMP(vp, rootdir)) { 2680 nm[1] = 0; 2681 } else if (vp->v_flag & VROOT) { 2682 vfs_t *vfsp; 2683 cvp = vp; 2684 vfsp = cvp->v_vfsp; 2685 vfs_rlock_wait(vfsp); 2686 vp = cvp->v_vfsp->vfs_vnodecovered; 2687 if (vp == NULL || 2688 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) { 2689 vfs_unlock(vfsp); 2690 VN_RELE(cvp); 2691 error = EIO; 2692 break; 2693 } 2694 VN_HOLD(vp); 2695 vfs_unlock(vfsp); 2696 VN_RELE(cvp); 2697 cvp = NULL; 2698 goto checkforroot; 2699 } 2700 } 2701 2702 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL, 2703 NULL, NULL); 2704 if (error) { 2705 VN_RELE(vp); 2706 break; 2707 } 2708 2709 /* traverse mount points encountered on our journey */ 2710 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) { 2711 VN_RELE(vp); 2712 VN_RELE(cvp); 2713 break; 2714 } 2715 2716 /* 2717 * symbolic link, can be either relative and absolute 2718 */ 2719 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) { 2720 struct pathname linkpath; 2721 pn_alloc(&linkpath); 2722 if (error = pn_getsymlink(cvp, &linkpath, kcred)) { 2723 pn_free(&linkpath); 2724 break; 2725 } 2726 if (pn_pathleft(&linkpath) == 0) 2727 (void) pn_set(&linkpath, "."); 2728 error = pn_insert(&pn, &linkpath, strlen(nm)); 2729 pn_free(&linkpath); 2730 if (pn.pn_pathlen == 0) { 2731 VN_RELE(vp); 2732 return (ENOENT); 2733 } 2734 if (pn.pn_path[0] == '/') { 2735 pn_skipslash(&pn); 2736 VN_RELE(vp); 2737 VN_RELE(cvp); 2738 vp = rootdir; 2739 VN_HOLD(vp); 2740 } else { 2741 VN_RELE(cvp); 2742 } 2743 continue; 2744 } 2745 2746 VN_RELE(vp); 2747 2748 /* 2749 * Direct the operation to the persisting filesystem 2750 * underlying /dev. Bail if we encounter a 2751 * non-persistent dev entity here. 
2752 */ 2753 if (cvp->v_vfsp->vfs_fstype == devtype) { 2754 2755 if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) { 2756 error = ENOENT; 2757 VN_RELE(cvp); 2758 break; 2759 } 2760 2761 if (VTOSDEV(cvp) == NULL) { 2762 error = ENOENT; 2763 VN_RELE(cvp); 2764 break; 2765 } 2766 svp = VTOSDEV(cvp); 2767 if ((vp = svp->sdev_attrvp) == NULL) { 2768 error = ENOENT; 2769 VN_RELE(cvp); 2770 break; 2771 } 2772 persisted = 1; 2773 VN_HOLD(vp); 2774 VN_RELE(cvp); 2775 cvp = vp; 2776 } 2777 2778 vp = cvp; 2779 pn_skipslash(&pn); 2780 } 2781 2782 kmem_free(nm, MAXNAMELEN); 2783 pn_free(&pn); 2784 2785 if (error) 2786 return (error); 2787 2788 /* 2789 * Only return persisted nodes in the filesystem underlying /dev. 2790 */ 2791 if (!persisted) { 2792 VN_RELE(vp); 2793 return (ENOENT); 2794 } 2795 2796 *r_vp = vp; 2797 return (0); 2798 } 2799 2800 int 2801 sdev_modctl_readdir(const char *dir, char ***dirlistp, 2802 int *npathsp, int *npathsp_alloc, int checking_empty) 2803 { 2804 char **pathlist = NULL; 2805 char **newlist = NULL; 2806 int npaths = 0; 2807 int npaths_alloc = 0; 2808 dirent64_t *dbuf = NULL; 2809 int n; 2810 char *s; 2811 int error; 2812 vnode_t *vp; 2813 int eof; 2814 struct iovec iov; 2815 struct uio uio; 2816 struct dirent64 *dp; 2817 size_t dlen; 2818 size_t dbuflen; 2819 int ndirents = 64; 2820 char *nm; 2821 2822 error = sdev_modctl_lookup(dir, &vp); 2823 sdcmn_err11(("modctl readdir: %s by %s: %s\n", 2824 dir, curproc->p_user.u_comm, 2825 (error == 0) ? 
"ok" : "failed")); 2826 if (error) 2827 return (error); 2828 2829 dlen = ndirents * (sizeof (*dbuf)); 2830 dbuf = kmem_alloc(dlen, KM_SLEEP); 2831 2832 uio.uio_iov = &iov; 2833 uio.uio_iovcnt = 1; 2834 uio.uio_segflg = UIO_SYSSPACE; 2835 uio.uio_fmode = 0; 2836 uio.uio_extflg = UIO_COPY_CACHED; 2837 uio.uio_loffset = 0; 2838 uio.uio_llimit = MAXOFFSET_T; 2839 2840 eof = 0; 2841 error = 0; 2842 while (!error && !eof) { 2843 uio.uio_resid = dlen; 2844 iov.iov_base = (char *)dbuf; 2845 iov.iov_len = dlen; 2846 2847 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 2848 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0); 2849 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2850 2851 dbuflen = dlen - uio.uio_resid; 2852 2853 if (error || dbuflen == 0) 2854 break; 2855 2856 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 2857 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 2858 2859 nm = dp->d_name; 2860 2861 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 2862 continue; 2863 if (npaths == npaths_alloc) { 2864 npaths_alloc += 64; 2865 newlist = (char **) 2866 kmem_zalloc((npaths_alloc + 1) * 2867 sizeof (char *), KM_SLEEP); 2868 if (pathlist) { 2869 bcopy(pathlist, newlist, 2870 npaths * sizeof (char *)); 2871 kmem_free(pathlist, 2872 (npaths + 1) * sizeof (char *)); 2873 } 2874 pathlist = newlist; 2875 } 2876 n = strlen(nm) + 1; 2877 s = kmem_alloc(n, KM_SLEEP); 2878 bcopy(nm, s, n); 2879 pathlist[npaths++] = s; 2880 sdcmn_err11((" %s/%s\n", dir, s)); 2881 2882 /* if checking empty, one entry is as good as many */ 2883 if (checking_empty) { 2884 eof = 1; 2885 break; 2886 } 2887 } 2888 } 2889 2890 exit: 2891 VN_RELE(vp); 2892 2893 if (dbuf) 2894 kmem_free(dbuf, dlen); 2895 2896 if (error) 2897 return (error); 2898 2899 *dirlistp = pathlist; 2900 *npathsp = npaths; 2901 *npathsp_alloc = npaths_alloc; 2902 2903 return (0); 2904 } 2905 2906 void 2907 sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc) 2908 { 2909 int i, n; 2910 2911 for (i = 
0; i < npaths; i++) { 2912 n = strlen(pathlist[i]) + 1; 2913 kmem_free(pathlist[i], n); 2914 } 2915 2916 kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *)); 2917 } 2918 2919 int 2920 sdev_modctl_devexists(const char *path) 2921 { 2922 vnode_t *vp; 2923 int error; 2924 2925 error = sdev_modctl_lookup(path, &vp); 2926 sdcmn_err11(("modctl dev exists: %s by %s: %s\n", 2927 path, curproc->p_user.u_comm, 2928 (error == 0) ? "ok" : "failed")); 2929 if (error == 0) 2930 VN_RELE(vp); 2931 2932 return (error); 2933 } 2934 2935 extern int sdev_vnodeops_tbl_size; 2936 2937 /* 2938 * construct a new template with overrides from vtab 2939 */ 2940 static fs_operation_def_t * 2941 sdev_merge_vtab(const fs_operation_def_t tab[]) 2942 { 2943 fs_operation_def_t *new; 2944 const fs_operation_def_t *tab_entry; 2945 2946 /* make a copy of standard vnode ops table */ 2947 new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP); 2948 bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size); 2949 2950 /* replace the overrides from tab */ 2951 for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) { 2952 fs_operation_def_t *std_entry = new; 2953 while (std_entry->name) { 2954 if (strcmp(tab_entry->name, std_entry->name) == 0) { 2955 std_entry->func = tab_entry->func; 2956 break; 2957 } 2958 std_entry++; 2959 } 2960 if (std_entry->name == NULL) 2961 cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.", 2962 tab_entry->name); 2963 } 2964 2965 return (new); 2966 } 2967 2968 /* free memory allocated by sdev_merge_vtab */ 2969 static void 2970 sdev_free_vtab(fs_operation_def_t *new) 2971 { 2972 kmem_free(new, sdev_vnodeops_tbl_size); 2973 } 2974 2975 /* 2976 * a generic setattr() function 2977 * 2978 * note: flags only supports AT_UID and AT_GID. 2979 * Future enhancements can be done for other types, e.g. 
AT_MODE 2980 */ 2981 int 2982 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags, 2983 struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *, 2984 int), int protocol) 2985 { 2986 struct sdev_node *dv = VTOSDEV(vp); 2987 struct sdev_node *parent = dv->sdev_dotdot; 2988 struct vattr *get; 2989 uint_t mask = vap->va_mask; 2990 int error; 2991 2992 /* some sanity checks */ 2993 if (vap->va_mask & AT_NOSET) 2994 return (EINVAL); 2995 2996 if (vap->va_mask & AT_SIZE) { 2997 if (vp->v_type == VDIR) { 2998 return (EISDIR); 2999 } 3000 } 3001 3002 /* no need to set attribute, but do not fail either */ 3003 ASSERT(parent); 3004 rw_enter(&parent->sdev_contents, RW_READER); 3005 if (dv->sdev_state == SDEV_ZOMBIE) { 3006 rw_exit(&parent->sdev_contents); 3007 return (0); 3008 } 3009 3010 /* If backing store exists, just set it. */ 3011 if (dv->sdev_attrvp) { 3012 rw_exit(&parent->sdev_contents); 3013 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 3014 } 3015 3016 /* 3017 * Otherwise, for nodes with the persistence attribute, create it. 
3018 */ 3019 ASSERT(dv->sdev_attr); 3020 if (SDEV_IS_PERSIST(dv) || 3021 ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) { 3022 sdev_vattr_merge(dv, vap); 3023 rw_enter(&dv->sdev_contents, RW_WRITER); 3024 error = sdev_shadow_node(dv, cred); 3025 rw_exit(&dv->sdev_contents); 3026 rw_exit(&parent->sdev_contents); 3027 3028 if (error) 3029 return (error); 3030 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 3031 } 3032 3033 3034 /* 3035 * sdev_attr was allocated in sdev_mknode 3036 */ 3037 rw_enter(&dv->sdev_contents, RW_WRITER); 3038 error = secpolicy_vnode_setattr(cred, vp, vap, 3039 dv->sdev_attr, flags, sdev_unlocked_access, dv); 3040 if (error) { 3041 rw_exit(&dv->sdev_contents); 3042 rw_exit(&parent->sdev_contents); 3043 return (error); 3044 } 3045 3046 get = dv->sdev_attr; 3047 if (mask & AT_MODE) { 3048 get->va_mode &= S_IFMT; 3049 get->va_mode |= vap->va_mode & ~S_IFMT; 3050 } 3051 3052 if ((mask & AT_UID) || (mask & AT_GID)) { 3053 if (mask & AT_UID) 3054 get->va_uid = vap->va_uid; 3055 if (mask & AT_GID) 3056 get->va_gid = vap->va_gid; 3057 /* 3058 * a callback must be provided if the protocol is set 3059 */ 3060 if ((protocol & AT_UID) || (protocol & AT_GID)) { 3061 ASSERT(callback); 3062 error = callback(dv, get, protocol); 3063 if (error) { 3064 rw_exit(&dv->sdev_contents); 3065 rw_exit(&parent->sdev_contents); 3066 return (error); 3067 } 3068 } 3069 } 3070 3071 if (mask & AT_ATIME) 3072 get->va_atime = vap->va_atime; 3073 if (mask & AT_MTIME) 3074 get->va_mtime = vap->va_mtime; 3075 if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) { 3076 gethrestime(&get->va_ctime); 3077 } 3078 3079 sdev_vattr_merge(dv, get); 3080 rw_exit(&dv->sdev_contents); 3081 rw_exit(&parent->sdev_contents); 3082 return (0); 3083 } 3084 3085 /* 3086 * a generic inactive() function 3087 */ 3088 /*ARGSUSED*/ 3089 void 3090 devname_inactive_func(struct vnode *vp, struct cred *cred, 3091 void (*callback)(struct vnode *)) 3092 { 3093 int clean; 3094 struct 
sdev_node *dv = VTOSDEV(vp); 3095 struct sdev_node *ddv = dv->sdev_dotdot; 3096 int state; 3097 3098 rw_enter(&ddv->sdev_contents, RW_WRITER); 3099 state = dv->sdev_state; 3100 3101 mutex_enter(&vp->v_lock); 3102 ASSERT(vp->v_count >= 1); 3103 3104 if (vp->v_count == 1 && callback != NULL) 3105 callback(vp); 3106 3107 clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE); 3108 3109 /* 3110 * last ref count on the ZOMBIE node is released. 3111 * clean up the sdev_node, and 3112 * release the hold on the backing store node so that 3113 * the ZOMBIE backing stores also cleaned out. 3114 */ 3115 if (clean) { 3116 ASSERT(ddv); 3117 3118 ddv->sdev_nlink--; 3119 if (vp->v_type == VDIR) { 3120 dv->sdev_nlink--; 3121 } 3122 if ((dv->sdev_flags & SDEV_STALE) == 0) 3123 avl_remove(&ddv->sdev_entries, dv); 3124 dv->sdev_nlink--; 3125 --vp->v_count; 3126 mutex_exit(&vp->v_lock); 3127 sdev_nodedestroy(dv, 0); 3128 } else { 3129 --vp->v_count; 3130 mutex_exit(&vp->v_lock); 3131 } 3132 rw_exit(&ddv->sdev_contents); 3133 } 3134