1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2012, Joyent, Inc. All rights reserved. 24 */ 25 26 /* 27 * utility routines for the /dev fs 28 */ 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/t_lock.h> 33 #include <sys/systm.h> 34 #include <sys/sysmacros.h> 35 #include <sys/user.h> 36 #include <sys/time.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/file.h> 40 #include <sys/fcntl.h> 41 #include <sys/flock.h> 42 #include <sys/kmem.h> 43 #include <sys/uio.h> 44 #include <sys/errno.h> 45 #include <sys/stat.h> 46 #include <sys/cred.h> 47 #include <sys/dirent.h> 48 #include <sys/pathname.h> 49 #include <sys/cmn_err.h> 50 #include <sys/debug.h> 51 #include <sys/mode.h> 52 #include <sys/policy.h> 53 #include <fs/fs_subr.h> 54 #include <sys/mount.h> 55 #include <sys/fs/snode.h> 56 #include <sys/fs/dv_node.h> 57 #include <sys/fs/sdev_impl.h> 58 #include <sys/sunndi.h> 59 #include <sys/sunmdi.h> 60 #include <sys/conf.h> 61 #include <sys/proc.h> 62 #include <sys/user.h> 63 #include <sys/modctl.h> 64 65 #ifdef DEBUG 66 int sdev_debug 
= 0x00000001; 67 int sdev_debug_cache_flags = 0; 68 #endif 69 70 /* 71 * globals 72 */ 73 /* prototype memory vattrs */ 74 vattr_t sdev_vattr_dir = { 75 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 76 VDIR, /* va_type */ 77 SDEV_DIRMODE_DEFAULT, /* va_mode */ 78 SDEV_UID_DEFAULT, /* va_uid */ 79 SDEV_GID_DEFAULT, /* va_gid */ 80 0, /* va_fsid */ 81 0, /* va_nodeid */ 82 0, /* va_nlink */ 83 0, /* va_size */ 84 0, /* va_atime */ 85 0, /* va_mtime */ 86 0, /* va_ctime */ 87 0, /* va_rdev */ 88 0, /* va_blksize */ 89 0, /* va_nblocks */ 90 0 /* va_vcode */ 91 }; 92 93 vattr_t sdev_vattr_lnk = { 94 AT_TYPE|AT_MODE, /* va_mask */ 95 VLNK, /* va_type */ 96 SDEV_LNKMODE_DEFAULT, /* va_mode */ 97 SDEV_UID_DEFAULT, /* va_uid */ 98 SDEV_GID_DEFAULT, /* va_gid */ 99 0, /* va_fsid */ 100 0, /* va_nodeid */ 101 0, /* va_nlink */ 102 0, /* va_size */ 103 0, /* va_atime */ 104 0, /* va_mtime */ 105 0, /* va_ctime */ 106 0, /* va_rdev */ 107 0, /* va_blksize */ 108 0, /* va_nblocks */ 109 0 /* va_vcode */ 110 }; 111 112 vattr_t sdev_vattr_blk = { 113 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 114 VBLK, /* va_type */ 115 S_IFBLK | SDEV_DEVMODE_DEFAULT, /* va_mode */ 116 SDEV_UID_DEFAULT, /* va_uid */ 117 SDEV_GID_DEFAULT, /* va_gid */ 118 0, /* va_fsid */ 119 0, /* va_nodeid */ 120 0, /* va_nlink */ 121 0, /* va_size */ 122 0, /* va_atime */ 123 0, /* va_mtime */ 124 0, /* va_ctime */ 125 0, /* va_rdev */ 126 0, /* va_blksize */ 127 0, /* va_nblocks */ 128 0 /* va_vcode */ 129 }; 130 131 vattr_t sdev_vattr_chr = { 132 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 133 VCHR, /* va_type */ 134 S_IFCHR | SDEV_DEVMODE_DEFAULT, /* va_mode */ 135 SDEV_UID_DEFAULT, /* va_uid */ 136 SDEV_GID_DEFAULT, /* va_gid */ 137 0, /* va_fsid */ 138 0, /* va_nodeid */ 139 0, /* va_nlink */ 140 0, /* va_size */ 141 0, /* va_atime */ 142 0, /* va_mtime */ 143 0, /* va_ctime */ 144 0, /* va_rdev */ 145 0, /* va_blksize */ 146 0, /* va_nblocks */ 147 0 /* va_vcode */ 148 }; 149 150 kmem_cache_t 
*sdev_node_cache; /* sdev_node cache */ 151 int devtype; /* fstype */ 152 153 /* static */ 154 static struct vnodeops *sdev_get_vop(struct sdev_node *); 155 static void sdev_set_no_negcache(struct sdev_node *); 156 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []); 157 static void sdev_free_vtab(fs_operation_def_t *); 158 159 static void 160 sdev_prof_free(struct sdev_node *dv) 161 { 162 ASSERT(!SDEV_IS_GLOBAL(dv)); 163 if (dv->sdev_prof.dev_name) 164 nvlist_free(dv->sdev_prof.dev_name); 165 if (dv->sdev_prof.dev_map) 166 nvlist_free(dv->sdev_prof.dev_map); 167 if (dv->sdev_prof.dev_symlink) 168 nvlist_free(dv->sdev_prof.dev_symlink); 169 if (dv->sdev_prof.dev_glob_incdir) 170 nvlist_free(dv->sdev_prof.dev_glob_incdir); 171 if (dv->sdev_prof.dev_glob_excdir) 172 nvlist_free(dv->sdev_prof.dev_glob_excdir); 173 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 174 } 175 176 /* sdev_node cache constructor */ 177 /*ARGSUSED1*/ 178 static int 179 i_sdev_node_ctor(void *buf, void *cfarg, int flag) 180 { 181 struct sdev_node *dv = (struct sdev_node *)buf; 182 struct vnode *vp; 183 184 bzero(buf, sizeof (struct sdev_node)); 185 vp = dv->sdev_vnode = vn_alloc(flag); 186 if (vp == NULL) { 187 return (-1); 188 } 189 vp->v_data = dv; 190 rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL); 191 return (0); 192 } 193 194 /* sdev_node cache destructor */ 195 /*ARGSUSED1*/ 196 static void 197 i_sdev_node_dtor(void *buf, void *arg) 198 { 199 struct sdev_node *dv = (struct sdev_node *)buf; 200 struct vnode *vp = SDEVTOV(dv); 201 202 rw_destroy(&dv->sdev_contents); 203 vn_free(vp); 204 } 205 206 /* initialize sdev_node cache */ 207 void 208 sdev_node_cache_init() 209 { 210 int flags = 0; 211 212 #ifdef DEBUG 213 flags = sdev_debug_cache_flags; 214 if (flags) 215 sdcmn_err(("cache debug flags 0x%x\n", flags)); 216 #endif /* DEBUG */ 217 218 ASSERT(sdev_node_cache == NULL); 219 sdev_node_cache = kmem_cache_create("sdev_node_cache", 220 sizeof (struct sdev_node), 0, 
	    i_sdev_node_ctor, i_sdev_node_dtor,
	    NULL, NULL, NULL, flags);
}

/* destroy sdev_node cache */
void
sdev_node_cache_fini()
{
	ASSERT(sdev_node_cache != NULL);
	kmem_cache_destroy(sdev_node_cache);
	sdev_node_cache = NULL;
}

/*
 * Compare two nodes lexicographically to balance avl tree.
 * AVL comparator contract: returns -1, 0, or 1.
 */
static int
sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2)
{
	int rv;
	if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0)
		return (0);
	return ((rv < 0) ? -1 : 1);
}

/*
 * Set the node's lifecycle state; the caller must hold the node's
 * sdev_contents lock as writer.
 */
void
sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state)
{
	ASSERT(dv);
	ASSERT(RW_WRITE_HELD(&dv->sdev_contents));
	dv->sdev_state = state;
}

/*
 * Copy the attributes selected by vap->va_mask into the node's cached
 * vattr.  Timestamps not supplied by the caller are set to "now".
 */
static void
sdev_attr_update(struct sdev_node *dv, vattr_t *vap)
{
	timestruc_t now;
	struct vattr *attrp;
	uint_t mask;

	ASSERT(dv->sdev_attr);
	ASSERT(vap);

	attrp = dv->sdev_attr;
	mask = vap->va_mask;
	if (mask & AT_TYPE)
		attrp->va_type = vap->va_type;
	if (mask & AT_MODE)
		attrp->va_mode = vap->va_mode;
	if (mask & AT_UID)
		attrp->va_uid = vap->va_uid;
	if (mask & AT_GID)
		attrp->va_gid = vap->va_gid;
	if (mask & AT_RDEV)
		attrp->va_rdev = vap->va_rdev;

	gethrestime(&now);
	attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now;
	attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now;
	attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now;
}

/*
 * Allocate the node's cached vattr and populate it from vap; the
 * caller must supply at least AT_TYPE and AT_MODE.
 */
static void
sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap)
{
	ASSERT(dv->sdev_attr == NULL);
	ASSERT(vap->va_mask & AT_TYPE);
	ASSERT(vap->va_mask & AT_MODE);

	dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
	sdev_attr_update(dv, vap);
}

/*
 * alloc and initialize a sdev_node under parent ddv; on success the
 * new node is returned through *newdv in SDEV_INIT state (not yet
 * inserted into the parent's directory cache).
 */
int
sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    vattr_t *vap)
{
	struct sdev_node *dv = NULL;
	struct vnode *vp;
	size_t nmlen, len;
	devname_handle_t *dhl;

	nmlen = strlen(nm) + 1;
	if (nmlen > MAXNAMELEN) {
		sdcmn_err9(("sdev_nodeinit: node name %s"
		    " too long\n", nm));
		*newdv = NULL;
		return (ENAMETOOLONG);
	}

	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);

	dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP);
	bcopy(nm, dv->sdev_name, nmlen);
	dv->sdev_namelen = nmlen - 1;	/* '\0' not included */
	len = strlen(ddv->sdev_path) + strlen(nm) + 2;
	dv->sdev_path = kmem_alloc(len, KM_SLEEP);
	(void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm);
	/* overwritten for VLNK nodes */
	dv->sdev_symlink = NULL;

	vp = SDEVTOV(dv);
	vn_reinit(vp);
	vp->v_vfsp = SDEVTOV(ddv)->v_vfsp;
	if (vap)
		vp->v_type = vap->va_type;

	/*
	 * initialized to the parent's vnodeops.
	 * maybe overwritten for a VDIR
	 */
	vn_setops(vp, vn_getops(SDEVTOV(ddv)));
	vn_exists(vp);

	dv->sdev_dotdot = NULL;
	dv->sdev_attrvp = NULL;
	if (vap) {
		sdev_attr_alloc(dv, vap);
	} else {
		dv->sdev_attr = NULL;
	}

	dv->sdev_ino = sdev_mkino(dv);
	dv->sdev_nlink = 0;		/* updated on insert */
	dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */
	dv->sdev_flags |= SDEV_BUILD;
	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
	if (SDEV_IS_GLOBAL(ddv)) {
		dv->sdev_flags |= SDEV_GLOBAL;
		dhl = &(dv->sdev_handle);
		dhl->dh_data = dv;
		dhl->dh_args = NULL;
		sdev_set_no_negcache(dv);
		dv->sdev_gdir_gen = 0;
	} else {
		dv->sdev_flags &= ~SDEV_GLOBAL;
		dv->sdev_origin = NULL;	/* set later */
		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
		dv->sdev_ldir_gen = 0;
		dv->sdev_devtree_gen = 0;
	}

	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_INIT);
	rw_exit(&dv->sdev_contents);
	*newdv = dv;

	return (0);
}

/*
 * transition a sdev_node into SDEV_READY state
 *
 * Fills in the type-specific pieces (directory AVL tree, symlink
 * target, link counts), attaches or creates the shadow (backing
 * store) node, and moves the node to SDEV_READY on success or
 * SDEV_ZOMBIE on failure.
 */
int
sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
    void *args, struct cred *cred)
{
	int error = 0;
	struct vnode *vp = SDEVTOV(dv);
	vtype_t type;

	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);

	type = vap->va_type;
	vp->v_type = type;
	vp->v_rdev = vap->va_rdev;
	rw_enter(&dv->sdev_contents, RW_WRITER);
	if (type == VDIR) {
		dv->sdev_nlink = 2;
		dv->sdev_flags &= ~SDEV_PERSIST;
		dv->sdev_flags &= ~SDEV_DYNAMIC;
		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
		ASSERT(dv->sdev_dotdot);
		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
		avl_create(&dv->sdev_entries,
		    (int (*)(const void *, const void *))sdev_compare_nodes,
		    sizeof (struct sdev_node),
		    offsetof(struct sdev_node, sdev_avllink));
	} else if (type == VLNK) {
		/* for symlinks `args' is the target path */
		ASSERT(args);
		dv->sdev_nlink = 1;
		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
	} else {
		dv->sdev_nlink = 1;
	}

	/* for non-global nodes `args' is the originating global node */
	if (!(SDEV_IS_GLOBAL(dv))) {
		dv->sdev_origin = (struct sdev_node *)args;
		dv->sdev_flags &= ~SDEV_PERSIST;
	}

	/*
	 * shadow node is created here OR
	 * if failed (indicated by dv->sdev_attrvp == NULL),
	 * created later in sdev_setattr
	 */
	if (avp) {
		dv->sdev_attrvp = avp;
	} else {
		if (dv->sdev_attr == NULL) {
			sdev_attr_alloc(dv, vap);
		} else {
			sdev_attr_update(dv, vap);
		}

		if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv))
			error = sdev_shadow_node(dv, cred);
	}

	if (error == 0) {
		/* transition to READY state */
		sdev_set_nodestate(dv, SDEV_READY);
		sdev_nc_node_exists(dv);
	} else {
		sdev_set_nodestate(dv, SDEV_ZOMBIE);
	}
	rw_exit(&dv->sdev_contents);
	return (error);
}

/*
 * setting ZOMBIE state
 */
static int
sdev_nodezombied(struct sdev_node *dv)
{
	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_ZOMBIE);
	rw_exit(&dv->sdev_contents);
	return (0);
}

/*
 * Build the VROOT sdev_node.
 */
/*ARGSUSED*/
struct sdev_node *
sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp,
    struct vnode *avp, struct cred *cred)
{
	struct sdev_node *dv;
	struct vnode *vp;
	char devdir[] = "/dev";

	ASSERT(sdev_node_cache != NULL);
	ASSERT(avp);
	dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP);
	vp = SDEVTOV(dv);
	vn_reinit(vp);
	vp->v_flag |= VROOT;
	vp->v_vfsp = vfsp;
	vp->v_type = VDIR;
	vp->v_rdev = devdev;
	vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */
	vn_exists(vp);

	if (vfsp->vfs_mntpt)
		dv->sdev_name = i_ddi_strdup(
		    (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP);
	else
		/* vfs_mountdev1 set mount point later */
		dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP);
	dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */
	dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP);
	dv->sdev_ino = SDEV_ROOTINO;
	dv->sdev_nlink = 2;		/* name + . (no sdev_insert) */
	dv->sdev_dotdot = dv;		/* .. == self */
	dv->sdev_attrvp = avp;
	dv->sdev_attr = NULL;
	mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL);
	/* the global-zone /dev root persists; other mounts do not */
	if (strcmp(dv->sdev_name, "/dev") == 0) {
		dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST;
		bzero(&dv->sdev_handle, sizeof (dv->sdev_handle));
		dv->sdev_gdir_gen = 0;
	} else {
		dv->sdev_flags = SDEV_BUILD;
		dv->sdev_flags &= ~SDEV_PERSIST;
		bzero(&dv->sdev_prof, sizeof (dv->sdev_prof));
		dv->sdev_ldir_gen = 0;
		dv->sdev_devtree_gen = 0;
	}

	avl_create(&dv->sdev_entries,
	    (int (*)(const void *, const void *))sdev_compare_nodes,
	    sizeof (struct sdev_node),
	    offsetof(struct sdev_node, sdev_avllink));

	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_READY);
	rw_exit(&dv->sdev_contents);
	sdev_nc_node_exists(dv);
	return (dv);
}

/* directory dependent vop table */
struct sdev_vop_table {
	char *vt_name;				/* subdirectory name */
	const fs_operation_def_t *vt_service;	/* vnodeops table */
	struct vnodeops *vt_vops;		/* constructed vop */
	struct vnodeops **vt_global_vops;	/* global container for vop */
	int (*vt_vtor)(struct sdev_node *);	/* validate sdev_node */
	int vt_flags;
};

/*
 * A nice improvement would be to provide a plug-in mechanism
 * for this table instead of a const table.
531 */ 532 static struct sdev_vop_table vtab[] = 533 { 534 { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate, 535 SDEV_DYNAMIC | SDEV_VTOR }, 536 537 { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate, 538 SDEV_DYNAMIC | SDEV_VTOR }, 539 540 { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops, 541 devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR }, 542 543 { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE }, 544 545 { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate, 546 SDEV_DYNAMIC | SDEV_VTOR }, 547 548 { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops, 549 devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE }, 550 551 /* 552 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the 553 * lofi driver controls child nodes. 554 * 555 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted 556 * stale nodes (e.g. from devfsadm -R). 557 * 558 * In addition, devfsadm knows not to attempt a rmdir: a zone 559 * may hold a reference, which would zombify the node, 560 * preventing a mkdir. 
561 */ 562 563 { "lofi", NULL, NULL, NULL, NULL, 564 SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST }, 565 { "rlofi", NULL, NULL, NULL, NULL, 566 SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST }, 567 568 { NULL, NULL, NULL, NULL, NULL, 0} 569 }; 570 571 struct sdev_vop_table * 572 sdev_match(struct sdev_node *dv) 573 { 574 int vlen; 575 int i; 576 577 for (i = 0; vtab[i].vt_name; i++) { 578 if (strcmp(vtab[i].vt_name, dv->sdev_name) == 0) 579 return (&vtab[i]); 580 if (vtab[i].vt_flags & SDEV_SUBDIR) { 581 char *ptr; 582 583 ASSERT(strlen(dv->sdev_path) > 5); 584 ptr = dv->sdev_path + 5; 585 vlen = strlen(vtab[i].vt_name); 586 if ((strncmp(vtab[i].vt_name, ptr, 587 vlen - 1) == 0) && ptr[vlen] == '/') 588 return (&vtab[i]); 589 } 590 591 } 592 return (NULL); 593 } 594 595 /* 596 * sets a directory's vnodeops if the directory is in the vtab; 597 */ 598 static struct vnodeops * 599 sdev_get_vop(struct sdev_node *dv) 600 { 601 struct sdev_vop_table *vtp; 602 char *path; 603 604 path = dv->sdev_path; 605 ASSERT(path); 606 607 /* gets the relative path to /dev/ */ 608 path += 5; 609 610 /* gets the vtab entry it matches */ 611 if ((vtp = sdev_match(dv)) != NULL) { 612 dv->sdev_flags |= vtp->vt_flags; 613 614 if (vtp->vt_vops) { 615 if (vtp->vt_global_vops) 616 *(vtp->vt_global_vops) = vtp->vt_vops; 617 return (vtp->vt_vops); 618 } 619 620 if (vtp->vt_service) { 621 fs_operation_def_t *templ; 622 templ = sdev_merge_vtab(vtp->vt_service); 623 if (vn_make_ops(vtp->vt_name, 624 (const fs_operation_def_t *)templ, 625 &vtp->vt_vops) != 0) { 626 cmn_err(CE_PANIC, "%s: malformed vnode ops\n", 627 vtp->vt_name); 628 /*NOTREACHED*/ 629 } 630 if (vtp->vt_global_vops) { 631 *(vtp->vt_global_vops) = vtp->vt_vops; 632 } 633 sdev_free_vtab(templ); 634 return (vtp->vt_vops); 635 } 636 return (sdev_vnodeops); 637 } 638 639 /* child inherits the persistence of the parent */ 640 if (SDEV_IS_PERSIST(dv->sdev_dotdot)) 641 dv->sdev_flags |= SDEV_PERSIST; 642 643 return (sdev_vnodeops); 644 } 645 646 
/*
 * Mark a node SDEV_NO_NCACHE when its vtab entry (matched by exact
 * name relative to "/dev/") requests no negative-cache entries.
 */
static void
sdev_set_no_negcache(struct sdev_node *dv)
{
	int i;
	char *path;

	ASSERT(dv->sdev_path);
	path = dv->sdev_path + strlen("/dev/");

	for (i = 0; vtab[i].vt_name; i++) {
		if (strcmp(vtab[i].vt_name, path) == 0) {
			if (vtab[i].vt_flags & SDEV_NO_NCACHE)
				dv->sdev_flags |= SDEV_NO_NCACHE;
			break;
		}
	}
}

/* Return the vtab validator callback for this node, or NULL if none. */
void *
sdev_get_vtor(struct sdev_node *dv)
{
	struct sdev_vop_table *vtp;

	vtp = sdev_match(dv);
	if (vtp)
		return ((void *)vtp->vt_vtor);
	else
		return (NULL);
}

/*
 * Build the base root inode
 */
ino_t
sdev_mkino(struct sdev_node *dv)
{
	ino_t	ino;

	/*
	 * for now, follow the lead of tmpfs here
	 * need to someday understand the requirements here
	 */
	ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3);
	ino += SDEV_ROOTINO + 1;

	return (ino);
}

/*
 * Read the target of symlink vnode `linkvp' into a freshly allocated
 * string returned through *link (caller frees).  Returns ENOENT on
 * any failure.
 */
int
sdev_getlink(struct vnode *linkvp, char **link)
{
	int err;
	char *buf;
	struct uio uio = {0};
	struct iovec iov = {0};

	if (linkvp == NULL)
		return (ENOENT);
	ASSERT(linkvp->v_type == VLNK);

	buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
	iov.iov_base = buf;
	iov.iov_len = MAXPATHLEN;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_resid = MAXPATHLEN;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_llimit = MAXOFFSET_T;

	err = VOP_READLINK(linkvp, &uio, kcred, NULL);
	if (err) {
		cmn_err(CE_WARN, "readlink %s failed in dev\n", buf);
		kmem_free(buf, MAXPATHLEN);
		return (ENOENT);
	}

	/* mission complete */
	*link = i_ddi_strdup(buf, KM_SLEEP);
	kmem_free(buf, MAXPATHLEN);
	return (0);
}

/*
 * A convenient wrapper to get the devfs node vnode for a device
 * minor functionality: readlink() of a /dev symlink
 * Place the link into dv->sdev_symlink
 *
 * On readlink failure the node is transitioned to ZOMBIE state.
 */
static int
sdev_follow_link(struct sdev_node *dv)
{
	int err;
	struct vnode *linkvp;
	char *link = NULL;

	linkvp = SDEVTOV(dv);
	if (linkvp == NULL)
		return (ENOENT);
	ASSERT(linkvp->v_type == VLNK);
	err = sdev_getlink(linkvp, &link);
	if (err) {
		(void) sdev_nodezombied(dv);
		dv->sdev_symlink = NULL;
		return (ENOENT);
	}

	ASSERT(link != NULL);
	dv->sdev_symlink = link;
	return (0);
}

/*
 * Sanity-check that an existing node matches a creation request:
 * same vnode type and, for symlinks, the same target.  Returns
 * EEXIST on a mismatch, 0 otherwise.
 */
static int
sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs)
{
	vtype_t otype = SDEVTOV(dv)->v_type;

	/*
	 * existing sdev_node has a different type.
	 */
	if (otype != nvap->va_type) {
		sdcmn_err9(("sdev_node_check: existing node "
		    " %s type %d does not match new node type %d\n",
		    dv->sdev_name, otype, nvap->va_type));
		return (EEXIST);
	}

	/*
	 * For a symlink, the target should be the same.
	 */
	if (otype == VLNK) {
		ASSERT(nargs != NULL);
		ASSERT(dv->sdev_symlink != NULL);
		if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) {
			sdcmn_err9(("sdev_node_check: existing node "
			    " %s has different symlink %s as new node "
			    " %s\n", dv->sdev_name, dv->sdev_symlink,
			    (char *)nargs));
			return (EEXIST);
		}
	}

	return (0);
}

/*
 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
 *
 * arguments:
 *	- ddv (parent)
 *	- nm (child name)
 *	- newdv (sdev_node for nm is returned here)
 *	- vap (vattr for the node to be created, va_type should be set.
 *	- avp (attribute vnode)
 *	  the defaults should be used if unknown)
 *	- cred
 *	- args
 *	    . tnm (for VLNK)
 *	    . global sdev_node (for !SDEV_GLOBAL)
 *	- state: SDEV_INIT, SDEV_READY
 *
 * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT)
 *
 * NOTE: directory contents writers lock needs to be held before
 * calling this routine.
 */
int
sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
    sdev_node_state_t state)
{
	int error = 0;
	sdev_node_state_t node_state;
	struct sdev_node *dv = NULL;

	ASSERT(state != SDEV_ZOMBIE);
	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	/* reuse a node the caller already located, else create one */
	if (*newdv) {
		dv = *newdv;
	} else {
		/* allocate and initialize a sdev_node */
		if (ddv->sdev_state == SDEV_ZOMBIE) {
			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
			    ddv->sdev_path));
			return (ENOENT);
		}

		error = sdev_nodeinit(ddv, nm, &dv, vap);
		if (error != 0) {
			sdcmn_err9(("sdev_mknode: error %d,"
			    " name %s can not be initialized\n",
			    error, nm));
			return (error);
		}
		ASSERT(dv);

		/* insert into the directory cache */
		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
		if (error) {
			sdcmn_err9(("sdev_mknode: node %s can not"
			    " be added into directory cache\n", nm));
			return (ENOENT);
		}
	}

	ASSERT(dv);
	node_state = dv->sdev_state;
	ASSERT(node_state != SDEV_ZOMBIE);

	if (state == SDEV_READY) {
		switch (node_state) {
		case SDEV_INIT:
			error = sdev_nodeready(dv, vap, avp, args, cred);
			if (error) {
				sdcmn_err9(("sdev_mknode: node %s can NOT"
				    " be transitioned into READY state, "
				    "error %d\n", nm, error));
			}
			break;
		case SDEV_READY:
			/*
			 * Do some sanity checking to make sure
			 * the existing sdev_node is what has been
			 * asked for.
			 */
			error = sdev_node_check(dv, vap, args);
			break;
		default:
			break;
		}
	}

	if (!error) {
		*newdv = dv;
		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
	} else {
		/* drop the hold taken on creation/lookup */
		SDEV_SIMPLE_RELE(dv);
		*newdv = NULL;
	}

	return (error);
}

/*
 * convenient wrapper to change vp's ATIME, CTIME and MTIME
 * (only the times selected by `mask' are set, all to "now";
 * EROFS from the backing store is deliberately ignored)
 */
void
sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask)
{
	struct vattr attr;
	timestruc_t now;
	int err;

	ASSERT(vp);
	gethrestime(&now);
	if (mask & AT_CTIME)
		attr.va_ctime = now;
	if (mask & AT_MTIME)
		attr.va_mtime = now;
	if (mask & AT_ATIME)
		attr.va_atime = now;

	attr.va_mask = (mask & AT_TIMES);
	err = VOP_SETATTR(vp, &attr, 0, cred, NULL);
	if (err && (err != EROFS)) {
		sdcmn_err(("update timestamps error %d\n", err));
	}
}

/*
 * the backing store vnode is released here
 * (frees every allocation attached to the node and returns it to the
 * kmem cache; the caller must guarantee there are no remaining links)
 */
/*ARGSUSED1*/
void
sdev_nodedestroy(struct sdev_node *dv, uint_t flags)
{
	/* no references */
	ASSERT(dv->sdev_nlink == 0);

	if (dv->sdev_attrvp != NULLVP) {
		VN_RELE(dv->sdev_attrvp);
		/*
		 * reset the attrvp so that no more
		 * references can be made on this already
		 * vn_rele() vnode
		 */
		dv->sdev_attrvp = NULLVP;
	}

	if (dv->sdev_attr != NULL) {
		kmem_free(dv->sdev_attr, sizeof (struct vattr));
		dv->sdev_attr = NULL;
	}

	if (dv->sdev_name != NULL) {
		kmem_free(dv->sdev_name, dv->sdev_namelen + 1);
		dv->sdev_name = NULL;
	}

	if (dv->sdev_symlink != NULL) {
		kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1);
		dv->sdev_symlink = NULL;
	}

	if (dv->sdev_path) {
		kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1);
		dv->sdev_path = NULL;
	}

	if (!SDEV_IS_GLOBAL(dv))
		sdev_prof_free(dv);

	if (SDEVTOV(dv)->v_type == VDIR) {
		ASSERT(SDEV_FIRST_ENTRY(dv) == NULL);
		avl_destroy(&dv->sdev_entries);
	}

	mutex_destroy(&dv->sdev_lookup_lock);
	cv_destroy(&dv->sdev_lookup_cv);

	/* return node to initial state as per constructor */
	(void) memset((void *)&dv->sdev_instance_data, 0,
	    sizeof (dv->sdev_instance_data));
	vn_invalid(SDEVTOV(dv));
	kmem_cache_free(sdev_node_cache, dv);
}

/*
 * DIRECTORY CACHE lookup
 *
 * Returns the child named `nm' with a hold, or NULL.  The caller
 * must hold ddv's sdev_contents lock (reader or writer).
 */
struct sdev_node *
sdev_findbyname(struct sdev_node *ddv, char *nm)
{
	struct sdev_node *dv;
	struct sdev_node dvtmp;
	avl_index_t	where;

	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));

	dvtmp.sdev_name = nm;
	dv = avl_find(&ddv->sdev_entries, &dvtmp, &where);
	if (dv) {
		ASSERT(dv->sdev_dotdot == ddv);
		ASSERT(strcmp(dv->sdev_name, nm) == 0);
		SDEV_HOLD(dv);
		return (dv);
	}
	return (NULL);
}

/*
 * Inserts a new sdev_node in a parent directory
 * (caller holds ddv's sdev_contents as writer; the name must not
 * already exist in the directory)
 */
void
sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv)
{
	avl_index_t where;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	ASSERT(SDEVTOV(ddv)->v_type == VDIR);
	ASSERT(ddv->sdev_nlink >= 2);
	ASSERT(dv->sdev_nlink == 0);

	dv->sdev_dotdot = ddv;
	VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL);
	avl_insert(&ddv->sdev_entries, dv, where);
	ddv->sdev_nlink++;
}

/*
 * The following check is needed because while sdev_nodes are linked
 * in SDEV_INIT state, they have their link counts incremented only
 * in SDEV_READY state.
 */
static void
decr_link(struct sdev_node *dv)
{
	if (dv->sdev_state != SDEV_INIT)
		dv->sdev_nlink--;
	else
		ASSERT(dv->sdev_nlink == 0);
}

/*
 * Delete an existing dv from directory cache
 *
 * In the case of a node is still held by non-zero reference count,
 * the node is put into ZOMBIE state. Once the reference count
 * reaches "0", the node is unlinked and destroyed,
 * in sdev_inactive().
 */
static int
sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv)
{
	struct vnode *vp;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	vp = SDEVTOV(dv);
	mutex_enter(&vp->v_lock);

	/* dv is held still */
	if (vp->v_count > 1) {
		rw_enter(&dv->sdev_contents, RW_WRITER);
		if (dv->sdev_state == SDEV_READY) {
			sdcmn_err9((
			    "sdev_dirdelete: node %s busy with count %d\n",
			    dv->sdev_name, vp->v_count));
			dv->sdev_state = SDEV_ZOMBIE;
		}
		rw_exit(&dv->sdev_contents);
		--vp->v_count;
		mutex_exit(&vp->v_lock);
		return (EBUSY);
	}
	ASSERT(vp->v_count == 1);

	/* unlink from the memory cache */
	ddv->sdev_nlink--;	/* .. to above */
	if (vp->v_type == VDIR) {
		decr_link(dv);	/* . to self */
	}

	avl_remove(&ddv->sdev_entries, dv);
	decr_link(dv);	/* name, back to zero */
	vp->v_count--;
	mutex_exit(&vp->v_lock);

	/* destroy the node */
	sdev_nodedestroy(dv, 0);
	return (0);
}

/*
 * check if the source is in the path of the target
 *
 * source and target are different
 */
/*ARGSUSED2*/
static int
sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred)
{
	int error = 0;
	struct sdev_node *dotdot, *dir;

	dotdot = tdv->sdev_dotdot;
	ASSERT(dotdot);

	/* fs root */
	if (dotdot == tdv) {
		return (0);
	}

	/* walk ".." links from the target up to the fs root */
	for (;;) {
		/*
		 * avoid error cases like
		 * mv a a/b
		 * mv a a/b/c
		 * etc.
		 */
		if (dotdot == sdv) {
			error = EINVAL;
			break;
		}

		dir = dotdot;
		dotdot = dir->sdev_dotdot;

		/* done checking because root is reached */
		if (dir == dotdot) {
			break;
		}
	}
	return (error);
}

/*
 * Rename node odv (in directory oddv) to name nnm in directory nddv.
 * An existing destination (*ndvp) is removed first; directories are
 * moved by recursively renaming their contents into a freshly made
 * destination node.  Takes both parents' sdev_contents locks as
 * writer (only one when source and target directories are the same).
 */
int
sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv,
    struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm,
    struct cred *cred)
{
	int error = 0;
	struct vnode *ovp = SDEVTOV(odv);
	struct vnode *nvp;
	struct vattr vattr;
	int doingdir = (ovp->v_type == VDIR);
	char *link = NULL;
	int samedir = (oddv == nddv) ? 1 : 0;
	int bkstore = 0;
	struct sdev_node *idv = NULL;
	struct sdev_node *ndv = NULL;
	timestruc_t now;

	vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
	error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL);
	if (error)
		return (error);

	if (!samedir)
		rw_enter(&oddv->sdev_contents, RW_WRITER);
	rw_enter(&nddv->sdev_contents, RW_WRITER);

	/*
	 * the source may have been deleted by another thread before
	 * we get here.
	 */
	if (odv->sdev_state != SDEV_READY) {
		error = ENOENT;
		goto err_out;
	}

	/* cannot move a directory into itself */
	if (doingdir && (odv == nddv)) {
		error = EINVAL;
		goto err_out;
	}

	/*
	 * If renaming a directory, and the parents are different (".." must be
	 * changed) then the source dir must not be in the dir hierarchy above
	 * the target since it would orphan everything below the source dir.
	 */
	if (doingdir && (oddv != nddv)) {
		error = sdev_checkpath(odv, nddv, cred);
		if (error)
			goto err_out;
	}

	/* destination existing */
	if (*ndvp) {
		nvp = SDEVTOV(*ndvp);
		ASSERT(nvp);

		/* handling renaming to itself */
		if (odv == *ndvp) {
			error = 0;
			goto err_out;
		}

		if (nvp->v_type == VDIR) {
			if (!doingdir) {
				error = EISDIR;
				goto err_out;
			}

			/* a mounted-over destination cannot be removed */
			if (vn_vfswlock(nvp)) {
				error = EBUSY;
				goto err_out;
			}

			if (vn_mountedvfs(nvp) != NULL) {
				vn_vfsunlock(nvp);
				error = EBUSY;
				goto err_out;
			}

			/* in case dir1 exists in dir2 and "mv dir1 dir2" */
			if ((*ndvp)->sdev_nlink > 2) {
				vn_vfsunlock(nvp);
				error = EEXIST;
				goto err_out;
			}
			vn_vfsunlock(nvp);

			(void) sdev_dirdelete(nddv, *ndvp);
			*ndvp = NULL;
			ASSERT(nddv->sdev_attrvp);
			error = VOP_RMDIR(nddv->sdev_attrvp, nnm,
			    nddv->sdev_attrvp, cred, NULL, 0);
			if (error)
				goto err_out;
		} else {
			if (doingdir) {
				error = ENOTDIR;
				goto err_out;
			}

			if (SDEV_IS_PERSIST((*ndvp))) {
				bkstore = 1;
			}

			/*
			 * get rid of the node from the directory cache
			 * note, in case EBUSY is returned, the ZOMBIE
			 * node is taken care in sdev_mknode.
			 */
			(void) sdev_dirdelete(nddv, *ndvp);
			*ndvp = NULL;
			if (bkstore) {
				ASSERT(nddv->sdev_attrvp);
				error = VOP_REMOVE(nddv->sdev_attrvp,
				    nnm, cred, NULL, 0);
				if (error)
					goto err_out;
			}
		}
	}

	/* fix the source for a symlink */
	if (vattr.va_type == VLNK) {
		if (odv->sdev_symlink == NULL) {
			error = sdev_follow_link(odv);
			if (error) {
				error = ENOENT;
				goto err_out;
			}
		}
		ASSERT(odv->sdev_symlink);
		link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP);
	}

	/*
	 * make a fresh node from the source attrs
	 */
	ASSERT(RW_WRITE_HELD(&nddv->sdev_contents));
	error = sdev_mknode(nddv, nnm, ndvp, &vattr,
	    NULL, (void *)link, cred, SDEV_READY);

	if (link)
		kmem_free(link, strlen(link) + 1);

	if (error)
		goto err_out;
	ASSERT(*ndvp);
	ASSERT((*ndvp)->sdev_state == SDEV_READY);

	/* move dir contents */
	if (doingdir) {
		for (idv = SDEV_FIRST_ENTRY(odv); idv;
		    idv = SDEV_NEXT_ENTRY(odv, idv)) {
			error = sdev_rnmnode(odv, idv,
			    (struct sdev_node *)(*ndvp), &ndv,
			    idv->sdev_name, cred);
			if (error)
				goto err_out;
			ndv = NULL;
		}
	}

	/* update timestamps on the new node and its parent */
	if ((*ndvp)->sdev_attrvp) {
		sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred,
		    AT_CTIME|AT_ATIME);
	} else {
		ASSERT((*ndvp)->sdev_attr);
		gethrestime(&now);
		(*ndvp)->sdev_attr->va_ctime = now;
		(*ndvp)->sdev_attr->va_atime = now;
	}

	if (nddv->sdev_attrvp) {
		sdev_update_timestamps(nddv->sdev_attrvp, kcred,
		    AT_MTIME|AT_ATIME);
	} else {
		ASSERT(nddv->sdev_attr);
		gethrestime(&now);
		nddv->sdev_attr->va_mtime = now;
		nddv->sdev_attr->va_atime = now;
	}
	rw_exit(&nddv->sdev_contents);
	if (!samedir)
		rw_exit(&oddv->sdev_contents);

	SDEV_RELE(*ndvp);
	return (error);

err_out:
	rw_exit(&nddv->sdev_contents);
/*
 * NOTE(review): this chunk opens inside sdev_rnmnode(); the lines below
 * are its shared exit path (the function header is above this view).
 */
	if (!samedir)
		rw_exit(&oddv->sdev_contents);
	return (error);
}

/*
 * Merge sdev_node specific information into an attribute structure.
 *
 * note: sdev_node is not locked here
 */
void
sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap)
{
	struct vnode *vp = SDEVTOV(dv);

	/* identity fields come straight from the sdev_node */
	vap->va_nlink = dv->sdev_nlink;
	vap->va_nodeid = dv->sdev_ino;
	vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev;
	vap->va_type = vp->v_type;

	if (vp->v_type == VDIR) {
		/* a directory reports its own v_rdev as the fsid */
		vap->va_rdev = 0;
		vap->va_fsid = vp->v_rdev;
	} else if (vp->v_type == VLNK) {
		/* force the file-type bits of va_mode to match the vtype */
		vap->va_rdev = 0;
		vap->va_mode &= ~S_IFMT;
		vap->va_mode |= S_IFLNK;
	} else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) {
		/* devices carry the real dev_t plus S_IFCHR/S_IFBLK */
		vap->va_rdev = vp->v_rdev;
		vap->va_mode &= ~S_IFMT;
		if (vap->va_type == VCHR)
			vap->va_mode |= S_IFCHR;
		else
			vap->va_mode |= S_IFBLK;
	} else {
		vap->va_rdev = 0;
	}
}

/*
 * Return a pointer to the shared default-attribute template for the
 * given vnode type, or NULL if the type has no template.  The templates
 * (sdev_vattr_dir/chr/blk/lnk) are shared objects; callers that modify
 * the attributes copy the structure first (see sdev_filldir_dynamic).
 */
struct vattr *
sdev_getdefault_attr(enum vtype type)
{
	if (type == VDIR)
		return (&sdev_vattr_dir);
	else if (type == VCHR)
		return (&sdev_vattr_chr);
	else if (type == VBLK)
		return (&sdev_vattr_blk);
	else if (type == VLNK)
		return (&sdev_vattr_lnk);
	else
		return (NULL);
}

/*
 * Convert an sdev_node into the vnode handed back to the caller.
 * For character and block devices the specfs vnode is substituted via
 * specvp() and the reference on the original vnode is released;
 * all other types are returned as-is.
 *
 * Returns 0 on success, ENOSYS if specvp() fails (in which case *vpp
 * is NULLVP).
 */
int
sdev_to_vp(struct sdev_node *dv, struct vnode **vpp)
{
	int rv = 0;
	struct vnode *vp = SDEVTOV(dv);

	switch (vp->v_type) {
	case VCHR:
	case VBLK:
		/*
		 * If vnode is a device, return special vnode instead
		 * (though it knows all about -us- via sp->s_realvp)
		 */
		*vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred);
		VN_RELE(vp);
		if (*vpp == NULLVP)
			rv = ENOSYS;
		break;
	default:	/* most types are returned as is */
		*vpp = vp;
		break;
	}
	return (rv);
}

/*
 * junction between devname and root file system, e.g.
 * ufs
 */
int
devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp)
{
	struct vnode *rdvp = ddv->sdev_attrvp;
	int rval = 0;

	ASSERT(rdvp);

	/* delegate the lookup to the backing (attribute) file system */
	rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL,
	    NULL);
	return (rval);
}

/*
 * Populate ddv's in-core directory cache from its persistent backing
 * store by reading the backing directory and creating an sdev_node for
 * every entry not already cached.  dlen is the size of the readdir
 * buffer to use.  No-op if the node has no attrvp or SDEV_BUILD is
 * clear.
 *
 * NOTE(review): the rw_tryupgrade()/rw_downgrade() dance implies the
 * caller holds ddv->sdev_contents as RW_READER — confirm at call sites.
 */
static int
sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred)
{
	struct sdev_node *dv = NULL;
	char *nm;
	struct vnode *dirvp;
	int error;
	vnode_t *vp;
	int eof;
	struct iovec iov;
	struct uio uio;
	struct dirent64 *dp;
	dirent64_t *dbuf;
	size_t dbuflen;
	struct vattr vattr;
	char *link = NULL;

	if (ddv->sdev_attrvp == NULL)
		return (0);
	if (!(ddv->sdev_flags & SDEV_BUILD))
		return (0);

	dirvp = ddv->sdev_attrvp;
	VN_HOLD(dirvp);
	dbuf = kmem_zalloc(dlen, KM_SLEEP);

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_fmode = 0;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = 0;
	uio.uio_llimit = MAXOFFSET_T;

	eof = 0;
	error = 0;
	while (!error && !eof) {
		/* read the next chunk of backing-store directory entries */
		uio.uio_resid = dlen;
		iov.iov_base = (char *)dbuf;
		iov.iov_len = dlen;
		(void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL);
		error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0);
		VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL);

		dbuflen = dlen - uio.uio_resid;
		if (error || dbuflen == 0)
			break;

		/* someone else finished the rebuild while we dropped locks */
		if (!(ddv->sdev_flags & SDEV_BUILD))
			break;

		for (dp = dbuf; ((intptr_t)dp <
		    (intptr_t)dbuf + dbuflen);
		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {
			nm = dp->d_name;

			if (strcmp(nm, ".") == 0 ||
			    strcmp(nm, "..") == 0)
				continue;

			vp = NULLVP;
			dv = sdev_cache_lookup(ddv, nm);
			if (dv) {
				if (dv->sdev_state != SDEV_ZOMBIE) {
					SDEV_SIMPLE_RELE(dv);
				} else {
					/*
					 * A ZOMBIE node may not have been
					 * cleaned up from the backing store,
					 * bypass this entry in this case,
					 * and clean it up from the directory
					 * cache if this is the last call.
					 */
					(void) sdev_dirdelete(ddv, dv);
				}
				continue;
			}

			/* refill the cache if not already */
			error = devname_backstore_lookup(ddv, nm, &vp);
			if (error)
				continue;

			vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
			error = VOP_GETATTR(vp, &vattr, 0, cred, NULL);
			if (error)
				continue;

			if (vattr.va_type == VLNK) {
				error = sdev_getlink(vp, &link);
				if (error) {
					continue;
				}
				ASSERT(link != NULL);
			}

			/* node creation requires the writer lock */
			if (!rw_tryupgrade(&ddv->sdev_contents)) {
				rw_exit(&ddv->sdev_contents);
				rw_enter(&ddv->sdev_contents, RW_WRITER);
			}
			error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link,
			    cred, SDEV_READY);
			rw_downgrade(&ddv->sdev_contents);

			if (link != NULL) {
				kmem_free(link, strlen(link) + 1);
				link = NULL;
			}

			if (!error) {
				ASSERT(dv);
				ASSERT(dv->sdev_state != SDEV_ZOMBIE);
				SDEV_SIMPLE_RELE(dv);
			}
			vp = NULL;
			dv = NULL;
		}
	}

done:	/* NOTE(review): no goto targets this label in the visible code */
	VN_RELE(dirvp);
	kmem_free(dbuf, dlen);

	return (error);
}

/*
 * Create SDEV_READY nodes for every non-persistent vtab entry under ddv.
 * Called with ddv->sdev_contents held as writer and SDEV_BUILD set.
 */
void
sdev_filldir_dynamic(struct sdev_node *ddv)
{
	int error;
	int i;
	struct vattr vattr;
	struct vattr *vap = &vattr;
	char *nm = NULL;
	struct sdev_node *dv = NULL;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	ASSERT((ddv->sdev_flags & SDEV_BUILD));

	*vap = *sdev_getdefault_attr(VDIR);	/* note structure copy here */
	gethrestime(&vap->va_atime);
	vap->va_mtime = vap->va_atime;
	vap->va_ctime = vap->va_atime;
	for (i = 0; vtab[i].vt_name != NULL; i++) {
		/*
		 * This early, we may be in a read-only /dev
		 * environment: leave the creation of any nodes we'd
		 * attempt to persist to devfsadm.
		 */
		if (vtab[i].vt_flags & SDEV_PERSIST)
			continue;
		nm = vtab[i].vt_name;
		ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
		dv = NULL;
		error = sdev_mknode(ddv, nm, &dv, vap, NULL,
		    NULL, kcred, SDEV_READY);
		if (error) {
			cmn_err(CE_WARN, "%s/%s: error %d\n",
			    ddv->sdev_name, nm, error);
		} else {
			ASSERT(dv);
			ASSERT(dv->sdev_state != SDEV_ZOMBIE);
			SDEV_SIMPLE_RELE(dv);
		}
	}
}

/*
 * Creating a backing store entry based on sdev_attr.
 * This is called either as part of node creation in a persistent directory
 * or from setattr/setsecattr to persist access attributes across reboot.
 *
 * On success, sdev_attr is freed and replaced by sdev_attrvp (the
 * backing-store vnode).  Caller holds dv->sdev_contents as writer and
 * dv->sdev_attrvp must still be NULL.
 */
int
sdev_shadow_node(struct sdev_node *dv, struct cred *cred)
{
	int error = 0;
	struct vnode *dvp = SDEVTOV(dv->sdev_dotdot);
	struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp;
	struct vattr *vap = dv->sdev_attr;
	char *nm = dv->sdev_name;
	struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL;

	ASSERT(dv && dv->sdev_name && rdvp);
	ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL);

lookup:
	/* try to find it in the backing store */
	error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL,
	    NULL);
	if (error == 0) {
		/* prefer the underlying real vnode if one exists */
		if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) {
			VN_HOLD(rrvp);
			VN_RELE(*rvp);
			*rvp = rrvp;
		}

		/* attributes now live in the backing store, not sdev_attr */
		kmem_free(dv->sdev_attr, sizeof (vattr_t));
		dv->sdev_attr = NULL;
		dv->sdev_attrvp = *rvp;
		return (0);
	}

	/* let's try to persist the node */
	gethrestime(&vap->va_atime);
	vap->va_mtime = vap->va_atime;
	vap->va_ctime = vap->va_atime;
	vap->va_mask |= AT_TYPE|AT_MODE;
	switch (vap->va_type) {
	case VDIR:
		error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL);
		sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n",
		    (void *)(*rvp), error));
		break;
	case VCHR:
			 * Fail the new node creation so that
			 * the directory cache won't have
			 * duplicate entries for the same named node
			 */
			if (error == EBUSY) {
				SDEV_SIMPLE_RELE(*dv);
				sdev_nodedestroy(*dv, 0);
				*dv = NULL;
				return (error);
			}
			sdev_direnter(ddv, *dv);
		} else {
			/* a live duplicate wins: hand it back to the caller */
			ASSERT((*dv)->sdev_state != SDEV_ZOMBIE);
			SDEV_SIMPLE_RELE(*dv);
			sdev_nodedestroy(*dv, 0);
			*dv = dup;
		}
	}

	/*
	 * NOTE(review): non-EBUSY errors from sdev_dirdelete() above are
	 * discarded here; 0 is returned in all remaining cases.
	 */
	return (0);
}

/* remove *dv from ddv's directory cache; caller holds ddv as writer */
static int
sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv)
{
	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	return (sdev_dirdelete(ddv, *dv));
}

/*
 * update the in-core directory cache
 *
 * ops selects SDEV_CACHE_ADD or SDEV_CACHE_DELETE; any other value is
 * silently ignored (returns 0).  Caller holds ddv->sdev_contents as
 * writer and a hold on *dv.
 */
int
sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm,
    sdev_cache_ops_t ops)
{
	int error = 0;

	ASSERT((SDEV_HELD(*dv)));

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	switch (ops) {
	case SDEV_CACHE_ADD:
		error = sdev_cache_add(ddv, dv, nm);
		break;
	case SDEV_CACHE_DELETE:
		error = sdev_cache_delete(ddv, dv);
		break;
	default:
		break;
	}

	return (error);
}

/*
 * retrieve the named entry from the directory cache
 * (returns NULL if not cached; caller holds ddv->sdev_contents)
 */
struct sdev_node *
sdev_cache_lookup(struct sdev_node *ddv, char *nm)
{
	struct sdev_node *dv = NULL;

	ASSERT(RW_LOCK_HELD(&ddv->sdev_contents));
	dv = sdev_findbyname(ddv, nm);

	return (dv);
}

/*
 * Implicit reconfig for nodes constructed by a link generator
 * Start devfsadm if needed, or if devfsadm is in progress,
 * prepare to block on devfsadm either completing or
 * constructing the desired node. As devfsadmd is global
 * in scope, constructing all necessary nodes, we only
 * need to initiate it once.
 */
static int
sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm)
{
	int error = 0;

	if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
		/* devfsadm already active: just arrange to wait on dv */
		sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n",
		    ddv->sdev_name, nm, devfsadm_state));
		mutex_enter(&dv->sdev_lookup_lock);
		SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING));
		mutex_exit(&dv->sdev_lookup_lock);
		error = 0;
	} else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) {
		/* first time: kick off the devfsadm daemon thread */
		sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n",
		    ddv->sdev_name, nm, devfsadm_state));

		sdev_devfsadmd_thread(ddv, dv, kcred);
		mutex_enter(&dv->sdev_lookup_lock);
		SDEV_BLOCK_OTHERS(dv,
		    (SDEV_LOOKUP | SDEV_LGWAITING));
		mutex_exit(&dv->sdev_lookup_lock);
		error = 0;
	} else {
		/* devfsadm has already run once: nothing more to try */
		error = -1;
	}

	return (error);
}

/*
 * Support for specialized device naming construction mechanisms
 *
 * SDEV_VLINK: callback returns a physical path; a symlink node is made.
 * SDEV_VATTR: callback fills in a vattr; a node of that type is made.
 * Returns 0 and sets *dvp on success; -1 or an sdev_mknode error
 * otherwise.  Caller holds ddv->sdev_contents as writer.
 */
static int
sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm,
    int (*callback)(struct sdev_node *, char *, void **, struct cred *,
    void *, char *), int flags, struct cred *cred)
{
	int rv = 0;
	char *physpath = NULL;
	struct vattr vattr;
	struct vattr *vap = &vattr;
	struct sdev_node *dv = NULL;

	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));
	if (flags & SDEV_VLINK) {
		physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
		rv = callback(ddv, nm, (void *)&physpath, kcred, NULL,
		    NULL);
		if (rv) {
			kmem_free(physpath, MAXPATHLEN);
			return (-1);
		}

		*vap = *sdev_getdefault_attr(VLNK);	/* structure copy */
		vap->va_size = strlen(physpath);
		gethrestime(&vap->va_atime);
		vap->va_mtime = vap->va_atime;
		vap->va_ctime = vap->va_atime;

		rv = sdev_mknode(ddv, nm, &dv, vap, NULL,
		    (void *)physpath, cred, SDEV_READY);
		kmem_free(physpath, MAXPATHLEN);
		if (rv)
			return (rv);
	} else if (flags & SDEV_VATTR) {
		/*
		 * /dev/pts
		 *
		 * callback is responsible to set the basic attributes,
		 * e.g. va_type/va_uid/va_gid/
		 * dev_t if VCHR or VBLK/
		 */
		ASSERT(callback);
		rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL);
		if (rv) {
			sdcmn_err3(("devname_lookup_func: SDEV_NONE "
			    "callback failed \n"));
			return (-1);
		}

		rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL,
		    cred, SDEV_READY);

		if (rv)
			return (rv);

	} else {
		impossible(("lookup: %s/%s by %s not supported (%d)\n",
		    SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm,
		    __LINE__));
		rv = -1;
	}

	*dvp = dv;
	return (rv);
}

/*
 * Return non-zero when the executing process is devfsadm (or its
 * daemon form, which execs the same binary).
 */
static int
is_devfsadm_thread(char *exec_name)
{
	/*
	 * note: because devfsadmd -> /usr/sbin/devfsadm
	 * it is safe to use "devfsadm" to capture the lookups
	 * from devfsadm and its daemon version.
	 */
	if (strcmp(exec_name, "devfsadm") == 0)
		return (1);
	return (0);
}

/*
 * Lookup Order:
 *	sdev_node cache;
 *	backing store (SDEV_PERSIST);
 *	DBNR: a. dir_ops implemented in the loadable modules;
 *	      b. vnode ops in vtab.
 */
int
devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp,
    struct cred *cred, int (*callback)(struct sdev_node *, char *, void **,
    struct cred *, void *, char *), int flags)
{
	int rv = 0, nmlen;
	struct vnode *rvp = NULL;
	struct sdev_node *dv = NULL;
	int retried = 0;
	int error = 0;
	struct vattr vattr;
	char *lookup_thread = curproc->p_user.u_comm;
	int failed_flags = 0;
	int (*vtor)(struct sdev_node *) = NULL;
	int state;
	int parent_state;
	char *link = NULL;

	if (SDEVTOV(ddv)->v_type != VDIR)
		return (ENOTDIR);

	/*
	 * Empty name or ., return node itself.
	 */
	nmlen = strlen(nm);
	if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) {
		*vpp = SDEVTOV(ddv);
		VN_HOLD(*vpp);
		return (0);
	}

	/*
	 * .., return the parent directory
	 */
	if ((nmlen == 2) && (strcmp(nm, "..") == 0)) {
		*vpp = SDEVTOV(ddv->sdev_dotdot);
		VN_HOLD(*vpp);
		return (0);
	}

	rw_enter(&ddv->sdev_contents, RW_READER);
	if (ddv->sdev_flags & SDEV_VTOR) {
		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
		ASSERT(vtor);
	}

tryagain:
	/*
	 * (a) directory cache lookup:
	 */
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
	parent_state = ddv->sdev_state;
	dv = sdev_cache_lookup(ddv, nm);
	if (dv) {
		state = dv->sdev_state;
		switch (state) {
		case SDEV_INIT:
			if (is_devfsadm_thread(lookup_thread))
				break;

			/* ZOMBIED parent won't allow node creation */
			if (parent_state == SDEV_ZOMBIE) {
				SD_TRACE_FAILED_LOOKUP(ddv, nm,
				    retried);
				goto nolock_notfound;
			}

			mutex_enter(&dv->sdev_lookup_lock);
			/* compensate the threads started after devfsadm */
			if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
			    !(SDEV_IS_LOOKUP(dv)))
				SDEV_BLOCK_OTHERS(dv,
				    (SDEV_LOOKUP | SDEV_LGWAITING));

			if (SDEV_IS_LOOKUP(dv)) {
				/* another thread is building this node */
				failed_flags |= SLF_REBUILT;
				rw_exit(&ddv->sdev_contents);
				error = sdev_wait4lookup(dv, SDEV_LOOKUP);
				mutex_exit(&dv->sdev_lookup_lock);
				rw_enter(&ddv->sdev_contents, RW_READER);

				if (error != 0) {
					SD_TRACE_FAILED_LOOKUP(ddv, nm,
					    retried);
					goto nolock_notfound;
				}

				/* re-read state: it may have advanced */
				state = dv->sdev_state;
				if (state == SDEV_INIT) {
					SD_TRACE_FAILED_LOOKUP(ddv, nm,
					    retried);
					goto nolock_notfound;
				} else if (state == SDEV_READY) {
					goto found;
				} else if (state == SDEV_ZOMBIE) {
					rw_exit(&ddv->sdev_contents);
					SD_TRACE_FAILED_LOOKUP(ddv, nm,
					    retried);
					SDEV_RELE(dv);
					goto lookup_failed;
				}
			} else {
				mutex_exit(&dv->sdev_lookup_lock);
			}
			break;
		case SDEV_READY:
			goto found;
		case SDEV_ZOMBIE:
			rw_exit(&ddv->sdev_contents);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			SDEV_RELE(dv);
			goto lookup_failed;
		default:
			rw_exit(&ddv->sdev_contents);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			sdev_lookup_failed(ddv, nm, failed_flags);
			*vpp = NULLVP;
			return (ENOENT);
		}
	}
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));

	/*
	 * ZOMBIED parent does not allow new node creation.
	 * bail out early
	 */
	if (parent_state == SDEV_ZOMBIE) {
		rw_exit(&ddv->sdev_contents);
		*vpp = NULLVP;
		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
		return (ENOENT);
	}

	/*
	 * (b0): backing store lookup
	 *	SDEV_PERSIST is default except:
	 *		1) pts nodes
	 *		2) non-chmod'ed local nodes
	 *		3) zvol nodes
	 */
	if (SDEV_IS_PERSIST(ddv)) {
		error = devname_backstore_lookup(ddv, nm, &rvp);

		if (!error) {

			vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
			error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL);
			if (error) {
				rw_exit(&ddv->sdev_contents);
				if (dv)
					SDEV_RELE(dv);
				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
				sdev_lookup_failed(ddv, nm, failed_flags);
				*vpp = NULLVP;
				return (ENOENT);
			}

			if (vattr.va_type == VLNK) {
				error = sdev_getlink(rvp, &link);
				if (error) {
					rw_exit(&ddv->sdev_contents);
					if (dv)
						SDEV_RELE(dv);
					SD_TRACE_FAILED_LOOKUP(ddv, nm,
					    retried);
					sdev_lookup_failed(ddv, nm,
					    failed_flags);
					*vpp = NULLVP;
					return (ENOENT);
				}
				ASSERT(link != NULL);
			}

			/* materialize the backing-store entry in the cache */
			if (!rw_tryupgrade(&ddv->sdev_contents)) {
				rw_exit(&ddv->sdev_contents);
				rw_enter(&ddv->sdev_contents, RW_WRITER);
			}
			error = sdev_mknode(ddv, nm, &dv, &vattr,
			    rvp, link, cred, SDEV_READY);
			rw_downgrade(&ddv->sdev_contents);

			if (link != NULL) {
				kmem_free(link, strlen(link) + 1);
				link = NULL;
			}

			if (error) {
				SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
				rw_exit(&ddv->sdev_contents);
				if (dv)
					SDEV_RELE(dv);
				goto lookup_failed;
			} else {
				goto found;
			}
		} else if (retried) {
			/* devfsadm already ran once for us: give up */
			rw_exit(&ddv->sdev_contents);
			sdcmn_err3(("retry of lookup of %s/%s: failed\n",
			    ddv->sdev_name, nm));
			if (dv)
				SDEV_RELE(dv);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			sdev_lookup_failed(ddv, nm, failed_flags);
			*vpp = NULLVP;
			return (ENOENT);
		}
	}

lookup_create_node:
	/* first thread that is doing the lookup on this node */
	if (callback) {
		ASSERT(dv == NULL);
		if (!rw_tryupgrade(&ddv->sdev_contents)) {
			rw_exit(&ddv->sdev_contents);
			rw_enter(&ddv->sdev_contents, RW_WRITER);
		}
		error = sdev_call_dircallback(ddv, &dv, nm, callback,
		    flags, cred);
		rw_downgrade(&ddv->sdev_contents);
		if (error == 0) {
			goto found;
		} else {
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			rw_exit(&ddv->sdev_contents);
			goto lookup_failed;
		}
	}
	if (!dv) {
		/* placeholder node other lookups can synchronize on */
		if (!rw_tryupgrade(&ddv->sdev_contents)) {
			rw_exit(&ddv->sdev_contents);
			rw_enter(&ddv->sdev_contents, RW_WRITER);
		}
		error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL,
		    cred, SDEV_INIT);
		if (!dv) {
			rw_exit(&ddv->sdev_contents);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			sdev_lookup_failed(ddv, nm, failed_flags);
			*vpp = NULLVP;
			return (ENOENT);
		}
		rw_downgrade(&ddv->sdev_contents);
	}

	/*
	 * (b1) invoking devfsadm once per life time for devfsadm nodes
	 */
	ASSERT(SDEV_HELD(dv));

	if (SDEV_IS_NO_NCACHE(dv))
		failed_flags |= SLF_NO_NCACHE;
	if (sdev_reconfig_boot || !i_ddi_io_initialized() ||
	    SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) ||
	    ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) {
		ASSERT(SDEV_HELD(dv));
		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
		goto nolock_notfound;
	}

	/*
	 * filter out known non-existent devices recorded
	 * during initial reconfiguration boot for which
	 * reconfig should not be done and lookup may
	 * be short-circuited now.
	 */
	if (sdev_lookup_filter(ddv, nm)) {
		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
		goto nolock_notfound;
	}

	/* bypassing devfsadm internal nodes */
	if (is_devfsadm_thread(lookup_thread)) {
		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
		goto nolock_notfound;
	}

	if (sdev_reconfig_disable) {
		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
		goto nolock_notfound;
	}

	error = sdev_call_devfsadmd(ddv, dv, nm);
	if (error == 0) {
		sdcmn_err8(("lookup of %s/%s by %s: reconfig\n",
		    ddv->sdev_name, nm, curproc->p_user.u_comm));
		if (sdev_reconfig_verbose) {
			cmn_err(CE_CONT,
			    "?lookup of %s/%s by %s: reconfig\n",
			    ddv->sdev_name, nm, curproc->p_user.u_comm);
		}
		/* retry only once after devfsadm has been invoked */
		retried = 1;
		failed_flags |= SLF_REBUILT;
		ASSERT(dv->sdev_state != SDEV_ZOMBIE);
		SDEV_SIMPLE_RELE(dv);
		goto tryagain;
	} else {
		SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
		goto nolock_notfound;
	}

found:
	ASSERT(!(dv->sdev_flags & SDEV_STALE));
	ASSERT(dv->sdev_state == SDEV_READY);
	if (vtor) {
		/*
		 * Check validity of returned node
		 */
		switch (vtor(dv)) {
		case SDEV_VTOR_VALID:
			break;
		case SDEV_VTOR_STALE:
			/*
			 * The name exists, but the cache entry is
			 * stale and needs to be re-created.
			 */
			ASSERT(RW_READ_HELD(&ddv->sdev_contents));
			if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
				rw_exit(&ddv->sdev_contents);
				rw_enter(&ddv->sdev_contents, RW_WRITER);
			}
			error = sdev_cache_update(ddv, &dv, nm,
			    SDEV_CACHE_DELETE);
			rw_downgrade(&ddv->sdev_contents);
			if (error == 0) {
				dv = NULL;
				goto lookup_create_node;
			}
			/* FALLTHRU */
		case SDEV_VTOR_INVALID:
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			sdcmn_err7(("lookup: destroy invalid "
			    "node: %s(%p)\n", dv->sdev_name, (void *)dv));
			goto nolock_notfound;
		case SDEV_VTOR_SKIP:
			sdcmn_err7(("lookup: node not applicable - "
			    "skipping: %s(%p)\n", dv->sdev_name, (void *)dv));
			rw_exit(&ddv->sdev_contents);
			SD_TRACE_FAILED_LOOKUP(ddv, nm, retried);
			SDEV_RELE(dv);
			goto lookup_failed;
		default:
			cmn_err(CE_PANIC,
			    "dev fs: validator failed: %s(%p)\n",
			    dv->sdev_name, (void *)dv);
			break;
		}
	}

	rw_exit(&ddv->sdev_contents);
	rv = sdev_to_vp(dv, vpp);
	/*
	 * NOTE(review): if sdev_to_vp() failed, *vpp is NULLVP and the
	 * (*vpp)->v_count below would dereference it in DEBUG builds —
	 * confirm sdcmn_err3 compiles away in production.
	 */
	sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d "
	    "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count,
	    dv->sdev_state, nm, rv));
	return (rv);

nolock_notfound:
	/*
	 * Destroy the node that is created for synchronization purposes.
	 */
	sdcmn_err3(("devname_lookup_func: %s with state %d\n",
	    nm, dv->sdev_state));
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
	if (dv->sdev_state == SDEV_INIT) {
		if (!rw_tryupgrade(&ddv->sdev_contents)) {
			rw_exit(&ddv->sdev_contents);
			rw_enter(&ddv->sdev_contents, RW_WRITER);
		}

		/*
		 * Node state may have changed during the lock
		 * changes. Re-check.
		 */
		if (dv->sdev_state == SDEV_INIT) {
			(void) sdev_dirdelete(ddv, dv);
			rw_exit(&ddv->sdev_contents);
			sdev_lookup_failed(ddv, nm, failed_flags);
			*vpp = NULL;
			return (ENOENT);
		}
	}

	rw_exit(&ddv->sdev_contents);
	SDEV_RELE(dv);

lookup_failed:
	sdev_lookup_failed(ddv, nm, failed_flags);
	*vpp = NULL;
	return (ENOENT);
}

/*
 * Given a directory node, mark all nodes beneath as
 * STALE, i.e. nodes that don't exist as far as new
 * consumers are concerned.  Remove them from the
 * list of directory entries so that no lookup or
 * directory traversal will find them.  The nodes are
 * not deallocated so existing holds are not affected.
 */
void
sdev_stale(struct sdev_node *ddv)
{
	struct sdev_node *dv;
	struct vnode *vp;

	ASSERT(SDEVTOV(ddv)->v_type == VDIR);

	rw_enter(&ddv->sdev_contents, RW_WRITER);
	for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) {
		vp = SDEVTOV(dv);
		/* depth-first: stale subdirectories before this level */
		if (vp->v_type == VDIR)
			sdev_stale(dv);

		sdcmn_err9(("sdev_stale: setting stale %s\n",
		    dv->sdev_path));
		dv->sdev_flags |= SDEV_STALE;
		avl_remove(&ddv->sdev_entries, dv);
	}
	/* force a rebuild of the directory on next fill */
	ddv->sdev_flags |= SDEV_BUILD;
	rw_exit(&ddv->sdev_contents);
}

/*
 * Given a directory node, clean out all the nodes beneath.
 * If expr is specified, clean node with names matching expr.
 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale,
 * so they are excluded from future lookups.
2285 */ 2286 int 2287 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags) 2288 { 2289 int error = 0; 2290 int busy = 0; 2291 struct vnode *vp; 2292 struct sdev_node *dv, *next = NULL; 2293 int bkstore = 0; 2294 int len = 0; 2295 char *bks_name = NULL; 2296 2297 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 2298 2299 /* 2300 * We try our best to destroy all unused sdev_node's 2301 */ 2302 rw_enter(&ddv->sdev_contents, RW_WRITER); 2303 for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) { 2304 next = SDEV_NEXT_ENTRY(ddv, dv); 2305 vp = SDEVTOV(dv); 2306 2307 if (expr && gmatch(dv->sdev_name, expr) == 0) 2308 continue; 2309 2310 if (vp->v_type == VDIR && 2311 sdev_cleandir(dv, NULL, flags) != 0) { 2312 sdcmn_err9(("sdev_cleandir: dir %s busy\n", 2313 dv->sdev_name)); 2314 busy++; 2315 continue; 2316 } 2317 2318 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) { 2319 sdcmn_err9(("sdev_cleandir: dir %s busy\n", 2320 dv->sdev_name)); 2321 busy++; 2322 continue; 2323 } 2324 2325 /* 2326 * at this point, either dv is not held or SDEV_ENFORCE 2327 * is specified. In either case, dv needs to be deleted 2328 */ 2329 SDEV_HOLD(dv); 2330 2331 bkstore = SDEV_IS_PERSIST(dv) ? 
1 : 0; 2332 if (bkstore && (vp->v_type == VDIR)) 2333 bkstore += 1; 2334 2335 if (bkstore) { 2336 len = strlen(dv->sdev_name) + 1; 2337 bks_name = kmem_alloc(len, KM_SLEEP); 2338 bcopy(dv->sdev_name, bks_name, len); 2339 } 2340 2341 error = sdev_dirdelete(ddv, dv); 2342 2343 if (error == EBUSY) { 2344 sdcmn_err9(("sdev_cleandir: dir busy\n")); 2345 busy++; 2346 } 2347 2348 /* take care the backing store clean up */ 2349 if (bkstore && (error == 0)) { 2350 ASSERT(bks_name); 2351 ASSERT(ddv->sdev_attrvp); 2352 2353 if (bkstore == 1) { 2354 error = VOP_REMOVE(ddv->sdev_attrvp, 2355 bks_name, kcred, NULL, 0); 2356 } else if (bkstore == 2) { 2357 error = VOP_RMDIR(ddv->sdev_attrvp, 2358 bks_name, ddv->sdev_attrvp, kcred, NULL, 0); 2359 } 2360 2361 /* do not propagate the backing store errors */ 2362 if (error) { 2363 sdcmn_err9(("sdev_cleandir: backing store" 2364 "not cleaned\n")); 2365 error = 0; 2366 } 2367 2368 bkstore = 0; 2369 kmem_free(bks_name, len); 2370 bks_name = NULL; 2371 len = 0; 2372 } 2373 } 2374 2375 ddv->sdev_flags |= SDEV_BUILD; 2376 rw_exit(&ddv->sdev_contents); 2377 2378 if (busy) { 2379 error = EBUSY; 2380 } 2381 2382 return (error); 2383 } 2384 2385 /* 2386 * a convenient wrapper for readdir() funcs 2387 */ 2388 size_t 2389 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off) 2390 { 2391 size_t reclen = DIRENT64_RECLEN(strlen(nm)); 2392 if (reclen > size) 2393 return (0); 2394 2395 de->d_ino = (ino64_t)ino; 2396 de->d_off = (off64_t)off + 1; 2397 de->d_reclen = (ushort_t)reclen; 2398 (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen)); 2399 return (reclen); 2400 } 2401 2402 /* 2403 * sdev_mount service routines 2404 */ 2405 int 2406 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args) 2407 { 2408 int error; 2409 2410 if (uap->datalen != sizeof (*args)) 2411 return (EINVAL); 2412 2413 if (error = copyin(uap->dataptr, args, sizeof (*args))) { 2414 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not" 2415 
"get user data. error %d\n", error);
		return (EFAULT);
	}

	return (0);
}

/*
 * Step to the dirent64 record immediately following "dp" in a readdir
 * output buffer; records are laid back to back and sized by d_reclen.
 */
#ifdef nextdp
#undef nextdp
#endif
#define	nextdp(dp)	((struct dirent64 *) \
	    (intptr_t)((char *)(dp) + (dp)->d_reclen))

/*
 * readdir helper func
 *
 * Assemble dirent64 records for the sdev directory "vp" into a scratch
 * buffer, then uiomove() them out to the caller's single iovec.
 *
 *   vp    - directory vnode being read; must be VDIR
 *   uiop  - uio describing the destination; exactly one iovec required
 *   cred  - credentials used when (re)filling the dir from backing store
 *   eofp  - if non-NULL, set to 1 once the whole directory has been
 *           emitted, 0 otherwise
 *   flags - SDEV_BROWSE allows the implicit devfsadm reconfig below
 *
 * Returns 0 or an errno.  The caller must hold ddv->sdev_contents as
 * reader (asserted); note the lock is dropped and re-acquired while
 * waiting for devfsadmd, so directory contents may change across that
 * window.
 *
 * Offset layout: "." is at offset 0, ".." at 1, cached entries from 2.
 */
int
devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp,
    int flags)
{
	struct sdev_node *ddv = VTOSDEV(vp);
	struct sdev_node *dv;
	dirent64_t *dp;
	ulong_t outcount = 0;
	size_t namelen;
	ulong_t alloc_count;
	void *outbuf;
	struct iovec *iovp;
	int error = 0;
	size_t reclen;
	offset_t diroff;
	offset_t soff;
	int this_reclen;
	int (*vtor)(struct sdev_node *) = NULL;
	struct vattr attr;
	timestruc_t now;

	ASSERT(ddv->sdev_attr || ddv->sdev_attrvp);
	ASSERT(RW_READ_HELD(&ddv->sdev_contents));

	/* past the largest representable offset: nothing more to return */
	if (uiop->uio_loffset >= MAXOFF_T) {
		if (eofp)
			*eofp = 1;
		return (0);
	}

	if (uiop->uio_iovcnt != 1)
		return (EINVAL);

	if (vp->v_type != VDIR)
		return (ENOTDIR);

	/* optional per-directory entry validator (see switch below) */
	if (ddv->sdev_flags & SDEV_VTOR) {
		vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv);
		ASSERT(vtor);
	}

	if (eofp != NULL)
		*eofp = 0;

	soff = uiop->uio_loffset;
	iovp = uiop->uio_iov;
	alloc_count = iovp->iov_len;
	/*
	 * NOTE(review): assumes iov_len > 0; a zero-length iovec would
	 * make this allocation degenerate -- confirm the VFS layer
	 * guarantees a non-empty read buffer here.
	 */
	dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP);
	outcount = 0;

	/* a ZOMBIE dir is never rebuilt; emit only what is cached */
	if (ddv->sdev_state == SDEV_ZOMBIE)
		goto get_cache;

	if (SDEV_IS_GLOBAL(ddv)) {
		/*
		 * In the global zone, a browse-style readdir may
		 * implicitly kick off devfsadm to reconfigure /dev,
		 * unless reconfig already ran, is running, or is
		 * administratively disabled.
		 */
		if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) &&
		    !sdev_reconfig_boot && (flags & SDEV_BROWSE) &&
		    !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) &&
		    ((moddebug & MODDEBUG_FINI_EBUSY) == 0) &&
		    !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) &&
		    !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) &&
		    !sdev_reconfig_disable) {
			/*
			 * invoking "devfsadm" to do system device reconfig
			 */
			mutex_enter(&ddv->sdev_lookup_lock);
			SDEV_BLOCK_OTHERS(ddv,
			    (SDEV_READDIR|SDEV_LGWAITING));
			mutex_exit(&ddv->sdev_lookup_lock);

			sdcmn_err8(("readdir of %s by %s: reconfig\n",
			    ddv->sdev_path, curproc->p_user.u_comm));
			if (sdev_reconfig_verbose) {
				cmn_err(CE_CONT,
				    "?readdir of %s by %s: reconfig\n",
				    ddv->sdev_path, curproc->p_user.u_comm);
			}

			sdev_devfsadmd_thread(ddv, NULL, kcred);
		} else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) {
			/*
			 * compensate the "ls" started later than "devfsadm"
			 */
			mutex_enter(&ddv->sdev_lookup_lock);
			SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING));
			mutex_exit(&ddv->sdev_lookup_lock);
		}

		/*
		 * release the contents lock so that
		 * the cache may be updated by devfsadmd
		 */
		rw_exit(&ddv->sdev_contents);
		mutex_enter(&ddv->sdev_lookup_lock);
		if (SDEV_IS_READDIR(ddv))
			(void) sdev_wait4lookup(ddv, SDEV_READDIR);
		mutex_exit(&ddv->sdev_lookup_lock);
		rw_enter(&ddv->sdev_contents, RW_READER);

		sdcmn_err4(("readdir of directory %s by %s\n",
		    ddv->sdev_name, curproc->p_user.u_comm));
		/* one-shot (re)population from the persistent store */
		if (ddv->sdev_flags & SDEV_BUILD) {
			if (SDEV_IS_PERSIST(ddv)) {
				error = sdev_filldir_from_store(ddv,
				    alloc_count, cred);
			}
			ddv->sdev_flags &= ~SDEV_BUILD;
		}
	}

get_cache:
	/* handle "." and ".." */
	diroff = 0;
	if (soff == 0) {
		/* first time */
		this_reclen = DIRENT64_RECLEN(1);
		if (alloc_count < this_reclen) {
			error = EINVAL;
			goto done;
		}

		dp->d_ino = (ino64_t)ddv->sdev_ino;
		dp->d_off = (off64_t)1;
		dp->d_reclen = (ushort_t)this_reclen;

		(void) strncpy(dp->d_name, ".",
		    DIRENT64_NAMELEN(this_reclen));
		outcount += dp->d_reclen;
		dp = nextdp(dp);
	}

	diroff++;
	if (soff <= 1) {
		this_reclen = DIRENT64_RECLEN(2);
		if (alloc_count < outcount + this_reclen) {
			error = EINVAL;
			goto done;
		}

		dp->d_reclen = (ushort_t)this_reclen;
		dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino;
		dp->d_off = (off64_t)2;

		(void) strncpy(dp->d_name, "..",
		    DIRENT64_NAMELEN(this_reclen));
		outcount += dp->d_reclen;

		dp = nextdp(dp);
	}


	/* gets the cache */
	diroff++;
	for (dv = SDEV_FIRST_ENTRY(ddv); dv;
	    dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) {
		sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n",
		    diroff, soff, dv->sdev_name));

		/* bypassing pre-matured nodes */
		if (diroff < soff || (dv->sdev_state != SDEV_READY)) {
			sdcmn_err3(("sdev_readdir: pre-mature node "
			    "%s %d\n", dv->sdev_name, dv->sdev_state));
			continue;
		}

		/*
		 * Check validity of node
		 * Drop invalid and nodes to be skipped.
		 * A node the validator indicates as stale needs
		 * to be returned as presumably the node name itself
		 * is valid and the node data itself will be refreshed
		 * on lookup.  An application performing a readdir then
		 * stat on each entry should thus always see consistent
		 * data.  In any case, it is not possible to synchronize
		 * with dynamic kernel state, and any view we return can
		 * never be anything more than a snapshot at a point in time.
		 */
		if (vtor) {
			switch (vtor(dv)) {
			case SDEV_VTOR_VALID:
				break;
			case SDEV_VTOR_INVALID:
			case SDEV_VTOR_SKIP:
				continue;
			case SDEV_VTOR_STALE:
				sdcmn_err3(("sdev_readir: %s stale\n",
				    dv->sdev_name));
				break;
			default:
				cmn_err(CE_PANIC,
				    "dev fs: validator failed: %s(%p)\n",
				    dv->sdev_name, (void *)dv);
				break;
				/*NOTREACHED*/
			}
		}

		namelen = strlen(dv->sdev_name);
		reclen = DIRENT64_RECLEN(namelen);
		/* out of room: emit what we have and stop */
		if (outcount + reclen > alloc_count) {
			goto full;
		}
		dp->d_reclen = (ushort_t)reclen;
		dp->d_ino = (ino64_t)dv->sdev_ino;
		/* d_off is the offset to resume from (next entry) */
		dp->d_off = (off64_t)diroff + 1;
		(void) strncpy(dp->d_name, dv->sdev_name,
		    DIRENT64_NAMELEN(reclen));
		outcount += reclen;
		dp = nextdp(dp);
	}

full:
	sdcmn_err4(("sdev_readdir: moving %lu bytes: "
	    "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff,
	    (void *)dv));

	/* copy the assembled records out to the caller's buffer */
	if (outcount)
		error = uiomove(outbuf, outcount, UIO_READ, uiop);

	if (!error) {
		uiop->uio_loffset = diroff;
		/* dv == NULL means we walked off the end of the cache */
		if (eofp)
			*eofp = dv ? 0 : 1;
	}


	/* touch access/change times on the backing attribute node */
	if (ddv->sdev_attrvp) {
		gethrestime(&now);
		attr.va_ctime = now;
		attr.va_atime = now;
		attr.va_mask = AT_CTIME|AT_ATIME;

		(void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL);
	}
done:
	kmem_free(outbuf, alloc_count);
	return (error);
}

/*
 * Resolve "path" to a held vnode in the persistent filesystem that
 * underlies /dev, walking the path by hand from rootdir so mount
 * points, ".." across mounts, and symlinks are handled here.  On
 * success *r_vp is a held vnode (caller must VN_RELE it).  Global
 * zone only (asserted).
 */
static int
sdev_modctl_lookup(const char *path, vnode_t **r_vp)
{
	vnode_t *vp;
	vnode_t *cvp;
	struct sdev_node *svp;
	char *nm;
	struct pathname pn;
	int error;
	int persisted = 0;

	ASSERT(INGLOBALZONE(curproc));

	if (error = pn_get((char *)path, UIO_SYSSPACE, &pn))
		return (error);
	nm = kmem_alloc(MAXNAMELEN, KM_SLEEP);

	vp = rootdir;
	VN_HOLD(vp);

	while (pn_pathleft(&pn)) {
		ASSERT(vp->v_type == VDIR || vp->v_type == VLNK);
		(void) pn_getcomponent(&pn, nm);

		/*
		 * Deal with the .. special case where we may be
		 * traversing up across a mount point, to the
		 * root of this filesystem or global root.
		 */
		if (nm[0] == '.' && nm[1] == '.'
&& nm[2] == 0) { 2700 checkforroot: 2701 if (VN_CMP(vp, rootdir)) { 2702 nm[1] = 0; 2703 } else if (vp->v_flag & VROOT) { 2704 vfs_t *vfsp; 2705 cvp = vp; 2706 vfsp = cvp->v_vfsp; 2707 vfs_rlock_wait(vfsp); 2708 vp = cvp->v_vfsp->vfs_vnodecovered; 2709 if (vp == NULL || 2710 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) { 2711 vfs_unlock(vfsp); 2712 VN_RELE(cvp); 2713 error = EIO; 2714 break; 2715 } 2716 VN_HOLD(vp); 2717 vfs_unlock(vfsp); 2718 VN_RELE(cvp); 2719 cvp = NULL; 2720 goto checkforroot; 2721 } 2722 } 2723 2724 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL, 2725 NULL, NULL); 2726 if (error) { 2727 VN_RELE(vp); 2728 break; 2729 } 2730 2731 /* traverse mount points encountered on our journey */ 2732 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) { 2733 VN_RELE(vp); 2734 VN_RELE(cvp); 2735 break; 2736 } 2737 2738 /* 2739 * symbolic link, can be either relative and absolute 2740 */ 2741 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) { 2742 struct pathname linkpath; 2743 pn_alloc(&linkpath); 2744 if (error = pn_getsymlink(cvp, &linkpath, kcred)) { 2745 pn_free(&linkpath); 2746 break; 2747 } 2748 if (pn_pathleft(&linkpath) == 0) 2749 (void) pn_set(&linkpath, "."); 2750 error = pn_insert(&pn, &linkpath, strlen(nm)); 2751 pn_free(&linkpath); 2752 if (pn.pn_pathlen == 0) { 2753 VN_RELE(vp); 2754 return (ENOENT); 2755 } 2756 if (pn.pn_path[0] == '/') { 2757 pn_skipslash(&pn); 2758 VN_RELE(vp); 2759 VN_RELE(cvp); 2760 vp = rootdir; 2761 VN_HOLD(vp); 2762 } else { 2763 VN_RELE(cvp); 2764 } 2765 continue; 2766 } 2767 2768 VN_RELE(vp); 2769 2770 /* 2771 * Direct the operation to the persisting filesystem 2772 * underlying /dev. Bail if we encounter a 2773 * non-persistent dev entity here. 
2774 */ 2775 if (cvp->v_vfsp->vfs_fstype == devtype) { 2776 2777 if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) { 2778 error = ENOENT; 2779 VN_RELE(cvp); 2780 break; 2781 } 2782 2783 if (VTOSDEV(cvp) == NULL) { 2784 error = ENOENT; 2785 VN_RELE(cvp); 2786 break; 2787 } 2788 svp = VTOSDEV(cvp); 2789 if ((vp = svp->sdev_attrvp) == NULL) { 2790 error = ENOENT; 2791 VN_RELE(cvp); 2792 break; 2793 } 2794 persisted = 1; 2795 VN_HOLD(vp); 2796 VN_RELE(cvp); 2797 cvp = vp; 2798 } 2799 2800 vp = cvp; 2801 pn_skipslash(&pn); 2802 } 2803 2804 kmem_free(nm, MAXNAMELEN); 2805 pn_free(&pn); 2806 2807 if (error) 2808 return (error); 2809 2810 /* 2811 * Only return persisted nodes in the filesystem underlying /dev. 2812 */ 2813 if (!persisted) { 2814 VN_RELE(vp); 2815 return (ENOENT); 2816 } 2817 2818 *r_vp = vp; 2819 return (0); 2820 } 2821 2822 int 2823 sdev_modctl_readdir(const char *dir, char ***dirlistp, 2824 int *npathsp, int *npathsp_alloc, int checking_empty) 2825 { 2826 char **pathlist = NULL; 2827 char **newlist = NULL; 2828 int npaths = 0; 2829 int npaths_alloc = 0; 2830 dirent64_t *dbuf = NULL; 2831 int n; 2832 char *s; 2833 int error; 2834 vnode_t *vp; 2835 int eof; 2836 struct iovec iov; 2837 struct uio uio; 2838 struct dirent64 *dp; 2839 size_t dlen; 2840 size_t dbuflen; 2841 int ndirents = 64; 2842 char *nm; 2843 2844 error = sdev_modctl_lookup(dir, &vp); 2845 sdcmn_err11(("modctl readdir: %s by %s: %s\n", 2846 dir, curproc->p_user.u_comm, 2847 (error == 0) ? 
"ok" : "failed"));
	if (error)
		return (error);

	/* scratch buffer for VOP_READDIR, sized for ndirents records */
	dlen = ndirents * (sizeof (*dbuf));
	dbuf = kmem_alloc(dlen, KM_SLEEP);

	/* kernel-space uio over the scratch buffer */
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_fmode = 0;
	uio.uio_extflg = UIO_COPY_CACHED;
	uio.uio_loffset = 0;
	uio.uio_llimit = MAXOFFSET_T;

	eof = 0;
	error = 0;
	while (!error && !eof) {
		uio.uio_resid = dlen;
		iov.iov_base = (char *)dbuf;
		iov.iov_len = dlen;

		(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
		error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0);
		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);

		/* number of bytes actually produced this round */
		dbuflen = dlen - uio.uio_resid;

		if (error || dbuflen == 0)
			break;

		/* walk the packed dirent64 records in the buffer */
		for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen);
		    dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) {

			nm = dp->d_name;

			if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0)
				continue;
			/*
			 * Grow the list in chunks of 64 slots; the +1
			 * keeps a NULL terminator slot (kmem_zalloc).
			 */
			if (npaths == npaths_alloc) {
				npaths_alloc += 64;
				newlist = (char **)
				    kmem_zalloc((npaths_alloc + 1) *
				    sizeof (char *), KM_SLEEP);
				if (pathlist) {
					bcopy(pathlist, newlist,
					    npaths * sizeof (char *));
					kmem_free(pathlist,
					    (npaths + 1) * sizeof (char *));
				}
				pathlist = newlist;
			}
			n = strlen(nm) + 1;
			s = kmem_alloc(n, KM_SLEEP);
			bcopy(nm, s, n);
			pathlist[npaths++] = s;
			sdcmn_err11(("  %s/%s\n", dir, s));

			/* if checking empty, one entry is as good as many */
			if (checking_empty) {
				eof = 1;
				break;
			}
		}
	}

/*
 * NOTE(review): this label is not the target of any goto in this
 * function -- candidate for removal.  Also, if VOP_READDIR fails after
 * some entries were collected, pathlist and its strings are not freed
 * on the error return below -- confirm whether this can leak.
 */
exit:
	VN_RELE(vp);

	if (dbuf)
		kmem_free(dbuf, dlen);

	if (error)
		return (error);

	*dirlistp = pathlist;
	*npathsp = npaths;
	*npathsp_alloc = npaths_alloc;

	return (0);
}

/*
 * Free a path list previously returned by sdev_modctl_readdir():
 * each of the npaths strings, then the (npaths_alloc + 1)-slot array.
 */
void
sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc)
{
	int i, n;

	for (i = 0; i < npaths; i++) {
		n = strlen(pathlist[i]) + 1;
		kmem_free(pathlist[i], n);
	}

	kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *));
}

/*
 * Return 0 if "path" resolves to a persisted node in the filesystem
 * underlying /dev, otherwise an errno from sdev_modctl_lookup().
 */
int
sdev_modctl_devexists(const char *path)
{
	vnode_t *vp;
	int error;

	error = sdev_modctl_lookup(path, &vp);
	sdcmn_err11(("modctl dev exists: %s by %s: %s\n",
	    path, curproc->p_user.u_comm,
	    (error == 0) ? "ok" : "failed"));
	if (error == 0)
		VN_RELE(vp);

	return (error);
}

extern int sdev_vnodeops_tbl_size;

/*
 * construct a new template with overrides from vtab
 *
 * Returns a kmem_alloc'ed copy of the standard sdev vnode ops table
 * with matching entries replaced by those in "tab"; release it with
 * sdev_free_vtab().  An override whose name is not found in the
 * standard table is only reported via cmn_err, not added.
 */
static fs_operation_def_t *
sdev_merge_vtab(const fs_operation_def_t tab[])
{
	fs_operation_def_t *new;
	const fs_operation_def_t *tab_entry;

	/* make a copy of standard vnode ops table */
	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);

	/* replace the overrides from tab */
	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
		fs_operation_def_t *std_entry = new;
		while (std_entry->name) {
			if (strcmp(tab_entry->name, std_entry->name) == 0) {
				std_entry->func = tab_entry->func;
				break;
			}
			std_entry++;
		}
		if (std_entry->name == NULL)
			cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.",
			    tab_entry->name);
	}

	return (new);
}

/* free memory allocated by sdev_merge_vtab */
static void
sdev_free_vtab(fs_operation_def_t *new)
{
	kmem_free(new, sdev_vnodeops_tbl_size);
}

/*
 * a generic setattr() function
 *
 * note: flags only supports AT_UID and AT_GID.
 *	 Future enhancements can be done for other types, e.g.
AT_MODE 3002 */ 3003 int 3004 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags, 3005 struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *, 3006 int), int protocol) 3007 { 3008 struct sdev_node *dv = VTOSDEV(vp); 3009 struct sdev_node *parent = dv->sdev_dotdot; 3010 struct vattr *get; 3011 uint_t mask = vap->va_mask; 3012 int error; 3013 3014 /* some sanity checks */ 3015 if (vap->va_mask & AT_NOSET) 3016 return (EINVAL); 3017 3018 if (vap->va_mask & AT_SIZE) { 3019 if (vp->v_type == VDIR) { 3020 return (EISDIR); 3021 } 3022 } 3023 3024 /* no need to set attribute, but do not fail either */ 3025 ASSERT(parent); 3026 rw_enter(&parent->sdev_contents, RW_READER); 3027 if (dv->sdev_state == SDEV_ZOMBIE) { 3028 rw_exit(&parent->sdev_contents); 3029 return (0); 3030 } 3031 3032 /* If backing store exists, just set it. */ 3033 if (dv->sdev_attrvp) { 3034 rw_exit(&parent->sdev_contents); 3035 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 3036 } 3037 3038 /* 3039 * Otherwise, for nodes with the persistence attribute, create it. 
3040 */ 3041 ASSERT(dv->sdev_attr); 3042 if (SDEV_IS_PERSIST(dv) || 3043 ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) { 3044 sdev_vattr_merge(dv, vap); 3045 rw_enter(&dv->sdev_contents, RW_WRITER); 3046 error = sdev_shadow_node(dv, cred); 3047 rw_exit(&dv->sdev_contents); 3048 rw_exit(&parent->sdev_contents); 3049 3050 if (error) 3051 return (error); 3052 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 3053 } 3054 3055 3056 /* 3057 * sdev_attr was allocated in sdev_mknode 3058 */ 3059 rw_enter(&dv->sdev_contents, RW_WRITER); 3060 error = secpolicy_vnode_setattr(cred, vp, vap, 3061 dv->sdev_attr, flags, sdev_unlocked_access, dv); 3062 if (error) { 3063 rw_exit(&dv->sdev_contents); 3064 rw_exit(&parent->sdev_contents); 3065 return (error); 3066 } 3067 3068 get = dv->sdev_attr; 3069 if (mask & AT_MODE) { 3070 get->va_mode &= S_IFMT; 3071 get->va_mode |= vap->va_mode & ~S_IFMT; 3072 } 3073 3074 if ((mask & AT_UID) || (mask & AT_GID)) { 3075 if (mask & AT_UID) 3076 get->va_uid = vap->va_uid; 3077 if (mask & AT_GID) 3078 get->va_gid = vap->va_gid; 3079 /* 3080 * a callback must be provided if the protocol is set 3081 */ 3082 if ((protocol & AT_UID) || (protocol & AT_GID)) { 3083 ASSERT(callback); 3084 error = callback(dv, get, protocol); 3085 if (error) { 3086 rw_exit(&dv->sdev_contents); 3087 rw_exit(&parent->sdev_contents); 3088 return (error); 3089 } 3090 } 3091 } 3092 3093 if (mask & AT_ATIME) 3094 get->va_atime = vap->va_atime; 3095 if (mask & AT_MTIME) 3096 get->va_mtime = vap->va_mtime; 3097 if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) { 3098 gethrestime(&get->va_ctime); 3099 } 3100 3101 sdev_vattr_merge(dv, get); 3102 rw_exit(&dv->sdev_contents); 3103 rw_exit(&parent->sdev_contents); 3104 return (0); 3105 } 3106 3107 /* 3108 * a generic inactive() function 3109 */ 3110 /*ARGSUSED*/ 3111 void 3112 devname_inactive_func(struct vnode *vp, struct cred *cred, 3113 void (*callback)(struct vnode *)) 3114 { 3115 int clean; 3116 struct 
sdev_node *dv = VTOSDEV(vp); 3117 struct sdev_node *ddv = dv->sdev_dotdot; 3118 int state; 3119 3120 rw_enter(&ddv->sdev_contents, RW_WRITER); 3121 state = dv->sdev_state; 3122 3123 mutex_enter(&vp->v_lock); 3124 ASSERT(vp->v_count >= 1); 3125 3126 if (vp->v_count == 1 && callback != NULL) 3127 callback(vp); 3128 3129 clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE); 3130 3131 /* 3132 * last ref count on the ZOMBIE node is released. 3133 * clean up the sdev_node, and 3134 * release the hold on the backing store node so that 3135 * the ZOMBIE backing stores also cleaned out. 3136 */ 3137 if (clean) { 3138 ASSERT(ddv); 3139 3140 ddv->sdev_nlink--; 3141 if (vp->v_type == VDIR) { 3142 dv->sdev_nlink--; 3143 } 3144 if ((dv->sdev_flags & SDEV_STALE) == 0) 3145 avl_remove(&ddv->sdev_entries, dv); 3146 dv->sdev_nlink--; 3147 --vp->v_count; 3148 mutex_exit(&vp->v_lock); 3149 sdev_nodedestroy(dv, 0); 3150 } else { 3151 --vp->v_count; 3152 mutex_exit(&vp->v_lock); 3153 } 3154 rw_exit(&ddv->sdev_contents); 3155 } 3156