1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2013, Joyent, Inc. All rights reserved. 24 */ 25 26 /* 27 * utility routines for the /dev fs 28 */ 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/t_lock.h> 33 #include <sys/systm.h> 34 #include <sys/sysmacros.h> 35 #include <sys/user.h> 36 #include <sys/time.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/file.h> 40 #include <sys/fcntl.h> 41 #include <sys/flock.h> 42 #include <sys/kmem.h> 43 #include <sys/uio.h> 44 #include <sys/errno.h> 45 #include <sys/stat.h> 46 #include <sys/cred.h> 47 #include <sys/dirent.h> 48 #include <sys/pathname.h> 49 #include <sys/cmn_err.h> 50 #include <sys/debug.h> 51 #include <sys/mode.h> 52 #include <sys/policy.h> 53 #include <fs/fs_subr.h> 54 #include <sys/mount.h> 55 #include <sys/fs/snode.h> 56 #include <sys/fs/dv_node.h> 57 #include <sys/fs/sdev_impl.h> 58 #include <sys/sunndi.h> 59 #include <sys/sunmdi.h> 60 #include <sys/conf.h> 61 #include <sys/proc.h> 62 #include <sys/user.h> 63 #include <sys/modctl.h> 64 65 #ifdef DEBUG 66 int sdev_debug = 0x00000001; 67 int sdev_debug_cache_flags = 0; 68 #endif 69 70 /* 71 * globals 72 */ 73 /* prototype memory vattrs */ 74 vattr_t sdev_vattr_dir = { 75 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 76 VDIR, /* va_type */ 77 SDEV_DIRMODE_DEFAULT, /* va_mode */ 78 SDEV_UID_DEFAULT, /* va_uid */ 79 SDEV_GID_DEFAULT, /* va_gid */ 80 0, /* va_fsid */ 81 0, /* va_nodeid */ 82 0, /* va_nlink */ 83 0, /* va_size */ 84 0, /* va_atime */ 85 0, /* va_mtime */ 86 0, /* va_ctime */ 87 0, /* va_rdev */ 88 0, /* va_blksize */ 89 0, /* va_nblocks */ 90 0 /* va_vcode */ 91 }; 92 93 vattr_t sdev_vattr_lnk = { 94 AT_TYPE|AT_MODE, /* va_mask */ 95 VLNK, /* va_type */ 96 SDEV_LNKMODE_DEFAULT, /* va_mode */ 97 SDEV_UID_DEFAULT, /* va_uid */ 98 SDEV_GID_DEFAULT, /* va_gid */ 99 0, /* va_fsid */ 100 0, /* va_nodeid */ 101 0, /* va_nlink */ 102 0, /* va_size */ 103 0, /* va_atime */ 104 0, /* va_mtime */ 105 0, /* va_ctime */ 106 0, /* va_rdev */ 107 0, /* va_blksize */ 108 0, /* va_nblocks */ 109 0 /* va_vcode */ 110 }; 111 112 vattr_t sdev_vattr_blk = { 113 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 114 VBLK, /* va_type */ 115 S_IFBLK | SDEV_DEVMODE_DEFAULT, /* va_mode */ 116 SDEV_UID_DEFAULT, /* va_uid */ 117 SDEV_GID_DEFAULT, /* va_gid */ 118 0, /* va_fsid */ 119 0, /* va_nodeid */ 120 0, /* va_nlink */ 121 0, /* va_size */ 122 0, /* va_atime */ 123 0, /* va_mtime */ 124 0, /* va_ctime */ 125 0, /* va_rdev */ 126 0, /* va_blksize */ 127 0, /* va_nblocks */ 128 0 /* va_vcode */ 129 }; 130 131 vattr_t sdev_vattr_chr = { 132 
AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 133 VCHR, /* va_type */ 134 S_IFCHR | SDEV_DEVMODE_DEFAULT, /* va_mode */ 135 SDEV_UID_DEFAULT, /* va_uid */ 136 SDEV_GID_DEFAULT, /* va_gid */ 137 0, /* va_fsid */ 138 0, /* va_nodeid */ 139 0, /* va_nlink */ 140 0, /* va_size */ 141 0, /* va_atime */ 142 0, /* va_mtime */ 143 0, /* va_ctime */ 144 0, /* va_rdev */ 145 0, /* va_blksize */ 146 0, /* va_nblocks */ 147 0 /* va_vcode */ 148 }; 149 150 kmem_cache_t *sdev_node_cache; /* sdev_node cache */ 151 int devtype; /* fstype */ 152 153 /* static */ 154 static struct vnodeops *sdev_get_vop(struct sdev_node *); 155 static void sdev_set_no_negcache(struct sdev_node *); 156 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []); 157 static void sdev_free_vtab(fs_operation_def_t *); 158 159 static void 160 sdev_prof_free(struct sdev_node *dv) 161 { 162 ASSERT(!SDEV_IS_GLOBAL(dv)); 163 if (dv->sdev_prof.dev_name) 164 nvlist_free(dv->sdev_prof.dev_name); 165 if (dv->sdev_prof.dev_map) 166 nvlist_free(dv->sdev_prof.dev_map); 167 if (dv->sdev_prof.dev_symlink) 168 nvlist_free(dv->sdev_prof.dev_symlink); 169 if (dv->sdev_prof.dev_glob_incdir) 170 nvlist_free(dv->sdev_prof.dev_glob_incdir); 171 if (dv->sdev_prof.dev_glob_excdir) 172 nvlist_free(dv->sdev_prof.dev_glob_excdir); 173 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 174 } 175 176 /* sdev_node cache constructor */ 177 /*ARGSUSED1*/ 178 static int 179 i_sdev_node_ctor(void *buf, void *cfarg, int flag) 180 { 181 struct sdev_node *dv = (struct sdev_node *)buf; 182 struct vnode *vp; 183 184 bzero(buf, sizeof (struct sdev_node)); 185 vp = dv->sdev_vnode = vn_alloc(flag); 186 if (vp == NULL) { 187 return (-1); 188 } 189 vp->v_data = dv; 190 rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL); 191 return (0); 192 } 193 194 /* sdev_node cache destructor */ 195 /*ARGSUSED1*/ 196 static void 197 i_sdev_node_dtor(void *buf, void *arg) 198 { 199 struct sdev_node *dv = (struct sdev_node *)buf; 200 struct vnode *vp = SDEVTOV(dv); 201 202 rw_destroy(&dv->sdev_contents); 203 vn_free(vp); 204 } 205 206 /* initialize sdev_node cache */ 207 void 208 sdev_node_cache_init() 209 { 210 int flags = 0; 211 212 #ifdef DEBUG 213 flags = sdev_debug_cache_flags; 214 if (flags) 215 sdcmn_err(("cache debug flags 0x%x\n", flags)); 216 #endif /* DEBUG */ 217 218 ASSERT(sdev_node_cache == NULL); 219 sdev_node_cache = kmem_cache_create("sdev_node_cache", 220 sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor, 221 NULL, NULL, NULL, flags); 222 } 223 224 /* destroy sdev_node cache */ 225 void 226 sdev_node_cache_fini() 227 { 228 ASSERT(sdev_node_cache != NULL); 229 kmem_cache_destroy(sdev_node_cache); 230 sdev_node_cache = NULL; 231 } 232 233 /* 234 * Compare two nodes lexographically to balance avl tree 235 */ 236 static int 237 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2) 238 { 239 int rv; 240 if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0) 241 return (0); 242 return ((rv < 0) ? 
-1 : 1); 243 } 244 245 void 246 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state) 247 { 248 ASSERT(dv); 249 ASSERT(RW_WRITE_HELD(&dv->sdev_contents)); 250 dv->sdev_state = state; 251 } 252 253 static void 254 sdev_attr_update(struct sdev_node *dv, vattr_t *vap) 255 { 256 timestruc_t now; 257 struct vattr *attrp; 258 uint_t mask; 259 260 ASSERT(dv->sdev_attr); 261 ASSERT(vap); 262 263 attrp = dv->sdev_attr; 264 mask = vap->va_mask; 265 if (mask & AT_TYPE) 266 attrp->va_type = vap->va_type; 267 if (mask & AT_MODE) 268 attrp->va_mode = vap->va_mode; 269 if (mask & AT_UID) 270 attrp->va_uid = vap->va_uid; 271 if (mask & AT_GID) 272 attrp->va_gid = vap->va_gid; 273 if (mask & AT_RDEV) 274 attrp->va_rdev = vap->va_rdev; 275 276 gethrestime(&now); 277 attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now; 278 attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now; 279 attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now; 280 } 281 282 static void 283 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap) 284 { 285 ASSERT(dv->sdev_attr == NULL); 286 ASSERT(vap->va_mask & AT_TYPE); 287 ASSERT(vap->va_mask & AT_MODE); 288 289 dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP); 290 sdev_attr_update(dv, vap); 291 } 292 293 /* alloc and initialize a sdev_node */ 294 int 295 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, 296 vattr_t *vap) 297 { 298 struct sdev_node *dv = NULL; 299 struct vnode *vp; 300 size_t nmlen, len; 301 devname_handle_t *dhl; 302 303 nmlen = strlen(nm) + 1; 304 if (nmlen > MAXNAMELEN) { 305 sdcmn_err9(("sdev_nodeinit: node name %s" 306 " too long\n", nm)); 307 *newdv = NULL; 308 return (ENAMETOOLONG); 309 } 310 311 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); 312 313 dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP); 314 bcopy(nm, dv->sdev_name, nmlen); 315 dv->sdev_namelen = nmlen - 1; /* '\0' not included */ 316 len = strlen(ddv->sdev_path) + strlen(nm) + 2; 317 dv->sdev_path = kmem_alloc(len, KM_SLEEP); 318 (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm); 319 /* overwritten for VLNK nodes */ 320 dv->sdev_symlink = NULL; 321 322 vp = SDEVTOV(dv); 323 vn_reinit(vp); 324 vp->v_vfsp = SDEVTOV(ddv)->v_vfsp; 325 if (vap) 326 vp->v_type = vap->va_type; 327 328 /* 329 * initialized to the parent's vnodeops. 330 * maybe overwriten for a VDIR 331 */ 332 vn_setops(vp, vn_getops(SDEVTOV(ddv))); 333 vn_exists(vp); 334 335 dv->sdev_dotdot = NULL; 336 dv->sdev_attrvp = NULL; 337 if (vap) { 338 sdev_attr_alloc(dv, vap); 339 } else { 340 dv->sdev_attr = NULL; 341 } 342 343 dv->sdev_ino = sdev_mkino(dv); 344 dv->sdev_nlink = 0; /* updated on insert */ 345 dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */ 346 dv->sdev_flags |= SDEV_BUILD; 347 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); 348 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); 349 if (SDEV_IS_GLOBAL(ddv)) { 350 dv->sdev_flags |= SDEV_GLOBAL; 351 dhl = &(dv->sdev_handle); 352 dhl->dh_data = dv; 353 dhl->dh_args = NULL; 354 sdev_set_no_negcache(dv); 355 dv->sdev_gdir_gen = 0; 356 } else { 357 dv->sdev_flags &= ~SDEV_GLOBAL; 358 dv->sdev_origin = NULL; /* set later */ 359 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 360 dv->sdev_ldir_gen = 0; 361 dv->sdev_devtree_gen = 0; 362 } 363 364 rw_enter(&dv->sdev_contents, RW_WRITER); 365 sdev_set_nodestate(dv, SDEV_INIT); 366 rw_exit(&dv->sdev_contents); 367 *newdv = dv; 368 369 return (0); 370 } 371 372 /* 373 * Transition a sdev_node into SDEV_READY state. 
If this fails, it is up to the 374 * caller to transition the node to the SDEV_ZOMBIE state. 375 */ 376 int 377 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp, 378 void *args, struct cred *cred) 379 { 380 int error = 0; 381 struct vnode *vp = SDEVTOV(dv); 382 vtype_t type; 383 384 ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap); 385 386 type = vap->va_type; 387 vp->v_type = type; 388 vp->v_rdev = vap->va_rdev; 389 rw_enter(&dv->sdev_contents, RW_WRITER); 390 if (type == VDIR) { 391 dv->sdev_nlink = 2; 392 dv->sdev_flags &= ~SDEV_PERSIST; 393 dv->sdev_flags &= ~SDEV_DYNAMIC; 394 vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */ 395 ASSERT(dv->sdev_dotdot); 396 ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR); 397 vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev; 398 avl_create(&dv->sdev_entries, 399 (int (*)(const void *, const void *))sdev_compare_nodes, 400 sizeof (struct sdev_node), 401 offsetof(struct sdev_node, sdev_avllink)); 402 } else if (type == VLNK) { 403 ASSERT(args); 404 dv->sdev_nlink = 1; 405 dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP); 406 } else { 407 dv->sdev_nlink = 1; 408 } 409 410 if (!(SDEV_IS_GLOBAL(dv))) { 411 dv->sdev_origin = (struct sdev_node *)args; 412 dv->sdev_flags &= ~SDEV_PERSIST; 413 } 414 415 /* 416 * shadow node is created here OR 417 * if failed (indicated by dv->sdev_attrvp == NULL), 418 * created later in sdev_setattr 419 */ 420 if (avp) { 421 dv->sdev_attrvp = avp; 422 } else { 423 if (dv->sdev_attr == NULL) { 424 sdev_attr_alloc(dv, vap); 425 } else { 426 sdev_attr_update(dv, vap); 427 } 428 429 if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv)) 430 error = sdev_shadow_node(dv, cred); 431 } 432 433 if (error == 0) { 434 /* transition to READY state */ 435 sdev_set_nodestate(dv, SDEV_READY); 436 sdev_nc_node_exists(dv); 437 } 438 rw_exit(&dv->sdev_contents); 439 return (error); 440 } 441 442 /* 443 * Build the VROOT sdev_node. 444 */ 445 /*ARGSUSED*/ 446 struct sdev_node * 447 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp, 448 struct vnode *avp, struct cred *cred) 449 { 450 struct sdev_node *dv; 451 struct vnode *vp; 452 char devdir[] = "/dev"; 453 454 ASSERT(sdev_node_cache != NULL); 455 ASSERT(avp); 456 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); 457 vp = SDEVTOV(dv); 458 vn_reinit(vp); 459 vp->v_flag |= VROOT; 460 vp->v_vfsp = vfsp; 461 vp->v_type = VDIR; 462 vp->v_rdev = devdev; 463 vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */ 464 vn_exists(vp); 465 466 if (vfsp->vfs_mntpt) 467 dv->sdev_name = i_ddi_strdup( 468 (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP); 469 else 470 /* vfs_mountdev1 set mount point later */ 471 dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP); 472 dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */ 473 dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP); 474 dv->sdev_ino = SDEV_ROOTINO; 475 dv->sdev_nlink = 2; /* name + . (no sdev_insert) */ 476 dv->sdev_dotdot = dv; /* .. 
== self */ 477 dv->sdev_attrvp = avp; 478 dv->sdev_attr = NULL; 479 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); 480 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); 481 if (strcmp(dv->sdev_name, "/dev") == 0) { 482 dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST; 483 bzero(&dv->sdev_handle, sizeof (dv->sdev_handle)); 484 dv->sdev_gdir_gen = 0; 485 } else { 486 dv->sdev_flags = SDEV_BUILD; 487 dv->sdev_flags &= ~SDEV_PERSIST; 488 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 489 dv->sdev_ldir_gen = 0; 490 dv->sdev_devtree_gen = 0; 491 } 492 493 avl_create(&dv->sdev_entries, 494 (int (*)(const void *, const void *))sdev_compare_nodes, 495 sizeof (struct sdev_node), 496 offsetof(struct sdev_node, sdev_avllink)); 497 498 rw_enter(&dv->sdev_contents, RW_WRITER); 499 sdev_set_nodestate(dv, SDEV_READY); 500 rw_exit(&dv->sdev_contents); 501 sdev_nc_node_exists(dv); 502 return (dv); 503 } 504 505 /* directory dependent vop table */ 506 struct sdev_vop_table { 507 char *vt_name; /* subdirectory name */ 508 const fs_operation_def_t *vt_service; /* vnodeops table */ 509 struct vnodeops *vt_vops; /* constructed vop */ 510 struct vnodeops **vt_global_vops; /* global container for vop */ 511 int (*vt_vtor)(struct sdev_node *); /* validate sdev_node */ 512 int vt_flags; 513 }; 514 515 /* 516 * A nice improvement would be to provide a plug-in mechanism 517 * for this table instead of a const table. 518 */ 519 static struct sdev_vop_table vtab[] = 520 { 521 { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate, 522 SDEV_DYNAMIC | SDEV_VTOR }, 523 524 { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate, 525 SDEV_DYNAMIC | SDEV_VTOR }, 526 527 { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops, 528 devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR }, 529 530 { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE }, 531 532 { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate, 533 SDEV_DYNAMIC | SDEV_VTOR }, 534 535 { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops, 536 devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE }, 537 538 /* 539 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the 540 * lofi driver controls child nodes. 541 * 542 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted 543 * stale nodes (e.g. from devfsadm -R). 544 * 545 * In addition, devfsadm knows not to attempt a rmdir: a zone 546 * may hold a reference, which would zombify the node, 547 * preventing a mkdir. 548 */ 549 550 { "lofi", NULL, NULL, NULL, NULL, 551 SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST }, 552 { "rlofi", NULL, NULL, NULL, NULL, 553 SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST }, 554 555 { NULL, NULL, NULL, NULL, NULL, 0} 556 }; 557 558 /* 559 * We need to match off of the sdev_path, not the sdev_name. We are only allowed 560 * to exist directly under /dev. 
561 */ 562 struct sdev_vop_table * 563 sdev_match(struct sdev_node *dv) 564 { 565 int vlen; 566 int i; 567 const char *path; 568 569 if (strlen(dv->sdev_path) <= 5) 570 return (NULL); 571 572 if (strncmp(dv->sdev_path, "/dev/", 5) != 0) 573 return (NULL); 574 path = dv->sdev_path + 5; 575 576 for (i = 0; vtab[i].vt_name; i++) { 577 if (strcmp(vtab[i].vt_name, path) == 0) 578 return (&vtab[i]); 579 if (vtab[i].vt_flags & SDEV_SUBDIR) { 580 vlen = strlen(vtab[i].vt_name); 581 if ((strncmp(vtab[i].vt_name, path, 582 vlen - 1) == 0) && path[vlen] == '/') 583 return (&vtab[i]); 584 } 585 586 } 587 return (NULL); 588 } 589 590 /* 591 * sets a directory's vnodeops if the directory is in the vtab; 592 */ 593 static struct vnodeops * 594 sdev_get_vop(struct sdev_node *dv) 595 { 596 struct sdev_vop_table *vtp; 597 char *path; 598 599 path = dv->sdev_path; 600 ASSERT(path); 601 602 /* gets the relative path to /dev/ */ 603 path += 5; 604 605 /* gets the vtab entry it matches */ 606 if ((vtp = sdev_match(dv)) != NULL) { 607 dv->sdev_flags |= vtp->vt_flags; 608 if (SDEV_IS_PERSIST(dv->sdev_dotdot) && 609 (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv))) 610 dv->sdev_flags |= SDEV_PERSIST; 611 612 if (vtp->vt_vops) { 613 if (vtp->vt_global_vops) 614 *(vtp->vt_global_vops) = vtp->vt_vops; 615 616 return (vtp->vt_vops); 617 } 618 619 if (vtp->vt_service) { 620 fs_operation_def_t *templ; 621 templ = sdev_merge_vtab(vtp->vt_service); 622 if (vn_make_ops(vtp->vt_name, 623 (const fs_operation_def_t *)templ, 624 &vtp->vt_vops) != 0) { 625 cmn_err(CE_PANIC, "%s: malformed vnode ops\n", 626 vtp->vt_name); 627 /*NOTREACHED*/ 628 } 629 if (vtp->vt_global_vops) { 630 *(vtp->vt_global_vops) = vtp->vt_vops; 631 } 632 sdev_free_vtab(templ); 633 634 return (vtp->vt_vops); 635 } 636 637 return (sdev_vnodeops); 638 } 639 640 /* child inherits the persistence of the parent */ 641 if (SDEV_IS_PERSIST(dv->sdev_dotdot)) 642 dv->sdev_flags |= SDEV_PERSIST; 643 644 return (sdev_vnodeops); 645 } 646 647 static void 648 sdev_set_no_negcache(struct sdev_node *dv) 649 { 650 int i; 651 char *path; 652 653 ASSERT(dv->sdev_path); 654 path = dv->sdev_path + strlen("/dev/"); 655 656 for (i = 0; vtab[i].vt_name; i++) { 657 if (strcmp(vtab[i].vt_name, path) == 0) { 658 if (vtab[i].vt_flags & SDEV_NO_NCACHE) 659 dv->sdev_flags |= SDEV_NO_NCACHE; 660 break; 661 } 662 } 663 } 664 665 void * 666 sdev_get_vtor(struct sdev_node *dv) 667 { 668 struct sdev_vop_table *vtp; 669 670 vtp = sdev_match(dv); 671 if (vtp) 672 return ((void *)vtp->vt_vtor); 673 else 674 return (NULL); 675 } 676 677 /* 678 * Build the base root inode 679 */ 680 ino_t 681 sdev_mkino(struct sdev_node *dv) 682 { 683 ino_t ino; 684 685 /* 686 * for now, follow the lead of tmpfs here 687 * need to someday understand the requirements here 688 */ 689 ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3); 690 ino += SDEV_ROOTINO + 1; 691 692 return (ino); 693 } 694 695 int 696 sdev_getlink(struct vnode *linkvp, char **link) 697 { 698 int err; 699 char *buf; 700 struct uio uio = {0}; 701 struct iovec iov = {0}; 702 703 if (linkvp == NULL) 704 return (ENOENT); 705 ASSERT(linkvp->v_type == VLNK); 706 707 buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 708 iov.iov_base = buf; 709 iov.iov_len = MAXPATHLEN; 710 uio.uio_iov = &iov; 711 uio.uio_iovcnt = 1; 712 uio.uio_resid = MAXPATHLEN; 713 uio.uio_segflg = UIO_SYSSPACE; 714 uio.uio_llimit = MAXOFFSET_T; 715 716 err = VOP_READLINK(linkvp, &uio, kcred, NULL); 717 if (err) { 718 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf); 719 kmem_free(buf, 
MAXPATHLEN); 720 return (ENOENT); 721 } 722 723 /* mission complete */ 724 *link = i_ddi_strdup(buf, KM_SLEEP); 725 kmem_free(buf, MAXPATHLEN); 726 return (0); 727 } 728 729 /* 730 * A convenient wrapper to get the devfs node vnode for a device 731 * minor functionality: readlink() of a /dev symlink 732 * Place the link into dv->sdev_symlink 733 */ 734 static int 735 sdev_follow_link(struct sdev_node *dv) 736 { 737 int err; 738 struct vnode *linkvp; 739 char *link = NULL; 740 741 linkvp = SDEVTOV(dv); 742 if (linkvp == NULL) 743 return (ENOENT); 744 ASSERT(linkvp->v_type == VLNK); 745 err = sdev_getlink(linkvp, &link); 746 if (err) { 747 dv->sdev_symlink = NULL; 748 return (ENOENT); 749 } 750 751 ASSERT(link != NULL); 752 dv->sdev_symlink = link; 753 return (0); 754 } 755 756 static int 757 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs) 758 { 759 vtype_t otype = SDEVTOV(dv)->v_type; 760 761 /* 762 * existing sdev_node has a different type. 763 */ 764 if (otype != nvap->va_type) { 765 sdcmn_err9(("sdev_node_check: existing node " 766 " %s type %d does not match new node type %d\n", 767 dv->sdev_name, otype, nvap->va_type)); 768 return (EEXIST); 769 } 770 771 /* 772 * For a symlink, the target should be the same. 773 */ 774 if (otype == VLNK) { 775 ASSERT(nargs != NULL); 776 ASSERT(dv->sdev_symlink != NULL); 777 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) { 778 sdcmn_err9(("sdev_node_check: existing node " 779 " %s has different symlink %s as new node " 780 " %s\n", dv->sdev_name, dv->sdev_symlink, 781 (char *)nargs)); 782 return (EEXIST); 783 } 784 } 785 786 return (0); 787 } 788 789 /* 790 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready() 791 * 792 * arguments: 793 * - ddv (parent) 794 * - nm (child name) 795 * - newdv (sdev_node for nm is returned here) 796 * - vap (vattr for the node to be created, va_type should be set. 797 * - avp (attribute vnode) 798 * the defaults should be used if unknown) 799 * - cred 800 * - args 801 * . tnm (for VLNK) 802 * . global sdev_node (for !SDEV_GLOBAL) 803 * - state: SDEV_INIT, SDEV_READY 804 * 805 * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT) 806 * 807 * NOTE: directory contents writers lock needs to be held before 808 * calling this routine. 
809 */ 810 int 811 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, 812 struct vattr *vap, struct vnode *avp, void *args, struct cred *cred, 813 sdev_node_state_t state) 814 { 815 int error = 0; 816 sdev_node_state_t node_state; 817 struct sdev_node *dv = NULL; 818 819 ASSERT(state != SDEV_ZOMBIE); 820 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 821 822 if (*newdv) { 823 dv = *newdv; 824 } else { 825 /* allocate and initialize a sdev_node */ 826 if (ddv->sdev_state == SDEV_ZOMBIE) { 827 sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n", 828 ddv->sdev_path)); 829 return (ENOENT); 830 } 831 832 error = sdev_nodeinit(ddv, nm, &dv, vap); 833 if (error != 0) { 834 sdcmn_err9(("sdev_mknode: error %d," 835 " name %s can not be initialized\n", 836 error, nm)); 837 return (error); 838 } 839 ASSERT(dv); 840 841 /* insert into the directory cache */ 842 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD); 843 } 844 845 ASSERT(dv); 846 node_state = dv->sdev_state; 847 ASSERT(node_state != SDEV_ZOMBIE); 848 849 if (state == SDEV_READY) { 850 switch (node_state) { 851 case SDEV_INIT: 852 error = sdev_nodeready(dv, vap, avp, args, cred); 853 if (error) { 854 sdcmn_err9(("sdev_mknode: node %s can NOT" 855 " be transitioned into READY state, " 856 "error %d\n", nm, error)); 857 } 858 break; 859 case SDEV_READY: 860 /* 861 * Do some sanity checking to make sure 862 * the existing sdev_node is what has been 863 * asked for. 864 */ 865 error = sdev_node_check(dv, vap, args); 866 break; 867 default: 868 break; 869 } 870 } 871 872 if (!error) { 873 *newdv = dv; 874 ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE); 875 } else { 876 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE); 877 /* 878 * We created this node, it wasn't passed into us. Therefore it 879 * is up to us to delete it. 
880 */ 881 if (*newdv == NULL) 882 SDEV_SIMPLE_RELE(dv); 883 *newdv = NULL; 884 } 885 886 return (error); 887 } 888 889 /* 890 * convenient wrapper to change vp's ATIME, CTIME and MTIME 891 */ 892 void 893 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask) 894 { 895 struct vattr attr; 896 timestruc_t now; 897 int err; 898 899 ASSERT(vp); 900 gethrestime(&now); 901 if (mask & AT_CTIME) 902 attr.va_ctime = now; 903 if (mask & AT_MTIME) 904 attr.va_mtime = now; 905 if (mask & AT_ATIME) 906 attr.va_atime = now; 907 908 attr.va_mask = (mask & AT_TIMES); 909 err = VOP_SETATTR(vp, &attr, 0, cred, NULL); 910 if (err && (err != EROFS)) { 911 sdcmn_err(("update timestamps error %d\n", err)); 912 } 913 } 914 915 /* 916 * the backing store vnode is released here 917 */ 918 /*ARGSUSED1*/ 919 void 920 sdev_nodedestroy(struct sdev_node *dv, uint_t flags) 921 { 922 /* no references */ 923 ASSERT(dv->sdev_nlink == 0); 924 925 if (dv->sdev_attrvp != NULLVP) { 926 VN_RELE(dv->sdev_attrvp); 927 /* 928 * reset the attrvp so that no more 929 * references can be made on this already 930 * vn_rele() vnode 931 */ 932 dv->sdev_attrvp = NULLVP; 933 } 934 935 if (dv->sdev_attr != NULL) { 936 kmem_free(dv->sdev_attr, sizeof (struct vattr)); 937 dv->sdev_attr = NULL; 938 } 939 940 if (dv->sdev_name != NULL) { 941 kmem_free(dv->sdev_name, dv->sdev_namelen + 1); 942 dv->sdev_name = NULL; 943 } 944 945 if (dv->sdev_symlink != NULL) { 946 kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1); 947 dv->sdev_symlink = NULL; 948 } 949 950 if (dv->sdev_path) { 951 kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1); 952 dv->sdev_path = NULL; 953 } 954 955 if (!SDEV_IS_GLOBAL(dv)) 956 sdev_prof_free(dv); 957 958 if (SDEVTOV(dv)->v_type == VDIR) { 959 ASSERT(SDEV_FIRST_ENTRY(dv) == NULL); 960 avl_destroy(&dv->sdev_entries); 961 } 962 963 mutex_destroy(&dv->sdev_lookup_lock); 964 cv_destroy(&dv->sdev_lookup_cv); 965 966 /* return node to initial state as per constructor */ 967 (void) memset((void *)&dv->sdev_instance_data, 0, 968 sizeof (dv->sdev_instance_data)); 969 vn_invalid(SDEVTOV(dv)); 970 kmem_cache_free(sdev_node_cache, dv); 971 } 972 973 /* 974 * DIRECTORY CACHE lookup 975 */ 976 struct sdev_node * 977 sdev_findbyname(struct sdev_node *ddv, char *nm) 978 { 979 struct sdev_node *dv; 980 struct sdev_node dvtmp; 981 avl_index_t where; 982 983 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); 984 985 dvtmp.sdev_name = nm; 986 dv = avl_find(&ddv->sdev_entries, &dvtmp, &where); 987 if (dv) { 988 ASSERT(dv->sdev_dotdot == ddv); 989 ASSERT(strcmp(dv->sdev_name, nm) == 0); 990 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 991 SDEV_HOLD(dv); 992 return (dv); 993 } 994 return (NULL); 995 } 996 997 /* 998 * Inserts a new sdev_node in a parent directory 999 */ 1000 void 1001 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv) 1002 { 1003 avl_index_t where; 1004 1005 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1006 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 1007 ASSERT(ddv->sdev_nlink >= 2); 1008 ASSERT(dv->sdev_nlink == 0); 1009 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1010 1011 dv->sdev_dotdot = ddv; 1012 VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL); 1013 avl_insert(&ddv->sdev_entries, dv, where); 1014 ddv->sdev_nlink++; 1015 } 1016 1017 /* 1018 * The following check is needed because while sdev_nodes are linked 1019 * in SDEV_INIT state, they have their link counts incremented only 1020 * in SDEV_READY state. 
1021 */ 1022 static void 1023 decr_link(struct sdev_node *dv) 1024 { 1025 VERIFY(RW_WRITE_HELD(&dv->sdev_contents)); 1026 if (dv->sdev_state != SDEV_INIT) { 1027 VERIFY(dv->sdev_nlink >= 1); 1028 dv->sdev_nlink--; 1029 } else { 1030 VERIFY(dv->sdev_nlink == 0); 1031 } 1032 } 1033 1034 /* 1035 * Delete an existing dv from directory cache 1036 * 1037 * In the case of a node is still held by non-zero reference count, the node is 1038 * put into ZOMBIE state. The node is always unlinked from its parent, but it is 1039 * not destroyed via sdev_inactive until its reference count reaches "0". 1040 */ 1041 static void 1042 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv) 1043 { 1044 struct vnode *vp; 1045 sdev_node_state_t os; 1046 1047 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1048 1049 vp = SDEVTOV(dv); 1050 mutex_enter(&vp->v_lock); 1051 rw_enter(&dv->sdev_contents, RW_WRITER); 1052 os = dv->sdev_state; 1053 ASSERT(os != SDEV_ZOMBIE); 1054 dv->sdev_state = SDEV_ZOMBIE; 1055 1056 /* 1057 * unlink ourselves from the parent directory now to take care of the .. 1058 * link. However, if we're a directory, we don't remove our reference to 1059 * ourself eg. '.' until we are torn down in the inactive callback. 1060 */ 1061 decr_link(ddv); 1062 avl_remove(&ddv->sdev_entries, dv); 1063 /* 1064 * sdev_inactive expects nodes to have a link to themselves when we're 1065 * tearing them down. If we're transitioning from the initial state to 1066 * zombie and not via ready, then we're not going to have this link that 1067 * comes from the node being ready. As a result, we need to increment 1068 * our link count by one to account for this. 1069 */ 1070 if (os == SDEV_INIT && dv->sdev_nlink == 0) 1071 dv->sdev_nlink++; 1072 rw_exit(&dv->sdev_contents); 1073 mutex_exit(&vp->v_lock); 1074 } 1075 1076 /* 1077 * check if the source is in the path of the target 1078 * 1079 * source and target are different 1080 */ 1081 /*ARGSUSED2*/ 1082 static int 1083 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred) 1084 { 1085 int error = 0; 1086 struct sdev_node *dotdot, *dir; 1087 1088 dotdot = tdv->sdev_dotdot; 1089 ASSERT(dotdot); 1090 1091 /* fs root */ 1092 if (dotdot == tdv) { 1093 return (0); 1094 } 1095 1096 for (;;) { 1097 /* 1098 * avoid error cases like 1099 * mv a a/b 1100 * mv a a/b/c 1101 * etc. 1102 */ 1103 if (dotdot == sdv) { 1104 error = EINVAL; 1105 break; 1106 } 1107 1108 dir = dotdot; 1109 dotdot = dir->sdev_dotdot; 1110 1111 /* done checking because root is reached */ 1112 if (dir == dotdot) { 1113 break; 1114 } 1115 } 1116 return (error); 1117 } 1118 1119 int 1120 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv, 1121 struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm, 1122 struct cred *cred) 1123 { 1124 int error = 0; 1125 struct vnode *ovp = SDEVTOV(odv); 1126 struct vnode *nvp; 1127 struct vattr vattr; 1128 int doingdir = (ovp->v_type == VDIR); 1129 char *link = NULL; 1130 int samedir = (oddv == nddv) ? 1 : 0; 1131 int bkstore = 0; 1132 struct sdev_node *idv = NULL; 1133 struct sdev_node *ndv = NULL; 1134 timestruc_t now; 1135 1136 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 1137 error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL); 1138 if (error) 1139 return (error); 1140 1141 if (!samedir) 1142 rw_enter(&oddv->sdev_contents, RW_WRITER); 1143 rw_enter(&nddv->sdev_contents, RW_WRITER); 1144 1145 /* 1146 * the source may have been deleted by another thread before 1147 * we gets here. 
1148 */ 1149 if (odv->sdev_state != SDEV_READY) { 1150 error = ENOENT; 1151 goto err_out; 1152 } 1153 1154 if (doingdir && (odv == nddv)) { 1155 error = EINVAL; 1156 goto err_out; 1157 } 1158 1159 /* 1160 * If renaming a directory, and the parents are different (".." must be 1161 * changed) then the source dir must not be in the dir hierarchy above 1162 * the target since it would orphan everything below the source dir. 1163 */ 1164 if (doingdir && (oddv != nddv)) { 1165 error = sdev_checkpath(odv, nddv, cred); 1166 if (error) 1167 goto err_out; 1168 } 1169 1170 /* fix the source for a symlink */ 1171 if (vattr.va_type == VLNK) { 1172 if (odv->sdev_symlink == NULL) { 1173 error = sdev_follow_link(odv); 1174 if (error) { 1175 /* 1176 * The underlying symlink doesn't exist. This 1177 * node probably shouldn't even exist. While 1178 * it's a bit jarring to consumers, we're going 1179 * to remove the node from /dev. 1180 */ 1181 if (SDEV_IS_PERSIST((*ndvp))) 1182 bkstore = 1; 1183 sdev_dirdelete(oddv, odv); 1184 if (bkstore) { 1185 ASSERT(nddv->sdev_attrvp); 1186 error = VOP_REMOVE(nddv->sdev_attrvp, 1187 nnm, cred, NULL, 0); 1188 if (error) 1189 goto err_out; 1190 } 1191 error = ENOENT; 1192 goto err_out; 1193 } 1194 } 1195 ASSERT(odv->sdev_symlink); 1196 link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP); 1197 } 1198 1199 /* destination existing */ 1200 if (*ndvp) { 1201 nvp = SDEVTOV(*ndvp); 1202 ASSERT(nvp); 1203 1204 /* handling renaming to itself */ 1205 if (odv == *ndvp) { 1206 error = 0; 1207 goto err_out; 1208 } 1209 1210 if (nvp->v_type == VDIR) { 1211 if (!doingdir) { 1212 error = EISDIR; 1213 goto err_out; 1214 } 1215 1216 if (vn_vfswlock(nvp)) { 1217 error = EBUSY; 1218 goto err_out; 1219 } 1220 1221 if (vn_mountedvfs(nvp) != NULL) { 1222 vn_vfsunlock(nvp); 1223 error = EBUSY; 1224 goto err_out; 1225 } 1226 1227 /* in case dir1 exists in dir2 and "mv dir1 dir2" */ 1228 if ((*ndvp)->sdev_nlink > 2) { 1229 vn_vfsunlock(nvp); 1230 error = EEXIST; 1231 goto err_out; 1232 } 1233 vn_vfsunlock(nvp); 1234 1235 /* 1236 * We did not place the hold on *ndvp, so even though 1237 * we're deleting the node, we should not get rid of our 1238 * reference. 1239 */ 1240 sdev_dirdelete(nddv, *ndvp); 1241 *ndvp = NULL; 1242 ASSERT(nddv->sdev_attrvp); 1243 error = VOP_RMDIR(nddv->sdev_attrvp, nnm, 1244 nddv->sdev_attrvp, cred, NULL, 0); 1245 if (error) 1246 goto err_out; 1247 } else { 1248 if (doingdir) { 1249 error = ENOTDIR; 1250 goto err_out; 1251 } 1252 1253 if (SDEV_IS_PERSIST((*ndvp))) { 1254 bkstore = 1; 1255 } 1256 1257 /* 1258 * Get rid of the node from the directory cache note. 1259 * Don't forget that it's not up to us to remove the vn 1260 * ref on the sdev node, as we did not place it. 
1261 */ 1262 sdev_dirdelete(nddv, *ndvp); 1263 *ndvp = NULL; 1264 if (bkstore) { 1265 ASSERT(nddv->sdev_attrvp); 1266 error = VOP_REMOVE(nddv->sdev_attrvp, 1267 nnm, cred, NULL, 0); 1268 if (error) 1269 goto err_out; 1270 } 1271 } 1272 } 1273 1274 /* 1275 * make a fresh node from the source attrs 1276 */ 1277 ASSERT(RW_WRITE_HELD(&nddv->sdev_contents)); 1278 error = sdev_mknode(nddv, nnm, ndvp, &vattr, 1279 NULL, (void *)link, cred, SDEV_READY); 1280 1281 if (link != NULL) { 1282 kmem_free(link, strlen(link) + 1); 1283 link = NULL; 1284 } 1285 1286 if (error) 1287 goto err_out; 1288 ASSERT(*ndvp); 1289 ASSERT((*ndvp)->sdev_state == SDEV_READY); 1290 1291 /* move dir contents */ 1292 if (doingdir) { 1293 for (idv = SDEV_FIRST_ENTRY(odv); idv; 1294 idv = SDEV_NEXT_ENTRY(odv, idv)) { 1295 SDEV_HOLD(idv); 1296 error = sdev_rnmnode(odv, idv, 1297 (struct sdev_node *)(*ndvp), &ndv, 1298 idv->sdev_name, cred); 1299 SDEV_RELE(idv); 1300 if (error) 1301 goto err_out; 1302 ndv = NULL; 1303 } 1304 } 1305 1306 if ((*ndvp)->sdev_attrvp) { 1307 sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred, 1308 AT_CTIME|AT_ATIME); 1309 } else { 1310 ASSERT((*ndvp)->sdev_attr); 1311 gethrestime(&now); 1312 (*ndvp)->sdev_attr->va_ctime = now; 1313 (*ndvp)->sdev_attr->va_atime = now; 1314 } 1315 1316 if (nddv->sdev_attrvp) { 1317 sdev_update_timestamps(nddv->sdev_attrvp, kcred, 1318 AT_MTIME|AT_ATIME); 1319 } else { 1320 ASSERT(nddv->sdev_attr); 1321 gethrestime(&now); 1322 nddv->sdev_attr->va_mtime = now; 1323 nddv->sdev_attr->va_atime = now; 1324 } 1325 rw_exit(&nddv->sdev_contents); 1326 if (!samedir) 1327 rw_exit(&oddv->sdev_contents); 1328 1329 SDEV_RELE(*ndvp); 1330 return (error); 1331 1332 err_out: 1333 if (link != NULL) { 1334 kmem_free(link, strlen(link) + 1); 1335 link = NULL; 1336 } 1337 1338 rw_exit(&nddv->sdev_contents); 1339 if (!samedir) 1340 rw_exit(&oddv->sdev_contents); 1341 return (error); 1342 } 1343 1344 /* 1345 * Merge sdev_node specific information into an attribute structure. 
1346 * 1347 * note: sdev_node is not locked here 1348 */ 1349 void 1350 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap) 1351 { 1352 struct vnode *vp = SDEVTOV(dv); 1353 1354 vap->va_nlink = dv->sdev_nlink; 1355 vap->va_nodeid = dv->sdev_ino; 1356 vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev; 1357 vap->va_type = vp->v_type; 1358 1359 if (vp->v_type == VDIR) { 1360 vap->va_rdev = 0; 1361 vap->va_fsid = vp->v_rdev; 1362 } else if (vp->v_type == VLNK) { 1363 vap->va_rdev = 0; 1364 vap->va_mode &= ~S_IFMT; 1365 vap->va_mode |= S_IFLNK; 1366 } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) { 1367 vap->va_rdev = vp->v_rdev; 1368 vap->va_mode &= ~S_IFMT; 1369 if (vap->va_type == VCHR) 1370 vap->va_mode |= S_IFCHR; 1371 else 1372 vap->va_mode |= S_IFBLK; 1373 } else { 1374 vap->va_rdev = 0; 1375 } 1376 } 1377 1378 struct vattr * 1379 sdev_getdefault_attr(enum vtype type) 1380 { 1381 if (type == VDIR) 1382 return (&sdev_vattr_dir); 1383 else if (type == VCHR) 1384 return (&sdev_vattr_chr); 1385 else if (type == VBLK) 1386 return (&sdev_vattr_blk); 1387 else if (type == VLNK) 1388 return (&sdev_vattr_lnk); 1389 else 1390 return (NULL); 1391 } 1392 int 1393 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp) 1394 { 1395 int rv = 0; 1396 struct vnode *vp = SDEVTOV(dv); 1397 1398 switch (vp->v_type) { 1399 case VCHR: 1400 case VBLK: 1401 /* 1402 * If vnode is a device, return special vnode instead 1403 * (though it knows all about -us- via sp->s_realvp) 1404 */ 1405 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred); 1406 VN_RELE(vp); 1407 if (*vpp == NULLVP) 1408 rv = ENOSYS; 1409 break; 1410 default: /* most types are returned as is */ 1411 *vpp = vp; 1412 break; 1413 } 1414 return (rv); 1415 } 1416 1417 /* 1418 * junction between devname and root file system, e.g. 
ufs 1419 */ 1420 int 1421 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp) 1422 { 1423 struct vnode *rdvp = ddv->sdev_attrvp; 1424 int rval = 0; 1425 1426 ASSERT(rdvp); 1427 1428 rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL, 1429 NULL); 1430 return (rval); 1431 } 1432 1433 static int 1434 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred) 1435 { 1436 struct sdev_node *dv = NULL; 1437 char *nm; 1438 struct vnode *dirvp; 1439 int error; 1440 vnode_t *vp; 1441 int eof; 1442 struct iovec iov; 1443 struct uio uio; 1444 struct dirent64 *dp; 1445 dirent64_t *dbuf; 1446 size_t dbuflen; 1447 struct vattr vattr; 1448 char *link = NULL; 1449 1450 if (ddv->sdev_attrvp == NULL) 1451 return (0); 1452 if (!(ddv->sdev_flags & SDEV_BUILD)) 1453 return (0); 1454 1455 dirvp = ddv->sdev_attrvp; 1456 VN_HOLD(dirvp); 1457 dbuf = kmem_zalloc(dlen, KM_SLEEP); 1458 1459 uio.uio_iov = &iov; 1460 uio.uio_iovcnt = 1; 1461 uio.uio_segflg = UIO_SYSSPACE; 1462 uio.uio_fmode = 0; 1463 uio.uio_extflg = UIO_COPY_CACHED; 1464 uio.uio_loffset = 0; 1465 uio.uio_llimit = MAXOFFSET_T; 1466 1467 eof = 0; 1468 error = 0; 1469 while (!error && !eof) { 1470 uio.uio_resid = dlen; 1471 iov.iov_base = (char *)dbuf; 1472 iov.iov_len = dlen; 1473 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1474 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1475 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1476 1477 dbuflen = dlen - uio.uio_resid; 1478 if (error || dbuflen == 0) 1479 break; 1480 1481 if (!(ddv->sdev_flags & SDEV_BUILD)) 1482 break; 1483 1484 for (dp = dbuf; ((intptr_t)dp < 1485 (intptr_t)dbuf + dbuflen); 1486 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1487 nm = dp->d_name; 1488 1489 if (strcmp(nm, ".") == 0 || 1490 strcmp(nm, "..") == 0) 1491 continue; 1492 1493 vp = NULLVP; 1494 dv = sdev_cache_lookup(ddv, nm); 1495 if (dv) { 1496 VERIFY(dv->sdev_state != SDEV_ZOMBIE); 1497 SDEV_SIMPLE_RELE(dv); 1498 continue; 1499 } 1500 1501 /* refill the cache if not already */ 1502 error = devname_backstore_lookup(ddv, nm, &vp); 1503 if (error) 1504 continue; 1505 1506 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 1507 error = VOP_GETATTR(vp, &vattr, 0, cred, NULL); 1508 if (error) 1509 continue; 1510 1511 if (vattr.va_type == VLNK) { 1512 error = sdev_getlink(vp, &link); 1513 if (error) { 1514 continue; 1515 } 1516 ASSERT(link != NULL); 1517 } 1518 1519 if (!rw_tryupgrade(&ddv->sdev_contents)) { 1520 rw_exit(&ddv->sdev_contents); 1521 rw_enter(&ddv->sdev_contents, RW_WRITER); 1522 } 1523 error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link, 1524 cred, SDEV_READY); 1525 rw_downgrade(&ddv->sdev_contents); 1526 1527 if (link != NULL) { 1528 kmem_free(link, strlen(link) + 1); 1529 link = NULL; 1530 } 1531 1532 if (!error) { 1533 ASSERT(dv); 1534 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1535 SDEV_SIMPLE_RELE(dv); 1536 } 1537 vp = NULL; 1538 dv = NULL; 1539 } 1540 } 1541 1542 done: 1543 VN_RELE(dirvp); 1544 kmem_free(dbuf, dlen); 1545 1546 return (error); 1547 } 1548 1549 void 1550 sdev_filldir_dynamic(struct sdev_node *ddv) 1551 { 1552 int error; 1553 int i; 1554 struct vattr vattr; 1555 struct vattr *vap = &vattr; 1556 char *nm = NULL; 1557 struct sdev_node *dv = NULL; 1558 1559 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1560 ASSERT((ddv->sdev_flags & SDEV_BUILD)); 1561 1562 *vap = *sdev_getdefault_attr(VDIR); /* note structure copy here */ 1563 gethrestime(&vap->va_atime); 1564 vap->va_mtime = vap->va_atime; 1565 vap->va_ctime = vap->va_atime; 1566 for 
(i = 0; vtab[i].vt_name != NULL; i++) { 1567 /* 1568 * This early, we may be in a read-only /dev environment: leave 1569 * the creation of any nodes we'd attempt to persist to 1570 * devfsadm. Because /dev itself is normally persistent, any 1571 * node which is not marked dynamic will end up being marked 1572 * persistent. However, some nodes are both dynamic and 1573 * persistent, mostly lofi and rlofi, so we need to be careful 1574 * in our check. 1575 */ 1576 if ((vtab[i].vt_flags & SDEV_PERSIST) || 1577 !(vtab[i].vt_flags & SDEV_DYNAMIC)) 1578 continue; 1579 nm = vtab[i].vt_name; 1580 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1581 dv = NULL; 1582 error = sdev_mknode(ddv, nm, &dv, vap, NULL, 1583 NULL, kcred, SDEV_READY); 1584 if (error) { 1585 cmn_err(CE_WARN, "%s/%s: error %d\n", 1586 ddv->sdev_name, nm, error); 1587 } else { 1588 ASSERT(dv); 1589 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1590 SDEV_SIMPLE_RELE(dv); 1591 } 1592 } 1593 } 1594 1595 /* 1596 * Creating a backing store entry based on sdev_attr. 1597 * This is called either as part of node creation in a persistent directory 1598 * or from setattr/setsecattr to persist access attributes across reboot. 1599 */ 1600 int 1601 sdev_shadow_node(struct sdev_node *dv, struct cred *cred) 1602 { 1603 int error = 0; 1604 struct vnode *dvp = SDEVTOV(dv->sdev_dotdot); 1605 struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp; 1606 struct vattr *vap = dv->sdev_attr; 1607 char *nm = dv->sdev_name; 1608 struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL; 1609 1610 ASSERT(dv && dv->sdev_name && rdvp); 1611 ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL); 1612 1613 lookup: 1614 /* try to find it in the backing store */ 1615 error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL, 1616 NULL); 1617 if (error == 0) { 1618 if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) { 1619 VN_HOLD(rrvp); 1620 VN_RELE(*rvp); 1621 *rvp = rrvp; 1622 } 1623 1624 kmem_free(dv->sdev_attr, sizeof (vattr_t)); 1625 dv->sdev_attr = NULL; 1626 dv->sdev_attrvp = *rvp; 1627 return (0); 1628 } 1629 1630 /* let's try to persist the node */ 1631 gethrestime(&vap->va_atime); 1632 vap->va_mtime = vap->va_atime; 1633 vap->va_ctime = vap->va_atime; 1634 vap->va_mask |= AT_TYPE|AT_MODE; 1635 switch (vap->va_type) { 1636 case VDIR: 1637 error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL); 1638 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n", 1639 (void *)(*rvp), error)); 1640 if (!error) 1641 VN_RELE(*rvp); 1642 break; 1643 case VCHR: 1644 case VBLK: 1645 case VREG: 1646 case VDOOR: 1647 error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE, 1648 rvp, cred, 0, NULL, NULL); 1649 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n", 1650 (void *)(*rvp), error)); 1651 if (!error) 1652 VN_RELE(*rvp); 1653 break; 1654 case VLNK: 1655 ASSERT(dv->sdev_symlink); 1656 error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred, 1657 NULL, 0); 1658 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n", 1659 error)); 1660 break; 1661 default: 1662 cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node " 1663 "create\n", nm); 1664 /*NOTREACHED*/ 1665 } 1666 1667 /* go back to lookup to factor out spec node and set attrvp */ 1668 if (error == 0) 1669 goto lookup; 1670 1671 sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error)); 1672 return (error); 1673 } 1674 1675 static void 1676 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm) 1677 { 1678 struct sdev_node *dup = NULL; 1679 1680 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1681 if 
((dup = sdev_findbyname(ddv, nm)) == NULL) { 1682 sdev_direnter(ddv, *dv); 1683 } else { 1684 VERIFY(dup->sdev_state != SDEV_ZOMBIE); 1685 SDEV_SIMPLE_RELE(*dv); 1686 sdev_nodedestroy(*dv, 0); 1687 *dv = dup; 1688 } 1689 } 1690 1691 static void 1692 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv) 1693 { 1694 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1695 sdev_dirdelete(ddv, *dv); 1696 } 1697 1698 /* 1699 * update the in-core directory cache 1700 */ 1701 void 1702 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm, 1703 sdev_cache_ops_t ops) 1704 { 1705 ASSERT((SDEV_HELD(*dv))); 1706 1707 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1708 switch (ops) { 1709 case SDEV_CACHE_ADD: 1710 sdev_cache_add(ddv, dv, nm); 1711 break; 1712 case SDEV_CACHE_DELETE: 1713 sdev_cache_delete(ddv, dv); 1714 break; 1715 default: 1716 break; 1717 } 1718 } 1719 1720 /* 1721 * retrieve the named entry from the directory cache 1722 */ 1723 struct sdev_node * 1724 sdev_cache_lookup(struct sdev_node *ddv, char *nm) 1725 { 1726 struct sdev_node *dv = NULL; 1727 1728 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); 1729 dv = sdev_findbyname(ddv, nm); 1730 1731 return (dv); 1732 } 1733 1734 /* 1735 * Implicit reconfig for nodes constructed by a link generator 1736 * Start devfsadm if needed, or if devfsadm is in progress, 1737 * prepare to block on devfsadm either completing or 1738 * constructing the desired node. As devfsadmd is global 1739 * in scope, constructing all necessary nodes, we only 1740 * need to initiate it once. 1741 */ 1742 static int 1743 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm) 1744 { 1745 int error = 0; 1746 1747 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { 1748 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n", 1749 ddv->sdev_name, nm, devfsadm_state)); 1750 mutex_enter(&dv->sdev_lookup_lock); 1751 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING)); 1752 mutex_exit(&dv->sdev_lookup_lock); 1753 error = 0; 1754 } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) { 1755 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n", 1756 ddv->sdev_name, nm, devfsadm_state)); 1757 1758 sdev_devfsadmd_thread(ddv, dv, kcred); 1759 mutex_enter(&dv->sdev_lookup_lock); 1760 SDEV_BLOCK_OTHERS(dv, 1761 (SDEV_LOOKUP | SDEV_LGWAITING)); 1762 mutex_exit(&dv->sdev_lookup_lock); 1763 error = 0; 1764 } else { 1765 error = -1; 1766 } 1767 1768 return (error); 1769 } 1770 1771 /* 1772 * Support for specialized device naming construction mechanisms 1773 */ 1774 static int 1775 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm, 1776 int (*callback)(struct sdev_node *, char *, void **, struct cred *, 1777 void *, char *), int flags, struct cred *cred) 1778 { 1779 int rv = 0; 1780 char *physpath = NULL; 1781 struct vattr vattr; 1782 struct vattr *vap = &vattr; 1783 struct sdev_node *dv = NULL; 1784 1785 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1786 if (flags & SDEV_VLINK) { 1787 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1788 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL, 1789 NULL); 1790 if (rv) { 1791 kmem_free(physpath, MAXPATHLEN); 1792 return (-1); 1793 } 1794 1795 *vap = *sdev_getdefault_attr(VLNK); /* structure copy */ 1796 vap->va_size = strlen(physpath); 1797 gethrestime(&vap->va_atime); 1798 vap->va_mtime = vap->va_atime; 1799 vap->va_ctime = vap->va_atime; 1800 1801 rv = sdev_mknode(ddv, nm, &dv, vap, NULL, 1802 (void *)physpath, cred, SDEV_READY); 1803 kmem_free(physpath, MAXPATHLEN); 1804 if (rv) 1805 
return (rv); 1806 } else if (flags & SDEV_VATTR) { 1807 /* 1808 * /dev/pts 1809 * 1810 * callback is responsible to set the basic attributes, 1811 * e.g. va_type/va_uid/va_gid/ 1812 * dev_t if VCHR or VBLK/ 1813 */ 1814 ASSERT(callback); 1815 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL); 1816 if (rv) { 1817 sdcmn_err3(("devname_lookup_func: SDEV_NONE " 1818 "callback failed \n")); 1819 return (-1); 1820 } 1821 1822 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL, 1823 cred, SDEV_READY); 1824 1825 if (rv) 1826 return (rv); 1827 1828 } else { 1829 impossible(("lookup: %s/%s by %s not supported (%d)\n", 1830 SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm, 1831 __LINE__)); 1832 rv = -1; 1833 } 1834 1835 *dvp = dv; 1836 return (rv); 1837 } 1838 1839 static int 1840 is_devfsadm_thread(char *exec_name) 1841 { 1842 /* 1843 * note: because devfsadmd -> /usr/sbin/devfsadm 1844 * it is safe to use "devfsadm" to capture the lookups 1845 * from devfsadm and its daemon version. 1846 */ 1847 if (strcmp(exec_name, "devfsadm") == 0) 1848 return (1); 1849 return (0); 1850 } 1851 1852 /* 1853 * Lookup Order: 1854 * sdev_node cache; 1855 * backing store (SDEV_PERSIST); 1856 * DBNR: a. dir_ops implemented in the loadable modules; 1857 * b. vnode ops in vtab. 1858 */ 1859 int 1860 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp, 1861 struct cred *cred, int (*callback)(struct sdev_node *, char *, void **, 1862 struct cred *, void *, char *), int flags) 1863 { 1864 int rv = 0, nmlen; 1865 struct vnode *rvp = NULL; 1866 struct sdev_node *dv = NULL; 1867 int retried = 0; 1868 int error = 0; 1869 struct vattr vattr; 1870 char *lookup_thread = curproc->p_user.u_comm; 1871 int failed_flags = 0; 1872 int (*vtor)(struct sdev_node *) = NULL; 1873 int state; 1874 int parent_state; 1875 char *link = NULL; 1876 1877 if (SDEVTOV(ddv)->v_type != VDIR) 1878 return (ENOTDIR); 1879 1880 /* 1881 * Empty name or ., return node itself. 
1882 */ 1883 nmlen = strlen(nm); 1884 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { 1885 *vpp = SDEVTOV(ddv); 1886 VN_HOLD(*vpp); 1887 return (0); 1888 } 1889 1890 /* 1891 * .., return the parent directory 1892 */ 1893 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { 1894 *vpp = SDEVTOV(ddv->sdev_dotdot); 1895 VN_HOLD(*vpp); 1896 return (0); 1897 } 1898 1899 rw_enter(&ddv->sdev_contents, RW_READER); 1900 if (ddv->sdev_flags & SDEV_VTOR) { 1901 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); 1902 ASSERT(vtor); 1903 } 1904 1905 tryagain: 1906 /* 1907 * (a) directory cache lookup: 1908 */ 1909 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1910 parent_state = ddv->sdev_state; 1911 dv = sdev_cache_lookup(ddv, nm); 1912 if (dv) { 1913 state = dv->sdev_state; 1914 switch (state) { 1915 case SDEV_INIT: 1916 if (is_devfsadm_thread(lookup_thread)) 1917 break; 1918 1919 /* ZOMBIED parent won't allow node creation */ 1920 if (parent_state == SDEV_ZOMBIE) { 1921 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1922 retried); 1923 goto nolock_notfound; 1924 } 1925 1926 mutex_enter(&dv->sdev_lookup_lock); 1927 /* compensate the threads started after devfsadm */ 1928 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && 1929 !(SDEV_IS_LOOKUP(dv))) 1930 SDEV_BLOCK_OTHERS(dv, 1931 (SDEV_LOOKUP | SDEV_LGWAITING)); 1932 1933 if (SDEV_IS_LOOKUP(dv)) { 1934 failed_flags |= SLF_REBUILT; 1935 rw_exit(&ddv->sdev_contents); 1936 error = sdev_wait4lookup(dv, SDEV_LOOKUP); 1937 mutex_exit(&dv->sdev_lookup_lock); 1938 rw_enter(&ddv->sdev_contents, RW_READER); 1939 1940 if (error != 0) { 1941 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1942 retried); 1943 goto nolock_notfound; 1944 } 1945 1946 state = dv->sdev_state; 1947 if (state == SDEV_INIT) { 1948 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1949 retried); 1950 goto nolock_notfound; 1951 } else if (state == SDEV_READY) { 1952 goto found; 1953 } else if (state == SDEV_ZOMBIE) { 1954 rw_exit(&ddv->sdev_contents); 1955 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1956 retried); 1957 SDEV_RELE(dv); 1958 goto lookup_failed; 1959 } 1960 } else { 1961 mutex_exit(&dv->sdev_lookup_lock); 1962 } 1963 break; 1964 case SDEV_READY: 1965 goto found; 1966 case SDEV_ZOMBIE: 1967 rw_exit(&ddv->sdev_contents); 1968 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1969 SDEV_RELE(dv); 1970 goto lookup_failed; 1971 default: 1972 rw_exit(&ddv->sdev_contents); 1973 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1974 sdev_lookup_failed(ddv, nm, failed_flags); 1975 *vpp = NULLVP; 1976 return (ENOENT); 1977 } 1978 } 1979 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1980 1981 /* 1982 * ZOMBIED parent does not allow new node creation. 
1983 * bail out early 1984 */ 1985 if (parent_state == SDEV_ZOMBIE) { 1986 rw_exit(&ddv->sdev_contents); 1987 *vpp = NULLVP; 1988 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1989 return (ENOENT); 1990 } 1991 1992 /* 1993 * (b0): backing store lookup 1994 * SDEV_PERSIST is default except: 1995 * 1) pts nodes 1996 * 2) non-chmod'ed local nodes 1997 * 3) zvol nodes 1998 */ 1999 if (SDEV_IS_PERSIST(ddv)) { 2000 error = devname_backstore_lookup(ddv, nm, &rvp); 2001 2002 if (!error) { 2003 2004 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 2005 error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL); 2006 if (error) { 2007 rw_exit(&ddv->sdev_contents); 2008 if (dv) 2009 SDEV_RELE(dv); 2010 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2011 sdev_lookup_failed(ddv, nm, failed_flags); 2012 *vpp = NULLVP; 2013 return (ENOENT); 2014 } 2015 2016 if (vattr.va_type == VLNK) { 2017 error = sdev_getlink(rvp, &link); 2018 if (error) { 2019 rw_exit(&ddv->sdev_contents); 2020 if (dv) 2021 SDEV_RELE(dv); 2022 SD_TRACE_FAILED_LOOKUP(ddv, nm, 2023 retried); 2024 sdev_lookup_failed(ddv, nm, 2025 failed_flags); 2026 *vpp = NULLVP; 2027 return (ENOENT); 2028 } 2029 ASSERT(link != NULL); 2030 } 2031 2032 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2033 rw_exit(&ddv->sdev_contents); 2034 rw_enter(&ddv->sdev_contents, RW_WRITER); 2035 } 2036 error = sdev_mknode(ddv, nm, &dv, &vattr, 2037 rvp, link, cred, SDEV_READY); 2038 rw_downgrade(&ddv->sdev_contents); 2039 2040 if (link != NULL) { 2041 kmem_free(link, strlen(link) + 1); 2042 link = NULL; 2043 } 2044 2045 if (error) { 2046 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2047 rw_exit(&ddv->sdev_contents); 2048 if (dv) 2049 SDEV_RELE(dv); 2050 goto lookup_failed; 2051 } else { 2052 goto found; 2053 } 2054 } else if (retried) { 2055 rw_exit(&ddv->sdev_contents); 2056 sdcmn_err3(("retry of lookup of %s/%s: failed\n", 2057 ddv->sdev_name, nm)); 2058 if (dv) 2059 SDEV_RELE(dv); 2060 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2061 sdev_lookup_failed(ddv, nm, failed_flags); 2062 *vpp = NULLVP; 2063 return (ENOENT); 2064 } 2065 } 2066 2067 lookup_create_node: 2068 /* first thread that is doing the lookup on this node */ 2069 if (callback) { 2070 ASSERT(dv == NULL); 2071 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2072 rw_exit(&ddv->sdev_contents); 2073 rw_enter(&ddv->sdev_contents, RW_WRITER); 2074 } 2075 error = sdev_call_dircallback(ddv, &dv, nm, callback, 2076 flags, cred); 2077 rw_downgrade(&ddv->sdev_contents); 2078 if (error == 0) { 2079 goto found; 2080 } else { 2081 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2082 rw_exit(&ddv->sdev_contents); 2083 goto lookup_failed; 2084 } 2085 } 2086 if (!dv) { 2087 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2088 rw_exit(&ddv->sdev_contents); 2089 rw_enter(&ddv->sdev_contents, RW_WRITER); 2090 } 2091 error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL, 2092 cred, SDEV_INIT); 2093 if (!dv) { 2094 rw_exit(&ddv->sdev_contents); 2095 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2096 sdev_lookup_failed(ddv, nm, failed_flags); 2097 *vpp = NULLVP; 2098 return (ENOENT); 2099 } 2100 rw_downgrade(&ddv->sdev_contents); 2101 } 2102 2103 /* 2104 * (b1) invoking devfsadm once per life time for devfsadm nodes 2105 */ 2106 ASSERT(SDEV_HELD(dv)); 2107 2108 if (SDEV_IS_NO_NCACHE(dv)) 2109 failed_flags |= SLF_NO_NCACHE; 2110 if (sdev_reconfig_boot || !i_ddi_io_initialized() || 2111 SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) || 2112 ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) { 2113 ASSERT(SDEV_HELD(dv)); 2114 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2115 goto 
nolock_notfound; 2116 } 2117 2118 /* 2119 * filter out known non-existent devices recorded 2120 * during initial reconfiguration boot for which 2121 * reconfig should not be done and lookup may 2122 * be short-circuited now. 2123 */ 2124 if (sdev_lookup_filter(ddv, nm)) { 2125 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2126 goto nolock_notfound; 2127 } 2128 2129 /* bypassing devfsadm internal nodes */ 2130 if (is_devfsadm_thread(lookup_thread)) { 2131 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2132 goto nolock_notfound; 2133 } 2134 2135 if (sdev_reconfig_disable) { 2136 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2137 goto nolock_notfound; 2138 } 2139 2140 error = sdev_call_devfsadmd(ddv, dv, nm); 2141 if (error == 0) { 2142 sdcmn_err8(("lookup of %s/%s by %s: reconfig\n", 2143 ddv->sdev_name, nm, curproc->p_user.u_comm)); 2144 if (sdev_reconfig_verbose) { 2145 cmn_err(CE_CONT, 2146 "?lookup of %s/%s by %s: reconfig\n", 2147 ddv->sdev_name, nm, curproc->p_user.u_comm); 2148 } 2149 retried = 1; 2150 failed_flags |= SLF_REBUILT; 2151 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 2152 SDEV_SIMPLE_RELE(dv); 2153 goto tryagain; 2154 } else { 2155 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2156 goto nolock_notfound; 2157 } 2158 2159 found: 2160 ASSERT(dv->sdev_state == SDEV_READY); 2161 if (vtor) { 2162 /* 2163 * Check validity of returned node 2164 */ 2165 switch (vtor(dv)) { 2166 case SDEV_VTOR_VALID: 2167 break; 2168 case SDEV_VTOR_STALE: 2169 /* 2170 * The name exists, but the cache entry is 2171 * stale and needs to be re-created. 2172 */ 2173 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2174 if (rw_tryupgrade(&ddv->sdev_contents) == 0) { 2175 rw_exit(&ddv->sdev_contents); 2176 rw_enter(&ddv->sdev_contents, RW_WRITER); 2177 } 2178 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE); 2179 rw_downgrade(&ddv->sdev_contents); 2180 SDEV_RELE(dv); 2181 dv = NULL; 2182 goto lookup_create_node; 2183 /* FALLTHRU */ 2184 case SDEV_VTOR_INVALID: 2185 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2186 sdcmn_err7(("lookup: destroy invalid " 2187 "node: %s(%p)\n", dv->sdev_name, (void *)dv)); 2188 goto nolock_notfound; 2189 case SDEV_VTOR_SKIP: 2190 sdcmn_err7(("lookup: node not applicable - " 2191 "skipping: %s(%p)\n", dv->sdev_name, (void *)dv)); 2192 rw_exit(&ddv->sdev_contents); 2193 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2194 SDEV_RELE(dv); 2195 goto lookup_failed; 2196 default: 2197 cmn_err(CE_PANIC, 2198 "dev fs: validator failed: %s(%p)\n", 2199 dv->sdev_name, (void *)dv); 2200 break; 2201 } 2202 } 2203 2204 rw_exit(&ddv->sdev_contents); 2205 rv = sdev_to_vp(dv, vpp); 2206 sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d " 2207 "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count, 2208 dv->sdev_state, nm, rv)); 2209 return (rv); 2210 2211 nolock_notfound: 2212 /* 2213 * Destroy the node that is created for synchronization purposes. 2214 */ 2215 sdcmn_err3(("devname_lookup_func: %s with state %d\n", 2216 nm, dv->sdev_state)); 2217 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2218 if (dv->sdev_state == SDEV_INIT) { 2219 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2220 rw_exit(&ddv->sdev_contents); 2221 rw_enter(&ddv->sdev_contents, RW_WRITER); 2222 } 2223 2224 /* 2225 * Node state may have changed during the lock 2226 * changes. Re-check. 
2227 */ 2228 if (dv->sdev_state == SDEV_INIT) { 2229 sdev_dirdelete(ddv, dv); 2230 rw_exit(&ddv->sdev_contents); 2231 sdev_lookup_failed(ddv, nm, failed_flags); 2232 SDEV_RELE(dv); 2233 *vpp = NULL; 2234 return (ENOENT); 2235 } 2236 } 2237 2238 rw_exit(&ddv->sdev_contents); 2239 SDEV_RELE(dv); 2240 2241 lookup_failed: 2242 sdev_lookup_failed(ddv, nm, failed_flags); 2243 *vpp = NULL; 2244 return (ENOENT); 2245 } 2246 2247 /* 2248 * Given a directory node, mark all nodes beneath as 2249 * STALE, i.e. nodes that don't exist as far as new 2250 * consumers are concerned. Remove them from the 2251 * list of directory entries so that no lookup or 2252 * directory traversal will find them. The nodes 2253 * are not deallocated, so existing holds are not affected. 2254 */ 2255 void 2256 sdev_stale(struct sdev_node *ddv) 2257 { 2258 struct sdev_node *dv; 2259 struct vnode *vp; 2260 2261 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 2262 2263 rw_enter(&ddv->sdev_contents, RW_WRITER); 2264 while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) { 2265 vp = SDEVTOV(dv); 2266 SDEV_HOLD(dv); 2267 if (vp->v_type == VDIR) 2268 sdev_stale(dv); 2269 2270 sdev_dirdelete(ddv, dv); 2271 SDEV_RELE(dv); 2272 } 2273 ddv->sdev_flags |= SDEV_BUILD; 2274 rw_exit(&ddv->sdev_contents); 2275 } 2276 2277 /* 2278 * Given a directory node, clean out all the nodes beneath. 2279 * If expr is specified, clean only nodes with names matching expr. 2280 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale, 2281 * so they are excluded from future lookups. 2282 */ 2283 int 2284 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags) 2285 { 2286 int error = 0; 2287 int busy = 0; 2288 struct vnode *vp; 2289 struct sdev_node *dv; 2290 int bkstore = 0; 2291 int len = 0; 2292 char *bks_name = NULL; 2293 2294 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 2295 2296 /* 2297 * We try our best to destroy all unused sdev_nodes. 2298 */ 2299 rw_enter(&ddv->sdev_contents, RW_WRITER); 2300 while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) { 2301 vp = SDEVTOV(dv); 2302 2303 if (expr && gmatch(dv->sdev_name, expr) == 0) 2304 continue; 2305 2306 if (vp->v_type == VDIR && 2307 sdev_cleandir(dv, NULL, flags) != 0) { 2308 sdcmn_err9(("sdev_cleandir: dir %s busy\n", 2309 dv->sdev_name)); 2310 busy++; 2311 continue; 2312 } 2313 2314 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) { 2315 sdcmn_err9(("sdev_cleandir: dir %s busy\n", 2316 dv->sdev_name)); 2317 busy++; 2318 continue; 2319 } 2320 2321 /* 2322 * At this point, either dv is not held or SDEV_ENFORCE 2323 * is specified. In either case, dv needs to be deleted. 2324 */ 2325 SDEV_HOLD(dv); 2326 2327 bkstore = SDEV_IS_PERSIST(dv) ? 
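/* bkstore encodes the backing store cleanup needed below: 0 = none, 1 = plain file (VOP_REMOVE), 2 = directory (VOP_RMDIR) */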
1 : 0; 2328 if (bkstore && (vp->v_type == VDIR)) 2329 bkstore += 1; 2330 2331 if (bkstore) { 2332 len = strlen(dv->sdev_name) + 1; 2333 bks_name = kmem_alloc(len, KM_SLEEP); 2334 bcopy(dv->sdev_name, bks_name, len); 2335 } 2336 2337 sdev_dirdelete(ddv, dv); 2338 2339 /* take care of the backing store cleanup */ 2340 if (bkstore) { 2341 ASSERT(bks_name); 2342 ASSERT(ddv->sdev_attrvp); 2343 2344 if (bkstore == 1) { 2345 error = VOP_REMOVE(ddv->sdev_attrvp, 2346 bks_name, kcred, NULL, 0); 2347 } else if (bkstore == 2) { 2348 error = VOP_RMDIR(ddv->sdev_attrvp, 2349 bks_name, ddv->sdev_attrvp, kcred, NULL, 0); 2350 } 2351 2352 /* do not propagate the backing store errors */ 2353 if (error) { 2354 sdcmn_err9(("sdev_cleandir: backing store " 2355 "not cleaned\n")); 2356 error = 0; 2357 } 2358 2359 bkstore = 0; 2360 kmem_free(bks_name, len); 2361 bks_name = NULL; 2362 len = 0; 2363 } 2364 2365 ddv->sdev_flags |= SDEV_BUILD; 2366 SDEV_RELE(dv); 2367 } 2368 2369 ddv->sdev_flags |= SDEV_BUILD; 2370 rw_exit(&ddv->sdev_contents); 2371 2372 if (busy) { 2373 error = EBUSY; 2374 } 2375 2376 return (error); 2377 } 2378 2379 /* 2380 * a convenient wrapper for readdir() funcs 2381 */ 2382 size_t 2383 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off) 2384 { 2385 size_t reclen = DIRENT64_RECLEN(strlen(nm)); 2386 if (reclen > size) 2387 return (0); 2388 2389 de->d_ino = (ino64_t)ino; 2390 de->d_off = (off64_t)off + 1; 2391 de->d_reclen = (ushort_t)reclen; 2392 (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen)); 2393 return (reclen); 2394 } 2395 2396 /* 2397 * sdev_mount service routines 2398 */ 2399 int 2400 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args) 2401 { 2402 int error; 2403 2404 if (uap->datalen != sizeof (*args)) 2405 return (EINVAL); 2406 2407 if (error = copyin(uap->dataptr, args, sizeof (*args))) { 2408 cmn_err(CE_WARN, "sdev_copyin_mountargs: cannot " 2409 "get user data. 
error %d\n", error); 2410 return (EFAULT); 2411 } 2412 2413 return (0); 2414 } 2415 2416 #ifdef nextdp 2417 #undef nextdp 2418 #endif 2419 #define nextdp(dp) ((struct dirent64 *) \ 2420 (intptr_t)((char *)(dp) + (dp)->d_reclen)) 2421 2422 /* 2423 * readdir helper func 2424 */ 2425 int 2426 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp, 2427 int flags) 2428 { 2429 struct sdev_node *ddv = VTOSDEV(vp); 2430 struct sdev_node *dv; 2431 dirent64_t *dp; 2432 ulong_t outcount = 0; 2433 size_t namelen; 2434 ulong_t alloc_count; 2435 void *outbuf; 2436 struct iovec *iovp; 2437 int error = 0; 2438 size_t reclen; 2439 offset_t diroff; 2440 offset_t soff; 2441 int this_reclen; 2442 int (*vtor)(struct sdev_node *) = NULL; 2443 struct vattr attr; 2444 timestruc_t now; 2445 2446 ASSERT(ddv->sdev_attr || ddv->sdev_attrvp); 2447 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2448 2449 if (uiop->uio_loffset >= MAXOFF_T) { 2450 if (eofp) 2451 *eofp = 1; 2452 return (0); 2453 } 2454 2455 if (uiop->uio_iovcnt != 1) 2456 return (EINVAL); 2457 2458 if (vp->v_type != VDIR) 2459 return (ENOTDIR); 2460 2461 if (ddv->sdev_flags & SDEV_VTOR) { 2462 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); 2463 ASSERT(vtor); 2464 } 2465 2466 if (eofp != NULL) 2467 *eofp = 0; 2468 2469 soff = uiop->uio_loffset; 2470 iovp = uiop->uio_iov; 2471 alloc_count = iovp->iov_len; 2472 dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP); 2473 outcount = 0; 2474 2475 if (ddv->sdev_state == SDEV_ZOMBIE) 2476 goto get_cache; 2477 2478 if (SDEV_IS_GLOBAL(ddv)) { 2479 2480 if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) && 2481 !sdev_reconfig_boot && (flags & SDEV_BROWSE) && 2482 !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) && 2483 ((moddebug & MODDEBUG_FINI_EBUSY) == 0) && 2484 !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) && 2485 !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && 2486 !sdev_reconfig_disable) { 2487 /* 2488 * invoking "devfsadm" to do system device reconfig 2489 */ 2490 mutex_enter(&ddv->sdev_lookup_lock); 2491 SDEV_BLOCK_OTHERS(ddv, 2492 (SDEV_READDIR|SDEV_LGWAITING)); 2493 mutex_exit(&ddv->sdev_lookup_lock); 2494 2495 sdcmn_err8(("readdir of %s by %s: reconfig\n", 2496 ddv->sdev_path, curproc->p_user.u_comm)); 2497 if (sdev_reconfig_verbose) { 2498 cmn_err(CE_CONT, 2499 "?readdir of %s by %s: reconfig\n", 2500 ddv->sdev_path, curproc->p_user.u_comm); 2501 } 2502 2503 sdev_devfsadmd_thread(ddv, NULL, kcred); 2504 } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { 2505 /* 2506 * compensate the "ls" started later than "devfsadm" 2507 */ 2508 mutex_enter(&ddv->sdev_lookup_lock); 2509 SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING)); 2510 mutex_exit(&ddv->sdev_lookup_lock); 2511 } 2512 2513 /* 2514 * release the contents lock so that 2515 * the cache may be updated by devfsadmd 2516 */ 2517 rw_exit(&ddv->sdev_contents); 2518 mutex_enter(&ddv->sdev_lookup_lock); 2519 if (SDEV_IS_READDIR(ddv)) 2520 (void) sdev_wait4lookup(ddv, SDEV_READDIR); 2521 mutex_exit(&ddv->sdev_lookup_lock); 2522 rw_enter(&ddv->sdev_contents, RW_READER); 2523 2524 sdcmn_err4(("readdir of directory %s by %s\n", 2525 ddv->sdev_name, curproc->p_user.u_comm)); 2526 if (ddv->sdev_flags & SDEV_BUILD) { 2527 if (SDEV_IS_PERSIST(ddv)) { 2528 error = sdev_filldir_from_store(ddv, 2529 alloc_count, cred); 2530 } 2531 ddv->sdev_flags &= ~SDEV_BUILD; 2532 } 2533 } 2534 2535 get_cache: 2536 /* handle "." and ".." 
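* entries first: they are synthesized here rather than taken from the
* directory cache, and are emitted only when the caller's offset (soff)
* has not yet moved past them:
*	d_ino = ddv->sdev_ino,			d_off = 1, d_name = "."
*	d_ino = ddv->sdev_dotdot->sdev_ino,	d_off = 2, d_name = ".."
* Each record is sized with DIRENT64_RECLEN(), so a later resume at
* uio_loffset picks up with the cached entries below.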
*/ 2537 diroff = 0; 2538 if (soff == 0) { 2539 /* first time */ 2540 this_reclen = DIRENT64_RECLEN(1); 2541 if (alloc_count < this_reclen) { 2542 error = EINVAL; 2543 goto done; 2544 } 2545 2546 dp->d_ino = (ino64_t)ddv->sdev_ino; 2547 dp->d_off = (off64_t)1; 2548 dp->d_reclen = (ushort_t)this_reclen; 2549 2550 (void) strncpy(dp->d_name, ".", 2551 DIRENT64_NAMELEN(this_reclen)); 2552 outcount += dp->d_reclen; 2553 dp = nextdp(dp); 2554 } 2555 2556 diroff++; 2557 if (soff <= 1) { 2558 this_reclen = DIRENT64_RECLEN(2); 2559 if (alloc_count < outcount + this_reclen) { 2560 error = EINVAL; 2561 goto done; 2562 } 2563 2564 dp->d_reclen = (ushort_t)this_reclen; 2565 dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino; 2566 dp->d_off = (off64_t)2; 2567 2568 (void) strncpy(dp->d_name, "..", 2569 DIRENT64_NAMELEN(this_reclen)); 2570 outcount += dp->d_reclen; 2571 2572 dp = nextdp(dp); 2573 } 2574 2575 2576 /* walk the cached directory entries */ 2577 diroff++; 2578 for (dv = SDEV_FIRST_ENTRY(ddv); dv; 2579 dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) { 2580 sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n", 2581 diroff, soff, dv->sdev_name)); 2582 2583 /* skip entries before the requested offset and nodes not yet ready */ 2584 if (diroff < soff || (dv->sdev_state != SDEV_READY)) { 2585 sdcmn_err3(("sdev_readdir: premature node " 2586 "%s %d\n", dv->sdev_name, dv->sdev_state)); 2587 continue; 2588 } 2589 2590 /* 2591 * Check the validity of the node. 2592 * Drop invalid nodes and nodes to be skipped. 2593 * A node the validator indicates as stale needs 2594 * to be returned as presumably the node name itself 2595 * is valid and the node data itself will be refreshed 2596 * on lookup. An application performing a readdir then 2597 * stat on each entry should thus always see consistent 2598 * data. In any case, it is not possible to synchronize 2599 * with dynamic kernel state, and any view we return can 2600 * never be anything more than a snapshot at a point in time. 2601 */ 2602 if (vtor) { 2603 switch (vtor(dv)) { 2604 case SDEV_VTOR_VALID: 2605 break; 2606 case SDEV_VTOR_INVALID: 2607 case SDEV_VTOR_SKIP: 2608 continue; 2609 case SDEV_VTOR_STALE: 2610 sdcmn_err3(("sdev_readdir: %s stale\n", 2611 dv->sdev_name)); 2612 break; 2613 default: 2614 cmn_err(CE_PANIC, 2615 "dev fs: validator failed: %s(%p)\n", 2616 dv->sdev_name, (void *)dv); 2617 break; 2618 /*NOTREACHED*/ 2619 } 2620 } 2621 2622 namelen = strlen(dv->sdev_name); 2623 reclen = DIRENT64_RECLEN(namelen); 2624 if (outcount + reclen > alloc_count) { 2625 goto full; 2626 } 2627 dp->d_reclen = (ushort_t)reclen; 2628 dp->d_ino = (ino64_t)dv->sdev_ino; 2629 dp->d_off = (off64_t)diroff + 1; 2630 (void) strncpy(dp->d_name, dv->sdev_name, 2631 DIRENT64_NAMELEN(reclen)); 2632 outcount += reclen; 2633 dp = nextdp(dp); 2634 } 2635 2636 full: 2637 sdcmn_err4(("sdev_readdir: moving %lu bytes: " 2638 "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff, 2639 (void *)dv)); 2640 2641 if (outcount) 2642 error = uiomove(outbuf, outcount, UIO_READ, uiop); 2643 2644 if (!error) { 2645 uiop->uio_loffset = diroff; 2646 if (eofp) 2647 *eofp = dv ? 
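/* dv == NULL here means the walk ran off the end of the cached entries rather than filling the buffer */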
0 : 1; 2648 } 2649 2650 2651 if (ddv->sdev_attrvp) { 2652 gethrestime(&now); 2653 attr.va_ctime = now; 2654 attr.va_atime = now; 2655 attr.va_mask = AT_CTIME|AT_ATIME; 2656 2657 (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL); 2658 } 2659 done: 2660 kmem_free(outbuf, alloc_count); 2661 return (error); 2662 } 2663 2664 static int 2665 sdev_modctl_lookup(const char *path, vnode_t **r_vp) 2666 { 2667 vnode_t *vp; 2668 vnode_t *cvp; 2669 struct sdev_node *svp; 2670 char *nm; 2671 struct pathname pn; 2672 int error; 2673 int persisted = 0; 2674 2675 ASSERT(INGLOBALZONE(curproc)); 2676 2677 if (error = pn_get((char *)path, UIO_SYSSPACE, &pn)) 2678 return (error); 2679 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 2680 2681 vp = rootdir; 2682 VN_HOLD(vp); 2683 2684 while (pn_pathleft(&pn)) { 2685 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK); 2686 (void) pn_getcomponent(&pn, nm); 2687 2688 /* 2689 * Deal with the .. special case where we may be 2690 * traversing up across a mount point, to the 2691 * root of this filesystem or global root. 2692 */ 2693 if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) { 2694 checkforroot: 2695 if (VN_CMP(vp, rootdir)) { 2696 nm[1] = 0; 2697 } else if (vp->v_flag & VROOT) { 2698 vfs_t *vfsp; 2699 cvp = vp; 2700 vfsp = cvp->v_vfsp; 2701 vfs_rlock_wait(vfsp); 2702 vp = cvp->v_vfsp->vfs_vnodecovered; 2703 if (vp == NULL || 2704 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) { 2705 vfs_unlock(vfsp); 2706 VN_RELE(cvp); 2707 error = EIO; 2708 break; 2709 } 2710 VN_HOLD(vp); 2711 vfs_unlock(vfsp); 2712 VN_RELE(cvp); 2713 cvp = NULL; 2714 goto checkforroot; 2715 } 2716 } 2717 2718 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL, 2719 NULL, NULL); 2720 if (error) { 2721 VN_RELE(vp); 2722 break; 2723 } 2724 2725 /* traverse mount points encountered on our journey */ 2726 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) { 2727 VN_RELE(vp); 2728 VN_RELE(cvp); 2729 break; 2730 } 2731 2732 /* 2733 * symbolic link, can be either relative and absolute 2734 */ 2735 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) { 2736 struct pathname linkpath; 2737 pn_alloc(&linkpath); 2738 if (error = pn_getsymlink(cvp, &linkpath, kcred)) { 2739 pn_free(&linkpath); 2740 break; 2741 } 2742 if (pn_pathleft(&linkpath) == 0) 2743 (void) pn_set(&linkpath, "."); 2744 error = pn_insert(&pn, &linkpath, strlen(nm)); 2745 pn_free(&linkpath); 2746 if (pn.pn_pathlen == 0) { 2747 VN_RELE(vp); 2748 return (ENOENT); 2749 } 2750 if (pn.pn_path[0] == '/') { 2751 pn_skipslash(&pn); 2752 VN_RELE(vp); 2753 VN_RELE(cvp); 2754 vp = rootdir; 2755 VN_HOLD(vp); 2756 } else { 2757 VN_RELE(cvp); 2758 } 2759 continue; 2760 } 2761 2762 VN_RELE(vp); 2763 2764 /* 2765 * Direct the operation to the persisting filesystem 2766 * underlying /dev. Bail if we encounter a 2767 * non-persistent dev entity here. 2768 */ 2769 if (cvp->v_vfsp->vfs_fstype == devtype) { 2770 2771 if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) { 2772 error = ENOENT; 2773 VN_RELE(cvp); 2774 break; 2775 } 2776 2777 if (VTOSDEV(cvp) == NULL) { 2778 error = ENOENT; 2779 VN_RELE(cvp); 2780 break; 2781 } 2782 svp = VTOSDEV(cvp); 2783 if ((vp = svp->sdev_attrvp) == NULL) { 2784 error = ENOENT; 2785 VN_RELE(cvp); 2786 break; 2787 } 2788 persisted = 1; 2789 VN_HOLD(vp); 2790 VN_RELE(cvp); 2791 cvp = vp; 2792 } 2793 2794 vp = cvp; 2795 pn_skipslash(&pn); 2796 } 2797 2798 kmem_free(nm, MAXNAMELEN); 2799 pn_free(&pn); 2800 2801 if (error) 2802 return (error); 2803 2804 /* 2805 * Only return persisted nodes in the filesystem underlying /dev. 
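* The loop above substitutes sdev_attrvp whenever it crosses a /dev
* vnode marked SDEV_PERSIST, so modctl callers end up operating on the
* backing store directly; a path that never traversed such a node has
* no persistent presence and is reported as ENOENT.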
2806 */ 2807 if (!persisted) { 2808 VN_RELE(vp); 2809 return (ENOENT); 2810 } 2811 2812 *r_vp = vp; 2813 return (0); 2814 } 2815 2816 int 2817 sdev_modctl_readdir(const char *dir, char ***dirlistp, 2818 int *npathsp, int *npathsp_alloc, int checking_empty) 2819 { 2820 char **pathlist = NULL; 2821 char **newlist = NULL; 2822 int npaths = 0; 2823 int npaths_alloc = 0; 2824 dirent64_t *dbuf = NULL; 2825 int n; 2826 char *s; 2827 int error; 2828 vnode_t *vp; 2829 int eof; 2830 struct iovec iov; 2831 struct uio uio; 2832 struct dirent64 *dp; 2833 size_t dlen; 2834 size_t dbuflen; 2835 int ndirents = 64; 2836 char *nm; 2837 2838 error = sdev_modctl_lookup(dir, &vp); 2839 sdcmn_err11(("modctl readdir: %s by %s: %s\n", 2840 dir, curproc->p_user.u_comm, 2841 (error == 0) ? "ok" : "failed")); 2842 if (error) 2843 return (error); 2844 2845 dlen = ndirents * (sizeof (*dbuf)); 2846 dbuf = kmem_alloc(dlen, KM_SLEEP); 2847 2848 uio.uio_iov = &iov; 2849 uio.uio_iovcnt = 1; 2850 uio.uio_segflg = UIO_SYSSPACE; 2851 uio.uio_fmode = 0; 2852 uio.uio_extflg = UIO_COPY_CACHED; 2853 uio.uio_loffset = 0; 2854 uio.uio_llimit = MAXOFFSET_T; 2855 2856 eof = 0; 2857 error = 0; 2858 while (!error && !eof) { 2859 uio.uio_resid = dlen; 2860 iov.iov_base = (char *)dbuf; 2861 iov.iov_len = dlen; 2862 2863 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 2864 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0); 2865 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2866 2867 dbuflen = dlen - uio.uio_resid; 2868 2869 if (error || dbuflen == 0) 2870 break; 2871 2872 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 2873 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 2874 2875 nm = dp->d_name; 2876 2877 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 2878 continue; 2879 if (npaths == npaths_alloc) { 2880 npaths_alloc += 64; 2881 newlist = (char **) 2882 kmem_zalloc((npaths_alloc + 1) * 2883 sizeof (char *), KM_SLEEP); 2884 if (pathlist) { 2885 bcopy(pathlist, newlist, 2886 npaths * sizeof (char *)); 2887 kmem_free(pathlist, 2888 (npaths + 1) * sizeof (char *)); 2889 } 2890 pathlist = newlist; 2891 } 2892 n = strlen(nm) + 1; 2893 s = kmem_alloc(n, KM_SLEEP); 2894 bcopy(nm, s, n); 2895 pathlist[npaths++] = s; 2896 sdcmn_err11((" %s/%s\n", dir, s)); 2897 2898 /* if checking empty, one entry is as good as many */ 2899 if (checking_empty) { 2900 eof = 1; 2901 break; 2902 } 2903 } 2904 } 2905 2906 exit: 2907 VN_RELE(vp); 2908 2909 if (dbuf) 2910 kmem_free(dbuf, dlen); 2911 2912 if (error) 2913 return (error); 2914 2915 *dirlistp = pathlist; 2916 *npathsp = npaths; 2917 *npathsp_alloc = npaths_alloc; 2918 2919 return (0); 2920 } 2921 2922 void 2923 sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc) 2924 { 2925 int i, n; 2926 2927 for (i = 0; i < npaths; i++) { 2928 n = strlen(pathlist[i]) + 1; 2929 kmem_free(pathlist[i], n); 2930 } 2931 2932 kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *)); 2933 } 2934 2935 int 2936 sdev_modctl_devexists(const char *path) 2937 { 2938 vnode_t *vp; 2939 int error; 2940 2941 error = sdev_modctl_lookup(path, &vp); 2942 sdcmn_err11(("modctl dev exists: %s by %s: %s\n", 2943 path, curproc->p_user.u_comm, 2944 (error == 0) ? 
"ok" : "failed")); 2945 if (error == 0) 2946 VN_RELE(vp); 2947 2948 return (error); 2949 } 2950 2951 extern int sdev_vnodeops_tbl_size; 2952 2953 /* 2954 * construct a new template with overrides from vtab 2955 */ 2956 static fs_operation_def_t * 2957 sdev_merge_vtab(const fs_operation_def_t tab[]) 2958 { 2959 fs_operation_def_t *new; 2960 const fs_operation_def_t *tab_entry; 2961 2962 /* make a copy of standard vnode ops table */ 2963 new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP); 2964 bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size); 2965 2966 /* replace the overrides from tab */ 2967 for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) { 2968 fs_operation_def_t *std_entry = new; 2969 while (std_entry->name) { 2970 if (strcmp(tab_entry->name, std_entry->name) == 0) { 2971 std_entry->func = tab_entry->func; 2972 break; 2973 } 2974 std_entry++; 2975 } 2976 if (std_entry->name == NULL) 2977 cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.", 2978 tab_entry->name); 2979 } 2980 2981 return (new); 2982 } 2983 2984 /* free memory allocated by sdev_merge_vtab */ 2985 static void 2986 sdev_free_vtab(fs_operation_def_t *new) 2987 { 2988 kmem_free(new, sdev_vnodeops_tbl_size); 2989 } 2990 2991 /* 2992 * a generic setattr() function 2993 * 2994 * note: flags only supports AT_UID and AT_GID. 2995 * Future enhancements can be done for other types, e.g. AT_MODE 2996 */ 2997 int 2998 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags, 2999 struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *, 3000 int), int protocol) 3001 { 3002 struct sdev_node *dv = VTOSDEV(vp); 3003 struct sdev_node *parent = dv->sdev_dotdot; 3004 struct vattr *get; 3005 uint_t mask = vap->va_mask; 3006 int error; 3007 3008 /* some sanity checks */ 3009 if (vap->va_mask & AT_NOSET) 3010 return (EINVAL); 3011 3012 if (vap->va_mask & AT_SIZE) { 3013 if (vp->v_type == VDIR) { 3014 return (EISDIR); 3015 } 3016 } 3017 3018 /* no need to set attribute, but do not fail either */ 3019 ASSERT(parent); 3020 rw_enter(&parent->sdev_contents, RW_READER); 3021 if (dv->sdev_state == SDEV_ZOMBIE) { 3022 rw_exit(&parent->sdev_contents); 3023 return (0); 3024 } 3025 3026 /* If backing store exists, just set it. */ 3027 if (dv->sdev_attrvp) { 3028 rw_exit(&parent->sdev_contents); 3029 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 3030 } 3031 3032 /* 3033 * Otherwise, for nodes with the persistence attribute, create it. 
3034 */ 3035 ASSERT(dv->sdev_attr); 3036 if (SDEV_IS_PERSIST(dv) || 3037 ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) { 3038 sdev_vattr_merge(dv, vap); 3039 rw_enter(&dv->sdev_contents, RW_WRITER); 3040 error = sdev_shadow_node(dv, cred); 3041 rw_exit(&dv->sdev_contents); 3042 rw_exit(&parent->sdev_contents); 3043 3044 if (error) 3045 return (error); 3046 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 3047 } 3048 3049 3050 /* 3051 * sdev_attr was allocated in sdev_mknode 3052 */ 3053 rw_enter(&dv->sdev_contents, RW_WRITER); 3054 error = secpolicy_vnode_setattr(cred, vp, vap, 3055 dv->sdev_attr, flags, sdev_unlocked_access, dv); 3056 if (error) { 3057 rw_exit(&dv->sdev_contents); 3058 rw_exit(&parent->sdev_contents); 3059 return (error); 3060 } 3061 3062 get = dv->sdev_attr; 3063 if (mask & AT_MODE) { 3064 get->va_mode &= S_IFMT; 3065 get->va_mode |= vap->va_mode & ~S_IFMT; 3066 } 3067 3068 if ((mask & AT_UID) || (mask & AT_GID)) { 3069 if (mask & AT_UID) 3070 get->va_uid = vap->va_uid; 3071 if (mask & AT_GID) 3072 get->va_gid = vap->va_gid; 3073 /* 3074 * a callback must be provided if the protocol is set 3075 */ 3076 if ((protocol & AT_UID) || (protocol & AT_GID)) { 3077 ASSERT(callback); 3078 error = callback(dv, get, protocol); 3079 if (error) { 3080 rw_exit(&dv->sdev_contents); 3081 rw_exit(&parent->sdev_contents); 3082 return (error); 3083 } 3084 } 3085 } 3086 3087 if (mask & AT_ATIME) 3088 get->va_atime = vap->va_atime; 3089 if (mask & AT_MTIME) 3090 get->va_mtime = vap->va_mtime; 3091 if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) { 3092 gethrestime(&get->va_ctime); 3093 } 3094 3095 sdev_vattr_merge(dv, get); 3096 rw_exit(&dv->sdev_contents); 3097 rw_exit(&parent->sdev_contents); 3098 return (0); 3099 } 3100 3101 /* 3102 * a generic inactive() function 3103 */ 3104 /*ARGSUSED*/ 3105 void 3106 devname_inactive_func(struct vnode *vp, struct cred *cred, 3107 void (*callback)(struct vnode *)) 3108 { 3109 int clean; 3110 struct sdev_node *dv = VTOSDEV(vp); 3111 int state; 3112 3113 mutex_enter(&vp->v_lock); 3114 ASSERT(vp->v_count >= 1); 3115 3116 3117 if (vp->v_count == 1 && callback != NULL) 3118 callback(vp); 3119 3120 rw_enter(&dv->sdev_contents, RW_WRITER); 3121 state = dv->sdev_state; 3122 3123 clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE); 3124 3125 /* 3126 * sdev is a rather bad public citizen. It violates the general 3127 * agreement that in memory nodes should always have a valid reference 3128 * count on their vnode. But that's not the case here. This means that 3129 * we do actually have to distinguish between getting inactive callbacks 3130 * for zombies and otherwise. This should probably be fixed. 3131 */ 3132 if (clean) { 3133 /* Remove the . entry to ourselves */ 3134 if (vp->v_type == VDIR) { 3135 decr_link(dv); 3136 } 3137 VERIFY(dv->sdev_nlink == 1); 3138 decr_link(dv); 3139 --vp->v_count; 3140 rw_exit(&dv->sdev_contents); 3141 mutex_exit(&vp->v_lock); 3142 sdev_nodedestroy(dv, 0); 3143 } else { 3144 --vp->v_count; 3145 rw_exit(&dv->sdev_contents); 3146 mutex_exit(&vp->v_lock); 3147 } 3148 } 3149