1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 2013, 2016 Joyent, Inc. All rights reserved. 24 * Copyright (c) 2017 by Delphix. All rights reserved. 25 */ 26 27 /* 28 * utility routines for the /dev fs 29 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/t_lock.h> 34 #include <sys/systm.h> 35 #include <sys/sysmacros.h> 36 #include <sys/user.h> 37 #include <sys/time.h> 38 #include <sys/vfs.h> 39 #include <sys/vnode.h> 40 #include <sys/file.h> 41 #include <sys/fcntl.h> 42 #include <sys/flock.h> 43 #include <sys/kmem.h> 44 #include <sys/uio.h> 45 #include <sys/errno.h> 46 #include <sys/stat.h> 47 #include <sys/cred.h> 48 #include <sys/dirent.h> 49 #include <sys/pathname.h> 50 #include <sys/cmn_err.h> 51 #include <sys/debug.h> 52 #include <sys/mode.h> 53 #include <sys/policy.h> 54 #include <fs/fs_subr.h> 55 #include <sys/mount.h> 56 #include <sys/fs/snode.h> 57 #include <sys/fs/dv_node.h> 58 #include <sys/fs/sdev_impl.h> 59 #include <sys/sunndi.h> 60 #include <sys/sunmdi.h> 61 #include <sys/conf.h> 62 #include <sys/proc.h> 63 #include <sys/user.h> 64 #include <sys/modctl.h> 65 66 #ifdef DEBUG 67 int sdev_debug = 0x00000001; 68 int sdev_debug_cache_flags = 0; 69 #endif 70 71 /* 72 * globals 73 */ 74 /* prototype memory vattrs */ 75 vattr_t sdev_vattr_dir = { 76 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 77 VDIR, /* va_type */ 78 SDEV_DIRMODE_DEFAULT, /* va_mode */ 79 SDEV_UID_DEFAULT, /* va_uid */ 80 SDEV_GID_DEFAULT, /* va_gid */ 81 0, /* va_fsid */ 82 0, /* va_nodeid */ 83 0, /* va_nlink */ 84 0, /* va_size */ 85 0, /* va_atime */ 86 0, /* va_mtime */ 87 0, /* va_ctime */ 88 0, /* va_rdev */ 89 0, /* va_blksize */ 90 0, /* va_nblocks */ 91 0 /* va_vcode */ 92 }; 93 94 vattr_t sdev_vattr_lnk = { 95 AT_TYPE|AT_MODE, /* va_mask */ 96 VLNK, /* va_type */ 97 SDEV_LNKMODE_DEFAULT, /* va_mode */ 98 SDEV_UID_DEFAULT, /* va_uid */ 99 SDEV_GID_DEFAULT, /* va_gid */ 100 0, /* va_fsid */ 101 0, /* va_nodeid */ 102 0, /* va_nlink */ 103 0, /* va_size */ 104 0, /* va_atime */ 105 0, /* va_mtime */ 106 0, /* va_ctime */ 107 0, /* va_rdev */ 108 0, /* va_blksize */ 109 0, /* va_nblocks */ 110 0 /* va_vcode */ 111 }; 112 113 vattr_t sdev_vattr_blk = { 114 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 115 VBLK, /* va_type */ 116 S_IFBLK | SDEV_DEVMODE_DEFAULT, /* va_mode */ 117 SDEV_UID_DEFAULT, /* va_uid */ 118 SDEV_GID_DEFAULT, /* va_gid */ 119 0, /* va_fsid */ 120 0, /* va_nodeid */ 121 0, /* va_nlink */ 122 0, /* va_size */ 123 0, /* va_atime */ 124 0, /* va_mtime */ 125 0, /* va_ctime */ 126 0, /* va_rdev */ 127 0, /* va_blksize */ 128 0, /* va_nblocks */ 129 0 /* va_vcode */ 130 }; 131 132 vattr_t sdev_vattr_chr = { 133 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 134 VCHR, /* va_type */ 135 S_IFCHR | SDEV_DEVMODE_DEFAULT, /* va_mode */ 136 SDEV_UID_DEFAULT, /* va_uid */ 137 SDEV_GID_DEFAULT, /* va_gid */ 138 0, /* va_fsid */ 139 0, /* va_nodeid */ 140 0, /* va_nlink */ 141 0, /* va_size */ 142 0, /* va_atime */ 143 0, /* va_mtime */ 144 0, /* va_ctime */ 145 0, /* va_rdev */ 146 0, /* va_blksize */ 147 0, /* va_nblocks */ 148 0 /* va_vcode */ 149 }; 150 151 kmem_cache_t *sdev_node_cache; /* sdev_node cache */ 152 int devtype; /* fstype */ 153 154 /* static */ 155 static struct vnodeops *sdev_get_vop(struct sdev_node *); 156 static void sdev_set_no_negcache(struct sdev_node *); 157 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []); 158 static void sdev_free_vtab(fs_operation_def_t *); 159 160 static void 161 sdev_prof_free(struct sdev_node *dv) 162 { 163 ASSERT(!SDEV_IS_GLOBAL(dv)); 164 nvlist_free(dv->sdev_prof.dev_name); 165 nvlist_free(dv->sdev_prof.dev_map); 166 nvlist_free(dv->sdev_prof.dev_symlink); 167 nvlist_free(dv->sdev_prof.dev_glob_incdir); 168 nvlist_free(dv->sdev_prof.dev_glob_excdir); 169 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 170 } 171 172 /* sdev_node cache constructor */ 173 /*ARGSUSED1*/ 174 static int 175 i_sdev_node_ctor(void *buf, void *cfarg, int flag) 176 { 177 struct sdev_node *dv = (struct sdev_node *)buf; 178 struct vnode *vp; 179 180 bzero(buf, sizeof (struct sdev_node)); 181 vp = dv->sdev_vnode = vn_alloc(flag); 182 if (vp == NULL) { 183 return (-1); 184 } 185 vp->v_data = dv; 186 rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL); 187 return (0); 188 } 189 190 /* sdev_node cache destructor */ 191 /*ARGSUSED1*/ 192 static void 193 i_sdev_node_dtor(void *buf, void *arg) 194 { 195 struct sdev_node *dv = (struct sdev_node *)buf; 196 struct vnode *vp = SDEVTOV(dv); 197 198 rw_destroy(&dv->sdev_contents); 199 vn_free(vp); 200 } 201 202 /* initialize sdev_node cache */ 203 void 204 sdev_node_cache_init() 205 { 206 int flags = 0; 207 208 #ifdef DEBUG 209 flags = sdev_debug_cache_flags; 210 if (flags) 211 sdcmn_err(("cache debug flags 0x%x\n", flags)); 212 #endif /* DEBUG */ 213 214 ASSERT(sdev_node_cache == NULL); 215 sdev_node_cache = kmem_cache_create("sdev_node_cache", 216 sizeof (struct sdev_node), 0, i_sdev_node_ctor, i_sdev_node_dtor, 217 NULL, NULL, NULL, flags); 218 } 219 220 /* destroy sdev_node cache */ 221 void 222 sdev_node_cache_fini() 223 { 224 ASSERT(sdev_node_cache != NULL); 225 kmem_cache_destroy(sdev_node_cache); 226 sdev_node_cache = NULL; 227 } 228 229 /* 230 * Compare two nodes lexographically to balance avl tree 231 */ 232 static int 233 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2) 234 { 235 int rv; 236 if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0) 237 return (0); 238 return ((rv < 0) ? -1 : 1); 239 } 240 241 void 242 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state) 243 { 244 ASSERT(dv); 245 ASSERT(RW_WRITE_HELD(&dv->sdev_contents)); 246 dv->sdev_state = state; 247 } 248 249 static void 250 sdev_attr_update(struct sdev_node *dv, vattr_t *vap) 251 { 252 timestruc_t now; 253 struct vattr *attrp; 254 uint_t mask; 255 256 ASSERT(dv->sdev_attr); 257 ASSERT(vap); 258 259 attrp = dv->sdev_attr; 260 mask = vap->va_mask; 261 if (mask & AT_TYPE) 262 attrp->va_type = vap->va_type; 263 if (mask & AT_MODE) 264 attrp->va_mode = vap->va_mode; 265 if (mask & AT_UID) 266 attrp->va_uid = vap->va_uid; 267 if (mask & AT_GID) 268 attrp->va_gid = vap->va_gid; 269 if (mask & AT_RDEV) 270 attrp->va_rdev = vap->va_rdev; 271 272 gethrestime(&now); 273 attrp->va_atime = (mask & AT_ATIME) ? vap->va_atime : now; 274 attrp->va_mtime = (mask & AT_MTIME) ? vap->va_mtime : now; 275 attrp->va_ctime = (mask & AT_CTIME) ? vap->va_ctime : now; 276 } 277 278 static void 279 sdev_attr_alloc(struct sdev_node *dv, vattr_t *vap) 280 { 281 ASSERT(dv->sdev_attr == NULL); 282 ASSERT(vap->va_mask & AT_TYPE); 283 ASSERT(vap->va_mask & AT_MODE); 284 285 dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP); 286 sdev_attr_update(dv, vap); 287 } 288 289 /* alloc and initialize a sdev_node */ 290 int 291 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, 292 vattr_t *vap) 293 { 294 struct sdev_node *dv = NULL; 295 struct vnode *vp; 296 size_t nmlen, len; 297 devname_handle_t *dhl; 298 299 nmlen = strlen(nm) + 1; 300 if (nmlen > MAXNAMELEN) { 301 sdcmn_err9(("sdev_nodeinit: node name %s" 302 " too long\n", nm)); 303 *newdv = NULL; 304 return (ENAMETOOLONG); 305 } 306 307 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); 308 309 dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP); 310 bcopy(nm, dv->sdev_name, nmlen); 311 dv->sdev_namelen = nmlen - 1; /* '\0' not included */ 312 len = strlen(ddv->sdev_path) + strlen(nm) + 2; 313 dv->sdev_path = kmem_alloc(len, KM_SLEEP); 314 (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm); 315 /* overwritten for VLNK nodes */ 316 dv->sdev_symlink = NULL; 317 318 vp = SDEVTOV(dv); 319 vn_reinit(vp); 320 vp->v_vfsp = SDEVTOV(ddv)->v_vfsp; 321 if (vap) 322 vp->v_type = vap->va_type; 323 324 /* 325 * initialized to the parent's vnodeops. 326 * maybe overwriten for a VDIR 327 */ 328 vn_setops(vp, vn_getops(SDEVTOV(ddv))); 329 vn_exists(vp); 330 331 dv->sdev_dotdot = NULL; 332 dv->sdev_attrvp = NULL; 333 if (vap) { 334 sdev_attr_alloc(dv, vap); 335 } else { 336 dv->sdev_attr = NULL; 337 } 338 339 dv->sdev_ino = sdev_mkino(dv); 340 dv->sdev_nlink = 0; /* updated on insert */ 341 dv->sdev_flags = ddv->sdev_flags; /* inherit from the parent first */ 342 dv->sdev_flags |= SDEV_BUILD; 343 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); 344 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); 345 if (SDEV_IS_GLOBAL(ddv)) { 346 dv->sdev_flags |= SDEV_GLOBAL; 347 dhl = &(dv->sdev_handle); 348 dhl->dh_data = dv; 349 dhl->dh_args = NULL; 350 sdev_set_no_negcache(dv); 351 dv->sdev_gdir_gen = 0; 352 } else { 353 dv->sdev_flags &= ~SDEV_GLOBAL; 354 dv->sdev_origin = NULL; /* set later */ 355 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 356 dv->sdev_ldir_gen = 0; 357 dv->sdev_devtree_gen = 0; 358 } 359 360 rw_enter(&dv->sdev_contents, RW_WRITER); 361 sdev_set_nodestate(dv, SDEV_INIT); 362 rw_exit(&dv->sdev_contents); 363 *newdv = dv; 364 365 return (0); 366 } 367 368 /* 369 * Transition a sdev_node into SDEV_READY state. If this fails, it is up to the 370 * caller to transition the node to the SDEV_ZOMBIE state. 371 */ 372 int 373 sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp, 374 void *args, struct cred *cred) 375 { 376 int error = 0; 377 struct vnode *vp = SDEVTOV(dv); 378 vtype_t type; 379 380 ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap); 381 382 type = vap->va_type; 383 vp->v_type = type; 384 vp->v_rdev = vap->va_rdev; 385 rw_enter(&dv->sdev_contents, RW_WRITER); 386 if (type == VDIR) { 387 dv->sdev_nlink = 2; 388 dv->sdev_flags &= ~SDEV_PERSIST; 389 dv->sdev_flags &= ~SDEV_DYNAMIC; 390 vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */ 391 ASSERT(dv->sdev_dotdot); 392 ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR); 393 vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev; 394 avl_create(&dv->sdev_entries, 395 (int (*)(const void *, const void *))sdev_compare_nodes, 396 sizeof (struct sdev_node), 397 offsetof(struct sdev_node, sdev_avllink)); 398 } else if (type == VLNK) { 399 ASSERT(args); 400 dv->sdev_nlink = 1; 401 dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP); 402 } else { 403 dv->sdev_nlink = 1; 404 } 405 406 if (!(SDEV_IS_GLOBAL(dv))) { 407 dv->sdev_origin = (struct sdev_node *)args; 408 dv->sdev_flags &= ~SDEV_PERSIST; 409 } 410 411 /* 412 * shadow node is created here OR 413 * if failed (indicated by dv->sdev_attrvp == NULL), 414 * created later in sdev_setattr 415 */ 416 if (avp) { 417 dv->sdev_attrvp = avp; 418 } else { 419 if (dv->sdev_attr == NULL) { 420 sdev_attr_alloc(dv, vap); 421 } else { 422 sdev_attr_update(dv, vap); 423 } 424 425 if ((dv->sdev_attrvp == NULL) && SDEV_IS_PERSIST(dv)) 426 error = sdev_shadow_node(dv, cred); 427 } 428 429 if (error == 0) { 430 /* transition to READY state */ 431 sdev_set_nodestate(dv, SDEV_READY); 432 sdev_nc_node_exists(dv); 433 } 434 rw_exit(&dv->sdev_contents); 435 return (error); 436 } 437 438 /* 439 * Build the VROOT sdev_node. 440 */ 441 /*ARGSUSED*/ 442 struct sdev_node * 443 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp, 444 struct vnode *avp, struct cred *cred) 445 { 446 struct sdev_node *dv; 447 struct vnode *vp; 448 char devdir[] = "/dev"; 449 450 ASSERT(sdev_node_cache != NULL); 451 ASSERT(avp); 452 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); 453 vp = SDEVTOV(dv); 454 vn_reinit(vp); 455 vp->v_flag |= VROOT; 456 vp->v_vfsp = vfsp; 457 vp->v_type = VDIR; 458 vp->v_rdev = devdev; 459 vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */ 460 vn_exists(vp); 461 462 if (vfsp->vfs_mntpt) 463 dv->sdev_name = i_ddi_strdup( 464 (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP); 465 else 466 /* vfs_mountdev1 set mount point later */ 467 dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP); 468 dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */ 469 dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP); 470 dv->sdev_ino = SDEV_ROOTINO; 471 dv->sdev_nlink = 2; /* name + . (no sdev_insert) */ 472 dv->sdev_dotdot = dv; /* .. == self */ 473 dv->sdev_attrvp = avp; 474 dv->sdev_attr = NULL; 475 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); 476 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); 477 if (strcmp(dv->sdev_name, "/dev") == 0) { 478 dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST; 479 bzero(&dv->sdev_handle, sizeof (dv->sdev_handle)); 480 dv->sdev_gdir_gen = 0; 481 } else { 482 dv->sdev_flags = SDEV_BUILD; 483 dv->sdev_flags &= ~SDEV_PERSIST; 484 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 485 dv->sdev_ldir_gen = 0; 486 dv->sdev_devtree_gen = 0; 487 } 488 489 avl_create(&dv->sdev_entries, 490 (int (*)(const void *, const void *))sdev_compare_nodes, 491 sizeof (struct sdev_node), 492 offsetof(struct sdev_node, sdev_avllink)); 493 494 rw_enter(&dv->sdev_contents, RW_WRITER); 495 sdev_set_nodestate(dv, SDEV_READY); 496 rw_exit(&dv->sdev_contents); 497 sdev_nc_node_exists(dv); 498 return (dv); 499 } 500 501 /* directory dependent vop table */ 502 struct sdev_vop_table { 503 char *vt_name; /* subdirectory name */ 504 const fs_operation_def_t *vt_service; /* vnodeops table */ 505 struct vnodeops *vt_vops; /* constructed vop */ 506 struct vnodeops **vt_global_vops; /* global container for vop */ 507 int (*vt_vtor)(struct sdev_node *); /* validate sdev_node */ 508 int vt_flags; 509 }; 510 511 /* 512 * A nice improvement would be to provide a plug-in mechanism 513 * for this table instead of a const table. 514 */ 515 static struct sdev_vop_table vtab[] = 516 { 517 { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate, 518 SDEV_DYNAMIC | SDEV_VTOR }, 519 520 { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate, 521 SDEV_DYNAMIC | SDEV_VTOR }, 522 523 { "zvol", devzvol_vnodeops_tbl, NULL, &devzvol_vnodeops, 524 devzvol_validate, SDEV_ZONED | SDEV_DYNAMIC | SDEV_VTOR | SDEV_SUBDIR }, 525 526 { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE }, 527 528 { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate, 529 SDEV_DYNAMIC | SDEV_VTOR }, 530 531 { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops, 532 devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE }, 533 534 /* 535 * SDEV_DYNAMIC: prevent calling out to devfsadm, since only the 536 * lofi driver controls child nodes. 537 * 538 * SDEV_PERSIST: ensure devfsadm knows to clean up any persisted 539 * stale nodes (e.g. from devfsadm -R). 540 * 541 * In addition, devfsadm knows not to attempt a rmdir: a zone 542 * may hold a reference, which would zombify the node, 543 * preventing a mkdir. 544 */ 545 546 { "lofi", NULL, NULL, NULL, NULL, 547 SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST }, 548 { "rlofi", NULL, NULL, NULL, NULL, 549 SDEV_ZONED | SDEV_DYNAMIC | SDEV_PERSIST }, 550 551 { NULL, NULL, NULL, NULL, NULL, 0} 552 }; 553 554 /* 555 * We need to match off of the sdev_path, not the sdev_name. We are only allowed 556 * to exist directly under /dev. 557 */ 558 struct sdev_vop_table * 559 sdev_match(struct sdev_node *dv) 560 { 561 int vlen; 562 int i; 563 const char *path; 564 565 if (strlen(dv->sdev_path) <= 5) 566 return (NULL); 567 568 if (strncmp(dv->sdev_path, "/dev/", 5) != 0) 569 return (NULL); 570 path = dv->sdev_path + 5; 571 572 for (i = 0; vtab[i].vt_name; i++) { 573 if (strcmp(vtab[i].vt_name, path) == 0) 574 return (&vtab[i]); 575 if (vtab[i].vt_flags & SDEV_SUBDIR) { 576 vlen = strlen(vtab[i].vt_name); 577 if ((strncmp(vtab[i].vt_name, path, 578 vlen - 1) == 0) && path[vlen] == '/') 579 return (&vtab[i]); 580 } 581 582 } 583 return (NULL); 584 } 585 586 /* 587 * sets a directory's vnodeops if the directory is in the vtab; 588 */ 589 static struct vnodeops * 590 sdev_get_vop(struct sdev_node *dv) 591 { 592 struct sdev_vop_table *vtp; 593 char *path; 594 595 path = dv->sdev_path; 596 ASSERT(path); 597 598 /* gets the relative path to /dev/ */ 599 path += 5; 600 601 /* gets the vtab entry it matches */ 602 if ((vtp = sdev_match(dv)) != NULL) { 603 dv->sdev_flags |= vtp->vt_flags; 604 if (SDEV_IS_PERSIST(dv->sdev_dotdot) && 605 (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv))) 606 dv->sdev_flags |= SDEV_PERSIST; 607 608 if (vtp->vt_vops) { 609 if (vtp->vt_global_vops) 610 *(vtp->vt_global_vops) = vtp->vt_vops; 611 612 return (vtp->vt_vops); 613 } 614 615 if (vtp->vt_service) { 616 fs_operation_def_t *templ; 617 templ = sdev_merge_vtab(vtp->vt_service); 618 if (vn_make_ops(vtp->vt_name, 619 (const fs_operation_def_t *)templ, 620 &vtp->vt_vops) != 0) { 621 cmn_err(CE_PANIC, "%s: malformed vnode ops\n", 622 vtp->vt_name); 623 /*NOTREACHED*/ 624 } 625 if (vtp->vt_global_vops) { 626 *(vtp->vt_global_vops) = vtp->vt_vops; 627 } 628 sdev_free_vtab(templ); 629 630 return (vtp->vt_vops); 631 } 632 633 return (sdev_vnodeops); 634 } 635 636 /* child inherits the persistence of the parent */ 637 if (SDEV_IS_PERSIST(dv->sdev_dotdot)) 638 dv->sdev_flags |= SDEV_PERSIST; 639 640 return (sdev_vnodeops); 641 } 642 643 static void 644 sdev_set_no_negcache(struct sdev_node *dv) 645 { 646 int i; 647 char *path; 648 649 ASSERT(dv->sdev_path); 650 path = dv->sdev_path + strlen("/dev/"); 651 652 for (i = 0; vtab[i].vt_name; i++) { 653 if (strcmp(vtab[i].vt_name, path) == 0) { 654 if (vtab[i].vt_flags & SDEV_NO_NCACHE) 655 dv->sdev_flags |= SDEV_NO_NCACHE; 656 break; 657 } 658 } 659 } 660 661 void * 662 sdev_get_vtor(struct sdev_node *dv) 663 { 664 struct sdev_vop_table *vtp; 665 666 vtp = sdev_match(dv); 667 if (vtp) 668 return ((void *)vtp->vt_vtor); 669 else 670 return (NULL); 671 } 672 673 /* 674 * Build the base root inode 675 */ 676 ino_t 677 sdev_mkino(struct sdev_node *dv) 678 { 679 ino_t ino; 680 681 /* 682 * for now, follow the lead of tmpfs here 683 * need to someday understand the requirements here 684 */ 685 ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3); 686 ino += SDEV_ROOTINO + 1; 687 688 return (ino); 689 } 690 691 int 692 sdev_getlink(struct vnode *linkvp, char **link) 693 { 694 int err; 695 char *buf; 696 struct uio uio = {0}; 697 struct iovec iov = {0}; 698 699 if (linkvp == NULL) 700 return (ENOENT); 701 ASSERT(linkvp->v_type == VLNK); 702 703 buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 704 iov.iov_base = buf; 705 iov.iov_len = MAXPATHLEN; 706 uio.uio_iov = &iov; 707 uio.uio_iovcnt = 1; 708 uio.uio_resid = MAXPATHLEN; 709 uio.uio_segflg = UIO_SYSSPACE; 710 uio.uio_llimit = MAXOFFSET_T; 711 712 err = VOP_READLINK(linkvp, &uio, kcred, NULL); 713 if (err) { 714 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf); 715 kmem_free(buf, MAXPATHLEN); 716 return (ENOENT); 717 } 718 719 /* mission complete */ 720 *link = i_ddi_strdup(buf, KM_SLEEP); 721 kmem_free(buf, MAXPATHLEN); 722 return (0); 723 } 724 725 /* 726 * A convenient wrapper to get the devfs node vnode for a device 727 * minor functionality: readlink() of a /dev symlink 728 * Place the link into dv->sdev_symlink 729 */ 730 static int 731 sdev_follow_link(struct sdev_node *dv) 732 { 733 int err; 734 struct vnode *linkvp; 735 char *link = NULL; 736 737 linkvp = SDEVTOV(dv); 738 if (linkvp == NULL) 739 return (ENOENT); 740 ASSERT(linkvp->v_type == VLNK); 741 err = sdev_getlink(linkvp, &link); 742 if (err) { 743 dv->sdev_symlink = NULL; 744 return (ENOENT); 745 } 746 747 ASSERT(link != NULL); 748 dv->sdev_symlink = link; 749 return (0); 750 } 751 752 static int 753 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs) 754 { 755 vtype_t otype = SDEVTOV(dv)->v_type; 756 757 /* 758 * existing sdev_node has a different type. 759 */ 760 if (otype != nvap->va_type) { 761 sdcmn_err9(("sdev_node_check: existing node " 762 " %s type %d does not match new node type %d\n", 763 dv->sdev_name, otype, nvap->va_type)); 764 return (EEXIST); 765 } 766 767 /* 768 * For a symlink, the target should be the same. 769 */ 770 if (otype == VLNK) { 771 ASSERT(nargs != NULL); 772 ASSERT(dv->sdev_symlink != NULL); 773 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) { 774 sdcmn_err9(("sdev_node_check: existing node " 775 " %s has different symlink %s as new node " 776 " %s\n", dv->sdev_name, dv->sdev_symlink, 777 (char *)nargs)); 778 return (EEXIST); 779 } 780 } 781 782 return (0); 783 } 784 785 /* 786 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready() 787 * 788 * arguments: 789 * - ddv (parent) 790 * - nm (child name) 791 * - newdv (sdev_node for nm is returned here) 792 * - vap (vattr for the node to be created, va_type should be set. 793 * - avp (attribute vnode) 794 * the defaults should be used if unknown) 795 * - cred 796 * - args 797 * . tnm (for VLNK) 798 * . global sdev_node (for !SDEV_GLOBAL) 799 * - state: SDEV_INIT, SDEV_READY 800 * 801 * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT) 802 * 803 * NOTE: directory contents writers lock needs to be held before 804 * calling this routine. 805 */ 806 int 807 sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, 808 struct vattr *vap, struct vnode *avp, void *args, struct cred *cred, 809 sdev_node_state_t state) 810 { 811 int error = 0; 812 sdev_node_state_t node_state; 813 struct sdev_node *dv = NULL; 814 815 ASSERT(state != SDEV_ZOMBIE); 816 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 817 818 if (*newdv) { 819 dv = *newdv; 820 } else { 821 /* allocate and initialize a sdev_node */ 822 if (ddv->sdev_state == SDEV_ZOMBIE) { 823 sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n", 824 ddv->sdev_path)); 825 return (ENOENT); 826 } 827 828 error = sdev_nodeinit(ddv, nm, &dv, vap); 829 if (error != 0) { 830 sdcmn_err9(("sdev_mknode: error %d," 831 " name %s can not be initialized\n", 832 error, nm)); 833 return (error); 834 } 835 ASSERT(dv); 836 837 /* insert into the directory cache */ 838 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD); 839 } 840 841 ASSERT(dv); 842 node_state = dv->sdev_state; 843 ASSERT(node_state != SDEV_ZOMBIE); 844 845 if (state == SDEV_READY) { 846 switch (node_state) { 847 case SDEV_INIT: 848 error = sdev_nodeready(dv, vap, avp, args, cred); 849 if (error) { 850 sdcmn_err9(("sdev_mknode: node %s can NOT" 851 " be transitioned into READY state, " 852 "error %d\n", nm, error)); 853 } 854 break; 855 case SDEV_READY: 856 /* 857 * Do some sanity checking to make sure 858 * the existing sdev_node is what has been 859 * asked for. 860 */ 861 error = sdev_node_check(dv, vap, args); 862 break; 863 default: 864 break; 865 } 866 } 867 868 if (!error) { 869 *newdv = dv; 870 ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE); 871 } else { 872 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE); 873 /* 874 * We created this node, it wasn't passed into us. Therefore it 875 * is up to us to delete it. 876 */ 877 if (*newdv == NULL) 878 SDEV_SIMPLE_RELE(dv); 879 *newdv = NULL; 880 } 881 882 return (error); 883 } 884 885 /* 886 * convenient wrapper to change vp's ATIME, CTIME and MTIME 887 */ 888 void 889 sdev_update_timestamps(struct vnode *vp, cred_t *cred, uint_t mask) 890 { 891 struct vattr attr; 892 timestruc_t now; 893 int err; 894 895 ASSERT(vp); 896 gethrestime(&now); 897 if (mask & AT_CTIME) 898 attr.va_ctime = now; 899 if (mask & AT_MTIME) 900 attr.va_mtime = now; 901 if (mask & AT_ATIME) 902 attr.va_atime = now; 903 904 attr.va_mask = (mask & AT_TIMES); 905 err = VOP_SETATTR(vp, &attr, 0, cred, NULL); 906 if (err && (err != EROFS)) { 907 sdcmn_err(("update timestamps error %d\n", err)); 908 } 909 } 910 911 /* 912 * the backing store vnode is released here 913 */ 914 /*ARGSUSED1*/ 915 void 916 sdev_nodedestroy(struct sdev_node *dv, uint_t flags) 917 { 918 /* no references */ 919 ASSERT(dv->sdev_nlink == 0); 920 921 if (dv->sdev_attrvp != NULLVP) { 922 VN_RELE(dv->sdev_attrvp); 923 /* 924 * reset the attrvp so that no more 925 * references can be made on this already 926 * vn_rele() vnode 927 */ 928 dv->sdev_attrvp = NULLVP; 929 } 930 931 if (dv->sdev_attr != NULL) { 932 kmem_free(dv->sdev_attr, sizeof (struct vattr)); 933 dv->sdev_attr = NULL; 934 } 935 936 if (dv->sdev_name != NULL) { 937 kmem_free(dv->sdev_name, dv->sdev_namelen + 1); 938 dv->sdev_name = NULL; 939 } 940 941 if (dv->sdev_symlink != NULL) { 942 kmem_free(dv->sdev_symlink, strlen(dv->sdev_symlink) + 1); 943 dv->sdev_symlink = NULL; 944 } 945 946 if (dv->sdev_path) { 947 kmem_free(dv->sdev_path, strlen(dv->sdev_path) + 1); 948 dv->sdev_path = NULL; 949 } 950 951 if (!SDEV_IS_GLOBAL(dv)) 952 sdev_prof_free(dv); 953 954 if (SDEVTOV(dv)->v_type == VDIR) { 955 ASSERT(SDEV_FIRST_ENTRY(dv) == NULL); 956 avl_destroy(&dv->sdev_entries); 957 } 958 959 mutex_destroy(&dv->sdev_lookup_lock); 960 cv_destroy(&dv->sdev_lookup_cv); 961 962 /* return node to initial state as per constructor */ 963 (void) memset((void *)&dv->sdev_instance_data, 0, 964 sizeof (dv->sdev_instance_data)); 965 vn_invalid(SDEVTOV(dv)); 966 kmem_cache_free(sdev_node_cache, dv); 967 } 968 969 /* 970 * DIRECTORY CACHE lookup 971 */ 972 struct sdev_node * 973 sdev_findbyname(struct sdev_node *ddv, char *nm) 974 { 975 struct sdev_node *dv; 976 struct sdev_node dvtmp; 977 avl_index_t where; 978 979 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); 980 981 dvtmp.sdev_name = nm; 982 dv = avl_find(&ddv->sdev_entries, &dvtmp, &where); 983 if (dv) { 984 ASSERT(dv->sdev_dotdot == ddv); 985 ASSERT(strcmp(dv->sdev_name, nm) == 0); 986 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 987 SDEV_HOLD(dv); 988 return (dv); 989 } 990 return (NULL); 991 } 992 993 /* 994 * Inserts a new sdev_node in a parent directory 995 */ 996 void 997 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv) 998 { 999 avl_index_t where; 1000 1001 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1002 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 1003 ASSERT(ddv->sdev_nlink >= 2); 1004 ASSERT(dv->sdev_nlink == 0); 1005 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1006 1007 dv->sdev_dotdot = ddv; 1008 VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL); 1009 avl_insert(&ddv->sdev_entries, dv, where); 1010 ddv->sdev_nlink++; 1011 } 1012 1013 /* 1014 * The following check is needed because while sdev_nodes are linked 1015 * in SDEV_INIT state, they have their link counts incremented only 1016 * in SDEV_READY state. 1017 */ 1018 static void 1019 decr_link(struct sdev_node *dv) 1020 { 1021 VERIFY(RW_WRITE_HELD(&dv->sdev_contents)); 1022 if (dv->sdev_state != SDEV_INIT) { 1023 VERIFY(dv->sdev_nlink >= 1); 1024 dv->sdev_nlink--; 1025 } else { 1026 VERIFY(dv->sdev_nlink == 0); 1027 } 1028 } 1029 1030 /* 1031 * Delete an existing dv from directory cache 1032 * 1033 * In the case of a node is still held by non-zero reference count, the node is 1034 * put into ZOMBIE state. The node is always unlinked from its parent, but it is 1035 * not destroyed via sdev_inactive until its reference count reaches "0". 1036 */ 1037 static void 1038 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv) 1039 { 1040 struct vnode *vp; 1041 sdev_node_state_t os; 1042 1043 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1044 1045 vp = SDEVTOV(dv); 1046 mutex_enter(&vp->v_lock); 1047 rw_enter(&dv->sdev_contents, RW_WRITER); 1048 os = dv->sdev_state; 1049 ASSERT(os != SDEV_ZOMBIE); 1050 dv->sdev_state = SDEV_ZOMBIE; 1051 1052 /* 1053 * unlink ourselves from the parent directory now to take care of the .. 1054 * link. However, if we're a directory, we don't remove our reference to 1055 * ourself eg. '.' until we are torn down in the inactive callback. 1056 */ 1057 decr_link(ddv); 1058 avl_remove(&ddv->sdev_entries, dv); 1059 /* 1060 * sdev_inactive expects nodes to have a link to themselves when we're 1061 * tearing them down. If we're transitioning from the initial state to 1062 * zombie and not via ready, then we're not going to have this link that 1063 * comes from the node being ready. As a result, we need to increment 1064 * our link count by one to account for this. 1065 */ 1066 if (os == SDEV_INIT && dv->sdev_nlink == 0) 1067 dv->sdev_nlink++; 1068 rw_exit(&dv->sdev_contents); 1069 mutex_exit(&vp->v_lock); 1070 } 1071 1072 /* 1073 * check if the source is in the path of the target 1074 * 1075 * source and target are different 1076 */ 1077 /*ARGSUSED2*/ 1078 static int 1079 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred) 1080 { 1081 int error = 0; 1082 struct sdev_node *dotdot, *dir; 1083 1084 dotdot = tdv->sdev_dotdot; 1085 ASSERT(dotdot); 1086 1087 /* fs root */ 1088 if (dotdot == tdv) { 1089 return (0); 1090 } 1091 1092 for (;;) { 1093 /* 1094 * avoid error cases like 1095 * mv a a/b 1096 * mv a a/b/c 1097 * etc. 1098 */ 1099 if (dotdot == sdv) { 1100 error = EINVAL; 1101 break; 1102 } 1103 1104 dir = dotdot; 1105 dotdot = dir->sdev_dotdot; 1106 1107 /* done checking because root is reached */ 1108 if (dir == dotdot) { 1109 break; 1110 } 1111 } 1112 return (error); 1113 } 1114 1115 int 1116 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv, 1117 struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm, 1118 struct cred *cred) 1119 { 1120 int error = 0; 1121 struct vnode *ovp = SDEVTOV(odv); 1122 struct vnode *nvp; 1123 struct vattr vattr; 1124 int doingdir = (ovp->v_type == VDIR); 1125 char *link = NULL; 1126 int samedir = (oddv == nddv) ? 1 : 0; 1127 int bkstore = 0; 1128 struct sdev_node *idv = NULL; 1129 struct sdev_node *ndv = NULL; 1130 timestruc_t now; 1131 1132 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 1133 error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL); 1134 if (error) 1135 return (error); 1136 1137 if (!samedir) 1138 rw_enter(&oddv->sdev_contents, RW_WRITER); 1139 rw_enter(&nddv->sdev_contents, RW_WRITER); 1140 1141 /* 1142 * the source may have been deleted by another thread before 1143 * we gets here. 1144 */ 1145 if (odv->sdev_state != SDEV_READY) { 1146 error = ENOENT; 1147 goto err_out; 1148 } 1149 1150 if (doingdir && (odv == nddv)) { 1151 error = EINVAL; 1152 goto err_out; 1153 } 1154 1155 /* 1156 * If renaming a directory, and the parents are different (".." must be 1157 * changed) then the source dir must not be in the dir hierarchy above 1158 * the target since it would orphan everything below the source dir. 1159 */ 1160 if (doingdir && (oddv != nddv)) { 1161 error = sdev_checkpath(odv, nddv, cred); 1162 if (error) 1163 goto err_out; 1164 } 1165 1166 /* fix the source for a symlink */ 1167 if (vattr.va_type == VLNK) { 1168 if (odv->sdev_symlink == NULL) { 1169 error = sdev_follow_link(odv); 1170 if (error) { 1171 /* 1172 * The underlying symlink doesn't exist. This 1173 * node probably shouldn't even exist. While 1174 * it's a bit jarring to consumers, we're going 1175 * to remove the node from /dev. 1176 */ 1177 if (SDEV_IS_PERSIST((*ndvp))) 1178 bkstore = 1; 1179 sdev_dirdelete(oddv, odv); 1180 if (bkstore) { 1181 ASSERT(nddv->sdev_attrvp); 1182 error = VOP_REMOVE(nddv->sdev_attrvp, 1183 nnm, cred, NULL, 0); 1184 if (error) 1185 goto err_out; 1186 } 1187 error = ENOENT; 1188 goto err_out; 1189 } 1190 } 1191 ASSERT(odv->sdev_symlink); 1192 link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP); 1193 } 1194 1195 /* destination existing */ 1196 if (*ndvp) { 1197 nvp = SDEVTOV(*ndvp); 1198 ASSERT(nvp); 1199 1200 /* handling renaming to itself */ 1201 if (odv == *ndvp) { 1202 error = 0; 1203 goto err_out; 1204 } 1205 1206 if (nvp->v_type == VDIR) { 1207 if (!doingdir) { 1208 error = EISDIR; 1209 goto err_out; 1210 } 1211 1212 if (vn_vfswlock(nvp)) { 1213 error = EBUSY; 1214 goto err_out; 1215 } 1216 1217 if (vn_mountedvfs(nvp) != NULL) { 1218 vn_vfsunlock(nvp); 1219 error = EBUSY; 1220 goto err_out; 1221 } 1222 1223 /* in case dir1 exists in dir2 and "mv dir1 dir2" */ 1224 if ((*ndvp)->sdev_nlink > 2) { 1225 vn_vfsunlock(nvp); 1226 error = EEXIST; 1227 goto err_out; 1228 } 1229 vn_vfsunlock(nvp); 1230 1231 /* 1232 * We did not place the hold on *ndvp, so even though 1233 * we're deleting the node, we should not get rid of our 1234 * reference. 1235 */ 1236 sdev_dirdelete(nddv, *ndvp); 1237 *ndvp = NULL; 1238 ASSERT(nddv->sdev_attrvp); 1239 error = VOP_RMDIR(nddv->sdev_attrvp, nnm, 1240 nddv->sdev_attrvp, cred, NULL, 0); 1241 if (error) 1242 goto err_out; 1243 } else { 1244 if (doingdir) { 1245 error = ENOTDIR; 1246 goto err_out; 1247 } 1248 1249 if (SDEV_IS_PERSIST((*ndvp))) { 1250 bkstore = 1; 1251 } 1252 1253 /* 1254 * Get rid of the node from the directory cache note. 1255 * Don't forget that it's not up to us to remove the vn 1256 * ref on the sdev node, as we did not place it. 1257 */ 1258 sdev_dirdelete(nddv, *ndvp); 1259 *ndvp = NULL; 1260 if (bkstore) { 1261 ASSERT(nddv->sdev_attrvp); 1262 error = VOP_REMOVE(nddv->sdev_attrvp, 1263 nnm, cred, NULL, 0); 1264 if (error) 1265 goto err_out; 1266 } 1267 } 1268 } 1269 1270 /* 1271 * make a fresh node from the source attrs 1272 */ 1273 ASSERT(RW_WRITE_HELD(&nddv->sdev_contents)); 1274 error = sdev_mknode(nddv, nnm, ndvp, &vattr, 1275 NULL, (void *)link, cred, SDEV_READY); 1276 1277 if (link != NULL) { 1278 kmem_free(link, strlen(link) + 1); 1279 link = NULL; 1280 } 1281 1282 if (error) 1283 goto err_out; 1284 ASSERT(*ndvp); 1285 ASSERT((*ndvp)->sdev_state == SDEV_READY); 1286 1287 /* move dir contents */ 1288 if (doingdir) { 1289 for (idv = SDEV_FIRST_ENTRY(odv); idv; 1290 idv = SDEV_NEXT_ENTRY(odv, idv)) { 1291 SDEV_HOLD(idv); 1292 error = sdev_rnmnode(odv, idv, 1293 (struct sdev_node *)(*ndvp), &ndv, 1294 idv->sdev_name, cred); 1295 SDEV_RELE(idv); 1296 if (error) 1297 goto err_out; 1298 ndv = NULL; 1299 } 1300 } 1301 1302 if ((*ndvp)->sdev_attrvp) { 1303 sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred, 1304 AT_CTIME|AT_ATIME); 1305 } else { 1306 ASSERT((*ndvp)->sdev_attr); 1307 gethrestime(&now); 1308 (*ndvp)->sdev_attr->va_ctime = now; 1309 (*ndvp)->sdev_attr->va_atime = now; 1310 } 1311 1312 if (nddv->sdev_attrvp) { 1313 sdev_update_timestamps(nddv->sdev_attrvp, kcred, 1314 AT_MTIME|AT_ATIME); 1315 } else { 1316 ASSERT(nddv->sdev_attr); 1317 gethrestime(&now); 1318 nddv->sdev_attr->va_mtime = now; 1319 nddv->sdev_attr->va_atime = now; 1320 } 1321 rw_exit(&nddv->sdev_contents); 1322 if (!samedir) 1323 rw_exit(&oddv->sdev_contents); 1324 1325 SDEV_RELE(*ndvp); 1326 return (error); 1327 1328 err_out: 1329 if (link != NULL) { 1330 kmem_free(link, strlen(link) + 1); 1331 link = NULL; 1332 } 1333 1334 rw_exit(&nddv->sdev_contents); 1335 if (!samedir) 1336 rw_exit(&oddv->sdev_contents); 1337 return (error); 1338 } 1339 1340 /* 1341 * Merge sdev_node specific information into an attribute structure. 1342 * 1343 * note: sdev_node is not locked here 1344 */ 1345 void 1346 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap) 1347 { 1348 struct vnode *vp = SDEVTOV(dv); 1349 1350 vap->va_nlink = dv->sdev_nlink; 1351 vap->va_nodeid = dv->sdev_ino; 1352 vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev; 1353 vap->va_type = vp->v_type; 1354 1355 if (vp->v_type == VDIR) { 1356 vap->va_rdev = 0; 1357 vap->va_fsid = vp->v_rdev; 1358 } else if (vp->v_type == VLNK) { 1359 vap->va_rdev = 0; 1360 vap->va_mode &= ~S_IFMT; 1361 vap->va_mode |= S_IFLNK; 1362 } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) { 1363 vap->va_rdev = vp->v_rdev; 1364 vap->va_mode &= ~S_IFMT; 1365 if (vap->va_type == VCHR) 1366 vap->va_mode |= S_IFCHR; 1367 else 1368 vap->va_mode |= S_IFBLK; 1369 } else { 1370 vap->va_rdev = 0; 1371 } 1372 } 1373 1374 struct vattr * 1375 sdev_getdefault_attr(enum vtype type) 1376 { 1377 if (type == VDIR) 1378 return (&sdev_vattr_dir); 1379 else if (type == VCHR) 1380 return (&sdev_vattr_chr); 1381 else if (type == VBLK) 1382 return (&sdev_vattr_blk); 1383 else if (type == VLNK) 1384 return (&sdev_vattr_lnk); 1385 else 1386 return (NULL); 1387 } 1388 int 1389 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp) 1390 { 1391 int rv = 0; 1392 struct vnode *vp = SDEVTOV(dv); 1393 1394 switch (vp->v_type) { 1395 case VCHR: 1396 case VBLK: 1397 /* 1398 * If vnode is a device, return special vnode instead 1399 * (though it knows all about -us- via sp->s_realvp) 1400 */ 1401 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred); 1402 VN_RELE(vp); 1403 if (*vpp == NULLVP) 1404 rv = ENOSYS; 1405 break; 1406 default: /* most types are returned as is */ 1407 *vpp = vp; 1408 break; 1409 } 1410 return (rv); 1411 } 1412 1413 /* 1414 * junction between devname and root file system, e.g. ufs 1415 */ 1416 int 1417 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp) 1418 { 1419 struct vnode *rdvp = ddv->sdev_attrvp; 1420 int rval = 0; 1421 1422 ASSERT(rdvp); 1423 1424 rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL, 1425 NULL); 1426 return (rval); 1427 } 1428 1429 static int 1430 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred) 1431 { 1432 struct sdev_node *dv = NULL; 1433 char *nm; 1434 struct vnode *dirvp; 1435 int error; 1436 vnode_t *vp; 1437 int eof; 1438 struct iovec iov; 1439 struct uio uio; 1440 struct dirent64 *dp; 1441 dirent64_t *dbuf; 1442 size_t dbuflen; 1443 struct vattr vattr; 1444 char *link = NULL; 1445 1446 if (ddv->sdev_attrvp == NULL) 1447 return (0); 1448 if (!(ddv->sdev_flags & SDEV_BUILD)) 1449 return (0); 1450 1451 dirvp = ddv->sdev_attrvp; 1452 VN_HOLD(dirvp); 1453 dbuf = kmem_zalloc(dlen, KM_SLEEP); 1454 1455 uio.uio_iov = &iov; 1456 uio.uio_iovcnt = 1; 1457 uio.uio_segflg = UIO_SYSSPACE; 1458 uio.uio_fmode = 0; 1459 uio.uio_extflg = UIO_COPY_CACHED; 1460 uio.uio_loffset = 0; 1461 uio.uio_llimit = MAXOFFSET_T; 1462 1463 eof = 0; 1464 error = 0; 1465 while (!error && !eof) { 1466 uio.uio_resid = dlen; 1467 iov.iov_base = (char *)dbuf; 1468 iov.iov_len = dlen; 1469 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1470 error = VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1471 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1472 1473 dbuflen = dlen - uio.uio_resid; 1474 if (error || dbuflen == 0) 1475 break; 1476 1477 if (!(ddv->sdev_flags & SDEV_BUILD)) 1478 break; 1479 1480 for (dp = dbuf; ((intptr_t)dp < 1481 (intptr_t)dbuf + dbuflen); 1482 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1483 nm = dp->d_name; 1484 1485 if (strcmp(nm, ".") == 0 || 1486 strcmp(nm, "..") == 0) 1487 continue; 1488 1489 vp = NULLVP; 1490 dv = sdev_cache_lookup(ddv, nm); 1491 if (dv) { 1492 VERIFY(dv->sdev_state != SDEV_ZOMBIE); 1493 SDEV_SIMPLE_RELE(dv); 1494 continue; 1495 } 1496 1497 /* refill the cache if not already */ 1498 error = devname_backstore_lookup(ddv, nm, &vp); 1499 if (error) 1500 continue; 1501 1502 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 1503 error = VOP_GETATTR(vp, &vattr, 0, cred, NULL); 1504 if (error) 1505 continue; 1506 1507 if (vattr.va_type == VLNK) { 1508 error = sdev_getlink(vp, &link); 1509 if (error) { 1510 continue; 1511 } 1512 ASSERT(link != NULL); 1513 } 1514 1515 if (!rw_tryupgrade(&ddv->sdev_contents)) { 1516 rw_exit(&ddv->sdev_contents); 1517 rw_enter(&ddv->sdev_contents, RW_WRITER); 1518 } 1519 error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link, 1520 cred, SDEV_READY); 1521 rw_downgrade(&ddv->sdev_contents); 1522 1523 if (link != NULL) { 1524 kmem_free(link, strlen(link) + 1); 1525 link = NULL; 1526 } 1527 1528 if (!error) { 1529 ASSERT(dv); 1530 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1531 SDEV_SIMPLE_RELE(dv); 1532 } 1533 vp = NULL; 1534 dv = NULL; 1535 } 1536 } 1537 1538 done: 1539 VN_RELE(dirvp); 1540 kmem_free(dbuf, dlen); 1541 1542 return (error); 1543 } 1544 1545 void 1546 sdev_filldir_dynamic(struct sdev_node *ddv) 1547 { 1548 int error; 1549 int i; 1550 struct vattr vattr; 1551 struct vattr *vap = &vattr; 1552 char *nm = NULL; 1553 struct sdev_node *dv = NULL; 1554 1555 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1556 ASSERT((ddv->sdev_flags & SDEV_BUILD)); 1557 1558 *vap = *sdev_getdefault_attr(VDIR); /* note structure copy here */ 1559 gethrestime(&vap->va_atime); 1560 vap->va_mtime = vap->va_atime; 1561 vap->va_ctime = vap->va_atime; 1562 for (i = 0; vtab[i].vt_name != NULL; i++) { 1563 /* 1564 * This early, we may be in a read-only /dev environment: leave 1565 * the creation of any nodes we'd attempt to persist to 1566 * devfsadm. Because /dev itself is normally persistent, any 1567 * node which is not marked dynamic will end up being marked 1568 * persistent. However, some nodes are both dynamic and 1569 * persistent, mostly lofi and rlofi, so we need to be careful 1570 * in our check. 1571 */ 1572 if ((vtab[i].vt_flags & SDEV_PERSIST) || 1573 !(vtab[i].vt_flags & SDEV_DYNAMIC)) 1574 continue; 1575 nm = vtab[i].vt_name; 1576 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1577 dv = NULL; 1578 error = sdev_mknode(ddv, nm, &dv, vap, NULL, 1579 NULL, kcred, SDEV_READY); 1580 if (error) { 1581 cmn_err(CE_WARN, "%s/%s: error %d\n", 1582 ddv->sdev_name, nm, error); 1583 } else { 1584 ASSERT(dv); 1585 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1586 SDEV_SIMPLE_RELE(dv); 1587 } 1588 } 1589 } 1590 1591 /* 1592 * Creating a backing store entry based on sdev_attr. 1593 * This is called either as part of node creation in a persistent directory 1594 * or from setattr/setsecattr to persist access attributes across reboot. 1595 */ 1596 int 1597 sdev_shadow_node(struct sdev_node *dv, struct cred *cred) 1598 { 1599 int error = 0; 1600 struct vnode *dvp = SDEVTOV(dv->sdev_dotdot); 1601 struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp; 1602 struct vattr *vap = dv->sdev_attr; 1603 char *nm = dv->sdev_name; 1604 struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL; 1605 1606 ASSERT(dv && dv->sdev_name && rdvp); 1607 ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL); 1608 1609 lookup: 1610 /* try to find it in the backing store */ 1611 error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL, 1612 NULL); 1613 if (error == 0) { 1614 if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) { 1615 VN_HOLD(rrvp); 1616 VN_RELE(*rvp); 1617 *rvp = rrvp; 1618 } 1619 1620 kmem_free(dv->sdev_attr, sizeof (vattr_t)); 1621 dv->sdev_attr = NULL; 1622 dv->sdev_attrvp = *rvp; 1623 return (0); 1624 } 1625 1626 /* let's try to persist the node */ 1627 gethrestime(&vap->va_atime); 1628 vap->va_mtime = vap->va_atime; 1629 vap->va_ctime = vap->va_atime; 1630 vap->va_mask |= AT_TYPE|AT_MODE; 1631 switch (vap->va_type) { 1632 case VDIR: 1633 error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL); 1634 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n", 1635 (void *)(*rvp), error)); 1636 if (!error) 1637 VN_RELE(*rvp); 1638 break; 1639 case VCHR: 1640 case VBLK: 1641 case VREG: 1642 case VDOOR: 1643 error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE, 1644 rvp, cred, 0, NULL, NULL); 1645 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n", 1646 (void *)(*rvp), error)); 1647 if (!error) 1648 VN_RELE(*rvp); 1649 break; 1650 case VLNK: 1651 ASSERT(dv->sdev_symlink); 1652 error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred, 1653 NULL, 0); 1654 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n", 1655 error)); 1656 break; 1657 default: 1658 cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node " 1659 "create\n", nm); 1660 /*NOTREACHED*/ 1661 } 1662 1663 /* go back to lookup to factor out spec node and set attrvp */ 1664 if (error == 0) 1665 goto lookup; 1666 1667 sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error)); 1668 return (error); 1669 } 1670 1671 static void 1672 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm) 1673 { 1674 struct sdev_node *dup = NULL; 1675 1676 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1677 if ((dup = sdev_findbyname(ddv, nm)) == NULL) { 1678 sdev_direnter(ddv, *dv); 1679 } else { 1680 VERIFY(dup->sdev_state != SDEV_ZOMBIE); 1681 SDEV_SIMPLE_RELE(*dv); 1682 sdev_nodedestroy(*dv, 0); 1683 *dv = dup; 1684 } 1685 } 1686 1687 static void 1688 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv) 1689 { 1690 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1691 sdev_dirdelete(ddv, *dv); 1692 } 1693 1694 /* 1695 * update the in-core directory cache 1696 */ 1697 void 1698 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm, 1699 sdev_cache_ops_t ops) 1700 { 1701 ASSERT((SDEV_HELD(*dv))); 1702 1703 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1704 switch (ops) { 1705 case SDEV_CACHE_ADD: 1706 sdev_cache_add(ddv, dv, nm); 1707 break; 1708 case SDEV_CACHE_DELETE: 1709 sdev_cache_delete(ddv, dv); 1710 break; 1711 default: 1712 break; 1713 } 1714 } 1715 1716 /* 1717 * retrieve the named entry from the directory cache 1718 */ 1719 struct sdev_node * 1720 sdev_cache_lookup(struct sdev_node *ddv, char *nm) 1721 { 1722 struct sdev_node *dv = NULL; 1723 1724 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); 1725 dv = sdev_findbyname(ddv, nm); 1726 1727 return (dv); 1728 } 1729 1730 /* 1731 * Implicit reconfig for nodes constructed by a link generator 1732 * Start devfsadm if needed, or if devfsadm is in progress, 1733 * prepare to block on devfsadm either completing or 1734 * constructing the desired node. As devfsadmd is global 1735 * in scope, constructing all necessary nodes, we only 1736 * need to initiate it once. 1737 */ 1738 static int 1739 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm) 1740 { 1741 int error = 0; 1742 1743 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { 1744 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n", 1745 ddv->sdev_name, nm, devfsadm_state)); 1746 mutex_enter(&dv->sdev_lookup_lock); 1747 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING)); 1748 mutex_exit(&dv->sdev_lookup_lock); 1749 error = 0; 1750 } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) { 1751 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n", 1752 ddv->sdev_name, nm, devfsadm_state)); 1753 1754 sdev_devfsadmd_thread(ddv, dv, kcred); 1755 mutex_enter(&dv->sdev_lookup_lock); 1756 SDEV_BLOCK_OTHERS(dv, 1757 (SDEV_LOOKUP | SDEV_LGWAITING)); 1758 mutex_exit(&dv->sdev_lookup_lock); 1759 error = 0; 1760 } else { 1761 error = -1; 1762 } 1763 1764 return (error); 1765 } 1766 1767 /* 1768 * Support for specialized device naming construction mechanisms 1769 */ 1770 static int 1771 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm, 1772 int (*callback)(struct sdev_node *, char *, void **, struct cred *, 1773 void *, char *), int flags, struct cred *cred) 1774 { 1775 int rv = 0; 1776 char *physpath = NULL; 1777 struct vattr vattr; 1778 struct vattr *vap = &vattr; 1779 struct sdev_node *dv = NULL; 1780 1781 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1782 if (flags & SDEV_VLINK) { 1783 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1784 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL, 1785 NULL); 1786 if (rv) { 1787 kmem_free(physpath, MAXPATHLEN); 1788 return (-1); 1789 } 1790 1791 *vap = *sdev_getdefault_attr(VLNK); /* structure copy */ 1792 vap->va_size = strlen(physpath); 1793 gethrestime(&vap->va_atime); 1794 vap->va_mtime = vap->va_atime; 1795 vap->va_ctime = vap->va_atime; 1796 1797 rv = sdev_mknode(ddv, nm, &dv, vap, NULL, 1798 (void *)physpath, cred, SDEV_READY); 1799 kmem_free(physpath, MAXPATHLEN); 1800 if (rv) 1801 return (rv); 1802 } else if (flags & SDEV_VATTR) { 1803 /* 1804 * /dev/pts 1805 * 1806 * callback is responsible to set the basic attributes, 1807 * e.g. va_type/va_uid/va_gid/ 1808 * dev_t if VCHR or VBLK/ 1809 */ 1810 ASSERT(callback); 1811 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL); 1812 if (rv) { 1813 sdcmn_err3(("devname_lookup_func: SDEV_NONE " 1814 "callback failed \n")); 1815 return (-1); 1816 } 1817 1818 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL, 1819 cred, SDEV_READY); 1820 1821 if (rv) 1822 return (rv); 1823 1824 } else { 1825 impossible(("lookup: %s/%s by %s not supported (%d)\n", 1826 SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm, 1827 __LINE__)); 1828 rv = -1; 1829 } 1830 1831 *dvp = dv; 1832 return (rv); 1833 } 1834 1835 static int 1836 is_devfsadm_thread(char *exec_name) 1837 { 1838 /* 1839 * note: because devfsadmd -> /usr/sbin/devfsadm 1840 * it is safe to use "devfsadm" to capture the lookups 1841 * from devfsadm and its daemon version. 1842 */ 1843 if (strcmp(exec_name, "devfsadm") == 0) 1844 return (1); 1845 return (0); 1846 } 1847 1848 /* 1849 * Lookup Order: 1850 * sdev_node cache; 1851 * backing store (SDEV_PERSIST); 1852 * DBNR: a. dir_ops implemented in the loadable modules; 1853 * b. vnode ops in vtab. 1854 */ 1855 int 1856 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp, 1857 struct cred *cred, int (*callback)(struct sdev_node *, char *, void **, 1858 struct cred *, void *, char *), int flags) 1859 { 1860 int rv = 0, nmlen; 1861 struct vnode *rvp = NULL; 1862 struct sdev_node *dv = NULL; 1863 int retried = 0; 1864 int error = 0; 1865 struct vattr vattr; 1866 char *lookup_thread = curproc->p_user.u_comm; 1867 int failed_flags = 0; 1868 int (*vtor)(struct sdev_node *) = NULL; 1869 int state; 1870 int parent_state; 1871 char *link = NULL; 1872 1873 if (SDEVTOV(ddv)->v_type != VDIR) 1874 return (ENOTDIR); 1875 1876 /* 1877 * Empty name or ., return node itself. 1878 */ 1879 nmlen = strlen(nm); 1880 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { 1881 *vpp = SDEVTOV(ddv); 1882 VN_HOLD(*vpp); 1883 return (0); 1884 } 1885 1886 /* 1887 * .., return the parent directory 1888 */ 1889 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { 1890 *vpp = SDEVTOV(ddv->sdev_dotdot); 1891 VN_HOLD(*vpp); 1892 return (0); 1893 } 1894 1895 rw_enter(&ddv->sdev_contents, RW_READER); 1896 if (ddv->sdev_flags & SDEV_VTOR) { 1897 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); 1898 ASSERT(vtor); 1899 } 1900 1901 tryagain: 1902 /* 1903 * (a) directory cache lookup: 1904 */ 1905 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1906 parent_state = ddv->sdev_state; 1907 dv = sdev_cache_lookup(ddv, nm); 1908 if (dv) { 1909 state = dv->sdev_state; 1910 switch (state) { 1911 case SDEV_INIT: 1912 if (is_devfsadm_thread(lookup_thread)) 1913 break; 1914 1915 /* ZOMBIED parent won't allow node creation */ 1916 if (parent_state == SDEV_ZOMBIE) { 1917 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1918 retried); 1919 goto nolock_notfound; 1920 } 1921 1922 mutex_enter(&dv->sdev_lookup_lock); 1923 /* compensate the threads started after devfsadm */ 1924 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && 1925 !(SDEV_IS_LOOKUP(dv))) 1926 SDEV_BLOCK_OTHERS(dv, 1927 (SDEV_LOOKUP | SDEV_LGWAITING)); 1928 1929 if (SDEV_IS_LOOKUP(dv)) { 1930 failed_flags |= SLF_REBUILT; 1931 rw_exit(&ddv->sdev_contents); 1932 error = sdev_wait4lookup(dv, SDEV_LOOKUP); 1933 mutex_exit(&dv->sdev_lookup_lock); 1934 rw_enter(&ddv->sdev_contents, RW_READER); 1935 1936 if (error != 0) { 1937 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1938 retried); 1939 goto nolock_notfound; 1940 } 1941 1942 state = dv->sdev_state; 1943 if (state == SDEV_INIT) { 1944 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1945 retried); 1946 goto nolock_notfound; 1947 } else if (state == SDEV_READY) { 1948 goto found; 1949 } else if (state == SDEV_ZOMBIE) { 1950 rw_exit(&ddv->sdev_contents); 1951 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1952 retried); 1953 SDEV_RELE(dv); 1954 goto lookup_failed; 1955 } 1956 } else { 1957 mutex_exit(&dv->sdev_lookup_lock); 1958 } 1959 break; 1960 case SDEV_READY: 1961 goto found; 1962 case SDEV_ZOMBIE: 1963 rw_exit(&ddv->sdev_contents); 1964 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1965 SDEV_RELE(dv); 1966 goto lookup_failed; 1967 default: 1968 rw_exit(&ddv->sdev_contents); 1969 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1970 sdev_lookup_failed(ddv, nm, failed_flags); 1971 *vpp = NULLVP; 1972 return (ENOENT); 1973 } 1974 } 1975 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1976 1977 /* 1978 * ZOMBIED parent does not allow new node creation. 1979 * bail out early 1980 */ 1981 if (parent_state == SDEV_ZOMBIE) { 1982 rw_exit(&ddv->sdev_contents); 1983 *vpp = NULLVP; 1984 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 1985 return (ENOENT); 1986 } 1987 1988 /* 1989 * (b0): backing store lookup 1990 * SDEV_PERSIST is default except: 1991 * 1) pts nodes 1992 * 2) non-chmod'ed local nodes 1993 * 3) zvol nodes 1994 */ 1995 if (SDEV_IS_PERSIST(ddv)) { 1996 error = devname_backstore_lookup(ddv, nm, &rvp); 1997 1998 if (!error) { 1999 2000 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID; 2001 error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL); 2002 if (error) { 2003 rw_exit(&ddv->sdev_contents); 2004 if (dv) 2005 SDEV_RELE(dv); 2006 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2007 sdev_lookup_failed(ddv, nm, failed_flags); 2008 *vpp = NULLVP; 2009 return (ENOENT); 2010 } 2011 2012 if (vattr.va_type == VLNK) { 2013 error = sdev_getlink(rvp, &link); 2014 if (error) { 2015 rw_exit(&ddv->sdev_contents); 2016 if (dv) 2017 SDEV_RELE(dv); 2018 SD_TRACE_FAILED_LOOKUP(ddv, nm, 2019 retried); 2020 sdev_lookup_failed(ddv, nm, 2021 failed_flags); 2022 *vpp = NULLVP; 2023 return (ENOENT); 2024 } 2025 ASSERT(link != NULL); 2026 } 2027 2028 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2029 rw_exit(&ddv->sdev_contents); 2030 rw_enter(&ddv->sdev_contents, RW_WRITER); 2031 } 2032 error = sdev_mknode(ddv, nm, &dv, &vattr, 2033 rvp, link, cred, SDEV_READY); 2034 rw_downgrade(&ddv->sdev_contents); 2035 2036 if (link != NULL) { 2037 kmem_free(link, strlen(link) + 1); 2038 link = NULL; 2039 } 2040 2041 if (error) { 2042 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2043 rw_exit(&ddv->sdev_contents); 2044 if (dv) 2045 SDEV_RELE(dv); 2046 goto lookup_failed; 2047 } else { 2048 goto found; 2049 } 2050 } else if (retried) { 2051 rw_exit(&ddv->sdev_contents); 2052 sdcmn_err3(("retry of lookup of %s/%s: failed\n", 2053 ddv->sdev_name, nm)); 2054 if (dv) 2055 SDEV_RELE(dv); 2056 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2057 sdev_lookup_failed(ddv, nm, failed_flags); 2058 *vpp = NULLVP; 2059 return (ENOENT); 2060 } 2061 } 2062 2063 lookup_create_node: 2064 /* first thread that is doing the lookup on this node */ 2065 if (callback) { 2066 ASSERT(dv == NULL); 2067 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2068 rw_exit(&ddv->sdev_contents); 2069 rw_enter(&ddv->sdev_contents, RW_WRITER); 2070 } 2071 error = sdev_call_dircallback(ddv, &dv, nm, callback, 2072 flags, cred); 2073 rw_downgrade(&ddv->sdev_contents); 2074 if (error == 0) { 2075 goto found; 2076 } else { 2077 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2078 rw_exit(&ddv->sdev_contents); 2079 goto lookup_failed; 2080 } 2081 } 2082 if (!dv) { 2083 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2084 rw_exit(&ddv->sdev_contents); 2085 rw_enter(&ddv->sdev_contents, RW_WRITER); 2086 } 2087 error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL, 2088 cred, SDEV_INIT); 2089 if (!dv) { 2090 rw_exit(&ddv->sdev_contents); 2091 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2092 sdev_lookup_failed(ddv, nm, failed_flags); 2093 *vpp = NULLVP; 2094 return (ENOENT); 2095 } 2096 rw_downgrade(&ddv->sdev_contents); 2097 } 2098 2099 /* 2100 * (b1) invoking devfsadm once per life time for devfsadm nodes 2101 */ 2102 ASSERT(SDEV_HELD(dv)); 2103 2104 if (SDEV_IS_NO_NCACHE(dv)) 2105 failed_flags |= SLF_NO_NCACHE; 2106 if (sdev_reconfig_boot || !i_ddi_io_initialized() || 2107 SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) || 2108 ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) { 2109 ASSERT(SDEV_HELD(dv)); 2110 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2111 goto nolock_notfound; 2112 } 2113 2114 /* 2115 * filter out known non-existent devices recorded 2116 * during initial reconfiguration boot for which 2117 * reconfig should not be done and lookup may 2118 * be short-circuited now. 2119 */ 2120 if (sdev_lookup_filter(ddv, nm)) { 2121 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2122 goto nolock_notfound; 2123 } 2124 2125 /* bypassing devfsadm internal nodes */ 2126 if (is_devfsadm_thread(lookup_thread)) { 2127 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2128 goto nolock_notfound; 2129 } 2130 2131 if (sdev_reconfig_disable) { 2132 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2133 goto nolock_notfound; 2134 } 2135 2136 error = sdev_call_devfsadmd(ddv, dv, nm); 2137 if (error == 0) { 2138 sdcmn_err8(("lookup of %s/%s by %s: reconfig\n", 2139 ddv->sdev_name, nm, curproc->p_user.u_comm)); 2140 if (sdev_reconfig_verbose) { 2141 cmn_err(CE_CONT, 2142 "?lookup of %s/%s by %s: reconfig\n", 2143 ddv->sdev_name, nm, curproc->p_user.u_comm); 2144 } 2145 retried = 1; 2146 failed_flags |= SLF_REBUILT; 2147 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 2148 SDEV_SIMPLE_RELE(dv); 2149 goto tryagain; 2150 } else { 2151 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2152 goto nolock_notfound; 2153 } 2154 2155 found: 2156 ASSERT(dv->sdev_state == SDEV_READY); 2157 if (vtor) { 2158 /* 2159 * Check validity of returned node 2160 */ 2161 switch (vtor(dv)) { 2162 case SDEV_VTOR_VALID: 2163 break; 2164 case SDEV_VTOR_STALE: 2165 /* 2166 * The name exists, but the cache entry is 2167 * stale and needs to be re-created. 2168 */ 2169 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2170 if (rw_tryupgrade(&ddv->sdev_contents) == 0) { 2171 rw_exit(&ddv->sdev_contents); 2172 rw_enter(&ddv->sdev_contents, RW_WRITER); 2173 } 2174 sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_DELETE); 2175 rw_downgrade(&ddv->sdev_contents); 2176 SDEV_RELE(dv); 2177 dv = NULL; 2178 goto lookup_create_node; 2179 /* FALLTHRU */ 2180 case SDEV_VTOR_INVALID: 2181 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2182 sdcmn_err7(("lookup: destroy invalid " 2183 "node: %s(%p)\n", dv->sdev_name, (void *)dv)); 2184 goto nolock_notfound; 2185 case SDEV_VTOR_SKIP: 2186 sdcmn_err7(("lookup: node not applicable - " 2187 "skipping: %s(%p)\n", dv->sdev_name, (void *)dv)); 2188 rw_exit(&ddv->sdev_contents); 2189 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2190 SDEV_RELE(dv); 2191 goto lookup_failed; 2192 default: 2193 cmn_err(CE_PANIC, 2194 "dev fs: validator failed: %s(%p)\n", 2195 dv->sdev_name, (void *)dv); 2196 break; 2197 } 2198 } 2199 2200 rw_exit(&ddv->sdev_contents); 2201 rv = sdev_to_vp(dv, vpp); 2202 sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d " 2203 "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count, 2204 dv->sdev_state, nm, rv)); 2205 return (rv); 2206 2207 nolock_notfound: 2208 /* 2209 * Destroy the node that is created for synchronization purposes. 2210 */ 2211 sdcmn_err3(("devname_lookup_func: %s with state %d\n", 2212 nm, dv->sdev_state)); 2213 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2214 if (dv->sdev_state == SDEV_INIT) { 2215 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2216 rw_exit(&ddv->sdev_contents); 2217 rw_enter(&ddv->sdev_contents, RW_WRITER); 2218 } 2219 2220 /* 2221 * Node state may have changed during the lock 2222 * changes. Re-check. 2223 */ 2224 if (dv->sdev_state == SDEV_INIT) { 2225 sdev_dirdelete(ddv, dv); 2226 rw_exit(&ddv->sdev_contents); 2227 sdev_lookup_failed(ddv, nm, failed_flags); 2228 SDEV_RELE(dv); 2229 *vpp = NULL; 2230 return (ENOENT); 2231 } 2232 } 2233 2234 rw_exit(&ddv->sdev_contents); 2235 SDEV_RELE(dv); 2236 2237 lookup_failed: 2238 sdev_lookup_failed(ddv, nm, failed_flags); 2239 *vpp = NULL; 2240 return (ENOENT); 2241 } 2242 2243 /* 2244 * Given a directory node, mark all nodes beneath as 2245 * STALE, i.e. nodes that don't exist as far as new 2246 * consumers are concerned. Remove them from the 2247 * list of directory entries so that no lookup or 2248 * directory traversal will find them. The node 2249 * not deallocated so existing holds are not affected. 2250 */ 2251 void 2252 sdev_stale(struct sdev_node *ddv) 2253 { 2254 struct sdev_node *dv; 2255 struct vnode *vp; 2256 2257 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 2258 2259 rw_enter(&ddv->sdev_contents, RW_WRITER); 2260 while ((dv = SDEV_FIRST_ENTRY(ddv)) != NULL) { 2261 vp = SDEVTOV(dv); 2262 SDEV_HOLD(dv); 2263 if (vp->v_type == VDIR) 2264 sdev_stale(dv); 2265 2266 sdev_dirdelete(ddv, dv); 2267 SDEV_RELE(dv); 2268 } 2269 ddv->sdev_flags |= SDEV_BUILD; 2270 rw_exit(&ddv->sdev_contents); 2271 } 2272 2273 /* 2274 * Given a directory node, clean out all the nodes beneath. 2275 * If expr is specified, clean node with names matching expr. 2276 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale, 2277 * so they are excluded from future lookups. 2278 */ 2279 int 2280 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags) 2281 { 2282 int error = 0; 2283 int busy = 0; 2284 struct vnode *vp; 2285 struct sdev_node *dv, *next; 2286 int bkstore = 0; 2287 int len = 0; 2288 char *bks_name = NULL; 2289 2290 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 2291 2292 /* 2293 * We try our best to destroy all unused sdev_node's 2294 */ 2295 rw_enter(&ddv->sdev_contents, RW_WRITER); 2296 for (dv = SDEV_FIRST_ENTRY(ddv); dv != NULL; dv = next) { 2297 next = SDEV_NEXT_ENTRY(ddv, dv); 2298 vp = SDEVTOV(dv); 2299 2300 if (expr && gmatch(dv->sdev_name, expr) == 0) 2301 continue; 2302 2303 if (vp->v_type == VDIR && 2304 sdev_cleandir(dv, NULL, flags) != 0) { 2305 sdcmn_err9(("sdev_cleandir: dir %s busy\n", 2306 dv->sdev_name)); 2307 busy++; 2308 continue; 2309 } 2310 2311 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) { 2312 sdcmn_err9(("sdev_cleandir: dir %s busy\n", 2313 dv->sdev_name)); 2314 busy++; 2315 continue; 2316 } 2317 2318 /* 2319 * at this point, either dv is not held or SDEV_ENFORCE 2320 * is specified. In either case, dv needs to be deleted 2321 */ 2322 SDEV_HOLD(dv); 2323 2324 bkstore = SDEV_IS_PERSIST(dv) ? 1 : 0; 2325 if (bkstore && (vp->v_type == VDIR)) 2326 bkstore += 1; 2327 2328 if (bkstore) { 2329 len = strlen(dv->sdev_name) + 1; 2330 bks_name = kmem_alloc(len, KM_SLEEP); 2331 bcopy(dv->sdev_name, bks_name, len); 2332 } 2333 2334 sdev_dirdelete(ddv, dv); 2335 2336 /* take care the backing store clean up */ 2337 if (bkstore) { 2338 ASSERT(bks_name); 2339 ASSERT(ddv->sdev_attrvp); 2340 2341 if (bkstore == 1) { 2342 error = VOP_REMOVE(ddv->sdev_attrvp, 2343 bks_name, kcred, NULL, 0); 2344 } else if (bkstore == 2) { 2345 error = VOP_RMDIR(ddv->sdev_attrvp, 2346 bks_name, ddv->sdev_attrvp, kcred, NULL, 0); 2347 } 2348 2349 /* do not propagate the backing store errors */ 2350 if (error) { 2351 sdcmn_err9(("sdev_cleandir: backing store" 2352 "not cleaned\n")); 2353 error = 0; 2354 } 2355 2356 bkstore = 0; 2357 kmem_free(bks_name, len); 2358 bks_name = NULL; 2359 len = 0; 2360 } 2361 2362 ddv->sdev_flags |= SDEV_BUILD; 2363 SDEV_RELE(dv); 2364 } 2365 2366 ddv->sdev_flags |= SDEV_BUILD; 2367 rw_exit(&ddv->sdev_contents); 2368 2369 if (busy) { 2370 error = EBUSY; 2371 } 2372 2373 return (error); 2374 } 2375 2376 /* 2377 * a convenient wrapper for readdir() funcs 2378 */ 2379 size_t 2380 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off) 2381 { 2382 size_t reclen = DIRENT64_RECLEN(strlen(nm)); 2383 if (reclen > size) 2384 return (0); 2385 2386 de->d_ino = (ino64_t)ino; 2387 de->d_off = (off64_t)off + 1; 2388 de->d_reclen = (ushort_t)reclen; 2389 (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen)); 2390 return (reclen); 2391 } 2392 2393 /* 2394 * sdev_mount service routines 2395 */ 2396 int 2397 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args) 2398 { 2399 int error; 2400 2401 if (uap->datalen != sizeof (*args)) 2402 return (EINVAL); 2403 2404 if (error = copyin(uap->dataptr, args, sizeof (*args))) { 2405 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not" 2406 "get user data. error %d\n", error); 2407 return (EFAULT); 2408 } 2409 2410 return (0); 2411 } 2412 2413 #ifdef nextdp 2414 #undef nextdp 2415 #endif 2416 #define nextdp(dp) ((struct dirent64 *) \ 2417 (intptr_t)((char *)(dp) + (dp)->d_reclen)) 2418 2419 /* 2420 * readdir helper func 2421 */ 2422 int 2423 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp, 2424 int flags) 2425 { 2426 struct sdev_node *ddv = VTOSDEV(vp); 2427 struct sdev_node *dv; 2428 dirent64_t *dp; 2429 ulong_t outcount = 0; 2430 size_t namelen; 2431 ulong_t alloc_count; 2432 void *outbuf; 2433 struct iovec *iovp; 2434 int error = 0; 2435 size_t reclen; 2436 offset_t diroff; 2437 offset_t soff; 2438 int this_reclen; 2439 int (*vtor)(struct sdev_node *) = NULL; 2440 struct vattr attr; 2441 timestruc_t now; 2442 2443 ASSERT(ddv->sdev_attr || ddv->sdev_attrvp); 2444 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2445 2446 if (uiop->uio_loffset >= MAXOFF_T) { 2447 if (eofp) 2448 *eofp = 1; 2449 return (0); 2450 } 2451 2452 if (uiop->uio_iovcnt != 1) 2453 return (EINVAL); 2454 2455 if (vp->v_type != VDIR) 2456 return (ENOTDIR); 2457 2458 if (ddv->sdev_flags & SDEV_VTOR) { 2459 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); 2460 ASSERT(vtor); 2461 } 2462 2463 if (eofp != NULL) 2464 *eofp = 0; 2465 2466 soff = uiop->uio_loffset; 2467 iovp = uiop->uio_iov; 2468 alloc_count = iovp->iov_len; 2469 dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP); 2470 outcount = 0; 2471 2472 if (ddv->sdev_state == SDEV_ZOMBIE) 2473 goto get_cache; 2474 2475 if (SDEV_IS_GLOBAL(ddv)) { 2476 2477 if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) && 2478 !sdev_reconfig_boot && (flags & SDEV_BROWSE) && 2479 !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) && 2480 ((moddebug & MODDEBUG_FINI_EBUSY) == 0) && 2481 !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) && 2482 !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && 2483 !sdev_reconfig_disable) { 2484 /* 2485 * invoking "devfsadm" to do system device reconfig 2486 */ 2487 mutex_enter(&ddv->sdev_lookup_lock); 2488 SDEV_BLOCK_OTHERS(ddv, 2489 (SDEV_READDIR|SDEV_LGWAITING)); 2490 mutex_exit(&ddv->sdev_lookup_lock); 2491 2492 sdcmn_err8(("readdir of %s by %s: reconfig\n", 2493 ddv->sdev_path, curproc->p_user.u_comm)); 2494 if (sdev_reconfig_verbose) { 2495 cmn_err(CE_CONT, 2496 "?readdir of %s by %s: reconfig\n", 2497 ddv->sdev_path, curproc->p_user.u_comm); 2498 } 2499 2500 sdev_devfsadmd_thread(ddv, NULL, kcred); 2501 } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { 2502 /* 2503 * compensate the "ls" started later than "devfsadm" 2504 */ 2505 mutex_enter(&ddv->sdev_lookup_lock); 2506 SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING)); 2507 mutex_exit(&ddv->sdev_lookup_lock); 2508 } 2509 2510 /* 2511 * release the contents lock so that 2512 * the cache may be updated by devfsadmd 2513 */ 2514 rw_exit(&ddv->sdev_contents); 2515 mutex_enter(&ddv->sdev_lookup_lock); 2516 if (SDEV_IS_READDIR(ddv)) 2517 (void) sdev_wait4lookup(ddv, SDEV_READDIR); 2518 mutex_exit(&ddv->sdev_lookup_lock); 2519 rw_enter(&ddv->sdev_contents, RW_READER); 2520 2521 sdcmn_err4(("readdir of directory %s by %s\n", 2522 ddv->sdev_name, curproc->p_user.u_comm)); 2523 if (ddv->sdev_flags & SDEV_BUILD) { 2524 if (SDEV_IS_PERSIST(ddv)) { 2525 error = sdev_filldir_from_store(ddv, 2526 alloc_count, cred); 2527 } 2528 ddv->sdev_flags &= ~SDEV_BUILD; 2529 } 2530 } 2531 2532 get_cache: 2533 /* handle "." and ".." */ 2534 diroff = 0; 2535 if (soff == 0) { 2536 /* first time */ 2537 this_reclen = DIRENT64_RECLEN(1); 2538 if (alloc_count < this_reclen) { 2539 error = EINVAL; 2540 goto done; 2541 } 2542 2543 dp->d_ino = (ino64_t)ddv->sdev_ino; 2544 dp->d_off = (off64_t)1; 2545 dp->d_reclen = (ushort_t)this_reclen; 2546 2547 (void) strncpy(dp->d_name, ".", 2548 DIRENT64_NAMELEN(this_reclen)); 2549 outcount += dp->d_reclen; 2550 dp = nextdp(dp); 2551 } 2552 2553 diroff++; 2554 if (soff <= 1) { 2555 this_reclen = DIRENT64_RECLEN(2); 2556 if (alloc_count < outcount + this_reclen) { 2557 error = EINVAL; 2558 goto done; 2559 } 2560 2561 dp->d_reclen = (ushort_t)this_reclen; 2562 dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino; 2563 dp->d_off = (off64_t)2; 2564 2565 (void) strncpy(dp->d_name, "..", 2566 DIRENT64_NAMELEN(this_reclen)); 2567 outcount += dp->d_reclen; 2568 2569 dp = nextdp(dp); 2570 } 2571 2572 2573 /* gets the cache */ 2574 diroff++; 2575 for (dv = SDEV_FIRST_ENTRY(ddv); dv; 2576 dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) { 2577 sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n", 2578 diroff, soff, dv->sdev_name)); 2579 2580 /* bypassing pre-matured nodes */ 2581 if (diroff < soff || (dv->sdev_state != SDEV_READY)) { 2582 sdcmn_err3(("sdev_readdir: pre-mature node " 2583 "%s %d\n", dv->sdev_name, dv->sdev_state)); 2584 continue; 2585 } 2586 2587 /* 2588 * Check validity of node 2589 * Drop invalid and nodes to be skipped. 2590 * A node the validator indicates as stale needs 2591 * to be returned as presumably the node name itself 2592 * is valid and the node data itself will be refreshed 2593 * on lookup. An application performing a readdir then 2594 * stat on each entry should thus always see consistent 2595 * data. In any case, it is not possible to synchronize 2596 * with dynamic kernel state, and any view we return can 2597 * never be anything more than a snapshot at a point in time. 2598 */ 2599 if (vtor) { 2600 switch (vtor(dv)) { 2601 case SDEV_VTOR_VALID: 2602 break; 2603 case SDEV_VTOR_INVALID: 2604 case SDEV_VTOR_SKIP: 2605 continue; 2606 case SDEV_VTOR_STALE: 2607 sdcmn_err3(("sdev_readir: %s stale\n", 2608 dv->sdev_name)); 2609 break; 2610 default: 2611 cmn_err(CE_PANIC, 2612 "dev fs: validator failed: %s(%p)\n", 2613 dv->sdev_name, (void *)dv); 2614 break; 2615 /*NOTREACHED*/ 2616 } 2617 } 2618 2619 namelen = strlen(dv->sdev_name); 2620 reclen = DIRENT64_RECLEN(namelen); 2621 if (outcount + reclen > alloc_count) { 2622 goto full; 2623 } 2624 dp->d_reclen = (ushort_t)reclen; 2625 dp->d_ino = (ino64_t)dv->sdev_ino; 2626 dp->d_off = (off64_t)diroff + 1; 2627 (void) strncpy(dp->d_name, dv->sdev_name, 2628 DIRENT64_NAMELEN(reclen)); 2629 outcount += reclen; 2630 dp = nextdp(dp); 2631 } 2632 2633 full: 2634 sdcmn_err4(("sdev_readdir: moving %lu bytes: " 2635 "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff, 2636 (void *)dv)); 2637 2638 if (outcount) 2639 error = uiomove(outbuf, outcount, UIO_READ, uiop); 2640 2641 if (!error) { 2642 uiop->uio_loffset = diroff; 2643 if (eofp) 2644 *eofp = dv ? 0 : 1; 2645 } 2646 2647 2648 if (ddv->sdev_attrvp) { 2649 gethrestime(&now); 2650 attr.va_ctime = now; 2651 attr.va_atime = now; 2652 attr.va_mask = AT_CTIME|AT_ATIME; 2653 2654 (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL); 2655 } 2656 done: 2657 kmem_free(outbuf, alloc_count); 2658 return (error); 2659 } 2660 2661 static int 2662 sdev_modctl_lookup(const char *path, vnode_t **r_vp) 2663 { 2664 vnode_t *vp; 2665 vnode_t *cvp; 2666 struct sdev_node *svp; 2667 char *nm; 2668 struct pathname pn; 2669 int error; 2670 int persisted = 0; 2671 2672 ASSERT(INGLOBALZONE(curproc)); 2673 2674 if (error = pn_get((char *)path, UIO_SYSSPACE, &pn)) 2675 return (error); 2676 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 2677 2678 vp = rootdir; 2679 VN_HOLD(vp); 2680 2681 while (pn_pathleft(&pn)) { 2682 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK); 2683 (void) pn_getcomponent(&pn, nm); 2684 2685 /* 2686 * Deal with the .. special case where we may be 2687 * traversing up across a mount point, to the 2688 * root of this filesystem or global root. 2689 */ 2690 if (nm[0] == '.' && nm[1] == '.' && nm[2] == 0) { 2691 checkforroot: 2692 if (VN_CMP(vp, rootdir)) { 2693 nm[1] = 0; 2694 } else if (vp->v_flag & VROOT) { 2695 vfs_t *vfsp; 2696 cvp = vp; 2697 vfsp = cvp->v_vfsp; 2698 vfs_rlock_wait(vfsp); 2699 vp = cvp->v_vfsp->vfs_vnodecovered; 2700 if (vp == NULL || 2701 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) { 2702 vfs_unlock(vfsp); 2703 VN_RELE(cvp); 2704 error = EIO; 2705 break; 2706 } 2707 VN_HOLD(vp); 2708 vfs_unlock(vfsp); 2709 VN_RELE(cvp); 2710 cvp = NULL; 2711 goto checkforroot; 2712 } 2713 } 2714 2715 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL, 2716 NULL, NULL); 2717 if (error) { 2718 VN_RELE(vp); 2719 break; 2720 } 2721 2722 /* traverse mount points encountered on our journey */ 2723 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) { 2724 VN_RELE(vp); 2725 VN_RELE(cvp); 2726 break; 2727 } 2728 2729 /* 2730 * symbolic link, can be either relative and absolute 2731 */ 2732 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) { 2733 struct pathname linkpath; 2734 pn_alloc(&linkpath); 2735 if (error = pn_getsymlink(cvp, &linkpath, kcred)) { 2736 pn_free(&linkpath); 2737 break; 2738 } 2739 if (pn_pathleft(&linkpath) == 0) 2740 (void) pn_set(&linkpath, "."); 2741 error = pn_insert(&pn, &linkpath, strlen(nm)); 2742 pn_free(&linkpath); 2743 if (pn.pn_pathlen == 0) { 2744 VN_RELE(vp); 2745 return (ENOENT); 2746 } 2747 if (pn.pn_path[0] == '/') { 2748 pn_skipslash(&pn); 2749 VN_RELE(vp); 2750 VN_RELE(cvp); 2751 vp = rootdir; 2752 VN_HOLD(vp); 2753 } else { 2754 VN_RELE(cvp); 2755 } 2756 continue; 2757 } 2758 2759 VN_RELE(vp); 2760 2761 /* 2762 * Direct the operation to the persisting filesystem 2763 * underlying /dev. Bail if we encounter a 2764 * non-persistent dev entity here. 2765 */ 2766 if (cvp->v_vfsp->vfs_fstype == devtype) { 2767 2768 if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) { 2769 error = ENOENT; 2770 VN_RELE(cvp); 2771 break; 2772 } 2773 2774 if (VTOSDEV(cvp) == NULL) { 2775 error = ENOENT; 2776 VN_RELE(cvp); 2777 break; 2778 } 2779 svp = VTOSDEV(cvp); 2780 if ((vp = svp->sdev_attrvp) == NULL) { 2781 error = ENOENT; 2782 VN_RELE(cvp); 2783 break; 2784 } 2785 persisted = 1; 2786 VN_HOLD(vp); 2787 VN_RELE(cvp); 2788 cvp = vp; 2789 } 2790 2791 vp = cvp; 2792 pn_skipslash(&pn); 2793 } 2794 2795 kmem_free(nm, MAXNAMELEN); 2796 pn_free(&pn); 2797 2798 if (error) 2799 return (error); 2800 2801 /* 2802 * Only return persisted nodes in the filesystem underlying /dev. 2803 */ 2804 if (!persisted) { 2805 VN_RELE(vp); 2806 return (ENOENT); 2807 } 2808 2809 *r_vp = vp; 2810 return (0); 2811 } 2812 2813 int 2814 sdev_modctl_readdir(const char *dir, char ***dirlistp, int *npathsp, 2815 int *npathsp_alloc, int checking_empty) 2816 { 2817 char **pathlist = NULL; 2818 char **newlist = NULL; 2819 int npaths = 0; 2820 int npaths_alloc = 0; 2821 dirent64_t *dbuf = NULL; 2822 int n; 2823 char *s; 2824 int error; 2825 vnode_t *vp; 2826 int eof; 2827 struct iovec iov; 2828 struct uio uio; 2829 struct dirent64 *dp; 2830 size_t dlen; 2831 size_t dbuflen; 2832 int ndirents = 64; 2833 char *nm; 2834 2835 error = sdev_modctl_lookup(dir, &vp); 2836 sdcmn_err11(("modctl readdir: %s by %s: %s\n", 2837 dir, curproc->p_user.u_comm, 2838 (error == 0) ? "ok" : "failed")); 2839 if (error) 2840 return (error); 2841 2842 dlen = ndirents * (sizeof (*dbuf)); 2843 dbuf = kmem_alloc(dlen, KM_SLEEP); 2844 2845 uio.uio_iov = &iov; 2846 uio.uio_iovcnt = 1; 2847 uio.uio_segflg = UIO_SYSSPACE; 2848 uio.uio_fmode = 0; 2849 uio.uio_extflg = UIO_COPY_CACHED; 2850 uio.uio_loffset = 0; 2851 uio.uio_llimit = MAXOFFSET_T; 2852 2853 eof = 0; 2854 error = 0; 2855 while (!error && !eof) { 2856 uio.uio_resid = dlen; 2857 iov.iov_base = (char *)dbuf; 2858 iov.iov_len = dlen; 2859 2860 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 2861 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0); 2862 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2863 2864 dbuflen = dlen - uio.uio_resid; 2865 2866 if (error || dbuflen == 0) 2867 break; 2868 2869 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 2870 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 2871 2872 nm = dp->d_name; 2873 2874 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 2875 continue; 2876 if (npaths == npaths_alloc) { 2877 npaths_alloc += 64; 2878 newlist = (char **) 2879 kmem_zalloc((npaths_alloc + 1) * 2880 sizeof (char *), KM_SLEEP); 2881 if (pathlist) { 2882 bcopy(pathlist, newlist, 2883 npaths * sizeof (char *)); 2884 kmem_free(pathlist, 2885 (npaths + 1) * sizeof (char *)); 2886 } 2887 pathlist = newlist; 2888 } 2889 n = strlen(nm) + 1; 2890 s = kmem_alloc(n, KM_SLEEP); 2891 bcopy(nm, s, n); 2892 pathlist[npaths++] = s; 2893 sdcmn_err11((" %s/%s\n", dir, s)); 2894 2895 /* if checking empty, one entry is as good as many */ 2896 if (checking_empty) { 2897 eof = 1; 2898 break; 2899 } 2900 } 2901 } 2902 2903 exit: 2904 VN_RELE(vp); 2905 2906 if (dbuf) 2907 kmem_free(dbuf, dlen); 2908 2909 if (error) 2910 return (error); 2911 2912 *dirlistp = pathlist; 2913 *npathsp = npaths; 2914 *npathsp_alloc = npaths_alloc; 2915 2916 return (0); 2917 } 2918 2919 void 2920 sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc) 2921 { 2922 int i, n; 2923 2924 for (i = 0; i < npaths; i++) { 2925 n = strlen(pathlist[i]) + 1; 2926 kmem_free(pathlist[i], n); 2927 } 2928 2929 kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *)); 2930 } 2931 2932 int 2933 sdev_modctl_devexists(const char *path) 2934 { 2935 vnode_t *vp; 2936 int error; 2937 2938 error = sdev_modctl_lookup(path, &vp); 2939 sdcmn_err11(("modctl dev exists: %s by %s: %s\n", 2940 path, curproc->p_user.u_comm, 2941 (error == 0) ? "ok" : "failed")); 2942 if (error == 0) 2943 VN_RELE(vp); 2944 2945 return (error); 2946 } 2947 2948 extern int sdev_vnodeops_tbl_size; 2949 2950 /* 2951 * construct a new template with overrides from vtab 2952 */ 2953 static fs_operation_def_t * 2954 sdev_merge_vtab(const fs_operation_def_t tab[]) 2955 { 2956 fs_operation_def_t *new; 2957 const fs_operation_def_t *tab_entry; 2958 2959 /* make a copy of standard vnode ops table */ 2960 new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP); 2961 bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size); 2962 2963 /* replace the overrides from tab */ 2964 for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) { 2965 fs_operation_def_t *std_entry = new; 2966 while (std_entry->name) { 2967 if (strcmp(tab_entry->name, std_entry->name) == 0) { 2968 std_entry->func = tab_entry->func; 2969 break; 2970 } 2971 std_entry++; 2972 } 2973 if (std_entry->name == NULL) 2974 cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.", 2975 tab_entry->name); 2976 } 2977 2978 return (new); 2979 } 2980 2981 /* free memory allocated by sdev_merge_vtab */ 2982 static void 2983 sdev_free_vtab(fs_operation_def_t *new) 2984 { 2985 kmem_free(new, sdev_vnodeops_tbl_size); 2986 } 2987 2988 /* 2989 * a generic setattr() function 2990 * 2991 * note: flags only supports AT_UID and AT_GID. 2992 * Future enhancements can be done for other types, e.g. AT_MODE 2993 */ 2994 int 2995 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags, 2996 struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *, 2997 int), int protocol) 2998 { 2999 struct sdev_node *dv = VTOSDEV(vp); 3000 struct sdev_node *parent = dv->sdev_dotdot; 3001 struct vattr *get; 3002 uint_t mask = vap->va_mask; 3003 int error; 3004 3005 /* some sanity checks */ 3006 if (vap->va_mask & AT_NOSET) 3007 return (EINVAL); 3008 3009 if (vap->va_mask & AT_SIZE) { 3010 if (vp->v_type == VDIR) { 3011 return (EISDIR); 3012 } 3013 } 3014 3015 /* no need to set attribute, but do not fail either */ 3016 ASSERT(parent); 3017 rw_enter(&parent->sdev_contents, RW_READER); 3018 if (dv->sdev_state == SDEV_ZOMBIE) { 3019 rw_exit(&parent->sdev_contents); 3020 return (0); 3021 } 3022 3023 /* If backing store exists, just set it. */ 3024 if (dv->sdev_attrvp) { 3025 rw_exit(&parent->sdev_contents); 3026 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 3027 } 3028 3029 /* 3030 * Otherwise, for nodes with the persistence attribute, create it. 3031 */ 3032 ASSERT(dv->sdev_attr); 3033 if (SDEV_IS_PERSIST(dv) || 3034 ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) { 3035 sdev_vattr_merge(dv, vap); 3036 rw_enter(&dv->sdev_contents, RW_WRITER); 3037 error = sdev_shadow_node(dv, cred); 3038 rw_exit(&dv->sdev_contents); 3039 rw_exit(&parent->sdev_contents); 3040 3041 if (error) 3042 return (error); 3043 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 3044 } 3045 3046 3047 /* 3048 * sdev_attr was allocated in sdev_mknode 3049 */ 3050 rw_enter(&dv->sdev_contents, RW_WRITER); 3051 error = secpolicy_vnode_setattr(cred, vp, vap, 3052 dv->sdev_attr, flags, sdev_unlocked_access, dv); 3053 if (error) { 3054 rw_exit(&dv->sdev_contents); 3055 rw_exit(&parent->sdev_contents); 3056 return (error); 3057 } 3058 3059 get = dv->sdev_attr; 3060 if (mask & AT_MODE) { 3061 get->va_mode &= S_IFMT; 3062 get->va_mode |= vap->va_mode & ~S_IFMT; 3063 } 3064 3065 if ((mask & AT_UID) || (mask & AT_GID)) { 3066 if (mask & AT_UID) 3067 get->va_uid = vap->va_uid; 3068 if (mask & AT_GID) 3069 get->va_gid = vap->va_gid; 3070 /* 3071 * a callback must be provided if the protocol is set 3072 */ 3073 if ((protocol & AT_UID) || (protocol & AT_GID)) { 3074 ASSERT(callback); 3075 error = callback(dv, get, protocol); 3076 if (error) { 3077 rw_exit(&dv->sdev_contents); 3078 rw_exit(&parent->sdev_contents); 3079 return (error); 3080 } 3081 } 3082 } 3083 3084 if (mask & AT_ATIME) 3085 get->va_atime = vap->va_atime; 3086 if (mask & AT_MTIME) 3087 get->va_mtime = vap->va_mtime; 3088 if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) { 3089 gethrestime(&get->va_ctime); 3090 } 3091 3092 sdev_vattr_merge(dv, get); 3093 rw_exit(&dv->sdev_contents); 3094 rw_exit(&parent->sdev_contents); 3095 return (0); 3096 } 3097 3098 /* 3099 * a generic inactive() function 3100 */ 3101 /*ARGSUSED*/ 3102 void 3103 devname_inactive_func(struct vnode *vp, struct cred *cred, 3104 void (*callback)(struct vnode *)) 3105 { 3106 int clean; 3107 struct sdev_node *dv = VTOSDEV(vp); 3108 int state; 3109 3110 mutex_enter(&vp->v_lock); 3111 ASSERT(vp->v_count >= 1); 3112 3113 3114 if (vp->v_count == 1 && callback != NULL) 3115 callback(vp); 3116 3117 rw_enter(&dv->sdev_contents, RW_WRITER); 3118 state = dv->sdev_state; 3119 3120 clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE); 3121 3122 /* 3123 * sdev is a rather bad public citizen. It violates the general 3124 * agreement that in memory nodes should always have a valid reference 3125 * count on their vnode. But that's not the case here. This means that 3126 * we do actually have to distinguish between getting inactive callbacks 3127 * for zombies and otherwise. This should probably be fixed. 3128 */ 3129 if (clean) { 3130 /* Remove the . entry to ourselves */ 3131 if (vp->v_type == VDIR) { 3132 decr_link(dv); 3133 } 3134 VERIFY(dv->sdev_nlink == 1); 3135 decr_link(dv); 3136 VN_RELE_LOCKED(vp); 3137 rw_exit(&dv->sdev_contents); 3138 mutex_exit(&vp->v_lock); 3139 sdev_nodedestroy(dv, 0); 3140 } else { 3141 VN_RELE_LOCKED(vp); 3142 rw_exit(&dv->sdev_contents); 3143 mutex_exit(&vp->v_lock); 3144 } 3145 } 3146