1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* 27 * utility routines for the /dev fs 28 */ 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/t_lock.h> 33 #include <sys/systm.h> 34 #include <sys/sysmacros.h> 35 #include <sys/user.h> 36 #include <sys/time.h> 37 #include <sys/vfs.h> 38 #include <sys/vnode.h> 39 #include <sys/file.h> 40 #include <sys/fcntl.h> 41 #include <sys/flock.h> 42 #include <sys/kmem.h> 43 #include <sys/uio.h> 44 #include <sys/errno.h> 45 #include <sys/stat.h> 46 #include <sys/cred.h> 47 #include <sys/dirent.h> 48 #include <sys/pathname.h> 49 #include <sys/cmn_err.h> 50 #include <sys/debug.h> 51 #include <sys/mode.h> 52 #include <sys/policy.h> 53 #include <fs/fs_subr.h> 54 #include <sys/mount.h> 55 #include <sys/fs/snode.h> 56 #include <sys/fs/dv_node.h> 57 #include <sys/fs/sdev_impl.h> 58 #include <sys/sunndi.h> 59 #include <sys/sunmdi.h> 60 #include <sys/conf.h> 61 #include <sys/proc.h> 62 #include <sys/user.h> 63 #include <sys/modctl.h> 64 65 #ifdef DEBUG 66 int sdev_debug = 0x00000001; 67 int 
sdev_debug_cache_flags = 0; 68 #endif 69 70 /* 71 * globals 72 */ 73 /* prototype memory vattrs */ 74 vattr_t sdev_vattr_dir = { 75 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 76 VDIR, /* va_type */ 77 SDEV_DIRMODE_DEFAULT, /* va_mode */ 78 SDEV_UID_DEFAULT, /* va_uid */ 79 SDEV_GID_DEFAULT, /* va_gid */ 80 0, /* va_fsid */ 81 0, /* va_nodeid */ 82 0, /* va_nlink */ 83 0, /* va_size */ 84 0, /* va_atime */ 85 0, /* va_mtime */ 86 0, /* va_ctime */ 87 0, /* va_rdev */ 88 0, /* va_blksize */ 89 0, /* va_nblocks */ 90 0 /* va_vcode */ 91 }; 92 93 vattr_t sdev_vattr_lnk = { 94 AT_TYPE|AT_MODE, /* va_mask */ 95 VLNK, /* va_type */ 96 SDEV_LNKMODE_DEFAULT, /* va_mode */ 97 SDEV_UID_DEFAULT, /* va_uid */ 98 SDEV_GID_DEFAULT, /* va_gid */ 99 0, /* va_fsid */ 100 0, /* va_nodeid */ 101 0, /* va_nlink */ 102 0, /* va_size */ 103 0, /* va_atime */ 104 0, /* va_mtime */ 105 0, /* va_ctime */ 106 0, /* va_rdev */ 107 0, /* va_blksize */ 108 0, /* va_nblocks */ 109 0 /* va_vcode */ 110 }; 111 112 vattr_t sdev_vattr_blk = { 113 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 114 VBLK, /* va_type */ 115 S_IFBLK | SDEV_DEVMODE_DEFAULT, /* va_mode */ 116 SDEV_UID_DEFAULT, /* va_uid */ 117 SDEV_GID_DEFAULT, /* va_gid */ 118 0, /* va_fsid */ 119 0, /* va_nodeid */ 120 0, /* va_nlink */ 121 0, /* va_size */ 122 0, /* va_atime */ 123 0, /* va_mtime */ 124 0, /* va_ctime */ 125 0, /* va_rdev */ 126 0, /* va_blksize */ 127 0, /* va_nblocks */ 128 0 /* va_vcode */ 129 }; 130 131 vattr_t sdev_vattr_chr = { 132 AT_TYPE|AT_MODE|AT_UID|AT_GID, /* va_mask */ 133 VCHR, /* va_type */ 134 S_IFCHR | SDEV_DEVMODE_DEFAULT, /* va_mode */ 135 SDEV_UID_DEFAULT, /* va_uid */ 136 SDEV_GID_DEFAULT, /* va_gid */ 137 0, /* va_fsid */ 138 0, /* va_nodeid */ 139 0, /* va_nlink */ 140 0, /* va_size */ 141 0, /* va_atime */ 142 0, /* va_mtime */ 143 0, /* va_ctime */ 144 0, /* va_rdev */ 145 0, /* va_blksize */ 146 0, /* va_nblocks */ 147 0 /* va_vcode */ 148 }; 149 150 kmem_cache_t *sdev_node_cache; /* sdev_node 
cache */ 151 int devtype; /* fstype */ 152 153 /* static */ 154 static struct vnodeops *sdev_get_vop(struct sdev_node *); 155 static void sdev_set_no_nocache(struct sdev_node *); 156 static fs_operation_def_t *sdev_merge_vtab(const fs_operation_def_t []); 157 static void sdev_free_vtab(fs_operation_def_t *); 158 159 static void 160 sdev_prof_free(struct sdev_node *dv) 161 { 162 ASSERT(!SDEV_IS_GLOBAL(dv)); 163 if (dv->sdev_prof.dev_name) 164 nvlist_free(dv->sdev_prof.dev_name); 165 if (dv->sdev_prof.dev_map) 166 nvlist_free(dv->sdev_prof.dev_map); 167 if (dv->sdev_prof.dev_symlink) 168 nvlist_free(dv->sdev_prof.dev_symlink); 169 if (dv->sdev_prof.dev_glob_incdir) 170 nvlist_free(dv->sdev_prof.dev_glob_incdir); 171 if (dv->sdev_prof.dev_glob_excdir) 172 nvlist_free(dv->sdev_prof.dev_glob_excdir); 173 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 174 } 175 176 /* sdev_node cache constructor */ 177 /*ARGSUSED1*/ 178 static int 179 i_sdev_node_ctor(void *buf, void *cfarg, int flag) 180 { 181 struct sdev_node *dv = (struct sdev_node *)buf; 182 struct vnode *vp; 183 184 bzero(buf, sizeof (struct sdev_node)); 185 vp = dv->sdev_vnode = vn_alloc(flag); 186 if (vp == NULL) { 187 return (-1); 188 } 189 vp->v_data = dv; 190 rw_init(&dv->sdev_contents, NULL, RW_DEFAULT, NULL); 191 return (0); 192 } 193 194 /* sdev_node cache destructor */ 195 /*ARGSUSED1*/ 196 static void 197 i_sdev_node_dtor(void *buf, void *arg) 198 { 199 struct sdev_node *dv = (struct sdev_node *)buf; 200 struct vnode *vp = SDEVTOV(dv); 201 202 rw_destroy(&dv->sdev_contents); 203 vn_free(vp); 204 } 205 206 /* initialize sdev_node cache */ 207 void 208 sdev_node_cache_init() 209 { 210 int flags = 0; 211 212 #ifdef DEBUG 213 flags = sdev_debug_cache_flags; 214 if (flags) 215 sdcmn_err(("cache debug flags 0x%x\n", flags)); 216 #endif /* DEBUG */ 217 218 ASSERT(sdev_node_cache == NULL); 219 sdev_node_cache = kmem_cache_create("sdev_node_cache", 220 sizeof (struct sdev_node), 0, i_sdev_node_ctor, 
i_sdev_node_dtor, 221 NULL, NULL, NULL, flags); 222 } 223 224 /* destroy sdev_node cache */ 225 void 226 sdev_node_cache_fini() 227 { 228 ASSERT(sdev_node_cache != NULL); 229 kmem_cache_destroy(sdev_node_cache); 230 sdev_node_cache = NULL; 231 } 232 233 /* 234 * Compare two nodes lexographically to balance avl tree 235 */ 236 static int 237 sdev_compare_nodes(const struct sdev_node *dv1, const struct sdev_node *dv2) 238 { 239 int rv; 240 if ((rv = strcmp(dv1->sdev_name, dv2->sdev_name)) == 0) 241 return (0); 242 return ((rv < 0) ? -1 : 1); 243 } 244 245 void 246 sdev_set_nodestate(struct sdev_node *dv, sdev_node_state_t state) 247 { 248 ASSERT(dv); 249 ASSERT(RW_WRITE_HELD(&dv->sdev_contents)); 250 dv->sdev_state = state; 251 } 252 253 static void 254 sdev_attrinit(struct sdev_node *dv, vattr_t *vap) 255 { 256 timestruc_t now; 257 258 ASSERT(vap); 259 260 dv->sdev_attr = kmem_zalloc(sizeof (struct vattr), KM_SLEEP); 261 *dv->sdev_attr = *vap; 262 263 dv->sdev_attr->va_mode = MAKEIMODE(vap->va_type, vap->va_mode); 264 265 gethrestime(&now); 266 dv->sdev_attr->va_atime = now; 267 dv->sdev_attr->va_mtime = now; 268 dv->sdev_attr->va_ctime = now; 269 } 270 271 /* alloc and initialize a sdev_node */ 272 int 273 sdev_nodeinit(struct sdev_node *ddv, char *nm, struct sdev_node **newdv, 274 vattr_t *vap) 275 { 276 struct sdev_node *dv = NULL; 277 struct vnode *vp; 278 size_t nmlen, len; 279 devname_handle_t *dhl; 280 281 nmlen = strlen(nm) + 1; 282 if (nmlen > MAXNAMELEN) { 283 sdcmn_err9(("sdev_nodeinit: node name %s" 284 " too long\n", nm)); 285 *newdv = NULL; 286 return (ENAMETOOLONG); 287 } 288 289 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); 290 291 dv->sdev_name = kmem_alloc(nmlen, KM_SLEEP); 292 bcopy(nm, dv->sdev_name, nmlen); 293 dv->sdev_namelen = nmlen - 1; /* '\0' not included */ 294 len = strlen(ddv->sdev_path) + strlen(nm) + 2; 295 dv->sdev_path = kmem_alloc(len, KM_SLEEP); 296 (void) snprintf(dv->sdev_path, len, "%s/%s", ddv->sdev_path, nm); 297 /* 
/*
 * Transition a sdev_node into SDEV_READY state.
 *
 * arguments:
 *	- dv	node to finish setting up; must not already be SDEV_READY
 *	- vap	attributes for the node; va_type and va_rdev are copied to
 *		the vnode
 *	- avp	attribute (backing store) vnode, or NULL if there is none yet
 *	- args	for VLNK: the link target string, which is duplicated;
 *		for a non-global node: stored as dv->sdev_origin
 *	- cred	credentials handed to sdev_shadow_node()
 *
 * Returns 0 and leaves the node SDEV_READY on success.  If
 * sdev_shadow_node() fails, its error is returned and the node is left
 * in SDEV_ZOMBIE state.
 *
 * The node's sdev_contents lock is taken as writer inside this routine.
 */
int
sdev_nodeready(struct sdev_node *dv, struct vattr *vap, struct vnode *avp,
    void *args, struct cred *cred)
{
	int error = 0;
	struct vnode *vp = SDEVTOV(dv);
	vtype_t type;

	ASSERT(dv && (dv->sdev_state != SDEV_READY) && vap);

	/* the vnode takes its type and device number from the attributes */
	type = vap->va_type;
	vp->v_type = type;
	vp->v_rdev = vap->va_rdev;
	rw_enter(&dv->sdev_contents, RW_WRITER);
	if (type == VDIR) {
		/* a directory starts with two links: its name and "." */
		dv->sdev_nlink = 2;
		/*
		 * drop the inherited PERSIST/DYNAMIC bits; sdev_get_vop()
		 * sets the flags that apply to this directory again
		 */
		dv->sdev_flags &= ~SDEV_PERSIST;
		dv->sdev_flags &= ~SDEV_DYNAMIC;
		vn_setops(vp, sdev_get_vop(dv)); /* from internal vtab */
		ASSERT(dv->sdev_dotdot);
		ASSERT(SDEVTOV(dv->sdev_dotdot)->v_type == VDIR);
		/* a directory reports its parent's v_rdev, not va_rdev */
		vp->v_rdev = SDEVTOV(dv->sdev_dotdot)->v_rdev;
		/* children are kept in an AVL tree ordered by name */
		avl_create(&dv->sdev_entries,
		    (int (*)(const void *, const void *))sdev_compare_nodes,
		    sizeof (struct sdev_node),
		    offsetof(struct sdev_node, sdev_avllink));
	} else if (type == VLNK) {
		ASSERT(args);
		dv->sdev_nlink = 1;
		/* the node owns a private copy of the link target */
		dv->sdev_symlink = i_ddi_strdup((char *)args, KM_SLEEP);
	} else {
		dv->sdev_nlink = 1;
	}

	if (!(SDEV_IS_GLOBAL(dv))) {
		/*
		 * NOTE(review): for a non-global VLNK node args was used as
		 * the link target string just above and is stored here as a
		 * struct sdev_node pointer -- confirm what callers pass.
		 */
		dv->sdev_origin = (struct sdev_node *)args;
		dv->sdev_flags &= ~SDEV_PERSIST;
	}

	/*
	 * shadow node is created here OR
	 * if failed (indicated by dv->sdev_attrvp == NULL),
	 * created later in sdev_setattr
	 */
	if (avp) {
		dv->sdev_attrvp = avp;
	} else {
		/* no backing vnode: keep the attributes in memory */
		if (dv->sdev_attr == NULL)
			sdev_attrinit(dv, vap);
		else
			*dv->sdev_attr = *vap;

		/* persistent nodes and all directories get a shadow node */
		if ((SDEV_IS_PERSIST(dv) && (dv->sdev_attrvp == NULL)) ||
		    ((SDEVTOV(dv)->v_type == VDIR) &&
		    (dv->sdev_attrvp == NULL))) {
			error = sdev_shadow_node(dv, cred);
		}
	}

	if (error == 0) {
		/* transition to READY state */
		sdev_set_nodestate(dv, SDEV_READY);
		sdev_nc_node_exists(dv);
	} else {
		sdev_set_nodestate(dv, SDEV_ZOMBIE);
	}
	rw_exit(&dv->sdev_contents);
	return (error);
}

/*
 * Put a node into SDEV_ZOMBIE state, taking and releasing the node's
 * contents lock as writer around the change.  Always returns 0.
 */
static int
sdev_nodezombied(struct sdev_node *dv)
{
	rw_enter(&dv->sdev_contents, RW_WRITER);
	sdev_set_nodestate(dv, SDEV_ZOMBIE);
	rw_exit(&dv->sdev_contents);
	return (0);
}
437 */ 438 /*ARGSUSED*/ 439 struct sdev_node * 440 sdev_mkroot(struct vfs *vfsp, dev_t devdev, struct vnode *mvp, 441 struct vnode *avp, struct cred *cred) 442 { 443 struct sdev_node *dv; 444 struct vnode *vp; 445 char devdir[] = "/dev"; 446 447 ASSERT(sdev_node_cache != NULL); 448 ASSERT(avp); 449 dv = kmem_cache_alloc(sdev_node_cache, KM_SLEEP); 450 vp = SDEVTOV(dv); 451 vn_reinit(vp); 452 vp->v_flag |= VROOT; 453 vp->v_vfsp = vfsp; 454 vp->v_type = VDIR; 455 vp->v_rdev = devdev; 456 vn_setops(vp, sdev_vnodeops); /* apply the default vnodeops at /dev */ 457 vn_exists(vp); 458 459 if (vfsp->vfs_mntpt) 460 dv->sdev_name = i_ddi_strdup( 461 (char *)refstr_value(vfsp->vfs_mntpt), KM_SLEEP); 462 else 463 /* vfs_mountdev1 set mount point later */ 464 dv->sdev_name = i_ddi_strdup("/dev", KM_SLEEP); 465 dv->sdev_namelen = strlen(dv->sdev_name); /* '\0' not included */ 466 dv->sdev_path = i_ddi_strdup(devdir, KM_SLEEP); 467 dv->sdev_ino = SDEV_ROOTINO; 468 dv->sdev_nlink = 2; /* name + . (no sdev_insert) */ 469 dv->sdev_dotdot = dv; /* .. 
== self */ 470 dv->sdev_attrvp = avp; 471 dv->sdev_attr = NULL; 472 mutex_init(&dv->sdev_lookup_lock, NULL, MUTEX_DEFAULT, NULL); 473 cv_init(&dv->sdev_lookup_cv, NULL, CV_DEFAULT, NULL); 474 if (strcmp(dv->sdev_name, "/dev") == 0) { 475 dv->sdev_flags = SDEV_BUILD|SDEV_GLOBAL|SDEV_PERSIST; 476 bzero(&dv->sdev_handle, sizeof (dv->sdev_handle)); 477 dv->sdev_gdir_gen = 0; 478 } else { 479 dv->sdev_flags = SDEV_BUILD; 480 dv->sdev_flags &= ~SDEV_PERSIST; 481 bzero(&dv->sdev_prof, sizeof (dv->sdev_prof)); 482 dv->sdev_ldir_gen = 0; 483 dv->sdev_devtree_gen = 0; 484 } 485 486 avl_create(&dv->sdev_entries, 487 (int (*)(const void *, const void *))sdev_compare_nodes, 488 sizeof (struct sdev_node), 489 offsetof(struct sdev_node, sdev_avllink)); 490 491 rw_enter(&dv->sdev_contents, RW_WRITER); 492 sdev_set_nodestate(dv, SDEV_READY); 493 rw_exit(&dv->sdev_contents); 494 sdev_nc_node_exists(dv); 495 return (dv); 496 } 497 498 /* directory dependent vop table */ 499 struct sdev_vop_table { 500 char *vt_name; /* subdirectory name */ 501 const fs_operation_def_t *vt_service; /* vnodeops table */ 502 struct vnodeops *vt_vops; /* constructed vop */ 503 struct vnodeops **vt_global_vops; /* global container for vop */ 504 int (*vt_vtor)(struct sdev_node *); /* validate sdev_node */ 505 int vt_flags; 506 }; 507 508 /* 509 * A nice improvement would be to provide a plug-in mechanism 510 * for this table instead of a const table. 
511 */ 512 static struct sdev_vop_table vtab[] = 513 { 514 { "pts", devpts_vnodeops_tbl, NULL, &devpts_vnodeops, devpts_validate, 515 SDEV_DYNAMIC | SDEV_VTOR }, 516 517 { "vt", devvt_vnodeops_tbl, NULL, &devvt_vnodeops, devvt_validate, 518 SDEV_DYNAMIC | SDEV_VTOR }, 519 520 { "zcons", NULL, NULL, NULL, NULL, SDEV_NO_NCACHE }, 521 522 { "net", devnet_vnodeops_tbl, NULL, &devnet_vnodeops, devnet_validate, 523 SDEV_DYNAMIC | SDEV_VTOR }, 524 525 { "ipnet", devipnet_vnodeops_tbl, NULL, &devipnet_vnodeops, 526 devipnet_validate, SDEV_DYNAMIC | SDEV_VTOR | SDEV_NO_NCACHE }, 527 528 { NULL, NULL, NULL, NULL, NULL, 0} 529 }; 530 531 532 /* 533 * sets a directory's vnodeops if the directory is in the vtab; 534 */ 535 static struct vnodeops * 536 sdev_get_vop(struct sdev_node *dv) 537 { 538 int i; 539 char *path; 540 541 path = dv->sdev_path; 542 ASSERT(path); 543 544 /* gets the relative path to /dev/ */ 545 path += 5; 546 547 /* gets the vtab entry if matches */ 548 for (i = 0; vtab[i].vt_name; i++) { 549 if (strcmp(vtab[i].vt_name, path) != 0) 550 continue; 551 dv->sdev_flags |= vtab[i].vt_flags; 552 553 if (vtab[i].vt_vops) { 554 if (vtab[i].vt_global_vops) 555 *(vtab[i].vt_global_vops) = vtab[i].vt_vops; 556 return (vtab[i].vt_vops); 557 } 558 559 if (vtab[i].vt_service) { 560 fs_operation_def_t *templ; 561 templ = sdev_merge_vtab(vtab[i].vt_service); 562 if (vn_make_ops(vtab[i].vt_name, 563 (const fs_operation_def_t *)templ, 564 &vtab[i].vt_vops) != 0) { 565 cmn_err(CE_PANIC, "%s: malformed vnode ops\n", 566 vtab[i].vt_name); 567 /*NOTREACHED*/ 568 } 569 if (vtab[i].vt_global_vops) { 570 *(vtab[i].vt_global_vops) = vtab[i].vt_vops; 571 } 572 sdev_free_vtab(templ); 573 return (vtab[i].vt_vops); 574 } 575 return (sdev_vnodeops); 576 } 577 578 /* child inherits the persistence of the parent */ 579 if (SDEV_IS_PERSIST(dv->sdev_dotdot)) 580 dv->sdev_flags |= SDEV_PERSIST; 581 582 return (sdev_vnodeops); 583 } 584 585 static void 586 sdev_set_no_nocache(struct sdev_node 
*dv) 587 { 588 int i; 589 char *path; 590 591 ASSERT(dv->sdev_path); 592 path = dv->sdev_path + strlen("/dev/"); 593 594 for (i = 0; vtab[i].vt_name; i++) { 595 if (strcmp(vtab[i].vt_name, path) == 0) { 596 if (vtab[i].vt_flags & SDEV_NO_NCACHE) 597 dv->sdev_flags |= SDEV_NO_NCACHE; 598 break; 599 } 600 } 601 } 602 603 void * 604 sdev_get_vtor(struct sdev_node *dv) 605 { 606 int i; 607 608 for (i = 0; vtab[i].vt_name; i++) { 609 if (strcmp(vtab[i].vt_name, dv->sdev_name) != 0) 610 continue; 611 return ((void *)vtab[i].vt_vtor); 612 } 613 return (NULL); 614 } 615 616 /* 617 * Build the base root inode 618 */ 619 ino_t 620 sdev_mkino(struct sdev_node *dv) 621 { 622 ino_t ino; 623 624 /* 625 * for now, follow the lead of tmpfs here 626 * need to someday understand the requirements here 627 */ 628 ino = (ino_t)(uint32_t)((uintptr_t)dv >> 3); 629 ino += SDEV_ROOTINO + 1; 630 631 return (ino); 632 } 633 634 static int 635 sdev_getlink(struct vnode *linkvp, char **link) 636 { 637 int err; 638 char *buf; 639 struct uio uio = {0}; 640 struct iovec iov = {0}; 641 642 if (linkvp == NULL) 643 return (ENOENT); 644 ASSERT(linkvp->v_type == VLNK); 645 646 buf = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 647 iov.iov_base = buf; 648 iov.iov_len = MAXPATHLEN; 649 uio.uio_iov = &iov; 650 uio.uio_iovcnt = 1; 651 uio.uio_resid = MAXPATHLEN; 652 uio.uio_segflg = UIO_SYSSPACE; 653 uio.uio_llimit = MAXOFFSET_T; 654 655 err = VOP_READLINK(linkvp, &uio, kcred, NULL); 656 if (err) { 657 cmn_err(CE_WARN, "readlink %s failed in dev\n", buf); 658 kmem_free(buf, MAXPATHLEN); 659 return (ENOENT); 660 } 661 662 /* mission complete */ 663 *link = i_ddi_strdup(buf, KM_SLEEP); 664 kmem_free(buf, MAXPATHLEN); 665 return (0); 666 } 667 668 /* 669 * A convenient wrapper to get the devfs node vnode for a device 670 * minor functionality: readlink() of a /dev symlink 671 * Place the link into dv->sdev_symlink 672 */ 673 static int 674 sdev_follow_link(struct sdev_node *dv) 675 { 676 int err; 677 struct vnode 
*linkvp; 678 char *link = NULL; 679 680 linkvp = SDEVTOV(dv); 681 if (linkvp == NULL) 682 return (ENOENT); 683 ASSERT(linkvp->v_type == VLNK); 684 err = sdev_getlink(linkvp, &link); 685 if (err) { 686 (void) sdev_nodezombied(dv); 687 dv->sdev_symlink = NULL; 688 return (ENOENT); 689 } 690 691 ASSERT(link != NULL); 692 dv->sdev_symlink = link; 693 return (0); 694 } 695 696 static int 697 sdev_node_check(struct sdev_node *dv, struct vattr *nvap, void *nargs) 698 { 699 vtype_t otype = SDEVTOV(dv)->v_type; 700 701 /* 702 * existing sdev_node has a different type. 703 */ 704 if (otype != nvap->va_type) { 705 sdcmn_err9(("sdev_node_check: existing node " 706 " %s type %d does not match new node type %d\n", 707 dv->sdev_name, otype, nvap->va_type)); 708 return (EEXIST); 709 } 710 711 /* 712 * For a symlink, the target should be the same. 713 */ 714 if (otype == VLNK) { 715 ASSERT(nargs != NULL); 716 ASSERT(dv->sdev_symlink != NULL); 717 if (strcmp(dv->sdev_symlink, (char *)nargs) != 0) { 718 sdcmn_err9(("sdev_node_check: existing node " 719 " %s has different symlink %s as new node " 720 " %s\n", dv->sdev_name, dv->sdev_symlink, 721 (char *)nargs)); 722 return (EEXIST); 723 } 724 } 725 726 return (0); 727 } 728 729 /* 730 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready() 731 * 732 * arguments: 733 * - ddv (parent) 734 * - nm (child name) 735 * - newdv (sdev_node for nm is returned here) 736 * - vap (vattr for the node to be created, va_type should be set. 737 * - avp (attribute vnode) 738 * the defaults should be used if unknown) 739 * - cred 740 * - args 741 * . tnm (for VLNK) 742 * . global sdev_node (for !SDEV_GLOBAL) 743 * - state: SDEV_INIT, SDEV_READY 744 * 745 * only ddv, nm, newddv, vap, cred are required for sdev_mknode(SDEV_INIT) 746 * 747 * NOTE: directory contents writers lock needs to be held before 748 * calling this routine. 
/*
 * sdev_mknode - a wrapper for sdev_nodeinit(), sdev_nodeready()
 *
 * arguments:
 *	- ddv	parent directory; its contents lock must be held as writer
 *	- nm	child name
 *	- newdv	in: *newdv is an existing node to work on, or NULL to have
 *		one allocated and added to the directory cache
 *		out: the resulting node, or NULL on failure after the
 *		node was obtained
 *	- vap	vattr for the node to be created; va_type should be set
 *	- avp	attribute vnode, passed through to sdev_nodeready()
 *	- args	passed through to sdev_nodeready()/sdev_node_check()
 *		(link target for VLNK, global sdev_node for !SDEV_GLOBAL)
 *	- cred	credentials, passed through to sdev_nodeready()
 *	- state	requested state: SDEV_INIT or SDEV_READY
 *
 * Returns 0 on success.  Returns ENOENT if the parent is a ZOMBIE or the
 * node cannot be added to the directory cache, the sdev_nodeinit() error
 * if initialization fails, or the error from sdev_nodeready() /
 * sdev_node_check() when SDEV_READY was requested.
 */
int
sdev_mknode(struct sdev_node *ddv, char *nm, struct sdev_node **newdv,
    struct vattr *vap, struct vnode *avp, void *args, struct cred *cred,
    sdev_node_state_t state)
{
	int error = 0;
	sdev_node_state_t node_state;
	struct sdev_node *dv = NULL;

	ASSERT(state != SDEV_ZOMBIE);
	ASSERT(RW_WRITE_HELD(&ddv->sdev_contents));

	if (*newdv) {
		/* caller supplied an existing node */
		dv = *newdv;
	} else {
		/* allocate and initialize a sdev_node */
		if (ddv->sdev_state == SDEV_ZOMBIE) {
			sdcmn_err9(("sdev_mknode: parent %s ZOMBIEd\n",
			    ddv->sdev_path));
			return (ENOENT);
		}

		error = sdev_nodeinit(ddv, nm, &dv, vap);
		if (error != 0) {
			sdcmn_err9(("sdev_mknode: error %d,"
			    " name %s can not be initialized\n",
			    error, nm));
			return (error);
		}
		ASSERT(dv);

		/* insert into the directory cache */
		error = sdev_cache_update(ddv, &dv, nm, SDEV_CACHE_ADD);
		if (error) {
			/*
			 * the specific error is reported as ENOENT;
			 * *newdv is left as the caller passed it (NULL)
			 */
			sdcmn_err9(("sdev_mknode: node %s can not"
			    " be added into directory cache\n", nm));
			return (ENOENT);
		}
	}

	ASSERT(dv);
	node_state = dv->sdev_state;
	ASSERT(node_state != SDEV_ZOMBIE);

	/* nothing more to do when only SDEV_INIT was requested */
	if (state == SDEV_READY) {
		switch (node_state) {
		case SDEV_INIT:
			error = sdev_nodeready(dv, vap, avp, args, cred);
			if (error) {
				sdcmn_err9(("sdev_mknode: node %s can NOT"
				    " be transitioned into READY state, "
				    "error %d\n", nm, error));
			}
			break;
		case SDEV_READY:
			/*
			 * Do some sanity checking to make sure
			 * the existing sdev_node is what has been
			 * asked for.
			 */
			error = sdev_node_check(dv, vap, args);
			break;
		default:
			break;
		}
	}

	if (!error) {
		*newdv = dv;
		ASSERT((*newdv)->sdev_state != SDEV_ZOMBIE);
	} else {
		/*
		 * NOTE(review): presumably drops the hold on dv without
		 * triggering inactive processing -- confirm against the
		 * SDEV_SIMPLE_RELE definition.
		 */
		SDEV_SIMPLE_RELE(dv);
		*newdv = NULL;
	}

	return (error);
}
avl_destroy(&dv->sdev_entries); 900 } 901 902 mutex_destroy(&dv->sdev_lookup_lock); 903 cv_destroy(&dv->sdev_lookup_cv); 904 905 /* return node to initial state as per constructor */ 906 (void) memset((void *)&dv->sdev_instance_data, 0, 907 sizeof (dv->sdev_instance_data)); 908 vn_invalid(SDEVTOV(dv)); 909 kmem_cache_free(sdev_node_cache, dv); 910 } 911 912 /* 913 * DIRECTORY CACHE lookup 914 */ 915 struct sdev_node * 916 sdev_findbyname(struct sdev_node *ddv, char *nm) 917 { 918 struct sdev_node *dv; 919 struct sdev_node dvtmp; 920 avl_index_t where; 921 922 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); 923 924 dvtmp.sdev_name = nm; 925 dv = avl_find(&ddv->sdev_entries, &dvtmp, &where); 926 if (dv) { 927 ASSERT(dv->sdev_dotdot == ddv); 928 ASSERT(strcmp(dv->sdev_name, nm) == 0); 929 SDEV_HOLD(dv); 930 return (dv); 931 } 932 return (NULL); 933 } 934 935 /* 936 * Inserts a new sdev_node in a parent directory 937 */ 938 void 939 sdev_direnter(struct sdev_node *ddv, struct sdev_node *dv) 940 { 941 avl_index_t where; 942 943 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 944 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 945 ASSERT(ddv->sdev_nlink >= 2); 946 ASSERT(dv->sdev_nlink == 0); 947 948 dv->sdev_dotdot = ddv; 949 VERIFY(avl_find(&ddv->sdev_entries, dv, &where) == NULL); 950 avl_insert(&ddv->sdev_entries, dv, where); 951 ddv->sdev_nlink++; 952 } 953 954 /* 955 * The following check is needed because while sdev_nodes are linked 956 * in SDEV_INIT state, they have their link counts incremented only 957 * in SDEV_READY state. 958 */ 959 static void 960 decr_link(struct sdev_node *dv) 961 { 962 if (dv->sdev_state != SDEV_INIT) 963 dv->sdev_nlink--; 964 else 965 ASSERT(dv->sdev_nlink == 0); 966 } 967 968 /* 969 * Delete an existing dv from directory cache 970 * 971 * In the case of a node is still held by non-zero reference count, 972 * the node is put into ZOMBIE state. Once the reference count 973 * reaches "0", the node is unlinked and destroyed, 974 * in sdev_inactive(). 
975 */ 976 static int 977 sdev_dirdelete(struct sdev_node *ddv, struct sdev_node *dv) 978 { 979 struct vnode *vp; 980 981 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 982 983 vp = SDEVTOV(dv); 984 mutex_enter(&vp->v_lock); 985 986 /* dv is held still */ 987 if (vp->v_count > 1) { 988 rw_enter(&dv->sdev_contents, RW_WRITER); 989 if (dv->sdev_state == SDEV_READY) { 990 sdcmn_err9(( 991 "sdev_delete: node %s busy with count %d\n", 992 dv->sdev_name, vp->v_count)); 993 dv->sdev_state = SDEV_ZOMBIE; 994 } 995 rw_exit(&dv->sdev_contents); 996 --vp->v_count; 997 mutex_exit(&vp->v_lock); 998 return (EBUSY); 999 } 1000 ASSERT(vp->v_count == 1); 1001 1002 /* unlink from the memory cache */ 1003 ddv->sdev_nlink--; /* .. to above */ 1004 if (vp->v_type == VDIR) { 1005 decr_link(dv); /* . to self */ 1006 } 1007 1008 avl_remove(&ddv->sdev_entries, dv); 1009 decr_link(dv); /* name, back to zero */ 1010 vp->v_count--; 1011 mutex_exit(&vp->v_lock); 1012 1013 /* destroy the node */ 1014 sdev_nodedestroy(dv, 0); 1015 return (0); 1016 } 1017 1018 /* 1019 * check if the source is in the path of the target 1020 * 1021 * source and target are different 1022 */ 1023 /*ARGSUSED2*/ 1024 static int 1025 sdev_checkpath(struct sdev_node *sdv, struct sdev_node *tdv, struct cred *cred) 1026 { 1027 int error = 0; 1028 struct sdev_node *dotdot, *dir; 1029 1030 dotdot = tdv->sdev_dotdot; 1031 ASSERT(dotdot); 1032 1033 /* fs root */ 1034 if (dotdot == tdv) { 1035 return (0); 1036 } 1037 1038 for (;;) { 1039 /* 1040 * avoid error cases like 1041 * mv a a/b 1042 * mv a a/b/c 1043 * etc. 
1044 */ 1045 if (dotdot == sdv) { 1046 error = EINVAL; 1047 break; 1048 } 1049 1050 dir = dotdot; 1051 dotdot = dir->sdev_dotdot; 1052 1053 /* done checking because root is reached */ 1054 if (dir == dotdot) { 1055 break; 1056 } 1057 } 1058 return (error); 1059 } 1060 1061 int 1062 sdev_rnmnode(struct sdev_node *oddv, struct sdev_node *odv, 1063 struct sdev_node *nddv, struct sdev_node **ndvp, char *nnm, 1064 struct cred *cred) 1065 { 1066 int error = 0; 1067 struct vnode *ovp = SDEVTOV(odv); 1068 struct vnode *nvp; 1069 struct vattr vattr; 1070 int doingdir = (ovp->v_type == VDIR); 1071 char *link = NULL; 1072 int samedir = (oddv == nddv) ? 1 : 0; 1073 int bkstore = 0; 1074 struct sdev_node *idv = NULL; 1075 struct sdev_node *ndv = NULL; 1076 timestruc_t now; 1077 1078 vattr.va_mask = AT_MODE|AT_UID|AT_GID; 1079 error = VOP_GETATTR(ovp, &vattr, 0, cred, NULL); 1080 if (error) 1081 return (error); 1082 1083 if (!samedir) 1084 rw_enter(&oddv->sdev_contents, RW_WRITER); 1085 rw_enter(&nddv->sdev_contents, RW_WRITER); 1086 1087 /* 1088 * the source may have been deleted by another thread before 1089 * we gets here. 1090 */ 1091 if (odv->sdev_state != SDEV_READY) { 1092 error = ENOENT; 1093 goto err_out; 1094 } 1095 1096 if (doingdir && (odv == nddv)) { 1097 error = EINVAL; 1098 goto err_out; 1099 } 1100 1101 /* 1102 * If renaming a directory, and the parents are different (".." must be 1103 * changed) then the source dir must not be in the dir hierarchy above 1104 * the target since it would orphan everything below the source dir. 
1105 */ 1106 if (doingdir && (oddv != nddv)) { 1107 error = sdev_checkpath(odv, nddv, cred); 1108 if (error) 1109 goto err_out; 1110 } 1111 1112 /* destination existing */ 1113 if (*ndvp) { 1114 nvp = SDEVTOV(*ndvp); 1115 ASSERT(nvp); 1116 1117 /* handling renaming to itself */ 1118 if (odv == *ndvp) { 1119 error = 0; 1120 goto err_out; 1121 } 1122 1123 if (nvp->v_type == VDIR) { 1124 if (!doingdir) { 1125 error = EISDIR; 1126 goto err_out; 1127 } 1128 1129 if (vn_vfswlock(nvp)) { 1130 error = EBUSY; 1131 goto err_out; 1132 } 1133 1134 if (vn_mountedvfs(nvp) != NULL) { 1135 vn_vfsunlock(nvp); 1136 error = EBUSY; 1137 goto err_out; 1138 } 1139 1140 /* in case dir1 exists in dir2 and "mv dir1 dir2" */ 1141 if ((*ndvp)->sdev_nlink > 2) { 1142 vn_vfsunlock(nvp); 1143 error = EEXIST; 1144 goto err_out; 1145 } 1146 vn_vfsunlock(nvp); 1147 1148 (void) sdev_dirdelete(nddv, *ndvp); 1149 *ndvp = NULL; 1150 ASSERT(nddv->sdev_attrvp); 1151 error = VOP_RMDIR(nddv->sdev_attrvp, nnm, 1152 nddv->sdev_attrvp, cred, NULL, 0); 1153 if (error) 1154 goto err_out; 1155 } else { 1156 if (doingdir) { 1157 error = ENOTDIR; 1158 goto err_out; 1159 } 1160 1161 if (SDEV_IS_PERSIST((*ndvp))) { 1162 bkstore = 1; 1163 } 1164 1165 /* 1166 * get rid of the node from the directory cache 1167 * note, in case EBUSY is returned, the ZOMBIE 1168 * node is taken care in sdev_mknode. 
1169 */ 1170 (void) sdev_dirdelete(nddv, *ndvp); 1171 *ndvp = NULL; 1172 if (bkstore) { 1173 ASSERT(nddv->sdev_attrvp); 1174 error = VOP_REMOVE(nddv->sdev_attrvp, 1175 nnm, cred, NULL, 0); 1176 if (error) 1177 goto err_out; 1178 } 1179 } 1180 } 1181 1182 /* fix the source for a symlink */ 1183 if (vattr.va_type == VLNK) { 1184 if (odv->sdev_symlink == NULL) { 1185 error = sdev_follow_link(odv); 1186 if (error) { 1187 error = ENOENT; 1188 goto err_out; 1189 } 1190 } 1191 ASSERT(odv->sdev_symlink); 1192 link = i_ddi_strdup(odv->sdev_symlink, KM_SLEEP); 1193 } 1194 1195 /* 1196 * make a fresh node from the source attrs 1197 */ 1198 ASSERT(RW_WRITE_HELD(&nddv->sdev_contents)); 1199 error = sdev_mknode(nddv, nnm, ndvp, &vattr, 1200 NULL, (void *)link, cred, SDEV_READY); 1201 1202 if (link) 1203 kmem_free(link, strlen(link) + 1); 1204 1205 if (error) 1206 goto err_out; 1207 ASSERT(*ndvp); 1208 ASSERT((*ndvp)->sdev_state == SDEV_READY); 1209 1210 /* move dir contents */ 1211 if (doingdir) { 1212 for (idv = SDEV_FIRST_ENTRY(odv); idv; 1213 idv = SDEV_NEXT_ENTRY(odv, idv)) { 1214 error = sdev_rnmnode(odv, idv, 1215 (struct sdev_node *)(*ndvp), &ndv, 1216 idv->sdev_name, cred); 1217 if (error) 1218 goto err_out; 1219 ndv = NULL; 1220 } 1221 } 1222 1223 if ((*ndvp)->sdev_attrvp) { 1224 sdev_update_timestamps((*ndvp)->sdev_attrvp, kcred, 1225 AT_CTIME|AT_ATIME); 1226 } else { 1227 ASSERT((*ndvp)->sdev_attr); 1228 gethrestime(&now); 1229 (*ndvp)->sdev_attr->va_ctime = now; 1230 (*ndvp)->sdev_attr->va_atime = now; 1231 } 1232 1233 if (nddv->sdev_attrvp) { 1234 sdev_update_timestamps(nddv->sdev_attrvp, kcred, 1235 AT_MTIME|AT_ATIME); 1236 } else { 1237 ASSERT(nddv->sdev_attr); 1238 gethrestime(&now); 1239 nddv->sdev_attr->va_mtime = now; 1240 nddv->sdev_attr->va_atime = now; 1241 } 1242 rw_exit(&nddv->sdev_contents); 1243 if (!samedir) 1244 rw_exit(&oddv->sdev_contents); 1245 1246 SDEV_RELE(*ndvp); 1247 return (error); 1248 1249 err_out: 1250 rw_exit(&nddv->sdev_contents); 1251 
if (!samedir) 1252 rw_exit(&oddv->sdev_contents); 1253 return (error); 1254 } 1255 1256 /* 1257 * Merge sdev_node specific information into an attribute structure. 1258 * 1259 * note: sdev_node is not locked here 1260 */ 1261 void 1262 sdev_vattr_merge(struct sdev_node *dv, struct vattr *vap) 1263 { 1264 struct vnode *vp = SDEVTOV(dv); 1265 1266 vap->va_nlink = dv->sdev_nlink; 1267 vap->va_nodeid = dv->sdev_ino; 1268 vap->va_fsid = SDEVTOV(dv->sdev_dotdot)->v_rdev; 1269 vap->va_type = vp->v_type; 1270 1271 if (vp->v_type == VDIR) { 1272 vap->va_rdev = 0; 1273 vap->va_fsid = vp->v_rdev; 1274 } else if (vp->v_type == VLNK) { 1275 vap->va_rdev = 0; 1276 vap->va_mode &= ~S_IFMT; 1277 vap->va_mode |= S_IFLNK; 1278 } else if ((vp->v_type == VCHR) || (vp->v_type == VBLK)) { 1279 vap->va_rdev = vp->v_rdev; 1280 vap->va_mode &= ~S_IFMT; 1281 if (vap->va_type == VCHR) 1282 vap->va_mode |= S_IFCHR; 1283 else 1284 vap->va_mode |= S_IFBLK; 1285 } else { 1286 vap->va_rdev = 0; 1287 } 1288 } 1289 1290 static struct vattr * 1291 sdev_getdefault_attr(enum vtype type) 1292 { 1293 if (type == VDIR) 1294 return (&sdev_vattr_dir); 1295 else if (type == VCHR) 1296 return (&sdev_vattr_chr); 1297 else if (type == VBLK) 1298 return (&sdev_vattr_blk); 1299 else if (type == VLNK) 1300 return (&sdev_vattr_lnk); 1301 else 1302 return (NULL); 1303 } 1304 int 1305 sdev_to_vp(struct sdev_node *dv, struct vnode **vpp) 1306 { 1307 int rv = 0; 1308 struct vnode *vp = SDEVTOV(dv); 1309 1310 switch (vp->v_type) { 1311 case VCHR: 1312 case VBLK: 1313 /* 1314 * If vnode is a device, return special vnode instead 1315 * (though it knows all about -us- via sp->s_realvp) 1316 */ 1317 *vpp = specvp(vp, vp->v_rdev, vp->v_type, kcred); 1318 VN_RELE(vp); 1319 if (*vpp == NULLVP) 1320 rv = ENOSYS; 1321 break; 1322 default: /* most types are returned as is */ 1323 *vpp = vp; 1324 break; 1325 } 1326 return (rv); 1327 } 1328 1329 /* 1330 * the junction between devname and devfs 1331 */ 1332 static struct vnode * 
1333 devname_configure_by_path(char *physpath, struct vattr *vattr) 1334 { 1335 int error = 0; 1336 struct vnode *vp; 1337 1338 ASSERT(strncmp(physpath, "/devices/", sizeof ("/devices/") - 1) 1339 == 0); 1340 1341 error = devfs_lookupname(physpath + sizeof ("/devices/") - 1, 1342 NULLVPP, &vp); 1343 if (error != 0) { 1344 if (error == ENODEV) { 1345 cmn_err(CE_CONT, "%s: not found (line %d)\n", 1346 physpath, __LINE__); 1347 } 1348 1349 return (NULL); 1350 } 1351 1352 if (vattr) 1353 (void) VOP_GETATTR(vp, vattr, 0, kcred, NULL); 1354 return (vp); 1355 } 1356 1357 /* 1358 * junction between devname and root file system, e.g. ufs 1359 */ 1360 int 1361 devname_backstore_lookup(struct sdev_node *ddv, char *nm, struct vnode **rvp) 1362 { 1363 struct vnode *rdvp = ddv->sdev_attrvp; 1364 int rval = 0; 1365 1366 ASSERT(rdvp); 1367 1368 rval = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, kcred, NULL, NULL, 1369 NULL); 1370 return (rval); 1371 } 1372 1373 static int 1374 sdev_filldir_from_store(struct sdev_node *ddv, int dlen, struct cred *cred) 1375 { 1376 struct sdev_node *dv = NULL; 1377 char *nm; 1378 struct vnode *dirvp; 1379 int error; 1380 vnode_t *vp; 1381 int eof; 1382 struct iovec iov; 1383 struct uio uio; 1384 struct dirent64 *dp; 1385 dirent64_t *dbuf; 1386 size_t dbuflen; 1387 struct vattr vattr; 1388 char *link = NULL; 1389 1390 if (ddv->sdev_attrvp == NULL) 1391 return (0); 1392 if (!(ddv->sdev_flags & SDEV_BUILD)) 1393 return (0); 1394 1395 dirvp = ddv->sdev_attrvp; 1396 VN_HOLD(dirvp); 1397 dbuf = kmem_zalloc(dlen, KM_SLEEP); 1398 1399 uio.uio_iov = &iov; 1400 uio.uio_iovcnt = 1; 1401 uio.uio_segflg = UIO_SYSSPACE; 1402 uio.uio_fmode = 0; 1403 uio.uio_extflg = UIO_COPY_CACHED; 1404 uio.uio_loffset = 0; 1405 uio.uio_llimit = MAXOFFSET_T; 1406 1407 eof = 0; 1408 error = 0; 1409 while (!error && !eof) { 1410 uio.uio_resid = dlen; 1411 iov.iov_base = (char *)dbuf; 1412 iov.iov_len = dlen; 1413 (void) VOP_RWLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1414 error = 
VOP_READDIR(dirvp, &uio, kcred, &eof, NULL, 0); 1415 VOP_RWUNLOCK(dirvp, V_WRITELOCK_FALSE, NULL); 1416 1417 dbuflen = dlen - uio.uio_resid; 1418 if (error || dbuflen == 0) 1419 break; 1420 1421 if (!(ddv->sdev_flags & SDEV_BUILD)) { 1422 error = 0; 1423 break; 1424 } 1425 1426 for (dp = dbuf; ((intptr_t)dp < 1427 (intptr_t)dbuf + dbuflen); 1428 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 1429 nm = dp->d_name; 1430 1431 if (strcmp(nm, ".") == 0 || 1432 strcmp(nm, "..") == 0) 1433 continue; 1434 1435 vp = NULLVP; 1436 dv = sdev_cache_lookup(ddv, nm); 1437 if (dv) { 1438 if (dv->sdev_state != SDEV_ZOMBIE) { 1439 SDEV_SIMPLE_RELE(dv); 1440 } else { 1441 /* 1442 * A ZOMBIE node may not have been 1443 * cleaned up from the backing store, 1444 * bypass this entry in this case, 1445 * and clean it up from the directory 1446 * cache if this is the last call. 1447 */ 1448 (void) sdev_dirdelete(ddv, dv); 1449 } 1450 continue; 1451 } 1452 1453 /* refill the cache if not already */ 1454 error = devname_backstore_lookup(ddv, nm, &vp); 1455 if (error) 1456 continue; 1457 1458 vattr.va_mask = AT_MODE|AT_UID|AT_GID; 1459 error = VOP_GETATTR(vp, &vattr, 0, cred, NULL); 1460 if (error) 1461 continue; 1462 1463 if (vattr.va_type == VLNK) { 1464 error = sdev_getlink(vp, &link); 1465 if (error) { 1466 continue; 1467 } 1468 ASSERT(link != NULL); 1469 } 1470 1471 if (!rw_tryupgrade(&ddv->sdev_contents)) { 1472 rw_exit(&ddv->sdev_contents); 1473 rw_enter(&ddv->sdev_contents, RW_WRITER); 1474 } 1475 error = sdev_mknode(ddv, nm, &dv, &vattr, vp, link, 1476 cred, SDEV_READY); 1477 rw_downgrade(&ddv->sdev_contents); 1478 1479 if (link != NULL) { 1480 kmem_free(link, strlen(link) + 1); 1481 link = NULL; 1482 } 1483 1484 if (!error) { 1485 ASSERT(dv); 1486 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1487 SDEV_SIMPLE_RELE(dv); 1488 } 1489 vp = NULL; 1490 dv = NULL; 1491 } 1492 } 1493 1494 done: 1495 VN_RELE(dirvp); 1496 kmem_free(dbuf, dlen); 1497 1498 return (error); 1499 } 1500 1501 void 
1502 sdev_filldir_dynamic(struct sdev_node *ddv) 1503 { 1504 int error; 1505 int i; 1506 struct vattr *vap; 1507 char *nm = NULL; 1508 struct sdev_node *dv = NULL; 1509 1510 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1511 ASSERT((ddv->sdev_flags & SDEV_BUILD)); 1512 1513 vap = sdev_getdefault_attr(VDIR); 1514 for (i = 0; vtab[i].vt_name != NULL; i++) { 1515 nm = vtab[i].vt_name; 1516 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1517 dv = NULL; 1518 error = sdev_mknode(ddv, nm, &dv, vap, NULL, 1519 NULL, kcred, SDEV_READY); 1520 if (error) { 1521 cmn_err(CE_WARN, "%s/%s: error %d\n", 1522 ddv->sdev_name, nm, error); 1523 } else { 1524 ASSERT(dv); 1525 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 1526 SDEV_SIMPLE_RELE(dv); 1527 } 1528 } 1529 } 1530 1531 /* 1532 * Creating a backing store entry based on sdev_attr. 1533 * This is called either as part of node creation in a persistent directory 1534 * or from setattr/setsecattr to persist access attributes across reboot. 1535 */ 1536 int 1537 sdev_shadow_node(struct sdev_node *dv, struct cred *cred) 1538 { 1539 int error = 0; 1540 struct vnode *dvp = SDEVTOV(dv->sdev_dotdot); 1541 struct vnode *rdvp = VTOSDEV(dvp)->sdev_attrvp; 1542 struct vattr *vap = dv->sdev_attr; 1543 char *nm = dv->sdev_name; 1544 struct vnode *tmpvp, **rvp = &tmpvp, *rrvp = NULL; 1545 1546 ASSERT(dv && dv->sdev_name && rdvp); 1547 ASSERT(RW_WRITE_HELD(&dv->sdev_contents) && dv->sdev_attrvp == NULL); 1548 1549 lookup: 1550 /* try to find it in the backing store */ 1551 error = VOP_LOOKUP(rdvp, nm, rvp, NULL, 0, NULL, cred, NULL, NULL, 1552 NULL); 1553 if (error == 0) { 1554 if (VOP_REALVP(*rvp, &rrvp, NULL) == 0) { 1555 VN_HOLD(rrvp); 1556 VN_RELE(*rvp); 1557 *rvp = rrvp; 1558 } 1559 1560 kmem_free(dv->sdev_attr, sizeof (vattr_t)); 1561 dv->sdev_attr = NULL; 1562 dv->sdev_attrvp = *rvp; 1563 return (0); 1564 } 1565 1566 /* let's try to persist the node */ 1567 gethrestime(&vap->va_atime); 1568 vap->va_mtime = vap->va_atime; 1569 vap->va_ctime = 
vap->va_atime; 1570 vap->va_mask |= AT_TYPE|AT_MODE; 1571 switch (vap->va_type) { 1572 case VDIR: 1573 error = VOP_MKDIR(rdvp, nm, vap, rvp, cred, NULL, 0, NULL); 1574 sdcmn_err9(("sdev_shadow_node: mkdir vp %p error %d\n", 1575 (void *)(*rvp), error)); 1576 break; 1577 case VCHR: 1578 case VBLK: 1579 case VREG: 1580 case VDOOR: 1581 error = VOP_CREATE(rdvp, nm, vap, NONEXCL, VREAD|VWRITE, 1582 rvp, cred, 0, NULL, NULL); 1583 sdcmn_err9(("sdev_shadow_node: create vp %p, error %d\n", 1584 (void *)(*rvp), error)); 1585 if (!error) 1586 VN_RELE(*rvp); 1587 break; 1588 case VLNK: 1589 ASSERT(dv->sdev_symlink); 1590 error = VOP_SYMLINK(rdvp, nm, vap, dv->sdev_symlink, cred, 1591 NULL, 0); 1592 sdcmn_err9(("sdev_shadow_node: create symlink error %d\n", 1593 error)); 1594 break; 1595 default: 1596 cmn_err(CE_PANIC, "dev: %s: sdev_shadow_node " 1597 "create\n", nm); 1598 /*NOTREACHED*/ 1599 } 1600 1601 /* go back to lookup to factor out spec node and set attrvp */ 1602 if (error == 0) 1603 goto lookup; 1604 1605 sdcmn_err(("cannot persist %s - error %d\n", dv->sdev_path, error)); 1606 return (error); 1607 } 1608 1609 static int 1610 sdev_cache_add(struct sdev_node *ddv, struct sdev_node **dv, char *nm) 1611 { 1612 int error = 0; 1613 struct sdev_node *dup = NULL; 1614 1615 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1616 if ((dup = sdev_findbyname(ddv, nm)) == NULL) { 1617 sdev_direnter(ddv, *dv); 1618 } else { 1619 if (dup->sdev_state == SDEV_ZOMBIE) { 1620 error = sdev_dirdelete(ddv, dup); 1621 /* 1622 * The ZOMBIE node is still hanging 1623 * around with more than one reference counts. 
1624 * Fail the new node creation so that 1625 * the directory cache won't have 1626 * duplicate entries for the same named node 1627 */ 1628 if (error == EBUSY) { 1629 SDEV_SIMPLE_RELE(*dv); 1630 sdev_nodedestroy(*dv, 0); 1631 *dv = NULL; 1632 return (error); 1633 } 1634 sdev_direnter(ddv, *dv); 1635 } else { 1636 ASSERT((*dv)->sdev_state != SDEV_ZOMBIE); 1637 SDEV_SIMPLE_RELE(*dv); 1638 sdev_nodedestroy(*dv, 0); 1639 *dv = dup; 1640 } 1641 } 1642 1643 return (0); 1644 } 1645 1646 static int 1647 sdev_cache_delete(struct sdev_node *ddv, struct sdev_node **dv) 1648 { 1649 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1650 return (sdev_dirdelete(ddv, *dv)); 1651 } 1652 1653 /* 1654 * update the in-core directory cache 1655 */ 1656 int 1657 sdev_cache_update(struct sdev_node *ddv, struct sdev_node **dv, char *nm, 1658 sdev_cache_ops_t ops) 1659 { 1660 int error = 0; 1661 1662 ASSERT((SDEV_HELD(*dv))); 1663 1664 ASSERT(RW_WRITE_HELD(&ddv->sdev_contents)); 1665 switch (ops) { 1666 case SDEV_CACHE_ADD: 1667 error = sdev_cache_add(ddv, dv, nm); 1668 break; 1669 case SDEV_CACHE_DELETE: 1670 error = sdev_cache_delete(ddv, dv); 1671 break; 1672 default: 1673 break; 1674 } 1675 1676 return (error); 1677 } 1678 1679 /* 1680 * retrieve the named entry from the directory cache 1681 */ 1682 struct sdev_node * 1683 sdev_cache_lookup(struct sdev_node *ddv, char *nm) 1684 { 1685 struct sdev_node *dv = NULL; 1686 1687 ASSERT(RW_LOCK_HELD(&ddv->sdev_contents)); 1688 dv = sdev_findbyname(ddv, nm); 1689 1690 return (dv); 1691 } 1692 1693 /* 1694 * Implicit reconfig for nodes constructed by a link generator 1695 * Start devfsadm if needed, or if devfsadm is in progress, 1696 * prepare to block on devfsadm either completing or 1697 * constructing the desired node. As devfsadmd is global 1698 * in scope, constructing all necessary nodes, we only 1699 * need to initiate it once. 
1700 */ 1701 static int 1702 sdev_call_devfsadmd(struct sdev_node *ddv, struct sdev_node *dv, char *nm) 1703 { 1704 int error = 0; 1705 1706 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { 1707 sdcmn_err6(("lookup: waiting for %s/%s, 0x%x\n", 1708 ddv->sdev_name, nm, devfsadm_state)); 1709 mutex_enter(&dv->sdev_lookup_lock); 1710 SDEV_BLOCK_OTHERS(dv, (SDEV_LOOKUP | SDEV_LGWAITING)); 1711 mutex_exit(&dv->sdev_lookup_lock); 1712 error = 0; 1713 } else if (!DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state)) { 1714 sdcmn_err6(("lookup %s/%s starting devfsadm, 0x%x\n", 1715 ddv->sdev_name, nm, devfsadm_state)); 1716 1717 sdev_devfsadmd_thread(ddv, dv, kcred); 1718 mutex_enter(&dv->sdev_lookup_lock); 1719 SDEV_BLOCK_OTHERS(dv, 1720 (SDEV_LOOKUP | SDEV_LGWAITING)); 1721 mutex_exit(&dv->sdev_lookup_lock); 1722 error = 0; 1723 } else { 1724 error = -1; 1725 } 1726 1727 return (error); 1728 } 1729 1730 /* 1731 * Support for specialized device naming construction mechanisms 1732 */ 1733 static int 1734 sdev_call_dircallback(struct sdev_node *ddv, struct sdev_node **dvp, char *nm, 1735 int (*callback)(struct sdev_node *, char *, void **, struct cred *, 1736 void *, char *), int flags, struct cred *cred) 1737 { 1738 int rv = 0; 1739 char *physpath = NULL; 1740 struct vnode *rvp = NULL; 1741 struct vattr vattr; 1742 struct vattr *vap; 1743 struct sdev_node *dv = *dvp; 1744 1745 mutex_enter(&dv->sdev_lookup_lock); 1746 SDEV_BLOCK_OTHERS(dv, SDEV_LOOKUP); 1747 mutex_exit(&dv->sdev_lookup_lock); 1748 1749 /* for non-devfsadm devices */ 1750 if (flags & SDEV_PATH) { 1751 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1752 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL, 1753 NULL); 1754 if (rv) { 1755 kmem_free(physpath, MAXPATHLEN); 1756 return (-1); 1757 } 1758 1759 rvp = devname_configure_by_path(physpath, NULL); 1760 if (rvp == NULL) { 1761 sdcmn_err3(("devname_configure_by_path: " 1762 "failed for /dev/%s/%s\n", 1763 ddv->sdev_name, nm)); 1764 kmem_free(physpath, MAXPATHLEN); 
1765 rv = -1; 1766 } else { 1767 vap = sdev_getdefault_attr(VLNK); 1768 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1769 1770 /* 1771 * Sdev_mknode may return back a different sdev_node 1772 * that was created by another thread that 1773 * raced to the directroy cache before this thread. 1774 * 1775 * With current directory cache mechanism 1776 * (linked list with the sdev_node name as 1777 * the entity key), this is a way to make sure 1778 * only one entry exists for the same name 1779 * in the same directory. The outcome is 1780 * the winner wins. 1781 */ 1782 if (!rw_tryupgrade(&ddv->sdev_contents)) { 1783 rw_exit(&ddv->sdev_contents); 1784 rw_enter(&ddv->sdev_contents, RW_WRITER); 1785 } 1786 rv = sdev_mknode(ddv, nm, &dv, vap, NULL, 1787 (void *)physpath, cred, SDEV_READY); 1788 rw_downgrade(&ddv->sdev_contents); 1789 kmem_free(physpath, MAXPATHLEN); 1790 if (rv) { 1791 return (rv); 1792 } else { 1793 mutex_enter(&dv->sdev_lookup_lock); 1794 SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP); 1795 mutex_exit(&dv->sdev_lookup_lock); 1796 return (0); 1797 } 1798 } 1799 } else if (flags & SDEV_VLINK) { 1800 physpath = kmem_zalloc(MAXPATHLEN, KM_SLEEP); 1801 rv = callback(ddv, nm, (void *)&physpath, kcred, NULL, 1802 NULL); 1803 if (rv) { 1804 kmem_free(physpath, MAXPATHLEN); 1805 return (-1); 1806 } 1807 1808 vap = sdev_getdefault_attr(VLNK); 1809 vap->va_size = strlen(physpath); 1810 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1811 1812 if (!rw_tryupgrade(&ddv->sdev_contents)) { 1813 rw_exit(&ddv->sdev_contents); 1814 rw_enter(&ddv->sdev_contents, RW_WRITER); 1815 } 1816 rv = sdev_mknode(ddv, nm, &dv, vap, NULL, 1817 (void *)physpath, cred, SDEV_READY); 1818 rw_downgrade(&ddv->sdev_contents); 1819 kmem_free(physpath, MAXPATHLEN); 1820 if (rv) 1821 return (rv); 1822 1823 mutex_enter(&dv->sdev_lookup_lock); 1824 SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP); 1825 mutex_exit(&dv->sdev_lookup_lock); 1826 return (0); 1827 } else if (flags & SDEV_VNODE) { 1828 /* 1829 * DBNR has its own way 
to create the device 1830 * and return a backing store vnode in rvp 1831 */ 1832 ASSERT(callback); 1833 rv = callback(ddv, nm, (void *)&rvp, kcred, NULL, NULL); 1834 if (rv || (rvp == NULL)) { 1835 sdcmn_err3(("devname_lookup_func: SDEV_VNODE " 1836 "callback failed \n")); 1837 return (-1); 1838 } 1839 vap = sdev_getdefault_attr(rvp->v_type); 1840 if (vap == NULL) 1841 return (-1); 1842 1843 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1844 if (!rw_tryupgrade(&ddv->sdev_contents)) { 1845 rw_exit(&ddv->sdev_contents); 1846 rw_enter(&ddv->sdev_contents, RW_WRITER); 1847 } 1848 rv = sdev_mknode(ddv, nm, &dv, vap, rvp, NULL, 1849 cred, SDEV_READY); 1850 rw_downgrade(&ddv->sdev_contents); 1851 if (rv) 1852 return (rv); 1853 1854 mutex_enter(&dv->sdev_lookup_lock); 1855 SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP); 1856 mutex_exit(&dv->sdev_lookup_lock); 1857 return (0); 1858 } else if (flags & SDEV_VATTR) { 1859 /* 1860 * /dev/pts 1861 * 1862 * DBNR has its own way to create the device 1863 * "0" is returned upon success. 1864 * 1865 * callback is responsible to set the basic attributes, 1866 * e.g. 
va_type/va_uid/va_gid/ 1867 * dev_t if VCHR or VBLK/ 1868 */ 1869 ASSERT(callback); 1870 rv = callback(ddv, nm, (void *)&vattr, kcred, NULL, NULL); 1871 if (rv) { 1872 sdcmn_err3(("devname_lookup_func: SDEV_NONE " 1873 "callback failed \n")); 1874 return (-1); 1875 } 1876 1877 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1878 if (!rw_tryupgrade(&ddv->sdev_contents)) { 1879 rw_exit(&ddv->sdev_contents); 1880 rw_enter(&ddv->sdev_contents, RW_WRITER); 1881 } 1882 rv = sdev_mknode(ddv, nm, &dv, &vattr, NULL, NULL, 1883 cred, SDEV_READY); 1884 rw_downgrade(&ddv->sdev_contents); 1885 1886 if (rv) 1887 return (rv); 1888 1889 mutex_enter(&dv->sdev_lookup_lock); 1890 SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP); 1891 mutex_exit(&dv->sdev_lookup_lock); 1892 return (0); 1893 } else { 1894 impossible(("lookup: %s/%s by %s not supported (%d)\n", 1895 SDEVTOV(ddv)->v_path, nm, curproc->p_user.u_comm, 1896 __LINE__)); 1897 rv = -1; 1898 } 1899 1900 *dvp = dv; 1901 return (rv); 1902 } 1903 1904 static int 1905 is_devfsadm_thread(char *exec_name) 1906 { 1907 /* 1908 * note: because devfsadmd -> /usr/sbin/devfsadm 1909 * it is safe to use "devfsadm" to capture the lookups 1910 * from devfsadm and its daemon version. 1911 */ 1912 if (strcmp(exec_name, "devfsadm") == 0) 1913 return (1); 1914 return (0); 1915 } 1916 1917 1918 /* 1919 * Lookup Order: 1920 * sdev_node cache; 1921 * backing store (SDEV_PERSIST); 1922 * DBNR: a. dir_ops implemented in the loadable modules; 1923 * b. vnode ops in vtab. 
1924 */ 1925 int 1926 devname_lookup_func(struct sdev_node *ddv, char *nm, struct vnode **vpp, 1927 struct cred *cred, int (*callback)(struct sdev_node *, char *, void **, 1928 struct cred *, void *, char *), int flags) 1929 { 1930 int rv = 0, nmlen; 1931 struct vnode *rvp = NULL; 1932 struct sdev_node *dv = NULL; 1933 int retried = 0; 1934 int error = 0; 1935 struct vattr vattr; 1936 char *lookup_thread = curproc->p_user.u_comm; 1937 int failed_flags = 0; 1938 int (*vtor)(struct sdev_node *) = NULL; 1939 int state; 1940 int parent_state; 1941 char *link = NULL; 1942 1943 if (SDEVTOV(ddv)->v_type != VDIR) 1944 return (ENOTDIR); 1945 1946 /* 1947 * Empty name or ., return node itself. 1948 */ 1949 nmlen = strlen(nm); 1950 if ((nmlen == 0) || ((nmlen == 1) && (nm[0] == '.'))) { 1951 *vpp = SDEVTOV(ddv); 1952 VN_HOLD(*vpp); 1953 return (0); 1954 } 1955 1956 /* 1957 * .., return the parent directory 1958 */ 1959 if ((nmlen == 2) && (strcmp(nm, "..") == 0)) { 1960 *vpp = SDEVTOV(ddv->sdev_dotdot); 1961 VN_HOLD(*vpp); 1962 return (0); 1963 } 1964 1965 rw_enter(&ddv->sdev_contents, RW_READER); 1966 if (ddv->sdev_flags & SDEV_VTOR) { 1967 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); 1968 ASSERT(vtor); 1969 } 1970 1971 tryagain: 1972 /* 1973 * (a) directory cache lookup: 1974 */ 1975 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 1976 parent_state = ddv->sdev_state; 1977 dv = sdev_cache_lookup(ddv, nm); 1978 if (dv) { 1979 state = dv->sdev_state; 1980 switch (state) { 1981 case SDEV_INIT: 1982 if (is_devfsadm_thread(lookup_thread)) 1983 break; 1984 1985 /* ZOMBIED parent won't allow node creation */ 1986 if (parent_state == SDEV_ZOMBIE) { 1987 SD_TRACE_FAILED_LOOKUP(ddv, nm, 1988 retried); 1989 goto nolock_notfound; 1990 } 1991 1992 mutex_enter(&dv->sdev_lookup_lock); 1993 /* compensate the threads started after devfsadm */ 1994 if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && 1995 !(SDEV_IS_LOOKUP(dv))) 1996 SDEV_BLOCK_OTHERS(dv, 1997 (SDEV_LOOKUP | 
SDEV_LGWAITING)); 1998 1999 if (SDEV_IS_LOOKUP(dv)) { 2000 failed_flags |= SLF_REBUILT; 2001 rw_exit(&ddv->sdev_contents); 2002 error = sdev_wait4lookup(dv, SDEV_LOOKUP); 2003 mutex_exit(&dv->sdev_lookup_lock); 2004 rw_enter(&ddv->sdev_contents, RW_READER); 2005 2006 if (error != 0) { 2007 SD_TRACE_FAILED_LOOKUP(ddv, nm, 2008 retried); 2009 goto nolock_notfound; 2010 } 2011 2012 state = dv->sdev_state; 2013 if (state == SDEV_INIT) { 2014 SD_TRACE_FAILED_LOOKUP(ddv, nm, 2015 retried); 2016 goto nolock_notfound; 2017 } else if (state == SDEV_READY) { 2018 goto found; 2019 } else if (state == SDEV_ZOMBIE) { 2020 rw_exit(&ddv->sdev_contents); 2021 SD_TRACE_FAILED_LOOKUP(ddv, nm, 2022 retried); 2023 SDEV_RELE(dv); 2024 goto lookup_failed; 2025 } 2026 } else { 2027 mutex_exit(&dv->sdev_lookup_lock); 2028 } 2029 break; 2030 case SDEV_READY: 2031 goto found; 2032 case SDEV_ZOMBIE: 2033 rw_exit(&ddv->sdev_contents); 2034 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2035 SDEV_RELE(dv); 2036 goto lookup_failed; 2037 default: 2038 rw_exit(&ddv->sdev_contents); 2039 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2040 sdev_lookup_failed(ddv, nm, failed_flags); 2041 *vpp = NULLVP; 2042 return (ENOENT); 2043 } 2044 } 2045 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2046 2047 /* 2048 * ZOMBIED parent does not allow new node creation. 
2049 * bail out early 2050 */ 2051 if (parent_state == SDEV_ZOMBIE) { 2052 rw_exit(&ddv->sdev_contents); 2053 *vpp = NULL; 2054 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2055 return (ENOENT); 2056 } 2057 2058 /* 2059 * (b0): backing store lookup 2060 * SDEV_PERSIST is default except: 2061 * 1) pts nodes 2062 * 2) non-chmod'ed local nodes 2063 */ 2064 if (SDEV_IS_PERSIST(ddv)) { 2065 error = devname_backstore_lookup(ddv, nm, &rvp); 2066 2067 if (!error) { 2068 sdcmn_err3(("devname_backstore_lookup: " 2069 "found attrvp %p for %s\n", (void *)rvp, nm)); 2070 2071 vattr.va_mask = AT_MODE|AT_UID|AT_GID; 2072 error = VOP_GETATTR(rvp, &vattr, 0, cred, NULL); 2073 if (error) { 2074 rw_exit(&ddv->sdev_contents); 2075 if (dv) 2076 SDEV_RELE(dv); 2077 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2078 sdev_lookup_failed(ddv, nm, failed_flags); 2079 *vpp = NULLVP; 2080 return (ENOENT); 2081 } 2082 2083 if (vattr.va_type == VLNK) { 2084 error = sdev_getlink(rvp, &link); 2085 if (error) { 2086 rw_exit(&ddv->sdev_contents); 2087 if (dv) 2088 SDEV_RELE(dv); 2089 SD_TRACE_FAILED_LOOKUP(ddv, nm, 2090 retried); 2091 sdev_lookup_failed(ddv, nm, 2092 failed_flags); 2093 *vpp = NULLVP; 2094 return (ENOENT); 2095 } 2096 ASSERT(link != NULL); 2097 } 2098 2099 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2100 rw_exit(&ddv->sdev_contents); 2101 rw_enter(&ddv->sdev_contents, RW_WRITER); 2102 } 2103 error = sdev_mknode(ddv, nm, &dv, &vattr, 2104 rvp, link, cred, SDEV_READY); 2105 rw_downgrade(&ddv->sdev_contents); 2106 2107 if (link != NULL) { 2108 kmem_free(link, strlen(link) + 1); 2109 link = NULL; 2110 } 2111 2112 if (error) { 2113 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2114 rw_exit(&ddv->sdev_contents); 2115 if (dv) 2116 SDEV_RELE(dv); 2117 goto lookup_failed; 2118 } else { 2119 goto found; 2120 } 2121 } else if (retried) { 2122 rw_exit(&ddv->sdev_contents); 2123 sdcmn_err3(("retry of lookup of %s/%s: failed\n", 2124 ddv->sdev_name, nm)); 2125 if (dv) 2126 SDEV_RELE(dv); 2127 
SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2128 sdev_lookup_failed(ddv, nm, failed_flags); 2129 *vpp = NULLVP; 2130 return (ENOENT); 2131 } 2132 } 2133 2134 lookup_create_node: 2135 /* first thread that is doing the lookup on this node */ 2136 if (!dv) { 2137 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2138 rw_exit(&ddv->sdev_contents); 2139 rw_enter(&ddv->sdev_contents, RW_WRITER); 2140 } 2141 error = sdev_mknode(ddv, nm, &dv, NULL, NULL, NULL, 2142 cred, SDEV_INIT); 2143 if (!dv) { 2144 rw_exit(&ddv->sdev_contents); 2145 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2146 sdev_lookup_failed(ddv, nm, failed_flags); 2147 *vpp = NULLVP; 2148 return (ENOENT); 2149 } 2150 rw_downgrade(&ddv->sdev_contents); 2151 } 2152 ASSERT(dv); 2153 ASSERT(SDEV_HELD(dv)); 2154 2155 if (SDEV_IS_NO_NCACHE(dv)) { 2156 failed_flags |= SLF_NO_NCACHE; 2157 } 2158 2159 /* 2160 * (b1) invoking devfsadm once per life time for devfsadm nodes 2161 */ 2162 if (!callback) { 2163 2164 if (sdev_reconfig_boot || !i_ddi_io_initialized() || 2165 SDEV_IS_DYNAMIC(ddv) || SDEV_IS_NO_NCACHE(dv) || 2166 ((moddebug & MODDEBUG_FINI_EBUSY) != 0)) { 2167 ASSERT(SDEV_HELD(dv)); 2168 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2169 goto nolock_notfound; 2170 } 2171 2172 /* 2173 * filter out known non-existent devices recorded 2174 * during initial reconfiguration boot for which 2175 * reconfig should not be done and lookup may 2176 * be short-circuited now. 
2177 */ 2178 if (sdev_lookup_filter(ddv, nm)) { 2179 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2180 goto nolock_notfound; 2181 } 2182 2183 /* bypassing devfsadm internal nodes */ 2184 if (is_devfsadm_thread(lookup_thread)) { 2185 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2186 goto nolock_notfound; 2187 } 2188 2189 if (sdev_reconfig_disable) { 2190 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2191 goto nolock_notfound; 2192 } 2193 2194 error = sdev_call_devfsadmd(ddv, dv, nm); 2195 if (error == 0) { 2196 sdcmn_err8(("lookup of %s/%s by %s: reconfig\n", 2197 ddv->sdev_name, nm, curproc->p_user.u_comm)); 2198 if (sdev_reconfig_verbose) { 2199 cmn_err(CE_CONT, 2200 "?lookup of %s/%s by %s: reconfig\n", 2201 ddv->sdev_name, nm, curproc->p_user.u_comm); 2202 } 2203 retried = 1; 2204 failed_flags |= SLF_REBUILT; 2205 ASSERT(dv->sdev_state != SDEV_ZOMBIE); 2206 SDEV_SIMPLE_RELE(dv); 2207 goto tryagain; 2208 } else { 2209 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2210 goto nolock_notfound; 2211 } 2212 } 2213 2214 /* 2215 * (b2) Directory Based Name Resolution (DBNR): 2216 * ddv - parent 2217 * nm - /dev/(ddv->sdev_name)/nm 2218 * 2219 * note: module vnode ops take precedence than the build-in ones 2220 */ 2221 if (callback) { 2222 error = sdev_call_dircallback(ddv, &dv, nm, callback, 2223 flags, cred); 2224 if (error == 0) { 2225 goto found; 2226 } else { 2227 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2228 goto notfound; 2229 } 2230 } 2231 ASSERT(rvp); 2232 2233 found: 2234 ASSERT(!(dv->sdev_flags & SDEV_STALE)); 2235 ASSERT(dv->sdev_state == SDEV_READY); 2236 if (vtor) { 2237 /* 2238 * Check validity of returned node 2239 */ 2240 switch (vtor(dv)) { 2241 case SDEV_VTOR_VALID: 2242 break; 2243 case SDEV_VTOR_STALE: 2244 /* 2245 * The name exists, but the cache entry is 2246 * stale and needs to be re-created. 
2247 */ 2248 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2249 if (rw_tryupgrade(&ddv->sdev_contents) == 0) { 2250 rw_exit(&ddv->sdev_contents); 2251 rw_enter(&ddv->sdev_contents, RW_WRITER); 2252 } 2253 error = sdev_cache_update(ddv, &dv, nm, 2254 SDEV_CACHE_DELETE); 2255 rw_downgrade(&ddv->sdev_contents); 2256 if (error == 0) { 2257 dv = NULL; 2258 goto lookup_create_node; 2259 } 2260 /* FALLTHRU */ 2261 case SDEV_VTOR_INVALID: 2262 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2263 sdcmn_err7(("lookup: destroy invalid " 2264 "node: %s(%p)\n", dv->sdev_name, (void *)dv)); 2265 goto nolock_notfound; 2266 case SDEV_VTOR_SKIP: 2267 sdcmn_err7(("lookup: node not applicable - " 2268 "skipping: %s(%p)\n", dv->sdev_name, (void *)dv)); 2269 rw_exit(&ddv->sdev_contents); 2270 SD_TRACE_FAILED_LOOKUP(ddv, nm, retried); 2271 SDEV_RELE(dv); 2272 goto lookup_failed; 2273 default: 2274 cmn_err(CE_PANIC, 2275 "dev fs: validator failed: %s(%p)\n", 2276 dv->sdev_name, (void *)dv); 2277 break; 2278 /*NOTREACHED*/ 2279 } 2280 } 2281 2282 rw_exit(&ddv->sdev_contents); 2283 rv = sdev_to_vp(dv, vpp); 2284 sdcmn_err3(("devname_lookup_func: returning vp %p v_count %d state %d " 2285 "for nm %s, error %d\n", (void *)*vpp, (*vpp)->v_count, 2286 dv->sdev_state, nm, rv)); 2287 return (rv); 2288 2289 notfound: 2290 mutex_enter(&dv->sdev_lookup_lock); 2291 SDEV_UNBLOCK_OTHERS(dv, SDEV_LOOKUP); 2292 mutex_exit(&dv->sdev_lookup_lock); 2293 nolock_notfound: 2294 /* 2295 * Destroy the node that is created for synchronization purposes. 2296 */ 2297 sdcmn_err3(("devname_lookup_func: %s with state %d\n", 2298 nm, dv->sdev_state)); 2299 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2300 if (dv->sdev_state == SDEV_INIT) { 2301 if (!rw_tryupgrade(&ddv->sdev_contents)) { 2302 rw_exit(&ddv->sdev_contents); 2303 rw_enter(&ddv->sdev_contents, RW_WRITER); 2304 } 2305 2306 /* 2307 * Node state may have changed during the lock 2308 * changes. Re-check. 
2309 */ 2310 if (dv->sdev_state == SDEV_INIT) { 2311 (void) sdev_dirdelete(ddv, dv); 2312 rw_exit(&ddv->sdev_contents); 2313 sdev_lookup_failed(ddv, nm, failed_flags); 2314 *vpp = NULL; 2315 return (ENOENT); 2316 } 2317 } 2318 2319 rw_exit(&ddv->sdev_contents); 2320 SDEV_RELE(dv); 2321 2322 lookup_failed: 2323 sdev_lookup_failed(ddv, nm, failed_flags); 2324 *vpp = NULL; 2325 return (ENOENT); 2326 } 2327 2328 /* 2329 * Given a directory node, mark all nodes beneath as 2330 * STALE, i.e. nodes that don't exist as far as new 2331 * consumers are concerned. Remove them from the 2332 * list of directory entries so that no lookup or 2333 * directory traversal will find them. The node 2334 * not deallocated so existing holds are not affected. 2335 */ 2336 void 2337 sdev_stale(struct sdev_node *ddv) 2338 { 2339 struct sdev_node *dv; 2340 struct vnode *vp; 2341 2342 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 2343 2344 rw_enter(&ddv->sdev_contents, RW_WRITER); 2345 for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = SDEV_NEXT_ENTRY(ddv, dv)) { 2346 vp = SDEVTOV(dv); 2347 if (vp->v_type == VDIR) 2348 sdev_stale(dv); 2349 2350 sdcmn_err9(("sdev_stale: setting stale %s\n", 2351 dv->sdev_path)); 2352 dv->sdev_flags |= SDEV_STALE; 2353 avl_remove(&ddv->sdev_entries, dv); 2354 } 2355 ddv->sdev_flags |= SDEV_BUILD; 2356 rw_exit(&ddv->sdev_contents); 2357 } 2358 2359 /* 2360 * Given a directory node, clean out all the nodes beneath. 2361 * If expr is specified, clean node with names matching expr. 2362 * If SDEV_ENFORCE is specified in flags, busy nodes are made stale, 2363 * so they are excluded from future lookups. 
2364 */ 2365 int 2366 sdev_cleandir(struct sdev_node *ddv, char *expr, uint_t flags) 2367 { 2368 int error = 0; 2369 int busy = 0; 2370 struct vnode *vp; 2371 struct sdev_node *dv, *next = NULL; 2372 int bkstore = 0; 2373 int len = 0; 2374 char *bks_name = NULL; 2375 2376 ASSERT(SDEVTOV(ddv)->v_type == VDIR); 2377 2378 /* 2379 * We try our best to destroy all unused sdev_node's 2380 */ 2381 rw_enter(&ddv->sdev_contents, RW_WRITER); 2382 for (dv = SDEV_FIRST_ENTRY(ddv); dv; dv = next) { 2383 next = SDEV_NEXT_ENTRY(ddv, dv); 2384 vp = SDEVTOV(dv); 2385 2386 if (expr && gmatch(dv->sdev_name, expr) == 0) 2387 continue; 2388 2389 if (vp->v_type == VDIR && 2390 sdev_cleandir(dv, NULL, flags) != 0) { 2391 sdcmn_err9(("sdev_cleandir: dir %s busy\n", 2392 dv->sdev_name)); 2393 busy++; 2394 continue; 2395 } 2396 2397 if (vp->v_count > 0 && (flags & SDEV_ENFORCE) == 0) { 2398 sdcmn_err9(("sdev_cleandir: dir %s busy\n", 2399 dv->sdev_name)); 2400 busy++; 2401 continue; 2402 } 2403 2404 /* 2405 * at this point, either dv is not held or SDEV_ENFORCE 2406 * is specified. In either case, dv needs to be deleted 2407 */ 2408 SDEV_HOLD(dv); 2409 2410 bkstore = SDEV_IS_PERSIST(dv) ? 
1 : 0; 2411 if (bkstore && (vp->v_type == VDIR)) 2412 bkstore += 1; 2413 2414 if (bkstore) { 2415 len = strlen(dv->sdev_name) + 1; 2416 bks_name = kmem_alloc(len, KM_SLEEP); 2417 bcopy(dv->sdev_name, bks_name, len); 2418 } 2419 2420 error = sdev_dirdelete(ddv, dv); 2421 2422 if (error == EBUSY) { 2423 sdcmn_err9(("sdev_cleandir: dir busy\n")); 2424 busy++; 2425 } 2426 2427 /* take care the backing store clean up */ 2428 if (bkstore && (error == 0)) { 2429 ASSERT(bks_name); 2430 ASSERT(ddv->sdev_attrvp); 2431 2432 if (bkstore == 1) { 2433 error = VOP_REMOVE(ddv->sdev_attrvp, 2434 bks_name, kcred, NULL, 0); 2435 } else if (bkstore == 2) { 2436 error = VOP_RMDIR(ddv->sdev_attrvp, 2437 bks_name, ddv->sdev_attrvp, kcred, NULL, 0); 2438 } 2439 2440 /* do not propagate the backing store errors */ 2441 if (error) { 2442 sdcmn_err9(("sdev_cleandir: backing store" 2443 "not cleaned\n")); 2444 error = 0; 2445 } 2446 2447 bkstore = 0; 2448 kmem_free(bks_name, len); 2449 bks_name = NULL; 2450 len = 0; 2451 } 2452 } 2453 2454 ddv->sdev_flags |= SDEV_BUILD; 2455 rw_exit(&ddv->sdev_contents); 2456 2457 if (busy) { 2458 error = EBUSY; 2459 } 2460 2461 return (error); 2462 } 2463 2464 /* 2465 * a convenient wrapper for readdir() funcs 2466 */ 2467 size_t 2468 add_dir_entry(dirent64_t *de, char *nm, size_t size, ino_t ino, offset_t off) 2469 { 2470 size_t reclen = DIRENT64_RECLEN(strlen(nm)); 2471 if (reclen > size) 2472 return (0); 2473 2474 de->d_ino = (ino64_t)ino; 2475 de->d_off = (off64_t)off + 1; 2476 de->d_reclen = (ushort_t)reclen; 2477 (void) strncpy(de->d_name, nm, DIRENT64_NAMELEN(reclen)); 2478 return (reclen); 2479 } 2480 2481 /* 2482 * sdev_mount service routines 2483 */ 2484 int 2485 sdev_copyin_mountargs(struct mounta *uap, struct sdev_mountargs *args) 2486 { 2487 int error; 2488 2489 if (uap->datalen != sizeof (*args)) 2490 return (EINVAL); 2491 2492 if (error = copyin(uap->dataptr, args, sizeof (*args))) { 2493 cmn_err(CE_WARN, "sdev_copyin_mountargs: can not" 2494 
"get user data. error %d\n", error); 2495 return (EFAULT); 2496 } 2497 2498 return (0); 2499 } 2500 2501 #ifdef nextdp 2502 #undef nextdp 2503 #endif 2504 #define nextdp(dp) ((struct dirent64 *) \ 2505 (intptr_t)((char *)(dp) + (dp)->d_reclen)) 2506 2507 /* 2508 * readdir helper func 2509 */ 2510 int 2511 devname_readdir_func(vnode_t *vp, uio_t *uiop, cred_t *cred, int *eofp, 2512 int flags) 2513 { 2514 struct sdev_node *ddv = VTOSDEV(vp); 2515 struct sdev_node *dv; 2516 dirent64_t *dp; 2517 ulong_t outcount = 0; 2518 size_t namelen; 2519 ulong_t alloc_count; 2520 void *outbuf; 2521 struct iovec *iovp; 2522 int error = 0; 2523 size_t reclen; 2524 offset_t diroff; 2525 offset_t soff; 2526 int this_reclen; 2527 int (*vtor)(struct sdev_node *) = NULL; 2528 struct vattr attr; 2529 timestruc_t now; 2530 2531 ASSERT(ddv->sdev_attr || ddv->sdev_attrvp); 2532 ASSERT(RW_READ_HELD(&ddv->sdev_contents)); 2533 2534 if (uiop->uio_loffset >= MAXOFF_T) { 2535 if (eofp) 2536 *eofp = 1; 2537 return (0); 2538 } 2539 2540 if (uiop->uio_iovcnt != 1) 2541 return (EINVAL); 2542 2543 if (vp->v_type != VDIR) 2544 return (ENOTDIR); 2545 2546 if (ddv->sdev_flags & SDEV_VTOR) { 2547 vtor = (int (*)(struct sdev_node *))sdev_get_vtor(ddv); 2548 ASSERT(vtor); 2549 } 2550 2551 if (eofp != NULL) 2552 *eofp = 0; 2553 2554 soff = uiop->uio_loffset; 2555 iovp = uiop->uio_iov; 2556 alloc_count = iovp->iov_len; 2557 dp = outbuf = kmem_alloc(alloc_count, KM_SLEEP); 2558 outcount = 0; 2559 2560 if (ddv->sdev_state == SDEV_ZOMBIE) 2561 goto get_cache; 2562 2563 if (SDEV_IS_GLOBAL(ddv)) { 2564 2565 if ((sdev_boot_state == SDEV_BOOT_STATE_COMPLETE) && 2566 !sdev_reconfig_boot && (flags & SDEV_BROWSE) && 2567 !SDEV_IS_DYNAMIC(ddv) && !SDEV_IS_NO_NCACHE(ddv) && 2568 ((moddebug & MODDEBUG_FINI_EBUSY) == 0) && 2569 !DEVNAME_DEVFSADM_HAS_RUN(devfsadm_state) && 2570 !DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state) && 2571 !sdev_reconfig_disable) { 2572 /* 2573 * invoking "devfsadm" to do system device reconfig 2574 
*/ 2575 mutex_enter(&ddv->sdev_lookup_lock); 2576 SDEV_BLOCK_OTHERS(ddv, 2577 (SDEV_READDIR|SDEV_LGWAITING)); 2578 mutex_exit(&ddv->sdev_lookup_lock); 2579 2580 sdcmn_err8(("readdir of %s by %s: reconfig\n", 2581 ddv->sdev_path, curproc->p_user.u_comm)); 2582 if (sdev_reconfig_verbose) { 2583 cmn_err(CE_CONT, 2584 "?readdir of %s by %s: reconfig\n", 2585 ddv->sdev_path, curproc->p_user.u_comm); 2586 } 2587 2588 sdev_devfsadmd_thread(ddv, NULL, kcred); 2589 } else if (DEVNAME_DEVFSADM_IS_RUNNING(devfsadm_state)) { 2590 /* 2591 * compensate the "ls" started later than "devfsadm" 2592 */ 2593 mutex_enter(&ddv->sdev_lookup_lock); 2594 SDEV_BLOCK_OTHERS(ddv, (SDEV_READDIR|SDEV_LGWAITING)); 2595 mutex_exit(&ddv->sdev_lookup_lock); 2596 } 2597 2598 /* 2599 * release the contents lock so that 2600 * the cache may be updated by devfsadmd 2601 */ 2602 rw_exit(&ddv->sdev_contents); 2603 mutex_enter(&ddv->sdev_lookup_lock); 2604 if (SDEV_IS_READDIR(ddv)) 2605 (void) sdev_wait4lookup(ddv, SDEV_READDIR); 2606 mutex_exit(&ddv->sdev_lookup_lock); 2607 rw_enter(&ddv->sdev_contents, RW_READER); 2608 2609 sdcmn_err4(("readdir of directory %s by %s\n", 2610 ddv->sdev_name, curproc->p_user.u_comm)); 2611 if (ddv->sdev_flags & SDEV_BUILD) { 2612 if (SDEV_IS_PERSIST(ddv)) { 2613 error = sdev_filldir_from_store(ddv, 2614 alloc_count, cred); 2615 } 2616 ddv->sdev_flags &= ~SDEV_BUILD; 2617 } 2618 } 2619 2620 get_cache: 2621 /* handle "." and ".." 
*/ 2622 diroff = 0; 2623 if (soff == 0) { 2624 /* first time */ 2625 this_reclen = DIRENT64_RECLEN(1); 2626 if (alloc_count < this_reclen) { 2627 error = EINVAL; 2628 goto done; 2629 } 2630 2631 dp->d_ino = (ino64_t)ddv->sdev_ino; 2632 dp->d_off = (off64_t)1; 2633 dp->d_reclen = (ushort_t)this_reclen; 2634 2635 (void) strncpy(dp->d_name, ".", 2636 DIRENT64_NAMELEN(this_reclen)); 2637 outcount += dp->d_reclen; 2638 dp = nextdp(dp); 2639 } 2640 2641 diroff++; 2642 if (soff <= 1) { 2643 this_reclen = DIRENT64_RECLEN(2); 2644 if (alloc_count < outcount + this_reclen) { 2645 error = EINVAL; 2646 goto done; 2647 } 2648 2649 dp->d_reclen = (ushort_t)this_reclen; 2650 dp->d_ino = (ino64_t)ddv->sdev_dotdot->sdev_ino; 2651 dp->d_off = (off64_t)2; 2652 2653 (void) strncpy(dp->d_name, "..", 2654 DIRENT64_NAMELEN(this_reclen)); 2655 outcount += dp->d_reclen; 2656 2657 dp = nextdp(dp); 2658 } 2659 2660 2661 /* gets the cache */ 2662 diroff++; 2663 for (dv = SDEV_FIRST_ENTRY(ddv); dv; 2664 dv = SDEV_NEXT_ENTRY(ddv, dv), diroff++) { 2665 sdcmn_err3(("sdev_readdir: diroff %lld soff %lld for '%s' \n", 2666 diroff, soff, dv->sdev_name)); 2667 2668 /* bypassing pre-matured nodes */ 2669 if (diroff < soff || (dv->sdev_state != SDEV_READY)) { 2670 sdcmn_err3(("sdev_readdir: pre-mature node " 2671 "%s\n", dv->sdev_name)); 2672 continue; 2673 } 2674 2675 /* 2676 * Check validity of node 2677 */ 2678 if (vtor) { 2679 switch (vtor(dv)) { 2680 case SDEV_VTOR_VALID: 2681 break; 2682 case SDEV_VTOR_INVALID: 2683 case SDEV_VTOR_SKIP: 2684 continue; 2685 default: 2686 cmn_err(CE_PANIC, 2687 "dev fs: validator failed: %s(%p)\n", 2688 dv->sdev_name, (void *)dv); 2689 break; 2690 /*NOTREACHED*/ 2691 } 2692 } 2693 2694 namelen = strlen(dv->sdev_name); 2695 reclen = DIRENT64_RECLEN(namelen); 2696 if (outcount + reclen > alloc_count) { 2697 goto full; 2698 } 2699 dp->d_reclen = (ushort_t)reclen; 2700 dp->d_ino = (ino64_t)dv->sdev_ino; 2701 dp->d_off = (off64_t)diroff + 1; 2702 (void) 
strncpy(dp->d_name, dv->sdev_name, 2703 DIRENT64_NAMELEN(reclen)); 2704 outcount += reclen; 2705 dp = nextdp(dp); 2706 } 2707 2708 full: 2709 sdcmn_err4(("sdev_readdir: moving %lu bytes: " 2710 "diroff %lld, soff %lld, dv %p\n", outcount, diroff, soff, 2711 (void *)dv)); 2712 2713 if (outcount) 2714 error = uiomove(outbuf, outcount, UIO_READ, uiop); 2715 2716 if (!error) { 2717 uiop->uio_loffset = diroff; 2718 if (eofp) 2719 *eofp = dv ? 0 : 1; 2720 } 2721 2722 2723 if (ddv->sdev_attrvp) { 2724 gethrestime(&now); 2725 attr.va_ctime = now; 2726 attr.va_atime = now; 2727 attr.va_mask = AT_CTIME|AT_ATIME; 2728 2729 (void) VOP_SETATTR(ddv->sdev_attrvp, &attr, 0, kcred, NULL); 2730 } 2731 done: 2732 kmem_free(outbuf, alloc_count); 2733 return (error); 2734 } 2735 2736 static int 2737 sdev_modctl_lookup(const char *path, vnode_t **r_vp) 2738 { 2739 vnode_t *vp; 2740 vnode_t *cvp; 2741 struct sdev_node *svp; 2742 char *nm; 2743 struct pathname pn; 2744 int error; 2745 int persisted = 0; 2746 2747 ASSERT(INGLOBALZONE(curproc)); 2748 2749 if (error = pn_get((char *)path, UIO_SYSSPACE, &pn)) 2750 return (error); 2751 nm = kmem_alloc(MAXNAMELEN, KM_SLEEP); 2752 2753 vp = rootdir; 2754 VN_HOLD(vp); 2755 2756 while (pn_pathleft(&pn)) { 2757 ASSERT(vp->v_type == VDIR || vp->v_type == VLNK); 2758 (void) pn_getcomponent(&pn, nm); 2759 2760 /* 2761 * Deal with the .. special case where we may be 2762 * traversing up across a mount point, to the 2763 * root of this filesystem or global root. 2764 */ 2765 if (nm[0] == '.' && nm[1] == '.' 
&& nm[2] == 0) { 2766 checkforroot: 2767 if (VN_CMP(vp, rootdir)) { 2768 nm[1] = 0; 2769 } else if (vp->v_flag & VROOT) { 2770 vfs_t *vfsp; 2771 cvp = vp; 2772 vfsp = cvp->v_vfsp; 2773 vfs_rlock_wait(vfsp); 2774 vp = cvp->v_vfsp->vfs_vnodecovered; 2775 if (vp == NULL || 2776 (cvp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) { 2777 vfs_unlock(vfsp); 2778 VN_RELE(cvp); 2779 error = EIO; 2780 break; 2781 } 2782 VN_HOLD(vp); 2783 vfs_unlock(vfsp); 2784 VN_RELE(cvp); 2785 cvp = NULL; 2786 goto checkforroot; 2787 } 2788 } 2789 2790 error = VOP_LOOKUP(vp, nm, &cvp, NULL, 0, NULL, kcred, NULL, 2791 NULL, NULL); 2792 if (error) { 2793 VN_RELE(vp); 2794 break; 2795 } 2796 2797 /* traverse mount points encountered on our journey */ 2798 if (vn_ismntpt(cvp) && (error = traverse(&cvp)) != 0) { 2799 VN_RELE(vp); 2800 VN_RELE(cvp); 2801 break; 2802 } 2803 2804 /* 2805 * symbolic link, can be either relative and absolute 2806 */ 2807 if ((cvp->v_type == VLNK) && pn_pathleft(&pn)) { 2808 struct pathname linkpath; 2809 pn_alloc(&linkpath); 2810 if (error = pn_getsymlink(cvp, &linkpath, kcred)) { 2811 pn_free(&linkpath); 2812 break; 2813 } 2814 if (pn_pathleft(&linkpath) == 0) 2815 (void) pn_set(&linkpath, "."); 2816 error = pn_insert(&pn, &linkpath, strlen(nm)); 2817 pn_free(&linkpath); 2818 if (pn.pn_pathlen == 0) { 2819 VN_RELE(vp); 2820 return (ENOENT); 2821 } 2822 if (pn.pn_path[0] == '/') { 2823 pn_skipslash(&pn); 2824 VN_RELE(vp); 2825 VN_RELE(cvp); 2826 vp = rootdir; 2827 VN_HOLD(vp); 2828 } else { 2829 VN_RELE(cvp); 2830 } 2831 continue; 2832 } 2833 2834 VN_RELE(vp); 2835 2836 /* 2837 * Direct the operation to the persisting filesystem 2838 * underlying /dev. Bail if we encounter a 2839 * non-persistent dev entity here. 
2840 */ 2841 if (cvp->v_vfsp->vfs_fstype == devtype) { 2842 2843 if ((VTOSDEV(cvp)->sdev_flags & SDEV_PERSIST) == 0) { 2844 error = ENOENT; 2845 VN_RELE(cvp); 2846 break; 2847 } 2848 2849 if (VTOSDEV(cvp) == NULL) { 2850 error = ENOENT; 2851 VN_RELE(cvp); 2852 break; 2853 } 2854 svp = VTOSDEV(cvp); 2855 if ((vp = svp->sdev_attrvp) == NULL) { 2856 error = ENOENT; 2857 VN_RELE(cvp); 2858 break; 2859 } 2860 persisted = 1; 2861 VN_HOLD(vp); 2862 VN_RELE(cvp); 2863 cvp = vp; 2864 } 2865 2866 vp = cvp; 2867 pn_skipslash(&pn); 2868 } 2869 2870 kmem_free(nm, MAXNAMELEN); 2871 pn_free(&pn); 2872 2873 if (error) 2874 return (error); 2875 2876 /* 2877 * Only return persisted nodes in the filesystem underlying /dev. 2878 */ 2879 if (!persisted) { 2880 VN_RELE(vp); 2881 return (ENOENT); 2882 } 2883 2884 *r_vp = vp; 2885 return (0); 2886 } 2887 2888 int 2889 sdev_modctl_readdir(const char *dir, char ***dirlistp, 2890 int *npathsp, int *npathsp_alloc, int checking_empty) 2891 { 2892 char **pathlist = NULL; 2893 char **newlist = NULL; 2894 int npaths = 0; 2895 int npaths_alloc = 0; 2896 dirent64_t *dbuf = NULL; 2897 int n; 2898 char *s; 2899 int error; 2900 vnode_t *vp; 2901 int eof; 2902 struct iovec iov; 2903 struct uio uio; 2904 struct dirent64 *dp; 2905 size_t dlen; 2906 size_t dbuflen; 2907 int ndirents = 64; 2908 char *nm; 2909 2910 error = sdev_modctl_lookup(dir, &vp); 2911 sdcmn_err11(("modctl readdir: %s by %s: %s\n", 2912 dir, curproc->p_user.u_comm, 2913 (error == 0) ? 
"ok" : "failed")); 2914 if (error) 2915 return (error); 2916 2917 dlen = ndirents * (sizeof (*dbuf)); 2918 dbuf = kmem_alloc(dlen, KM_SLEEP); 2919 2920 uio.uio_iov = &iov; 2921 uio.uio_iovcnt = 1; 2922 uio.uio_segflg = UIO_SYSSPACE; 2923 uio.uio_fmode = 0; 2924 uio.uio_extflg = UIO_COPY_CACHED; 2925 uio.uio_loffset = 0; 2926 uio.uio_llimit = MAXOFFSET_T; 2927 2928 eof = 0; 2929 error = 0; 2930 while (!error && !eof) { 2931 uio.uio_resid = dlen; 2932 iov.iov_base = (char *)dbuf; 2933 iov.iov_len = dlen; 2934 2935 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 2936 error = VOP_READDIR(vp, &uio, kcred, &eof, NULL, 0); 2937 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 2938 2939 dbuflen = dlen - uio.uio_resid; 2940 2941 if (error || dbuflen == 0) 2942 break; 2943 2944 for (dp = dbuf; ((intptr_t)dp < (intptr_t)dbuf + dbuflen); 2945 dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen)) { 2946 2947 nm = dp->d_name; 2948 2949 if (strcmp(nm, ".") == 0 || strcmp(nm, "..") == 0) 2950 continue; 2951 if (npaths == npaths_alloc) { 2952 npaths_alloc += 64; 2953 newlist = (char **) 2954 kmem_zalloc((npaths_alloc + 1) * 2955 sizeof (char *), KM_SLEEP); 2956 if (pathlist) { 2957 bcopy(pathlist, newlist, 2958 npaths * sizeof (char *)); 2959 kmem_free(pathlist, 2960 (npaths + 1) * sizeof (char *)); 2961 } 2962 pathlist = newlist; 2963 } 2964 n = strlen(nm) + 1; 2965 s = kmem_alloc(n, KM_SLEEP); 2966 bcopy(nm, s, n); 2967 pathlist[npaths++] = s; 2968 sdcmn_err11((" %s/%s\n", dir, s)); 2969 2970 /* if checking empty, one entry is as good as many */ 2971 if (checking_empty) { 2972 eof = 1; 2973 break; 2974 } 2975 } 2976 } 2977 2978 exit: 2979 VN_RELE(vp); 2980 2981 if (dbuf) 2982 kmem_free(dbuf, dlen); 2983 2984 if (error) 2985 return (error); 2986 2987 *dirlistp = pathlist; 2988 *npathsp = npaths; 2989 *npathsp_alloc = npaths_alloc; 2990 2991 return (0); 2992 } 2993 2994 void 2995 sdev_modctl_readdir_free(char **pathlist, int npaths, int npaths_alloc) 2996 { 2997 int i, n; 2998 2999 for (i = 
0; i < npaths; i++) { 3000 n = strlen(pathlist[i]) + 1; 3001 kmem_free(pathlist[i], n); 3002 } 3003 3004 kmem_free(pathlist, (npaths_alloc + 1) * sizeof (char *)); 3005 } 3006 3007 int 3008 sdev_modctl_devexists(const char *path) 3009 { 3010 vnode_t *vp; 3011 int error; 3012 3013 error = sdev_modctl_lookup(path, &vp); 3014 sdcmn_err11(("modctl dev exists: %s by %s: %s\n", 3015 path, curproc->p_user.u_comm, 3016 (error == 0) ? "ok" : "failed")); 3017 if (error == 0) 3018 VN_RELE(vp); 3019 3020 return (error); 3021 } 3022 3023 extern int sdev_vnodeops_tbl_size; 3024 3025 /* 3026 * construct a new template with overrides from vtab 3027 */ 3028 static fs_operation_def_t * 3029 sdev_merge_vtab(const fs_operation_def_t tab[]) 3030 { 3031 fs_operation_def_t *new; 3032 const fs_operation_def_t *tab_entry; 3033 3034 /* make a copy of standard vnode ops table */ 3035 new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP); 3036 bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size); 3037 3038 /* replace the overrides from tab */ 3039 for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) { 3040 fs_operation_def_t *std_entry = new; 3041 while (std_entry->name) { 3042 if (strcmp(tab_entry->name, std_entry->name) == 0) { 3043 std_entry->func = tab_entry->func; 3044 break; 3045 } 3046 std_entry++; 3047 } 3048 if (std_entry->name == NULL) 3049 cmn_err(CE_NOTE, "sdev_merge_vtab: entry %s unused.", 3050 tab_entry->name); 3051 } 3052 3053 return (new); 3054 } 3055 3056 /* free memory allocated by sdev_merge_vtab */ 3057 static void 3058 sdev_free_vtab(fs_operation_def_t *new) 3059 { 3060 kmem_free(new, sdev_vnodeops_tbl_size); 3061 } 3062 3063 /* 3064 * a generic setattr() function 3065 * 3066 * note: flags only supports AT_UID and AT_GID. 3067 * Future enhancements can be done for other types, e.g. 
AT_MODE 3068 */ 3069 int 3070 devname_setattr_func(struct vnode *vp, struct vattr *vap, int flags, 3071 struct cred *cred, int (*callback)(struct sdev_node *, struct vattr *, 3072 int), int protocol) 3073 { 3074 struct sdev_node *dv = VTOSDEV(vp); 3075 struct sdev_node *parent = dv->sdev_dotdot; 3076 struct vattr *get; 3077 uint_t mask = vap->va_mask; 3078 int error; 3079 3080 /* some sanity checks */ 3081 if (vap->va_mask & AT_NOSET) 3082 return (EINVAL); 3083 3084 if (vap->va_mask & AT_SIZE) { 3085 if (vp->v_type == VDIR) { 3086 return (EISDIR); 3087 } 3088 } 3089 3090 /* no need to set attribute, but do not fail either */ 3091 ASSERT(parent); 3092 rw_enter(&parent->sdev_contents, RW_READER); 3093 if (dv->sdev_state == SDEV_ZOMBIE) { 3094 rw_exit(&parent->sdev_contents); 3095 return (0); 3096 } 3097 3098 /* If backing store exists, just set it. */ 3099 if (dv->sdev_attrvp) { 3100 rw_exit(&parent->sdev_contents); 3101 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 3102 } 3103 3104 /* 3105 * Otherwise, for nodes with the persistence attribute, create it. 
3106 */ 3107 ASSERT(dv->sdev_attr); 3108 if (SDEV_IS_PERSIST(dv) || 3109 ((vap->va_mask & ~AT_TIMES) != 0 && !SDEV_IS_DYNAMIC(dv))) { 3110 sdev_vattr_merge(dv, vap); 3111 rw_enter(&dv->sdev_contents, RW_WRITER); 3112 error = sdev_shadow_node(dv, cred); 3113 rw_exit(&dv->sdev_contents); 3114 rw_exit(&parent->sdev_contents); 3115 3116 if (error) 3117 return (error); 3118 return (VOP_SETATTR(dv->sdev_attrvp, vap, flags, cred, NULL)); 3119 } 3120 3121 3122 /* 3123 * sdev_attr was allocated in sdev_mknode 3124 */ 3125 rw_enter(&dv->sdev_contents, RW_WRITER); 3126 error = secpolicy_vnode_setattr(cred, vp, vap, 3127 dv->sdev_attr, flags, sdev_unlocked_access, dv); 3128 if (error) { 3129 rw_exit(&dv->sdev_contents); 3130 rw_exit(&parent->sdev_contents); 3131 return (error); 3132 } 3133 3134 get = dv->sdev_attr; 3135 if (mask & AT_MODE) { 3136 get->va_mode &= S_IFMT; 3137 get->va_mode |= vap->va_mode & ~S_IFMT; 3138 } 3139 3140 if ((mask & AT_UID) || (mask & AT_GID)) { 3141 if (mask & AT_UID) 3142 get->va_uid = vap->va_uid; 3143 if (mask & AT_GID) 3144 get->va_gid = vap->va_gid; 3145 /* 3146 * a callback must be provided if the protocol is set 3147 */ 3148 if ((protocol & AT_UID) || (protocol & AT_GID)) { 3149 ASSERT(callback); 3150 error = callback(dv, get, protocol); 3151 if (error) { 3152 rw_exit(&dv->sdev_contents); 3153 rw_exit(&parent->sdev_contents); 3154 return (error); 3155 } 3156 } 3157 } 3158 3159 if (mask & AT_ATIME) 3160 get->va_atime = vap->va_atime; 3161 if (mask & AT_MTIME) 3162 get->va_mtime = vap->va_mtime; 3163 if (mask & (AT_MODE | AT_UID | AT_GID | AT_CTIME)) { 3164 gethrestime(&get->va_ctime); 3165 } 3166 3167 sdev_vattr_merge(dv, get); 3168 rw_exit(&dv->sdev_contents); 3169 rw_exit(&parent->sdev_contents); 3170 return (0); 3171 } 3172 3173 /* 3174 * a generic inactive() function 3175 */ 3176 /*ARGSUSED*/ 3177 void 3178 devname_inactive_func(struct vnode *vp, struct cred *cred, 3179 void (*callback)(struct vnode *)) 3180 { 3181 int clean; 3182 struct 
sdev_node *dv = VTOSDEV(vp); 3183 struct sdev_node *ddv = dv->sdev_dotdot; 3184 int state; 3185 3186 rw_enter(&ddv->sdev_contents, RW_WRITER); 3187 state = dv->sdev_state; 3188 3189 mutex_enter(&vp->v_lock); 3190 ASSERT(vp->v_count >= 1); 3191 3192 if (vp->v_count == 1 && callback != NULL) 3193 callback(vp); 3194 3195 clean = (vp->v_count == 1) && (state == SDEV_ZOMBIE); 3196 3197 /* 3198 * last ref count on the ZOMBIE node is released. 3199 * clean up the sdev_node, and 3200 * release the hold on the backing store node so that 3201 * the ZOMBIE backing stores also cleaned out. 3202 */ 3203 if (clean) { 3204 ASSERT(ddv); 3205 3206 ddv->sdev_nlink--; 3207 if (vp->v_type == VDIR) { 3208 dv->sdev_nlink--; 3209 } 3210 if ((dv->sdev_flags & SDEV_STALE) == 0) 3211 avl_remove(&ddv->sdev_entries, dv); 3212 dv->sdev_nlink--; 3213 --vp->v_count; 3214 mutex_exit(&vp->v_lock); 3215 sdev_nodedestroy(dv, 0); 3216 } else { 3217 --vp->v_count; 3218 mutex_exit(&vp->v_lock); 3219 } 3220 rw_exit(&ddv->sdev_contents); 3221 } 3222