1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 /* 29 * This is the device filesystem. 30 * 31 * It is a combination of a namer to drive autoconfiguration, 32 * plus the access methods for the device drivers of the system. 33 * 34 * The prototype is fairly dependent on specfs for the latter part 35 * of its implementation, though a final version would integrate the two. 
36 */ 37 #include <sys/types.h> 38 #include <sys/param.h> 39 #include <sys/sysmacros.h> 40 #include <sys/systm.h> 41 #include <sys/kmem.h> 42 #include <sys/time.h> 43 #include <sys/pathname.h> 44 #include <sys/vfs.h> 45 #include <sys/vfs_opreg.h> 46 #include <sys/vnode.h> 47 #include <sys/stat.h> 48 #include <sys/uio.h> 49 #include <sys/stat.h> 50 #include <sys/errno.h> 51 #include <sys/cmn_err.h> 52 #include <sys/cred.h> 53 #include <sys/statvfs.h> 54 #include <sys/mount.h> 55 #include <sys/debug.h> 56 #include <sys/modctl.h> 57 #include <fs/fs_subr.h> 58 #include <sys/fs/dv_node.h> 59 #include <sys/fs/snode.h> 60 #include <sys/sunndi.h> 61 #include <sys/policy.h> 62 #include <sys/sunmdi.h> 63 64 /* 65 * devfs vfs operations. 66 */ 67 static int devfs_mount(struct vfs *, struct vnode *, struct mounta *, 68 struct cred *); 69 static int devfs_unmount(struct vfs *, int, struct cred *); 70 static int devfs_root(struct vfs *, struct vnode **); 71 static int devfs_statvfs(struct vfs *, struct statvfs64 *); 72 static int devfs_mountroot(struct vfs *, enum whymountroot); 73 74 static int devfsinit(int, char *); 75 76 static vfsdef_t devfs_vfssw = { 77 VFSDEF_VERSION, 78 "devfs", /* type name string */ 79 devfsinit, /* init routine */ 80 0, /* flags */ 81 NULL /* mount options table prototype */ 82 }; 83 84 static kmutex_t devfs_lock; /* protects global data */ 85 static int devfstype; /* fstype */ 86 static dev_t devfsdev; /* the fictious 'device' we live on */ 87 static struct devfs_data *devfs_mntinfo; /* linked list of instances */ 88 89 /* 90 * Module linkage information 91 */ 92 static struct modlfs modlfs = { 93 &mod_fsops, "devices filesystem %I%", &devfs_vfssw 94 }; 95 96 static struct modlinkage modlinkage = { 97 MODREV_1, (void *)&modlfs, NULL 98 }; 99 100 int 101 _init(void) 102 { 103 int e; 104 105 mutex_init(&devfs_lock, "devfs lock", MUTEX_DEFAULT, NULL); 106 dv_node_cache_init(); 107 if ((e = mod_install(&modlinkage)) != 0) { 108 dv_node_cache_fini(); 109 
mutex_destroy(&devfs_lock); 110 return (e); 111 } 112 dcmn_err(("devfs loaded\n")); 113 return (0); 114 } 115 116 int 117 _fini(void) 118 { 119 return (EBUSY); 120 } 121 122 int 123 _info(struct modinfo *modinfop) 124 { 125 return (mod_info(&modlinkage, modinfop)); 126 } 127 128 /*ARGSUSED1*/ 129 static int 130 devfsinit(int fstype, char *name) 131 { 132 static const fs_operation_def_t devfs_vfsops_template[] = { 133 VFSNAME_MOUNT, { .vfs_mount = devfs_mount }, 134 VFSNAME_UNMOUNT, { .vfs_unmount = devfs_unmount }, 135 VFSNAME_ROOT, { .vfs_root = devfs_root }, 136 VFSNAME_STATVFS, { .vfs_statvfs = devfs_statvfs }, 137 VFSNAME_SYNC, { .vfs_sync = fs_sync }, 138 VFSNAME_MOUNTROOT, { .vfs_mountroot = devfs_mountroot }, 139 NULL, NULL 140 }; 141 int error; 142 int dev; 143 extern major_t getudev(void); /* gack - what a function */ 144 145 devfstype = fstype; 146 /* 147 * Associate VFS ops vector with this fstype 148 */ 149 error = vfs_setfsops(fstype, devfs_vfsops_template, NULL); 150 if (error != 0) { 151 cmn_err(CE_WARN, "devfsinit: bad vfs ops template"); 152 return (error); 153 } 154 155 error = vn_make_ops("dev fs", dv_vnodeops_template, &dv_vnodeops); 156 if (error != 0) { 157 (void) vfs_freevfsops_by_type(fstype); 158 cmn_err(CE_WARN, "devfsinit: bad vnode ops template"); 159 return (error); 160 } 161 162 /* 163 * Invent a dev_t (sigh). 164 */ 165 if ((dev = getudev()) == DDI_MAJOR_T_NONE) { 166 cmn_err(CE_NOTE, "%s: can't get unique dev", devfs_vfssw.name); 167 dev = 0; 168 } 169 devfsdev = makedevice(dev, 0); 170 171 return (0); 172 } 173 174 /* 175 * The name of the mount point and the name of the attribute 176 * filesystem are passed down from userland for now. 
177 */ 178 static int 179 devfs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap, 180 struct cred *cr) 181 { 182 struct devfs_data *devfs_data; 183 struct vnode *avp; 184 struct dv_node *dv; 185 struct vattr va; 186 187 dcmn_err(("devfs_mount\n")); 188 189 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) 190 return (EPERM); 191 192 /* 193 * check that the mount point is sane 194 */ 195 if (mvp->v_type != VDIR) 196 return (ENOTDIR); 197 198 ASSERT(uap->flags & MS_SYSSPACE); 199 /* 200 * Devfs can only be mounted from kernel during boot. 201 * avp is the existing /devices, the same as the mount point. 202 */ 203 avp = mvp; 204 205 /* 206 * Create and initialize the vfs-private data. 207 * This includes a hand-crafted root vnode (we build 208 * this here mostly so that traverse() doesn't sleep 209 * in VFS_ROOT()). 210 */ 211 mutex_enter(&devfs_lock); 212 ASSERT(devfs_mntinfo == NULL); 213 dv = dv_mkroot(vfsp, devfsdev); 214 dv->dv_attrvp = avp; /* attribute root vp */ 215 216 ASSERT(dv == dv->dv_dotdot); 217 218 devfs_data = kmem_zalloc(sizeof (struct devfs_data), KM_SLEEP); 219 devfs_data->devfs_vfsp = vfsp; 220 devfs_data->devfs_root = dv; 221 222 vfsp->vfs_data = (caddr_t)devfs_data; 223 vfsp->vfs_fstype = devfstype; 224 vfsp->vfs_dev = devfsdev; 225 vfsp->vfs_bsize = DEV_BSIZE; 226 vfsp->vfs_mtime = ddi_get_time(); 227 vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devfstype); 228 229 /* We're there. */ 230 devfs_mntinfo = devfs_data; 231 mutex_exit(&devfs_lock); 232 233 va.va_mask = AT_ATIME|AT_MTIME; 234 gethrestime(&va.va_atime); 235 gethrestime(&va.va_mtime); 236 (void) VOP_SETATTR(DVTOV(dv), &va, 0, cr, NULL); 237 return (0); 238 } 239 240 241 /* 242 * We never unmount devfs in a real production system. 
243 */ 244 /*ARGSUSED*/ 245 static int 246 devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr) 247 { 248 return (EBUSY); 249 } 250 251 /* 252 * return root vnode for given vfs 253 */ 254 static int 255 devfs_root(struct vfs *vfsp, struct vnode **vpp) 256 { 257 dcmn_err(("devfs_root\n")); 258 *vpp = DVTOV(VFSTODVFS(vfsp)->devfs_root); 259 VN_HOLD(*vpp); 260 return (0); 261 } 262 263 /* 264 * return 'generic superblock' information to userland. 265 * 266 * not much that we can usefully admit to here 267 */ 268 static int 269 devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp) 270 { 271 extern kmem_cache_t *dv_node_cache; 272 273 dev32_t d32; 274 275 dcmn_err(("devfs_statvfs\n")); 276 bzero(sbp, sizeof (*sbp)); 277 sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize; 278 /* 279 * We could compute the number of devfsnodes here .. but since 280 * it's dynamic anyway, it's not clear how useful this is. 281 */ 282 sbp->f_files = kmem_cache_stat(dv_node_cache, "alloc"); 283 284 /* no illusions that free/avail files is relevant to devfs */ 285 sbp->f_ffree = 0; 286 sbp->f_favail = 0; 287 288 /* no illusions that blocks are relevant to devfs */ 289 sbp->f_bfree = 0; 290 sbp->f_bavail = 0; 291 sbp->f_blocks = 0; 292 293 (void) cmpldev(&d32, vfsp->vfs_dev); 294 sbp->f_fsid = d32; 295 (void) strcpy(sbp->f_basetype, vfssw[devfstype].vsw_name); 296 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 297 sbp->f_namemax = MAXNAMELEN - 1; 298 (void) strcpy(sbp->f_fstr, "devices"); 299 300 return (0); 301 } 302 303 /* 304 * devfs always mount after root is mounted, so this should never 305 * be invoked. 
306 */ 307 /*ARGSUSED*/ 308 static int 309 devfs_mountroot(struct vfs *vfsp, enum whymountroot why) 310 { 311 dcmn_err(("devfs_mountroot\n")); 312 313 return (EINVAL); 314 } 315 316 struct dv_node * 317 devfs_dip_to_dvnode(dev_info_t *dip) 318 { 319 char *dirpath; 320 struct vnode *dirvp; 321 322 ASSERT(dip != NULL); 323 324 /* no-op if devfs not mounted yet */ 325 if (devfs_mntinfo == NULL) 326 return (NULL); 327 328 /* 329 * The lookupname below only looks up cached dv_nodes 330 * because devfs_clean_key is set in thread specific data. 331 */ 332 dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 333 (void) ddi_pathname(dip, dirpath); 334 if (devfs_lookupname(dirpath, NULLVPP, &dirvp)) { 335 dcmn_err(("directory %s not found\n", dirpath)); 336 kmem_free(dirpath, MAXPATHLEN); 337 return (NULL); 338 } 339 340 kmem_free(dirpath, MAXPATHLEN); 341 return (VTODV(dirvp)); 342 } 343 344 /* 345 * If DV_CLEAN_FORCE devfs_clean is issued with a dip that is not the root 346 * and not a vHCI we also need to clean any vHCI branches because they 347 * may contain pHCI nodes. A detach_node() of a pHCI will fail if its 348 * mdi_devi_offline() fails, and the mdi_devi_offline() of the last 349 * pHCI will fail unless an ndi_devi_offline() of the Client nodes under 350 * the vHCI is successful - which requires a clean vHCI branch to removed 351 * the devi_refs associated with devfs vnodes. 352 */ 353 static int 354 devfs_clean_vhci(dev_info_t *dip, void *args) 355 { 356 struct dv_node *dvp; 357 uint_t flags = (uint_t)(uintptr_t)args; 358 359 (void) tsd_set(devfs_clean_key, (void *)1); 360 dvp = devfs_dip_to_dvnode(dip); 361 if (dvp) { 362 (void) dv_cleandir(dvp, NULL, flags); 363 VN_RELE(DVTOV(dvp)); 364 } 365 (void) tsd_set(devfs_clean_key, NULL); 366 return (DDI_WALK_CONTINUE); 367 } 368 369 /* 370 * devfs_clean() 371 * 372 * Destroy unreferenced dv_node's and detach devices. 373 * 374 * devfs_clean will try its best to clean up unused nodes. 
It is
 * no longer valid to assume that just because devfs_clean fails,
 * the device is not removable. This is because device contracts
 * can result in userland processes releasing a device during the
 * device offline process in the kernel. Thus it is no longer
 * correct to fail an offline just because devfs_clean finds
 * referenced dv_nodes. To enforce this, devfs_clean() always
 * returns success i.e. 0.
 *
 * devfs_clean() may return before removing all possible nodes if
 * we cannot acquire locks in areas of the code where potential for
 * deadlock exists (see comments in dv_find() and dv_cleandir() for
 * examples of this).
 *
 * devfs caches unreferenced dv_nodes to speed up the performance
 * of ls, find, etc. devfs_clean() is invoked to clean up cached
 * dv_nodes to reclaim memory as well as to facilitate device
 * removal (dv_nodes reference devinfo nodes, which prevents driver
 * detach).
 *
 * If a shell parks in a /devices directory, the dv_node will be
 * held, preventing the corresponding device from being detached.
 * This would be a denial of service against DR. To prevent this,
 * DR code calls devfs_clean() with the DV_CLEAN_FORCE flag.
 * The dv_cleandir() implementation does the right thing to ensure
 * successful DR.
400 */ 401 int 402 devfs_clean(dev_info_t *dip, char *devnm, uint_t flags) 403 { 404 struct dv_node *dvp; 405 406 dcmn_err(("devfs_unconfigure: dip = 0x%p, flags = 0x%x", 407 (void *)dip, flags)); 408 409 /* avoid recursion back into the device tree */ 410 (void) tsd_set(devfs_clean_key, (void *)1); 411 dvp = devfs_dip_to_dvnode(dip); 412 if (dvp == NULL) { 413 (void) tsd_set(devfs_clean_key, NULL); 414 return (0); 415 } 416 417 (void) dv_cleandir(dvp, devnm, flags); 418 (void) tsd_set(devfs_clean_key, NULL); 419 VN_RELE(DVTOV(dvp)); 420 421 /* 422 * If we are doing a DV_CLEAN_FORCE, and we did not start at the 423 * root, and we did not start at a vHCI node then clean vHCI 424 * branches too. Failure to clean vHCI branch does not cause EBUSY. 425 * 426 * Also, to accommodate nexus callers that clean 'self' to DR 'child' 427 * (like pcihp) we clean vHCIs even when dv_cleandir() of dip branch 428 * above fails - this prevents a busy DR 'child' sibling from causing 429 * the DR of 'child' to fail because a vHCI branch was not cleaned. 430 */ 431 if ((flags & DV_CLEAN_FORCE) && (dip != ddi_root_node()) && 432 (mdi_component_is_vhci(dip, NULL) != MDI_SUCCESS)) { 433 /* 434 * NOTE: for backport the following is recommended 435 * (void) devfs_clean_vhci(scsi_vhci_dip, 436 * (void *)(uintptr_t)flags); 437 */ 438 mdi_walk_vhcis(devfs_clean_vhci, (void *)(uintptr_t)flags); 439 } 440 441 return (0); 442 } 443 444 /* 445 * lookup a devfs relative pathname, returning held vnodes for the final 446 * component and the containing directory (if requested). 447 * 448 * NOTE: We can't use lookupname because this would use the current 449 * processes credentials (CRED) in the call lookuppnvp instead 450 * of kcred. It also does not give you the flexibility so 451 * specify the directory to start the resolution in (devicesdir). 
452 */ 453 int 454 devfs_lookupname( 455 char *pathname, /* user pathname */ 456 vnode_t **dirvpp, /* ret for ptr to parent dir vnode */ 457 vnode_t **compvpp) /* ret for ptr to component vnode */ 458 { 459 struct pathname pn; 460 int error; 461 462 ASSERT(devicesdir); /* devfs must be initialized */ 463 ASSERT(pathname); /* must have some path */ 464 465 if (error = pn_get(pathname, UIO_SYSSPACE, &pn)) 466 return (error); 467 468 /* make the path relative to /devices. */ 469 pn_skipslash(&pn); 470 if (pn_pathleft(&pn) == 0) { 471 /* all we had was "\0" or "/" (which skipslash skiped) */ 472 if (dirvpp) 473 *dirvpp = NULL; 474 if (compvpp) { 475 VN_HOLD(devicesdir); 476 *compvpp = devicesdir; 477 } 478 } else { 479 /* 480 * Use devfs lookup to resolve pathname to the vnode for 481 * the device via relative lookup in devfs. Extra holds for 482 * using devicesdir as directory we are searching and for 483 * being our root without being == rootdir. 484 */ 485 VN_HOLD(devicesdir); 486 VN_HOLD(devicesdir); 487 error = lookuppnvp(&pn, NULL, FOLLOW, dirvpp, compvpp, 488 devicesdir, devicesdir, kcred); 489 } 490 pn_free(&pn); 491 492 return (error); 493 } 494 495 /* 496 * Given a devfs path (without the /devices prefix), walk 497 * the dv_node sub-tree rooted at the path. 
498 */ 499 int 500 devfs_walk( 501 char *path, 502 void (*callback)(struct dv_node *, void *), 503 void *arg) 504 { 505 char *dirpath, *devnm; 506 struct vnode *dirvp; 507 508 ASSERT(path && callback); 509 510 if (*path != '/' || devfs_mntinfo == NULL) 511 return (ENXIO); 512 513 dcmn_err(("devfs_walk: path = %s", path)); 514 515 dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 516 517 (void) snprintf(dirpath, MAXPATHLEN, "/devices%s", path); 518 519 devnm = strrchr(dirpath, '/'); 520 521 ASSERT(devnm); 522 523 *devnm++ = '\0'; 524 525 if (lookupname(dirpath, UIO_SYSSPACE, 0, NULL, &dirvp)) { 526 dcmn_err(("directory %s not found\n", dirpath)); 527 kmem_free(dirpath, MAXPATHLEN); 528 return (ENXIO); 529 } 530 531 /* 532 * if path == "/", visit the root dv_node 533 */ 534 if (*devnm == '\0') { 535 callback(VTODV(dirvp), arg); 536 devnm = NULL; 537 } 538 539 dv_walk(VTODV(dirvp), devnm, callback, arg); 540 541 VN_RELE(dirvp); 542 543 kmem_free(dirpath, MAXPATHLEN); 544 545 return (0); 546 } 547 548 int 549 devfs_devpolicy(vnode_t *vp, devplcy_t **dpp) 550 { 551 struct vnode *rvp; 552 struct dv_node *dvp; 553 int rval = -1; 554 555 /* fail if devfs not mounted yet */ 556 if (devfs_mntinfo == NULL) 557 return (rval); 558 559 if (VOP_REALVP(vp, &rvp, NULL) == 0 && vn_matchops(rvp, dv_vnodeops)) { 560 dvp = VTODV(rvp); 561 rw_enter(&dvp->dv_contents, RW_READER); 562 if (dvp->dv_priv) { 563 dphold(dvp->dv_priv); 564 *dpp = dvp->dv_priv; 565 rval = 0; 566 } 567 rw_exit(&dvp->dv_contents); 568 } 569 return (rval); 570 } 571