/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * This is the device filesystem.
 *
 * It is a combination of a namer to drive autoconfiguration,
 * plus the access methods for the device drivers of the system.
 *
 * The prototype is fairly dependent on specfs for the latter part
 * of its implementation, though a final version would integrate the two.
36 */ 37 #include <sys/types.h> 38 #include <sys/param.h> 39 #include <sys/sysmacros.h> 40 #include <sys/systm.h> 41 #include <sys/kmem.h> 42 #include <sys/time.h> 43 #include <sys/pathname.h> 44 #include <sys/vfs.h> 45 #include <sys/vnode.h> 46 #include <sys/stat.h> 47 #include <sys/uio.h> 48 #include <sys/stat.h> 49 #include <sys/errno.h> 50 #include <sys/cmn_err.h> 51 #include <sys/cred.h> 52 #include <sys/statvfs.h> 53 #include <sys/mount.h> 54 #include <sys/debug.h> 55 #include <sys/modctl.h> 56 #include <fs/fs_subr.h> 57 #include <sys/fs/dv_node.h> 58 #include <sys/fs/snode.h> 59 #include <sys/sunndi.h> 60 #include <sys/policy.h> 61 #include <sys/sunmdi.h> 62 63 /* 64 * devfs vfs operations. 65 */ 66 static int devfs_mount(struct vfs *, struct vnode *, struct mounta *, 67 struct cred *); 68 static int devfs_unmount(struct vfs *, int, struct cred *); 69 static int devfs_root(struct vfs *, struct vnode **); 70 static int devfs_statvfs(struct vfs *, struct statvfs64 *); 71 static int devfs_mountroot(struct vfs *, enum whymountroot); 72 73 static int devfsinit(int, char *); 74 75 static vfsdef_t devfs_vfssw = { 76 VFSDEF_VERSION, 77 "devfs", /* type name string */ 78 devfsinit, /* init routine */ 79 0, /* flags */ 80 NULL /* mount options table prototype */ 81 }; 82 83 static kmutex_t devfs_lock; /* protects global data */ 84 static int devfstype; /* fstype */ 85 static dev_t devfsdev; /* the fictious 'device' we live on */ 86 static struct devfs_data *devfs_mntinfo; /* linked list of instances */ 87 88 /* 89 * Module linkage information 90 */ 91 static struct modlfs modlfs = { 92 &mod_fsops, "devices filesystem %I%", &devfs_vfssw 93 }; 94 95 static struct modlinkage modlinkage = { 96 MODREV_1, (void *)&modlfs, NULL 97 }; 98 99 int 100 _init(void) 101 { 102 int e; 103 104 mutex_init(&devfs_lock, "devfs lock", MUTEX_DEFAULT, NULL); 105 dv_node_cache_init(); 106 if ((e = mod_install(&modlinkage)) != 0) { 107 dv_node_cache_fini(); 108 mutex_destroy(&devfs_lock); 109 
return (e); 110 } 111 dcmn_err(("devfs loaded\n")); 112 return (0); 113 } 114 115 int 116 _fini(void) 117 { 118 return (EBUSY); 119 } 120 121 int 122 _info(struct modinfo *modinfop) 123 { 124 return (mod_info(&modlinkage, modinfop)); 125 } 126 127 /*ARGSUSED1*/ 128 static int 129 devfsinit(int fstype, char *name) 130 { 131 static const fs_operation_def_t devfs_vfsops_template[] = { 132 VFSNAME_MOUNT, devfs_mount, 133 VFSNAME_UNMOUNT, devfs_unmount, 134 VFSNAME_ROOT, devfs_root, 135 VFSNAME_STATVFS, devfs_statvfs, 136 VFSNAME_SYNC, (fs_generic_func_p) fs_sync, 137 VFSNAME_MOUNTROOT, devfs_mountroot, 138 NULL, NULL 139 }; 140 int error; 141 int dev; 142 extern major_t getudev(void); /* gack - what a function */ 143 144 devfstype = fstype; 145 /* 146 * Associate VFS ops vector with this fstype 147 */ 148 error = vfs_setfsops(fstype, devfs_vfsops_template, NULL); 149 if (error != 0) { 150 cmn_err(CE_WARN, "devfsinit: bad vfs ops template"); 151 return (error); 152 } 153 154 error = vn_make_ops("dev fs", dv_vnodeops_template, &dv_vnodeops); 155 if (error != 0) { 156 (void) vfs_freevfsops_by_type(fstype); 157 cmn_err(CE_WARN, "devfsinit: bad vnode ops template"); 158 return (error); 159 } 160 161 /* 162 * Invent a dev_t (sigh). 163 */ 164 if ((dev = getudev()) == (major_t)-1) { 165 cmn_err(CE_NOTE, "%s: can't get unique dev", devfs_vfssw.name); 166 dev = 0; 167 } 168 devfsdev = makedevice(dev, 0); 169 170 return (0); 171 } 172 173 /* 174 * The name of the mount point and the name of the attribute 175 * filesystem are passed down from userland for now. 
176 */ 177 static int 178 devfs_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap, 179 struct cred *cr) 180 { 181 struct devfs_data *devfs_data; 182 struct vnode *avp; 183 struct dv_node *dv; 184 struct vattr va; 185 186 dcmn_err(("devfs_mount\n")); 187 188 if (secpolicy_fs_mount(cr, mvp, vfsp) != 0) 189 return (EPERM); 190 191 /* 192 * check that the mount point is sane 193 */ 194 if (mvp->v_type != VDIR) 195 return (ENOTDIR); 196 197 ASSERT(uap->flags & MS_SYSSPACE); 198 /* 199 * Devfs can only be mounted from kernel during boot. 200 * avp is the existing /devices, the same as the mount point. 201 */ 202 avp = mvp; 203 204 /* 205 * Create and initialize the vfs-private data. 206 * This includes a hand-crafted root vnode (we build 207 * this here mostly so that traverse() doesn't sleep 208 * in VFS_ROOT()). 209 */ 210 mutex_enter(&devfs_lock); 211 ASSERT(devfs_mntinfo == NULL); 212 dv = dv_mkroot(vfsp, devfsdev); 213 dv->dv_attrvp = avp; /* attribute root vp */ 214 215 ASSERT(dv == dv->dv_dotdot); 216 217 devfs_data = kmem_zalloc(sizeof (struct devfs_data), KM_SLEEP); 218 devfs_data->devfs_vfsp = vfsp; 219 devfs_data->devfs_root = dv; 220 221 vfsp->vfs_data = (caddr_t)devfs_data; 222 vfsp->vfs_fstype = devfstype; 223 vfsp->vfs_dev = devfsdev; 224 vfsp->vfs_bsize = DEV_BSIZE; 225 vfsp->vfs_mtime = ddi_get_time(); 226 vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devfstype); 227 228 /* We're there. */ 229 devfs_mntinfo = devfs_data; 230 mutex_exit(&devfs_lock); 231 232 va.va_mask = AT_ATIME|AT_MTIME; 233 gethrestime(&va.va_atime); 234 gethrestime(&va.va_mtime); 235 (void) VOP_SETATTR(DVTOV(dv), &va, 0, cr, NULL); 236 return (0); 237 } 238 239 240 /* 241 * We never unmount devfs in a real production system. 
242 */ 243 /*ARGSUSED*/ 244 static int 245 devfs_unmount(struct vfs *vfsp, int flag, struct cred *cr) 246 { 247 return (EBUSY); 248 } 249 250 /* 251 * return root vnode for given vfs 252 */ 253 static int 254 devfs_root(struct vfs *vfsp, struct vnode **vpp) 255 { 256 dcmn_err(("devfs_root\n")); 257 *vpp = DVTOV(VFSTODVFS(vfsp)->devfs_root); 258 VN_HOLD(*vpp); 259 return (0); 260 } 261 262 /* 263 * return 'generic superblock' information to userland. 264 * 265 * not much that we can usefully admit to here 266 */ 267 static int 268 devfs_statvfs(struct vfs *vfsp, struct statvfs64 *sbp) 269 { 270 extern kmem_cache_t *dv_node_cache; 271 272 dev32_t d32; 273 274 dcmn_err(("devfs_statvfs\n")); 275 bzero(sbp, sizeof (*sbp)); 276 sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize; 277 /* 278 * We could compute the number of devfsnodes here .. but since 279 * it's dynamic anyway, it's not clear how useful this is. 280 */ 281 sbp->f_files = kmem_cache_stat(dv_node_cache, "alloc"); 282 283 /* no illusions that free/avail files is relevant to devfs */ 284 sbp->f_ffree = 0; 285 sbp->f_favail = 0; 286 287 /* no illusions that blocks are relevant to devfs */ 288 sbp->f_bfree = 0; 289 sbp->f_bavail = 0; 290 sbp->f_blocks = 0; 291 292 (void) cmpldev(&d32, vfsp->vfs_dev); 293 sbp->f_fsid = d32; 294 (void) strcpy(sbp->f_basetype, vfssw[devfstype].vsw_name); 295 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 296 sbp->f_namemax = MAXNAMELEN - 1; 297 (void) strcpy(sbp->f_fstr, "devices"); 298 299 return (0); 300 } 301 302 /* 303 * devfs always mount after root is mounted, so this should never 304 * be invoked. 
305 */ 306 /*ARGSUSED*/ 307 static int 308 devfs_mountroot(struct vfs *vfsp, enum whymountroot why) 309 { 310 dcmn_err(("devfs_mountroot\n")); 311 312 return (EINVAL); 313 } 314 315 struct dv_node * 316 devfs_dip_to_dvnode(dev_info_t *dip) 317 { 318 char *dirpath; 319 struct vnode *dirvp; 320 321 ASSERT(dip != NULL); 322 323 /* no-op if devfs not mounted yet */ 324 if (devfs_mntinfo == NULL) 325 return (NULL); 326 327 /* 328 * The lookupname below only looks up cached dv_nodes 329 * because devfs_clean_key is set in thread specific data. 330 */ 331 dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 332 (void) ddi_pathname(dip, dirpath); 333 if (devfs_lookupname(dirpath, NULLVPP, &dirvp)) { 334 dcmn_err(("directory %s not found\n", dirpath)); 335 kmem_free(dirpath, MAXPATHLEN); 336 return (NULL); 337 } 338 339 kmem_free(dirpath, MAXPATHLEN); 340 return (VTODV(dirvp)); 341 } 342 343 /* 344 * If DV_CLEAN_FORCE devfs_clean is issued with a dip that is not the root 345 * and not a vHCI we also need to clean any vHCI branches because they 346 * may contain pHCI nodes. A detach_node() of a pHCI will fail if its 347 * mdi_devi_offline() fails, and the mdi_devi_offline() of the last 348 * pHCI will fail unless an ndi_devi_offline() of the Client nodes under 349 * the vHCI is successful - which requires a clean vHCI branch to removed 350 * the devi_refs associated with devfs vnodes. 351 */ 352 static int 353 devfs_clean_vhci(dev_info_t *dip, void *args) 354 { 355 struct dv_node *dvp; 356 uint_t flags = (uint_t)(uintptr_t)args; 357 358 (void) tsd_set(devfs_clean_key, (void *)1); 359 dvp = devfs_dip_to_dvnode(dip); 360 (void) tsd_set(devfs_clean_key, NULL); 361 if (dvp) { 362 (void) dv_cleandir(dvp, NULL, flags); 363 VN_RELE(DVTOV(dvp)); 364 } 365 return (DDI_WALK_CONTINUE); 366 } 367 368 /* 369 * devfs_clean() 370 * 371 * Destroy unreferenced dv_node's and detach devices. 372 * Returns 0 on success, error if failed to unconfigure node. 
373 * 374 * devfs caches unreferenced dv_node to speed by the performance 375 * of ls, find, etc. devfs_clean() is invoked to cleanup cached 376 * dv_nodes to reclaim memory as well as to facilitate device 377 * removal (dv_node reference devinfo nodes, which prevents driver 378 * detach). 379 * 380 * If a shell parks in a /devices directory, the dv_node will be 381 * held, preventing the corresponding device to be detached. 382 * This would be a denial of service against DR. To prevent this, 383 * DR code calls devfs_clean() with the DV_CLEAN_FORCE flag. 384 * The dv_cleandir() implementation does the right thing to ensure 385 * successful DR. 386 */ 387 int 388 devfs_clean(dev_info_t *dip, char *devnm, uint_t flags) 389 { 390 struct dv_node *dvp; 391 int rval = 0; 392 393 dcmn_err(("devfs_unconfigure: dip = 0x%p, flags = 0x%x", 394 (void *)dip, flags)); 395 396 /* avoid recursion back into the device tree */ 397 (void) tsd_set(devfs_clean_key, (void *)1); 398 dvp = devfs_dip_to_dvnode(dip); 399 (void) tsd_set(devfs_clean_key, NULL); 400 if (dvp == NULL) 401 return (0); 402 403 if (dv_cleandir(dvp, devnm, flags) != 0) 404 rval = EBUSY; 405 VN_RELE(DVTOV(dvp)); 406 407 /* 408 * If we are doing a DV_CLEAN_FORCE, and we did not start at the 409 * root, and we did not start at a vHCI node then clean vHCI 410 * branches too. Failure to clean vHCI branch does not cause EBUSY. 411 * 412 * Also, to accommodate nexus callers that clean 'self' to DR 'child' 413 * (like pcihp) we clean vHCIs even when dv_cleandir() of dip branch 414 * above fails - this prevents a busy DR 'child' sibling from causing 415 * the DR of 'child' to fail because a vHCI branch was not cleaned. 
416 */ 417 if ((flags & DV_CLEAN_FORCE) && (dip != ddi_root_node()) && 418 (mdi_component_is_vhci(dip, NULL) != MDI_SUCCESS)) { 419 /* 420 * NOTE: for backport the following is recommended 421 * (void) devfs_clean_vhci(scsi_vhci_dip, 422 * (void *)(uintptr_t)flags); 423 */ 424 mdi_walk_vhcis(devfs_clean_vhci, (void *)(uintptr_t)flags); 425 } 426 427 return (rval); 428 } 429 430 /* 431 * lookup a devfs relative pathname, returning held vnodes for the final 432 * component and the containing directory (if requested). 433 * 434 * NOTE: We can't use lookupname because this would use the current 435 * processes credentials (CRED) in the call lookuppnvp instead 436 * of kcred. It also does not give you the flexibility so 437 * specify the directory to start the resolution in (devicesdir). 438 */ 439 int 440 devfs_lookupname( 441 char *pathname, /* user pathname */ 442 vnode_t **dirvpp, /* ret for ptr to parent dir vnode */ 443 vnode_t **compvpp) /* ret for ptr to component vnode */ 444 { 445 struct pathname pn; 446 int error; 447 448 ASSERT(devicesdir); /* devfs must be initialized */ 449 ASSERT(pathname); /* must have some path */ 450 451 if (error = pn_get(pathname, UIO_SYSSPACE, &pn)) 452 return (error); 453 454 /* make the path relative to /devices. */ 455 pn_skipslash(&pn); 456 if (pn_pathleft(&pn) == 0) { 457 /* all we had was "\0" or "/" (which skipslash skiped) */ 458 if (dirvpp) 459 *dirvpp = NULL; 460 if (compvpp) { 461 VN_HOLD(devicesdir); 462 *compvpp = devicesdir; 463 } 464 } else { 465 /* 466 * Use devfs lookup to resolve pathname to the vnode for 467 * the device via relative lookup in devfs. Extra holds for 468 * using devicesdir as directory we are searching and for 469 * being our root without being == rootdir. 
470 */ 471 VN_HOLD(devicesdir); 472 VN_HOLD(devicesdir); 473 error = lookuppnvp(&pn, NULL, FOLLOW, dirvpp, compvpp, 474 devicesdir, devicesdir, kcred); 475 } 476 pn_free(&pn); 477 478 return (error); 479 } 480 481 /* 482 * Given a devfs path (without the /devices prefix), walk 483 * the dv_node sub-tree rooted at the path. 484 */ 485 int 486 devfs_walk( 487 char *path, 488 void (*callback)(struct dv_node *, void *), 489 void *arg) 490 { 491 char *dirpath, *devnm; 492 struct vnode *dirvp; 493 494 ASSERT(path && callback); 495 496 if (*path != '/' || devfs_mntinfo == NULL) 497 return (ENXIO); 498 499 dcmn_err(("devfs_walk: path = %s", path)); 500 501 dirpath = kmem_alloc(MAXPATHLEN, KM_SLEEP); 502 503 (void) snprintf(dirpath, MAXPATHLEN, "/devices%s", path); 504 505 devnm = strrchr(dirpath, '/'); 506 507 ASSERT(devnm); 508 509 *devnm++ = '\0'; 510 511 if (lookupname(dirpath, UIO_SYSSPACE, 0, NULL, &dirvp)) { 512 dcmn_err(("directory %s not found\n", dirpath)); 513 kmem_free(dirpath, MAXPATHLEN); 514 return (ENXIO); 515 } 516 517 /* 518 * if path == "/", visit the root dv_node 519 */ 520 if (*devnm == '\0') { 521 callback(VTODV(dirvp), arg); 522 devnm = NULL; 523 } 524 525 dv_walk(VTODV(dirvp), devnm, callback, arg); 526 527 VN_RELE(dirvp); 528 529 kmem_free(dirpath, MAXPATHLEN); 530 531 return (0); 532 } 533 534 int 535 devfs_devpolicy(vnode_t *vp, devplcy_t **dpp) 536 { 537 struct vnode *rvp; 538 struct dv_node *dvp; 539 int rval = -1; 540 541 /* fail if devfs not mounted yet */ 542 if (devfs_mntinfo == NULL) 543 return (rval); 544 545 if (VOP_REALVP(vp, &rvp) == 0 && vn_matchops(rvp, dv_vnodeops)) { 546 dvp = VTODV(rvp); 547 rw_enter(&dvp->dv_contents, RW_READER); 548 if (dvp->dv_priv) { 549 dphold(dvp->dv_priv); 550 *dpp = dvp->dv_priv; 551 rval = 0; 552 } 553 rw_exit(&dvp->dv_contents); 554 } 555 return (rval); 556 } 557