1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * Copyright 2015 Joyent, Inc. All rights reserved. 25 * Copyright (c) 2017 by Delphix. All rights reserved. 26 */ 27 28 /* 29 * This is the /dev (hence, the sdev_ prefix) filesystem. 30 */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/sysmacros.h> 35 #include <sys/systm.h> 36 #include <sys/kmem.h> 37 #include <sys/time.h> 38 #include <sys/pathname.h> 39 #include <sys/vfs.h> 40 #include <sys/vfs_opreg.h> 41 #include <sys/vnode.h> 42 #include <sys/file.h> 43 #include <sys/stat.h> 44 #include <sys/uio.h> 45 #include <sys/stat.h> 46 #include <sys/errno.h> 47 #include <sys/cmn_err.h> 48 #include <sys/cred.h> 49 #include <sys/statvfs.h> 50 #include <sys/policy.h> 51 #include <sys/mount.h> 52 #include <sys/debug.h> 53 #include <sys/modctl.h> 54 #include <sys/mkdev.h> 55 #include <fs/fs_subr.h> 56 #include <sys/fs/sdev_impl.h> 57 #include <sys/fs/snode.h> 58 #include <sys/fs/dv_node.h> 59 #include <sys/sunndi.h> 60 #include <sys/mntent.h> 61 #include <sys/disp.h> 62 63 /* 64 * /dev vfs operations. 65 */ 66 67 /* 68 * globals 69 */ 70 struct sdev_data *sdev_origins; /* mount info for origins under /dev */ 71 kmutex_t sdev_lock; /* used for mount/unmount/rename synchronization */ 72 taskq_t *sdev_taskq = NULL; 73 74 /* 75 * static 76 */ 77 static major_t devmajor; /* the fictitious major we live on */ 78 static major_t devminor; /* the fictitious minor of this instance */ 79 static struct sdev_data *sdev_mntinfo = NULL; /* linked list of instances */ 80 81 /* LINTED E_STATIC_UNUSED */ /* useful for debugging */ 82 static struct vnode *sdev_stale_attrvp; /* stale root attrvp after remount */ 83 84 static int sdev_mount(struct vfs *, struct vnode *, struct mounta *, 85 struct cred *); 86 static int sdev_unmount(struct vfs *, int, struct cred *); 87 static int sdev_root(struct vfs *, struct vnode **); 88 static int sdev_statvfs(struct vfs *, struct statvfs64 *); 89 static void sdev_insert_mntinfo(struct sdev_data *); 90 static int devinit(int, char *); 91 92 static vfsdef_t sdev_vfssw = { 93 VFSDEF_VERSION, 94 "dev", /* type name string */ 95 devinit, /* init routine */ 96 VSW_CANREMOUNT, /* flags */ 97 NULL /* mount options table prototype */ 98 }; 99 100 101 /* 102 * Module linkage information 103 */ 104 static struct modlfs modlfs = { 105 &mod_fsops, "/dev filesystem", &sdev_vfssw 106 }; 107 108 static struct modlinkage modlinkage = { 109 MODREV_1, (void *)&modlfs, NULL 110 }; 111 112 int 113 _init(void) 114 { 115 int e; 116 117 mutex_init(&sdev_lock, NULL, MUTEX_DEFAULT, NULL); 118 sdev_node_cache_init(); 119 sdev_devfsadm_lockinit(); 120 if ((e = mod_install(&modlinkage)) != 0) { 121 sdev_devfsadm_lockdestroy(); 122 sdev_node_cache_fini(); 123 mutex_destroy(&sdev_lock); 124 return (e); 125 } 126 return (0); 127 } 128 129 /* 130 * dev module remained loaded for the global /dev instance 131 */ 132 int 133 _fini(void) 134 { 135 return (EBUSY); 136 } 137 138 int 139 _info(struct modinfo *modinfop) 140 { 141 return (mod_info(&modlinkage, modinfop)); 142 } 143 144 /*ARGSUSED*/ 145 static int 146 devinit(int fstype, char *name) 147 { 148 static const fs_operation_def_t dev_vfsops_tbl[] = { 149 VFSNAME_MOUNT, { .vfs_mount = sdev_mount }, 150 VFSNAME_UNMOUNT, { .vfs_unmount = sdev_unmount }, 151 VFSNAME_ROOT, { .vfs_root = sdev_root }, 152 VFSNAME_STATVFS, { .vfs_statvfs = sdev_statvfs }, 153 NULL, NULL 154 }; 155 156 int error; 157 extern major_t getudev(void); 158 159 devtype = fstype; 160 161 error = vfs_setfsops(fstype, dev_vfsops_tbl, NULL); 162 if (error != 0) { 163 cmn_err(CE_WARN, "devinit: bad vfs ops tbl"); 164 return (error); 165 } 166 167 error = vn_make_ops("dev", sdev_vnodeops_tbl, &sdev_vnodeops); 168 if (error != 0) { 169 (void) vfs_freevfsops_by_type(fstype); 170 cmn_err(CE_WARN, "devinit: bad vnode ops tbl"); 171 return (error); 172 } 173 174 if ((devmajor = getudev()) == (major_t)-1) { 175 cmn_err(CE_WARN, "%s: can't get unique dev", sdev_vfssw.name); 176 return (ENXIO); 177 } 178 179 if (sdev_plugin_init() != 0) { 180 cmn_err(CE_WARN, "%s: failed to set init plugin subsystem", 181 sdev_vfssw.name); 182 return (EIO); 183 } 184 185 /* initialize negative cache */ 186 sdev_ncache_init(); 187 188 return (0); 189 } 190 191 /* 192 * Both mount point and backing store directory name are 193 * passed in from userland 194 */ 195 static int 196 sdev_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap, 197 struct cred *cr) 198 { 199 struct sdev_data *sdev_data; 200 struct vnode *avp; 201 struct sdev_node *dv; 202 struct sdev_mountargs *args = NULL; 203 int error = 0; 204 dev_t devdev; 205 206 /* 207 * security check 208 */ 209 if ((secpolicy_fs_mount(cr, mvp, vfsp) != 0) || 210 (secpolicy_sys_devices(cr) != 0)) 211 return (EPERM); 212 213 /* 214 * Sanity check the mount point 215 */ 216 if (mvp->v_type != VDIR) 217 return (ENOTDIR); 218 219 /* 220 * Sanity Check for overlay mount. 221 */ 222 mutex_enter(&mvp->v_lock); 223 if ((uap->flags & MS_OVERLAY) == 0 && 224 (uap->flags & MS_REMOUNT) == 0 && 225 (mvp->v_count > 1 || (mvp->v_flag & VROOT))) { 226 mutex_exit(&mvp->v_lock); 227 return (EBUSY); 228 } 229 mutex_exit(&mvp->v_lock); 230 231 args = kmem_zalloc(sizeof (*args), KM_SLEEP); 232 233 if ((uap->flags & MS_DATA) && 234 (uap->datalen != 0 && uap->dataptr != NULL)) { 235 /* copy in the arguments */ 236 if (error = sdev_copyin_mountargs(uap, args)) 237 goto cleanup; 238 } 239 240 /* 241 * Sanity check the backing store 242 */ 243 if (args->sdev_attrdir) { 244 /* user supplied an attribute store */ 245 if (error = lookupname((char *)(uintptr_t)args->sdev_attrdir, 246 UIO_USERSPACE, FOLLOW, NULLVPP, &avp)) { 247 cmn_err(CE_NOTE, "/dev fs: lookup on attribute " 248 "directory %s failed", 249 (char *)(uintptr_t)args->sdev_attrdir); 250 goto cleanup; 251 } 252 253 if (avp->v_type != VDIR) { 254 VN_RELE(avp); 255 error = ENOTDIR; 256 goto cleanup; 257 } 258 } else { 259 /* use mountp as the attribute store */ 260 avp = mvp; 261 VN_HOLD(avp); 262 } 263 264 mutex_enter(&sdev_lock); 265 266 /* 267 * Check that the taskq has been created. We can't do this in our 268 * _init or devinit because they run too early for ddi_taskq_create. 269 */ 270 if (sdev_taskq == NULL) { 271 sdev_taskq = taskq_create("sdev", 1, minclsyspri, 1, 1, 0); 272 if (sdev_taskq == NULL) { 273 error = ENOMEM; 274 mutex_exit(&sdev_lock); 275 VN_RELE(avp); 276 goto cleanup; 277 } 278 } 279 280 /* 281 * handling installation 282 */ 283 if (uap->flags & MS_REMOUNT) { 284 sdev_data = (struct sdev_data *)vfsp->vfs_data; 285 ASSERT(sdev_data); 286 287 dv = sdev_data->sdev_root; 288 ASSERT(dv == dv->sdev_dotdot); 289 290 /* 291 * mark all existing sdev_nodes (except root node) stale 292 */ 293 sdev_stale(dv); 294 295 /* Reset previous mountargs */ 296 if (sdev_data->sdev_mountargs) { 297 kmem_free(sdev_data->sdev_mountargs, 298 sizeof (struct sdev_mountargs)); 299 } 300 sdev_data->sdev_mountargs = args; 301 args = NULL; /* so it won't be freed below */ 302 303 sdev_stale_attrvp = dv->sdev_attrvp; 304 dv->sdev_attrvp = avp; 305 vfsp->vfs_mtime = ddi_get_time(); 306 307 mutex_exit(&sdev_lock); 308 goto cleanup; /* we're done */ 309 } 310 311 /* 312 * Create and initialize the vfs-private data. 313 */ 314 devdev = makedevice(devmajor, devminor); 315 while (vfs_devismounted(devdev)) { 316 devminor = (devminor + 1) & MAXMIN32; 317 318 /* 319 * All the minor numbers are used up. 320 */ 321 if (devminor == 0) { 322 mutex_exit(&sdev_lock); 323 VN_RELE(avp); 324 error = ENODEV; 325 goto cleanup; 326 } 327 328 devdev = makedevice(devmajor, devminor); 329 } 330 331 dv = sdev_mkroot(vfsp, devdev, mvp, avp, cr); 332 sdev_data = kmem_zalloc(sizeof (struct sdev_data), KM_SLEEP); 333 vfsp->vfs_dev = devdev; 334 vfsp->vfs_data = (caddr_t)sdev_data; 335 vfsp->vfs_fstype = devtype; 336 vfsp->vfs_bsize = DEV_BSIZE; 337 vfsp->vfs_mtime = ddi_get_time(); 338 vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devtype); 339 340 ASSERT(dv == dv->sdev_dotdot); 341 342 sdev_data->sdev_vfsp = vfsp; 343 sdev_data->sdev_root = dv; 344 sdev_data->sdev_mountargs = args; 345 346 /* get acl flavor from attribute dir */ 347 if (VOP_PATHCONF(avp, _PC_ACL_ENABLED, &sdev_data->sdev_acl_flavor, 348 kcred, NULL) != 0 || sdev_data->sdev_acl_flavor == 0) 349 sdev_data->sdev_acl_flavor = _ACL_ACLENT_ENABLED; 350 351 args = NULL; /* so it won't be freed below */ 352 sdev_insert_mntinfo(sdev_data); 353 mutex_exit(&sdev_lock); 354 355 if (!SDEV_IS_GLOBAL(dv)) { 356 ASSERT(sdev_origins); 357 dv->sdev_flags &= ~SDEV_GLOBAL; 358 dv->sdev_origin = sdev_origins->sdev_root; 359 SDEV_HOLD(dv->sdev_origin); 360 } else { 361 sdev_ncache_setup(); 362 rw_enter(&dv->sdev_contents, RW_WRITER); 363 sdev_filldir_dynamic(dv); 364 rw_exit(&dv->sdev_contents); 365 } 366 367 sdev_update_timestamps(dv->sdev_attrvp, 368 cr, AT_CTIME|AT_MTIME|AT_ATIME); 369 370 cleanup: 371 if (args) 372 kmem_free(args, sizeof (*args)); 373 return (error); 374 } 375 376 /* 377 * unmounting the non-global /dev instances, e.g. when deleting a Kevlar zone. 378 */ 379 static int 380 sdev_unmount(struct vfs *vfsp, int flag, struct cred *cr) 381 { 382 struct sdev_node *dv; 383 int error; 384 struct sdev_data *sdev_data, *prev, *next; 385 386 /* 387 * enforce the security policies 388 */ 389 if ((secpolicy_fs_unmount(cr, vfsp) != 0) || 390 (secpolicy_sys_devices(cr) != 0)) 391 return (EPERM); 392 393 if (flag & MS_FORCE) 394 return (ENOTSUP); 395 396 mutex_enter(&sdev_lock); 397 dv = VFSTOSDEVFS(vfsp)->sdev_root; 398 ASSERT(dv == dv->sdev_dotdot); 399 if (SDEVTOV(dv)->v_count > 1) { 400 mutex_exit(&sdev_lock); 401 return (EBUSY); 402 } 403 404 /* 405 * global instance remains mounted 406 */ 407 if (SDEV_IS_GLOBAL(dv)) { 408 mutex_exit(&sdev_lock); 409 return (EBUSY); 410 } 411 mutex_exit(&sdev_lock); 412 413 /* verify the v_count */ 414 if ((error = sdev_cleandir(dv, NULL, 0)) != 0) { 415 return (error); 416 } 417 ASSERT(SDEVTOV(dv)->v_count == 1); 418 419 /* release hold on root node and destroy it */ 420 SDEV_RELE(dv); 421 dv->sdev_nlink -= 2; 422 sdev_nodedestroy(dv, 0); 423 424 sdev_data = (struct sdev_data *)vfsp->vfs_data; 425 vfsp->vfs_data = (caddr_t)0; 426 427 /* 428 * XXX separate it into sdev_delete_mntinfo() if useful 429 */ 430 mutex_enter(&sdev_lock); 431 prev = sdev_data->sdev_prev; 432 next = sdev_data->sdev_next; 433 if (prev) 434 prev->sdev_next = next; 435 else 436 sdev_mntinfo = next; 437 if (next) 438 next->sdev_prev = prev; 439 mutex_exit(&sdev_lock); 440 441 if (sdev_data->sdev_mountargs) { 442 kmem_free(sdev_data->sdev_mountargs, 443 sizeof (struct sdev_mountargs)); 444 } 445 kmem_free(sdev_data, sizeof (struct sdev_data)); 446 return (0); 447 } 448 449 /* 450 * return root vnode for given vfs 451 */ 452 static int 453 sdev_root(struct vfs *vfsp, struct vnode **vpp) 454 { 455 *vpp = SDEVTOV(VFSTOSDEVFS(vfsp)->sdev_root); 456 VN_HOLD(*vpp); 457 return (0); 458 } 459 460 /* 461 * return 'generic superblock' information to userland. 462 * 463 * not much that we can usefully admit to here 464 */ 465 static int 466 sdev_statvfs(struct vfs *vfsp, struct statvfs64 *sbp) 467 { 468 dev32_t d32; 469 470 bzero(sbp, sizeof (*sbp)); 471 sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize; 472 sbp->f_files = kmem_cache_stat(sdev_node_cache, "alloc"); 473 474 /* no illusions that free/avail files is relevant to dev */ 475 sbp->f_ffree = 0; 476 sbp->f_favail = 0; 477 478 /* no illusions that blocks are relevant to devfs */ 479 sbp->f_bfree = 0; 480 sbp->f_bavail = 0; 481 sbp->f_blocks = 0; 482 483 (void) cmpldev(&d32, vfsp->vfs_dev); 484 sbp->f_fsid = d32; 485 (void) strcpy(sbp->f_basetype, vfssw[devtype].vsw_name); 486 sbp->f_flag = vf_to_stf(vfsp->vfs_flag); 487 sbp->f_namemax = MAXNAMELEN - 1; 488 (void) strcpy(sbp->f_fstr, "dev"); 489 490 return (0); 491 } 492 493 static void 494 sdev_insert_mntinfo(struct sdev_data *data) 495 { 496 ASSERT(mutex_owned(&sdev_lock)); 497 data->sdev_next = sdev_mntinfo; 498 data->sdev_prev = NULL; 499 if (sdev_mntinfo) { 500 sdev_mntinfo->sdev_prev = data; 501 } else { 502 sdev_origins = data; 503 } 504 sdev_mntinfo = data; 505 } 506 507 struct sdev_data * 508 sdev_find_mntinfo(char *mntpt) 509 { 510 struct sdev_data *mntinfo; 511 512 mutex_enter(&sdev_lock); 513 mntinfo = sdev_mntinfo; 514 while (mntinfo) { 515 if (strcmp(mntpt, mntinfo->sdev_root->sdev_name) == 0) { 516 VN_HOLD(SDEVTOV(mntinfo->sdev_root)); 517 break; 518 } 519 mntinfo = mntinfo->sdev_next; 520 } 521 mutex_exit(&sdev_lock); 522 return (mntinfo); 523 } 524 525 void 526 sdev_mntinfo_rele(struct sdev_data *mntinfo) 527 { 528 vnode_t *vp; 529 530 mutex_enter(&sdev_lock); 531 vp = SDEVTOV(mntinfo->sdev_root); 532 mutex_enter(&vp->v_lock); 533 VN_RELE_LOCKED(vp); 534 mutex_exit(&vp->v_lock); 535 mutex_exit(&sdev_lock); 536 } 537 538 void 539 sdev_mnt_walk(void (*func)(struct sdev_node *, void *), void *arg) 540 { 541 struct sdev_data *mntinfo; 542 543 mutex_enter(&sdev_lock); 544 mntinfo = sdev_mntinfo; 545 while (mntinfo != NULL) { 546 func(mntinfo->sdev_root, arg); 547 mntinfo = mntinfo->sdev_next; 548 } 549 mutex_exit(&sdev_lock); 550 } 551