1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2015 Joyent, Inc. All rights reserved.
25 * Copyright (c) 2017 by Delphix. All rights reserved.
26 */
27
28 /*
29 * This is the /dev (hence, the sdev_ prefix) filesystem.
30 */
31
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/sysmacros.h>
35 #include <sys/systm.h>
36 #include <sys/kmem.h>
37 #include <sys/time.h>
38 #include <sys/pathname.h>
39 #include <sys/vfs.h>
40 #include <sys/vfs_opreg.h>
41 #include <sys/vnode.h>
42 #include <sys/file.h>
43 #include <sys/stat.h>
44 #include <sys/uio.h>
45 #include <sys/stat.h>
46 #include <sys/errno.h>
47 #include <sys/cmn_err.h>
48 #include <sys/cred.h>
49 #include <sys/statvfs.h>
50 #include <sys/policy.h>
51 #include <sys/mount.h>
52 #include <sys/debug.h>
53 #include <sys/modctl.h>
54 #include <sys/mkdev.h>
55 #include <fs/fs_subr.h>
56 #include <sys/fs/sdev_impl.h>
57 #include <sys/fs/snode.h>
58 #include <sys/fs/dv_node.h>
59 #include <sys/sunndi.h>
60 #include <sys/mntent.h>
61 #include <sys/disp.h>
62
63 /*
64 * /dev vfs operations.
65 */
66
/*
 * globals
 */

/*
 * Mount info for origins under /dev.  sdev_insert_mntinfo() points this at
 * the instance being inserted whenever the mount list is empty, and
 * sdev_mount() uses its root node as the origin of non-global instances.
 */
struct sdev_data *sdev_origins; /* mount info for origins under /dev */
kmutex_t sdev_lock; /* used for mount/unmount/rename synchronization */
/* created by sdev_mount(), under sdev_lock, the first time it finds it NULL */
taskq_t *sdev_taskq = NULL;
73
/*
 * static
 */
static major_t devmajor; /* the fictitious major we live on */
/*
 * sdev_mount() advances devminor until makedevice(devmajor, devminor) names
 * a device that is not already mounted.
 * NOTE(review): declared as major_t although it holds a minor number --
 * confirm whether minor_t was intended.
 */
static major_t devminor; /* the fictitious minor of this instance */
/*
 * Head of the doubly linked (sdev_next/sdev_prev) list of mounted instances.
 * Every access in this file is made with sdev_lock held.
 */
static struct sdev_data *sdev_mntinfo = NULL; /* linked list of instances */

/* LINTED E_STATIC_UNUSED */ /* useful for debugging */
static struct vnode *sdev_stale_attrvp; /* stale root attrvp after remount */
83
/*
 * Forward declarations: the vfs operations registered by devinit(), the
 * mount-list insertion helper, and devinit() itself (referenced by
 * sdev_vfssw below).
 */
static int sdev_mount(struct vfs *, struct vnode *, struct mounta *,
    struct cred *);
static int sdev_unmount(struct vfs *, int, struct cred *);
static int sdev_root(struct vfs *, struct vnode **);
static int sdev_statvfs(struct vfs *, struct statvfs64 *);
static void sdev_insert_mntinfo(struct sdev_data *);
static int devinit(int, char *);
91
/*
 * Filesystem definition referenced from modlfs: names the filesystem type
 * "dev" and supplies devinit() as its init routine.
 */
static vfsdef_t sdev_vfssw = {
	VFSDEF_VERSION,
	"dev",			/* type name string */
	devinit,		/* init routine */
	VSW_CANREMOUNT,		/* flags */
	NULL			/* mount options table prototype */
};
99
100
/*
 * Module linkage information
 */
/* filesystem linkage: ties the "/dev filesystem" module to sdev_vfssw */
static struct modlfs modlfs = {
	&mod_fsops, "/dev filesystem", &sdev_vfssw
};
107
/* handed to mod_install() in _init() and to mod_info() in _info() */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlfs, NULL
};
111
112 int
_init(void)113 _init(void)
114 {
115 int e;
116
117 mutex_init(&sdev_lock, NULL, MUTEX_DEFAULT, NULL);
118 sdev_node_cache_init();
119 sdev_devfsadm_lockinit();
120 if ((e = mod_install(&modlinkage)) != 0) {
121 sdev_devfsadm_lockdestroy();
122 sdev_node_cache_fini();
123 mutex_destroy(&sdev_lock);
124 return (e);
125 }
126 return (0);
127 }
128
/*
 * Module unload entry point.
 *
 * The dev module remains loaded for the global /dev instance, so every
 * unload request is refused with EBUSY.
 */
int
_fini(void)
{
	return (EBUSY);
}
137
138 int
_info(struct modinfo * modinfop)139 _info(struct modinfo *modinfop)
140 {
141 return (mod_info(&modlinkage, modinfop));
142 }
143
144 /*ARGSUSED*/
145 static int
devinit(int fstype,char * name)146 devinit(int fstype, char *name)
147 {
148 static const fs_operation_def_t dev_vfsops_tbl[] = {
149 VFSNAME_MOUNT, { .vfs_mount = sdev_mount },
150 VFSNAME_UNMOUNT, { .vfs_unmount = sdev_unmount },
151 VFSNAME_ROOT, { .vfs_root = sdev_root },
152 VFSNAME_STATVFS, { .vfs_statvfs = sdev_statvfs },
153 NULL, NULL
154 };
155
156 int error;
157 extern major_t getudev(void);
158
159 devtype = fstype;
160
161 error = vfs_setfsops(fstype, dev_vfsops_tbl, NULL);
162 if (error != 0) {
163 cmn_err(CE_WARN, "devinit: bad vfs ops tbl");
164 return (error);
165 }
166
167 error = vn_make_ops("dev", sdev_vnodeops_tbl, &sdev_vnodeops);
168 if (error != 0) {
169 (void) vfs_freevfsops_by_type(fstype);
170 cmn_err(CE_WARN, "devinit: bad vnode ops tbl");
171 return (error);
172 }
173
174 if ((devmajor = getudev()) == (major_t)-1) {
175 cmn_err(CE_WARN, "%s: can't get unique dev", sdev_vfssw.name);
176 return (ENXIO);
177 }
178
179 if (sdev_plugin_init() != 0) {
180 cmn_err(CE_WARN, "%s: failed to set init plugin subsystem",
181 sdev_vfssw.name);
182 return (EIO);
183 }
184
185 /* initialize negative cache */
186 sdev_ncache_init();
187
188 return (0);
189 }
190
191 /*
192 * Both mount point and backing store directory name are
193 * passed in from userland
194 */
195 static int
sdev_mount(struct vfs * vfsp,struct vnode * mvp,struct mounta * uap,struct cred * cr)196 sdev_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
197 struct cred *cr)
198 {
199 struct sdev_data *sdev_data;
200 struct vnode *avp;
201 struct sdev_node *dv;
202 struct sdev_mountargs *args = NULL;
203 int error = 0;
204 dev_t devdev;
205
206 /*
207 * security check
208 */
209 if ((secpolicy_fs_mount(cr, mvp, vfsp) != 0) ||
210 (secpolicy_sys_devices(cr) != 0))
211 return (EPERM);
212
213 /*
214 * Sanity check the mount point
215 */
216 if (mvp->v_type != VDIR)
217 return (ENOTDIR);
218
219 /*
220 * Sanity Check for overlay mount.
221 */
222 mutex_enter(&mvp->v_lock);
223 if ((uap->flags & MS_OVERLAY) == 0 &&
224 (uap->flags & MS_REMOUNT) == 0 &&
225 (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
226 mutex_exit(&mvp->v_lock);
227 return (EBUSY);
228 }
229 mutex_exit(&mvp->v_lock);
230
231 args = kmem_zalloc(sizeof (*args), KM_SLEEP);
232
233 if ((uap->flags & MS_DATA) &&
234 (uap->datalen != 0 && uap->dataptr != NULL)) {
235 /* copy in the arguments */
236 if (error = sdev_copyin_mountargs(uap, args))
237 goto cleanup;
238 }
239
240 /*
241 * Sanity check the backing store
242 */
243 if (args->sdev_attrdir) {
244 /* user supplied an attribute store */
245 if (error = lookupname((char *)(uintptr_t)args->sdev_attrdir,
246 UIO_USERSPACE, FOLLOW, NULLVPP, &avp)) {
247 cmn_err(CE_NOTE, "/dev fs: lookup on attribute "
248 "directory %s failed",
249 (char *)(uintptr_t)args->sdev_attrdir);
250 goto cleanup;
251 }
252
253 if (avp->v_type != VDIR) {
254 VN_RELE(avp);
255 error = ENOTDIR;
256 goto cleanup;
257 }
258 } else {
259 /* use mountp as the attribute store */
260 avp = mvp;
261 VN_HOLD(avp);
262 }
263
264 mutex_enter(&sdev_lock);
265
266 /*
267 * Check that the taskq has been created. We can't do this in our
268 * _init or devinit because they run too early for ddi_taskq_create.
269 */
270 if (sdev_taskq == NULL) {
271 sdev_taskq = taskq_create("sdev", 1, minclsyspri, 1, 1, 0);
272 if (sdev_taskq == NULL) {
273 error = ENOMEM;
274 mutex_exit(&sdev_lock);
275 VN_RELE(avp);
276 goto cleanup;
277 }
278 }
279
280 /*
281 * handling installation
282 */
283 if (uap->flags & MS_REMOUNT) {
284 sdev_data = (struct sdev_data *)vfsp->vfs_data;
285 ASSERT(sdev_data);
286
287 dv = sdev_data->sdev_root;
288 ASSERT(dv == dv->sdev_dotdot);
289
290 /*
291 * mark all existing sdev_nodes (except root node) stale
292 */
293 sdev_stale(dv);
294
295 /* Reset previous mountargs */
296 if (sdev_data->sdev_mountargs) {
297 kmem_free(sdev_data->sdev_mountargs,
298 sizeof (struct sdev_mountargs));
299 }
300 sdev_data->sdev_mountargs = args;
301 args = NULL; /* so it won't be freed below */
302
303 sdev_stale_attrvp = dv->sdev_attrvp;
304 dv->sdev_attrvp = avp;
305 vfsp->vfs_mtime = ddi_get_time();
306
307 mutex_exit(&sdev_lock);
308 goto cleanup; /* we're done */
309 }
310
311 /*
312 * Create and initialize the vfs-private data.
313 */
314 devdev = makedevice(devmajor, devminor);
315 while (vfs_devismounted(devdev)) {
316 devminor = (devminor + 1) & MAXMIN32;
317
318 /*
319 * All the minor numbers are used up.
320 */
321 if (devminor == 0) {
322 mutex_exit(&sdev_lock);
323 VN_RELE(avp);
324 error = ENODEV;
325 goto cleanup;
326 }
327
328 devdev = makedevice(devmajor, devminor);
329 }
330
331 dv = sdev_mkroot(vfsp, devdev, mvp, avp, cr);
332 sdev_data = kmem_zalloc(sizeof (struct sdev_data), KM_SLEEP);
333 vfsp->vfs_dev = devdev;
334 vfsp->vfs_data = (caddr_t)sdev_data;
335 vfsp->vfs_fstype = devtype;
336 vfsp->vfs_bsize = DEV_BSIZE;
337 vfsp->vfs_mtime = ddi_get_time();
338 vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devtype);
339
340 ASSERT(dv == dv->sdev_dotdot);
341
342 sdev_data->sdev_vfsp = vfsp;
343 sdev_data->sdev_root = dv;
344 sdev_data->sdev_mountargs = args;
345
346 /* get acl flavor from attribute dir */
347 if (VOP_PATHCONF(avp, _PC_ACL_ENABLED, &sdev_data->sdev_acl_flavor,
348 kcred, NULL) != 0 || sdev_data->sdev_acl_flavor == 0)
349 sdev_data->sdev_acl_flavor = _ACL_ACLENT_ENABLED;
350
351 args = NULL; /* so it won't be freed below */
352 sdev_insert_mntinfo(sdev_data);
353 mutex_exit(&sdev_lock);
354
355 if (!SDEV_IS_GLOBAL(dv)) {
356 ASSERT(sdev_origins);
357 dv->sdev_flags &= ~SDEV_GLOBAL;
358 dv->sdev_origin = sdev_origins->sdev_root;
359 SDEV_HOLD(dv->sdev_origin);
360 } else {
361 sdev_ncache_setup();
362 rw_enter(&dv->sdev_contents, RW_WRITER);
363 sdev_filldir_dynamic(dv);
364 rw_exit(&dv->sdev_contents);
365 }
366
367 sdev_update_timestamps(dv->sdev_attrvp,
368 cr, AT_CTIME|AT_MTIME|AT_ATIME);
369
370 cleanup:
371 if (args)
372 kmem_free(args, sizeof (*args));
373 return (error);
374 }
375
376 /*
377 * unmounting the non-global /dev instances, e.g. when deleting a Kevlar zone.
378 */
379 static int
sdev_unmount(struct vfs * vfsp,int flag,struct cred * cr)380 sdev_unmount(struct vfs *vfsp, int flag, struct cred *cr)
381 {
382 struct sdev_node *dv;
383 int error;
384 struct sdev_data *sdev_data, *prev, *next;
385
386 /*
387 * enforce the security policies
388 */
389 if ((secpolicy_fs_unmount(cr, vfsp) != 0) ||
390 (secpolicy_sys_devices(cr) != 0))
391 return (EPERM);
392
393 if (flag & MS_FORCE)
394 return (ENOTSUP);
395
396 mutex_enter(&sdev_lock);
397 dv = VFSTOSDEVFS(vfsp)->sdev_root;
398 ASSERT(dv == dv->sdev_dotdot);
399 if (SDEVTOV(dv)->v_count > 1) {
400 mutex_exit(&sdev_lock);
401 return (EBUSY);
402 }
403
404 /*
405 * global instance remains mounted
406 */
407 if (SDEV_IS_GLOBAL(dv)) {
408 mutex_exit(&sdev_lock);
409 return (EBUSY);
410 }
411 mutex_exit(&sdev_lock);
412
413 /* verify the v_count */
414 if ((error = sdev_cleandir(dv, NULL, 0)) != 0) {
415 return (error);
416 }
417 ASSERT(SDEVTOV(dv)->v_count == 1);
418
419 /* release hold on root node and destroy it */
420 SDEV_RELE(dv);
421 dv->sdev_nlink -= 2;
422 sdev_nodedestroy(dv, 0);
423
424 sdev_data = (struct sdev_data *)vfsp->vfs_data;
425 vfsp->vfs_data = (caddr_t)0;
426
427 /*
428 * XXX separate it into sdev_delete_mntinfo() if useful
429 */
430 mutex_enter(&sdev_lock);
431 prev = sdev_data->sdev_prev;
432 next = sdev_data->sdev_next;
433 if (prev)
434 prev->sdev_next = next;
435 else
436 sdev_mntinfo = next;
437 if (next)
438 next->sdev_prev = prev;
439 mutex_exit(&sdev_lock);
440
441 if (sdev_data->sdev_mountargs) {
442 kmem_free(sdev_data->sdev_mountargs,
443 sizeof (struct sdev_mountargs));
444 }
445 kmem_free(sdev_data, sizeof (struct sdev_data));
446 return (0);
447 }
448
449 /*
450 * return root vnode for given vfs
451 */
452 static int
sdev_root(struct vfs * vfsp,struct vnode ** vpp)453 sdev_root(struct vfs *vfsp, struct vnode **vpp)
454 {
455 *vpp = SDEVTOV(VFSTOSDEVFS(vfsp)->sdev_root);
456 VN_HOLD(*vpp);
457 return (0);
458 }
459
460 /*
461 * return 'generic superblock' information to userland.
462 *
463 * not much that we can usefully admit to here
464 */
465 static int
sdev_statvfs(struct vfs * vfsp,struct statvfs64 * sbp)466 sdev_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
467 {
468 dev32_t d32;
469
470 bzero(sbp, sizeof (*sbp));
471 sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
472 sbp->f_files = kmem_cache_stat(sdev_node_cache, "alloc");
473
474 /* no illusions that free/avail files is relevant to dev */
475 sbp->f_ffree = 0;
476 sbp->f_favail = 0;
477
478 /* no illusions that blocks are relevant to devfs */
479 sbp->f_bfree = 0;
480 sbp->f_bavail = 0;
481 sbp->f_blocks = 0;
482
483 (void) cmpldev(&d32, vfsp->vfs_dev);
484 sbp->f_fsid = d32;
485 (void) strcpy(sbp->f_basetype, vfssw[devtype].vsw_name);
486 sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
487 sbp->f_namemax = MAXNAMELEN - 1;
488 (void) strcpy(sbp->f_fstr, "dev");
489
490 return (0);
491 }
492
493 static void
sdev_insert_mntinfo(struct sdev_data * data)494 sdev_insert_mntinfo(struct sdev_data *data)
495 {
496 ASSERT(mutex_owned(&sdev_lock));
497 data->sdev_next = sdev_mntinfo;
498 data->sdev_prev = NULL;
499 if (sdev_mntinfo) {
500 sdev_mntinfo->sdev_prev = data;
501 } else {
502 sdev_origins = data;
503 }
504 sdev_mntinfo = data;
505 }
506
507 struct sdev_data *
sdev_find_mntinfo(char * mntpt)508 sdev_find_mntinfo(char *mntpt)
509 {
510 struct sdev_data *mntinfo;
511
512 mutex_enter(&sdev_lock);
513 mntinfo = sdev_mntinfo;
514 while (mntinfo) {
515 if (strcmp(mntpt, mntinfo->sdev_root->sdev_name) == 0) {
516 VN_HOLD(SDEVTOV(mntinfo->sdev_root));
517 break;
518 }
519 mntinfo = mntinfo->sdev_next;
520 }
521 mutex_exit(&sdev_lock);
522 return (mntinfo);
523 }
524
525 void
sdev_mntinfo_rele(struct sdev_data * mntinfo)526 sdev_mntinfo_rele(struct sdev_data *mntinfo)
527 {
528 vnode_t *vp;
529
530 mutex_enter(&sdev_lock);
531 vp = SDEVTOV(mntinfo->sdev_root);
532 mutex_enter(&vp->v_lock);
533 VN_RELE_LOCKED(vp);
534 mutex_exit(&vp->v_lock);
535 mutex_exit(&sdev_lock);
536 }
537
538 void
sdev_mnt_walk(void (* func)(struct sdev_node *,void *),void * arg)539 sdev_mnt_walk(void (*func)(struct sdev_node *, void *), void *arg)
540 {
541 struct sdev_data *mntinfo;
542
543 mutex_enter(&sdev_lock);
544 mntinfo = sdev_mntinfo;
545 while (mntinfo != NULL) {
546 func(mntinfo->sdev_root, arg);
547 mntinfo = mntinfo->sdev_next;
548 }
549 mutex_exit(&sdev_lock);
550 }
551