1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2015 Joyent, Inc. All rights reserved.
25 */
26
27 /*
28 * This is the /dev (hence, the sdev_ prefix) filesystem.
29 */
30
31 #include <sys/types.h>
32 #include <sys/param.h>
33 #include <sys/sysmacros.h>
34 #include <sys/systm.h>
35 #include <sys/kmem.h>
36 #include <sys/time.h>
37 #include <sys/pathname.h>
38 #include <sys/vfs.h>
39 #include <sys/vfs_opreg.h>
40 #include <sys/vnode.h>
41 #include <sys/file.h>
42 #include <sys/stat.h>
43 #include <sys/uio.h>
44 #include <sys/stat.h>
45 #include <sys/errno.h>
46 #include <sys/cmn_err.h>
47 #include <sys/cred.h>
48 #include <sys/statvfs.h>
49 #include <sys/policy.h>
50 #include <sys/mount.h>
51 #include <sys/debug.h>
52 #include <sys/modctl.h>
53 #include <sys/mkdev.h>
54 #include <fs/fs_subr.h>
55 #include <sys/fs/sdev_impl.h>
56 #include <sys/fs/snode.h>
57 #include <sys/fs/dv_node.h>
58 #include <sys/sunndi.h>
59 #include <sys/mntent.h>
60 #include <sys/disp.h>
61
62 /*
63 * /dev vfs operations.
64 */
65
66 /*
67 * globals
68 */
/*
 * sdev_origins: mount info for origins under /dev.  Set by
 * sdev_insert_mntinfo() to the first instance inserted while the mntinfo
 * list is empty; sdev_mount() uses its root node as the origin of
 * non-global instances.
 */
struct sdev_data *sdev_origins;
/* sdev_lock: used for mount/unmount/rename synchronization */
kmutex_t sdev_lock;
/* sdev_taskq: created on demand by sdev_mount() the first time it is NULL */
taskq_t *sdev_taskq = NULL;
72
73 /*
74 * static
75 */
static major_t devmajor;	/* the fictitious major we live on */
/*
 * The fictitious minor of this instance; advanced by sdev_mount() until an
 * unmounted device number is found.
 * NOTE(review): declared as major_t although it holds a minor number --
 * confirm whether minor_t was intended.
 */
static major_t devminor;
/* head of the doubly linked list of mounted instances (under sdev_lock) */
static struct sdev_data *sdev_mntinfo = NULL;

/* LINTED E_STATIC_UNUSED */ /* useful for debugging */
/* previous root attrvp, recorded by sdev_mount() when handling a remount */
static struct vnode *sdev_stale_attrvp;
82
/*
 * Forward declarations: the vfs operations registered by devinit(), the
 * mntinfo list helper, and devinit() itself (referenced by sdev_vfssw).
 */
static int sdev_mount(struct vfs *, struct vnode *, struct mounta *,
    struct cred *);
static int sdev_unmount(struct vfs *, int, struct cred *);
static int sdev_root(struct vfs *, struct vnode **);
static int sdev_statvfs(struct vfs *, struct statvfs64 *);
static void sdev_insert_mntinfo(struct sdev_data *);
static int devinit(int, char *);
90
/*
 * Filesystem definition for the "dev" filesystem type; handed to the module
 * framework through modlfs below.
 */
static vfsdef_t sdev_vfssw = {
	VFSDEF_VERSION,
	"dev",		/* type name string */
	devinit,	/* init routine */
	VSW_CANREMOUNT,	/* flags */
	NULL		/* mount options table prototype */
};
98
99
100 /*
101 * Module linkage information
102 */
/* filesystem module description, pointing at sdev_vfssw */
static struct modlfs modlfs = {
	&mod_fsops, "/dev filesystem", &sdev_vfssw
};

/* linkage passed to mod_install() in _init() and mod_info() in _info() */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlfs, NULL
};
110
111 int
_init(void)112 _init(void)
113 {
114 int e;
115
116 mutex_init(&sdev_lock, NULL, MUTEX_DEFAULT, NULL);
117 sdev_node_cache_init();
118 sdev_devfsadm_lockinit();
119 if ((e = mod_install(&modlinkage)) != 0) {
120 sdev_devfsadm_lockdestroy();
121 sdev_node_cache_fini();
122 mutex_destroy(&sdev_lock);
123 return (e);
124 }
125 return (0);
126 }
127
/*
 * Module unload entry point.
 *
 * The dev module remains loaded for the global /dev instance, so every
 * unload request is refused with EBUSY.
 */
int
_fini(void)
{
	const int refuse = EBUSY;

	return (refuse);
}
136
137 int
_info(struct modinfo * modinfop)138 _info(struct modinfo *modinfop)
139 {
140 return (mod_info(&modlinkage, modinfop));
141 }
142
143 /*ARGSUSED*/
144 static int
devinit(int fstype,char * name)145 devinit(int fstype, char *name)
146 {
147 static const fs_operation_def_t dev_vfsops_tbl[] = {
148 VFSNAME_MOUNT, { .vfs_mount = sdev_mount },
149 VFSNAME_UNMOUNT, { .vfs_unmount = sdev_unmount },
150 VFSNAME_ROOT, { .vfs_root = sdev_root },
151 VFSNAME_STATVFS, { .vfs_statvfs = sdev_statvfs },
152 NULL, NULL
153 };
154
155 int error;
156 extern major_t getudev(void);
157
158 devtype = fstype;
159
160 error = vfs_setfsops(fstype, dev_vfsops_tbl, NULL);
161 if (error != 0) {
162 cmn_err(CE_WARN, "devinit: bad vfs ops tbl");
163 return (error);
164 }
165
166 error = vn_make_ops("dev", sdev_vnodeops_tbl, &sdev_vnodeops);
167 if (error != 0) {
168 (void) vfs_freevfsops_by_type(fstype);
169 cmn_err(CE_WARN, "devinit: bad vnode ops tbl");
170 return (error);
171 }
172
173 if ((devmajor = getudev()) == (major_t)-1) {
174 cmn_err(CE_WARN, "%s: can't get unique dev", sdev_vfssw.name);
175 return (1);
176 }
177
178 /* initialize negative cache */
179 sdev_ncache_init();
180
181 return (0);
182 }
183
184 /*
185 * Both mount point and backing store directory name are
186 * passed in from userland
187 */
188 static int
sdev_mount(struct vfs * vfsp,struct vnode * mvp,struct mounta * uap,struct cred * cr)189 sdev_mount(struct vfs *vfsp, struct vnode *mvp, struct mounta *uap,
190 struct cred *cr)
191 {
192 struct sdev_data *sdev_data;
193 struct vnode *avp;
194 struct sdev_node *dv;
195 struct sdev_mountargs *args = NULL;
196 int error = 0;
197 dev_t devdev;
198
199 /*
200 * security check
201 */
202 if ((secpolicy_fs_mount(cr, mvp, vfsp) != 0) ||
203 (secpolicy_sys_devices(cr) != 0))
204 return (EPERM);
205
206 /*
207 * Sanity check the mount point
208 */
209 if (mvp->v_type != VDIR)
210 return (ENOTDIR);
211
212 /*
213 * Sanity Check for overlay mount.
214 */
215 mutex_enter(&mvp->v_lock);
216 if ((uap->flags & MS_OVERLAY) == 0 &&
217 (uap->flags & MS_REMOUNT) == 0 &&
218 (mvp->v_count > 1 || (mvp->v_flag & VROOT))) {
219 mutex_exit(&mvp->v_lock);
220 return (EBUSY);
221 }
222 mutex_exit(&mvp->v_lock);
223
224 args = kmem_zalloc(sizeof (*args), KM_SLEEP);
225
226 if ((uap->flags & MS_DATA) &&
227 (uap->datalen != 0 && uap->dataptr != NULL)) {
228 /* copy in the arguments */
229 if (error = sdev_copyin_mountargs(uap, args))
230 goto cleanup;
231 }
232
233 /*
234 * Sanity check the backing store
235 */
236 if (args->sdev_attrdir) {
237 /* user supplied an attribute store */
238 if (error = lookupname((char *)(uintptr_t)args->sdev_attrdir,
239 UIO_USERSPACE, FOLLOW, NULLVPP, &avp)) {
240 cmn_err(CE_NOTE, "/dev fs: lookup on attribute "
241 "directory %s failed",
242 (char *)(uintptr_t)args->sdev_attrdir);
243 goto cleanup;
244 }
245
246 if (avp->v_type != VDIR) {
247 VN_RELE(avp);
248 error = ENOTDIR;
249 goto cleanup;
250 }
251 } else {
252 /* use mountp as the attribute store */
253 avp = mvp;
254 VN_HOLD(avp);
255 }
256
257 mutex_enter(&sdev_lock);
258
259 /*
260 * Check that the taskq has been created. We can't do this in our
261 * _init or devinit because they run too early for ddi_taskq_create.
262 */
263 if (sdev_taskq == NULL) {
264 sdev_taskq = taskq_create("sdev", 1, minclsyspri, 1, 1, 0);
265 if (sdev_taskq == NULL) {
266 error = ENOMEM;
267 mutex_exit(&sdev_lock);
268 VN_RELE(avp);
269 goto cleanup;
270 }
271 }
272
273 /*
274 * handling installation
275 */
276 if (uap->flags & MS_REMOUNT) {
277 sdev_data = (struct sdev_data *)vfsp->vfs_data;
278 ASSERT(sdev_data);
279
280 dv = sdev_data->sdev_root;
281 ASSERT(dv == dv->sdev_dotdot);
282
283 /*
284 * mark all existing sdev_nodes (except root node) stale
285 */
286 sdev_stale(dv);
287
288 /* Reset previous mountargs */
289 if (sdev_data->sdev_mountargs) {
290 kmem_free(sdev_data->sdev_mountargs,
291 sizeof (struct sdev_mountargs));
292 }
293 sdev_data->sdev_mountargs = args;
294 args = NULL; /* so it won't be freed below */
295
296 sdev_stale_attrvp = dv->sdev_attrvp;
297 dv->sdev_attrvp = avp;
298 vfsp->vfs_mtime = ddi_get_time();
299
300 mutex_exit(&sdev_lock);
301 goto cleanup; /* we're done */
302 }
303
304 /*
305 * Create and initialize the vfs-private data.
306 */
307 devdev = makedevice(devmajor, devminor);
308 while (vfs_devismounted(devdev)) {
309 devminor = (devminor + 1) & MAXMIN32;
310
311 /*
312 * All the minor numbers are used up.
313 */
314 if (devminor == 0) {
315 mutex_exit(&sdev_lock);
316 VN_RELE(avp);
317 error = ENODEV;
318 goto cleanup;
319 }
320
321 devdev = makedevice(devmajor, devminor);
322 }
323
324 dv = sdev_mkroot(vfsp, devdev, mvp, avp, cr);
325 sdev_data = kmem_zalloc(sizeof (struct sdev_data), KM_SLEEP);
326 vfsp->vfs_dev = devdev;
327 vfsp->vfs_data = (caddr_t)sdev_data;
328 vfsp->vfs_fstype = devtype;
329 vfsp->vfs_bsize = DEV_BSIZE;
330 vfsp->vfs_mtime = ddi_get_time();
331 vfs_make_fsid(&vfsp->vfs_fsid, vfsp->vfs_dev, devtype);
332
333 ASSERT(dv == dv->sdev_dotdot);
334
335 sdev_data->sdev_vfsp = vfsp;
336 sdev_data->sdev_root = dv;
337 sdev_data->sdev_mountargs = args;
338
339 /* get acl flavor from attribute dir */
340 if (VOP_PATHCONF(avp, _PC_ACL_ENABLED, &sdev_data->sdev_acl_flavor,
341 kcred, NULL) != 0 || sdev_data->sdev_acl_flavor == 0)
342 sdev_data->sdev_acl_flavor = _ACL_ACLENT_ENABLED;
343
344 args = NULL; /* so it won't be freed below */
345 sdev_insert_mntinfo(sdev_data);
346 mutex_exit(&sdev_lock);
347
348 if (!SDEV_IS_GLOBAL(dv)) {
349 ASSERT(sdev_origins);
350 dv->sdev_flags &= ~SDEV_GLOBAL;
351 dv->sdev_origin = sdev_origins->sdev_root;
352 } else {
353 sdev_ncache_setup();
354 rw_enter(&dv->sdev_contents, RW_WRITER);
355 sdev_filldir_dynamic(dv);
356 rw_exit(&dv->sdev_contents);
357 }
358
359 sdev_update_timestamps(dv->sdev_attrvp,
360 cr, AT_CTIME|AT_MTIME|AT_ATIME);
361
362 cleanup:
363 if (args)
364 kmem_free(args, sizeof (*args));
365 return (error);
366 }
367
368 /*
369 * unmounting the non-global /dev instances, e.g. when deleting a Kevlar zone.
370 */
371 static int
sdev_unmount(struct vfs * vfsp,int flag,struct cred * cr)372 sdev_unmount(struct vfs *vfsp, int flag, struct cred *cr)
373 {
374 struct sdev_node *dv;
375 int error;
376 struct sdev_data *sdev_data, *prev, *next;
377
378 /*
379 * enforce the security policies
380 */
381 if ((secpolicy_fs_unmount(cr, vfsp) != 0) ||
382 (secpolicy_sys_devices(cr) != 0))
383 return (EPERM);
384
385 if (flag & MS_FORCE)
386 return (ENOTSUP);
387
388 mutex_enter(&sdev_lock);
389 dv = VFSTOSDEVFS(vfsp)->sdev_root;
390 ASSERT(dv == dv->sdev_dotdot);
391 if (SDEVTOV(dv)->v_count > 1) {
392 mutex_exit(&sdev_lock);
393 return (EBUSY);
394 }
395
396 /*
397 * global instance remains mounted
398 */
399 if (SDEV_IS_GLOBAL(dv)) {
400 mutex_exit(&sdev_lock);
401 return (EBUSY);
402 }
403 mutex_exit(&sdev_lock);
404
405 /* verify the v_count */
406 if ((error = sdev_cleandir(dv, NULL, 0)) != 0) {
407 return (error);
408 }
409 ASSERT(SDEVTOV(dv)->v_count == 1);
410
411 /* release hold on root node and destroy it */
412 SDEV_RELE(dv);
413 dv->sdev_nlink -= 2;
414 sdev_nodedestroy(dv, 0);
415
416 sdev_data = (struct sdev_data *)vfsp->vfs_data;
417 vfsp->vfs_data = (caddr_t)0;
418
419 /*
420 * XXX separate it into sdev_delete_mntinfo() if useful
421 */
422 mutex_enter(&sdev_lock);
423 prev = sdev_data->sdev_prev;
424 next = sdev_data->sdev_next;
425 if (prev)
426 prev->sdev_next = next;
427 else
428 sdev_mntinfo = next;
429 if (next)
430 next->sdev_prev = prev;
431 mutex_exit(&sdev_lock);
432
433 if (sdev_data->sdev_mountargs) {
434 kmem_free(sdev_data->sdev_mountargs,
435 sizeof (struct sdev_mountargs));
436 }
437 kmem_free(sdev_data, sizeof (struct sdev_data));
438 return (0);
439 }
440
441 /*
442 * return root vnode for given vfs
443 */
444 static int
sdev_root(struct vfs * vfsp,struct vnode ** vpp)445 sdev_root(struct vfs *vfsp, struct vnode **vpp)
446 {
447 *vpp = SDEVTOV(VFSTOSDEVFS(vfsp)->sdev_root);
448 VN_HOLD(*vpp);
449 return (0);
450 }
451
452 /*
453 * return 'generic superblock' information to userland.
454 *
455 * not much that we can usefully admit to here
456 */
457 static int
sdev_statvfs(struct vfs * vfsp,struct statvfs64 * sbp)458 sdev_statvfs(struct vfs *vfsp, struct statvfs64 *sbp)
459 {
460 dev32_t d32;
461
462 bzero(sbp, sizeof (*sbp));
463 sbp->f_frsize = sbp->f_bsize = vfsp->vfs_bsize;
464 sbp->f_files = kmem_cache_stat(sdev_node_cache, "alloc");
465
466 /* no illusions that free/avail files is relevant to dev */
467 sbp->f_ffree = 0;
468 sbp->f_favail = 0;
469
470 /* no illusions that blocks are relevant to devfs */
471 sbp->f_bfree = 0;
472 sbp->f_bavail = 0;
473 sbp->f_blocks = 0;
474
475 (void) cmpldev(&d32, vfsp->vfs_dev);
476 sbp->f_fsid = d32;
477 (void) strcpy(sbp->f_basetype, vfssw[devtype].vsw_name);
478 sbp->f_flag = vf_to_stf(vfsp->vfs_flag);
479 sbp->f_namemax = MAXNAMELEN - 1;
480 (void) strcpy(sbp->f_fstr, "dev");
481
482 return (0);
483 }
484
485 static void
sdev_insert_mntinfo(struct sdev_data * data)486 sdev_insert_mntinfo(struct sdev_data *data)
487 {
488 ASSERT(mutex_owned(&sdev_lock));
489 data->sdev_next = sdev_mntinfo;
490 data->sdev_prev = NULL;
491 if (sdev_mntinfo) {
492 sdev_mntinfo->sdev_prev = data;
493 } else {
494 sdev_origins = data;
495 }
496 sdev_mntinfo = data;
497 }
498
499 struct sdev_data *
sdev_find_mntinfo(char * mntpt)500 sdev_find_mntinfo(char *mntpt)
501 {
502 struct sdev_data *mntinfo;
503
504 mutex_enter(&sdev_lock);
505 mntinfo = sdev_mntinfo;
506 while (mntinfo) {
507 if (strcmp(mntpt, mntinfo->sdev_root->sdev_name) == 0) {
508 SDEVTOV(mntinfo->sdev_root)->v_count++;
509 break;
510 }
511 mntinfo = mntinfo->sdev_next;
512 }
513 mutex_exit(&sdev_lock);
514 return (mntinfo);
515 }
516
517 void
sdev_mntinfo_rele(struct sdev_data * mntinfo)518 sdev_mntinfo_rele(struct sdev_data *mntinfo)
519 {
520 mutex_enter(&sdev_lock);
521 SDEVTOV(mntinfo->sdev_root)->v_count--;
522 mutex_exit(&sdev_lock);
523 }
524