/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

/*
 * Dynamic directory plugin interface for sdev.
 *
 * The sdev plugin interface provides a means for a dynamic directory based on
 * in-kernel state to be created simply. Traditionally, dynamic directories
 * were built into sdev itself. While these legacy plugins are useful, it makes
 * more sense for these pieces of functionality to live with the individual
 * drivers.
 *
 * The plugin interface requires folks to implement three interfaces and
 * provides a series of callbacks that can be made in the context of those
 * interfaces to interrogate the sdev_node_t without having to leak
 * implementation details of the sdev_node_t. These interfaces are:
 *
 * o spo_validate
 *
 * Given a particular node, answer the question as to whether or not this
 * entry is still valid. Here, plugins should use the name and the dev_t
 * associated with the node to verify that it matches something that still
 * exists.
 *
 * o spo_filldir
 *
 * Fill all the entries inside of a directory. Note that some of these entries
 * may already exist.
 *
 * o spo_inactive
 *
 * The given node is no longer being used. This allows the consumer to
 * potentially tear down anything that was being held open related to this.
 * Note that this only fires when the given sdev_node_t becomes a zombie.
 *
 * During these callbacks a consumer is not allowed to register or unregister a
 * plugin, especially their own. They may call the sdev_ctx style functions.
 * All callbacks fire in a context where blocking is allowed (e.g. the spl is
 * below LOCK_LEVEL).
 *
 * When a plugin is added, we create its directory in the global zone. By doing
 * that, we ensure that something isn't already there and that nothing else can
 * come along and try and create something without our knowledge. We only have
 * to create it in the GZ and not in all other instances of sdev for two
 * reasons: first, an instance of sdev that isn't at /dev does not have dynamic
 * directories; second, an instance of sdev present in a non-global zone cannot
 * create anything on its own. Therefore, once the directory exists in the
 * global zone's instance of sdev, we know we're good to go.
 *
 * Lock Ordering
 * -------------
 *
 * The global sdev_plugin_lock must be held before any of the individual
 * sdev_plugin_t`sp_lock. Further, once any plugin related lock has been held,
 * it is not legal to take any holds on any sdev_node_t or to grab the
 * sdev_node_t`contents_lock in any way.
 */
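
/*
 * Purely as an illustrative sketch (every hypo_* name below is hypothetical
 * and not part of sdev or of any real driver), a consumer would typically
 * declare an ops vector and register it once during its setup path. Version 1
 * is the only spo_version accepted by sdev_plugin_register() below.
 *
 *	static int
 *	hypo_validate(sdev_ctx_t ctx)
 *	{
 *		return (SDEV_VTOR_VALID);
 *	}
 *
 *	static int
 *	hypo_filldir(sdev_ctx_t ctx)
 *	{
 *		return (sdev_plugin_mkdir(ctx, "example"));
 *	}
 *
 *	static void
 *	hypo_inactive(sdev_ctx_t ctx)
 *	{
 *	}
 *
 *	static sdev_plugin_ops_t hypo_ops = {
 *		.spo_version = 1,
 *		.spo_flags = 0,
 *		.spo_validate = hypo_validate,
 *		.spo_filldir = hypo_filldir,
 *		.spo_inactive = hypo_inactive
 *	};
 *
 *	int err;
 *	sdev_plugin_hdl_t hdl;
 *
 *	if ((hdl = sdev_plugin_register("hypo", &hypo_ops, &err)) == NULL)
 *		return (err);
 */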

#include <sys/types.h>
#include <sys/stat.h>
#include <sys/fs/sdev_impl.h>
#include <sys/fs/sdev_plugin.h>
#include <fs/fs_subr.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/ksynch.h>
#include <sys/sysmacros.h>
#include <sys/list.h>
#include <sys/ctype.h>

kmutex_t sdev_plugin_lock;
list_t sdev_plugin_list;
kmem_cache_t *sdev_plugin_cache;
struct vnodeops *sdev_plugin_vnops;

#define	SDEV_PLUGIN_NAMELEN	64

typedef struct sdev_plugin {
	list_node_t sp_link;
	char sp_name[SDEV_PLUGIN_NAMELEN];	/* E */
	int sp_nflags;				/* E */
	struct vnodeops *sp_vnops;		/* E */
	sdev_plugin_ops_t *sp_pops;		/* E */
	boolean_t sp_islegacy;			/* E */
	int (*sp_lvtor)(sdev_node_t *);		/* E */
	kmutex_t sp_lock;			/* Protects everything below */
	kcondvar_t sp_nodecv;
	size_t sp_nnodes;
} sdev_plugin_t;

/* ARGSUSED */
static int
sdev_plugin_cache_constructor(void *buf, void *arg, int tags)
{
	sdev_plugin_t *spp = buf;
	mutex_init(&spp->sp_lock, NULL, MUTEX_DRIVER, 0);
	cv_init(&spp->sp_nodecv, NULL, CV_DRIVER, NULL);
	return (0);
}

/* ARGSUSED */
static void
sdev_plugin_cache_destructor(void *buf, void *arg)
{
	sdev_plugin_t *spp = buf;
	cv_destroy(&spp->sp_nodecv);
	mutex_destroy(&spp->sp_lock);
}

enum vtype
sdev_ctx_vtype(sdev_ctx_t ctx)
{
	sdev_node_t *sdp = (sdev_node_t *)ctx;

	ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
	return (sdp->sdev_vnode->v_type);
}

const char *
sdev_ctx_path(sdev_ctx_t ctx)
{
	sdev_node_t *sdp = (sdev_node_t *)ctx;

	ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
	return (sdp->sdev_path);
}

const char *
sdev_ctx_name(sdev_ctx_t ctx)
{
	sdev_node_t *sdp = (sdev_node_t *)ctx;

	ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
	return (sdp->sdev_name);
}

int
sdev_ctx_minor(sdev_ctx_t ctx, minor_t *minorp)
{
	sdev_node_t *sdp = (sdev_node_t *)ctx;

	ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
	ASSERT(minorp != NULL);
	if (sdp->sdev_vnode->v_type == VCHR ||
	    sdp->sdev_vnode->v_type == VBLK) {
		*minorp = getminor(sdp->sdev_vnode->v_rdev);
		return (0);
	}

	return (ENODEV);
}

/*
 * Currently we only support passing through a single flag -- SDEV_IS_GLOBAL.
 */
sdev_ctx_flags_t
sdev_ctx_flags(sdev_ctx_t ctx)
{
	sdev_node_t *sdp = (sdev_node_t *)ctx;

	ASSERT(RW_LOCK_HELD(&sdp->sdev_contents));
	return (sdp->sdev_flags & SDEV_GLOBAL);
}

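/*
 * Purely as an illustrative sketch (the hypo_* names are hypothetical), a
 * plugin's spo_validate callback might combine the context accessors above to
 * decide whether a node still matches in-kernel state:
 *
 *	static int
 *	hypo_validate(sdev_ctx_t ctx)
 *	{
 *		minor_t minor;
 *
 *		if (sdev_ctx_vtype(ctx) == VDIR)
 *			return (SDEV_VTOR_VALID);
 *		if (sdev_ctx_minor(ctx, &minor) != 0)
 *			return (SDEV_VTOR_INVALID);
 *		return (hypo_minor_exists(sdev_ctx_name(ctx), minor) ?
 *		    SDEV_VTOR_VALID : SDEV_VTOR_INVALID);
 *	}
 */
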
/*
 * Use the same rules as zones for a name. isalphanum + '-', '_', and '.'.
 */
static int
sdev_plugin_name_isvalid(const char *c, int buflen)
{
	int i;

	for (i = 0; i < buflen; i++, c++) {
		if (*c == '\0')
			return (1);

		if (!isalnum(*c) && *c != '-' && *c != '_' && *c != '.')
			return (0);
	}
	/* Never found a null terminator */
	return (0);
}

static int
sdev_plugin_mknode(sdev_plugin_t *spp, sdev_node_t *sdvp, char *name,
    vattr_t *vap)
{
	int ret;
	sdev_node_t *svp;

	ASSERT(RW_WRITE_HELD(&sdvp->sdev_contents));
	ASSERT(spp != NULL);
	svp = sdev_cache_lookup(sdvp, name);
	if (svp != NULL) {
		SDEV_SIMPLE_RELE(svp);
		return (EEXIST);
	}

	ret = sdev_mknode(sdvp, name, &svp, vap, NULL, NULL, kcred,
	    SDEV_READY);
	if (ret != 0)
		return (ret);
	SDEV_SIMPLE_RELE(svp);

	return (0);
}

/*
 * Plugin node creation callbacks
 */
int
sdev_plugin_mkdir(sdev_ctx_t ctx, char *name)
{
	sdev_node_t *sdvp;
	timestruc_t now;
	struct vattr vap;

	if (sdev_plugin_name_isvalid(name, SDEV_PLUGIN_NAMELEN) == 0)
		return (EINVAL);

	sdvp = (sdev_node_t *)ctx;
	ASSERT(sdvp->sdev_private != NULL);
	ASSERT(RW_WRITE_HELD(&sdvp->sdev_contents));

	vap = *sdev_getdefault_attr(VDIR);
	gethrestime(&now);
	vap.va_atime = now;
	vap.va_mtime = now;
	vap.va_ctime = now;

	return (sdev_plugin_mknode(sdvp->sdev_private, sdvp, name, &vap));
}

int
sdev_plugin_mknod(sdev_ctx_t ctx, char *name, mode_t mode, dev_t dev)
{
	sdev_node_t *sdvp;
	timestruc_t now;
	struct vattr vap;
	mode_t type = mode & S_IFMT;
	mode_t access = mode & S_IAMB;

	if (sdev_plugin_name_isvalid(name, SDEV_PLUGIN_NAMELEN) == 0)
		return (EINVAL);

	sdvp = (sdev_node_t *)ctx;
	ASSERT(RW_WRITE_HELD(&sdvp->sdev_contents));

	/*
	 * Ensure only type and user/group/other permission bits are present.
	 * Do not allow setuid, setgid, etc.
	 */
	if ((mode & ~(S_IFMT | S_IAMB)) != 0)
		return (EINVAL);

	/* Disallow types other than character and block devices */
	if (type != S_IFCHR && type != S_IFBLK)
		return (EINVAL);

	/* Disallow execute bits */
	if ((access & (S_IXUSR | S_IXGRP | S_IXOTH)) != 0)
		return (EINVAL);

	/* No bits other than 0666 in access */
	ASSERT((access &
	    ~(S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH)) == 0);

	/* Default to relatively safe access bits if none specified. */
	if (access == 0)
		access = 0600;

	ASSERT(sdvp->sdev_private != NULL);

	vap = *sdev_getdefault_attr(type == S_IFCHR ? VCHR : VBLK);
	gethrestime(&now);
	vap.va_atime = now;
	vap.va_mtime = now;
	vap.va_ctime = now;
	vap.va_rdev = dev;
	vap.va_mode = type | access;

	/* Despite the similar name, this is in fact a different function */
	return (sdev_plugin_mknode(sdvp->sdev_private, sdvp, name, &vap));
}
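
/*
 * Purely as an illustrative sketch (the names, entry names, and major/minor
 * values below are hypothetical), an spo_filldir callback would use the
 * creation callbacks above to populate its directory. EEXIST just means the
 * entry is already cached and is typically not treated as an error:
 *
 *	static int
 *	hypo_filldir(sdev_ctx_t ctx)
 *	{
 *		int ret;
 *
 *		ret = sdev_plugin_mknod(ctx, "ctl", S_IFCHR | 0600,
 *		    makedevice(hypo_major, 0));
 *		if (ret != 0 && ret != EEXIST)
 *			return (ret);
 *
 *		ret = sdev_plugin_mkdir(ctx, "instances");
 *		if (ret != 0 && ret != EEXIST)
 *			return (ret);
 *
 *		return (0);
 *	}
 */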

static int
sdev_plugin_validate(sdev_node_t *sdp)
{
	int ret;
	sdev_plugin_t *spp;

	ASSERT(sdp->sdev_private != NULL);
	spp = sdp->sdev_private;
	ASSERT(spp->sp_islegacy == B_FALSE);
	ASSERT(spp->sp_pops != NULL);
	rw_enter(&sdp->sdev_contents, RW_READER);
	ret = spp->sp_pops->spo_validate((uintptr_t)sdp);
	rw_exit(&sdp->sdev_contents);
	return (ret);
}

static void
sdev_plugin_validate_dir(sdev_node_t *sdvp)
{
	int ret;
	sdev_node_t *svp, *next;

	ASSERT(RW_WRITE_HELD(&sdvp->sdev_contents));

	for (svp = SDEV_FIRST_ENTRY(sdvp); svp != NULL; svp = next) {

		next = SDEV_NEXT_ENTRY(sdvp, svp);
		ASSERT(svp->sdev_state != SDEV_ZOMBIE);
		/* skip nodes that aren't ready */
		if (svp->sdev_state == SDEV_INIT)
			continue;

		switch (sdev_plugin_validate(svp)) {
		case SDEV_VTOR_VALID:
		case SDEV_VTOR_SKIP:
			continue;
		case SDEV_VTOR_INVALID:
		case SDEV_VTOR_STALE:
			break;
		}

		SDEV_HOLD(svp);

		/*
		 * Clean out everything underneath this node before we
		 * remove it.
		 */
		if (svp->sdev_vnode->v_type == VDIR) {
			ret = sdev_cleandir(svp, NULL, 0);
			ASSERT(ret == 0);
		}
		/* remove the cache node */
		(void) sdev_cache_update(sdvp, &svp, svp->sdev_name,
		    SDEV_CACHE_DELETE);
		SDEV_RELE(svp);
	}
}

/* ARGSUSED */
static int
sdev_plugin_vop_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred,
    int *eofp, caller_context_t *ct_unused, int flags_unused)
{
	int ret;
	sdev_node_t *sdvp = VTOSDEV(dvp);
	sdev_plugin_t *spp;

	ASSERT(RW_READ_HELD(&sdvp->sdev_contents));

	/* Sanity check that we're not a zombie before we do anything else */
	if (sdvp->sdev_state == SDEV_ZOMBIE)
		return (ENOENT);

	spp = sdvp->sdev_private;
	ASSERT(spp != NULL);
	ASSERT(spp->sp_islegacy == B_FALSE);
	ASSERT(spp->sp_pops != NULL);

	if (crgetzoneid(cred) == GLOBAL_ZONEID && !SDEV_IS_GLOBAL(sdvp))
		return (EPERM);

	if (uiop->uio_offset == 0) {
		/*
		 * We upgrade to a write lock and grab the plugin's lock along
		 * the way. We're almost certainly going to get creation
		 * callbacks, so this is the only safe way to go.
		 */
		if (rw_tryupgrade(&sdvp->sdev_contents) == 0) {
			rw_exit(&sdvp->sdev_contents);
			rw_enter(&sdvp->sdev_contents, RW_WRITER);
			if (sdvp->sdev_state == SDEV_ZOMBIE) {
				rw_downgrade(&sdvp->sdev_contents);
				return (ENOENT);
			}
		}

		sdev_plugin_validate_dir(sdvp);
		ret = spp->sp_pops->spo_filldir((uintptr_t)sdvp);
		rw_downgrade(&sdvp->sdev_contents);
		if (ret != 0)
			return (ret);
	}

	return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
}

/*
 * If we don't have a callback function that returns a failure, then sdev will
 * try to create a node for us, which violates all of our basic assertions. To
 * work around that we create our own callback for devname_lookup_func which
 * always returns ENOENT: at this point the entry either was already created by
 * the filldir callback or it simply does not exist.
 */
/*ARGSUSED*/
static int
sdev_plugin_vop_lookup_cb(sdev_node_t *ddv, char *nm, void **arg, cred_t *cred,
    void *unused, char *unused2)
{
	return (ENOENT);
}

/* ARGSUSED */
static int
sdev_plugin_vop_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
    struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
    caller_context_t *ct, int *direntflags, pathname_t *realpnp)
{
	int ret;
	sdev_node_t *sdvp;
	sdev_plugin_t *spp;

	/* execute access is required to search the directory */
	if ((ret = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
		return (ret);

	sdvp = VTOSDEV(dvp);
	spp = sdvp->sdev_private;
	ASSERT(spp != NULL);
	ASSERT(spp->sp_islegacy == B_FALSE);
	ASSERT(spp->sp_pops != NULL);

	if (crgetzoneid(cred) == GLOBAL_ZONEID && !SDEV_IS_GLOBAL(sdvp))
		return (EPERM);

	/*
	 * Go straight for the write lock.
	 */
	rw_enter(&sdvp->sdev_contents, RW_WRITER);
	if (sdvp->sdev_state == SDEV_ZOMBIE) {
		rw_exit(&sdvp->sdev_contents);
		return (ENOENT);
	}
	sdev_plugin_validate_dir(sdvp);
	ret = spp->sp_pops->spo_filldir((uintptr_t)sdvp);
	rw_exit(&sdvp->sdev_contents);
	if (ret != 0)
		return (ret);

	return (devname_lookup_func(sdvp, nm, vpp, cred,
	    sdev_plugin_vop_lookup_cb, SDEV_VATTR));
}

/*
 * sdev is not a good citizen. We get inactive callbacks whenever a vnode's
 * reference count goes to zero, even though the node isn't necessarily a
 * zombie yet. As such, to make things easier for users, we only fire the
 * inactive callback when the node becomes a zombie and thus will be torn down
 * here.
 */
static void
sdev_plugin_vop_inactive_cb(struct vnode *dvp)
{
	sdev_node_t *sdp = VTOSDEV(dvp);
	sdev_plugin_t *spp = sdp->sdev_private;

	rw_enter(&sdp->sdev_contents, RW_READER);
	if (sdp->sdev_state != SDEV_ZOMBIE) {
		rw_exit(&sdp->sdev_contents);
		return;
	}
	spp->sp_pops->spo_inactive((uintptr_t)sdp);
	mutex_enter(&spp->sp_lock);
	VERIFY(spp->sp_nnodes > 0);
	spp->sp_nnodes--;
	cv_signal(&spp->sp_nodecv);
	mutex_exit(&spp->sp_lock);
	rw_exit(&sdp->sdev_contents);
}

/*ARGSUSED*/
static void
sdev_plugin_vop_inactive(struct vnode *dvp, struct cred *cred,
    caller_context_t *ct)
{
	sdev_node_t *sdp = VTOSDEV(dvp);
	sdev_plugin_t *spp = sdp->sdev_private;
	ASSERT(sdp->sdev_private != NULL);
	ASSERT(spp->sp_islegacy == B_FALSE);
	devname_inactive_func(dvp, cred, sdev_plugin_vop_inactive_cb);
}

const fs_operation_def_t sdev_plugin_vnodeops_tbl[] = {
	VOPNAME_READDIR,	{ .vop_readdir = sdev_plugin_vop_readdir },
	VOPNAME_LOOKUP,		{ .vop_lookup = sdev_plugin_vop_lookup },
	VOPNAME_INACTIVE,	{ .vop_inactive = sdev_plugin_vop_inactive },
	VOPNAME_CREATE,		{ .error = fs_nosys },
	VOPNAME_REMOVE,		{ .error = fs_nosys },
	VOPNAME_MKDIR,		{ .error = fs_nosys },
	VOPNAME_RMDIR,		{ .error = fs_nosys },
	VOPNAME_SYMLINK,	{ .error = fs_nosys },
	VOPNAME_SETSECATTR,	{ .error = fs_nosys },
	NULL, NULL
};

/*
 * construct a new template with overrides from vtab
 */
static fs_operation_def_t *
sdev_merge_vtab(const fs_operation_def_t tab[])
{
	fs_operation_def_t *new;
	const fs_operation_def_t *tab_entry;

	/* make a copy of standard vnode ops table */
	new = kmem_alloc(sdev_vnodeops_tbl_size, KM_SLEEP);
	bcopy((void *)sdev_vnodeops_tbl, new, sdev_vnodeops_tbl_size);

	/* replace the overrides from tab */
	for (tab_entry = tab; tab_entry->name != NULL; tab_entry++) {
		fs_operation_def_t *std_entry = new;
		while (std_entry->name) {
			if (strcmp(tab_entry->name, std_entry->name) == 0) {
				std_entry->func = tab_entry->func;
				break;
			}
			std_entry++;
		}
	}

	return (new);
}

/* free memory allocated by sdev_merge_vtab */
static void
sdev_free_vtab(fs_operation_def_t *new)
{
	kmem_free(new, sdev_vnodeops_tbl_size);
}

/*
 * Register a new plugin.
 */
sdev_plugin_hdl_t
sdev_plugin_register(const char *name, sdev_plugin_ops_t *ops, int *errp)
{
	char buf[sizeof ("dev")] = "";
	struct pathname pn = { 0 };
	sdev_plugin_t *spp, *iter;
	vnode_t *vp, *nvp;
	sdev_node_t *sdp, *slp;
	timestruc_t now;
	struct vattr vap;
	int ret, err;

	/*
	 * Some consumers don't care about why they failed. To keep the code
	 * simple, we'll just pretend they gave us something.
	 */
	if (errp == NULL)
		errp = &err;

	if (sdev_plugin_name_isvalid(name, SDEV_PLUGIN_NAMELEN) == 0) {
		*errp = EINVAL;
		return ((sdev_plugin_hdl_t)NULL);
	}

	if (ops->spo_version != 1) {
		*errp = EINVAL;
		return ((sdev_plugin_hdl_t)NULL);
	}

	if (ops->spo_validate == NULL || ops->spo_filldir == NULL ||
	    ops->spo_inactive == NULL) {
		*errp = EINVAL;
		return ((sdev_plugin_hdl_t)NULL);
	}

	if ((ops->spo_flags & ~SDEV_PLUGIN_FLAGS_MASK) != 0) {
		*errp = EINVAL;
		return ((sdev_plugin_hdl_t)NULL);
	}

	spp = kmem_cache_alloc(sdev_plugin_cache, KM_SLEEP);
	(void) strlcpy(spp->sp_name, name, SDEV_PLUGIN_NAMELEN);

	spp->sp_pops = ops;
	spp->sp_nflags = SDEV_DYNAMIC | SDEV_VTOR;
	if (ops->spo_flags & SDEV_PLUGIN_NO_NCACHE)
		spp->sp_nflags |= SDEV_NO_NCACHE;
	if (ops->spo_flags & SDEV_PLUGIN_SUBDIR)
		spp->sp_nflags |= SDEV_SUBDIR;
	spp->sp_vnops = sdev_plugin_vnops;
	spp->sp_islegacy = B_FALSE;
	spp->sp_lvtor = NULL;
	spp->sp_nnodes = 0;

	/*
	 * Make sure our /dev entry is unique and install it. We also need to
	 * go through and grab the sdev root node as we cannot grab any sdev
	 * node locks once we've grabbed the sdev_plugin_lock. We effectively
	 * assert that if a directory is not present in the GZ's /dev, then it
	 * doesn't exist in any of the local zones.
	 *
	 * Note that we may be in NGZ context: during a prof_filldir(".../dev/")
	 * enumeration, for example. So we have to dig as deep as lookuppnvp()
	 * to make sure we really get to the global /dev (i.e. escape both
	 * CRED() and ->u_rdir).
	 */
	(void) pn_get_buf("dev", UIO_SYSSPACE, &pn, buf, sizeof (buf));
	VN_HOLD(rootdir);
	ret = lookuppnvp(&pn, NULL, NO_FOLLOW, NULLVPP,
	    &vp, rootdir, rootdir, kcred);

	if (ret != 0) {
		*errp = ret;
		kmem_cache_free(sdev_plugin_cache, spp);
		return ((sdev_plugin_hdl_t)NULL);
	}
	/* Make sure we have the real vnode */
	if (VOP_REALVP(vp, &nvp, NULL) == 0) {
		VN_HOLD(nvp);
		VN_RELE(vp);
		vp = nvp;
		nvp = NULL;
	}
	VERIFY(vp->v_op == sdev_vnodeops);
	sdp = VTOSDEV(vp);
	rw_enter(&sdp->sdev_contents, RW_WRITER);
	slp = sdev_cache_lookup(sdp, spp->sp_name);
	if (slp != NULL) {
		SDEV_RELE(slp);
		rw_exit(&sdp->sdev_contents);
		VN_RELE(vp);
		*errp = EEXIST;
		kmem_cache_free(sdev_plugin_cache, spp);
		return ((sdev_plugin_hdl_t)NULL);
	}

	mutex_enter(&sdev_plugin_lock);
	for (iter = list_head(&sdev_plugin_list); iter != NULL;
	    iter = list_next(&sdev_plugin_list, iter)) {
		if (strcmp(spp->sp_name, iter->sp_name) == 0) {
			mutex_exit(&sdev_plugin_lock);
			rw_exit(&sdp->sdev_contents);
			VN_RELE(vp);
			*errp = EEXIST;
			kmem_cache_free(sdev_plugin_cache, spp);
			return ((sdev_plugin_hdl_t)NULL);
		}
	}

	list_insert_tail(&sdev_plugin_list, spp);
	mutex_exit(&sdev_plugin_lock);

	/*
	 * Now go ahead and create the top level directory for the global zone.
	 */
	vap = *sdev_getdefault_attr(VDIR);
	gethrestime(&now);
	vap.va_atime = now;
	vap.va_mtime = now;
	vap.va_ctime = now;

	(void) sdev_plugin_mknode(spp, sdp, spp->sp_name, &vap);

	rw_exit(&sdp->sdev_contents);
	VN_RELE(vp);

	*errp = 0;

	return ((sdev_plugin_hdl_t)spp);
}

static void
sdev_plugin_unregister_cb(sdev_node_t *rdp, void *arg)
{
	sdev_plugin_t *spp = arg;
	sdev_node_t *sdp;

	rw_enter(&rdp->sdev_contents, RW_WRITER);
	sdp = sdev_cache_lookup(rdp, spp->sp_name);
	/* If it doesn't exist, we're done here */
	if (sdp == NULL) {
		rw_exit(&rdp->sdev_contents);
		return;
	}

	/*
	 * We first delete the directory before recursively marking everything
	 * else stale. This ordering should ensure that we don't accidentally
	 * miss anything.
	 */
	sdev_cache_update(rdp, &sdp, spp->sp_name, SDEV_CACHE_DELETE);
	sdev_stale(sdp);
	SDEV_RELE(sdp);
	rw_exit(&rdp->sdev_contents);
}

int sdev_plugin_unregister_allowed;

/*
 * Remove a plugin. This will block until everything has become a zombie, thus
 * guaranteeing the caller that nothing will call into them again once this
 * call returns. Note, however, that the plugin may still be called into while
 * the call is in progress, and that while it is in progress it will block
 * other mounts.
 *
 * NB: this is not safe when used from detach() context - we will be DEVI_BUSY,
 * and other sdev threads may be waiting for this. Only use the override if
 * willing to risk it.
 */
int
sdev_plugin_unregister(sdev_plugin_hdl_t hdl)
{
	sdev_plugin_t *spp = (sdev_plugin_t *)hdl;
	if (spp->sp_islegacy)
		return (EINVAL);

	if (!sdev_plugin_unregister_allowed)
		return (EBUSY);

	mutex_enter(&sdev_plugin_lock);
	list_remove(&sdev_plugin_list, spp);
	mutex_exit(&sdev_plugin_lock);

	sdev_mnt_walk(sdev_plugin_unregister_cb, spp);
	mutex_enter(&spp->sp_lock);
	while (spp->sp_nnodes > 0)
		cv_wait(&spp->sp_nodecv, &spp->sp_lock);
	mutex_exit(&spp->sp_lock);
	kmem_cache_free(sdev_plugin_cache, spp);
	return (0);
}
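
/*
 * Purely as an illustrative sketch (hypo_hdl is hypothetical), a consumer
 * tearing itself down would do something like the following, bearing in mind
 * that the call fails with EBUSY unless sdev_plugin_unregister_allowed has
 * been set and that it blocks until all of the plugin's nodes have become
 * zombies:
 *
 *	if (hypo_hdl != NULL) {
 *		int err = sdev_plugin_unregister(hypo_hdl);
 *		if (err != 0)
 *			return (err);
 *	}
 */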

/*
 * Register an old sdev style plugin to deal with what used to be in the vtab.
 */
static int
sdev_plugin_register_legacy(struct sdev_vop_table *vtp)
{
	sdev_plugin_t *spp;

	spp = kmem_cache_alloc(sdev_plugin_cache, KM_SLEEP);
	(void) strlcpy(spp->sp_name, vtp->vt_name, SDEV_PLUGIN_NAMELEN);
	spp->sp_islegacy = B_TRUE;
	spp->sp_pops = NULL;
	spp->sp_nflags = vtp->vt_flags;
	spp->sp_lvtor = vtp->vt_vtor;
	spp->sp_nnodes = 0;

	if (vtp->vt_service != NULL) {
		fs_operation_def_t *templ;
		templ = sdev_merge_vtab(vtp->vt_service);
		if (vn_make_ops(vtp->vt_name,
		    (const fs_operation_def_t *)templ,
		    &spp->sp_vnops) != 0) {
			cmn_err(CE_WARN, "%s: malformed vnode ops\n",
			    vtp->vt_name);
			sdev_free_vtab(templ);
			kmem_cache_free(sdev_plugin_cache, spp);
			return (1);
		}

		if (vtp->vt_global_vops) {
			*(vtp->vt_global_vops) = spp->sp_vnops;
		}

		sdev_free_vtab(templ);
	} else {
		spp->sp_vnops = sdev_vnodeops;
	}

	/*
	 * No need to check for EEXIST here. These are loaded as a part of
	 * sdev's initialization function. Further, we don't have to create
	 * them as that's taken care of in sdev's mount for the GZ.
	 */
	mutex_enter(&sdev_plugin_lock);
	list_insert_tail(&sdev_plugin_list, spp);
	mutex_exit(&sdev_plugin_lock);

	return (0);
}

/*
 * We need to match off of the sdev_path, not the sdev_name. We are only
 * allowed to exist directly under /dev.
 */
static sdev_plugin_t *
sdev_match(sdev_node_t *dv)
{
	int vlen;
	const char *path;
	sdev_plugin_t *spp;

	if (strlen(dv->sdev_path) <= 5)
		return (NULL);

	if (strncmp(dv->sdev_path, "/dev/", 5) != 0)
		return (NULL);
	path = dv->sdev_path + 5;

	mutex_enter(&sdev_plugin_lock);

	for (spp = list_head(&sdev_plugin_list); spp != NULL;
	    spp = list_next(&sdev_plugin_list, spp)) {
		if (strcmp(spp->sp_name, path) == 0) {
			mutex_exit(&sdev_plugin_lock);
			return (spp);
		}

		if (spp->sp_nflags & SDEV_SUBDIR) {
			/*
			 * Match the full plugin name followed by a '/', so
			 * that only entries underneath this plugin's
			 * directory can match it.
			 */
			vlen = strlen(spp->sp_name);
			if (strncmp(spp->sp_name, path, vlen) == 0 &&
			    path[vlen] == '/') {
				mutex_exit(&sdev_plugin_lock);
				return (spp);
			}
		}
	}

	mutex_exit(&sdev_plugin_lock);
	return (NULL);
}

void
sdev_set_no_negcache(sdev_node_t *dv)
{
	char *path;
	sdev_plugin_t *spp;

	ASSERT(dv->sdev_path);
	path = dv->sdev_path + strlen("/dev/");

	mutex_enter(&sdev_plugin_lock);
	for (spp = list_head(&sdev_plugin_list); spp != NULL;
	    spp = list_next(&sdev_plugin_list, spp)) {
		if (strcmp(spp->sp_name, path) == 0) {
			if (spp->sp_nflags & SDEV_NO_NCACHE)
				dv->sdev_flags |= SDEV_NO_NCACHE;
			break;
		}
	}
	mutex_exit(&sdev_plugin_lock);
}

struct vnodeops *
sdev_get_vop(sdev_node_t *dv)
{
	char *path;
	sdev_plugin_t *spp;

	path = dv->sdev_path;
	ASSERT(path);

	/* gets the relative path to /dev/ */
	path += 5;

	if ((spp = sdev_match(dv)) != NULL) {
		dv->sdev_flags |= spp->sp_nflags;
		if (SDEV_IS_PERSIST(dv->sdev_dotdot) &&
		    (SDEV_IS_PERSIST(dv) || !SDEV_IS_DYNAMIC(dv)))
			dv->sdev_flags |= SDEV_PERSIST;
		return (spp->sp_vnops);
	}

	/* child inherits the persistence of the parent */
	if (SDEV_IS_PERSIST(dv->sdev_dotdot))
		dv->sdev_flags |= SDEV_PERSIST;
	return (sdev_vnodeops);
}

void *
sdev_get_vtor(sdev_node_t *dv)
{
	sdev_plugin_t *spp;

	if (dv->sdev_private == NULL) {
		spp = sdev_match(dv);
		if (spp == NULL)
			return (NULL);
	} else {
		spp = dv->sdev_private;
	}

	if (spp->sp_islegacy)
		return ((void *)spp->sp_lvtor);
	else
		return ((void *)sdev_plugin_validate);
}

void
sdev_plugin_nodeready(sdev_node_t *sdp)
{
	sdev_plugin_t *spp;

	ASSERT(RW_WRITE_HELD(&sdp->sdev_contents));
	ASSERT(sdp->sdev_private == NULL);

	spp = sdev_match(sdp);
	if (spp == NULL)
		return;
	if (spp->sp_islegacy)
		return;
	sdp->sdev_private = spp;
	mutex_enter(&spp->sp_lock);
	spp->sp_nnodes++;
	mutex_exit(&spp->sp_lock);
}

int
sdev_plugin_init(void)
{
	sdev_vop_table_t *vtp;
	fs_operation_def_t *templ;

	sdev_plugin_cache = kmem_cache_create("sdev_plugin",
	    sizeof (sdev_plugin_t), 0, sdev_plugin_cache_constructor,
	    sdev_plugin_cache_destructor, NULL, NULL, NULL, 0);
	if (sdev_plugin_cache == NULL)
		return (1);
	mutex_init(&sdev_plugin_lock, NULL, MUTEX_DRIVER, NULL);
	list_create(&sdev_plugin_list, sizeof (sdev_plugin_t),
	    offsetof(sdev_plugin_t, sp_link));

	/*
	 * Register all of the legacy vnops
	 */
	for (vtp = &vtab[0]; vtp->vt_name != NULL; vtp++)
		if (sdev_plugin_register_legacy(vtp) != 0)
			return (1);

	templ = sdev_merge_vtab(sdev_plugin_vnodeops_tbl);
	if (vn_make_ops("sdev_plugin",
	    (const fs_operation_def_t *)templ,
	    &sdev_plugin_vnops) != 0) {
		sdev_free_vtab(templ);
		return (1);
	}

	sdev_free_vtab(templ);
	return (0);
}