1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 * Copyright 2013 Joyent, Inc. All rights reserved.
25 */
26
27 /* vnode ops for the /dev/zvol directory */
28
29 #include <sys/types.h>
30 #include <sys/param.h>
31 #include <sys/sysmacros.h>
32 #include <sys/ddi.h>
33 #include <sys/sunndi.h>
34 #include <sys/sunldi.h>
35 #include <fs/fs_subr.h>
36 #include <sys/fs/dv_node.h>
37 #include <sys/fs/sdev_impl.h>
38 #include <sys/zfs_ioctl.h>
39 #include <sys/policy.h>
40 #include <sys/stat.h>
41 #include <sys/vfs_opreg.h>
42
/*
 * Operations vector for /dev/zvol vnodes.  Nothing visible in this file
 * assigns it; presumably it is built from devzvol_vnodeops_tbl
 * elsewhere -- TODO confirm.
 */
struct vnodeops *devzvol_vnodeops;
/* pool-config generation last handed back in zc_cookie by POOL_CONFIGS */
static uint64_t devzvol_gen = 0;
/* address (kept as an integer) of the cached, packed pool-config nvlist */
static uint64_t devzvol_zclist;
/* size in bytes of the buffer at devzvol_zclist */
static size_t devzvol_zclist_size;
/* LDI identity used when opening /dev/zfs */
static ldi_ident_t devzvol_li;
/* LDI handle on /dev/zfs; meaningful while devzvol_isopen is B_TRUE */
static ldi_handle_t devzvol_lh;
/* held around zfs ioctls and around updates of the cached pool list */
static kmutex_t devzvol_mtx;
/* B_TRUE from a successful devzvol_open_zfs() until devzvol_close_zfs() */
static boolean_t devzvol_isopen;
/* major number of the device behind devzvol_lh */
static major_t devzvol_major;

/*
 * We need to use ddi_mod* since fs/dev gets loaded early on in
 * startup(), and linking fs/dev to fs/zfs would drag in a lot of
 * other stuff (like drv/random) before the rest of the system is
 * ready to go.
 *
 * zfs_mod is the handle returned by ddi_modopen("fs/zfs"); szcm and
 * szn2m are the zvol_create_minor and zvol_name2minor entry points
 * resolved from it with ddi_modsym().  Both pointers are NULL whenever
 * /dev/zfs is not open.
 */
ddi_modhandle_t zfs_mod;
int (*szcm)(char *);
int (*szn2m)(char *, minor_t *);
62
63 int
sdev_zvol_create_minor(char * dsname)64 sdev_zvol_create_minor(char *dsname)
65 {
66 if (szcm == NULL)
67 return (-1);
68 return ((*szcm)(dsname));
69 }
70
71 int
sdev_zvol_name2minor(char * dsname,minor_t * minor)72 sdev_zvol_name2minor(char *dsname, minor_t *minor)
73 {
74 if (szn2m == NULL)
75 return (-1);
76 return ((*szn2m)(dsname, minor));
77 }
78
79 int
devzvol_open_zfs()80 devzvol_open_zfs()
81 {
82 int rc;
83 dev_t dv;
84
85 devzvol_li = ldi_ident_from_anon();
86 if (ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
87 &devzvol_lh, devzvol_li))
88 return (-1);
89 if (zfs_mod == NULL && ((zfs_mod = ddi_modopen("fs/zfs",
90 KRTLD_MODE_FIRST, &rc)) == NULL)) {
91 return (rc);
92 }
93 ASSERT(szcm == NULL && szn2m == NULL);
94 if ((szcm = (int (*)(char *))
95 ddi_modsym(zfs_mod, "zvol_create_minor", &rc)) == NULL) {
96 cmn_err(CE_WARN, "couldn't resolve zvol_create_minor");
97 return (rc);
98 }
99 if ((szn2m = (int(*)(char *, minor_t *))
100 ddi_modsym(zfs_mod, "zvol_name2minor", &rc)) == NULL) {
101 cmn_err(CE_WARN, "couldn't resolve zvol_name2minor");
102 return (rc);
103 }
104 if (ldi_get_dev(devzvol_lh, &dv))
105 return (-1);
106 devzvol_major = getmajor(dv);
107 return (0);
108 }
109
110 void
devzvol_close_zfs()111 devzvol_close_zfs()
112 {
113 szcm = NULL;
114 szn2m = NULL;
115 (void) ldi_close(devzvol_lh, FREAD|FWRITE, kcred);
116 ldi_ident_release(devzvol_li);
117 if (zfs_mod != NULL) {
118 (void) ddi_modclose(zfs_mod);
119 zfs_mod = NULL;
120 }
121 }
122
123 int
devzvol_handle_ioctl(int cmd,zfs_cmd_t * zc,size_t * alloc_size)124 devzvol_handle_ioctl(int cmd, zfs_cmd_t *zc, size_t *alloc_size)
125 {
126 uint64_t cookie;
127 int size = 8000;
128 int unused;
129 int rc;
130
131 if (cmd != ZFS_IOC_POOL_CONFIGS)
132 mutex_enter(&devzvol_mtx);
133 if (!devzvol_isopen) {
134 if ((rc = devzvol_open_zfs()) == 0) {
135 devzvol_isopen = B_TRUE;
136 } else {
137 if (cmd != ZFS_IOC_POOL_CONFIGS)
138 mutex_exit(&devzvol_mtx);
139 return (ENXIO);
140 }
141 }
142 cookie = zc->zc_cookie;
143 again:
144 zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(size,
145 KM_SLEEP);
146 zc->zc_nvlist_dst_size = size;
147 rc = ldi_ioctl(devzvol_lh, cmd, (intptr_t)zc, FKIOCTL, kcred,
148 &unused);
149 if (rc == ENOMEM) {
150 int newsize;
151 newsize = zc->zc_nvlist_dst_size;
152 ASSERT(newsize > size);
153 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
154 size = newsize;
155 zc->zc_cookie = cookie;
156 goto again;
157 }
158 if (alloc_size == NULL)
159 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
160 else
161 *alloc_size = size;
162 if (cmd != ZFS_IOC_POOL_CONFIGS)
163 mutex_exit(&devzvol_mtx);
164 return (rc);
165 }
166
167 /* figures out if the objset exists and returns its type */
168 int
devzvol_objset_check(char * dsname,dmu_objset_type_t * type)169 devzvol_objset_check(char *dsname, dmu_objset_type_t *type)
170 {
171 boolean_t ispool;
172 zfs_cmd_t *zc;
173 int rc;
174
175 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
176 (void) strlcpy(zc->zc_name, dsname, MAXPATHLEN);
177
178 ispool = (strchr(dsname, '/') == NULL) ? B_TRUE : B_FALSE;
179 if (!ispool && sdev_zvol_name2minor(dsname, NULL) == 0) {
180 sdcmn_err13(("found cached minor node"));
181 if (type)
182 *type = DMU_OST_ZVOL;
183 kmem_free(zc, sizeof (zfs_cmd_t));
184 return (0);
185 }
186 rc = devzvol_handle_ioctl(ispool ? ZFS_IOC_POOL_STATS :
187 ZFS_IOC_OBJSET_STATS, zc, NULL);
188 if (type && rc == 0)
189 *type = (ispool) ? DMU_OST_ZFS :
190 zc->zc_objset_stats.dds_type;
191 kmem_free(zc, sizeof (zfs_cmd_t));
192 return (rc);
193 }
194
195 /*
196 * returns what the zfs dataset name should be, given the /dev/zvol
197 * path and an optional name; otherwise NULL
198 */
199 char *
devzvol_make_dsname(const char * path,const char * name)200 devzvol_make_dsname(const char *path, const char *name)
201 {
202 char *dsname;
203 const char *ptr;
204 int dslen;
205
206 if (strcmp(path, ZVOL_DIR) == 0)
207 return (NULL);
208 if (name && (strcmp(name, ".") == 0 || strcmp(name, "..") == 0))
209 return (NULL);
210 ptr = path + strlen(ZVOL_DIR);
211 if (strncmp(ptr, "/dsk", 4) == 0)
212 ptr += strlen("/dsk");
213 else if (strncmp(ptr, "/rdsk", 5) == 0)
214 ptr += strlen("/rdsk");
215 else
216 return (NULL);
217 if (*ptr == '/')
218 ptr++;
219
220 dslen = strlen(ptr);
221 if (dslen)
222 dslen++; /* plus null */
223 if (name)
224 dslen += strlen(name) + 1; /* plus slash */
225 dsname = kmem_zalloc(dslen, KM_SLEEP);
226 if (*ptr) {
227 (void) strlcpy(dsname, ptr, dslen);
228 if (name)
229 (void) strlcat(dsname, "/", dslen);
230 }
231 if (name)
232 (void) strlcat(dsname, name, dslen);
233 return (dsname);
234 }
235
236 /*
237 * check if the zvol's sdev_node is still valid, which means make
238 * sure the zvol is still valid. zvol minors aren't proactively
239 * destroyed when the zvol is destroyed, so we use a validator to clean
240 * these up (in other words, when such nodes are encountered during
241 * subsequent lookup() and readdir() operations) so that only valid
242 * nodes are returned. The ordering between devname_lookup_func and
243 * devzvol_validate is a little inefficient in the case of invalid
244 * or stale nodes because devname_lookup_func calls
245 * devzvol_create_{dir, link}, then the validator says it's invalid,
246 * and then the node gets cleaned up.
247 */
248 int
devzvol_validate(struct sdev_node * dv)249 devzvol_validate(struct sdev_node *dv)
250 {
251 dmu_objset_type_t do_type;
252 char *dsname;
253 char *nm = dv->sdev_name;
254 int rc;
255
256 sdcmn_err13(("validating ('%s' '%s')", dv->sdev_path, nm));
257 /*
258 * validate only READY nodes; if someone is sitting on the
259 * directory of a dataset that just got destroyed we could
260 * get a zombie node which we just skip.
261 */
262 if (dv->sdev_state != SDEV_READY) {
263 sdcmn_err13(("skipping '%s'", nm));
264 return (SDEV_VTOR_SKIP);
265 }
266
267 if ((strcmp(dv->sdev_path, ZVOL_DIR "/dsk") == 0) ||
268 (strcmp(dv->sdev_path, ZVOL_DIR "/rdsk") == 0))
269 return (SDEV_VTOR_VALID);
270 dsname = devzvol_make_dsname(dv->sdev_path, NULL);
271 if (dsname == NULL)
272 return (SDEV_VTOR_INVALID);
273
274 rc = devzvol_objset_check(dsname, &do_type);
275 sdcmn_err13((" '%s' rc %d", dsname, rc));
276 if (rc != 0) {
277 kmem_free(dsname, strlen(dsname) + 1);
278 return (SDEV_VTOR_INVALID);
279 }
280 sdcmn_err13((" v_type %d do_type %d",
281 SDEVTOV(dv)->v_type, do_type));
282 if ((SDEVTOV(dv)->v_type == VLNK && do_type != DMU_OST_ZVOL) ||
283 ((SDEVTOV(dv)->v_type == VBLK || SDEVTOV(dv)->v_type == VCHR) &&
284 do_type != DMU_OST_ZVOL) ||
285 (SDEVTOV(dv)->v_type == VDIR && do_type == DMU_OST_ZVOL)) {
286 kmem_free(dsname, strlen(dsname) + 1);
287 return (SDEV_VTOR_STALE);
288 }
289 if (SDEVTOV(dv)->v_type == VLNK) {
290 char *ptr, *link;
291 long val = 0;
292 minor_t lminor, ominor;
293
294 rc = sdev_getlink(SDEVTOV(dv), &link);
295 ASSERT(rc == 0);
296
297 ptr = strrchr(link, ':') + 1;
298 rc = ddi_strtol(ptr, NULL, 10, &val);
299 kmem_free(link, strlen(link) + 1);
300 ASSERT(rc == 0 && val != 0);
301 lminor = (minor_t)val;
302 if (sdev_zvol_name2minor(dsname, &ominor) < 0 ||
303 ominor != lminor) {
304 kmem_free(dsname, strlen(dsname) + 1);
305 return (SDEV_VTOR_STALE);
306 }
307 }
308 kmem_free(dsname, strlen(dsname) + 1);
309 return (SDEV_VTOR_VALID);
310 }
311
312 /*
313 * creates directories as needed in response to a readdir
314 */
315 void
devzvol_create_pool_dirs(struct vnode * dvp)316 devzvol_create_pool_dirs(struct vnode *dvp)
317 {
318 zfs_cmd_t *zc;
319 nvlist_t *nv = NULL;
320 nvpair_t *elem = NULL;
321 size_t size;
322 int pools = 0;
323 int rc;
324
325 sdcmn_err13(("devzvol_create_pool_dirs"));
326 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
327 mutex_enter(&devzvol_mtx);
328 zc->zc_cookie = devzvol_gen;
329
330 rc = devzvol_handle_ioctl(ZFS_IOC_POOL_CONFIGS, zc, &size);
331 switch (rc) {
332 case 0:
333 /* new generation */
334 ASSERT(devzvol_gen != zc->zc_cookie);
335 devzvol_gen = zc->zc_cookie;
336 if (devzvol_zclist)
337 kmem_free((void *)(uintptr_t)devzvol_zclist,
338 devzvol_zclist_size);
339 devzvol_zclist = zc->zc_nvlist_dst;
340 devzvol_zclist_size = size;
341 break;
342 case EEXIST:
343 /*
344 * no change in the configuration; still need
345 * to do lookups in case we did a lookup in
346 * zvol/rdsk but not zvol/dsk (or vice versa)
347 */
348 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst,
349 size);
350 break;
351 default:
352 kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst,
353 size);
354 goto out;
355 }
356 rc = nvlist_unpack((char *)(uintptr_t)devzvol_zclist,
357 devzvol_zclist_size, &nv, 0);
358 if (rc) {
359 ASSERT(rc == 0);
360 kmem_free((void *)(uintptr_t)devzvol_zclist,
361 devzvol_zclist_size);
362 devzvol_gen = 0;
363 devzvol_zclist = NULL;
364 devzvol_zclist_size = 0;
365 goto out;
366 }
367 mutex_exit(&devzvol_mtx);
368 while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
369 struct vnode *vp;
370 ASSERT(dvp->v_count > 0);
371 rc = VOP_LOOKUP(dvp, nvpair_name(elem), &vp, NULL, 0,
372 NULL, kcred, NULL, 0, NULL);
373 /* should either work, or not be visible from a zone */
374 ASSERT(rc == 0 || rc == ENOENT);
375 if (rc == 0)
376 VN_RELE(vp);
377 pools++;
378 }
379 nvlist_free(nv);
380 mutex_enter(&devzvol_mtx);
381 if (devzvol_isopen && pools == 0) {
382 /* clean up so zfs can be unloaded */
383 devzvol_close_zfs();
384 devzvol_isopen = B_FALSE;
385 }
386 out:
387 mutex_exit(&devzvol_mtx);
388 kmem_free(zc, sizeof (zfs_cmd_t));
389 }
390
391 /*ARGSUSED3*/
392 static int
devzvol_create_dir(struct sdev_node * ddv,char * nm,void ** arg,cred_t * cred,void * whatever,char * whichever)393 devzvol_create_dir(struct sdev_node *ddv, char *nm, void **arg,
394 cred_t *cred, void *whatever, char *whichever)
395 {
396 timestruc_t now;
397 struct vattr *vap = (struct vattr *)arg;
398
399 sdcmn_err13(("create_dir (%s) (%s) '%s'", ddv->sdev_name,
400 ddv->sdev_path, nm));
401 ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR,
402 strlen(ZVOL_DIR)) == 0);
403 *vap = *sdev_getdefault_attr(VDIR);
404 gethrestime(&now);
405 vap->va_atime = now;
406 vap->va_mtime = now;
407 vap->va_ctime = now;
408 return (0);
409 }
410
411 /*ARGSUSED3*/
412 static int
devzvol_create_link(struct sdev_node * ddv,char * nm,void ** arg,cred_t * cred,void * whatever,char * whichever)413 devzvol_create_link(struct sdev_node *ddv, char *nm,
414 void **arg, cred_t *cred, void *whatever, char *whichever)
415 {
416 minor_t minor;
417 char *pathname = (char *)*arg;
418 int rc;
419 char *dsname;
420 char *x;
421 char str[MAXNAMELEN];
422 sdcmn_err13(("create_link (%s) (%s) '%s'", ddv->sdev_name,
423 ddv->sdev_path, nm));
424 dsname = devzvol_make_dsname(ddv->sdev_path, nm);
425 rc = sdev_zvol_create_minor(dsname);
426 if ((rc != 0 && rc != EEXIST && rc != EBUSY) ||
427 sdev_zvol_name2minor(dsname, &minor)) {
428 sdcmn_err13(("devzvol_create_link %d", rc));
429 kmem_free(dsname, strlen(dsname) + 1);
430 return (-1);
431 }
432 kmem_free(dsname, strlen(dsname) + 1);
433
434 /*
435 * This is a valid zvol; create a symlink that points to the
436 * minor which was created under /devices/pseudo/zfs@0
437 */
438 *pathname = '\0';
439 for (x = ddv->sdev_path; x = strchr(x, '/'); x++)
440 (void) strcat(pathname, "../");
441 (void) snprintf(str, sizeof (str), ZVOL_PSEUDO_DEV "%u", minor);
442 (void) strncat(pathname, str, MAXPATHLEN);
443 if (strncmp(ddv->sdev_path, ZVOL_FULL_RDEV_DIR,
444 strlen(ZVOL_FULL_RDEV_DIR)) == 0)
445 (void) strcat(pathname, ",raw");
446 return (0);
447 }
448
449 /* Clean zvol sdev_nodes that are no longer valid. */
450 static void
devzvol_prunedir(struct sdev_node * ddv)451 devzvol_prunedir(struct sdev_node *ddv)
452 {
453 struct sdev_node *dv;
454
455 ASSERT(RW_READ_HELD(&ddv->sdev_contents));
456
457 sdcmn_err13(("prunedir '%s'", ddv->sdev_name));
458 ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR, strlen(ZVOL_DIR)) == 0);
459 if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
460 rw_exit(&ddv->sdev_contents);
461 rw_enter(&ddv->sdev_contents, RW_WRITER);
462 }
463
464 dv = SDEV_FIRST_ENTRY(ddv);
465 while (dv) {
466 sdcmn_err13(("sdev_name '%s'", dv->sdev_name));
467
468 switch (devzvol_validate(dv)) {
469 case SDEV_VTOR_VALID:
470 case SDEV_VTOR_SKIP:
471 dv = SDEV_NEXT_ENTRY(ddv, dv);
472 continue;
473 case SDEV_VTOR_INVALID:
474 sdcmn_err7(("prunedir: destroy invalid "
475 "node: %s\n", dv->sdev_name));
476 break;
477 }
478
479 if ((SDEVTOV(dv)->v_type == VDIR) &&
480 (sdev_cleandir(dv, NULL, 0) != 0)) {
481 dv = SDEV_NEXT_ENTRY(ddv, dv);
482 continue;
483 }
484 SDEV_HOLD(dv);
485 /* remove the cache node */
486 sdev_cache_update(ddv, &dv, dv->sdev_name,
487 SDEV_CACHE_DELETE);
488 SDEV_RELE(dv);
489 dv = SDEV_FIRST_ENTRY(ddv);
490 }
491 rw_downgrade(&ddv->sdev_contents);
492 }
493
494 /*
495 * This function is used to create a dir or dev inside a zone's /dev when the
496 * zone has a zvol that is dynamically created within the zone (i.e. inside
497 * of a delegated dataset. Since there is no /devices tree within a zone,
498 * we create the chr/blk devices directly inside the zone's /dev instead of
499 * making symlinks.
500 */
501 static int
devzvol_mk_ngz_node(struct sdev_node * parent,char * nm)502 devzvol_mk_ngz_node(struct sdev_node *parent, char *nm)
503 {
504 struct vattr vattr;
505 timestruc_t now;
506 enum vtype expected_type = VDIR;
507 dmu_objset_type_t do_type;
508 struct sdev_node *dv = NULL;
509 int res;
510 char *dsname;
511
512 bzero(&vattr, sizeof (vattr));
513 gethrestime(&now);
514 vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
515 vattr.va_uid = SDEV_UID_DEFAULT;
516 vattr.va_gid = SDEV_GID_DEFAULT;
517 vattr.va_type = VNON;
518 vattr.va_atime = now;
519 vattr.va_mtime = now;
520 vattr.va_ctime = now;
521
522 if ((dsname = devzvol_make_dsname(parent->sdev_path, nm)) == NULL)
523 return (ENOENT);
524
525 if (devzvol_objset_check(dsname, &do_type) != 0) {
526 kmem_free(dsname, strlen(dsname) + 1);
527 return (ENOENT);
528 }
529 if (do_type == DMU_OST_ZVOL)
530 expected_type = VBLK;
531
532 if (expected_type == VDIR) {
533 vattr.va_type = VDIR;
534 vattr.va_mode = SDEV_DIRMODE_DEFAULT;
535 } else {
536 minor_t minor;
537 dev_t devnum;
538 int rc;
539
540 rc = sdev_zvol_create_minor(dsname);
541 if ((rc != 0 && rc != EEXIST && rc != EBUSY) ||
542 sdev_zvol_name2minor(dsname, &minor)) {
543 kmem_free(dsname, strlen(dsname) + 1);
544 return (ENOENT);
545 }
546
547 devnum = makedevice(devzvol_major, minor);
548 vattr.va_rdev = devnum;
549
550 if (strstr(parent->sdev_path, "/rdsk/") != NULL)
551 vattr.va_type = VCHR;
552 else
553 vattr.va_type = VBLK;
554 vattr.va_mode = SDEV_DEVMODE_DEFAULT;
555 }
556 kmem_free(dsname, strlen(dsname) + 1);
557
558 rw_enter(&parent->sdev_contents, RW_WRITER);
559
560 res = sdev_mknode(parent, nm, &dv, &vattr,
561 NULL, NULL, kcred, SDEV_READY);
562 rw_exit(&parent->sdev_contents);
563 if (res != 0)
564 return (ENOENT);
565
566 SDEV_RELE(dv);
567 return (0);
568 }
569
570 /*ARGSUSED*/
571 static int
devzvol_lookup(struct vnode * dvp,char * nm,struct vnode ** vpp,struct pathname * pnp,int flags,struct vnode * rdir,struct cred * cred,caller_context_t * ct,int * direntflags,pathname_t * realpnp)572 devzvol_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
573 struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
574 caller_context_t *ct, int *direntflags, pathname_t *realpnp)
575 {
576 enum vtype expected_type = VDIR;
577 struct sdev_node *parent = VTOSDEV(dvp);
578 char *dsname;
579 dmu_objset_type_t do_type;
580 int error;
581
582 sdcmn_err13(("devzvol_lookup '%s' '%s'", parent->sdev_path, nm));
583 *vpp = NULL;
584 /* execute access is required to search the directory */
585 if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
586 return (error);
587
588 rw_enter(&parent->sdev_contents, RW_READER);
589 if (SDEV_IS_GLOBAL(parent)) {
590 /*
591 * During iter_datasets, don't create GZ dev when running in
592 * NGZ. We can't return ENOENT here since that could
593 * incorrectly trigger the creation of the dev from the
594 * recursive call through prof_filldir during iter_datasets.
595 */
596 if (getzoneid() != GLOBAL_ZONEID) {
597 rw_exit(&parent->sdev_contents);
598 return (EPERM);
599 }
600 } else {
601 int res;
602
603 rw_exit(&parent->sdev_contents);
604
605 /*
606 * If we're in the global zone and reach down into a non-global
607 * zone's /dev/zvol then this action could trigger the creation
608 * of all of the zvol devices for every zone into the non-global
609 * zone's /dev tree. This could be a big security hole. To
610 * prevent this, disallow the global zone from looking inside
611 * a non-global zones /dev/zvol. This behavior is similar to
612 * delegated datasets, which cannot be used by the global zone.
613 */
614 if (getzoneid() == GLOBAL_ZONEID)
615 return (EPERM);
616
617 res = prof_lookup(dvp, nm, vpp, cred);
618
619 /*
620 * We won't find a zvol that was dynamically created inside
621 * a NGZ, within a delegated dataset, in the zone's dev profile
622 * but prof_lookup will also find it via sdev_cache_lookup.
623 */
624 if (res == ENOENT) {
625 /*
626 * We have to create the sdev node for the dymamically
627 * created zvol.
628 */
629 if (devzvol_mk_ngz_node(parent, nm) != 0)
630 return (ENOENT);
631 res = prof_lookup(dvp, nm, vpp, cred);
632 }
633
634 return (res);
635 }
636
637 dsname = devzvol_make_dsname(parent->sdev_path, nm);
638 rw_exit(&parent->sdev_contents);
639 sdcmn_err13(("rvp dsname %s", dsname ? dsname : "(null)"));
640 if (dsname) {
641 error = devzvol_objset_check(dsname, &do_type);
642 if (error != 0) {
643 error = ENOENT;
644 goto out;
645 }
646 if (do_type == DMU_OST_ZVOL)
647 expected_type = VLNK;
648 }
649 /*
650 * the callbacks expect:
651 *
652 * parent->sdev_path nm
653 * /dev/zvol {r}dsk
654 * /dev/zvol/{r}dsk <pool name>
655 * /dev/zvol/{r}dsk/<dataset name> <last ds component>
656 *
657 * sdev_name is always last path component of sdev_path
658 */
659 if (expected_type == VDIR) {
660 error = devname_lookup_func(parent, nm, vpp, cred,
661 devzvol_create_dir, SDEV_VATTR);
662 } else {
663 error = devname_lookup_func(parent, nm, vpp, cred,
664 devzvol_create_link, SDEV_VLINK);
665 }
666 sdcmn_err13(("devzvol_lookup %d %d", expected_type, error));
667 ASSERT(error || ((*vpp)->v_type == expected_type));
668 out:
669 if (dsname)
670 kmem_free(dsname, strlen(dsname) + 1);
671 sdcmn_err13(("devzvol_lookup %d", error));
672 return (error);
673 }
674
675 /*
676 * We allow create to find existing nodes
677 * - if the node doesn't exist - EROFS
678 * - creating an existing dir read-only succeeds, otherwise EISDIR
679 * - exclusive creates fail - EEXIST
680 */
681 /*ARGSUSED2*/
682 static int
devzvol_create(struct vnode * dvp,char * nm,struct vattr * vap,vcexcl_t excl,int mode,struct vnode ** vpp,struct cred * cred,int flag,caller_context_t * ct,vsecattr_t * vsecp)683 devzvol_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl,
684 int mode, struct vnode **vpp, struct cred *cred, int flag,
685 caller_context_t *ct, vsecattr_t *vsecp)
686 {
687 int error;
688 struct vnode *vp;
689
690 *vpp = NULL;
691
692 error = devzvol_lookup(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL,
693 NULL);
694 if (error == 0) {
695 if (excl == EXCL)
696 error = EEXIST;
697 else if (vp->v_type == VDIR && (mode & VWRITE))
698 error = EISDIR;
699 else
700 error = VOP_ACCESS(vp, mode, 0, cred, ct);
701
702 if (error) {
703 VN_RELE(vp);
704 } else
705 *vpp = vp;
706 } else if (error == ENOENT) {
707 error = EROFS;
708 }
709
710 return (error);
711 }
712
713 void sdev_iter_snapshots(struct vnode *dvp, char *name);
714
715 void
sdev_iter_datasets(struct vnode * dvp,int arg,char * name)716 sdev_iter_datasets(struct vnode *dvp, int arg, char *name)
717 {
718 zfs_cmd_t *zc;
719 int rc;
720
721 sdcmn_err13(("iter name is '%s' (arg %x)", name, arg));
722 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
723 (void) strcpy(zc->zc_name, name);
724
725 while ((rc = devzvol_handle_ioctl(arg, zc, B_FALSE)) == 0) {
726 struct vnode *vpp;
727 char *ptr;
728
729 sdcmn_err13((" name %s", zc->zc_name));
730 if (strchr(zc->zc_name, '$') || strchr(zc->zc_name, '%'))
731 goto skip;
732 ptr = strrchr(zc->zc_name, '/') + 1;
733 rc = devzvol_lookup(dvp, ptr, &vpp, NULL, 0, NULL,
734 kcred, NULL, NULL, NULL);
735 if (rc == 0) {
736 VN_RELE(vpp);
737 } else if (rc == ENOENT) {
738 goto skip;
739 } else {
740 /*
741 * EBUSY == problem with zvols's dmu holds?
742 * EPERM when in a NGZ and traversing up and out.
743 */
744 goto skip;
745 }
746 if (arg == ZFS_IOC_DATASET_LIST_NEXT &&
747 zc->zc_objset_stats.dds_type != DMU_OST_ZFS)
748 sdev_iter_snapshots(dvp, zc->zc_name);
749 skip:
750 (void) strcpy(zc->zc_name, name);
751 }
752 kmem_free(zc, sizeof (zfs_cmd_t));
753 }
754
/*
 * Look up, in dvp, every snapshot of the dataset called name.  This is
 * sdev_iter_datasets() driven by ZFS_IOC_SNAPSHOT_LIST_NEXT instead of
 * the dataset-list ioctl.
 */
void
sdev_iter_snapshots(struct vnode *dvp, char *name)
{
	sdev_iter_datasets(dvp, ZFS_IOC_SNAPSHOT_LIST_NEXT, name);
}
760
761 /*ARGSUSED4*/
762 static int
devzvol_readdir(struct vnode * dvp,struct uio * uiop,struct cred * cred,int * eofp,caller_context_t * ct_unused,int flags_unused)763 devzvol_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred,
764 int *eofp, caller_context_t *ct_unused, int flags_unused)
765 {
766 struct sdev_node *sdvp = VTOSDEV(dvp);
767 char *ptr;
768
769 sdcmn_err13(("zv readdir of '%s' %s'", sdvp->sdev_path,
770 sdvp->sdev_name));
771
772 if (strcmp(sdvp->sdev_path, ZVOL_DIR) == 0) {
773 struct vnode *vp;
774
775 rw_exit(&sdvp->sdev_contents);
776 (void) devname_lookup_func(sdvp, "dsk", &vp, cred,
777 devzvol_create_dir, SDEV_VATTR);
778 VN_RELE(vp);
779 (void) devname_lookup_func(sdvp, "rdsk", &vp, cred,
780 devzvol_create_dir, SDEV_VATTR);
781 VN_RELE(vp);
782 rw_enter(&sdvp->sdev_contents, RW_READER);
783 return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
784 }
785 if (uiop->uio_offset == 0)
786 devzvol_prunedir(sdvp);
787 ptr = sdvp->sdev_path + strlen(ZVOL_DIR);
788 if ((strcmp(ptr, "/dsk") == 0) || (strcmp(ptr, "/rdsk") == 0)) {
789 rw_exit(&sdvp->sdev_contents);
790 devzvol_create_pool_dirs(dvp);
791 rw_enter(&sdvp->sdev_contents, RW_READER);
792 return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
793 }
794
795 ptr = strchr(ptr + 1, '/');
796 if (ptr == NULL)
797 return (ENOENT);
798 ptr++;
799 rw_exit(&sdvp->sdev_contents);
800 sdev_iter_datasets(dvp, ZFS_IOC_DATASET_LIST_NEXT, ptr);
801 rw_enter(&sdvp->sdev_contents, RW_READER);
802 return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
803 }
804
/*
 * Vnode operation table for /dev/zvol.  readdir, lookup and create are
 * implemented in this file; rename, mkdir, rmdir, remove and symlink
 * are all routed to fs_nosys.  The trailing NULL, NULL pair terminates
 * the table.
 */
const fs_operation_def_t devzvol_vnodeops_tbl[] = {
	VOPNAME_READDIR, { .vop_readdir = devzvol_readdir },
	VOPNAME_LOOKUP, { .vop_lookup = devzvol_lookup },
	VOPNAME_CREATE, { .vop_create = devzvol_create },
	VOPNAME_RENAME, { .error = fs_nosys },
	VOPNAME_MKDIR, { .error = fs_nosys },
	VOPNAME_RMDIR, { .error = fs_nosys },
	VOPNAME_REMOVE, { .error = fs_nosys },
	VOPNAME_SYMLINK, { .error = fs_nosys },
	NULL, NULL
};
816