1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 *
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
25 * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
26 * LLNL-CODE-403049.
27 * Rewritten for Linux by:
28 * Rohan Puri <rohan.puri15@gmail.com>
29 * Brian Behlendorf <behlendorf1@llnl.gov>
30 * Copyright (c) 2013 by Delphix. All rights reserved.
31 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
32 * Copyright (c) 2018 George Melikov. All Rights Reserved.
33 * Copyright (c) 2019 Datto, Inc. All rights reserved.
34 * Copyright (c) 2020 The MathWorks, Inc. All rights reserved.
35 */
36
37 /*
38 * ZFS control directory (a.k.a. ".zfs")
39 *
 * This directory provides a common location for all ZFS meta-objects.
 * Currently, these are only the 'snapshot' and 'shares' directories, but this
 * may expand in the future.  The elements are built dynamically, as the
 * hierarchy does not actually exist on disk.
44 *
45 * For 'snapshot', we don't want to have all snapshots always mounted, because
46 * this would take up a huge amount of space in /etc/mnttab. We have three
47 * types of objects:
48 *
 *	ctldir ------> snapshotdir -------> snapshot
 *                                             |
 *                                             |
 *                                             V
 *                                         mounted fs
54 *
55 * The 'snapshot' node contains just enough information to lookup '..' and act
56 * as a mountpoint for the snapshot. Whenever we lookup a specific snapshot, we
57 * perform an automount of the underlying filesystem and return the
58 * corresponding inode.
59 *
 * All mounts are handled automatically by a user mode helper which invokes
 * the mount procedure.  Unmounts are handled by allowing the mount
 * point to expire so the kernel may automatically unmount it.
63 *
64 * The '.zfs', '.zfs/snapshot', and all directories created under
65 * '.zfs/snapshot' (ie: '.zfs/snapshot/<snapname>') all share the same
66 * zfsvfs_t as the head filesystem (what '.zfs' lives under).
67 *
68 * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths
69 * (ie: snapshots) are complete ZFS filesystems and have their own unique
70 * zfsvfs_t. However, the fsid reported by these mounts will be the same
71 * as that used by the parent zfsvfs_t to make NFS happy.
72 */
73
74 #include <sys/types.h>
75 #include <sys/param.h>
76 #include <sys/time.h>
77 #include <sys/sysmacros.h>
78 #include <sys/pathname.h>
79 #include <sys/vfs.h>
80 #include <sys/zfs_ctldir.h>
81 #include <sys/zfs_ioctl.h>
82 #include <sys/zfs_vfsops.h>
83 #include <sys/zfs_vnops.h>
84 #include <sys/stat.h>
85 #include <sys/dmu.h>
86 #include <sys/dmu_objset.h>
87 #include <sys/dsl_destroy.h>
88 #include <sys/dsl_deleg.h>
89 #include <sys/zpl.h>
90 #include <sys/mntent.h>
91 #include "zfs_namecheck.h"
92
/*
 * Two AVL trees are maintained which contain all currently automounted
 * snapshots.  Every automounted snapshot maps to a single zfs_snapentry_t
 * entry which MUST:
 *
 *   - be attached to both trees, and
 *   - be unique, no duplicate entries are allowed.
 *
 * The zfs_snapshots_by_name tree is indexed by the full dataset name
 * while the zfs_snapshots_by_objsetid tree is indexed by the pool (spa)
 * and the objsetid within it.  This allows for fast lookups either by
 * name or by objsetid.
 *
 * zfs_snapshot_lock guards both trees: the add/remove/rename helpers
 * assert that it is held as writer, the find helpers only assert that it
 * is held (reader or writer).
 */
static avl_tree_t zfs_snapshots_by_name;
static avl_tree_t zfs_snapshots_by_objsetid;
static krwlock_t zfs_snapshot_lock;
108
/*
 * Control Directory Tunables (.zfs)
 */

/*
 * Delay, in seconds, before an automounted snapshot is expired.  A value
 * <= 0 makes snapentry_expire() return without attempting the unmount.
 */
int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;

/*
 * When zero, mkdir/rmdir/rename under '.zfs/snapshot' are refused with
 * EACCES.
 */
static int zfs_admin_snapshot = 0;

/*
 * Not referenced in this part of the file.
 * NOTE(review): presumably controls setuid handling on snapshot
 * automounts -- confirm against the mount code.
 */
static int zfs_snapshot_no_setuid = 0;
115
/*
 * Bookkeeping record for a single automounted snapshot.  An entry is
 * linked into both zfs_snapshots_by_name (through se_node_name) and
 * zfs_snapshots_by_objsetid (through se_node_objsetid) and is freed when
 * its reference count drops to zero.  se_taskqid is only read or written
 * with se_taskqid_lock held.
 */
typedef struct {
	char		*se_name;	/* full snapshot name */
	char		*se_path;	/* full mount path */
	spa_t		*se_spa;	/* pool spa */
	uint64_t	se_objsetid;	/* snapshot objset id */
	struct dentry   *se_root_dentry; /* snapshot root dentry */
	krwlock_t	se_taskqid_lock;  /* scheduled unmount taskqid lock */
	taskqid_t	se_taskqid;	/* scheduled unmount taskqid */
	avl_node_t	se_node_name;	/* zfs_snapshots_by_name link */
	avl_node_t	se_node_objsetid; /* zfs_snapshots_by_objsetid link */
	zfs_refcount_t	se_refcount;	/* reference count */
} zfs_snapentry_t;
128
/* Forward declaration: snapentry_expire() reschedules itself through this. */
static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay);
130
131 /*
132 * Allocate a new zfs_snapentry_t being careful to make a copy of the
133 * the snapshot name and provided mount point. No reference is taken.
134 */
135 static zfs_snapentry_t *
zfsctl_snapshot_alloc(const char * full_name,const char * full_path,spa_t * spa,uint64_t objsetid,struct dentry * root_dentry)136 zfsctl_snapshot_alloc(const char *full_name, const char *full_path, spa_t *spa,
137 uint64_t objsetid, struct dentry *root_dentry)
138 {
139 zfs_snapentry_t *se;
140
141 se = kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP);
142
143 se->se_name = kmem_strdup(full_name);
144 se->se_path = kmem_strdup(full_path);
145 se->se_spa = spa;
146 se->se_objsetid = objsetid;
147 se->se_root_dentry = root_dentry;
148 se->se_taskqid = TASKQID_INVALID;
149 rw_init(&se->se_taskqid_lock, NULL, RW_DEFAULT, NULL);
150
151 zfs_refcount_create(&se->se_refcount);
152
153 return (se);
154 }
155
156 /*
157 * Free a zfs_snapentry_t the caller must ensure there are no active
158 * references.
159 */
160 static void
zfsctl_snapshot_free(zfs_snapentry_t * se)161 zfsctl_snapshot_free(zfs_snapentry_t *se)
162 {
163 zfs_refcount_destroy(&se->se_refcount);
164 kmem_strfree(se->se_name);
165 kmem_strfree(se->se_path);
166 rw_destroy(&se->se_taskqid_lock);
167
168 kmem_free(se, sizeof (zfs_snapentry_t));
169 }
170
171 /*
172 * Hold a reference on the zfs_snapentry_t.
173 */
174 static void
zfsctl_snapshot_hold(zfs_snapentry_t * se)175 zfsctl_snapshot_hold(zfs_snapentry_t *se)
176 {
177 zfs_refcount_add(&se->se_refcount, NULL);
178 }
179
180 /*
181 * Release a reference on the zfs_snapentry_t. When the number of
182 * references drops to zero the structure will be freed.
183 */
184 static void
zfsctl_snapshot_rele(zfs_snapentry_t * se)185 zfsctl_snapshot_rele(zfs_snapentry_t *se)
186 {
187 if (zfs_refcount_remove(&se->se_refcount, NULL) == 0)
188 zfsctl_snapshot_free(se);
189 }
190
191 /*
192 * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and
193 * zfs_snapshots_by_objsetid trees. While the zfs_snapentry_t is part
194 * of the trees a reference is held.
195 */
196 static void
zfsctl_snapshot_add(zfs_snapentry_t * se)197 zfsctl_snapshot_add(zfs_snapentry_t *se)
198 {
199 ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
200 zfsctl_snapshot_hold(se);
201 avl_add(&zfs_snapshots_by_name, se);
202 avl_add(&zfs_snapshots_by_objsetid, se);
203 }
204
205 /*
206 * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and
207 * zfs_snapshots_by_objsetid trees. Upon removal a reference is dropped,
208 * this can result in the structure being freed if that was the last
209 * remaining reference.
210 */
211 static void
zfsctl_snapshot_remove(zfs_snapentry_t * se)212 zfsctl_snapshot_remove(zfs_snapentry_t *se)
213 {
214 ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
215 avl_remove(&zfs_snapshots_by_name, se);
216 avl_remove(&zfs_snapshots_by_objsetid, se);
217 zfsctl_snapshot_rele(se);
218 }
219
220 /*
221 * Snapshot name comparison function for the zfs_snapshots_by_name.
222 */
223 static int
snapentry_compare_by_name(const void * a,const void * b)224 snapentry_compare_by_name(const void *a, const void *b)
225 {
226 const zfs_snapentry_t *se_a = a;
227 const zfs_snapentry_t *se_b = b;
228 int ret;
229
230 ret = strcmp(se_a->se_name, se_b->se_name);
231
232 if (ret < 0)
233 return (-1);
234 else if (ret > 0)
235 return (1);
236 else
237 return (0);
238 }
239
240 /*
241 * Snapshot name comparison function for the zfs_snapshots_by_objsetid.
242 */
243 static int
snapentry_compare_by_objsetid(const void * a,const void * b)244 snapentry_compare_by_objsetid(const void *a, const void *b)
245 {
246 const zfs_snapentry_t *se_a = a;
247 const zfs_snapentry_t *se_b = b;
248
249 if (se_a->se_spa != se_b->se_spa)
250 return ((ulong_t)se_a->se_spa < (ulong_t)se_b->se_spa ? -1 : 1);
251
252 if (se_a->se_objsetid < se_b->se_objsetid)
253 return (-1);
254 else if (se_a->se_objsetid > se_b->se_objsetid)
255 return (1);
256 else
257 return (0);
258 }
259
260 /*
261 * Find a zfs_snapentry_t in zfs_snapshots_by_name. If the snapname
262 * is found a pointer to the zfs_snapentry_t is returned and a reference
263 * taken on the structure. The caller is responsible for dropping the
264 * reference with zfsctl_snapshot_rele(). If the snapname is not found
265 * NULL will be returned.
266 */
267 static zfs_snapentry_t *
zfsctl_snapshot_find_by_name(const char * snapname)268 zfsctl_snapshot_find_by_name(const char *snapname)
269 {
270 zfs_snapentry_t *se, search;
271
272 ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
273
274 search.se_name = (char *)snapname;
275 se = avl_find(&zfs_snapshots_by_name, &search, NULL);
276 if (se)
277 zfsctl_snapshot_hold(se);
278
279 return (se);
280 }
281
282 /*
283 * Find a zfs_snapentry_t in zfs_snapshots_by_objsetid given the objset id
284 * rather than the snapname. In all other respects it behaves the same
285 * as zfsctl_snapshot_find_by_name().
286 */
287 static zfs_snapentry_t *
zfsctl_snapshot_find_by_objsetid(spa_t * spa,uint64_t objsetid)288 zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid)
289 {
290 zfs_snapentry_t *se, search;
291
292 ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
293
294 search.se_spa = spa;
295 search.se_objsetid = objsetid;
296 se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL);
297 if (se)
298 zfsctl_snapshot_hold(se);
299
300 return (se);
301 }
302
303 /*
304 * Rename a zfs_snapentry_t in the zfs_snapshots_by_name. The structure is
305 * removed, renamed, and added back to the new correct location in the tree.
306 */
307 static int
zfsctl_snapshot_rename(const char * old_snapname,const char * new_snapname)308 zfsctl_snapshot_rename(const char *old_snapname, const char *new_snapname)
309 {
310 zfs_snapentry_t *se;
311
312 ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
313
314 se = zfsctl_snapshot_find_by_name(old_snapname);
315 if (se == NULL)
316 return (SET_ERROR(ENOENT));
317
318 zfsctl_snapshot_remove(se);
319 kmem_strfree(se->se_name);
320 se->se_name = kmem_strdup(new_snapname);
321 zfsctl_snapshot_add(se);
322 zfsctl_snapshot_rele(se);
323
324 return (0);
325 }
326
327 /*
328 * Delayed task responsible for unmounting an expired automounted snapshot.
329 */
330 static void
snapentry_expire(void * data)331 snapentry_expire(void *data)
332 {
333 zfs_snapentry_t *se = (zfs_snapentry_t *)data;
334 spa_t *spa = se->se_spa;
335 uint64_t objsetid = se->se_objsetid;
336
337 if (zfs_expire_snapshot <= 0) {
338 zfsctl_snapshot_rele(se);
339 return;
340 }
341
342 rw_enter(&se->se_taskqid_lock, RW_WRITER);
343 se->se_taskqid = TASKQID_INVALID;
344 rw_exit(&se->se_taskqid_lock);
345 (void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE);
346 zfsctl_snapshot_rele(se);
347
348 /*
349 * Reschedule the unmount if the zfs_snapentry_t wasn't removed.
350 * This can occur when the snapshot is busy.
351 */
352 rw_enter(&zfs_snapshot_lock, RW_READER);
353 if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
354 zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
355 zfsctl_snapshot_rele(se);
356 }
357 rw_exit(&zfs_snapshot_lock);
358 }
359
360 /*
361 * Cancel an automatic unmount of a snapname. This callback is responsible
362 * for dropping the reference on the zfs_snapentry_t which was taken when
363 * during dispatch.
364 */
365 static void
zfsctl_snapshot_unmount_cancel(zfs_snapentry_t * se)366 zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
367 {
368 int err = 0;
369 rw_enter(&se->se_taskqid_lock, RW_WRITER);
370 err = taskq_cancel_id(system_delay_taskq, se->se_taskqid);
371 /*
372 * if we get ENOENT, the taskq couldn't be found to be
373 * canceled, so we can just mark it as invalid because
374 * it's already gone. If we got EBUSY, then we already
375 * blocked until it was gone _anyway_, so we don't care.
376 */
377 se->se_taskqid = TASKQID_INVALID;
378 rw_exit(&se->se_taskqid_lock);
379 if (err == 0) {
380 zfsctl_snapshot_rele(se);
381 }
382 }
383
384 /*
385 * Dispatch the unmount task for delayed handling with a hold protecting it.
386 */
387 static void
zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t * se,int delay)388 zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
389 {
390
391 if (delay <= 0)
392 return;
393
394 zfsctl_snapshot_hold(se);
395 rw_enter(&se->se_taskqid_lock, RW_WRITER);
396 /*
397 * If this condition happens, we managed to:
398 * - dispatch once
399 * - want to dispatch _again_ before it returned
400 *
401 * So let's just return - if that task fails at unmounting,
402 * we'll eventually dispatch again, and if it succeeds,
403 * no problem.
404 */
405 if (se->se_taskqid != TASKQID_INVALID) {
406 rw_exit(&se->se_taskqid_lock);
407 zfsctl_snapshot_rele(se);
408 return;
409 }
410 se->se_taskqid = taskq_dispatch_delay(system_delay_taskq,
411 snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
412 rw_exit(&se->se_taskqid_lock);
413 }
414
415 /*
416 * Schedule an automatic unmount of objset id to occur in delay seconds from
417 * now. Any previous delayed unmount will be cancelled in favor of the
418 * updated deadline. A reference is taken by zfsctl_snapshot_find_by_name()
419 * and held until the outstanding task is handled or cancelled.
420 */
421 int
zfsctl_snapshot_unmount_delay(spa_t * spa,uint64_t objsetid,int delay)422 zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay)
423 {
424 zfs_snapentry_t *se;
425 int error = ENOENT;
426
427 rw_enter(&zfs_snapshot_lock, RW_READER);
428 if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
429 zfsctl_snapshot_unmount_cancel(se);
430 zfsctl_snapshot_unmount_delay_impl(se, delay);
431 zfsctl_snapshot_rele(se);
432 error = 0;
433 }
434 rw_exit(&zfs_snapshot_lock);
435
436 return (error);
437 }
438
439 /*
440 * Check if snapname is currently mounted. Returned non-zero when mounted
441 * and zero when unmounted.
442 */
443 static boolean_t
zfsctl_snapshot_ismounted(const char * snapname)444 zfsctl_snapshot_ismounted(const char *snapname)
445 {
446 zfs_snapentry_t *se;
447 boolean_t ismounted = B_FALSE;
448
449 rw_enter(&zfs_snapshot_lock, RW_READER);
450 if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) {
451 zfsctl_snapshot_rele(se);
452 ismounted = B_TRUE;
453 }
454 rw_exit(&zfs_snapshot_lock);
455
456 return (ismounted);
457 }
458
459 /*
460 * Check if the given inode is a part of the virtual .zfs directory.
461 */
462 boolean_t
zfsctl_is_node(struct inode * ip)463 zfsctl_is_node(struct inode *ip)
464 {
465 return (ITOZ(ip)->z_is_ctldir);
466 }
467
468 /*
469 * Check if the given inode is a .zfs/snapshots/snapname directory.
470 */
471 boolean_t
zfsctl_is_snapdir(struct inode * ip)472 zfsctl_is_snapdir(struct inode *ip)
473 {
474 return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS));
475 }
476
477 /*
478 * Allocate a new inode with the passed id and ops.
479 */
480 static struct inode *
zfsctl_inode_alloc(zfsvfs_t * zfsvfs,uint64_t id,const struct file_operations * fops,const struct inode_operations * ops,uint64_t creation)481 zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
482 const struct file_operations *fops, const struct inode_operations *ops,
483 uint64_t creation)
484 {
485 struct inode *ip;
486 znode_t *zp;
487 inode_timespec_t now = {.tv_sec = creation};
488
489 ip = new_inode(zfsvfs->z_sb);
490 if (ip == NULL)
491 return (NULL);
492
493 if (!creation)
494 now = current_time(ip);
495 zp = ITOZ(ip);
496 ASSERT3P(zp->z_dirlocks, ==, NULL);
497 ASSERT3P(zp->z_acl_cached, ==, NULL);
498 ASSERT3P(zp->z_xattr_cached, ==, NULL);
499 zp->z_id = id;
500 zp->z_unlinked = B_FALSE;
501 zp->z_atime_dirty = B_FALSE;
502 zp->z_zn_prefetch = B_FALSE;
503 zp->z_is_sa = B_FALSE;
504 zp->z_is_ctldir = B_TRUE;
505 zp->z_sa_hdl = NULL;
506 zp->z_blksz = 0;
507 zp->z_seq = 0;
508 zp->z_mapcnt = 0;
509 zp->z_size = 0;
510 zp->z_pflags = 0;
511 zp->z_mode = 0;
512 zp->z_sync_cnt = 0;
513 zp->z_sync_writes_cnt = 0;
514 zp->z_async_writes_cnt = 0;
515 ip->i_generation = 0;
516 ip->i_ino = id;
517 ip->i_mode = (S_IFDIR | S_IRWXUGO);
518 ip->i_uid = SUID_TO_KUID(0);
519 ip->i_gid = SGID_TO_KGID(0);
520 ip->i_blkbits = SPA_MINBLOCKSHIFT;
521 zpl_inode_set_atime_to_ts(ip, now);
522 zpl_inode_set_mtime_to_ts(ip, now);
523 zpl_inode_set_ctime_to_ts(ip, now);
524 ip->i_fop = fops;
525 ip->i_op = ops;
526 #if defined(IOP_XATTR)
527 ip->i_opflags &= ~IOP_XATTR;
528 #endif
529
530 if (insert_inode_locked(ip)) {
531 unlock_new_inode(ip);
532 iput(ip);
533 return (NULL);
534 }
535
536 mutex_enter(&zfsvfs->z_znodes_lock);
537 list_insert_tail(&zfsvfs->z_all_znodes, zp);
538 membar_producer();
539 mutex_exit(&zfsvfs->z_znodes_lock);
540
541 unlock_new_inode(ip);
542
543 return (ip);
544 }
545
546 /*
547 * Lookup the inode with given id, it will be allocated if needed.
548 */
549 static struct inode *
zfsctl_inode_lookup(zfsvfs_t * zfsvfs,uint64_t id,const struct file_operations * fops,const struct inode_operations * ops)550 zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id,
551 const struct file_operations *fops, const struct inode_operations *ops)
552 {
553 struct inode *ip = NULL;
554 uint64_t creation = 0;
555 dsl_dataset_t *snap_ds;
556 dsl_pool_t *pool;
557
558 while (ip == NULL) {
559 ip = ilookup(zfsvfs->z_sb, (unsigned long)id);
560 if (ip)
561 break;
562
563 if (id <= ZFSCTL_INO_SNAPDIRS && !creation) {
564 pool = dmu_objset_pool(zfsvfs->z_os);
565 dsl_pool_config_enter(pool, FTAG);
566 if (!dsl_dataset_hold_obj(pool,
567 ZFSCTL_INO_SNAPDIRS - id, FTAG, &snap_ds)) {
568 creation = dsl_get_creation(snap_ds);
569 dsl_dataset_rele(snap_ds, FTAG);
570 }
571 dsl_pool_config_exit(pool, FTAG);
572 }
573
574 /* May fail due to concurrent zfsctl_inode_alloc() */
575 ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops, creation);
576 }
577
578 return (ip);
579 }
580
581 /*
582 * Create the '.zfs' directory. This directory is cached as part of the VFS
583 * structure. This results in a hold on the zfsvfs_t. The code in zfs_umount()
584 * therefore checks against a vfs_count of 2 instead of 1. This reference
585 * is removed when the ctldir is destroyed in the unmount. All other entities
586 * under the '.zfs' directory are created dynamically as needed.
587 *
588 * Because the dynamically created '.zfs' directory entries assume the use
589 * of 64-bit inode numbers this support must be disabled on 32-bit systems.
590 */
591 int
zfsctl_create(zfsvfs_t * zfsvfs)592 zfsctl_create(zfsvfs_t *zfsvfs)
593 {
594 ASSERT(zfsvfs->z_ctldir == NULL);
595
596 zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT,
597 &zpl_fops_root, &zpl_ops_root, 0);
598 if (zfsvfs->z_ctldir == NULL)
599 return (SET_ERROR(ENOENT));
600
601 return (0);
602 }
603
604 /*
605 * Destroy the '.zfs' directory or remove a snapshot from zfs_snapshots_by_name.
606 * Only called when the filesystem is unmounted.
607 */
608 void
zfsctl_destroy(zfsvfs_t * zfsvfs)609 zfsctl_destroy(zfsvfs_t *zfsvfs)
610 {
611 if (zfsvfs->z_issnap) {
612 zfs_snapentry_t *se;
613 spa_t *spa = zfsvfs->z_os->os_spa;
614 uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
615
616 rw_enter(&zfs_snapshot_lock, RW_WRITER);
617 se = zfsctl_snapshot_find_by_objsetid(spa, objsetid);
618 if (se != NULL)
619 zfsctl_snapshot_remove(se);
620 rw_exit(&zfs_snapshot_lock);
621 if (se != NULL) {
622 zfsctl_snapshot_unmount_cancel(se);
623 zfsctl_snapshot_rele(se);
624 }
625 } else if (zfsvfs->z_ctldir) {
626 iput(zfsvfs->z_ctldir);
627 zfsvfs->z_ctldir = NULL;
628 }
629 }
630
631 /*
632 * Given a root znode, retrieve the associated .zfs directory.
633 * Add a hold to the vnode and return it.
634 */
635 struct inode *
zfsctl_root(znode_t * zp)636 zfsctl_root(znode_t *zp)
637 {
638 ASSERT(zfs_has_ctldir(zp));
639 /* Must have an existing ref, so igrab() cannot return NULL */
640 VERIFY3P(igrab(ZTOZSB(zp)->z_ctldir), !=, NULL);
641 return (ZTOZSB(zp)->z_ctldir);
642 }
643
644 /*
645 * Generate a long fid to indicate a snapdir. We encode whether snapdir is
646 * already mounted in gen field. We do this because nfsd lookup will not
647 * trigger automount. Next time the nfsd does fh_to_dentry, we will notice
648 * this and do automount and return ESTALE to force nfsd revalidate and follow
649 * mount.
650 */
651 static int
zfsctl_snapdir_fid(struct inode * ip,fid_t * fidp)652 zfsctl_snapdir_fid(struct inode *ip, fid_t *fidp)
653 {
654 zfid_short_t *zfid = (zfid_short_t *)fidp;
655 zfid_long_t *zlfid = (zfid_long_t *)fidp;
656 uint32_t gen = 0;
657 uint64_t object;
658 uint64_t objsetid;
659 int i;
660 struct dentry *dentry;
661
662 if (fidp->fid_len < LONG_FID_LEN) {
663 fidp->fid_len = LONG_FID_LEN;
664 return (SET_ERROR(ENOSPC));
665 }
666
667 object = ip->i_ino;
668 objsetid = ZFSCTL_INO_SNAPDIRS - ip->i_ino;
669 zfid->zf_len = LONG_FID_LEN;
670
671 dentry = d_obtain_alias(igrab(ip));
672 if (!IS_ERR(dentry)) {
673 gen = !!d_mountpoint(dentry);
674 dput(dentry);
675 }
676
677 for (i = 0; i < sizeof (zfid->zf_object); i++)
678 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
679
680 for (i = 0; i < sizeof (zfid->zf_gen); i++)
681 zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
682
683 for (i = 0; i < sizeof (zlfid->zf_setid); i++)
684 zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
685
686 for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
687 zlfid->zf_setgen[i] = 0;
688
689 return (0);
690 }
691
692 /*
693 * Generate an appropriate fid for an entry in the .zfs directory.
694 */
695 int
zfsctl_fid(struct inode * ip,fid_t * fidp)696 zfsctl_fid(struct inode *ip, fid_t *fidp)
697 {
698 znode_t *zp = ITOZ(ip);
699 zfsvfs_t *zfsvfs = ITOZSB(ip);
700 uint64_t object = zp->z_id;
701 zfid_short_t *zfid;
702 int i;
703 int error;
704
705 if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
706 return (error);
707
708 if (zfsctl_is_snapdir(ip)) {
709 zfs_exit(zfsvfs, FTAG);
710 return (zfsctl_snapdir_fid(ip, fidp));
711 }
712
713 if (fidp->fid_len < SHORT_FID_LEN) {
714 fidp->fid_len = SHORT_FID_LEN;
715 zfs_exit(zfsvfs, FTAG);
716 return (SET_ERROR(ENOSPC));
717 }
718
719 zfid = (zfid_short_t *)fidp;
720
721 zfid->zf_len = SHORT_FID_LEN;
722
723 for (i = 0; i < sizeof (zfid->zf_object); i++)
724 zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
725
726 /* .zfs znodes always have a generation number of 0 */
727 for (i = 0; i < sizeof (zfid->zf_gen); i++)
728 zfid->zf_gen[i] = 0;
729
730 zfs_exit(zfsvfs, FTAG);
731 return (0);
732 }
733
734 /*
735 * Construct a full dataset name in full_name: "pool/dataset@snap_name"
736 */
737 static int
zfsctl_snapshot_name(zfsvfs_t * zfsvfs,const char * snap_name,int len,char * full_name)738 zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len,
739 char *full_name)
740 {
741 objset_t *os = zfsvfs->z_os;
742
743 if (zfs_component_namecheck(snap_name, NULL, NULL) != 0)
744 return (SET_ERROR(EILSEQ));
745
746 dmu_objset_name(os, full_name);
747 if ((strlen(full_name) + 1 + strlen(snap_name)) >= len)
748 return (SET_ERROR(ENAMETOOLONG));
749
750 (void) strcat(full_name, "@");
751 (void) strcat(full_name, snap_name);
752
753 return (0);
754 }
755
756 /*
757 * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/"
758 */
759 static int
zfsctl_snapshot_path_objset(zfsvfs_t * zfsvfs,uint64_t objsetid,int path_len,char * full_path)760 zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid,
761 int path_len, char *full_path)
762 {
763 objset_t *os = zfsvfs->z_os;
764 fstrans_cookie_t cookie;
765 char *snapname;
766 boolean_t case_conflict;
767 uint64_t id, pos = 0;
768 int error = 0;
769
770 cookie = spl_fstrans_mark();
771 snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
772
773 while (error == 0) {
774 dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
775 error = dmu_snapshot_list_next(zfsvfs->z_os,
776 ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos,
777 &case_conflict);
778 dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
779 if (error)
780 goto out;
781
782 if (id == objsetid)
783 break;
784 }
785
786 mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
787 if (zfsvfs->z_vfs->vfs_mntpoint != NULL) {
788 snprintf(full_path, path_len, "%s/.zfs/snapshot/%s",
789 zfsvfs->z_vfs->vfs_mntpoint, snapname);
790 } else
791 error = SET_ERROR(ENOENT);
792 mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
793
794 out:
795 kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
796 spl_fstrans_unmark(cookie);
797
798 return (error);
799 }
800
801 /*
802 * Special case the handling of "..".
803 */
804 int
zfsctl_root_lookup(struct inode * dip,const char * name,struct inode ** ipp,int flags,cred_t * cr,int * direntflags,pathname_t * realpnp)805 zfsctl_root_lookup(struct inode *dip, const char *name, struct inode **ipp,
806 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
807 {
808 zfsvfs_t *zfsvfs = ITOZSB(dip);
809 int error = 0;
810
811 if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
812 return (error);
813
814 if (zfsvfs->z_show_ctldir == ZFS_SNAPDIR_DISABLED) {
815 *ipp = NULL;
816 } else if (strcmp(name, "..") == 0) {
817 *ipp = dip->i_sb->s_root->d_inode;
818 } else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) {
819 *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR,
820 &zpl_fops_snapdir, &zpl_ops_snapdir);
821 } else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) {
822 *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SHARES,
823 &zpl_fops_shares, &zpl_ops_shares);
824 } else {
825 *ipp = NULL;
826 }
827
828 if (*ipp == NULL)
829 error = SET_ERROR(ENOENT);
830
831 zfs_exit(zfsvfs, FTAG);
832
833 return (error);
834 }
835
836 /*
837 * Lookup entry point for the 'snapshot' directory. Try to open the
838 * snapshot if it exist, creating the pseudo filesystem inode as necessary.
839 */
840 int
zfsctl_snapdir_lookup(struct inode * dip,const char * name,struct inode ** ipp,int flags,cred_t * cr,int * direntflags,pathname_t * realpnp)841 zfsctl_snapdir_lookup(struct inode *dip, const char *name, struct inode **ipp,
842 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
843 {
844 zfsvfs_t *zfsvfs = ITOZSB(dip);
845 uint64_t id;
846 int error;
847
848 if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
849 return (error);
850
851 error = dmu_snapshot_lookup(zfsvfs->z_os, name, &id);
852 if (error) {
853 zfs_exit(zfsvfs, FTAG);
854 return (error);
855 }
856
857 *ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIRS - id,
858 &simple_dir_operations, &simple_dir_inode_operations);
859 if (*ipp == NULL)
860 error = SET_ERROR(ENOENT);
861
862 zfs_exit(zfsvfs, FTAG);
863
864 return (error);
865 }
866
867 /*
868 * Renaming a directory under '.zfs/snapshot' will automatically trigger
869 * a rename of the snapshot to the new given name. The rename is confined
870 * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere.
871 */
872 int
zfsctl_snapdir_rename(struct inode * sdip,const char * snm,struct inode * tdip,const char * tnm,cred_t * cr,int flags)873 zfsctl_snapdir_rename(struct inode *sdip, const char *snm,
874 struct inode *tdip, const char *tnm, cred_t *cr, int flags)
875 {
876 zfsvfs_t *zfsvfs = ITOZSB(sdip);
877 char *to, *from, *real, *fsname;
878 int error;
879
880 if (!zfs_admin_snapshot)
881 return (SET_ERROR(EACCES));
882
883 if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
884 return (error);
885
886 to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
887 from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
888 real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
889 fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
890
891 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
892 error = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
893 ZFS_MAX_DATASET_NAME_LEN, NULL);
894 if (error == 0) {
895 snm = real;
896 } else if (error != ENOTSUP) {
897 goto out;
898 }
899 }
900
901 dmu_objset_name(zfsvfs->z_os, fsname);
902
903 error = zfsctl_snapshot_name(ITOZSB(sdip), snm,
904 ZFS_MAX_DATASET_NAME_LEN, from);
905 if (error == 0)
906 error = zfsctl_snapshot_name(ITOZSB(tdip), tnm,
907 ZFS_MAX_DATASET_NAME_LEN, to);
908 if (error == 0)
909 error = zfs_secpolicy_rename_perms(from, to, cr);
910 if (error != 0)
911 goto out;
912
913 /*
914 * Cannot move snapshots out of the snapdir.
915 */
916 if (sdip != tdip) {
917 error = SET_ERROR(EINVAL);
918 goto out;
919 }
920
921 /*
922 * No-op when names are identical.
923 */
924 if (strcmp(snm, tnm) == 0) {
925 error = 0;
926 goto out;
927 }
928
929 rw_enter(&zfs_snapshot_lock, RW_WRITER);
930
931 error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE);
932 if (error == 0)
933 (void) zfsctl_snapshot_rename(snm, tnm);
934
935 rw_exit(&zfs_snapshot_lock);
936 out:
937 kmem_free(from, ZFS_MAX_DATASET_NAME_LEN);
938 kmem_free(to, ZFS_MAX_DATASET_NAME_LEN);
939 kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
940 kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
941
942 zfs_exit(zfsvfs, FTAG);
943
944 return (error);
945 }
946
947 /*
948 * Removing a directory under '.zfs/snapshot' will automatically trigger
949 * the removal of the snapshot with the given name.
950 */
951 int
zfsctl_snapdir_remove(struct inode * dip,const char * name,cred_t * cr,int flags)952 zfsctl_snapdir_remove(struct inode *dip, const char *name, cred_t *cr,
953 int flags)
954 {
955 zfsvfs_t *zfsvfs = ITOZSB(dip);
956 char *snapname, *real;
957 int error;
958
959 if (!zfs_admin_snapshot)
960 return (SET_ERROR(EACCES));
961
962 if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
963 return (error);
964
965 snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
966 real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
967
968 if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
969 error = dmu_snapshot_realname(zfsvfs->z_os, name, real,
970 ZFS_MAX_DATASET_NAME_LEN, NULL);
971 if (error == 0) {
972 name = real;
973 } else if (error != ENOTSUP) {
974 goto out;
975 }
976 }
977
978 error = zfsctl_snapshot_name(ITOZSB(dip), name,
979 ZFS_MAX_DATASET_NAME_LEN, snapname);
980 if (error == 0)
981 error = zfs_secpolicy_destroy_perms(snapname, cr);
982 if (error != 0)
983 goto out;
984
985 error = zfsctl_snapshot_unmount(snapname, MNT_FORCE);
986 if ((error == 0) || (error == ENOENT))
987 error = dsl_destroy_snapshot(snapname, B_FALSE);
988 out:
989 kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
990 kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
991
992 zfs_exit(zfsvfs, FTAG);
993
994 return (error);
995 }
996
997 /*
998 * Creating a directory under '.zfs/snapshot' will automatically trigger
999 * the creation of a new snapshot with the given name.
1000 */
1001 int
zfsctl_snapdir_mkdir(struct inode * dip,const char * dirname,vattr_t * vap,struct inode ** ipp,cred_t * cr,int flags)1002 zfsctl_snapdir_mkdir(struct inode *dip, const char *dirname, vattr_t *vap,
1003 struct inode **ipp, cred_t *cr, int flags)
1004 {
1005 zfsvfs_t *zfsvfs = ITOZSB(dip);
1006 char *dsname;
1007 int error;
1008
1009 if (!zfs_admin_snapshot)
1010 return (SET_ERROR(EACCES));
1011
1012 dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
1013
1014 if (zfs_component_namecheck(dirname, NULL, NULL) != 0) {
1015 error = SET_ERROR(EILSEQ);
1016 goto out;
1017 }
1018
1019 dmu_objset_name(zfsvfs->z_os, dsname);
1020
1021 error = zfs_secpolicy_snapshot_perms(dsname, cr);
1022 if (error != 0)
1023 goto out;
1024
1025 if (error == 0) {
1026 error = dmu_objset_snapshot_one(dsname, dirname);
1027 if (error != 0)
1028 goto out;
1029
1030 error = zfsctl_snapdir_lookup(dip, dirname, ipp,
1031 0, cr, NULL, NULL);
1032 }
1033 out:
1034 kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN);
1035
1036 return (error);
1037 }
1038
1039 /*
1040 * Flush everything out of the kernel's export table and such.
1041 * This is needed as once the snapshot is used over NFS, its
1042 * entries in svc_export and svc_expkey caches hold reference
1043 * to the snapshot mount point. There is no known way of flushing
1044 * only the entries related to the snapshot.
1045 */
1046 static void
exportfs_flush(void)1047 exportfs_flush(void)
1048 {
1049 char *argv[] = { "/usr/sbin/exportfs", "-f", NULL };
1050 char *envp[] = { NULL };
1051
1052 (void) call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
1053 }
1054
1055 /*
1056 * Returns the path in char format for given struct path. Uses
1057 * d_path exported by kernel to convert struct path to char
1058 * format. Returns the correct path for mountpoints and chroot
1059 * environments.
1060 *
1061 * If chroot environment has directories that are mounted with
1062 * --bind or --rbind flag, d_path returns the complete path inside
1063 * chroot environment but does not return the absolute path, i.e.
1064 * the path to chroot environment is missing.
1065 */
1066 static int
get_root_path(struct path * path,char * buff,int len)1067 get_root_path(struct path *path, char *buff, int len)
1068 {
1069 char *path_buffer, *path_ptr;
1070 int error = 0;
1071
1072 path_get(path);
1073 path_buffer = kmem_zalloc(len, KM_SLEEP);
1074 path_ptr = d_path(path, path_buffer, len);
1075 if (IS_ERR(path_ptr))
1076 error = SET_ERROR(-PTR_ERR(path_ptr));
1077 else
1078 strcpy(buff, path_ptr);
1079
1080 kmem_free(path_buffer, len);
1081 path_put(path);
1082 return (error);
1083 }
1084
1085 /*
1086 * Returns if the current process root is chrooted or not. Linux
1087 * kernel exposes the task_struct for current process and init.
1088 * Since init process root points to actual root filesystem when
1089 * Linux runtime is reached, we can compare the current process
1090 * root with init process root to determine if root of the current
1091 * process is different from init, which can reliably determine if
1092 * current process is in chroot context or not.
1093 */
1094 static int
is_current_chrooted(void)1095 is_current_chrooted(void)
1096 {
1097 struct task_struct *curr = current, *global = &init_task;
1098 struct path cr_root, gl_root;
1099
1100 task_lock(curr);
1101 get_fs_root(curr->fs, &cr_root);
1102 task_unlock(curr);
1103
1104 task_lock(global);
1105 get_fs_root(global->fs, &gl_root);
1106 task_unlock(global);
1107
1108 int chrooted = !path_equal(&cr_root, &gl_root);
1109 path_put(&gl_root);
1110 path_put(&cr_root);
1111
1112 return (chrooted);
1113 }
1114
1115 /*
1116 * Attempt to unmount a snapshot by making a call to user space.
1117 * There is no assurance that this can or will succeed, is just a
1118 * best effort. In the case where it does fail, perhaps because
1119 * it's in use, the unmount will fail harmlessly.
1120 */
1121 int
zfsctl_snapshot_unmount(const char * snapname,int flags)1122 zfsctl_snapshot_unmount(const char *snapname, int flags)
1123 {
1124 char *argv[] = { "/usr/bin/env", "umount", "-t", "zfs", "-n", NULL,
1125 NULL };
1126 char *envp[] = { NULL };
1127 zfs_snapentry_t *se;
1128 int error;
1129
1130 rw_enter(&zfs_snapshot_lock, RW_READER);
1131 if ((se = zfsctl_snapshot_find_by_name(snapname)) == NULL) {
1132 rw_exit(&zfs_snapshot_lock);
1133 return (SET_ERROR(ENOENT));
1134 }
1135 rw_exit(&zfs_snapshot_lock);
1136
1137 exportfs_flush();
1138
1139 if (flags & MNT_FORCE)
1140 argv[4] = "-fn";
1141 argv[5] = se->se_path;
1142 dprintf("unmount; path=%s\n", se->se_path);
1143 error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
1144 zfsctl_snapshot_rele(se);
1145
1146
1147 /*
1148 * The umount system utility will return 256 on error. We must
1149 * assume this error is because the file system is busy so it is
1150 * converted to the more sensible EBUSY.
1151 */
1152 if (error)
1153 error = SET_ERROR(EBUSY);
1154
1155 return (error);
1156 }
1157
1158 int
zfsctl_snapshot_mount(struct path * path,int flags)1159 zfsctl_snapshot_mount(struct path *path, int flags)
1160 {
1161 struct dentry *dentry = path->dentry;
1162 struct inode *ip = dentry->d_inode;
1163 zfsvfs_t *zfsvfs;
1164 zfsvfs_t *snap_zfsvfs;
1165 zfs_snapentry_t *se;
1166 char *full_name, *full_path, *options;
1167 char *argv[] = { "/usr/bin/env", "mount", "-i", "-t", "zfs", "-n",
1168 "-o", NULL, NULL, NULL, NULL };
1169 char *envp[] = { NULL };
1170 int error;
1171 struct path spath;
1172
1173 if (ip == NULL)
1174 return (SET_ERROR(EISDIR));
1175
1176 zfsvfs = ITOZSB(ip);
1177 if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
1178 return (error);
1179
1180 full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
1181 full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1182 options = kmem_zalloc(7, KM_SLEEP);
1183
1184 error = zfsctl_snapshot_name(zfsvfs, dname(dentry),
1185 ZFS_MAX_DATASET_NAME_LEN, full_name);
1186 if (error)
1187 goto error;
1188
1189 if (is_current_chrooted() == 0) {
1190 /*
1191 * Current process is not in chroot context
1192 */
1193
1194 char *m = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1195 struct path mnt_path;
1196 mnt_path.mnt = path->mnt;
1197 mnt_path.dentry = path->mnt->mnt_root;
1198
1199 /*
1200 * Get path to current mountpoint
1201 */
1202 error = get_root_path(&mnt_path, m, MAXPATHLEN);
1203 if (error != 0) {
1204 kmem_free(m, MAXPATHLEN);
1205 goto error;
1206 }
1207 mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
1208 if (zfsvfs->z_vfs->vfs_mntpoint != NULL) {
1209 /*
1210 * If current mnountpoint and vfs_mntpoint are not same,
1211 * store current mountpoint in vfs_mntpoint.
1212 */
1213 if (strcmp(zfsvfs->z_vfs->vfs_mntpoint, m) != 0) {
1214 kmem_strfree(zfsvfs->z_vfs->vfs_mntpoint);
1215 zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m);
1216 }
1217 } else
1218 zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m);
1219 mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
1220 kmem_free(m, MAXPATHLEN);
1221 }
1222
1223 /*
1224 * Construct a mount point path from sb of the ctldir inode and dirent
1225 * name, instead of from d_path(), so that chroot'd process doesn't fail
1226 * on mount.zfs(8).
1227 */
1228 mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
1229 snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s",
1230 zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "",
1231 dname(dentry));
1232 mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
1233
1234 snprintf(options, 7, "%s",
1235 zfs_snapshot_no_setuid ? "nosuid" : "suid");
1236
1237 /*
1238 * Multiple concurrent automounts of a snapshot are never allowed.
1239 * The snapshot may be manually mounted as many times as desired.
1240 */
1241 if (zfsctl_snapshot_ismounted(full_name)) {
1242 error = 0;
1243 goto error;
1244 }
1245
1246 /*
1247 * Attempt to mount the snapshot from user space. Normally this
1248 * would be done using the vfs_kern_mount() function, however that
1249 * function is marked GPL-only and cannot be used. On error we
1250 * careful to log the real error to the console and return EISDIR
1251 * to safely abort the automount. This should be very rare.
1252 *
1253 * If the user mode helper happens to return EBUSY, a concurrent
1254 * mount is already in progress in which case the error is ignored.
1255 * Take note that if the program was executed successfully the return
1256 * value from call_usermodehelper() will be (exitcode << 8 + signal).
1257 */
1258 dprintf("mount; name=%s path=%s\n", full_name, full_path);
1259 argv[7] = options;
1260 argv[8] = full_name;
1261 argv[9] = full_path;
1262 error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
1263 if (error) {
1264 if (!(error & MOUNT_BUSY << 8)) {
1265 zfs_dbgmsg("Unable to automount %s error=%d",
1266 full_path, error);
1267 error = SET_ERROR(EISDIR);
1268 } else {
1269 /*
1270 * EBUSY, this could mean a concurrent mount, or the
1271 * snapshot has already been mounted at completely
1272 * different place. We return 0 so VFS will retry. For
1273 * the latter case the VFS will retry several times
1274 * and return ELOOP, which is probably not a very good
1275 * behavior.
1276 */
1277 error = 0;
1278 }
1279 goto error;
1280 }
1281
1282 /*
1283 * Follow down in to the mounted snapshot and set MNT_SHRINKABLE
1284 * to identify this as an automounted filesystem.
1285 */
1286 spath = *path;
1287 path_get(&spath);
1288 if (follow_down_one(&spath)) {
1289 snap_zfsvfs = ITOZSB(spath.dentry->d_inode);
1290 snap_zfsvfs->z_parent = zfsvfs;
1291 dentry = spath.dentry;
1292 spath.mnt->mnt_flags |= MNT_SHRINKABLE;
1293
1294 rw_enter(&zfs_snapshot_lock, RW_WRITER);
1295 se = zfsctl_snapshot_alloc(full_name, full_path,
1296 snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os),
1297 dentry);
1298 zfsctl_snapshot_add(se);
1299 zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
1300 rw_exit(&zfs_snapshot_lock);
1301 }
1302 path_put(&spath);
1303 error:
1304 kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
1305 kmem_free(full_path, MAXPATHLEN);
1306
1307 zfs_exit(zfsvfs, FTAG);
1308
1309 return (error);
1310 }
1311
1312 /*
1313 * Get the snapdir inode from fid
1314 */
1315 int
zfsctl_snapdir_vget(struct super_block * sb,uint64_t objsetid,int gen,struct inode ** ipp)1316 zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen,
1317 struct inode **ipp)
1318 {
1319 int error;
1320 struct path path;
1321 char *mnt;
1322 struct dentry *dentry;
1323
1324 mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1325
1326 error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid,
1327 MAXPATHLEN, mnt);
1328 if (error)
1329 goto out;
1330
1331 /* Trigger automount */
1332 error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
1333 if (error)
1334 goto out;
1335
1336 path_put(&path);
1337 /*
1338 * Get the snapdir inode. Note, we don't want to use the above
1339 * path because it contains the root of the snapshot rather
1340 * than the snapdir.
1341 */
1342 *ipp = ilookup(sb, ZFSCTL_INO_SNAPDIRS - objsetid);
1343 if (*ipp == NULL) {
1344 error = SET_ERROR(ENOENT);
1345 goto out;
1346 }
1347
1348 /* check gen, see zfsctl_snapdir_fid */
1349 dentry = d_obtain_alias(igrab(*ipp));
1350 if (gen != (!IS_ERR(dentry) && d_mountpoint(dentry))) {
1351 iput(*ipp);
1352 *ipp = NULL;
1353 error = SET_ERROR(ENOENT);
1354 }
1355 if (!IS_ERR(dentry))
1356 dput(dentry);
1357 out:
1358 kmem_free(mnt, MAXPATHLEN);
1359 return (error);
1360 }
1361
1362 int
zfsctl_shares_lookup(struct inode * dip,char * name,struct inode ** ipp,int flags,cred_t * cr,int * direntflags,pathname_t * realpnp)1363 zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
1364 int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
1365 {
1366 zfsvfs_t *zfsvfs = ITOZSB(dip);
1367 znode_t *zp;
1368 znode_t *dzp;
1369 int error;
1370
1371 if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
1372 return (error);
1373
1374 if (zfsvfs->z_shares_dir == 0) {
1375 zfs_exit(zfsvfs, FTAG);
1376 return (SET_ERROR(ENOTSUP));
1377 }
1378
1379 if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
1380 error = zfs_lookup(dzp, name, &zp, 0, cr, NULL, NULL);
1381 zrele(dzp);
1382 }
1383
1384 zfs_exit(zfsvfs, FTAG);
1385
1386 return (error);
1387 }
1388
1389 /*
1390 * Initialize the various pieces we'll need to create and manipulate .zfs
1391 * directories. Currently this is unused but available.
1392 */
1393 void
zfsctl_init(void)1394 zfsctl_init(void)
1395 {
1396 avl_create(&zfs_snapshots_by_name, snapentry_compare_by_name,
1397 sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t,
1398 se_node_name));
1399 avl_create(&zfs_snapshots_by_objsetid, snapentry_compare_by_objsetid,
1400 sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t,
1401 se_node_objsetid));
1402 rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL);
1403 }
1404
1405 /*
1406 * Cleanup the various pieces we needed for .zfs directories. In particular
1407 * ensure the expiry timer is canceled safely.
1408 */
1409 void
zfsctl_fini(void)1410 zfsctl_fini(void)
1411 {
1412 avl_destroy(&zfs_snapshots_by_name);
1413 avl_destroy(&zfs_snapshots_by_objsetid);
1414 rw_destroy(&zfs_snapshot_lock);
1415 }
1416
1417 module_param(zfs_admin_snapshot, int, 0644);
1418 MODULE_PARM_DESC(zfs_admin_snapshot, "Enable mkdir/rmdir/mv in .zfs/snapshot");
1419
1420 module_param(zfs_expire_snapshot, int, 0644);
1421 MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot");
1422
1423 module_param(zfs_snapshot_no_setuid, int, 0644);
1424 MODULE_PARM_DESC(zfs_snapshot_no_setuid,
1425 "Disable setuid/setgid for automounts in .zfs/snapshot");
1426