xref: /freebsd/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c (revision 87bf66d4a7488c496af110d4d05cc0273d49f82e)
1eda14cbcSMatt Macy /*
2eda14cbcSMatt Macy  * CDDL HEADER START
3eda14cbcSMatt Macy  *
4eda14cbcSMatt Macy  * The contents of this file are subject to the terms of the
5eda14cbcSMatt Macy  * Common Development and Distribution License (the "License").
6eda14cbcSMatt Macy  * You may not use this file except in compliance with the License.
7eda14cbcSMatt Macy  *
8eda14cbcSMatt Macy  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9271171e0SMartin Matuska  * or https://opensource.org/licenses/CDDL-1.0.
10eda14cbcSMatt Macy  * See the License for the specific language governing permissions
11eda14cbcSMatt Macy  * and limitations under the License.
12eda14cbcSMatt Macy  *
13eda14cbcSMatt Macy  * When distributing Covered Code, include this CDDL HEADER in each
14eda14cbcSMatt Macy  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15eda14cbcSMatt Macy  * If applicable, add the following below this CDDL HEADER, with the
16eda14cbcSMatt Macy  * fields enclosed by brackets "[]" replaced with your own identifying
17eda14cbcSMatt Macy  * information: Portions Copyright [yyyy] [name of copyright owner]
18eda14cbcSMatt Macy  *
19eda14cbcSMatt Macy  * CDDL HEADER END
20eda14cbcSMatt Macy  */
21eda14cbcSMatt Macy /*
22eda14cbcSMatt Macy  *
23eda14cbcSMatt Macy  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24eda14cbcSMatt Macy  * Copyright (C) 2011 Lawrence Livermore National Security, LLC.
25eda14cbcSMatt Macy  * Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
26eda14cbcSMatt Macy  * LLNL-CODE-403049.
27eda14cbcSMatt Macy  * Rewritten for Linux by:
28eda14cbcSMatt Macy  *   Rohan Puri <rohan.puri15@gmail.com>
29eda14cbcSMatt Macy  *   Brian Behlendorf <behlendorf1@llnl.gov>
30eda14cbcSMatt Macy  * Copyright (c) 2013 by Delphix. All rights reserved.
31eda14cbcSMatt Macy  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
32eda14cbcSMatt Macy  * Copyright (c) 2018 George Melikov. All Rights Reserved.
33eda14cbcSMatt Macy  * Copyright (c) 2019 Datto, Inc. All rights reserved.
34ac0bf12eSMatt Macy  * Copyright (c) 2020 The MathWorks, Inc. All rights reserved.
35eda14cbcSMatt Macy  */
36eda14cbcSMatt Macy 
/*
 * ZFS control directory (a.k.a. ".zfs")
 *
 * This directory provides a common location for all ZFS meta-objects.
 * Currently, this is only the 'snapshot' and 'shares' directories, but this
 * may expand in the future.  The elements are built dynamically, as the
 * hierarchy does not actually exist on disk.
 *
 * For 'snapshot', we don't want to have all snapshots always mounted, because
 * this would take up a huge amount of space in /etc/mnttab.  We have three
 * types of objects:
 *
 *	ctldir ------> snapshotdir -------> snapshot
 *                                             |
 *                                             |
 *                                             V
 *                                         mounted fs
 *
 * The 'snapshot' node contains just enough information to lookup '..' and act
 * as a mountpoint for the snapshot.  Whenever we lookup a specific snapshot, we
 * perform an automount of the underlying filesystem and return the
 * corresponding inode.
 *
 * All mounts are handled automatically by a user mode helper which invokes
 * the mount procedure.  Unmounts are handled by allowing the mount
 * point to expire so the kernel may automatically unmount it.
 *
 * The '.zfs', '.zfs/snapshot', and all directories created under
 * '.zfs/snapshot' (i.e. '.zfs/snapshot/<snapname>') all share the same
 * zfsvfs_t as the head filesystem (what '.zfs' lives under).
 *
 * File systems mounted on top of the '.zfs/snapshot/<snapname>' paths
 * (i.e. snapshots) are complete ZFS filesystems and have their own unique
 * zfsvfs_t.  However, the fsid reported by these mounts will be the same
 * as that used by the parent zfsvfs_t to make NFS happy.
 */
73eda14cbcSMatt Macy 
74eda14cbcSMatt Macy #include <sys/types.h>
75eda14cbcSMatt Macy #include <sys/param.h>
76eda14cbcSMatt Macy #include <sys/time.h>
77eda14cbcSMatt Macy #include <sys/sysmacros.h>
78eda14cbcSMatt Macy #include <sys/pathname.h>
79eda14cbcSMatt Macy #include <sys/vfs.h>
80eda14cbcSMatt Macy #include <sys/zfs_ctldir.h>
81eda14cbcSMatt Macy #include <sys/zfs_ioctl.h>
82eda14cbcSMatt Macy #include <sys/zfs_vfsops.h>
83eda14cbcSMatt Macy #include <sys/zfs_vnops.h>
84eda14cbcSMatt Macy #include <sys/stat.h>
85eda14cbcSMatt Macy #include <sys/dmu.h>
86eda14cbcSMatt Macy #include <sys/dmu_objset.h>
87eda14cbcSMatt Macy #include <sys/dsl_destroy.h>
88eda14cbcSMatt Macy #include <sys/dsl_deleg.h>
89eda14cbcSMatt Macy #include <sys/zpl.h>
90eda14cbcSMatt Macy #include <sys/mntent.h>
91eda14cbcSMatt Macy #include "zfs_namecheck.h"
92eda14cbcSMatt Macy 
93eda14cbcSMatt Macy /*
94eda14cbcSMatt Macy  * Two AVL trees are maintained which contain all currently automounted
95eda14cbcSMatt Macy  * snapshots.  Every automounted snapshots maps to a single zfs_snapentry_t
96eda14cbcSMatt Macy  * entry which MUST:
97eda14cbcSMatt Macy  *
98eda14cbcSMatt Macy  *   - be attached to both trees, and
99eda14cbcSMatt Macy  *   - be unique, no duplicate entries are allowed.
100eda14cbcSMatt Macy  *
101eda14cbcSMatt Macy  * The zfs_snapshots_by_name tree is indexed by the full dataset name
102eda14cbcSMatt Macy  * while the zfs_snapshots_by_objsetid tree is indexed by the unique
103eda14cbcSMatt Macy  * objsetid.  This allows for fast lookups either by name or objsetid.
104eda14cbcSMatt Macy  */
105eda14cbcSMatt Macy static avl_tree_t zfs_snapshots_by_name;
106eda14cbcSMatt Macy static avl_tree_t zfs_snapshots_by_objsetid;
107eda14cbcSMatt Macy static krwlock_t zfs_snapshot_lock;
108eda14cbcSMatt Macy 
109eda14cbcSMatt Macy /*
110eda14cbcSMatt Macy  * Control Directory Tunables (.zfs)
111eda14cbcSMatt Macy  */
112eda14cbcSMatt Macy int zfs_expire_snapshot = ZFSCTL_EXPIRE_SNAPSHOT;
113e92ffd9bSMartin Matuska static int zfs_admin_snapshot = 0;
1147a7741afSMartin Matuska static int zfs_snapshot_no_setuid = 0;
115eda14cbcSMatt Macy 
116eda14cbcSMatt Macy typedef struct {
117eda14cbcSMatt Macy 	char		*se_name;	/* full snapshot name */
118eda14cbcSMatt Macy 	char		*se_path;	/* full mount path */
119eda14cbcSMatt Macy 	spa_t		*se_spa;	/* pool spa */
120eda14cbcSMatt Macy 	uint64_t	se_objsetid;	/* snapshot objset id */
121eda14cbcSMatt Macy 	struct dentry   *se_root_dentry; /* snapshot root dentry */
12281b22a98SMartin Matuska 	krwlock_t	se_taskqid_lock;  /* scheduled unmount taskqid lock */
123eda14cbcSMatt Macy 	taskqid_t	se_taskqid;	/* scheduled unmount taskqid */
124eda14cbcSMatt Macy 	avl_node_t	se_node_name;	/* zfs_snapshots_by_name link */
125eda14cbcSMatt Macy 	avl_node_t	se_node_objsetid; /* zfs_snapshots_by_objsetid link */
126eda14cbcSMatt Macy 	zfs_refcount_t	se_refcount;	/* reference count */
127eda14cbcSMatt Macy } zfs_snapentry_t;
128eda14cbcSMatt Macy 
129eda14cbcSMatt Macy static void zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay);
130eda14cbcSMatt Macy 
131eda14cbcSMatt Macy /*
132eda14cbcSMatt Macy  * Allocate a new zfs_snapentry_t being careful to make a copy of the
133eda14cbcSMatt Macy  * the snapshot name and provided mount point.  No reference is taken.
134eda14cbcSMatt Macy  */
135eda14cbcSMatt Macy static zfs_snapentry_t *
zfsctl_snapshot_alloc(const char * full_name,const char * full_path,spa_t * spa,uint64_t objsetid,struct dentry * root_dentry)136180f8225SMatt Macy zfsctl_snapshot_alloc(const char *full_name, const char *full_path, spa_t *spa,
137eda14cbcSMatt Macy     uint64_t objsetid, struct dentry *root_dentry)
138eda14cbcSMatt Macy {
139eda14cbcSMatt Macy 	zfs_snapentry_t *se;
140eda14cbcSMatt Macy 
141eda14cbcSMatt Macy 	se = kmem_zalloc(sizeof (zfs_snapentry_t), KM_SLEEP);
142eda14cbcSMatt Macy 
143eda14cbcSMatt Macy 	se->se_name = kmem_strdup(full_name);
144eda14cbcSMatt Macy 	se->se_path = kmem_strdup(full_path);
145eda14cbcSMatt Macy 	se->se_spa = spa;
146eda14cbcSMatt Macy 	se->se_objsetid = objsetid;
147eda14cbcSMatt Macy 	se->se_root_dentry = root_dentry;
148eda14cbcSMatt Macy 	se->se_taskqid = TASKQID_INVALID;
14981b22a98SMartin Matuska 	rw_init(&se->se_taskqid_lock, NULL, RW_DEFAULT, NULL);
150eda14cbcSMatt Macy 
151eda14cbcSMatt Macy 	zfs_refcount_create(&se->se_refcount);
152eda14cbcSMatt Macy 
153eda14cbcSMatt Macy 	return (se);
154eda14cbcSMatt Macy }
155eda14cbcSMatt Macy 
156eda14cbcSMatt Macy /*
157eda14cbcSMatt Macy  * Free a zfs_snapentry_t the caller must ensure there are no active
158eda14cbcSMatt Macy  * references.
159eda14cbcSMatt Macy  */
160eda14cbcSMatt Macy static void
zfsctl_snapshot_free(zfs_snapentry_t * se)161eda14cbcSMatt Macy zfsctl_snapshot_free(zfs_snapentry_t *se)
162eda14cbcSMatt Macy {
163eda14cbcSMatt Macy 	zfs_refcount_destroy(&se->se_refcount);
164eda14cbcSMatt Macy 	kmem_strfree(se->se_name);
165eda14cbcSMatt Macy 	kmem_strfree(se->se_path);
166716fd348SMartin Matuska 	rw_destroy(&se->se_taskqid_lock);
167eda14cbcSMatt Macy 
168eda14cbcSMatt Macy 	kmem_free(se, sizeof (zfs_snapentry_t));
169eda14cbcSMatt Macy }
170eda14cbcSMatt Macy 
171eda14cbcSMatt Macy /*
172eda14cbcSMatt Macy  * Hold a reference on the zfs_snapentry_t.
173eda14cbcSMatt Macy  */
174eda14cbcSMatt Macy static void
zfsctl_snapshot_hold(zfs_snapentry_t * se)175eda14cbcSMatt Macy zfsctl_snapshot_hold(zfs_snapentry_t *se)
176eda14cbcSMatt Macy {
177eda14cbcSMatt Macy 	zfs_refcount_add(&se->se_refcount, NULL);
178eda14cbcSMatt Macy }
179eda14cbcSMatt Macy 
180eda14cbcSMatt Macy /*
181eda14cbcSMatt Macy  * Release a reference on the zfs_snapentry_t.  When the number of
182eda14cbcSMatt Macy  * references drops to zero the structure will be freed.
183eda14cbcSMatt Macy  */
184eda14cbcSMatt Macy static void
zfsctl_snapshot_rele(zfs_snapentry_t * se)185eda14cbcSMatt Macy zfsctl_snapshot_rele(zfs_snapentry_t *se)
186eda14cbcSMatt Macy {
187eda14cbcSMatt Macy 	if (zfs_refcount_remove(&se->se_refcount, NULL) == 0)
188eda14cbcSMatt Macy 		zfsctl_snapshot_free(se);
189eda14cbcSMatt Macy }
190eda14cbcSMatt Macy 
191eda14cbcSMatt Macy /*
192eda14cbcSMatt Macy  * Add a zfs_snapentry_t to both the zfs_snapshots_by_name and
193eda14cbcSMatt Macy  * zfs_snapshots_by_objsetid trees.  While the zfs_snapentry_t is part
194eda14cbcSMatt Macy  * of the trees a reference is held.
195eda14cbcSMatt Macy  */
196eda14cbcSMatt Macy static void
zfsctl_snapshot_add(zfs_snapentry_t * se)197eda14cbcSMatt Macy zfsctl_snapshot_add(zfs_snapentry_t *se)
198eda14cbcSMatt Macy {
199eda14cbcSMatt Macy 	ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
200eda14cbcSMatt Macy 	zfsctl_snapshot_hold(se);
201eda14cbcSMatt Macy 	avl_add(&zfs_snapshots_by_name, se);
202eda14cbcSMatt Macy 	avl_add(&zfs_snapshots_by_objsetid, se);
203eda14cbcSMatt Macy }
204eda14cbcSMatt Macy 
205eda14cbcSMatt Macy /*
206eda14cbcSMatt Macy  * Remove a zfs_snapentry_t from both the zfs_snapshots_by_name and
207eda14cbcSMatt Macy  * zfs_snapshots_by_objsetid trees.  Upon removal a reference is dropped,
208eda14cbcSMatt Macy  * this can result in the structure being freed if that was the last
209eda14cbcSMatt Macy  * remaining reference.
210eda14cbcSMatt Macy  */
211eda14cbcSMatt Macy static void
zfsctl_snapshot_remove(zfs_snapentry_t * se)212eda14cbcSMatt Macy zfsctl_snapshot_remove(zfs_snapentry_t *se)
213eda14cbcSMatt Macy {
214eda14cbcSMatt Macy 	ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
215eda14cbcSMatt Macy 	avl_remove(&zfs_snapshots_by_name, se);
216eda14cbcSMatt Macy 	avl_remove(&zfs_snapshots_by_objsetid, se);
217eda14cbcSMatt Macy 	zfsctl_snapshot_rele(se);
218eda14cbcSMatt Macy }
219eda14cbcSMatt Macy 
220eda14cbcSMatt Macy /*
221eda14cbcSMatt Macy  * Snapshot name comparison function for the zfs_snapshots_by_name.
222eda14cbcSMatt Macy  */
223eda14cbcSMatt Macy static int
snapentry_compare_by_name(const void * a,const void * b)224eda14cbcSMatt Macy snapentry_compare_by_name(const void *a, const void *b)
225eda14cbcSMatt Macy {
226eda14cbcSMatt Macy 	const zfs_snapentry_t *se_a = a;
227eda14cbcSMatt Macy 	const zfs_snapentry_t *se_b = b;
228eda14cbcSMatt Macy 	int ret;
229eda14cbcSMatt Macy 
230eda14cbcSMatt Macy 	ret = strcmp(se_a->se_name, se_b->se_name);
231eda14cbcSMatt Macy 
232eda14cbcSMatt Macy 	if (ret < 0)
233eda14cbcSMatt Macy 		return (-1);
234eda14cbcSMatt Macy 	else if (ret > 0)
235eda14cbcSMatt Macy 		return (1);
236eda14cbcSMatt Macy 	else
237eda14cbcSMatt Macy 		return (0);
238eda14cbcSMatt Macy }
239eda14cbcSMatt Macy 
240eda14cbcSMatt Macy /*
241eda14cbcSMatt Macy  * Snapshot name comparison function for the zfs_snapshots_by_objsetid.
242eda14cbcSMatt Macy  */
243eda14cbcSMatt Macy static int
snapentry_compare_by_objsetid(const void * a,const void * b)244eda14cbcSMatt Macy snapentry_compare_by_objsetid(const void *a, const void *b)
245eda14cbcSMatt Macy {
246eda14cbcSMatt Macy 	const zfs_snapentry_t *se_a = a;
247eda14cbcSMatt Macy 	const zfs_snapentry_t *se_b = b;
248eda14cbcSMatt Macy 
249eda14cbcSMatt Macy 	if (se_a->se_spa != se_b->se_spa)
250eda14cbcSMatt Macy 		return ((ulong_t)se_a->se_spa < (ulong_t)se_b->se_spa ? -1 : 1);
251eda14cbcSMatt Macy 
252eda14cbcSMatt Macy 	if (se_a->se_objsetid < se_b->se_objsetid)
253eda14cbcSMatt Macy 		return (-1);
254eda14cbcSMatt Macy 	else if (se_a->se_objsetid > se_b->se_objsetid)
255eda14cbcSMatt Macy 		return (1);
256eda14cbcSMatt Macy 	else
257eda14cbcSMatt Macy 		return (0);
258eda14cbcSMatt Macy }
259eda14cbcSMatt Macy 
260eda14cbcSMatt Macy /*
261eda14cbcSMatt Macy  * Find a zfs_snapentry_t in zfs_snapshots_by_name.  If the snapname
262eda14cbcSMatt Macy  * is found a pointer to the zfs_snapentry_t is returned and a reference
263eda14cbcSMatt Macy  * taken on the structure.  The caller is responsible for dropping the
264eda14cbcSMatt Macy  * reference with zfsctl_snapshot_rele().  If the snapname is not found
265eda14cbcSMatt Macy  * NULL will be returned.
266eda14cbcSMatt Macy  */
267eda14cbcSMatt Macy static zfs_snapentry_t *
zfsctl_snapshot_find_by_name(const char * snapname)268180f8225SMatt Macy zfsctl_snapshot_find_by_name(const char *snapname)
269eda14cbcSMatt Macy {
270eda14cbcSMatt Macy 	zfs_snapentry_t *se, search;
271eda14cbcSMatt Macy 
272eda14cbcSMatt Macy 	ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
273eda14cbcSMatt Macy 
274180f8225SMatt Macy 	search.se_name = (char *)snapname;
275eda14cbcSMatt Macy 	se = avl_find(&zfs_snapshots_by_name, &search, NULL);
276eda14cbcSMatt Macy 	if (se)
277eda14cbcSMatt Macy 		zfsctl_snapshot_hold(se);
278eda14cbcSMatt Macy 
279eda14cbcSMatt Macy 	return (se);
280eda14cbcSMatt Macy }
281eda14cbcSMatt Macy 
282eda14cbcSMatt Macy /*
283eda14cbcSMatt Macy  * Find a zfs_snapentry_t in zfs_snapshots_by_objsetid given the objset id
284eda14cbcSMatt Macy  * rather than the snapname.  In all other respects it behaves the same
285eda14cbcSMatt Macy  * as zfsctl_snapshot_find_by_name().
286eda14cbcSMatt Macy  */
287eda14cbcSMatt Macy static zfs_snapentry_t *
zfsctl_snapshot_find_by_objsetid(spa_t * spa,uint64_t objsetid)288eda14cbcSMatt Macy zfsctl_snapshot_find_by_objsetid(spa_t *spa, uint64_t objsetid)
289eda14cbcSMatt Macy {
290eda14cbcSMatt Macy 	zfs_snapentry_t *se, search;
291eda14cbcSMatt Macy 
292eda14cbcSMatt Macy 	ASSERT(RW_LOCK_HELD(&zfs_snapshot_lock));
293eda14cbcSMatt Macy 
294eda14cbcSMatt Macy 	search.se_spa = spa;
295eda14cbcSMatt Macy 	search.se_objsetid = objsetid;
296eda14cbcSMatt Macy 	se = avl_find(&zfs_snapshots_by_objsetid, &search, NULL);
297eda14cbcSMatt Macy 	if (se)
298eda14cbcSMatt Macy 		zfsctl_snapshot_hold(se);
299eda14cbcSMatt Macy 
300eda14cbcSMatt Macy 	return (se);
301eda14cbcSMatt Macy }
302eda14cbcSMatt Macy 
303eda14cbcSMatt Macy /*
304eda14cbcSMatt Macy  * Rename a zfs_snapentry_t in the zfs_snapshots_by_name.  The structure is
305eda14cbcSMatt Macy  * removed, renamed, and added back to the new correct location in the tree.
306eda14cbcSMatt Macy  */
307eda14cbcSMatt Macy static int
zfsctl_snapshot_rename(const char * old_snapname,const char * new_snapname)308180f8225SMatt Macy zfsctl_snapshot_rename(const char *old_snapname, const char *new_snapname)
309eda14cbcSMatt Macy {
310eda14cbcSMatt Macy 	zfs_snapentry_t *se;
311eda14cbcSMatt Macy 
312eda14cbcSMatt Macy 	ASSERT(RW_WRITE_HELD(&zfs_snapshot_lock));
313eda14cbcSMatt Macy 
314eda14cbcSMatt Macy 	se = zfsctl_snapshot_find_by_name(old_snapname);
315eda14cbcSMatt Macy 	if (se == NULL)
316eda14cbcSMatt Macy 		return (SET_ERROR(ENOENT));
317eda14cbcSMatt Macy 
318eda14cbcSMatt Macy 	zfsctl_snapshot_remove(se);
319eda14cbcSMatt Macy 	kmem_strfree(se->se_name);
320eda14cbcSMatt Macy 	se->se_name = kmem_strdup(new_snapname);
321eda14cbcSMatt Macy 	zfsctl_snapshot_add(se);
322eda14cbcSMatt Macy 	zfsctl_snapshot_rele(se);
323eda14cbcSMatt Macy 
324eda14cbcSMatt Macy 	return (0);
325eda14cbcSMatt Macy }
326eda14cbcSMatt Macy 
327eda14cbcSMatt Macy /*
328eda14cbcSMatt Macy  * Delayed task responsible for unmounting an expired automounted snapshot.
329eda14cbcSMatt Macy  */
330eda14cbcSMatt Macy static void
snapentry_expire(void * data)331eda14cbcSMatt Macy snapentry_expire(void *data)
332eda14cbcSMatt Macy {
333eda14cbcSMatt Macy 	zfs_snapentry_t *se = (zfs_snapentry_t *)data;
334eda14cbcSMatt Macy 	spa_t *spa = se->se_spa;
335eda14cbcSMatt Macy 	uint64_t objsetid = se->se_objsetid;
336eda14cbcSMatt Macy 
337eda14cbcSMatt Macy 	if (zfs_expire_snapshot <= 0) {
338eda14cbcSMatt Macy 		zfsctl_snapshot_rele(se);
339eda14cbcSMatt Macy 		return;
340eda14cbcSMatt Macy 	}
341eda14cbcSMatt Macy 
34281b22a98SMartin Matuska 	rw_enter(&se->se_taskqid_lock, RW_WRITER);
343eda14cbcSMatt Macy 	se->se_taskqid = TASKQID_INVALID;
34481b22a98SMartin Matuska 	rw_exit(&se->se_taskqid_lock);
345eda14cbcSMatt Macy 	(void) zfsctl_snapshot_unmount(se->se_name, MNT_EXPIRE);
346eda14cbcSMatt Macy 	zfsctl_snapshot_rele(se);
347eda14cbcSMatt Macy 
348eda14cbcSMatt Macy 	/*
349eda14cbcSMatt Macy 	 * Reschedule the unmount if the zfs_snapentry_t wasn't removed.
350eda14cbcSMatt Macy 	 * This can occur when the snapshot is busy.
351eda14cbcSMatt Macy 	 */
352eda14cbcSMatt Macy 	rw_enter(&zfs_snapshot_lock, RW_READER);
353eda14cbcSMatt Macy 	if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
354eda14cbcSMatt Macy 		zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
355eda14cbcSMatt Macy 		zfsctl_snapshot_rele(se);
356eda14cbcSMatt Macy 	}
357eda14cbcSMatt Macy 	rw_exit(&zfs_snapshot_lock);
358eda14cbcSMatt Macy }
359eda14cbcSMatt Macy 
360eda14cbcSMatt Macy /*
361eda14cbcSMatt Macy  * Cancel an automatic unmount of a snapname.  This callback is responsible
362eda14cbcSMatt Macy  * for dropping the reference on the zfs_snapentry_t which was taken when
363eda14cbcSMatt Macy  * during dispatch.
364eda14cbcSMatt Macy  */
365eda14cbcSMatt Macy static void
zfsctl_snapshot_unmount_cancel(zfs_snapentry_t * se)366eda14cbcSMatt Macy zfsctl_snapshot_unmount_cancel(zfs_snapentry_t *se)
367eda14cbcSMatt Macy {
36881b22a98SMartin Matuska 	int err = 0;
36981b22a98SMartin Matuska 	rw_enter(&se->se_taskqid_lock, RW_WRITER);
37081b22a98SMartin Matuska 	err = taskq_cancel_id(system_delay_taskq, se->se_taskqid);
37181b22a98SMartin Matuska 	/*
37281b22a98SMartin Matuska 	 * if we get ENOENT, the taskq couldn't be found to be
37381b22a98SMartin Matuska 	 * canceled, so we can just mark it as invalid because
37481b22a98SMartin Matuska 	 * it's already gone. If we got EBUSY, then we already
37581b22a98SMartin Matuska 	 * blocked until it was gone _anyway_, so we don't care.
37681b22a98SMartin Matuska 	 */
377eda14cbcSMatt Macy 	se->se_taskqid = TASKQID_INVALID;
37881b22a98SMartin Matuska 	rw_exit(&se->se_taskqid_lock);
37981b22a98SMartin Matuska 	if (err == 0) {
380eda14cbcSMatt Macy 		zfsctl_snapshot_rele(se);
381eda14cbcSMatt Macy 	}
382eda14cbcSMatt Macy }
383eda14cbcSMatt Macy 
384eda14cbcSMatt Macy /*
385eda14cbcSMatt Macy  * Dispatch the unmount task for delayed handling with a hold protecting it.
386eda14cbcSMatt Macy  */
387eda14cbcSMatt Macy static void
zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t * se,int delay)388eda14cbcSMatt Macy zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
389eda14cbcSMatt Macy {
390eda14cbcSMatt Macy 
391eda14cbcSMatt Macy 	if (delay <= 0)
392eda14cbcSMatt Macy 		return;
393eda14cbcSMatt Macy 
394eda14cbcSMatt Macy 	zfsctl_snapshot_hold(se);
39581b22a98SMartin Matuska 	rw_enter(&se->se_taskqid_lock, RW_WRITER);
396c9539b89SMartin Matuska 	/*
397c9539b89SMartin Matuska 	 * If this condition happens, we managed to:
398c9539b89SMartin Matuska 	 * - dispatch once
399c9539b89SMartin Matuska 	 * - want to dispatch _again_ before it returned
400c9539b89SMartin Matuska 	 *
401c9539b89SMartin Matuska 	 * So let's just return - if that task fails at unmounting,
402c9539b89SMartin Matuska 	 * we'll eventually dispatch again, and if it succeeds,
403c9539b89SMartin Matuska 	 * no problem.
404c9539b89SMartin Matuska 	 */
405c9539b89SMartin Matuska 	if (se->se_taskqid != TASKQID_INVALID) {
406c9539b89SMartin Matuska 		rw_exit(&se->se_taskqid_lock);
407c9539b89SMartin Matuska 		zfsctl_snapshot_rele(se);
408c9539b89SMartin Matuska 		return;
409c9539b89SMartin Matuska 	}
410eda14cbcSMatt Macy 	se->se_taskqid = taskq_dispatch_delay(system_delay_taskq,
411eda14cbcSMatt Macy 	    snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
41281b22a98SMartin Matuska 	rw_exit(&se->se_taskqid_lock);
413eda14cbcSMatt Macy }
414eda14cbcSMatt Macy 
415eda14cbcSMatt Macy /*
416eda14cbcSMatt Macy  * Schedule an automatic unmount of objset id to occur in delay seconds from
417eda14cbcSMatt Macy  * now.  Any previous delayed unmount will be cancelled in favor of the
418eda14cbcSMatt Macy  * updated deadline.  A reference is taken by zfsctl_snapshot_find_by_name()
419eda14cbcSMatt Macy  * and held until the outstanding task is handled or cancelled.
420eda14cbcSMatt Macy  */
421eda14cbcSMatt Macy int
zfsctl_snapshot_unmount_delay(spa_t * spa,uint64_t objsetid,int delay)422eda14cbcSMatt Macy zfsctl_snapshot_unmount_delay(spa_t *spa, uint64_t objsetid, int delay)
423eda14cbcSMatt Macy {
424eda14cbcSMatt Macy 	zfs_snapentry_t *se;
425eda14cbcSMatt Macy 	int error = ENOENT;
426eda14cbcSMatt Macy 
427eda14cbcSMatt Macy 	rw_enter(&zfs_snapshot_lock, RW_READER);
428eda14cbcSMatt Macy 	if ((se = zfsctl_snapshot_find_by_objsetid(spa, objsetid)) != NULL) {
429eda14cbcSMatt Macy 		zfsctl_snapshot_unmount_cancel(se);
430eda14cbcSMatt Macy 		zfsctl_snapshot_unmount_delay_impl(se, delay);
431eda14cbcSMatt Macy 		zfsctl_snapshot_rele(se);
432eda14cbcSMatt Macy 		error = 0;
433eda14cbcSMatt Macy 	}
434eda14cbcSMatt Macy 	rw_exit(&zfs_snapshot_lock);
435eda14cbcSMatt Macy 
436eda14cbcSMatt Macy 	return (error);
437eda14cbcSMatt Macy }
438eda14cbcSMatt Macy 
439eda14cbcSMatt Macy /*
440eda14cbcSMatt Macy  * Check if snapname is currently mounted.  Returned non-zero when mounted
441eda14cbcSMatt Macy  * and zero when unmounted.
442eda14cbcSMatt Macy  */
443eda14cbcSMatt Macy static boolean_t
zfsctl_snapshot_ismounted(const char * snapname)444180f8225SMatt Macy zfsctl_snapshot_ismounted(const char *snapname)
445eda14cbcSMatt Macy {
446eda14cbcSMatt Macy 	zfs_snapentry_t *se;
447eda14cbcSMatt Macy 	boolean_t ismounted = B_FALSE;
448eda14cbcSMatt Macy 
449eda14cbcSMatt Macy 	rw_enter(&zfs_snapshot_lock, RW_READER);
450eda14cbcSMatt Macy 	if ((se = zfsctl_snapshot_find_by_name(snapname)) != NULL) {
451eda14cbcSMatt Macy 		zfsctl_snapshot_rele(se);
452eda14cbcSMatt Macy 		ismounted = B_TRUE;
453eda14cbcSMatt Macy 	}
454eda14cbcSMatt Macy 	rw_exit(&zfs_snapshot_lock);
455eda14cbcSMatt Macy 
456eda14cbcSMatt Macy 	return (ismounted);
457eda14cbcSMatt Macy }
458eda14cbcSMatt Macy 
459eda14cbcSMatt Macy /*
460eda14cbcSMatt Macy  * Check if the given inode is a part of the virtual .zfs directory.
461eda14cbcSMatt Macy  */
462eda14cbcSMatt Macy boolean_t
zfsctl_is_node(struct inode * ip)463eda14cbcSMatt Macy zfsctl_is_node(struct inode *ip)
464eda14cbcSMatt Macy {
465eda14cbcSMatt Macy 	return (ITOZ(ip)->z_is_ctldir);
466eda14cbcSMatt Macy }
467eda14cbcSMatt Macy 
468eda14cbcSMatt Macy /*
469eda14cbcSMatt Macy  * Check if the given inode is a .zfs/snapshots/snapname directory.
470eda14cbcSMatt Macy  */
471eda14cbcSMatt Macy boolean_t
zfsctl_is_snapdir(struct inode * ip)472eda14cbcSMatt Macy zfsctl_is_snapdir(struct inode *ip)
473eda14cbcSMatt Macy {
474eda14cbcSMatt Macy 	return (zfsctl_is_node(ip) && (ip->i_ino <= ZFSCTL_INO_SNAPDIRS));
475eda14cbcSMatt Macy }
476eda14cbcSMatt Macy 
477eda14cbcSMatt Macy /*
478eda14cbcSMatt Macy  * Allocate a new inode with the passed id and ops.
479eda14cbcSMatt Macy  */
480eda14cbcSMatt Macy static struct inode *
zfsctl_inode_alloc(zfsvfs_t * zfsvfs,uint64_t id,const struct file_operations * fops,const struct inode_operations * ops,uint64_t creation)481eda14cbcSMatt Macy zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
482315ee00fSMartin Matuska     const struct file_operations *fops, const struct inode_operations *ops,
483315ee00fSMartin Matuska     uint64_t creation)
484eda14cbcSMatt Macy {
485eda14cbcSMatt Macy 	struct inode *ip;
486eda14cbcSMatt Macy 	znode_t *zp;
487315ee00fSMartin Matuska 	inode_timespec_t now = {.tv_sec = creation};
488eda14cbcSMatt Macy 
489eda14cbcSMatt Macy 	ip = new_inode(zfsvfs->z_sb);
490eda14cbcSMatt Macy 	if (ip == NULL)
491eda14cbcSMatt Macy 		return (NULL);
492eda14cbcSMatt Macy 
493315ee00fSMartin Matuska 	if (!creation)
494eda14cbcSMatt Macy 		now = current_time(ip);
495eda14cbcSMatt Macy 	zp = ITOZ(ip);
496eda14cbcSMatt Macy 	ASSERT3P(zp->z_dirlocks, ==, NULL);
497eda14cbcSMatt Macy 	ASSERT3P(zp->z_acl_cached, ==, NULL);
498eda14cbcSMatt Macy 	ASSERT3P(zp->z_xattr_cached, ==, NULL);
499eda14cbcSMatt Macy 	zp->z_id = id;
500eda14cbcSMatt Macy 	zp->z_unlinked = B_FALSE;
501eda14cbcSMatt Macy 	zp->z_atime_dirty = B_FALSE;
502eda14cbcSMatt Macy 	zp->z_zn_prefetch = B_FALSE;
503eda14cbcSMatt Macy 	zp->z_is_sa = B_FALSE;
504eda14cbcSMatt Macy 	zp->z_is_ctldir = B_TRUE;
505eda14cbcSMatt Macy 	zp->z_sa_hdl = NULL;
506eda14cbcSMatt Macy 	zp->z_blksz = 0;
507eda14cbcSMatt Macy 	zp->z_seq = 0;
508eda14cbcSMatt Macy 	zp->z_mapcnt = 0;
509eda14cbcSMatt Macy 	zp->z_size = 0;
510eda14cbcSMatt Macy 	zp->z_pflags = 0;
511eda14cbcSMatt Macy 	zp->z_mode = 0;
512eda14cbcSMatt Macy 	zp->z_sync_cnt = 0;
513716fd348SMartin Matuska 	zp->z_sync_writes_cnt = 0;
514716fd348SMartin Matuska 	zp->z_async_writes_cnt = 0;
515eda14cbcSMatt Macy 	ip->i_generation = 0;
516eda14cbcSMatt Macy 	ip->i_ino = id;
517eda14cbcSMatt Macy 	ip->i_mode = (S_IFDIR | S_IRWXUGO);
518eda14cbcSMatt Macy 	ip->i_uid = SUID_TO_KUID(0);
519eda14cbcSMatt Macy 	ip->i_gid = SGID_TO_KGID(0);
520eda14cbcSMatt Macy 	ip->i_blkbits = SPA_MINBLOCKSHIFT;
521b356da80SMartin Matuska 	zpl_inode_set_atime_to_ts(ip, now);
522b356da80SMartin Matuska 	zpl_inode_set_mtime_to_ts(ip, now);
523abcdc1b9SMartin Matuska 	zpl_inode_set_ctime_to_ts(ip, now);
524eda14cbcSMatt Macy 	ip->i_fop = fops;
525eda14cbcSMatt Macy 	ip->i_op = ops;
526eda14cbcSMatt Macy #if defined(IOP_XATTR)
527eda14cbcSMatt Macy 	ip->i_opflags &= ~IOP_XATTR;
528eda14cbcSMatt Macy #endif
529eda14cbcSMatt Macy 
530eda14cbcSMatt Macy 	if (insert_inode_locked(ip)) {
531eda14cbcSMatt Macy 		unlock_new_inode(ip);
532eda14cbcSMatt Macy 		iput(ip);
533eda14cbcSMatt Macy 		return (NULL);
534eda14cbcSMatt Macy 	}
535eda14cbcSMatt Macy 
536eda14cbcSMatt Macy 	mutex_enter(&zfsvfs->z_znodes_lock);
537eda14cbcSMatt Macy 	list_insert_tail(&zfsvfs->z_all_znodes, zp);
538eda14cbcSMatt Macy 	membar_producer();
539eda14cbcSMatt Macy 	mutex_exit(&zfsvfs->z_znodes_lock);
540eda14cbcSMatt Macy 
541eda14cbcSMatt Macy 	unlock_new_inode(ip);
542eda14cbcSMatt Macy 
543eda14cbcSMatt Macy 	return (ip);
544eda14cbcSMatt Macy }
545eda14cbcSMatt Macy 
546eda14cbcSMatt Macy /*
547eda14cbcSMatt Macy  * Lookup the inode with given id, it will be allocated if needed.
548eda14cbcSMatt Macy  */
549eda14cbcSMatt Macy static struct inode *
zfsctl_inode_lookup(zfsvfs_t * zfsvfs,uint64_t id,const struct file_operations * fops,const struct inode_operations * ops)550eda14cbcSMatt Macy zfsctl_inode_lookup(zfsvfs_t *zfsvfs, uint64_t id,
551eda14cbcSMatt Macy     const struct file_operations *fops, const struct inode_operations *ops)
552eda14cbcSMatt Macy {
553eda14cbcSMatt Macy 	struct inode *ip = NULL;
554315ee00fSMartin Matuska 	uint64_t creation = 0;
555315ee00fSMartin Matuska 	dsl_dataset_t *snap_ds;
556315ee00fSMartin Matuska 	dsl_pool_t *pool;
557eda14cbcSMatt Macy 
558eda14cbcSMatt Macy 	while (ip == NULL) {
559eda14cbcSMatt Macy 		ip = ilookup(zfsvfs->z_sb, (unsigned long)id);
560eda14cbcSMatt Macy 		if (ip)
561eda14cbcSMatt Macy 			break;
562eda14cbcSMatt Macy 
563315ee00fSMartin Matuska 		if (id <= ZFSCTL_INO_SNAPDIRS && !creation) {
564315ee00fSMartin Matuska 			pool = dmu_objset_pool(zfsvfs->z_os);
565315ee00fSMartin Matuska 			dsl_pool_config_enter(pool, FTAG);
566315ee00fSMartin Matuska 			if (!dsl_dataset_hold_obj(pool,
567315ee00fSMartin Matuska 			    ZFSCTL_INO_SNAPDIRS - id, FTAG, &snap_ds)) {
568315ee00fSMartin Matuska 				creation = dsl_get_creation(snap_ds);
569315ee00fSMartin Matuska 				dsl_dataset_rele(snap_ds, FTAG);
570315ee00fSMartin Matuska 			}
571315ee00fSMartin Matuska 			dsl_pool_config_exit(pool, FTAG);
572315ee00fSMartin Matuska 		}
573315ee00fSMartin Matuska 
574eda14cbcSMatt Macy 		/* May fail due to concurrent zfsctl_inode_alloc() */
575315ee00fSMartin Matuska 		ip = zfsctl_inode_alloc(zfsvfs, id, fops, ops, creation);
576eda14cbcSMatt Macy 	}
577eda14cbcSMatt Macy 
578eda14cbcSMatt Macy 	return (ip);
579eda14cbcSMatt Macy }
580eda14cbcSMatt Macy 
581eda14cbcSMatt Macy /*
582eda14cbcSMatt Macy  * Create the '.zfs' directory.  This directory is cached as part of the VFS
583eda14cbcSMatt Macy  * structure.  This results in a hold on the zfsvfs_t.  The code in zfs_umount()
584eda14cbcSMatt Macy  * therefore checks against a vfs_count of 2 instead of 1.  This reference
585eda14cbcSMatt Macy  * is removed when the ctldir is destroyed in the unmount.  All other entities
586eda14cbcSMatt Macy  * under the '.zfs' directory are created dynamically as needed.
587eda14cbcSMatt Macy  *
588eda14cbcSMatt Macy  * Because the dynamically created '.zfs' directory entries assume the use
589eda14cbcSMatt Macy  * of 64-bit inode numbers this support must be disabled on 32-bit systems.
590eda14cbcSMatt Macy  */
591eda14cbcSMatt Macy int
zfsctl_create(zfsvfs_t * zfsvfs)592eda14cbcSMatt Macy zfsctl_create(zfsvfs_t *zfsvfs)
593eda14cbcSMatt Macy {
594eda14cbcSMatt Macy 	ASSERT(zfsvfs->z_ctldir == NULL);
595eda14cbcSMatt Macy 
596eda14cbcSMatt Macy 	zfsvfs->z_ctldir = zfsctl_inode_alloc(zfsvfs, ZFSCTL_INO_ROOT,
597315ee00fSMartin Matuska 	    &zpl_fops_root, &zpl_ops_root, 0);
598eda14cbcSMatt Macy 	if (zfsvfs->z_ctldir == NULL)
599eda14cbcSMatt Macy 		return (SET_ERROR(ENOENT));
600eda14cbcSMatt Macy 
601eda14cbcSMatt Macy 	return (0);
602eda14cbcSMatt Macy }
603eda14cbcSMatt Macy 
604eda14cbcSMatt Macy /*
605eda14cbcSMatt Macy  * Destroy the '.zfs' directory or remove a snapshot from zfs_snapshots_by_name.
606eda14cbcSMatt Macy  * Only called when the filesystem is unmounted.
607eda14cbcSMatt Macy  */
608eda14cbcSMatt Macy void
zfsctl_destroy(zfsvfs_t * zfsvfs)609eda14cbcSMatt Macy zfsctl_destroy(zfsvfs_t *zfsvfs)
610eda14cbcSMatt Macy {
611eda14cbcSMatt Macy 	if (zfsvfs->z_issnap) {
612eda14cbcSMatt Macy 		zfs_snapentry_t *se;
613eda14cbcSMatt Macy 		spa_t *spa = zfsvfs->z_os->os_spa;
614eda14cbcSMatt Macy 		uint64_t objsetid = dmu_objset_id(zfsvfs->z_os);
615eda14cbcSMatt Macy 
616eda14cbcSMatt Macy 		rw_enter(&zfs_snapshot_lock, RW_WRITER);
617eda14cbcSMatt Macy 		se = zfsctl_snapshot_find_by_objsetid(spa, objsetid);
618eda14cbcSMatt Macy 		if (se != NULL)
619eda14cbcSMatt Macy 			zfsctl_snapshot_remove(se);
620eda14cbcSMatt Macy 		rw_exit(&zfs_snapshot_lock);
621eda14cbcSMatt Macy 		if (se != NULL) {
622eda14cbcSMatt Macy 			zfsctl_snapshot_unmount_cancel(se);
623eda14cbcSMatt Macy 			zfsctl_snapshot_rele(se);
624eda14cbcSMatt Macy 		}
625eda14cbcSMatt Macy 	} else if (zfsvfs->z_ctldir) {
626eda14cbcSMatt Macy 		iput(zfsvfs->z_ctldir);
627eda14cbcSMatt Macy 		zfsvfs->z_ctldir = NULL;
628eda14cbcSMatt Macy 	}
629eda14cbcSMatt Macy }
630eda14cbcSMatt Macy 
631eda14cbcSMatt Macy /*
632eda14cbcSMatt Macy  * Given a root znode, retrieve the associated .zfs directory.
633eda14cbcSMatt Macy  * Add a hold to the vnode and return it.
634eda14cbcSMatt Macy  */
635eda14cbcSMatt Macy struct inode *
zfsctl_root(znode_t * zp)636eda14cbcSMatt Macy zfsctl_root(znode_t *zp)
637eda14cbcSMatt Macy {
638eda14cbcSMatt Macy 	ASSERT(zfs_has_ctldir(zp));
639f9693befSMartin Matuska 	/* Must have an existing ref, so igrab() cannot return NULL */
640f9693befSMartin Matuska 	VERIFY3P(igrab(ZTOZSB(zp)->z_ctldir), !=, NULL);
641eda14cbcSMatt Macy 	return (ZTOZSB(zp)->z_ctldir);
642eda14cbcSMatt Macy }
643eda14cbcSMatt Macy 
644eda14cbcSMatt Macy /*
645eda14cbcSMatt Macy  * Generate a long fid to indicate a snapdir. We encode whether snapdir is
646eda14cbcSMatt Macy  * already mounted in gen field. We do this because nfsd lookup will not
647eda14cbcSMatt Macy  * trigger automount. Next time the nfsd does fh_to_dentry, we will notice
648eda14cbcSMatt Macy  * this and do automount and return ESTALE to force nfsd revalidate and follow
649eda14cbcSMatt Macy  * mount.
650eda14cbcSMatt Macy  */
651eda14cbcSMatt Macy static int
zfsctl_snapdir_fid(struct inode * ip,fid_t * fidp)652eda14cbcSMatt Macy zfsctl_snapdir_fid(struct inode *ip, fid_t *fidp)
653eda14cbcSMatt Macy {
654eda14cbcSMatt Macy 	zfid_short_t *zfid = (zfid_short_t *)fidp;
655eda14cbcSMatt Macy 	zfid_long_t *zlfid = (zfid_long_t *)fidp;
656eda14cbcSMatt Macy 	uint32_t gen = 0;
657eda14cbcSMatt Macy 	uint64_t object;
658eda14cbcSMatt Macy 	uint64_t objsetid;
659eda14cbcSMatt Macy 	int i;
660eda14cbcSMatt Macy 	struct dentry *dentry;
661eda14cbcSMatt Macy 
662eda14cbcSMatt Macy 	if (fidp->fid_len < LONG_FID_LEN) {
663eda14cbcSMatt Macy 		fidp->fid_len = LONG_FID_LEN;
664eda14cbcSMatt Macy 		return (SET_ERROR(ENOSPC));
665eda14cbcSMatt Macy 	}
666eda14cbcSMatt Macy 
667eda14cbcSMatt Macy 	object = ip->i_ino;
668eda14cbcSMatt Macy 	objsetid = ZFSCTL_INO_SNAPDIRS - ip->i_ino;
669eda14cbcSMatt Macy 	zfid->zf_len = LONG_FID_LEN;
670eda14cbcSMatt Macy 
671eda14cbcSMatt Macy 	dentry = d_obtain_alias(igrab(ip));
672eda14cbcSMatt Macy 	if (!IS_ERR(dentry)) {
673eda14cbcSMatt Macy 		gen = !!d_mountpoint(dentry);
674eda14cbcSMatt Macy 		dput(dentry);
675eda14cbcSMatt Macy 	}
676eda14cbcSMatt Macy 
677eda14cbcSMatt Macy 	for (i = 0; i < sizeof (zfid->zf_object); i++)
678eda14cbcSMatt Macy 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
679eda14cbcSMatt Macy 
680eda14cbcSMatt Macy 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
681eda14cbcSMatt Macy 		zfid->zf_gen[i] = (uint8_t)(gen >> (8 * i));
682eda14cbcSMatt Macy 
683eda14cbcSMatt Macy 	for (i = 0; i < sizeof (zlfid->zf_setid); i++)
684eda14cbcSMatt Macy 		zlfid->zf_setid[i] = (uint8_t)(objsetid >> (8 * i));
685eda14cbcSMatt Macy 
686eda14cbcSMatt Macy 	for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
687eda14cbcSMatt Macy 		zlfid->zf_setgen[i] = 0;
688eda14cbcSMatt Macy 
689eda14cbcSMatt Macy 	return (0);
690eda14cbcSMatt Macy }
691eda14cbcSMatt Macy 
692eda14cbcSMatt Macy /*
693eda14cbcSMatt Macy  * Generate an appropriate fid for an entry in the .zfs directory.
694eda14cbcSMatt Macy  */
695eda14cbcSMatt Macy int
zfsctl_fid(struct inode * ip,fid_t * fidp)696eda14cbcSMatt Macy zfsctl_fid(struct inode *ip, fid_t *fidp)
697eda14cbcSMatt Macy {
698eda14cbcSMatt Macy 	znode_t		*zp = ITOZ(ip);
699eda14cbcSMatt Macy 	zfsvfs_t	*zfsvfs = ITOZSB(ip);
700eda14cbcSMatt Macy 	uint64_t	object = zp->z_id;
701eda14cbcSMatt Macy 	zfid_short_t	*zfid;
702eda14cbcSMatt Macy 	int		i;
703c7046f76SMartin Matuska 	int		error;
704eda14cbcSMatt Macy 
705c7046f76SMartin Matuska 	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
706c7046f76SMartin Matuska 		return (error);
707eda14cbcSMatt Macy 
708eda14cbcSMatt Macy 	if (zfsctl_is_snapdir(ip)) {
709c7046f76SMartin Matuska 		zfs_exit(zfsvfs, FTAG);
710eda14cbcSMatt Macy 		return (zfsctl_snapdir_fid(ip, fidp));
711eda14cbcSMatt Macy 	}
712eda14cbcSMatt Macy 
713eda14cbcSMatt Macy 	if (fidp->fid_len < SHORT_FID_LEN) {
714eda14cbcSMatt Macy 		fidp->fid_len = SHORT_FID_LEN;
715c7046f76SMartin Matuska 		zfs_exit(zfsvfs, FTAG);
716eda14cbcSMatt Macy 		return (SET_ERROR(ENOSPC));
717eda14cbcSMatt Macy 	}
718eda14cbcSMatt Macy 
719eda14cbcSMatt Macy 	zfid = (zfid_short_t *)fidp;
720eda14cbcSMatt Macy 
721eda14cbcSMatt Macy 	zfid->zf_len = SHORT_FID_LEN;
722eda14cbcSMatt Macy 
723eda14cbcSMatt Macy 	for (i = 0; i < sizeof (zfid->zf_object); i++)
724eda14cbcSMatt Macy 		zfid->zf_object[i] = (uint8_t)(object >> (8 * i));
725eda14cbcSMatt Macy 
726eda14cbcSMatt Macy 	/* .zfs znodes always have a generation number of 0 */
727eda14cbcSMatt Macy 	for (i = 0; i < sizeof (zfid->zf_gen); i++)
728eda14cbcSMatt Macy 		zfid->zf_gen[i] = 0;
729eda14cbcSMatt Macy 
730c7046f76SMartin Matuska 	zfs_exit(zfsvfs, FTAG);
731eda14cbcSMatt Macy 	return (0);
732eda14cbcSMatt Macy }
733eda14cbcSMatt Macy 
734eda14cbcSMatt Macy /*
735eda14cbcSMatt Macy  * Construct a full dataset name in full_name: "pool/dataset@snap_name"
736eda14cbcSMatt Macy  */
737eda14cbcSMatt Macy static int
zfsctl_snapshot_name(zfsvfs_t * zfsvfs,const char * snap_name,int len,char * full_name)738eda14cbcSMatt Macy zfsctl_snapshot_name(zfsvfs_t *zfsvfs, const char *snap_name, int len,
739eda14cbcSMatt Macy     char *full_name)
740eda14cbcSMatt Macy {
741eda14cbcSMatt Macy 	objset_t *os = zfsvfs->z_os;
742eda14cbcSMatt Macy 
743eda14cbcSMatt Macy 	if (zfs_component_namecheck(snap_name, NULL, NULL) != 0)
744eda14cbcSMatt Macy 		return (SET_ERROR(EILSEQ));
745eda14cbcSMatt Macy 
746eda14cbcSMatt Macy 	dmu_objset_name(os, full_name);
747eda14cbcSMatt Macy 	if ((strlen(full_name) + 1 + strlen(snap_name)) >= len)
748eda14cbcSMatt Macy 		return (SET_ERROR(ENAMETOOLONG));
749eda14cbcSMatt Macy 
750eda14cbcSMatt Macy 	(void) strcat(full_name, "@");
751eda14cbcSMatt Macy 	(void) strcat(full_name, snap_name);
752eda14cbcSMatt Macy 
753eda14cbcSMatt Macy 	return (0);
754eda14cbcSMatt Macy }
755eda14cbcSMatt Macy 
756eda14cbcSMatt Macy /*
757eda14cbcSMatt Macy  * Returns full path in full_path: "/pool/dataset/.zfs/snapshot/snap_name/"
758eda14cbcSMatt Macy  */
759eda14cbcSMatt Macy static int
zfsctl_snapshot_path_objset(zfsvfs_t * zfsvfs,uint64_t objsetid,int path_len,char * full_path)760eda14cbcSMatt Macy zfsctl_snapshot_path_objset(zfsvfs_t *zfsvfs, uint64_t objsetid,
761eda14cbcSMatt Macy     int path_len, char *full_path)
762eda14cbcSMatt Macy {
763eda14cbcSMatt Macy 	objset_t *os = zfsvfs->z_os;
764eda14cbcSMatt Macy 	fstrans_cookie_t cookie;
765eda14cbcSMatt Macy 	char *snapname;
766eda14cbcSMatt Macy 	boolean_t case_conflict;
767eda14cbcSMatt Macy 	uint64_t id, pos = 0;
768eda14cbcSMatt Macy 	int error = 0;
769eda14cbcSMatt Macy 
770eda14cbcSMatt Macy 	cookie = spl_fstrans_mark();
771eda14cbcSMatt Macy 	snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
772eda14cbcSMatt Macy 
773eda14cbcSMatt Macy 	while (error == 0) {
774eda14cbcSMatt Macy 		dsl_pool_config_enter(dmu_objset_pool(os), FTAG);
775eda14cbcSMatt Macy 		error = dmu_snapshot_list_next(zfsvfs->z_os,
776eda14cbcSMatt Macy 		    ZFS_MAX_DATASET_NAME_LEN, snapname, &id, &pos,
777eda14cbcSMatt Macy 		    &case_conflict);
778eda14cbcSMatt Macy 		dsl_pool_config_exit(dmu_objset_pool(os), FTAG);
779eda14cbcSMatt Macy 		if (error)
780eda14cbcSMatt Macy 			goto out;
781eda14cbcSMatt Macy 
782eda14cbcSMatt Macy 		if (id == objsetid)
783eda14cbcSMatt Macy 			break;
784eda14cbcSMatt Macy 	}
785eda14cbcSMatt Macy 
786*87bf66d4SMartin Matuska 	mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
787*87bf66d4SMartin Matuska 	if (zfsvfs->z_vfs->vfs_mntpoint != NULL) {
788eda14cbcSMatt Macy 		snprintf(full_path, path_len, "%s/.zfs/snapshot/%s",
789eda14cbcSMatt Macy 		    zfsvfs->z_vfs->vfs_mntpoint, snapname);
790*87bf66d4SMartin Matuska 	} else
791*87bf66d4SMartin Matuska 		error = SET_ERROR(ENOENT);
792*87bf66d4SMartin Matuska 	mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
793*87bf66d4SMartin Matuska 
794eda14cbcSMatt Macy out:
795eda14cbcSMatt Macy 	kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
796eda14cbcSMatt Macy 	spl_fstrans_unmark(cookie);
797eda14cbcSMatt Macy 
798eda14cbcSMatt Macy 	return (error);
799eda14cbcSMatt Macy }
800eda14cbcSMatt Macy 
801eda14cbcSMatt Macy /*
802eda14cbcSMatt Macy  * Special case the handling of "..".
803eda14cbcSMatt Macy  */
804eda14cbcSMatt Macy int
zfsctl_root_lookup(struct inode * dip,const char * name,struct inode ** ipp,int flags,cred_t * cr,int * direntflags,pathname_t * realpnp)805180f8225SMatt Macy zfsctl_root_lookup(struct inode *dip, const char *name, struct inode **ipp,
806eda14cbcSMatt Macy     int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
807eda14cbcSMatt Macy {
808eda14cbcSMatt Macy 	zfsvfs_t *zfsvfs = ITOZSB(dip);
809eda14cbcSMatt Macy 	int error = 0;
810eda14cbcSMatt Macy 
811c7046f76SMartin Matuska 	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
812c7046f76SMartin Matuska 		return (error);
813eda14cbcSMatt Macy 
8147a7741afSMartin Matuska 	if (zfsvfs->z_show_ctldir == ZFS_SNAPDIR_DISABLED) {
8157a7741afSMartin Matuska 		*ipp = NULL;
8167a7741afSMartin Matuska 	} else if (strcmp(name, "..") == 0) {
817eda14cbcSMatt Macy 		*ipp = dip->i_sb->s_root->d_inode;
818eda14cbcSMatt Macy 	} else if (strcmp(name, ZFS_SNAPDIR_NAME) == 0) {
819eda14cbcSMatt Macy 		*ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIR,
820eda14cbcSMatt Macy 		    &zpl_fops_snapdir, &zpl_ops_snapdir);
821eda14cbcSMatt Macy 	} else if (strcmp(name, ZFS_SHAREDIR_NAME) == 0) {
822eda14cbcSMatt Macy 		*ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SHARES,
823eda14cbcSMatt Macy 		    &zpl_fops_shares, &zpl_ops_shares);
824eda14cbcSMatt Macy 	} else {
825eda14cbcSMatt Macy 		*ipp = NULL;
826eda14cbcSMatt Macy 	}
827eda14cbcSMatt Macy 
828eda14cbcSMatt Macy 	if (*ipp == NULL)
829eda14cbcSMatt Macy 		error = SET_ERROR(ENOENT);
830eda14cbcSMatt Macy 
831c7046f76SMartin Matuska 	zfs_exit(zfsvfs, FTAG);
832eda14cbcSMatt Macy 
833eda14cbcSMatt Macy 	return (error);
834eda14cbcSMatt Macy }
835eda14cbcSMatt Macy 
836eda14cbcSMatt Macy /*
837eda14cbcSMatt Macy  * Lookup entry point for the 'snapshot' directory.  Try to open the
838eda14cbcSMatt Macy  * snapshot if it exist, creating the pseudo filesystem inode as necessary.
839eda14cbcSMatt Macy  */
840eda14cbcSMatt Macy int
zfsctl_snapdir_lookup(struct inode * dip,const char * name,struct inode ** ipp,int flags,cred_t * cr,int * direntflags,pathname_t * realpnp)841180f8225SMatt Macy zfsctl_snapdir_lookup(struct inode *dip, const char *name, struct inode **ipp,
842eda14cbcSMatt Macy     int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
843eda14cbcSMatt Macy {
844eda14cbcSMatt Macy 	zfsvfs_t *zfsvfs = ITOZSB(dip);
845eda14cbcSMatt Macy 	uint64_t id;
846eda14cbcSMatt Macy 	int error;
847eda14cbcSMatt Macy 
848c7046f76SMartin Matuska 	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
849c7046f76SMartin Matuska 		return (error);
850eda14cbcSMatt Macy 
851eda14cbcSMatt Macy 	error = dmu_snapshot_lookup(zfsvfs->z_os, name, &id);
852eda14cbcSMatt Macy 	if (error) {
853c7046f76SMartin Matuska 		zfs_exit(zfsvfs, FTAG);
854eda14cbcSMatt Macy 		return (error);
855eda14cbcSMatt Macy 	}
856eda14cbcSMatt Macy 
857eda14cbcSMatt Macy 	*ipp = zfsctl_inode_lookup(zfsvfs, ZFSCTL_INO_SNAPDIRS - id,
858eda14cbcSMatt Macy 	    &simple_dir_operations, &simple_dir_inode_operations);
859eda14cbcSMatt Macy 	if (*ipp == NULL)
860eda14cbcSMatt Macy 		error = SET_ERROR(ENOENT);
861eda14cbcSMatt Macy 
862c7046f76SMartin Matuska 	zfs_exit(zfsvfs, FTAG);
863eda14cbcSMatt Macy 
864eda14cbcSMatt Macy 	return (error);
865eda14cbcSMatt Macy }
866eda14cbcSMatt Macy 
867eda14cbcSMatt Macy /*
868eda14cbcSMatt Macy  * Renaming a directory under '.zfs/snapshot' will automatically trigger
869eda14cbcSMatt Macy  * a rename of the snapshot to the new given name.  The rename is confined
870eda14cbcSMatt Macy  * to the '.zfs/snapshot' directory snapshots cannot be moved elsewhere.
871eda14cbcSMatt Macy  */
872eda14cbcSMatt Macy int
zfsctl_snapdir_rename(struct inode * sdip,const char * snm,struct inode * tdip,const char * tnm,cred_t * cr,int flags)873180f8225SMatt Macy zfsctl_snapdir_rename(struct inode *sdip, const char *snm,
874180f8225SMatt Macy     struct inode *tdip, const char *tnm, cred_t *cr, int flags)
875eda14cbcSMatt Macy {
876eda14cbcSMatt Macy 	zfsvfs_t *zfsvfs = ITOZSB(sdip);
877eda14cbcSMatt Macy 	char *to, *from, *real, *fsname;
878eda14cbcSMatt Macy 	int error;
879eda14cbcSMatt Macy 
880eda14cbcSMatt Macy 	if (!zfs_admin_snapshot)
881eda14cbcSMatt Macy 		return (SET_ERROR(EACCES));
882eda14cbcSMatt Macy 
883c7046f76SMartin Matuska 	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
884c7046f76SMartin Matuska 		return (error);
885eda14cbcSMatt Macy 
886eda14cbcSMatt Macy 	to = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
887eda14cbcSMatt Macy 	from = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
888eda14cbcSMatt Macy 	real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
889eda14cbcSMatt Macy 	fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
890eda14cbcSMatt Macy 
891eda14cbcSMatt Macy 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
892eda14cbcSMatt Macy 		error = dmu_snapshot_realname(zfsvfs->z_os, snm, real,
893eda14cbcSMatt Macy 		    ZFS_MAX_DATASET_NAME_LEN, NULL);
894eda14cbcSMatt Macy 		if (error == 0) {
895eda14cbcSMatt Macy 			snm = real;
896eda14cbcSMatt Macy 		} else if (error != ENOTSUP) {
897eda14cbcSMatt Macy 			goto out;
898eda14cbcSMatt Macy 		}
899eda14cbcSMatt Macy 	}
900eda14cbcSMatt Macy 
901eda14cbcSMatt Macy 	dmu_objset_name(zfsvfs->z_os, fsname);
902eda14cbcSMatt Macy 
903eda14cbcSMatt Macy 	error = zfsctl_snapshot_name(ITOZSB(sdip), snm,
904eda14cbcSMatt Macy 	    ZFS_MAX_DATASET_NAME_LEN, from);
905eda14cbcSMatt Macy 	if (error == 0)
906eda14cbcSMatt Macy 		error = zfsctl_snapshot_name(ITOZSB(tdip), tnm,
907eda14cbcSMatt Macy 		    ZFS_MAX_DATASET_NAME_LEN, to);
908eda14cbcSMatt Macy 	if (error == 0)
909eda14cbcSMatt Macy 		error = zfs_secpolicy_rename_perms(from, to, cr);
910eda14cbcSMatt Macy 	if (error != 0)
911eda14cbcSMatt Macy 		goto out;
912eda14cbcSMatt Macy 
913eda14cbcSMatt Macy 	/*
914eda14cbcSMatt Macy 	 * Cannot move snapshots out of the snapdir.
915eda14cbcSMatt Macy 	 */
916eda14cbcSMatt Macy 	if (sdip != tdip) {
917eda14cbcSMatt Macy 		error = SET_ERROR(EINVAL);
918eda14cbcSMatt Macy 		goto out;
919eda14cbcSMatt Macy 	}
920eda14cbcSMatt Macy 
921eda14cbcSMatt Macy 	/*
922eda14cbcSMatt Macy 	 * No-op when names are identical.
923eda14cbcSMatt Macy 	 */
924eda14cbcSMatt Macy 	if (strcmp(snm, tnm) == 0) {
925eda14cbcSMatt Macy 		error = 0;
926eda14cbcSMatt Macy 		goto out;
927eda14cbcSMatt Macy 	}
928eda14cbcSMatt Macy 
929eda14cbcSMatt Macy 	rw_enter(&zfs_snapshot_lock, RW_WRITER);
930eda14cbcSMatt Macy 
931eda14cbcSMatt Macy 	error = dsl_dataset_rename_snapshot(fsname, snm, tnm, B_FALSE);
932eda14cbcSMatt Macy 	if (error == 0)
933eda14cbcSMatt Macy 		(void) zfsctl_snapshot_rename(snm, tnm);
934eda14cbcSMatt Macy 
935eda14cbcSMatt Macy 	rw_exit(&zfs_snapshot_lock);
936eda14cbcSMatt Macy out:
937eda14cbcSMatt Macy 	kmem_free(from, ZFS_MAX_DATASET_NAME_LEN);
938eda14cbcSMatt Macy 	kmem_free(to, ZFS_MAX_DATASET_NAME_LEN);
939eda14cbcSMatt Macy 	kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
940eda14cbcSMatt Macy 	kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
941eda14cbcSMatt Macy 
942c7046f76SMartin Matuska 	zfs_exit(zfsvfs, FTAG);
943eda14cbcSMatt Macy 
944eda14cbcSMatt Macy 	return (error);
945eda14cbcSMatt Macy }
946eda14cbcSMatt Macy 
947eda14cbcSMatt Macy /*
948eda14cbcSMatt Macy  * Removing a directory under '.zfs/snapshot' will automatically trigger
949eda14cbcSMatt Macy  * the removal of the snapshot with the given name.
950eda14cbcSMatt Macy  */
951eda14cbcSMatt Macy int
zfsctl_snapdir_remove(struct inode * dip,const char * name,cred_t * cr,int flags)952180f8225SMatt Macy zfsctl_snapdir_remove(struct inode *dip, const char *name, cred_t *cr,
953180f8225SMatt Macy     int flags)
954eda14cbcSMatt Macy {
955eda14cbcSMatt Macy 	zfsvfs_t *zfsvfs = ITOZSB(dip);
956eda14cbcSMatt Macy 	char *snapname, *real;
957eda14cbcSMatt Macy 	int error;
958eda14cbcSMatt Macy 
959eda14cbcSMatt Macy 	if (!zfs_admin_snapshot)
960eda14cbcSMatt Macy 		return (SET_ERROR(EACCES));
961eda14cbcSMatt Macy 
962c7046f76SMartin Matuska 	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
963c7046f76SMartin Matuska 		return (error);
964eda14cbcSMatt Macy 
965eda14cbcSMatt Macy 	snapname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
966eda14cbcSMatt Macy 	real = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
967eda14cbcSMatt Macy 
968eda14cbcSMatt Macy 	if (zfsvfs->z_case == ZFS_CASE_INSENSITIVE) {
969eda14cbcSMatt Macy 		error = dmu_snapshot_realname(zfsvfs->z_os, name, real,
970eda14cbcSMatt Macy 		    ZFS_MAX_DATASET_NAME_LEN, NULL);
971eda14cbcSMatt Macy 		if (error == 0) {
972eda14cbcSMatt Macy 			name = real;
973eda14cbcSMatt Macy 		} else if (error != ENOTSUP) {
974eda14cbcSMatt Macy 			goto out;
975eda14cbcSMatt Macy 		}
976eda14cbcSMatt Macy 	}
977eda14cbcSMatt Macy 
978eda14cbcSMatt Macy 	error = zfsctl_snapshot_name(ITOZSB(dip), name,
979eda14cbcSMatt Macy 	    ZFS_MAX_DATASET_NAME_LEN, snapname);
980eda14cbcSMatt Macy 	if (error == 0)
981eda14cbcSMatt Macy 		error = zfs_secpolicy_destroy_perms(snapname, cr);
982eda14cbcSMatt Macy 	if (error != 0)
983eda14cbcSMatt Macy 		goto out;
984eda14cbcSMatt Macy 
985eda14cbcSMatt Macy 	error = zfsctl_snapshot_unmount(snapname, MNT_FORCE);
986eda14cbcSMatt Macy 	if ((error == 0) || (error == ENOENT))
987eda14cbcSMatt Macy 		error = dsl_destroy_snapshot(snapname, B_FALSE);
988eda14cbcSMatt Macy out:
989eda14cbcSMatt Macy 	kmem_free(snapname, ZFS_MAX_DATASET_NAME_LEN);
990eda14cbcSMatt Macy 	kmem_free(real, ZFS_MAX_DATASET_NAME_LEN);
991eda14cbcSMatt Macy 
992c7046f76SMartin Matuska 	zfs_exit(zfsvfs, FTAG);
993eda14cbcSMatt Macy 
994eda14cbcSMatt Macy 	return (error);
995eda14cbcSMatt Macy }
996eda14cbcSMatt Macy 
997eda14cbcSMatt Macy /*
998eda14cbcSMatt Macy  * Creating a directory under '.zfs/snapshot' will automatically trigger
999eda14cbcSMatt Macy  * the creation of a new snapshot with the given name.
1000eda14cbcSMatt Macy  */
1001eda14cbcSMatt Macy int
zfsctl_snapdir_mkdir(struct inode * dip,const char * dirname,vattr_t * vap,struct inode ** ipp,cred_t * cr,int flags)1002180f8225SMatt Macy zfsctl_snapdir_mkdir(struct inode *dip, const char *dirname, vattr_t *vap,
1003eda14cbcSMatt Macy     struct inode **ipp, cred_t *cr, int flags)
1004eda14cbcSMatt Macy {
1005eda14cbcSMatt Macy 	zfsvfs_t *zfsvfs = ITOZSB(dip);
1006eda14cbcSMatt Macy 	char *dsname;
1007eda14cbcSMatt Macy 	int error;
1008eda14cbcSMatt Macy 
1009eda14cbcSMatt Macy 	if (!zfs_admin_snapshot)
1010eda14cbcSMatt Macy 		return (SET_ERROR(EACCES));
1011eda14cbcSMatt Macy 
1012eda14cbcSMatt Macy 	dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
1013eda14cbcSMatt Macy 
1014eda14cbcSMatt Macy 	if (zfs_component_namecheck(dirname, NULL, NULL) != 0) {
1015eda14cbcSMatt Macy 		error = SET_ERROR(EILSEQ);
1016eda14cbcSMatt Macy 		goto out;
1017eda14cbcSMatt Macy 	}
1018eda14cbcSMatt Macy 
1019eda14cbcSMatt Macy 	dmu_objset_name(zfsvfs->z_os, dsname);
1020eda14cbcSMatt Macy 
1021eda14cbcSMatt Macy 	error = zfs_secpolicy_snapshot_perms(dsname, cr);
1022eda14cbcSMatt Macy 	if (error != 0)
1023eda14cbcSMatt Macy 		goto out;
1024eda14cbcSMatt Macy 
1025eda14cbcSMatt Macy 	if (error == 0) {
1026eda14cbcSMatt Macy 		error = dmu_objset_snapshot_one(dsname, dirname);
1027eda14cbcSMatt Macy 		if (error != 0)
1028eda14cbcSMatt Macy 			goto out;
1029eda14cbcSMatt Macy 
1030eda14cbcSMatt Macy 		error = zfsctl_snapdir_lookup(dip, dirname, ipp,
1031eda14cbcSMatt Macy 		    0, cr, NULL, NULL);
1032eda14cbcSMatt Macy 	}
1033eda14cbcSMatt Macy out:
1034eda14cbcSMatt Macy 	kmem_free(dsname, ZFS_MAX_DATASET_NAME_LEN);
1035eda14cbcSMatt Macy 
1036eda14cbcSMatt Macy 	return (error);
1037eda14cbcSMatt Macy }
1038eda14cbcSMatt Macy 
1039eda14cbcSMatt Macy /*
1040ac0bf12eSMatt Macy  * Flush everything out of the kernel's export table and such.
1041ac0bf12eSMatt Macy  * This is needed as once the snapshot is used over NFS, its
1042ac0bf12eSMatt Macy  * entries in svc_export and svc_expkey caches hold reference
1043ac0bf12eSMatt Macy  * to the snapshot mount point. There is no known way of flushing
1044ac0bf12eSMatt Macy  * only the entries related to the snapshot.
1045ac0bf12eSMatt Macy  */
1046ac0bf12eSMatt Macy static void
exportfs_flush(void)1047ac0bf12eSMatt Macy exportfs_flush(void)
1048ac0bf12eSMatt Macy {
1049ac0bf12eSMatt Macy 	char *argv[] = { "/usr/sbin/exportfs", "-f", NULL };
1050ac0bf12eSMatt Macy 	char *envp[] = { NULL };
1051ac0bf12eSMatt Macy 
1052ac0bf12eSMatt Macy 	(void) call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
1053ac0bf12eSMatt Macy }
1054ac0bf12eSMatt Macy 
1055ac0bf12eSMatt Macy /*
1056*87bf66d4SMartin Matuska  * Returns the path in char format for given struct path. Uses
1057*87bf66d4SMartin Matuska  * d_path exported by kernel to convert struct path to char
1058*87bf66d4SMartin Matuska  * format. Returns the correct path for mountpoints and chroot
1059*87bf66d4SMartin Matuska  * environments.
1060*87bf66d4SMartin Matuska  *
1061*87bf66d4SMartin Matuska  * If chroot environment has directories that are mounted with
1062*87bf66d4SMartin Matuska  * --bind or --rbind flag, d_path returns the complete path inside
1063*87bf66d4SMartin Matuska  * chroot environment but does not return the absolute path, i.e.
1064*87bf66d4SMartin Matuska  * the path to chroot environment is missing.
1065*87bf66d4SMartin Matuska  */
1066*87bf66d4SMartin Matuska static int
get_root_path(struct path * path,char * buff,int len)1067*87bf66d4SMartin Matuska get_root_path(struct path *path, char *buff, int len)
1068*87bf66d4SMartin Matuska {
1069*87bf66d4SMartin Matuska 	char *path_buffer, *path_ptr;
1070*87bf66d4SMartin Matuska 	int error = 0;
1071*87bf66d4SMartin Matuska 
1072*87bf66d4SMartin Matuska 	path_get(path);
1073*87bf66d4SMartin Matuska 	path_buffer = kmem_zalloc(len, KM_SLEEP);
1074*87bf66d4SMartin Matuska 	path_ptr = d_path(path, path_buffer, len);
1075*87bf66d4SMartin Matuska 	if (IS_ERR(path_ptr))
1076*87bf66d4SMartin Matuska 		error = SET_ERROR(-PTR_ERR(path_ptr));
1077*87bf66d4SMartin Matuska 	else
1078*87bf66d4SMartin Matuska 		strcpy(buff, path_ptr);
1079*87bf66d4SMartin Matuska 
1080*87bf66d4SMartin Matuska 	kmem_free(path_buffer, len);
1081*87bf66d4SMartin Matuska 	path_put(path);
1082*87bf66d4SMartin Matuska 	return (error);
1083*87bf66d4SMartin Matuska }
1084*87bf66d4SMartin Matuska 
1085*87bf66d4SMartin Matuska /*
1086*87bf66d4SMartin Matuska  * Returns if the current process root is chrooted or not. Linux
1087*87bf66d4SMartin Matuska  * kernel exposes the task_struct for current process and init.
1088*87bf66d4SMartin Matuska  * Since init process root points to actual root filesystem when
1089*87bf66d4SMartin Matuska  * Linux runtime is reached, we can compare the current process
1090*87bf66d4SMartin Matuska  * root with init process root to determine if root of the current
1091*87bf66d4SMartin Matuska  * process is different from init, which can reliably determine if
1092*87bf66d4SMartin Matuska  * current process is in chroot context or not.
1093*87bf66d4SMartin Matuska  */
1094*87bf66d4SMartin Matuska static int
is_current_chrooted(void)1095*87bf66d4SMartin Matuska is_current_chrooted(void)
1096*87bf66d4SMartin Matuska {
1097*87bf66d4SMartin Matuska 	struct task_struct *curr = current, *global = &init_task;
1098*87bf66d4SMartin Matuska 	struct path cr_root, gl_root;
1099*87bf66d4SMartin Matuska 
1100*87bf66d4SMartin Matuska 	task_lock(curr);
1101*87bf66d4SMartin Matuska 	get_fs_root(curr->fs, &cr_root);
1102*87bf66d4SMartin Matuska 	task_unlock(curr);
1103*87bf66d4SMartin Matuska 
1104*87bf66d4SMartin Matuska 	task_lock(global);
1105*87bf66d4SMartin Matuska 	get_fs_root(global->fs, &gl_root);
1106*87bf66d4SMartin Matuska 	task_unlock(global);
1107*87bf66d4SMartin Matuska 
1108*87bf66d4SMartin Matuska 	int chrooted = !path_equal(&cr_root, &gl_root);
1109*87bf66d4SMartin Matuska 	path_put(&gl_root);
1110*87bf66d4SMartin Matuska 	path_put(&cr_root);
1111*87bf66d4SMartin Matuska 
1112*87bf66d4SMartin Matuska 	return (chrooted);
1113*87bf66d4SMartin Matuska }
1114*87bf66d4SMartin Matuska 
1115*87bf66d4SMartin Matuska /*
1116eda14cbcSMatt Macy  * Attempt to unmount a snapshot by making a call to user space.
1117eda14cbcSMatt Macy  * There is no assurance that this can or will succeed, is just a
1118eda14cbcSMatt Macy  * best effort.  In the case where it does fail, perhaps because
1119eda14cbcSMatt Macy  * it's in use, the unmount will fail harmlessly.
1120eda14cbcSMatt Macy  */
1121eda14cbcSMatt Macy int
zfsctl_snapshot_unmount(const char * snapname,int flags)1122180f8225SMatt Macy zfsctl_snapshot_unmount(const char *snapname, int flags)
1123eda14cbcSMatt Macy {
1124eda14cbcSMatt Macy 	char *argv[] = { "/usr/bin/env", "umount", "-t", "zfs", "-n", NULL,
1125eda14cbcSMatt Macy 	    NULL };
1126eda14cbcSMatt Macy 	char *envp[] = { NULL };
1127eda14cbcSMatt Macy 	zfs_snapentry_t *se;
1128eda14cbcSMatt Macy 	int error;
1129eda14cbcSMatt Macy 
1130eda14cbcSMatt Macy 	rw_enter(&zfs_snapshot_lock, RW_READER);
1131eda14cbcSMatt Macy 	if ((se = zfsctl_snapshot_find_by_name(snapname)) == NULL) {
1132eda14cbcSMatt Macy 		rw_exit(&zfs_snapshot_lock);
1133eda14cbcSMatt Macy 		return (SET_ERROR(ENOENT));
1134eda14cbcSMatt Macy 	}
1135eda14cbcSMatt Macy 	rw_exit(&zfs_snapshot_lock);
1136eda14cbcSMatt Macy 
1137ac0bf12eSMatt Macy 	exportfs_flush();
1138ac0bf12eSMatt Macy 
1139eda14cbcSMatt Macy 	if (flags & MNT_FORCE)
1140eda14cbcSMatt Macy 		argv[4] = "-fn";
1141eda14cbcSMatt Macy 	argv[5] = se->se_path;
1142eda14cbcSMatt Macy 	dprintf("unmount; path=%s\n", se->se_path);
1143eda14cbcSMatt Macy 	error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
1144eda14cbcSMatt Macy 	zfsctl_snapshot_rele(se);
1145eda14cbcSMatt Macy 
1146eda14cbcSMatt Macy 
1147eda14cbcSMatt Macy 	/*
1148eda14cbcSMatt Macy 	 * The umount system utility will return 256 on error.  We must
1149eda14cbcSMatt Macy 	 * assume this error is because the file system is busy so it is
1150eda14cbcSMatt Macy 	 * converted to the more sensible EBUSY.
1151eda14cbcSMatt Macy 	 */
1152eda14cbcSMatt Macy 	if (error)
1153eda14cbcSMatt Macy 		error = SET_ERROR(EBUSY);
1154eda14cbcSMatt Macy 
1155eda14cbcSMatt Macy 	return (error);
1156eda14cbcSMatt Macy }
1157eda14cbcSMatt Macy 
1158eda14cbcSMatt Macy int
zfsctl_snapshot_mount(struct path * path,int flags)1159eda14cbcSMatt Macy zfsctl_snapshot_mount(struct path *path, int flags)
1160eda14cbcSMatt Macy {
1161eda14cbcSMatt Macy 	struct dentry *dentry = path->dentry;
1162eda14cbcSMatt Macy 	struct inode *ip = dentry->d_inode;
1163eda14cbcSMatt Macy 	zfsvfs_t *zfsvfs;
1164eda14cbcSMatt Macy 	zfsvfs_t *snap_zfsvfs;
1165eda14cbcSMatt Macy 	zfs_snapentry_t *se;
11667a7741afSMartin Matuska 	char *full_name, *full_path, *options;
1167e2df9bb4SMartin Matuska 	char *argv[] = { "/usr/bin/env", "mount", "-i", "-t", "zfs", "-n",
11687a7741afSMartin Matuska 	    "-o", NULL, NULL, NULL, NULL };
1169eda14cbcSMatt Macy 	char *envp[] = { NULL };
1170eda14cbcSMatt Macy 	int error;
1171eda14cbcSMatt Macy 	struct path spath;
1172eda14cbcSMatt Macy 
1173eda14cbcSMatt Macy 	if (ip == NULL)
1174eda14cbcSMatt Macy 		return (SET_ERROR(EISDIR));
1175eda14cbcSMatt Macy 
1176eda14cbcSMatt Macy 	zfsvfs = ITOZSB(ip);
1177c7046f76SMartin Matuska 	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
1178c7046f76SMartin Matuska 		return (error);
1179eda14cbcSMatt Macy 
1180eda14cbcSMatt Macy 	full_name = kmem_zalloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
1181eda14cbcSMatt Macy 	full_path = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
11827a7741afSMartin Matuska 	options = kmem_zalloc(7, KM_SLEEP);
1183eda14cbcSMatt Macy 
1184eda14cbcSMatt Macy 	error = zfsctl_snapshot_name(zfsvfs, dname(dentry),
1185eda14cbcSMatt Macy 	    ZFS_MAX_DATASET_NAME_LEN, full_name);
1186eda14cbcSMatt Macy 	if (error)
1187eda14cbcSMatt Macy 		goto error;
1188eda14cbcSMatt Macy 
1189*87bf66d4SMartin Matuska 	if (is_current_chrooted() == 0) {
1190*87bf66d4SMartin Matuska 		/*
1191*87bf66d4SMartin Matuska 		 * Current process is not in chroot context
1192*87bf66d4SMartin Matuska 		 */
1193*87bf66d4SMartin Matuska 
1194*87bf66d4SMartin Matuska 		char *m = kmem_zalloc(MAXPATHLEN, KM_SLEEP);
1195*87bf66d4SMartin Matuska 		struct path mnt_path;
1196*87bf66d4SMartin Matuska 		mnt_path.mnt = path->mnt;
1197*87bf66d4SMartin Matuska 		mnt_path.dentry = path->mnt->mnt_root;
1198*87bf66d4SMartin Matuska 
1199*87bf66d4SMartin Matuska 		/*
1200*87bf66d4SMartin Matuska 		 * Get path to current mountpoint
1201*87bf66d4SMartin Matuska 		 */
1202*87bf66d4SMartin Matuska 		error = get_root_path(&mnt_path, m, MAXPATHLEN);
1203*87bf66d4SMartin Matuska 		if (error != 0) {
1204*87bf66d4SMartin Matuska 			kmem_free(m, MAXPATHLEN);
1205*87bf66d4SMartin Matuska 			goto error;
1206*87bf66d4SMartin Matuska 		}
1207*87bf66d4SMartin Matuska 		mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
1208*87bf66d4SMartin Matuska 		if (zfsvfs->z_vfs->vfs_mntpoint != NULL) {
1209*87bf66d4SMartin Matuska 			/*
1210*87bf66d4SMartin Matuska 			 * If current mountpoint and vfs_mntpoint are not same,
1211*87bf66d4SMartin Matuska 			 * store current mountpoint in vfs_mntpoint.
1212*87bf66d4SMartin Matuska 			 */
1213*87bf66d4SMartin Matuska 			if (strcmp(zfsvfs->z_vfs->vfs_mntpoint, m) != 0) {
1214*87bf66d4SMartin Matuska 				kmem_strfree(zfsvfs->z_vfs->vfs_mntpoint);
1215*87bf66d4SMartin Matuska 				zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m);
1216*87bf66d4SMartin Matuska 			}
1217*87bf66d4SMartin Matuska 		} else
1218*87bf66d4SMartin Matuska 			zfsvfs->z_vfs->vfs_mntpoint = kmem_strdup(m);
1219*87bf66d4SMartin Matuska 		mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
1220*87bf66d4SMartin Matuska 		kmem_free(m, MAXPATHLEN);
1221*87bf66d4SMartin Matuska 	}
1222*87bf66d4SMartin Matuska 
1223eda14cbcSMatt Macy 	/*
1224eda14cbcSMatt Macy 	 * Construct a mount point path from sb of the ctldir inode and dirent
1225eda14cbcSMatt Macy 	 * name, instead of from d_path(), so that chroot'd process doesn't fail
1226eda14cbcSMatt Macy 	 * on mount.zfs(8).
1227eda14cbcSMatt Macy 	 */
1228*87bf66d4SMartin Matuska 	mutex_enter(&zfsvfs->z_vfs->vfs_mntpt_lock);
1229eda14cbcSMatt Macy 	snprintf(full_path, MAXPATHLEN, "%s/.zfs/snapshot/%s",
1230eda14cbcSMatt Macy 	    zfsvfs->z_vfs->vfs_mntpoint ? zfsvfs->z_vfs->vfs_mntpoint : "",
1231eda14cbcSMatt Macy 	    dname(dentry));
1232*87bf66d4SMartin Matuska 	mutex_exit(&zfsvfs->z_vfs->vfs_mntpt_lock);
1233eda14cbcSMatt Macy 
12347a7741afSMartin Matuska 	snprintf(options, 7, "%s",
12357a7741afSMartin Matuska 	    zfs_snapshot_no_setuid ? "nosuid" : "suid");
12367a7741afSMartin Matuska 
1237eda14cbcSMatt Macy 	/*
1238eda14cbcSMatt Macy 	 * Multiple concurrent automounts of a snapshot are never allowed.
1239eda14cbcSMatt Macy 	 * The snapshot may be manually mounted as many times as desired.
1240eda14cbcSMatt Macy 	 */
1241eda14cbcSMatt Macy 	if (zfsctl_snapshot_ismounted(full_name)) {
1242eda14cbcSMatt Macy 		error = 0;
1243eda14cbcSMatt Macy 		goto error;
1244eda14cbcSMatt Macy 	}
1245eda14cbcSMatt Macy 
1246eda14cbcSMatt Macy 	/*
1247eda14cbcSMatt Macy 	 * Attempt to mount the snapshot from user space.  Normally this
1248eda14cbcSMatt Macy 	 * would be done using the vfs_kern_mount() function, however that
1249eda14cbcSMatt Macy 	 * function is marked GPL-only and cannot be used.  On error we are
1250eda14cbcSMatt Macy 	 * careful to log the real error to the console and return EISDIR
1251eda14cbcSMatt Macy 	 * to safely abort the automount.  This should be very rare.
1252eda14cbcSMatt Macy 	 *
1253eda14cbcSMatt Macy 	 * If the user mode helper happens to return EBUSY, a concurrent
1254eda14cbcSMatt Macy 	 * mount is already in progress in which case the error is ignored.
1255eda14cbcSMatt Macy 	 * Take note that if the program was executed successfully the return
1256eda14cbcSMatt Macy 	 * value from call_usermodehelper() will be (exitcode << 8 + signal).
1257eda14cbcSMatt Macy 	 */
1258eda14cbcSMatt Macy 	dprintf("mount; name=%s path=%s\n", full_name, full_path);
12597a7741afSMartin Matuska 	argv[7] = options;
12607a7741afSMartin Matuska 	argv[8] = full_name;
12617a7741afSMartin Matuska 	argv[9] = full_path;
1262eda14cbcSMatt Macy 	error = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
1263eda14cbcSMatt Macy 	if (error) {
1264eda14cbcSMatt Macy 		if (!(error & MOUNT_BUSY << 8)) {
1265eda14cbcSMatt Macy 			zfs_dbgmsg("Unable to automount %s error=%d",
1266eda14cbcSMatt Macy 			    full_path, error);
1267eda14cbcSMatt Macy 			error = SET_ERROR(EISDIR);
1268eda14cbcSMatt Macy 		} else {
1269eda14cbcSMatt Macy 			/*
1270eda14cbcSMatt Macy 			 * EBUSY, this could mean a concurrent mount, or the
1271eda14cbcSMatt Macy 			 * snapshot has already been mounted at completely
1272eda14cbcSMatt Macy 			 * different place. We return 0 so VFS will retry. For
1273eda14cbcSMatt Macy 			 * the latter case the VFS will retry several times
1274eda14cbcSMatt Macy 			 * and return ELOOP, which is probably not a very good
1275eda14cbcSMatt Macy 			 * behavior.
1276eda14cbcSMatt Macy 			 */
1277eda14cbcSMatt Macy 			error = 0;
1278eda14cbcSMatt Macy 		}
1279eda14cbcSMatt Macy 		goto error;
1280eda14cbcSMatt Macy 	}
1281eda14cbcSMatt Macy 
1282eda14cbcSMatt Macy 	/*
1283eda14cbcSMatt Macy 	 * Follow down in to the mounted snapshot and set MNT_SHRINKABLE
1284eda14cbcSMatt Macy 	 * to identify this as an automounted filesystem.
1285eda14cbcSMatt Macy 	 */
1286eda14cbcSMatt Macy 	spath = *path;
1287eda14cbcSMatt Macy 	path_get(&spath);
1288eda14cbcSMatt Macy 	if (follow_down_one(&spath)) {
1289eda14cbcSMatt Macy 		snap_zfsvfs = ITOZSB(spath.dentry->d_inode);
1290eda14cbcSMatt Macy 		snap_zfsvfs->z_parent = zfsvfs;
1291eda14cbcSMatt Macy 		dentry = spath.dentry;
1292eda14cbcSMatt Macy 		spath.mnt->mnt_flags |= MNT_SHRINKABLE;
1293eda14cbcSMatt Macy 
1294eda14cbcSMatt Macy 		rw_enter(&zfs_snapshot_lock, RW_WRITER);
1295eda14cbcSMatt Macy 		se = zfsctl_snapshot_alloc(full_name, full_path,
1296eda14cbcSMatt Macy 		    snap_zfsvfs->z_os->os_spa, dmu_objset_id(snap_zfsvfs->z_os),
1297eda14cbcSMatt Macy 		    dentry);
1298eda14cbcSMatt Macy 		zfsctl_snapshot_add(se);
1299eda14cbcSMatt Macy 		zfsctl_snapshot_unmount_delay_impl(se, zfs_expire_snapshot);
1300eda14cbcSMatt Macy 		rw_exit(&zfs_snapshot_lock);
1301eda14cbcSMatt Macy 	}
1302eda14cbcSMatt Macy 	path_put(&spath);
1303eda14cbcSMatt Macy error:
1304eda14cbcSMatt Macy 	kmem_free(full_name, ZFS_MAX_DATASET_NAME_LEN);
1305eda14cbcSMatt Macy 	kmem_free(full_path, MAXPATHLEN);
1306eda14cbcSMatt Macy 
1307c7046f76SMartin Matuska 	zfs_exit(zfsvfs, FTAG);
1308eda14cbcSMatt Macy 
1309eda14cbcSMatt Macy 	return (error);
1310eda14cbcSMatt Macy }
1311eda14cbcSMatt Macy 
1312eda14cbcSMatt Macy /*
1313eda14cbcSMatt Macy  * Get the snapdir inode from fid
1314eda14cbcSMatt Macy  */
1315eda14cbcSMatt Macy int
zfsctl_snapdir_vget(struct super_block * sb,uint64_t objsetid,int gen,struct inode ** ipp)1316eda14cbcSMatt Macy zfsctl_snapdir_vget(struct super_block *sb, uint64_t objsetid, int gen,
1317eda14cbcSMatt Macy     struct inode **ipp)
1318eda14cbcSMatt Macy {
1319eda14cbcSMatt Macy 	int error;
1320eda14cbcSMatt Macy 	struct path path;
1321eda14cbcSMatt Macy 	char *mnt;
1322eda14cbcSMatt Macy 	struct dentry *dentry;
1323eda14cbcSMatt Macy 
1324eda14cbcSMatt Macy 	mnt = kmem_alloc(MAXPATHLEN, KM_SLEEP);
1325eda14cbcSMatt Macy 
1326eda14cbcSMatt Macy 	error = zfsctl_snapshot_path_objset(sb->s_fs_info, objsetid,
1327eda14cbcSMatt Macy 	    MAXPATHLEN, mnt);
1328eda14cbcSMatt Macy 	if (error)
1329eda14cbcSMatt Macy 		goto out;
1330eda14cbcSMatt Macy 
1331eda14cbcSMatt Macy 	/* Trigger automount */
1332eda14cbcSMatt Macy 	error = -kern_path(mnt, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &path);
1333eda14cbcSMatt Macy 	if (error)
1334eda14cbcSMatt Macy 		goto out;
1335eda14cbcSMatt Macy 
1336eda14cbcSMatt Macy 	path_put(&path);
1337eda14cbcSMatt Macy 	/*
1338eda14cbcSMatt Macy 	 * Get the snapdir inode. Note, we don't want to use the above
1339eda14cbcSMatt Macy 	 * path because it contains the root of the snapshot rather
1340eda14cbcSMatt Macy 	 * than the snapdir.
1341eda14cbcSMatt Macy 	 */
1342eda14cbcSMatt Macy 	*ipp = ilookup(sb, ZFSCTL_INO_SNAPDIRS - objsetid);
1343eda14cbcSMatt Macy 	if (*ipp == NULL) {
1344eda14cbcSMatt Macy 		error = SET_ERROR(ENOENT);
1345eda14cbcSMatt Macy 		goto out;
1346eda14cbcSMatt Macy 	}
1347eda14cbcSMatt Macy 
1348eda14cbcSMatt Macy 	/* check gen, see zfsctl_snapdir_fid */
1349eda14cbcSMatt Macy 	dentry = d_obtain_alias(igrab(*ipp));
1350eda14cbcSMatt Macy 	if (gen != (!IS_ERR(dentry) && d_mountpoint(dentry))) {
1351eda14cbcSMatt Macy 		iput(*ipp);
1352eda14cbcSMatt Macy 		*ipp = NULL;
1353eda14cbcSMatt Macy 		error = SET_ERROR(ENOENT);
1354eda14cbcSMatt Macy 	}
1355eda14cbcSMatt Macy 	if (!IS_ERR(dentry))
1356eda14cbcSMatt Macy 		dput(dentry);
1357eda14cbcSMatt Macy out:
1358eda14cbcSMatt Macy 	kmem_free(mnt, MAXPATHLEN);
1359eda14cbcSMatt Macy 	return (error);
1360eda14cbcSMatt Macy }
1361eda14cbcSMatt Macy 
1362eda14cbcSMatt Macy int
zfsctl_shares_lookup(struct inode * dip,char * name,struct inode ** ipp,int flags,cred_t * cr,int * direntflags,pathname_t * realpnp)1363eda14cbcSMatt Macy zfsctl_shares_lookup(struct inode *dip, char *name, struct inode **ipp,
1364eda14cbcSMatt Macy     int flags, cred_t *cr, int *direntflags, pathname_t *realpnp)
1365eda14cbcSMatt Macy {
1366eda14cbcSMatt Macy 	zfsvfs_t *zfsvfs = ITOZSB(dip);
1367eda14cbcSMatt Macy 	znode_t *zp;
1368eda14cbcSMatt Macy 	znode_t *dzp;
1369eda14cbcSMatt Macy 	int error;
1370eda14cbcSMatt Macy 
1371c7046f76SMartin Matuska 	if ((error = zfs_enter(zfsvfs, FTAG)) != 0)
1372c7046f76SMartin Matuska 		return (error);
1373eda14cbcSMatt Macy 
1374eda14cbcSMatt Macy 	if (zfsvfs->z_shares_dir == 0) {
1375c7046f76SMartin Matuska 		zfs_exit(zfsvfs, FTAG);
1376eda14cbcSMatt Macy 		return (SET_ERROR(ENOTSUP));
1377eda14cbcSMatt Macy 	}
1378eda14cbcSMatt Macy 
1379eda14cbcSMatt Macy 	if ((error = zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp)) == 0) {
1380eda14cbcSMatt Macy 		error = zfs_lookup(dzp, name, &zp, 0, cr, NULL, NULL);
1381eda14cbcSMatt Macy 		zrele(dzp);
1382eda14cbcSMatt Macy 	}
1383eda14cbcSMatt Macy 
1384c7046f76SMartin Matuska 	zfs_exit(zfsvfs, FTAG);
1385eda14cbcSMatt Macy 
1386eda14cbcSMatt Macy 	return (error);
1387eda14cbcSMatt Macy }
1388eda14cbcSMatt Macy 
1389eda14cbcSMatt Macy /*
1390eda14cbcSMatt Macy  * Initialize the various pieces we'll need to create and manipulate .zfs
1391eda14cbcSMatt Macy  * directories.  Currently this is unused but available.
1392eda14cbcSMatt Macy  */
1393eda14cbcSMatt Macy void
zfsctl_init(void)1394eda14cbcSMatt Macy zfsctl_init(void)
1395eda14cbcSMatt Macy {
1396eda14cbcSMatt Macy 	avl_create(&zfs_snapshots_by_name, snapentry_compare_by_name,
1397eda14cbcSMatt Macy 	    sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t,
1398eda14cbcSMatt Macy 	    se_node_name));
1399eda14cbcSMatt Macy 	avl_create(&zfs_snapshots_by_objsetid, snapentry_compare_by_objsetid,
1400eda14cbcSMatt Macy 	    sizeof (zfs_snapentry_t), offsetof(zfs_snapentry_t,
1401eda14cbcSMatt Macy 	    se_node_objsetid));
1402eda14cbcSMatt Macy 	rw_init(&zfs_snapshot_lock, NULL, RW_DEFAULT, NULL);
1403eda14cbcSMatt Macy }
1404eda14cbcSMatt Macy 
1405eda14cbcSMatt Macy /*
1406eda14cbcSMatt Macy  * Cleanup the various pieces we needed for .zfs directories.  In particular
1407eda14cbcSMatt Macy  * ensure the expiry timer is canceled safely.
1408eda14cbcSMatt Macy  */
1409eda14cbcSMatt Macy void
zfsctl_fini(void)1410eda14cbcSMatt Macy zfsctl_fini(void)
1411eda14cbcSMatt Macy {
1412eda14cbcSMatt Macy 	avl_destroy(&zfs_snapshots_by_name);
1413eda14cbcSMatt Macy 	avl_destroy(&zfs_snapshots_by_objsetid);
1414eda14cbcSMatt Macy 	rw_destroy(&zfs_snapshot_lock);
1415eda14cbcSMatt Macy }
1416eda14cbcSMatt Macy 
1417eda14cbcSMatt Macy module_param(zfs_admin_snapshot, int, 0644);
1418eda14cbcSMatt Macy MODULE_PARM_DESC(zfs_admin_snapshot, "Enable mkdir/rmdir/mv in .zfs/snapshot");
1419eda14cbcSMatt Macy 
1420eda14cbcSMatt Macy module_param(zfs_expire_snapshot, int, 0644);
1421eda14cbcSMatt Macy MODULE_PARM_DESC(zfs_expire_snapshot, "Seconds to expire .zfs/snapshot");
14227a7741afSMartin Matuska 
14237a7741afSMartin Matuska module_param(zfs_snapshot_no_setuid, int, 0644);
14247a7741afSMartin Matuska MODULE_PARM_DESC(zfs_snapshot_no_setuid,
14257a7741afSMartin Matuska 	"Disable setuid/setgid for automounts in .zfs/snapshot");
1426