xref: /freebsd/sys/contrib/openzfs/module/os/linux/zfs/zpl_super.c (revision fa38579f317d5c2ff2926fab9b12ee6d429bd155)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 2011, Lawrence Livermore National Security, LLC.
24  * Copyright (c) 2023, Datto Inc. All rights reserved.
25  */
26 
27 
28 #include <sys/zfs_znode.h>
29 #include <sys/zfs_vfsops.h>
30 #include <sys/zfs_vnops.h>
31 #include <sys/zfs_ctldir.h>
32 #include <sys/zpl.h>
33 #include <linux/iversion.h>
34 
35 
36 static struct inode *
37 zpl_inode_alloc(struct super_block *sb)
38 {
39 	struct inode *ip;
40 
41 	VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0);
42 	inode_set_iversion(ip, 1);
43 
44 	return (ip);
45 }
46 
47 static void
48 zpl_inode_destroy(struct inode *ip)
49 {
50 	ASSERT(atomic_read(&ip->i_count) == 0);
51 	zfs_inode_destroy(ip);
52 }
53 
54 /*
55  * Called from __mark_inode_dirty() to reflect that something in the
56  * inode has changed.  We use it to ensure the znode system attributes
57  * are always strictly update to date with respect to the inode.
58  */
59 static void
60 zpl_dirty_inode(struct inode *ip, int flags)
61 {
62 	fstrans_cookie_t cookie;
63 
64 	cookie = spl_fstrans_mark();
65 	zfs_dirty_inode(ip, flags);
66 	spl_fstrans_unmark(cookie);
67 }
68 
69 /*
70  * When ->drop_inode() is called its return value indicates if the
71  * inode should be evicted from the inode cache.  If the inode is
72  * unhashed and has no links the default policy is to evict it
73  * immediately.
74  *
75  * The ->evict_inode() callback must minimally truncate the inode pages,
76  * and call clear_inode().  For 2.6.35 and later kernels this will
77  * simply update the inode state, with the sync occurring before the
78  * truncate in evict().  For earlier kernels clear_inode() maps to
79  * end_writeback() which is responsible for completing all outstanding
80  * write back.  In either case, once this is done it is safe to cleanup
 * any remaining inode specific data via zfs_inactive().
83  */
static void
zpl_evict_inode(struct inode *ip)
{
	fstrans_cookie_t cookie;

	/* Prevent memory-reclaim re-entry while tearing down the inode. */
	cookie = spl_fstrans_mark();
	/* Drop all page cache pages held by this inode. */
	truncate_setsize(ip, 0);
	/* Mark the inode clean and detach it from VFS writeback state. */
	clear_inode(ip);
	/* Now it is safe to release the remaining znode data. */
	zfs_inactive(ip);
	spl_fstrans_unmark(cookie);
}
95 
96 static void
97 zpl_put_super(struct super_block *sb)
98 {
99 	fstrans_cookie_t cookie;
100 	int error;
101 
102 	cookie = spl_fstrans_mark();
103 	error = -zfs_umount(sb);
104 	spl_fstrans_unmark(cookie);
105 	ASSERT3S(error, <=, 0);
106 }
107 
108 static int
109 zpl_sync_fs(struct super_block *sb, int wait)
110 {
111 	fstrans_cookie_t cookie;
112 	cred_t *cr = CRED();
113 	int error;
114 
115 	crhold(cr);
116 	cookie = spl_fstrans_mark();
117 	error = -zfs_sync(sb, wait, cr);
118 	spl_fstrans_unmark(cookie);
119 	crfree(cr);
120 	ASSERT3S(error, <=, 0);
121 
122 	return (error);
123 }
124 
static int
zpl_statfs(struct dentry *dentry, struct kstatfs *statp)
{
	fstrans_cookie_t cookie;
	int error;

	cookie = spl_fstrans_mark();
	error = -zfs_statvfs(dentry->d_inode, statp);
	spl_fstrans_unmark(cookie);
	ASSERT3S(error, <=, 0);

	/*
	 * If required by a 32-bit system call, dynamically scale the
	 * block size up to 16MiB and decrease the block counts.  This
	 * allows for a maximum size of 64EiB to be reported.  The file
	 * counts must be artificially capped at 2^32-1.
	 */
	if (unlikely(zpl_is_32bit_api())) {
		/*
		 * Double the block size (both fragment and preferred)
		 * and halve the block counts until the total count
		 * fits in 32 bits or the block size reaches the
		 * SPA_MAXBLOCKSIZE (16MiB) ceiling.
		 */
		while (statp->f_blocks > UINT32_MAX &&
		    statp->f_bsize < SPA_MAXBLOCKSIZE) {
			statp->f_frsize <<= 1;
			statp->f_bsize <<= 1;

			statp->f_blocks >>= 1;
			statp->f_bfree >>= 1;
			statp->f_bavail >>= 1;
		}

		/*
		 * Cap the reported file counts so that f_files
		 * (used + free) never exceeds UINT32_MAX, while
		 * keeping the used-object count accurate.
		 */
		uint64_t usedobjs = statp->f_files - statp->f_ffree;
		statp->f_ffree = MIN(statp->f_ffree, UINT32_MAX - usedobjs);
		statp->f_files = statp->f_ffree + usedobjs;
	}

	return (error);
}
160 
161 static int
162 zpl_remount_fs(struct super_block *sb, int *flags, char *data)
163 {
164 	zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data };
165 	fstrans_cookie_t cookie;
166 	int error;
167 
168 	cookie = spl_fstrans_mark();
169 	error = -zfs_remount(sb, flags, &zm);
170 	spl_fstrans_unmark(cookie);
171 	ASSERT3S(error, <=, 0);
172 
173 	return (error);
174 }
175 
176 static int
177 __zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs)
178 {
179 	int error;
180 	if ((error = zpl_enter(zfsvfs, FTAG)) != 0)
181 		return (error);
182 
183 	char *fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
184 	dmu_objset_name(zfsvfs->z_os, fsname);
185 
186 	for (int i = 0; fsname[i] != 0; i++) {
187 		/*
188 		 * Spaces in the dataset name must be converted to their
189 		 * octal escape sequence for getmntent(3) to correctly
190 		 * parse then fsname portion of /proc/self/mounts.
191 		 */
192 		if (fsname[i] == ' ') {
193 			seq_puts(seq, "\\040");
194 		} else {
195 			seq_putc(seq, fsname[i]);
196 		}
197 	}
198 
199 	kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN);
200 
201 	zpl_exit(zfsvfs, FTAG);
202 
203 	return (0);
204 }
205 
206 static int
207 zpl_show_devname(struct seq_file *seq, struct dentry *root)
208 {
209 	return (__zpl_show_devname(seq, root->d_sb->s_fs_info));
210 }
211 
212 static int
213 __zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs)
214 {
215 	seq_printf(seq, ",%s",
216 	    zfsvfs->z_flags & ZSB_XATTR ? "xattr" : "noxattr");
217 
218 #ifdef CONFIG_FS_POSIX_ACL
219 	switch (zfsvfs->z_acl_type) {
220 	case ZFS_ACLTYPE_POSIX:
221 		seq_puts(seq, ",posixacl");
222 		break;
223 	default:
224 		seq_puts(seq, ",noacl");
225 		break;
226 	}
227 #endif /* CONFIG_FS_POSIX_ACL */
228 
229 	switch (zfsvfs->z_case) {
230 	case ZFS_CASE_SENSITIVE:
231 		seq_puts(seq, ",casesensitive");
232 		break;
233 	case ZFS_CASE_INSENSITIVE:
234 		seq_puts(seq, ",caseinsensitive");
235 		break;
236 	default:
237 		seq_puts(seq, ",casemixed");
238 		break;
239 	}
240 
241 	return (0);
242 }
243 
244 static int
245 zpl_show_options(struct seq_file *seq, struct dentry *root)
246 {
247 	return (__zpl_show_options(seq, root->d_sb->s_fs_info));
248 }
249 
250 static int
251 zpl_fill_super(struct super_block *sb, void *data, int silent)
252 {
253 	zfs_mnt_t *zm = (zfs_mnt_t *)data;
254 	fstrans_cookie_t cookie;
255 	int error;
256 
257 	cookie = spl_fstrans_mark();
258 	error = -zfs_domount(sb, zm, silent);
259 	spl_fstrans_unmark(cookie);
260 	ASSERT3S(error, <=, 0);
261 
262 	return (error);
263 }
264 
265 static int
266 zpl_test_super(struct super_block *s, void *data)
267 {
268 	zfsvfs_t *zfsvfs = s->s_fs_info;
269 	objset_t *os = data;
270 	/*
271 	 * If the os doesn't match the z_os in the super_block, assume it is
272 	 * not a match. Matching would imply a multimount of a dataset. It is
273 	 * possible that during a multimount, there is a simultaneous operation
274 	 * that changes the z_os, e.g., rollback, where the match will be
275 	 * missed, but in that case the user will get an EBUSY.
276 	 */
277 	return (zfsvfs != NULL && os == zfsvfs->z_os);
278 }
279 
/*
 * Resolve the dataset named in 'zm', then find or create the
 * super_block mounting it.  Returns a locked, active super_block on
 * success or an ERR_PTR()-encoded negative errno on failure.
 */
static struct super_block *
zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm)
{
	struct super_block *s;
	objset_t *os;
	boolean_t issnap = B_FALSE;
	int err;

	/* dmu_objset_hold() returns a positive errno; negate for ERR_PTR(). */
	err = dmu_objset_hold(zm->mnt_osname, FTAG, &os);
	if (err)
		return (ERR_PTR(-err));

	/*
	 * The dsl pool lock must be released prior to calling sget().
	 * It is possible sget() may block on the lock in grab_super()
	 * while deactivate_super() holds that same lock and waits for
	 * a txg sync.  If the dsl_pool lock is held over sget()
	 * this can prevent the pool sync and cause a deadlock.
	 */
	dsl_dataset_long_hold(dmu_objset_ds(os), FTAG);
	dsl_pool_rele(dmu_objset_pool(os), FTAG);

	s = sget(fs_type, zpl_test_super, set_anon_super, flags, os);

	/*
	 * Recheck with the lock held to prevent mounting the wrong dataset
	 * since z_os can be stale when the teardown lock is held.
	 *
	 * We can't do this in zpl_test_super in since it's under spinlock and
	 * also s_umount lock is not held there so it would race with
	 * zfs_umount and zfsvfs can be freed.
	 */
	if (!IS_ERR(s) && s->s_fs_info != NULL) {
		zfsvfs_t *zfsvfs = s->s_fs_info;
		if (zpl_enter(zfsvfs, FTAG) == 0) {
			if (os != zfsvfs->z_os)
				err = -SET_ERROR(EBUSY);
			issnap = zfsvfs->z_issnap;
			zpl_exit(zfsvfs, FTAG);
		} else {
			/* Filesystem is being torn down; treat as busy. */
			err = -SET_ERROR(EBUSY);
		}
	}
	/* The long hold taken above is no longer needed once sget() returns. */
	dsl_dataset_long_rele(dmu_objset_ds(os), FTAG);
	dsl_dataset_rele(dmu_objset_ds(os), FTAG);

	if (IS_ERR(s))
		return (ERR_CAST(s));

	/* err is already negated (set via -SET_ERROR() above). */
	if (err) {
		deactivate_locked_super(s);
		return (ERR_PTR(err));
	}

	if (s->s_root == NULL) {
		/* New superblock: perform the actual mount. */
		err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0);
		if (err) {
			deactivate_locked_super(s);
			return (ERR_PTR(err));
		}
		s->s_flags |= SB_ACTIVE;
	} else if (!issnap && ((flags ^ s->s_flags) & SB_RDONLY)) {
		/*
		 * Skip ro check for snap since snap is always ro regardless
		 * ro flag is passed by mount or not.
		 */
		deactivate_locked_super(s);
		return (ERR_PTR(-EBUSY));
	}

	return (s);
}
352 
353 static struct dentry *
354 zpl_mount(struct file_system_type *fs_type, int flags,
355     const char *osname, void *data)
356 {
357 	zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data };
358 
359 	struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm);
360 	if (IS_ERR(sb))
361 		return (ERR_CAST(sb));
362 
363 	return (dget(sb->s_root));
364 }
365 
static void
zpl_kill_sb(struct super_block *sb)
{
	/* Quiesce ZFS-side state before the generic superblock teardown. */
	zfs_preumount(sb);
	kill_anon_super(sb);
}
372 
/*
 * Shrinker entry point: ask the VFS to prune up to 'nr_to_scan'
 * cached dentries/inodes from the superblock passed in 'arg'.
 * Best-effort; silently does nothing if the superblock lock cannot
 * be taken or the superblock is not fully born.
 */
void
zpl_prune_sb(uint64_t nr_to_scan, void *arg)
{
	struct super_block *sb = (struct super_block *)arg;
	int objects = 0;

	/*
	 * Ensure the superblock is not in the process of being torn down.
	 */
#ifdef HAVE_SB_DYING
	if (down_read_trylock(&sb->s_umount)) {
		/* Newer kernels expose teardown state via SB_DYING. */
		if (!(sb->s_flags & SB_DYING) && sb->s_root &&
		    (sb->s_flags & SB_BORN)) {
			(void) zfs_prune(sb, nr_to_scan, &objects);
		}
		up_read(&sb->s_umount);
	}
#else
	if (down_read_trylock(&sb->s_umount)) {
		/* Older kernels: an unhashed s_instances means teardown. */
		if (!hlist_unhashed(&sb->s_instances) &&
		    sb->s_root && (sb->s_flags & SB_BORN)) {
			(void) zfs_prune(sb, nr_to_scan, &objects);
		}
		up_read(&sb->s_umount);
	}
#endif
}
400 
/*
 * Superblock operations vector registered with the VFS.  Each callback
 * is a thin wrapper around the corresponding zfs_*() implementation;
 * unused hooks are explicitly NULL.
 */
const struct super_operations zpl_super_operations = {
	.alloc_inode		= zpl_inode_alloc,
	.destroy_inode		= zpl_inode_destroy,
	.dirty_inode		= zpl_dirty_inode,
	.write_inode		= NULL,
	.evict_inode		= zpl_evict_inode,
	.put_super		= zpl_put_super,
	.sync_fs		= zpl_sync_fs,
	.statfs			= zpl_statfs,
	.remount_fs		= zpl_remount_fs,
	.show_devname		= zpl_show_devname,
	.show_options		= zpl_show_options,
	.show_stats		= NULL,
};
415 
/*
 * Filesystem type registered with the kernel.  FS_ALLOW_IDMAP is only
 * advertised when the kernel provides the idmapped-mount API.
 */
struct file_system_type zpl_fs_type = {
	.owner			= THIS_MODULE,
	.name			= ZFS_DRIVER,
#if defined(HAVE_IDMAP_MNT_API)
	.fs_flags		= FS_USERNS_MOUNT | FS_ALLOW_IDMAP,
#else
	.fs_flags		= FS_USERNS_MOUNT,
#endif
	.mount			= zpl_mount,
	.kill_sb		= zpl_kill_sb,
};
427