1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (c) 2011, Lawrence Livermore National Security, LLC. 24 * Copyright (c) 2023, Datto Inc. All rights reserved. 25 */ 26 27 28 #include <sys/zfs_znode.h> 29 #include <sys/zfs_vfsops.h> 30 #include <sys/zfs_vnops.h> 31 #include <sys/zfs_ctldir.h> 32 #include <sys/zpl.h> 33 #include <linux/iversion.h> 34 35 36 static struct inode * 37 zpl_inode_alloc(struct super_block *sb) 38 { 39 struct inode *ip; 40 41 VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0); 42 inode_set_iversion(ip, 1); 43 44 return (ip); 45 } 46 47 static void 48 zpl_inode_destroy(struct inode *ip) 49 { 50 ASSERT(atomic_read(&ip->i_count) == 0); 51 zfs_inode_destroy(ip); 52 } 53 54 /* 55 * Called from __mark_inode_dirty() to reflect that something in the 56 * inode has changed. We use it to ensure the znode system attributes 57 * are always strictly update to date with respect to the inode. 58 */ 59 static void 60 zpl_dirty_inode(struct inode *ip, int flags) 61 { 62 fstrans_cookie_t cookie; 63 64 cookie = spl_fstrans_mark(); 65 zfs_dirty_inode(ip, flags); 66 spl_fstrans_unmark(cookie); 67 } 68 69 /* 70 * When ->drop_inode() is called its return value indicates if the 71 * inode should be evicted from the inode cache. If the inode is 72 * unhashed and has no links the default policy is to evict it 73 * immediately. 74 * 75 * The ->evict_inode() callback must minimally truncate the inode pages, 76 * and call clear_inode(). For 2.6.35 and later kernels this will 77 * simply update the inode state, with the sync occurring before the 78 * truncate in evict(). For earlier kernels clear_inode() maps to 79 * end_writeback() which is responsible for completing all outstanding 80 * write back. In either case, once this is done it is safe to cleanup 81 * any remaining inode specific data via zfs_inactive(). 82 * remaining filesystem specific data. 83 */ 84 static void 85 zpl_evict_inode(struct inode *ip) 86 { 87 fstrans_cookie_t cookie; 88 89 cookie = spl_fstrans_mark(); 90 truncate_setsize(ip, 0); 91 clear_inode(ip); 92 zfs_inactive(ip); 93 spl_fstrans_unmark(cookie); 94 } 95 96 static void 97 zpl_put_super(struct super_block *sb) 98 { 99 fstrans_cookie_t cookie; 100 int error; 101 102 cookie = spl_fstrans_mark(); 103 error = -zfs_umount(sb); 104 spl_fstrans_unmark(cookie); 105 ASSERT3S(error, <=, 0); 106 } 107 108 static int 109 zpl_sync_fs(struct super_block *sb, int wait) 110 { 111 fstrans_cookie_t cookie; 112 cred_t *cr = CRED(); 113 int error; 114 115 crhold(cr); 116 cookie = spl_fstrans_mark(); 117 error = -zfs_sync(sb, wait, cr); 118 spl_fstrans_unmark(cookie); 119 crfree(cr); 120 ASSERT3S(error, <=, 0); 121 122 return (error); 123 } 124 125 static int 126 zpl_statfs(struct dentry *dentry, struct kstatfs *statp) 127 { 128 fstrans_cookie_t cookie; 129 int error; 130 131 cookie = spl_fstrans_mark(); 132 error = -zfs_statvfs(dentry->d_inode, statp); 133 spl_fstrans_unmark(cookie); 134 ASSERT3S(error, <=, 0); 135 136 /* 137 * If required by a 32-bit system call, dynamically scale the 138 * block size up to 16MiB and decrease the block counts. This 139 * allows for a maximum size of 64EiB to be reported. The file 140 * counts must be artificially capped at 2^32-1. 141 */ 142 if (unlikely(zpl_is_32bit_api())) { 143 while (statp->f_blocks > UINT32_MAX && 144 statp->f_bsize < SPA_MAXBLOCKSIZE) { 145 statp->f_frsize <<= 1; 146 statp->f_bsize <<= 1; 147 148 statp->f_blocks >>= 1; 149 statp->f_bfree >>= 1; 150 statp->f_bavail >>= 1; 151 } 152 153 uint64_t usedobjs = statp->f_files - statp->f_ffree; 154 statp->f_ffree = MIN(statp->f_ffree, UINT32_MAX - usedobjs); 155 statp->f_files = statp->f_ffree + usedobjs; 156 } 157 158 return (error); 159 } 160 161 static int 162 zpl_remount_fs(struct super_block *sb, int *flags, char *data) 163 { 164 zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data }; 165 fstrans_cookie_t cookie; 166 int error; 167 168 cookie = spl_fstrans_mark(); 169 error = -zfs_remount(sb, flags, &zm); 170 spl_fstrans_unmark(cookie); 171 ASSERT3S(error, <=, 0); 172 173 return (error); 174 } 175 176 static int 177 __zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs) 178 { 179 int error; 180 if ((error = zpl_enter(zfsvfs, FTAG)) != 0) 181 return (error); 182 183 char *fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 184 dmu_objset_name(zfsvfs->z_os, fsname); 185 186 for (int i = 0; fsname[i] != 0; i++) { 187 /* 188 * Spaces in the dataset name must be converted to their 189 * octal escape sequence for getmntent(3) to correctly 190 * parse then fsname portion of /proc/self/mounts. 191 */ 192 if (fsname[i] == ' ') { 193 seq_puts(seq, "\\040"); 194 } else { 195 seq_putc(seq, fsname[i]); 196 } 197 } 198 199 kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN); 200 201 zpl_exit(zfsvfs, FTAG); 202 203 return (0); 204 } 205 206 static int 207 zpl_show_devname(struct seq_file *seq, struct dentry *root) 208 { 209 return (__zpl_show_devname(seq, root->d_sb->s_fs_info)); 210 } 211 212 static int 213 __zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs) 214 { 215 seq_printf(seq, ",%s", 216 zfsvfs->z_flags & ZSB_XATTR ? "xattr" : "noxattr"); 217 218 #ifdef CONFIG_FS_POSIX_ACL 219 switch (zfsvfs->z_acl_type) { 220 case ZFS_ACLTYPE_POSIX: 221 seq_puts(seq, ",posixacl"); 222 break; 223 default: 224 seq_puts(seq, ",noacl"); 225 break; 226 } 227 #endif /* CONFIG_FS_POSIX_ACL */ 228 229 switch (zfsvfs->z_case) { 230 case ZFS_CASE_SENSITIVE: 231 seq_puts(seq, ",casesensitive"); 232 break; 233 case ZFS_CASE_INSENSITIVE: 234 seq_puts(seq, ",caseinsensitive"); 235 break; 236 default: 237 seq_puts(seq, ",casemixed"); 238 break; 239 } 240 241 return (0); 242 } 243 244 static int 245 zpl_show_options(struct seq_file *seq, struct dentry *root) 246 { 247 return (__zpl_show_options(seq, root->d_sb->s_fs_info)); 248 } 249 250 static int 251 zpl_fill_super(struct super_block *sb, void *data, int silent) 252 { 253 zfs_mnt_t *zm = (zfs_mnt_t *)data; 254 fstrans_cookie_t cookie; 255 int error; 256 257 cookie = spl_fstrans_mark(); 258 error = -zfs_domount(sb, zm, silent); 259 spl_fstrans_unmark(cookie); 260 ASSERT3S(error, <=, 0); 261 262 return (error); 263 } 264 265 static int 266 zpl_test_super(struct super_block *s, void *data) 267 { 268 zfsvfs_t *zfsvfs = s->s_fs_info; 269 objset_t *os = data; 270 /* 271 * If the os doesn't match the z_os in the super_block, assume it is 272 * not a match. Matching would imply a multimount of a dataset. It is 273 * possible that during a multimount, there is a simultaneous operation 274 * that changes the z_os, e.g., rollback, where the match will be 275 * missed, but in that case the user will get an EBUSY. 276 */ 277 return (zfsvfs != NULL && os == zfsvfs->z_os); 278 } 279 280 static struct super_block * 281 zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm) 282 { 283 struct super_block *s; 284 objset_t *os; 285 boolean_t issnap = B_FALSE; 286 int err; 287 288 err = dmu_objset_hold(zm->mnt_osname, FTAG, &os); 289 if (err) 290 return (ERR_PTR(-err)); 291 292 /* 293 * The dsl pool lock must be released prior to calling sget(). 294 * It is possible sget() may block on the lock in grab_super() 295 * while deactivate_super() holds that same lock and waits for 296 * a txg sync. If the dsl_pool lock is held over sget() 297 * this can prevent the pool sync and cause a deadlock. 298 */ 299 dsl_dataset_long_hold(dmu_objset_ds(os), FTAG); 300 dsl_pool_rele(dmu_objset_pool(os), FTAG); 301 302 s = sget(fs_type, zpl_test_super, set_anon_super, flags, os); 303 304 /* 305 * Recheck with the lock held to prevent mounting the wrong dataset 306 * since z_os can be stale when the teardown lock is held. 307 * 308 * We can't do this in zpl_test_super in since it's under spinlock and 309 * also s_umount lock is not held there so it would race with 310 * zfs_umount and zfsvfs can be freed. 311 */ 312 if (!IS_ERR(s) && s->s_fs_info != NULL) { 313 zfsvfs_t *zfsvfs = s->s_fs_info; 314 if (zpl_enter(zfsvfs, FTAG) == 0) { 315 if (os != zfsvfs->z_os) 316 err = -SET_ERROR(EBUSY); 317 issnap = zfsvfs->z_issnap; 318 zpl_exit(zfsvfs, FTAG); 319 } else { 320 err = -SET_ERROR(EBUSY); 321 } 322 } 323 dsl_dataset_long_rele(dmu_objset_ds(os), FTAG); 324 dsl_dataset_rele(dmu_objset_ds(os), FTAG); 325 326 if (IS_ERR(s)) 327 return (ERR_CAST(s)); 328 329 if (err) { 330 deactivate_locked_super(s); 331 return (ERR_PTR(err)); 332 } 333 334 if (s->s_root == NULL) { 335 err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0); 336 if (err) { 337 deactivate_locked_super(s); 338 return (ERR_PTR(err)); 339 } 340 s->s_flags |= SB_ACTIVE; 341 } else if (!issnap && ((flags ^ s->s_flags) & SB_RDONLY)) { 342 /* 343 * Skip ro check for snap since snap is always ro regardless 344 * ro flag is passed by mount or not. 345 */ 346 deactivate_locked_super(s); 347 return (ERR_PTR(-EBUSY)); 348 } 349 350 return (s); 351 } 352 353 static struct dentry * 354 zpl_mount(struct file_system_type *fs_type, int flags, 355 const char *osname, void *data) 356 { 357 zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data }; 358 359 struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm); 360 if (IS_ERR(sb)) 361 return (ERR_CAST(sb)); 362 363 return (dget(sb->s_root)); 364 } 365 366 static void 367 zpl_kill_sb(struct super_block *sb) 368 { 369 zfs_preumount(sb); 370 kill_anon_super(sb); 371 } 372 373 void 374 zpl_prune_sb(uint64_t nr_to_scan, void *arg) 375 { 376 struct super_block *sb = (struct super_block *)arg; 377 int objects = 0; 378 379 /* 380 * Ensure the superblock is not in the process of being torn down. 381 */ 382 #ifdef HAVE_SB_DYING 383 if (down_read_trylock(&sb->s_umount)) { 384 if (!(sb->s_flags & SB_DYING) && sb->s_root && 385 (sb->s_flags & SB_BORN)) { 386 (void) zfs_prune(sb, nr_to_scan, &objects); 387 } 388 up_read(&sb->s_umount); 389 } 390 #else 391 if (down_read_trylock(&sb->s_umount)) { 392 if (!hlist_unhashed(&sb->s_instances) && 393 sb->s_root && (sb->s_flags & SB_BORN)) { 394 (void) zfs_prune(sb, nr_to_scan, &objects); 395 } 396 up_read(&sb->s_umount); 397 } 398 #endif 399 } 400 401 const struct super_operations zpl_super_operations = { 402 .alloc_inode = zpl_inode_alloc, 403 .destroy_inode = zpl_inode_destroy, 404 .dirty_inode = zpl_dirty_inode, 405 .write_inode = NULL, 406 .evict_inode = zpl_evict_inode, 407 .put_super = zpl_put_super, 408 .sync_fs = zpl_sync_fs, 409 .statfs = zpl_statfs, 410 .remount_fs = zpl_remount_fs, 411 .show_devname = zpl_show_devname, 412 .show_options = zpl_show_options, 413 .show_stats = NULL, 414 }; 415 416 struct file_system_type zpl_fs_type = { 417 .owner = THIS_MODULE, 418 .name = ZFS_DRIVER, 419 #if defined(HAVE_IDMAP_MNT_API) 420 .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP, 421 #else 422 .fs_flags = FS_USERNS_MOUNT, 423 #endif 424 .mount = zpl_mount, 425 .kill_sb = zpl_kill_sb, 426 }; 427