1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2011, Lawrence Livermore National Security, LLC. 23 * Copyright (c) 2023, Datto Inc. All rights reserved. 24 */ 25 26 27 #include <sys/zfs_znode.h> 28 #include <sys/zfs_vfsops.h> 29 #include <sys/zfs_vnops.h> 30 #include <sys/zfs_ctldir.h> 31 #include <sys/zpl.h> 32 #include <linux/iversion.h> 33 34 35 static struct inode * 36 zpl_inode_alloc(struct super_block *sb) 37 { 38 struct inode *ip; 39 40 VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0); 41 inode_set_iversion(ip, 1); 42 43 return (ip); 44 } 45 46 static void 47 zpl_inode_destroy(struct inode *ip) 48 { 49 ASSERT(atomic_read(&ip->i_count) == 0); 50 zfs_inode_destroy(ip); 51 } 52 53 /* 54 * Called from __mark_inode_dirty() to reflect that something in the 55 * inode has changed. We use it to ensure the znode system attributes 56 * are always strictly update to date with respect to the inode. 57 */ 58 static void 59 zpl_dirty_inode(struct inode *ip, int flags) 60 { 61 fstrans_cookie_t cookie; 62 63 cookie = spl_fstrans_mark(); 64 zfs_dirty_inode(ip, flags); 65 spl_fstrans_unmark(cookie); 66 } 67 68 /* 69 * When ->drop_inode() is called its return value indicates if the 70 * inode should be evicted from the inode cache. If the inode is 71 * unhashed and has no links the default policy is to evict it 72 * immediately. 73 * 74 * The ->evict_inode() callback must minimally truncate the inode pages, 75 * and call clear_inode(). For 2.6.35 and later kernels this will 76 * simply update the inode state, with the sync occurring before the 77 * truncate in evict(). For earlier kernels clear_inode() maps to 78 * end_writeback() which is responsible for completing all outstanding 79 * write back. In either case, once this is done it is safe to cleanup 80 * any remaining inode specific data via zfs_inactive(). 81 * remaining filesystem specific data. 82 */ 83 static void 84 zpl_evict_inode(struct inode *ip) 85 { 86 fstrans_cookie_t cookie; 87 88 cookie = spl_fstrans_mark(); 89 truncate_setsize(ip, 0); 90 clear_inode(ip); 91 zfs_inactive(ip); 92 spl_fstrans_unmark(cookie); 93 } 94 95 static void 96 zpl_put_super(struct super_block *sb) 97 { 98 fstrans_cookie_t cookie; 99 int error; 100 101 cookie = spl_fstrans_mark(); 102 error = -zfs_umount(sb); 103 spl_fstrans_unmark(cookie); 104 ASSERT3S(error, <=, 0); 105 } 106 107 static int 108 zpl_sync_fs(struct super_block *sb, int wait) 109 { 110 fstrans_cookie_t cookie; 111 cred_t *cr = CRED(); 112 int error; 113 114 crhold(cr); 115 cookie = spl_fstrans_mark(); 116 error = -zfs_sync(sb, wait, cr); 117 spl_fstrans_unmark(cookie); 118 crfree(cr); 119 ASSERT3S(error, <=, 0); 120 121 return (error); 122 } 123 124 static int 125 zpl_statfs(struct dentry *dentry, struct kstatfs *statp) 126 { 127 fstrans_cookie_t cookie; 128 int error; 129 130 cookie = spl_fstrans_mark(); 131 error = -zfs_statvfs(dentry->d_inode, statp); 132 spl_fstrans_unmark(cookie); 133 ASSERT3S(error, <=, 0); 134 135 /* 136 * If required by a 32-bit system call, dynamically scale the 137 * block size up to 16MiB and decrease the block counts. This 138 * allows for a maximum size of 64EiB to be reported. The file 139 * counts must be artificially capped at 2^32-1. 140 */ 141 if (unlikely(zpl_is_32bit_api())) { 142 while (statp->f_blocks > UINT32_MAX && 143 statp->f_bsize < SPA_MAXBLOCKSIZE) { 144 statp->f_frsize <<= 1; 145 statp->f_bsize <<= 1; 146 147 statp->f_blocks >>= 1; 148 statp->f_bfree >>= 1; 149 statp->f_bavail >>= 1; 150 } 151 152 uint64_t usedobjs = statp->f_files - statp->f_ffree; 153 statp->f_ffree = MIN(statp->f_ffree, UINT32_MAX - usedobjs); 154 statp->f_files = statp->f_ffree + usedobjs; 155 } 156 157 return (error); 158 } 159 160 static int 161 zpl_remount_fs(struct super_block *sb, int *flags, char *data) 162 { 163 zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data }; 164 fstrans_cookie_t cookie; 165 int error; 166 167 cookie = spl_fstrans_mark(); 168 error = -zfs_remount(sb, flags, &zm); 169 spl_fstrans_unmark(cookie); 170 ASSERT3S(error, <=, 0); 171 172 return (error); 173 } 174 175 static int 176 __zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs) 177 { 178 int error; 179 if ((error = zpl_enter(zfsvfs, FTAG)) != 0) 180 return (error); 181 182 char *fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 183 dmu_objset_name(zfsvfs->z_os, fsname); 184 185 for (int i = 0; fsname[i] != 0; i++) { 186 /* 187 * Spaces in the dataset name must be converted to their 188 * octal escape sequence for getmntent(3) to correctly 189 * parse then fsname portion of /proc/self/mounts. 190 */ 191 if (fsname[i] == ' ') { 192 seq_puts(seq, "\\040"); 193 } else { 194 seq_putc(seq, fsname[i]); 195 } 196 } 197 198 kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN); 199 200 zpl_exit(zfsvfs, FTAG); 201 202 return (0); 203 } 204 205 static int 206 zpl_show_devname(struct seq_file *seq, struct dentry *root) 207 { 208 return (__zpl_show_devname(seq, root->d_sb->s_fs_info)); 209 } 210 211 static int 212 __zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs) 213 { 214 seq_printf(seq, ",%s", 215 zfsvfs->z_flags & ZSB_XATTR ? "xattr" : "noxattr"); 216 217 #ifdef CONFIG_FS_POSIX_ACL 218 switch (zfsvfs->z_acl_type) { 219 case ZFS_ACLTYPE_POSIX: 220 seq_puts(seq, ",posixacl"); 221 break; 222 default: 223 seq_puts(seq, ",noacl"); 224 break; 225 } 226 #endif /* CONFIG_FS_POSIX_ACL */ 227 228 switch (zfsvfs->z_case) { 229 case ZFS_CASE_SENSITIVE: 230 seq_puts(seq, ",casesensitive"); 231 break; 232 case ZFS_CASE_INSENSITIVE: 233 seq_puts(seq, ",caseinsensitive"); 234 break; 235 default: 236 seq_puts(seq, ",casemixed"); 237 break; 238 } 239 240 return (0); 241 } 242 243 static int 244 zpl_show_options(struct seq_file *seq, struct dentry *root) 245 { 246 return (__zpl_show_options(seq, root->d_sb->s_fs_info)); 247 } 248 249 static int 250 zpl_fill_super(struct super_block *sb, void *data, int silent) 251 { 252 zfs_mnt_t *zm = (zfs_mnt_t *)data; 253 fstrans_cookie_t cookie; 254 int error; 255 256 cookie = spl_fstrans_mark(); 257 error = -zfs_domount(sb, zm, silent); 258 spl_fstrans_unmark(cookie); 259 ASSERT3S(error, <=, 0); 260 261 return (error); 262 } 263 264 static int 265 zpl_test_super(struct super_block *s, void *data) 266 { 267 zfsvfs_t *zfsvfs = s->s_fs_info; 268 objset_t *os = data; 269 /* 270 * If the os doesn't match the z_os in the super_block, assume it is 271 * not a match. Matching would imply a multimount of a dataset. It is 272 * possible that during a multimount, there is a simultaneous operation 273 * that changes the z_os, e.g., rollback, where the match will be 274 * missed, but in that case the user will get an EBUSY. 275 */ 276 return (zfsvfs != NULL && os == zfsvfs->z_os); 277 } 278 279 static struct super_block * 280 zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm) 281 { 282 struct super_block *s; 283 objset_t *os; 284 boolean_t issnap = B_FALSE; 285 int err; 286 287 err = dmu_objset_hold(zm->mnt_osname, FTAG, &os); 288 if (err) 289 return (ERR_PTR(-err)); 290 291 /* 292 * The dsl pool lock must be released prior to calling sget(). 293 * It is possible sget() may block on the lock in grab_super() 294 * while deactivate_super() holds that same lock and waits for 295 * a txg sync. If the dsl_pool lock is held over sget() 296 * this can prevent the pool sync and cause a deadlock. 297 */ 298 dsl_dataset_long_hold(dmu_objset_ds(os), FTAG); 299 dsl_pool_rele(dmu_objset_pool(os), FTAG); 300 301 s = sget(fs_type, zpl_test_super, set_anon_super, flags, os); 302 303 /* 304 * Recheck with the lock held to prevent mounting the wrong dataset 305 * since z_os can be stale when the teardown lock is held. 306 * 307 * We can't do this in zpl_test_super in since it's under spinlock and 308 * also s_umount lock is not held there so it would race with 309 * zfs_umount and zfsvfs can be freed. 310 */ 311 if (!IS_ERR(s) && s->s_fs_info != NULL) { 312 zfsvfs_t *zfsvfs = s->s_fs_info; 313 if (zpl_enter(zfsvfs, FTAG) == 0) { 314 if (os != zfsvfs->z_os) 315 err = -SET_ERROR(EBUSY); 316 issnap = zfsvfs->z_issnap; 317 zpl_exit(zfsvfs, FTAG); 318 } else { 319 err = -SET_ERROR(EBUSY); 320 } 321 } 322 dsl_dataset_long_rele(dmu_objset_ds(os), FTAG); 323 dsl_dataset_rele(dmu_objset_ds(os), FTAG); 324 325 if (IS_ERR(s)) 326 return (ERR_CAST(s)); 327 328 if (err) { 329 deactivate_locked_super(s); 330 return (ERR_PTR(err)); 331 } 332 333 if (s->s_root == NULL) { 334 err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0); 335 if (err) { 336 deactivate_locked_super(s); 337 return (ERR_PTR(err)); 338 } 339 s->s_flags |= SB_ACTIVE; 340 } else if (!issnap && ((flags ^ s->s_flags) & SB_RDONLY)) { 341 /* 342 * Skip ro check for snap since snap is always ro regardless 343 * ro flag is passed by mount or not. 344 */ 345 deactivate_locked_super(s); 346 return (ERR_PTR(-EBUSY)); 347 } 348 349 return (s); 350 } 351 352 static struct dentry * 353 zpl_mount(struct file_system_type *fs_type, int flags, 354 const char *osname, void *data) 355 { 356 zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data }; 357 358 struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm); 359 if (IS_ERR(sb)) 360 return (ERR_CAST(sb)); 361 362 return (dget(sb->s_root)); 363 } 364 365 static void 366 zpl_kill_sb(struct super_block *sb) 367 { 368 zfs_preumount(sb); 369 kill_anon_super(sb); 370 } 371 372 void 373 zpl_prune_sb(uint64_t nr_to_scan, void *arg) 374 { 375 struct super_block *sb = (struct super_block *)arg; 376 int objects = 0; 377 378 /* 379 * deactivate_locked_super calls shrinker_free and only then 380 * sops->kill_sb cb, resulting in UAF on umount when trying to reach 381 * for the shrinker functions in zpl_prune_sb of in-umount dataset. 382 * Increment if s_active is not zero, but don't prune if it is - 383 * umount could be underway. 384 */ 385 if (atomic_inc_not_zero(&sb->s_active)) { 386 (void) -zfs_prune(sb, nr_to_scan, &objects); 387 atomic_dec(&sb->s_active); 388 } 389 390 } 391 392 const struct super_operations zpl_super_operations = { 393 .alloc_inode = zpl_inode_alloc, 394 .destroy_inode = zpl_inode_destroy, 395 .dirty_inode = zpl_dirty_inode, 396 .write_inode = NULL, 397 .evict_inode = zpl_evict_inode, 398 .put_super = zpl_put_super, 399 .sync_fs = zpl_sync_fs, 400 .statfs = zpl_statfs, 401 .remount_fs = zpl_remount_fs, 402 .show_devname = zpl_show_devname, 403 .show_options = zpl_show_options, 404 .show_stats = NULL, 405 }; 406 407 struct file_system_type zpl_fs_type = { 408 .owner = THIS_MODULE, 409 .name = ZFS_DRIVER, 410 #if defined(HAVE_IDMAP_MNT_API) 411 .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP, 412 #else 413 .fs_flags = FS_USERNS_MOUNT, 414 #endif 415 .mount = zpl_mount, 416 .kill_sb = zpl_kill_sb, 417 }; 418