1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2011, Lawrence Livermore National Security, LLC. 23 * Copyright (c) 2023, Datto Inc. All rights reserved. 24 */ 25 26 27 #include <sys/zfs_znode.h> 28 #include <sys/zfs_vfsops.h> 29 #include <sys/zfs_vnops.h> 30 #include <sys/zfs_ctldir.h> 31 #include <sys/zpl.h> 32 33 34 static struct inode * 35 zpl_inode_alloc(struct super_block *sb) 36 { 37 struct inode *ip; 38 39 VERIFY3S(zfs_inode_alloc(sb, &ip), ==, 0); 40 inode_set_iversion(ip, 1); 41 42 return (ip); 43 } 44 45 static void 46 zpl_inode_destroy(struct inode *ip) 47 { 48 ASSERT(atomic_read(&ip->i_count) == 0); 49 zfs_inode_destroy(ip); 50 } 51 52 /* 53 * Called from __mark_inode_dirty() to reflect that something in the 54 * inode has changed. We use it to ensure the znode system attributes 55 * are always strictly update to date with respect to the inode. 56 */ 57 #ifdef HAVE_DIRTY_INODE_WITH_FLAGS 58 static void 59 zpl_dirty_inode(struct inode *ip, int flags) 60 { 61 fstrans_cookie_t cookie; 62 63 cookie = spl_fstrans_mark(); 64 zfs_dirty_inode(ip, flags); 65 spl_fstrans_unmark(cookie); 66 } 67 #else 68 static void 69 zpl_dirty_inode(struct inode *ip) 70 { 71 fstrans_cookie_t cookie; 72 73 cookie = spl_fstrans_mark(); 74 zfs_dirty_inode(ip, 0); 75 spl_fstrans_unmark(cookie); 76 } 77 #endif /* HAVE_DIRTY_INODE_WITH_FLAGS */ 78 79 /* 80 * When ->drop_inode() is called its return value indicates if the 81 * inode should be evicted from the inode cache. If the inode is 82 * unhashed and has no links the default policy is to evict it 83 * immediately. 84 * 85 * The ->evict_inode() callback must minimally truncate the inode pages, 86 * and call clear_inode(). For 2.6.35 and later kernels this will 87 * simply update the inode state, with the sync occurring before the 88 * truncate in evict(). For earlier kernels clear_inode() maps to 89 * end_writeback() which is responsible for completing all outstanding 90 * write back. In either case, once this is done it is safe to cleanup 91 * any remaining inode specific data via zfs_inactive(). 92 * remaining filesystem specific data. 93 */ 94 static void 95 zpl_evict_inode(struct inode *ip) 96 { 97 fstrans_cookie_t cookie; 98 99 cookie = spl_fstrans_mark(); 100 truncate_setsize(ip, 0); 101 clear_inode(ip); 102 zfs_inactive(ip); 103 spl_fstrans_unmark(cookie); 104 } 105 106 static void 107 zpl_put_super(struct super_block *sb) 108 { 109 fstrans_cookie_t cookie; 110 int error; 111 112 cookie = spl_fstrans_mark(); 113 error = -zfs_umount(sb); 114 spl_fstrans_unmark(cookie); 115 ASSERT3S(error, <=, 0); 116 } 117 118 static int 119 zpl_sync_fs(struct super_block *sb, int wait) 120 { 121 fstrans_cookie_t cookie; 122 cred_t *cr = CRED(); 123 int error; 124 125 crhold(cr); 126 cookie = spl_fstrans_mark(); 127 error = -zfs_sync(sb, wait, cr); 128 spl_fstrans_unmark(cookie); 129 crfree(cr); 130 ASSERT3S(error, <=, 0); 131 132 return (error); 133 } 134 135 static int 136 zpl_statfs(struct dentry *dentry, struct kstatfs *statp) 137 { 138 fstrans_cookie_t cookie; 139 int error; 140 141 cookie = spl_fstrans_mark(); 142 error = -zfs_statvfs(dentry->d_inode, statp); 143 spl_fstrans_unmark(cookie); 144 ASSERT3S(error, <=, 0); 145 146 /* 147 * If required by a 32-bit system call, dynamically scale the 148 * block size up to 16MiB and decrease the block counts. This 149 * allows for a maximum size of 64EiB to be reported. The file 150 * counts must be artificially capped at 2^32-1. 151 */ 152 if (unlikely(zpl_is_32bit_api())) { 153 while (statp->f_blocks > UINT32_MAX && 154 statp->f_bsize < SPA_MAXBLOCKSIZE) { 155 statp->f_frsize <<= 1; 156 statp->f_bsize <<= 1; 157 158 statp->f_blocks >>= 1; 159 statp->f_bfree >>= 1; 160 statp->f_bavail >>= 1; 161 } 162 163 uint64_t usedobjs = statp->f_files - statp->f_ffree; 164 statp->f_ffree = MIN(statp->f_ffree, UINT32_MAX - usedobjs); 165 statp->f_files = statp->f_ffree + usedobjs; 166 } 167 168 return (error); 169 } 170 171 static int 172 zpl_remount_fs(struct super_block *sb, int *flags, char *data) 173 { 174 zfs_mnt_t zm = { .mnt_osname = NULL, .mnt_data = data }; 175 fstrans_cookie_t cookie; 176 int error; 177 178 cookie = spl_fstrans_mark(); 179 error = -zfs_remount(sb, flags, &zm); 180 spl_fstrans_unmark(cookie); 181 ASSERT3S(error, <=, 0); 182 183 return (error); 184 } 185 186 static int 187 __zpl_show_devname(struct seq_file *seq, zfsvfs_t *zfsvfs) 188 { 189 int error; 190 if ((error = zpl_enter(zfsvfs, FTAG)) != 0) 191 return (error); 192 193 char *fsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP); 194 dmu_objset_name(zfsvfs->z_os, fsname); 195 196 for (int i = 0; fsname[i] != 0; i++) { 197 /* 198 * Spaces in the dataset name must be converted to their 199 * octal escape sequence for getmntent(3) to correctly 200 * parse then fsname portion of /proc/self/mounts. 201 */ 202 if (fsname[i] == ' ') { 203 seq_puts(seq, "\\040"); 204 } else { 205 seq_putc(seq, fsname[i]); 206 } 207 } 208 209 kmem_free(fsname, ZFS_MAX_DATASET_NAME_LEN); 210 211 zpl_exit(zfsvfs, FTAG); 212 213 return (0); 214 } 215 216 static int 217 zpl_show_devname(struct seq_file *seq, struct dentry *root) 218 { 219 return (__zpl_show_devname(seq, root->d_sb->s_fs_info)); 220 } 221 222 static int 223 __zpl_show_options(struct seq_file *seq, zfsvfs_t *zfsvfs) 224 { 225 seq_printf(seq, ",%s", 226 zfsvfs->z_flags & ZSB_XATTR ? "xattr" : "noxattr"); 227 228 #ifdef CONFIG_FS_POSIX_ACL 229 switch (zfsvfs->z_acl_type) { 230 case ZFS_ACLTYPE_POSIX: 231 seq_puts(seq, ",posixacl"); 232 break; 233 default: 234 seq_puts(seq, ",noacl"); 235 break; 236 } 237 #endif /* CONFIG_FS_POSIX_ACL */ 238 239 switch (zfsvfs->z_case) { 240 case ZFS_CASE_SENSITIVE: 241 seq_puts(seq, ",casesensitive"); 242 break; 243 case ZFS_CASE_INSENSITIVE: 244 seq_puts(seq, ",caseinsensitive"); 245 break; 246 default: 247 seq_puts(seq, ",casemixed"); 248 break; 249 } 250 251 return (0); 252 } 253 254 static int 255 zpl_show_options(struct seq_file *seq, struct dentry *root) 256 { 257 return (__zpl_show_options(seq, root->d_sb->s_fs_info)); 258 } 259 260 static int 261 zpl_fill_super(struct super_block *sb, void *data, int silent) 262 { 263 zfs_mnt_t *zm = (zfs_mnt_t *)data; 264 fstrans_cookie_t cookie; 265 int error; 266 267 cookie = spl_fstrans_mark(); 268 error = -zfs_domount(sb, zm, silent); 269 spl_fstrans_unmark(cookie); 270 ASSERT3S(error, <=, 0); 271 272 return (error); 273 } 274 275 static int 276 zpl_test_super(struct super_block *s, void *data) 277 { 278 zfsvfs_t *zfsvfs = s->s_fs_info; 279 objset_t *os = data; 280 /* 281 * If the os doesn't match the z_os in the super_block, assume it is 282 * not a match. Matching would imply a multimount of a dataset. It is 283 * possible that during a multimount, there is a simultaneous operation 284 * that changes the z_os, e.g., rollback, where the match will be 285 * missed, but in that case the user will get an EBUSY. 286 */ 287 return (zfsvfs != NULL && os == zfsvfs->z_os); 288 } 289 290 static struct super_block * 291 zpl_mount_impl(struct file_system_type *fs_type, int flags, zfs_mnt_t *zm) 292 { 293 struct super_block *s; 294 objset_t *os; 295 int err; 296 297 err = dmu_objset_hold(zm->mnt_osname, FTAG, &os); 298 if (err) 299 return (ERR_PTR(-err)); 300 301 /* 302 * The dsl pool lock must be released prior to calling sget(). 303 * It is possible sget() may block on the lock in grab_super() 304 * while deactivate_super() holds that same lock and waits for 305 * a txg sync. If the dsl_pool lock is held over sget() 306 * this can prevent the pool sync and cause a deadlock. 307 */ 308 dsl_dataset_long_hold(dmu_objset_ds(os), FTAG); 309 dsl_pool_rele(dmu_objset_pool(os), FTAG); 310 311 s = sget(fs_type, zpl_test_super, set_anon_super, flags, os); 312 313 /* 314 * Recheck with the lock held to prevent mounting the wrong dataset 315 * since z_os can be stale when the teardown lock is held. 316 * 317 * We can't do this in zpl_test_super in since it's under spinlock and 318 * also s_umount lock is not held there so it would race with 319 * zfs_umount and zfsvfs can be freed. 320 */ 321 if (!IS_ERR(s) && s->s_fs_info != NULL) { 322 zfsvfs_t *zfsvfs = s->s_fs_info; 323 if (zpl_enter(zfsvfs, FTAG) == 0) { 324 if (os != zfsvfs->z_os) 325 err = -SET_ERROR(EBUSY); 326 zpl_exit(zfsvfs, FTAG); 327 } else { 328 err = -SET_ERROR(EBUSY); 329 } 330 } 331 dsl_dataset_long_rele(dmu_objset_ds(os), FTAG); 332 dsl_dataset_rele(dmu_objset_ds(os), FTAG); 333 334 if (IS_ERR(s)) 335 return (ERR_CAST(s)); 336 337 if (err) { 338 deactivate_locked_super(s); 339 return (ERR_PTR(err)); 340 } 341 342 if (s->s_root == NULL) { 343 err = zpl_fill_super(s, zm, flags & SB_SILENT ? 1 : 0); 344 if (err) { 345 deactivate_locked_super(s); 346 return (ERR_PTR(err)); 347 } 348 s->s_flags |= SB_ACTIVE; 349 } else if ((flags ^ s->s_flags) & SB_RDONLY) { 350 deactivate_locked_super(s); 351 return (ERR_PTR(-EBUSY)); 352 } 353 354 return (s); 355 } 356 357 static struct dentry * 358 zpl_mount(struct file_system_type *fs_type, int flags, 359 const char *osname, void *data) 360 { 361 zfs_mnt_t zm = { .mnt_osname = osname, .mnt_data = data }; 362 363 struct super_block *sb = zpl_mount_impl(fs_type, flags, &zm); 364 if (IS_ERR(sb)) 365 return (ERR_CAST(sb)); 366 367 return (dget(sb->s_root)); 368 } 369 370 static void 371 zpl_kill_sb(struct super_block *sb) 372 { 373 zfs_preumount(sb); 374 kill_anon_super(sb); 375 } 376 377 void 378 zpl_prune_sb(uint64_t nr_to_scan, void *arg) 379 { 380 struct super_block *sb = (struct super_block *)arg; 381 int objects = 0; 382 383 (void) -zfs_prune(sb, nr_to_scan, &objects); 384 } 385 386 const struct super_operations zpl_super_operations = { 387 .alloc_inode = zpl_inode_alloc, 388 .destroy_inode = zpl_inode_destroy, 389 .dirty_inode = zpl_dirty_inode, 390 .write_inode = NULL, 391 .evict_inode = zpl_evict_inode, 392 .put_super = zpl_put_super, 393 .sync_fs = zpl_sync_fs, 394 .statfs = zpl_statfs, 395 .remount_fs = zpl_remount_fs, 396 .show_devname = zpl_show_devname, 397 .show_options = zpl_show_options, 398 .show_stats = NULL, 399 }; 400 401 struct file_system_type zpl_fs_type = { 402 .owner = THIS_MODULE, 403 .name = ZFS_DRIVER, 404 #if defined(HAVE_IDMAP_MNT_API) 405 .fs_flags = FS_USERNS_MOUNT | FS_ALLOW_IDMAP, 406 #else 407 .fs_flags = FS_USERNS_MOUNT, 408 #endif 409 .mount = zpl_mount, 410 .kill_sb = zpl_kill_sb, 411 }; 412