/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_FS_H
#define BTRFS_FS_H

#include <crypto/blake2b.h>
#include <crypto/sha2.h>
#include <linux/blkdev.h>
#include <linux/sizes.h>
#include <linux/time64.h>
#include <linux/compiler.h>
#include <linux/math.h>
#include <linux/atomic.h>
#include <linux/percpu_counter.h>
#include <linux/completion.h>
#include <linux/lockdep.h>
#include <linux/spinlock.h>
#include <linux/mutex.h>
#include <linux/rwsem.h>
#include <linux/semaphore.h>
#include <linux/list.h>
#include <linux/pagemap.h>
#include <linux/radix-tree.h>
#include <linux/workqueue.h>
#include <linux/wait.h>
#include <linux/wait_bit.h>
#include <linux/sched.h>
#include <linux/rbtree.h>
#include <linux/xxhash.h>
#include <linux/fserror.h>
#include <uapi/linux/btrfs.h>
#include <uapi/linux/btrfs_tree.h>
#include "extent-io-tree.h"
#include "async-thread.h"
#include "block-rsv.h"
#include "messages.h"

/* Forward declarations: only pointers to these types are used below. */
struct inode;
struct super_block;
struct kobject;
struct reloc_control;
struct ulist;
struct btrfs_device;
struct btrfs_block_group;
struct btrfs_root;
struct btrfs_fs_devices;
struct btrfs_transaction;
struct btrfs_balance_control;
struct btrfs_subpage_info;
struct btrfs_stripe_hash_table;
struct btrfs_space_info;

/* Minimum and maximum data and metadata block size. */
#define BTRFS_MIN_BLOCKSIZE	(SZ_4K)
#define BTRFS_MAX_BLOCKSIZE	(SZ_64K)

/* The maximum folio size btrfs supports. Must be larger than a page. */
#define BTRFS_MAX_FOLIO_SIZE	(SZ_2M)
static_assert(BTRFS_MAX_FOLIO_SIZE > PAGE_SIZE);

/*
 * The maximum number of blocks a huge folio can support.
 *
 * Depending on the filesystem block size, the real maximum blocks per folio
 * may also be limited by the above BTRFS_MAX_FOLIO_SIZE.
 */
#ifdef CONFIG_BTRFS_EXPERIMENTAL
#define BTRFS_MAX_BLOCKS_PER_FOLIO	(512)
#else
#define BTRFS_MAX_BLOCKS_PER_FOLIO	(BITS_PER_LONG)
#endif

/*
 * Default upper bound for the size of a single extent, see
 * btrfs_fs_info::max_extent_size and count_max_extents().
 */
#define BTRFS_MAX_EXTENT_SIZE SZ_128M

/*
 * Maximum length to trim in a single iteration to avoid holding device list
 * mutex for too long.
 */
#define BTRFS_MAX_TRIM_LENGTH			SZ_2G

#define BTRFS_OLDEST_GENERATION	0ULL

#define BTRFS_EMPTY_DIR_SIZE 0

#define BTRFS_DIRTY_METADATA_THRESH		SZ_32M

/*
 * Offset and size of the super block. The size must match the layout of
 * struct btrfs_super_block exactly, which is enforced at build time.
 */
#define BTRFS_SUPER_INFO_OFFSET			SZ_64K
#define BTRFS_SUPER_INFO_SIZE			4096
static_assert(sizeof(struct btrfs_super_block) == BTRFS_SUPER_INFO_SIZE);

/* Array of bytes with variable length, hexadecimal format 0x1234 */
#define BTRFS_CSUM_FMT				"0x%*phN"
#define BTRFS_CSUM_FMT_VALUE(size, bytes)	size, bytes

/* Format string and matching argument list for a key: (objectid type offset). */
#define BTRFS_KEY_FMT			"(%llu %u %llu)"
#define BTRFS_KEY_FMT_VALUE(key)	(key)->objectid, (key)->type, (key)->offset

/* Format string and matching argument list for a qgroup id: level/subvolid. */
#define BTRFS_QGROUP_FMT		"%hu/%llu"
#define BTRFS_QGROUP_FMT_VALUE(qgroup)	btrfs_qgroup_level((qgroup)->qgroupid),	\
					btrfs_qgroup_subvolid((qgroup)->qgroupid)

/*
 * Number of metadata items necessary for an unlink operation:
 *
 * 1 for the possible orphan item
 * 1 for the dir item
 * 1 for the dir index
 * 1 for the inode ref
 * 1 for the inode
 * 1 for the parent inode
 */
#define BTRFS_UNLINK_METADATA_UNITS		6

/*
 * The reserved space at the beginning of each device. It covers the primary
 * super block and leaves space for potential use by other tools like
 * bootloaders or to lower potential damage of accidental overwrite.
 */
#define BTRFS_DEVICE_RANGE_RESERVED			(SZ_1M)

/*
 * Runtime (in-memory) states of filesystem
 *
 * BTRFS_FS_STATE_COUNT is not a state, it is the number of states defined.
 *
 * NOTE(review): presumably these are bit numbers for
 * btrfs_fs_info::fs_state - confirm against the users.
 */
enum {
	/*
	 * Filesystem is being remounted, allow to skip some operations, like
	 * defrag
	 */
	BTRFS_FS_STATE_REMOUNTING,
	/* Filesystem in RO mode */
	BTRFS_FS_STATE_RO,
	/* Track if a transaction abort has been reported on this filesystem */
	BTRFS_FS_STATE_TRANS_ABORTED,
	/* Track if log replay has failed. */
	BTRFS_FS_STATE_LOG_REPLAY_ABORTED,
	/*
	 * Bio operations should be blocked on this filesystem because a source
	 * or target device is being destroyed as part of a device replace
	 */
	BTRFS_FS_STATE_DEV_REPLACING,
	/* The btrfs_fs_info created for self-tests */
	BTRFS_FS_STATE_DUMMY_FS_INFO,

	/* Checksum errors are ignored. */
	BTRFS_FS_STATE_NO_DATA_CSUMS,
	BTRFS_FS_STATE_SKIP_META_CSUMS,

	/* Indicates there was an error cleaning up a log tree. */
	BTRFS_FS_STATE_LOG_CLEANUP_ERROR,

	/* No more delayed iput can be queued. */
	BTRFS_FS_STATE_NO_DELAYED_IPUT,

	/*
	 * Emergency shutdown, a step further than transaction aborted by
	 * rejecting all operations.
	 */
	BTRFS_FS_STATE_EMERGENCY_SHUTDOWN,

	BTRFS_FS_STATE_COUNT
};

/*
 * Runtime flags of the filesystem.
 *
 * The values are implicit, so the order of the enumerators matters.
 *
 * NOTE(review): presumably these are bit numbers for btrfs_fs_info::flags -
 * confirm against the users.
 */
enum {
	BTRFS_FS_CLOSING_START,
	BTRFS_FS_CLOSING_DONE,
	BTRFS_FS_LOG_RECOVERING,
	BTRFS_FS_OPEN,
	BTRFS_FS_QUOTA_ENABLED,
	BTRFS_FS_SQUOTA_ENABLING,
	BTRFS_FS_UPDATE_UUID_TREE_GEN,
	BTRFS_FS_CREATING_FREE_SPACE_TREE,
	BTRFS_FS_BTREE_ERR,
	BTRFS_FS_LOG1_ERR,
	BTRFS_FS_LOG2_ERR,
	BTRFS_FS_QUOTA_OVERRIDE,
	/* Used to record internally whether fs has been frozen */
	BTRFS_FS_FROZEN,
	/*
	 * Indicate that balance has been set up from the ioctl and is in the
	 * main phase. The fs_info::balance_ctl is initialized.
	 */
	BTRFS_FS_BALANCE_RUNNING,

	/*
	 * Indicate that relocation of a chunk has started, it's set per chunk
	 * and is toggled between chunks.
	 */
	BTRFS_FS_RELOC_RUNNING,

	/* Indicate that the cleaner thread is awake and doing something. */
	BTRFS_FS_CLEANER_RUNNING,

	/*
	 * The checksumming has an optimized version and is considered fast,
	 * so we don't need to offload checksums to workqueues.
	 */
	BTRFS_FS_CSUM_IMPL_FAST,

	/* Indicate that the discard workqueue can service discards. */
	BTRFS_FS_DISCARD_RUNNING,

	/* Indicate that we need to cleanup space cache v1 */
	BTRFS_FS_CLEANUP_SPACE_CACHE_V1,

	/* Indicate that we can't trust the free space tree for caching yet */
	BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED,

	/* Indicate whether there are any tree modification log users */
	BTRFS_FS_TREE_MOD_LOG_USERS,

	/* Indicate that we want the transaction kthread to commit right now. */
	BTRFS_FS_COMMIT_TRANS,

	/* Indicate we have half completed snapshot deletions pending. */
	BTRFS_FS_UNFINISHED_DROPS,

	/* Indicate we have to finish a zone to do next allocation. */
	BTRFS_FS_NEED_ZONE_FINISH,

	/* Indicate that we want to commit the transaction. */
	BTRFS_FS_NEED_TRANS_COMMIT,

	/* This is set when active zone tracking is needed. */
	BTRFS_FS_ACTIVE_ZONE_TRACKING,

	/*
	 * Indicate if we have some features changed, this is mostly for
	 * cleaner thread to update the sysfs interface.
	 */
	BTRFS_FS_FEATURE_CHANGED,

	/*
	 * Indicate that we have found a tree block which is only aligned to
	 * sectorsize, but not to nodesize. This should be rare nowadays.
	 */
	BTRFS_FS_UNALIGNED_TREE_BLOCK,

#if BITS_PER_LONG == 32
	/* Indicate if we have error/warn message printed on 32bit systems */
	BTRFS_FS_32BIT_ERROR,
	BTRFS_FS_32BIT_WARN,
#endif
};

/*
 * Flags for mount options.
 *
 * Each option is a distinct bit of a 64bit mask; bits 32 and above are
 * already in use, so the mask does not fit into 32 bits.
 *
 * Note: don't forget to add new options to btrfs_show_options()
 */
enum {
	BTRFS_MOUNT_NODATASUM			= (1ULL << 0),
	BTRFS_MOUNT_NODATACOW			= (1ULL << 1),
	BTRFS_MOUNT_NOBARRIER			= (1ULL << 2),
	BTRFS_MOUNT_SSD				= (1ULL << 3),
	BTRFS_MOUNT_DEGRADED			= (1ULL << 4),
	BTRFS_MOUNT_COMPRESS			= (1ULL << 5),
	BTRFS_MOUNT_NOTREELOG			= (1ULL << 6),
	BTRFS_MOUNT_FLUSHONCOMMIT		= (1ULL << 7),
	BTRFS_MOUNT_SSD_SPREAD			= (1ULL << 8),
	BTRFS_MOUNT_NOSSD			= (1ULL << 9),
	BTRFS_MOUNT_DISCARD_SYNC		= (1ULL << 10),
	BTRFS_MOUNT_FORCE_COMPRESS		= (1ULL << 11),
	BTRFS_MOUNT_SPACE_CACHE			= (1ULL << 12),
	BTRFS_MOUNT_CLEAR_CACHE			= (1ULL << 13),
	BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED	= (1ULL << 14),
	BTRFS_MOUNT_ENOSPC_DEBUG		= (1ULL << 15),
	BTRFS_MOUNT_AUTO_DEFRAG			= (1ULL << 16),
	BTRFS_MOUNT_USEBACKUPROOT		= (1ULL << 17),
	BTRFS_MOUNT_SKIP_BALANCE		= (1ULL << 18),
	BTRFS_MOUNT_PANIC_ON_FATAL_ERROR	= (1ULL << 19),
	BTRFS_MOUNT_RESCAN_UUID_TREE		= (1ULL << 20),
	BTRFS_MOUNT_FRAGMENT_DATA		= (1ULL << 21),
	BTRFS_MOUNT_FRAGMENT_METADATA		= (1ULL << 22),
	BTRFS_MOUNT_FREE_SPACE_TREE		= (1ULL << 23),
	BTRFS_MOUNT_NOLOGREPLAY			= (1ULL << 24),
	BTRFS_MOUNT_REF_VERIFY			= (1ULL << 25),
	BTRFS_MOUNT_DISCARD_ASYNC		= (1ULL << 26),
	BTRFS_MOUNT_IGNOREBADROOTS		= (1ULL << 27),
	BTRFS_MOUNT_IGNOREDATACSUMS		= (1ULL << 28),
	BTRFS_MOUNT_NODISCARD			= (1ULL << 29),
	BTRFS_MOUNT_NOSPACECACHE		= (1ULL << 30),
	BTRFS_MOUNT_IGNOREMETACSUMS		= (1ULL << 31),
	BTRFS_MOUNT_IGNORESUPERFLAGS		= (1ULL << 32),
	BTRFS_MOUNT_REF_TRACKER			= (1ULL << 33),
};

/* These mount options require a full read-only fs, no new transaction is allowed. */
#define BTRFS_MOUNT_FULL_RO_MASK		\
	(BTRFS_MOUNT_NOLOGREPLAY |		\
	 BTRFS_MOUNT_IGNOREBADROOTS |		\
	 BTRFS_MOUNT_IGNOREDATACSUMS |		\
	 BTRFS_MOUNT_IGNOREMETACSUMS |		\
	 BTRFS_MOUNT_IGNORESUPERFLAGS)

/*
 * Feature flags (compat, compat_ro and incompat) that we support. If any
 * incompat flags are set other than the ones specified below then we will
 * fail to mount
 */
#define BTRFS_FEATURE_COMPAT_SUPP		0ULL
#define BTRFS_FEATURE_COMPAT_SAFE_SET		0ULL
#define BTRFS_FEATURE_COMPAT_SAFE_CLEAR		0ULL

#define BTRFS_FEATURE_COMPAT_RO_SUPP			\
	(BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE |	\
	 BTRFS_FEATURE_COMPAT_RO_FREE_SPACE_TREE_VALID | \
	 BTRFS_FEATURE_COMPAT_RO_VERITY |		\
	 BTRFS_FEATURE_COMPAT_RO_BLOCK_GROUP_TREE)

#define BTRFS_FEATURE_COMPAT_RO_SAFE_SET	0ULL
#define BTRFS_FEATURE_COMPAT_RO_SAFE_CLEAR	0ULL

/* Incompat features supported regardless of CONFIG_BTRFS_EXPERIMENTAL. */
#define BTRFS_FEATURE_INCOMPAT_SUPP_STABLE		\
	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS |		\
	 BTRFS_FEATURE_INCOMPAT_BIG_METADATA |		\
	 BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO |		\
	 BTRFS_FEATURE_INCOMPAT_COMPRESS_ZSTD |		\
	 BTRFS_FEATURE_INCOMPAT_RAID56 |		\
	 BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF |		\
	 BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA |	\
	 BTRFS_FEATURE_INCOMPAT_NO_HOLES |		\
	 BTRFS_FEATURE_INCOMPAT_METADATA_UUID |		\
	 BTRFS_FEATURE_INCOMPAT_RAID1C34 |		\
	 BTRFS_FEATURE_INCOMPAT_ZONED |			\
	 BTRFS_FEATURE_INCOMPAT_SIMPLE_QUOTA)

#ifdef CONFIG_BTRFS_EXPERIMENTAL
	/*
	 * Features under development like Extent tree v2 support are enabled
	 * only under CONFIG_BTRFS_EXPERIMENTAL
	 */
#define BTRFS_FEATURE_INCOMPAT_SUPP		\
	(BTRFS_FEATURE_INCOMPAT_SUPP_STABLE |	\
	 BTRFS_FEATURE_INCOMPAT_RAID_STRIPE_TREE | \
	 BTRFS_FEATURE_INCOMPAT_EXTENT_TREE_V2 | \
	 BTRFS_FEATURE_INCOMPAT_REMAP_TREE)

#else

#define BTRFS_FEATURE_INCOMPAT_SUPP		\
	(BTRFS_FEATURE_INCOMPAT_SUPP_STABLE)

#endif

#define BTRFS_FEATURE_INCOMPAT_SAFE_SET			\
	(BTRFS_FEATURE_INCOMPAT_EXTENDED_IREF)
#define BTRFS_FEATURE_INCOMPAT_SAFE_CLEAR		0ULL

/*
 * Commit interval defaults.
 * NOTE(review): presumably in seconds - confirm against the users of
 * btrfs_fs_info::commit_interval.
 */
#define BTRFS_DEFAULT_COMMIT_INTERVAL	(30)
#define BTRFS_WARNING_COMMIT_INTERVAL	(300)
#define BTRFS_DEFAULT_MAX_INLINE (2048) 352 353 enum btrfs_compression_type { 354 BTRFS_COMPRESS_NONE = 0, 355 BTRFS_COMPRESS_ZLIB = 1, 356 BTRFS_COMPRESS_LZO = 2, 357 BTRFS_COMPRESS_ZSTD = 3, 358 BTRFS_NR_COMPRESS_TYPES = 4, 359 360 BTRFS_DEFRAG_DONT_COMPRESS, 361 }; 362 363 struct btrfs_dev_replace { 364 /* See #define above */ 365 u64 replace_state; 366 /* Seconds since 1-Jan-1970 */ 367 time64_t time_started; 368 /* Seconds since 1-Jan-1970 */ 369 time64_t time_stopped; 370 atomic64_t num_write_errors; 371 atomic64_t num_uncorrectable_read_errors; 372 373 u64 cursor_left; 374 u64 committed_cursor_left; 375 u64 cursor_left_last_write_of_item; 376 u64 cursor_right; 377 378 /* See #define above */ 379 u64 cont_reading_from_srcdev_mode; 380 381 int is_valid; 382 int item_needs_writeback; 383 struct btrfs_device *srcdev; 384 struct btrfs_device *tgtdev; 385 386 struct mutex lock_finishing_cancel_unmount; 387 struct rw_semaphore rwsem; 388 389 struct btrfs_scrub_progress scrub_progress; 390 391 struct percpu_counter bio_counter; 392 wait_queue_head_t replace_wait; 393 394 struct task_struct *replace_task; 395 }; 396 397 /* 398 * Free clusters are used to claim free space in relatively large chunks, 399 * allowing us to do less seeky writes. They are used for all metadata 400 * allocations. In ssd_spread mode they are also used for data allocations. 401 */ 402 struct btrfs_free_cluster { 403 spinlock_t lock; 404 spinlock_t refill_lock; 405 struct rb_root root; 406 407 /* Largest extent in this cluster */ 408 u64 max_size; 409 410 /* First extent starting offset */ 411 u64 window_start; 412 413 /* We did a full search and couldn't create a cluster */ 414 bool fragmented; 415 416 struct btrfs_block_group *block_group; 417 /* 418 * When a cluster is allocated from a block group, we put the cluster 419 * onto a list in the block group so that it can be freed before the 420 * block group is freed. 
421 */ 422 struct list_head block_group_list; 423 }; 424 425 /* Discard control. */ 426 /* 427 * Async discard uses multiple lists to differentiate the discard filter 428 * parameters. Index 0 is for completely free block groups where we need to 429 * ensure the entire block group is trimmed without being lossy. Indices 430 * afterwards represent monotonically decreasing discard filter sizes to 431 * prioritize what should be discarded next. 432 */ 433 #define BTRFS_NR_DISCARD_LISTS 3 434 #define BTRFS_DISCARD_INDEX_UNUSED 0 435 #define BTRFS_DISCARD_INDEX_START 1 436 437 struct btrfs_discard_ctl { 438 struct workqueue_struct *discard_workers; 439 struct delayed_work work; 440 spinlock_t lock; 441 struct btrfs_block_group *block_group; 442 struct list_head discard_list[BTRFS_NR_DISCARD_LISTS]; 443 u64 prev_discard; 444 u64 prev_discard_time; 445 atomic_t discardable_extents; 446 atomic64_t discardable_bytes; 447 u64 max_discard_size; 448 u64 delay_ms; 449 u32 iops_limit; 450 u32 kbps_limit; 451 u64 discard_extent_bytes; 452 u64 discard_bitmap_bytes; 453 atomic64_t discard_bytes_saved; 454 }; 455 456 /* 457 * Exclusive operations (device replace, resize, device add/remove, balance) 458 */ 459 enum btrfs_exclusive_operation { 460 BTRFS_EXCLOP_NONE, 461 BTRFS_EXCLOP_BALANCE_PAUSED, 462 BTRFS_EXCLOP_BALANCE, 463 BTRFS_EXCLOP_DEV_ADD, 464 BTRFS_EXCLOP_DEV_REMOVE, 465 BTRFS_EXCLOP_DEV_REPLACE, 466 BTRFS_EXCLOP_RESIZE, 467 BTRFS_EXCLOP_SWAP_ACTIVATE, 468 }; 469 470 /* Store data about transaction commits, exported via sysfs. */ 471 struct btrfs_commit_stats { 472 /* Total number of commits */ 473 u64 commit_count; 474 /* The maximum commit duration so far in ns */ 475 u64 max_commit_dur; 476 /* The last commit duration in ns */ 477 u64 last_commit_dur; 478 /* The total commit duration in ns */ 479 u64 total_commit_dur; 480 /* Start of the last critical section in ns. 
*/ 481 u64 critical_section_start_time; 482 }; 483 484 struct btrfs_delayed_root { 485 spinlock_t lock; 486 int nodes; /* for delayed nodes */ 487 struct list_head node_list; 488 /* 489 * Used for delayed nodes which is waiting to be dealt with by the 490 * worker. If the delayed node is inserted into the work queue, we 491 * drop it from this list. 492 */ 493 struct list_head prepare_list; 494 atomic_t items; /* for delayed items */ 495 atomic_t items_seq; /* for delayed items */ 496 wait_queue_head_t wait; 497 }; 498 499 struct btrfs_free_space_ctl; 500 struct btrfs_free_space; 501 502 struct btrfs_fs_info { 503 u8 chunk_tree_uuid[BTRFS_UUID_SIZE]; 504 unsigned long flags; 505 struct btrfs_root *tree_root; 506 struct btrfs_root *chunk_root; 507 struct btrfs_root *dev_root; 508 struct btrfs_root *fs_root; 509 struct btrfs_root *quota_root; 510 struct btrfs_root *uuid_root; 511 struct btrfs_root *data_reloc_root; 512 struct btrfs_root *block_group_root; 513 struct btrfs_root *stripe_root; 514 struct btrfs_root *remap_root; 515 516 /* The log root tree is a directory of all the other log roots */ 517 struct btrfs_root *log_root_tree; 518 519 /* The tree that holds the global roots (csum, extent, etc) */ 520 rwlock_t global_root_lock; 521 struct rb_root global_root_tree; 522 523 spinlock_t fs_roots_radix_lock; 524 struct radix_tree_root fs_roots_radix; 525 526 /* Block group cache stuff */ 527 rwlock_t block_group_cache_lock; 528 struct rb_root_cached block_group_cache_tree; 529 530 /* Keep track of unallocated space */ 531 atomic64_t free_chunk_space; 532 533 /* Track ranges which are used by log trees blocks/logged data extents */ 534 struct extent_io_tree excluded_extents; 535 536 /* logical->physical extent mapping */ 537 struct rb_root_cached mapping_tree; 538 rwlock_t mapping_tree_lock; 539 540 /* 541 * Block reservation for extent, checksum, root tree and delayed dir 542 * index item. 
543 */ 544 struct btrfs_block_rsv global_block_rsv; 545 /* Block reservation for metadata operations */ 546 struct btrfs_block_rsv trans_block_rsv; 547 /* Block reservation for chunk tree */ 548 struct btrfs_block_rsv chunk_block_rsv; 549 /* Block reservation for remap tree. */ 550 struct btrfs_block_rsv remap_block_rsv; 551 /* Block reservation for delayed operations */ 552 struct btrfs_block_rsv delayed_block_rsv; 553 /* Block reservation for delayed refs */ 554 struct btrfs_block_rsv delayed_refs_rsv; 555 /* Block reservation for treelog tree */ 556 struct btrfs_block_rsv treelog_rsv; 557 558 struct btrfs_block_rsv empty_block_rsv; 559 560 /* 561 * Updated while holding the lock 'trans_lock'. Due to the life cycle of 562 * a transaction, it can be directly read while holding a transaction 563 * handle, everywhere else must be read with btrfs_get_fs_generation(). 564 * Should always be updated using btrfs_set_fs_generation(). 565 */ 566 u64 generation; 567 /* 568 * Always use btrfs_get_last_trans_committed() and 569 * btrfs_set_last_trans_committed() to read and update this field. 570 */ 571 u64 last_trans_committed; 572 /* 573 * Generation of the last transaction used for block group relocation 574 * since the filesystem was last mounted (or 0 if none happened yet). 575 * Must be written and read while holding btrfs_fs_info::commit_root_sem. 576 */ 577 u64 last_reloc_trans; 578 579 /* 580 * This is updated to the current trans every time a full commit is 581 * required instead of the faster short fsync log commits 582 */ 583 u64 last_trans_log_full_commit; 584 unsigned long long mount_opt; 585 586 /* Compress related structures. */ 587 void *compr_wsm[BTRFS_NR_COMPRESS_TYPES]; 588 589 int compress_type; 590 int compress_level; 591 u32 commit_interval; 592 /* 593 * It is a suggestive number, the read side is safe even it gets a 594 * wrong number because we will write out the data into a regular 595 * extent. 
The write side(mount/remount) is under ->s_umount lock, 596 * so it is also safe. 597 */ 598 u64 max_inline; 599 600 struct btrfs_transaction *running_transaction; 601 wait_queue_head_t transaction_throttle; 602 wait_queue_head_t transaction_wait; 603 wait_queue_head_t transaction_blocked_wait; 604 wait_queue_head_t async_submit_wait; 605 606 /* 607 * Used to protect the incompat_flags, compat_flags, compat_ro_flags 608 * when they are updated. 609 * 610 * Because we do not clear the flags for ever, so we needn't use 611 * the lock on the read side. 612 * 613 * We also needn't use the lock when we mount the fs, because 614 * there is no other task which will update the flag. 615 */ 616 spinlock_t super_lock; 617 struct btrfs_super_block *super_copy; 618 struct btrfs_super_block *super_for_commit; 619 struct super_block *sb; 620 struct inode *btree_inode; 621 struct mutex tree_log_mutex; 622 struct mutex transaction_kthread_mutex; 623 struct mutex cleaner_mutex; 624 struct mutex chunk_mutex; 625 struct mutex remap_mutex; 626 627 /* 628 * This is taken to make sure we don't set block groups ro after the 629 * free space cache has been allocated on them. 630 */ 631 struct mutex ro_block_group_mutex; 632 633 /* 634 * This is used during read/modify/write to make sure no two ios are 635 * trying to mod the same stripe at the same time. 636 */ 637 struct btrfs_stripe_hash_table *stripe_hash_table; 638 639 /* 640 * This protects the ordered operations list only while we are 641 * processing all of the entries on it. This way we make sure the 642 * commit code doesn't find the list temporarily empty because another 643 * function happens to be doing non-waiting preflush before jumping 644 * into the main commit. 
645 */ 646 struct mutex ordered_operations_mutex; 647 648 struct rw_semaphore commit_root_sem; 649 650 struct rw_semaphore cleanup_work_sem; 651 652 struct rw_semaphore subvol_sem; 653 654 spinlock_t trans_lock; 655 /* 656 * The reloc mutex goes with the trans lock, it is taken during commit 657 * to protect us from the relocation code. 658 */ 659 struct mutex reloc_mutex; 660 /* Protects setting, clearing and getting fs_info->reloc_ctl. */ 661 spinlock_t reloc_ctl_lock; 662 663 struct list_head trans_list; 664 struct list_head dead_roots; 665 struct list_head caching_block_groups; 666 667 spinlock_t delayed_iput_lock; 668 struct list_head delayed_iputs; 669 atomic_t nr_delayed_iputs; 670 wait_queue_head_t delayed_iputs_wait; 671 672 atomic64_t tree_mod_seq; 673 674 /* This protects tree_mod_log and tree_mod_seq_list */ 675 rwlock_t tree_mod_log_lock; 676 struct rb_root tree_mod_log; 677 struct list_head tree_mod_seq_list; 678 679 atomic_t async_delalloc_pages; 680 681 /* This is used to protect the following list -- ordered_roots. */ 682 spinlock_t ordered_root_lock; 683 684 /* 685 * All fs/file tree roots in which there are data=ordered extents 686 * pending writeback are added into this list. 687 * 688 * These can span multiple transactions and basically include every 689 * dirty data page that isn't from nodatacow. 690 */ 691 struct list_head ordered_roots; 692 693 struct mutex delalloc_root_mutex; 694 spinlock_t delalloc_root_lock; 695 /* All fs/file tree roots that have delalloc inodes. */ 696 struct list_head delalloc_roots; 697 698 /* 699 * There is a pool of worker threads for checksumming during writes and 700 * a pool for checksumming after reads. This is because readers can 701 * run with FS locks held, and the writers may be waiting for those 702 * locks. We don't want ordering in the pending list to cause 703 * deadlocks, and so the two are serviced separately. 704 * 705 * A third pool does submit_bio to avoid deadlocking with the other two. 
706 */ 707 struct btrfs_workqueue *workers; 708 struct btrfs_workqueue *delalloc_workers; 709 struct btrfs_workqueue *flush_workers; 710 struct workqueue_struct *endio_workers; 711 struct workqueue_struct *endio_meta_workers; 712 struct workqueue_struct *rmw_workers; 713 struct btrfs_workqueue *endio_write_workers; 714 struct btrfs_workqueue *endio_freespace_worker; 715 struct btrfs_workqueue *caching_workers; 716 struct btrfs_workqueue *delayed_workers; 717 718 struct task_struct *transaction_kthread; 719 struct task_struct *cleaner_kthread; 720 u32 thread_pool_size; 721 722 struct kobject *space_info_kobj; 723 struct kobject *qgroups_kobj; 724 struct kobject *discard_kobj; 725 726 /* Track the number of blocks (sectors) read by the filesystem. */ 727 struct percpu_counter stats_read_blocks; 728 729 /* Used to keep from writing metadata until there is a nice batch */ 730 struct percpu_counter dirty_metadata_bytes; 731 struct percpu_counter delalloc_bytes; 732 struct percpu_counter ordered_bytes; 733 s32 dirty_metadata_batch; 734 s32 delalloc_batch; 735 736 struct percpu_counter evictable_extent_maps; 737 u64 em_shrinker_last_root; 738 u64 em_shrinker_last_ino; 739 atomic64_t em_shrinker_nr_to_scan; 740 struct work_struct em_shrinker_work; 741 742 /* Protected by 'trans_lock'. */ 743 struct list_head dirty_cowonly_roots; 744 745 struct btrfs_fs_devices *fs_devices; 746 747 /* 748 * The space_info list is effectively read only after initial setup. 749 * It is populated at mount time and cleaned up after all block groups 750 * are removed. RCU is used to protect it. 751 */ 752 struct list_head space_info; 753 754 struct btrfs_space_info *data_sinfo; 755 756 struct reloc_control *reloc_ctl; 757 758 /* data_alloc_cluster is only used in ssd_spread mode */ 759 struct btrfs_free_cluster data_alloc_cluster; 760 761 /* All metadata allocations go through this cluster. */ 762 struct btrfs_free_cluster meta_alloc_cluster; 763 764 /* Auto defrag inodes go here. 
*/ 765 spinlock_t defrag_inodes_lock; 766 struct rb_root defrag_inodes; 767 atomic_t defrag_running; 768 769 /* Used to protect avail_{data, metadata, system}_alloc_bits */ 770 seqlock_t profiles_lock; 771 /* 772 * These three are in extended format (availability of single chunks is 773 * denoted by BTRFS_AVAIL_ALLOC_BIT_SINGLE bit, other types are denoted 774 * by corresponding BTRFS_BLOCK_GROUP_* bits) 775 */ 776 u64 avail_data_alloc_bits; 777 u64 avail_metadata_alloc_bits; 778 u64 avail_system_alloc_bits; 779 780 /* Balance state */ 781 spinlock_t balance_lock; 782 struct mutex balance_mutex; 783 atomic_t balance_pause_req; 784 atomic_t balance_cancel_req; 785 struct btrfs_balance_control *balance_ctl; 786 wait_queue_head_t balance_wait_q; 787 788 /* Cancellation requests for chunk relocation */ 789 atomic_t reloc_cancel_req; 790 791 u32 data_chunk_allocations; 792 u32 metadata_ratio; 793 794 /* Private scrub information */ 795 struct mutex scrub_lock; 796 atomic_t scrubs_running; 797 atomic_t scrub_pause_req; 798 atomic_t scrubs_paused; 799 atomic_t scrub_cancel_req; 800 wait_queue_head_t scrub_pause_wait; 801 /* 802 * The worker pointers are NULL iff the refcount is 0, ie. scrub is not 803 * running. 804 */ 805 refcount_t scrub_workers_refcnt; 806 struct workqueue_struct *scrub_workers; 807 808 struct btrfs_discard_ctl discard_ctl; 809 810 /* Is qgroup tracking in a consistent state? */ 811 u64 qgroup_flags; 812 813 /* Holds configuration and tracking. Protected by qgroup_lock. */ 814 struct rb_root qgroup_tree; 815 spinlock_t qgroup_lock; 816 817 /* 818 * Protect user change for quota operations. If a transaction is needed, 819 * it must be started before locking this lock. 820 */ 821 struct mutex qgroup_ioctl_lock; 822 823 /* List of dirty qgroups to be written at next commit. */ 824 struct list_head dirty_qgroups; 825 826 /* Used by qgroup for an efficient tree traversal. */ 827 u64 qgroup_seq; 828 829 /* Qgroup rescan items. 
*/ 830 /* Protects the progress item */ 831 struct mutex qgroup_rescan_lock; 832 struct btrfs_key qgroup_rescan_progress; 833 struct btrfs_workqueue *qgroup_rescan_workers; 834 struct completion qgroup_rescan_completion; 835 struct btrfs_work qgroup_rescan_work; 836 /* Protected by qgroup_rescan_lock */ 837 bool qgroup_rescan_running; 838 u8 qgroup_drop_subtree_thres; 839 u64 qgroup_enable_gen; 840 841 /* 842 * If this is not 0, then it indicates a serious filesystem error has 843 * happened and it contains that error (negative errno value). 844 */ 845 int fs_error; 846 847 /* Filesystem state */ 848 unsigned long fs_state; 849 850 struct btrfs_delayed_root delayed_root; 851 852 /* Entries are eb->start >> nodesize_bits */ 853 struct xarray buffer_tree; 854 855 /* Next backup root to be overwritten */ 856 int backup_root_index; 857 858 /* Device replace state */ 859 struct btrfs_dev_replace dev_replace; 860 861 struct semaphore uuid_tree_rescan_sem; 862 863 /* Used to reclaim the metadata space in the background. */ 864 struct work_struct async_reclaim_work; 865 struct work_struct async_data_reclaim_work; 866 struct work_struct preempt_reclaim_work; 867 868 /* Reclaim partially filled block groups in the background */ 869 struct work_struct reclaim_bgs_work; 870 /* Protected by unused_bgs_lock. */ 871 struct list_head reclaim_bgs; 872 int bg_reclaim_threshold; 873 874 /* Protects the lists unused_bgs, reclaim_bgs, and fully_remapped_bgs. */ 875 spinlock_t unused_bgs_lock; 876 /* Protected by unused_bgs_lock. 
*/ 877 struct list_head unused_bgs; 878 struct list_head fully_remapped_bgs; 879 struct mutex unused_bg_unpin_mutex; 880 /* Protect block groups that are going to be deleted */ 881 struct mutex reclaim_bgs_lock; 882 883 /* Cached block sizes */ 884 u32 nodesize; 885 u32 nodesize_bits; 886 u32 sectorsize; 887 /* ilog2 of sectorsize, use to avoid 64bit division */ 888 u32 sectorsize_bits; 889 u32 block_min_order; 890 u32 block_max_order; 891 u32 stripesize; 892 u32 writeback_bio_size; 893 u32 csum_size; 894 u32 csums_per_leaf; 895 u32 csum_type; 896 897 /* 898 * Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular 899 * filesystem, on zoned it depends on the device constraints. 900 */ 901 u64 max_extent_size; 902 903 /* Block groups and devices containing active swapfiles. */ 904 spinlock_t swapfile_pins_lock; 905 struct rb_root swapfile_pins; 906 907 /* Type of exclusive operation running, protected by super_lock */ 908 enum btrfs_exclusive_operation exclusive_operation; 909 910 /* 911 * Zone size > 0 when in ZONED mode, otherwise it's used for a check 912 * if the mode is enabled 913 */ 914 u64 zone_size; 915 916 /* Constraints for ZONE_APPEND commands: */ 917 struct queue_limits limits; 918 u64 max_zone_append_size; 919 920 struct mutex zoned_meta_io_lock; 921 spinlock_t treelog_bg_lock; 922 u64 treelog_bg; 923 924 /* 925 * Start of the dedicated data relocation block group, protected by 926 * relocation_bg_lock. 927 */ 928 spinlock_t relocation_bg_lock; 929 u64 data_reloc_bg; 930 struct mutex zoned_data_reloc_io_lock; 931 932 struct btrfs_block_group *active_meta_bg; 933 struct btrfs_block_group *active_system_bg; 934 935 u64 nr_global_roots; 936 937 spinlock_t zone_active_bgs_lock; 938 struct list_head zone_active_bgs; 939 940 /* Updates are not protected by any lock */ 941 struct btrfs_commit_stats commit_stats; 942 943 /* 944 * Last generation where we dropped a non-relocation root. 
945 * Use btrfs_set_last_root_drop_gen() and btrfs_get_last_root_drop_gen() 946 * to change it and to read it, respectively. 947 */ 948 u64 last_root_drop_gen; 949 950 /* 951 * Annotations for transaction events (structures are empty when 952 * compiled without lockdep). 953 */ 954 struct lockdep_map btrfs_trans_num_writers_map; 955 struct lockdep_map btrfs_trans_num_extwriters_map; 956 struct lockdep_map btrfs_state_change_map[4]; 957 struct lockdep_map btrfs_trans_pending_ordered_map; 958 struct lockdep_map btrfs_ordered_extent_map; 959 960 #ifdef CONFIG_BTRFS_DEBUG 961 spinlock_t ref_verify_lock; 962 struct rb_root block_tree; 963 964 struct kobject *debug_kobj; 965 struct list_head allocated_roots; 966 967 spinlock_t eb_leak_lock; 968 struct list_head allocated_ebs; 969 #endif 970 971 /* Used by self tests only. */ 972 bool (*use_bitmap)(struct btrfs_free_space_ctl *ctl, 973 struct btrfs_free_space *info); 974 }; 975 976 #define folio_to_inode(_folio) (BTRFS_I(_Generic((_folio), \ 977 struct folio *: (_folio))->mapping->host)) 978 979 #define folio_to_fs_info(_folio) (folio_to_inode(_folio)->root->fs_info) 980 981 #define inode_to_fs_info(_inode) (BTRFS_I(_Generic((_inode), \ 982 struct inode *: (_inode)))->root->fs_info) 983 984 static inline gfp_t btrfs_alloc_write_mask(const struct address_space *mapping) 985 { 986 return mapping_gfp_constraint(mapping, ~__GFP_FS); 987 } 988 989 /* Return the minimal folio size of the fs. 
*/ 990 static inline unsigned int btrfs_min_folio_size(const struct btrfs_fs_info *fs_info) 991 { 992 return 1U << (PAGE_SHIFT + fs_info->block_min_order); 993 } 994 995 static inline u64 btrfs_get_fs_generation(const struct btrfs_fs_info *fs_info) 996 { 997 return READ_ONCE(fs_info->generation); 998 } 999 1000 static inline void btrfs_set_fs_generation(struct btrfs_fs_info *fs_info, u64 gen) 1001 { 1002 WRITE_ONCE(fs_info->generation, gen); 1003 } 1004 1005 static inline u64 btrfs_get_last_trans_committed(const struct btrfs_fs_info *fs_info) 1006 { 1007 return READ_ONCE(fs_info->last_trans_committed); 1008 } 1009 1010 static inline void btrfs_set_last_trans_committed(struct btrfs_fs_info *fs_info, u64 gen) 1011 { 1012 WRITE_ONCE(fs_info->last_trans_committed, gen); 1013 } 1014 1015 static inline void btrfs_set_last_root_drop_gen(struct btrfs_fs_info *fs_info, 1016 u64 gen) 1017 { 1018 WRITE_ONCE(fs_info->last_root_drop_gen, gen); 1019 } 1020 1021 static inline u64 btrfs_get_last_root_drop_gen(const struct btrfs_fs_info *fs_info) 1022 { 1023 return READ_ONCE(fs_info->last_root_drop_gen); 1024 } 1025 1026 /* 1027 * Take the number of bytes to be checksummed and figure out how many leaves 1028 * it would require to store the csums for that many bytes. 1029 */ 1030 static inline u64 btrfs_csum_bytes_to_leaves( 1031 const struct btrfs_fs_info *fs_info, u64 csum_bytes) 1032 { 1033 const u64 num_csums = csum_bytes >> fs_info->sectorsize_bits; 1034 1035 return DIV_ROUND_UP_ULL(num_csums, fs_info->csums_per_leaf); 1036 } 1037 1038 /* 1039 * Use this if we would be adding new items, as we could split nodes as we cow 1040 * down the tree. 1041 */ 1042 static inline u64 btrfs_calc_insert_metadata_size(const struct btrfs_fs_info *fs_info, 1043 unsigned num_items) 1044 { 1045 return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * 2 * num_items; 1046 } 1047 1048 /* 1049 * Doing a truncate or a modification won't result in new nodes or leaves, just 1050 * what we need for COW. 
1051 */ 1052 static inline u64 btrfs_calc_metadata_size(const struct btrfs_fs_info *fs_info, 1053 unsigned num_items) 1054 { 1055 return (u64)fs_info->nodesize * BTRFS_MAX_LEVEL * num_items; 1056 } 1057 1058 #define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \ 1059 sizeof(struct btrfs_item)) 1060 1061 #define BTRFS_BYTES_TO_BLKS(fs_info, bytes) ((bytes) >> (fs_info)->sectorsize_bits) 1062 1063 static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info) 1064 { 1065 return IS_ENABLED(CONFIG_BLK_DEV_ZONED) && fs_info->zone_size > 0; 1066 } 1067 1068 /* 1069 * Count how many fs_info->max_extent_size cover the @size 1070 */ 1071 static inline u32 count_max_extents(const struct btrfs_fs_info *fs_info, u64 size) 1072 { 1073 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 1074 if (!fs_info) 1075 return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE); 1076 #endif 1077 1078 return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size); 1079 } 1080 1081 static inline unsigned int btrfs_blocks_per_folio(const struct btrfs_fs_info *fs_info, 1082 const struct folio *folio) 1083 { 1084 return folio_size(folio) >> fs_info->sectorsize_bits; 1085 } 1086 1087 bool __attribute_const__ btrfs_supported_blocksize(u32 blocksize); 1088 bool btrfs_exclop_start(struct btrfs_fs_info *fs_info, 1089 enum btrfs_exclusive_operation type); 1090 bool btrfs_exclop_start_try_lock(struct btrfs_fs_info *fs_info, 1091 enum btrfs_exclusive_operation type); 1092 void btrfs_exclop_start_unlock(struct btrfs_fs_info *fs_info); 1093 void btrfs_exclop_finish(struct btrfs_fs_info *fs_info); 1094 void btrfs_exclop_balance(struct btrfs_fs_info *fs_info, 1095 enum btrfs_exclusive_operation op); 1096 1097 int btrfs_check_ioctl_vol_args_path(const struct btrfs_ioctl_vol_args *vol_args); 1098 1099 u16 btrfs_csum_type_size(u16 type); 1100 int btrfs_super_csum_size(const struct btrfs_super_block *s); 1101 const char *btrfs_super_csum_name(u16 csum_type); 
1102 size_t __attribute_const__ btrfs_get_num_csums(void); 1103 struct btrfs_csum_ctx { 1104 u16 csum_type; 1105 union { 1106 u32 crc32; 1107 struct xxh64_state xxh64; 1108 struct sha256_ctx sha256; 1109 struct blake2b_ctx blake2b; 1110 }; 1111 }; 1112 void btrfs_csum(u16 csum_type, const u8 *data, size_t len, u8 *out); 1113 void btrfs_csum_init(struct btrfs_csum_ctx *ctx, u16 csum_type); 1114 void btrfs_csum_update(struct btrfs_csum_ctx *ctx, const u8 *data, size_t len); 1115 void btrfs_csum_final(struct btrfs_csum_ctx *ctx, u8 *out); 1116 1117 static inline bool btrfs_is_empty_uuid(const u8 *uuid) 1118 { 1119 return uuid_is_null((const uuid_t *)uuid); 1120 } 1121 1122 /* Compatibility and incompatibility defines */ 1123 void __btrfs_set_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, 1124 const char *name); 1125 void __btrfs_clear_fs_incompat(struct btrfs_fs_info *fs_info, u64 flag, 1126 const char *name); 1127 void __btrfs_set_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, 1128 const char *name); 1129 void __btrfs_clear_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag, 1130 const char *name); 1131 1132 #define __btrfs_fs_incompat(fs_info, flags) \ 1133 (!!(btrfs_super_incompat_flags((fs_info)->super_copy) & (flags))) 1134 1135 #define __btrfs_fs_compat_ro(fs_info, flags) \ 1136 (!!(btrfs_super_compat_ro_flags((fs_info)->super_copy) & (flags))) 1137 1138 #define btrfs_set_fs_incompat(__fs_info, opt) \ 1139 __btrfs_set_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, #opt) 1140 1141 #define btrfs_clear_fs_incompat(__fs_info, opt) \ 1142 __btrfs_clear_fs_incompat((__fs_info), BTRFS_FEATURE_INCOMPAT_##opt, #opt) 1143 1144 #define btrfs_fs_incompat(fs_info, opt) \ 1145 __btrfs_fs_incompat((fs_info), BTRFS_FEATURE_INCOMPAT_##opt) 1146 1147 #define btrfs_set_fs_compat_ro(__fs_info, opt) \ 1148 __btrfs_set_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, #opt) 1149 1150 #define btrfs_clear_fs_compat_ro(__fs_info, opt) \ 1151 
__btrfs_clear_fs_compat_ro((__fs_info), BTRFS_FEATURE_COMPAT_RO_##opt, #opt) 1152 1153 #define btrfs_fs_compat_ro(fs_info, opt) \ 1154 __btrfs_fs_compat_ro((fs_info), BTRFS_FEATURE_COMPAT_RO_##opt) 1155 1156 #define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1157 #define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1158 #define btrfs_raw_test_opt(o, opt) ((o) & BTRFS_MOUNT_##opt) 1159 #define btrfs_test_opt(fs_info, opt) ((fs_info)->mount_opt & \ 1160 BTRFS_MOUNT_##opt) 1161 1162 static inline bool btrfs_fs_closing(const struct btrfs_fs_info *fs_info) 1163 { 1164 return unlikely(test_bit(BTRFS_FS_CLOSING_START, &fs_info->flags)); 1165 } 1166 1167 static inline bool btrfs_fs_closing_done(const struct btrfs_fs_info *fs_info) 1168 { 1169 if (btrfs_fs_closing(fs_info) && test_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags)) 1170 return true; 1171 1172 return false; 1173 } 1174 1175 /* 1176 * If we remount the fs to be R/O or umount the fs, the cleaner needn't do 1177 * anything except sleeping. This function is used to check the status of 1178 * the fs. 1179 * We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount, 1180 * since setting and checking for SB_RDONLY in the superblock's flags is not 1181 * atomic. 
1182 */ 1183 static inline int btrfs_need_cleaner_sleep(const struct btrfs_fs_info *fs_info) 1184 { 1185 return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) || 1186 btrfs_fs_closing(fs_info); 1187 } 1188 1189 static inline void btrfs_wake_unfinished_drop(struct btrfs_fs_info *fs_info) 1190 { 1191 clear_and_wake_up_bit(BTRFS_FS_UNFINISHED_DROPS, &fs_info->flags); 1192 } 1193 1194 #define BTRFS_FS_ERROR(fs_info) (READ_ONCE((fs_info)->fs_error)) 1195 1196 #define BTRFS_FS_LOG_CLEANUP_ERROR(fs_info) \ 1197 (unlikely(test_bit(BTRFS_FS_STATE_LOG_CLEANUP_ERROR, \ 1198 &(fs_info)->fs_state))) 1199 1200 static inline bool btrfs_is_shutdown(const struct btrfs_fs_info *fs_info) 1201 { 1202 return unlikely(test_bit(BTRFS_FS_STATE_EMERGENCY_SHUTDOWN, &fs_info->fs_state)); 1203 } 1204 1205 static inline void btrfs_force_shutdown(struct btrfs_fs_info *fs_info) 1206 { 1207 /* 1208 * Here we do not want to use handle_fs_error(), which will mark the fs 1209 * read-only. 1210 * Some call sites like shutdown ioctl will mark the fs shutdown when 1211 * the fs is frozen. But thaw path will handle RO and RW fs 1212 * differently. 1213 * 1214 * So here we only mark the fs error without flipping it RO. 1215 */ 1216 WRITE_ONCE(fs_info->fs_error, -EIO); 1217 if (!test_and_set_bit(BTRFS_FS_STATE_EMERGENCY_SHUTDOWN, &fs_info->fs_state)) { 1218 btrfs_crit(fs_info, "emergency shutdown"); 1219 fserror_report_shutdown(fs_info->sb, GFP_KERNEL); 1220 } 1221 } 1222 1223 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS 1224 1225 #define EXPORT_FOR_TESTS 1226 1227 static inline bool btrfs_is_testing(const struct btrfs_fs_info *fs_info) 1228 { 1229 return unlikely(test_bit(BTRFS_FS_STATE_DUMMY_FS_INFO, &fs_info->fs_state)); 1230 } 1231 1232 void btrfs_test_destroy_inode(struct inode *inode); 1233 1234 #else 1235 1236 #define EXPORT_FOR_TESTS static 1237 1238 static inline bool btrfs_is_testing(const struct btrfs_fs_info *fs_info) 1239 { 1240 return false; 1241 } 1242 #endif 1243 1244 #endif 1245