1 /* SPDX-License-Identifier: GPL-2.0 */ 2 3 #ifndef BTRFS_SPACE_INFO_H 4 #define BTRFS_SPACE_INFO_H 5 6 #include <trace/events/btrfs.h> 7 #include <linux/spinlock.h> 8 #include <linux/list.h> 9 #include <linux/kobject.h> 10 #include <linux/lockdep.h> 11 #include <linux/wait.h> 12 #include <linux/rwsem.h> 13 #include "volumes.h" 14 15 struct btrfs_fs_info; 16 struct btrfs_block_group; 17 18 /* 19 * Different levels for to flush space when doing space reservations. 20 * 21 * The higher the level, the more methods we try to reclaim space. 22 */ 23 enum btrfs_reserve_flush_enum { 24 /* 25 * Used when we can't flush or don't need: 26 * 27 * 1) We are holding a transaction handle open, so we can't flush as 28 * that could deadlock. 29 * 30 * 2) For a nowait write we don't want to block when reserving delalloc. 31 * 32 * 3) Joining a transaction or attaching a transaction, we don't want 33 * to wait and we don't need to reserve anything (any needed space 34 * was reserved before in a dedicated block reserve, or we rely on 35 * the global block reserve, see btrfs_init_root_block_rsv()). 36 * 37 * 4) Starting a transaction when we don't need to reserve space, as 38 * we don't need it because we previously reserved in a dedicated 39 * block reserve or rely on the global block reserve, like the above 40 * case. 41 */ 42 BTRFS_RESERVE_NO_FLUSH, 43 44 /* 45 * Flush space by: 46 * - Running delayed inode items 47 * - Allocating a new chunk 48 */ 49 BTRFS_RESERVE_FLUSH_LIMIT, 50 51 /* 52 * Flush space by: 53 * - Running delayed inode items 54 * - Running delayed refs 55 * - Running delalloc and waiting for ordered extents 56 * - Allocating a new chunk 57 * - Committing transaction 58 */ 59 BTRFS_RESERVE_FLUSH_EVICT, 60 61 /* 62 * Flush space by above mentioned methods and by: 63 * - Running delayed iputs 64 * - Committing transaction 65 * 66 * Can be interrupted by a fatal signal. 67 */ 68 BTRFS_RESERVE_FLUSH_DATA, 69 BTRFS_RESERVE_FLUSH_FREE_SPACE_INODE, 70 BTRFS_RESERVE_FLUSH_ALL, 71 72 /* 73 * Pretty much the same as FLUSH_ALL, but can also steal space from 74 * global rsv. 75 * 76 * Can be interrupted by a fatal signal. 77 */ 78 BTRFS_RESERVE_FLUSH_ALL_STEAL, 79 80 /* 81 * This is for btrfs_use_block_rsv only. We have exhausted our block 82 * rsv and our global block rsv. This can happen for things like 83 * delalloc where we are overwriting a lot of extents with a single 84 * extent and didn't reserve enough space. Alternatively it can happen 85 * with delalloc where we reserve 1 extents worth for a large extent but 86 * fragmentation leads to multiple extents being created. This will 87 * give us the reservation in the case of 88 * 89 * if (num_bytes < (space_info->total_bytes - 90 * btrfs_space_info_used(space_info, false)) 91 * 92 * Which ignores bytes_may_use. This is potentially dangerous, but our 93 * reservation system is generally pessimistic so is able to absorb this 94 * style of mistake. 95 */ 96 BTRFS_RESERVE_FLUSH_EMERGENCY, 97 }; 98 99 /* 100 * Please be aware that the order of enum values will be the order of the reclaim 101 * process in btrfs_async_reclaim_metadata_space(). 102 */ 103 enum btrfs_flush_state { 104 FLUSH_DELAYED_ITEMS_NR = 1, 105 FLUSH_DELAYED_ITEMS = 2, 106 FLUSH_DELAYED_REFS_NR = 3, 107 FLUSH_DELAYED_REFS = 4, 108 FLUSH_DELALLOC = 5, 109 FLUSH_DELALLOC_WAIT = 6, 110 FLUSH_DELALLOC_FULL = 7, 111 ALLOC_CHUNK = 8, 112 ALLOC_CHUNK_FORCE = 9, 113 RUN_DELAYED_IPUTS = 10, 114 COMMIT_TRANS = 11, 115 RESET_ZONES = 12, 116 RECLAIM_ZONES = 13, 117 }; 118 119 enum btrfs_space_info_sub_group { 120 BTRFS_SUB_GROUP_PRIMARY, 121 BTRFS_SUB_GROUP_DATA_RELOC, 122 BTRFS_SUB_GROUP_TREELOG, 123 }; 124 125 #define BTRFS_SPACE_INFO_SUB_GROUP_MAX 1 126 struct btrfs_space_info { 127 struct btrfs_fs_info *fs_info; 128 struct btrfs_space_info *parent; 129 struct btrfs_space_info *sub_group[BTRFS_SPACE_INFO_SUB_GROUP_MAX]; 130 int subgroup_id; 131 spinlock_t lock; 132 133 u64 total_bytes; /* total bytes in the space, 134 this doesn't take mirrors into account */ 135 u64 bytes_used; /* total bytes used, 136 this doesn't take mirrors into account */ 137 u64 bytes_pinned; /* total bytes pinned, will be freed when the 138 transaction finishes */ 139 u64 bytes_reserved; /* total bytes the allocator has reserved for 140 current allocations */ 141 u64 bytes_may_use; /* number of bytes that may be used for 142 delalloc/allocations */ 143 u64 bytes_readonly; /* total bytes that are read only */ 144 u64 bytes_zone_unusable; /* total bytes that are unusable until 145 resetting the device zone */ 146 147 u64 max_extent_size; /* This will hold the maximum extent size of 148 the space info if we had an ENOSPC in the 149 allocator. */ 150 /* Chunk size in bytes */ 151 u64 chunk_size; 152 153 /* 154 * Once a block group drops below this threshold (percents) we'll 155 * schedule it for reclaim. 156 */ 157 int bg_reclaim_threshold; 158 159 int clamp; /* Used to scale our threshold for preemptive 160 flushing. The value is >> clamp, so turns 161 out to be a 2^clamp divisor. */ 162 163 bool full; /* indicates that we cannot allocate any more 164 chunks for this space */ 165 bool chunk_alloc; /* set if we are allocating a chunk */ 166 167 bool flush; /* set if we are trying to make space */ 168 169 unsigned int force_alloc; /* set if we need to force a chunk 170 alloc for this space */ 171 172 u64 disk_used; /* total bytes used on disk */ 173 u64 disk_total; /* total bytes on disk, takes mirrors into 174 account */ 175 176 u64 flags; 177 178 struct list_head list; 179 /* Protected by the spinlock 'lock'. */ 180 struct list_head ro_bgs; 181 struct list_head priority_tickets; 182 struct list_head tickets; 183 184 /* 185 * Size of space that needs to be reclaimed in order to satisfy pending 186 * tickets 187 */ 188 u64 reclaim_size; 189 190 /* 191 * tickets_id just indicates the next ticket will be handled, so note 192 * it's not stored per ticket. 193 */ 194 u64 tickets_id; 195 196 struct rw_semaphore groups_sem; 197 /* for block groups in our same type */ 198 struct list_head block_groups[BTRFS_NR_RAID_TYPES]; 199 200 struct kobject kobj; 201 struct kobject *block_group_kobjs[BTRFS_NR_RAID_TYPES]; 202 203 /* 204 * Monotonically increasing counter of block group reclaim attempts 205 * Exposed in /sys/fs/<uuid>/allocation/<type>/reclaim_count 206 */ 207 u64 reclaim_count; 208 209 /* 210 * Monotonically increasing counter of reclaimed bytes 211 * Exposed in /sys/fs/<uuid>/allocation/<type>/reclaim_bytes 212 */ 213 u64 reclaim_bytes; 214 215 /* 216 * Monotonically increasing counter of reclaim errors 217 * Exposed in /sys/fs/<uuid>/allocation/<type>/reclaim_errors 218 */ 219 u64 reclaim_errors; 220 221 /* 222 * If true, use the dynamic relocation threshold, instead of the 223 * fixed bg_reclaim_threshold. 224 */ 225 bool dynamic_reclaim; 226 227 /* 228 * Periodically check all block groups against the reclaim 229 * threshold in the cleaner thread. 230 */ 231 bool periodic_reclaim; 232 233 /* 234 * Periodic reclaim should be a no-op if a space_info hasn't 235 * freed any space since the last time we tried. 236 */ 237 bool periodic_reclaim_ready; 238 239 /* 240 * Net bytes freed or allocated since the last reclaim pass. 241 */ 242 s64 reclaimable_bytes; 243 }; 244 245 static inline bool btrfs_mixed_space_info(const struct btrfs_space_info *space_info) 246 { 247 return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) && 248 (space_info->flags & BTRFS_BLOCK_GROUP_DATA)); 249 } 250 251 /* 252 * 253 * Declare a helper function to detect underflow of various space info members 254 */ 255 #define DECLARE_SPACE_INFO_UPDATE(name, trace_name) \ 256 static inline void \ 257 btrfs_space_info_update_##name(struct btrfs_space_info *sinfo, \ 258 s64 bytes) \ 259 { \ 260 struct btrfs_fs_info *fs_info = sinfo->fs_info; \ 261 const u64 abs_bytes = (bytes < 0) ? -bytes : bytes; \ 262 lockdep_assert_held(&sinfo->lock); \ 263 trace_update_##name(fs_info, sinfo, sinfo->name, bytes); \ 264 trace_btrfs_space_reservation(fs_info, trace_name, \ 265 sinfo->flags, abs_bytes, \ 266 bytes > 0); \ 267 if (bytes < 0 && sinfo->name < -bytes) { \ 268 WARN_ON(1); \ 269 sinfo->name = 0; \ 270 return; \ 271 } \ 272 sinfo->name += bytes; \ 273 } 274 275 DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info"); 276 DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned"); 277 DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable"); 278 279 static inline u64 btrfs_space_info_used(const struct btrfs_space_info *s_info, 280 bool may_use_included) 281 { 282 lockdep_assert_held(&s_info->lock); 283 284 return s_info->bytes_used + s_info->bytes_reserved + 285 s_info->bytes_pinned + s_info->bytes_readonly + 286 s_info->bytes_zone_unusable + 287 (may_use_included ? s_info->bytes_may_use : 0); 288 } 289 290 int btrfs_init_space_info(struct btrfs_fs_info *fs_info); 291 void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info, 292 struct btrfs_block_group *block_group); 293 void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info, 294 u64 chunk_size); 295 struct btrfs_space_info *btrfs_find_space_info(const struct btrfs_fs_info *info, 296 u64 flags); 297 void btrfs_clear_space_info_full(struct btrfs_fs_info *info); 298 void btrfs_dump_space_info(struct btrfs_space_info *info, u64 bytes, 299 bool dump_block_groups); 300 int btrfs_reserve_metadata_bytes(struct btrfs_space_info *space_info, 301 u64 orig_bytes, 302 enum btrfs_reserve_flush_enum flush); 303 void btrfs_try_granting_tickets(struct btrfs_space_info *space_info); 304 bool btrfs_can_overcommit(const struct btrfs_space_info *space_info, u64 bytes, 305 enum btrfs_reserve_flush_enum flush); 306 307 static inline void btrfs_space_info_free_bytes_may_use( 308 struct btrfs_space_info *space_info, 309 u64 num_bytes) 310 { 311 spin_lock(&space_info->lock); 312 btrfs_space_info_update_bytes_may_use(space_info, -num_bytes); 313 btrfs_try_granting_tickets(space_info); 314 spin_unlock(&space_info->lock); 315 } 316 int btrfs_reserve_data_bytes(struct btrfs_space_info *space_info, u64 bytes, 317 enum btrfs_reserve_flush_enum flush); 318 void btrfs_dump_space_info_for_trans_abort(struct btrfs_fs_info *fs_info); 319 void btrfs_init_async_reclaim_work(struct btrfs_fs_info *fs_info); 320 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo); 321 322 void btrfs_space_info_update_reclaimable(struct btrfs_space_info *space_info, s64 bytes); 323 void btrfs_set_periodic_reclaim_ready(struct btrfs_space_info *space_info, bool ready); 324 int btrfs_calc_reclaim_threshold(const struct btrfs_space_info *space_info); 325 void btrfs_reclaim_sweep(const struct btrfs_fs_info *fs_info); 326 void btrfs_return_free_space(struct btrfs_space_info *space_info, u64 len); 327 328 static inline const char *btrfs_space_info_type_str(const struct btrfs_space_info *space_info) 329 { 330 switch (space_info->flags) { 331 case BTRFS_BLOCK_GROUP_SYSTEM: 332 return "SYSTEM"; 333 case BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA: 334 return "DATA+METADATA"; 335 case BTRFS_BLOCK_GROUP_DATA: 336 return "DATA"; 337 case BTRFS_BLOCK_GROUP_METADATA: 338 return "METADATA"; 339 default: 340 return "UNKNOWN"; 341 } 342 } 343 344 #endif /* BTRFS_SPACE_INFO_H */ 345