/* SPDX-License-Identifier: GPL-2.0 */

#ifndef BTRFS_EXTENT_IO_H
#define BTRFS_EXTENT_IO_H

#include <linux/rbtree.h>
#include <linux/refcount.h>
#include <linux/fiemap.h>
#include <linux/btrfs_tree.h>
#include "compression.h"
#include "ulist.h"
#include "misc.h"

struct btrfs_trans_handle;

enum {
	EXTENT_BUFFER_UPTODATE,
	EXTENT_BUFFER_DIRTY,
	EXTENT_BUFFER_CORRUPT,
	/* This got triggered by readahead. */
	EXTENT_BUFFER_READAHEAD,
	EXTENT_BUFFER_TREE_REF,
	EXTENT_BUFFER_STALE,
	EXTENT_BUFFER_WRITEBACK,
	/* Read IO error. */
	EXTENT_BUFFER_READ_ERR,
	EXTENT_BUFFER_UNMAPPED,
	EXTENT_BUFFER_IN_TREE,
	/* Write IO error. */
	EXTENT_BUFFER_WRITE_ERR,
	/* Indicate the extent buffer is written zeroed out (for zoned). */
	EXTENT_BUFFER_ZONED_ZEROOUT,
	/* Indicate that extent buffer pages are being read. */
	EXTENT_BUFFER_READING,
};

/* These are flags for __process_pages_contig. */
enum {
	ENUM_BIT(PAGE_UNLOCK),
	/* Page starts writeback, clear dirty bit and set writeback bit. */
	ENUM_BIT(PAGE_START_WRITEBACK),
	ENUM_BIT(PAGE_END_WRITEBACK),
	ENUM_BIT(PAGE_SET_ORDERED),
};

/*
 * Folio private values. Every page that is controlled by the extent map has
 * folio private set to this value.
 */
#define EXTENT_FOLIO_PRIVATE 1

/*
 * The extent buffer bitmap operations are done with byte granularity instead
 * of word granularity for two reasons:
 * 1) The bitmaps must be little-endian on disk.
 * 2) Bitmap items are not guaranteed to be aligned to a word and therefore a
 *    single word in a bitmap may straddle two pages in the extent buffer.
 */
#define BIT_BYTE(nr) ((nr) / BITS_PER_BYTE)
#define BYTE_MASK ((1 << BITS_PER_BYTE) - 1)
#define BITMAP_FIRST_BYTE_MASK(start) \
	((BYTE_MASK << ((start) & (BITS_PER_BYTE - 1))) & BYTE_MASK)
#define BITMAP_LAST_BYTE_MASK(nbits) \
	(BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1)))
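/*
 * A worked sketch of how the byte masks above compose (assuming
 * BITS_PER_BYTE == 8; the numbers are illustrative only): for a bitmap
 * range covering bits [5, 13):
 *
 *   BIT_BYTE(5) == 0 and BIT_BYTE(12) == 1, so two bytes are touched;
 *   BITMAP_FIRST_BYTE_MASK(5) == 0xe0, selecting bits 5-7 of byte 0;
 *   BITMAP_LAST_BYTE_MASK(13) == 0x1f, selecting bits 8-12 via bits 0-4
 *   of byte 1.
 *
 * Byte granularity keeps this correct regardless of host endianness and of
 * whether the range straddles a page boundary inside the extent buffer.
 */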
struct btrfs_root;
struct btrfs_inode;
struct btrfs_fs_info;
struct extent_io_tree;
struct btrfs_tree_parent_check;

int __init extent_buffer_init_cachep(void);
void __cold extent_buffer_free_cachep(void);

#define INLINE_EXTENT_BUFFER_PAGES	(BTRFS_MAX_METADATA_BLOCKSIZE / PAGE_SIZE)
struct extent_buffer {
	u64 start;
	unsigned long len;
	unsigned long bflags;
	struct btrfs_fs_info *fs_info;

	/*
	 * The address where the eb can be accessed without any cross-page
	 * handling. This can be NULL if not possible.
	 */
	void *addr;

	spinlock_t refs_lock;
	atomic_t refs;
	int read_mirror;
	/* >= 0 if eb belongs to a log tree, -1 otherwise */
	s8 log_index;
	struct rcu_head rcu_head;

	struct rw_semaphore lock;

	/*
	 * Pointers to all the folios of the extent buffer.
	 *
	 * For now the folio is always order 0 (aka, a single page).
	 */
	struct folio *folios[INLINE_EXTENT_BUFFER_PAGES];
#ifdef CONFIG_BTRFS_DEBUG
	struct list_head leak_list;
	pid_t lock_owner;
#endif
};

struct btrfs_eb_write_context {
	struct writeback_control *wbc;
	struct extent_buffer *eb;
	/* Block group @eb resides in. Only used for zoned mode. */
	struct btrfs_block_group *zoned_bg;
};

/*
 * Get the correct offset inside the folio of the extent buffer.
 *
 * @eb:		target extent buffer
 * @offset:	offset inside the extent buffer
 *
 * Will handle both sectorsize == PAGE_SIZE and sectorsize < PAGE_SIZE cases.
 */
static inline size_t get_eb_offset_in_folio(const struct extent_buffer *eb,
					    unsigned long offset)
{
	/*
	 * 1) sectorsize == PAGE_SIZE and nodesize >= PAGE_SIZE case
	 *    1.1) One large folio covering the whole eb
	 *         The eb->start is aligned to folio size, thus adding it
	 *         won't cause any difference.
	 *    1.2) Several page sized folios
	 *         The eb->start is aligned to folio (page) size, thus
	 *         adding it won't cause any difference.
	 *
	 * 2) sectorsize < PAGE_SIZE and nodesize < PAGE_SIZE case
	 *    In this case there would only be one page sized folio, and there
	 *    may be several different extent buffers in the page/folio.
	 *    We need to add eb->start to properly access the offset inside
	 *    that eb.
	 */
	return offset_in_folio(eb->folios[0], offset + eb->start);
}

static inline unsigned long get_eb_folio_index(const struct extent_buffer *eb,
					       unsigned long offset)
{
	/*
	 * 1) sectorsize == PAGE_SIZE and nodesize >= PAGE_SIZE case
	 *    1.1) One large folio covering the whole eb
	 *         The folio_shift() would be large enough to always make us
	 *         return 0 as index.
	 *    1.2) Several page sized folios
	 *         The folio_shift() would be PAGE_SHIFT, giving us the
	 *         correct index.
	 *
	 * 2) sectorsize < PAGE_SIZE and nodesize < PAGE_SIZE case
	 *    The folio would only be page sized, and always give us 0 as
	 *    index.
	 */
	return offset >> folio_shift(eb->folios[0]);
}
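/*
 * A worked sketch of the two helpers above (illustrative numbers only):
 * with 4K sectorsize, 16K nodesize and order-0 (4K) folios, an eb at
 * start == 20K spans four folios. For offset == 5000,
 * get_eb_folio_index() yields 5000 >> 12 == 1, and
 * get_eb_offset_in_folio() yields (5000 + 20480) & 4095 == 904.
 * In the subpage case (e.g. 4K nodesize inside a 64K folio) the index is
 * always 0, and adding eb->start is what selects the right 4K slice of
 * the shared folio.
 */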
/*
 * Structure to record how many bytes and which ranges are set/cleared.
 */
struct extent_changeset {
	/* How many bytes are set/cleared in this operation */
	u64 bytes_changed;

	/* Changed ranges */
	struct ulist range_changed;
};

static inline void extent_changeset_init(struct extent_changeset *changeset)
{
	changeset->bytes_changed = 0;
	ulist_init(&changeset->range_changed);
}

static inline struct extent_changeset *extent_changeset_alloc(void)
{
	struct extent_changeset *ret;

	ret = kmalloc(sizeof(*ret), GFP_KERNEL);
	if (!ret)
		return NULL;

	extent_changeset_init(ret);
	return ret;
}

static inline void extent_changeset_release(struct extent_changeset *changeset)
{
	if (!changeset)
		return;
	changeset->bytes_changed = 0;
	ulist_release(&changeset->range_changed);
}

static inline void extent_changeset_free(struct extent_changeset *changeset)
{
	if (!changeset)
		return;
	extent_changeset_release(changeset);
	kfree(changeset);
}

struct extent_map_tree;

int try_release_extent_mapping(struct page *page, gfp_t mask);
int try_release_extent_buffer(struct page *page);

int btrfs_read_folio(struct file *file, struct folio *folio);
void extent_write_locked_range(struct inode *inode, struct page *locked_page,
			       u64 start, u64 end, struct writeback_control *wbc,
			       bool pages_dirty);
int extent_writepages(struct address_space *mapping,
		      struct writeback_control *wbc);
int btree_write_cache_pages(struct address_space *mapping,
			    struct writeback_control *wbc);
void extent_readahead(struct readahead_control *rac);
int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo,
		  u64 start, u64 len);
int set_page_extent_mapped(struct page *page);
void clear_page_extent_mapped(struct page *page);

struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
					  u64 start, u64 owner_root, int level);
struct extent_buffer *__alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
						  u64 start, unsigned long len);
struct extent_buffer *alloc_dummy_extent_buffer(struct btrfs_fs_info *fs_info,
						u64 start);
struct extent_buffer *btrfs_clone_extent_buffer(const struct extent_buffer *src);
struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
					 u64 start);
void free_extent_buffer(struct extent_buffer *eb);
void free_extent_buffer_stale(struct extent_buffer *eb);
#define WAIT_NONE	0
#define WAIT_COMPLETE	1
#define WAIT_PAGE_LOCK	2
int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num,
			     struct btrfs_tree_parent_check *parent_check);
void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
void btrfs_readahead_tree_block(struct btrfs_fs_info *fs_info,
				u64 bytenr, u64 owner_root, u64 gen, int level);
void btrfs_readahead_node_child(struct extent_buffer *node, int slot);

static inline int num_extent_pages(const struct extent_buffer *eb)
{
	/*
	 * For sectorsize == PAGE_SIZE case, since nodesize is always aligned to
	 * sectorsize, it's just eb->len >> PAGE_SHIFT.
	 *
	 * For sectorsize < PAGE_SIZE case, we could have nodesize < PAGE_SIZE,
	 * thus have to ensure we get at least one page.
	 */
	return (eb->len >> PAGE_SHIFT) ?: 1;
}

/*
 * The folio count can only be determined at runtime by checking eb::folios[0],
 * as we can have either one large folio covering the whole eb (either
 * nodesize <= PAGE_SIZE, or a high order folio) or multiple single-page
 * folios.
 */
static inline int num_extent_folios(const struct extent_buffer *eb)
{
	if (folio_order(eb->folios[0]))
		return 1;
	return num_extent_pages(eb);
}
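/*
 * For example (illustrative numbers only): with 16K nodesize and 4K pages,
 * num_extent_pages() returns 16384 >> 12 == 4; with 4K nodesize and 64K
 * pages the shift yields 0 and the "?: 1" clamps the result to one page.
 * num_extent_folios() returns 1 whenever folios[0] is high order, since a
 * single large folio then backs the whole buffer.
 */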
static inline int extent_buffer_uptodate(const struct extent_buffer *eb)
{
	return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
}

int memcmp_extent_buffer(const struct extent_buffer *eb, const void *ptrv,
			 unsigned long start, unsigned long len);
void read_extent_buffer(const struct extent_buffer *eb, void *dst,
			unsigned long start, unsigned long len);
int read_extent_buffer_to_user_nofault(const struct extent_buffer *eb,
				       void __user *dst, unsigned long start,
				       unsigned long len);
void write_extent_buffer(const struct extent_buffer *eb, const void *src,
			 unsigned long start, unsigned long len);

static inline void write_extent_buffer_chunk_tree_uuid(
		const struct extent_buffer *eb, const void *chunk_tree_uuid)
{
	write_extent_buffer(eb, chunk_tree_uuid,
			    offsetof(struct btrfs_header, chunk_tree_uuid),
			    BTRFS_FSID_SIZE);
}

static inline void write_extent_buffer_fsid(const struct extent_buffer *eb,
					    const void *fsid)
{
	write_extent_buffer(eb, fsid, offsetof(struct btrfs_header, fsid),
			    BTRFS_FSID_SIZE);
}

void copy_extent_buffer_full(const struct extent_buffer *dst,
			     const struct extent_buffer *src);
void copy_extent_buffer(const struct extent_buffer *dst,
			const struct extent_buffer *src,
			unsigned long dst_offset, unsigned long src_offset,
			unsigned long len);
void memcpy_extent_buffer(const struct extent_buffer *dst,
			  unsigned long dst_offset, unsigned long src_offset,
			  unsigned long len);
void memmove_extent_buffer(const struct extent_buffer *dst,
			   unsigned long dst_offset, unsigned long src_offset,
			   unsigned long len);
void memzero_extent_buffer(const struct extent_buffer *eb, unsigned long start,
			   unsigned long len);
int extent_buffer_test_bit(const struct extent_buffer *eb, unsigned long start,
			   unsigned long pos);
void extent_buffer_bitmap_set(const struct extent_buffer *eb, unsigned long start,
			      unsigned long pos, unsigned long len);
void extent_buffer_bitmap_clear(const struct extent_buffer *eb,
				unsigned long start, unsigned long pos,
				unsigned long len);
void set_extent_buffer_dirty(struct extent_buffer *eb);
void set_extent_buffer_uptodate(struct extent_buffer *eb);
void clear_extent_buffer_uptodate(struct extent_buffer *eb);
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
				  struct page *locked_page,
				  u32 bits_to_clear, unsigned long page_ops);
int extent_invalidate_folio(struct extent_io_tree *tree,
			    struct folio *folio, size_t offset);
void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans,
			      struct extent_buffer *buf);

int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array,
			   gfp_t extra_gfp);

#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
bool find_lock_delalloc_range(struct inode *inode,
			      struct page *locked_page, u64 *start,
			      u64 *end);
#endif
struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
					       u64 start);

#ifdef CONFIG_BTRFS_DEBUG
void btrfs_extent_buffer_leak_debug_check(struct btrfs_fs_info *fs_info);
#else
#define btrfs_extent_buffer_leak_debug_check(fs_info)	do {} while (0)
#endif

#endif