Lines Matching +full:cache +full:- +full:block +full:- +full:size

1 // SPDX-License-Identifier: GPL-2.0
12 #include "disk-io.h"
13 #include "block-group.h"
14 #include "dev-replace.h"
15 #include "space-info.h"
25 #define WP_MISSING_DEV ((u64)-1)
27 #define WP_CONVENTIONAL ((u64)-2)
32 * - primary superblock: 0B (zone 0)
33 * - first copy: 512G (zone starting at that offset)
34 * - second copy: 4T (zone starting at that offset)
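A worked example of how these fixed byte offsets turn into zone numbers (a sketch based on the shift arithmetic visible in sb_zone_number() at lines 172-173, with an illustrative 256MiB zone size, i.e. zone_size_shift = 28):

    mirror 0:   zone 0
    mirror 1:   512GiB >> 28  ==  1ULL << (39 - 28)  ==  zone 2048
    mirror 2:     4TiB >> 28  ==  1ULL << (42 - 28)  ==  zone 16384

The 39 and 42 here are simply ilog2 of the 512G and 4T offsets above, which is what BTRFS_SB_LOG_FIRST_SHIFT and BTRFS_SB_LOG_SECOND_SHIFT are assumed to encode.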
52 * - BTRFS_SUPER_MIRROR_MAX zones for superblock mirrors
53 * - 3 zones to ensure at least one zone per SYSTEM, META and DATA block group
54 * - 1 zone for tree-log dedicated block group
55 * - 1 zone for relocation
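Adding these items up gives the minimum zone count the comment describes; with BTRFS_SUPER_MIRROR_MAX being 3 superblock copies, that is:

    3 (superblock mirrors) + 3 (SYSTEM + META + DATA) + 1 (tree-log) + 1 (relocation) = 8 zones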
60 * Minimum / maximum supported zone size. Currently, SMR disks have a zone
61 * size of 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range.
62 * We do not expect the zone size to become larger than 8GiB or smaller than
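Because the per-zone bookkeeping below indexes zones by number, the zone size is also carried as a shift (zone_info->zone_size_shift = ilog2(zone_info->zone_size), line 418). For the sizes mentioned in this comment:

    256MiB -> shift 28,   1GiB -> 30,   4GiB -> 32,   8GiB -> 33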
75 return (zone->cond == BLK_ZONE_COND_FULL) || in sb_zone_is_full()
76 (zone->wp + SUPER_INFO_SECTORS > zone->start + zone->capacity); in sb_zone_is_full()
121 return -ENOENT; in sb_write_pointer()
124 struct address_space *mapping = bdev->bd_mapping; in sb_write_pointer()
130 u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) - in sb_write_pointer()
156 return -EUCLEAN; in sb_write_pointer()
172 case 1: zone = 1ULL << (BTRFS_SB_LOG_FIRST_SHIFT - shift); break; in sb_zone_number()
173 case 2: zone = 1ULL << (BTRFS_SB_LOG_SECOND_SHIFT - shift); break; in sb_zone_number()
190 return (u64)zone_number << zone_info->zone_size_shift; in zone_start_physical()
194 * Emulate blkdev_report_zones() for a non-zoned device. It slices up the block
201 const sector_t zone_sectors = device->fs_info->zone_size >> SECTOR_SHIFT; in emulate_report_zones()
202 sector_t bdev_size = bdev_nr_sectors(device->bdev); in emulate_report_zones()
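Only the two declarations above match the search, but the shape of the emulation is worth noting; a minimal sketch of what each faked zone record roughly looks like (field values illustrative, assuming mainline behaviour):

    /* one conventional zone per zone_size chunk, no write pointer */
    zones[i].start    = (pos >> SECTOR_SHIFT) + i * zone_sectors;
    zones[i].len      = zone_sectors;
    zones[i].capacity = zone_sectors;
    zones[i].type     = BLK_ZONE_TYPE_CONVENTIONAL;
    zones[i].cond     = BLK_ZONE_COND_NOT_WP;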
226 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_get_dev_zones()
232 if (!bdev_is_zoned(device->bdev)) { in btrfs_get_dev_zones()
238 /* Check cache */ in btrfs_get_dev_zones()
239 if (zinfo->zone_cache) { in btrfs_get_dev_zones()
243 ASSERT(IS_ALIGNED(pos, zinfo->zone_size)); in btrfs_get_dev_zones()
244 zno = pos >> zinfo->zone_size_shift; in btrfs_get_dev_zones()
249 *nr_zones = min_t(u32, *nr_zones, zinfo->nr_zones - zno); in btrfs_get_dev_zones()
254 zone_info = &zinfo->zone_cache[zno + i]; in btrfs_get_dev_zones()
255 if (!zone_info->len) in btrfs_get_dev_zones()
260 /* Cache hit on all the zones */ in btrfs_get_dev_zones()
261 memcpy(zones, zinfo->zone_cache + zno, in btrfs_get_dev_zones()
262 sizeof(*zinfo->zone_cache) * *nr_zones); in btrfs_get_dev_zones()
267 ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones, in btrfs_get_dev_zones()
270 btrfs_err(device->fs_info, in btrfs_get_dev_zones()
272 pos, rcu_dereference(device->name), in btrfs_get_dev_zones()
273 device->devid); in btrfs_get_dev_zones()
278 return -EIO; in btrfs_get_dev_zones()
280 /* Populate cache */ in btrfs_get_dev_zones()
281 if (zinfo->zone_cache) { in btrfs_get_dev_zones()
282 u32 zno = pos >> zinfo->zone_size_shift; in btrfs_get_dev_zones()
284 memcpy(zinfo->zone_cache + zno, zones, in btrfs_get_dev_zones()
285 sizeof(*zinfo->zone_cache) * *nr_zones); in btrfs_get_dev_zones()
291 /* The emulated zone size is determined from the size of device extent */
295 struct btrfs_root *root = fs_info->dev_root; in calculate_emulated_zone_size()
307 return -ENOMEM; in calculate_emulated_zone_size()
313 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { in calculate_emulated_zone_size()
319 return -EUCLEAN; in calculate_emulated_zone_size()
322 leaf = path->nodes[0]; in calculate_emulated_zone_size()
323 dext = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); in calculate_emulated_zone_size()
324 fs_info->zone_size = btrfs_dev_extent_length(leaf, dext); in calculate_emulated_zone_size()
330 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; in btrfs_get_dev_zone_info_all_devices()
334 /* fs_info->zone_size might not be set yet. Use the incompat flag here. */ in btrfs_get_dev_zone_info_all_devices()
338 mutex_lock(&fs_devices->device_list_mutex); in btrfs_get_dev_zone_info_all_devices()
339 list_for_each_entry(device, &fs_devices->devices, dev_list) { in btrfs_get_dev_zone_info_all_devices()
341 if (!device->bdev) in btrfs_get_dev_zone_info_all_devices()
348 mutex_unlock(&fs_devices->device_list_mutex); in btrfs_get_dev_zone_info_all_devices()
355 struct btrfs_fs_info *fs_info = device->fs_info; in btrfs_get_dev_zone_info()
357 struct block_device *bdev = device->bdev; in btrfs_get_dev_zone_info()
375 if (device->zone_info) in btrfs_get_dev_zone_info()
380 return -ENOMEM; in btrfs_get_dev_zone_info()
382 device->zone_info = zone_info; in btrfs_get_dev_zone_info()
385 if (!fs_info->zone_size) { in btrfs_get_dev_zone_info()
391 ASSERT(fs_info->zone_size); in btrfs_get_dev_zone_info()
392 zone_sectors = fs_info->zone_size >> SECTOR_SHIFT; in btrfs_get_dev_zone_info()
398 zone_info->zone_size = zone_sectors << SECTOR_SHIFT; in btrfs_get_dev_zone_info()
400 /* We reject devices with a zone size larger than 8GB */ in btrfs_get_dev_zone_info()
401 if (zone_info->zone_size > BTRFS_MAX_ZONE_SIZE) { in btrfs_get_dev_zone_info()
403 "zoned: %s: zone size %llu larger than supported maximum %llu", in btrfs_get_dev_zone_info()
404 rcu_dereference(device->name), in btrfs_get_dev_zone_info()
405 zone_info->zone_size, BTRFS_MAX_ZONE_SIZE); in btrfs_get_dev_zone_info()
406 ret = -EINVAL; in btrfs_get_dev_zone_info()
408 } else if (zone_info->zone_size < BTRFS_MIN_ZONE_SIZE) { in btrfs_get_dev_zone_info()
410 "zoned: %s: zone size %llu smaller than supported minimum %u", in btrfs_get_dev_zone_info()
411 rcu_dereference(device->name), in btrfs_get_dev_zone_info()
412 zone_info->zone_size, BTRFS_MIN_ZONE_SIZE); in btrfs_get_dev_zone_info()
413 ret = -EINVAL; in btrfs_get_dev_zone_info()
418 zone_info->zone_size_shift = ilog2(zone_info->zone_size); in btrfs_get_dev_zone_info()
419 zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors); in btrfs_get_dev_zone_info()
421 zone_info->nr_zones++; in btrfs_get_dev_zone_info()
425 if (!max_active_zones && zone_info->nr_zones > BTRFS_DEFAULT_MAX_ACTIVE_ZONES) in btrfs_get_dev_zone_info()
430 rcu_dereference(device->name), max_active_zones, in btrfs_get_dev_zone_info()
432 ret = -EINVAL; in btrfs_get_dev_zone_info()
435 zone_info->max_active_zones = max_active_zones; in btrfs_get_dev_zone_info()
437 zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_get_dev_zone_info()
438 if (!zone_info->seq_zones) { in btrfs_get_dev_zone_info()
439 ret = -ENOMEM; in btrfs_get_dev_zone_info()
443 zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_get_dev_zone_info()
444 if (!zone_info->empty_zones) { in btrfs_get_dev_zone_info()
445 ret = -ENOMEM; in btrfs_get_dev_zone_info()
449 zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_get_dev_zone_info()
450 if (!zone_info->active_zones) { in btrfs_get_dev_zone_info()
451 ret = -ENOMEM; in btrfs_get_dev_zone_info()
457 ret = -ENOMEM; in btrfs_get_dev_zone_info()
462 * Enable zone cache only for a zoned device. On a non-zoned device, we in btrfs_get_dev_zone_info()
464 * use the cache. in btrfs_get_dev_zone_info()
466 if (populate_cache && bdev_is_zoned(device->bdev)) { in btrfs_get_dev_zone_info()
467 zone_info->zone_cache = vcalloc(zone_info->nr_zones, in btrfs_get_dev_zone_info()
469 if (!zone_info->zone_cache) { in btrfs_get_dev_zone_info()
470 btrfs_err(device->fs_info, in btrfs_get_dev_zone_info()
471 "zoned: failed to allocate zone cache for %s", in btrfs_get_dev_zone_info()
472 rcu_dereference(device->name)); in btrfs_get_dev_zone_info()
473 ret = -ENOMEM; in btrfs_get_dev_zone_info()
489 __set_bit(nreported, zone_info->seq_zones); in btrfs_get_dev_zone_info()
492 __set_bit(nreported, zone_info->empty_zones); in btrfs_get_dev_zone_info()
497 __set_bit(nreported, zone_info->active_zones); in btrfs_get_dev_zone_info()
503 sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len; in btrfs_get_dev_zone_info()
506 if (unlikely(nreported != zone_info->nr_zones)) { in btrfs_get_dev_zone_info()
507 btrfs_err(device->fs_info, in btrfs_get_dev_zone_info()
509 rcu_dereference(device->name), nreported, in btrfs_get_dev_zone_info()
510 zone_info->nr_zones); in btrfs_get_dev_zone_info()
511 ret = -EIO; in btrfs_get_dev_zone_info()
519 zone_info->max_active_zones = 0; in btrfs_get_dev_zone_info()
522 btrfs_err(device->fs_info, in btrfs_get_dev_zone_info()
524 nactive, rcu_dereference(device->name), in btrfs_get_dev_zone_info()
526 ret = -EIO; in btrfs_get_dev_zone_info()
529 atomic_set(&zone_info->active_zones_left, in btrfs_get_dev_zone_info()
530 max_active_zones - nactive); in btrfs_get_dev_zone_info()
531 set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags); in btrfs_get_dev_zone_info()
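For the accounting initialised just above (illustrative numbers): if the queue reports max_active_zones = 14 and the report loop counted nactive = 3 zones already open or active, active_zones_left starts at 14 - 3 = 11; btrfs_dev_set_active_zone() (line 1108) takes one with atomic_dec_if_positive() and btrfs_dev_clear_active_zone() (line 1130) gives it back.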
542 sb_zone = sb_zone_number(zone_info->zone_size_shift, i); in btrfs_get_dev_zone_info()
543 if (sb_zone + 1 >= zone_info->nr_zones) in btrfs_get_dev_zone_info()
548 &zone_info->sb_zones[sb_pos], in btrfs_get_dev_zone_info()
554 btrfs_err(device->fs_info, in btrfs_get_dev_zone_info()
555 "zoned: failed to read super block log zone info at devid %llu zone %u", in btrfs_get_dev_zone_info()
556 device->devid, sb_zone); in btrfs_get_dev_zone_info()
557 ret = -EUCLEAN; in btrfs_get_dev_zone_info()
565 if (zone_info->sb_zones[BTRFS_NR_SB_LOG_ZONES * i].type == in btrfs_get_dev_zone_info()
569 ret = sb_write_pointer(device->bdev, in btrfs_get_dev_zone_info()
570 &zone_info->sb_zones[sb_pos], &sb_wp); in btrfs_get_dev_zone_info()
571 if (unlikely(ret != -ENOENT && ret)) { in btrfs_get_dev_zone_info()
572 btrfs_err(device->fs_info, in btrfs_get_dev_zone_info()
573 "zoned: super block log zone corrupted devid %llu zone %u", in btrfs_get_dev_zone_info()
574 device->devid, sb_zone); in btrfs_get_dev_zone_info()
575 ret = -EUCLEAN; in btrfs_get_dev_zone_info()
584 model = "host-managed zoned"; in btrfs_get_dev_zone_info()
592 "%s block device %s, %u %szones of %llu bytes", in btrfs_get_dev_zone_info()
593 model, rcu_dereference(device->name), zone_info->nr_zones, in btrfs_get_dev_zone_info()
594 emulated, zone_info->zone_size); in btrfs_get_dev_zone_info()
606 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_destroy_dev_zone_info()
611 bitmap_free(zone_info->active_zones); in btrfs_destroy_dev_zone_info()
612 bitmap_free(zone_info->seq_zones); in btrfs_destroy_dev_zone_info()
613 bitmap_free(zone_info->empty_zones); in btrfs_destroy_dev_zone_info()
614 vfree(zone_info->zone_cache); in btrfs_destroy_dev_zone_info()
616 device->zone_info = NULL; in btrfs_destroy_dev_zone_info()
623 zone_info = kmemdup(orig_dev->zone_info, sizeof(*zone_info), GFP_KERNEL); in btrfs_clone_dev_zone_info()
627 zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_clone_dev_zone_info()
628 if (!zone_info->seq_zones) in btrfs_clone_dev_zone_info()
631 bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones, in btrfs_clone_dev_zone_info()
632 zone_info->nr_zones); in btrfs_clone_dev_zone_info()
634 zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_clone_dev_zone_info()
635 if (!zone_info->empty_zones) in btrfs_clone_dev_zone_info()
638 bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones, in btrfs_clone_dev_zone_info()
639 zone_info->nr_zones); in btrfs_clone_dev_zone_info()
641 zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_clone_dev_zone_info()
642 if (!zone_info->active_zones) in btrfs_clone_dev_zone_info()
645 bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones, in btrfs_clone_dev_zone_info()
646 zone_info->nr_zones); in btrfs_clone_dev_zone_info()
647 zone_info->zone_cache = NULL; in btrfs_clone_dev_zone_info()
652 bitmap_free(zone_info->seq_zones); in btrfs_clone_dev_zone_info()
653 bitmap_free(zone_info->empty_zones); in btrfs_clone_dev_zone_info()
654 bitmap_free(zone_info->active_zones); in btrfs_clone_dev_zone_info()
666 return ret ? ret : -EIO; in btrfs_get_dev_zone()
675 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { in btrfs_check_for_zoned_device()
676 if (device->bdev && bdev_is_zoned(device->bdev)) { in btrfs_check_for_zoned_device()
679 device->bdev); in btrfs_check_for_zoned_device()
680 return -EINVAL; in btrfs_check_for_zoned_device()
689 struct queue_limits *lim = &fs_info->limits; in btrfs_check_zoned_mode()
695 * Host-Managed devices can't be used without the ZONED flag. With the in btrfs_check_zoned_mode()
703 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { in btrfs_check_zoned_mode()
704 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_check_zoned_mode()
706 if (!device->bdev) in btrfs_check_zoned_mode()
710 zone_size = zone_info->zone_size; in btrfs_check_zoned_mode()
711 } else if (zone_info->zone_size != zone_size) { in btrfs_check_zoned_mode()
713 "zoned: unequal block device zone sizes: have %llu found %llu", in btrfs_check_zoned_mode()
714 zone_info->zone_size, zone_size); in btrfs_check_zoned_mode()
715 return -EINVAL; in btrfs_check_zoned_mode()
719 * With the zoned emulation, we can have non-zoned device on the in btrfs_check_zoned_mode()
721 * append size. in btrfs_check_zoned_mode()
723 if (bdev_is_zoned(device->bdev)) in btrfs_check_zoned_mode()
724 blk_stack_limits(lim, bdev_limits(device->bdev), 0); in btrfs_check_zoned_mode()
740 "zoned: zone size %llu not aligned to stripe %u", in btrfs_check_zoned_mode()
742 return -EINVAL; in btrfs_check_zoned_mode()
746 btrfs_err(fs_info, "zoned: mixed block groups not supported"); in btrfs_check_zoned_mode()
747 return -EINVAL; in btrfs_check_zoned_mode()
750 fs_info->zone_size = zone_size; in btrfs_check_zoned_mode()
758 fs_info->max_zone_append_size = ALIGN_DOWN( in btrfs_check_zoned_mode()
759 min3((u64)lim->max_zone_append_sectors << SECTOR_SHIFT, in btrfs_check_zoned_mode()
760 (u64)lim->max_sectors << SECTOR_SHIFT, in btrfs_check_zoned_mode()
761 (u64)lim->max_segments << PAGE_SHIFT), in btrfs_check_zoned_mode()
762 fs_info->sectorsize); in btrfs_check_zoned_mode()
763 fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED; in btrfs_check_zoned_mode()
765 fs_info->max_extent_size = min_not_zero(fs_info->max_extent_size, in btrfs_check_zoned_mode()
766 fs_info->max_zone_append_size); in btrfs_check_zoned_mode()
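A worked example of the min3()/ALIGN_DOWN() above (illustrative numbers, assuming 4KiB pages and a 4KiB sectorsize):

    max_zone_append_sectors = 2048  ->  2048 << 9  = 1 MiB
    max_sectors             = 2560  ->  2560 << 9  = 1.25 MiB
    max_segments            = 128   ->  128 << 12  = 512 KiB
    max_zone_append_size    = ALIGN_DOWN(min(1 MiB, 1.25 MiB, 512 KiB), 4 KiB) = 512 KiB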
769 * Check mount options here, because we might change fs_info->zoned in btrfs_check_zoned_mode()
770 * from fs_info->zone_size. in btrfs_check_zoned_mode()
772 ret = btrfs_check_mountopts_zoned(fs_info, &fs_info->mount_opt); in btrfs_check_zoned_mode()
776 btrfs_info(fs_info, "zoned mode enabled with zone size %llu", zone_size); in btrfs_check_zoned_mode()
787 * Space cache writing is not COWed. Disable that to avoid write errors in btrfs_check_mountopts_zoned()
791 btrfs_err(info, "zoned: space cache v1 is not supported"); in btrfs_check_mountopts_zoned()
792 return -EINVAL; in btrfs_check_mountopts_zoned()
797 return -EINVAL; in btrfs_check_mountopts_zoned()
821 if (ret != -ENOENT && ret < 0) in sb_log_location()
832 if (reset && reset->cond != BLK_ZONE_COND_EMPTY) { in sb_log_location()
839 reset->start, reset->len); in sb_log_location()
844 reset->cond = BLK_ZONE_COND_EMPTY; in sb_log_location()
845 reset->wp = reset->start; in sb_log_location()
847 } else if (ret != -ENOENT) { in sb_log_location()
862 wp -= BTRFS_SUPER_INFO_SIZE; in sb_log_location()
890 return -EINVAL; in btrfs_sb_log_location_bdev()
897 return -ENOENT; in btrfs_sb_log_location_bdev()
905 return -EIO; in btrfs_sb_log_location_bdev()
913 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_sb_log_location()
917 * For a zoned filesystem on a non-zoned block device, use the same in btrfs_sb_log_location()
918 * super block locations as regular filesystem. Doing so, the super in btrfs_sb_log_location()
919 * block can always be retrieved and the zoned flag of the volume in btrfs_sb_log_location()
920 * detected from the super block information. in btrfs_sb_log_location()
922 if (!bdev_is_zoned(device->bdev)) { in btrfs_sb_log_location()
927 zone_num = sb_zone_number(zinfo->zone_size_shift, mirror); in btrfs_sb_log_location()
928 if (zone_num + 1 >= zinfo->nr_zones) in btrfs_sb_log_location()
929 return -ENOENT; in btrfs_sb_log_location()
931 return sb_log_location(device->bdev, in btrfs_sb_log_location()
932 &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror], in btrfs_sb_log_location()
944 zone_num = sb_zone_number(zinfo->zone_size_shift, mirror); in is_sb_log_zone()
945 if (zone_num + 1 >= zinfo->nr_zones) in is_sb_log_zone()
948 if (!test_bit(zone_num, zinfo->seq_zones)) in is_sb_log_zone()
956 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_advance_sb_log()
963 zone = &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror]; in btrfs_advance_sb_log()
966 if (zone->cond == BLK_ZONE_COND_FULL) { in btrfs_advance_sb_log()
971 if (zone->cond == BLK_ZONE_COND_EMPTY) in btrfs_advance_sb_log()
972 zone->cond = BLK_ZONE_COND_IMP_OPEN; in btrfs_advance_sb_log()
974 zone->wp += SUPER_INFO_SECTORS; in btrfs_advance_sb_log()
985 if (zone->wp != zone->start + zone->capacity) { in btrfs_advance_sb_log()
990 ret = blkdev_zone_mgmt(device->bdev, in btrfs_advance_sb_log()
991 REQ_OP_ZONE_FINISH, zone->start, in btrfs_advance_sb_log()
992 zone->len); in btrfs_advance_sb_log()
998 zone->wp = zone->start + zone->len; in btrfs_advance_sb_log()
999 zone->cond = BLK_ZONE_COND_FULL; in btrfs_advance_sb_log()
1006 return -EIO; in btrfs_advance_sb_log()
1026 return -ENOENT; in btrfs_reset_sb_log_zones()
1041 * @num_bytes: size of wanted region
1050 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_find_allocatable_zones()
1051 const u8 shift = zinfo->zone_size_shift; in btrfs_find_allocatable_zones()
1058 ASSERT(IS_ALIGNED(hole_start, zinfo->zone_size)); in btrfs_find_allocatable_zones()
1059 ASSERT(IS_ALIGNED(num_bytes, zinfo->zone_size)); in btrfs_find_allocatable_zones()
1065 if (end > zinfo->nr_zones) in btrfs_find_allocatable_zones()
1070 !bitmap_test_range_all_set(zinfo->empty_zones, begin, nzones)) { in btrfs_find_allocatable_zones()
1071 pos += zinfo->zone_size; in btrfs_find_allocatable_zones()
1095 zinfo->zone_size); in btrfs_find_allocatable_zones()
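A worked example of the search visible above (illustrative numbers): with 256MiB zones (shift = 28) and num_bytes = 1GiB, nzones = 4. Each candidate pos is turned into a zone index (begin = pos >> 28), the range must fit below zinfo->nr_zones, and if the 4 zones starting at begin are not all flagged in zinfo->empty_zones the search advances by one zone (pos += zinfo->zone_size) and tries again until the hole is exhausted.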
1108 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_dev_set_active_zone()
1109 unsigned int zno = (pos >> zone_info->zone_size_shift); in btrfs_dev_set_active_zone()
1112 if (zone_info->max_active_zones == 0) in btrfs_dev_set_active_zone()
1115 if (!test_bit(zno, zone_info->active_zones)) { in btrfs_dev_set_active_zone()
1117 if (atomic_dec_if_positive(&zone_info->active_zones_left) < 0) in btrfs_dev_set_active_zone()
1119 if (test_and_set_bit(zno, zone_info->active_zones)) { in btrfs_dev_set_active_zone()
1121 atomic_inc(&zone_info->active_zones_left); in btrfs_dev_set_active_zone()
1130 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_dev_clear_active_zone()
1131 unsigned int zno = (pos >> zone_info->zone_size_shift); in btrfs_dev_clear_active_zone()
1134 if (zone_info->max_active_zones == 0) in btrfs_dev_clear_active_zone()
1137 if (test_and_clear_bit(zno, zone_info->active_zones)) in btrfs_dev_clear_active_zone()
1138 atomic_inc(&zone_info->active_zones_left); in btrfs_dev_clear_active_zone()
1149 ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_RESET, in btrfs_reset_device_zone()
1159 physical += device->zone_info->zone_size; in btrfs_reset_device_zone()
1160 length -= device->zone_info->zone_size; in btrfs_reset_device_zone()
1166 int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size) in btrfs_ensure_empty_zones() argument
1168 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_ensure_empty_zones()
1169 const u8 shift = zinfo->zone_size_shift; in btrfs_ensure_empty_zones()
1171 unsigned long nbits = size >> shift; in btrfs_ensure_empty_zones()
1175 ASSERT(IS_ALIGNED(start, zinfo->zone_size)); in btrfs_ensure_empty_zones()
1176 ASSERT(IS_ALIGNED(size, zinfo->zone_size)); in btrfs_ensure_empty_zones()
1178 if (begin + nbits > zinfo->nr_zones) in btrfs_ensure_empty_zones()
1179 return -ERANGE; in btrfs_ensure_empty_zones()
1182 if (bitmap_test_range_all_zero(zinfo->seq_zones, begin, nbits)) in btrfs_ensure_empty_zones()
1186 if (bitmap_test_range_all_set(zinfo->seq_zones, begin, nbits) && in btrfs_ensure_empty_zones()
1187 bitmap_test_range_all_set(zinfo->empty_zones, begin, nbits)) in btrfs_ensure_empty_zones()
1190 for (pos = start; pos < start + size; pos += zinfo->zone_size) { in btrfs_ensure_empty_zones()
1199 device->fs_info, in btrfs_ensure_empty_zones()
1201 rcu_dereference(device->name), device->devid, pos >> shift); in btrfs_ensure_empty_zones()
1204 ret = btrfs_reset_device_zone(device, pos, zinfo->zone_size, in btrfs_ensure_empty_zones()
1215 * for a block group consisting of conventional zones. It points to the
1216 * end of the highest addressed extent in the block group as an allocation
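A worked example of the offset this computes (illustrative numbers): for a block group with cache->start = 1GiB and length = 256MiB whose highest-addressed extent item covers 64KiB starting at cache->start + 200MiB, the backwards search below ends on that item and returns *offset_ret = (objectid + length) - cache->start = 200MiB + 64KiB.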
1219 static int calculate_alloc_pointer(struct btrfs_block_group *cache, in calculate_alloc_pointer() argument
1222 struct btrfs_fs_info *fs_info = cache->fs_info; in calculate_alloc_pointer()
1231 * Avoid tree lookups for a new block group, there's no use for it. in calculate_alloc_pointer()
1234 * Also, we have a lock chain of extent buffer lock -> chunk mutex. in calculate_alloc_pointer()
1235 * For a new block group, this function is called from in calculate_alloc_pointer()
1247 return -ENOMEM; in calculate_alloc_pointer()
1249 key.objectid = cache->start + cache->length; in calculate_alloc_pointer()
1257 ret = -EUCLEAN; in calculate_alloc_pointer()
1261 ret = btrfs_previous_extent_item(root, path, cache->start); in calculate_alloc_pointer()
1270 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); in calculate_alloc_pointer()
1275 length = fs_info->nodesize; in calculate_alloc_pointer()
1277 if (unlikely(!(found_key.objectid >= cache->start && in calculate_alloc_pointer()
1278 found_key.objectid + length <= cache->start + cache->length))) { in calculate_alloc_pointer()
1279 return -EUCLEAN; in calculate_alloc_pointer()
1281 *offset_ret = found_key.objectid + length - cache->start; in calculate_alloc_pointer()
1295 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; in btrfs_load_zone_info()
1302 info->physical = map->stripes[zone_idx].physical; in btrfs_load_zone_info()
1304 down_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1305 device = map->stripes[zone_idx].dev; in btrfs_load_zone_info()
1307 if (!device->bdev) { in btrfs_load_zone_info()
1308 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1309 info->alloc_offset = WP_MISSING_DEV; in btrfs_load_zone_info()
1314 if (!device->zone_info->max_active_zones) in btrfs_load_zone_info()
1317 if (!btrfs_dev_is_sequential(device, info->physical)) { in btrfs_load_zone_info()
1318 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1319 info->alloc_offset = WP_CONVENTIONAL; in btrfs_load_zone_info()
1323 ASSERT(!new || btrfs_dev_is_empty_zone(device, info->physical)); in btrfs_load_zone_info()
1325 /* This zone will be used for allocation, so mark this zone non-empty. */ in btrfs_load_zone_info()
1326 btrfs_dev_clear_zone_empty(device, info->physical); in btrfs_load_zone_info()
1329 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) in btrfs_load_zone_info()
1330 btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical); in btrfs_load_zone_info()
1336 WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size)); in btrfs_load_zone_info()
1341 capacity = bdev_zone_capacity(device->bdev, info->physical >> SECTOR_SHIFT); in btrfs_load_zone_info()
1342 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1343 info->alloc_offset = 0; in btrfs_load_zone_info()
1344 info->capacity = capacity << SECTOR_SHIFT; in btrfs_load_zone_info()
1350 ret = btrfs_get_dev_zone(device, info->physical, &zone); in btrfs_load_zone_info()
1353 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1354 if (ret != -EIO && ret != -EOPNOTSUPP) in btrfs_load_zone_info()
1356 info->alloc_offset = WP_MISSING_DEV; in btrfs_load_zone_info()
1363 zone.start << SECTOR_SHIFT, rcu_dereference(device->name), in btrfs_load_zone_info()
1364 device->devid); in btrfs_load_zone_info()
1365 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1366 return -EIO; in btrfs_load_zone_info()
1369 info->capacity = (zone.capacity << SECTOR_SHIFT); in btrfs_load_zone_info()
1376 (info->physical >> device->zone_info->zone_size_shift), in btrfs_load_zone_info()
1377 rcu_dereference(device->name), device->devid); in btrfs_load_zone_info()
1378 info->alloc_offset = WP_MISSING_DEV; in btrfs_load_zone_info()
1381 info->alloc_offset = 0; in btrfs_load_zone_info()
1384 info->alloc_offset = info->capacity; in btrfs_load_zone_info()
1388 info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT); in btrfs_load_zone_info()
1393 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1402 if (unlikely(info->alloc_offset == WP_MISSING_DEV)) { in btrfs_load_block_group_single()
1403 btrfs_err(bg->fs_info, in btrfs_load_block_group_single()
1405 info->physical); in btrfs_load_block_group_single()
1406 return -EIO; in btrfs_load_block_group_single()
1409 bg->alloc_offset = info->alloc_offset; in btrfs_load_block_group_single()
1410 bg->zone_capacity = info->capacity; in btrfs_load_block_group_single()
1412 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_single()
1422 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_load_block_group_dup()
1424 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { in btrfs_load_block_group_dup()
1425 btrfs_err(fs_info, "zoned: data DUP profile needs raid-stripe-tree"); in btrfs_load_block_group_dup()
1426 return -EINVAL; in btrfs_load_block_group_dup()
1429 bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity); in btrfs_load_block_group_dup()
1432 btrfs_err(bg->fs_info, in btrfs_load_block_group_dup()
1435 return -EIO; in btrfs_load_block_group_dup()
1438 btrfs_err(bg->fs_info, in btrfs_load_block_group_dup()
1441 return -EIO; in btrfs_load_block_group_dup()
1451 btrfs_err(bg->fs_info, in btrfs_load_block_group_dup()
1453 return -EIO; in btrfs_load_block_group_dup()
1458 return -EIO; in btrfs_load_block_group_dup()
1460 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_dup()
1463 bg->alloc_offset = zone_info[0].alloc_offset; in btrfs_load_block_group_dup()
1473 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_load_block_group_raid1()
1476 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { in btrfs_load_block_group_raid1()
1477 btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", in btrfs_load_block_group_raid1()
1478 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_raid1()
1479 return -EINVAL; in btrfs_load_block_group_raid1()
1483 bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity); in btrfs_load_block_group_raid1()
1485 for (i = 0; i < map->num_stripes; i++) { in btrfs_load_block_group_raid1()
1496 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_raid1()
1497 return -EIO; in btrfs_load_block_group_raid1()
1502 return -EIO; in btrfs_load_block_group_raid1()
1506 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_raid1()
1511 bg->alloc_offset = zone_info[0].alloc_offset; in btrfs_load_block_group_raid1()
1513 bg->alloc_offset = zone_info[i - 1].alloc_offset; in btrfs_load_block_group_raid1()
1524 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_load_block_group_raid0()
1526 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { in btrfs_load_block_group_raid0()
1527 btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", in btrfs_load_block_group_raid0()
1528 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_raid0()
1529 return -EINVAL; in btrfs_load_block_group_raid0()
1532 for (int i = 0; i < map->num_stripes; i++) { in btrfs_load_block_group_raid0()
1541 stripe_nr = div64_u64(last_alloc, map->stripe_size); in btrfs_load_block_group_raid0()
1542 stripe_offset = stripe_nr * map->stripe_size; in btrfs_load_block_group_raid0()
1543 full_stripe_nr = div_u64(stripe_nr, map->num_stripes); in btrfs_load_block_group_raid0()
1544 div_u64_rem(stripe_nr, map->num_stripes, &stripe_index); in btrfs_load_block_group_raid0()
1547 full_stripe_nr * map->stripe_size; in btrfs_load_block_group_raid0()
1550 zone_info[i].alloc_offset += map->stripe_size; in btrfs_load_block_group_raid0()
1553 (last_alloc - stripe_offset); in btrfs_load_block_group_raid0()
1558 return -EIO; in btrfs_load_block_group_raid0()
1561 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_raid0()
1563 bg->zone_capacity += zone_info[i].capacity; in btrfs_load_block_group_raid0()
1564 bg->alloc_offset += zone_info[i].alloc_offset; in btrfs_load_block_group_raid0()
1576 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_load_block_group_raid10()
1578 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { in btrfs_load_block_group_raid10()
1579 btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", in btrfs_load_block_group_raid10()
1580 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_raid10()
1581 return -EINVAL; in btrfs_load_block_group_raid10()
1584 for (int i = 0; i < map->num_stripes; i++) { in btrfs_load_block_group_raid10()
1590 return -EIO; in btrfs_load_block_group_raid10()
1593 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_raid10()
1601 stripe_nr = div64_u64(last_alloc, map->stripe_size); in btrfs_load_block_group_raid10()
1602 stripe_offset = stripe_nr * map->stripe_size; in btrfs_load_block_group_raid10()
1604 map->num_stripes / map->sub_stripes); in btrfs_load_block_group_raid10()
1606 (map->num_stripes / map->sub_stripes), in btrfs_load_block_group_raid10()
1610 full_stripe_nr * map->stripe_size; in btrfs_load_block_group_raid10()
1612 if (stripe_index > (i / map->sub_stripes)) in btrfs_load_block_group_raid10()
1613 zone_info[i].alloc_offset += map->stripe_size; in btrfs_load_block_group_raid10()
1614 else if (stripe_index == (i / map->sub_stripes)) in btrfs_load_block_group_raid10()
1616 (last_alloc - stripe_offset); in btrfs_load_block_group_raid10()
1619 if ((i % map->sub_stripes) == 0) { in btrfs_load_block_group_raid10()
1620 bg->zone_capacity += zone_info[i].capacity; in btrfs_load_block_group_raid10()
1621 bg->alloc_offset += zone_info[i].alloc_offset; in btrfs_load_block_group_raid10()
1628 int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) in btrfs_load_block_group_zone_info() argument
1630 struct btrfs_fs_info *fs_info = cache->fs_info; in btrfs_load_block_group_zone_info()
1632 u64 logical = cache->start; in btrfs_load_block_group_zone_info()
1633 u64 length = cache->length; in btrfs_load_block_group_zone_info()
1646 if (unlikely(!IS_ALIGNED(length, fs_info->zone_size))) { in btrfs_load_block_group_zone_info()
1648 "zoned: block group %llu len %llu unaligned to zone size %llu", in btrfs_load_block_group_zone_info()
1649 logical, length, fs_info->zone_size); in btrfs_load_block_group_zone_info()
1650 return -EIO; in btrfs_load_block_group_zone_info()
1655 return -EINVAL; in btrfs_load_block_group_zone_info()
1657 cache->physical_map = map; in btrfs_load_block_group_zone_info()
1659 zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS); in btrfs_load_block_group_zone_info()
1661 ret = -ENOMEM; in btrfs_load_block_group_zone_info()
1665 active = bitmap_zalloc(map->num_stripes, GFP_NOFS); in btrfs_load_block_group_zone_info()
1667 ret = -ENOMEM; in btrfs_load_block_group_zone_info()
1671 for (i = 0; i < map->num_stripes; i++) { in btrfs_load_block_group_zone_info()
1683 set_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags); in btrfs_load_block_group_zone_info()
1686 /* Zone capacity is always zone size in emulation */ in btrfs_load_block_group_zone_info()
1687 cache->zone_capacity = cache->length; in btrfs_load_block_group_zone_info()
1688 ret = calculate_alloc_pointer(cache, &last_alloc, new); in btrfs_load_block_group_zone_info()
1692 cache->start); in btrfs_load_block_group_zone_info()
1694 } else if (map->num_stripes == num_conventional) { in btrfs_load_block_group_zone_info()
1695 cache->alloc_offset = last_alloc; in btrfs_load_block_group_zone_info()
1696 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); in btrfs_load_block_group_zone_info()
1701 profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK; in btrfs_load_block_group_zone_info()
1704 ret = btrfs_load_block_group_single(cache, &zone_info[0], active); in btrfs_load_block_group_zone_info()
1707 ret = btrfs_load_block_group_dup(cache, map, zone_info, active, in btrfs_load_block_group_zone_info()
1713 ret = btrfs_load_block_group_raid1(cache, map, zone_info, in btrfs_load_block_group_zone_info()
1717 ret = btrfs_load_block_group_raid0(cache, map, zone_info, in btrfs_load_block_group_zone_info()
1721 ret = btrfs_load_block_group_raid10(cache, map, zone_info, in btrfs_load_block_group_zone_info()
1728 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_zone_info()
1729 ret = -EINVAL; in btrfs_load_block_group_zone_info()
1733 if (ret == -EIO && profile != 0 && profile != BTRFS_BLOCK_GROUP_RAID0 && in btrfs_load_block_group_zone_info()
1736 * Detected broken write pointer. Make this block group in btrfs_load_block_group_zone_info()
1738 * allocatable region. Relocating this block group will fix the in btrfs_load_block_group_zone_info()
1743 * reading from this block group won't work anyway by a missing in btrfs_load_block_group_zone_info()
1746 cache->alloc_offset = cache->zone_capacity; in btrfs_load_block_group_zone_info()
1751 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && in btrfs_load_block_group_zone_info()
1752 (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && in btrfs_load_block_group_zone_info()
1753 !fs_info->stripe_root) { in btrfs_load_block_group_zone_info()
1754 btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", in btrfs_load_block_group_zone_info()
1755 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_zone_info()
1756 ret = -EINVAL; in btrfs_load_block_group_zone_info()
1759 if (unlikely(cache->alloc_offset > cache->zone_capacity)) { in btrfs_load_block_group_zone_info()
1761 "zoned: invalid write pointer %llu (larger than zone capacity %llu) in block group %llu", in btrfs_load_block_group_zone_info()
1762 cache->alloc_offset, cache->zone_capacity, in btrfs_load_block_group_zone_info()
1763 cache->start); in btrfs_load_block_group_zone_info()
1764 ret = -EIO; in btrfs_load_block_group_zone_info()
1768 if (!ret && num_conventional && last_alloc > cache->alloc_offset) { in btrfs_load_block_group_zone_info()
1771 logical, last_alloc, cache->alloc_offset); in btrfs_load_block_group_zone_info()
1772 ret = -EIO; in btrfs_load_block_group_zone_info()
1776 cache->meta_write_pointer = cache->alloc_offset + cache->start; in btrfs_load_block_group_zone_info()
1777 if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags)) { in btrfs_load_block_group_zone_info()
1778 btrfs_get_block_group(cache); in btrfs_load_block_group_zone_info()
1779 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_load_block_group_zone_info()
1780 list_add_tail(&cache->active_bg_list, in btrfs_load_block_group_zone_info()
1781 &fs_info->zone_active_bgs); in btrfs_load_block_group_zone_info()
1782 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_load_block_group_zone_info()
1785 btrfs_free_chunk_map(cache->physical_map); in btrfs_load_block_group_zone_info()
1786 cache->physical_map = NULL; in btrfs_load_block_group_zone_info()
1794 void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) in btrfs_calc_zone_unusable() argument
1798 if (!btrfs_is_zoned(cache->fs_info)) in btrfs_calc_zone_unusable()
1801 WARN_ON(cache->bytes_super != 0); in btrfs_calc_zone_unusable()
1802 unusable = (cache->alloc_offset - cache->used) + in btrfs_calc_zone_unusable()
1803 (cache->length - cache->zone_capacity); in btrfs_calc_zone_unusable()
1804 free = cache->zone_capacity - cache->alloc_offset; in btrfs_calc_zone_unusable()
1806 /* We only need ->free_space in ALLOC_SEQ block groups */ in btrfs_calc_zone_unusable()
1807 cache->cached = BTRFS_CACHE_FINISHED; in btrfs_calc_zone_unusable()
1808 cache->free_space_ctl->free_space = free; in btrfs_calc_zone_unusable()
1809 cache->zone_unusable = unusable; in btrfs_calc_zone_unusable()
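A worked example of the two formulas above (illustrative numbers): length = 256MiB, zone_capacity = 250MiB, alloc_offset = 100MiB and used = 80MiB give zone_unusable = (100 - 80) + (256 - 250) = 26MiB and free_space = 250 - 100 = 150MiB.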
1814 u64 start = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT); in btrfs_use_zone_append()
1815 struct btrfs_inode *inode = bbio->inode; in btrfs_use_zone_append()
1816 struct btrfs_fs_info *fs_info = bbio->fs_info; in btrfs_use_zone_append()
1817 struct btrfs_block_group *cache; in btrfs_use_zone_append() local
1826 if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE) in btrfs_use_zone_append()
1832 * Furthermore we have set aside own block-group from which only the in btrfs_use_zone_append()
1837 if (btrfs_is_data_reloc_root(inode->root)) in btrfs_use_zone_append()
1840 cache = btrfs_lookup_block_group(fs_info, start); in btrfs_use_zone_append()
1841 ASSERT(cache); in btrfs_use_zone_append()
1842 if (!cache) in btrfs_use_zone_append()
1845 ret = !!test_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags); in btrfs_use_zone_append()
1846 btrfs_put_block_group(cache); in btrfs_use_zone_append()
1853 const u64 physical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; in btrfs_record_physical_zoned()
1854 struct btrfs_ordered_sum *sum = bbio->sums; in btrfs_record_physical_zoned()
1856 if (physical < bbio->orig_physical) in btrfs_record_physical_zoned()
1857 sum->logical -= bbio->orig_physical - physical; in btrfs_record_physical_zoned()
1859 sum->logical += physical - bbio->orig_physical; in btrfs_record_physical_zoned()
1865 struct extent_map_tree *em_tree = &ordered->inode->extent_tree; in btrfs_rewrite_logical_zoned()
1868 ordered->disk_bytenr = logical; in btrfs_rewrite_logical_zoned()
1870 write_lock(&em_tree->lock); in btrfs_rewrite_logical_zoned()
1871 em = btrfs_search_extent_mapping(em_tree, ordered->file_offset, in btrfs_rewrite_logical_zoned()
1872 ordered->num_bytes); in btrfs_rewrite_logical_zoned()
1874 ASSERT(em->offset == 0); in btrfs_rewrite_logical_zoned()
1875 em->disk_bytenr = logical; in btrfs_rewrite_logical_zoned()
1877 write_unlock(&em_tree->lock); in btrfs_rewrite_logical_zoned()
1885 if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) && in btrfs_zoned_split_ordered()
1886 btrfs_split_extent_map(ordered->inode, ordered->file_offset, in btrfs_zoned_split_ordered()
1887 ordered->num_bytes, len, logical)) in btrfs_zoned_split_ordered()
1893 new->disk_bytenr = logical; in btrfs_zoned_split_ordered()
1900 struct btrfs_inode *inode = ordered->inode; in btrfs_finish_ordered_zoned()
1901 struct btrfs_fs_info *fs_info = inode->root->fs_info; in btrfs_finish_ordered_zoned()
1906 * A write to a pre-allocated region is for data relocation, and so in btrfs_finish_ordered_zoned()
1909 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) in btrfs_finish_ordered_zoned()
1912 ASSERT(!list_empty(&ordered->list)); in btrfs_finish_ordered_zoned()
1913 /* The ordered->list can be empty in the above pre-alloc case. */ in btrfs_finish_ordered_zoned()
1914 sum = list_first_entry(&ordered->list, struct btrfs_ordered_sum, list); in btrfs_finish_ordered_zoned()
1915 logical = sum->logical; in btrfs_finish_ordered_zoned()
1916 len = sum->len; in btrfs_finish_ordered_zoned()
1918 while (len < ordered->disk_num_bytes) { in btrfs_finish_ordered_zoned()
1920 if (sum->logical == logical + len) { in btrfs_finish_ordered_zoned()
1921 len += sum->len; in btrfs_finish_ordered_zoned()
1925 set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); in btrfs_finish_ordered_zoned()
1929 logical = sum->logical; in btrfs_finish_ordered_zoned()
1930 len = sum->len; in btrfs_finish_ordered_zoned()
1933 if (ordered->disk_bytenr != logical) in btrfs_finish_ordered_zoned()
1943 if ((inode->flags & BTRFS_INODE_NODATASUM) || in btrfs_finish_ordered_zoned()
1944 test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state)) { in btrfs_finish_ordered_zoned()
1945 while ((sum = list_first_entry_or_null(&ordered->list, in btrfs_finish_ordered_zoned()
1947 list_del(&sum->list); in btrfs_finish_ordered_zoned()
1956 const struct writeback_control *wbc = ctx->wbc; in check_bg_is_active()
1957 struct btrfs_block_group *block_group = ctx->zoned_bg; in check_bg_is_active()
1958 struct btrfs_fs_info *fs_info = block_group->fs_info; in check_bg_is_active()
1960 if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) in check_bg_is_active()
1963 if (fs_info->treelog_bg == block_group->start) { in check_bg_is_active()
1973 /* zoned_meta_io_lock protects fs_info->active_{meta,system}_bg. */ in check_bg_is_active()
1974 lockdep_assert_held(&fs_info->zoned_meta_io_lock); in check_bg_is_active()
1981 if (tgt->meta_write_pointer < tgt->start + tgt->alloc_offset) { in check_bg_is_active()
1982 if (wbc->sync_mode == WB_SYNC_NONE || in check_bg_is_active()
1983 (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)) in check_bg_is_active()
1987 /* Pivot active metadata/system block group. */ in check_bg_is_active()
2010 * Check if @ctx->eb is aligned to the write pointer.
2013 * 0: @ctx->eb is at the write pointer. You can write it.
2014 * -EAGAIN: There is a hole. The caller should handle the case.
2015 * -EBUSY: There is a hole, but the caller can just bail out.
2020 const struct writeback_control *wbc = ctx->wbc; in btrfs_check_meta_write_pointer()
2021 const struct extent_buffer *eb = ctx->eb; in btrfs_check_meta_write_pointer()
2022 struct btrfs_block_group *block_group = ctx->zoned_bg; in btrfs_check_meta_write_pointer()
2028 if (block_group->start > eb->start || in btrfs_check_meta_write_pointer()
2029 block_group->start + block_group->length <= eb->start) { in btrfs_check_meta_write_pointer()
2032 ctx->zoned_bg = NULL; in btrfs_check_meta_write_pointer()
2037 block_group = btrfs_lookup_block_group(fs_info, eb->start); in btrfs_check_meta_write_pointer()
2040 ctx->zoned_bg = block_group; in btrfs_check_meta_write_pointer()
2043 if (block_group->meta_write_pointer == eb->start) { in btrfs_check_meta_write_pointer()
2046 if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags)) in btrfs_check_meta_write_pointer()
2049 if (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) in btrfs_check_meta_write_pointer()
2050 tgt = &fs_info->active_system_bg; in btrfs_check_meta_write_pointer()
2052 tgt = &fs_info->active_meta_bg; in btrfs_check_meta_write_pointer()
2058 * Since we may release fs_info->zoned_meta_io_lock, someone can already in btrfs_check_meta_write_pointer()
2061 if (block_group->meta_write_pointer > eb->start) in btrfs_check_meta_write_pointer()
2062 return -EBUSY; in btrfs_check_meta_write_pointer()
2065 if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) in btrfs_check_meta_write_pointer()
2066 return -EAGAIN; in btrfs_check_meta_write_pointer()
2067 return -EBUSY; in btrfs_check_meta_write_pointer()
2073 return -EOPNOTSUPP; in btrfs_zoned_issue_zeroout()
2075 return blkdev_issue_zeroout(device->bdev, physical >> SECTOR_SHIFT, in btrfs_zoned_issue_zeroout()
2091 ret = -EIO; in read_zone_info()
2095 if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) { in read_zone_info()
2096 ret = -EINVAL; in read_zone_info()
2101 nmirrors = (int)bioc->num_stripes; in read_zone_info()
2103 u64 physical = bioc->stripes[i].physical; in read_zone_info()
2104 struct btrfs_device *dev = bioc->stripes[i].dev; in read_zone_info()
2107 if (!dev->bdev) in read_zone_info()
2112 if (ret == -EIO || ret == -EOPNOTSUPP) in read_zone_info()
2124 * filling zeros from @physical_pos to the write pointer of the dev-replace
2130 struct btrfs_fs_info *fs_info = tgt_dev->fs_info; in btrfs_sync_zone_write_pointer()
2143 wp = physical_start + ((zone.wp - zone.start) << SECTOR_SHIFT); in btrfs_sync_zone_write_pointer()
2149 return -EUCLEAN; in btrfs_sync_zone_write_pointer()
2151 length = wp - physical_pos; in btrfs_sync_zone_write_pointer()
2156 * Activate block group and underlying device zones
2158 * @block_group: the block group to activate
2164 struct btrfs_fs_info *fs_info = block_group->fs_info; in btrfs_zone_activate()
2168 const bool is_data = (block_group->flags & BTRFS_BLOCK_GROUP_DATA); in btrfs_zone_activate()
2172 if (!btrfs_is_zoned(block_group->fs_info)) in btrfs_zone_activate()
2175 map = block_group->physical_map; in btrfs_zone_activate()
2177 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_zone_activate()
2178 spin_lock(&block_group->lock); in btrfs_zone_activate()
2179 if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { in btrfs_zone_activate()
2184 if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) { in btrfs_zone_activate()
2185 /* The caller should check if the block group is full. */ in btrfs_zone_activate()
2192 WARN_ON_ONCE(block_group->meta_write_pointer != block_group->start); in btrfs_zone_activate()
2195 for (i = 0; i < map->num_stripes; i++) { in btrfs_zone_activate()
2199 device = map->stripes[i].dev; in btrfs_zone_activate()
2200 physical = map->stripes[i].physical; in btrfs_zone_activate()
2201 zinfo = device->zone_info; in btrfs_zone_activate()
2203 if (!device->bdev) in btrfs_zone_activate()
2206 if (zinfo->max_active_zones == 0) in btrfs_zone_activate()
2210 reserved = zinfo->reserved_active_zones; in btrfs_zone_activate()
2212 * For the data block group, leave active zones for one in btrfs_zone_activate()
2213 * metadata block group and one system block group. in btrfs_zone_activate()
2215 if (atomic_read(&zinfo->active_zones_left) <= reserved) { in btrfs_zone_activate()
2226 zinfo->reserved_active_zones--; in btrfs_zone_activate()
2230 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); in btrfs_zone_activate()
2231 spin_unlock(&block_group->lock); in btrfs_zone_activate()
2233 /* For the active block group list */ in btrfs_zone_activate()
2235 list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs); in btrfs_zone_activate()
2236 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_zone_activate()
2241 spin_unlock(&block_group->lock); in btrfs_zone_activate()
2242 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_zone_activate()
2248 struct btrfs_fs_info *fs_info = block_group->fs_info; in wait_eb_writebacks()
2249 const u64 end = block_group->start + block_group->length; in wait_eb_writebacks()
2251 unsigned long index, start = (block_group->start >> fs_info->nodesize_bits); in wait_eb_writebacks()
2254 xa_for_each_start(&fs_info->buffer_tree, index, eb, start) { in wait_eb_writebacks()
2255 if (eb->start < block_group->start) in wait_eb_writebacks()
2257 if (eb->start >= end) in wait_eb_writebacks()
2269 struct btrfs_device *device = stripe->dev; in call_zone_finish()
2270 const u64 physical = stripe->physical; in call_zone_finish()
2271 struct btrfs_zoned_device_info *zinfo = device->zone_info; in call_zone_finish()
2274 if (!device->bdev) in call_zone_finish()
2277 if (zinfo->max_active_zones == 0) in call_zone_finish()
2284 ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, in call_zone_finish()
2286 zinfo->zone_size >> SECTOR_SHIFT); in call_zone_finish()
2293 if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA)) in call_zone_finish()
2294 zinfo->reserved_active_zones++; in call_zone_finish()
2302 struct btrfs_fs_info *fs_info = block_group->fs_info; in do_zone_finish()
2304 const bool is_metadata = (block_group->flags & in do_zone_finish()
2306 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; in do_zone_finish()
2310 spin_lock(&block_group->lock); in do_zone_finish()
2311 if (!test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { in do_zone_finish()
2312 spin_unlock(&block_group->lock); in do_zone_finish()
2318 block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) { in do_zone_finish()
2319 spin_unlock(&block_group->lock); in do_zone_finish()
2320 return -EAGAIN; in do_zone_finish()
2324 * If we are sure that the block group is full (= no more room left for in do_zone_finish()
2325 * new allocation) and the IO for the last usable block is completed, we in do_zone_finish()
2328 * and block_group->meta_write_pointer for metadata. in do_zone_finish()
2331 if (test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { in do_zone_finish()
2332 spin_unlock(&block_group->lock); in do_zone_finish()
2333 return -EAGAIN; in do_zone_finish()
2335 spin_unlock(&block_group->lock); in do_zone_finish()
2341 /* Ensure all writes in this block group finish */ in do_zone_finish()
2349 spin_lock(&block_group->lock); in do_zone_finish()
2352 * Bail out if someone already deactivated the block group, or in do_zone_finish()
2353 * allocated space is left in the block group. in do_zone_finish()
2356 &block_group->runtime_flags)) { in do_zone_finish()
2357 spin_unlock(&block_group->lock); in do_zone_finish()
2362 if (block_group->reserved || in do_zone_finish()
2364 &block_group->runtime_flags)) { in do_zone_finish()
2365 spin_unlock(&block_group->lock); in do_zone_finish()
2367 return -EAGAIN; in do_zone_finish()
2371 clear_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); in do_zone_finish()
2372 block_group->alloc_offset = block_group->zone_capacity; in do_zone_finish()
2373 if (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) in do_zone_finish()
2374 block_group->meta_write_pointer = block_group->start + in do_zone_finish()
2375 block_group->zone_capacity; in do_zone_finish()
2376 block_group->free_space_ctl->free_space = 0; in do_zone_finish()
2379 spin_unlock(&block_group->lock); in do_zone_finish()
2381 down_read(&dev_replace->rwsem); in do_zone_finish()
2382 map = block_group->physical_map; in do_zone_finish()
2383 for (i = 0; i < map->num_stripes; i++) { in do_zone_finish()
2385 ret = call_zone_finish(block_group, &map->stripes[i]); in do_zone_finish()
2387 up_read(&dev_replace->rwsem); in do_zone_finish()
2391 up_read(&dev_replace->rwsem); in do_zone_finish()
2396 spin_lock(&fs_info->zone_active_bgs_lock); in do_zone_finish()
2397 ASSERT(!list_empty(&block_group->active_bg_list)); in do_zone_finish()
2398 list_del_init(&block_group->active_bg_list); in do_zone_finish()
2399 spin_unlock(&fs_info->zone_active_bgs_lock); in do_zone_finish()
2404 clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); in do_zone_finish()
2411 if (!btrfs_is_zoned(block_group->fs_info)) in btrfs_zone_finish()
2419 struct btrfs_fs_info *fs_info = fs_devices->fs_info; in btrfs_can_activate_zone()
2426 if (test_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags)) in btrfs_can_activate_zone()
2430 mutex_lock(&fs_info->chunk_mutex); in btrfs_can_activate_zone()
2431 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_can_activate_zone()
2432 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { in btrfs_can_activate_zone()
2433 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_can_activate_zone()
2436 if (!device->bdev) in btrfs_can_activate_zone()
2439 if (!zinfo->max_active_zones) { in btrfs_can_activate_zone()
2445 reserved = zinfo->reserved_active_zones; in btrfs_can_activate_zone()
2449 ret = (atomic_read(&zinfo->active_zones_left) >= (1 + reserved)); in btrfs_can_activate_zone()
2452 ret = (atomic_read(&zinfo->active_zones_left) >= (2 + reserved)); in btrfs_can_activate_zone()
2458 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_can_activate_zone()
2459 mutex_unlock(&fs_info->chunk_mutex); in btrfs_can_activate_zone()
2462 set_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); in btrfs_can_activate_zone()
2477 return -ENOENT; in btrfs_zone_finish_endio()
2480 if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) in btrfs_zone_finish_endio()
2481 min_alloc_bytes = fs_info->sectorsize; in btrfs_zone_finish_endio()
2483 min_alloc_bytes = fs_info->nodesize; in btrfs_zone_finish_endio()
2485 /* Bail out if we can allocate more data from this block group. */ in btrfs_zone_finish_endio()
2487 block_group->start + block_group->zone_capacity) in btrfs_zone_finish_endio()
2503 wait_on_extent_buffer_writeback(bg->last_eb); in btrfs_zone_finish_endio_workfn()
2504 free_extent_buffer(bg->last_eb); in btrfs_zone_finish_endio_workfn()
2507 btrfs_handle_fs_error(bg->fs_info, ret, in btrfs_zone_finish_endio_workfn()
2508 "Failed to finish block-group's zone"); in btrfs_zone_finish_endio_workfn()
2515 if (!test_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &bg->runtime_flags) || in btrfs_schedule_zone_finish_bg()
2516 eb->start + eb->len * 2 <= bg->start + bg->zone_capacity) in btrfs_schedule_zone_finish_bg()
2519 if (WARN_ON(bg->zone_finish_work.func == btrfs_zone_finish_endio_workfn)) { in btrfs_schedule_zone_finish_bg()
2520 btrfs_err(bg->fs_info, "double scheduling of bg %llu zone finishing", in btrfs_schedule_zone_finish_bg()
2521 bg->start); in btrfs_schedule_zone_finish_bg()
2527 refcount_inc(&eb->refs); in btrfs_schedule_zone_finish_bg()
2528 bg->last_eb = eb; in btrfs_schedule_zone_finish_bg()
2529 INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn); in btrfs_schedule_zone_finish_bg()
2530 queue_work(system_dfl_wq, &bg->zone_finish_work); in btrfs_schedule_zone_finish_bg()
2535 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_clear_data_reloc_bg()
2537 spin_lock(&fs_info->relocation_bg_lock); in btrfs_clear_data_reloc_bg()
2538 if (fs_info->data_reloc_bg == bg->start) in btrfs_clear_data_reloc_bg()
2539 fs_info->data_reloc_bg = 0; in btrfs_clear_data_reloc_bg()
2540 spin_unlock(&fs_info->relocation_bg_lock); in btrfs_clear_data_reloc_bg()
2545 struct btrfs_space_info *data_sinfo = fs_info->data_sinfo; in btrfs_zoned_reserve_data_reloc_bg()
2559 if (fs_info->data_reloc_bg) in btrfs_zoned_reserve_data_reloc_bg()
2562 if (sb_rdonly(fs_info->sb)) in btrfs_zoned_reserve_data_reloc_bg()
2565 alloc_flags = btrfs_get_alloc_profile(fs_info, space_info->flags); in btrfs_zoned_reserve_data_reloc_bg()
2568 /* Scan the data space_info to find empty block groups. Take the second one. */ in btrfs_zoned_reserve_data_reloc_bg()
2570 bg_list = &space_info->block_groups[index]; in btrfs_zoned_reserve_data_reloc_bg()
2572 if (bg->alloc_offset != 0) in btrfs_zoned_reserve_data_reloc_bg()
2581 /* Migrate the block group to the data relocation space_info. */ in btrfs_zoned_reserve_data_reloc_bg()
2582 struct btrfs_space_info *reloc_sinfo = data_sinfo->sub_group[0]; in btrfs_zoned_reserve_data_reloc_bg()
2585 ASSERT(reloc_sinfo->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC); in btrfs_zoned_reserve_data_reloc_bg()
2586 factor = btrfs_bg_type_to_factor(bg->flags); in btrfs_zoned_reserve_data_reloc_bg()
2588 down_write(&space_info->groups_sem); in btrfs_zoned_reserve_data_reloc_bg()
2589 list_del_init(&bg->list); in btrfs_zoned_reserve_data_reloc_bg()
2591 ASSERT(!list_empty(&space_info->block_groups[index])); in btrfs_zoned_reserve_data_reloc_bg()
2592 up_write(&space_info->groups_sem); in btrfs_zoned_reserve_data_reloc_bg()
2594 spin_lock(&space_info->lock); in btrfs_zoned_reserve_data_reloc_bg()
2595 space_info->total_bytes -= bg->length; in btrfs_zoned_reserve_data_reloc_bg()
2596 space_info->disk_total -= bg->length * factor; in btrfs_zoned_reserve_data_reloc_bg()
2597 space_info->disk_total -= bg->zone_unusable; in btrfs_zoned_reserve_data_reloc_bg()
2599 ASSERT(bg->used == 0); in btrfs_zoned_reserve_data_reloc_bg()
2600 /* No super block in a block group on the zoned setup. */ in btrfs_zoned_reserve_data_reloc_bg()
2601 ASSERT(bg->bytes_super == 0); in btrfs_zoned_reserve_data_reloc_bg()
2602 spin_unlock(&space_info->lock); in btrfs_zoned_reserve_data_reloc_bg()
2604 bg->space_info = reloc_sinfo; in btrfs_zoned_reserve_data_reloc_bg()
2605 if (reloc_sinfo->block_group_kobjs[index] == NULL) in btrfs_zoned_reserve_data_reloc_bg()
2611 fs_info->data_reloc_bg = bg->start; in btrfs_zoned_reserve_data_reloc_bg()
2612 set_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &bg->runtime_flags); in btrfs_zoned_reserve_data_reloc_bg()
2621 trans = btrfs_join_transaction(fs_info->tree_root); in btrfs_zoned_reserve_data_reloc_bg()
2626 space_info = data_sinfo->sub_group[0]; in btrfs_zoned_reserve_data_reloc_bg()
2627 ASSERT(space_info->subgroup_id == BTRFS_SUB_GROUP_DATA_RELOC); in btrfs_zoned_reserve_data_reloc_bg()
2632 * We allocated a new block group in the data relocation space_info. We in btrfs_zoned_reserve_data_reloc_bg()
2643 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; in btrfs_free_zone_cache()
2649 mutex_lock(&fs_devices->device_list_mutex); in btrfs_free_zone_cache()
2650 list_for_each_entry(device, &fs_devices->devices, dev_list) { in btrfs_free_zone_cache()
2651 if (device->zone_info) { in btrfs_free_zone_cache()
2652 vfree(device->zone_info->zone_cache); in btrfs_free_zone_cache()
2653 device->zone_info->zone_cache = NULL; in btrfs_free_zone_cache()
2656 mutex_unlock(&fs_devices->device_list_mutex); in btrfs_free_zone_cache()
2661 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; in btrfs_zoned_should_reclaim()
2663 u64 total = btrfs_super_total_bytes(fs_info->super_copy); in btrfs_zoned_should_reclaim()
2669 if (fs_info->bg_reclaim_threshold == 0) in btrfs_zoned_should_reclaim()
2672 mutex_lock(&fs_devices->device_list_mutex); in btrfs_zoned_should_reclaim()
2673 list_for_each_entry(device, &fs_devices->devices, dev_list) { in btrfs_zoned_should_reclaim()
2674 if (!device->bdev) in btrfs_zoned_should_reclaim()
2677 used += device->bytes_used; in btrfs_zoned_should_reclaim()
2679 mutex_unlock(&fs_devices->device_list_mutex); in btrfs_zoned_should_reclaim()
2682 return factor >= fs_info->bg_reclaim_threshold; in btrfs_zoned_should_reclaim()
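
/*
 * Illustrative sketch (not part of zoned.c): the decision above sums
 * bytes_used over all devices (skipping ones without an attached bdev) and
 * compares the used share of the filesystem, as a percentage, against
 * bg_reclaim_threshold; a threshold of 0 disables reclaim. A minimal
 * userspace model of the arithmetic, using plain 64-bit division in place of
 * div64_u64():
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool toy_should_reclaim(const uint64_t *dev_used, int ndevs,
			       uint64_t total, unsigned int threshold_pct)
{
	uint64_t used = 0;

	if (threshold_pct == 0)		/* reclaim disabled */
		return false;

	for (int i = 0; i < ndevs; i++)
		used += dev_used[i];

	/* factor = used * 100 / total, compared against the threshold */
	return (used * 100 / total) >= threshold_pct;
}

int main(void)
{
	uint64_t dev_used[2] = { 40ULL << 30, 40ULL << 30 };	/* 40 GiB each */
	uint64_t total = 100ULL << 30;				/* 100 GiB     */

	printf("reclaim: %d\n", toy_should_reclaim(dev_used, 2, total, 75));
	return 0;
}
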
2694 /* It should be called on a previous data relocation block group. */ in btrfs_zoned_release_data_reloc_bg()
2695 ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)); in btrfs_zoned_release_data_reloc_bg()
2697 spin_lock(&block_group->lock); in btrfs_zoned_release_data_reloc_bg()
2698 if (!test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) in btrfs_zoned_release_data_reloc_bg()
2702 if (block_group->start + block_group->alloc_offset == logical + length) { in btrfs_zoned_release_data_reloc_bg()
2704 * Now, release this block group for further allocations and in btrfs_zoned_release_data_reloc_bg()
2708 &block_group->runtime_flags); in btrfs_zoned_release_data_reloc_bg()
2712 spin_unlock(&block_group->lock); in btrfs_zoned_release_data_reloc_bg()
2723 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_zone_finish_one_bg()
2724 list_for_each_entry(block_group, &fs_info->zone_active_bgs, in btrfs_zone_finish_one_bg()
2728 spin_lock(&block_group->lock); in btrfs_zone_finish_one_bg()
2729 if (block_group->reserved || block_group->alloc_offset == 0 || in btrfs_zone_finish_one_bg()
2730 !(block_group->flags & BTRFS_BLOCK_GROUP_DATA) || in btrfs_zone_finish_one_bg()
2731 test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { in btrfs_zone_finish_one_bg()
2732 spin_unlock(&block_group->lock); in btrfs_zone_finish_one_bg()
2736 avail = block_group->zone_capacity - block_group->alloc_offset; in btrfs_zone_finish_one_bg()
2744 spin_unlock(&block_group->lock); in btrfs_zone_finish_one_bg()
2746 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_zone_finish_one_bg()
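
/*
 * Illustrative sketch (not part of zoned.c): the loop above walks the active
 * block groups and remembers the DATA group with the least remaining room
 * (zone_capacity - alloc_offset), skipping reserved, still-empty and
 * relocation groups; finishing the fullest candidate wastes the least
 * capacity. A minimal userspace model of that selection, with a hypothetical
 * toy_bg struct:
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_bg {
	uint64_t zone_capacity;
	uint64_t alloc_offset;
	bool reserved;
	bool is_data;
	bool is_reloc;
};

static int toy_pick_bg_to_finish(const struct toy_bg *bgs, int n)
{
	uint64_t min_avail = UINT64_MAX;
	int pick = -1;

	for (int i = 0; i < n; i++) {
		const struct toy_bg *bg = &bgs[i];
		uint64_t avail;

		if (bg->reserved || bg->alloc_offset == 0 ||
		    !bg->is_data || bg->is_reloc)
			continue;

		avail = bg->zone_capacity - bg->alloc_offset;
		if (avail < min_avail) {
			min_avail = avail;
			pick = i;
		}
	}
	return pick;	/* index of the group to finish, or -1 */
}

int main(void)
{
	struct toy_bg bgs[] = {
		{ .zone_capacity = 256 << 20, .alloc_offset = 200 << 20, .is_data = true },
		{ .zone_capacity = 256 << 20, .alloc_offset = 250 << 20, .is_data = true },
		{ .zone_capacity = 256 << 20, .alloc_offset = 0,         .is_data = true },
	};

	printf("finish bg #%d\n", toy_pick_bg_to_finish(bgs, 3));
	return 0;
}
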
2764 if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) in btrfs_zoned_activate_one_bg()
2771 down_read(&space_info->groups_sem); in btrfs_zoned_activate_one_bg()
2773 list_for_each_entry(bg, &space_info->block_groups[index], in btrfs_zoned_activate_one_bg()
2775 if (!spin_trylock(&bg->lock)) in btrfs_zoned_activate_one_bg()
2779 &bg->runtime_flags)) { in btrfs_zoned_activate_one_bg()
2780 spin_unlock(&bg->lock); in btrfs_zoned_activate_one_bg()
2783 spin_unlock(&bg->lock); in btrfs_zoned_activate_one_bg()
2786 up_read(&space_info->groups_sem); in btrfs_zoned_activate_one_bg()
2793 up_read(&space_info->groups_sem); in btrfs_zoned_activate_one_bg()
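
/*
 * Illustrative sketch (not part of zoned.c): a simplified model of the
 * activation retry. The loop above scans the metadata block group lists for
 * a group it can activate; when no candidate can be activated and the caller
 * allowed it, a data block group is zone-finished (btrfs_zone_finish_one_bg(),
 * shown above) to free an active-zone slot and the scan is retried. The
 * toy_* callbacks below are hypothetical stand-ins:
 */
#include <stdbool.h>
#include <stdio.h>

/* Try to activate some block group; true on success. */
typedef bool (*toy_activate_fn)(int *free_slots);
/* Finish one active group to free a slot; true if one could be finished. */
typedef bool (*toy_finish_one_fn)(int *free_slots);

static int toy_activate_one_bg(toy_activate_fn activate,
			       toy_finish_one_fn finish_one,
			       int *free_slots, bool do_finish)
{
	for (;;) {
		if (activate(free_slots))
			return 1;	/* activated one block group  */
		if (!do_finish)
			return 0;	/* caller forbids finishing   */
		if (!finish_one(free_slots))
			return -1;	/* nothing to finish: give up */
	}
}

static bool toy_activate(int *free_slots)
{
	if (*free_slots == 0)
		return false;
	(*free_slots)--;
	return true;
}

static bool toy_finish_one(int *free_slots)
{
	(*free_slots)++;
	return true;
}

int main(void)
{
	int free_slots = 0;	/* no active zones left on the device */

	printf("%d\n", toy_activate_one_bg(toy_activate, toy_finish_one,
					   &free_slots, true));
	return 0;
}
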
2809 * Reserve zones for one metadata block group, one tree-log block group, and one
2810 * system block group.
2814 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; in btrfs_check_active_zone_reservation()
2817 /* Reserve zones for normal SINGLE metadata and tree-log block group. */ in btrfs_check_active_zone_reservation()
2819 /* Reserve a zone for SINGLE system block group. */ in btrfs_check_active_zone_reservation()
2822 if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags)) in btrfs_check_active_zone_reservation()
2829 if (fs_info->avail_metadata_alloc_bits & BTRFS_BLOCK_GROUP_DUP) in btrfs_check_active_zone_reservation()
2831 if (fs_info->avail_system_alloc_bits & BTRFS_BLOCK_GROUP_DUP) in btrfs_check_active_zone_reservation()
2835 mutex_lock(&fs_devices->device_list_mutex); in btrfs_check_active_zone_reservation()
2836 list_for_each_entry(device, &fs_devices->devices, dev_list) { in btrfs_check_active_zone_reservation()
2837 if (!device->bdev) in btrfs_check_active_zone_reservation()
2840 device->zone_info->reserved_active_zones = in btrfs_check_active_zone_reservation()
2843 mutex_unlock(&fs_devices->device_list_mutex); in btrfs_check_active_zone_reservation()
2845 /* Release reservation for currently active block groups. */ in btrfs_check_active_zone_reservation()
2846 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_check_active_zone_reservation()
2847 list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) { in btrfs_check_active_zone_reservation()
2848 struct btrfs_chunk_map *map = block_group->physical_map; in btrfs_check_active_zone_reservation()
2850 if (!(block_group->flags & in btrfs_check_active_zone_reservation()
2854 for (int i = 0; i < map->num_stripes; i++) in btrfs_check_active_zone_reservation()
2855 map->stripes[i].dev->zone_info->reserved_active_zones--; in btrfs_check_active_zone_reservation()
2857 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_check_active_zone_reservation()
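
/*
 * Illustrative sketch (not part of zoned.c): the reservation above keeps
 * enough active zones free on each device for one metadata, one tree-log and
 * one system block group: 2 + 1 zones for SINGLE profiles, with the per-type
 * count doubled for DUP. Zones already consumed by currently active
 * metadata/system groups are given back, one per stripe. A minimal userspace
 * model of the counting (the clamp to zero is a toy simplification, not taken
 * from the kernel code):
 */
#include <stdbool.h>
#include <stdio.h>

static int toy_reserved_active_zones(bool metadata_is_dup, bool system_is_dup,
				     int active_stripes_on_device)
{
	/* One zone each for a metadata and a tree-log block group. */
	int metadata_reserve = metadata_is_dup ? 4 : 2;
	/* One zone for a system block group. */
	int system_reserve = system_is_dup ? 2 : 1;
	int reserved = metadata_reserve + system_reserve;

	/* Stripes of already-active metadata/system groups release slots. */
	reserved -= active_stripes_on_device;
	return reserved > 0 ? reserved : 0;
}

int main(void)
{
	/* DUP metadata already active (2 stripes on this device), SINGLE system. */
	printf("reserved active zones: %d\n",
	       toy_reserved_active_zones(true, false, 2));
	return 0;
}
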
2861 * Reset the zones of unused block groups from @space_info->bytes_zone_unusable.
2866 * This one resets the zones of a block group, so we can reuse the region
2867 * without removing the block group. On the other hand, btrfs_delete_unused_bgs()
2868 * just removes a block group and frees up the underlying zones. So, we still
2869 * need to allocate a new block group to reuse the zones.
2871 * Resetting is faster than deleting/recreating a block group. It is similar
2873 * the block group's profile with this operation.
2877 struct btrfs_fs_info *fs_info = space_info->fs_info; in btrfs_reset_unused_block_groups()
2878 const sector_t zone_size_sectors = fs_info->zone_size >> SECTOR_SHIFT; in btrfs_reset_unused_block_groups()
2890 * Here, we choose a fully zone_unusable block group. It's in btrfs_reset_unused_block_groups()
2891 * technically possible to reset a partly zone_unusable block in btrfs_reset_unused_block_groups()
2897 spin_lock(&fs_info->unused_bgs_lock); in btrfs_reset_unused_block_groups()
2898 list_for_each_entry(bg, &fs_info->unused_bgs, bg_list) { in btrfs_reset_unused_block_groups()
2899 if ((bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != space_info->flags) in btrfs_reset_unused_block_groups()
2905 * &bg->lock -> &fs_info->unused_bgs_lock. We skip a in btrfs_reset_unused_block_groups()
2906 * block group if we cannot take its lock. in btrfs_reset_unused_block_groups()
2908 if (!spin_trylock(&bg->lock)) in btrfs_reset_unused_block_groups()
2910 if (btrfs_is_block_group_used(bg) || bg->zone_unusable < bg->length) { in btrfs_reset_unused_block_groups()
2911 spin_unlock(&bg->lock); in btrfs_reset_unused_block_groups()
2914 spin_unlock(&bg->lock); in btrfs_reset_unused_block_groups()
2919 spin_unlock(&fs_info->unused_bgs_lock); in btrfs_reset_unused_block_groups()
2923 list_del_init(&bg->bg_list); in btrfs_reset_unused_block_groups()
2925 spin_unlock(&fs_info->unused_bgs_lock); in btrfs_reset_unused_block_groups()
2928 * Since the block group is fully zone_unusable and we cannot in btrfs_reset_unused_block_groups()
2929 * allocate from this block group anymore, we don't need to set in btrfs_reset_unused_block_groups()
2930 * this block group read-only. in btrfs_reset_unused_block_groups()
2933 down_read(&fs_info->dev_replace.rwsem); in btrfs_reset_unused_block_groups()
2934 map = bg->physical_map; in btrfs_reset_unused_block_groups()
2935 for (int i = 0; i < map->num_stripes; i++) { in btrfs_reset_unused_block_groups()
2936 struct btrfs_io_stripe *stripe = &map->stripes[i]; in btrfs_reset_unused_block_groups()
2941 ret = blkdev_zone_mgmt(stripe->dev->bdev, REQ_OP_ZONE_RESET, in btrfs_reset_unused_block_groups()
2942 stripe->physical >> SECTOR_SHIFT, in btrfs_reset_unused_block_groups()
2947 up_read(&fs_info->dev_replace.rwsem); in btrfs_reset_unused_block_groups()
2951 up_read(&fs_info->dev_replace.rwsem); in btrfs_reset_unused_block_groups()
2953 spin_lock(&space_info->lock); in btrfs_reset_unused_block_groups()
2954 spin_lock(&bg->lock); in btrfs_reset_unused_block_groups()
2956 if (bg->ro) { in btrfs_reset_unused_block_groups()
2957 spin_unlock(&bg->lock); in btrfs_reset_unused_block_groups()
2958 spin_unlock(&space_info->lock); in btrfs_reset_unused_block_groups()
2962 reclaimed = bg->alloc_offset; in btrfs_reset_unused_block_groups()
2963 bg->zone_unusable = bg->length - bg->zone_capacity; in btrfs_reset_unused_block_groups()
2964 bg->alloc_offset = 0; in btrfs_reset_unused_block_groups()
2967 * block group. in btrfs_reset_unused_block_groups()
2969 ASSERT(reclaimed == bg->zone_capacity); in btrfs_reset_unused_block_groups()
2970 bg->free_space_ctl->free_space += reclaimed; in btrfs_reset_unused_block_groups()
2971 space_info->bytes_zone_unusable -= reclaimed; in btrfs_reset_unused_block_groups()
2972 spin_unlock(&bg->lock); in btrfs_reset_unused_block_groups()
2974 spin_unlock(&space_info->lock); in btrfs_reset_unused_block_groups()
2978 num_bytes -= reclaimed; in btrfs_reset_unused_block_groups()
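
/*
 * Illustrative sketch (not part of zoned.c): the accounting after the zone
 * reset above. Because only fully-used-then-freed block groups are reset,
 * alloc_offset equals zone_capacity; that amount becomes free space again,
 * zone_unusable shrinks back to the capacity-to-length gap, and the caller's
 * reclaim target is reduced by the reclaimed bytes. A minimal userspace model
 * of the bookkeeping, with hypothetical toy_* names (the underflow clamp is a
 * toy simplification):
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

struct toy_bg {
	uint64_t length;	/* logical size of the block group    */
	uint64_t zone_capacity;	/* writable bytes within the zone(s)  */
	uint64_t alloc_offset;	/* bytes written so far               */
	uint64_t zone_unusable;
	uint64_t free_space;
};

static uint64_t toy_reset_bg(struct toy_bg *bg, uint64_t *bytes_zone_unusable,
			     uint64_t num_bytes)
{
	uint64_t reclaimed = bg->alloc_offset;

	/* Only fully used, then fully freed, groups are reset. */
	assert(reclaimed == bg->zone_capacity);

	bg->zone_unusable = bg->length - bg->zone_capacity;
	bg->alloc_offset = 0;
	bg->free_space += reclaimed;
	*bytes_zone_unusable -= reclaimed;

	return num_bytes > reclaimed ? num_bytes - reclaimed : 0;
}

int main(void)
{
	struct toy_bg bg = {
		.length = 256ULL << 20,
		.zone_capacity = 192ULL << 20,
		.alloc_offset = 192ULL << 20,
		.zone_unusable = 256ULL << 20,	/* fully zone_unusable */
	};
	uint64_t space_zone_unusable = bg.zone_unusable;
	uint64_t remaining;

	remaining = toy_reset_bg(&bg, &space_zone_unusable, 512ULL << 20);
	printf("freed: %llu MiB, still to reclaim: %llu MiB\n",
	       (unsigned long long)(bg.free_space >> 20),
	       (unsigned long long)(remaining >> 20));
	return 0;
}
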