1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2021 Western Digital Corporation or its affiliates. 4 */ 5 6 #include <linux/blkdev.h> 7 #include <linux/mm.h> 8 #include <linux/sched/mm.h> 9 #include <linux/slab.h> 10 #include <linux/bitmap.h> 11 12 #include "dm-core.h" 13 14 #define DM_MSG_PREFIX "zone" 15 16 #define DM_ZONE_INVALID_WP_OFST UINT_MAX 17 18 /* 19 * For internal zone reports bypassing the top BIO submission path. 20 */ 21 static int dm_blk_do_report_zones(struct mapped_device *md, struct dm_table *t, 22 sector_t sector, unsigned int nr_zones, 23 report_zones_cb cb, void *data) 24 { 25 struct gendisk *disk = md->disk; 26 int ret; 27 struct dm_report_zones_args args = { 28 .next_sector = sector, 29 .orig_data = data, 30 .orig_cb = cb, 31 }; 32 33 do { 34 struct dm_target *tgt; 35 36 tgt = dm_table_find_target(t, args.next_sector); 37 if (WARN_ON_ONCE(!tgt->type->report_zones)) 38 return -EIO; 39 40 args.tgt = tgt; 41 ret = tgt->type->report_zones(tgt, &args, 42 nr_zones - args.zone_idx); 43 if (ret < 0) 44 return ret; 45 } while (args.zone_idx < nr_zones && 46 args.next_sector < get_capacity(disk)); 47 48 return args.zone_idx; 49 } 50 51 /* 52 * User facing dm device block device report zone operation. This calls the 53 * report_zones operation for each target of a device table. This operation is 54 * generally implemented by targets using dm_report_zones(). 55 */ 56 int dm_blk_report_zones(struct gendisk *disk, sector_t sector, 57 unsigned int nr_zones, report_zones_cb cb, void *data) 58 { 59 struct mapped_device *md = disk->private_data; 60 struct dm_table *map; 61 int srcu_idx, ret; 62 63 if (!md->zone_revalidate_map) { 64 /* Regular user context */ 65 if (dm_suspended_md(md)) 66 return -EAGAIN; 67 68 map = dm_get_live_table(md, &srcu_idx); 69 if (!map) 70 return -EIO; 71 } else { 72 /* Zone revalidation during __bind() */ 73 map = md->zone_revalidate_map; 74 } 75 76 ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data); 77 78 if (!md->zone_revalidate_map) 79 dm_put_live_table(md, srcu_idx); 80 81 return ret; 82 } 83 84 static int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, 85 void *data) 86 { 87 struct dm_report_zones_args *args = data; 88 sector_t sector_diff = args->tgt->begin - args->start; 89 90 /* 91 * Ignore zones beyond the target range. 92 */ 93 if (zone->start >= args->start + args->tgt->len) 94 return 0; 95 96 /* 97 * Remap the start sector and write pointer position of the zone 98 * to match its position in the target range. 99 */ 100 zone->start += sector_diff; 101 if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) { 102 if (zone->cond == BLK_ZONE_COND_FULL) 103 zone->wp = zone->start + zone->len; 104 else if (zone->cond == BLK_ZONE_COND_EMPTY) 105 zone->wp = zone->start; 106 else 107 zone->wp += sector_diff; 108 } 109 110 args->next_sector = zone->start + zone->len; 111 return args->orig_cb(zone, args->zone_idx++, args->orig_data); 112 } 113 114 /* 115 * Helper for drivers of zoned targets to implement struct target_type 116 * report_zones operation. 117 */ 118 int dm_report_zones(struct block_device *bdev, sector_t start, sector_t sector, 119 struct dm_report_zones_args *args, unsigned int nr_zones) 120 { 121 /* 122 * Set the target mapping start sector first so that 123 * dm_report_zones_cb() can correctly remap zone information. 124 */ 125 args->start = start; 126 127 return blkdev_report_zones(bdev, sector, nr_zones, 128 dm_report_zones_cb, args); 129 } 130 EXPORT_SYMBOL_GPL(dm_report_zones); 131 132 bool dm_is_zone_write(struct mapped_device *md, struct bio *bio) 133 { 134 struct request_queue *q = md->queue; 135 136 if (!blk_queue_is_zoned(q)) 137 return false; 138 139 switch (bio_op(bio)) { 140 case REQ_OP_WRITE_ZEROES: 141 case REQ_OP_WRITE: 142 return !op_is_flush(bio->bi_opf) && bio_sectors(bio); 143 default: 144 return false; 145 } 146 } 147 148 /* 149 * Count conventional zones of a mapped zoned device. If the device 150 * only has conventional zones, do not expose it as zoned. 151 */ 152 static int dm_check_zoned_cb(struct blk_zone *zone, unsigned int idx, 153 void *data) 154 { 155 unsigned int *nr_conv_zones = data; 156 157 if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) 158 (*nr_conv_zones)++; 159 160 return 0; 161 } 162 163 /* 164 * Revalidate the zones of a mapped device to initialize resource necessary 165 * for zone append emulation. Note that we cannot simply use the block layer 166 * blk_revalidate_disk_zones() function here as the mapped device is suspended 167 * (this is called from __bind() context). 168 */ 169 static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t) 170 { 171 struct gendisk *disk = md->disk; 172 int ret; 173 174 /* Revalidate only if something changed. */ 175 if (!disk->nr_zones || disk->nr_zones != md->nr_zones) 176 md->nr_zones = 0; 177 178 if (md->nr_zones) 179 return 0; 180 181 /* 182 * Our table is not live yet. So the call to dm_get_live_table() 183 * in dm_blk_report_zones() will fail. Set a temporary pointer to 184 * our table for dm_blk_report_zones() to use directly. 185 */ 186 md->zone_revalidate_map = t; 187 ret = blk_revalidate_disk_zones(disk); 188 md->zone_revalidate_map = NULL; 189 190 if (ret) { 191 DMERR("Revalidate zones failed %d", ret); 192 return ret; 193 } 194 195 md->nr_zones = disk->nr_zones; 196 197 return 0; 198 } 199 200 static int device_not_zone_append_capable(struct dm_target *ti, 201 struct dm_dev *dev, sector_t start, 202 sector_t len, void *data) 203 { 204 return !bdev_is_zoned(dev->bdev); 205 } 206 207 static bool dm_table_supports_zone_append(struct dm_table *t) 208 { 209 for (unsigned int i = 0; i < t->num_targets; i++) { 210 struct dm_target *ti = dm_table_get_target(t, i); 211 212 if (ti->emulate_zone_append) 213 return false; 214 215 if (!ti->type->iterate_devices || 216 ti->type->iterate_devices(ti, device_not_zone_append_capable, NULL)) 217 return false; 218 } 219 220 return true; 221 } 222 223 int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q, 224 struct queue_limits *lim) 225 { 226 struct mapped_device *md = t->md; 227 struct gendisk *disk = md->disk; 228 unsigned int nr_conv_zones = 0; 229 int ret; 230 231 /* 232 * Check if zone append is natively supported, and if not, set the 233 * mapped device queue as needing zone append emulation. 234 */ 235 WARN_ON_ONCE(queue_is_mq(q)); 236 if (dm_table_supports_zone_append(t)) { 237 clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); 238 } else { 239 set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); 240 lim->max_zone_append_sectors = 0; 241 } 242 243 if (!get_capacity(md->disk)) 244 return 0; 245 246 /* 247 * Count conventional zones to check that the mapped device will indeed 248 * have sequential write required zones. 249 */ 250 md->zone_revalidate_map = t; 251 ret = dm_blk_report_zones(disk, 0, UINT_MAX, 252 dm_check_zoned_cb, &nr_conv_zones); 253 md->zone_revalidate_map = NULL; 254 if (ret < 0) { 255 DMERR("Check zoned failed %d", ret); 256 return ret; 257 } 258 259 /* 260 * If we only have conventional zones, expose the mapped device as 261 * a regular device. 262 */ 263 if (nr_conv_zones >= ret) { 264 lim->max_open_zones = 0; 265 lim->max_active_zones = 0; 266 lim->zoned = false; 267 clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); 268 disk->nr_zones = 0; 269 return 0; 270 } 271 272 if (!md->disk->nr_zones) { 273 DMINFO("%s using %s zone append", 274 md->disk->disk_name, 275 queue_emulates_zone_append(q) ? "emulated" : "native"); 276 } 277 278 ret = dm_revalidate_zones(md, t); 279 if (ret < 0) 280 return ret; 281 282 if (!static_key_enabled(&zoned_enabled.key)) 283 static_branch_enable(&zoned_enabled); 284 return 0; 285 } 286 287 /* 288 * IO completion callback called from clone_endio(). 289 */ 290 void dm_zone_endio(struct dm_io *io, struct bio *clone) 291 { 292 struct mapped_device *md = io->md; 293 struct gendisk *disk = md->disk; 294 struct bio *orig_bio = io->orig_bio; 295 296 /* 297 * Get the offset within the zone of the written sector 298 * and add that to the original bio sector position. 299 */ 300 if (clone->bi_status == BLK_STS_OK && 301 bio_op(clone) == REQ_OP_ZONE_APPEND) { 302 sector_t mask = bdev_zone_sectors(disk->part0) - 1; 303 304 orig_bio->bi_iter.bi_sector += clone->bi_iter.bi_sector & mask; 305 } 306 307 return; 308 } 309