1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2021 Western Digital Corporation or its affiliates. 4 */ 5 6 #include <linux/blkdev.h> 7 #include <linux/mm.h> 8 #include <linux/sched/mm.h> 9 #include <linux/slab.h> 10 #include <linux/bitmap.h> 11 12 #include "dm-core.h" 13 14 #define DM_MSG_PREFIX "zone" 15 16 #define DM_ZONE_INVALID_WP_OFST UINT_MAX 17 18 /* 19 * For internal zone reports bypassing the top BIO submission path. 20 */ 21 static int dm_blk_do_report_zones(struct mapped_device *md, struct dm_table *t, 22 sector_t sector, unsigned int nr_zones, 23 report_zones_cb cb, void *data) 24 { 25 struct gendisk *disk = md->disk; 26 int ret; 27 struct dm_report_zones_args args = { 28 .next_sector = sector, 29 .orig_data = data, 30 .orig_cb = cb, 31 }; 32 33 do { 34 struct dm_target *tgt; 35 36 tgt = dm_table_find_target(t, args.next_sector); 37 if (WARN_ON_ONCE(!tgt->type->report_zones)) 38 return -EIO; 39 40 args.tgt = tgt; 41 ret = tgt->type->report_zones(tgt, &args, 42 nr_zones - args.zone_idx); 43 if (ret < 0) 44 return ret; 45 } while (args.zone_idx < nr_zones && 46 args.next_sector < get_capacity(disk)); 47 48 return args.zone_idx; 49 } 50 51 /* 52 * User facing dm device block device report zone operation. This calls the 53 * report_zones operation for each target of a device table. This operation is 54 * generally implemented by targets using dm_report_zones(). 55 */ 56 int dm_blk_report_zones(struct gendisk *disk, sector_t sector, 57 unsigned int nr_zones, report_zones_cb cb, void *data) 58 { 59 struct mapped_device *md = disk->private_data; 60 struct dm_table *map; 61 int srcu_idx, ret; 62 63 if (!md->zone_revalidate_map) { 64 /* Regular user context */ 65 if (dm_suspended_md(md)) 66 return -EAGAIN; 67 68 map = dm_get_live_table(md, &srcu_idx); 69 if (!map) 70 return -EIO; 71 } else { 72 /* Zone revalidation during __bind() */ 73 map = md->zone_revalidate_map; 74 } 75 76 ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data); 77 78 if (!md->zone_revalidate_map) 79 dm_put_live_table(md, srcu_idx); 80 81 return ret; 82 } 83 84 static int dm_report_zones_cb(struct blk_zone *zone, unsigned int idx, 85 void *data) 86 { 87 struct dm_report_zones_args *args = data; 88 sector_t sector_diff = args->tgt->begin - args->start; 89 90 /* 91 * Ignore zones beyond the target range. 92 */ 93 if (zone->start >= args->start + args->tgt->len) 94 return 0; 95 96 /* 97 * Remap the start sector and write pointer position of the zone 98 * to match its position in the target range. 99 */ 100 zone->start += sector_diff; 101 if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) { 102 if (zone->cond == BLK_ZONE_COND_FULL) 103 zone->wp = zone->start + zone->len; 104 else if (zone->cond == BLK_ZONE_COND_EMPTY) 105 zone->wp = zone->start; 106 else 107 zone->wp += sector_diff; 108 } 109 110 args->next_sector = zone->start + zone->len; 111 return args->orig_cb(zone, args->zone_idx++, args->orig_data); 112 } 113 114 /* 115 * Helper for drivers of zoned targets to implement struct target_type 116 * report_zones operation. 117 */ 118 int dm_report_zones(struct block_device *bdev, sector_t start, sector_t sector, 119 struct dm_report_zones_args *args, unsigned int nr_zones) 120 { 121 /* 122 * Set the target mapping start sector first so that 123 * dm_report_zones_cb() can correctly remap zone information. 124 */ 125 args->start = start; 126 127 return blkdev_report_zones(bdev, sector, nr_zones, 128 dm_report_zones_cb, args); 129 } 130 EXPORT_SYMBOL_GPL(dm_report_zones); 131 132 bool dm_is_zone_write(struct mapped_device *md, struct bio *bio) 133 { 134 struct request_queue *q = md->queue; 135 136 if (!blk_queue_is_zoned(q)) 137 return false; 138 139 switch (bio_op(bio)) { 140 case REQ_OP_WRITE_ZEROES: 141 case REQ_OP_WRITE: 142 return !op_is_flush(bio->bi_opf) && bio_sectors(bio); 143 default: 144 return false; 145 } 146 } 147 148 /* 149 * Count conventional zones of a mapped zoned device. If the device 150 * only has conventional zones, do not expose it as zoned. 151 */ 152 static int dm_check_zoned_cb(struct blk_zone *zone, unsigned int idx, 153 void *data) 154 { 155 unsigned int *nr_conv_zones = data; 156 157 if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) 158 (*nr_conv_zones)++; 159 160 return 0; 161 } 162 163 static int dm_check_zoned(struct mapped_device *md, struct dm_table *t) 164 { 165 struct gendisk *disk = md->disk; 166 unsigned int nr_conv_zones = 0; 167 int ret; 168 169 /* Count conventional zones */ 170 md->zone_revalidate_map = t; 171 ret = dm_blk_report_zones(disk, 0, UINT_MAX, 172 dm_check_zoned_cb, &nr_conv_zones); 173 md->zone_revalidate_map = NULL; 174 if (ret < 0) { 175 DMERR("Check zoned failed %d", ret); 176 return ret; 177 } 178 179 /* 180 * If we only have conventional zones, expose the mapped device as 181 * a regular device. 182 */ 183 if (nr_conv_zones >= ret) { 184 disk->queue->limits.max_open_zones = 0; 185 disk->queue->limits.max_active_zones = 0; 186 disk->queue->limits.zoned = false; 187 clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); 188 disk->nr_zones = 0; 189 } 190 191 return 0; 192 } 193 194 /* 195 * Revalidate the zones of a mapped device to initialize resource necessary 196 * for zone append emulation. Note that we cannot simply use the block layer 197 * blk_revalidate_disk_zones() function here as the mapped device is suspended 198 * (this is called from __bind() context). 199 */ 200 static int dm_revalidate_zones(struct mapped_device *md, struct dm_table *t) 201 { 202 struct gendisk *disk = md->disk; 203 int ret; 204 205 /* Revalidate only if something changed. */ 206 if (!disk->nr_zones || disk->nr_zones != md->nr_zones) 207 md->nr_zones = 0; 208 209 if (md->nr_zones) 210 return 0; 211 212 /* 213 * Our table is not live yet. So the call to dm_get_live_table() 214 * in dm_blk_report_zones() will fail. Set a temporary pointer to 215 * our table for dm_blk_report_zones() to use directly. 216 */ 217 md->zone_revalidate_map = t; 218 ret = blk_revalidate_disk_zones(disk); 219 md->zone_revalidate_map = NULL; 220 221 if (ret) { 222 DMERR("Revalidate zones failed %d", ret); 223 return ret; 224 } 225 226 md->nr_zones = disk->nr_zones; 227 228 return 0; 229 } 230 231 static int device_not_zone_append_capable(struct dm_target *ti, 232 struct dm_dev *dev, sector_t start, 233 sector_t len, void *data) 234 { 235 return !bdev_is_zoned(dev->bdev); 236 } 237 238 static bool dm_table_supports_zone_append(struct dm_table *t) 239 { 240 for (unsigned int i = 0; i < t->num_targets; i++) { 241 struct dm_target *ti = dm_table_get_target(t, i); 242 243 if (ti->emulate_zone_append) 244 return false; 245 246 if (!ti->type->iterate_devices || 247 ti->type->iterate_devices(ti, device_not_zone_append_capable, NULL)) 248 return false; 249 } 250 251 return true; 252 } 253 254 int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q) 255 { 256 struct mapped_device *md = t->md; 257 int ret; 258 259 /* 260 * Check if zone append is natively supported, and if not, set the 261 * mapped device queue as needing zone append emulation. 262 */ 263 WARN_ON_ONCE(queue_is_mq(q)); 264 if (dm_table_supports_zone_append(t)) { 265 clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); 266 } else { 267 set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags); 268 blk_queue_max_zone_append_sectors(q, 0); 269 } 270 271 if (!get_capacity(md->disk)) 272 return 0; 273 274 /* 275 * Check that the mapped device will indeed be zoned, that is, that it 276 * has sequential write required zones. 277 */ 278 ret = dm_check_zoned(md, t); 279 if (ret) 280 return ret; 281 if (!blk_queue_is_zoned(q)) 282 return 0; 283 284 if (!md->disk->nr_zones) { 285 DMINFO("%s using %s zone append", 286 md->disk->disk_name, 287 queue_emulates_zone_append(q) ? "emulated" : "native"); 288 } 289 290 return dm_revalidate_zones(md, t); 291 } 292 293 /* 294 * IO completion callback called from clone_endio(). 295 */ 296 void dm_zone_endio(struct dm_io *io, struct bio *clone) 297 { 298 struct mapped_device *md = io->md; 299 struct gendisk *disk = md->disk; 300 struct bio *orig_bio = io->orig_bio; 301 302 /* 303 * Get the offset within the zone of the written sector 304 * and add that to the original bio sector position. 305 */ 306 if (clone->bi_status == BLK_STS_OK && 307 bio_op(clone) == REQ_OP_ZONE_APPEND) { 308 sector_t mask = bdev_zone_sectors(disk->part0) - 1; 309 310 orig_bio->bi_iter.bi_sector += clone->bi_iter.bi_sector & mask; 311 } 312 313 return; 314 } 315