/*
 * Zoned block device handling
 *
 * Copyright (c) 2015, Hannes Reinecke
 * Copyright (c) 2015, SUSE Linux GmbH
 *
 * Copyright (c) 2016, Damien Le Moal
 * Copyright (c) 2016, Western Digital
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/blkdev.h>

static inline sector_t blk_zone_start(struct request_queue *q,
				      sector_t sector)
{
	sector_t zone_mask = blk_queue_zone_sectors(q) - 1;

	return sector & ~zone_mask;
}

/*
 * Return true if a request is a write request that needs zone write locking.
 */
bool blk_req_needs_zone_write_lock(struct request *rq)
{
	if (!rq->q->seq_zones_wlock)
		return false;

	if (blk_rq_is_passthrough(rq))
		return false;

	switch (req_op(rq)) {
	case REQ_OP_WRITE_ZEROES:
	case REQ_OP_WRITE_SAME:
	case REQ_OP_WRITE:
		return blk_rq_zone_is_seq(rq);
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);

void __blk_req_zone_write_lock(struct request *rq)
{
	if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
					  rq->q->seq_zones_wlock)))
		return;

	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
	rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);

void __blk_req_zone_write_unlock(struct request *rq)
{
	rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
	if (rq->q->seq_zones_wlock)
		WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
						 rq->q->seq_zones_wlock));
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
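
/*
 * Usage note (illustrative sketch, not compiled): an I/O scheduler or
 * request-based driver that must keep at most one write in flight per
 * sequential zone would typically pick a write request only while the bit
 * for its zone in q->seq_zones_wlock is still clear, take the zone write
 * lock before dispatch, and drop it when the request completes or is
 * requeued. Something along these lines:
 *
 *	if (blk_req_needs_zone_write_lock(rq))
 *		__blk_req_zone_write_lock(rq);
 *	// ... dispatch rq to the device ...
 *
 *	// in the completion (or requeue) path:
 *	if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
 *		__blk_req_zone_write_unlock(rq);
 */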

/*
 * Check that a zone report belongs to this partition.
 * If yes, fix its start sector and write pointer, copy it in the
 * zone information array and return true. Return false otherwise.
 */
static bool blkdev_report_zone(struct block_device *bdev,
			       struct blk_zone *rep,
			       struct blk_zone *zone)
{
	sector_t offset = get_start_sect(bdev);

	if (rep->start < offset)
		return false;

	rep->start -= offset;
	if (rep->start + rep->len > bdev->bd_part->nr_sects)
		return false;

	if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
		rep->wp = rep->start + rep->len;
	else
		rep->wp -= offset;
	memcpy(zone, rep, sizeof(struct blk_zone));

	return true;
}

/**
 * blkdev_report_zones - Get zones information
 * @bdev: Target block device
 * @sector: Sector from which to report zones
 * @zones: Array of zone structures in which to return the zone information
 * @nr_zones: Number of zone structures in the zone array
 * @gfp_mask: Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Get zone information starting from the zone containing @sector.
 *    The number of zones reported may be less than the number requested
 *    by @nr_zones. The number of zones actually reported is returned
 *    in @nr_zones.
 */
int blkdev_report_zones(struct block_device *bdev,
			sector_t sector,
			struct blk_zone *zones,
			unsigned int *nr_zones,
			gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct blk_zone_report_hdr *hdr;
	unsigned int nrz = *nr_zones;
	struct page *page;
	unsigned int nr_rep;
	size_t rep_bytes;
	unsigned int nr_pages;
	struct bio *bio;
	struct bio_vec *bv;
	unsigned int i, n, nz;
	unsigned int ofst;
	void *addr;
	int ret;

	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (!nrz)
		return 0;

	if (sector > bdev->bd_part->nr_sects) {
		*nr_zones = 0;
		return 0;
	}

	/*
	 * The zone report has a header. So make room for it in the
	 * payload. Also make sure that the report fits in a single BIO
	 * that will not be split down the stack.
	 */
	rep_bytes = sizeof(struct blk_zone_report_hdr) +
		sizeof(struct blk_zone) * nrz;
	rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
	if (rep_bytes > (queue_max_sectors(q) << 9))
		rep_bytes = queue_max_sectors(q) << 9;

	nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
			 rep_bytes >> PAGE_SHIFT);
	nr_pages = min_t(unsigned int, nr_pages,
			 queue_max_segments(q));

	bio = bio_alloc(gfp_mask, nr_pages);
	if (!bio)
		return -ENOMEM;

	bio_set_dev(bio, bdev);
	bio->bi_iter.bi_sector = blk_zone_start(q, sector);
	bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);

	for (i = 0; i < nr_pages; i++) {
		page = alloc_page(gfp_mask);
		if (!page) {
			ret = -ENOMEM;
			goto out;
		}
		if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
			__free_page(page);
			break;
		}
	}

	if (i == 0)
		ret = -ENOMEM;
	else
		ret = submit_bio_wait(bio);
	if (ret)
		goto out;

	/*
	 * Process the report result: skip the header and go through the
	 * reported zones to fix up the zone information for partitions.
	 * At the same time, return the zone information into the zone
	 * array.
	 */
	n = 0;
	nz = 0;
	nr_rep = 0;
	bio_for_each_segment_all(bv, bio, i) {

		if (!bv->bv_page)
			break;

		addr = kmap_atomic(bv->bv_page);

		/* Get header in the first page */
		ofst = 0;
		if (!nr_rep) {
			hdr = addr;
			nr_rep = hdr->nr_zones;
			ofst = sizeof(struct blk_zone_report_hdr);
		}

		/* Fixup and report zones */
		while (ofst < bv->bv_len &&
		       n < nr_rep && nz < nrz) {
			if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
				nz++;
			ofst += sizeof(struct blk_zone);
			n++;
		}

		kunmap_atomic(addr);

		if (n >= nr_rep || nz >= nrz)
			break;

	}

	*nr_zones = nz;
out:
	bio_for_each_segment_all(bv, bio, i)
		__free_page(bv->bv_page);
	bio_put(bio);

	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
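
/*
 * Usage note (illustrative sketch, not compiled): a kernel-side caller that
 * only needs the zone containing a given sector, for instance to look at its
 * write pointer, can pass a single-entry array. "bdev" and "sector" are
 * assumed to be provided by the caller; a zero zone count on return means
 * @sector is past the end of the device or partition.
 *
 *	struct blk_zone zone;
 *	unsigned int nr_zones = 1;
 *	int ret;
 *
 *	ret = blkdev_report_zones(bdev, sector, &zone, &nr_zones, GFP_KERNEL);
 *	if (ret || !nr_zones)
 *		return ret ? ret : -ENXIO;
 *	if (zone.type != BLK_ZONE_TYPE_CONVENTIONAL)
 *		pr_debug("zone %llu: wp %llu\n",
 *			 (unsigned long long)zone.start,
 *			 (unsigned long long)zone.wp);
 */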

/**
 * blkdev_reset_zones - Reset zones write pointer
 * @bdev: Target block device
 * @sector: Start sector of the first zone to reset
 * @nr_sectors: Number of sectors to reset, at least the length of one zone
 * @gfp_mask: Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Reset the write pointer of the zones contained in the range
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 */
int blkdev_reset_zones(struct block_device *bdev,
		       sector_t sector, sector_t nr_sectors,
		       gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t zone_sectors;
	sector_t end_sector = sector + nr_sectors;
	struct bio *bio;
	int ret;

	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -EOPNOTSUPP;

	if (end_sector > bdev->bd_part->nr_sects)
		/* Out of range */
		return -EINVAL;

	/* Check alignment (handle a possibly smaller last zone) */
	zone_sectors = blk_queue_zone_sectors(q);
	if (sector & (zone_sectors - 1))
		return -EINVAL;

	if ((nr_sectors & (zone_sectors - 1)) &&
	    end_sector != bdev->bd_part->nr_sects)
		return -EINVAL;

	while (sector < end_sector) {

		bio = bio_alloc(gfp_mask, 0);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);

		ret = submit_bio_wait(bio);
		bio_put(bio);

		if (ret)
			return ret;

		sector += zone_sectors;

		/* This may take a while, so be nice to others */
		cond_resched();

	}

	return 0;
}
EXPORT_SYMBOL_GPL(blkdev_reset_zones);

/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
			      unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_report rep;
	struct blk_zone *zones;
	int ret;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
		return -EFAULT;

	if (!rep.nr_zones)
		return -EINVAL;

	if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone))
		return -ERANGE;

	zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
			       GFP_KERNEL | __GFP_ZERO);
	if (!zones)
		return -ENOMEM;

	ret = blkdev_report_zones(bdev, rep.sector,
				  zones, &rep.nr_zones,
				  GFP_KERNEL);
	if (ret)
		goto out;

	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
		ret = -EFAULT;
		goto out;
	}

	if (rep.nr_zones) {
		if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
				 sizeof(struct blk_zone) * rep.nr_zones))
			ret = -EFAULT;
	}

out:
	kvfree(zones);

	return ret;
}

/*
 * BLKRESETZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
			     unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct request_queue *q;
	struct blk_zone_range zrange;

	if (!argp)
		return -EINVAL;

	q = bdev_get_queue(bdev);
	if (!q)
		return -ENXIO;

	if (!blk_queue_is_zoned(q))
		return -ENOTTY;

	if (!capable(CAP_SYS_ADMIN))
		return -EACCES;

	if (!(mode & FMODE_WRITE))
		return -EBADF;

	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
		return -EFAULT;

	return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
				  GFP_KERNEL);
}
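
/*
 * Usage note (illustrative sketch): from user space, the two ioctls handled
 * above are issued using the definitions from <linux/blkzoned.h>. The snippet
 * below resets a single zone; "fd" must be a block device file descriptor
 * opened with write access, and "zone_start" / "zone_len" (in 512B sectors)
 * are assumed to have been obtained from a prior BLKREPORTZONE call, since
 * blkdev_reset_zones() rejects ranges that are not zone aligned.
 *
 *	#include <linux/blkzoned.h>
 *	#include <sys/ioctl.h>
 *
 *	struct blk_zone_range range = {
 *		.sector		= zone_start,
 *		.nr_sectors	= zone_len,
 *	};
 *
 *	if (ioctl(fd, BLKRESETZONE, &range) < 0)
 *		perror("BLKRESETZONE");
 */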