// SPDX-License-Identifier: GPL-2.0
/*
 * Generic helpers for block device operations: discard, write same,
 * and write zeroes / zeroout.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
		gfp_t gfp)
{
	struct bio *new = bio_alloc(gfp, nr_pages);

	/*
	 * Chain the previous bio to the new one and submit it: the final
	 * bio in the chain then completes only after all earlier bios do.
	 */
	if (bio) {
		bio_chain(bio, new);
		submit_bio(bio);
	}

	return new;
}

/**
 * __blkdev_issue_discard - queue a discard with an anchor bio for chaining
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	BLKDEV_DISCARD_* flags to control behaviour
 * @biop:	pointer to anchor bio
 *
 * Description:
 *    Generate and chain discard bios for the range, submitting all but the
 *    last; the caller submits and waits on *@biop.
 */
int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, int flags,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	unsigned int granularity;
	unsigned int op;
	int alignment;
	sector_t bs_mask;

	if (!q)
		return -ENXIO;

	if (flags & BLKDEV_DISCARD_SECURE) {
		if (!blk_queue_secure_erase(q))
			return -EOPNOTSUPP;
		op = REQ_OP_SECURE_ERASE;
	} else {
		if (!blk_queue_discard(q))
			return -EOPNOTSUPP;
		op = REQ_OP_DISCARD;
	}

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	/* Zero-sector (unknown) and one-sector granularities are the same. */
	granularity = max(q->limits.discard_granularity >> 9, 1U);
	alignment = (bdev_discard_alignment(bdev) >> 9) % granularity;

	while (nr_sects) {
		unsigned int req_sects;
		sector_t end_sect, tmp;

		/* Make sure bi_size doesn't overflow */
		req_sects = min_t(sector_t, nr_sects, UINT_MAX >> 9);

		/*
		 * If splitting a request, and the next starting sector would be
		 * misaligned, stop the discard at the previous aligned sector.
		 */
		end_sect = sector + req_sects;
		tmp = end_sect;
		if (req_sects < nr_sects &&
		    sector_div(tmp, granularity) != alignment) {
			end_sect = end_sect - alignment;
			sector_div(end_sect, granularity);
			end_sect = end_sect * granularity + alignment;
			req_sects = end_sect - sector;
		}

		bio = next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, op, 0);

		bio->bi_iter.bi_size = req_sects << 9;
		nr_sects -= req_sects;
		sector = end_sect;

		/*
		 * We can loop for a long time in here, if someone does
		 * full device discards (like mkfs). Be nice and allow
		 * us to schedule out to avoid softlocking if preempt
		 * is disabled.
		 */
		cond_resched();
	}

	*biop = bio;
	return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);
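
/*
 * Illustrative sketch (an addition for exposition, not part of the original
 * file): one way a caller might batch several discontiguous discard ranges
 * through the *biop anchor, so all bios are chained under a single plug and
 * waited on once. The function name and the starts/lens arrays are
 * hypothetical.
 */
static int __maybe_unused example_discard_ranges(struct block_device *bdev,
		const sector_t *starts, const sector_t *lens, int count)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int i, ret = 0;

	blk_start_plug(&plug);
	for (i = 0; i < count && !ret; i++)
		ret = __blkdev_issue_discard(bdev, starts[i], lens[i],
					     GFP_KERNEL, 0, &bio);
	if (!ret && bio) {
		/* Completion of the final bio covers the whole chain. */
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	return ret;
}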

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	BLKDEV_DISCARD_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
			&bio);
	if (!ret && bio) {
		ret = submit_bio_wait(bio);
		if (ret == -EOPNOTSUPP)
			ret = 0;
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);

/**
 * __blkdev_issue_write_same - generate a number of bios with the same page
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data to write
 * @biop:	pointer to anchor bio
 *
 * Description:
 *    Generate and issue a number of bios (REQ_OP_WRITE_SAME) with the same
 *    page.
 */
static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct page *page,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_write_same_sectors;
	struct bio *bio = *biop;
	sector_t bs_mask;

	if (!q)
		return -ENXIO;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	if (!bdev_write_same(bdev))
		return -EOPNOTSUPP;

	/* Ensure that max_write_same_sectors doesn't overflow bi_size */
	max_write_same_sectors = UINT_MAX >> 9;

	while (nr_sects) {
		bio = next_bio(bio, 1, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio->bi_vcnt = 1;
		bio->bi_io_vec->bv_page = page;
		bio->bi_io_vec->bv_offset = 0;
		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
		bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);

		if (nr_sects > max_write_same_sectors) {
			bio->bi_iter.bi_size = max_write_same_sectors << 9;
			nr_sects -= max_write_same_sectors;
			sector += max_write_same_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}
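
/*
 * Illustrative sketch (an addition for exposition, not part of the original
 * file): zeroing a range by replicating the kernel's shared zero page via
 * the public blkdev_issue_write_same() declared in <linux/blkdev.h>. The
 * function name is hypothetical; the early capability check is redundant
 * (the helper also checks internally) and shown only for illustration.
 */
static int __maybe_unused example_write_same_zero(struct block_device *bdev,
		sector_t sector, sector_t nr_sects)
{
	if (!bdev_write_same(bdev))
		return -EOPNOTSUPP;	/* no hardware WRITE SAME support */

	return blkdev_issue_write_same(bdev, sector, nr_sects, GFP_KERNEL,
				       ZERO_PAGE(0));
}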

/**
 * blkdev_issue_write_same - queue a write same operation
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data
 *
 * Description:
 *    Issue a write same request for the sectors in question.
 */
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
				sector_t nr_sects, gfp_t gfp_mask,
				struct page *page)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
			&bio);
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);

static int __blkdev_issue_write_zeroes(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop, unsigned flags)
{
	struct bio *bio = *biop;
	unsigned int max_write_zeroes_sectors;
	struct request_queue *q = bdev_get_queue(bdev);

	if (!q)
		return -ENXIO;

	/* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
	max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);

	if (max_write_zeroes_sectors == 0)
		return -EOPNOTSUPP;

	while (nr_sects) {
		bio = next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio->bi_opf = REQ_OP_WRITE_ZEROES;
		if (flags & BLKDEV_ZERO_NOUNMAP)
			bio->bi_opf |= REQ_NOUNMAP;

		if (nr_sects > max_write_zeroes_sectors) {
			bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
			nr_sects -= max_write_zeroes_sectors;
			sector += max_write_zeroes_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/*
 * Convert a number of 512B sectors to a number of pages.
 * The result is limited to a number of pages that can fit into a BIO.
 * Also make sure that the result is always at least 1 (page) for the cases
 * where nr_sects is lower than the number of sectors in a page.
 */
static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
{
	sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);

	return min(pages, (sector_t)BIO_MAX_PAGES);
}

static int __blkdev_issue_zero_pages(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	int bi_size = 0;
	unsigned int sz;

	if (!q)
		return -ENXIO;

	while (nr_sects != 0) {
		bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
			       gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

		while (nr_sects != 0) {
			sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
			nr_sects -= bi_size >> 9;
			sector += bi_size >> 9;
			if (bi_size < sz)
				break;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}
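
/*
 * Illustrative sketch (an addition for exposition, not part of the original
 * file): forcing the explicit zero-page path for a range and waiting for it
 * synchronously, bypassing any WRITE ZEROES offload. The function name is
 * hypothetical; __blkdev_issue_zeroout() below is the proper entry point.
 */
static int __maybe_unused example_zero_pages_sync(struct block_device *bdev,
		sector_t sector, sector_t nr_sects)
{
	struct bio *bio = NULL;
	int ret;

	ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects, GFP_KERNEL,
					&bio);
	if (!ret && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	return ret;
}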

/**
 * __blkdev_issue_zeroout - generate a number of zero-filled write bios
 * @bdev:	blockdev to write
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @biop:	pointer to anchor bio
 * @flags:	controls detailed behavior
 *
 * Description:
 *    Zero-fill a block range, either using hardware offload or by explicitly
 *    writing zeroes to the device.
 *
 *    If a device is using logical block provisioning, the underlying space
 *    will not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
 *
 *    If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
 *    -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
 */
int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
		unsigned flags)
{
	int ret;
	sector_t bs_mask;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
			biop, flags);
	if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
		return ret;

	return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
					 biop);
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);

/**
 * blkdev_issue_zeroout - zero-fill a block range
 * @bdev:	blockdev to write
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	controls detailed behavior
 *
 * Description:
 *    Zero-fill a block range, either using hardware offload or by explicitly
 *    writing zeroes to the device. See __blkdev_issue_zeroout() for the
 *    valid values for %flags.
 */
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
	int ret = 0;
	sector_t bs_mask;
	struct bio *bio;
	struct blk_plug plug;
	bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

retry:
	bio = NULL;
	blk_start_plug(&plug);
	if (try_write_zeroes) {
		ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
						  gfp_mask, &bio, flags);
	} else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
		ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
						gfp_mask, &bio);
	} else {
		/* No zeroing offload support */
		ret = -EOPNOTSUPP;
	}
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	if (ret && try_write_zeroes) {
		if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
			try_write_zeroes = false;
			goto retry;
		}
		if (!bdev_write_zeroes_sectors(bdev)) {
			/*
			 * Zeroing offload support was indicated, but the
			 * device reported ILLEGAL REQUEST (for some devices
			 * there is no non-destructive way to verify whether
			 * WRITE ZEROES is actually supported).
			 */
			ret = -EOPNOTSUPP;
		}
	}

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
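
/*
 * Illustrative sketch (an addition for exposition, not part of the original
 * file): requesting a zeroing offload only. BLKDEV_ZERO_NOFALLBACK stops
 * the zero-page fallback, so -EOPNOTSUPP is returned when the device has no
 * usable WRITE ZEROES support; BLKDEV_ZERO_NOUNMAP keeps provisioned space
 * allocated. The function name is hypothetical.
 */
static int __maybe_unused example_zeroout_offload_only(struct block_device *bdev,
		sector_t sector, sector_t nr_sects)
{
	return blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
				    BLKDEV_ZERO_NOUNMAP |
				    BLKDEV_ZERO_NOFALLBACK);
}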