// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/part_stat.h>
#include <linux/blk-cgroup.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
#include "blk-throttle.h"

static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
{
        *bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
}

static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
{
        struct bvec_iter iter = bio->bi_iter;
        int idx;

        bio_get_first_bvec(bio, bv);
        if (bv->bv_len == bio->bi_iter.bi_size)
                return;         /* this bio only has a single bvec */

        bio_advance_iter(bio, &iter, iter.bi_size);

        if (!iter.bi_bvec_done)
                idx = iter.bi_idx - 1;
        else    /* in the middle of bvec */
                idx = iter.bi_idx;

        *bv = bio->bi_io_vec[idx];

        /*
         * iter.bi_bvec_done records actual length of the last bvec
         * if this bio ends in the middle of one io vector
         */
        if (iter.bi_bvec_done)
                bv->bv_len = iter.bi_bvec_done;
}

static inline bool bio_will_gap(struct request_queue *q,
                struct request *prev_rq, struct bio *prev, struct bio *next)
{
        struct bio_vec pb, nb;

        if (!bio_has_data(prev) || !queue_virt_boundary(q))
                return false;

        /*
         * Don't merge if the 1st bio starts with non-zero offset, otherwise it
         * is quite difficult to respect the sg gap limit. We work hard to
         * merge a huge number of small single bios in case of mkfs.
         */
        if (prev_rq)
                bio_get_first_bvec(prev_rq->bio, &pb);
        else
                bio_get_first_bvec(prev, &pb);
        if (pb.bv_offset & queue_virt_boundary(q))
                return true;

        /*
         * We don't need to worry about the situation that the merged segment
         * ends in unaligned virt boundary:
         *
         * - if 'pb' ends aligned, the merged segment ends aligned
         * - if 'pb' ends unaligned, the next bio must include
         *   one single bvec of 'nb', otherwise the 'nb' can't
         *   merge with 'pb'
         */
        bio_get_last_bvec(prev, &pb);
        bio_get_first_bvec(next, &nb);
        if (biovec_phys_mergeable(q, &pb, &nb))
                return false;
        return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
}

static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
{
        return bio_will_gap(req->q, req, req->biotail, bio);
}

static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
{
        return bio_will_gap(req->q, NULL, bio, req->bio);
}

/*
 * The max size one bio can handle is UINT_MAX because bvec_iter.bi_size
 * is defined as 'unsigned int', meantime it has to be aligned to the
 * logical block size, which is the minimum accepted unit by hardware.
 */
static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
{
        return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
}
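
/*
 * Worked example (illustrative, not part of the original source): with a
 * 4096-byte logical block size, round_down(UINT_MAX, 4096) is 4294963200
 * bytes, so the cap above evaluates to 8388600 sectors; with 512-byte
 * logical blocks it is 8388607 sectors. Either way the limit stays a whole
 * number of logical blocks just below UINT_MAX bytes.
 */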

/*
 * bio_submit_split_bioset - Submit a bio, splitting it at a designated sector
 * @bio: the original bio to be submitted and split
 * @split_sectors: the sector count at which to split
 * @bs: the bio set used for allocating the new split bio
 *
 * The original bio is modified to contain the remaining sectors and submitted.
 * The caller is responsible for submitting the returned bio.
 *
 * On success, the newly allocated bio representing the initial part is
 * returned; on failure, NULL is returned and the original bio is completed
 * with an error.
 */
struct bio *bio_submit_split_bioset(struct bio *bio, unsigned int split_sectors,
                                    struct bio_set *bs)
{
        struct bio *split = bio_split(bio, split_sectors, GFP_NOIO, bs);

        if (IS_ERR(split)) {
                bio->bi_status = errno_to_blk_status(PTR_ERR(split));
                bio_endio(bio);
                return NULL;
        }

        bio_chain(split, bio);
        trace_block_split(split, bio->bi_iter.bi_sector);
        WARN_ON_ONCE(bio_zone_write_plugging(bio));

        if (should_fail_bio(bio))
                bio_io_error(bio);
        else if (!blk_throtl_bio(bio))
                submit_bio_noacct_nocheck(bio, true);

        return split;
}
EXPORT_SYMBOL_GPL(bio_submit_split_bioset);
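
/*
 * Illustrative call pattern (a sketch, not code from this file): a caller
 * that must cap a bio at 'split_sectors' could do
 *
 *	split = bio_submit_split_bioset(bio, split_sectors, &my_bio_set);
 *	if (split)
 *		submit_bio_noacct(split);
 *
 * where 'my_bio_set' is a made-up name for a caller-owned bio_set. The
 * remainder has already been chained and submitted above; a NULL return
 * means the original bio was completed with an error and nothing more needs
 * to be done.
 */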

static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
{
        if (unlikely(split_sectors < 0)) {
                bio->bi_status = errno_to_blk_status(split_sectors);
                bio_endio(bio);
                return NULL;
        }

        if (split_sectors) {
                bio = bio_submit_split_bioset(bio, split_sectors,
                                &bio->bi_bdev->bd_disk->bio_split);
                if (bio)
                        bio->bi_opf |= REQ_NOMERGE;
        }

        return bio;
}

struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
                unsigned *nsegs)
{
        unsigned int max_discard_sectors, granularity;
        sector_t tmp;
        unsigned split_sectors;

        *nsegs = 1;

        granularity = max(lim->discard_granularity >> 9, 1U);

        max_discard_sectors =
                min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
        max_discard_sectors -= max_discard_sectors % granularity;
        if (unlikely(!max_discard_sectors))
                return bio;

        if (bio_sectors(bio) <= max_discard_sectors)
                return bio;

        split_sectors = max_discard_sectors;

        /*
         * If the next starting sector would be misaligned, stop the discard at
         * the previous aligned sector.
         */
        tmp = bio->bi_iter.bi_sector + split_sectors -
                ((lim->discard_alignment >> 9) % granularity);
        tmp = sector_div(tmp, granularity);

        if (split_sectors > tmp)
                split_sectors -= tmp;

        return bio_submit_split(bio, split_sectors);
}

static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
                                                bool is_atomic)
{
        /*
         * chunk_sectors must be a multiple of atomic_write_boundary_sectors if
         * both non-zero.
         */
        if (is_atomic && lim->atomic_write_boundary_sectors)
                return lim->atomic_write_boundary_sectors;

        return lim->chunk_sectors;
}
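
/*
 * Worked example (illustrative): with lim->chunk_sectors = 256 and a
 * non-atomic bio starting at sector 200, blk_boundary_sectors() returns 256
 * and blk_boundary_sectors_left(200, 256) leaves 56 sectors before the
 * boundary, so get_max_io_size() below caps the first piece at 56 sectors and
 * the remainder starts exactly on the 256-sector boundary. For an atomic
 * write with a non-zero atomic_write_boundary_sectors, that limit is used as
 * the boundary instead.
 */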

/*
 * Return the maximum number of sectors from the start of a bio that may be
 * submitted as a single request to a block device. If enough sectors remain,
 * align the end to the physical block size. Otherwise align the end to the
 * logical block size. This approach minimizes the number of non-aligned
 * requests that are submitted to a block device if the start of a bio is not
 * aligned to a physical block boundary.
 */
static inline unsigned get_max_io_size(struct bio *bio,
                                       const struct queue_limits *lim)
{
        unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
        unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
        bool is_atomic = bio->bi_opf & REQ_ATOMIC;
        unsigned boundary_sectors = blk_boundary_sectors(lim, is_atomic);
        unsigned max_sectors, start, end;

        /*
         * We ignore lim->max_sectors for atomic writes because it may be less
         * than the actual bio size, which we cannot tolerate.
         */
        if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
                max_sectors = lim->max_write_zeroes_sectors;
        else if (is_atomic)
                max_sectors = lim->atomic_write_max_sectors;
        else
                max_sectors = lim->max_sectors;

        if (boundary_sectors) {
                max_sectors = min(max_sectors,
                        blk_boundary_sectors_left(bio->bi_iter.bi_sector,
                                                  boundary_sectors));
        }

        start = bio->bi_iter.bi_sector & (pbs - 1);
        end = (start + max_sectors) & ~(pbs - 1);
        if (end > start)
                return end - start;
        return max_sectors & ~(lbs - 1);
}
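
/*
 * Worked example (illustrative): with a 4096-byte physical block size
 * (pbs = 8 sectors), a bio starting at sector 3 and max_sectors = 1280,
 * start = 3 and end = (3 + 1280) & ~7 = 1280, so 1277 sectors are allowed
 * and the split ends at sector 1280, i.e. on a physical block boundary.
 * Only when max_sectors is too small to reach the next physical block
 * boundary does the fallback round it down to the logical block size instead.
 */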

/**
 * bvec_split_segs - verify whether or not a bvec should be split in the middle
 * @lim:      [in] queue limits to split based on
 * @bv:       [in] bvec to examine
 * @nsegs:    [in,out] Number of segments in the bio being built. Incremented
 *            by the number of segments from @bv that may be appended to that
 *            bio without exceeding @max_segs
 * @bytes:    [in,out] Number of bytes in the bio being built. Incremented
 *            by the number of bytes from @bv that may be appended to that
 *            bio without exceeding @max_bytes
 * @max_segs: [in] upper bound for *@nsegs
 * @max_bytes: [in] upper bound for *@bytes
 *
 * When splitting a bio, it can happen that a bvec is encountered that is too
 * big to fit in a single segment and hence that it has to be split in the
 * middle. This function verifies whether or not that should happen. The value
 * %true is returned if and only if appending the entire @bv to a bio with
 * *@nsegs segments and *@bytes bytes would make that bio unacceptable for
 * the block driver.
 */
static bool bvec_split_segs(const struct queue_limits *lim,
                const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
                unsigned max_segs, unsigned max_bytes)
{
        unsigned max_len = max_bytes - *bytes;
        unsigned len = min(bv->bv_len, max_len);
        unsigned total_len = 0;
        unsigned seg_size = 0;

        while (len && *nsegs < max_segs) {
                seg_size = get_max_segment_size(lim, bvec_phys(bv) + total_len, len);

                (*nsegs)++;
                total_len += seg_size;
                len -= seg_size;

                if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
                        break;
        }

        *bytes += total_len;

        /* tell the caller to split the bvec if it is too big to fit */
        return len > 0 || bv->bv_len > max_len;
}

static unsigned int bio_split_alignment(struct bio *bio,
                const struct queue_limits *lim)
{
        if (op_is_write(bio_op(bio)) && lim->zone_write_granularity)
                return lim->zone_write_granularity;
        return lim->logical_block_size;
}

static inline unsigned int bvec_seg_gap(struct bio_vec *bvprv,
                                        struct bio_vec *bv)
{
        return bv->bv_offset | (bvprv->bv_offset + bvprv->bv_len);
}

/**
 * bio_split_io_at - check if and where to split a bio
 * @bio:  [in] bio to be split
 * @lim:  [in] queue limits to split based on
 * @segs: [out] number of segments in the bio with the first half of the sectors
 * @max_bytes: [in] maximum number of bytes per bio
 * @len_align_mask: [in] length alignment mask for each vector
 *
 * Find out if @bio needs to be split to fit the queue limits in @lim and a
 * maximum size of @max_bytes. Returns a negative error number if @bio can't be
 * split, 0 if the bio doesn't have to be split, or a positive sector offset if
 * @bio needs to be split.
 */
int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
                unsigned *segs, unsigned max_bytes, unsigned len_align_mask)
{
        struct bio_vec bv, bvprv, *bvprvp = NULL;
        unsigned nsegs = 0, bytes = 0, gaps = 0;
        struct bvec_iter iter;

        bio_for_each_bvec(bv, bio, iter) {
                if (bv.bv_offset & lim->dma_alignment ||
                    bv.bv_len & len_align_mask)
                        return -EINVAL;

                /*
                 * If the queue doesn't support SG gaps and adding this
                 * offset would create a gap, disallow it.
                 */
                if (bvprvp) {
                        if (bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
                                goto split;
                        gaps |= bvec_seg_gap(bvprvp, &bv);
                }

                if (nsegs < lim->max_segments &&
                    bytes + bv.bv_len <= max_bytes &&
                    bv.bv_offset + bv.bv_len <= lim->max_fast_segment_size) {
                        nsegs++;
                        bytes += bv.bv_len;
                } else {
                        if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
                                        lim->max_segments, max_bytes))
                                goto split;
                }

                bvprv = bv;
                bvprvp = &bvprv;
        }

        *segs = nsegs;
        bio->bi_bvec_gap_bit = ffs(gaps);
        return 0;
split:
        if (bio->bi_opf & REQ_ATOMIC)
                return -EINVAL;

        /*
         * We can't sanely support splitting for a REQ_NOWAIT bio. End it
         * with EAGAIN if splitting is required and return an error.
         */
        if (bio->bi_opf & REQ_NOWAIT)
                return -EAGAIN;

        *segs = nsegs;

        /*
         * Individual bvecs might not be logical block aligned. Round down the
         * split size so that each bio is properly block size aligned, even if
         * we do not use the full hardware limits.
         *
         * It is possible to submit a bio that can't be split into a valid io:
         * there may either be too many discontiguous vectors for the max
         * segments limit, or contain virtual boundary gaps without having a
         * valid block sized split. A zero byte result means one of those
         * conditions occurred.
         */
        bytes = ALIGN_DOWN(bytes, bio_split_alignment(bio, lim));
        if (!bytes)
                return -EINVAL;

        /*
         * Bio splitting may cause subtle trouble such as hang when doing sync
         * iopoll in direct IO routine. Given that the performance gain of
         * iopoll for big IO can be trivial, disable iopoll when a split is
         * needed.
         */
        bio_clear_polled(bio);
        bio->bi_bvec_gap_bit = ffs(gaps);
        return bytes >> SECTOR_SHIFT;
}
EXPORT_SYMBOL_GPL(bio_split_io_at);

struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
                unsigned *nr_segs)
{
        return bio_submit_split(bio,
                bio_split_rw_at(bio, lim, nr_segs,
                        get_max_io_size(bio, lim) << SECTOR_SHIFT));
}

/*
 * REQ_OP_ZONE_APPEND bios must never be split by the block layer.
 *
 * But we want the nr_segs calculation provided by bio_split_rw_at, and having
 * a good sanity check that the submitter built the bio correctly is nice to
 * have as well.
 */
struct bio *bio_split_zone_append(struct bio *bio,
                const struct queue_limits *lim, unsigned *nr_segs)
{
        int split_sectors;

        split_sectors = bio_split_rw_at(bio, lim, nr_segs,
                        lim->max_zone_append_sectors << SECTOR_SHIFT);
        if (WARN_ON_ONCE(split_sectors > 0))
                split_sectors = -EINVAL;
        return bio_submit_split(bio, split_sectors);
}

struct bio *bio_split_write_zeroes(struct bio *bio,
                const struct queue_limits *lim, unsigned *nsegs)
{
        unsigned int max_sectors = get_max_io_size(bio, lim);

        *nsegs = 0;

        /*
         * An unset limit should normally not happen, as bio submission is
         * keyed off having a non-zero limit. But SCSI can clear the limit in
         * the I/O completion handler, and we can race and see this. Splitting
         * to a zero limit obviously doesn't make sense, so band-aid it here.
         */
        if (!max_sectors)
                return bio;
        if (bio_sectors(bio) <= max_sectors)
                return bio;
        return bio_submit_split(bio, max_sectors);
}

/**
 * bio_split_to_limits - split a bio to fit the queue limits
 * @bio: bio to be split
 *
 * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
 * if so split off a bio fitting the limits from the beginning of @bio and
 * return it. @bio is shortened to the remainder and re-submitted.
 *
 * The split bio is allocated from @q->bio_split, which is provided by the
 * block layer.
 */
struct bio *bio_split_to_limits(struct bio *bio)
{
        unsigned int nr_segs;

        return __bio_split_to_limits(bio, bdev_limits(bio->bi_bdev), &nr_segs);
}
EXPORT_SYMBOL(bio_split_to_limits);
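
/*
 * Illustrative call pattern (a sketch, not code from this file): stacking
 * drivers that process bios directly typically call this at the top of their
 * ->submit_bio() handler, for example
 *
 *	static void my_submit_bio(struct bio *bio)
 *	{
 *		bio = bio_split_to_limits(bio);
 *		if (!bio)
 *			return;
 *		...
 *	}
 *
 * where 'my_submit_bio' is a made-up name. A NULL return means the original
 * bio has already been ended (e.g. on a split failure) and must not be
 * touched again.
 */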

unsigned int blk_recalc_rq_segments(struct request *rq)
{
        unsigned int nr_phys_segs = 0;
        unsigned int bytes = 0;
        struct req_iterator iter;
        struct bio_vec bv;

        if (!rq->bio)
                return 0;

        switch (bio_op(rq->bio)) {
        case REQ_OP_DISCARD:
        case REQ_OP_SECURE_ERASE:
                if (queue_max_discard_segments(rq->q) > 1) {
                        struct bio *bio = rq->bio;

                        for_each_bio(bio)
                                nr_phys_segs++;
                        return nr_phys_segs;
                }
                return 1;
        case REQ_OP_WRITE_ZEROES:
                return 0;
        default:
                break;
        }

        rq_for_each_bvec(bv, rq, iter)
                bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
                                UINT_MAX, UINT_MAX);
        return nr_phys_segs;
}

static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
                                                  sector_t offset)
{
        struct request_queue *q = rq->q;
        struct queue_limits *lim = &q->limits;
        unsigned int max_sectors, boundary_sectors;
        bool is_atomic = rq->cmd_flags & REQ_ATOMIC;

        if (blk_rq_is_passthrough(rq))
                return q->limits.max_hw_sectors;

        boundary_sectors = blk_boundary_sectors(lim, is_atomic);
        max_sectors = blk_queue_get_max_sectors(rq);

        if (!boundary_sectors ||
            req_op(rq) == REQ_OP_DISCARD ||
            req_op(rq) == REQ_OP_SECURE_ERASE)
                return max_sectors;
        return min(max_sectors,
                   blk_boundary_sectors_left(offset, boundary_sectors));
}

static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
                unsigned int nr_phys_segs)
{
        if (!blk_cgroup_mergeable(req, bio))
                goto no_merge;

        if (blk_integrity_merge_bio(req->q, req, bio) == false)
                goto no_merge;

        /* discard request merge won't add new segment */
        if (req_op(req) == REQ_OP_DISCARD)
                return 1;

        if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
                goto no_merge;

        /*
         * This will form the start of a new hw segment. Bump both
         * counters.
         */
        req->nr_phys_segments += nr_phys_segs;
        if (bio_integrity(bio))
                req->nr_integrity_segments += blk_rq_count_integrity_sg(req->q,
                                                                        bio);
        return 1;

no_merge:
        req_set_nomerge(req->q, req);
        return 0;
}

int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
{
        if (req_gap_back_merge(req, bio))
                return 0;
        if (blk_integrity_rq(req) &&
            integrity_req_gap_back_merge(req, bio))
                return 0;
        if (!bio_crypt_ctx_back_mergeable(req, bio))
                return 0;
        if (blk_rq_sectors(req) + bio_sectors(bio) >
            blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
                req_set_nomerge(req->q, req);
                return 0;
        }

        return ll_new_hw_segment(req, bio, nr_segs);
}

static int ll_front_merge_fn(struct request *req, struct bio *bio,
                unsigned int nr_segs)
{
        if (req_gap_front_merge(req, bio))
                return 0;
        if (blk_integrity_rq(req) &&
            integrity_req_gap_front_merge(req, bio))
                return 0;
        if (!bio_crypt_ctx_front_mergeable(req, bio))
                return 0;
        if (blk_rq_sectors(req) + bio_sectors(bio) >
            blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
                req_set_nomerge(req->q, req);
                return 0;
        }

        return ll_new_hw_segment(req, bio, nr_segs);
}

static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
                struct request *next)
{
        unsigned short segments = blk_rq_nr_discard_segments(req);

        if (segments >= queue_max_discard_segments(q))
                goto no_merge;
        if (blk_rq_sectors(req) + bio_sectors(next->bio) >
            blk_rq_get_max_sectors(req, blk_rq_pos(req)))
                goto no_merge;

        req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
        return true;
no_merge:
        req_set_nomerge(q, req);
        return false;
}

static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
                                struct request *next)
{
        int total_phys_segments;

        if (req_gap_back_merge(req, next->bio))
                return 0;

        /*
         * Will it become too large?
         */
        if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
            blk_rq_get_max_sectors(req, blk_rq_pos(req)))
                return 0;

        total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
        if (total_phys_segments > blk_rq_get_max_segments(req))
                return 0;

        if (!blk_cgroup_mergeable(req, next->bio))
                return 0;

        if (blk_integrity_merge_rq(q, req, next) == false)
                return 0;

        if (!bio_crypt_ctx_merge_rq(req, next))
                return 0;

        /* Merge is OK... */
        req->nr_phys_segments = total_phys_segments;
        req->nr_integrity_segments += next->nr_integrity_segments;
        return 1;
}

/**
 * blk_rq_set_mixed_merge - mark a request as mixed merge
 * @rq: request to mark as mixed merge
 *
 * Description:
 *     @rq is about to be mixed merged. Make sure the attributes
 *     which can be mixed are set in each bio and mark @rq as mixed
 *     merged.
 */
static void blk_rq_set_mixed_merge(struct request *rq)
{
        blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
        struct bio *bio;

        if (rq->rq_flags & RQF_MIXED_MERGE)
                return;

        /*
         * @rq will no longer represent mixable attributes for all the
         * contained bios. It will just track those of the first one.
         * Distribute the attributes to each bio.
         */
        for (bio = rq->bio; bio; bio = bio->bi_next) {
                WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
                             (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
                bio->bi_opf |= ff;
        }
        rq->rq_flags |= RQF_MIXED_MERGE;
}

static inline blk_opf_t bio_failfast(const struct bio *bio)
{
        if (bio->bi_opf & REQ_RAHEAD)
                return REQ_FAILFAST_MASK;

        return bio->bi_opf & REQ_FAILFAST_MASK;
}

/*
 * After we are marked as MIXED_MERGE, any new RA bio has to be updated
 * as failfast, and request's failfast has to be updated in case of
 * front merge.
 */
static inline void blk_update_mixed_merge(struct request *req,
                struct bio *bio, bool front_merge)
{
        if (req->rq_flags & RQF_MIXED_MERGE) {
                if (bio->bi_opf & REQ_RAHEAD)
                        bio->bi_opf |= REQ_FAILFAST_MASK;

                if (front_merge) {
                        req->cmd_flags &= ~REQ_FAILFAST_MASK;
                        req->cmd_flags |= bio->bi_opf & REQ_FAILFAST_MASK;
                }
        }
}

static void blk_account_io_merge_request(struct request *req)
{
        if (req->rq_flags & RQF_IO_STAT) {
                part_stat_lock();
                part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
                part_stat_local_dec(req->part,
                                    in_flight[op_is_write(req_op(req))]);
                part_stat_unlock();
        }
}

static enum elv_merge blk_try_req_merge(struct request *req,
                                        struct request *next)
{
        if (blk_discard_mergable(req))
                return ELEVATOR_DISCARD_MERGE;
        else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
                return ELEVATOR_BACK_MERGE;

        return ELEVATOR_NO_MERGE;
}

static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
                                              struct bio *bio)
{
        return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
}

static bool blk_atomic_write_mergeable_rqs(struct request *rq,
                                           struct request *next)
{
        return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
}

u8 bio_seg_gap(struct request_queue *q, struct bio *prev, struct bio *next,
                u8 gaps_bit)
{
        struct bio_vec pb, nb;

        if (!bio_has_data(prev))
                return 0;

        gaps_bit = min_not_zero(gaps_bit, prev->bi_bvec_gap_bit);
        gaps_bit = min_not_zero(gaps_bit, next->bi_bvec_gap_bit);

        bio_get_last_bvec(prev, &pb);
        bio_get_first_bvec(next, &nb);
        if (!biovec_phys_mergeable(q, &pb, &nb))
                gaps_bit = min_not_zero(gaps_bit, ffs(bvec_seg_gap(&pb, &nb)));
        return gaps_bit;
}

/*
 * For non-mq, this has to be called with the request spinlock acquired.
 * For mq with scheduling, the appropriate queue wide lock should be held.
 */
static struct request *attempt_merge(struct request_queue *q,
                                     struct request *req, struct request *next)
{
        if (!rq_mergeable(req) || !rq_mergeable(next))
                return NULL;

        if (req_op(req) != req_op(next))
                return NULL;

        if (req->bio->bi_write_hint != next->bio->bi_write_hint)
                return NULL;
        if (req->bio->bi_write_stream != next->bio->bi_write_stream)
                return NULL;
        if (req->bio->bi_ioprio != next->bio->bi_ioprio)
                return NULL;
        if (!blk_atomic_write_mergeable_rqs(req, next))
                return NULL;

        /*
         * If we are allowed to merge, then append bio list
         * from next to rq and release next. merge_requests_fn
         * will have updated segment counts, update sector
         * counts here. Handle DISCARDs separately, as they
         * have separate settings.
         */

        switch (blk_try_req_merge(req, next)) {
        case ELEVATOR_DISCARD_MERGE:
                if (!req_attempt_discard_merge(q, req, next))
                        return NULL;
                break;
        case ELEVATOR_BACK_MERGE:
                if (!ll_merge_requests_fn(q, req, next))
                        return NULL;
                break;
        default:
                return NULL;
        }

        /*
         * If failfast settings disagree or any of the two is already
         * a mixed merge, mark both as mixed before proceeding. This
         * makes sure that all involved bios have mixable attributes
         * set properly.
         */
        if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
            (req->cmd_flags & REQ_FAILFAST_MASK) !=
            (next->cmd_flags & REQ_FAILFAST_MASK)) {
                blk_rq_set_mixed_merge(req);
                blk_rq_set_mixed_merge(next);
        }

        /*
         * At this point we have either done a back merge or front merge. We
         * need the smaller start_time_ns of the merged requests to be the
         * current request for accounting purposes.
         */
        if (next->start_time_ns < req->start_time_ns)
                req->start_time_ns = next->start_time_ns;

        req->phys_gap_bit = bio_seg_gap(req->q, req->biotail, next->bio,
                                        min_not_zero(next->phys_gap_bit,
                                                     req->phys_gap_bit));
        req->biotail->bi_next = next->bio;
        req->biotail = next->biotail;

        req->__data_len += blk_rq_bytes(next);

        if (!blk_discard_mergable(req))
                elv_merge_requests(q, req, next);

        blk_crypto_rq_put_keyslot(next);

        /*
         * 'next' is going away, so update stats accordingly
         */
        blk_account_io_merge_request(next);

        trace_block_rq_merge(next);

        /*
         * ownership of bio passed from next to req, return 'next' for
         * the caller to free
         */
        next->bio = NULL;
        return next;
}

static struct request *attempt_back_merge(struct request_queue *q,
                struct request *rq)
{
        struct request *next = elv_latter_request(q, rq);

        if (next)
                return attempt_merge(q, rq, next);

        return NULL;
}

static struct request *attempt_front_merge(struct request_queue *q,
                struct request *rq)
{
        struct request *prev = elv_former_request(q, rq);

        if (prev)
                return attempt_merge(q, prev, rq);

        return NULL;
}

/*
 * Try to merge 'next' into 'rq'. Return true if the merge happened, false
 * otherwise. The caller is responsible for freeing 'next' if the merge
 * happened.
 */
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
                           struct request *next)
{
        return attempt_merge(q, rq, next);
}

bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
        if (!rq_mergeable(rq) || !bio_mergeable(bio))
                return false;

        if (req_op(rq) != bio_op(bio))
                return false;

        if (!blk_cgroup_mergeable(rq, bio))
                return false;
        if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
                return false;
        if (!bio_crypt_rq_ctx_compatible(rq, bio))
                return false;
        if (rq->bio->bi_write_hint != bio->bi_write_hint)
                return false;
        if (rq->bio->bi_write_stream != bio->bi_write_stream)
                return false;
        if (rq->bio->bi_ioprio != bio->bi_ioprio)
                return false;
        if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
                return false;

        return true;
}

enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
{
        if (blk_discard_mergable(rq))
                return ELEVATOR_DISCARD_MERGE;
        else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
                return ELEVATOR_BACK_MERGE;
        else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
                return ELEVATOR_FRONT_MERGE;
        return ELEVATOR_NO_MERGE;
}
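
/*
 * Worked example (illustrative): for a request covering sectors 100..107
 * (blk_rq_pos() == 100, blk_rq_sectors() == 8), a bio starting at sector 108
 * is a back merge candidate (100 + 8 == 108), while an 8-sector bio starting
 * at sector 92 is a front merge candidate (100 - 8 == 92). Anything else,
 * discards aside, is ELEVATOR_NO_MERGE.
 */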

static void blk_account_io_merge_bio(struct request *req)
{
        if (req->rq_flags & RQF_IO_STAT) {
                part_stat_lock();
                part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
                part_stat_unlock();
        }
}

enum bio_merge_status bio_attempt_back_merge(struct request *req,
                struct bio *bio, unsigned int nr_segs)
{
        const blk_opf_t ff = bio_failfast(bio);

        if (!ll_back_merge_fn(req, bio, nr_segs))
                return BIO_MERGE_FAILED;

        trace_block_bio_backmerge(bio);
        rq_qos_merge(req->q, req, bio);

        if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
                blk_rq_set_mixed_merge(req);

        blk_update_mixed_merge(req, bio, false);

        if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
                blk_zone_write_plug_bio_merged(bio);

        req->phys_gap_bit = bio_seg_gap(req->q, req->biotail, bio,
                                        req->phys_gap_bit);
        req->biotail->bi_next = bio;
        req->biotail = bio;
        req->__data_len += bio->bi_iter.bi_size;

        bio_crypt_free_ctx(bio);

        blk_account_io_merge_bio(req);
        return BIO_MERGE_OK;
}

static enum bio_merge_status bio_attempt_front_merge(struct request *req,
                struct bio *bio, unsigned int nr_segs)
{
        const blk_opf_t ff = bio_failfast(bio);

        /*
         * A front merge for writes to sequential zones of a zoned block device
         * can happen only if the user submitted writes out of order. Do not
         * merge such a write, to let it fail.
         */
        if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
                return BIO_MERGE_FAILED;

        if (!ll_front_merge_fn(req, bio, nr_segs))
                return BIO_MERGE_FAILED;

        trace_block_bio_frontmerge(bio);
        rq_qos_merge(req->q, req, bio);

        if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
                blk_rq_set_mixed_merge(req);

        blk_update_mixed_merge(req, bio, true);

        req->phys_gap_bit = bio_seg_gap(req->q, bio, req->bio,
                                        req->phys_gap_bit);
        bio->bi_next = req->bio;
        req->bio = bio;

        req->__sector = bio->bi_iter.bi_sector;
        req->__data_len += bio->bi_iter.bi_size;

        bio_crypt_do_front_merge(req, bio);

        blk_account_io_merge_bio(req);
        return BIO_MERGE_OK;
}

static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
                struct request *req, struct bio *bio)
{
        unsigned short segments = blk_rq_nr_discard_segments(req);

        if (segments >= queue_max_discard_segments(q))
                goto no_merge;
        if (blk_rq_sectors(req) + bio_sectors(bio) >
            blk_rq_get_max_sectors(req, blk_rq_pos(req)))
                goto no_merge;

        rq_qos_merge(q, req, bio);

        req->biotail->bi_next = bio;
        req->biotail = bio;
        req->__data_len += bio->bi_iter.bi_size;
        req->nr_phys_segments = segments + 1;

        blk_account_io_merge_bio(req);
        return BIO_MERGE_OK;
no_merge:
        req_set_nomerge(q, req);
        return BIO_MERGE_FAILED;
}

static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
                                                   struct request *rq,
                                                   struct bio *bio,
                                                   unsigned int nr_segs,
                                                   bool sched_allow_merge)
{
        if (!blk_rq_merge_ok(rq, bio))
                return BIO_MERGE_NONE;

        switch (blk_try_merge(rq, bio)) {
        case ELEVATOR_BACK_MERGE:
                if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
                        return bio_attempt_back_merge(rq, bio, nr_segs);
                break;
        case ELEVATOR_FRONT_MERGE:
                if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
                        return bio_attempt_front_merge(rq, bio, nr_segs);
                break;
        case ELEVATOR_DISCARD_MERGE:
                return bio_attempt_discard_merge(q, rq, bio);
        default:
                return BIO_MERGE_NONE;
        }

        return BIO_MERGE_FAILED;
}

/**
 * blk_attempt_plug_merge - try to merge with %current's plugged list
 * @q: request_queue new bio is being queued at
 * @bio: new bio being queued
 * @nr_segs: number of segments in @bio
 *
 * Determine whether @bio being queued on @q can be merged with the previous
 * request on %current's plugged list. Returns %true if merge was successful,
 * otherwise %false.
 *
 * Plugging coalesces IOs from the same issuer for the same purpose without
 * going through @q->queue_lock. As such it's more of an issuing mechanism
 * than scheduling, and the request, while it may have elvpriv data, is not
 * added to the elevator at this point. In addition, we don't have
 * reliable access to the elevator outside queue lock. Only check basic
 * merging parameters without querying the elevator.
 *
 * Caller must ensure !blk_queue_nomerges(q) beforehand.
 */
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
                unsigned int nr_segs)
{
        struct blk_plug *plug = current->plug;
        struct request *rq;

        if (!plug || rq_list_empty(&plug->mq_list))
                return false;

        rq = plug->mq_list.tail;
        if (rq->q == q)
                return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
                       BIO_MERGE_OK;
        else if (!plug->multiple_queues)
                return false;

        rq_list_for_each(&plug->mq_list, rq) {
                if (rq->q != q)
                        continue;
                if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
                    BIO_MERGE_OK)
                        return true;
                break;
        }
        return false;
}

/*
 * Iterate list of requests and see if we can merge this bio with any
 * of them.
 */
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
                        struct bio *bio, unsigned int nr_segs)
{
        struct request *rq;
        int checked = 8;

        list_for_each_entry_reverse(rq, list, queuelist) {
                if (!checked--)
                        break;

                switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
                case BIO_MERGE_NONE:
                        continue;
                case BIO_MERGE_OK:
                        return true;
                case BIO_MERGE_FAILED:
                        return false;
                }
        }

        return false;
}
EXPORT_SYMBOL_GPL(blk_bio_list_merge);

bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
                unsigned int nr_segs, struct request **merged_request)
{
        struct request *rq;

        switch (elv_merge(q, &rq, bio)) {
        case ELEVATOR_BACK_MERGE:
                if (!blk_mq_sched_allow_merge(q, rq, bio))
                        return false;
                if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
                        return false;
                *merged_request = attempt_back_merge(q, rq);
                if (!*merged_request)
                        elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
                return true;
        case ELEVATOR_FRONT_MERGE:
                if (!blk_mq_sched_allow_merge(q, rq, bio))
                        return false;
                if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
                        return false;
                *merged_request = attempt_front_merge(q, rq);
                if (!*merged_request)
                        elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
                return true;
        case ELEVATOR_DISCARD_MERGE:
                return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
        default:
                return false;
        }
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);