// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/part_stat.h>
#include <linux/blk-cgroup.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
#include "blk-throttle.h"

static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
{
	*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
}

static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
{
	struct bvec_iter iter = bio->bi_iter;
	int idx;

	bio_get_first_bvec(bio, bv);
	if (bv->bv_len == bio->bi_iter.bi_size)
		return;		/* this bio only has a single bvec */

	bio_advance_iter(bio, &iter, iter.bi_size);

	if (!iter.bi_bvec_done)
		idx = iter.bi_idx - 1;
	else	/* in the middle of bvec */
		idx = iter.bi_idx;

	*bv = bio->bi_io_vec[idx];

	/*
	 * iter.bi_bvec_done records actual length of the last bvec
	 * if this bio ends in the middle of one io vector
	 */
	if (iter.bi_bvec_done)
		bv->bv_len = iter.bi_bvec_done;
}

static inline bool bio_will_gap(struct request_queue *q,
		struct request *prev_rq, struct bio *prev, struct bio *next)
{
	struct bio_vec pb, nb;

	if (!bio_has_data(prev) || !queue_virt_boundary(q))
		return false;

	/*
	 * Don't merge if the 1st bio starts with non-zero offset, otherwise it
	 * is quite difficult to respect the sg gap limit. We work hard to
	 * merge a huge number of small single bios in case of mkfs.
	 */
	if (prev_rq)
		bio_get_first_bvec(prev_rq->bio, &pb);
	else
		bio_get_first_bvec(prev, &pb);
	if (pb.bv_offset & queue_virt_boundary(q))
		return true;

	/*
	 * We don't need to worry about the situation that the merged segment
	 * ends in unaligned virt boundary:
	 *
	 * - if 'pb' ends aligned, the merged segment ends aligned
	 * - if 'pb' ends unaligned, the next bio must include
	 *   one single bvec of 'nb', otherwise the 'nb' can't
	 *   merge with 'pb'
	 */
	bio_get_last_bvec(prev, &pb);
	bio_get_first_bvec(next, &nb);
	if (biovec_phys_mergeable(q, &pb, &nb))
		return false;
	return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
}

static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
{
	return bio_will_gap(req->q, req, req->biotail, bio);
}

static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
{
	return bio_will_gap(req->q, NULL, bio, req->bio);
}

/*
 * The max size one bio can handle is UINT_MAX because bvec_iter.bi_size
 * is defined as 'unsigned int', and it also has to be aligned to the
 * logical block size, which is the minimum accepted unit by hardware.
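 *
 * For example, with a 4096 byte logical block size this works out to
 * round_down(4294967295, 4096) = 4294963200 bytes, i.e. 8388600 sectors.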
 */
static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
{
	return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
}

static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
{
	if (unlikely(split_sectors < 0))
		goto error;

	if (split_sectors) {
		struct bio *split;

		split = bio_split(bio, split_sectors, GFP_NOIO,
				&bio->bi_bdev->bd_disk->bio_split);
		if (IS_ERR(split)) {
			split_sectors = PTR_ERR(split);
			goto error;
		}
		split->bi_opf |= REQ_NOMERGE;
		blkcg_bio_issue_init(split);
		bio_chain(split, bio);
		trace_block_split(split, bio->bi_iter.bi_sector);
		WARN_ON_ONCE(bio_zone_write_plugging(bio));
		submit_bio_noacct(bio);
		return split;
	}

	return bio;
error:
	bio->bi_status = errno_to_blk_status(split_sectors);
	bio_endio(bio);
	return NULL;
}

struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
		unsigned *nsegs)
{
	unsigned int max_discard_sectors, granularity;
	sector_t tmp;
	unsigned split_sectors;

	*nsegs = 1;

	granularity = max(lim->discard_granularity >> 9, 1U);

	max_discard_sectors =
		min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors))
		return bio;

	if (bio_sectors(bio) <= max_discard_sectors)
		return bio;

	split_sectors = max_discard_sectors;

	/*
	 * If the next starting sector would be misaligned, stop the discard at
	 * the previous aligned sector.
	 */
	tmp = bio->bi_iter.bi_sector + split_sectors -
		((lim->discard_alignment >> 9) % granularity);
	tmp = sector_div(tmp, granularity);

	if (split_sectors > tmp)
		split_sectors -= tmp;

	return bio_submit_split(bio, split_sectors);
}

static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
						bool is_atomic)
{
	/*
	 * chunk_sectors must be a multiple of atomic_write_boundary_sectors if
	 * both are non-zero.
	 */
	if (is_atomic && lim->atomic_write_boundary_sectors)
		return lim->atomic_write_boundary_sectors;

	return lim->chunk_sectors;
}

/*
 * Return the maximum number of sectors from the start of a bio that may be
 * submitted as a single request to a block device. If enough sectors remain,
 * align the end to the physical block size. Otherwise align the end to the
 * logical block size. This approach minimizes the number of non-aligned
 * requests that are submitted to a block device if the start of a bio is not
 * aligned to a physical block boundary.
 */
static inline unsigned get_max_io_size(struct bio *bio,
				       const struct queue_limits *lim)
{
	unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
	unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
	bool is_atomic = bio->bi_opf & REQ_ATOMIC;
	unsigned boundary_sectors = blk_boundary_sectors(lim, is_atomic);
	unsigned max_sectors, start, end;

	/*
	 * We ignore lim->max_sectors for atomic writes because it may be less
	 * than the actual bio size, which we cannot tolerate.
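	 * (An atomic write must be issued as a single request, so truncating
	 * it to max_sectors would break its all-or-nothing semantics; the
	 * code below uses atomic_write_max_sectors instead.)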
	 */
	if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
		max_sectors = lim->max_write_zeroes_sectors;
	else if (is_atomic)
		max_sectors = lim->atomic_write_max_sectors;
	else
		max_sectors = lim->max_sectors;

	if (boundary_sectors) {
		max_sectors = min(max_sectors,
			blk_boundary_sectors_left(bio->bi_iter.bi_sector,
					boundary_sectors));
	}

	start = bio->bi_iter.bi_sector & (pbs - 1);
	end = (start + max_sectors) & ~(pbs - 1);
	if (end > start)
		return end - start;
	return max_sectors & ~(lbs - 1);
}

/**
 * bvec_split_segs - verify whether or not a bvec should be split in the middle
 * @lim:      [in] queue limits to split based on
 * @bv:       [in] bvec to examine
 * @nsegs:    [in,out] Number of segments in the bio being built. Incremented
 *            by the number of segments from @bv that may be appended to that
 *            bio without exceeding @max_segs
 * @bytes:    [in,out] Number of bytes in the bio being built. Incremented
 *            by the number of bytes from @bv that may be appended to that
 *            bio without exceeding @max_bytes
 * @max_segs: [in] upper bound for *@nsegs
 * @max_bytes: [in] upper bound for *@bytes
 *
 * When splitting a bio, it can happen that a bvec is encountered that is too
 * big to fit in a single segment and hence that it has to be split in the
 * middle. This function verifies whether or not that should happen. The value
 * %true is returned if and only if appending the entire @bv to a bio with
 * *@nsegs segments and *@bytes bytes would make that bio unacceptable for
 * the block driver.
 */
static bool bvec_split_segs(const struct queue_limits *lim,
		const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
		unsigned max_segs, unsigned max_bytes)
{
	unsigned max_len = max_bytes - *bytes;
	unsigned len = min(bv->bv_len, max_len);
	unsigned total_len = 0;
	unsigned seg_size = 0;

	while (len && *nsegs < max_segs) {
		seg_size = get_max_segment_size(lim, bvec_phys(bv) + total_len, len);

		(*nsegs)++;
		total_len += seg_size;
		len -= seg_size;

		if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
			break;
	}

	*bytes += total_len;

	/* tell the caller to split the bvec if it is too big to fit */
	return len > 0 || bv->bv_len > max_len;
}

static unsigned int bio_split_alignment(struct bio *bio,
		const struct queue_limits *lim)
{
	if (op_is_write(bio_op(bio)) && lim->zone_write_granularity)
		return lim->zone_write_granularity;
	return lim->logical_block_size;
}

/**
 * bio_split_rw_at - check if and where to split a read/write bio
 * @bio:  [in] bio to be split
 * @lim:  [in] queue limits to split based on
 * @segs: [out] number of segments in the bio with the first half of the sectors
 * @max_bytes: [in] maximum number of bytes per bio
 *
 * Find out if @bio needs to be split to fit the queue limits in @lim and a
 * maximum size of @max_bytes. Returns a negative error number if @bio can't be
 * split, 0 if the bio doesn't have to be split, or a positive sector offset if
 * @bio needs to be split.
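 *
 * A positive return value is the number of sectors, counted from the start of
 * @bio, that fit within the limits and should go into the first split bio.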
 */
int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
		unsigned *segs, unsigned max_bytes)
{
	struct bio_vec bv, bvprv, *bvprvp = NULL;
	struct bvec_iter iter;
	unsigned nsegs = 0, bytes = 0;

	bio_for_each_bvec(bv, bio, iter) {
		/*
		 * If the queue doesn't support SG gaps and adding this
		 * offset would create a gap, disallow it.
		 */
		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
			goto split;

		if (nsegs < lim->max_segments &&
		    bytes + bv.bv_len <= max_bytes &&
		    bv.bv_offset + bv.bv_len <= lim->min_segment_size) {
			nsegs++;
			bytes += bv.bv_len;
		} else {
			if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
					lim->max_segments, max_bytes))
				goto split;
		}

		bvprv = bv;
		bvprvp = &bvprv;
	}

	*segs = nsegs;
	return 0;
split:
	if (bio->bi_opf & REQ_ATOMIC)
		return -EINVAL;

	/*
	 * We can't sanely support splitting for a REQ_NOWAIT bio. End it
	 * with EAGAIN if splitting is required and return an error.
	 */
	if (bio->bi_opf & REQ_NOWAIT)
		return -EAGAIN;

	*segs = nsegs;

	/*
	 * Individual bvecs might not be logical block aligned. Round down the
	 * split size so that each bio is properly block size aligned, even if
	 * we do not use the full hardware limits.
	 */
	bytes = ALIGN_DOWN(bytes, bio_split_alignment(bio, lim));

	/*
	 * Bio splitting may cause subtle trouble such as hang when doing sync
	 * iopoll in direct IO routine. Given that the performance gain of
	 * iopoll for big IO can be trivial, disable iopoll when a split is
	 * needed.
	 */
	bio_clear_polled(bio);
	return bytes >> SECTOR_SHIFT;
}
EXPORT_SYMBOL_GPL(bio_split_rw_at);

struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
		unsigned *nr_segs)
{
	return bio_submit_split(bio,
		bio_split_rw_at(bio, lim, nr_segs,
			get_max_io_size(bio, lim) << SECTOR_SHIFT));
}

/*
 * REQ_OP_ZONE_APPEND bios must never be split by the block layer.
 *
 * But we want the nr_segs calculation provided by bio_split_rw_at, and having
 * a good sanity check that the submitter built the bio correctly is nice to
 * have as well.
 */
struct bio *bio_split_zone_append(struct bio *bio,
		const struct queue_limits *lim, unsigned *nr_segs)
{
	int split_sectors;

	split_sectors = bio_split_rw_at(bio, lim, nr_segs,
			lim->max_zone_append_sectors << SECTOR_SHIFT);
	if (WARN_ON_ONCE(split_sectors > 0))
		split_sectors = -EINVAL;
	return bio_submit_split(bio, split_sectors);
}

struct bio *bio_split_write_zeroes(struct bio *bio,
		const struct queue_limits *lim, unsigned *nsegs)
{
	unsigned int max_sectors = get_max_io_size(bio, lim);

	*nsegs = 0;

	/*
	 * An unset limit should normally not happen, as bio submission is keyed
	 * off having a non-zero limit. But SCSI can clear the limit in the
	 * I/O completion handler, and we can race and see this. Splitting to a
	 * zero limit obviously doesn't make sense, so band-aid it here.
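	 * (max_sectors here comes from get_max_io_size(), i.e. the
	 * max_write_zeroes_sectors limit for this bio.)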
	 */
	if (!max_sectors)
		return bio;
	if (bio_sectors(bio) <= max_sectors)
		return bio;
	return bio_submit_split(bio, max_sectors);
}

/**
 * bio_split_to_limits - split a bio to fit the queue limits
 * @bio:  bio to be split
 *
 * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
 * if so split off a bio fitting the limits from the beginning of @bio and
 * return it. @bio is shortened to the remainder and re-submitted.
 *
 * The split bio is allocated from @q->bio_split, which is provided by the
 * block layer.
 */
struct bio *bio_split_to_limits(struct bio *bio)
{
	unsigned int nr_segs;

	return __bio_split_to_limits(bio, bdev_limits(bio->bi_bdev), &nr_segs);
}
EXPORT_SYMBOL(bio_split_to_limits);

unsigned int blk_recalc_rq_segments(struct request *rq)
{
	unsigned int nr_phys_segs = 0;
	unsigned int bytes = 0;
	struct req_iterator iter;
	struct bio_vec bv;

	if (!rq->bio)
		return 0;

	switch (bio_op(rq->bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
		if (queue_max_discard_segments(rq->q) > 1) {
			struct bio *bio = rq->bio;

			for_each_bio(bio)
				nr_phys_segs++;
			return nr_phys_segs;
		}
		return 1;
	case REQ_OP_WRITE_ZEROES:
		return 0;
	default:
		break;
	}

	rq_for_each_bvec(bv, rq, iter)
		bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
				UINT_MAX, UINT_MAX);
	return nr_phys_segs;
}

static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
						  sector_t offset)
{
	struct request_queue *q = rq->q;
	struct queue_limits *lim = &q->limits;
	unsigned int max_sectors, boundary_sectors;
	bool is_atomic = rq->cmd_flags & REQ_ATOMIC;

	if (blk_rq_is_passthrough(rq))
		return q->limits.max_hw_sectors;

	boundary_sectors = blk_boundary_sectors(lim, is_atomic);
	max_sectors = blk_queue_get_max_sectors(rq);

	if (!boundary_sectors ||
	    req_op(rq) == REQ_OP_DISCARD ||
	    req_op(rq) == REQ_OP_SECURE_ERASE)
		return max_sectors;
	return min(max_sectors,
		   blk_boundary_sectors_left(offset, boundary_sectors));
}

static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
		unsigned int nr_phys_segs)
{
	if (!blk_cgroup_mergeable(req, bio))
		goto no_merge;

	if (blk_integrity_merge_bio(req->q, req, bio) == false)
		goto no_merge;

	/* discard request merge won't add new segment */
	if (req_op(req) == REQ_OP_DISCARD)
		return 1;

	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
		goto no_merge;

	/*
	 * This will form the start of a new hw segment. Bump both
	 * counters.
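	 * ("Both" here means nr_phys_segments and, when the bio carries
	 * integrity data, nr_integrity_segments, as updated just below.)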
	 */
	req->nr_phys_segments += nr_phys_segs;
	if (bio_integrity(bio))
		req->nr_integrity_segments += blk_rq_count_integrity_sg(req->q,
									bio);
	return 1;

no_merge:
	req_set_nomerge(req->q, req);
	return 0;
}

int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
{
	if (req_gap_back_merge(req, bio))
		return 0;
	if (blk_integrity_rq(req) &&
	    integrity_req_gap_back_merge(req, bio))
		return 0;
	if (!bio_crypt_ctx_back_mergeable(req, bio))
		return 0;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
		req_set_nomerge(req->q, req);
		return 0;
	}

	return ll_new_hw_segment(req, bio, nr_segs);
}

static int ll_front_merge_fn(struct request *req, struct bio *bio,
		unsigned int nr_segs)
{
	if (req_gap_front_merge(req, bio))
		return 0;
	if (blk_integrity_rq(req) &&
	    integrity_req_gap_front_merge(req, bio))
		return 0;
	if (!bio_crypt_ctx_front_mergeable(req, bio))
		return 0;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
		req_set_nomerge(req->q, req);
		return 0;
	}

	return ll_new_hw_segment(req, bio, nr_segs);
}

static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
		struct request *next)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);

	if (segments >= queue_max_discard_segments(q))
		goto no_merge;
	if (blk_rq_sectors(req) + bio_sectors(next->bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		goto no_merge;

	req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
	return true;
no_merge:
	req_set_nomerge(q, req);
	return false;
}

static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;

	if (req_gap_back_merge(req, next->bio))
		return 0;

	/*
	 * Will it become too large?
	 */
	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		return 0;

	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
	if (total_phys_segments > blk_rq_get_max_segments(req))
		return 0;

	if (!blk_cgroup_mergeable(req, next->bio))
		return 0;

	if (blk_integrity_merge_rq(q, req, next) == false)
		return 0;

	if (!bio_crypt_ctx_merge_rq(req, next))
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
	req->nr_integrity_segments += next->nr_integrity_segments;
	return 1;
}

/**
 * blk_rq_set_mixed_merge - mark a request as mixed merge
 * @rq: request to mark as mixed merge
 *
 * Description:
 *     @rq is about to be mixed merged. Make sure the attributes
 *     which can be mixed are set in each bio and mark @rq as mixed
 *     merged.
 */
static void blk_rq_set_mixed_merge(struct request *rq)
{
	blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
	struct bio *bio;

	if (rq->rq_flags & RQF_MIXED_MERGE)
		return;

	/*
	 * @rq will no longer represent mixable attributes for all the
	 * contained bios. It will just track those of the first one.
	 * Distribute the attributes to each bio.
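	 * (The mixable attributes are the REQ_FAILFAST_* flags captured in
	 * 'ff' above.)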
	 */
	for (bio = rq->bio; bio; bio = bio->bi_next) {
		WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
			     (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
		bio->bi_opf |= ff;
	}
	rq->rq_flags |= RQF_MIXED_MERGE;
}

static inline blk_opf_t bio_failfast(const struct bio *bio)
{
	if (bio->bi_opf & REQ_RAHEAD)
		return REQ_FAILFAST_MASK;

	return bio->bi_opf & REQ_FAILFAST_MASK;
}

/*
 * After we are marked as MIXED_MERGE, any new RA bio has to be updated
 * as failfast, and request's failfast has to be updated in case of
 * front merge.
 */
static inline void blk_update_mixed_merge(struct request *req,
		struct bio *bio, bool front_merge)
{
	if (req->rq_flags & RQF_MIXED_MERGE) {
		if (bio->bi_opf & REQ_RAHEAD)
			bio->bi_opf |= REQ_FAILFAST_MASK;

		if (front_merge) {
			req->cmd_flags &= ~REQ_FAILFAST_MASK;
			req->cmd_flags |= bio->bi_opf & REQ_FAILFAST_MASK;
		}
	}
}

static void blk_account_io_merge_request(struct request *req)
{
	if (req->rq_flags & RQF_IO_STAT) {
		part_stat_lock();
		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
		part_stat_local_dec(req->part,
				    in_flight[op_is_write(req_op(req))]);
		part_stat_unlock();
	}
}

static enum elv_merge blk_try_req_merge(struct request *req,
					struct request *next)
{
	if (blk_discard_mergable(req))
		return ELEVATOR_DISCARD_MERGE;
	else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
		return ELEVATOR_BACK_MERGE;

	return ELEVATOR_NO_MERGE;
}

static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
					      struct bio *bio)
{
	return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
}

static bool blk_atomic_write_mergeable_rqs(struct request *rq,
					   struct request *next)
{
	return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
}

/*
 * For non-mq, this has to be called with the request spinlock acquired.
 * For mq with scheduling, the appropriate queue wide lock should be held.
 */
static struct request *attempt_merge(struct request_queue *q,
				     struct request *req, struct request *next)
{
	if (!rq_mergeable(req) || !rq_mergeable(next))
		return NULL;

	if (req_op(req) != req_op(next))
		return NULL;

	if (req->bio->bi_write_hint != next->bio->bi_write_hint)
		return NULL;
	if (req->bio->bi_write_stream != next->bio->bi_write_stream)
		return NULL;
	if (req->bio->bi_ioprio != next->bio->bi_ioprio)
		return NULL;
	if (!blk_atomic_write_mergeable_rqs(req, next))
		return NULL;

	/*
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next. merge_requests_fn
	 * will have updated segment counts, update sector
	 * counts here. Handle DISCARDs separately, as they
	 * have separate settings.
	 */

	switch (blk_try_req_merge(req, next)) {
	case ELEVATOR_DISCARD_MERGE:
		if (!req_attempt_discard_merge(q, req, next))
			return NULL;
		break;
	case ELEVATOR_BACK_MERGE:
		if (!ll_merge_requests_fn(q, req, next))
			return NULL;
		break;
	default:
		return NULL;
	}

	/*
	 * If failfast settings disagree or any of the two is already
	 * a mixed merge, mark both as mixed before proceeding. This
	 * makes sure that all involved bios have mixable attributes
	 * set properly.
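	 * (Both requests are marked because next's bios are appended to req
	 * below, so each of those bios must already carry its own failfast
	 * flags.)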
	 */
	if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
		blk_rq_set_mixed_merge(req);
		blk_rq_set_mixed_merge(next);
	}

	/*
	 * At this point we have either done a back merge or front merge. We
	 * need the smaller start_time_ns of the merged requests to be the
	 * current request for accounting purposes.
	 */
	if (next->start_time_ns < req->start_time_ns)
		req->start_time_ns = next->start_time_ns;

	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

	req->__data_len += blk_rq_bytes(next);

	if (!blk_discard_mergable(req))
		elv_merge_requests(q, req, next);

	blk_crypto_rq_put_keyslot(next);

	/*
	 * 'next' is going away, so update stats accordingly
	 */
	blk_account_io_merge_request(next);

	trace_block_rq_merge(next);

	/*
	 * ownership of bio passed from next to req, return 'next' for
	 * the caller to free
	 */
	next->bio = NULL;
	return next;
}

static struct request *attempt_back_merge(struct request_queue *q,
		struct request *rq)
{
	struct request *next = elv_latter_request(q, rq);

	if (next)
		return attempt_merge(q, rq, next);

	return NULL;
}

static struct request *attempt_front_merge(struct request_queue *q,
		struct request *rq)
{
	struct request *prev = elv_former_request(q, rq);

	if (prev)
		return attempt_merge(q, prev, rq);

	return NULL;
}

/*
 * Try to merge 'next' into 'rq'. Return true if the merge happened, false
 * otherwise. The caller is responsible for freeing 'next' if the merge
 * happened.
 */
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
			   struct request *next)
{
	return attempt_merge(q, rq, next);
}

bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq) || !bio_mergeable(bio))
		return false;

	if (req_op(rq) != bio_op(bio))
		return false;

	if (!blk_cgroup_mergeable(rq, bio))
		return false;
	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
		return false;
	if (!bio_crypt_rq_ctx_compatible(rq, bio))
		return false;
	if (rq->bio->bi_write_hint != bio->bi_write_hint)
		return false;
	if (rq->bio->bi_write_stream != bio->bi_write_stream)
		return false;
	if (rq->bio->bi_ioprio != bio->bi_ioprio)
		return false;
	if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
		return false;

	return true;
}

enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
{
	if (blk_discard_mergable(rq))
		return ELEVATOR_DISCARD_MERGE;
	else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
		return ELEVATOR_BACK_MERGE;
	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
		return ELEVATOR_FRONT_MERGE;
	return ELEVATOR_NO_MERGE;
}

static void blk_account_io_merge_bio(struct request *req)
{
	if (req->rq_flags & RQF_IO_STAT) {
		part_stat_lock();
		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
		part_stat_unlock();
	}
}

enum bio_merge_status bio_attempt_back_merge(struct request *req,
		struct bio *bio, unsigned int nr_segs)
{
	const blk_opf_t ff = bio_failfast(bio);

	if (!ll_back_merge_fn(req, bio, nr_segs))
		return BIO_MERGE_FAILED;

	trace_block_bio_backmerge(bio);
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	blk_update_mixed_merge(req, bio, false);

	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
		blk_zone_write_plug_bio_merged(bio);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;

	bio_crypt_free_ctx(bio);

	blk_account_io_merge_bio(req);
	return BIO_MERGE_OK;
}

static enum bio_merge_status bio_attempt_front_merge(struct request *req,
		struct bio *bio, unsigned int nr_segs)
{
	const blk_opf_t ff = bio_failfast(bio);

	/*
	 * A front merge for writes to sequential zones of a zoned block device
	 * can happen only if the user submitted writes out of order. Do not
	 * merge such a write, to let it fail.
	 */
	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
		return BIO_MERGE_FAILED;

	if (!ll_front_merge_fn(req, bio, nr_segs))
		return BIO_MERGE_FAILED;

	trace_block_bio_frontmerge(bio);
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	blk_update_mixed_merge(req, bio, true);

	bio->bi_next = req->bio;
	req->bio = bio;

	req->__sector = bio->bi_iter.bi_sector;
	req->__data_len += bio->bi_iter.bi_size;

	bio_crypt_do_front_merge(req, bio);

	blk_account_io_merge_bio(req);
	return BIO_MERGE_OK;
}

static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
		struct request *req, struct bio *bio)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);

	if (segments >= queue_max_discard_segments(q))
		goto no_merge;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		goto no_merge;

	rq_qos_merge(q, req, bio);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;
	req->nr_phys_segments = segments + 1;

	blk_account_io_merge_bio(req);
	return BIO_MERGE_OK;
no_merge:
	req_set_nomerge(q, req);
	return BIO_MERGE_FAILED;
}

static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
						   struct request *rq,
						   struct bio *bio,
						   unsigned int nr_segs,
						   bool sched_allow_merge)
{
	if (!blk_rq_merge_ok(rq, bio))
		return BIO_MERGE_NONE;

	switch (blk_try_merge(rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
			return bio_attempt_back_merge(rq, bio, nr_segs);
		break;
	case ELEVATOR_FRONT_MERGE:
		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
			return bio_attempt_front_merge(rq, bio, nr_segs);
		break;
	case ELEVATOR_DISCARD_MERGE:
		return bio_attempt_discard_merge(q, rq, bio);
	default:
		return BIO_MERGE_NONE;
	}

	return BIO_MERGE_FAILED;
}

/**
 * blk_attempt_plug_merge - try to merge with %current's plugged list
 * @q: request_queue new bio is being queued at
 * @bio: new bio being queued
 * @nr_segs: number of segments in @bio
 *
 * Determine whether @bio being queued on @q can be merged with the previous
 * request on %current's plugged list. Returns %true if merge was successful,
 * otherwise %false.
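 *
 * Only a single candidate request from the plug list is tried: the most
 * recently added one, or the first one found for @q when requests for
 * multiple queues are plugged.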
 *
 * Plugging coalesces IOs from the same issuer for the same purpose without
 * going through @q->queue_lock. As such it's more of an issuing mechanism
 * than scheduling, and the request, while it may have elvpriv data, is not
 * added to the elevator at this point. In addition, we don't have
 * reliable access to the elevator outside queue lock. Only check basic
 * merging parameters without querying the elevator.
 *
 * Caller must ensure !blk_queue_nomerges(q) beforehand.
 */
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs)
{
	struct blk_plug *plug = current->plug;
	struct request *rq;

	if (!plug || rq_list_empty(&plug->mq_list))
		return false;

	rq = plug->mq_list.tail;
	if (rq->q == q)
		return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
			BIO_MERGE_OK;
	else if (!plug->multiple_queues)
		return false;

	rq_list_for_each(&plug->mq_list, rq) {
		if (rq->q != q)
			continue;
		if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
		    BIO_MERGE_OK)
			return true;
		break;
	}
	return false;
}

/*
 * Iterate list of requests and see if we can merge this bio with any
 * of them.
 */
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
			struct bio *bio, unsigned int nr_segs)
{
	struct request *rq;
	int checked = 8;

	list_for_each_entry_reverse(rq, list, queuelist) {
		if (!checked--)
			break;

		switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
		case BIO_MERGE_NONE:
			continue;
		case BIO_MERGE_OK:
			return true;
		case BIO_MERGE_FAILED:
			return false;
		}

	}

	return false;
}
EXPORT_SYMBOL_GPL(blk_bio_list_merge);

bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs, struct request **merged_request)
{
	struct request *rq;

	switch (elv_merge(q, &rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
			return false;
		*merged_request = attempt_back_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
		return true;
	case ELEVATOR_FRONT_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
			return false;
		*merged_request = attempt_front_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
		return true;
	case ELEVATOR_DISCARD_MERGE:
		return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);