/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
                                             struct bio *bio,
                                             bool no_sg_merge)
{
        struct bio_vec bv, bvprv = { NULL };
        int cluster, high, highprv = 1;
        unsigned int seg_size, nr_phys_segs;
        struct bio *fbio, *bbio;
        struct bvec_iter iter;

        if (!bio)
                return 0;

        /*
         * This should probably be returning 0, but blk_add_request_payload()
         * (Christoph!!!!)
         */
        if (bio->bi_rw & REQ_DISCARD)
                return 1;

        if (bio->bi_rw & REQ_WRITE_SAME)
                return 1;

        fbio = bio;
        cluster = blk_queue_cluster(q);
        seg_size = 0;
        nr_phys_segs = 0;
        high = 0;
        for_each_bio(bio) {
                bio_for_each_segment(bv, bio, iter) {
                        /*
                         * If SG merging is disabled, each bio vector is
                         * a segment
                         */
                        if (no_sg_merge)
                                goto new_segment;

                        /*
                         * the trick here is making sure that a high page is
                         * never considered part of another segment, since
                         * that might change with the bounce page.
                         */
                        high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q);
                        if (!high && !highprv && cluster) {
                                if (seg_size + bv.bv_len
                                    > queue_max_segment_size(q))
                                        goto new_segment;
                                if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv))
                                        goto new_segment;
                                if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv))
                                        goto new_segment;

                                seg_size += bv.bv_len;
                                bvprv = bv;
                                continue;
                        }
new_segment:
                        if (nr_phys_segs == 1 && seg_size >
                            fbio->bi_seg_front_size)
                                fbio->bi_seg_front_size = seg_size;

                        nr_phys_segs++;
                        bvprv = bv;
                        seg_size = bv.bv_len;
                        highprv = high;
                }
                bbio = bio;
        }

        if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size)
                fbio->bi_seg_front_size = seg_size;
        if (seg_size > bbio->bi_seg_back_size)
                bbio->bi_seg_back_size = seg_size;

        return nr_phys_segs;
}

void blk_recalc_rq_segments(struct request *rq)
{
        bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE,
                        &rq->q->queue_flags);

        rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio,
                        no_sg_merge);
}

void blk_recount_segments(struct request_queue *q, struct bio *bio)
{
        if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) &&
                        bio->bi_vcnt < queue_max_segments(q))
                bio->bi_phys_segments = bio->bi_vcnt;
        else {
                struct bio *nxt = bio->bi_next;

                bio->bi_next = NULL;
                bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false);
                bio->bi_next = nxt;
        }

        bio->bi_flags |= (1 << BIO_SEG_VALID);
}
EXPORT_SYMBOL(blk_recount_segments);

static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
                                   struct bio *nxt)
{
        struct bio_vec end_bv = { NULL }, nxt_bv;
        struct bvec_iter iter;

        if (!blk_queue_cluster(q))
                return 0;

        if (bio->bi_seg_back_size + nxt->bi_seg_front_size >
            queue_max_segment_size(q))
                return 0;

        if (!bio_has_data(bio))
                return 1;

        bio_for_each_segment(end_bv, bio, iter)
                if (end_bv.bv_len == iter.bi_size)
                        break;

        nxt_bv = bio_iovec(nxt);

        if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv))
                return 0;

        /*
         * bio and nxt are contiguous in memory; check if the queue allows
         * these two to be merged into one
         */
        if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv))
                return 1;

        return 0;
}

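/*
 * Add a single bio_vec to the scatterlist being built. When the queue
 * supports clustering and the vector is physically contiguous with the
 * previous one (and neither the maximum segment size nor the segment
 * boundary limit would be violated), the bytes are folded into the
 * current sg entry; otherwise a new sg entry is started and *nsegs is
 * bumped.
 */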
static inline void
__blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec,
                     struct scatterlist *sglist, struct bio_vec *bvprv,
                     struct scatterlist **sg, int *nsegs, int *cluster)
{
        int nbytes = bvec->bv_len;

        if (*sg && *cluster) {
                if ((*sg)->length + nbytes > queue_max_segment_size(q))
                        goto new_segment;

                if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec))
                        goto new_segment;
                if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec))
                        goto new_segment;

                (*sg)->length += nbytes;
        } else {
new_segment:
                if (!*sg)
                        *sg = sglist;
                else {
                        /*
                         * If the driver previously mapped a shorter
                         * list, we could see a termination bit
                         * prematurely unless it fully inits the sg
                         * table on each mapping. We KNOW that there
                         * must be more entries here or the driver
                         * would be buggy, so force clear the
                         * termination bit to avoid doing a full
                         * sg_init_table() in drivers for each command.
                         */
                        sg_unmark_end(*sg);
                        *sg = sg_next(*sg);
                }

                sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset);
                (*nsegs)++;
        }
        *bvprv = *bvec;
}

static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
                             struct scatterlist *sglist,
                             struct scatterlist **sg)
{
        struct bio_vec bvec, bvprv = { NULL };
        struct bvec_iter iter;
        int nsegs, cluster;

        nsegs = 0;
        cluster = blk_queue_cluster(q);

        if (bio->bi_rw & REQ_DISCARD) {
                /*
                 * This is a hack - drivers should be neither modifying the
                 * biovec, nor relying on bi_vcnt - but because of
                 * blk_add_request_payload(), a discard bio may or may not have
                 * a payload we need to set up here (thank you Christoph) and
                 * bi_vcnt is really the only way of telling if we need to.
                 */

                if (bio->bi_vcnt)
                        goto single_segment;

                return 0;
        }

        if (bio->bi_rw & REQ_WRITE_SAME) {
single_segment:
                *sg = sglist;
                bvec = bio_iovec(bio);
                sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset);
                return 1;
        }

        for_each_bio(bio)
                bio_for_each_segment(bvec, bio, iter)
                        __blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg,
                                             &nsegs, &cluster);

        return nsegs;
}

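/*
 * The two exported mappers below are thin wrappers around
 * __blk_bios_map_sg(). An illustrative caller of blk_rq_map_sg() (not
 * part of this file; 'table' and 'nseg' are hypothetical driver names)
 * would look roughly like:
 *
 *      nseg = blk_rq_map_sg(q, rq, table->sgl);
 *      table->nents = nseg;
 *
 * with 'table' sized to hold at least rq->nr_phys_segments entries.
 */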
/*
 * Map a request to a scatterlist and return the number of sg entries
 * set up. The caller must make sure sg can hold rq->nr_phys_segments
 * entries.
 */
int blk_rq_map_sg(struct request_queue *q, struct request *rq,
                  struct scatterlist *sglist)
{
        struct scatterlist *sg = NULL;
        int nsegs = 0;

        if (rq->bio)
                nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg);

        if (unlikely(rq->cmd_flags & REQ_COPY_USER) &&
            (blk_rq_bytes(rq) & q->dma_pad_mask)) {
                unsigned int pad_len =
                        (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1;

                sg->length += pad_len;
                rq->extra_len += pad_len;
        }

        if (q->dma_drain_size && q->dma_drain_needed(rq)) {
                if (rq->cmd_flags & REQ_WRITE)
                        memset(q->dma_drain_buffer, 0, q->dma_drain_size);

                /* clear the end marker so the drain entry can be appended */
                sg->page_link &= ~0x02;
                sg = sg_next(sg);
                sg_set_page(sg, virt_to_page(q->dma_drain_buffer),
                            q->dma_drain_size,
                            ((unsigned long)q->dma_drain_buffer) &
                            (PAGE_SIZE - 1));
                nsegs++;
                rq->extra_len += q->dma_drain_size;
        }

        if (sg)
                sg_mark_end(sg);

        return nsegs;
}
EXPORT_SYMBOL(blk_rq_map_sg);

/**
 * blk_bio_map_sg - map a bio to a scatterlist
 * @q: request_queue in question
 * @bio: bio being mapped
 * @sglist: scatterlist being mapped
 *
 * Note:
 *    Caller must make sure sg can hold bio->bi_phys_segments entries
 *
 * Will return the number of sg entries set up
 */
int blk_bio_map_sg(struct request_queue *q, struct bio *bio,
                   struct scatterlist *sglist)
{
        struct scatterlist *sg = NULL;
        int nsegs;
        struct bio *next = bio->bi_next;
        bio->bi_next = NULL;

        nsegs = __blk_bios_map_sg(q, bio, sglist, &sg);
        bio->bi_next = next;
        if (sg)
                sg_mark_end(sg);

        BUG_ON(bio->bi_phys_segments && nsegs > bio->bi_phys_segments);
        return nsegs;
}
EXPORT_SYMBOL(blk_bio_map_sg);

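/*
 * Account for @bio adding new physical segments to @req. Refuses the
 * merge (and marks @req unmergeable) if the queue's segment limit would
 * be exceeded or if the integrity metadata of @req and @bio cannot be
 * merged.
 */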
static inline int ll_new_hw_segment(struct request_queue *q,
                                    struct request *req,
                                    struct bio *bio)
{
        int nr_phys_segs = bio_phys_segments(q, bio);

        if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q))
                goto no_merge;

        if (bio_integrity(bio) && blk_integrity_merge_bio(q, req, bio))
                goto no_merge;

        /*
         * This will form the start of a new hw segment. Bump the
         * segment count.
         */
        req->nr_phys_segments += nr_phys_segs;
        return 1;

no_merge:
        req->cmd_flags |= REQ_NOMERGE;
        if (req == q->last_merge)
                q->last_merge = NULL;
        return 0;
}

int ll_back_merge_fn(struct request_queue *q, struct request *req,
                     struct bio *bio)
{
        if (blk_rq_sectors(req) + bio_sectors(bio) >
            blk_rq_get_max_sectors(req)) {
                req->cmd_flags |= REQ_NOMERGE;
                if (req == q->last_merge)
                        q->last_merge = NULL;
                return 0;
        }
        if (!bio_flagged(req->biotail, BIO_SEG_VALID))
                blk_recount_segments(q, req->biotail);
        if (!bio_flagged(bio, BIO_SEG_VALID))
                blk_recount_segments(q, bio);

        return ll_new_hw_segment(q, req, bio);
}

int ll_front_merge_fn(struct request_queue *q, struct request *req,
                      struct bio *bio)
{
        if (blk_rq_sectors(req) + bio_sectors(bio) >
            blk_rq_get_max_sectors(req)) {
                req->cmd_flags |= REQ_NOMERGE;
                if (req == q->last_merge)
                        q->last_merge = NULL;
                return 0;
        }
        if (!bio_flagged(bio, BIO_SEG_VALID))
                blk_recount_segments(q, bio);
        if (!bio_flagged(req->bio, BIO_SEG_VALID))
                blk_recount_segments(q, req->bio);

        return ll_new_hw_segment(q, req, bio);
}

/*
 * blk-mq uses req->special to carry normal driver per-request payload;
 * it does not indicate a prepared command that we cannot merge with.
 */
static bool req_no_special_merge(struct request *req)
{
        struct request_queue *q = req->q;

        return !q->mq_ops && req->special;
}

static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
                                struct request *next)
{
        int total_phys_segments;
        unsigned int seg_size =
                req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size;

        /*
         * First check if either of the requests is a re-queued request.
         * We can't merge them if so.
         */
        if (req_no_special_merge(req) || req_no_special_merge(next))
                return 0;

        /*
         * Will it become too large?
         */
        if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
            blk_rq_get_max_sectors(req))
                return 0;

        total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
        if (blk_phys_contig_segment(q, req->biotail, next->bio)) {
                if (req->nr_phys_segments == 1)
                        req->bio->bi_seg_front_size = seg_size;
                if (next->nr_phys_segments == 1)
                        next->biotail->bi_seg_back_size = seg_size;
                total_phys_segments--;
        }

        if (total_phys_segments > queue_max_segments(q))
                return 0;

        if (blk_integrity_rq(req) && blk_integrity_merge_rq(q, req, next))
                return 0;

        /* Merge is OK... */
        req->nr_phys_segments = total_phys_segments;
        return 1;
}

/**
 * blk_rq_set_mixed_merge - mark a request as mixed merge
 * @rq: request to mark as mixed merge
 *
 * Description:
 *     @rq is about to be mixed merged.  Make sure the attributes
 *     which can be mixed are set in each bio and mark @rq as mixed
 *     merged.
 */
void blk_rq_set_mixed_merge(struct request *rq)
{
        unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK;
        struct bio *bio;

        if (rq->cmd_flags & REQ_MIXED_MERGE)
                return;

        /*
         * @rq will no longer represent mixable attributes for all the
         * contained bios. It will just track those of the first one.
         * Distribute the attributes to each bio.
         */
        for (bio = rq->bio; bio; bio = bio->bi_next) {
                WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) &&
                             (bio->bi_rw & REQ_FAILFAST_MASK) != ff);
                bio->bi_rw |= ff;
        }
        rq->cmd_flags |= REQ_MIXED_MERGE;
}

static void blk_account_io_merge(struct request *req)
{
        if (blk_do_io_stat(req)) {
                struct hd_struct *part;
                int cpu;

                cpu = part_stat_lock();
                part = req->part;

                part_round_stats(cpu, part);
                part_dec_in_flight(part, rq_data_dir(req));

                hd_struct_put(part);
                part_stat_unlock();
        }
}

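/*
 * Try to coalesce 'next' into 'req' when the two requests are contiguous
 * on disk. On success the bio list of 'next' is spliced onto 'req', the
 * size, priority and accounting fields are updated, and 'next' is freed.
 */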
/*
 * Has to be called with the request spinlock acquired
 */
static int attempt_merge(struct request_queue *q, struct request *req,
                         struct request *next)
{
        if (!rq_mergeable(req) || !rq_mergeable(next))
                return 0;

        if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags))
                return 0;

        /*
         * not contiguous
         */
        if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next))
                return 0;

        if (rq_data_dir(req) != rq_data_dir(next)
            || req->rq_disk != next->rq_disk
            || req_no_special_merge(next))
                return 0;

        if (req->cmd_flags & REQ_WRITE_SAME &&
            !blk_write_same_mergeable(req->bio, next->bio))
                return 0;

        /*
         * If we are allowed to merge, then append the bio list
         * from next to rq and release next. merge_requests_fn
         * will have updated the segment counts; update the sector
         * counts here.
         */
        if (!ll_merge_requests_fn(q, req, next))
                return 0;

        /*
         * If failfast settings disagree or any of the two is already
         * a mixed merge, mark both as mixed before proceeding. This
         * makes sure that all involved bios have mixable attributes
         * set properly.
         */
        if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE ||
            (req->cmd_flags & REQ_FAILFAST_MASK) !=
            (next->cmd_flags & REQ_FAILFAST_MASK)) {
                blk_rq_set_mixed_merge(req);
                blk_rq_set_mixed_merge(next);
        }

        /*
         * At this point we have either done a back merge or a front
         * merge. We need the smaller start_time of the merged requests
         * to be the current request for accounting purposes.
         */
        if (time_after(req->start_time, next->start_time))
                req->start_time = next->start_time;

        req->biotail->bi_next = next->bio;
        req->biotail = next->biotail;

        req->__data_len += blk_rq_bytes(next);

        elv_merge_requests(q, req, next);

        /*
         * 'next' is going away, so update stats accordingly
         */
        blk_account_io_merge(next);

        req->ioprio = ioprio_best(req->ioprio, next->ioprio);
        if (blk_rq_cpu_valid(next))
                req->cpu = next->cpu;

        /* ownership of the bios is passed from next to req */
        next->bio = NULL;
        __blk_put_request(q, next);
        return 1;
}

int attempt_back_merge(struct request_queue *q, struct request *rq)
{
        struct request *next = elv_latter_request(q, rq);

        if (next)
                return attempt_merge(q, rq, next);

        return 0;
}

int attempt_front_merge(struct request_queue *q, struct request *rq)
{
        struct request *prev = elv_former_request(q, rq);

        if (prev)
                return attempt_merge(q, prev, rq);

        return 0;
}

int blk_attempt_req_merge(struct request_queue *q, struct request *rq,
                          struct request *next)
{
        return attempt_merge(q, rq, next);
}

bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
        struct request_queue *q = rq->q;

        if (!rq_mergeable(rq) || !bio_mergeable(bio))
                return false;

        if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw))
                return false;

        /* different data direction or already started, don't merge */
        if (bio_data_dir(bio) != rq_data_dir(rq))
                return false;

        /* must be same device and not a special request */
        if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq))
                return false;

        /* only merge an integrity-protected bio into a likewise protected rq */
        if (bio_integrity(bio) != blk_integrity_rq(rq))
                return false;

        /* must be using the same buffer */
        if (rq->cmd_flags & REQ_WRITE_SAME &&
            !blk_write_same_mergeable(rq->bio, bio))
                return false;

        if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) {
                struct bio_vec *bprev;

                /* check for an sg gap between the end of rq and the start of bio */
                bprev = &rq->biotail->bi_io_vec[rq->biotail->bi_vcnt - 1];
                if (bvec_gap_to_prev(bprev, bio->bi_io_vec[0].bv_offset))
                        return false;
        }

        return true;
}

int blk_try_merge(struct request *rq, struct bio *bio)
{
        if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
                return ELEVATOR_BACK_MERGE;
        else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
                return ELEVATOR_FRONT_MERGE;
        return ELEVATOR_NO_MERGE;
}