1 /* 2 * Functions related to segment and merge handling 3 */ 4 #include <linux/kernel.h> 5 #include <linux/module.h> 6 #include <linux/bio.h> 7 #include <linux/blkdev.h> 8 #include <linux/scatterlist.h> 9 10 #include "blk.h" 11 12 static unsigned int __blk_recalc_rq_segments(struct request_queue *q, 13 struct bio *bio, 14 bool no_sg_merge) 15 { 16 struct bio_vec bv, bvprv = { NULL }; 17 int cluster, high, highprv = 1; 18 unsigned int seg_size, nr_phys_segs; 19 struct bio *fbio, *bbio; 20 struct bvec_iter iter; 21 22 if (!bio) 23 return 0; 24 25 /* 26 * This should probably be returning 0, but blk_add_request_payload() 27 * (Christoph!!!!) 28 */ 29 if (bio->bi_rw & REQ_DISCARD) 30 return 1; 31 32 if (bio->bi_rw & REQ_WRITE_SAME) 33 return 1; 34 35 fbio = bio; 36 cluster = blk_queue_cluster(q); 37 seg_size = 0; 38 nr_phys_segs = 0; 39 high = 0; 40 for_each_bio(bio) { 41 bio_for_each_segment(bv, bio, iter) { 42 /* 43 * If SG merging is disabled, each bio vector is 44 * a segment 45 */ 46 if (no_sg_merge) 47 goto new_segment; 48 49 /* 50 * the trick here is making sure that a high page is 51 * never considered part of another segment, since 52 * that might change with the bounce page. 53 */ 54 high = page_to_pfn(bv.bv_page) > queue_bounce_pfn(q); 55 if (!high && !highprv && cluster) { 56 if (seg_size + bv.bv_len 57 > queue_max_segment_size(q)) 58 goto new_segment; 59 if (!BIOVEC_PHYS_MERGEABLE(&bvprv, &bv)) 60 goto new_segment; 61 if (!BIOVEC_SEG_BOUNDARY(q, &bvprv, &bv)) 62 goto new_segment; 63 64 seg_size += bv.bv_len; 65 bvprv = bv; 66 continue; 67 } 68 new_segment: 69 if (nr_phys_segs == 1 && seg_size > 70 fbio->bi_seg_front_size) 71 fbio->bi_seg_front_size = seg_size; 72 73 nr_phys_segs++; 74 bvprv = bv; 75 seg_size = bv.bv_len; 76 highprv = high; 77 } 78 bbio = bio; 79 } 80 81 if (nr_phys_segs == 1 && seg_size > fbio->bi_seg_front_size) 82 fbio->bi_seg_front_size = seg_size; 83 if (seg_size > bbio->bi_seg_back_size) 84 bbio->bi_seg_back_size = seg_size; 85 86 return nr_phys_segs; 87 } 88 89 void blk_recalc_rq_segments(struct request *rq) 90 { 91 bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE, 92 &rq->q->queue_flags); 93 94 rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio, 95 no_sg_merge); 96 } 97 98 void blk_recount_segments(struct request_queue *q, struct bio *bio) 99 { 100 bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE, 101 &q->queue_flags); 102 bool merge_not_need = bio->bi_vcnt < queue_max_segments(q); 103 104 if (no_sg_merge && !bio_flagged(bio, BIO_CLONED) && 105 merge_not_need) 106 bio->bi_phys_segments = bio->bi_vcnt; 107 else { 108 struct bio *nxt = bio->bi_next; 109 110 bio->bi_next = NULL; 111 bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, 112 no_sg_merge && merge_not_need); 113 bio->bi_next = nxt; 114 } 115 116 bio->bi_flags |= (1 << BIO_SEG_VALID); 117 } 118 EXPORT_SYMBOL(blk_recount_segments); 119 120 static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio, 121 struct bio *nxt) 122 { 123 struct bio_vec end_bv = { NULL }, nxt_bv; 124 struct bvec_iter iter; 125 126 if (!blk_queue_cluster(q)) 127 return 0; 128 129 if (bio->bi_seg_back_size + nxt->bi_seg_front_size > 130 queue_max_segment_size(q)) 131 return 0; 132 133 if (!bio_has_data(bio)) 134 return 1; 135 136 bio_for_each_segment(end_bv, bio, iter) 137 if (end_bv.bv_len == iter.bi_size) 138 break; 139 140 nxt_bv = bio_iovec(nxt); 141 142 if (!BIOVEC_PHYS_MERGEABLE(&end_bv, &nxt_bv)) 143 return 0; 144 145 /* 146 * bio and nxt are contiguous in memory; check if the queue allows 147 * these two to be merged into one 148 */ 149 if (BIOVEC_SEG_BOUNDARY(q, &end_bv, &nxt_bv)) 150 return 1; 151 152 return 0; 153 } 154 155 static inline void 156 __blk_segment_map_sg(struct request_queue *q, struct bio_vec *bvec, 157 struct scatterlist *sglist, struct bio_vec *bvprv, 158 struct scatterlist **sg, int *nsegs, int *cluster) 159 { 160 161 int nbytes = bvec->bv_len; 162 163 if (*sg && *cluster) { 164 if ((*sg)->length + nbytes > queue_max_segment_size(q)) 165 goto new_segment; 166 167 if (!BIOVEC_PHYS_MERGEABLE(bvprv, bvec)) 168 goto new_segment; 169 if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bvec)) 170 goto new_segment; 171 172 (*sg)->length += nbytes; 173 } else { 174 new_segment: 175 if (!*sg) 176 *sg = sglist; 177 else { 178 /* 179 * If the driver previously mapped a shorter 180 * list, we could see a termination bit 181 * prematurely unless it fully inits the sg 182 * table on each mapping. We KNOW that there 183 * must be more entries here or the driver 184 * would be buggy, so force clear the 185 * termination bit to avoid doing a full 186 * sg_init_table() in drivers for each command. 187 */ 188 sg_unmark_end(*sg); 189 *sg = sg_next(*sg); 190 } 191 192 sg_set_page(*sg, bvec->bv_page, nbytes, bvec->bv_offset); 193 (*nsegs)++; 194 } 195 *bvprv = *bvec; 196 } 197 198 static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio, 199 struct scatterlist *sglist, 200 struct scatterlist **sg) 201 { 202 struct bio_vec bvec, bvprv = { NULL }; 203 struct bvec_iter iter; 204 int nsegs, cluster; 205 206 nsegs = 0; 207 cluster = blk_queue_cluster(q); 208 209 if (bio->bi_rw & REQ_DISCARD) { 210 /* 211 * This is a hack - drivers should be neither modifying the 212 * biovec, nor relying on bi_vcnt - but because of 213 * blk_add_request_payload(), a discard bio may or may not have 214 * a payload we need to set up here (thank you Christoph) and 215 * bi_vcnt is really the only way of telling if we need to. 216 */ 217 218 if (bio->bi_vcnt) 219 goto single_segment; 220 221 return 0; 222 } 223 224 if (bio->bi_rw & REQ_WRITE_SAME) { 225 single_segment: 226 *sg = sglist; 227 bvec = bio_iovec(bio); 228 sg_set_page(*sg, bvec.bv_page, bvec.bv_len, bvec.bv_offset); 229 return 1; 230 } 231 232 for_each_bio(bio) 233 bio_for_each_segment(bvec, bio, iter) 234 __blk_segment_map_sg(q, &bvec, sglist, &bvprv, sg, 235 &nsegs, &cluster); 236 237 return nsegs; 238 } 239 240 /* 241 * map a request to scatterlist, return number of sg entries setup. Caller 242 * must make sure sg can hold rq->nr_phys_segments entries 243 */ 244 int blk_rq_map_sg(struct request_queue *q, struct request *rq, 245 struct scatterlist *sglist) 246 { 247 struct scatterlist *sg = NULL; 248 int nsegs = 0; 249 250 if (rq->bio) 251 nsegs = __blk_bios_map_sg(q, rq->bio, sglist, &sg); 252 253 if (unlikely(rq->cmd_flags & REQ_COPY_USER) && 254 (blk_rq_bytes(rq) & q->dma_pad_mask)) { 255 unsigned int pad_len = 256 (q->dma_pad_mask & ~blk_rq_bytes(rq)) + 1; 257 258 sg->length += pad_len; 259 rq->extra_len += pad_len; 260 } 261 262 if (q->dma_drain_size && q->dma_drain_needed(rq)) { 263 if (rq->cmd_flags & REQ_WRITE) 264 memset(q->dma_drain_buffer, 0, q->dma_drain_size); 265 266 sg->page_link &= ~0x02; 267 sg = sg_next(sg); 268 sg_set_page(sg, virt_to_page(q->dma_drain_buffer), 269 q->dma_drain_size, 270 ((unsigned long)q->dma_drain_buffer) & 271 (PAGE_SIZE - 1)); 272 nsegs++; 273 rq->extra_len += q->dma_drain_size; 274 } 275 276 if (sg) 277 sg_mark_end(sg); 278 279 return nsegs; 280 } 281 EXPORT_SYMBOL(blk_rq_map_sg); 282 283 /** 284 * blk_bio_map_sg - map a bio to a scatterlist 285 * @q: request_queue in question 286 * @bio: bio being mapped 287 * @sglist: scatterlist being mapped 288 * 289 * Note: 290 * Caller must make sure sg can hold bio->bi_phys_segments entries 291 * 292 * Will return the number of sg entries setup 293 */ 294 int blk_bio_map_sg(struct request_queue *q, struct bio *bio, 295 struct scatterlist *sglist) 296 { 297 struct scatterlist *sg = NULL; 298 int nsegs; 299 struct bio *next = bio->bi_next; 300 bio->bi_next = NULL; 301 302 nsegs = __blk_bios_map_sg(q, bio, sglist, &sg); 303 bio->bi_next = next; 304 if (sg) 305 sg_mark_end(sg); 306 307 BUG_ON(bio->bi_phys_segments && nsegs > bio->bi_phys_segments); 308 return nsegs; 309 } 310 EXPORT_SYMBOL(blk_bio_map_sg); 311 312 static inline int ll_new_hw_segment(struct request_queue *q, 313 struct request *req, 314 struct bio *bio) 315 { 316 int nr_phys_segs = bio_phys_segments(q, bio); 317 318 if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) 319 goto no_merge; 320 321 if (blk_integrity_merge_bio(q, req, bio) == false) 322 goto no_merge; 323 324 /* 325 * This will form the start of a new hw segment. Bump both 326 * counters. 327 */ 328 req->nr_phys_segments += nr_phys_segs; 329 return 1; 330 331 no_merge: 332 req->cmd_flags |= REQ_NOMERGE; 333 if (req == q->last_merge) 334 q->last_merge = NULL; 335 return 0; 336 } 337 338 int ll_back_merge_fn(struct request_queue *q, struct request *req, 339 struct bio *bio) 340 { 341 if (blk_rq_sectors(req) + bio_sectors(bio) > 342 blk_rq_get_max_sectors(req)) { 343 req->cmd_flags |= REQ_NOMERGE; 344 if (req == q->last_merge) 345 q->last_merge = NULL; 346 return 0; 347 } 348 if (!bio_flagged(req->biotail, BIO_SEG_VALID)) 349 blk_recount_segments(q, req->biotail); 350 if (!bio_flagged(bio, BIO_SEG_VALID)) 351 blk_recount_segments(q, bio); 352 353 return ll_new_hw_segment(q, req, bio); 354 } 355 356 int ll_front_merge_fn(struct request_queue *q, struct request *req, 357 struct bio *bio) 358 { 359 if (blk_rq_sectors(req) + bio_sectors(bio) > 360 blk_rq_get_max_sectors(req)) { 361 req->cmd_flags |= REQ_NOMERGE; 362 if (req == q->last_merge) 363 q->last_merge = NULL; 364 return 0; 365 } 366 if (!bio_flagged(bio, BIO_SEG_VALID)) 367 blk_recount_segments(q, bio); 368 if (!bio_flagged(req->bio, BIO_SEG_VALID)) 369 blk_recount_segments(q, req->bio); 370 371 return ll_new_hw_segment(q, req, bio); 372 } 373 374 /* 375 * blk-mq uses req->special to carry normal driver per-request payload, it 376 * does not indicate a prepared command that we cannot merge with. 377 */ 378 static bool req_no_special_merge(struct request *req) 379 { 380 struct request_queue *q = req->q; 381 382 return !q->mq_ops && req->special; 383 } 384 385 static int ll_merge_requests_fn(struct request_queue *q, struct request *req, 386 struct request *next) 387 { 388 int total_phys_segments; 389 unsigned int seg_size = 390 req->biotail->bi_seg_back_size + next->bio->bi_seg_front_size; 391 392 /* 393 * First check if the either of the requests are re-queued 394 * requests. Can't merge them if they are. 395 */ 396 if (req_no_special_merge(req) || req_no_special_merge(next)) 397 return 0; 398 399 /* 400 * Will it become too large? 401 */ 402 if ((blk_rq_sectors(req) + blk_rq_sectors(next)) > 403 blk_rq_get_max_sectors(req)) 404 return 0; 405 406 total_phys_segments = req->nr_phys_segments + next->nr_phys_segments; 407 if (blk_phys_contig_segment(q, req->biotail, next->bio)) { 408 if (req->nr_phys_segments == 1) 409 req->bio->bi_seg_front_size = seg_size; 410 if (next->nr_phys_segments == 1) 411 next->biotail->bi_seg_back_size = seg_size; 412 total_phys_segments--; 413 } 414 415 if (total_phys_segments > queue_max_segments(q)) 416 return 0; 417 418 if (blk_integrity_merge_rq(q, req, next) == false) 419 return 0; 420 421 /* Merge is OK... */ 422 req->nr_phys_segments = total_phys_segments; 423 return 1; 424 } 425 426 /** 427 * blk_rq_set_mixed_merge - mark a request as mixed merge 428 * @rq: request to mark as mixed merge 429 * 430 * Description: 431 * @rq is about to be mixed merged. Make sure the attributes 432 * which can be mixed are set in each bio and mark @rq as mixed 433 * merged. 434 */ 435 void blk_rq_set_mixed_merge(struct request *rq) 436 { 437 unsigned int ff = rq->cmd_flags & REQ_FAILFAST_MASK; 438 struct bio *bio; 439 440 if (rq->cmd_flags & REQ_MIXED_MERGE) 441 return; 442 443 /* 444 * @rq will no longer represent mixable attributes for all the 445 * contained bios. It will just track those of the first one. 446 * Distributes the attributs to each bio. 447 */ 448 for (bio = rq->bio; bio; bio = bio->bi_next) { 449 WARN_ON_ONCE((bio->bi_rw & REQ_FAILFAST_MASK) && 450 (bio->bi_rw & REQ_FAILFAST_MASK) != ff); 451 bio->bi_rw |= ff; 452 } 453 rq->cmd_flags |= REQ_MIXED_MERGE; 454 } 455 456 static void blk_account_io_merge(struct request *req) 457 { 458 if (blk_do_io_stat(req)) { 459 struct hd_struct *part; 460 int cpu; 461 462 cpu = part_stat_lock(); 463 part = req->part; 464 465 part_round_stats(cpu, part); 466 part_dec_in_flight(part, rq_data_dir(req)); 467 468 hd_struct_put(part); 469 part_stat_unlock(); 470 } 471 } 472 473 /* 474 * Has to be called with the request spinlock acquired 475 */ 476 static int attempt_merge(struct request_queue *q, struct request *req, 477 struct request *next) 478 { 479 if (!rq_mergeable(req) || !rq_mergeable(next)) 480 return 0; 481 482 if (!blk_check_merge_flags(req->cmd_flags, next->cmd_flags)) 483 return 0; 484 485 /* 486 * not contiguous 487 */ 488 if (blk_rq_pos(req) + blk_rq_sectors(req) != blk_rq_pos(next)) 489 return 0; 490 491 if (rq_data_dir(req) != rq_data_dir(next) 492 || req->rq_disk != next->rq_disk 493 || req_no_special_merge(next)) 494 return 0; 495 496 if (req->cmd_flags & REQ_WRITE_SAME && 497 !blk_write_same_mergeable(req->bio, next->bio)) 498 return 0; 499 500 /* 501 * If we are allowed to merge, then append bio list 502 * from next to rq and release next. merge_requests_fn 503 * will have updated segment counts, update sector 504 * counts here. 505 */ 506 if (!ll_merge_requests_fn(q, req, next)) 507 return 0; 508 509 /* 510 * If failfast settings disagree or any of the two is already 511 * a mixed merge, mark both as mixed before proceeding. This 512 * makes sure that all involved bios have mixable attributes 513 * set properly. 514 */ 515 if ((req->cmd_flags | next->cmd_flags) & REQ_MIXED_MERGE || 516 (req->cmd_flags & REQ_FAILFAST_MASK) != 517 (next->cmd_flags & REQ_FAILFAST_MASK)) { 518 blk_rq_set_mixed_merge(req); 519 blk_rq_set_mixed_merge(next); 520 } 521 522 /* 523 * At this point we have either done a back merge 524 * or front merge. We need the smaller start_time of 525 * the merged requests to be the current request 526 * for accounting purposes. 527 */ 528 if (time_after(req->start_time, next->start_time)) 529 req->start_time = next->start_time; 530 531 req->biotail->bi_next = next->bio; 532 req->biotail = next->biotail; 533 534 req->__data_len += blk_rq_bytes(next); 535 536 elv_merge_requests(q, req, next); 537 538 /* 539 * 'next' is going away, so update stats accordingly 540 */ 541 blk_account_io_merge(next); 542 543 req->ioprio = ioprio_best(req->ioprio, next->ioprio); 544 if (blk_rq_cpu_valid(next)) 545 req->cpu = next->cpu; 546 547 /* owner-ship of bio passed from next to req */ 548 next->bio = NULL; 549 __blk_put_request(q, next); 550 return 1; 551 } 552 553 int attempt_back_merge(struct request_queue *q, struct request *rq) 554 { 555 struct request *next = elv_latter_request(q, rq); 556 557 if (next) 558 return attempt_merge(q, rq, next); 559 560 return 0; 561 } 562 563 int attempt_front_merge(struct request_queue *q, struct request *rq) 564 { 565 struct request *prev = elv_former_request(q, rq); 566 567 if (prev) 568 return attempt_merge(q, prev, rq); 569 570 return 0; 571 } 572 573 int blk_attempt_req_merge(struct request_queue *q, struct request *rq, 574 struct request *next) 575 { 576 return attempt_merge(q, rq, next); 577 } 578 579 bool blk_rq_merge_ok(struct request *rq, struct bio *bio) 580 { 581 struct request_queue *q = rq->q; 582 583 if (!rq_mergeable(rq) || !bio_mergeable(bio)) 584 return false; 585 586 if (!blk_check_merge_flags(rq->cmd_flags, bio->bi_rw)) 587 return false; 588 589 /* different data direction or already started, don't merge */ 590 if (bio_data_dir(bio) != rq_data_dir(rq)) 591 return false; 592 593 /* must be same device and not a special request */ 594 if (rq->rq_disk != bio->bi_bdev->bd_disk || req_no_special_merge(rq)) 595 return false; 596 597 /* only merge integrity protected bio into ditto rq */ 598 if (blk_integrity_merge_bio(rq->q, rq, bio) == false) 599 return false; 600 601 /* must be using the same buffer */ 602 if (rq->cmd_flags & REQ_WRITE_SAME && 603 !blk_write_same_mergeable(rq->bio, bio)) 604 return false; 605 606 if (q->queue_flags & (1 << QUEUE_FLAG_SG_GAPS)) { 607 struct bio_vec *bprev; 608 609 bprev = &rq->biotail->bi_io_vec[bio->bi_vcnt - 1]; 610 if (bvec_gap_to_prev(bprev, bio->bi_io_vec[0].bv_offset)) 611 return false; 612 } 613 614 return true; 615 } 616 617 int blk_try_merge(struct request *rq, struct bio *bio) 618 { 619 if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector) 620 return ELEVATOR_BACK_MERGE; 621 else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector) 622 return ELEVATOR_FRONT_MERGE; 623 return ELEVATOR_NO_MERGE; 624 } 625