// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to segment and merge handling
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/blk-integrity.h>
#include <linux/part_stat.h>
#include <linux/blk-cgroup.h>

#include <trace/events/block.h>

#include "blk.h"
#include "blk-mq-sched.h"
#include "blk-rq-qos.h"
#include "blk-throttle.h"

static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
{
	*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
}

static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
{
	struct bvec_iter iter = bio->bi_iter;
	int idx;

	bio_get_first_bvec(bio, bv);
	if (bv->bv_len == bio->bi_iter.bi_size)
		return;		/* this bio only has a single bvec */

	bio_advance_iter(bio, &iter, iter.bi_size);

	if (!iter.bi_bvec_done)
		idx = iter.bi_idx - 1;
	else	/* in the middle of bvec */
		idx = iter.bi_idx;

	*bv = bio->bi_io_vec[idx];

	/*
	 * iter.bi_bvec_done records actual length of the last bvec
	 * if this bio ends in the middle of one io vector
	 */
	if (iter.bi_bvec_done)
		bv->bv_len = iter.bi_bvec_done;
}

static inline bool bio_will_gap(struct request_queue *q,
		struct request *prev_rq, struct bio *prev, struct bio *next)
{
	struct bio_vec pb, nb;

	if (!bio_has_data(prev) || !queue_virt_boundary(q))
		return false;

	/*
	 * Don't merge if the first bio starts at a non-zero offset, otherwise
	 * it is quite difficult to respect the sg gap limit. We work hard to
	 * merge a huge number of small single bios in case of mkfs.
	 */
	if (prev_rq)
		bio_get_first_bvec(prev_rq->bio, &pb);
	else
		bio_get_first_bvec(prev, &pb);
	if (pb.bv_offset & queue_virt_boundary(q))
		return true;

	/*
	 * We don't need to worry about the situation that the merged segment
	 * ends in unaligned virt boundary:
	 *
	 * - if 'pb' ends aligned, the merged segment ends aligned
	 * - if 'pb' ends unaligned, the next bio must include
	 *   one single bvec of 'nb', otherwise the 'nb' can't
	 *   merge with 'pb'
	 */
	bio_get_last_bvec(prev, &pb);
	bio_get_first_bvec(next, &nb);
	if (biovec_phys_mergeable(q, &pb, &nb))
		return false;
	return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
}

static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
{
	return bio_will_gap(req->q, req, req->biotail, bio);
}

static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
{
	return bio_will_gap(req->q, NULL, bio, req->bio);
}

/*
 * The max size one bio can handle is UINT_MAX because bvec_iter.bi_size
 * is defined as 'unsigned int', and it also has to be aligned to the
 * logical block size, which is the minimum unit accepted by the hardware.
 */
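/*
 * For example, with a 4096-byte logical block size this evaluates to
 * round_down(UINT_MAX, 4096) >> 9 == 8388600 sectors.
 */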
static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
{
	return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
}

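/*
 * Split off the first split_sectors sectors of @bio, submit the (shortened)
 * remainder and return the split part. A negative split_sectors is treated
 * as an error and ends @bio; zero means no split is needed and @bio is
 * returned unchanged.
 */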
static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
{
	if (unlikely(split_sectors < 0))
		goto error;

	if (split_sectors) {
		struct bio *split;

		split = bio_split(bio, split_sectors, GFP_NOIO,
				&bio->bi_bdev->bd_disk->bio_split);
		if (IS_ERR(split)) {
			split_sectors = PTR_ERR(split);
			goto error;
		}
		split->bi_opf |= REQ_NOMERGE;
		blkcg_bio_issue_init(split);
		bio_chain(split, bio);
		trace_block_split(split, bio->bi_iter.bi_sector);
		WARN_ON_ONCE(bio_zone_write_plugging(bio));
		submit_bio_noacct(bio);
		return split;
	}

	return bio;
error:
	bio->bi_status = errno_to_blk_status(split_sectors);
	bio_endio(bio);
	return NULL;
}

struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
		unsigned *nsegs)
{
	unsigned int max_discard_sectors, granularity;
	sector_t tmp;
	unsigned split_sectors;

	*nsegs = 1;

	granularity = max(lim->discard_granularity >> 9, 1U);

	max_discard_sectors =
		min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
	max_discard_sectors -= max_discard_sectors % granularity;
	if (unlikely(!max_discard_sectors))
		return bio;

	if (bio_sectors(bio) <= max_discard_sectors)
		return bio;

	split_sectors = max_discard_sectors;

	/*
	 * If the next starting sector would be misaligned, stop the discard at
	 * the previous aligned sector.
	 */
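	/*
	 * For example, with granularity == 8, discard_alignment == 0, a start
	 * sector of 4 and split_sectors == 16, the remainder 20 % 8 == 4 is
	 * trimmed off so the split ends on the aligned sector 16.
	 */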
	tmp = bio->bi_iter.bi_sector + split_sectors -
		((lim->discard_alignment >> 9) % granularity);
	tmp = sector_div(tmp, granularity);

	if (split_sectors > tmp)
		split_sectors -= tmp;

	return bio_submit_split(bio, split_sectors);
}

static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
						bool is_atomic)
{
	/*
	 * chunk_sectors must be a multiple of atomic_write_boundary_sectors if
	 * both are non-zero.
	 */
	if (is_atomic && lim->atomic_write_boundary_sectors)
		return lim->atomic_write_boundary_sectors;

	return lim->chunk_sectors;
}

/*
 * Return the maximum number of sectors from the start of a bio that may be
 * submitted as a single request to a block device. If enough sectors remain,
 * align the end to the physical block size. Otherwise align the end to the
 * logical block size. This approach minimizes the number of non-aligned
 * requests that are submitted to a block device if the start of a bio is not
 * aligned to a physical block boundary.
 */
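/*
 * For example, with an 8-sector (4 KiB) physical block, a bio starting at
 * sector 3 and max_sectors == 64: the end is rounded down to sector 64, so
 * 61 sectors are allowed and the I/O ends on a physical block boundary.
 */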
static inline unsigned get_max_io_size(struct bio *bio,
				       const struct queue_limits *lim)
{
	unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
	unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
	bool is_atomic = bio->bi_opf & REQ_ATOMIC;
	unsigned boundary_sectors = blk_boundary_sectors(lim, is_atomic);
	unsigned max_sectors, start, end;

	/*
	 * We ignore lim->max_sectors for atomic writes because it may be less
	 * than the actual bio size, which we cannot tolerate.
	 */
	if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
		max_sectors = lim->max_write_zeroes_sectors;
	else if (is_atomic)
		max_sectors = lim->atomic_write_max_sectors;
	else
		max_sectors = lim->max_sectors;

	if (boundary_sectors) {
		max_sectors = min(max_sectors,
			blk_boundary_sectors_left(bio->bi_iter.bi_sector,
						  boundary_sectors));
	}

	start = bio->bi_iter.bi_sector & (pbs - 1);
	end = (start + max_sectors) & ~(pbs - 1);
	if (end > start)
		return end - start;
	return max_sectors & ~(lbs - 1);
}

/**
 * bvec_split_segs - verify whether or not a bvec should be split in the middle
 * @lim:      [in] queue limits to split based on
 * @bv:       [in] bvec to examine
 * @nsegs:    [in,out] Number of segments in the bio being built. Incremented
 *            by the number of segments from @bv that may be appended to that
 *            bio without exceeding @max_segs
 * @bytes:    [in,out] Number of bytes in the bio being built. Incremented
 *            by the number of bytes from @bv that may be appended to that
 *            bio without exceeding @max_bytes
 * @max_segs: [in] upper bound for *@nsegs
 * @max_bytes: [in] upper bound for *@bytes
 *
 * When splitting a bio, it can happen that a bvec is encountered that is too
 * big to fit in a single segment and hence that it has to be split in the
 * middle. This function verifies whether or not that should happen. The value
 * %true is returned if and only if appending the entire @bv to a bio with
 * *@nsegs segments and *@bytes bytes would make that bio unacceptable for
 * the block driver.
 */
static bool bvec_split_segs(const struct queue_limits *lim,
		const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
		unsigned max_segs, unsigned max_bytes)
{
	unsigned max_len = max_bytes - *bytes;
	unsigned len = min(bv->bv_len, max_len);
	unsigned total_len = 0;
	unsigned seg_size = 0;

	while (len && *nsegs < max_segs) {
		seg_size = get_max_segment_size(lim, bvec_phys(bv) + total_len, len);

		(*nsegs)++;
		total_len += seg_size;
		len -= seg_size;

		if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
			break;
	}

	*bytes += total_len;

	/* tell the caller to split the bvec if it is too big to fit */
	return len > 0 || bv->bv_len > max_len;
}

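/*
 * Writes to zoned devices may have to honour the zone write granularity,
 * which can be larger than the logical block size; everything else only
 * needs to stay logical block aligned.
 */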
static unsigned int bio_split_alignment(struct bio *bio,
		const struct queue_limits *lim)
{
	if (op_is_write(bio_op(bio)) && lim->zone_write_granularity)
		return lim->zone_write_granularity;
	return lim->logical_block_size;
}

/**
 * bio_split_rw_at - check if and where to split a read/write bio
 * @bio:  [in] bio to be split
 * @lim:  [in] queue limits to split based on
 * @segs: [out] number of segments in the bio with the first half of the sectors
 * @max_bytes: [in] maximum number of bytes per bio
 *
 * Find out if @bio needs to be split to fit the queue limits in @lim and a
 * maximum size of @max_bytes. Returns a negative error number if @bio can't be
 * split, 0 if the bio doesn't have to be split, or a positive sector offset if
 * @bio needs to be split.
 */
int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
		unsigned *segs, unsigned max_bytes)
{
	struct bio_vec bv, bvprv, *bvprvp = NULL;
	struct bvec_iter iter;
	unsigned nsegs = 0, bytes = 0;

	bio_for_each_bvec(bv, bio, iter) {
		/*
		 * If the queue doesn't support SG gaps and adding this
		 * offset would create a gap, disallow it.
		 */
		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
			goto split;

		if (nsegs < lim->max_segments &&
		    bytes + bv.bv_len <= max_bytes &&
		    bv.bv_offset + bv.bv_len <= lim->min_segment_size) {
			nsegs++;
			bytes += bv.bv_len;
		} else {
			if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
					lim->max_segments, max_bytes))
				goto split;
		}

		bvprv = bv;
		bvprvp = &bvprv;
	}

	*segs = nsegs;
	return 0;
split:
	if (bio->bi_opf & REQ_ATOMIC)
		return -EINVAL;

	/*
	 * We can't sanely support splitting for a REQ_NOWAIT bio. Fail it
	 * with -EAGAIN if splitting is required.
	 */
	if (bio->bi_opf & REQ_NOWAIT)
		return -EAGAIN;

	*segs = nsegs;

	/*
	 * Individual bvecs might not be logical block aligned. Round down the
	 * split size so that each bio is properly block size aligned, even if
	 * we do not use the full hardware limits.
	 */
	bytes = ALIGN_DOWN(bytes, bio_split_alignment(bio, lim));

	/*
	 * Bio splitting may cause subtle trouble such as hang when doing sync
	 * iopoll in direct IO routine. Given that the performance gain of
	 * iopoll for big IO can be trivial, disable iopoll when a split is
	 * needed.
	 */
	bio_clear_polled(bio);
	return bytes >> SECTOR_SHIFT;
}
EXPORT_SYMBOL_GPL(bio_split_rw_at);

struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
		unsigned *nr_segs)
{
	return bio_submit_split(bio,
		bio_split_rw_at(bio, lim, nr_segs,
			get_max_io_size(bio, lim) << SECTOR_SHIFT));
}

/*
 * REQ_OP_ZONE_APPEND bios must never be split by the block layer.
 *
 * But we want the nr_segs calculation provided by bio_split_rw_at, and having
 * a good sanity check that the submitter built the bio correctly is nice to
 * have as well.
 */
struct bio *bio_split_zone_append(struct bio *bio,
		const struct queue_limits *lim, unsigned *nr_segs)
{
	int split_sectors;

	split_sectors = bio_split_rw_at(bio, lim, nr_segs,
			lim->max_zone_append_sectors << SECTOR_SHIFT);
	if (WARN_ON_ONCE(split_sectors > 0))
		split_sectors = -EINVAL;
	return bio_submit_split(bio, split_sectors);
}

struct bio *bio_split_write_zeroes(struct bio *bio,
		const struct queue_limits *lim, unsigned *nsegs)
{
	unsigned int max_sectors = get_max_io_size(bio, lim);

	*nsegs = 0;

	/*
	 * An unset limit should normally not happen, as bio submission is
	 * keyed off having a non-zero limit. But SCSI can clear the limit in
	 * the I/O completion handler, and we can race and see this. Splitting
	 * to a zero limit obviously doesn't make sense, so band-aid it here.
	 */
	if (!max_sectors)
		return bio;
	if (bio_sectors(bio) <= max_sectors)
		return bio;
	return bio_submit_split(bio, max_sectors);
}

/**
 * bio_split_to_limits - split a bio to fit the queue limits
 * @bio:  bio to be split
 *
 * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
 * if so split off a bio fitting the limits from the beginning of @bio and
 * return it. @bio is shortened to the remainder and re-submitted.
 *
 * The split bio is allocated from the bio_split bio set of @bio's disk, which
 * is provided by the block layer.
 */
struct bio *bio_split_to_limits(struct bio *bio)
{
	unsigned int nr_segs;

	return __bio_split_to_limits(bio, bdev_limits(bio->bi_bdev), &nr_segs);
}
EXPORT_SYMBOL(bio_split_to_limits);

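/*
 * Recalculate the number of physical segments in a request, e.g. after bios
 * have been merged into it. Discard/secure-erase requests count one segment
 * per bio when the queue supports multiple discard segments and a single
 * segment otherwise; write-zeroes requests carry no data payload at all.
 */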
unsigned int blk_recalc_rq_segments(struct request *rq)
{
	unsigned int nr_phys_segs = 0;
	unsigned int bytes = 0;
	struct req_iterator iter;
	struct bio_vec bv;

	if (!rq->bio)
		return 0;

	switch (bio_op(rq->bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_SECURE_ERASE:
		if (queue_max_discard_segments(rq->q) > 1) {
			struct bio *bio = rq->bio;

			for_each_bio(bio)
				nr_phys_segs++;
			return nr_phys_segs;
		}
		return 1;
	case REQ_OP_WRITE_ZEROES:
		return 0;
	default:
		break;
	}

	rq_for_each_bvec(bv, rq, iter)
		bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
				UINT_MAX, UINT_MAX);
	return nr_phys_segs;
}

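/*
 * Maximum number of sectors a request starting at @offset may contain.
 * Passthrough requests are only bound by max_hw_sectors, and discard/secure
 * erase requests may cross chunk/atomic-write boundaries.
 */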
static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
						  sector_t offset)
{
	struct request_queue *q = rq->q;
	struct queue_limits *lim = &q->limits;
	unsigned int max_sectors, boundary_sectors;
	bool is_atomic = rq->cmd_flags & REQ_ATOMIC;

	if (blk_rq_is_passthrough(rq))
		return q->limits.max_hw_sectors;

	boundary_sectors = blk_boundary_sectors(lim, is_atomic);
	max_sectors = blk_queue_get_max_sectors(rq);

	if (!boundary_sectors ||
	    req_op(rq) == REQ_OP_DISCARD ||
	    req_op(rq) == REQ_OP_SECURE_ERASE)
		return max_sectors;
	return min(max_sectors,
		   blk_boundary_sectors_left(offset, boundary_sectors));
}

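/*
 * Try to account the segments of @bio to @req: fail if cgroup or integrity
 * constraints forbid the merge or the queue's segment limit would be
 * exceeded, otherwise bump the request's segment counts (discard merges do
 * not add segments). Returns 1 on success and 0 if the bio can't be merged.
 */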
static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
				    unsigned int nr_phys_segs)
{
	if (!blk_cgroup_mergeable(req, bio))
		goto no_merge;

	if (blk_integrity_merge_bio(req->q, req, bio) == false)
		goto no_merge;

	/* discard request merge won't add new segment */
	if (req_op(req) == REQ_OP_DISCARD)
		return 1;

	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
		goto no_merge;

	/*
	 * This will form the start of a new hw segment. Bump both
	 * counters.
	 */
	req->nr_phys_segments += nr_phys_segs;
	if (bio_integrity(bio))
		req->nr_integrity_segments += blk_rq_count_integrity_sg(req->q,
									bio);
	return 1;

no_merge:
	req_set_nomerge(req->q, req);
	return 0;
}

int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
{
	if (req_gap_back_merge(req, bio))
		return 0;
	if (blk_integrity_rq(req) &&
	    integrity_req_gap_back_merge(req, bio))
		return 0;
	if (!bio_crypt_ctx_back_mergeable(req, bio))
		return 0;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
		req_set_nomerge(req->q, req);
		return 0;
	}

	return ll_new_hw_segment(req, bio, nr_segs);
}

static int ll_front_merge_fn(struct request *req, struct bio *bio,
			     unsigned int nr_segs)
{
	if (req_gap_front_merge(req, bio))
		return 0;
	if (blk_integrity_rq(req) &&
	    integrity_req_gap_front_merge(req, bio))
		return 0;
	if (!bio_crypt_ctx_front_mergeable(req, bio))
		return 0;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
		req_set_nomerge(req->q, req);
		return 0;
	}

	return ll_new_hw_segment(req, bio, nr_segs);
}

static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
		struct request *next)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);

	if (segments >= queue_max_discard_segments(q))
		goto no_merge;
	if (blk_rq_sectors(req) + bio_sectors(next->bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		goto no_merge;

	req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
	return true;
no_merge:
	req_set_nomerge(q, req);
	return false;
}

static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
				struct request *next)
{
	int total_phys_segments;

	if (req_gap_back_merge(req, next->bio))
		return 0;

	/*
	 * Will it become too large?
	 */
	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		return 0;

	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
	if (total_phys_segments > blk_rq_get_max_segments(req))
		return 0;

	if (!blk_cgroup_mergeable(req, next->bio))
		return 0;

	if (blk_integrity_merge_rq(q, req, next) == false)
		return 0;

	if (!bio_crypt_ctx_merge_rq(req, next))
		return 0;

	/* Merge is OK... */
	req->nr_phys_segments = total_phys_segments;
	req->nr_integrity_segments += next->nr_integrity_segments;
	return 1;
}

/**
 * blk_rq_set_mixed_merge - mark a request as mixed merge
 * @rq: request to mark as mixed merge
 *
 * Description:
 *     @rq is about to be mixed merged. Make sure the attributes
 *     which can be mixed are set in each bio and mark @rq as mixed
 *     merged.
 */
static void blk_rq_set_mixed_merge(struct request *rq)
{
	blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
	struct bio *bio;

	if (rq->rq_flags & RQF_MIXED_MERGE)
		return;

	/*
	 * @rq will no longer represent mixable attributes for all the
	 * contained bios. It will just track those of the first one.
	 * Distribute the attributes to each bio.
	 */
	for (bio = rq->bio; bio; bio = bio->bi_next) {
		WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
			     (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
		bio->bi_opf |= ff;
	}
	rq->rq_flags |= RQF_MIXED_MERGE;
}

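/*
 * For merge decisions a readahead bio is treated as if all failfast bits were
 * set; blk_update_mixed_merge() below then applies the bits for real once the
 * request has been marked RQF_MIXED_MERGE.
 */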
static inline blk_opf_t bio_failfast(const struct bio *bio)
{
	if (bio->bi_opf & REQ_RAHEAD)
		return REQ_FAILFAST_MASK;

	return bio->bi_opf & REQ_FAILFAST_MASK;
}

/*
 * After we are marked as MIXED_MERGE, any new RA bio has to be updated
 * as failfast, and the request's failfast flags have to be updated in
 * case of a front merge.
 */
static inline void blk_update_mixed_merge(struct request *req,
		struct bio *bio, bool front_merge)
{
	if (req->rq_flags & RQF_MIXED_MERGE) {
		if (bio->bi_opf & REQ_RAHEAD)
			bio->bi_opf |= REQ_FAILFAST_MASK;

		if (front_merge) {
			req->cmd_flags &= ~REQ_FAILFAST_MASK;
			req->cmd_flags |= bio->bi_opf & REQ_FAILFAST_MASK;
		}
	}
}

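/*
 * Account for a request that is going away because it was merged into
 * another request: count the merge and drop its in-flight counter.
 */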
static void blk_account_io_merge_request(struct request *req)
{
	if (req->rq_flags & RQF_IO_STAT) {
		part_stat_lock();
		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
		part_stat_local_dec(req->part,
				    in_flight[op_is_write(req_op(req))]);
		part_stat_unlock();
	}
}

static enum elv_merge blk_try_req_merge(struct request *req,
					struct request *next)
{
	if (blk_discard_mergable(req))
		return ELEVATOR_DISCARD_MERGE;
	else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
		return ELEVATOR_BACK_MERGE;

	return ELEVATOR_NO_MERGE;
}

static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
					      struct bio *bio)
{
	return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
}

static bool blk_atomic_write_mergeable_rqs(struct request *rq,
					   struct request *next)
{
	return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
}

/*
 * For non-mq, this has to be called with the request spinlock acquired.
 * For mq with scheduling, the appropriate queue wide lock should be held.
 */
static struct request *attempt_merge(struct request_queue *q,
				     struct request *req, struct request *next)
{
	if (!rq_mergeable(req) || !rq_mergeable(next))
		return NULL;

	if (req_op(req) != req_op(next))
		return NULL;

	if (req->bio->bi_write_hint != next->bio->bi_write_hint)
		return NULL;
	if (req->bio->bi_write_stream != next->bio->bi_write_stream)
		return NULL;
	if (req->bio->bi_ioprio != next->bio->bi_ioprio)
		return NULL;
	if (!blk_atomic_write_mergeable_rqs(req, next))
		return NULL;

	/*
	 * If we are allowed to merge, then append bio list
	 * from next to rq and release next. merge_requests_fn
	 * will have updated segment counts, update sector
	 * counts here. Handle DISCARDs separately, as they
	 * have separate settings.
	 */

	switch (blk_try_req_merge(req, next)) {
	case ELEVATOR_DISCARD_MERGE:
		if (!req_attempt_discard_merge(q, req, next))
			return NULL;
		break;
	case ELEVATOR_BACK_MERGE:
		if (!ll_merge_requests_fn(q, req, next))
			return NULL;
		break;
	default:
		return NULL;
	}

	/*
	 * If failfast settings disagree or any of the two is already
	 * a mixed merge, mark both as mixed before proceeding. This
	 * makes sure that all involved bios have mixable attributes
	 * set properly.
	 */
	if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
		blk_rq_set_mixed_merge(req);
		blk_rq_set_mixed_merge(next);
	}

	/*
	 * At this point we have either done a back merge or front merge. We
	 * need the smaller start_time_ns of the merged requests to be the
	 * current request for accounting purposes.
	 */
	if (next->start_time_ns < req->start_time_ns)
		req->start_time_ns = next->start_time_ns;

	req->biotail->bi_next = next->bio;
	req->biotail = next->biotail;

	req->__data_len += blk_rq_bytes(next);

	if (!blk_discard_mergable(req))
		elv_merge_requests(q, req, next);

	blk_crypto_rq_put_keyslot(next);

	/*
	 * 'next' is going away, so update stats accordingly
	 */
	blk_account_io_merge_request(next);

	trace_block_rq_merge(next);

	/*
	 * ownership of bio passed from next to req, return 'next' for
	 * the caller to free
	 */
	next->bio = NULL;
	return next;
}

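/*
 * Ask the elevator for the request it considers to come right after (or
 * before) @rq and try to merge the two. Returns the merged-away request so
 * the caller can free it, or NULL if no merge happened.
 */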
static struct request *attempt_back_merge(struct request_queue *q,
					  struct request *rq)
{
	struct request *next = elv_latter_request(q, rq);

	if (next)
		return attempt_merge(q, rq, next);

	return NULL;
}

static struct request *attempt_front_merge(struct request_queue *q,
					   struct request *rq)
{
	struct request *prev = elv_former_request(q, rq);

	if (prev)
		return attempt_merge(q, prev, rq);

	return NULL;
}

/*
 * Try to merge 'next' into 'rq'. Return true if the merge happened, false
 * otherwise. The caller is responsible for freeing 'next' if the merge
 * happened.
 */
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
			   struct request *next)
{
	return attempt_merge(q, rq, next);
}

bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
{
	if (!rq_mergeable(rq) || !bio_mergeable(bio))
		return false;

	if (req_op(rq) != bio_op(bio))
		return false;

	if (!blk_cgroup_mergeable(rq, bio))
		return false;
	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
		return false;
	if (!bio_crypt_rq_ctx_compatible(rq, bio))
		return false;
	if (rq->bio->bi_write_hint != bio->bi_write_hint)
		return false;
	if (rq->bio->bi_write_stream != bio->bi_write_stream)
		return false;
	if (rq->bio->bi_ioprio != bio->bi_ioprio)
		return false;
	if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
		return false;

	return true;
}

enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
{
	if (blk_discard_mergable(rq))
		return ELEVATOR_DISCARD_MERGE;
	else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
		return ELEVATOR_BACK_MERGE;
	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
		return ELEVATOR_FRONT_MERGE;
	return ELEVATOR_NO_MERGE;
}

static void blk_account_io_merge_bio(struct request *req)
{
	if (req->rq_flags & RQF_IO_STAT) {
		part_stat_lock();
		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
		part_stat_unlock();
	}
}

enum bio_merge_status bio_attempt_back_merge(struct request *req,
		struct bio *bio, unsigned int nr_segs)
{
	const blk_opf_t ff = bio_failfast(bio);

	if (!ll_back_merge_fn(req, bio, nr_segs))
		return BIO_MERGE_FAILED;

	trace_block_bio_backmerge(bio);
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	blk_update_mixed_merge(req, bio, false);

	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
		blk_zone_write_plug_bio_merged(bio);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;

	bio_crypt_free_ctx(bio);

	blk_account_io_merge_bio(req);
	return BIO_MERGE_OK;
}

static enum bio_merge_status bio_attempt_front_merge(struct request *req,
		struct bio *bio, unsigned int nr_segs)
{
	const blk_opf_t ff = bio_failfast(bio);

	/*
	 * A front merge for writes to sequential zones of a zoned block device
	 * can happen only if the user submitted writes out of order. Do not
	 * merge such a write; let it fail.
	 */
	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
		return BIO_MERGE_FAILED;

	if (!ll_front_merge_fn(req, bio, nr_segs))
		return BIO_MERGE_FAILED;

	trace_block_bio_frontmerge(bio);
	rq_qos_merge(req->q, req, bio);

	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
		blk_rq_set_mixed_merge(req);

	blk_update_mixed_merge(req, bio, true);

	bio->bi_next = req->bio;
	req->bio = bio;

	req->__sector = bio->bi_iter.bi_sector;
	req->__data_len += bio->bi_iter.bi_size;

	bio_crypt_do_front_merge(req, bio);

	blk_account_io_merge_bio(req);
	return BIO_MERGE_OK;
}

static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
		struct request *req, struct bio *bio)
{
	unsigned short segments = blk_rq_nr_discard_segments(req);

	if (segments >= queue_max_discard_segments(q))
		goto no_merge;
	if (blk_rq_sectors(req) + bio_sectors(bio) >
	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
		goto no_merge;

	rq_qos_merge(q, req, bio);

	req->biotail->bi_next = bio;
	req->biotail = bio;
	req->__data_len += bio->bi_iter.bi_size;
	req->nr_phys_segments = segments + 1;

	blk_account_io_merge_bio(req);
	return BIO_MERGE_OK;
no_merge:
	req_set_nomerge(q, req);
	return BIO_MERGE_FAILED;
}

static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
						   struct request *rq,
						   struct bio *bio,
						   unsigned int nr_segs,
						   bool sched_allow_merge)
{
	if (!blk_rq_merge_ok(rq, bio))
		return BIO_MERGE_NONE;

	switch (blk_try_merge(rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
			return bio_attempt_back_merge(rq, bio, nr_segs);
		break;
	case ELEVATOR_FRONT_MERGE:
		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
			return bio_attempt_front_merge(rq, bio, nr_segs);
		break;
	case ELEVATOR_DISCARD_MERGE:
		return bio_attempt_discard_merge(q, rq, bio);
	default:
		return BIO_MERGE_NONE;
	}

	return BIO_MERGE_FAILED;
}

/**
 * blk_attempt_plug_merge - try to merge with %current's plugged list
 * @q: request_queue new bio is being queued at
 * @bio: new bio being queued
 * @nr_segs: number of segments in @bio
 *
 * Determine whether @bio being queued on @q can be merged with the previous
 * request on %current's plugged list. Returns %true if merge was successful,
 * otherwise %false.
 *
 * Plugging coalesces IOs from the same issuer for the same purpose without
 * going through @q->queue_lock. As such it's more of an issuing mechanism
 * than scheduling, and the request, while it may have elvpriv data, is not
 * added to the elevator at this point. In addition, we don't have
 * reliable access to the elevator outside the queue lock. Only check basic
 * merging parameters without querying the elevator.
 *
 * Caller must ensure !blk_queue_nomerges(q) beforehand.
 */
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs)
{
	struct blk_plug *plug = current->plug;
	struct request *rq;

	if (!plug || rq_list_empty(&plug->mq_list))
		return false;

	rq = plug->mq_list.tail;
	if (rq->q == q)
		return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
		       BIO_MERGE_OK;
	else if (!plug->multiple_queues)
		return false;

	rq_list_for_each(&plug->mq_list, rq) {
		if (rq->q != q)
			continue;
		if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
		    BIO_MERGE_OK)
			return true;
		break;
	}
	return false;
}

/*
 * Iterate list of requests and see if we can merge this bio with any
 * of them.
 */
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
			struct bio *bio, unsigned int nr_segs)
{
	struct request *rq;
	int checked = 8;

	list_for_each_entry_reverse(rq, list, queuelist) {
		if (!checked--)
			break;

		switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
		case BIO_MERGE_NONE:
			continue;
		case BIO_MERGE_OK:
			return true;
		case BIO_MERGE_FAILED:
			return false;
		}
	}

	return false;
}
EXPORT_SYMBOL_GPL(blk_bio_list_merge);

bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
		unsigned int nr_segs, struct request **merged_request)
{
	struct request *rq;

	switch (elv_merge(q, &rq, bio)) {
	case ELEVATOR_BACK_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
			return false;
		*merged_request = attempt_back_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
		return true;
	case ELEVATOR_FRONT_MERGE:
		if (!blk_mq_sched_allow_merge(q, rq, bio))
			return false;
		if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
			return false;
		*merged_request = attempt_front_merge(q, rq);
		if (!*merged_request)
			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
		return true;
	case ELEVATOR_DISCARD_MERGE:
		return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
	default:
		return false;
	}
}
EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);