xref: /linux/block/blk-merge.c (revision f713ffa3639cd57673754a5e83aedebf50dce332)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Functions related to segment and merge handling
4  */
5 #include <linux/kernel.h>
6 #include <linux/module.h>
7 #include <linux/bio.h>
8 #include <linux/blkdev.h>
9 #include <linux/blk-integrity.h>
10 #include <linux/part_stat.h>
11 #include <linux/blk-cgroup.h>
12 
13 #include <trace/events/block.h>
14 
15 #include "blk.h"
16 #include "blk-mq-sched.h"
17 #include "blk-rq-qos.h"
18 #include "blk-throttle.h"
19 
20 static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
21 {
22 	*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
23 }
24 
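/*
 * Return the last bvec of @bio as covered by bi_iter.  If the bio ends in
 * the middle of a (multi-page) bvec, bv_len is trimmed to bi_bvec_done so
 * that only the bytes belonging to @bio are reported.
 */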
25 static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
26 {
27 	struct bvec_iter iter = bio->bi_iter;
28 	int idx;
29 
30 	bio_get_first_bvec(bio, bv);
31 	if (bv->bv_len == bio->bi_iter.bi_size)
32 		return;		/* this bio only has a single bvec */
33 
34 	bio_advance_iter(bio, &iter, iter.bi_size);
35 
36 	if (!iter.bi_bvec_done)
37 		idx = iter.bi_idx - 1;
38 	else	/* in the middle of bvec */
39 		idx = iter.bi_idx;
40 
41 	*bv = bio->bi_io_vec[idx];
42 
43 	/*
44 	 * iter.bi_bvec_done records the actual length of the last bvec
45 	 * if this bio ends in the middle of an io vector
46 	 */
47 	if (iter.bi_bvec_done)
48 		bv->bv_len = iter.bi_bvec_done;
49 }
50 
51 static inline bool bio_will_gap(struct request_queue *q,
52 		struct request *prev_rq, struct bio *prev, struct bio *next)
53 {
54 	struct bio_vec pb, nb;
55 
56 	if (!bio_has_data(prev) || !queue_virt_boundary(q))
57 		return false;
58 
59 	/*
60 	 * Don't merge if the 1st bio starts with non-zero offset, otherwise it
61 	 * Don't merge if the 1st bio starts at a non-zero offset, otherwise it
62 	 * is quite difficult to respect the sg gap limit.  We work hard to
63 	 * merge a huge number of small bios, as generated e.g. by mkfs.
64 	if (prev_rq)
65 		bio_get_first_bvec(prev_rq->bio, &pb);
66 	else
67 		bio_get_first_bvec(prev, &pb);
68 	if (pb.bv_offset & queue_virt_boundary(q))
69 		return true;
70 
71 	/*
72 	 * We don't need to worry about the case where the merged segment
73 	 * ends at an unaligned virt boundary:
74 	 *
75 	 * - if 'pb' ends aligned, the merged segment ends aligned
76 	 * - if 'pb' ends unaligned, the next bio must include
77 	 *   one single bvec of 'nb', otherwise the 'nb' can't
78 	 *   merge with 'pb'
79 	 */
80 	bio_get_last_bvec(prev, &pb);
81 	bio_get_first_bvec(next, &nb);
82 	if (biovec_phys_mergeable(q, &pb, &nb))
83 		return false;
84 	return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
85 }
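/*
 * Roughly speaking, with e.g. a 4095 (0xfff) virt_boundary_mask two bios
 * may only share a request if the boundary between them falls on a 4k
 * address boundary: the first bio must end 4k-aligned (or be physically
 * contiguous with the next one) and the next bio must start 4k-aligned.
 */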
86 
87 static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
88 {
89 	return bio_will_gap(req->q, req, req->biotail, bio);
90 }
91 
92 static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
93 {
94 	return bio_will_gap(req->q, NULL, bio, req->bio);
95 }
96 
97 /*
98  * The max size one bio can handle is UINT_MAX because bvec_iter.bi_size
99  * is defined as 'unsigned int'; meanwhile it has to be aligned to the
100  * logical block size, which is the minimum unit accepted by hardware.
101  */
102 static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
103 {
104 	return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
105 }
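/*
 * E.g. with a 4k logical block size this is round_down(0xffffffff, 4096)
 * >> 9 == 8388600 sectors, i.e. just under 4GiB per bio.
 */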
106 
107 static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
108 {
109 	if (unlikely(split_sectors < 0))
110 		goto error;
111 
112 	if (split_sectors) {
113 		struct bio *split;
114 
115 		split = bio_split(bio, split_sectors, GFP_NOIO,
116 				&bio->bi_bdev->bd_disk->bio_split);
117 		if (IS_ERR(split)) {
118 			split_sectors = PTR_ERR(split);
119 			goto error;
120 		}
121 		split->bi_opf |= REQ_NOMERGE;
122 		blkcg_bio_issue_init(split);
123 		bio_chain(split, bio);
124 		trace_block_split(split, bio->bi_iter.bi_sector);
125 		WARN_ON_ONCE(bio_zone_write_plugging(bio));
126 		submit_bio_noacct(bio);
127 		return split;
128 	}
129 
130 	return bio;
131 error:
132 	bio->bi_status = errno_to_blk_status(split_sectors);
133 	bio_endio(bio);
134 	return NULL;
135 }
136 
137 struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
138 		unsigned *nsegs)
139 {
140 	unsigned int max_discard_sectors, granularity;
141 	sector_t tmp;
142 	unsigned split_sectors;
143 
144 	*nsegs = 1;
145 
146 	granularity = max(lim->discard_granularity >> 9, 1U);
147 
148 	max_discard_sectors =
149 		min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
150 	max_discard_sectors -= max_discard_sectors % granularity;
151 	if (unlikely(!max_discard_sectors))
152 		return bio;
153 
154 	if (bio_sectors(bio) <= max_discard_sectors)
155 		return bio;
156 
157 	split_sectors = max_discard_sectors;
158 
159 	/*
160 	 * If the next starting sector would be misaligned, stop the discard at
161 	 * the previous aligned sector.
162 	 */
163 	tmp = bio->bi_iter.bi_sector + split_sectors -
164 		((lim->discard_alignment >> 9) % granularity);
165 	tmp = sector_div(tmp, granularity);
166 
167 	if (split_sectors > tmp)
168 		split_sectors -= tmp;
169 
170 	return bio_submit_split(bio, split_sectors);
171 }
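/*
 * Example: with a 1MiB discard_granularity (2048 sectors), zero
 * discard_alignment and max_discard_sectors == 8192, a large discard
 * starting at sector 1000 is split after 7192 sectors, so that the
 * remainder restarts at sector 8192, i.e. on a granularity boundary.
 */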
172 
173 static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
174 						bool is_atomic)
175 {
176 	/*
177 	 * chunk_sectors must be a multiple of atomic_write_boundary_sectors if
178 	 * both non-zero.
179 	 */
180 	if (is_atomic && lim->atomic_write_boundary_sectors)
181 		return lim->atomic_write_boundary_sectors;
182 
183 	return lim->chunk_sectors;
184 }
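/*
 * chunk_sectors describes an internal device boundary that a single request
 * must not cross, e.g. a RAID stripe or an NVMe namespace I/O boundary
 * (noiob).  For atomic writes the atomic write boundary, which divides
 * chunk_sectors when both are set, takes precedence.
 */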
185 
186 /*
187  * Return the maximum number of sectors from the start of a bio that may be
188  * submitted as a single request to a block device. If enough sectors remain,
189  * align the end to the physical block size. Otherwise align the end to the
190  * logical block size. This approach minimizes the number of non-aligned
191  * requests that are submitted to a block device if the start of a bio is not
192  * aligned to a physical block boundary.
193  */
194 static inline unsigned get_max_io_size(struct bio *bio,
195 				       const struct queue_limits *lim)
196 {
197 	unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
198 	unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
199 	bool is_atomic = bio->bi_opf & REQ_ATOMIC;
200 	unsigned boundary_sectors = blk_boundary_sectors(lim, is_atomic);
201 	unsigned max_sectors, start, end;
202 
203 	/*
204 	 * We ignore lim->max_sectors for atomic writes because it may be less
205 	 * than the actual bio size, which we cannot tolerate.
206 	 */
207 	if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
208 		max_sectors = lim->max_write_zeroes_sectors;
209 	else if (is_atomic)
210 		max_sectors = lim->atomic_write_max_sectors;
211 	else
212 		max_sectors = lim->max_sectors;
213 
214 	if (boundary_sectors) {
215 		max_sectors = min(max_sectors,
216 			blk_boundary_sectors_left(bio->bi_iter.bi_sector,
217 					      boundary_sectors));
218 	}
219 
220 	start = bio->bi_iter.bi_sector & (pbs - 1);
221 	end = (start + max_sectors) & ~(pbs - 1);
222 	if (end > start)
223 		return end - start;
224 	return max_sectors & ~(lbs - 1);
225 }
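/*
 * Example: with a 4k physical block size (pbs == 8 sectors), a bio starting
 * 3 sectors into a physical block and max_sectors == 1024 is limited to
 * ((3 + 1024) & ~7) - 3 == 1021 sectors, so that the split point ends up
 * aligned to the physical block size.
 */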
226 
227 /**
228  * bvec_split_segs - verify whether or not a bvec should be split in the middle
229  * @lim:      [in] queue limits to split based on
230  * @bv:       [in] bvec to examine
231  * @nsegs:    [in,out] Number of segments in the bio being built. Incremented
232  *            by the number of segments from @bv that may be appended to that
233  *            bio without exceeding @max_segs
234  * @bytes:    [in,out] Number of bytes in the bio being built. Incremented
235  *            by the number of bytes from @bv that may be appended to that
236  *            bio without exceeding @max_bytes
237  * @max_segs: [in] upper bound for *@nsegs
238  * @max_bytes: [in] upper bound for *@bytes
239  *
240  * When splitting a bio, it can happen that a bvec is encountered that is too
241  * big to fit in a single segment and hence that it has to be split in the
242  * middle. This function verifies whether or not that should happen. The value
243  * %true is returned if and only if appending the entire @bv to a bio with
244  * *@nsegs segments and *@bytes bytes would make that bio unacceptable for
245  * the block driver.
246  */
247 static bool bvec_split_segs(const struct queue_limits *lim,
248 		const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
249 		unsigned max_segs, unsigned max_bytes)
250 {
251 	unsigned max_len = max_bytes - *bytes;
252 	unsigned len = min(bv->bv_len, max_len);
253 	unsigned total_len = 0;
254 	unsigned seg_size = 0;
255 
256 	while (len && *nsegs < max_segs) {
257 		seg_size = get_max_segment_size(lim, bvec_phys(bv) + total_len, len);
258 
259 		(*nsegs)++;
260 		total_len += seg_size;
261 		len -= seg_size;
262 
263 		if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
264 			break;
265 	}
266 
267 	*bytes += total_len;
268 
269 	/* tell the caller to split the bvec if it is too big to fit */
270 	return len > 0 || bv->bv_len > max_len;
271 }
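/*
 * For example, a 256k bvec examined against a 64k max_segment_size
 * contributes four segments to *nsegs, provided max_segs and max_bytes are
 * not exceeded and no virt boundary is crossed.
 */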
272 
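/*
 * Writes to zoned block devices must be aligned to the zone write
 * granularity; everything else only needs logical block size alignment.
 */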
273 static unsigned int bio_split_alignment(struct bio *bio,
274 		const struct queue_limits *lim)
275 {
276 	if (op_is_write(bio_op(bio)) && lim->zone_write_granularity)
277 		return lim->zone_write_granularity;
278 	return lim->logical_block_size;
279 }
280 
281 /**
282  * bio_split_rw_at - check if and where to split a read/write bio
283  * @bio:  [in] bio to be split
284  * @lim:  [in] queue limits to split based on
285  * @segs: [out] number of segments in the bio with the first half of the sectors
286  * @max_bytes: [in] maximum number of bytes per bio
287  *
288  * Find out if @bio needs to be split to fit the queue limits in @lim and a
289  * maximum size of @max_bytes.  Returns a negative error number if @bio can't be
290  * split, 0 if the bio doesn't have to be split, or a positive sector offset if
291  * @bio needs to be split.
292  */
293 int bio_split_rw_at(struct bio *bio, const struct queue_limits *lim,
294 		unsigned *segs, unsigned max_bytes)
295 {
296 	struct bio_vec bv, bvprv, *bvprvp = NULL;
297 	struct bvec_iter iter;
298 	unsigned nsegs = 0, bytes = 0;
299 
300 	bio_for_each_bvec(bv, bio, iter) {
301 		/*
302 		 * If the queue doesn't support SG gaps and adding this
303 		 * offset would create a gap, disallow it.
304 		 */
305 		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
306 			goto split;
307 
308 		if (nsegs < lim->max_segments &&
309 		    bytes + bv.bv_len <= max_bytes &&
310 		    bv.bv_offset + bv.bv_len <= lim->min_segment_size) {
311 			nsegs++;
312 			bytes += bv.bv_len;
313 		} else {
314 			if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
315 					lim->max_segments, max_bytes))
316 				goto split;
317 		}
318 
319 		bvprv = bv;
320 		bvprvp = &bvprv;
321 	}
322 
323 	*segs = nsegs;
324 	return 0;
325 split:
326 	if (bio->bi_opf & REQ_ATOMIC)
327 		return -EINVAL;
328 
329 	/*
330 	 * We can't sanely support splitting for a REQ_NOWAIT bio. If splitting
331 	 * is required, return -EAGAIN so that the caller fails the bio.
332 	 */
333 	if (bio->bi_opf & REQ_NOWAIT)
334 		return -EAGAIN;
335 
336 	*segs = nsegs;
337 
338 	/*
339 	 * Individual bvecs might not be logical block aligned. Round down the
340 	 * split size so that each bio is properly block size aligned, even if
341 	 * we do not use the full hardware limits.
342 	 */
343 	bytes = ALIGN_DOWN(bytes, bio_split_alignment(bio, lim));
344 
345 	/*
346 	 * Bio splitting may cause subtle trouble such as a hang when doing sync
347 	 * iopoll in the direct IO path. Given that the performance gain of
348 	 * iopoll for big IO can be trivial, disable iopoll when a split is needed.
349 	 */
350 	bio_clear_polled(bio);
351 	return bytes >> SECTOR_SHIFT;
352 }
353 EXPORT_SYMBOL_GPL(bio_split_rw_at);
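/*
 * Example for the rounding in bio_split_rw_at(): if the limits would allow
 * 123456 bytes but the split must be 4k aligned, ALIGN_DOWN(123456, 4096)
 * == 122880, so the function returns 240 sectors.
 */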
354 
355 struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
356 		unsigned *nr_segs)
357 {
358 	return bio_submit_split(bio,
359 		bio_split_rw_at(bio, lim, nr_segs,
360 			get_max_io_size(bio, lim) << SECTOR_SHIFT));
361 }
362 
363 /*
364  * REQ_OP_ZONE_APPEND bios must never be split by the block layer.
365  *
366  * But we want the nr_segs calculation provided by bio_split_rw_at, and having
367  * a good sanity check that the submitter built the bio correctly is nice to
368  * have as well.
369  */
370 struct bio *bio_split_zone_append(struct bio *bio,
371 		const struct queue_limits *lim, unsigned *nr_segs)
372 {
373 	int split_sectors;
374 
375 	split_sectors = bio_split_rw_at(bio, lim, nr_segs,
376 			lim->max_zone_append_sectors << SECTOR_SHIFT);
377 	if (WARN_ON_ONCE(split_sectors > 0))
378 		split_sectors = -EINVAL;
379 	return bio_submit_split(bio, split_sectors);
380 }
381 
382 struct bio *bio_split_write_zeroes(struct bio *bio,
383 		const struct queue_limits *lim, unsigned *nsegs)
384 {
385 	unsigned int max_sectors = get_max_io_size(bio, lim);
386 
387 	*nsegs = 0;
388 
389 	/*
390 	 * An unset limit should normally not happen, as bio submission is keyed
391 	 * off having a non-zero limit.  But SCSI can clear the limit in the
392 	 * I/O completion handler, and we can race and see this.  Splitting to a
393 	 * zero limit obviously doesn't make sense, so band-aid it here.
394 	 */
395 	if (!max_sectors)
396 		return bio;
397 	if (bio_sectors(bio) <= max_sectors)
398 		return bio;
399 	return bio_submit_split(bio, max_sectors);
400 }
401 
402 /**
403  * bio_split_to_limits - split a bio to fit the queue limits
404  * @bio:     bio to be split
405  *
406  * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
407  * if so split off a bio fitting the limits from the beginning of @bio and
408  * return it.  @bio is shortened to the remainder and re-submitted.
409  *
410  * The split bio is allocated from the per-disk bio_split bioset
411  * (@bio->bi_bdev->bd_disk->bio_split), which is provided by the block layer.
412  */
413 struct bio *bio_split_to_limits(struct bio *bio)
414 {
415 	unsigned int nr_segs;
416 
417 	return __bio_split_to_limits(bio, bdev_limits(bio->bi_bdev), &nr_segs);
418 }
419 EXPORT_SYMBOL(bio_split_to_limits);
420 
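/*
 * Recompute the number of physical segments in @rq from its bios, e.g.
 * after a merge.  Discards count one segment per bio when the queue
 * supports multi-range discards (otherwise one in total), write zeroes
 * carry no data and count zero segments, and everything else is walked
 * with bvec_split_segs() against the queue limits.
 */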
421 unsigned int blk_recalc_rq_segments(struct request *rq)
422 {
423 	unsigned int nr_phys_segs = 0;
424 	unsigned int bytes = 0;
425 	struct req_iterator iter;
426 	struct bio_vec bv;
427 
428 	if (!rq->bio)
429 		return 0;
430 
431 	switch (bio_op(rq->bio)) {
432 	case REQ_OP_DISCARD:
433 	case REQ_OP_SECURE_ERASE:
434 		if (queue_max_discard_segments(rq->q) > 1) {
435 			struct bio *bio = rq->bio;
436 
437 			for_each_bio(bio)
438 				nr_phys_segs++;
439 			return nr_phys_segs;
440 		}
441 		return 1;
442 	case REQ_OP_WRITE_ZEROES:
443 		return 0;
444 	default:
445 		break;
446 	}
447 
448 	rq_for_each_bvec(bv, rq, iter)
449 		bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
450 				UINT_MAX, UINT_MAX);
451 	return nr_phys_segs;
452 }
453 
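/*
 * Maximum number of sectors a request currently ending at @offset may grow
 * to: the per-operation limit, further clamped so that the request does not
 * cross a chunk or atomic write boundary.  Passthrough requests are only
 * bound by max_hw_sectors.
 */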
454 static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
455 						  sector_t offset)
456 {
457 	struct request_queue *q = rq->q;
458 	struct queue_limits *lim = &q->limits;
459 	unsigned int max_sectors, boundary_sectors;
460 	bool is_atomic = rq->cmd_flags & REQ_ATOMIC;
461 
462 	if (blk_rq_is_passthrough(rq))
463 		return q->limits.max_hw_sectors;
464 
465 	boundary_sectors = blk_boundary_sectors(lim, is_atomic);
466 	max_sectors = blk_queue_get_max_sectors(rq);
467 
468 	if (!boundary_sectors ||
469 	    req_op(rq) == REQ_OP_DISCARD ||
470 	    req_op(rq) == REQ_OP_SECURE_ERASE)
471 		return max_sectors;
472 	return min(max_sectors,
473 		   blk_boundary_sectors_left(offset, boundary_sectors));
474 }
475 
476 static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
477 		unsigned int nr_phys_segs)
478 {
479 	if (!blk_cgroup_mergeable(req, bio))
480 		goto no_merge;
481 
482 	if (blk_integrity_merge_bio(req->q, req, bio) == false)
483 		goto no_merge;
484 
485 	/* discard request merge won't add new segment */
486 	if (req_op(req) == REQ_OP_DISCARD)
487 		return 1;
488 
489 	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
490 		goto no_merge;
491 
492 	/*
493 	 * This will form the start of a new hw segment.  Bump both
494 	 * counters.
495 	 */
496 	req->nr_phys_segments += nr_phys_segs;
497 	if (bio_integrity(bio))
498 		req->nr_integrity_segments += blk_rq_count_integrity_sg(req->q,
499 									bio);
500 	return 1;
501 
502 no_merge:
503 	req_set_nomerge(req->q, req);
504 	return 0;
505 }
506 
507 int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
508 {
509 	if (req_gap_back_merge(req, bio))
510 		return 0;
511 	if (blk_integrity_rq(req) &&
512 	    integrity_req_gap_back_merge(req, bio))
513 		return 0;
514 	if (!bio_crypt_ctx_back_mergeable(req, bio))
515 		return 0;
516 	if (blk_rq_sectors(req) + bio_sectors(bio) >
517 	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
518 		req_set_nomerge(req->q, req);
519 		return 0;
520 	}
521 
522 	return ll_new_hw_segment(req, bio, nr_segs);
523 }
524 
525 static int ll_front_merge_fn(struct request *req, struct bio *bio,
526 		unsigned int nr_segs)
527 {
528 	if (req_gap_front_merge(req, bio))
529 		return 0;
530 	if (blk_integrity_rq(req) &&
531 	    integrity_req_gap_front_merge(req, bio))
532 		return 0;
533 	if (!bio_crypt_ctx_front_mergeable(req, bio))
534 		return 0;
535 	if (blk_rq_sectors(req) + bio_sectors(bio) >
536 	    blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
537 		req_set_nomerge(req->q, req);
538 		return 0;
539 	}
540 
541 	return ll_new_hw_segment(req, bio, nr_segs);
542 }
543 
544 static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
545 		struct request *next)
546 {
547 	unsigned short segments = blk_rq_nr_discard_segments(req);
548 
549 	if (segments >= queue_max_discard_segments(q))
550 		goto no_merge;
551 	if (blk_rq_sectors(req) + bio_sectors(next->bio) >
552 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
553 		goto no_merge;
554 
555 	req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
556 	return true;
557 no_merge:
558 	req_set_nomerge(q, req);
559 	return false;
560 }
561 
562 static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
563 				struct request *next)
564 {
565 	int total_phys_segments;
566 
567 	if (req_gap_back_merge(req, next->bio))
568 		return 0;
569 
570 	/*
571 	 * Will it become too large?
572 	 */
573 	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
574 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
575 		return 0;
576 
577 	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
578 	if (total_phys_segments > blk_rq_get_max_segments(req))
579 		return 0;
580 
581 	if (!blk_cgroup_mergeable(req, next->bio))
582 		return 0;
583 
584 	if (blk_integrity_merge_rq(q, req, next) == false)
585 		return 0;
586 
587 	if (!bio_crypt_ctx_merge_rq(req, next))
588 		return 0;
589 
590 	/* Merge is OK... */
591 	req->nr_phys_segments = total_phys_segments;
592 	req->nr_integrity_segments += next->nr_integrity_segments;
593 	return 1;
594 }
595 
596 /**
597  * blk_rq_set_mixed_merge - mark a request as mixed merge
598  * @rq: request to mark as mixed merge
599  *
600  * Description:
601  *     @rq is about to be mixed merged.  Make sure the attributes
602  *     which can be mixed are set in each bio and mark @rq as mixed
603  *     merged.
604  */
605 static void blk_rq_set_mixed_merge(struct request *rq)
606 {
607 	blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
608 	struct bio *bio;
609 
610 	if (rq->rq_flags & RQF_MIXED_MERGE)
611 		return;
612 
613 	/*
614 	 * @rq will no longer represent mixable attributes for all the
615 	 * contained bios.  It will just track those of the first one.
616 	 * Distribute the attributes to each bio.
617 	 */
618 	for (bio = rq->bio; bio; bio = bio->bi_next) {
619 		WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
620 			     (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
621 		bio->bi_opf |= ff;
622 	}
623 	rq->rq_flags |= RQF_MIXED_MERGE;
624 }
625 
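/*
 * Readahead is best-effort I/O, so for merge accounting a REQ_RAHEAD bio
 * is treated as if all failfast bits were set.
 */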
626 static inline blk_opf_t bio_failfast(const struct bio *bio)
627 {
628 	if (bio->bi_opf & REQ_RAHEAD)
629 		return REQ_FAILFAST_MASK;
630 
631 	return bio->bi_opf & REQ_FAILFAST_MASK;
632 }
633 
634 /*
635  * After the request is marked as MIXED_MERGE, any new readahead bio has to
636  * be marked as failfast too, and the request's failfast flags have to be
637  * updated in case of a front merge.
638  */
639 static inline void blk_update_mixed_merge(struct request *req,
640 		struct bio *bio, bool front_merge)
641 {
642 	if (req->rq_flags & RQF_MIXED_MERGE) {
643 		if (bio->bi_opf & REQ_RAHEAD)
644 			bio->bi_opf |= REQ_FAILFAST_MASK;
645 
646 		if (front_merge) {
647 			req->cmd_flags &= ~REQ_FAILFAST_MASK;
648 			req->cmd_flags |= bio->bi_opf & REQ_FAILFAST_MASK;
649 		}
650 	}
651 }
652 
653 static void blk_account_io_merge_request(struct request *req)
654 {
655 	if (req->rq_flags & RQF_IO_STAT) {
656 		part_stat_lock();
657 		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
658 		part_stat_local_dec(req->part,
659 				    in_flight[op_is_write(req_op(req))]);
660 		part_stat_unlock();
661 	}
662 }
663 
664 static enum elv_merge blk_try_req_merge(struct request *req,
665 					struct request *next)
666 {
667 	if (blk_discard_mergable(req))
668 		return ELEVATOR_DISCARD_MERGE;
669 	else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
670 		return ELEVATOR_BACK_MERGE;
671 
672 	return ELEVATOR_NO_MERGE;
673 }
674 
675 static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
676 					      struct bio *bio)
677 {
678 	return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
679 }
680 
681 static bool blk_atomic_write_mergeable_rqs(struct request *rq,
682 					   struct request *next)
683 {
684 	return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
685 }
686 
687 /*
688  * For non-mq, this has to be called with the request spinlock acquired.
689  * For mq with scheduling, the appropriate queue wide lock should be held.
690  */
691 static struct request *attempt_merge(struct request_queue *q,
692 				     struct request *req, struct request *next)
693 {
694 	if (!rq_mergeable(req) || !rq_mergeable(next))
695 		return NULL;
696 
697 	if (req_op(req) != req_op(next))
698 		return NULL;
699 
700 	if (req->bio->bi_write_hint != next->bio->bi_write_hint)
701 		return NULL;
702 	if (req->bio->bi_write_stream != next->bio->bi_write_stream)
703 		return NULL;
704 	if (req->bio->bi_ioprio != next->bio->bi_ioprio)
705 		return NULL;
706 	if (!blk_atomic_write_mergeable_rqs(req, next))
707 		return NULL;
708 
709 	/*
710 	 * If we are allowed to merge, then append bio list
711 	 * from next to rq and release next. merge_requests_fn
712 	 * will have updated segment counts, update sector
713 	 * counts here. Handle DISCARDs separately, as they
714 	 * have separate settings.
715 	 */
716 
717 	switch (blk_try_req_merge(req, next)) {
718 	case ELEVATOR_DISCARD_MERGE:
719 		if (!req_attempt_discard_merge(q, req, next))
720 			return NULL;
721 		break;
722 	case ELEVATOR_BACK_MERGE:
723 		if (!ll_merge_requests_fn(q, req, next))
724 			return NULL;
725 		break;
726 	default:
727 		return NULL;
728 	}
729 
730 	/*
731 	 * If failfast settings disagree or any of the two is already
732 	 * a mixed merge, mark both as mixed before proceeding.  This
733 	 * makes sure that all involved bios have mixable attributes
734 	 * set properly.
735 	 */
736 	if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
737 	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
738 	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
739 		blk_rq_set_mixed_merge(req);
740 		blk_rq_set_mixed_merge(next);
741 	}
742 
743 	/*
744 	 * At this point we have either done a back merge or front merge. We
745 	 * need the smaller start_time_ns of the merged requests to be the
746 	 * current request for accounting purposes.
747 	 */
748 	if (next->start_time_ns < req->start_time_ns)
749 		req->start_time_ns = next->start_time_ns;
750 
751 	req->biotail->bi_next = next->bio;
752 	req->biotail = next->biotail;
753 
754 	req->__data_len += blk_rq_bytes(next);
755 
756 	if (!blk_discard_mergable(req))
757 		elv_merge_requests(q, req, next);
758 
759 	blk_crypto_rq_put_keyslot(next);
760 
761 	/*
762 	 * 'next' is going away, so update stats accordingly
763 	 */
764 	blk_account_io_merge_request(next);
765 
766 	trace_block_rq_merge(next);
767 
768 	/*
769 	 * ownership of bio passed from next to req, return 'next' for
770 	 * the caller to free
771 	 */
772 	next->bio = NULL;
773 	return next;
774 }
775 
776 static struct request *attempt_back_merge(struct request_queue *q,
777 		struct request *rq)
778 {
779 	struct request *next = elv_latter_request(q, rq);
780 
781 	if (next)
782 		return attempt_merge(q, rq, next);
783 
784 	return NULL;
785 }
786 
787 static struct request *attempt_front_merge(struct request_queue *q,
788 		struct request *rq)
789 {
790 	struct request *prev = elv_former_request(q, rq);
791 
792 	if (prev)
793 		return attempt_merge(q, prev, rq);
794 
795 	return NULL;
796 }
797 
798 /*
799  * Try to merge 'next' into 'rq'. Return true if the merge happened, false
800  * otherwise. The caller is responsible for freeing 'next' if the merge
801  * happened.
802  */
803 bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
804 			   struct request *next)
805 {
806 	return attempt_merge(q, rq, next);
807 }
808 
809 bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
810 {
811 	if (!rq_mergeable(rq) || !bio_mergeable(bio))
812 		return false;
813 
814 	if (req_op(rq) != bio_op(bio))
815 		return false;
816 
817 	if (!blk_cgroup_mergeable(rq, bio))
818 		return false;
819 	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
820 		return false;
821 	if (!bio_crypt_rq_ctx_compatible(rq, bio))
822 		return false;
823 	if (rq->bio->bi_write_hint != bio->bi_write_hint)
824 		return false;
825 	if (rq->bio->bi_write_stream != bio->bi_write_stream)
826 		return false;
827 	if (rq->bio->bi_ioprio != bio->bi_ioprio)
828 		return false;
829 	if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
830 		return false;
831 
832 	return true;
833 }
834 
835 enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
836 {
837 	if (blk_discard_mergable(rq))
838 		return ELEVATOR_DISCARD_MERGE;
839 	else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
840 		return ELEVATOR_BACK_MERGE;
841 	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
842 		return ELEVATOR_FRONT_MERGE;
843 	return ELEVATOR_NO_MERGE;
844 }
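/*
 * Example: for a request covering sectors [2048, 2056) and an 8-sector bio,
 * a bio starting at sector 2056 is a back merge candidate, while one
 * starting at sector 2040 is a front merge candidate.
 */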
845 
846 static void blk_account_io_merge_bio(struct request *req)
847 {
848 	if (req->rq_flags & RQF_IO_STAT) {
849 		part_stat_lock();
850 		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
851 		part_stat_unlock();
852 	}
853 }
854 
855 enum bio_merge_status bio_attempt_back_merge(struct request *req,
856 		struct bio *bio, unsigned int nr_segs)
857 {
858 	const blk_opf_t ff = bio_failfast(bio);
859 
860 	if (!ll_back_merge_fn(req, bio, nr_segs))
861 		return BIO_MERGE_FAILED;
862 
863 	trace_block_bio_backmerge(bio);
864 	rq_qos_merge(req->q, req, bio);
865 
866 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
867 		blk_rq_set_mixed_merge(req);
868 
869 	blk_update_mixed_merge(req, bio, false);
870 
871 	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
872 		blk_zone_write_plug_bio_merged(bio);
873 
874 	req->biotail->bi_next = bio;
875 	req->biotail = bio;
876 	req->__data_len += bio->bi_iter.bi_size;
877 
878 	bio_crypt_free_ctx(bio);
879 
880 	blk_account_io_merge_bio(req);
881 	return BIO_MERGE_OK;
882 }
883 
884 static enum bio_merge_status bio_attempt_front_merge(struct request *req,
885 		struct bio *bio, unsigned int nr_segs)
886 {
887 	const blk_opf_t ff = bio_failfast(bio);
888 
889 	/*
890 	 * A front merge for writes to sequential zones of a zoned block device
891 	 * can happen only if the user submitted writes out of order. Do not
892  * merge such a write, to let it fail.
893 	 */
894 	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
895 		return BIO_MERGE_FAILED;
896 
897 	if (!ll_front_merge_fn(req, bio, nr_segs))
898 		return BIO_MERGE_FAILED;
899 
900 	trace_block_bio_frontmerge(bio);
901 	rq_qos_merge(req->q, req, bio);
902 
903 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
904 		blk_rq_set_mixed_merge(req);
905 
906 	blk_update_mixed_merge(req, bio, true);
907 
908 	bio->bi_next = req->bio;
909 	req->bio = bio;
910 
911 	req->__sector = bio->bi_iter.bi_sector;
912 	req->__data_len += bio->bi_iter.bi_size;
913 
914 	bio_crypt_do_front_merge(req, bio);
915 
916 	blk_account_io_merge_bio(req);
917 	return BIO_MERGE_OK;
918 }
919 
920 static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
921 		struct request *req, struct bio *bio)
922 {
923 	unsigned short segments = blk_rq_nr_discard_segments(req);
924 
925 	if (segments >= queue_max_discard_segments(q))
926 		goto no_merge;
927 	if (blk_rq_sectors(req) + bio_sectors(bio) >
928 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
929 		goto no_merge;
930 
931 	rq_qos_merge(q, req, bio);
932 
933 	req->biotail->bi_next = bio;
934 	req->biotail = bio;
935 	req->__data_len += bio->bi_iter.bi_size;
936 	req->nr_phys_segments = segments + 1;
937 
938 	blk_account_io_merge_bio(req);
939 	return BIO_MERGE_OK;
940 no_merge:
941 	req_set_nomerge(q, req);
942 	return BIO_MERGE_FAILED;
943 }
944 
945 static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
946 						   struct request *rq,
947 						   struct bio *bio,
948 						   unsigned int nr_segs,
949 						   bool sched_allow_merge)
950 {
951 	if (!blk_rq_merge_ok(rq, bio))
952 		return BIO_MERGE_NONE;
953 
954 	switch (blk_try_merge(rq, bio)) {
955 	case ELEVATOR_BACK_MERGE:
956 		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
957 			return bio_attempt_back_merge(rq, bio, nr_segs);
958 		break;
959 	case ELEVATOR_FRONT_MERGE:
960 		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
961 			return bio_attempt_front_merge(rq, bio, nr_segs);
962 		break;
963 	case ELEVATOR_DISCARD_MERGE:
964 		return bio_attempt_discard_merge(q, rq, bio);
965 	default:
966 		return BIO_MERGE_NONE;
967 	}
968 
969 	return BIO_MERGE_FAILED;
970 }
971 
972 /**
973  * blk_attempt_plug_merge - try to merge with %current's plugged list
974  * @q: request_queue new bio is being queued at
975  * @bio: new bio being queued
976  * @nr_segs: number of segments in @bio
978  *
979  * Determine whether @bio being queued on @q can be merged with the previous
980  * request on %current's plugged list.  Returns %true if merge was successful,
981  * otherwise %false.
982  *
983  * Plugging coalesces IOs from the same issuer for the same purpose without
984  * going through @q->queue_lock.  As such it's more of an issuing mechanism
985  * than scheduling, and the request, while may have elvpriv data, is not
986  * than scheduling, and the request, while it may have elvpriv data, is not
987  * added to the elevator at this point.  In addition, we don't have
988  * merging parameters without querying the elevator.
989  *
990  * Caller must ensure !blk_queue_nomerges(q) beforehand.
991  */
992 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
993 		unsigned int nr_segs)
994 {
995 	struct blk_plug *plug = current->plug;
996 	struct request *rq;
997 
998 	if (!plug || rq_list_empty(&plug->mq_list))
999 		return false;
1000 
1001 	rq = plug->mq_list.tail;
1002 	if (rq->q == q)
1003 		return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
1004 			BIO_MERGE_OK;
1005 	else if (!plug->multiple_queues)
1006 		return false;
1007 
1008 	rq_list_for_each(&plug->mq_list, rq) {
1009 		if (rq->q != q)
1010 			continue;
1011 		if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
1012 		    BIO_MERGE_OK)
1013 			return true;
1014 		break;
1015 	}
1016 	return false;
1017 }
1018 
1019 /*
1020  * Iterate list of requests and see if we can merge this bio with any
1021  * of them.
1022  */
1023 bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
1024 			struct bio *bio, unsigned int nr_segs)
1025 {
1026 	struct request *rq;
1027 	int checked = 8;
1028 
1029 	list_for_each_entry_reverse(rq, list, queuelist) {
1030 		if (!checked--)
1031 			break;
1032 
1033 		switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
1034 		case BIO_MERGE_NONE:
1035 			continue;
1036 		case BIO_MERGE_OK:
1037 			return true;
1038 		case BIO_MERGE_FAILED:
1039 			return false;
1040 		}
1041 
1042 	}
1043 
1044 	return false;
1045 }
1046 EXPORT_SYMBOL_GPL(blk_bio_list_merge);
1047 
1048 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
1049 		unsigned int nr_segs, struct request **merged_request)
1050 {
1051 	struct request *rq;
1052 
1053 	switch (elv_merge(q, &rq, bio)) {
1054 	case ELEVATOR_BACK_MERGE:
1055 		if (!blk_mq_sched_allow_merge(q, rq, bio))
1056 			return false;
1057 		if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
1058 			return false;
1059 		*merged_request = attempt_back_merge(q, rq);
1060 		if (!*merged_request)
1061 			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
1062 		return true;
1063 	case ELEVATOR_FRONT_MERGE:
1064 		if (!blk_mq_sched_allow_merge(q, rq, bio))
1065 			return false;
1066 		if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
1067 			return false;
1068 		*merged_request = attempt_front_merge(q, rq);
1069 		if (!*merged_request)
1070 			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
1071 		return true;
1072 	case ELEVATOR_DISCARD_MERGE:
1073 		return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
1074 	default:
1075 		return false;
1076 	}
1077 }
1078 EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
1079