xref: /linux/block/blk-merge.c (revision 55a42f78ffd386e01a5404419f8c5ded7db70a21)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Functions related to segment and merge handling
4  */
5 #include <linux/kernel.h>
6 #include <linux/module.h>
7 #include <linux/bio.h>
8 #include <linux/blkdev.h>
9 #include <linux/blk-integrity.h>
10 #include <linux/part_stat.h>
11 #include <linux/blk-cgroup.h>
12 
13 #include <trace/events/block.h>
14 
15 #include "blk.h"
16 #include "blk-mq-sched.h"
17 #include "blk-rq-qos.h"
18 #include "blk-throttle.h"
19 
20 static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
21 {
22 	*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
23 }
24 
25 static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
26 {
27 	struct bvec_iter iter = bio->bi_iter;
28 	int idx;
29 
30 	bio_get_first_bvec(bio, bv);
31 	if (bv->bv_len == bio->bi_iter.bi_size)
32 		return;		/* this bio only has a single bvec */
33 
34 	bio_advance_iter(bio, &iter, iter.bi_size);
35 
36 	if (!iter.bi_bvec_done)
37 		idx = iter.bi_idx - 1;
38 	else	/* in the middle of bvec */
39 		idx = iter.bi_idx;
40 
41 	*bv = bio->bi_io_vec[idx];
42 
43 	/*
44 	 * iter.bi_bvec_done records the actual length of the last bvec
45 	 * if this bio ends in the middle of an io vector
46 	 */
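	/*
	 * Example (hypothetical bio layout): if the final bvec of a
	 * multi-bvec bio is 8192 bytes long but the bio only covers its
	 * first 6144 bytes, iter.bi_bvec_done ends up as 6144 and bv_len
	 * is trimmed from 8192 to 6144 below.
	 */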
47 	if (iter.bi_bvec_done)
48 		bv->bv_len = iter.bi_bvec_done;
49 }
50 
51 static inline bool bio_will_gap(struct request_queue *q,
52 		struct request *prev_rq, struct bio *prev, struct bio *next)
53 {
54 	struct bio_vec pb, nb;
55 
56 	if (!bio_has_data(prev) || !queue_virt_boundary(q))
57 		return false;
58 
59 	/*
60 	 * Don't merge if the 1st bio starts with non-zero offset, otherwise it
61 	 * is quite difficult to respect the sg gap limit.  We work hard to
62 	 * merge the huge number of small bios submitted by workloads such as mkfs.
63 	 */
64 	if (prev_rq)
65 		bio_get_first_bvec(prev_rq->bio, &pb);
66 	else
67 		bio_get_first_bvec(prev, &pb);
68 	if (pb.bv_offset & queue_virt_boundary(q))
69 		return true;
70 
71 	/*
72 	 * We don't need to worry about the situation that the merged segment
73 	 * ends in unaligned virt boundary:
74 	 *
75 	 * - if 'pb' ends aligned, the merged segment ends aligned
76 	 * - if 'pb' ends unaligned, the next bio must include
77 	 *   one single bvec of 'nb', otherwise the 'nb' can't
78 	 *   merge with 'pb'
79 	 */
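	/*
	 * Illustration (hypothetical limits): with a 4 KiB virt boundary
	 * (mask 0xfff), if the last bvec of 'prev' ends 0x600 bytes into a
	 * page and is not physically contiguous with the first bvec of
	 * 'next', __bvec_gap_to_prev() sees the unaligned end and reports
	 * a gap, so the bios will not be merged.
	 */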
80 	bio_get_last_bvec(prev, &pb);
81 	bio_get_first_bvec(next, &nb);
82 	if (biovec_phys_mergeable(q, &pb, &nb))
83 		return false;
84 	return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
85 }
86 
87 static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
88 {
89 	return bio_will_gap(req->q, req, req->biotail, bio);
90 }
91 
92 static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
93 {
94 	return bio_will_gap(req->q, NULL, bio, req->bio);
95 }
96 
97 /*
98  * The max size one bio can handle is UINT_MAX because bvec_iter.bi_size
99  * is defined as 'unsigned int'; in addition, it has to be aligned with the
100  * logical block size, which is the minimum unit accepted by the hardware.
101  */
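/*
 * Worked example (hypothetical limits): with a 4096-byte logical block size,
 * round_down(UINT_MAX, 4096) = 4294963200 bytes, so bio_allowed_max_sectors()
 * below returns 8388600 512-byte sectors.
 */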
102 static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
103 {
104 	return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
105 }
106 
107 /**
108  * bio_submit_split_bioset - Submit a bio, splitting it at a designated sector
109  * @bio:		the original bio to be submitted and split
110  * @split_sectors:	the sector count at which to split
111  * @bs:			the bio set used for allocating the new split bio
112  *
113  * The original bio is modified to contain the remaining sectors and submitted.
114  * The caller is responsible for submitting the returned bio.
115  *
116  * On success, the newly allocated bio representing the initial part is
117  * returned; on failure, NULL is returned and the original bio is failed.
118  */
119 struct bio *bio_submit_split_bioset(struct bio *bio, unsigned int split_sectors,
120 				    struct bio_set *bs)
121 {
122 	struct bio *split = bio_split(bio, split_sectors, GFP_NOIO, bs);
123 
124 	if (IS_ERR(split)) {
125 		bio->bi_status = errno_to_blk_status(PTR_ERR(split));
126 		bio_endio(bio);
127 		return NULL;
128 	}
129 
130 	bio_chain(split, bio);
131 	trace_block_split(split, bio->bi_iter.bi_sector);
132 	WARN_ON_ONCE(bio_zone_write_plugging(bio));
133 
134 	if (should_fail_bio(bio))
135 		bio_io_error(bio);
136 	else if (!blk_throtl_bio(bio))
137 		submit_bio_noacct_nocheck(bio, true);
138 
139 	return split;
140 }
141 EXPORT_SYMBOL_GPL(bio_submit_split_bioset);
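/*
 * Usage sketch (hypothetical caller, not taken from in-tree code): a driver
 * that must cap I/O at a device specific size could do
 *
 *	split = bio_submit_split_bioset(bio, max_sectors, &dev->split_set);
 *	if (split)
 *		issue_to_hardware(split);
 *
 * where 'dev->split_set' and issue_to_hardware() are made-up names; the
 * remainder of 'bio' has already been re-submitted (or failed) by the helper.
 */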
142 
143 static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
144 {
145 	if (unlikely(split_sectors < 0)) {
146 		bio->bi_status = errno_to_blk_status(split_sectors);
147 		bio_endio(bio);
148 		return NULL;
149 	}
150 
151 	if (split_sectors) {
152 		bio = bio_submit_split_bioset(bio, split_sectors,
153 				&bio->bi_bdev->bd_disk->bio_split);
154 		if (bio)
155 			bio->bi_opf |= REQ_NOMERGE;
156 	}
157 
158 	return bio;
159 }
160 
161 struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
162 		unsigned *nsegs)
163 {
164 	unsigned int max_discard_sectors, granularity;
165 	sector_t tmp;
166 	unsigned split_sectors;
167 
168 	*nsegs = 1;
169 
170 	granularity = max(lim->discard_granularity >> 9, 1U);
171 
172 	max_discard_sectors =
173 		min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
174 	max_discard_sectors -= max_discard_sectors % granularity;
175 	if (unlikely(!max_discard_sectors))
176 		return bio;
177 
178 	if (bio_sectors(bio) <= max_discard_sectors)
179 		return bio;
180 
181 	split_sectors = max_discard_sectors;
182 
183 	/*
184 	 * If the next starting sector would be misaligned, stop the discard at
185 	 * the previous aligned sector.
186 	 */
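	/*
	 * Worked example (hypothetical limits): with a discard_granularity of
	 * 1 MiB (granularity = 2048 sectors), discard_alignment = 0,
	 * bi_sector = 1000 and split_sectors = 2048, tmp works out to
	 * (1000 + 2048) % 2048 = 1000, so split_sectors is reduced to 1048
	 * and the remainder starts at the aligned sector 2048.
	 */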
187 	tmp = bio->bi_iter.bi_sector + split_sectors -
188 		((lim->discard_alignment >> 9) % granularity);
189 	tmp = sector_div(tmp, granularity);
190 
191 	if (split_sectors > tmp)
192 		split_sectors -= tmp;
193 
194 	return bio_submit_split(bio, split_sectors);
195 }
196 
197 static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
198 						bool is_atomic)
199 {
200 	/*
201 	 * chunk_sectors must be a multiple of atomic_write_boundary_sectors if
202 	 * both non-zero.
203 	 * both are non-zero.
204 	if (is_atomic && lim->atomic_write_boundary_sectors)
205 		return lim->atomic_write_boundary_sectors;
206 
207 	return lim->chunk_sectors;
208 }
209 
210 /*
211  * Return the maximum number of sectors from the start of a bio that may be
212  * submitted as a single request to a block device. If enough sectors remain,
213  * align the end to the physical block size. Otherwise align the end to the
214  * logical block size. This approach minimizes the number of non-aligned
215  * requests that are submitted to a block device if the start of a bio is not
216  * aligned to a physical block boundary.
217  */
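/*
 * Illustration (hypothetical limits, no chunk_sectors): with a 4096-byte
 * physical block size (pbs = 8 sectors), a 512-byte logical block size and
 * max_sectors = 1280, a bio starting at sector 3 gives start = 3 and
 * end = (3 + 1280) & ~7 = 1280, so up to 1277 sectors are allowed and the
 * I/O ends on a physical block boundary.
 */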
218 static inline unsigned get_max_io_size(struct bio *bio,
219 				       const struct queue_limits *lim)
220 {
221 	unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
222 	unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
223 	bool is_atomic = bio->bi_opf & REQ_ATOMIC;
224 	unsigned boundary_sectors = blk_boundary_sectors(lim, is_atomic);
225 	unsigned max_sectors, start, end;
226 
227 	/*
228 	 * We ignore lim->max_sectors for atomic writes because it may be less
229 	 * than the actual bio size, which we cannot tolerate.
230 	 */
231 	if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
232 		max_sectors = lim->max_write_zeroes_sectors;
233 	else if (is_atomic)
234 		max_sectors = lim->atomic_write_max_sectors;
235 	else
236 		max_sectors = lim->max_sectors;
237 
238 	if (boundary_sectors) {
239 		max_sectors = min(max_sectors,
240 			blk_boundary_sectors_left(bio->bi_iter.bi_sector,
241 					      boundary_sectors));
242 	}
243 
244 	start = bio->bi_iter.bi_sector & (pbs - 1);
245 	end = (start + max_sectors) & ~(pbs - 1);
246 	if (end > start)
247 		return end - start;
248 	return max_sectors & ~(lbs - 1);
249 }
250 
251 /**
252  * bvec_split_segs - verify whether or not a bvec should be split in the middle
253  * @lim:      [in] queue limits to split based on
254  * @bv:       [in] bvec to examine
255  * @nsegs:    [in,out] Number of segments in the bio being built. Incremented
256  *            by the number of segments from @bv that may be appended to that
257  *            bio without exceeding @max_segs
258  * @bytes:    [in,out] Number of bytes in the bio being built. Incremented
259  *            by the number of bytes from @bv that may be appended to that
260  *            bio without exceeding @max_bytes
261  * @max_segs: [in] upper bound for *@nsegs
262  * @max_bytes: [in] upper bound for *@bytes
263  *
264  * When splitting a bio, it can happen that a bvec is encountered that is too
265  * big to fit in a single segment and hence that it has to be split in the
266  * middle. This function verifies whether or not that should happen. The value
267  * %true is returned if and only if appending the entire @bv to a bio with
268  * *@nsegs segments and *@bytes bytes would make that bio unacceptable for
269  * the block driver.
270  */
271 static bool bvec_split_segs(const struct queue_limits *lim,
272 		const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
273 		unsigned max_segs, unsigned max_bytes)
274 {
275 	unsigned max_len = max_bytes - *bytes;
276 	unsigned len = min(bv->bv_len, max_len);
277 	unsigned total_len = 0;
278 	unsigned seg_size = 0;
279 
280 	while (len && *nsegs < max_segs) {
281 		seg_size = get_max_segment_size(lim, bvec_phys(bv) + total_len, len);
282 
283 		(*nsegs)++;
284 		total_len += seg_size;
285 		len -= seg_size;
286 
287 		if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
288 			break;
289 	}
290 
291 	*bytes += total_len;
292 
293 	/* tell the caller to split the bvec if it is too big to fit */
294 	return len > 0 || bv->bv_len > max_len;
295 }
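/*
 * Illustration for bvec_split_segs() above (hypothetical limits): a 1 MiB
 * bvec on a queue with a 64 KiB max segment size needs 16 segments.  If only
 * 10 segment slots remain, the loop stops with len > 0 and the function
 * returns true, telling the caller to split the bio inside this bvec.
 */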
296 
297 static unsigned int bio_split_alignment(struct bio *bio,
298 		const struct queue_limits *lim)
299 {
300 	if (op_is_write(bio_op(bio)) && lim->zone_write_granularity)
301 		return lim->zone_write_granularity;
302 	return lim->logical_block_size;
303 }
304 
305 /**
306  * bio_split_io_at - check if and where to split a bio
307  * @bio:  [in] bio to be split
308  * @lim:  [in] queue limits to split based on
309  * @segs: [out] number of segments in the bio with the first half of the sectors
310  * @max_bytes: [in] maximum number of bytes per bio
311  * @len_align_mask: [in] length alignment mask for each vector
312  *
313  * Find out if @bio needs to be split to fit the queue limits in @lim and a
314  * maximum size of @max_bytes.  Returns a negative error number if @bio can't be
315  * split, 0 if the bio doesn't have to be split, or a positive sector offset if
316  * @bio needs to be split.
317  */
318 int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
319 		unsigned *segs, unsigned max_bytes, unsigned len_align_mask)
320 {
321 	struct bio_vec bv, bvprv, *bvprvp = NULL;
322 	struct bvec_iter iter;
323 	unsigned nsegs = 0, bytes = 0;
324 
325 	bio_for_each_bvec(bv, bio, iter) {
326 		if (bv.bv_offset & lim->dma_alignment ||
327 		    bv.bv_len & len_align_mask)
328 			return -EINVAL;
329 
330 		/*
331 		 * If the queue doesn't support SG gaps and adding this
332 		 * offset would create a gap, disallow it.
333 		 */
334 		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
335 			goto split;
336 
337 		if (nsegs < lim->max_segments &&
338 		    bytes + bv.bv_len <= max_bytes &&
339 		    bv.bv_offset + bv.bv_len <= lim->min_segment_size) {
340 			nsegs++;
341 			bytes += bv.bv_len;
342 		} else {
343 			if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
344 					lim->max_segments, max_bytes))
345 				goto split;
346 		}
347 
348 		bvprv = bv;
349 		bvprvp = &bvprv;
350 	}
351 
352 	*segs = nsegs;
353 	return 0;
354 split:
355 	if (bio->bi_opf & REQ_ATOMIC)
356 		return -EINVAL;
357 
358 	/*
359 	 * We can't sanely support splitting for a REQ_NOWAIT bio. Return
360 	 * -EAGAIN so that the bio is failed if splitting is required.
361 	 */
362 	if (bio->bi_opf & REQ_NOWAIT)
363 		return -EAGAIN;
364 
365 	*segs = nsegs;
366 
367 	/*
368 	 * Individual bvecs might not be logical block aligned. Round down the
369 	 * split size so that each bio is properly block size aligned, even if
370 	 * we do not use the full hardware limits.
371 	 *
372 	 * It is possible to submit a bio that can't be split into a valid io:
373 	 * it may either have too many discontiguous vectors for the max
374 	 * segments limit, or contain virtual boundary gaps without having a
375 	 * valid block sized split. A zero byte result means one of those
376 	 * conditions occurred.
377 	 */
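	/*
	 * Example (hypothetical values): with bytes = 130560 (255 sectors)
	 * and a 4096-byte split alignment, ALIGN_DOWN() yields 126976 bytes
	 * (248 sectors), keeping both halves of the split block size aligned.
	 */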
378 	bytes = ALIGN_DOWN(bytes, bio_split_alignment(bio, lim));
379 	if (!bytes)
380 		return -EINVAL;
381 
382 	/*
383 	 * Bio splitting may cause subtle trouble such as a hang when doing
384 	 * sync iopoll in the direct IO path. As the performance gain of iopoll
385 	 * for big IO can be trivial, disable iopoll when a split is needed.
386 	 */
387 	bio_clear_polled(bio);
388 	return bytes >> SECTOR_SHIFT;
389 }
390 EXPORT_SYMBOL_GPL(bio_split_io_at);
391 
392 struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
393 		unsigned *nr_segs)
394 {
395 	return bio_submit_split(bio,
396 		bio_split_rw_at(bio, lim, nr_segs,
397 			get_max_io_size(bio, lim) << SECTOR_SHIFT));
398 }
399 
400 /*
401  * REQ_OP_ZONE_APPEND bios must never be split by the block layer.
402  *
403  * But we want the nr_segs calculation provided by bio_split_rw_at, and having
404  * a good sanity check that the submitter built the bio correctly is nice to
405  * have as well.
406  */
407 struct bio *bio_split_zone_append(struct bio *bio,
408 		const struct queue_limits *lim, unsigned *nr_segs)
409 {
410 	int split_sectors;
411 
412 	split_sectors = bio_split_rw_at(bio, lim, nr_segs,
413 			lim->max_zone_append_sectors << SECTOR_SHIFT);
414 	if (WARN_ON_ONCE(split_sectors > 0))
415 		split_sectors = -EINVAL;
416 	return bio_submit_split(bio, split_sectors);
417 }
418 
419 struct bio *bio_split_write_zeroes(struct bio *bio,
420 		const struct queue_limits *lim, unsigned *nsegs)
421 {
422 	unsigned int max_sectors = get_max_io_size(bio, lim);
423 
424 	*nsegs = 0;
425 
426 	/*
427 	 * An unset limit should normally not happen, as bio submission is keyed
428 	 * off having a non-zero limit.  But SCSI can clear the limit in the
429 	 * I/O completion handler, and we can race and see this.  Splitting to a
430 	 * zero limit obviously doesn't make sense, so band-aid it here.
431 	 */
432 	if (!max_sectors)
433 		return bio;
434 	if (bio_sectors(bio) <= max_sectors)
435 		return bio;
436 	return bio_submit_split(bio, max_sectors);
437 }
438 
439 /**
440  * bio_split_to_limits - split a bio to fit the queue limits
441  * @bio:     bio to be split
442  *
443  * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
444  * if so split off a bio fitting the limits from the beginning of @bio and
445  * return it.  @bio is shortened to the remainder and re-submitted.
446  *
447  * The split bio is allocated from @q->bio_split, which is provided by the
448  * block layer.
449  */
450 struct bio *bio_split_to_limits(struct bio *bio)
451 {
452 	unsigned int nr_segs;
453 
454 	return __bio_split_to_limits(bio, bdev_limits(bio->bi_bdev), &nr_segs);
455 }
456 EXPORT_SYMBOL(bio_split_to_limits);
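/*
 * Usage sketch (hypothetical driver, not taken from in-tree code): bio-based
 * drivers typically call this at the top of their ->submit_bio handler:
 *
 *	static void my_submit_bio(struct bio *bio)
 *	{
 *		bio = bio_split_to_limits(bio);
 *		if (!bio)
 *			return;
 *		...
 *	}
 *
 * where my_submit_bio() is a made-up name; a NULL return means the bio was
 * consumed (e.g. failed) and must not be touched again.
 */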
457 
458 unsigned int blk_recalc_rq_segments(struct request *rq)
459 {
460 	unsigned int nr_phys_segs = 0;
461 	unsigned int bytes = 0;
462 	struct req_iterator iter;
463 	struct bio_vec bv;
464 
465 	if (!rq->bio)
466 		return 0;
467 
468 	switch (bio_op(rq->bio)) {
469 	case REQ_OP_DISCARD:
470 	case REQ_OP_SECURE_ERASE:
471 		if (queue_max_discard_segments(rq->q) > 1) {
472 			struct bio *bio = rq->bio;
473 
474 			for_each_bio(bio)
475 				nr_phys_segs++;
476 			return nr_phys_segs;
477 		}
478 		return 1;
479 	case REQ_OP_WRITE_ZEROES:
480 		return 0;
481 	default:
482 		break;
483 	}
484 
485 	rq_for_each_bvec(bv, rq, iter)
486 		bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
487 				UINT_MAX, UINT_MAX);
488 	return nr_phys_segs;
489 }
490 
491 static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
492 						  sector_t offset)
493 {
494 	struct request_queue *q = rq->q;
495 	struct queue_limits *lim = &q->limits;
496 	unsigned int max_sectors, boundary_sectors;
497 	bool is_atomic = rq->cmd_flags & REQ_ATOMIC;
498 
499 	if (blk_rq_is_passthrough(rq))
500 		return q->limits.max_hw_sectors;
501 
502 	boundary_sectors = blk_boundary_sectors(lim, is_atomic);
503 	max_sectors = blk_queue_get_max_sectors(rq);
504 
505 	if (!boundary_sectors ||
506 	    req_op(rq) == REQ_OP_DISCARD ||
507 	    req_op(rq) == REQ_OP_SECURE_ERASE)
508 		return max_sectors;
509 	return min(max_sectors,
510 		   blk_boundary_sectors_left(offset, boundary_sectors));
511 }
512 
513 static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
514 		unsigned int nr_phys_segs)
515 {
516 	if (!blk_cgroup_mergeable(req, bio))
517 		goto no_merge;
518 
519 	if (blk_integrity_merge_bio(req->q, req, bio) == false)
520 		goto no_merge;
521 
522 	/* discard request merge won't add new segment */
523 	if (req_op(req) == REQ_OP_DISCARD)
524 		return 1;
525 
526 	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
527 		goto no_merge;
528 
529 	/*
530 	 * This will form the start of a new hw segment.  Bump both
531 	 * counters.
532 	 */
533 	req->nr_phys_segments += nr_phys_segs;
534 	if (bio_integrity(bio))
535 		req->nr_integrity_segments += blk_rq_count_integrity_sg(req->q,
536 									bio);
537 	return 1;
538 
539 no_merge:
540 	req_set_nomerge(req->q, req);
541 	return 0;
542 }
543 
544 int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
545 {
546 	if (req_gap_back_merge(req, bio))
547 		return 0;
548 	if (blk_integrity_rq(req) &&
549 	    integrity_req_gap_back_merge(req, bio))
550 		return 0;
551 	if (!bio_crypt_ctx_back_mergeable(req, bio))
552 		return 0;
553 	if (blk_rq_sectors(req) + bio_sectors(bio) >
554 	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
555 		req_set_nomerge(req->q, req);
556 		return 0;
557 	}
558 
559 	return ll_new_hw_segment(req, bio, nr_segs);
560 }
561 
562 static int ll_front_merge_fn(struct request *req, struct bio *bio,
563 		unsigned int nr_segs)
564 {
565 	if (req_gap_front_merge(req, bio))
566 		return 0;
567 	if (blk_integrity_rq(req) &&
568 	    integrity_req_gap_front_merge(req, bio))
569 		return 0;
570 	if (!bio_crypt_ctx_front_mergeable(req, bio))
571 		return 0;
572 	if (blk_rq_sectors(req) + bio_sectors(bio) >
573 	    blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
574 		req_set_nomerge(req->q, req);
575 		return 0;
576 	}
577 
578 	return ll_new_hw_segment(req, bio, nr_segs);
579 }
580 
581 static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
582 		struct request *next)
583 {
584 	unsigned short segments = blk_rq_nr_discard_segments(req);
585 
586 	if (segments >= queue_max_discard_segments(q))
587 		goto no_merge;
588 	if (blk_rq_sectors(req) + bio_sectors(next->bio) >
589 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
590 		goto no_merge;
591 
592 	req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
593 	return true;
594 no_merge:
595 	req_set_nomerge(q, req);
596 	return false;
597 }
598 
599 static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
600 				struct request *next)
601 {
602 	int total_phys_segments;
603 
604 	if (req_gap_back_merge(req, next->bio))
605 		return 0;
606 
607 	/*
608 	 * Will it become too large?
609 	 */
610 	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
611 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
612 		return 0;
613 
614 	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
615 	if (total_phys_segments > blk_rq_get_max_segments(req))
616 		return 0;
617 
618 	if (!blk_cgroup_mergeable(req, next->bio))
619 		return 0;
620 
621 	if (blk_integrity_merge_rq(q, req, next) == false)
622 		return 0;
623 
624 	if (!bio_crypt_ctx_merge_rq(req, next))
625 		return 0;
626 
627 	/* Merge is OK... */
628 	req->nr_phys_segments = total_phys_segments;
629 	req->nr_integrity_segments += next->nr_integrity_segments;
630 	return 1;
631 }
632 
633 /**
634  * blk_rq_set_mixed_merge - mark a request as mixed merge
635  * @rq: request to mark as mixed merge
636  *
637  * Description:
638  *     @rq is about to be mixed merged.  Make sure the attributes
639  *     which can be mixed are set in each bio and mark @rq as mixed
640  *     merged.
641  */
642 static void blk_rq_set_mixed_merge(struct request *rq)
643 {
644 	blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
645 	struct bio *bio;
646 
647 	if (rq->rq_flags & RQF_MIXED_MERGE)
648 		return;
649 
650 	/*
651 	 * @rq will no longer represent mixable attributes for all the
652 	 * contained bios.  It will just track those of the first one.
653 	 * Distribute the attributes to each bio.
654 	 */
655 	for (bio = rq->bio; bio; bio = bio->bi_next) {
656 		WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
657 			     (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
658 		bio->bi_opf |= ff;
659 	}
660 	rq->rq_flags |= RQF_MIXED_MERGE;
661 }
662 
663 static inline blk_opf_t bio_failfast(const struct bio *bio)
664 {
665 	if (bio->bi_opf & REQ_RAHEAD)
666 		return REQ_FAILFAST_MASK;
667 
668 	return bio->bi_opf & REQ_FAILFAST_MASK;
669 }
670 
671 /*
672  * After we are marked as MIXED_MERGE, any new readahead (RA) bio has to
673  * be marked as failfast, and the request's failfast flags have to be
674  * updated in case of a front merge.
675  */
676 static inline void blk_update_mixed_merge(struct request *req,
677 		struct bio *bio, bool front_merge)
678 {
679 	if (req->rq_flags & RQF_MIXED_MERGE) {
680 		if (bio->bi_opf & REQ_RAHEAD)
681 			bio->bi_opf |= REQ_FAILFAST_MASK;
682 
683 		if (front_merge) {
684 			req->cmd_flags &= ~REQ_FAILFAST_MASK;
685 			req->cmd_flags |= bio->bi_opf & REQ_FAILFAST_MASK;
686 		}
687 	}
688 }
689 
690 static void blk_account_io_merge_request(struct request *req)
691 {
692 	if (req->rq_flags & RQF_IO_STAT) {
693 		part_stat_lock();
694 		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
695 		part_stat_local_dec(req->part,
696 				    in_flight[op_is_write(req_op(req))]);
697 		part_stat_unlock();
698 	}
699 }
700 
701 static enum elv_merge blk_try_req_merge(struct request *req,
702 					struct request *next)
703 {
704 	if (blk_discard_mergable(req))
705 		return ELEVATOR_DISCARD_MERGE;
706 	else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
707 		return ELEVATOR_BACK_MERGE;
708 
709 	return ELEVATOR_NO_MERGE;
710 }
711 
712 static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
713 					      struct bio *bio)
714 {
715 	return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
716 }
717 
718 static bool blk_atomic_write_mergeable_rqs(struct request *rq,
719 					   struct request *next)
720 {
721 	return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
722 }
723 
724 /*
725  * For non-mq, this has to be called with the request spinlock acquired.
726  * For mq with scheduling, the appropriate queue wide lock should be held.
727  */
728 static struct request *attempt_merge(struct request_queue *q,
729 				     struct request *req, struct request *next)
730 {
731 	if (!rq_mergeable(req) || !rq_mergeable(next))
732 		return NULL;
733 
734 	if (req_op(req) != req_op(next))
735 		return NULL;
736 
737 	if (req->bio->bi_write_hint != next->bio->bi_write_hint)
738 		return NULL;
739 	if (req->bio->bi_write_stream != next->bio->bi_write_stream)
740 		return NULL;
741 	if (req->bio->bi_ioprio != next->bio->bi_ioprio)
742 		return NULL;
743 	if (!blk_atomic_write_mergeable_rqs(req, next))
744 		return NULL;
745 
746 	/*
747 	 * If we are allowed to merge, then append the bio list from
748 	 * next to rq and release next. The merge function will have
749 	 * updated the segment counts; update the sector counts here.
750 	 * Handle DISCARDs separately, as they have separate
751 	 * settings.
752 	 */
753 
754 	switch (blk_try_req_merge(req, next)) {
755 	case ELEVATOR_DISCARD_MERGE:
756 		if (!req_attempt_discard_merge(q, req, next))
757 			return NULL;
758 		break;
759 	case ELEVATOR_BACK_MERGE:
760 		if (!ll_merge_requests_fn(q, req, next))
761 			return NULL;
762 		break;
763 	default:
764 		return NULL;
765 	}
766 
767 	/*
768 	 * If failfast settings disagree or either of the two is already
769 	 * a mixed merge, mark both as mixed before proceeding.  This
770 	 * makes sure that all involved bios have mixable attributes
771 	 * set properly.
772 	 */
773 	if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
774 	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
775 	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
776 		blk_rq_set_mixed_merge(req);
777 		blk_rq_set_mixed_merge(next);
778 	}
779 
780 	/*
781 	 * At this point we have either done a back merge or front merge. We
782 	 * need the smaller start_time_ns of the merged requests to be the
783 	 * current request for accounting purposes.
784 	 */
785 	if (next->start_time_ns < req->start_time_ns)
786 		req->start_time_ns = next->start_time_ns;
787 
788 	req->biotail->bi_next = next->bio;
789 	req->biotail = next->biotail;
790 
791 	req->__data_len += blk_rq_bytes(next);
792 
793 	if (!blk_discard_mergable(req))
794 		elv_merge_requests(q, req, next);
795 
796 	blk_crypto_rq_put_keyslot(next);
797 
798 	/*
799 	 * 'next' is going away, so update stats accordingly
800 	 */
801 	blk_account_io_merge_request(next);
802 
803 	trace_block_rq_merge(next);
804 
805 	/*
806 	 * Ownership of the bios has passed from next to req; return 'next'
807 	 * for the caller to free.
808 	 */
809 	next->bio = NULL;
810 	return next;
811 }
812 
813 static struct request *attempt_back_merge(struct request_queue *q,
814 		struct request *rq)
815 {
816 	struct request *next = elv_latter_request(q, rq);
817 
818 	if (next)
819 		return attempt_merge(q, rq, next);
820 
821 	return NULL;
822 }
823 
824 static struct request *attempt_front_merge(struct request_queue *q,
825 		struct request *rq)
826 {
827 	struct request *prev = elv_former_request(q, rq);
828 
829 	if (prev)
830 		return attempt_merge(q, prev, rq);
831 
832 	return NULL;
833 }
834 
835 /*
836  * Try to merge 'next' into 'rq'. Return true if the merge happened, false
837  * otherwise. The caller is responsible for freeing 'next' if the merge
838  * happened.
839  */
840 bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
841 			   struct request *next)
842 {
843 	return attempt_merge(q, rq, next);
844 }
845 
846 bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
847 {
848 	if (!rq_mergeable(rq) || !bio_mergeable(bio))
849 		return false;
850 
851 	if (req_op(rq) != bio_op(bio))
852 		return false;
853 
854 	if (!blk_cgroup_mergeable(rq, bio))
855 		return false;
856 	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
857 		return false;
858 	if (!bio_crypt_rq_ctx_compatible(rq, bio))
859 		return false;
860 	if (rq->bio->bi_write_hint != bio->bi_write_hint)
861 		return false;
862 	if (rq->bio->bi_write_stream != bio->bi_write_stream)
863 		return false;
864 	if (rq->bio->bi_ioprio != bio->bi_ioprio)
865 		return false;
866 	if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
867 		return false;
868 
869 	return true;
870 }
871 
872 enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
873 {
874 	if (blk_discard_mergable(rq))
875 		return ELEVATOR_DISCARD_MERGE;
876 	else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
877 		return ELEVATOR_BACK_MERGE;
878 	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
879 		return ELEVATOR_FRONT_MERGE;
880 	return ELEVATOR_NO_MERGE;
881 }
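/*
 * Illustration for blk_try_merge() above (hypothetical request): for a
 * request covering sectors 100..107 (blk_rq_pos() == 100, 8 sectors), a bio
 * starting at sector 108 is a back merge candidate, while an 8-sector bio
 * starting at sector 92 is a front merge candidate.
 */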
882 
883 static void blk_account_io_merge_bio(struct request *req)
884 {
885 	if (req->rq_flags & RQF_IO_STAT) {
886 		part_stat_lock();
887 		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
888 		part_stat_unlock();
889 	}
890 }
891 
892 enum bio_merge_status bio_attempt_back_merge(struct request *req,
893 		struct bio *bio, unsigned int nr_segs)
894 {
895 	const blk_opf_t ff = bio_failfast(bio);
896 
897 	if (!ll_back_merge_fn(req, bio, nr_segs))
898 		return BIO_MERGE_FAILED;
899 
900 	trace_block_bio_backmerge(bio);
901 	rq_qos_merge(req->q, req, bio);
902 
903 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
904 		blk_rq_set_mixed_merge(req);
905 
906 	blk_update_mixed_merge(req, bio, false);
907 
908 	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
909 		blk_zone_write_plug_bio_merged(bio);
910 
911 	req->biotail->bi_next = bio;
912 	req->biotail = bio;
913 	req->__data_len += bio->bi_iter.bi_size;
914 
915 	bio_crypt_free_ctx(bio);
916 
917 	blk_account_io_merge_bio(req);
918 	return BIO_MERGE_OK;
919 }
920 
921 static enum bio_merge_status bio_attempt_front_merge(struct request *req,
922 		struct bio *bio, unsigned int nr_segs)
923 {
924 	const blk_opf_t ff = bio_failfast(bio);
925 
926 	/*
927 	 * A front merge for writes to sequential zones of a zoned block device
928 	 * can happen only if the user submitted writes out of order. Do not
929 	 * merge such a write; let it fail instead.
930 	 */
931 	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
932 		return BIO_MERGE_FAILED;
933 
934 	if (!ll_front_merge_fn(req, bio, nr_segs))
935 		return BIO_MERGE_FAILED;
936 
937 	trace_block_bio_frontmerge(bio);
938 	rq_qos_merge(req->q, req, bio);
939 
940 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
941 		blk_rq_set_mixed_merge(req);
942 
943 	blk_update_mixed_merge(req, bio, true);
944 
945 	bio->bi_next = req->bio;
946 	req->bio = bio;
947 
948 	req->__sector = bio->bi_iter.bi_sector;
949 	req->__data_len += bio->bi_iter.bi_size;
950 
951 	bio_crypt_do_front_merge(req, bio);
952 
953 	blk_account_io_merge_bio(req);
954 	return BIO_MERGE_OK;
955 }
956 
957 static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
958 		struct request *req, struct bio *bio)
959 {
960 	unsigned short segments = blk_rq_nr_discard_segments(req);
961 
962 	if (segments >= queue_max_discard_segments(q))
963 		goto no_merge;
964 	if (blk_rq_sectors(req) + bio_sectors(bio) >
965 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
966 		goto no_merge;
967 
968 	rq_qos_merge(q, req, bio);
969 
970 	req->biotail->bi_next = bio;
971 	req->biotail = bio;
972 	req->__data_len += bio->bi_iter.bi_size;
973 	req->nr_phys_segments = segments + 1;
974 
975 	blk_account_io_merge_bio(req);
976 	return BIO_MERGE_OK;
977 no_merge:
978 	req_set_nomerge(q, req);
979 	return BIO_MERGE_FAILED;
980 }
981 
982 static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
983 						   struct request *rq,
984 						   struct bio *bio,
985 						   unsigned int nr_segs,
986 						   bool sched_allow_merge)
987 {
988 	if (!blk_rq_merge_ok(rq, bio))
989 		return BIO_MERGE_NONE;
990 
991 	switch (blk_try_merge(rq, bio)) {
992 	case ELEVATOR_BACK_MERGE:
993 		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
994 			return bio_attempt_back_merge(rq, bio, nr_segs);
995 		break;
996 	case ELEVATOR_FRONT_MERGE:
997 		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
998 			return bio_attempt_front_merge(rq, bio, nr_segs);
999 		break;
1000 	case ELEVATOR_DISCARD_MERGE:
1001 		return bio_attempt_discard_merge(q, rq, bio);
1002 	default:
1003 		return BIO_MERGE_NONE;
1004 	}
1005 
1006 	return BIO_MERGE_FAILED;
1007 }
1008 
1009 /**
1010  * blk_attempt_plug_merge - try to merge with %current's plugged list
1011  * @q: request_queue new bio is being queued at
1012  * @bio: new bio being queued
1013  * @nr_segs: number of segments in @bio
1015  *
1016  * Determine whether @bio being queued on @q can be merged with the previous
1017  * request on %current's plugged list.  Returns %true if merge was successful,
1018  * otherwise %false.
1019  *
1020  * Plugging coalesces IOs from the same issuer for the same purpose without
1021  * going through @q->queue_lock.  As such it's more of an issuing mechanism
1022  * than scheduling, and the request, while it may have elvpriv data, is not
1023  * added to the elevator at this point.  In addition, we don't have
1024  * reliable access to the elevator outside the queue lock.  Only check basic
1025  * merging parameters without querying the elevator.
1026  *
1027  * Caller must ensure !blk_queue_nomerges(q) beforehand.
1028  */
1029 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
1030 		unsigned int nr_segs)
1031 {
1032 	struct blk_plug *plug = current->plug;
1033 	struct request *rq;
1034 
1035 	if (!plug || rq_list_empty(&plug->mq_list))
1036 		return false;
1037 
1038 	rq = plug->mq_list.tail;
1039 	if (rq->q == q)
1040 		return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
1041 			BIO_MERGE_OK;
1042 	else if (!plug->multiple_queues)
1043 		return false;
1044 
1045 	rq_list_for_each(&plug->mq_list, rq) {
1046 		if (rq->q != q)
1047 			continue;
1048 		if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
1049 		    BIO_MERGE_OK)
1050 			return true;
1051 		break;
1052 	}
1053 	return false;
1054 }
1055 
1056 /*
1057  * Iterate list of requests and see if we can merge this bio with any
1058  * of them.
1059  */
1060 bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
1061 			struct bio *bio, unsigned int nr_segs)
1062 {
1063 	struct request *rq;
1064 	int checked = 8;
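	/* only look at the last few requests to bound the cost of the scan */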
1065 
1066 	list_for_each_entry_reverse(rq, list, queuelist) {
1067 		if (!checked--)
1068 			break;
1069 
1070 		switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
1071 		case BIO_MERGE_NONE:
1072 			continue;
1073 		case BIO_MERGE_OK:
1074 			return true;
1075 		case BIO_MERGE_FAILED:
1076 			return false;
1077 		}
1078 
1079 	}
1080 
1081 	return false;
1082 }
1083 EXPORT_SYMBOL_GPL(blk_bio_list_merge);
1084 
1085 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
1086 		unsigned int nr_segs, struct request **merged_request)
1087 {
1088 	struct request *rq;
1089 
1090 	switch (elv_merge(q, &rq, bio)) {
1091 	case ELEVATOR_BACK_MERGE:
1092 		if (!blk_mq_sched_allow_merge(q, rq, bio))
1093 			return false;
1094 		if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
1095 			return false;
1096 		*merged_request = attempt_back_merge(q, rq);
1097 		if (!*merged_request)
1098 			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
1099 		return true;
1100 	case ELEVATOR_FRONT_MERGE:
1101 		if (!blk_mq_sched_allow_merge(q, rq, bio))
1102 			return false;
1103 		if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
1104 			return false;
1105 		*merged_request = attempt_front_merge(q, rq);
1106 		if (!*merged_request)
1107 			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
1108 		return true;
1109 	case ELEVATOR_DISCARD_MERGE:
1110 		return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
1111 	default:
1112 		return false;
1113 	}
1114 }
1115 EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
1116