xref: /linux/block/blk-merge.c (revision 6f7e6393d1ce636bb7ec77a7fe7b77458fddf701)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Functions related to segment and merge handling
4  */
5 #include <linux/kernel.h>
6 #include <linux/module.h>
7 #include <linux/bio.h>
8 #include <linux/blkdev.h>
9 #include <linux/blk-integrity.h>
10 #include <linux/part_stat.h>
11 #include <linux/blk-cgroup.h>
12 
13 #include <trace/events/block.h>
14 
15 #include "blk.h"
16 #include "blk-mq-sched.h"
17 #include "blk-rq-qos.h"
18 #include "blk-throttle.h"
19 
20 static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
21 {
22 	*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
23 }
24 
25 static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
26 {
27 	struct bvec_iter iter = bio->bi_iter;
28 	int idx;
29 
30 	bio_get_first_bvec(bio, bv);
31 	if (bv->bv_len == bio->bi_iter.bi_size)
32 		return;		/* this bio only has a single bvec */
33 
34 	bio_advance_iter(bio, &iter, iter.bi_size);
35 
36 	if (!iter.bi_bvec_done)
37 		idx = iter.bi_idx - 1;
38 	else	/* in the middle of bvec */
39 		idx = iter.bi_idx;
40 
41 	*bv = bio->bi_io_vec[idx];
42 
43 	/*
44 	 * iter.bi_bvec_done records actual length of the last bvec
45 	 * if this bio ends in the middle of one io vector
46 	 */
47 	if (iter.bi_bvec_done)
48 		bv->bv_len = iter.bi_bvec_done;
49 }
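
/*
 * Worked example: for a bio with two 4096-byte bvecs and
 * bi_iter.bi_size == 6144, advancing a copy of the iterator by bi_size
 * lands in the middle of the second bvec (bi_idx == 1, bi_bvec_done == 2048),
 * so bio_get_last_bvec() returns bi_io_vec[1] with bv_len trimmed to 2048.
 * Had bi_size been 8192, the advance would have stopped exactly on a bvec
 * boundary (bi_bvec_done == 0, bi_idx == 2) and bi_io_vec[1] would be
 * returned with its full length.
 */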
50 
51 static inline bool bio_will_gap(struct request_queue *q,
52 		struct request *prev_rq, struct bio *prev, struct bio *next)
53 {
54 	struct bio_vec pb, nb;
55 
56 	if (!bio_has_data(prev) || !queue_virt_boundary(q))
57 		return false;
58 
59 	/*
60 	 * Don't merge if the first bio starts at a non-zero offset; otherwise it
61 	 * is quite difficult to respect the SG gap limit.  We work hard to
62 	 * merge the huge numbers of small bios submitted e.g. by mkfs.
63 	 */
64 	if (prev_rq)
65 		bio_get_first_bvec(prev_rq->bio, &pb);
66 	else
67 		bio_get_first_bvec(prev, &pb);
68 	if (pb.bv_offset & queue_virt_boundary(q))
69 		return true;
70 
71 	/*
72 	 * We don't need to worry about the situation that the merged segment
73 	 * ends in unaligned virt boundary:
74 	 *
75 	 * - if 'pb' ends aligned, the merged segment ends aligned
76 	 * - if 'pb' ends unaligned, the next bio must include
77 	 *   one single bvec of 'nb', otherwise the 'nb' can't
78 	 *   merge with 'pb'
79 	 */
80 	bio_get_last_bvec(prev, &pb);
81 	bio_get_first_bvec(next, &nb);
82 	if (biovec_phys_mergeable(q, &pb, &nb))
83 		return false;
84 	return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
85 }
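
/*
 * Worked example: with a virt_boundary_mask of 0xfff (4 KiB), two bios can
 * only be merged into one request if the last bvec of the front bio ends on
 * a 4 KiB boundary and the first bvec of the back bio starts on one, unless
 * the two bvecs are physically contiguous and mergeable into a single
 * segment.  A front bio ending at page offset 0x800 followed by a back bio
 * starting at offset 0x0 would leave a gap inside the merged request, so
 * bio_will_gap() returns true for that pair.
 */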
86 
87 static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
88 {
89 	return bio_will_gap(req->q, req, req->biotail, bio);
90 }
91 
92 static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
93 {
94 	return bio_will_gap(req->q, NULL, bio, req->bio);
95 }
96 
97 /*
98  * The maximum size that a bio may reach has to be aligned down to the
99  * logical block size, which is the smallest unit accepted by the hardware.
100  */
101 static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
102 {
103 	return round_down(BIO_MAX_SIZE, lim->logical_block_size) >>
104 			SECTOR_SHIFT;
105 }
106 
107 /*
108  * bio_submit_split_bioset - Submit a bio, splitting it at a designated sector
109  * @bio:		the original bio to be submitted and split
110  * @split_sectors:	the sector count at which to split
111  * @bs:			the bio set used for allocating the new split bio
112  *
113  * The original bio is modified to contain the remaining sectors and submitted.
114  * The caller is responsible for submitting the returned bio.
115  *
116  * On success, the newly allocated bio representing the initial part is
117  * returned; on failure, NULL is returned and the original bio is failed.
118  */
119 struct bio *bio_submit_split_bioset(struct bio *bio, unsigned int split_sectors,
120 				    struct bio_set *bs)
121 {
122 	struct bio *split = bio_split(bio, split_sectors, GFP_NOIO, bs);
123 
124 	if (IS_ERR(split)) {
125 		bio->bi_status = errno_to_blk_status(PTR_ERR(split));
126 		bio_endio(bio);
127 		return NULL;
128 	}
129 
130 	bio_chain(split, bio);
131 	trace_block_split(split, bio->bi_iter.bi_sector);
132 	WARN_ON_ONCE(bio_zone_write_plugging(bio));
133 
134 	if (should_fail_bio(bio))
135 		bio_io_error(bio);
136 	else if (!blk_throtl_bio(bio))
137 		submit_bio_noacct_nocheck(bio, true);
138 
139 	return split;
140 }
141 EXPORT_SYMBOL_GPL(bio_submit_split_bioset);
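
/*
 * Illustrative calling sketch; my_sectors_to_boundary() and my_split_set are
 * hypothetical and only demonstrate the calling convention documented above:
 *
 *	split = bio_submit_split_bioset(bio, my_sectors_to_boundary(bio),
 *					&my_split_set);
 *	if (!split)
 *		return;
 *
 * On failure the original bio has already been ended with an error.  On
 * success 'split' holds the leading split_sectors and must be submitted by
 * the caller, while the remainder of the original bio has already been
 * resubmitted by the helper.
 */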
142 
143 static struct bio *bio_submit_split(struct bio *bio, int split_sectors)
144 {
145 	if (unlikely(split_sectors < 0)) {
146 		bio->bi_status = errno_to_blk_status(split_sectors);
147 		bio_endio(bio);
148 		return NULL;
149 	}
150 
151 	if (split_sectors) {
152 		bio = bio_submit_split_bioset(bio, split_sectors,
153 				&bio->bi_bdev->bd_disk->bio_split);
154 		if (bio)
155 			bio->bi_opf |= REQ_NOMERGE;
156 	}
157 
158 	return bio;
159 }
160 
161 static struct bio *__bio_split_discard(struct bio *bio,
162 		const struct queue_limits *lim, unsigned *nsegs,
163 		unsigned int max_sectors)
164 {
165 	unsigned int max_discard_sectors, granularity;
166 	sector_t tmp;
167 	unsigned split_sectors;
168 
169 	*nsegs = 1;
170 
171 	granularity = max(lim->discard_granularity >> 9, 1U);
172 
173 	max_discard_sectors = min(max_sectors, bio_allowed_max_sectors(lim));
174 	max_discard_sectors -= max_discard_sectors % granularity;
175 	if (unlikely(!max_discard_sectors))
176 		return bio;
177 
178 	if (bio_sectors(bio) <= max_discard_sectors)
179 		return bio;
180 
181 	split_sectors = max_discard_sectors;
182 
183 	/*
184 	 * If the next starting sector would be misaligned, stop the discard at
185 	 * the previous aligned sector.
186 	 */
187 	tmp = bio->bi_iter.bi_sector + split_sectors -
188 		((lim->discard_alignment >> 9) % granularity);
189 	tmp = sector_div(tmp, granularity);
190 
191 	if (split_sectors > tmp)
192 		split_sectors -= tmp;
193 
194 	return bio_submit_split(bio, split_sectors);
195 }
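
/*
 * Worked example of the alignment math above: with a 1 MiB discard
 * granularity (granularity == 2048 sectors), discard_alignment == 0,
 * max_discard_sectors == 4096 and a discard starting at sector 1000,
 * tmp == (1000 + 4096) % 2048 == 1000, so split_sectors becomes
 * 4096 - 1000 == 3096 and the remainder starts at sector 4096, which is
 * aligned to the discard granularity.
 */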
196 
197 struct bio *bio_split_discard(struct bio *bio, const struct queue_limits *lim,
198 		unsigned *nsegs)
199 {
200 	unsigned int max_sectors;
201 
202 	if (bio_op(bio) == REQ_OP_SECURE_ERASE)
203 		max_sectors = lim->max_secure_erase_sectors;
204 	else
205 		max_sectors = lim->max_discard_sectors;
206 
207 	return __bio_split_discard(bio, lim, nsegs, max_sectors);
208 }
209 
210 static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
211 						bool is_atomic)
212 {
213 	/*
214 	 * chunk_sectors must be a multiple of atomic_write_boundary_sectors if
215 	 * both are non-zero.
216 	 */
217 	if (is_atomic && lim->atomic_write_boundary_sectors)
218 		return lim->atomic_write_boundary_sectors;
219 
220 	return lim->chunk_sectors;
221 }
222 
223 /*
224  * Return the maximum number of sectors from the start of a bio that may be
225  * submitted as a single request to a block device. If enough sectors remain,
226  * align the end to the physical block size. Otherwise align the end to the
227  * logical block size. This approach minimizes the number of non-aligned
228  * requests that are submitted to a block device if the start of a bio is not
229  * aligned to a physical block boundary.
230  */
231 static inline unsigned get_max_io_size(struct bio *bio,
232 				       const struct queue_limits *lim)
233 {
234 	unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
235 	unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
236 	bool is_atomic = bio->bi_opf & REQ_ATOMIC;
237 	unsigned boundary_sectors = blk_boundary_sectors(lim, is_atomic);
238 	unsigned max_sectors, start, end;
239 
240 	/*
241 	 * We ignore lim->max_sectors for atomic writes because it may be less
242 	 * than the actual bio size, which we cannot tolerate.
243 	 */
244 	if (bio_op(bio) == REQ_OP_WRITE_ZEROES)
245 		max_sectors = lim->max_write_zeroes_sectors;
246 	else if (is_atomic)
247 		max_sectors = lim->atomic_write_max_sectors;
248 	else
249 		max_sectors = lim->max_sectors;
250 
251 	if (boundary_sectors) {
252 		max_sectors = min(max_sectors,
253 			blk_boundary_sectors_left(bio->bi_iter.bi_sector,
254 					      boundary_sectors));
255 	}
256 
257 	start = bio->bi_iter.bi_sector & (pbs - 1);
258 	end = (start + max_sectors) & ~(pbs - 1);
259 	if (end > start)
260 		return end - start;
261 	return max_sectors & ~(lbs - 1);
262 }
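
/*
 * Worked example: with a 4 KiB physical block size (pbs == 8 sectors), a bio
 * starting at sector 3 and max_sectors == 256, start == 3 and
 * end == (3 + 256) & ~7 == 256, so 253 sectors are allowed and the I/O ends
 * at sector 256, i.e. on a physical block boundary.  If aligning the end
 * down left no room (end <= start), the size would instead be rounded down
 * to the logical block size.
 */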
263 
264 /**
265  * bvec_split_segs - verify whether or not a bvec should be split in the middle
266  * @lim:      [in] queue limits to split based on
267  * @bv:       [in] bvec to examine
268  * @nsegs:    [in,out] Number of segments in the bio being built. Incremented
269  *            by the number of segments from @bv that may be appended to that
270  *            bio without exceeding @max_segs
271  * @bytes:    [in,out] Number of bytes in the bio being built. Incremented
272  *            by the number of bytes from @bv that may be appended to that
273  *            bio without exceeding @max_bytes
274  * @max_segs: [in] upper bound for *@nsegs
275  * @max_bytes: [in] upper bound for *@bytes
276  *
277  * When splitting a bio, it can happen that a bvec is encountered that is too
278  * big to fit in a single segment and hence that it has to be split in the
279  * middle. This function verifies whether or not that should happen. The value
280  * %true is returned if and only if appending the entire @bv to a bio with
281  * *@nsegs segments and *@sectors sectors would make that bio unacceptable for
282  * *@nsegs segments and *@bytes bytes would make that bio unacceptable for
283  */
284 static bool bvec_split_segs(const struct queue_limits *lim,
285 		const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
286 		unsigned max_segs, unsigned max_bytes)
287 {
288 	unsigned max_len = max_bytes - *bytes;
289 	unsigned len = min(bv->bv_len, max_len);
290 	unsigned total_len = 0;
291 	unsigned seg_size = 0;
292 
293 	while (len && *nsegs < max_segs) {
294 		seg_size = get_max_segment_size(lim, bvec_phys(bv) + total_len, len);
295 
296 		(*nsegs)++;
297 		total_len += seg_size;
298 		len -= seg_size;
299 
300 		if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
301 			break;
302 	}
303 
304 	*bytes += total_len;
305 
306 	/* tell the caller to split the bvec if it is too big to fit */
307 	return len > 0 || bv->bv_len > max_len;
308 }
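
/*
 * Worked example: ignoring the virt boundary and the segment boundary mask,
 * a single 192 KiB bvec with a 64 KiB max_segment_size is walked in three
 * 64 KiB iterations, consuming three segments and 192 KiB of the byte
 * budget.  Had max_segs or max_bytes been exhausted first, bvec_split_segs()
 * would return true to tell the caller to split the bio inside this bvec.
 */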
309 
310 static unsigned int bio_split_alignment(struct bio *bio,
311 		const struct queue_limits *lim)
312 {
313 	if (op_is_write(bio_op(bio)) && lim->zone_write_granularity)
314 		return lim->zone_write_granularity;
315 	return lim->logical_block_size;
316 }
317 
318 static inline unsigned int bvec_seg_gap(struct bio_vec *bvprv,
319 					struct bio_vec *bv)
320 {
321 	return bv->bv_offset | (bvprv->bv_offset + bvprv->bv_len);
322 }
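
/*
 * bvec_seg_gap() ORs the start offset of a vector with the end offset of the
 * previous one, so the lowest set bit of the accumulated result tracks the
 * weakest power-of-two alignment of any internal segment boundary.  For
 * example, a vector ending at offset 0xa00 followed by one starting at
 * offset 0x400 yields 0xa00 | 0x400 == 0xe00, whose lowest set bit
 * (ffs() == 10) corresponds to 512-byte alignment.
 */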
323 
324 /**
325  * bio_split_io_at - check if and where to split a bio
326  * @bio:  [in] bio to be split
327  * @lim:  [in] queue limits to split based on
328  * @segs: [out] number of segments in the bio with the first half of the sectors
329  * @max_bytes: [in] maximum number of bytes per bio
330  * @len_align_mask: [in] length alignment mask for each vector
331  *
332  * Find out if @bio needs to be split to fit the queue limits in @lim and a
333  * maximum size of @max_bytes.  Returns a negative error number if @bio can't be
334  * split, 0 if the bio doesn't have to be split, or a positive sector offset if
335  * @bio needs to be split.
336  */
337 int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
338 		unsigned *segs, unsigned max_bytes, unsigned len_align_mask)
339 {
340 	struct bio_crypt_ctx *bc = bio_crypt_ctx(bio);
341 	struct bio_vec bv, bvprv, *bvprvp = NULL;
342 	unsigned nsegs = 0, bytes = 0, gaps = 0;
343 	struct bvec_iter iter;
344 	unsigned start_align_mask = lim->dma_alignment;
345 
346 	if (bc) {
347 		start_align_mask |= (bc->bc_key->crypto_cfg.data_unit_size - 1);
348 		len_align_mask |= (bc->bc_key->crypto_cfg.data_unit_size - 1);
349 	}
350 
351 	bio_for_each_bvec(bv, bio, iter) {
352 		if (bv.bv_offset & start_align_mask ||
353 		    bv.bv_len & len_align_mask)
354 			return -EINVAL;
355 
356 		/*
357 		 * If the queue doesn't support SG gaps and adding this
358 		 * offset would create a gap, disallow it.
359 		 */
360 		if (bvprvp) {
361 			if (bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
362 				goto split;
363 			gaps |= bvec_seg_gap(bvprvp, &bv);
364 		}
365 
366 		if (nsegs < lim->max_segments &&
367 		    bytes + bv.bv_len <= max_bytes &&
368 		    bv.bv_offset + bv.bv_len <= lim->max_fast_segment_size) {
369 			nsegs++;
370 			bytes += bv.bv_len;
371 		} else {
372 			if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
373 					lim->max_segments, max_bytes))
374 				goto split;
375 		}
376 
377 		bvprv = bv;
378 		bvprvp = &bvprv;
379 	}
380 
381 	*segs = nsegs;
382 	bio->bi_bvec_gap_bit = ffs(gaps);
383 	return 0;
384 split:
385 	if (bio->bi_opf & REQ_ATOMIC)
386 		return -EINVAL;
387 
388 	/*
389 	 * We can't sanely support splitting for a REQ_NOWAIT bio. Return
390 	 * -EAGAIN if splitting is required so that the caller fails it with an error.
391 	 */
392 	if (bio->bi_opf & REQ_NOWAIT)
393 		return -EAGAIN;
394 
395 	*segs = nsegs;
396 
397 	/*
398 	 * Individual bvecs might not be logical block aligned. Round down the
399 	 * split size so that each bio is properly block size aligned, even if
400 	 * we do not use the full hardware limits.
401 	 *
402 	 * It is possible to submit a bio that can't be split into a valid io:
403 	 * it may either have too many discontiguous vectors for the max
404 	 * segments limit, or contain virtual boundary gaps without having a
405 	 * valid block sized split. A zero byte result means one of those
406 	 * conditions occurred.
407 	 */
408 	bytes = ALIGN_DOWN(bytes, bio_split_alignment(bio, lim));
409 	if (!bytes)
410 		return -EINVAL;
411 
412 	/*
413 	 * Bio splitting may cause subtle trouble such as a hang when doing sync
414 	 * iopoll in the direct I/O path. Given that the performance gain of
415 	 * iopoll for big I/O is trivial, disable iopoll when a split is needed.
416 	 */
417 	bio_clear_polled(bio);
418 	bio->bi_bvec_gap_bit = ffs(gaps);
419 	return bytes >> SECTOR_SHIFT;
420 }
421 EXPORT_SYMBOL_GPL(bio_split_io_at);
422 
423 struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
424 		unsigned *nr_segs)
425 {
426 	return bio_submit_split(bio,
427 		bio_split_rw_at(bio, lim, nr_segs,
428 			get_max_io_size(bio, lim) << SECTOR_SHIFT));
429 }
430 
431 /*
432  * REQ_OP_ZONE_APPEND bios must never be split by the block layer.
433  *
434  * But we want the nr_segs calculation provided by bio_split_rw_at, and having
435  * a good sanity check that the submitter built the bio correctly is nice to
436  * have as well.
437  */
438 struct bio *bio_split_zone_append(struct bio *bio,
439 		const struct queue_limits *lim, unsigned *nr_segs)
440 {
441 	int split_sectors;
442 
443 	split_sectors = bio_split_rw_at(bio, lim, nr_segs,
444 			lim->max_zone_append_sectors << SECTOR_SHIFT);
445 	if (WARN_ON_ONCE(split_sectors > 0))
446 		split_sectors = -EINVAL;
447 	return bio_submit_split(bio, split_sectors);
448 }
449 
450 struct bio *bio_split_write_zeroes(struct bio *bio,
451 		const struct queue_limits *lim, unsigned *nsegs)
452 {
453 	unsigned int max_sectors = get_max_io_size(bio, lim);
454 
455 	*nsegs = 0;
456 
457 	/*
458 	 * An unset limit should normally not happen, as bio submission is keyed
459 	 * off having a non-zero limit.  But SCSI can clear the limit in the
460 	 * I/O completion handler, and we can race and see this.  Splitting to a
461 	 * zero limit obviously doesn't make sense, so band-aid it here.
462 	 */
463 	if (!max_sectors)
464 		return bio;
465 	if (bio_sectors(bio) <= max_sectors)
466 		return bio;
467 	return bio_submit_split(bio, max_sectors);
468 }
469 
470 /**
471  * bio_split_to_limits - split a bio to fit the queue limits
472  * @bio:     bio to be split
473  *
474  * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
475  * if so split off a bio fitting the limits from the beginning of @bio and
476  * return it.  @bio is shortened to the remainder and re-submitted.
477  *
478  * The split bio is allocated from the per-disk bio_split bio_set, which is
479  * provided by the block layer.
480  */
481 struct bio *bio_split_to_limits(struct bio *bio)
482 {
483 	unsigned int nr_segs;
484 
485 	return __bio_split_to_limits(bio, bdev_limits(bio->bi_bdev), &nr_segs);
486 }
487 EXPORT_SYMBOL(bio_split_to_limits);
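
/*
 * Typical caller pattern, e.g. in a stacking driver's submit_bio handler
 * (sketch only):
 *
 *	bio = bio_split_to_limits(bio);
 *	if (!bio)
 *		return;
 *
 * A NULL return means the split failed and the original bio has already been
 * ended with an error; otherwise the returned bio fits the limits and any
 * remainder has been resubmitted by the block layer.
 */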
488 
489 unsigned int blk_recalc_rq_segments(struct request *rq)
490 {
491 	unsigned int nr_phys_segs = 0;
492 	unsigned int bytes = 0;
493 	struct req_iterator iter;
494 	struct bio_vec bv;
495 
496 	if (!rq->bio)
497 		return 0;
498 
499 	switch (bio_op(rq->bio)) {
500 	case REQ_OP_DISCARD:
501 	case REQ_OP_SECURE_ERASE:
502 		if (queue_max_discard_segments(rq->q) > 1) {
503 			struct bio *bio = rq->bio;
504 
505 			for_each_bio(bio)
506 				nr_phys_segs++;
507 			return nr_phys_segs;
508 		}
509 		return 1;
510 	case REQ_OP_WRITE_ZEROES:
511 		return 0;
512 	default:
513 		break;
514 	}
515 
516 	rq_for_each_bvec(bv, rq, iter)
517 		bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
518 				UINT_MAX, BIO_MAX_SIZE);
519 	return nr_phys_segs;
520 }
521 
522 static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
523 						  sector_t offset)
524 {
525 	struct request_queue *q = rq->q;
526 	struct queue_limits *lim = &q->limits;
527 	unsigned int max_sectors, boundary_sectors;
528 	bool is_atomic = rq->cmd_flags & REQ_ATOMIC;
529 
530 	if (blk_rq_is_passthrough(rq))
531 		return q->limits.max_hw_sectors;
532 
533 	boundary_sectors = blk_boundary_sectors(lim, is_atomic);
534 	max_sectors = blk_queue_get_max_sectors(rq);
535 
536 	if (!boundary_sectors ||
537 	    req_op(rq) == REQ_OP_DISCARD ||
538 	    req_op(rq) == REQ_OP_SECURE_ERASE)
539 		return max_sectors;
540 	return min(max_sectors,
541 		   blk_boundary_sectors_left(offset, boundary_sectors));
542 }
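
/*
 * Example of the boundary clamping above: with chunk_sectors == 256 and a
 * request starting at sector 200, blk_boundary_sectors_left() returns
 * 256 - (200 & 255) == 56, so the request is limited to 56 sectors and
 * cannot grow across the chunk boundary at sector 256.
 */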
543 
544 static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
545 		unsigned int nr_phys_segs)
546 {
547 	if (!blk_cgroup_mergeable(req, bio))
548 		goto no_merge;
549 
550 	if (blk_integrity_merge_bio(req->q, req, bio) == false)
551 		goto no_merge;
552 
553 	/* a discard request merge won't add a new segment */
554 	if (req_op(req) == REQ_OP_DISCARD)
555 		return 1;
556 
557 	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
558 		goto no_merge;
559 
560 	/*
561 	 * This will form the start of a new hw segment.  Bump both
562 	 * counters.
563 	 */
564 	req->nr_phys_segments += nr_phys_segs;
565 	if (bio_integrity(bio))
566 		req->nr_integrity_segments += blk_rq_count_integrity_sg(req->q,
567 									bio);
568 	return 1;
569 
570 no_merge:
571 	req_set_nomerge(req->q, req);
572 	return 0;
573 }
574 
575 int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
576 {
577 	if (req_gap_back_merge(req, bio))
578 		return 0;
579 	if (blk_integrity_rq(req) &&
580 	    integrity_req_gap_back_merge(req, bio))
581 		return 0;
582 	if (!bio_crypt_ctx_back_mergeable(req, bio))
583 		return 0;
584 	if (blk_rq_sectors(req) + bio_sectors(bio) >
585 	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
586 		req_set_nomerge(req->q, req);
587 		return 0;
588 	}
589 
590 	return ll_new_hw_segment(req, bio, nr_segs);
591 }
592 
593 static int ll_front_merge_fn(struct request *req, struct bio *bio,
594 		unsigned int nr_segs)
595 {
596 	if (req_gap_front_merge(req, bio))
597 		return 0;
598 	if (blk_integrity_rq(req) &&
599 	    integrity_req_gap_front_merge(req, bio))
600 		return 0;
601 	if (!bio_crypt_ctx_front_mergeable(req, bio))
602 		return 0;
603 	if (blk_rq_sectors(req) + bio_sectors(bio) >
604 	    blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
605 		req_set_nomerge(req->q, req);
606 		return 0;
607 	}
608 
609 	return ll_new_hw_segment(req, bio, nr_segs);
610 }
611 
612 static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
613 		struct request *next)
614 {
615 	unsigned short segments = blk_rq_nr_discard_segments(req);
616 
617 	if (segments >= queue_max_discard_segments(q))
618 		goto no_merge;
619 	if (blk_rq_sectors(req) + bio_sectors(next->bio) >
620 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
621 		goto no_merge;
622 
623 	req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
624 	return true;
625 no_merge:
626 	req_set_nomerge(q, req);
627 	return false;
628 }
629 
630 static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
631 				struct request *next)
632 {
633 	int total_phys_segments;
634 
635 	if (req_gap_back_merge(req, next->bio))
636 		return 0;
637 
638 	/*
639 	 * Will it become too large?
640 	 */
641 	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
642 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
643 		return 0;
644 
645 	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
646 	if (total_phys_segments > blk_rq_get_max_segments(req))
647 		return 0;
648 
649 	if (!blk_cgroup_mergeable(req, next->bio))
650 		return 0;
651 
652 	if (blk_integrity_merge_rq(q, req, next) == false)
653 		return 0;
654 
655 	if (!bio_crypt_ctx_merge_rq(req, next))
656 		return 0;
657 
658 	/* Merge is OK... */
659 	req->nr_phys_segments = total_phys_segments;
660 	req->nr_integrity_segments += next->nr_integrity_segments;
661 	return 1;
662 }
663 
664 /**
665  * blk_rq_set_mixed_merge - mark a request as mixed merge
666  * @rq: request to mark as mixed merge
667  *
668  * Description:
669  *     @rq is about to be mixed merged.  Make sure the attributes
670  *     which can be mixed are set in each bio and mark @rq as mixed
671  *     merged.
672  */
673 static void blk_rq_set_mixed_merge(struct request *rq)
674 {
675 	blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
676 	struct bio *bio;
677 
678 	if (rq->rq_flags & RQF_MIXED_MERGE)
679 		return;
680 
681 	/*
682 	 * @rq will no longer represent mixable attributes for all the
683 	 * contained bios.  It will just track those of the first one.
684 	 * Distribute the attributes to each bio.
685 	 */
686 	for (bio = rq->bio; bio; bio = bio->bi_next) {
687 		WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
688 			     (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
689 		bio->bi_opf |= ff;
690 	}
691 	rq->rq_flags |= RQF_MIXED_MERGE;
692 }
693 
694 static inline blk_opf_t bio_failfast(const struct bio *bio)
695 {
696 	if (bio->bi_opf & REQ_RAHEAD)
697 		return REQ_FAILFAST_MASK;
698 
699 	return bio->bi_opf & REQ_FAILFAST_MASK;
700 }
701 
702 /*
703  * After the request is marked as MIXED_MERGE, any new readahead (RA) bio
704  * merged into it has to be marked as failfast as well, and the request's
705  * failfast flags have to be updated in case of a front merge.
706  */
707 static inline void blk_update_mixed_merge(struct request *req,
708 		struct bio *bio, bool front_merge)
709 {
710 	if (req->rq_flags & RQF_MIXED_MERGE) {
711 		if (bio->bi_opf & REQ_RAHEAD)
712 			bio->bi_opf |= REQ_FAILFAST_MASK;
713 
714 		if (front_merge) {
715 			req->cmd_flags &= ~REQ_FAILFAST_MASK;
716 			req->cmd_flags |= bio->bi_opf & REQ_FAILFAST_MASK;
717 		}
718 	}
719 }
720 
721 static void blk_account_io_merge_request(struct request *req)
722 {
723 	if (req->rq_flags & RQF_IO_STAT) {
724 		part_stat_lock();
725 		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
726 		part_stat_local_dec(req->part,
727 				    in_flight[op_is_write(req_op(req))]);
728 		part_stat_unlock();
729 	}
730 }
731 
732 static enum elv_merge blk_try_req_merge(struct request *req,
733 					struct request *next)
734 {
735 	if (blk_discard_mergable(req))
736 		return ELEVATOR_DISCARD_MERGE;
737 	else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
738 		return ELEVATOR_BACK_MERGE;
739 
740 	return ELEVATOR_NO_MERGE;
741 }
742 
743 static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
744 					      struct bio *bio)
745 {
746 	return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
747 }
748 
749 static bool blk_atomic_write_mergeable_rqs(struct request *rq,
750 					   struct request *next)
751 {
752 	return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
753 }
754 
755 u8 bio_seg_gap(struct request_queue *q, struct bio *prev, struct bio *next,
756 	       u8 gaps_bit)
757 {
758 	struct bio_vec pb, nb;
759 
760 	if (!bio_has_data(prev))
761 		return 0;
762 
763 	gaps_bit = min_not_zero(gaps_bit, prev->bi_bvec_gap_bit);
764 	gaps_bit = min_not_zero(gaps_bit, next->bi_bvec_gap_bit);
765 
766 	bio_get_last_bvec(prev, &pb);
767 	bio_get_first_bvec(next, &nb);
768 	if (!biovec_phys_mergeable(q, &pb, &nb))
769 		gaps_bit = min_not_zero(gaps_bit, ffs(bvec_seg_gap(&pb, &nb)));
770 	return gaps_bit;
771 }
772 
773 /*
774  * For non-mq, this has to be called with the request spinlock acquired.
775  * For mq with scheduling, the appropriate queue wide lock should be held.
776  */
777 static struct request *attempt_merge(struct request_queue *q,
778 				     struct request *req, struct request *next)
779 {
780 	if (!rq_mergeable(req) || !rq_mergeable(next))
781 		return NULL;
782 
783 	if (req_op(req) != req_op(next))
784 		return NULL;
785 
786 	if (req->bio->bi_write_hint != next->bio->bi_write_hint)
787 		return NULL;
788 	if (req->bio->bi_write_stream != next->bio->bi_write_stream)
789 		return NULL;
790 	if (req->bio->bi_ioprio != next->bio->bi_ioprio)
791 		return NULL;
792 	if (!blk_atomic_write_mergeable_rqs(req, next))
793 		return NULL;
794 
795 	/*
796 	 * If we are allowed to merge, then append the bio list
797 	 * from 'next' to 'req' and release 'next'. The merge handler
798 	 * will have updated the segment counts; update the sector
799 	 * counts here. Handle DISCARDs separately, as they
800 	 * have separate settings.
801 	 */
802 
803 	switch (blk_try_req_merge(req, next)) {
804 	case ELEVATOR_DISCARD_MERGE:
805 		if (!req_attempt_discard_merge(q, req, next))
806 			return NULL;
807 		break;
808 	case ELEVATOR_BACK_MERGE:
809 		if (!ll_merge_requests_fn(q, req, next))
810 			return NULL;
811 		break;
812 	default:
813 		return NULL;
814 	}
815 
816 	/*
817 	 * If failfast settings disagree or either of the two is already
818 	 * a mixed merge, mark both as mixed before proceeding.  This
819 	 * makes sure that all involved bios have mixable attributes
820 	 * set properly.
821 	 */
822 	if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
823 	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
824 	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
825 		blk_rq_set_mixed_merge(req);
826 		blk_rq_set_mixed_merge(next);
827 	}
828 
829 	/*
830 	 * At this point we have either done a back merge or front merge. We
831 	 * need the smaller start_time_ns of the merged requests to be the
832 	 * current request for accounting purposes.
833 	 */
834 	if (next->start_time_ns < req->start_time_ns)
835 		req->start_time_ns = next->start_time_ns;
836 
837 	req->phys_gap_bit = bio_seg_gap(req->q, req->biotail, next->bio,
838 					min_not_zero(next->phys_gap_bit,
839 						     req->phys_gap_bit));
840 	req->biotail->bi_next = next->bio;
841 	req->biotail = next->biotail;
842 
843 	req->__data_len += blk_rq_bytes(next);
844 
845 	if (!blk_discard_mergable(req))
846 		elv_merge_requests(q, req, next);
847 
848 	blk_crypto_rq_put_keyslot(next);
849 
850 	/*
851 	 * 'next' is going away, so update stats accordingly
852 	 */
853 	blk_account_io_merge_request(next);
854 
855 	trace_block_rq_merge(next);
856 
857 	/*
858 	 * ownership of bio passed from next to req, return 'next' for
859 	 * the caller to free
860 	 */
861 	next->bio = NULL;
862 	return next;
863 }
864 
865 static struct request *attempt_back_merge(struct request_queue *q,
866 		struct request *rq)
867 {
868 	struct request *next = elv_latter_request(q, rq);
869 
870 	if (next)
871 		return attempt_merge(q, rq, next);
872 
873 	return NULL;
874 }
875 
876 static struct request *attempt_front_merge(struct request_queue *q,
877 		struct request *rq)
878 {
879 	struct request *prev = elv_former_request(q, rq);
880 
881 	if (prev)
882 		return attempt_merge(q, prev, rq);
883 
884 	return NULL;
885 }
886 
887 /*
888  * Try to merge 'next' into 'rq'. Return true if the merge happened, false
889  * otherwise. The caller is responsible for freeing 'next' if the merge
890  * happened.
891  */
892 bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
893 			   struct request *next)
894 {
895 	return attempt_merge(q, rq, next);
896 }
897 
898 bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
899 {
900 	if (!rq_mergeable(rq) || !bio_mergeable(bio))
901 		return false;
902 
903 	if (req_op(rq) != bio_op(bio))
904 		return false;
905 
906 	if (!blk_cgroup_mergeable(rq, bio))
907 		return false;
908 	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
909 		return false;
910 	if (!bio_crypt_rq_ctx_compatible(rq, bio))
911 		return false;
912 	if (rq->bio->bi_write_hint != bio->bi_write_hint)
913 		return false;
914 	if (rq->bio->bi_write_stream != bio->bi_write_stream)
915 		return false;
916 	if (rq->bio->bi_ioprio != bio->bi_ioprio)
917 		return false;
918 	if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
919 		return false;
920 
921 	return true;
922 }
923 
924 enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
925 {
926 	if (blk_discard_mergable(rq))
927 		return ELEVATOR_DISCARD_MERGE;
928 	else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
929 		return ELEVATOR_BACK_MERGE;
930 	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
931 		return ELEVATOR_FRONT_MERGE;
932 	return ELEVATOR_NO_MERGE;
933 }
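
/*
 * Example of the position checks above: a non-discard request covering
 * sectors 100..199 (blk_rq_pos() == 100, blk_rq_sectors() == 100) back-merges
 * with a bio starting at sector 200, and front-merges with a 50-sector bio
 * starting at sector 50, since 100 - 50 == 50 matches the bio's start; any
 * other position yields ELEVATOR_NO_MERGE.
 */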
934 
935 static void blk_account_io_merge_bio(struct request *req)
936 {
937 	if (req->rq_flags & RQF_IO_STAT) {
938 		part_stat_lock();
939 		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
940 		part_stat_unlock();
941 	}
942 }
943 
944 enum bio_merge_status bio_attempt_back_merge(struct request *req,
945 		struct bio *bio, unsigned int nr_segs)
946 {
947 	const blk_opf_t ff = bio_failfast(bio);
948 
949 	if (!ll_back_merge_fn(req, bio, nr_segs))
950 		return BIO_MERGE_FAILED;
951 
952 	trace_block_bio_backmerge(bio);
953 	rq_qos_merge(req->q, req, bio);
954 
955 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
956 		blk_rq_set_mixed_merge(req);
957 
958 	blk_update_mixed_merge(req, bio, false);
959 
960 	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
961 		blk_zone_write_plug_bio_merged(bio);
962 
963 	req->phys_gap_bit = bio_seg_gap(req->q, req->biotail, bio,
964 					req->phys_gap_bit);
965 	req->biotail->bi_next = bio;
966 	req->biotail = bio;
967 	req->__data_len += bio->bi_iter.bi_size;
968 
969 	bio_crypt_free_ctx(bio);
970 
971 	blk_account_io_merge_bio(req);
972 	return BIO_MERGE_OK;
973 }
974 
975 static enum bio_merge_status bio_attempt_front_merge(struct request *req,
976 		struct bio *bio, unsigned int nr_segs)
977 {
978 	const blk_opf_t ff = bio_failfast(bio);
979 
980 	/*
981 	 * A front merge for writes to sequential zones of a zoned block device
982 	 * can happen only if the user submitted writes out of order. Do not
983 	 * merge such a write, so that it fails.
984 	 */
985 	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
986 		return BIO_MERGE_FAILED;
987 
988 	if (!ll_front_merge_fn(req, bio, nr_segs))
989 		return BIO_MERGE_FAILED;
990 
991 	trace_block_bio_frontmerge(bio);
992 	rq_qos_merge(req->q, req, bio);
993 
994 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
995 		blk_rq_set_mixed_merge(req);
996 
997 	blk_update_mixed_merge(req, bio, true);
998 
999 	req->phys_gap_bit = bio_seg_gap(req->q, bio, req->bio,
1000 					req->phys_gap_bit);
1001 	bio->bi_next = req->bio;
1002 	req->bio = bio;
1003 
1004 	req->__sector = bio->bi_iter.bi_sector;
1005 	req->__data_len += bio->bi_iter.bi_size;
1006 
1007 	bio_crypt_do_front_merge(req, bio);
1008 
1009 	blk_account_io_merge_bio(req);
1010 	return BIO_MERGE_OK;
1011 }
1012 
1013 static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
1014 		struct request *req, struct bio *bio)
1015 {
1016 	unsigned short segments = blk_rq_nr_discard_segments(req);
1017 
1018 	if (segments >= queue_max_discard_segments(q))
1019 		goto no_merge;
1020 	if (blk_rq_sectors(req) + bio_sectors(bio) >
1021 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
1022 		goto no_merge;
1023 
1024 	rq_qos_merge(q, req, bio);
1025 
1026 	req->biotail->bi_next = bio;
1027 	req->biotail = bio;
1028 	req->__data_len += bio->bi_iter.bi_size;
1029 	req->nr_phys_segments = segments + 1;
1030 
1031 	blk_account_io_merge_bio(req);
1032 	return BIO_MERGE_OK;
1033 no_merge:
1034 	req_set_nomerge(q, req);
1035 	return BIO_MERGE_FAILED;
1036 }
1037 
1038 static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
1039 						   struct request *rq,
1040 						   struct bio *bio,
1041 						   unsigned int nr_segs,
1042 						   bool sched_allow_merge)
1043 {
1044 	if (!blk_rq_merge_ok(rq, bio))
1045 		return BIO_MERGE_NONE;
1046 
1047 	switch (blk_try_merge(rq, bio)) {
1048 	case ELEVATOR_BACK_MERGE:
1049 		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
1050 			return bio_attempt_back_merge(rq, bio, nr_segs);
1051 		break;
1052 	case ELEVATOR_FRONT_MERGE:
1053 		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
1054 			return bio_attempt_front_merge(rq, bio, nr_segs);
1055 		break;
1056 	case ELEVATOR_DISCARD_MERGE:
1057 		return bio_attempt_discard_merge(q, rq, bio);
1058 	default:
1059 		return BIO_MERGE_NONE;
1060 	}
1061 
1062 	return BIO_MERGE_FAILED;
1063 }
1064 
1065 /**
1066  * blk_attempt_plug_merge - try to merge with %current's plugged list
1067  * @q: request_queue new bio is being queued at
1068  * @bio: new bio being queued
1069  * @nr_segs: number of segments in @bio
1071  *
1072  * Determine whether @bio being queued on @q can be merged with the previous
1073  * request on %current's plugged list.  Returns %true if merge was successful,
1074  * otherwise %false.
1075  *
1076  * Plugging coalesces IOs from the same issuer for the same purpose without
1077  * going through @q->queue_lock.  As such it's more of an issuing mechanism
1078  * than scheduling, and the request, while it may have elvpriv data, is not
1079  * added to the elevator at this point.  In addition, we don't have
1080  * reliable access to the elevator outside queue lock.  Only check basic
1081  * merging parameters without querying the elevator.
1082  *
1083  * Caller must ensure !blk_queue_nomerges(q) beforehand.
1084  */
1085 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
1086 		unsigned int nr_segs)
1087 {
1088 	struct blk_plug *plug = current->plug;
1089 	struct request *rq;
1090 
1091 	if (!plug || rq_list_empty(&plug->mq_list))
1092 		return false;
1093 
1094 	rq = plug->mq_list.tail;
1095 	if (rq->q == q)
1096 		return blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
1097 			BIO_MERGE_OK;
1098 	else if (!plug->multiple_queues)
1099 		return false;
1100 
1101 	rq_list_for_each(&plug->mq_list, rq) {
1102 		if (rq->q != q)
1103 			continue;
1104 		if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
1105 		    BIO_MERGE_OK)
1106 			return true;
1107 		break;
1108 	}
1109 	return false;
1110 }
1111 
1112 /*
1113  * Iterate list of requests and see if we can merge this bio with any
1114  * of them.
1115  */
1116 bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
1117 			struct bio *bio, unsigned int nr_segs)
1118 {
1119 	struct request *rq;
1120 	int checked = 8;	/* limit how many requests we consider for merging */
1121 
1122 	list_for_each_entry_reverse(rq, list, queuelist) {
1123 		if (!checked--)
1124 			break;
1125 
1126 		switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
1127 		case BIO_MERGE_NONE:
1128 			continue;
1129 		case BIO_MERGE_OK:
1130 			return true;
1131 		case BIO_MERGE_FAILED:
1132 			return false;
1133 		}
1134 
1135 	}
1136 
1137 	return false;
1138 }
1139 EXPORT_SYMBOL_GPL(blk_bio_list_merge);
1140 
1141 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
1142 		unsigned int nr_segs, struct request **merged_request)
1143 {
1144 	struct request *rq;
1145 
1146 	switch (elv_merge(q, &rq, bio)) {
1147 	case ELEVATOR_BACK_MERGE:
1148 		if (!blk_mq_sched_allow_merge(q, rq, bio))
1149 			return false;
1150 		if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
1151 			return false;
1152 		*merged_request = attempt_back_merge(q, rq);
1153 		if (!*merged_request)
1154 			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
1155 		return true;
1156 	case ELEVATOR_FRONT_MERGE:
1157 		if (!blk_mq_sched_allow_merge(q, rq, bio))
1158 			return false;
1159 		if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
1160 			return false;
1161 		*merged_request = attempt_front_merge(q, rq);
1162 		if (!*merged_request)
1163 			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
1164 		return true;
1165 	case ELEVATOR_DISCARD_MERGE:
1166 		return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
1167 	default:
1168 		return false;
1169 	}
1170 }
1171 EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
1172