xref: /linux/block/blk-merge.c (revision 5b026e34120766408e76ba19a0e33a9dc996f9f0)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Functions related to segment and merge handling
4  */
5 #include <linux/kernel.h>
6 #include <linux/module.h>
7 #include <linux/bio.h>
8 #include <linux/blkdev.h>
9 #include <linux/blk-integrity.h>
10 #include <linux/scatterlist.h>
11 #include <linux/part_stat.h>
12 #include <linux/blk-cgroup.h>
13 
14 #include <trace/events/block.h>
15 
16 #include "blk.h"
17 #include "blk-mq-sched.h"
18 #include "blk-rq-qos.h"
19 #include "blk-throttle.h"
20 
21 static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv)
22 {
23 	*bv = mp_bvec_iter_bvec(bio->bi_io_vec, bio->bi_iter);
24 }
25 
26 static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv)
27 {
28 	struct bvec_iter iter = bio->bi_iter;
29 	int idx;
30 
31 	bio_get_first_bvec(bio, bv);
32 	if (bv->bv_len == bio->bi_iter.bi_size)
33 		return;		/* this bio only has a single bvec */
34 
35 	bio_advance_iter(bio, &iter, iter.bi_size);
36 
37 	if (!iter.bi_bvec_done)
38 		idx = iter.bi_idx - 1;
39 	else	/* in the middle of bvec */
40 		idx = iter.bi_idx;
41 
42 	*bv = bio->bi_io_vec[idx];
43 
	44 	/*
	45 	 * iter.bi_bvec_done records the actual length of the last bvec
	46 	 * if this bio ends in the middle of an io vector
47 	 */
48 	if (iter.bi_bvec_done)
49 		bv->bv_len = iter.bi_bvec_done;
50 }
51 
52 static inline bool bio_will_gap(struct request_queue *q,
53 		struct request *prev_rq, struct bio *prev, struct bio *next)
54 {
55 	struct bio_vec pb, nb;
56 
57 	if (!bio_has_data(prev) || !queue_virt_boundary(q))
58 		return false;
59 
60 	/*
61 	 * Don't merge if the 1st bio starts with a non-zero offset, otherwise it
62 	 * is quite difficult to respect the sg gap limit.  We work hard to
63 	 * merge a huge number of small single bios in the mkfs case.
64 	 */
65 	if (prev_rq)
66 		bio_get_first_bvec(prev_rq->bio, &pb);
67 	else
68 		bio_get_first_bvec(prev, &pb);
69 	if (pb.bv_offset & queue_virt_boundary(q))
70 		return true;
71 
72 	/*
73 	 * We don't need to worry about the merged segment ending at an
74 	 * unaligned virt boundary:
75 	 *
76 	 * - if 'pb' ends aligned, the merged segment ends aligned
77 	 * - if 'pb' ends unaligned, the next bio must include
78 	 *   the single bvec 'nb', otherwise 'nb' can't
79 	 *   merge with 'pb'
80 	 */
81 	bio_get_last_bvec(prev, &pb);
82 	bio_get_first_bvec(next, &nb);
83 	if (biovec_phys_mergeable(q, &pb, &nb))
84 		return false;
85 	return __bvec_gap_to_prev(&q->limits, &pb, nb.bv_offset);
86 }
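
/*
 * Editor's illustration, not part of the kernel source: a standalone sketch of
 * the virt-boundary "gap" rule that bio_will_gap() builds on, assuming a 4 KiB
 * virt_boundary_mask (0xfff) as used for NVMe-style PRP lists.  Two vectors
 * may share a request only if the earlier one ends on the boundary and the
 * later one starts on it; the helper below roughly mirrors __bvec_gap_to_prev().
 */
#include <stdbool.h>
#include <stdio.h>

static bool gap_to_prev(unsigned long mask, unsigned int prev_offset,
			unsigned int prev_len, unsigned int next_offset)
{
	/* gap if the previous vector does not end on the boundary ... */
	if ((prev_offset + prev_len) & mask)
		return true;
	/* ... or the next one does not start on it */
	return next_offset & mask;
}

int main(void)
{
	unsigned long mask = 0xfff;	/* 4 KiB virtual boundary */

	/* prev ends at a 4 KiB boundary, next starts at one: no gap */
	printf("%d\n", gap_to_prev(mask, 0x800, 0x800, 0x000));	/* 0 */
	/* prev ends mid-boundary: gap, the bios must not be merged */
	printf("%d\n", gap_to_prev(mask, 0x000, 0x600, 0x600));	/* 1 */
	return 0;
}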
87 
88 static inline bool req_gap_back_merge(struct request *req, struct bio *bio)
89 {
90 	return bio_will_gap(req->q, req, req->biotail, bio);
91 }
92 
93 static inline bool req_gap_front_merge(struct request *req, struct bio *bio)
94 {
95 	return bio_will_gap(req->q, NULL, bio, req->bio);
96 }
97 
98 /*
99  * The max size one bio can handle is UINT_MAX because bvec_iter.bi_size
100  * is defined as 'unsigned int'; meanwhile it has to be aligned with the
101  * logical block size, which is the minimum accepted unit by hardware.
102  */
103 static unsigned int bio_allowed_max_sectors(const struct queue_limits *lim)
104 {
105 	return round_down(UINT_MAX, lim->logical_block_size) >> SECTOR_SHIFT;
106 }
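
/*
 * Editor's illustration, not part of the kernel source: the rounding done by
 * bio_allowed_max_sectors() above, computed standalone for two common logical
 * block sizes.
 */
#include <limits.h>
#include <stdio.h>

int main(void)
{
	unsigned int lbs;

	for (lbs = 512; lbs <= 4096; lbs *= 8) {
		/* round_down(UINT_MAX, lbs) >> SECTOR_SHIFT */
		unsigned int max_bytes = UINT_MAX - (UINT_MAX % lbs);

		/* prints 8388607 for lbs=512 and 8388600 for lbs=4096 */
		printf("lbs=%u -> %u sectors\n", lbs, max_bytes >> 9);
	}
	return 0;
}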
107 
108 static struct bio *bio_split_discard(struct bio *bio,
109 				     const struct queue_limits *lim,
110 				     unsigned *nsegs, struct bio_set *bs)
111 {
112 	unsigned int max_discard_sectors, granularity;
113 	sector_t tmp;
114 	unsigned split_sectors;
115 
116 	*nsegs = 1;
117 
118 	granularity = max(lim->discard_granularity >> 9, 1U);
119 
120 	max_discard_sectors =
121 		min(lim->max_discard_sectors, bio_allowed_max_sectors(lim));
122 	max_discard_sectors -= max_discard_sectors % granularity;
123 	if (unlikely(!max_discard_sectors))
124 		return NULL;
125 
126 	if (bio_sectors(bio) <= max_discard_sectors)
127 		return NULL;
128 
129 	split_sectors = max_discard_sectors;
130 
131 	/*
132 	 * If the next starting sector would be misaligned, stop the discard at
133 	 * the previous aligned sector.
134 	 */
135 	tmp = bio->bi_iter.bi_sector + split_sectors -
136 		((lim->discard_alignment >> 9) % granularity);
137 	tmp = sector_div(tmp, granularity);
138 
139 	if (split_sectors > tmp)
140 		split_sectors -= tmp;
141 
142 	return bio_split(bio, split_sectors, GFP_NOIO, bs);
143 }
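
/*
 * Editor's illustration, not part of the kernel source: the alignment
 * adjustment at the end of bio_split_discard() above, for a made-up discard
 * granularity of 8 sectors (4 KiB), zero discard_alignment, a 65536-sector
 * cap, and a discard bio starting at sector 3.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long start = 3;		/* bio->bi_iter.bi_sector */
	unsigned int granularity = 8;		/* in sectors */
	unsigned int alignment = 0;		/* discard_alignment in sectors */
	unsigned int split_sectors = 65536;	/* capped max_discard_sectors */
	unsigned long long tmp;

	/* misalignment of the sector that would follow the split */
	tmp = (start + split_sectors - (alignment % granularity)) % granularity;
	if (split_sectors > tmp)
		split_sectors -= tmp;

	/* prints 65533: the remainder then starts at sector 65536, aligned */
	printf("split at %u sectors\n", split_sectors);
	return 0;
}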
144 
145 static struct bio *bio_split_write_zeroes(struct bio *bio,
146 					  const struct queue_limits *lim,
147 					  unsigned *nsegs, struct bio_set *bs)
148 {
149 	*nsegs = 0;
150 	if (!lim->max_write_zeroes_sectors)
151 		return NULL;
152 	if (bio_sectors(bio) <= lim->max_write_zeroes_sectors)
153 		return NULL;
154 	return bio_split(bio, lim->max_write_zeroes_sectors, GFP_NOIO, bs);
155 }
156 
157 static inline unsigned int blk_boundary_sectors(const struct queue_limits *lim,
158 						bool is_atomic)
159 {
160 	/*
161 	 * chunk_sectors must be a multiple of atomic_write_boundary_sectors if
162 	 * both non-zero.
163 	 */
164 	if (is_atomic && lim->atomic_write_boundary_sectors)
165 		return lim->atomic_write_boundary_sectors;
166 
167 	return lim->chunk_sectors;
168 }
169 
170 /*
171  * Return the maximum number of sectors from the start of a bio that may be
172  * submitted as a single request to a block device. If enough sectors remain,
173  * align the end to the physical block size. Otherwise align the end to the
174  * logical block size. This approach minimizes the number of non-aligned
175  * requests that are submitted to a block device if the start of a bio is not
176  * aligned to a physical block boundary.
177  */
178 static inline unsigned get_max_io_size(struct bio *bio,
179 				       const struct queue_limits *lim)
180 {
181 	unsigned pbs = lim->physical_block_size >> SECTOR_SHIFT;
182 	unsigned lbs = lim->logical_block_size >> SECTOR_SHIFT;
183 	bool is_atomic = bio->bi_opf & REQ_ATOMIC;
184 	unsigned boundary_sectors = blk_boundary_sectors(lim, is_atomic);
185 	unsigned max_sectors, start, end;
186 
187 	/*
188 	 * We ignore lim->max_sectors for atomic writes because it may be less
189 	 * than the actual bio size, which we cannot tolerate.
190 	 */
191 	if (is_atomic)
192 		max_sectors = lim->atomic_write_max_sectors;
193 	else
194 		max_sectors = lim->max_sectors;
195 
196 	if (boundary_sectors) {
197 		max_sectors = min(max_sectors,
198 			blk_boundary_sectors_left(bio->bi_iter.bi_sector,
199 					      boundary_sectors));
200 	}
201 
202 	start = bio->bi_iter.bi_sector & (pbs - 1);
203 	end = (start + max_sectors) & ~(pbs - 1);
204 	if (end > start)
205 		return end - start;
206 	return max_sectors & ~(lbs - 1);
207 }
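
/*
 * Editor's illustration, not part of the kernel source: the end-alignment
 * logic of get_max_io_size() above with made-up numbers: a 4 KiB physical /
 * 512-byte logical block size (pbs = 8, lbs = 1 in sectors), max_sectors =
 * 1280, and a bio starting at sector 10.
 */
#include <stdio.h>

int main(void)
{
	unsigned int pbs = 8, lbs = 1;
	unsigned int max_sectors = 1280;
	unsigned long long sector = 10;
	unsigned int start, end;

	start = sector & (pbs - 1);			/* 2: offset in phys block */
	end = (start + max_sectors) & ~(pbs - 1);	/* 1280 */
	if (end > start)
		max_sectors = end - start;		/* 1278 */
	else
		max_sectors &= ~(lbs - 1);

	/* sector 10 + 1278 = 1288, which ends 4 KiB aligned */
	printf("max io size: %u sectors\n", max_sectors);
	return 0;
}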
208 
209 /**
210  * get_max_segment_size() - maximum number of bytes to add as a single segment
211  * @lim: Request queue limits.
212  * @start_page: Page to which @offset is relative.
213  * @offset: Offset from @start_page at which the segment would be added.
214  *
215  * Returns the maximum number of bytes that can be added as a single segment.
216  */
217 static inline unsigned get_max_segment_size(const struct queue_limits *lim,
218 		struct page *start_page, unsigned long offset)
219 {
220 	unsigned long mask = lim->seg_boundary_mask;
221 
222 	offset = mask & (page_to_phys(start_page) + offset);
223 
224 	/*
225 	 * Prevent an overflow if mask = ULONG_MAX and offset = 0 by adding 1
226 	 * after having calculated the minimum.
227 	 */
228 	return min(mask - offset, (unsigned long)lim->max_segment_size - 1) + 1;
229 }
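
/*
 * Editor's illustration, not part of the kernel source: the clamping done by
 * get_max_segment_size() above, assuming a 64 KiB segment boundary mask
 * (0xffff), a 64 KiB max_segment_size, and a segment starting at physical
 * address 0x1234f000.
 */
#include <stdio.h>

int main(void)
{
	unsigned long mask = 0xffff;		/* seg_boundary_mask */
	unsigned long max_segment_size = 0x10000;
	unsigned long phys = 0x1234f000;	/* page_to_phys() + offset */
	unsigned long offset = phys & mask;	/* 0xf000 */
	unsigned long len;

	/* overflow-safe min as in the kernel helper: add 1 after the min */
	len = (mask - offset < max_segment_size - 1 ?
	       mask - offset : max_segment_size - 1) + 1;

	/* prints 4096: only 4 KiB remain before the 64 KiB boundary */
	printf("max segment bytes: %lu\n", len);
	return 0;
}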
230 
231 /**
232  * bvec_split_segs - verify whether or not a bvec should be split in the middle
233  * @lim:      [in] queue limits to split based on
234  * @bv:       [in] bvec to examine
235  * @nsegs:    [in,out] Number of segments in the bio being built. Incremented
236  *            by the number of segments from @bv that may be appended to that
237  *            bio without exceeding @max_segs
238  * @bytes:    [in,out] Number of bytes in the bio being built. Incremented
239  *            by the number of bytes from @bv that may be appended to that
240  *            bio without exceeding @max_bytes
241  * @max_segs: [in] upper bound for *@nsegs
242  * @max_bytes: [in] upper bound for *@bytes
243  *
244  * When splitting a bio, it can happen that a bvec is encountered that is too
245  * big to fit in a single segment and hence that it has to be split in the
246  * middle. This function verifies whether or not that should happen. The value
247  * %true is returned if and only if appending the entire @bv to a bio with
248  * *@nsegs segments and *@bytes bytes would make that bio unacceptable for
249  * the block driver.
250  */
251 static bool bvec_split_segs(const struct queue_limits *lim,
252 		const struct bio_vec *bv, unsigned *nsegs, unsigned *bytes,
253 		unsigned max_segs, unsigned max_bytes)
254 {
255 	unsigned max_len = min(max_bytes, UINT_MAX) - *bytes;
256 	unsigned len = min(bv->bv_len, max_len);
257 	unsigned total_len = 0;
258 	unsigned seg_size = 0;
259 
260 	while (len && *nsegs < max_segs) {
261 		seg_size = get_max_segment_size(lim, bv->bv_page,
262 						bv->bv_offset + total_len);
263 		seg_size = min(seg_size, len);
264 
265 		(*nsegs)++;
266 		total_len += seg_size;
267 		len -= seg_size;
268 
269 		if ((bv->bv_offset + total_len) & lim->virt_boundary_mask)
270 			break;
271 	}
272 
273 	*bytes += total_len;
274 
275 	/* tell the caller to split the bvec if it is too big to fit */
276 	return len > 0 || bv->bv_len > max_len;
277 }
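
/*
 * Editor's illustration, not part of the kernel source: how a large
 * multi-page bvec gets carved into segments, in the spirit of
 * bvec_split_segs() above, assuming a 32 KiB max_segment_size, no virt
 * boundary, and a 200 KiB bvec starting at offset 0.
 */
#include <stdio.h>

int main(void)
{
	unsigned int max_segment_size = 32 * 1024;
	unsigned int len = 200 * 1024;		/* bv->bv_len */
	unsigned int nsegs = 0, bytes = 0;

	while (len) {
		unsigned int seg = len < max_segment_size ?
				   len : max_segment_size;

		nsegs++;
		bytes += seg;
		len -= seg;
	}

	/* prints "7 segments, 204800 bytes": 6 x 32 KiB + 1 x 8 KiB */
	printf("%u segments, %u bytes\n", nsegs, bytes);
	return 0;
}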
278 
279 /**
280  * bio_split_rw - split a bio in two bios
281  * @bio:  [in] bio to be split
282  * @lim:  [in] queue limits to split based on
283  * @segs: [out] number of segments in the bio with the first part of the sectors
284  * @bs:	  [in] bio set to allocate the clone from
285  * @max_bytes: [in] maximum number of bytes per bio
286  *
287  * Clone @bio, update the bi_iter of the clone to represent the first sectors
288  * of @bio and update @bio->bi_iter to represent the remaining sectors. The
289  * following is guaranteed for the cloned bio:
290  * - That it has at most @max_bytes worth of data
291  * - That it has at most @lim->max_segments segments.
292  *
293  * Except for discard requests the cloned bio will point at the bi_io_vec of
294  * the original bio. It is the responsibility of the caller to ensure that the
295  * original bio is not freed before the cloned bio. The caller is also
296  * responsible for ensuring that @bs is only destroyed after processing of the
297  * split bio has finished.
298  */
299 struct bio *bio_split_rw(struct bio *bio, const struct queue_limits *lim,
300 		unsigned *segs, struct bio_set *bs, unsigned max_bytes)
301 {
302 	struct bio_vec bv, bvprv, *bvprvp = NULL;
303 	struct bvec_iter iter;
304 	unsigned nsegs = 0, bytes = 0;
305 
306 	bio_for_each_bvec(bv, bio, iter) {
307 		/*
308 		 * If the queue doesn't support SG gaps and adding this
309 		 * offset would create a gap, disallow it.
310 		 */
311 		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv.bv_offset))
312 			goto split;
313 
314 		if (nsegs < lim->max_segments &&
315 		    bytes + bv.bv_len <= max_bytes &&
316 		    bv.bv_offset + bv.bv_len <= PAGE_SIZE) {
317 			nsegs++;
318 			bytes += bv.bv_len;
319 		} else {
320 			if (bvec_split_segs(lim, &bv, &nsegs, &bytes,
321 					lim->max_segments, max_bytes))
322 				goto split;
323 		}
324 
325 		bvprv = bv;
326 		bvprvp = &bvprv;
327 	}
328 
329 	*segs = nsegs;
330 	return NULL;
331 split:
332 	if (bio->bi_opf & REQ_ATOMIC) {
333 		bio->bi_status = BLK_STS_INVAL;
334 		bio_endio(bio);
335 		return ERR_PTR(-EINVAL);
336 	}
337 	/*
338 	 * We can't sanely support splitting for a REQ_NOWAIT bio. End it
339 	 * with EAGAIN if splitting is required and return an error pointer.
340 	 */
341 	if (bio->bi_opf & REQ_NOWAIT) {
342 		bio->bi_status = BLK_STS_AGAIN;
343 		bio_endio(bio);
344 		return ERR_PTR(-EAGAIN);
345 	}
346 
347 	*segs = nsegs;
348 
349 	/*
350 	 * Individual bvecs might not be logical block aligned. Round down the
351 	 * split size so that each bio is properly block size aligned, even if
352 	 * we do not use the full hardware limits.
353 	 */
354 	bytes = ALIGN_DOWN(bytes, lim->logical_block_size);
355 
356 	/*
357 	 * Bio splitting may cause subtle trouble such as a hang when doing sync
358 	 * iopoll in the direct IO path. Given that the performance gain of iopoll
359 	 * for big IO can be trivial, disable iopoll when a split is needed.
360 	 */
361 	bio_clear_polled(bio);
362 	return bio_split(bio, bytes >> SECTOR_SHIFT, GFP_NOIO, bs);
363 }
364 EXPORT_SYMBOL_GPL(bio_split_rw);
365 
366 /**
367  * __bio_split_to_limits - split a bio to fit the queue limits
368  * @bio:     bio to be split
369  * @lim:     queue limits to split based on
370  * @nr_segs: returns the number of segments in the returned bio
371  *
372  * Check if @bio needs splitting based on the queue limits, and if so split off
373  * a bio fitting the limits from the beginning of @bio and return it.  @bio is
374  * shortened to the remainder and re-submitted.
375  *
376  * The split bio is allocated from @bio->bi_bdev->bd_disk->bio_split, which
377  * is provided by the block layer.
378  */
379 struct bio *__bio_split_to_limits(struct bio *bio,
380 				  const struct queue_limits *lim,
381 				  unsigned int *nr_segs)
382 {
383 	struct bio_set *bs = &bio->bi_bdev->bd_disk->bio_split;
384 	struct bio *split;
385 
386 	switch (bio_op(bio)) {
387 	case REQ_OP_DISCARD:
388 	case REQ_OP_SECURE_ERASE:
389 		split = bio_split_discard(bio, lim, nr_segs, bs);
390 		break;
391 	case REQ_OP_WRITE_ZEROES:
392 		split = bio_split_write_zeroes(bio, lim, nr_segs, bs);
393 		break;
394 	default:
395 		split = bio_split_rw(bio, lim, nr_segs, bs,
396 				get_max_io_size(bio, lim) << SECTOR_SHIFT);
397 		if (IS_ERR(split))
398 			return NULL;
399 		break;
400 	}
401 
402 	if (split) {
403 		/* there is no chance to merge the split bio */
404 		split->bi_opf |= REQ_NOMERGE;
405 
406 		blkcg_bio_issue_init(split);
407 		bio_chain(split, bio);
408 		trace_block_split(split, bio->bi_iter.bi_sector);
409 		WARN_ON_ONCE(bio_zone_write_plugging(bio));
410 		submit_bio_noacct(bio);
411 		return split;
412 	}
413 	return bio;
414 }
415 
416 /**
417  * bio_split_to_limits - split a bio to fit the queue limits
418  * @bio:     bio to be split
419  *
420  * Check if @bio needs splitting based on the queue limits of @bio->bi_bdev, and
421  * if so split off a bio fitting the limits from the beginning of @bio and
422  * return it.  @bio is shortened to the remainder and re-submitted.
423  *
424  * The split bio is allocated from @bio->bi_bdev->bd_disk->bio_split, which
425  * is provided by the block layer.
426  */
427 struct bio *bio_split_to_limits(struct bio *bio)
428 {
429 	const struct queue_limits *lim = &bdev_get_queue(bio->bi_bdev)->limits;
430 	unsigned int nr_segs;
431 
432 	if (bio_may_exceed_limits(bio, lim))
433 		return __bio_split_to_limits(bio, lim, &nr_segs);
434 	return bio;
435 }
436 EXPORT_SYMBOL(bio_split_to_limits);
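
/*
 * Editor's sketch, not part of the kernel source: the usual calling pattern
 * for the exported bio_split_to_limits() in a stacking driver's ->submit_bio()
 * handler, based on the return convention above (NULL means the bio has
 * already been ended with an error, e.g. a REQ_NOWAIT bio that would have
 * required a split).  "my_submit_bio" is a made-up name.
 */
static void my_submit_bio(struct bio *bio)
{
	/* split off a front part that fits the queue limits, if needed */
	bio = bio_split_to_limits(bio);
	if (!bio)
		return;

	/* ... remap and pass 'bio', which now fits the limits, down ... */
}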
437 
438 unsigned int blk_recalc_rq_segments(struct request *rq)
439 {
440 	unsigned int nr_phys_segs = 0;
441 	unsigned int bytes = 0;
442 	struct req_iterator iter;
443 	struct bio_vec bv;
444 
445 	if (!rq->bio)
446 		return 0;
447 
448 	switch (bio_op(rq->bio)) {
449 	case REQ_OP_DISCARD:
450 	case REQ_OP_SECURE_ERASE:
451 		if (queue_max_discard_segments(rq->q) > 1) {
452 			struct bio *bio = rq->bio;
453 
454 			for_each_bio(bio)
455 				nr_phys_segs++;
456 			return nr_phys_segs;
457 		}
458 		return 1;
459 	case REQ_OP_WRITE_ZEROES:
460 		return 0;
461 	default:
462 		break;
463 	}
464 
465 	rq_for_each_bvec(bv, rq, iter)
466 		bvec_split_segs(&rq->q->limits, &bv, &nr_phys_segs, &bytes,
467 				UINT_MAX, UINT_MAX);
468 	return nr_phys_segs;
469 }
470 
471 static inline struct scatterlist *blk_next_sg(struct scatterlist **sg,
472 		struct scatterlist *sglist)
473 {
474 	if (!*sg)
475 		return sglist;
476 
477 	/*
478 	 * If the driver previously mapped a shorter list, we could see a
479 	 * termination bit prematurely unless it fully inits the sg table
480 	 * on each mapping. We KNOW that there must be more entries here
481 	 * or the driver would be buggy, so force clear the termination bit
482 	 * to avoid doing a full sg_init_table() in drivers for each command.
483 	 */
484 	sg_unmark_end(*sg);
485 	return sg_next(*sg);
486 }
487 
488 static unsigned blk_bvec_map_sg(struct request_queue *q,
489 		struct bio_vec *bvec, struct scatterlist *sglist,
490 		struct scatterlist **sg)
491 {
492 	unsigned nbytes = bvec->bv_len;
493 	unsigned nsegs = 0, total = 0;
494 
495 	while (nbytes > 0) {
496 		unsigned offset = bvec->bv_offset + total;
497 		unsigned len = min(get_max_segment_size(&q->limits,
498 				   bvec->bv_page, offset), nbytes);
499 		struct page *page = bvec->bv_page;
500 
501 		/*
502 		 * Unfortunately a fair number of drivers barf on scatterlists
503 		 * that have an offset larger than PAGE_SIZE, despite other
504 		 * subsystems dealing with that invariant just fine.  For now
505 		 * stick to the legacy format where we never present those from
506 		 * the block layer, but the code below should be removed once
507 		 * these offenders (mostly MMC/SD drivers) are fixed.
508 		 */
509 		page += (offset >> PAGE_SHIFT);
510 		offset &= ~PAGE_MASK;
511 
512 		*sg = blk_next_sg(sg, sglist);
513 		sg_set_page(*sg, page, len, offset);
514 
515 		total += len;
516 		nbytes -= len;
517 		nsegs++;
518 	}
519 
520 	return nsegs;
521 }
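
/*
 * Editor's illustration, not part of the kernel source: the offset
 * normalisation performed in blk_bvec_map_sg() above, assuming 4 KiB pages.
 * A byte offset of 0x2300 into a multi-page bvec is presented to drivers as
 * "2 pages further in, offset 0x300", so no sg entry carries an offset of
 * PAGE_SIZE or more.
 */
#include <stdio.h>

int main(void)
{
	unsigned int page_shift = 12;			/* 4 KiB pages */
	unsigned int page_mask = (1u << page_shift) - 1;
	unsigned int offset = 0x2300;			/* bv_offset + total */

	unsigned int page_advance = offset >> page_shift;	/* 2 */
	unsigned int sg_offset = offset & page_mask;		/* 0x300 */

	printf("advance %u pages, in-page offset 0x%x\n",
	       page_advance, sg_offset);
	return 0;
}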
522 
523 static inline int __blk_bvec_map_sg(struct bio_vec bv,
524 		struct scatterlist *sglist, struct scatterlist **sg)
525 {
526 	*sg = blk_next_sg(sg, sglist);
527 	sg_set_page(*sg, bv.bv_page, bv.bv_len, bv.bv_offset);
528 	return 1;
529 }
530 
531 /* only try to merge bvecs into one sg if they are from two bios */
532 static inline bool
533 __blk_segment_map_sg_merge(struct request_queue *q, struct bio_vec *bvec,
534 			   struct bio_vec *bvprv, struct scatterlist **sg)
535 {
536 
537 	int nbytes = bvec->bv_len;
538 
539 	if (!*sg)
540 		return false;
541 
542 	if ((*sg)->length + nbytes > queue_max_segment_size(q))
543 		return false;
544 
545 	if (!biovec_phys_mergeable(q, bvprv, bvec))
546 		return false;
547 
548 	(*sg)->length += nbytes;
549 
550 	return true;
551 }
552 
553 static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
554 			     struct scatterlist *sglist,
555 			     struct scatterlist **sg)
556 {
557 	struct bio_vec bvec, bvprv = { NULL };
558 	struct bvec_iter iter;
559 	int nsegs = 0;
560 	bool new_bio = false;
561 
562 	for_each_bio(bio) {
563 		bio_for_each_bvec(bvec, bio, iter) {
564 			/*
565 			 * Only try to merge bvecs from two bios, given that
566 			 * bio-internal merging was already done when adding
567 			 * pages to the bio
568 			 */
569 			if (new_bio &&
570 			    __blk_segment_map_sg_merge(q, &bvec, &bvprv, sg))
571 				goto next_bvec;
572 
573 			if (bvec.bv_offset + bvec.bv_len <= PAGE_SIZE)
574 				nsegs += __blk_bvec_map_sg(bvec, sglist, sg);
575 			else
576 				nsegs += blk_bvec_map_sg(q, &bvec, sglist, sg);
577  next_bvec:
578 			new_bio = false;
579 		}
580 		if (likely(bio->bi_iter.bi_size)) {
581 			bvprv = bvec;
582 			new_bio = true;
583 		}
584 	}
585 
586 	return nsegs;
587 }
588 
589 /*
590  * map a request to a scatterlist, return the number of sg entries set up.
591  * The caller must make sure sg can hold rq->nr_phys_segments entries
592  */
593 int __blk_rq_map_sg(struct request_queue *q, struct request *rq,
594 		struct scatterlist *sglist, struct scatterlist **last_sg)
595 {
596 	int nsegs = 0;
597 
598 	if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
599 		nsegs = __blk_bvec_map_sg(rq->special_vec, sglist, last_sg);
600 	else if (rq->bio)
601 		nsegs = __blk_bios_map_sg(q, rq->bio, sglist, last_sg);
602 
603 	if (*last_sg)
604 		sg_mark_end(*last_sg);
605 
606 	/*
607 	 * Something must have gone wrong if the computed number of
608 	 * segments is bigger than the request's number of physical segments
609 	 */
610 	WARN_ON(nsegs > blk_rq_nr_phys_segments(rq));
611 
612 	return nsegs;
613 }
614 EXPORT_SYMBOL(__blk_rq_map_sg);
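
/*
 * Editor's sketch, not part of the kernel source: typical driver-side use of
 * the exported mapping helper through the blk_rq_map_sg() wrapper.  The table
 * must hold at least blk_rq_nr_phys_segments(rq) entries, as the comment
 * above states; "my_prep_rq" and "sgl" are made-up names.
 */
static int my_prep_rq(struct request_queue *q, struct request *rq,
		      struct scatterlist *sgl)
{
	int nents;

	sg_init_table(sgl, blk_rq_nr_phys_segments(rq));
	nents = blk_rq_map_sg(q, rq, sgl);

	/* ... dma_map_sg() and hand 'sgl'/'nents' to the hardware ... */
	return nents;
}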
615 
616 static inline unsigned int blk_rq_get_max_sectors(struct request *rq,
617 						  sector_t offset)
618 {
619 	struct request_queue *q = rq->q;
620 	struct queue_limits *lim = &q->limits;
621 	unsigned int max_sectors, boundary_sectors;
622 	bool is_atomic = rq->cmd_flags & REQ_ATOMIC;
623 
624 	if (blk_rq_is_passthrough(rq))
625 		return q->limits.max_hw_sectors;
626 
627 	boundary_sectors = blk_boundary_sectors(lim, is_atomic);
628 	max_sectors = blk_queue_get_max_sectors(rq);
629 
630 	if (!boundary_sectors ||
631 	    req_op(rq) == REQ_OP_DISCARD ||
632 	    req_op(rq) == REQ_OP_SECURE_ERASE)
633 		return max_sectors;
634 	return min(max_sectors,
635 		   blk_boundary_sectors_left(offset, boundary_sectors));
636 }
637 
638 static inline int ll_new_hw_segment(struct request *req, struct bio *bio,
639 		unsigned int nr_phys_segs)
640 {
641 	if (!blk_cgroup_mergeable(req, bio))
642 		goto no_merge;
643 
644 	if (blk_integrity_merge_bio(req->q, req, bio) == false)
645 		goto no_merge;
646 
647 	/* discard request merge won't add new segment */
648 	if (req_op(req) == REQ_OP_DISCARD)
649 		return 1;
650 
651 	if (req->nr_phys_segments + nr_phys_segs > blk_rq_get_max_segments(req))
652 		goto no_merge;
653 
654 	/*
655 	 * This will form the start of a new hw segment.  Bump the
656 	 * segment counter.
657 	 */
658 	req->nr_phys_segments += nr_phys_segs;
659 	return 1;
660 
661 no_merge:
662 	req_set_nomerge(req->q, req);
663 	return 0;
664 }
665 
666 int ll_back_merge_fn(struct request *req, struct bio *bio, unsigned int nr_segs)
667 {
668 	if (req_gap_back_merge(req, bio))
669 		return 0;
670 	if (blk_integrity_rq(req) &&
671 	    integrity_req_gap_back_merge(req, bio))
672 		return 0;
673 	if (!bio_crypt_ctx_back_mergeable(req, bio))
674 		return 0;
675 	if (blk_rq_sectors(req) + bio_sectors(bio) >
676 	    blk_rq_get_max_sectors(req, blk_rq_pos(req))) {
677 		req_set_nomerge(req->q, req);
678 		return 0;
679 	}
680 
681 	return ll_new_hw_segment(req, bio, nr_segs);
682 }
683 
684 static int ll_front_merge_fn(struct request *req, struct bio *bio,
685 		unsigned int nr_segs)
686 {
687 	if (req_gap_front_merge(req, bio))
688 		return 0;
689 	if (blk_integrity_rq(req) &&
690 	    integrity_req_gap_front_merge(req, bio))
691 		return 0;
692 	if (!bio_crypt_ctx_front_mergeable(req, bio))
693 		return 0;
694 	if (blk_rq_sectors(req) + bio_sectors(bio) >
695 	    blk_rq_get_max_sectors(req, bio->bi_iter.bi_sector)) {
696 		req_set_nomerge(req->q, req);
697 		return 0;
698 	}
699 
700 	return ll_new_hw_segment(req, bio, nr_segs);
701 }
702 
703 static bool req_attempt_discard_merge(struct request_queue *q, struct request *req,
704 		struct request *next)
705 {
706 	unsigned short segments = blk_rq_nr_discard_segments(req);
707 
708 	if (segments >= queue_max_discard_segments(q))
709 		goto no_merge;
710 	if (blk_rq_sectors(req) + bio_sectors(next->bio) >
711 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
712 		goto no_merge;
713 
714 	req->nr_phys_segments = segments + blk_rq_nr_discard_segments(next);
715 	return true;
716 no_merge:
717 	req_set_nomerge(q, req);
718 	return false;
719 }
720 
721 static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
722 				struct request *next)
723 {
724 	int total_phys_segments;
725 
726 	if (req_gap_back_merge(req, next->bio))
727 		return 0;
728 
729 	/*
730 	 * Will it become too large?
731 	 */
732 	if ((blk_rq_sectors(req) + blk_rq_sectors(next)) >
733 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
734 		return 0;
735 
736 	total_phys_segments = req->nr_phys_segments + next->nr_phys_segments;
737 	if (total_phys_segments > blk_rq_get_max_segments(req))
738 		return 0;
739 
740 	if (!blk_cgroup_mergeable(req, next->bio))
741 		return 0;
742 
743 	if (blk_integrity_merge_rq(q, req, next) == false)
744 		return 0;
745 
746 	if (!bio_crypt_ctx_merge_rq(req, next))
747 		return 0;
748 
749 	/* Merge is OK... */
750 	req->nr_phys_segments = total_phys_segments;
751 	return 1;
752 }
753 
754 /**
755  * blk_rq_set_mixed_merge - mark a request as mixed merge
756  * @rq: request to mark as mixed merge
757  *
758  * Description:
759  *     @rq is about to be mixed merged.  Make sure the attributes
760  *     which can be mixed are set in each bio and mark @rq as mixed
761  *     merged.
762  */
763 static void blk_rq_set_mixed_merge(struct request *rq)
764 {
765 	blk_opf_t ff = rq->cmd_flags & REQ_FAILFAST_MASK;
766 	struct bio *bio;
767 
768 	if (rq->rq_flags & RQF_MIXED_MERGE)
769 		return;
770 
771 	/*
772 	 * @rq will no longer represent mixable attributes for all the
773 	 * contained bios.  It will just track those of the first one.
774 	 * Distribute the attributes to each bio.
775 	 */
776 	for (bio = rq->bio; bio; bio = bio->bi_next) {
777 		WARN_ON_ONCE((bio->bi_opf & REQ_FAILFAST_MASK) &&
778 			     (bio->bi_opf & REQ_FAILFAST_MASK) != ff);
779 		bio->bi_opf |= ff;
780 	}
781 	rq->rq_flags |= RQF_MIXED_MERGE;
782 }
783 
784 static inline blk_opf_t bio_failfast(const struct bio *bio)
785 {
786 	if (bio->bi_opf & REQ_RAHEAD)
787 		return REQ_FAILFAST_MASK;
788 
789 	return bio->bi_opf & REQ_FAILFAST_MASK;
790 }
791 
792 /*
793  * After the request is marked as MIXED_MERGE, any new readahead (RA) bio has
794  * to be marked as failfast, and the request's failfast flags have to be
795  * updated in case of a front merge.
796  */
797 static inline void blk_update_mixed_merge(struct request *req,
798 		struct bio *bio, bool front_merge)
799 {
800 	if (req->rq_flags & RQF_MIXED_MERGE) {
801 		if (bio->bi_opf & REQ_RAHEAD)
802 			bio->bi_opf |= REQ_FAILFAST_MASK;
803 
804 		if (front_merge) {
805 			req->cmd_flags &= ~REQ_FAILFAST_MASK;
806 			req->cmd_flags |= bio->bi_opf & REQ_FAILFAST_MASK;
807 		}
808 	}
809 }
810 
811 static void blk_account_io_merge_request(struct request *req)
812 {
813 	if (blk_do_io_stat(req)) {
814 		part_stat_lock();
815 		part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
816 		part_stat_local_dec(req->part,
817 				    in_flight[op_is_write(req_op(req))]);
818 		part_stat_unlock();
819 	}
820 }
821 
822 static enum elv_merge blk_try_req_merge(struct request *req,
823 					struct request *next)
824 {
825 	if (blk_discard_mergable(req))
826 		return ELEVATOR_DISCARD_MERGE;
827 	else if (blk_rq_pos(req) + blk_rq_sectors(req) == blk_rq_pos(next))
828 		return ELEVATOR_BACK_MERGE;
829 
830 	return ELEVATOR_NO_MERGE;
831 }
832 
833 static bool blk_atomic_write_mergeable_rq_bio(struct request *rq,
834 					      struct bio *bio)
835 {
836 	return (rq->cmd_flags & REQ_ATOMIC) == (bio->bi_opf & REQ_ATOMIC);
837 }
838 
839 static bool blk_atomic_write_mergeable_rqs(struct request *rq,
840 					   struct request *next)
841 {
842 	return (rq->cmd_flags & REQ_ATOMIC) == (next->cmd_flags & REQ_ATOMIC);
843 }
844 
845 /*
846  * For non-mq, this has to be called with the request spinlock acquired.
847  * For mq with scheduling, the appropriate queue-wide lock should be held.
848  */
849 static struct request *attempt_merge(struct request_queue *q,
850 				     struct request *req, struct request *next)
851 {
852 	if (!rq_mergeable(req) || !rq_mergeable(next))
853 		return NULL;
854 
855 	if (req_op(req) != req_op(next))
856 		return NULL;
857 
858 	if (rq_data_dir(req) != rq_data_dir(next))
859 		return NULL;
860 
861 	/* Don't merge requests with different write hints. */
862 	if (req->write_hint != next->write_hint)
863 		return NULL;
864 
865 	if (req->ioprio != next->ioprio)
866 		return NULL;
867 
868 	if (!blk_atomic_write_mergeable_rqs(req, next))
869 		return NULL;
870 
871 	/*
872 	 * If we are allowed to merge, then append the bio list from 'next'
873 	 * to 'req' and release 'next'.  The merge functions above will have
874 	 * updated the segment counts; update the sector counts here.
875 	 * Handle DISCARDs separately, as they have separate
876 	 * settings.
877 	 */
878 
879 	switch (blk_try_req_merge(req, next)) {
880 	case ELEVATOR_DISCARD_MERGE:
881 		if (!req_attempt_discard_merge(q, req, next))
882 			return NULL;
883 		break;
884 	case ELEVATOR_BACK_MERGE:
885 		if (!ll_merge_requests_fn(q, req, next))
886 			return NULL;
887 		break;
888 	default:
889 		return NULL;
890 	}
891 
892 	/*
893 	 * If failfast settings disagree or either of the two is already
894 	 * a mixed merge, mark both as mixed before proceeding.  This
895 	 * makes sure that all involved bios have mixable attributes
896 	 * set properly.
897 	 */
898 	if (((req->rq_flags | next->rq_flags) & RQF_MIXED_MERGE) ||
899 	    (req->cmd_flags & REQ_FAILFAST_MASK) !=
900 	    (next->cmd_flags & REQ_FAILFAST_MASK)) {
901 		blk_rq_set_mixed_merge(req);
902 		blk_rq_set_mixed_merge(next);
903 	}
904 
905 	/*
906 	 * At this point we have either done a back merge or front merge. We
907 	 * need the smaller start_time_ns of the merged requests to be the
908 	 * current request for accounting purposes.
909 	 */
910 	if (next->start_time_ns < req->start_time_ns)
911 		req->start_time_ns = next->start_time_ns;
912 
913 	req->biotail->bi_next = next->bio;
914 	req->biotail = next->biotail;
915 
916 	req->__data_len += blk_rq_bytes(next);
917 
918 	if (!blk_discard_mergable(req))
919 		elv_merge_requests(q, req, next);
920 
921 	blk_crypto_rq_put_keyslot(next);
922 
923 	/*
924 	 * 'next' is going away, so update stats accordingly
925 	 */
926 	blk_account_io_merge_request(next);
927 
928 	trace_block_rq_merge(next);
929 
930 	/*
931 	 * ownership of the bios has passed from 'next' to 'req'; return
932 	 * 'next' for the caller to free
933 	 */
934 	next->bio = NULL;
935 	return next;
936 }
937 
938 static struct request *attempt_back_merge(struct request_queue *q,
939 		struct request *rq)
940 {
941 	struct request *next = elv_latter_request(q, rq);
942 
943 	if (next)
944 		return attempt_merge(q, rq, next);
945 
946 	return NULL;
947 }
948 
949 static struct request *attempt_front_merge(struct request_queue *q,
950 		struct request *rq)
951 {
952 	struct request *prev = elv_former_request(q, rq);
953 
954 	if (prev)
955 		return attempt_merge(q, prev, rq);
956 
957 	return NULL;
958 }
959 
960 /*
961  * Try to merge 'next' into 'rq'. Return true if the merge happened, false
962  * otherwise. The caller is responsible for freeing 'next' if the merge
963  * happened.
964  */
965 bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
966 			   struct request *next)
967 {
968 	return attempt_merge(q, rq, next);
969 }
970 
971 bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
972 {
973 	if (!rq_mergeable(rq) || !bio_mergeable(bio))
974 		return false;
975 
976 	if (req_op(rq) != bio_op(bio))
977 		return false;
978 
979 	/* different data direction or already started, don't merge */
980 	if (bio_data_dir(bio) != rq_data_dir(rq))
981 		return false;
982 
983 	/* don't merge across cgroup boundaries */
984 	if (!blk_cgroup_mergeable(rq, bio))
985 		return false;
986 
987 	/* only merge integrity protected bio into ditto rq */
988 	if (blk_integrity_merge_bio(rq->q, rq, bio) == false)
989 		return false;
990 
991 	/* Only merge if the crypt contexts are compatible */
992 	if (!bio_crypt_rq_ctx_compatible(rq, bio))
993 		return false;
994 
995 	/* Don't merge requests with different write hints. */
996 	if (rq->write_hint != bio->bi_write_hint)
997 		return false;
998 
999 	if (rq->ioprio != bio_prio(bio))
1000 		return false;
1001 
1002 	if (blk_atomic_write_mergeable_rq_bio(rq, bio) == false)
1003 		return false;
1004 
1005 	return true;
1006 }
1007 
1008 enum elv_merge blk_try_merge(struct request *rq, struct bio *bio)
1009 {
1010 	if (blk_discard_mergable(rq))
1011 		return ELEVATOR_DISCARD_MERGE;
1012 	else if (blk_rq_pos(rq) + blk_rq_sectors(rq) == bio->bi_iter.bi_sector)
1013 		return ELEVATOR_BACK_MERGE;
1014 	else if (blk_rq_pos(rq) - bio_sectors(bio) == bio->bi_iter.bi_sector)
1015 		return ELEVATOR_FRONT_MERGE;
1016 	return ELEVATOR_NO_MERGE;
1017 }
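
/*
 * Editor's illustration, not part of the kernel source: the position checks
 * in blk_try_merge() above with made-up numbers.  A request covering sectors
 * [100, 108) back-merges a bio starting at sector 108, and front-merges an
 * 8-sector bio starting at sector 92.
 */
#include <stdio.h>

int main(void)
{
	unsigned long long rq_pos = 100, rq_sectors = 8;
	unsigned long long bio_sector, bio_sectors = 8;

	for (bio_sector = 92; bio_sector <= 108; bio_sector += 8) {
		if (rq_pos + rq_sectors == bio_sector)
			printf("sector %llu: back merge\n", bio_sector);
		else if (rq_pos - bio_sectors == bio_sector)
			printf("sector %llu: front merge\n", bio_sector);
		else
			printf("sector %llu: no merge\n", bio_sector);
	}
	return 0;
}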
1018 
1019 static void blk_account_io_merge_bio(struct request *req)
1020 {
1021 	if (!blk_do_io_stat(req))
1022 		return;
1023 
1024 	part_stat_lock();
1025 	part_stat_inc(req->part, merges[op_stat_group(req_op(req))]);
1026 	part_stat_unlock();
1027 }
1028 
1029 enum bio_merge_status bio_attempt_back_merge(struct request *req,
1030 		struct bio *bio, unsigned int nr_segs)
1031 {
1032 	const blk_opf_t ff = bio_failfast(bio);
1033 
1034 	if (!ll_back_merge_fn(req, bio, nr_segs))
1035 		return BIO_MERGE_FAILED;
1036 
1037 	trace_block_bio_backmerge(bio);
1038 	rq_qos_merge(req->q, req, bio);
1039 
1040 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1041 		blk_rq_set_mixed_merge(req);
1042 
1043 	blk_update_mixed_merge(req, bio, false);
1044 
1045 	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
1046 		blk_zone_write_plug_bio_merged(bio);
1047 
1048 	req->biotail->bi_next = bio;
1049 	req->biotail = bio;
1050 	req->__data_len += bio->bi_iter.bi_size;
1051 
1052 	bio_crypt_free_ctx(bio);
1053 
1054 	blk_account_io_merge_bio(req);
1055 	return BIO_MERGE_OK;
1056 }
1057 
1058 static enum bio_merge_status bio_attempt_front_merge(struct request *req,
1059 		struct bio *bio, unsigned int nr_segs)
1060 {
1061 	const blk_opf_t ff = bio_failfast(bio);
1062 
1063 	/*
1064 	 * A front merge for writes to sequential zones of a zoned block device
1065 	 * can happen only if the user submitted writes out of order. Do not
1066 	 * merge such a write, so that it fails.
1067 	 */
1068 	if (req->rq_flags & RQF_ZONE_WRITE_PLUGGING)
1069 		return BIO_MERGE_FAILED;
1070 
1071 	if (!ll_front_merge_fn(req, bio, nr_segs))
1072 		return BIO_MERGE_FAILED;
1073 
1074 	trace_block_bio_frontmerge(bio);
1075 	rq_qos_merge(req->q, req, bio);
1076 
1077 	if ((req->cmd_flags & REQ_FAILFAST_MASK) != ff)
1078 		blk_rq_set_mixed_merge(req);
1079 
1080 	blk_update_mixed_merge(req, bio, true);
1081 
1082 	bio->bi_next = req->bio;
1083 	req->bio = bio;
1084 
1085 	req->__sector = bio->bi_iter.bi_sector;
1086 	req->__data_len += bio->bi_iter.bi_size;
1087 
1088 	bio_crypt_do_front_merge(req, bio);
1089 
1090 	blk_account_io_merge_bio(req);
1091 	return BIO_MERGE_OK;
1092 }
1093 
1094 static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
1095 		struct request *req, struct bio *bio)
1096 {
1097 	unsigned short segments = blk_rq_nr_discard_segments(req);
1098 
1099 	if (segments >= queue_max_discard_segments(q))
1100 		goto no_merge;
1101 	if (blk_rq_sectors(req) + bio_sectors(bio) >
1102 	    blk_rq_get_max_sectors(req, blk_rq_pos(req)))
1103 		goto no_merge;
1104 
1105 	rq_qos_merge(q, req, bio);
1106 
1107 	req->biotail->bi_next = bio;
1108 	req->biotail = bio;
1109 	req->__data_len += bio->bi_iter.bi_size;
1110 	req->nr_phys_segments = segments + 1;
1111 
1112 	blk_account_io_merge_bio(req);
1113 	return BIO_MERGE_OK;
1114 no_merge:
1115 	req_set_nomerge(q, req);
1116 	return BIO_MERGE_FAILED;
1117 }
1118 
1119 static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
1120 						   struct request *rq,
1121 						   struct bio *bio,
1122 						   unsigned int nr_segs,
1123 						   bool sched_allow_merge)
1124 {
1125 	if (!blk_rq_merge_ok(rq, bio))
1126 		return BIO_MERGE_NONE;
1127 
1128 	switch (blk_try_merge(rq, bio)) {
1129 	case ELEVATOR_BACK_MERGE:
1130 		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
1131 			return bio_attempt_back_merge(rq, bio, nr_segs);
1132 		break;
1133 	case ELEVATOR_FRONT_MERGE:
1134 		if (!sched_allow_merge || blk_mq_sched_allow_merge(q, rq, bio))
1135 			return bio_attempt_front_merge(rq, bio, nr_segs);
1136 		break;
1137 	case ELEVATOR_DISCARD_MERGE:
1138 		return bio_attempt_discard_merge(q, rq, bio);
1139 	default:
1140 		return BIO_MERGE_NONE;
1141 	}
1142 
1143 	return BIO_MERGE_FAILED;
1144 }
1145 
1146 /**
1147  * blk_attempt_plug_merge - try to merge with %current's plugged list
1148  * @q: request_queue new bio is being queued at
1149  * @bio: new bio being queued
1150  * @nr_segs: number of segments in @bio
1152  *
1153  * Determine whether @bio being queued on @q can be merged with the previous
1154  * request on %current's plugged list.  Returns %true if merge was successful,
1155  * otherwise %false.
1156  *
1157  * Plugging coalesces IOs from the same issuer for the same purpose without
1158  * going through @q->queue_lock.  As such it's more of an issuing mechanism
1159  * than scheduling, and the request, while it may have elvpriv data, is not
1160  * added to the elevator at this point.  In addition, we don't have
1161  * reliable access to the elevator outside queue lock.  Only check basic
1162  * merging parameters without querying the elevator.
1163  *
1164  * Caller must ensure !blk_queue_nomerges(q) beforehand.
1165  */
1166 bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
1167 		unsigned int nr_segs)
1168 {
1169 	struct blk_plug *plug = current->plug;
1170 	struct request *rq;
1171 
1172 	if (!plug || rq_list_empty(plug->mq_list))
1173 		return false;
1174 
1175 	rq_list_for_each(&plug->mq_list, rq) {
1176 		if (rq->q == q) {
1177 			if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
1178 			    BIO_MERGE_OK)
1179 				return true;
1180 			break;
1181 		}
1182 
1183 		/*
1184 		 * Only keep iterating plug list for merges if we have multiple
1185 		 * queues
1186 		 */
1187 		if (!plug->multiple_queues)
1188 			break;
1189 	}
1190 	return false;
1191 }
1192 
1193 /*
1194  * Iterate list of requests and see if we can merge this bio with any
1195  * of them.
1196  */
1197 bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
1198 			struct bio *bio, unsigned int nr_segs)
1199 {
1200 	struct request *rq;
1201 	int checked = 8;
1202 
1203 	list_for_each_entry_reverse(rq, list, queuelist) {
1204 		if (!checked--)
1205 			break;
1206 
1207 		switch (blk_attempt_bio_merge(q, rq, bio, nr_segs, true)) {
1208 		case BIO_MERGE_NONE:
1209 			continue;
1210 		case BIO_MERGE_OK:
1211 			return true;
1212 		case BIO_MERGE_FAILED:
1213 			return false;
1214 		}
1215 
1216 	}
1217 
1218 	return false;
1219 }
1220 EXPORT_SYMBOL_GPL(blk_bio_list_merge);
1221 
1222 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
1223 		unsigned int nr_segs, struct request **merged_request)
1224 {
1225 	struct request *rq;
1226 
1227 	switch (elv_merge(q, &rq, bio)) {
1228 	case ELEVATOR_BACK_MERGE:
1229 		if (!blk_mq_sched_allow_merge(q, rq, bio))
1230 			return false;
1231 		if (bio_attempt_back_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
1232 			return false;
1233 		*merged_request = attempt_back_merge(q, rq);
1234 		if (!*merged_request)
1235 			elv_merged_request(q, rq, ELEVATOR_BACK_MERGE);
1236 		return true;
1237 	case ELEVATOR_FRONT_MERGE:
1238 		if (!blk_mq_sched_allow_merge(q, rq, bio))
1239 			return false;
1240 		if (bio_attempt_front_merge(rq, bio, nr_segs) != BIO_MERGE_OK)
1241 			return false;
1242 		*merged_request = attempt_front_merge(q, rq);
1243 		if (!*merged_request)
1244 			elv_merged_request(q, rq, ELEVATOR_FRONT_MERGE);
1245 		return true;
1246 	case ELEVATOR_DISCARD_MERGE:
1247 		return bio_attempt_discard_merge(q, rq, bio) == BIO_MERGE_OK;
1248 	default:
1249 		return false;
1250 	}
1251 }
1252 EXPORT_SYMBOL_GPL(blk_mq_sched_try_merge);
1253