xref: /freebsd/sys/kern/subr_disk.c (revision 6afd9210909a1a7f6ed5e5a15fc9d1cf815a555e)
19454b2d8SWarner Losh /*-
264de3fddSPedro F. Giffuni  * SPDX-License-Identifier: Beerware
364de3fddSPedro F. Giffuni  *
4da9e4f55SPoul-Henning Kamp  * ----------------------------------------------------------------------------
5da9e4f55SPoul-Henning Kamp  * "THE BEER-WARE LICENSE" (Revision 42):
6da9e4f55SPoul-Henning Kamp  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
7da9e4f55SPoul-Henning Kamp  * can do whatever you want with this stuff. If we meet some day, and you think
8da9e4f55SPoul-Henning Kamp  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
9da9e4f55SPoul-Henning Kamp  * ----------------------------------------------------------------------------
10d4619572SLuigi Rizzo  *
11d4619572SLuigi Rizzo  * The bioq_disksort() (and the specification of the bioq API)
12d4619572SLuigi Rizzo  * have been written by Luigi Rizzo and Fabio Checconi under the same
13d4619572SLuigi Rizzo  * license as above.
14da9e4f55SPoul-Henning Kamp  */
15da9e4f55SPoul-Henning Kamp 
16677b542eSDavid E. O'Brien #include <sys/cdefs.h>
17677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
18677b542eSDavid E. O'Brien 
19417fb7f6SPoul-Henning Kamp #include "opt_geom.h"
20417fb7f6SPoul-Henning Kamp 
21da9e4f55SPoul-Henning Kamp #include <sys/param.h>
22da9e4f55SPoul-Henning Kamp #include <sys/systm.h>
239626b608SPoul-Henning Kamp #include <sys/bio.h>
24da9e4f55SPoul-Henning Kamp #include <sys/conf.h>
25da9e4f55SPoul-Henning Kamp #include <sys/disk.h>
26a971acbcSWarner Losh #include <sys/sysctl.h>
2781750927SPoul-Henning Kamp #include <geom/geom_disk.h>
28f90c382cSPoul-Henning Kamp 
29a971acbcSWarner Losh static int bioq_batchsize = 0;
30a971acbcSWarner Losh SYSCTL_INT(_debug, OID_AUTO, bioq_batchsize, CTLFLAG_RW,
31a971acbcSWarner Losh     &bioq_batchsize, 0, "BIOQ batch size");
32a971acbcSWarner Losh 
331a996ed1SEdward Tomasz Napierala /*-
34f90c382cSPoul-Henning Kamp  * Disk error is the preface to plaintive error messages
35f90c382cSPoul-Henning Kamp  * about failing disk transfers.  It prints messages of the form
36f90c382cSPoul-Henning Kamp  * 	"hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347"
37f90c382cSPoul-Henning Kamp  * blkdone should be -1 if the position of the error is unknown.
38f90c382cSPoul-Henning Kamp  * The message is printed with printf.
39f90c382cSPoul-Henning Kamp  */
40f90c382cSPoul-Henning Kamp void
41f90c382cSPoul-Henning Kamp disk_err(struct bio *bp, const char *what, int blkdone, int nl)
42f90c382cSPoul-Henning Kamp {
43f90c382cSPoul-Henning Kamp 	daddr_t sn;
44f90c382cSPoul-Henning Kamp 
45a9463ba8SPoul-Henning Kamp 	if (bp->bio_dev != NULL)
46f90c382cSPoul-Henning Kamp 		printf("%s: %s ", devtoname(bp->bio_dev), what);
47a9463ba8SPoul-Henning Kamp 	else if (bp->bio_disk != NULL)
48a9463ba8SPoul-Henning Kamp 		printf("%s%d: %s ",
49a9463ba8SPoul-Henning Kamp 		    bp->bio_disk->d_name, bp->bio_disk->d_unit, what);
50a9463ba8SPoul-Henning Kamp 	else
51a9463ba8SPoul-Henning Kamp 		printf("disk??: %s ", what);
52f90c382cSPoul-Henning Kamp 	switch(bp->bio_cmd) {
53f90c382cSPoul-Henning Kamp 	case BIO_READ:		printf("cmd=read "); break;
54f90c382cSPoul-Henning Kamp 	case BIO_WRITE:		printf("cmd=write "); break;
55f90c382cSPoul-Henning Kamp 	case BIO_DELETE:	printf("cmd=delete "); break;
56f90c382cSPoul-Henning Kamp 	case BIO_GETATTR:	printf("cmd=getattr "); break;
57c3618c65SPawel Jakub Dawidek 	case BIO_FLUSH:		printf("cmd=flush "); break;
58f90c382cSPoul-Henning Kamp 	default:		printf("cmd=%x ", bp->bio_cmd); break;
59f90c382cSPoul-Henning Kamp 	}
601ad9172fSPoul-Henning Kamp 	sn = bp->bio_pblkno;
61f90c382cSPoul-Henning Kamp 	if (bp->bio_bcount <= DEV_BSIZE) {
62f90c382cSPoul-Henning Kamp 		printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : "");
63f90c382cSPoul-Henning Kamp 		return;
64f90c382cSPoul-Henning Kamp 	}
65f90c382cSPoul-Henning Kamp 	if (blkdone >= 0) {
66f90c382cSPoul-Henning Kamp 		sn += blkdone;
67f90c382cSPoul-Henning Kamp 		printf("fsbn %jd of ", (intmax_t)sn);
68f90c382cSPoul-Henning Kamp 	}
691ad9172fSPoul-Henning Kamp 	printf("%jd-%jd", (intmax_t)bp->bio_pblkno,
701ad9172fSPoul-Henning Kamp 	    (intmax_t)(bp->bio_pblkno + (bp->bio_bcount - 1) / DEV_BSIZE));
71f90c382cSPoul-Henning Kamp 	if (nl)
72f90c382cSPoul-Henning Kamp 		printf("\n");
73f90c382cSPoul-Henning Kamp }
742382fb0aSPoul-Henning Kamp 
752382fb0aSPoul-Henning Kamp /*
76d086f85aSPoul-Henning Kamp  * BIO queue implementation
77d4619572SLuigi Rizzo  *
78d4619572SLuigi Rizzo  * Please read carefully the description below before making any change
79d4619572SLuigi Rizzo  * to the code, or you might change the behaviour of the data structure
80d4619572SLuigi Rizzo  * in undesirable ways.
81d4619572SLuigi Rizzo  *
82d4619572SLuigi Rizzo  * A bioq stores disk I/O request (bio), normally sorted according to
83d4619572SLuigi Rizzo  * the distance of the requested position (bio->bio_offset) from the
84d4619572SLuigi Rizzo  * current head position (bioq->last_offset) in the scan direction, i.e.
85d4619572SLuigi Rizzo  *
86d4619572SLuigi Rizzo  * 	(uoff_t)(bio_offset - last_offset)
87d4619572SLuigi Rizzo  *
88d4619572SLuigi Rizzo  * Note that the cast to unsigned (uoff_t) is fundamental to insure
89d4619572SLuigi Rizzo  * that the distance is computed in the scan direction.
90d4619572SLuigi Rizzo  *
91d4619572SLuigi Rizzo  * The main methods for manipulating the bioq are:
92d4619572SLuigi Rizzo  *
93d4619572SLuigi Rizzo  *   bioq_disksort()	performs an ordered insertion;
94d4619572SLuigi Rizzo  *
95d4619572SLuigi Rizzo  *   bioq_first()	return the head of the queue, without removing;
96d4619572SLuigi Rizzo  *
97d4619572SLuigi Rizzo  *   bioq_takefirst()	return and remove the head of the queue,
98d4619572SLuigi Rizzo  *		updating the 'current head position' as
99d4619572SLuigi Rizzo  *		bioq->last_offset = bio->bio_offset + bio->bio_length;
100d4619572SLuigi Rizzo  *
101d4619572SLuigi Rizzo  * When updating the 'current head position', we assume that the result of
102d4619572SLuigi Rizzo  * bioq_takefirst() is dispatched to the device, so bioq->last_offset
103d4619572SLuigi Rizzo  * represents the head position once the request is complete.
104d4619572SLuigi Rizzo  *
105d4619572SLuigi Rizzo  * If the bioq is manipulated using only the above calls, it starts
106d4619572SLuigi Rizzo  * with a sorted sequence of requests with bio_offset >= last_offset,
107d4619572SLuigi Rizzo  * possibly followed by another sorted sequence of requests with
108d4619572SLuigi Rizzo  * 0 <= bio_offset < bioq->last_offset
109d4619572SLuigi Rizzo  *
110d4619572SLuigi Rizzo  * NOTE: historical behaviour was to ignore bio->bio_length in the
111d4619572SLuigi Rizzo  *	update, but its use tracks the head position in a better way.
112d4619572SLuigi Rizzo  *	Historical behaviour was also to update the head position when
113d4619572SLuigi Rizzo  *	the request under service is complete, rather than when the
114d4619572SLuigi Rizzo  *	request is extracted from the queue. However, the current API
115d4619572SLuigi Rizzo  *	has no method to update the head position; secondly, once
116d4619572SLuigi Rizzo  *	a request has been submitted to the disk, we have no idea of
117d4619572SLuigi Rizzo  *	the actual head position, so the final one is our best guess.
118d4619572SLuigi Rizzo  *
119d4619572SLuigi Rizzo  * --- Direct queue manipulation ---
120d4619572SLuigi Rizzo  *
121d4619572SLuigi Rizzo  * A bioq uses an underlying TAILQ to store requests, so we also
122d4619572SLuigi Rizzo  * export methods to manipulate the TAILQ, in particular:
123d4619572SLuigi Rizzo  *
124d4619572SLuigi Rizzo  * bioq_insert_tail()	insert an entry at the end.
125d4619572SLuigi Rizzo  *		It also creates a 'barrier' so all subsequent
126d4619572SLuigi Rizzo  *		insertions through bioq_disksort() will end up
127d4619572SLuigi Rizzo  *		after this entry;
128d4619572SLuigi Rizzo  *
129d4619572SLuigi Rizzo  * bioq_insert_head()	insert an entry at the head, update
130d4619572SLuigi Rizzo  *		bioq->last_offset = bio->bio_offset so that
131d4619572SLuigi Rizzo  *		all subsequent insertions through bioq_disksort()
132d4619572SLuigi Rizzo  *		will end up after this entry;
133d4619572SLuigi Rizzo  *
134d4619572SLuigi Rizzo  * bioq_remove()	remove a generic element from the queue, act as
135d4619572SLuigi Rizzo  *		bioq_takefirst() if invoked on the head of the queue.
136d4619572SLuigi Rizzo  *
137f03f7a0cSJustin T. Gibbs  * The semantic of these methods is the same as the operations
138d4619572SLuigi Rizzo  * on the underlying TAILQ, but with additional guarantees on
139d4619572SLuigi Rizzo  * subsequent bioq_disksort() calls. E.g. bioq_insert_tail()
140d4619572SLuigi Rizzo  * can be useful for making sure that all previous ops are flushed
141d4619572SLuigi Rizzo  * to disk before continuing.
142d4619572SLuigi Rizzo  *
143d4619572SLuigi Rizzo  * Updating bioq->last_offset on a bioq_insert_head() guarantees
144d4619572SLuigi Rizzo  * that the bio inserted with the last bioq_insert_head() will stay
145d4619572SLuigi Rizzo  * at the head of the queue even after subsequent bioq_disksort().
146d4619572SLuigi Rizzo  *
147d4619572SLuigi Rizzo  * Note that when the direct queue manipulation functions are used,
148d4619572SLuigi Rizzo  * the queue may contain multiple inversion points (i.e. more than
149d4619572SLuigi Rizzo  * two sorted sequences of requests).
150d4619572SLuigi Rizzo  *
151d086f85aSPoul-Henning Kamp  */
152d086f85aSPoul-Henning Kamp 
153d086f85aSPoul-Henning Kamp void
154d086f85aSPoul-Henning Kamp bioq_init(struct bio_queue_head *head)
155d086f85aSPoul-Henning Kamp {
156d4619572SLuigi Rizzo 
157d086f85aSPoul-Henning Kamp 	TAILQ_INIT(&head->queue);
1584cb4df48SPoul-Henning Kamp 	head->last_offset = 0;
159d086f85aSPoul-Henning Kamp 	head->insert_point = NULL;
160a971acbcSWarner Losh 	head->total = 0;
161a971acbcSWarner Losh 	head->batched = 0;
162d086f85aSPoul-Henning Kamp }
163d086f85aSPoul-Henning Kamp 
164d086f85aSPoul-Henning Kamp void
165d086f85aSPoul-Henning Kamp bioq_remove(struct bio_queue_head *head, struct bio *bp)
166d086f85aSPoul-Henning Kamp {
167d4619572SLuigi Rizzo 
168f03f7a0cSJustin T. Gibbs 	if (head->insert_point == NULL) {
169d4619572SLuigi Rizzo 		if (bp == TAILQ_FIRST(&head->queue))
170d4619572SLuigi Rizzo 			head->last_offset = bp->bio_offset + bp->bio_length;
171f03f7a0cSJustin T. Gibbs 	} else if (bp == head->insert_point)
172d4619572SLuigi Rizzo 		head->insert_point = NULL;
173d4619572SLuigi Rizzo 
174d086f85aSPoul-Henning Kamp 	TAILQ_REMOVE(&head->queue, bp, bio_queue);
175a971acbcSWarner Losh 	head->total--;
176d086f85aSPoul-Henning Kamp }
177af6ca7f4SPoul-Henning Kamp 
178af6ca7f4SPoul-Henning Kamp void
179af6ca7f4SPoul-Henning Kamp bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error)
180af6ca7f4SPoul-Henning Kamp {
181af6ca7f4SPoul-Henning Kamp 	struct bio *bp;
182af6ca7f4SPoul-Henning Kamp 
183d298f919SPoul-Henning Kamp 	while ((bp = bioq_takefirst(head)) != NULL)
184b8404473SPoul-Henning Kamp 		biofinish(bp, stp, error);
185af6ca7f4SPoul-Henning Kamp }
186af6ca7f4SPoul-Henning Kamp 
187d086f85aSPoul-Henning Kamp void
188bf484316SPawel Jakub Dawidek bioq_insert_head(struct bio_queue_head *head, struct bio *bp)
189bf484316SPawel Jakub Dawidek {
190bf484316SPawel Jakub Dawidek 
191f03f7a0cSJustin T. Gibbs 	if (head->insert_point == NULL)
192d4619572SLuigi Rizzo 		head->last_offset = bp->bio_offset;
193bf484316SPawel Jakub Dawidek 	TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
194a971acbcSWarner Losh 	head->total++;
195a971acbcSWarner Losh 	head->batched = 0;
196bf484316SPawel Jakub Dawidek }
197bf484316SPawel Jakub Dawidek 
198bf484316SPawel Jakub Dawidek void
199d086f85aSPoul-Henning Kamp bioq_insert_tail(struct bio_queue_head *head, struct bio *bp)
200d086f85aSPoul-Henning Kamp {
201d086f85aSPoul-Henning Kamp 
202d086f85aSPoul-Henning Kamp 	TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue);
203a971acbcSWarner Losh 	head->total++;
204d4619572SLuigi Rizzo 	head->insert_point = bp;
205f03f7a0cSJustin T. Gibbs 	head->last_offset = bp->bio_offset;
206d086f85aSPoul-Henning Kamp }
207d086f85aSPoul-Henning Kamp 
208d086f85aSPoul-Henning Kamp struct bio *
209d086f85aSPoul-Henning Kamp bioq_first(struct bio_queue_head *head)
210d086f85aSPoul-Henning Kamp {
211d086f85aSPoul-Henning Kamp 
212d086f85aSPoul-Henning Kamp 	return (TAILQ_FIRST(&head->queue));
213d086f85aSPoul-Henning Kamp }
214d086f85aSPoul-Henning Kamp 
215d298f919SPoul-Henning Kamp struct bio *
216d298f919SPoul-Henning Kamp bioq_takefirst(struct bio_queue_head *head)
217d298f919SPoul-Henning Kamp {
218d298f919SPoul-Henning Kamp 	struct bio *bp;
219d298f919SPoul-Henning Kamp 
220d298f919SPoul-Henning Kamp 	bp = TAILQ_FIRST(&head->queue);
221d298f919SPoul-Henning Kamp 	if (bp != NULL)
222d298f919SPoul-Henning Kamp 		bioq_remove(head, bp);
223d298f919SPoul-Henning Kamp 	return (bp);
224d298f919SPoul-Henning Kamp }
225d086f85aSPoul-Henning Kamp 
226d086f85aSPoul-Henning Kamp /*
227d4619572SLuigi Rizzo  * Compute the sorting key. The cast to unsigned is
228d4619572SLuigi Rizzo  * fundamental for correctness, see the description
229d4619572SLuigi Rizzo  * near the beginning of the file.
2302382fb0aSPoul-Henning Kamp  */
231d4619572SLuigi Rizzo static inline uoff_t
232d4619572SLuigi Rizzo bioq_bio_key(struct bio_queue_head *head, struct bio *bp)
2332382fb0aSPoul-Henning Kamp {
234d4619572SLuigi Rizzo 
235d4619572SLuigi Rizzo 	return ((uoff_t)(bp->bio_offset - head->last_offset));
236d4619572SLuigi Rizzo }
2372382fb0aSPoul-Henning Kamp 
2382382fb0aSPoul-Henning Kamp /*
239d4619572SLuigi Rizzo  * Seek sort for disks.
240d4619572SLuigi Rizzo  *
241d4619572SLuigi Rizzo  * Sort all requests in a single queue while keeping
242d4619572SLuigi Rizzo  * track of the current position of the disk with last_offset.
243d4619572SLuigi Rizzo  * See above for details.
2442382fb0aSPoul-Henning Kamp  */
245d4619572SLuigi Rizzo void
246d4619572SLuigi Rizzo bioq_disksort(struct bio_queue_head *head, struct bio *bp)
247d4619572SLuigi Rizzo {
248f03f7a0cSJustin T. Gibbs 	struct bio *cur, *prev;
249f03f7a0cSJustin T. Gibbs 	uoff_t key;
250d4619572SLuigi Rizzo 
251f03f7a0cSJustin T. Gibbs 	if ((bp->bio_flags & BIO_ORDERED) != 0) {
252f03f7a0cSJustin T. Gibbs 		/*
253f03f7a0cSJustin T. Gibbs 		 * Ordered transactions can only be dispatched
254f03f7a0cSJustin T. Gibbs 		 * after any currently queued transactions.  They
255f03f7a0cSJustin T. Gibbs 		 * also have barrier semantics - no transactions
256f03f7a0cSJustin T. Gibbs 		 * queued in the future can pass them.
257f03f7a0cSJustin T. Gibbs 		 */
258f03f7a0cSJustin T. Gibbs 		bioq_insert_tail(head, bp);
259f03f7a0cSJustin T. Gibbs 		return;
260f03f7a0cSJustin T. Gibbs 	}
261f03f7a0cSJustin T. Gibbs 
262*6afd9210SAlexander Motin 	/*
263*6afd9210SAlexander Motin 	 * We should only sort requests of types that have concept of offset.
264*6afd9210SAlexander Motin 	 * Other types, such as BIO_FLUSH or BIO_ZONE, may imply some degree
265*6afd9210SAlexander Motin 	 * of ordering even if strict ordering is not requested explicitly.
266*6afd9210SAlexander Motin 	 */
267*6afd9210SAlexander Motin 	if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
268*6afd9210SAlexander Motin 	    bp->bio_cmd != BIO_DELETE) {
269*6afd9210SAlexander Motin 		bioq_insert_tail(head, bp);
270*6afd9210SAlexander Motin 		return;
271*6afd9210SAlexander Motin 	}
272*6afd9210SAlexander Motin 
273a971acbcSWarner Losh 	if (bioq_batchsize > 0 && head->batched > bioq_batchsize) {
274a971acbcSWarner Losh 		bioq_insert_tail(head, bp);
275a971acbcSWarner Losh 		return;
276a971acbcSWarner Losh 	}
277a971acbcSWarner Losh 
278f03f7a0cSJustin T. Gibbs 	prev = NULL;
279f03f7a0cSJustin T. Gibbs 	key = bioq_bio_key(head, bp);
280d4619572SLuigi Rizzo 	cur = TAILQ_FIRST(&head->queue);
281d4619572SLuigi Rizzo 
282f03f7a0cSJustin T. Gibbs 	if (head->insert_point) {
283f03f7a0cSJustin T. Gibbs 		prev = head->insert_point;
284f03f7a0cSJustin T. Gibbs 		cur = TAILQ_NEXT(head->insert_point, bio_queue);
285f03f7a0cSJustin T. Gibbs 	}
286d4619572SLuigi Rizzo 
287d4619572SLuigi Rizzo 	while (cur != NULL && key >= bioq_bio_key(head, cur)) {
288d4619572SLuigi Rizzo 		prev = cur;
289d4619572SLuigi Rizzo 		cur = TAILQ_NEXT(cur, bio_queue);
2902382fb0aSPoul-Henning Kamp 	}
291d4619572SLuigi Rizzo 
292d4619572SLuigi Rizzo 	if (prev == NULL)
293d4619572SLuigi Rizzo 		TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
294d4619572SLuigi Rizzo 	else
295d4619572SLuigi Rizzo 		TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue);
296a971acbcSWarner Losh 	head->total++;
297a971acbcSWarner Losh 	head->batched++;
2982382fb0aSPoul-Henning Kamp }
299