xref: /freebsd/sys/kern/subr_disk.c (revision 685dc743dc3b5645e34836464128e1c0558b404b)
1 /*-
2  * SPDX-License-Identifier: Beerware
3  *
4  * ----------------------------------------------------------------------------
5  * "THE BEER-WARE LICENSE" (Revision 42):
6  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
7  * can do whatever you want with this stuff. If we meet some day, and you think
8  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
9  * ----------------------------------------------------------------------------
10  *
11  * The bioq_disksort() (and the specification of the bioq API)
12  * have been written by Luigi Rizzo and Fabio Checconi under the same
13  * license as above.
14  */
15 
16 #include <sys/cdefs.h>
17 #include "opt_geom.h"
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/bio.h>
22 #include <sys/conf.h>
23 #include <sys/disk.h>
24 #include <sys/sysctl.h>
25 #include <geom/geom_disk.h>
26 
27 static int bioq_batchsize = 128;
28 SYSCTL_INT(_debug, OID_AUTO, bioq_batchsize, CTLFLAG_RW,
29     &bioq_batchsize, 0, "BIOQ batch size");
30 
31 /*-
32  * Disk error is the preface to plaintive error messages
33  * about failing disk transfers.  It prints messages of the form
34  * 	"hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347"
35  * blkdone should be -1 if the position of the error is unknown.
36  * The message is printed with printf.
37  */
38 void
disk_err(struct bio * bp,const char * what,int blkdone,int nl)39 disk_err(struct bio *bp, const char *what, int blkdone, int nl)
40 {
41 	daddr_t sn;
42 
43 	if (bp->bio_dev != NULL)
44 		printf("%s: %s ", devtoname(bp->bio_dev), what);
45 	else if (bp->bio_disk != NULL)
46 		printf("%s%d: %s ",
47 		    bp->bio_disk->d_name, bp->bio_disk->d_unit, what);
48 	else
49 		printf("disk??: %s ", what);
50 	switch(bp->bio_cmd) {
51 	case BIO_READ:		printf("cmd=read "); break;
52 	case BIO_WRITE:		printf("cmd=write "); break;
53 	case BIO_DELETE:	printf("cmd=delete "); break;
54 	case BIO_GETATTR:	printf("cmd=getattr "); break;
55 	case BIO_FLUSH:		printf("cmd=flush "); break;
56 	default:		printf("cmd=%x ", bp->bio_cmd); break;
57 	}
58 	sn = bp->bio_pblkno;
59 	if (bp->bio_bcount <= DEV_BSIZE) {
60 		printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : "");
61 		return;
62 	}
63 	if (blkdone >= 0) {
64 		sn += blkdone;
65 		printf("fsbn %jd of ", (intmax_t)sn);
66 	}
67 	printf("%jd-%jd", (intmax_t)bp->bio_pblkno,
68 	    (intmax_t)(bp->bio_pblkno + (bp->bio_bcount - 1) / DEV_BSIZE));
69 	if (nl)
70 		printf("\n");
71 }
72 
73 /*
74  * BIO queue implementation
75  *
76  * Please read carefully the description below before making any change
77  * to the code, or you might change the behaviour of the data structure
78  * in undesirable ways.
79  *
80  * A bioq stores disk I/O requests (bio), normally sorted according to
81  * the distance of the requested position (bio->bio_offset) from the
82  * current head position (bioq->last_offset) in the scan direction, i.e.
83  *
84  * 	(uoff_t)(bio_offset - last_offset)
85  *
86  * Note that the cast to unsigned (uoff_t) is fundamental to ensure
87  * that the distance is computed in the scan direction.
88  *
89  * The main methods for manipulating the bioq are:
90  *
91  *   bioq_disksort()	performs an ordered insertion;
92  *
93  *   bioq_first()	return the head of the queue, without removing;
94  *
95  *   bioq_takefirst()	return and remove the head of the queue,
96  *		updating the 'current head position' as
97  *		bioq->last_offset = bio->bio_offset + bio->bio_length;
98  *
99  * When updating the 'current head position', we assume that the result of
100  * bioq_takefirst() is dispatched to the device, so bioq->last_offset
101  * represents the head position once the request is complete.
102  *
103  * If the bioq is manipulated using only the above calls, it starts
104  * with a sorted sequence of requests with bio_offset >= last_offset,
105  * possibly followed by another sorted sequence of requests with
106  * 0 <= bio_offset < bioq->last_offset
107  *
108  * NOTE: historical behaviour was to ignore bio->bio_length in the
109  *	update, but its use tracks the head position in a better way.
110  *	Historical behaviour was also to update the head position when
111  *	the request under service is complete, rather than when the
112  *	request is extracted from the queue. However, the current API
113  *	has no method to update the head position; secondly, once
114  *	a request has been submitted to the disk, we have no idea of
115  *	the actual head position, so the final one is our best guess.
116  *
117  * --- Direct queue manipulation ---
118  *
119  * A bioq uses an underlying TAILQ to store requests, so we also
120  * export methods to manipulate the TAILQ, in particular:
121  *
122  * bioq_insert_tail()	insert an entry at the end.
123  *		It also creates a 'barrier' so all subsequent
124  *		insertions through bioq_disksort() will end up
125  *		after this entry;
126  *
127  * bioq_insert_head()	insert an entry at the head, update
128  *		bioq->last_offset = bio->bio_offset so that
129  *		all subsequent insertions through bioq_disksort()
130  *		will end up after this entry;
131  *
132  * bioq_remove()	remove a generic element from the queue, act as
133  *		bioq_takefirst() if invoked on the head of the queue.
134  *
135  * The semantics of these methods are the same as the operations
136  * on the underlying TAILQ, but with additional guarantees on
137  * subsequent bioq_disksort() calls. E.g. bioq_insert_tail()
138  * can be useful for making sure that all previous ops are flushed
139  * to disk before continuing.
140  *
141  * Updating bioq->last_offset on a bioq_insert_head() guarantees
142  * that the bio inserted with the last bioq_insert_head() will stay
143  * at the head of the queue even after subsequent bioq_disksort().
144  *
145  * Note that when the direct queue manipulation functions are used,
146  * the queue may contain multiple inversion points (i.e. more than
147  * two sorted sequences of requests).
148  *
149  */
150 
151 void
bioq_init(struct bio_queue_head * head)152 bioq_init(struct bio_queue_head *head)
153 {
154 
155 	TAILQ_INIT(&head->queue);
156 	head->last_offset = 0;
157 	head->insert_point = NULL;
158 	head->total = 0;
159 	head->batched = 0;
160 }
161 
162 void
bioq_remove(struct bio_queue_head * head,struct bio * bp)163 bioq_remove(struct bio_queue_head *head, struct bio *bp)
164 {
165 
166 	if (head->insert_point == NULL) {
167 		if (bp == TAILQ_FIRST(&head->queue))
168 			head->last_offset = bp->bio_offset + bp->bio_length;
169 	} else if (bp == head->insert_point)
170 		head->insert_point = NULL;
171 
172 	TAILQ_REMOVE(&head->queue, bp, bio_queue);
173 	if (TAILQ_EMPTY(&head->queue))
174 		head->batched = 0;
175 	head->total--;
176 }
177 
/*
 * Empty the queue: each request is taken from the head with
 * bioq_takefirst() and passed to biofinish() together with stp
 * and error, until no request is left.
 */
void
bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error)
{
	struct bio *next;

	for (next = bioq_takefirst(head); next != NULL;
	    next = bioq_takefirst(head))
		biofinish(next, stp, error);
}
186 
187 void
bioq_insert_head(struct bio_queue_head * head,struct bio * bp)188 bioq_insert_head(struct bio_queue_head *head, struct bio *bp)
189 {
190 
191 	if (head->insert_point == NULL)
192 		head->last_offset = bp->bio_offset;
193 	TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
194 	head->total++;
195 	head->batched = 0;
196 }
197 
198 void
bioq_insert_tail(struct bio_queue_head * head,struct bio * bp)199 bioq_insert_tail(struct bio_queue_head *head, struct bio *bp)
200 {
201 
202 	TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue);
203 	head->total++;
204 	head->batched = 0;
205 	head->insert_point = bp;
206 	head->last_offset = bp->bio_offset;
207 }
208 
209 struct bio *
bioq_first(struct bio_queue_head * head)210 bioq_first(struct bio_queue_head *head)
211 {
212 
213 	return (TAILQ_FIRST(&head->queue));
214 }
215 
216 struct bio *
bioq_takefirst(struct bio_queue_head * head)217 bioq_takefirst(struct bio_queue_head *head)
218 {
219 	struct bio *bp;
220 
221 	bp = TAILQ_FIRST(&head->queue);
222 	if (bp != NULL)
223 		bioq_remove(head, bp);
224 	return (bp);
225 }
226 
227 /*
228  * Compute the sorting key. The cast to unsigned is
229  * fundamental for correctness, see the description
230  * near the beginning of the file.
231  */
232 static inline uoff_t
bioq_bio_key(struct bio_queue_head * head,struct bio * bp)233 bioq_bio_key(struct bio_queue_head *head, struct bio *bp)
234 {
235 
236 	return ((uoff_t)(bp->bio_offset - head->last_offset));
237 }
238 
239 /*
240  * Seek sort for disks.
241  *
242  * Sort all requests in a single queue while keeping
243  * track of the current position of the disk with last_offset.
244  * See above for details.
245  */
246 void
bioq_disksort(struct bio_queue_head * head,struct bio * bp)247 bioq_disksort(struct bio_queue_head *head, struct bio *bp)
248 {
249 	struct bio *cur, *prev;
250 	uoff_t key;
251 
252 	if ((bp->bio_flags & BIO_ORDERED) != 0) {
253 		/*
254 		 * Ordered transactions can only be dispatched
255 		 * after any currently queued transactions.  They
256 		 * also have barrier semantics - no transactions
257 		 * queued in the future can pass them.
258 		 */
259 		bioq_insert_tail(head, bp);
260 		return;
261 	}
262 
263 	/*
264 	 * We should only sort requests of types that have concept of offset.
265 	 * Other types, such as BIO_FLUSH or BIO_ZONE, may imply some degree
266 	 * of ordering even if strict ordering is not requested explicitly.
267 	 */
268 	if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
269 	    bp->bio_cmd != BIO_DELETE) {
270 		bioq_insert_tail(head, bp);
271 		return;
272 	}
273 
274 	if (bioq_batchsize > 0 && head->batched > bioq_batchsize) {
275 		bioq_insert_tail(head, bp);
276 		return;
277 	}
278 
279 	prev = NULL;
280 	key = bioq_bio_key(head, bp);
281 	cur = TAILQ_FIRST(&head->queue);
282 
283 	if (head->insert_point) {
284 		prev = head->insert_point;
285 		cur = TAILQ_NEXT(head->insert_point, bio_queue);
286 	}
287 
288 	while (cur != NULL && key >= bioq_bio_key(head, cur)) {
289 		prev = cur;
290 		cur = TAILQ_NEXT(cur, bio_queue);
291 	}
292 
293 	if (prev == NULL)
294 		TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
295 	else
296 		TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue);
297 	head->total++;
298 	head->batched++;
299 }
300