xref: /freebsd/sys/kern/subr_disk.c (revision b37f6c9805edb4b89f0a8c2b78f78a3dcfc0647b)
1 /*-
2  * SPDX-License-Identifier: Beerware
3  *
4  * ----------------------------------------------------------------------------
5  * "THE BEER-WARE LICENSE" (Revision 42):
6  * <phk@FreeBSD.ORG> wrote this file.  As long as you retain this notice you
7  * can do whatever you want with this stuff. If we meet some day, and you think
8  * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
9  * ----------------------------------------------------------------------------
10  *
11  * The bioq_disksort() (and the specification of the bioq API)
12  * have been written by Luigi Rizzo and Fabio Checconi under the same
13  * license as above.
14  */
15 
16 #include <sys/cdefs.h>
17 __FBSDID("$FreeBSD$");
18 
19 #include "opt_geom.h"
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <sys/bio.h>
24 #include <sys/conf.h>
25 #include <sys/disk.h>
26 #include <geom/geom_disk.h>
27 
28 /*-
29  * Disk error is the preface to plaintive error messages
30  * about failing disk transfers.  It prints messages of the form
31  * 	"hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347"
32  * blkdone should be -1 if the position of the error is unknown.
33  * The message is printed with printf.
34  */
35 void
36 disk_err(struct bio *bp, const char *what, int blkdone, int nl)
37 {
38 	daddr_t sn;
39 
40 	if (bp->bio_dev != NULL)
41 		printf("%s: %s ", devtoname(bp->bio_dev), what);
42 	else if (bp->bio_disk != NULL)
43 		printf("%s%d: %s ",
44 		    bp->bio_disk->d_name, bp->bio_disk->d_unit, what);
45 	else
46 		printf("disk??: %s ", what);
47 	switch(bp->bio_cmd) {
48 	case BIO_READ:		printf("cmd=read "); break;
49 	case BIO_WRITE:		printf("cmd=write "); break;
50 	case BIO_DELETE:	printf("cmd=delete "); break;
51 	case BIO_GETATTR:	printf("cmd=getattr "); break;
52 	case BIO_FLUSH:		printf("cmd=flush "); break;
53 	default:		printf("cmd=%x ", bp->bio_cmd); break;
54 	}
55 	sn = bp->bio_pblkno;
56 	if (bp->bio_bcount <= DEV_BSIZE) {
57 		printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : "");
58 		return;
59 	}
60 	if (blkdone >= 0) {
61 		sn += blkdone;
62 		printf("fsbn %jd of ", (intmax_t)sn);
63 	}
64 	printf("%jd-%jd", (intmax_t)bp->bio_pblkno,
65 	    (intmax_t)(bp->bio_pblkno + (bp->bio_bcount - 1) / DEV_BSIZE));
66 	if (nl)
67 		printf("\n");
68 }
69 
70 /*
71  * BIO queue implementation
72  *
73  * Please read carefully the description below before making any change
74  * to the code, or you might change the behaviour of the data structure
75  * in undesirable ways.
76  *
77  * A bioq stores disk I/O requests (bios), normally sorted according to
78  * the distance of the requested position (bio->bio_offset) from the
79  * current head position (bioq->last_offset) in the scan direction, i.e.
80  *
81  * 	(uoff_t)(bio_offset - last_offset)
82  *
83  * Note that the cast to unsigned (uoff_t) is fundamental to ensure
84  * that the distance is computed in the scan direction.
85  *
86  * The main methods for manipulating the bioq are:
87  *
88  *   bioq_disksort()	performs an ordered insertion;
89  *
90  *   bioq_first()	returns the head of the queue, without removing it;
91  *
92  *   bioq_takefirst()	returns and removes the head of the queue,
93  *		updating the 'current head position' as
94  *		bioq->last_offset = bio->bio_offset + bio->bio_length;
95  *
96  * When updating the 'current head position', we assume that the result of
97  * bioq_takefirst() is dispatched to the device, so bioq->last_offset
98  * represents the head position once the request is complete.
99  *
100  * If the bioq is manipulated using only the above calls, it starts
101  * with a sorted sequence of requests with bio_offset >= last_offset,
102  * possibly followed by another sorted sequence of requests with
103  * 0 <= bio_offset < bioq->last_offset
104  *
105  * NOTE: historical behaviour was to ignore bio->bio_length in the
106  *	update, but its use tracks the head position in a better way.
107  *	Historical behaviour was also to update the head position when
108  *	the request under service is complete, rather than when the
109  *	request is extracted from the queue. However, the current API
110  *	has no method to update the head position; secondly, once
111  *	a request has been submitted to the disk, we have no idea of
112  *	the actual head position, so the final one is our best guess.
113  *
114  * --- Direct queue manipulation ---
115  *
116  * A bioq uses an underlying TAILQ to store requests, so we also
117  * export methods to manipulate the TAILQ, in particular:
118  *
119  * bioq_insert_tail()	insert an entry at the end.
120  *		It also creates a 'barrier' so all subsequent
121  *		insertions through bioq_disksort() will end up
122  *		after this entry;
123  *
124  * bioq_insert_head()	insert an entry at the head, update
125  *		bioq->last_offset = bio->bio_offset so that
126  *		all subsequent insertions through bioq_disksort()
127  *		will end up after this entry;
128  *
129  * bioq_remove()	remove a generic element from the queue, act as
130  *		bioq_takefirst() if invoked on the head of the queue.
131  *
132  * The semantics of these methods are the same as the operations
133  * on the underlying TAILQ, but with additional guarantees on
134  * subsequent bioq_disksort() calls. E.g. bioq_insert_tail()
135  * can be useful for making sure that all previous ops are flushed
136  * to disk before continuing.
137  *
138  * Updating bioq->last_offset on a bioq_insert_head() guarantees
139  * that the bio inserted with the last bioq_insert_head() will stay
140  * at the head of the queue even after subsequent bioq_disksort().
141  *
142  * Note that when the direct queue manipulation functions are used,
143  * the queue may contain multiple inversion points (i.e. more than
144  * two sorted sequences of requests).
145  *
146  */
147 
148 void
149 bioq_init(struct bio_queue_head *head)
150 {
151 
152 	TAILQ_INIT(&head->queue);
153 	head->last_offset = 0;
154 	head->insert_point = NULL;
155 }
156 
157 void
158 bioq_remove(struct bio_queue_head *head, struct bio *bp)
159 {
160 
161 	if (head->insert_point == NULL) {
162 		if (bp == TAILQ_FIRST(&head->queue))
163 			head->last_offset = bp->bio_offset + bp->bio_length;
164 	} else if (bp == head->insert_point)
165 		head->insert_point = NULL;
166 
167 	TAILQ_REMOVE(&head->queue, bp, bio_queue);
168 }
169 
/*
 * Drain the queue: repeatedly take the request at the head and hand it
 * to biofinish() together with the caller's devstat pointer and error
 * value, until bioq_takefirst() reports that nothing is left.
 */
void
bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error)
{
	struct bio *req;

	for (;;) {
		req = bioq_takefirst(head);
		if (req == NULL)
			break;
		biofinish(req, stp, error);
	}
}
178 
179 void
180 bioq_insert_head(struct bio_queue_head *head, struct bio *bp)
181 {
182 
183 	if (head->insert_point == NULL)
184 		head->last_offset = bp->bio_offset;
185 	TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
186 }
187 
188 void
189 bioq_insert_tail(struct bio_queue_head *head, struct bio *bp)
190 {
191 
192 	TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue);
193 	head->insert_point = bp;
194 	head->last_offset = bp->bio_offset;
195 }
196 
197 struct bio *
198 bioq_first(struct bio_queue_head *head)
199 {
200 
201 	return (TAILQ_FIRST(&head->queue));
202 }
203 
204 struct bio *
205 bioq_takefirst(struct bio_queue_head *head)
206 {
207 	struct bio *bp;
208 
209 	bp = TAILQ_FIRST(&head->queue);
210 	if (bp != NULL)
211 		bioq_remove(head, bp);
212 	return (bp);
213 }
214 
215 /*
216  * Compute the sorting key. The cast to unsigned is
217  * fundamental for correctness, see the description
218  * near the beginning of the file.
219  */
220 static inline uoff_t
221 bioq_bio_key(struct bio_queue_head *head, struct bio *bp)
222 {
223 
224 	return ((uoff_t)(bp->bio_offset - head->last_offset));
225 }
226 
227 /*
228  * Seek sort for disks.
229  *
230  * Sort all requests in a single queue while keeping
231  * track of the current position of the disk with last_offset.
232  * See above for details.
233  */
234 void
235 bioq_disksort(struct bio_queue_head *head, struct bio *bp)
236 {
237 	struct bio *cur, *prev;
238 	uoff_t key;
239 
240 	if ((bp->bio_flags & BIO_ORDERED) != 0) {
241 		/*
242 		 * Ordered transactions can only be dispatched
243 		 * after any currently queued transactions.  They
244 		 * also have barrier semantics - no transactions
245 		 * queued in the future can pass them.
246 		 */
247 		bioq_insert_tail(head, bp);
248 		return;
249 	}
250 
251 	prev = NULL;
252 	key = bioq_bio_key(head, bp);
253 	cur = TAILQ_FIRST(&head->queue);
254 
255 	if (head->insert_point) {
256 		prev = head->insert_point;
257 		cur = TAILQ_NEXT(head->insert_point, bio_queue);
258 	}
259 
260 	while (cur != NULL && key >= bioq_bio_key(head, cur)) {
261 		prev = cur;
262 		cur = TAILQ_NEXT(cur, bio_queue);
263 	}
264 
265 	if (prev == NULL)
266 		TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
267 	else
268 		TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue);
269 }
270