19454b2d8SWarner Losh /*-
264de3fddSPedro F. Giffuni * SPDX-License-Identifier: Beerware
364de3fddSPedro F. Giffuni *
4da9e4f55SPoul-Henning Kamp * ----------------------------------------------------------------------------
5da9e4f55SPoul-Henning Kamp * "THE BEER-WARE LICENSE" (Revision 42):
6da9e4f55SPoul-Henning Kamp * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you
7da9e4f55SPoul-Henning Kamp * can do whatever you want with this stuff. If we meet some day, and you think
8da9e4f55SPoul-Henning Kamp * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp
9da9e4f55SPoul-Henning Kamp * ----------------------------------------------------------------------------
10d4619572SLuigi Rizzo *
11d4619572SLuigi Rizzo * The bioq_disksort() (and the specification of the bioq API)
12d4619572SLuigi Rizzo * have been written by Luigi Rizzo and Fabio Checconi under the same
13d4619572SLuigi Rizzo * license as above.
14da9e4f55SPoul-Henning Kamp */
15da9e4f55SPoul-Henning Kamp
16677b542eSDavid E. O'Brien #include <sys/cdefs.h>
17417fb7f6SPoul-Henning Kamp #include "opt_geom.h"
18417fb7f6SPoul-Henning Kamp
19da9e4f55SPoul-Henning Kamp #include <sys/param.h>
20da9e4f55SPoul-Henning Kamp #include <sys/systm.h>
219626b608SPoul-Henning Kamp #include <sys/bio.h>
22da9e4f55SPoul-Henning Kamp #include <sys/conf.h>
23da9e4f55SPoul-Henning Kamp #include <sys/disk.h>
24a971acbcSWarner Losh #include <sys/sysctl.h>
2581750927SPoul-Henning Kamp #include <geom/geom_disk.h>
26f90c382cSPoul-Henning Kamp
27*3c0177b8SAlexander Motin static int bioq_batchsize = 128;
28a971acbcSWarner Losh SYSCTL_INT(_debug, OID_AUTO, bioq_batchsize, CTLFLAG_RW,
29a971acbcSWarner Losh &bioq_batchsize, 0, "BIOQ batch size");
30a971acbcSWarner Losh
311a996ed1SEdward Tomasz Napierala /*-
32f90c382cSPoul-Henning Kamp * Disk error is the preface to plaintive error messages
33f90c382cSPoul-Henning Kamp * about failing disk transfers. It prints messages of the form
34f90c382cSPoul-Henning Kamp * "hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347"
35f90c382cSPoul-Henning Kamp * blkdone should be -1 if the position of the error is unknown.
36f90c382cSPoul-Henning Kamp * The message is printed with printf.
37f90c382cSPoul-Henning Kamp */
38f90c382cSPoul-Henning Kamp void
disk_err(struct bio * bp,const char * what,int blkdone,int nl)39f90c382cSPoul-Henning Kamp disk_err(struct bio *bp, const char *what, int blkdone, int nl)
40f90c382cSPoul-Henning Kamp {
41f90c382cSPoul-Henning Kamp daddr_t sn;
42f90c382cSPoul-Henning Kamp
43a9463ba8SPoul-Henning Kamp if (bp->bio_dev != NULL)
44f90c382cSPoul-Henning Kamp printf("%s: %s ", devtoname(bp->bio_dev), what);
45a9463ba8SPoul-Henning Kamp else if (bp->bio_disk != NULL)
46a9463ba8SPoul-Henning Kamp printf("%s%d: %s ",
47a9463ba8SPoul-Henning Kamp bp->bio_disk->d_name, bp->bio_disk->d_unit, what);
48a9463ba8SPoul-Henning Kamp else
49a9463ba8SPoul-Henning Kamp printf("disk??: %s ", what);
50f90c382cSPoul-Henning Kamp switch(bp->bio_cmd) {
51f90c382cSPoul-Henning Kamp case BIO_READ: printf("cmd=read "); break;
52f90c382cSPoul-Henning Kamp case BIO_WRITE: printf("cmd=write "); break;
53f90c382cSPoul-Henning Kamp case BIO_DELETE: printf("cmd=delete "); break;
54f90c382cSPoul-Henning Kamp case BIO_GETATTR: printf("cmd=getattr "); break;
55c3618c65SPawel Jakub Dawidek case BIO_FLUSH: printf("cmd=flush "); break;
56f90c382cSPoul-Henning Kamp default: printf("cmd=%x ", bp->bio_cmd); break;
57f90c382cSPoul-Henning Kamp }
581ad9172fSPoul-Henning Kamp sn = bp->bio_pblkno;
59f90c382cSPoul-Henning Kamp if (bp->bio_bcount <= DEV_BSIZE) {
60f90c382cSPoul-Henning Kamp printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : "");
61f90c382cSPoul-Henning Kamp return;
62f90c382cSPoul-Henning Kamp }
63f90c382cSPoul-Henning Kamp if (blkdone >= 0) {
64f90c382cSPoul-Henning Kamp sn += blkdone;
65f90c382cSPoul-Henning Kamp printf("fsbn %jd of ", (intmax_t)sn);
66f90c382cSPoul-Henning Kamp }
671ad9172fSPoul-Henning Kamp printf("%jd-%jd", (intmax_t)bp->bio_pblkno,
681ad9172fSPoul-Henning Kamp (intmax_t)(bp->bio_pblkno + (bp->bio_bcount - 1) / DEV_BSIZE));
69f90c382cSPoul-Henning Kamp if (nl)
70f90c382cSPoul-Henning Kamp printf("\n");
71f90c382cSPoul-Henning Kamp }
722382fb0aSPoul-Henning Kamp
732382fb0aSPoul-Henning Kamp /*
74d086f85aSPoul-Henning Kamp * BIO queue implementation
75d4619572SLuigi Rizzo *
76d4619572SLuigi Rizzo * Please read carefully the description below before making any change
77d4619572SLuigi Rizzo * to the code, or you might change the behaviour of the data structure
78d4619572SLuigi Rizzo * in undesirable ways.
79d4619572SLuigi Rizzo *
 * A bioq stores disk I/O requests (bios), normally sorted according to
 * the distance of the requested position (bio->bio_offset) from the
 * current head position (bioq->last_offset) in the scan direction, i.e.
83d4619572SLuigi Rizzo *
84d4619572SLuigi Rizzo * (uoff_t)(bio_offset - last_offset)
85d4619572SLuigi Rizzo *
 * Note that the cast to unsigned (uoff_t) is fundamental to ensure
 * that the distance is computed in the scan direction.
88d4619572SLuigi Rizzo *
89d4619572SLuigi Rizzo * The main methods for manipulating the bioq are:
90d4619572SLuigi Rizzo *
91d4619572SLuigi Rizzo * bioq_disksort() performs an ordered insertion;
92d4619572SLuigi Rizzo *
93d4619572SLuigi Rizzo * bioq_first() return the head of the queue, without removing;
94d4619572SLuigi Rizzo *
95d4619572SLuigi Rizzo * bioq_takefirst() return and remove the head of the queue,
96d4619572SLuigi Rizzo * updating the 'current head position' as
97d4619572SLuigi Rizzo * bioq->last_offset = bio->bio_offset + bio->bio_length;
98d4619572SLuigi Rizzo *
99d4619572SLuigi Rizzo * When updating the 'current head position', we assume that the result of
100d4619572SLuigi Rizzo * bioq_takefirst() is dispatched to the device, so bioq->last_offset
101d4619572SLuigi Rizzo * represents the head position once the request is complete.
102d4619572SLuigi Rizzo *
103d4619572SLuigi Rizzo * If the bioq is manipulated using only the above calls, it starts
104d4619572SLuigi Rizzo * with a sorted sequence of requests with bio_offset >= last_offset,
105d4619572SLuigi Rizzo * possibly followed by another sorted sequence of requests with
106d4619572SLuigi Rizzo * 0 <= bio_offset < bioq->last_offset
107d4619572SLuigi Rizzo *
108d4619572SLuigi Rizzo * NOTE: historical behaviour was to ignore bio->bio_length in the
109d4619572SLuigi Rizzo * update, but its use tracks the head position in a better way.
110d4619572SLuigi Rizzo * Historical behaviour was also to update the head position when
111d4619572SLuigi Rizzo * the request under service is complete, rather than when the
112d4619572SLuigi Rizzo * request is extracted from the queue. However, the current API
113d4619572SLuigi Rizzo * has no method to update the head position; secondly, once
114d4619572SLuigi Rizzo * a request has been submitted to the disk, we have no idea of
115d4619572SLuigi Rizzo * the actual head position, so the final one is our best guess.
116d4619572SLuigi Rizzo *
117d4619572SLuigi Rizzo * --- Direct queue manipulation ---
118d4619572SLuigi Rizzo *
119d4619572SLuigi Rizzo * A bioq uses an underlying TAILQ to store requests, so we also
120d4619572SLuigi Rizzo * export methods to manipulate the TAILQ, in particular:
121d4619572SLuigi Rizzo *
122d4619572SLuigi Rizzo * bioq_insert_tail() insert an entry at the end.
123d4619572SLuigi Rizzo * It also creates a 'barrier' so all subsequent
124d4619572SLuigi Rizzo * insertions through bioq_disksort() will end up
125d4619572SLuigi Rizzo * after this entry;
126d4619572SLuigi Rizzo *
127d4619572SLuigi Rizzo * bioq_insert_head() insert an entry at the head, update
128d4619572SLuigi Rizzo * bioq->last_offset = bio->bio_offset so that
129d4619572SLuigi Rizzo * all subsequent insertions through bioq_disksort()
130d4619572SLuigi Rizzo * will end up after this entry;
131d4619572SLuigi Rizzo *
132d4619572SLuigi Rizzo * bioq_remove() remove a generic element from the queue, act as
133d4619572SLuigi Rizzo * bioq_takefirst() if invoked on the head of the queue.
134d4619572SLuigi Rizzo *
 * The semantics of these methods are the same as the operations
 * on the underlying TAILQ, but with additional guarantees on
137d4619572SLuigi Rizzo * subsequent bioq_disksort() calls. E.g. bioq_insert_tail()
138d4619572SLuigi Rizzo * can be useful for making sure that all previous ops are flushed
139d4619572SLuigi Rizzo * to disk before continuing.
140d4619572SLuigi Rizzo *
141d4619572SLuigi Rizzo * Updating bioq->last_offset on a bioq_insert_head() guarantees
142d4619572SLuigi Rizzo * that the bio inserted with the last bioq_insert_head() will stay
143d4619572SLuigi Rizzo * at the head of the queue even after subsequent bioq_disksort().
144d4619572SLuigi Rizzo *
145d4619572SLuigi Rizzo * Note that when the direct queue manipulation functions are used,
146d4619572SLuigi Rizzo * the queue may contain multiple inversion points (i.e. more than
147d4619572SLuigi Rizzo * two sorted sequences of requests).
148d4619572SLuigi Rizzo *
149d086f85aSPoul-Henning Kamp */
150d086f85aSPoul-Henning Kamp
151d086f85aSPoul-Henning Kamp void
bioq_init(struct bio_queue_head * head)152d086f85aSPoul-Henning Kamp bioq_init(struct bio_queue_head *head)
153d086f85aSPoul-Henning Kamp {
154d4619572SLuigi Rizzo
155d086f85aSPoul-Henning Kamp TAILQ_INIT(&head->queue);
1564cb4df48SPoul-Henning Kamp head->last_offset = 0;
157d086f85aSPoul-Henning Kamp head->insert_point = NULL;
158a971acbcSWarner Losh head->total = 0;
159a971acbcSWarner Losh head->batched = 0;
160d086f85aSPoul-Henning Kamp }
161d086f85aSPoul-Henning Kamp
162d086f85aSPoul-Henning Kamp void
bioq_remove(struct bio_queue_head * head,struct bio * bp)163d086f85aSPoul-Henning Kamp bioq_remove(struct bio_queue_head *head, struct bio *bp)
164d086f85aSPoul-Henning Kamp {
165d4619572SLuigi Rizzo
166f03f7a0cSJustin T. Gibbs if (head->insert_point == NULL) {
167d4619572SLuigi Rizzo if (bp == TAILQ_FIRST(&head->queue))
168d4619572SLuigi Rizzo head->last_offset = bp->bio_offset + bp->bio_length;
169f03f7a0cSJustin T. Gibbs } else if (bp == head->insert_point)
170d4619572SLuigi Rizzo head->insert_point = NULL;
171d4619572SLuigi Rizzo
172d086f85aSPoul-Henning Kamp TAILQ_REMOVE(&head->queue, bp, bio_queue);
173*3c0177b8SAlexander Motin if (TAILQ_EMPTY(&head->queue))
174*3c0177b8SAlexander Motin head->batched = 0;
175a971acbcSWarner Losh head->total--;
176d086f85aSPoul-Henning Kamp }
177af6ca7f4SPoul-Henning Kamp
178af6ca7f4SPoul-Henning Kamp void
bioq_flush(struct bio_queue_head * head,struct devstat * stp,int error)179af6ca7f4SPoul-Henning Kamp bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error)
180af6ca7f4SPoul-Henning Kamp {
181af6ca7f4SPoul-Henning Kamp struct bio *bp;
182af6ca7f4SPoul-Henning Kamp
183d298f919SPoul-Henning Kamp while ((bp = bioq_takefirst(head)) != NULL)
184b8404473SPoul-Henning Kamp biofinish(bp, stp, error);
185af6ca7f4SPoul-Henning Kamp }
186af6ca7f4SPoul-Henning Kamp
187d086f85aSPoul-Henning Kamp void
bioq_insert_head(struct bio_queue_head * head,struct bio * bp)188bf484316SPawel Jakub Dawidek bioq_insert_head(struct bio_queue_head *head, struct bio *bp)
189bf484316SPawel Jakub Dawidek {
190bf484316SPawel Jakub Dawidek
191f03f7a0cSJustin T. Gibbs if (head->insert_point == NULL)
192d4619572SLuigi Rizzo head->last_offset = bp->bio_offset;
193bf484316SPawel Jakub Dawidek TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
194a971acbcSWarner Losh head->total++;
195a971acbcSWarner Losh head->batched = 0;
196bf484316SPawel Jakub Dawidek }
197bf484316SPawel Jakub Dawidek
198bf484316SPawel Jakub Dawidek void
bioq_insert_tail(struct bio_queue_head * head,struct bio * bp)199d086f85aSPoul-Henning Kamp bioq_insert_tail(struct bio_queue_head *head, struct bio *bp)
200d086f85aSPoul-Henning Kamp {
201d086f85aSPoul-Henning Kamp
202d086f85aSPoul-Henning Kamp TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue);
203a971acbcSWarner Losh head->total++;
2047e48d711SWarner Losh head->batched = 0;
205d4619572SLuigi Rizzo head->insert_point = bp;
206f03f7a0cSJustin T. Gibbs head->last_offset = bp->bio_offset;
207d086f85aSPoul-Henning Kamp }
208d086f85aSPoul-Henning Kamp
209d086f85aSPoul-Henning Kamp struct bio *
bioq_first(struct bio_queue_head * head)210d086f85aSPoul-Henning Kamp bioq_first(struct bio_queue_head *head)
211d086f85aSPoul-Henning Kamp {
212d086f85aSPoul-Henning Kamp
213d086f85aSPoul-Henning Kamp return (TAILQ_FIRST(&head->queue));
214d086f85aSPoul-Henning Kamp }
215d086f85aSPoul-Henning Kamp
216d298f919SPoul-Henning Kamp struct bio *
bioq_takefirst(struct bio_queue_head * head)217d298f919SPoul-Henning Kamp bioq_takefirst(struct bio_queue_head *head)
218d298f919SPoul-Henning Kamp {
219d298f919SPoul-Henning Kamp struct bio *bp;
220d298f919SPoul-Henning Kamp
221d298f919SPoul-Henning Kamp bp = TAILQ_FIRST(&head->queue);
222d298f919SPoul-Henning Kamp if (bp != NULL)
223d298f919SPoul-Henning Kamp bioq_remove(head, bp);
224d298f919SPoul-Henning Kamp return (bp);
225d298f919SPoul-Henning Kamp }
226d086f85aSPoul-Henning Kamp
227d086f85aSPoul-Henning Kamp /*
228d4619572SLuigi Rizzo * Compute the sorting key. The cast to unsigned is
229d4619572SLuigi Rizzo * fundamental for correctness, see the description
230d4619572SLuigi Rizzo * near the beginning of the file.
2312382fb0aSPoul-Henning Kamp */
232d4619572SLuigi Rizzo static inline uoff_t
bioq_bio_key(struct bio_queue_head * head,struct bio * bp)233d4619572SLuigi Rizzo bioq_bio_key(struct bio_queue_head *head, struct bio *bp)
2342382fb0aSPoul-Henning Kamp {
235d4619572SLuigi Rizzo
236d4619572SLuigi Rizzo return ((uoff_t)(bp->bio_offset - head->last_offset));
237d4619572SLuigi Rizzo }
2382382fb0aSPoul-Henning Kamp
2392382fb0aSPoul-Henning Kamp /*
240d4619572SLuigi Rizzo * Seek sort for disks.
241d4619572SLuigi Rizzo *
242d4619572SLuigi Rizzo * Sort all requests in a single queue while keeping
243d4619572SLuigi Rizzo * track of the current position of the disk with last_offset.
244d4619572SLuigi Rizzo * See above for details.
2452382fb0aSPoul-Henning Kamp */
246d4619572SLuigi Rizzo void
bioq_disksort(struct bio_queue_head * head,struct bio * bp)247d4619572SLuigi Rizzo bioq_disksort(struct bio_queue_head *head, struct bio *bp)
248d4619572SLuigi Rizzo {
249f03f7a0cSJustin T. Gibbs struct bio *cur, *prev;
250f03f7a0cSJustin T. Gibbs uoff_t key;
251d4619572SLuigi Rizzo
252f03f7a0cSJustin T. Gibbs if ((bp->bio_flags & BIO_ORDERED) != 0) {
253f03f7a0cSJustin T. Gibbs /*
254f03f7a0cSJustin T. Gibbs * Ordered transactions can only be dispatched
255f03f7a0cSJustin T. Gibbs * after any currently queued transactions. They
256f03f7a0cSJustin T. Gibbs * also have barrier semantics - no transactions
257f03f7a0cSJustin T. Gibbs * queued in the future can pass them.
258f03f7a0cSJustin T. Gibbs */
259f03f7a0cSJustin T. Gibbs bioq_insert_tail(head, bp);
260f03f7a0cSJustin T. Gibbs return;
261f03f7a0cSJustin T. Gibbs }
262f03f7a0cSJustin T. Gibbs
2636afd9210SAlexander Motin /*
2646afd9210SAlexander Motin * We should only sort requests of types that have concept of offset.
2656afd9210SAlexander Motin * Other types, such as BIO_FLUSH or BIO_ZONE, may imply some degree
2666afd9210SAlexander Motin * of ordering even if strict ordering is not requested explicitly.
2676afd9210SAlexander Motin */
2686afd9210SAlexander Motin if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
2696afd9210SAlexander Motin bp->bio_cmd != BIO_DELETE) {
2706afd9210SAlexander Motin bioq_insert_tail(head, bp);
2716afd9210SAlexander Motin return;
2726afd9210SAlexander Motin }
2736afd9210SAlexander Motin
274a971acbcSWarner Losh if (bioq_batchsize > 0 && head->batched > bioq_batchsize) {
275a971acbcSWarner Losh bioq_insert_tail(head, bp);
276a971acbcSWarner Losh return;
277a971acbcSWarner Losh }
278a971acbcSWarner Losh
279f03f7a0cSJustin T. Gibbs prev = NULL;
280f03f7a0cSJustin T. Gibbs key = bioq_bio_key(head, bp);
281d4619572SLuigi Rizzo cur = TAILQ_FIRST(&head->queue);
282d4619572SLuigi Rizzo
283f03f7a0cSJustin T. Gibbs if (head->insert_point) {
284f03f7a0cSJustin T. Gibbs prev = head->insert_point;
285f03f7a0cSJustin T. Gibbs cur = TAILQ_NEXT(head->insert_point, bio_queue);
286f03f7a0cSJustin T. Gibbs }
287d4619572SLuigi Rizzo
288d4619572SLuigi Rizzo while (cur != NULL && key >= bioq_bio_key(head, cur)) {
289d4619572SLuigi Rizzo prev = cur;
290d4619572SLuigi Rizzo cur = TAILQ_NEXT(cur, bio_queue);
2912382fb0aSPoul-Henning Kamp }
292d4619572SLuigi Rizzo
293d4619572SLuigi Rizzo if (prev == NULL)
294d4619572SLuigi Rizzo TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue);
295d4619572SLuigi Rizzo else
296d4619572SLuigi Rizzo TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue);
297a971acbcSWarner Losh head->total++;
298a971acbcSWarner Losh head->batched++;
2992382fb0aSPoul-Henning Kamp }
300