19454b2d8SWarner Losh /*- 264de3fddSPedro F. Giffuni * SPDX-License-Identifier: Beerware 364de3fddSPedro F. Giffuni * 4da9e4f55SPoul-Henning Kamp * ---------------------------------------------------------------------------- 5da9e4f55SPoul-Henning Kamp * "THE BEER-WARE LICENSE" (Revision 42): 6da9e4f55SPoul-Henning Kamp * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 7da9e4f55SPoul-Henning Kamp * can do whatever you want with this stuff. If we meet some day, and you think 8da9e4f55SPoul-Henning Kamp * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 9da9e4f55SPoul-Henning Kamp * ---------------------------------------------------------------------------- 10d4619572SLuigi Rizzo * 11d4619572SLuigi Rizzo * The bioq_disksort() (and the specification of the bioq API) 12d4619572SLuigi Rizzo * have been written by Luigi Rizzo and Fabio Checconi under the same 13d4619572SLuigi Rizzo * license as above. 14da9e4f55SPoul-Henning Kamp */ 15da9e4f55SPoul-Henning Kamp 16677b542eSDavid E. O'Brien #include <sys/cdefs.h> 17677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$"); 18677b542eSDavid E. O'Brien 19417fb7f6SPoul-Henning Kamp #include "opt_geom.h" 20417fb7f6SPoul-Henning Kamp 21da9e4f55SPoul-Henning Kamp #include <sys/param.h> 22da9e4f55SPoul-Henning Kamp #include <sys/systm.h> 239626b608SPoul-Henning Kamp #include <sys/bio.h> 24da9e4f55SPoul-Henning Kamp #include <sys/conf.h> 25da9e4f55SPoul-Henning Kamp #include <sys/disk.h> 26a971acbcSWarner Losh #include <sys/sysctl.h> 2781750927SPoul-Henning Kamp #include <geom/geom_disk.h> 28f90c382cSPoul-Henning Kamp 29a971acbcSWarner Losh static int bioq_batchsize = 0; 30a971acbcSWarner Losh SYSCTL_INT(_debug, OID_AUTO, bioq_batchsize, CTLFLAG_RW, 31a971acbcSWarner Losh &bioq_batchsize, 0, "BIOQ batch size"); 32a971acbcSWarner Losh 331a996ed1SEdward Tomasz Napierala /*- 34f90c382cSPoul-Henning Kamp * Disk error is the preface to plaintive error messages 35f90c382cSPoul-Henning Kamp * about failing disk transfers. It prints messages of the form 36f90c382cSPoul-Henning Kamp * "hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347" 37f90c382cSPoul-Henning Kamp * blkdone should be -1 if the position of the error is unknown. 38f90c382cSPoul-Henning Kamp * The message is printed with printf. 39f90c382cSPoul-Henning Kamp */ 40f90c382cSPoul-Henning Kamp void 41f90c382cSPoul-Henning Kamp disk_err(struct bio *bp, const char *what, int blkdone, int nl) 42f90c382cSPoul-Henning Kamp { 43f90c382cSPoul-Henning Kamp daddr_t sn; 44f90c382cSPoul-Henning Kamp 45a9463ba8SPoul-Henning Kamp if (bp->bio_dev != NULL) 46f90c382cSPoul-Henning Kamp printf("%s: %s ", devtoname(bp->bio_dev), what); 47a9463ba8SPoul-Henning Kamp else if (bp->bio_disk != NULL) 48a9463ba8SPoul-Henning Kamp printf("%s%d: %s ", 49a9463ba8SPoul-Henning Kamp bp->bio_disk->d_name, bp->bio_disk->d_unit, what); 50a9463ba8SPoul-Henning Kamp else 51a9463ba8SPoul-Henning Kamp printf("disk??: %s ", what); 52f90c382cSPoul-Henning Kamp switch(bp->bio_cmd) { 53f90c382cSPoul-Henning Kamp case BIO_READ: printf("cmd=read "); break; 54f90c382cSPoul-Henning Kamp case BIO_WRITE: printf("cmd=write "); break; 55f90c382cSPoul-Henning Kamp case BIO_DELETE: printf("cmd=delete "); break; 56f90c382cSPoul-Henning Kamp case BIO_GETATTR: printf("cmd=getattr "); break; 57c3618c65SPawel Jakub Dawidek case BIO_FLUSH: printf("cmd=flush "); break; 58f90c382cSPoul-Henning Kamp default: printf("cmd=%x ", bp->bio_cmd); break; 59f90c382cSPoul-Henning Kamp } 601ad9172fSPoul-Henning Kamp sn = bp->bio_pblkno; 61f90c382cSPoul-Henning Kamp if (bp->bio_bcount <= DEV_BSIZE) { 62f90c382cSPoul-Henning Kamp printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : ""); 63f90c382cSPoul-Henning Kamp return; 64f90c382cSPoul-Henning Kamp } 65f90c382cSPoul-Henning Kamp if (blkdone >= 0) { 66f90c382cSPoul-Henning Kamp sn += blkdone; 67f90c382cSPoul-Henning Kamp printf("fsbn %jd of ", (intmax_t)sn); 68f90c382cSPoul-Henning Kamp } 691ad9172fSPoul-Henning Kamp printf("%jd-%jd", (intmax_t)bp->bio_pblkno, 701ad9172fSPoul-Henning Kamp (intmax_t)(bp->bio_pblkno + (bp->bio_bcount - 1) / DEV_BSIZE)); 71f90c382cSPoul-Henning Kamp if (nl) 72f90c382cSPoul-Henning Kamp printf("\n"); 73f90c382cSPoul-Henning Kamp } 742382fb0aSPoul-Henning Kamp 752382fb0aSPoul-Henning Kamp /* 76d086f85aSPoul-Henning Kamp * BIO queue implementation 77d4619572SLuigi Rizzo * 78d4619572SLuigi Rizzo * Please read carefully the description below before making any change 79d4619572SLuigi Rizzo * to the code, or you might change the behaviour of the data structure 80d4619572SLuigi Rizzo * in undesirable ways. 81d4619572SLuigi Rizzo * 82d4619572SLuigi Rizzo * A bioq stores disk I/O request (bio), normally sorted according to 83d4619572SLuigi Rizzo * the distance of the requested position (bio->bio_offset) from the 84d4619572SLuigi Rizzo * current head position (bioq->last_offset) in the scan direction, i.e. 85d4619572SLuigi Rizzo * 86d4619572SLuigi Rizzo * (uoff_t)(bio_offset - last_offset) 87d4619572SLuigi Rizzo * 88d4619572SLuigi Rizzo * Note that the cast to unsigned (uoff_t) is fundamental to insure 89d4619572SLuigi Rizzo * that the distance is computed in the scan direction. 90d4619572SLuigi Rizzo * 91d4619572SLuigi Rizzo * The main methods for manipulating the bioq are: 92d4619572SLuigi Rizzo * 93d4619572SLuigi Rizzo * bioq_disksort() performs an ordered insertion; 94d4619572SLuigi Rizzo * 95d4619572SLuigi Rizzo * bioq_first() return the head of the queue, without removing; 96d4619572SLuigi Rizzo * 97d4619572SLuigi Rizzo * bioq_takefirst() return and remove the head of the queue, 98d4619572SLuigi Rizzo * updating the 'current head position' as 99d4619572SLuigi Rizzo * bioq->last_offset = bio->bio_offset + bio->bio_length; 100d4619572SLuigi Rizzo * 101d4619572SLuigi Rizzo * When updating the 'current head position', we assume that the result of 102d4619572SLuigi Rizzo * bioq_takefirst() is dispatched to the device, so bioq->last_offset 103d4619572SLuigi Rizzo * represents the head position once the request is complete. 104d4619572SLuigi Rizzo * 105d4619572SLuigi Rizzo * If the bioq is manipulated using only the above calls, it starts 106d4619572SLuigi Rizzo * with a sorted sequence of requests with bio_offset >= last_offset, 107d4619572SLuigi Rizzo * possibly followed by another sorted sequence of requests with 108d4619572SLuigi Rizzo * 0 <= bio_offset < bioq->last_offset 109d4619572SLuigi Rizzo * 110d4619572SLuigi Rizzo * NOTE: historical behaviour was to ignore bio->bio_length in the 111d4619572SLuigi Rizzo * update, but its use tracks the head position in a better way. 112d4619572SLuigi Rizzo * Historical behaviour was also to update the head position when 113d4619572SLuigi Rizzo * the request under service is complete, rather than when the 114d4619572SLuigi Rizzo * request is extracted from the queue. However, the current API 115d4619572SLuigi Rizzo * has no method to update the head position; secondly, once 116d4619572SLuigi Rizzo * a request has been submitted to the disk, we have no idea of 117d4619572SLuigi Rizzo * the actual head position, so the final one is our best guess. 118d4619572SLuigi Rizzo * 119d4619572SLuigi Rizzo * --- Direct queue manipulation --- 120d4619572SLuigi Rizzo * 121d4619572SLuigi Rizzo * A bioq uses an underlying TAILQ to store requests, so we also 122d4619572SLuigi Rizzo * export methods to manipulate the TAILQ, in particular: 123d4619572SLuigi Rizzo * 124d4619572SLuigi Rizzo * bioq_insert_tail() insert an entry at the end. 125d4619572SLuigi Rizzo * It also creates a 'barrier' so all subsequent 126d4619572SLuigi Rizzo * insertions through bioq_disksort() will end up 127d4619572SLuigi Rizzo * after this entry; 128d4619572SLuigi Rizzo * 129d4619572SLuigi Rizzo * bioq_insert_head() insert an entry at the head, update 130d4619572SLuigi Rizzo * bioq->last_offset = bio->bio_offset so that 131d4619572SLuigi Rizzo * all subsequent insertions through bioq_disksort() 132d4619572SLuigi Rizzo * will end up after this entry; 133d4619572SLuigi Rizzo * 134d4619572SLuigi Rizzo * bioq_remove() remove a generic element from the queue, act as 135d4619572SLuigi Rizzo * bioq_takefirst() if invoked on the head of the queue. 136d4619572SLuigi Rizzo * 137f03f7a0cSJustin T. Gibbs * The semantic of these methods is the same as the operations 138d4619572SLuigi Rizzo * on the underlying TAILQ, but with additional guarantees on 139d4619572SLuigi Rizzo * subsequent bioq_disksort() calls. E.g. bioq_insert_tail() 140d4619572SLuigi Rizzo * can be useful for making sure that all previous ops are flushed 141d4619572SLuigi Rizzo * to disk before continuing. 142d4619572SLuigi Rizzo * 143d4619572SLuigi Rizzo * Updating bioq->last_offset on a bioq_insert_head() guarantees 144d4619572SLuigi Rizzo * that the bio inserted with the last bioq_insert_head() will stay 145d4619572SLuigi Rizzo * at the head of the queue even after subsequent bioq_disksort(). 146d4619572SLuigi Rizzo * 147d4619572SLuigi Rizzo * Note that when the direct queue manipulation functions are used, 148d4619572SLuigi Rizzo * the queue may contain multiple inversion points (i.e. more than 149d4619572SLuigi Rizzo * two sorted sequences of requests). 150d4619572SLuigi Rizzo * 151d086f85aSPoul-Henning Kamp */ 152d086f85aSPoul-Henning Kamp 153d086f85aSPoul-Henning Kamp void 154d086f85aSPoul-Henning Kamp bioq_init(struct bio_queue_head *head) 155d086f85aSPoul-Henning Kamp { 156d4619572SLuigi Rizzo 157d086f85aSPoul-Henning Kamp TAILQ_INIT(&head->queue); 1584cb4df48SPoul-Henning Kamp head->last_offset = 0; 159d086f85aSPoul-Henning Kamp head->insert_point = NULL; 160a971acbcSWarner Losh head->total = 0; 161a971acbcSWarner Losh head->batched = 0; 162d086f85aSPoul-Henning Kamp } 163d086f85aSPoul-Henning Kamp 164d086f85aSPoul-Henning Kamp void 165d086f85aSPoul-Henning Kamp bioq_remove(struct bio_queue_head *head, struct bio *bp) 166d086f85aSPoul-Henning Kamp { 167d4619572SLuigi Rizzo 168f03f7a0cSJustin T. Gibbs if (head->insert_point == NULL) { 169d4619572SLuigi Rizzo if (bp == TAILQ_FIRST(&head->queue)) 170d4619572SLuigi Rizzo head->last_offset = bp->bio_offset + bp->bio_length; 171f03f7a0cSJustin T. Gibbs } else if (bp == head->insert_point) 172d4619572SLuigi Rizzo head->insert_point = NULL; 173d4619572SLuigi Rizzo 174d086f85aSPoul-Henning Kamp TAILQ_REMOVE(&head->queue, bp, bio_queue); 175a971acbcSWarner Losh head->total--; 176d086f85aSPoul-Henning Kamp } 177af6ca7f4SPoul-Henning Kamp 178af6ca7f4SPoul-Henning Kamp void 179af6ca7f4SPoul-Henning Kamp bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error) 180af6ca7f4SPoul-Henning Kamp { 181af6ca7f4SPoul-Henning Kamp struct bio *bp; 182af6ca7f4SPoul-Henning Kamp 183d298f919SPoul-Henning Kamp while ((bp = bioq_takefirst(head)) != NULL) 184b8404473SPoul-Henning Kamp biofinish(bp, stp, error); 185af6ca7f4SPoul-Henning Kamp } 186af6ca7f4SPoul-Henning Kamp 187d086f85aSPoul-Henning Kamp void 188bf484316SPawel Jakub Dawidek bioq_insert_head(struct bio_queue_head *head, struct bio *bp) 189bf484316SPawel Jakub Dawidek { 190bf484316SPawel Jakub Dawidek 191f03f7a0cSJustin T. Gibbs if (head->insert_point == NULL) 192d4619572SLuigi Rizzo head->last_offset = bp->bio_offset; 193bf484316SPawel Jakub Dawidek TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 194a971acbcSWarner Losh head->total++; 195a971acbcSWarner Losh head->batched = 0; 196bf484316SPawel Jakub Dawidek } 197bf484316SPawel Jakub Dawidek 198bf484316SPawel Jakub Dawidek void 199d086f85aSPoul-Henning Kamp bioq_insert_tail(struct bio_queue_head *head, struct bio *bp) 200d086f85aSPoul-Henning Kamp { 201d086f85aSPoul-Henning Kamp 202d086f85aSPoul-Henning Kamp TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue); 203a971acbcSWarner Losh head->total++; 204d4619572SLuigi Rizzo head->insert_point = bp; 205f03f7a0cSJustin T. Gibbs head->last_offset = bp->bio_offset; 206d086f85aSPoul-Henning Kamp } 207d086f85aSPoul-Henning Kamp 208d086f85aSPoul-Henning Kamp struct bio * 209d086f85aSPoul-Henning Kamp bioq_first(struct bio_queue_head *head) 210d086f85aSPoul-Henning Kamp { 211d086f85aSPoul-Henning Kamp 212d086f85aSPoul-Henning Kamp return (TAILQ_FIRST(&head->queue)); 213d086f85aSPoul-Henning Kamp } 214d086f85aSPoul-Henning Kamp 215d298f919SPoul-Henning Kamp struct bio * 216d298f919SPoul-Henning Kamp bioq_takefirst(struct bio_queue_head *head) 217d298f919SPoul-Henning Kamp { 218d298f919SPoul-Henning Kamp struct bio *bp; 219d298f919SPoul-Henning Kamp 220d298f919SPoul-Henning Kamp bp = TAILQ_FIRST(&head->queue); 221d298f919SPoul-Henning Kamp if (bp != NULL) 222d298f919SPoul-Henning Kamp bioq_remove(head, bp); 223d298f919SPoul-Henning Kamp return (bp); 224d298f919SPoul-Henning Kamp } 225d086f85aSPoul-Henning Kamp 226d086f85aSPoul-Henning Kamp /* 227d4619572SLuigi Rizzo * Compute the sorting key. The cast to unsigned is 228d4619572SLuigi Rizzo * fundamental for correctness, see the description 229d4619572SLuigi Rizzo * near the beginning of the file. 2302382fb0aSPoul-Henning Kamp */ 231d4619572SLuigi Rizzo static inline uoff_t 232d4619572SLuigi Rizzo bioq_bio_key(struct bio_queue_head *head, struct bio *bp) 2332382fb0aSPoul-Henning Kamp { 234d4619572SLuigi Rizzo 235d4619572SLuigi Rizzo return ((uoff_t)(bp->bio_offset - head->last_offset)); 236d4619572SLuigi Rizzo } 2372382fb0aSPoul-Henning Kamp 2382382fb0aSPoul-Henning Kamp /* 239d4619572SLuigi Rizzo * Seek sort for disks. 240d4619572SLuigi Rizzo * 241d4619572SLuigi Rizzo * Sort all requests in a single queue while keeping 242d4619572SLuigi Rizzo * track of the current position of the disk with last_offset. 243d4619572SLuigi Rizzo * See above for details. 2442382fb0aSPoul-Henning Kamp */ 245d4619572SLuigi Rizzo void 246d4619572SLuigi Rizzo bioq_disksort(struct bio_queue_head *head, struct bio *bp) 247d4619572SLuigi Rizzo { 248f03f7a0cSJustin T. Gibbs struct bio *cur, *prev; 249f03f7a0cSJustin T. Gibbs uoff_t key; 250d4619572SLuigi Rizzo 251f03f7a0cSJustin T. Gibbs if ((bp->bio_flags & BIO_ORDERED) != 0) { 252f03f7a0cSJustin T. Gibbs /* 253f03f7a0cSJustin T. Gibbs * Ordered transactions can only be dispatched 254f03f7a0cSJustin T. Gibbs * after any currently queued transactions. They 255f03f7a0cSJustin T. Gibbs * also have barrier semantics - no transactions 256f03f7a0cSJustin T. Gibbs * queued in the future can pass them. 257f03f7a0cSJustin T. Gibbs */ 258f03f7a0cSJustin T. Gibbs bioq_insert_tail(head, bp); 259f03f7a0cSJustin T. Gibbs return; 260f03f7a0cSJustin T. Gibbs } 261f03f7a0cSJustin T. Gibbs 262*6afd9210SAlexander Motin /* 263*6afd9210SAlexander Motin * We should only sort requests of types that have concept of offset. 264*6afd9210SAlexander Motin * Other types, such as BIO_FLUSH or BIO_ZONE, may imply some degree 265*6afd9210SAlexander Motin * of ordering even if strict ordering is not requested explicitly. 266*6afd9210SAlexander Motin */ 267*6afd9210SAlexander Motin if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE && 268*6afd9210SAlexander Motin bp->bio_cmd != BIO_DELETE) { 269*6afd9210SAlexander Motin bioq_insert_tail(head, bp); 270*6afd9210SAlexander Motin return; 271*6afd9210SAlexander Motin } 272*6afd9210SAlexander Motin 273a971acbcSWarner Losh if (bioq_batchsize > 0 && head->batched > bioq_batchsize) { 274a971acbcSWarner Losh bioq_insert_tail(head, bp); 275a971acbcSWarner Losh return; 276a971acbcSWarner Losh } 277a971acbcSWarner Losh 278f03f7a0cSJustin T. Gibbs prev = NULL; 279f03f7a0cSJustin T. Gibbs key = bioq_bio_key(head, bp); 280d4619572SLuigi Rizzo cur = TAILQ_FIRST(&head->queue); 281d4619572SLuigi Rizzo 282f03f7a0cSJustin T. Gibbs if (head->insert_point) { 283f03f7a0cSJustin T. Gibbs prev = head->insert_point; 284f03f7a0cSJustin T. Gibbs cur = TAILQ_NEXT(head->insert_point, bio_queue); 285f03f7a0cSJustin T. Gibbs } 286d4619572SLuigi Rizzo 287d4619572SLuigi Rizzo while (cur != NULL && key >= bioq_bio_key(head, cur)) { 288d4619572SLuigi Rizzo prev = cur; 289d4619572SLuigi Rizzo cur = TAILQ_NEXT(cur, bio_queue); 2902382fb0aSPoul-Henning Kamp } 291d4619572SLuigi Rizzo 292d4619572SLuigi Rizzo if (prev == NULL) 293d4619572SLuigi Rizzo TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 294d4619572SLuigi Rizzo else 295d4619572SLuigi Rizzo TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue); 296a971acbcSWarner Losh head->total++; 297a971acbcSWarner Losh head->batched++; 2982382fb0aSPoul-Henning Kamp } 299