1 /*- 2 * SPDX-License-Identifier: Beerware 3 * 4 * ---------------------------------------------------------------------------- 5 * "THE BEER-WARE LICENSE" (Revision 42): 6 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 7 * can do whatever you want with this stuff. If we meet some day, and you think 8 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 9 * ---------------------------------------------------------------------------- 10 * 11 * The bioq_disksort() (and the specification of the bioq API) 12 * have been written by Luigi Rizzo and Fabio Checconi under the same 13 * license as above. 14 */ 15 16 #include <sys/cdefs.h> 17 __FBSDID("$FreeBSD$"); 18 19 #include "opt_geom.h" 20 21 #include <sys/param.h> 22 #include <sys/systm.h> 23 #include <sys/bio.h> 24 #include <sys/conf.h> 25 #include <sys/disk.h> 26 #include <sys/sysctl.h> 27 #include <geom/geom_disk.h> 28 29 static int bioq_batchsize = 0; 30 SYSCTL_INT(_debug, OID_AUTO, bioq_batchsize, CTLFLAG_RW, 31 &bioq_batchsize, 0, "BIOQ batch size"); 32 33 /*- 34 * Disk error is the preface to plaintive error messages 35 * about failing disk transfers. It prints messages of the form 36 * "hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347" 37 * blkdone should be -1 if the position of the error is unknown. 38 * The message is printed with printf. 
39 */ 40 void 41 disk_err(struct bio *bp, const char *what, int blkdone, int nl) 42 { 43 daddr_t sn; 44 45 if (bp->bio_dev != NULL) 46 printf("%s: %s ", devtoname(bp->bio_dev), what); 47 else if (bp->bio_disk != NULL) 48 printf("%s%d: %s ", 49 bp->bio_disk->d_name, bp->bio_disk->d_unit, what); 50 else 51 printf("disk??: %s ", what); 52 switch(bp->bio_cmd) { 53 case BIO_READ: printf("cmd=read "); break; 54 case BIO_WRITE: printf("cmd=write "); break; 55 case BIO_DELETE: printf("cmd=delete "); break; 56 case BIO_GETATTR: printf("cmd=getattr "); break; 57 case BIO_FLUSH: printf("cmd=flush "); break; 58 default: printf("cmd=%x ", bp->bio_cmd); break; 59 } 60 sn = bp->bio_pblkno; 61 if (bp->bio_bcount <= DEV_BSIZE) { 62 printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : ""); 63 return; 64 } 65 if (blkdone >= 0) { 66 sn += blkdone; 67 printf("fsbn %jd of ", (intmax_t)sn); 68 } 69 printf("%jd-%jd", (intmax_t)bp->bio_pblkno, 70 (intmax_t)(bp->bio_pblkno + (bp->bio_bcount - 1) / DEV_BSIZE)); 71 if (nl) 72 printf("\n"); 73 } 74 75 /* 76 * BIO queue implementation 77 * 78 * Please read carefully the description below before making any change 79 * to the code, or you might change the behaviour of the data structure 80 * in undesirable ways. 81 * 82 * A bioq stores disk I/O request (bio), normally sorted according to 83 * the distance of the requested position (bio->bio_offset) from the 84 * current head position (bioq->last_offset) in the scan direction, i.e. 85 * 86 * (uoff_t)(bio_offset - last_offset) 87 * 88 * Note that the cast to unsigned (uoff_t) is fundamental to insure 89 * that the distance is computed in the scan direction. 
 *
 * The main methods for manipulating the bioq are:
 *
 *   bioq_disksort()    performs an ordered insertion;
 *
 *   bioq_first()       returns the head of the queue, without removing it;
 *
 *   bioq_takefirst()   returns and removes the head of the queue,
 *                      updating the 'current head position' as
 *                      bioq->last_offset = bio->bio_offset + bio->bio_length;
 *
 * When updating the 'current head position', we assume that the result of
 * bioq_takefirst() is dispatched to the device, so bioq->last_offset
 * represents the head position once the request is complete.
 *
 * If the bioq is manipulated using only the above calls, it starts
 * with a sorted sequence of requests with bio_offset >= last_offset,
 * possibly followed by another sorted sequence of requests with
 * 0 <= bio_offset < bioq->last_offset
 *
 * NOTE: historical behaviour was to ignore bio->bio_length in the
 * update, but its use tracks the head position in a better way.
 * Historical behaviour was also to update the head position when
 * the request under service is complete, rather than when the
 * request is extracted from the queue. However, the current API
 * has no method to update the head position; secondly, once
 * a request has been submitted to the disk, we have no idea of
 * the actual head position, so the final one is our best guess.
 *
 * --- Direct queue manipulation ---
 *
 * A bioq uses an underlying TAILQ to store requests, so we also
 * export methods to manipulate the TAILQ, in particular:
 *
 *   bioq_insert_tail() inserts an entry at the end.
 *                      It also creates a 'barrier' so all subsequent
 *                      insertions through bioq_disksort() will end up
 *                      after this entry;
 *
 *   bioq_insert_head() inserts an entry at the head, updates
 *                      bioq->last_offset = bio->bio_offset so that
 *                      all subsequent insertions through bioq_disksort()
 *                      will end up after this entry;
 *
 *   bioq_remove()      removes a generic element from the queue, acts as
 *                      bioq_takefirst() if invoked on the head of the queue.
 *
 * The semantics of these methods are the same as the operations
 * on the underlying TAILQ, but with additional guarantees on
 * subsequent bioq_disksort() calls. E.g. bioq_insert_tail()
 * can be useful for making sure that all previous ops are flushed
 * to disk before continuing.
 *
 * Updating bioq->last_offset on a bioq_insert_head() guarantees
 * that the bio inserted with the last bioq_insert_head() will stay
 * at the head of the queue even after subsequent bioq_disksort().
 *
 * Note that when the direct queue manipulation functions are used,
 * the queue may contain multiple inversion points (i.e. more than
 * two sorted sequences of requests).
150 * 151 */ 152 153 void 154 bioq_init(struct bio_queue_head *head) 155 { 156 157 TAILQ_INIT(&head->queue); 158 head->last_offset = 0; 159 head->insert_point = NULL; 160 head->total = 0; 161 head->batched = 0; 162 } 163 164 void 165 bioq_remove(struct bio_queue_head *head, struct bio *bp) 166 { 167 168 if (head->insert_point == NULL) { 169 if (bp == TAILQ_FIRST(&head->queue)) 170 head->last_offset = bp->bio_offset + bp->bio_length; 171 } else if (bp == head->insert_point) 172 head->insert_point = NULL; 173 174 TAILQ_REMOVE(&head->queue, bp, bio_queue); 175 head->total--; 176 } 177 178 void 179 bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error) 180 { 181 struct bio *bp; 182 183 while ((bp = bioq_takefirst(head)) != NULL) 184 biofinish(bp, stp, error); 185 } 186 187 void 188 bioq_insert_head(struct bio_queue_head *head, struct bio *bp) 189 { 190 191 if (head->insert_point == NULL) 192 head->last_offset = bp->bio_offset; 193 TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 194 head->total++; 195 head->batched = 0; 196 } 197 198 void 199 bioq_insert_tail(struct bio_queue_head *head, struct bio *bp) 200 { 201 202 TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue); 203 head->total++; 204 head->insert_point = bp; 205 head->last_offset = bp->bio_offset; 206 } 207 208 struct bio * 209 bioq_first(struct bio_queue_head *head) 210 { 211 212 return (TAILQ_FIRST(&head->queue)); 213 } 214 215 struct bio * 216 bioq_takefirst(struct bio_queue_head *head) 217 { 218 struct bio *bp; 219 220 bp = TAILQ_FIRST(&head->queue); 221 if (bp != NULL) 222 bioq_remove(head, bp); 223 return (bp); 224 } 225 226 /* 227 * Compute the sorting key. The cast to unsigned is 228 * fundamental for correctness, see the description 229 * near the beginning of the file. 230 */ 231 static inline uoff_t 232 bioq_bio_key(struct bio_queue_head *head, struct bio *bp) 233 { 234 235 return ((uoff_t)(bp->bio_offset - head->last_offset)); 236 } 237 238 /* 239 * Seek sort for disks. 
240 * 241 * Sort all requests in a single queue while keeping 242 * track of the current position of the disk with last_offset. 243 * See above for details. 244 */ 245 void 246 bioq_disksort(struct bio_queue_head *head, struct bio *bp) 247 { 248 struct bio *cur, *prev; 249 uoff_t key; 250 251 if ((bp->bio_flags & BIO_ORDERED) != 0) { 252 /* 253 * Ordered transactions can only be dispatched 254 * after any currently queued transactions. They 255 * also have barrier semantics - no transactions 256 * queued in the future can pass them. 257 */ 258 bioq_insert_tail(head, bp); 259 return; 260 } 261 262 if (bioq_batchsize > 0 && head->batched > bioq_batchsize) { 263 bioq_insert_tail(head, bp); 264 return; 265 } 266 267 prev = NULL; 268 key = bioq_bio_key(head, bp); 269 cur = TAILQ_FIRST(&head->queue); 270 271 if (head->insert_point) { 272 prev = head->insert_point; 273 cur = TAILQ_NEXT(head->insert_point, bio_queue); 274 } 275 276 while (cur != NULL && key >= bioq_bio_key(head, cur)) { 277 prev = cur; 278 cur = TAILQ_NEXT(cur, bio_queue); 279 } 280 281 if (prev == NULL) 282 TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 283 else 284 TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue); 285 head->total++; 286 head->batched++; 287 } 288