1 /*- 2 * SPDX-License-Identifier: Beerware 3 * 4 * ---------------------------------------------------------------------------- 5 * "THE BEER-WARE LICENSE" (Revision 42): 6 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 7 * can do whatever you want with this stuff. If we meet some day, and you think 8 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 9 * ---------------------------------------------------------------------------- 10 * 11 * The bioq_disksort() (and the specification of the bioq API) 12 * have been written by Luigi Rizzo and Fabio Checconi under the same 13 * license as above. 14 */ 15 16 #include <sys/cdefs.h> 17 __FBSDID("$FreeBSD$"); 18 19 #include "opt_geom.h" 20 21 #include <sys/param.h> 22 #include <sys/systm.h> 23 #include <sys/bio.h> 24 #include <sys/conf.h> 25 #include <sys/disk.h> 26 #include <sys/sysctl.h> 27 #include <geom/geom_disk.h> 28 29 static int bioq_batchsize = 0; 30 SYSCTL_INT(_debug, OID_AUTO, bioq_batchsize, CTLFLAG_RW, 31 &bioq_batchsize, 0, "BIOQ batch size"); 32 33 /*- 34 * Disk error is the preface to plaintive error messages 35 * about failing disk transfers. It prints messages of the form 36 * "hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347" 37 * blkdone should be -1 if the position of the error is unknown. 38 * The message is printed with printf. 
39 */ 40 void 41 disk_err(struct bio *bp, const char *what, int blkdone, int nl) 42 { 43 daddr_t sn; 44 45 if (bp->bio_dev != NULL) 46 printf("%s: %s ", devtoname(bp->bio_dev), what); 47 else if (bp->bio_disk != NULL) 48 printf("%s%d: %s ", 49 bp->bio_disk->d_name, bp->bio_disk->d_unit, what); 50 else 51 printf("disk??: %s ", what); 52 switch(bp->bio_cmd) { 53 case BIO_READ: printf("cmd=read "); break; 54 case BIO_WRITE: printf("cmd=write "); break; 55 case BIO_DELETE: printf("cmd=delete "); break; 56 case BIO_GETATTR: printf("cmd=getattr "); break; 57 case BIO_FLUSH: printf("cmd=flush "); break; 58 default: printf("cmd=%x ", bp->bio_cmd); break; 59 } 60 sn = bp->bio_pblkno; 61 if (bp->bio_bcount <= DEV_BSIZE) { 62 printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : ""); 63 return; 64 } 65 if (blkdone >= 0) { 66 sn += blkdone; 67 printf("fsbn %jd of ", (intmax_t)sn); 68 } 69 printf("%jd-%jd", (intmax_t)bp->bio_pblkno, 70 (intmax_t)(bp->bio_pblkno + (bp->bio_bcount - 1) / DEV_BSIZE)); 71 if (nl) 72 printf("\n"); 73 } 74 75 /* 76 * BIO queue implementation 77 * 78 * Please read carefully the description below before making any change 79 * to the code, or you might change the behaviour of the data structure 80 * in undesirable ways. 81 * 82 * A bioq stores disk I/O request (bio), normally sorted according to 83 * the distance of the requested position (bio->bio_offset) from the 84 * current head position (bioq->last_offset) in the scan direction, i.e. 85 * 86 * (uoff_t)(bio_offset - last_offset) 87 * 88 * Note that the cast to unsigned (uoff_t) is fundamental to insure 89 * that the distance is computed in the scan direction. 
 *
 * The main methods for manipulating the bioq are:
 *
 *   bioq_disksort()	performs an ordered insertion;
 *
 *   bioq_first()	returns the head of the queue without removing it;
 *
 *   bioq_takefirst()	returns and removes the head of the queue,
 *		updating the 'current head position' as
 *		bioq->last_offset = bio->bio_offset + bio->bio_length;
 *
 * When updating the 'current head position', we assume that the result of
 * bioq_takefirst() is dispatched to the device, so bioq->last_offset
 * represents the head position once the request is complete.
 *
 * If the bioq is manipulated using only the above calls, it starts
 * with a sorted sequence of requests with bio_offset >= last_offset,
 * possibly followed by another sorted sequence of requests with
 * 0 <= bio_offset < bioq->last_offset
 *
 * NOTE: historical behaviour was to ignore bio->bio_length in the
 *	update, but its use tracks the head position in a better way.
 *	Historical behaviour was also to update the head position when
 *	the request under service is complete, rather than when the
 *	request is extracted from the queue. However, the current API
 *	has no method to update the head position; secondly, once
 *	a request has been submitted to the disk, we have no idea of
 *	the actual head position, so the final one is our best guess.
 *
 * --- Direct queue manipulation ---
 *
 * A bioq uses an underlying TAILQ to store requests, so we also
 * export methods to manipulate the TAILQ, in particular:
 *
 * bioq_insert_tail()	inserts an entry at the end.
 *			It also creates a 'barrier' so all subsequent
 *			insertions through bioq_disksort() will end up
 *			after this entry;
 *
 * bioq_insert_head()	inserts an entry at the head, and updates
 *			bioq->last_offset = bio->bio_offset so that
 *			all subsequent insertions through bioq_disksort()
 *			will end up after this entry;
 *
 * bioq_remove()	removes a generic element from the queue, and acts
 *			as bioq_takefirst() if invoked on the head of the
 *			queue.
 *
 * The semantics of these methods are the same as the operations
 * on the underlying TAILQ, but with additional guarantees on
 * subsequent bioq_disksort() calls. E.g. bioq_insert_tail()
 * can be useful for making sure that all previous ops are flushed
 * to disk before continuing.
 *
 * Updating bioq->last_offset on a bioq_insert_head() guarantees
 * that the bio inserted with the last bioq_insert_head() will stay
 * at the head of the queue even after subsequent bioq_disksort().
 *
 * Note that when the direct queue manipulation functions are used,
 * the queue may contain multiple inversion points (i.e. more than
 * two sorted sequences of requests).
150 * 151 */ 152 153 void 154 bioq_init(struct bio_queue_head *head) 155 { 156 157 TAILQ_INIT(&head->queue); 158 head->last_offset = 0; 159 head->insert_point = NULL; 160 head->total = 0; 161 head->batched = 0; 162 } 163 164 void 165 bioq_remove(struct bio_queue_head *head, struct bio *bp) 166 { 167 168 if (head->insert_point == NULL) { 169 if (bp == TAILQ_FIRST(&head->queue)) 170 head->last_offset = bp->bio_offset + bp->bio_length; 171 } else if (bp == head->insert_point) 172 head->insert_point = NULL; 173 174 TAILQ_REMOVE(&head->queue, bp, bio_queue); 175 head->total--; 176 } 177 178 void 179 bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error) 180 { 181 struct bio *bp; 182 183 while ((bp = bioq_takefirst(head)) != NULL) 184 biofinish(bp, stp, error); 185 } 186 187 void 188 bioq_insert_head(struct bio_queue_head *head, struct bio *bp) 189 { 190 191 if (head->insert_point == NULL) 192 head->last_offset = bp->bio_offset; 193 TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 194 head->total++; 195 head->batched = 0; 196 } 197 198 void 199 bioq_insert_tail(struct bio_queue_head *head, struct bio *bp) 200 { 201 202 TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue); 203 head->total++; 204 head->batched = 0; 205 head->insert_point = bp; 206 head->last_offset = bp->bio_offset; 207 } 208 209 struct bio * 210 bioq_first(struct bio_queue_head *head) 211 { 212 213 return (TAILQ_FIRST(&head->queue)); 214 } 215 216 struct bio * 217 bioq_takefirst(struct bio_queue_head *head) 218 { 219 struct bio *bp; 220 221 bp = TAILQ_FIRST(&head->queue); 222 if (bp != NULL) 223 bioq_remove(head, bp); 224 return (bp); 225 } 226 227 /* 228 * Compute the sorting key. The cast to unsigned is 229 * fundamental for correctness, see the description 230 * near the beginning of the file. 
231 */ 232 static inline uoff_t 233 bioq_bio_key(struct bio_queue_head *head, struct bio *bp) 234 { 235 236 return ((uoff_t)(bp->bio_offset - head->last_offset)); 237 } 238 239 /* 240 * Seek sort for disks. 241 * 242 * Sort all requests in a single queue while keeping 243 * track of the current position of the disk with last_offset. 244 * See above for details. 245 */ 246 void 247 bioq_disksort(struct bio_queue_head *head, struct bio *bp) 248 { 249 struct bio *cur, *prev; 250 uoff_t key; 251 252 if ((bp->bio_flags & BIO_ORDERED) != 0) { 253 /* 254 * Ordered transactions can only be dispatched 255 * after any currently queued transactions. They 256 * also have barrier semantics - no transactions 257 * queued in the future can pass them. 258 */ 259 bioq_insert_tail(head, bp); 260 return; 261 } 262 263 /* 264 * We should only sort requests of types that have concept of offset. 265 * Other types, such as BIO_FLUSH or BIO_ZONE, may imply some degree 266 * of ordering even if strict ordering is not requested explicitly. 267 */ 268 if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE && 269 bp->bio_cmd != BIO_DELETE) { 270 bioq_insert_tail(head, bp); 271 return; 272 } 273 274 if (bioq_batchsize > 0 && head->batched > bioq_batchsize) { 275 bioq_insert_tail(head, bp); 276 return; 277 } 278 279 prev = NULL; 280 key = bioq_bio_key(head, bp); 281 cur = TAILQ_FIRST(&head->queue); 282 283 if (head->insert_point) { 284 prev = head->insert_point; 285 cur = TAILQ_NEXT(head->insert_point, bio_queue); 286 } 287 288 while (cur != NULL && key >= bioq_bio_key(head, cur)) { 289 prev = cur; 290 cur = TAILQ_NEXT(cur, bio_queue); 291 } 292 293 if (prev == NULL) 294 TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 295 else 296 TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue); 297 head->total++; 298 head->batched++; 299 } 300