1 /*- 2 * SPDX-License-Identifier: Beerware 3 * 4 * ---------------------------------------------------------------------------- 5 * "THE BEER-WARE LICENSE" (Revision 42): 6 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 7 * can do whatever you want with this stuff. If we meet some day, and you think 8 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 9 * ---------------------------------------------------------------------------- 10 * 11 * The bioq_disksort() (and the specification of the bioq API) 12 * have been written by Luigi Rizzo and Fabio Checconi under the same 13 * license as above. 14 */ 15 16 #include <sys/cdefs.h> 17 __FBSDID("$FreeBSD$"); 18 19 #include "opt_geom.h" 20 21 #include <sys/param.h> 22 #include <sys/systm.h> 23 #include <sys/bio.h> 24 #include <sys/conf.h> 25 #include <sys/disk.h> 26 #include <sys/sysctl.h> 27 #include <geom/geom_disk.h> 28 29 static int bioq_batchsize = 128; 30 SYSCTL_INT(_debug, OID_AUTO, bioq_batchsize, CTLFLAG_RW, 31 &bioq_batchsize, 0, "BIOQ batch size"); 32 33 /*- 34 * Disk error is the preface to plaintive error messages 35 * about failing disk transfers. It prints messages of the form 36 * "hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347" 37 * blkdone should be -1 if the position of the error is unknown. 38 * The message is printed with printf. 
39 */ 40 void 41 disk_err(struct bio *bp, const char *what, int blkdone, int nl) 42 { 43 daddr_t sn; 44 45 if (bp->bio_dev != NULL) 46 printf("%s: %s ", devtoname(bp->bio_dev), what); 47 else if (bp->bio_disk != NULL) 48 printf("%s%d: %s ", 49 bp->bio_disk->d_name, bp->bio_disk->d_unit, what); 50 else 51 printf("disk??: %s ", what); 52 switch(bp->bio_cmd) { 53 case BIO_READ: printf("cmd=read "); break; 54 case BIO_WRITE: printf("cmd=write "); break; 55 case BIO_DELETE: printf("cmd=delete "); break; 56 case BIO_GETATTR: printf("cmd=getattr "); break; 57 case BIO_FLUSH: printf("cmd=flush "); break; 58 default: printf("cmd=%x ", bp->bio_cmd); break; 59 } 60 sn = bp->bio_pblkno; 61 if (bp->bio_bcount <= DEV_BSIZE) { 62 printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : ""); 63 return; 64 } 65 if (blkdone >= 0) { 66 sn += blkdone; 67 printf("fsbn %jd of ", (intmax_t)sn); 68 } 69 printf("%jd-%jd", (intmax_t)bp->bio_pblkno, 70 (intmax_t)(bp->bio_pblkno + (bp->bio_bcount - 1) / DEV_BSIZE)); 71 if (nl) 72 printf("\n"); 73 } 74 75 /* 76 * BIO queue implementation 77 * 78 * Please read carefully the description below before making any change 79 * to the code, or you might change the behaviour of the data structure 80 * in undesirable ways. 81 * 82 * A bioq stores disk I/O request (bio), normally sorted according to 83 * the distance of the requested position (bio->bio_offset) from the 84 * current head position (bioq->last_offset) in the scan direction, i.e. 85 * 86 * (uoff_t)(bio_offset - last_offset) 87 * 88 * Note that the cast to unsigned (uoff_t) is fundamental to insure 89 * that the distance is computed in the scan direction. 
 *
 * The main methods for manipulating the bioq are:
 *
 *   bioq_disksort()	performs an ordered insertion;
 *
 *   bioq_first()	returns the head of the queue, without removing it;
 *
 *   bioq_takefirst()	returns and removes the head of the queue,
 *			updating the 'current head position' as
 *			bioq->last_offset = bio->bio_offset + bio->bio_length;
 *
 * When updating the 'current head position', we assume that the result of
 * bioq_takefirst() is dispatched to the device, so bioq->last_offset
 * represents the head position once the request is complete.
 *
 * If the bioq is manipulated using only the above calls, it starts
 * with a sorted sequence of requests with bio_offset >= last_offset,
 * possibly followed by another sorted sequence of requests with
 * 0 <= bio_offset < bioq->last_offset
 *
 * NOTE: historical behaviour was to ignore bio->bio_length in the
 *	update, but its use tracks the head position in a better way.
 *	Historical behaviour was also to update the head position when
 *	the request under service is complete, rather than when the
 *	request is extracted from the queue. However, the current API
 *	has no method to update the head position; secondly, once
 *	a request has been submitted to the disk, we have no idea of
 *	the actual head position, so the final one is our best guess.
 *
 * --- Direct queue manipulation ---
 *
 * A bioq uses an underlying TAILQ to store requests, so we also
 * export methods to manipulate the TAILQ, in particular:
 *
 *   bioq_insert_tail()	inserts an entry at the end.
 *			It also creates a 'barrier' so all subsequent
 *			insertions through bioq_disksort() will end up
 *			after this entry;
 *
 *   bioq_insert_head()	inserts an entry at the head and, when no
 *			barrier is pending, updates
 *			bioq->last_offset = bio->bio_offset so that
 *			all subsequent insertions through bioq_disksort()
 *			will end up after this entry;
 *
 *   bioq_remove()	removes a generic element from the queue, acting as
 *			bioq_takefirst() if invoked on the head of the queue.
 *
 * The semantics of these methods are the same as the operations
 * on the underlying TAILQ, but with additional guarantees on
 * subsequent bioq_disksort() calls. E.g. bioq_insert_tail()
 * can be useful for making sure that all previous ops are flushed
 * to disk before continuing.
 *
 * Updating bioq->last_offset on a bioq_insert_head() guarantees
 * that the bio inserted with the last bioq_insert_head() will stay
 * at the head of the queue even after subsequent bioq_disksort().
 *
 * Note that when the direct queue manipulation functions are used,
 * the queue may contain multiple inversion points (i.e. more than
 * two sorted sequences of requests).
150 * 151 */ 152 153 void 154 bioq_init(struct bio_queue_head *head) 155 { 156 157 TAILQ_INIT(&head->queue); 158 head->last_offset = 0; 159 head->insert_point = NULL; 160 head->total = 0; 161 head->batched = 0; 162 } 163 164 void 165 bioq_remove(struct bio_queue_head *head, struct bio *bp) 166 { 167 168 if (head->insert_point == NULL) { 169 if (bp == TAILQ_FIRST(&head->queue)) 170 head->last_offset = bp->bio_offset + bp->bio_length; 171 } else if (bp == head->insert_point) 172 head->insert_point = NULL; 173 174 TAILQ_REMOVE(&head->queue, bp, bio_queue); 175 if (TAILQ_EMPTY(&head->queue)) 176 head->batched = 0; 177 head->total--; 178 } 179 180 void 181 bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error) 182 { 183 struct bio *bp; 184 185 while ((bp = bioq_takefirst(head)) != NULL) 186 biofinish(bp, stp, error); 187 } 188 189 void 190 bioq_insert_head(struct bio_queue_head *head, struct bio *bp) 191 { 192 193 if (head->insert_point == NULL) 194 head->last_offset = bp->bio_offset; 195 TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 196 head->total++; 197 head->batched = 0; 198 } 199 200 void 201 bioq_insert_tail(struct bio_queue_head *head, struct bio *bp) 202 { 203 204 TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue); 205 head->total++; 206 head->batched = 0; 207 head->insert_point = bp; 208 head->last_offset = bp->bio_offset; 209 } 210 211 struct bio * 212 bioq_first(struct bio_queue_head *head) 213 { 214 215 return (TAILQ_FIRST(&head->queue)); 216 } 217 218 struct bio * 219 bioq_takefirst(struct bio_queue_head *head) 220 { 221 struct bio *bp; 222 223 bp = TAILQ_FIRST(&head->queue); 224 if (bp != NULL) 225 bioq_remove(head, bp); 226 return (bp); 227 } 228 229 /* 230 * Compute the sorting key. The cast to unsigned is 231 * fundamental for correctness, see the description 232 * near the beginning of the file. 
233 */ 234 static inline uoff_t 235 bioq_bio_key(struct bio_queue_head *head, struct bio *bp) 236 { 237 238 return ((uoff_t)(bp->bio_offset - head->last_offset)); 239 } 240 241 /* 242 * Seek sort for disks. 243 * 244 * Sort all requests in a single queue while keeping 245 * track of the current position of the disk with last_offset. 246 * See above for details. 247 */ 248 void 249 bioq_disksort(struct bio_queue_head *head, struct bio *bp) 250 { 251 struct bio *cur, *prev; 252 uoff_t key; 253 254 if ((bp->bio_flags & BIO_ORDERED) != 0) { 255 /* 256 * Ordered transactions can only be dispatched 257 * after any currently queued transactions. They 258 * also have barrier semantics - no transactions 259 * queued in the future can pass them. 260 */ 261 bioq_insert_tail(head, bp); 262 return; 263 } 264 265 /* 266 * We should only sort requests of types that have concept of offset. 267 * Other types, such as BIO_FLUSH or BIO_ZONE, may imply some degree 268 * of ordering even if strict ordering is not requested explicitly. 269 */ 270 if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE && 271 bp->bio_cmd != BIO_DELETE) { 272 bioq_insert_tail(head, bp); 273 return; 274 } 275 276 if (bioq_batchsize > 0 && head->batched > bioq_batchsize) { 277 bioq_insert_tail(head, bp); 278 return; 279 } 280 281 prev = NULL; 282 key = bioq_bio_key(head, bp); 283 cur = TAILQ_FIRST(&head->queue); 284 285 if (head->insert_point) { 286 prev = head->insert_point; 287 cur = TAILQ_NEXT(head->insert_point, bio_queue); 288 } 289 290 while (cur != NULL && key >= bioq_bio_key(head, cur)) { 291 prev = cur; 292 cur = TAILQ_NEXT(cur, bio_queue); 293 } 294 295 if (prev == NULL) 296 TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 297 else 298 TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue); 299 head->total++; 300 head->batched++; 301 } 302