1 /*- 2 * SPDX-License-Identifier: Beerware 3 * 4 * ---------------------------------------------------------------------------- 5 * "THE BEER-WARE LICENSE" (Revision 42): 6 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 7 * can do whatever you want with this stuff. If we meet some day, and you think 8 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 9 * ---------------------------------------------------------------------------- 10 * 11 * The bioq_disksort() (and the specification of the bioq API) 12 * have been written by Luigi Rizzo and Fabio Checconi under the same 13 * license as above. 14 */ 15 16 #include <sys/cdefs.h> 17 #include "opt_geom.h" 18 19 #include <sys/param.h> 20 #include <sys/systm.h> 21 #include <sys/bio.h> 22 #include <sys/conf.h> 23 #include <sys/disk.h> 24 #include <sys/sysctl.h> 25 #include <geom/geom_disk.h> 26 27 static int bioq_batchsize = 128; 28 SYSCTL_INT(_debug, OID_AUTO, bioq_batchsize, CTLFLAG_RW, 29 &bioq_batchsize, 0, "BIOQ batch size"); 30 31 /*- 32 * Disk error is the preface to plaintive error messages 33 * about failing disk transfers. It prints messages of the form 34 * "hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347" 35 * blkdone should be -1 if the position of the error is unknown. 36 * The message is printed with printf. 
37 */ 38 void 39 disk_err(struct bio *bp, const char *what, int blkdone, int nl) 40 { 41 daddr_t sn; 42 43 if (bp->bio_dev != NULL) 44 printf("%s: %s ", devtoname(bp->bio_dev), what); 45 else if (bp->bio_disk != NULL) 46 printf("%s%d: %s ", 47 bp->bio_disk->d_name, bp->bio_disk->d_unit, what); 48 else 49 printf("disk??: %s ", what); 50 switch(bp->bio_cmd) { 51 case BIO_READ: printf("cmd=read "); break; 52 case BIO_WRITE: printf("cmd=write "); break; 53 case BIO_DELETE: printf("cmd=delete "); break; 54 case BIO_GETATTR: printf("cmd=getattr "); break; 55 case BIO_FLUSH: printf("cmd=flush "); break; 56 default: printf("cmd=%x ", bp->bio_cmd); break; 57 } 58 sn = bp->bio_pblkno; 59 if (bp->bio_bcount <= DEV_BSIZE) { 60 printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : ""); 61 return; 62 } 63 if (blkdone >= 0) { 64 sn += blkdone; 65 printf("fsbn %jd of ", (intmax_t)sn); 66 } 67 printf("%jd-%jd", (intmax_t)bp->bio_pblkno, 68 (intmax_t)(bp->bio_pblkno + (bp->bio_bcount - 1) / DEV_BSIZE)); 69 if (nl) 70 printf("\n"); 71 } 72 73 /* 74 * BIO queue implementation 75 * 76 * Please read carefully the description below before making any change 77 * to the code, or you might change the behaviour of the data structure 78 * in undesirable ways. 79 * 80 * A bioq stores disk I/O request (bio), normally sorted according to 81 * the distance of the requested position (bio->bio_offset) from the 82 * current head position (bioq->last_offset) in the scan direction, i.e. 83 * 84 * (uoff_t)(bio_offset - last_offset) 85 * 86 * Note that the cast to unsigned (uoff_t) is fundamental to insure 87 * that the distance is computed in the scan direction. 
 *
 * The main methods for manipulating the bioq are:
 *
 *   bioq_disksort()	performs an ordered insertion;
 *
 *   bioq_first()	returns the head of the queue, without removing it;
 *
 *   bioq_takefirst()	returns and removes the head of the queue,
 *		updating the 'current head position' as
 *		bioq->last_offset = bio->bio_offset + bio->bio_length;
 *
 * When updating the 'current head position', we assume that the result of
 * bioq_takefirst() is dispatched to the device, so bioq->last_offset
 * represents the head position once the request is complete.
 *
 * If the bioq is manipulated using only the above calls, it starts
 * with a sorted sequence of requests with bio_offset >= last_offset,
 * possibly followed by another sorted sequence of requests with
 * 0 <= bio_offset < bioq->last_offset
 *
 * NOTE: historical behaviour was to ignore bio->bio_length in the
 * update, but its use tracks the head position in a better way.
 * Historical behaviour was also to update the head position when
 * the request under service is complete, rather than when the
 * request is extracted from the queue. However, the current API
 * has no method to update the head position; secondly, once
 * a request has been submitted to the disk, we have no idea of
 * the actual head position, so the final one is our best guess.
 *
 * --- Direct queue manipulation ---
 *
 * A bioq uses an underlying TAILQ to store requests, so we also
 * export methods to manipulate the TAILQ, in particular:
 *
 *   bioq_insert_tail()	inserts an entry at the end.
 *		It also creates a 'barrier' so all subsequent
 *		insertions through bioq_disksort() will end up
 *		after this entry;
 *
 *   bioq_insert_head()	inserts an entry at the head and updates
 *		bioq->last_offset = bio->bio_offset so that
 *		all subsequent insertions through bioq_disksort()
 *		will end up after this entry;
 *
 *   bioq_remove()	removes a generic element from the queue, and acts
 *		as bioq_takefirst() if invoked on the head of the queue.
 *
 * The semantics of these methods are the same as the operations
 * on the underlying TAILQ, but with additional guarantees on
 * subsequent bioq_disksort() calls. E.g. bioq_insert_tail()
 * can be useful for making sure that all previous ops are flushed
 * to disk before continuing.
 *
 * Updating bioq->last_offset on a bioq_insert_head() guarantees
 * that the bio inserted with the last bioq_insert_head() will stay
 * at the head of the queue even after subsequent bioq_disksort().
 *
 * Note that when the direct queue manipulation functions are used,
 * the queue may contain multiple inversion points (i.e. more than
 * two sorted sequences of requests).
148 * 149 */ 150 151 void 152 bioq_init(struct bio_queue_head *head) 153 { 154 155 TAILQ_INIT(&head->queue); 156 head->last_offset = 0; 157 head->insert_point = NULL; 158 head->total = 0; 159 head->batched = 0; 160 } 161 162 void 163 bioq_remove(struct bio_queue_head *head, struct bio *bp) 164 { 165 166 if (head->insert_point == NULL) { 167 if (bp == TAILQ_FIRST(&head->queue)) 168 head->last_offset = bp->bio_offset + bp->bio_length; 169 } else if (bp == head->insert_point) 170 head->insert_point = NULL; 171 172 TAILQ_REMOVE(&head->queue, bp, bio_queue); 173 if (TAILQ_EMPTY(&head->queue)) 174 head->batched = 0; 175 head->total--; 176 } 177 178 void 179 bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error) 180 { 181 struct bio *bp; 182 183 while ((bp = bioq_takefirst(head)) != NULL) 184 biofinish(bp, stp, error); 185 } 186 187 void 188 bioq_insert_head(struct bio_queue_head *head, struct bio *bp) 189 { 190 191 if (head->insert_point == NULL) 192 head->last_offset = bp->bio_offset; 193 TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 194 head->total++; 195 head->batched = 0; 196 } 197 198 void 199 bioq_insert_tail(struct bio_queue_head *head, struct bio *bp) 200 { 201 202 TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue); 203 head->total++; 204 head->batched = 0; 205 head->insert_point = bp; 206 head->last_offset = bp->bio_offset; 207 } 208 209 struct bio * 210 bioq_first(struct bio_queue_head *head) 211 { 212 213 return (TAILQ_FIRST(&head->queue)); 214 } 215 216 struct bio * 217 bioq_takefirst(struct bio_queue_head *head) 218 { 219 struct bio *bp; 220 221 bp = TAILQ_FIRST(&head->queue); 222 if (bp != NULL) 223 bioq_remove(head, bp); 224 return (bp); 225 } 226 227 /* 228 * Compute the sorting key. The cast to unsigned is 229 * fundamental for correctness, see the description 230 * near the beginning of the file. 
231 */ 232 static inline uoff_t 233 bioq_bio_key(struct bio_queue_head *head, struct bio *bp) 234 { 235 236 return ((uoff_t)(bp->bio_offset - head->last_offset)); 237 } 238 239 /* 240 * Seek sort for disks. 241 * 242 * Sort all requests in a single queue while keeping 243 * track of the current position of the disk with last_offset. 244 * See above for details. 245 */ 246 void 247 bioq_disksort(struct bio_queue_head *head, struct bio *bp) 248 { 249 struct bio *cur, *prev; 250 uoff_t key; 251 252 if ((bp->bio_flags & BIO_ORDERED) != 0) { 253 /* 254 * Ordered transactions can only be dispatched 255 * after any currently queued transactions. They 256 * also have barrier semantics - no transactions 257 * queued in the future can pass them. 258 */ 259 bioq_insert_tail(head, bp); 260 return; 261 } 262 263 /* 264 * We should only sort requests of types that have concept of offset. 265 * Other types, such as BIO_FLUSH or BIO_ZONE, may imply some degree 266 * of ordering even if strict ordering is not requested explicitly. 267 */ 268 if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE && 269 bp->bio_cmd != BIO_DELETE) { 270 bioq_insert_tail(head, bp); 271 return; 272 } 273 274 if (bioq_batchsize > 0 && head->batched > bioq_batchsize) { 275 bioq_insert_tail(head, bp); 276 return; 277 } 278 279 prev = NULL; 280 key = bioq_bio_key(head, bp); 281 cur = TAILQ_FIRST(&head->queue); 282 283 if (head->insert_point) { 284 prev = head->insert_point; 285 cur = TAILQ_NEXT(head->insert_point, bio_queue); 286 } 287 288 while (cur != NULL && key >= bioq_bio_key(head, cur)) { 289 prev = cur; 290 cur = TAILQ_NEXT(cur, bio_queue); 291 } 292 293 if (prev == NULL) 294 TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); 295 else 296 TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue); 297 head->total++; 298 head->batched++; 299 } 300