1 /* 2 * ---------------------------------------------------------------------------- 3 * "THE BEER-WARE LICENSE" (Revision 42): 4 * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you 5 * can do whatever you want with this stuff. If we meet some day, and you think 6 * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp 7 * ---------------------------------------------------------------------------- 8 * 9 * $FreeBSD$ 10 * 11 */ 12 13 #include "opt_geom.h" 14 15 #include <sys/param.h> 16 #include <sys/systm.h> 17 #include <sys/stdint.h> 18 #include <sys/bio.h> 19 #include <sys/conf.h> 20 #include <sys/disk.h> 21 #include <sys/disklabel.h> 22 #ifdef NO_GEOM 23 #include <sys/diskslice.h> 24 #include <sys/kernel.h> 25 #include <sys/malloc.h> 26 #include <sys/sysctl.h> 27 #include <machine/md_var.h> 28 #include <sys/ctype.h> 29 30 static MALLOC_DEFINE(M_DISK, "disk", "disk data"); 31 32 static d_strategy_t diskstrategy; 33 static d_open_t diskopen; 34 static d_close_t diskclose; 35 static d_ioctl_t diskioctl; 36 static d_psize_t diskpsize; 37 38 static LIST_HEAD(, disk) disklist = LIST_HEAD_INITIALIZER(&disklist); 39 40 void disk_dev_synth(dev_t dev); 41 42 void 43 disk_dev_synth(dev_t dev) 44 { 45 struct disk *dp; 46 int u, s, p; 47 dev_t pdev; 48 49 if (dksparebits(dev)) 50 return; 51 LIST_FOREACH(dp, &disklist, d_list) { 52 if (major(dev) != dp->d_devsw->d_maj) 53 continue; 54 u = dkunit(dev); 55 p = RAW_PART; 56 s = WHOLE_DISK_SLICE; 57 pdev = makedev(dp->d_devsw->d_maj, dkmakeminor(u, s, p)); 58 if (pdev->si_devsw == NULL) 59 return; /* Probably a unit we don't have */ 60 s = dkslice(dev); 61 p = dkpart(dev); 62 if (s == WHOLE_DISK_SLICE && p == RAW_PART) { 63 /* XXX: actually should not happen */ 64 dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p), 65 UID_ROOT, GID_OPERATOR, 0640, "%s%d", 66 dp->d_devsw->d_name, u); 67 dev_depends(pdev, dev); 68 return; 69 } 70 if (s == COMPATIBILITY_SLICE) { 71 dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p), 72 UID_ROOT, GID_OPERATOR, 0640, "%s%d%c", 73 dp->d_devsw->d_name, u, 'a' + p); 74 dev_depends(pdev, dev); 75 return; 76 } 77 if (p != RAW_PART) { 78 dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p), 79 UID_ROOT, GID_OPERATOR, 0640, "%s%ds%d%c", 80 dp->d_devsw->d_name, u, s - BASE_SLICE + 1, 81 'a' + p); 82 } else { 83 dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p), 84 UID_ROOT, GID_OPERATOR, 0640, "%s%ds%d", 85 dp->d_devsw->d_name, u, s - BASE_SLICE + 1); 86 make_dev_alias(dev, "%s%ds%dc", 87 dp->d_devsw->d_name, u, s - BASE_SLICE + 1); 88 } 89 dev_depends(pdev, dev); 90 return; 91 } 92 } 93 94 static void 95 disk_clone(void *arg, char *name, int namelen, dev_t *dev) 96 { 97 struct disk *dp; 98 char const *d; 99 char *e; 100 int j, u, s, p; 101 dev_t pdev; 102 103 if (*dev != NODEV) 104 return; 105 106 LIST_FOREACH(dp, &disklist, d_list) { 107 d = dp->d_devsw->d_name; 108 j = dev_stdclone(name, &e, d, &u); 109 if (j == 0) 110 continue; 111 if (u > DKMAXUNIT) 112 continue; 113 p = RAW_PART; 114 s = WHOLE_DISK_SLICE; 115 pdev = makedev(dp->d_devsw->d_maj, dkmakeminor(u, s, p)); 116 if (pdev->si_disk == NULL) 117 continue; 118 if (*e != '\0') { 119 j = dev_stdclone(e, &e, "s", &s); 120 if (j == 0) 121 s = COMPATIBILITY_SLICE; 122 else if (j == 1 || j == 2) 123 s += BASE_SLICE - 1; 124 if (!*e) 125 ; /* ad0s1 case */ 126 else if (e[1] != '\0') 127 return; /* can never be a disk name */ 128 else if (*e < 'a' || *e > 'h') 129 return; /* can never be a disk name */ 130 else 131 p = *e - 'a'; 132 } 133 if (s == WHOLE_DISK_SLICE && p == RAW_PART) { 134 return; 135 } else if (s >= BASE_SLICE && p != RAW_PART) { 136 *dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p), 137 UID_ROOT, GID_OPERATOR, 0640, "%s%ds%d%c", 138 pdev->si_devsw->d_name, u, s - BASE_SLICE + 1, 139 p + 'a'); 140 } else if (s >= BASE_SLICE) { 141 *dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p), 142 UID_ROOT, GID_OPERATOR, 0640, "%s%ds%d", 143 pdev->si_devsw->d_name, u, s - BASE_SLICE + 1); 144 make_dev_alias(*dev, "%s%ds%dc", 145 pdev->si_devsw->d_name, u, s - BASE_SLICE + 1); 146 } else { 147 *dev = make_dev(pdev->si_devsw, dkmakeminor(u, s, p), 148 UID_ROOT, GID_OPERATOR, 0640, "%s%d%c", 149 pdev->si_devsw->d_name, u, p + 'a'); 150 } 151 dev_depends(pdev, *dev); 152 return; 153 } 154 } 155 156 static void 157 inherit_raw(dev_t pdev, dev_t dev) 158 { 159 dev->si_disk = pdev->si_disk; 160 dev->si_drv1 = pdev->si_drv1; 161 dev->si_drv2 = pdev->si_drv2; 162 dev->si_iosize_max = pdev->si_iosize_max; 163 dev->si_bsize_phys = pdev->si_bsize_phys; 164 dev->si_bsize_best = pdev->si_bsize_best; 165 } 166 167 dev_t 168 disk_create(int unit, struct disk *dp, int flags, struct cdevsw *cdevsw, struct cdevsw *proto) 169 { 170 static int once; 171 dev_t dev; 172 173 if (!once) { 174 EVENTHANDLER_REGISTER(dev_clone, disk_clone, 0, 1000); 175 once++; 176 } 177 178 bzero(dp, sizeof(*dp)); 179 dp->d_label = malloc(sizeof *dp->d_label, M_DEVBUF, M_ZERO); 180 181 if (proto->d_open != diskopen) { 182 *proto = *cdevsw; 183 proto->d_open = diskopen; 184 proto->d_close = diskclose; 185 proto->d_ioctl = diskioctl; 186 proto->d_strategy = diskstrategy; 187 proto->d_psize = diskpsize; 188 } 189 190 if (bootverbose) 191 printf("Creating DISK %s%d\n", cdevsw->d_name, unit); 192 dev = make_dev(proto, dkmakeminor(unit, WHOLE_DISK_SLICE, RAW_PART), 193 UID_ROOT, GID_OPERATOR, 0640, "%s%d", cdevsw->d_name, unit); 194 195 dev->si_disk = dp; 196 dp->d_dev = dev; 197 dp->d_dsflags = flags; 198 dp->d_devsw = cdevsw; 199 LIST_INSERT_HEAD(&disklist, dp, d_list); 200 201 return (dev); 202 } 203 204 static int 205 diskdumpconf(u_int onoff, dev_t dev, struct disk *dp) 206 { 207 struct dumperinfo di; 208 struct disklabel *dl; 209 210 if (!onoff) 211 return(set_dumper(NULL)); 212 dl = dsgetlabel(dev, dp->d_slice); 213 if (!dl) 214 return (ENXIO); 215 bzero(&di, sizeof di); 216 di.dumper = (dumper_t *)dp->d_devsw->d_dump; 217 di.priv = dp->d_dev; 218 di.blocksize = dl->d_secsize; 219 di.mediaoffset = (off_t)(dl->d_partitions[dkpart(dev)].p_offset + 220 dp->d_slice->dss_slices[dkslice(dev)].ds_offset) * DEV_BSIZE; 221 di.mediasize = 222 (off_t)(dl->d_partitions[dkpart(dev)].p_size) * DEV_BSIZE; 223 if (di.mediasize == 0) 224 return (EINVAL); 225 return(set_dumper(&di)); 226 } 227 228 void 229 disk_invalidate (struct disk *disk) 230 { 231 if (disk->d_slice) 232 dsgone(&disk->d_slice); 233 } 234 235 void 236 disk_destroy(dev_t dev) 237 { 238 LIST_REMOVE(dev->si_disk, d_list); 239 free(dev->si_disk->d_label, M_DEVBUF); 240 bzero(dev->si_disk, sizeof(*dev->si_disk)); 241 dev->si_disk = NULL; 242 destroy_dev(dev); 243 return; 244 } 245 246 struct disk * 247 disk_enumerate(struct disk *disk) 248 { 249 if (!disk) 250 return (LIST_FIRST(&disklist)); 251 else 252 return (LIST_NEXT(disk, d_list)); 253 } 254 255 static int 256 sysctl_disks(SYSCTL_HANDLER_ARGS) 257 { 258 struct disk *disk; 259 int error, first; 260 261 disk = NULL; 262 first = 1; 263 264 while ((disk = disk_enumerate(disk))) { 265 if (!first) { 266 error = SYSCTL_OUT(req, " ", 1); 267 if (error) 268 return error; 269 } else { 270 first = 0; 271 } 272 error = SYSCTL_OUT(req, disk->d_dev->si_name, strlen(disk->d_dev->si_name)); 273 if (error) 274 return error; 275 } 276 error = SYSCTL_OUT(req, "", 1); 277 return error; 278 } 279 280 SYSCTL_PROC(_kern, OID_AUTO, disks, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, 281 sysctl_disks, "A", "names of available disks"); 282 283 /* 284 * The cdevsw functions 285 */ 286 287 static int 288 diskopen(dev_t dev, int oflags, int devtype, struct thread *td) 289 { 290 dev_t pdev; 291 struct disk *dp; 292 int error; 293 294 error = 0; 295 pdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), RAW_PART); 296 297 dp = pdev->si_disk; 298 if (!dp) 299 return (ENXIO); 300 301 while (dp->d_flags & DISKFLAG_LOCK) { 302 dp->d_flags |= DISKFLAG_WANTED; 303 error = tsleep(dp, PRIBIO | PCATCH, "diskopen", hz); 304 if (error) 305 return (error); 306 } 307 dp->d_flags |= DISKFLAG_LOCK; 308 309 if (!dsisopen(dp->d_slice)) { 310 if (!pdev->si_iosize_max) 311 pdev->si_iosize_max = dev->si_iosize_max; 312 error = dp->d_devsw->d_open(pdev, oflags, devtype, td); 313 dp->d_label->d_secsize = dp->d_sectorsize; 314 dp->d_label->d_secperunit = dp->d_mediasize / dp->d_sectorsize; 315 dp->d_label->d_nsectors = dp->d_fwsectors; 316 dp->d_label->d_ntracks = dp->d_fwheads; 317 } 318 319 /* Inherit properties from the whole/raw dev_t */ 320 inherit_raw(pdev, dev); 321 322 if (error) 323 goto out; 324 325 error = dsopen(dev, devtype, dp->d_dsflags, &dp->d_slice, dp->d_label); 326 327 if (!dsisopen(dp->d_slice)) 328 dp->d_devsw->d_close(pdev, oflags, devtype, td); 329 out: 330 dp->d_flags &= ~DISKFLAG_LOCK; 331 if (dp->d_flags & DISKFLAG_WANTED) { 332 dp->d_flags &= ~DISKFLAG_WANTED; 333 wakeup(dp); 334 } 335 336 return(error); 337 } 338 339 static int 340 diskclose(dev_t dev, int fflag, int devtype, struct thread *td) 341 { 342 struct disk *dp; 343 int error; 344 dev_t pdev; 345 346 error = 0; 347 pdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), RAW_PART); 348 dp = pdev->si_disk; 349 if (!dp) 350 return (ENXIO); 351 dsclose(dev, devtype, dp->d_slice); 352 if (!dsisopen(dp->d_slice)) 353 error = dp->d_devsw->d_close(dp->d_dev, fflag, devtype, td); 354 return (error); 355 } 356 357 static void 358 diskstrategy(struct bio *bp) 359 { 360 dev_t pdev; 361 struct disk *dp; 362 363 pdev = dkmodpart(dkmodslice(bp->bio_dev, WHOLE_DISK_SLICE), RAW_PART); 364 dp = pdev->si_disk; 365 bp->bio_resid = bp->bio_bcount; 366 if (dp != bp->bio_dev->si_disk) 367 inherit_raw(pdev, bp->bio_dev); 368 369 if (!dp) { 370 biofinish(bp, NULL, ENXIO); 371 return; 372 } 373 374 if (dscheck(bp, dp->d_slice) <= 0) { 375 biodone(bp); 376 return; 377 } 378 379 if (bp->bio_bcount == 0) { 380 biodone(bp); 381 return; 382 } 383 384 KASSERT(dp->d_devsw != NULL, ("NULL devsw")); 385 KASSERT(dp->d_devsw->d_strategy != NULL, ("NULL d_strategy")); 386 dp->d_devsw->d_strategy(bp); 387 return; 388 389 } 390 391 static int 392 diskioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct thread *td) 393 { 394 struct disk *dp; 395 int error; 396 u_int u; 397 dev_t pdev; 398 399 pdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), RAW_PART); 400 dp = pdev->si_disk; 401 if (!dp) 402 return (ENXIO); 403 if (cmd == DIOCSKERNELDUMP) { 404 u = *(u_int *)data; 405 return (diskdumpconf(u, dev, dp)); 406 } 407 if (cmd == DIOCGFRONTSTUFF) { 408 *(off_t *)data = 8192; /* XXX: crude but enough) */ 409 return (0); 410 } 411 error = dsioctl(dev, cmd, data, fflag, &dp->d_slice); 412 if (error == ENOIOCTL) 413 error = dp->d_devsw->d_ioctl(dev, cmd, data, fflag, td); 414 return (error); 415 } 416 417 static int 418 diskpsize(dev_t dev) 419 { 420 struct disk *dp; 421 dev_t pdev; 422 423 pdev = dkmodpart(dkmodslice(dev, WHOLE_DISK_SLICE), RAW_PART); 424 dp = pdev->si_disk; 425 if (!dp) 426 return (-1); 427 if (dp != dev->si_disk) { 428 dev->si_drv1 = pdev->si_drv1; 429 dev->si_drv2 = pdev->si_drv2; 430 /* XXX: don't set bp->b_dev->si_disk (?) */ 431 } 432 return (dssize(dev, &dp->d_slice)); 433 } 434 435 SYSCTL_INT(_debug_sizeof, OID_AUTO, disklabel, CTLFLAG_RD, 436 0, sizeof(struct disklabel), "sizeof(struct disklabel)"); 437 438 SYSCTL_INT(_debug_sizeof, OID_AUTO, diskslices, CTLFLAG_RD, 439 0, sizeof(struct diskslices), "sizeof(struct diskslices)"); 440 441 SYSCTL_INT(_debug_sizeof, OID_AUTO, disk, CTLFLAG_RD, 442 0, sizeof(struct disk), "sizeof(struct disk)"); 443 444 #endif /* NO_GEOM */ 445 446 /*- 447 * Disk error is the preface to plaintive error messages 448 * about failing disk transfers. It prints messages of the form 449 * "hp0g: BLABLABLA cmd=read fsbn 12345 of 12344-12347" 450 * blkdone should be -1 if the position of the error is unknown. 451 * The message is printed with printf. 452 */ 453 void 454 disk_err(struct bio *bp, const char *what, int blkdone, int nl) 455 { 456 daddr_t sn; 457 458 printf("%s: %s ", devtoname(bp->bio_dev), what); 459 switch(bp->bio_cmd) { 460 case BIO_READ: printf("cmd=read "); break; 461 case BIO_WRITE: printf("cmd=write "); break; 462 case BIO_DELETE: printf("cmd=delete "); break; 463 case BIO_GETATTR: printf("cmd=getattr "); break; 464 case BIO_SETATTR: printf("cmd=setattr "); break; 465 default: printf("cmd=%x ", bp->bio_cmd); break; 466 } 467 sn = bp->bio_blkno; 468 if (bp->bio_bcount <= DEV_BSIZE) { 469 printf("fsbn %jd%s", (intmax_t)sn, nl ? "\n" : ""); 470 return; 471 } 472 if (blkdone >= 0) { 473 sn += blkdone; 474 printf("fsbn %jd of ", (intmax_t)sn); 475 } 476 printf("%jd-%jd", (intmax_t)bp->bio_blkno, 477 (intmax_t)(bp->bio_blkno + (bp->bio_bcount - 1) / DEV_BSIZE)); 478 if (nl) 479 printf("\n"); 480 } 481 482 /* 483 * Seek sort for disks. 484 * 485 * The buf_queue keep two queues, sorted in ascending block order. The first 486 * queue holds those requests which are positioned after the current block 487 * (in the first request); the second, which starts at queue->switch_point, 488 * holds requests which came in after their block number was passed. Thus 489 * we implement a one way scan, retracting after reaching the end of the drive 490 * to the first request on the second queue, at which time it becomes the 491 * first queue. 492 * 493 * A one-way scan is natural because of the way UNIX read-ahead blocks are 494 * allocated. 495 */ 496 497 void 498 bioq_disksort(bioq, bp) 499 struct bio_queue_head *bioq; 500 struct bio *bp; 501 { 502 struct bio *bq; 503 struct bio *bn; 504 struct bio *be; 505 506 if (!atomic_cmpset_int(&bioq->busy, 0, 1)) 507 panic("Recursing in bioq_disksort()"); 508 be = TAILQ_LAST(&bioq->queue, bio_queue); 509 /* 510 * If the queue is empty or we are an 511 * ordered transaction, then it's easy. 512 */ 513 if ((bq = bioq_first(bioq)) == NULL) { 514 bioq_insert_tail(bioq, bp); 515 bioq->busy = 0; 516 return; 517 } else if (bioq->insert_point != NULL) { 518 519 /* 520 * A certain portion of the list is 521 * "locked" to preserve ordering, so 522 * we can only insert after the insert 523 * point. 524 */ 525 bq = bioq->insert_point; 526 } else { 527 528 /* 529 * If we lie before the last removed (currently active) 530 * request, and are not inserting ourselves into the 531 * "locked" portion of the list, then we must add ourselves 532 * to the second request list. 533 */ 534 if (bp->bio_pblkno < bioq->last_pblkno) { 535 536 bq = bioq->switch_point; 537 /* 538 * If we are starting a new secondary list, 539 * then it's easy. 540 */ 541 if (bq == NULL) { 542 bioq->switch_point = bp; 543 bioq_insert_tail(bioq, bp); 544 bioq->busy = 0; 545 return; 546 } 547 /* 548 * If we lie ahead of the current switch point, 549 * insert us before the switch point and move 550 * the switch point. 551 */ 552 if (bp->bio_pblkno < bq->bio_pblkno) { 553 bioq->switch_point = bp; 554 TAILQ_INSERT_BEFORE(bq, bp, bio_queue); 555 bioq->busy = 0; 556 return; 557 } 558 } else { 559 if (bioq->switch_point != NULL) 560 be = TAILQ_PREV(bioq->switch_point, 561 bio_queue, bio_queue); 562 /* 563 * If we lie between last_pblkno and bq, 564 * insert before bq. 565 */ 566 if (bp->bio_pblkno < bq->bio_pblkno) { 567 TAILQ_INSERT_BEFORE(bq, bp, bio_queue); 568 bioq->busy = 0; 569 return; 570 } 571 } 572 } 573 574 /* 575 * Request is at/after our current position in the list. 576 * Optimize for sequential I/O by seeing if we go at the tail. 577 */ 578 if (bp->bio_pblkno > be->bio_pblkno) { 579 TAILQ_INSERT_AFTER(&bioq->queue, be, bp, bio_queue); 580 bioq->busy = 0; 581 return; 582 } 583 584 /* Otherwise, insertion sort */ 585 while ((bn = TAILQ_NEXT(bq, bio_queue)) != NULL) { 586 587 /* 588 * We want to go after the current request if it is the end 589 * of the first request list, or if the next request is a 590 * larger cylinder than our request. 591 */ 592 if (bn == bioq->switch_point 593 || bp->bio_pblkno < bn->bio_pblkno) 594 break; 595 bq = bn; 596 } 597 TAILQ_INSERT_AFTER(&bioq->queue, bq, bp, bio_queue); 598 bioq->busy = 0; 599 } 600 601 602