1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #ifndef WITHOUT_CAPSICUM 36 #include <sys/capsicum.h> 37 #endif 38 #include <sys/queue.h> 39 #include <sys/errno.h> 40 #include <sys/stat.h> 41 #include <sys/ioctl.h> 42 #include <sys/disk.h> 43 44 #include <assert.h> 45 #include <err.h> 46 #include <fcntl.h> 47 #include <stdio.h> 48 #include <stdlib.h> 49 #include <string.h> 50 #include <pthread.h> 51 #include <pthread_np.h> 52 #include <signal.h> 53 #include <sysexits.h> 54 #include <unistd.h> 55 56 #include <machine/atomic.h> 57 58 #include "bhyverun.h" 59 #include "mevent.h" 60 #include "block_if.h" 61 62 #define BLOCKIF_SIG 0xb109b109 63 64 #define BLOCKIF_NUMTHR 8 65 #define BLOCKIF_MAXREQ (64 + BLOCKIF_NUMTHR) 66 67 enum blockop { 68 BOP_READ, 69 BOP_WRITE, 70 BOP_FLUSH, 71 BOP_DELETE 72 }; 73 74 enum blockstat { 75 BST_FREE, 76 BST_BLOCK, 77 BST_PEND, 78 BST_BUSY, 79 BST_DONE 80 }; 81 82 struct blockif_elem { 83 TAILQ_ENTRY(blockif_elem) be_link; 84 struct blockif_req *be_req; 85 enum blockop be_op; 86 enum blockstat be_status; 87 pthread_t be_tid; 88 off_t be_block; 89 }; 90 91 struct blockif_ctxt { 92 int bc_magic; 93 int bc_fd; 94 int bc_ischr; 95 int bc_isgeom; 96 int bc_candelete; 97 int bc_rdonly; 98 off_t bc_size; 99 int bc_sectsz; 100 int bc_psectsz; 101 int bc_psectoff; 102 int bc_closing; 103 pthread_t bc_btid[BLOCKIF_NUMTHR]; 104 pthread_mutex_t bc_mtx; 105 pthread_cond_t bc_cond; 106 107 /* Request elements and free/pending/busy queues */ 108 TAILQ_HEAD(, blockif_elem) bc_freeq; 109 TAILQ_HEAD(, blockif_elem) bc_pendq; 110 TAILQ_HEAD(, blockif_elem) bc_busyq; 111 struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; 112 }; 113 114 static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; 115 116 struct blockif_sig_elem { 117 pthread_mutex_t bse_mtx; 118 pthread_cond_t bse_cond; 119 int bse_pending; 120 struct blockif_sig_elem *bse_next; 121 }; 122 123 static struct blockif_sig_elem *blockif_bse_head; 124 125 static int 126 blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, 127 enum blockop op) 128 { 129 struct blockif_elem *be, *tbe; 130 off_t off; 131 int i; 132 133 be = TAILQ_FIRST(&bc->bc_freeq); 134 assert(be != NULL); 135 assert(be->be_status == BST_FREE); 136 TAILQ_REMOVE(&bc->bc_freeq, be, be_link); 137 be->be_req = breq; 138 be->be_op = op; 139 switch (op) { 140 case BOP_READ: 141 case BOP_WRITE: 142 case BOP_DELETE: 143 off = breq->br_offset; 144 for (i = 0; i < breq->br_iovcnt; i++) 145 off += breq->br_iov[i].iov_len; 146 break; 147 default: 148 off = OFF_MAX; 149 } 150 be->be_block = off; 151 TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { 152 if (tbe->be_block == breq->br_offset) 153 break; 154 } 155 if (tbe == NULL) { 156 TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { 157 if (tbe->be_block == breq->br_offset) 158 break; 159 } 160 } 161 if (tbe == NULL) 162 be->be_status = BST_PEND; 163 else 164 be->be_status = BST_BLOCK; 165 TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); 166 return (be->be_status == BST_PEND); 167 } 168 169 static int 170 blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) 171 { 172 struct blockif_elem *be; 173 174 TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { 175 if (be->be_status == BST_PEND) 176 break; 177 assert(be->be_status == BST_BLOCK); 178 } 179 if (be == NULL) 180 return (0); 181 TAILQ_REMOVE(&bc->bc_pendq, be, be_link); 182 be->be_status = BST_BUSY; 183 be->be_tid = t; 184 TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); 185 *bep = be; 186 return (1); 187 } 188 189 static void 190 blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) 191 { 192 struct blockif_elem *tbe; 193 194 if (be->be_status == BST_DONE || be->be_status == BST_BUSY) 195 TAILQ_REMOVE(&bc->bc_busyq, be, be_link); 196 else 197 TAILQ_REMOVE(&bc->bc_pendq, be, be_link); 198 TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { 199 if (tbe->be_req->br_offset == be->be_block) 200 tbe->be_status = BST_PEND; 201 } 202 be->be_tid = 0; 203 be->be_status = BST_FREE; 204 be->be_req = NULL; 205 TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); 206 } 207 208 static void 209 blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) 210 { 211 struct blockif_req *br; 212 off_t arg[2]; 213 ssize_t clen, len, off, boff, voff; 214 int i, err; 215 216 br = be->be_req; 217 if (br->br_iovcnt <= 1) 218 buf = NULL; 219 err = 0; 220 switch (be->be_op) { 221 case BOP_READ: 222 if (buf == NULL) { 223 if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, 224 br->br_offset)) < 0) 225 err = errno; 226 else 227 br->br_resid -= len; 228 break; 229 } 230 i = 0; 231 off = voff = 0; 232 while (br->br_resid > 0) { 233 len = MIN(br->br_resid, MAXPHYS); 234 if (pread(bc->bc_fd, buf, len, br->br_offset + 235 off) < 0) { 236 err = errno; 237 break; 238 } 239 boff = 0; 240 do { 241 clen = MIN(len - boff, br->br_iov[i].iov_len - 242 voff); 243 memcpy(br->br_iov[i].iov_base + voff, 244 buf + boff, clen); 245 if (clen < br->br_iov[i].iov_len - voff) 246 voff += clen; 247 else { 248 i++; 249 voff = 0; 250 } 251 boff += clen; 252 } while (boff < len); 253 off += len; 254 br->br_resid -= len; 255 } 256 break; 257 case BOP_WRITE: 258 if (bc->bc_rdonly) { 259 err = EROFS; 260 break; 261 } 262 if (buf == NULL) { 263 if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, 264 br->br_offset)) < 0) 265 err = errno; 266 else 267 br->br_resid -= len; 268 break; 269 } 270 i = 0; 271 off = voff = 0; 272 while (br->br_resid > 0) { 273 len = MIN(br->br_resid, MAXPHYS); 274 boff = 0; 275 do { 276 clen = MIN(len - boff, br->br_iov[i].iov_len - 277 voff); 278 memcpy(buf + boff, 279 br->br_iov[i].iov_base + voff, clen); 280 if (clen < br->br_iov[i].iov_len - voff) 281 voff += clen; 282 else { 283 i++; 284 voff = 0; 285 } 286 boff += clen; 287 } while (boff < len); 288 if (pwrite(bc->bc_fd, buf, len, br->br_offset + 289 off) < 0) { 290 err = errno; 291 break; 292 } 293 off += len; 294 br->br_resid -= len; 295 } 296 break; 297 case BOP_FLUSH: 298 if (bc->bc_ischr) { 299 if (ioctl(bc->bc_fd, DIOCGFLUSH)) 300 err = errno; 301 } else if (fsync(bc->bc_fd)) 302 err = errno; 303 break; 304 case BOP_DELETE: 305 if (!bc->bc_candelete) 306 err = EOPNOTSUPP; 307 else if (bc->bc_rdonly) 308 err = EROFS; 309 else if (bc->bc_ischr) { 310 arg[0] = br->br_offset; 311 arg[1] = br->br_resid; 312 if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) 313 err = errno; 314 else 315 br->br_resid = 0; 316 } else 317 err = EOPNOTSUPP; 318 break; 319 default: 320 err = EINVAL; 321 break; 322 } 323 324 be->be_status = BST_DONE; 325 326 (*br->br_callback)(br, err); 327 } 328 329 static void * 330 blockif_thr(void *arg) 331 { 332 struct blockif_ctxt *bc; 333 struct blockif_elem *be; 334 pthread_t t; 335 uint8_t *buf; 336 337 bc = arg; 338 if (bc->bc_isgeom) 339 buf = malloc(MAXPHYS); 340 else 341 buf = NULL; 342 t = pthread_self(); 343 344 pthread_mutex_lock(&bc->bc_mtx); 345 for (;;) { 346 while (blockif_dequeue(bc, t, &be)) { 347 pthread_mutex_unlock(&bc->bc_mtx); 348 blockif_proc(bc, be, buf); 349 pthread_mutex_lock(&bc->bc_mtx); 350 blockif_complete(bc, be); 351 } 352 /* Check ctxt status here to see if exit requested */ 353 if (bc->bc_closing) 354 break; 355 pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); 356 } 357 pthread_mutex_unlock(&bc->bc_mtx); 358 359 if (buf) 360 free(buf); 361 pthread_exit(NULL); 362 return (NULL); 363 } 364 365 static void 366 blockif_sigcont_handler(int signal, enum ev_type type, void *arg) 367 { 368 struct blockif_sig_elem *bse; 369 370 for (;;) { 371 /* 372 * Process the entire list even if not intended for 373 * this thread. 374 */ 375 do { 376 bse = blockif_bse_head; 377 if (bse == NULL) 378 return; 379 } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, 380 (uintptr_t)bse, 381 (uintptr_t)bse->bse_next)); 382 383 pthread_mutex_lock(&bse->bse_mtx); 384 bse->bse_pending = 0; 385 pthread_cond_signal(&bse->bse_cond); 386 pthread_mutex_unlock(&bse->bse_mtx); 387 } 388 } 389 390 static void 391 blockif_init(void) 392 { 393 mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); 394 (void) signal(SIGCONT, SIG_IGN); 395 } 396 397 struct blockif_ctxt * 398 blockif_open(const char *optstr, const char *ident) 399 { 400 char tname[MAXCOMLEN + 1]; 401 char name[MAXPATHLEN]; 402 char *nopt, *xopts, *cp; 403 struct blockif_ctxt *bc; 404 struct stat sbuf; 405 struct diocgattr_arg arg; 406 off_t size, psectsz, psectoff; 407 int extra, fd, i, sectsz; 408 int nocache, sync, ro, candelete, geom, ssopt, pssopt; 409 #ifndef WITHOUT_CAPSICUM 410 cap_rights_t rights; 411 cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE }; 412 #endif 413 414 pthread_once(&blockif_once, blockif_init); 415 416 fd = -1; 417 ssopt = 0; 418 nocache = 0; 419 sync = 0; 420 ro = 0; 421 422 /* 423 * The first element in the optstring is always a pathname. 424 * Optional elements follow 425 */ 426 nopt = xopts = strdup(optstr); 427 while (xopts != NULL) { 428 cp = strsep(&xopts, ","); 429 if (cp == nopt) /* file or device pathname */ 430 continue; 431 else if (!strcmp(cp, "nocache")) 432 nocache = 1; 433 else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) 434 sync = 1; 435 else if (!strcmp(cp, "ro")) 436 ro = 1; 437 else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) 438 ; 439 else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) 440 pssopt = ssopt; 441 else { 442 fprintf(stderr, "Invalid device option \"%s\"\n", cp); 443 goto err; 444 } 445 } 446 447 extra = 0; 448 if (nocache) 449 extra |= O_DIRECT; 450 if (sync) 451 extra |= O_SYNC; 452 453 fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); 454 if (fd < 0 && !ro) { 455 /* Attempt a r/w fail with a r/o open */ 456 fd = open(nopt, O_RDONLY | extra); 457 ro = 1; 458 } 459 460 if (fd < 0) { 461 warn("Could not open backing file: %s", nopt); 462 goto err; 463 } 464 465 if (fstat(fd, &sbuf) < 0) { 466 warn("Could not stat backing file %s", nopt); 467 goto err; 468 } 469 470 #ifndef WITHOUT_CAPSICUM 471 cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK, 472 CAP_WRITE); 473 if (ro) 474 cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE); 475 476 if (cap_rights_limit(fd, &rights) == -1 && errno != ENOSYS) 477 errx(EX_OSERR, "Unable to apply rights for sandbox"); 478 #endif 479 480 /* 481 * Deal with raw devices 482 */ 483 size = sbuf.st_size; 484 sectsz = DEV_BSIZE; 485 psectsz = psectoff = 0; 486 candelete = geom = 0; 487 if (S_ISCHR(sbuf.st_mode)) { 488 if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 489 ioctl(fd, DIOCGSECTORSIZE, §sz)) { 490 perror("Could not fetch dev blk/sector size"); 491 goto err; 492 } 493 assert(size != 0); 494 assert(sectsz != 0); 495 if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) 496 ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); 497 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); 498 arg.len = sizeof(arg.value.i); 499 if (ioctl(fd, DIOCGATTR, &arg) == 0) 500 candelete = arg.value.i; 501 if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) 502 geom = 1; 503 } else 504 psectsz = sbuf.st_blksize; 505 506 #ifndef WITHOUT_CAPSICUM 507 if (cap_ioctls_limit(fd, cmds, nitems(cmds)) == -1 && errno != ENOSYS) 508 errx(EX_OSERR, "Unable to apply rights for sandbox"); 509 #endif 510 511 if (ssopt != 0) { 512 if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || 513 ssopt > pssopt) { 514 fprintf(stderr, "Invalid sector size %d/%d\n", 515 ssopt, pssopt); 516 goto err; 517 } 518 519 /* 520 * Some backend drivers (e.g. cd0, ada0) require that the I/O 521 * size be a multiple of the device's sector size. 522 * 523 * Validate that the emulated sector size complies with this 524 * requirement. 525 */ 526 if (S_ISCHR(sbuf.st_mode)) { 527 if (ssopt < sectsz || (ssopt % sectsz) != 0) { 528 fprintf(stderr, "Sector size %d incompatible " 529 "with underlying device sector size %d\n", 530 ssopt, sectsz); 531 goto err; 532 } 533 } 534 535 sectsz = ssopt; 536 psectsz = pssopt; 537 psectoff = 0; 538 } 539 540 bc = calloc(1, sizeof(struct blockif_ctxt)); 541 if (bc == NULL) { 542 perror("calloc"); 543 goto err; 544 } 545 546 bc->bc_magic = BLOCKIF_SIG; 547 bc->bc_fd = fd; 548 bc->bc_ischr = S_ISCHR(sbuf.st_mode); 549 bc->bc_isgeom = geom; 550 bc->bc_candelete = candelete; 551 bc->bc_rdonly = ro; 552 bc->bc_size = size; 553 bc->bc_sectsz = sectsz; 554 bc->bc_psectsz = psectsz; 555 bc->bc_psectoff = psectoff; 556 pthread_mutex_init(&bc->bc_mtx, NULL); 557 pthread_cond_init(&bc->bc_cond, NULL); 558 TAILQ_INIT(&bc->bc_freeq); 559 TAILQ_INIT(&bc->bc_pendq); 560 TAILQ_INIT(&bc->bc_busyq); 561 for (i = 0; i < BLOCKIF_MAXREQ; i++) { 562 bc->bc_reqs[i].be_status = BST_FREE; 563 TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); 564 } 565 566 for (i = 0; i < BLOCKIF_NUMTHR; i++) { 567 pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); 568 snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); 569 pthread_set_name_np(bc->bc_btid[i], tname); 570 } 571 572 return (bc); 573 err: 574 if (fd >= 0) 575 close(fd); 576 return (NULL); 577 } 578 579 static int 580 blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, 581 enum blockop op) 582 { 583 int err; 584 585 err = 0; 586 587 pthread_mutex_lock(&bc->bc_mtx); 588 if (!TAILQ_EMPTY(&bc->bc_freeq)) { 589 /* 590 * Enqueue and inform the block i/o thread 591 * that there is work available 592 */ 593 if (blockif_enqueue(bc, breq, op)) 594 pthread_cond_signal(&bc->bc_cond); 595 } else { 596 /* 597 * Callers are not allowed to enqueue more than 598 * the specified blockif queue limit. Return an 599 * error to indicate that the queue length has been 600 * exceeded. 601 */ 602 err = E2BIG; 603 } 604 pthread_mutex_unlock(&bc->bc_mtx); 605 606 return (err); 607 } 608 609 int 610 blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) 611 { 612 613 assert(bc->bc_magic == BLOCKIF_SIG); 614 return (blockif_request(bc, breq, BOP_READ)); 615 } 616 617 int 618 blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) 619 { 620 621 assert(bc->bc_magic == BLOCKIF_SIG); 622 return (blockif_request(bc, breq, BOP_WRITE)); 623 } 624 625 int 626 blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) 627 { 628 629 assert(bc->bc_magic == BLOCKIF_SIG); 630 return (blockif_request(bc, breq, BOP_FLUSH)); 631 } 632 633 int 634 blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) 635 { 636 637 assert(bc->bc_magic == BLOCKIF_SIG); 638 return (blockif_request(bc, breq, BOP_DELETE)); 639 } 640 641 int 642 blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) 643 { 644 struct blockif_elem *be; 645 646 assert(bc->bc_magic == BLOCKIF_SIG); 647 648 pthread_mutex_lock(&bc->bc_mtx); 649 /* 650 * Check pending requests. 651 */ 652 TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { 653 if (be->be_req == breq) 654 break; 655 } 656 if (be != NULL) { 657 /* 658 * Found it. 659 */ 660 blockif_complete(bc, be); 661 pthread_mutex_unlock(&bc->bc_mtx); 662 663 return (0); 664 } 665 666 /* 667 * Check in-flight requests. 668 */ 669 TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { 670 if (be->be_req == breq) 671 break; 672 } 673 if (be == NULL) { 674 /* 675 * Didn't find it. 676 */ 677 pthread_mutex_unlock(&bc->bc_mtx); 678 return (EINVAL); 679 } 680 681 /* 682 * Interrupt the processing thread to force it return 683 * prematurely via it's normal callback path. 684 */ 685 while (be->be_status == BST_BUSY) { 686 struct blockif_sig_elem bse, *old_head; 687 688 pthread_mutex_init(&bse.bse_mtx, NULL); 689 pthread_cond_init(&bse.bse_cond, NULL); 690 691 bse.bse_pending = 1; 692 693 do { 694 old_head = blockif_bse_head; 695 bse.bse_next = old_head; 696 } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, 697 (uintptr_t)old_head, 698 (uintptr_t)&bse)); 699 700 pthread_kill(be->be_tid, SIGCONT); 701 702 pthread_mutex_lock(&bse.bse_mtx); 703 while (bse.bse_pending) 704 pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); 705 pthread_mutex_unlock(&bse.bse_mtx); 706 } 707 708 pthread_mutex_unlock(&bc->bc_mtx); 709 710 /* 711 * The processing thread has been interrupted. Since it's not 712 * clear if the callback has been invoked yet, return EBUSY. 713 */ 714 return (EBUSY); 715 } 716 717 int 718 blockif_close(struct blockif_ctxt *bc) 719 { 720 void *jval; 721 int i; 722 723 assert(bc->bc_magic == BLOCKIF_SIG); 724 725 /* 726 * Stop the block i/o thread 727 */ 728 pthread_mutex_lock(&bc->bc_mtx); 729 bc->bc_closing = 1; 730 pthread_mutex_unlock(&bc->bc_mtx); 731 pthread_cond_broadcast(&bc->bc_cond); 732 for (i = 0; i < BLOCKIF_NUMTHR; i++) 733 pthread_join(bc->bc_btid[i], &jval); 734 735 /* XXX Cancel queued i/o's ??? */ 736 737 /* 738 * Release resources 739 */ 740 bc->bc_magic = 0; 741 close(bc->bc_fd); 742 free(bc); 743 744 return (0); 745 } 746 747 /* 748 * Return virtual C/H/S values for a given block. Use the algorithm 749 * outlined in the VHD specification to calculate values. 750 */ 751 void 752 blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) 753 { 754 off_t sectors; /* total sectors of the block dev */ 755 off_t hcyl; /* cylinders times heads */ 756 uint16_t secpt; /* sectors per track */ 757 uint8_t heads; 758 759 assert(bc->bc_magic == BLOCKIF_SIG); 760 761 sectors = bc->bc_size / bc->bc_sectsz; 762 763 /* Clamp the size to the largest possible with CHS */ 764 if (sectors > 65535UL*16*255) 765 sectors = 65535UL*16*255; 766 767 if (sectors >= 65536UL*16*63) { 768 secpt = 255; 769 heads = 16; 770 hcyl = sectors / secpt; 771 } else { 772 secpt = 17; 773 hcyl = sectors / secpt; 774 heads = (hcyl + 1023) / 1024; 775 776 if (heads < 4) 777 heads = 4; 778 779 if (hcyl >= (heads * 1024) || heads > 16) { 780 secpt = 31; 781 heads = 16; 782 hcyl = sectors / secpt; 783 } 784 if (hcyl >= (heads * 1024)) { 785 secpt = 63; 786 heads = 16; 787 hcyl = sectors / secpt; 788 } 789 } 790 791 *c = hcyl / heads; 792 *h = heads; 793 *s = secpt; 794 } 795 796 /* 797 * Accessors 798 */ 799 off_t 800 blockif_size(struct blockif_ctxt *bc) 801 { 802 803 assert(bc->bc_magic == BLOCKIF_SIG); 804 return (bc->bc_size); 805 } 806 807 int 808 blockif_sectsz(struct blockif_ctxt *bc) 809 { 810 811 assert(bc->bc_magic == BLOCKIF_SIG); 812 return (bc->bc_sectsz); 813 } 814 815 void 816 blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) 817 { 818 819 assert(bc->bc_magic == BLOCKIF_SIG); 820 *size = bc->bc_psectsz; 821 *off = bc->bc_psectoff; 822 } 823 824 int 825 blockif_queuesz(struct blockif_ctxt *bc) 826 { 827 828 assert(bc->bc_magic == BLOCKIF_SIG); 829 return (BLOCKIF_MAXREQ - 1); 830 } 831 832 int 833 blockif_is_ro(struct blockif_ctxt *bc) 834 { 835 836 assert(bc->bc_magic == BLOCKIF_SIG); 837 return (bc->bc_rdonly); 838 } 839 840 int 841 blockif_candelete(struct blockif_ctxt *bc) 842 { 843 844 assert(bc->bc_magic == BLOCKIF_SIG); 845 return (bc->bc_candelete); 846 } 847