1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #ifndef WITHOUT_CAPSICUM 36 #include <sys/capsicum.h> 37 #endif 38 #include <sys/queue.h> 39 #include <sys/errno.h> 40 #include <sys/stat.h> 41 #include <sys/ioctl.h> 42 #include <sys/disk.h> 43 44 #include <assert.h> 45 #ifndef WITHOUT_CAPSICUM 46 #include <capsicum_helpers.h> 47 #endif 48 #include <err.h> 49 #include <fcntl.h> 50 #include <stdio.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <pthread.h> 54 #include <pthread_np.h> 55 #include <signal.h> 56 #include <sysexits.h> 57 #include <unistd.h> 58 59 #include <machine/atomic.h> 60 61 #include "bhyverun.h" 62 #include "mevent.h" 63 #include "block_if.h" 64 65 #define BLOCKIF_SIG 0xb109b109 66 67 #define BLOCKIF_NUMTHR 8 68 #define BLOCKIF_MAXREQ (64 + BLOCKIF_NUMTHR) 69 70 enum blockop { 71 BOP_READ, 72 BOP_WRITE, 73 BOP_FLUSH, 74 BOP_DELETE 75 }; 76 77 enum blockstat { 78 BST_FREE, 79 BST_BLOCK, 80 BST_PEND, 81 BST_BUSY, 82 BST_DONE 83 }; 84 85 struct blockif_elem { 86 TAILQ_ENTRY(blockif_elem) be_link; 87 struct blockif_req *be_req; 88 enum blockop be_op; 89 enum blockstat be_status; 90 pthread_t be_tid; 91 off_t be_block; 92 }; 93 94 struct blockif_ctxt { 95 int bc_magic; 96 int bc_fd; 97 int bc_ischr; 98 int bc_isgeom; 99 int bc_candelete; 100 int bc_rdonly; 101 off_t bc_size; 102 int bc_sectsz; 103 int bc_psectsz; 104 int bc_psectoff; 105 int bc_closing; 106 pthread_t bc_btid[BLOCKIF_NUMTHR]; 107 pthread_mutex_t bc_mtx; 108 pthread_cond_t bc_cond; 109 110 /* Request elements and free/pending/busy queues */ 111 TAILQ_HEAD(, blockif_elem) bc_freeq; 112 TAILQ_HEAD(, blockif_elem) bc_pendq; 113 TAILQ_HEAD(, blockif_elem) bc_busyq; 114 struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; 115 }; 116 117 static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; 118 119 struct blockif_sig_elem { 120 pthread_mutex_t bse_mtx; 121 pthread_cond_t bse_cond; 122 int bse_pending; 123 struct blockif_sig_elem *bse_next; 124 }; 125 126 static struct blockif_sig_elem *blockif_bse_head; 127 128 static int 129 blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, 130 enum blockop op) 131 { 132 struct blockif_elem *be, *tbe; 133 off_t off; 134 int i; 135 136 be = TAILQ_FIRST(&bc->bc_freeq); 137 assert(be != NULL); 138 assert(be->be_status == BST_FREE); 139 TAILQ_REMOVE(&bc->bc_freeq, be, be_link); 140 be->be_req = breq; 141 be->be_op = op; 142 switch (op) { 143 case BOP_READ: 144 case BOP_WRITE: 145 case BOP_DELETE: 146 off = breq->br_offset; 147 for (i = 0; i < breq->br_iovcnt; i++) 148 off += breq->br_iov[i].iov_len; 149 break; 150 default: 151 off = OFF_MAX; 152 } 153 be->be_block = off; 154 TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { 155 if (tbe->be_block == breq->br_offset) 156 break; 157 } 158 if (tbe == NULL) { 159 TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { 160 if (tbe->be_block == breq->br_offset) 161 break; 162 } 163 } 164 if (tbe == NULL) 165 be->be_status = BST_PEND; 166 else 167 be->be_status = BST_BLOCK; 168 TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); 169 return (be->be_status == BST_PEND); 170 } 171 172 static int 173 blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) 174 { 175 struct blockif_elem *be; 176 177 TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { 178 if (be->be_status == BST_PEND) 179 break; 180 assert(be->be_status == BST_BLOCK); 181 } 182 if (be == NULL) 183 return (0); 184 TAILQ_REMOVE(&bc->bc_pendq, be, be_link); 185 be->be_status = BST_BUSY; 186 be->be_tid = t; 187 TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); 188 *bep = be; 189 return (1); 190 } 191 192 static void 193 blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) 194 { 195 struct blockif_elem *tbe; 196 197 if (be->be_status == BST_DONE || be->be_status == BST_BUSY) 198 TAILQ_REMOVE(&bc->bc_busyq, be, be_link); 199 else 200 TAILQ_REMOVE(&bc->bc_pendq, be, be_link); 201 TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { 202 if (tbe->be_req->br_offset == be->be_block) 203 tbe->be_status = BST_PEND; 204 } 205 be->be_tid = 0; 206 be->be_status = BST_FREE; 207 be->be_req = NULL; 208 TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); 209 } 210 211 static void 212 blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) 213 { 214 struct blockif_req *br; 215 off_t arg[2]; 216 ssize_t clen, len, off, boff, voff; 217 int i, err; 218 219 br = be->be_req; 220 if (br->br_iovcnt <= 1) 221 buf = NULL; 222 err = 0; 223 switch (be->be_op) { 224 case BOP_READ: 225 if (buf == NULL) { 226 if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, 227 br->br_offset)) < 0) 228 err = errno; 229 else 230 br->br_resid -= len; 231 break; 232 } 233 i = 0; 234 off = voff = 0; 235 while (br->br_resid > 0) { 236 len = MIN(br->br_resid, MAXPHYS); 237 if (pread(bc->bc_fd, buf, len, br->br_offset + 238 off) < 0) { 239 err = errno; 240 break; 241 } 242 boff = 0; 243 do { 244 clen = MIN(len - boff, br->br_iov[i].iov_len - 245 voff); 246 memcpy(br->br_iov[i].iov_base + voff, 247 buf + boff, clen); 248 if (clen < br->br_iov[i].iov_len - voff) 249 voff += clen; 250 else { 251 i++; 252 voff = 0; 253 } 254 boff += clen; 255 } while (boff < len); 256 off += len; 257 br->br_resid -= len; 258 } 259 break; 260 case BOP_WRITE: 261 if (bc->bc_rdonly) { 262 err = EROFS; 263 break; 264 } 265 if (buf == NULL) { 266 if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, 267 br->br_offset)) < 0) 268 err = errno; 269 else 270 br->br_resid -= len; 271 break; 272 } 273 i = 0; 274 off = voff = 0; 275 while (br->br_resid > 0) { 276 len = MIN(br->br_resid, MAXPHYS); 277 boff = 0; 278 do { 279 clen = MIN(len - boff, br->br_iov[i].iov_len - 280 voff); 281 memcpy(buf + boff, 282 br->br_iov[i].iov_base + voff, clen); 283 if (clen < br->br_iov[i].iov_len - voff) 284 voff += clen; 285 else { 286 i++; 287 voff = 0; 288 } 289 boff += clen; 290 } while (boff < len); 291 if (pwrite(bc->bc_fd, buf, len, br->br_offset + 292 off) < 0) { 293 err = errno; 294 break; 295 } 296 off += len; 297 br->br_resid -= len; 298 } 299 break; 300 case BOP_FLUSH: 301 if (bc->bc_ischr) { 302 if (ioctl(bc->bc_fd, DIOCGFLUSH)) 303 err = errno; 304 } else if (fsync(bc->bc_fd)) 305 err = errno; 306 break; 307 case BOP_DELETE: 308 if (!bc->bc_candelete) 309 err = EOPNOTSUPP; 310 else if (bc->bc_rdonly) 311 err = EROFS; 312 else if (bc->bc_ischr) { 313 arg[0] = br->br_offset; 314 arg[1] = br->br_resid; 315 if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) 316 err = errno; 317 else 318 br->br_resid = 0; 319 } else 320 err = EOPNOTSUPP; 321 break; 322 default: 323 err = EINVAL; 324 break; 325 } 326 327 be->be_status = BST_DONE; 328 329 (*br->br_callback)(br, err); 330 } 331 332 static void * 333 blockif_thr(void *arg) 334 { 335 struct blockif_ctxt *bc; 336 struct blockif_elem *be; 337 pthread_t t; 338 uint8_t *buf; 339 340 bc = arg; 341 if (bc->bc_isgeom) 342 buf = malloc(MAXPHYS); 343 else 344 buf = NULL; 345 t = pthread_self(); 346 347 pthread_mutex_lock(&bc->bc_mtx); 348 for (;;) { 349 while (blockif_dequeue(bc, t, &be)) { 350 pthread_mutex_unlock(&bc->bc_mtx); 351 blockif_proc(bc, be, buf); 352 pthread_mutex_lock(&bc->bc_mtx); 353 blockif_complete(bc, be); 354 } 355 /* Check ctxt status here to see if exit requested */ 356 if (bc->bc_closing) 357 break; 358 pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); 359 } 360 pthread_mutex_unlock(&bc->bc_mtx); 361 362 if (buf) 363 free(buf); 364 pthread_exit(NULL); 365 return (NULL); 366 } 367 368 static void 369 blockif_sigcont_handler(int signal, enum ev_type type, void *arg) 370 { 371 struct blockif_sig_elem *bse; 372 373 for (;;) { 374 /* 375 * Process the entire list even if not intended for 376 * this thread. 377 */ 378 do { 379 bse = blockif_bse_head; 380 if (bse == NULL) 381 return; 382 } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, 383 (uintptr_t)bse, 384 (uintptr_t)bse->bse_next)); 385 386 pthread_mutex_lock(&bse->bse_mtx); 387 bse->bse_pending = 0; 388 pthread_cond_signal(&bse->bse_cond); 389 pthread_mutex_unlock(&bse->bse_mtx); 390 } 391 } 392 393 static void 394 blockif_init(void) 395 { 396 mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); 397 (void) signal(SIGCONT, SIG_IGN); 398 } 399 400 struct blockif_ctxt * 401 blockif_open(const char *optstr, const char *ident) 402 { 403 char tname[MAXCOMLEN + 1]; 404 char name[MAXPATHLEN]; 405 char *nopt, *xopts, *cp; 406 struct blockif_ctxt *bc; 407 struct stat sbuf; 408 struct diocgattr_arg arg; 409 off_t size, psectsz, psectoff; 410 int extra, fd, i, sectsz; 411 int nocache, sync, ro, candelete, geom, ssopt, pssopt; 412 #ifndef WITHOUT_CAPSICUM 413 cap_rights_t rights; 414 cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE }; 415 #endif 416 417 pthread_once(&blockif_once, blockif_init); 418 419 fd = -1; 420 ssopt = 0; 421 nocache = 0; 422 sync = 0; 423 ro = 0; 424 425 /* 426 * The first element in the optstring is always a pathname. 427 * Optional elements follow 428 */ 429 nopt = xopts = strdup(optstr); 430 while (xopts != NULL) { 431 cp = strsep(&xopts, ","); 432 if (cp == nopt) /* file or device pathname */ 433 continue; 434 else if (!strcmp(cp, "nocache")) 435 nocache = 1; 436 else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) 437 sync = 1; 438 else if (!strcmp(cp, "ro")) 439 ro = 1; 440 else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) 441 ; 442 else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) 443 pssopt = ssopt; 444 else { 445 fprintf(stderr, "Invalid device option \"%s\"\n", cp); 446 goto err; 447 } 448 } 449 450 extra = 0; 451 if (nocache) 452 extra |= O_DIRECT; 453 if (sync) 454 extra |= O_SYNC; 455 456 fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); 457 if (fd < 0 && !ro) { 458 /* Attempt a r/w fail with a r/o open */ 459 fd = open(nopt, O_RDONLY | extra); 460 ro = 1; 461 } 462 463 if (fd < 0) { 464 warn("Could not open backing file: %s", nopt); 465 goto err; 466 } 467 468 if (fstat(fd, &sbuf) < 0) { 469 warn("Could not stat backing file %s", nopt); 470 goto err; 471 } 472 473 #ifndef WITHOUT_CAPSICUM 474 cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK, 475 CAP_WRITE); 476 if (ro) 477 cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE); 478 479 if (caph_rights_limit(fd, &rights) == -1) 480 errx(EX_OSERR, "Unable to apply rights for sandbox"); 481 #endif 482 483 /* 484 * Deal with raw devices 485 */ 486 size = sbuf.st_size; 487 sectsz = DEV_BSIZE; 488 psectsz = psectoff = 0; 489 candelete = geom = 0; 490 if (S_ISCHR(sbuf.st_mode)) { 491 if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 492 ioctl(fd, DIOCGSECTORSIZE, §sz)) { 493 perror("Could not fetch dev blk/sector size"); 494 goto err; 495 } 496 assert(size != 0); 497 assert(sectsz != 0); 498 if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) 499 ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); 500 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); 501 arg.len = sizeof(arg.value.i); 502 if (ioctl(fd, DIOCGATTR, &arg) == 0) 503 candelete = arg.value.i; 504 if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) 505 geom = 1; 506 } else 507 psectsz = sbuf.st_blksize; 508 509 #ifndef WITHOUT_CAPSICUM 510 if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) 511 errx(EX_OSERR, "Unable to apply rights for sandbox"); 512 #endif 513 514 if (ssopt != 0) { 515 if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || 516 ssopt > pssopt) { 517 fprintf(stderr, "Invalid sector size %d/%d\n", 518 ssopt, pssopt); 519 goto err; 520 } 521 522 /* 523 * Some backend drivers (e.g. cd0, ada0) require that the I/O 524 * size be a multiple of the device's sector size. 525 * 526 * Validate that the emulated sector size complies with this 527 * requirement. 528 */ 529 if (S_ISCHR(sbuf.st_mode)) { 530 if (ssopt < sectsz || (ssopt % sectsz) != 0) { 531 fprintf(stderr, "Sector size %d incompatible " 532 "with underlying device sector size %d\n", 533 ssopt, sectsz); 534 goto err; 535 } 536 } 537 538 sectsz = ssopt; 539 psectsz = pssopt; 540 psectoff = 0; 541 } 542 543 bc = calloc(1, sizeof(struct blockif_ctxt)); 544 if (bc == NULL) { 545 perror("calloc"); 546 goto err; 547 } 548 549 bc->bc_magic = BLOCKIF_SIG; 550 bc->bc_fd = fd; 551 bc->bc_ischr = S_ISCHR(sbuf.st_mode); 552 bc->bc_isgeom = geom; 553 bc->bc_candelete = candelete; 554 bc->bc_rdonly = ro; 555 bc->bc_size = size; 556 bc->bc_sectsz = sectsz; 557 bc->bc_psectsz = psectsz; 558 bc->bc_psectoff = psectoff; 559 pthread_mutex_init(&bc->bc_mtx, NULL); 560 pthread_cond_init(&bc->bc_cond, NULL); 561 TAILQ_INIT(&bc->bc_freeq); 562 TAILQ_INIT(&bc->bc_pendq); 563 TAILQ_INIT(&bc->bc_busyq); 564 for (i = 0; i < BLOCKIF_MAXREQ; i++) { 565 bc->bc_reqs[i].be_status = BST_FREE; 566 TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); 567 } 568 569 for (i = 0; i < BLOCKIF_NUMTHR; i++) { 570 pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); 571 snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); 572 pthread_set_name_np(bc->bc_btid[i], tname); 573 } 574 575 return (bc); 576 err: 577 if (fd >= 0) 578 close(fd); 579 free(nopt); 580 return (NULL); 581 } 582 583 static int 584 blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, 585 enum blockop op) 586 { 587 int err; 588 589 err = 0; 590 591 pthread_mutex_lock(&bc->bc_mtx); 592 if (!TAILQ_EMPTY(&bc->bc_freeq)) { 593 /* 594 * Enqueue and inform the block i/o thread 595 * that there is work available 596 */ 597 if (blockif_enqueue(bc, breq, op)) 598 pthread_cond_signal(&bc->bc_cond); 599 } else { 600 /* 601 * Callers are not allowed to enqueue more than 602 * the specified blockif queue limit. Return an 603 * error to indicate that the queue length has been 604 * exceeded. 605 */ 606 err = E2BIG; 607 } 608 pthread_mutex_unlock(&bc->bc_mtx); 609 610 return (err); 611 } 612 613 int 614 blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) 615 { 616 617 assert(bc->bc_magic == BLOCKIF_SIG); 618 return (blockif_request(bc, breq, BOP_READ)); 619 } 620 621 int 622 blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) 623 { 624 625 assert(bc->bc_magic == BLOCKIF_SIG); 626 return (blockif_request(bc, breq, BOP_WRITE)); 627 } 628 629 int 630 blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) 631 { 632 633 assert(bc->bc_magic == BLOCKIF_SIG); 634 return (blockif_request(bc, breq, BOP_FLUSH)); 635 } 636 637 int 638 blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) 639 { 640 641 assert(bc->bc_magic == BLOCKIF_SIG); 642 return (blockif_request(bc, breq, BOP_DELETE)); 643 } 644 645 int 646 blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) 647 { 648 struct blockif_elem *be; 649 650 assert(bc->bc_magic == BLOCKIF_SIG); 651 652 pthread_mutex_lock(&bc->bc_mtx); 653 /* 654 * Check pending requests. 655 */ 656 TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { 657 if (be->be_req == breq) 658 break; 659 } 660 if (be != NULL) { 661 /* 662 * Found it. 663 */ 664 blockif_complete(bc, be); 665 pthread_mutex_unlock(&bc->bc_mtx); 666 667 return (0); 668 } 669 670 /* 671 * Check in-flight requests. 672 */ 673 TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { 674 if (be->be_req == breq) 675 break; 676 } 677 if (be == NULL) { 678 /* 679 * Didn't find it. 680 */ 681 pthread_mutex_unlock(&bc->bc_mtx); 682 return (EINVAL); 683 } 684 685 /* 686 * Interrupt the processing thread to force it return 687 * prematurely via it's normal callback path. 688 */ 689 while (be->be_status == BST_BUSY) { 690 struct blockif_sig_elem bse, *old_head; 691 692 pthread_mutex_init(&bse.bse_mtx, NULL); 693 pthread_cond_init(&bse.bse_cond, NULL); 694 695 bse.bse_pending = 1; 696 697 do { 698 old_head = blockif_bse_head; 699 bse.bse_next = old_head; 700 } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, 701 (uintptr_t)old_head, 702 (uintptr_t)&bse)); 703 704 pthread_kill(be->be_tid, SIGCONT); 705 706 pthread_mutex_lock(&bse.bse_mtx); 707 while (bse.bse_pending) 708 pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); 709 pthread_mutex_unlock(&bse.bse_mtx); 710 } 711 712 pthread_mutex_unlock(&bc->bc_mtx); 713 714 /* 715 * The processing thread has been interrupted. Since it's not 716 * clear if the callback has been invoked yet, return EBUSY. 717 */ 718 return (EBUSY); 719 } 720 721 int 722 blockif_close(struct blockif_ctxt *bc) 723 { 724 void *jval; 725 int i; 726 727 assert(bc->bc_magic == BLOCKIF_SIG); 728 729 /* 730 * Stop the block i/o thread 731 */ 732 pthread_mutex_lock(&bc->bc_mtx); 733 bc->bc_closing = 1; 734 pthread_mutex_unlock(&bc->bc_mtx); 735 pthread_cond_broadcast(&bc->bc_cond); 736 for (i = 0; i < BLOCKIF_NUMTHR; i++) 737 pthread_join(bc->bc_btid[i], &jval); 738 739 /* XXX Cancel queued i/o's ??? */ 740 741 /* 742 * Release resources 743 */ 744 bc->bc_magic = 0; 745 close(bc->bc_fd); 746 free(bc); 747 748 return (0); 749 } 750 751 /* 752 * Return virtual C/H/S values for a given block. Use the algorithm 753 * outlined in the VHD specification to calculate values. 754 */ 755 void 756 blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) 757 { 758 off_t sectors; /* total sectors of the block dev */ 759 off_t hcyl; /* cylinders times heads */ 760 uint16_t secpt; /* sectors per track */ 761 uint8_t heads; 762 763 assert(bc->bc_magic == BLOCKIF_SIG); 764 765 sectors = bc->bc_size / bc->bc_sectsz; 766 767 /* Clamp the size to the largest possible with CHS */ 768 if (sectors > 65535UL*16*255) 769 sectors = 65535UL*16*255; 770 771 if (sectors >= 65536UL*16*63) { 772 secpt = 255; 773 heads = 16; 774 hcyl = sectors / secpt; 775 } else { 776 secpt = 17; 777 hcyl = sectors / secpt; 778 heads = (hcyl + 1023) / 1024; 779 780 if (heads < 4) 781 heads = 4; 782 783 if (hcyl >= (heads * 1024) || heads > 16) { 784 secpt = 31; 785 heads = 16; 786 hcyl = sectors / secpt; 787 } 788 if (hcyl >= (heads * 1024)) { 789 secpt = 63; 790 heads = 16; 791 hcyl = sectors / secpt; 792 } 793 } 794 795 *c = hcyl / heads; 796 *h = heads; 797 *s = secpt; 798 } 799 800 /* 801 * Accessors 802 */ 803 off_t 804 blockif_size(struct blockif_ctxt *bc) 805 { 806 807 assert(bc->bc_magic == BLOCKIF_SIG); 808 return (bc->bc_size); 809 } 810 811 int 812 blockif_sectsz(struct blockif_ctxt *bc) 813 { 814 815 assert(bc->bc_magic == BLOCKIF_SIG); 816 return (bc->bc_sectsz); 817 } 818 819 void 820 blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) 821 { 822 823 assert(bc->bc_magic == BLOCKIF_SIG); 824 *size = bc->bc_psectsz; 825 *off = bc->bc_psectoff; 826 } 827 828 int 829 blockif_queuesz(struct blockif_ctxt *bc) 830 { 831 832 assert(bc->bc_magic == BLOCKIF_SIG); 833 return (BLOCKIF_MAXREQ - 1); 834 } 835 836 int 837 blockif_is_ro(struct blockif_ctxt *bc) 838 { 839 840 assert(bc->bc_magic == BLOCKIF_SIG); 841 return (bc->bc_rdonly); 842 } 843 844 int 845 blockif_candelete(struct blockif_ctxt *bc) 846 { 847 848 assert(bc->bc_magic == BLOCKIF_SIG); 849 return (bc->bc_candelete); 850 } 851