1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #ifndef WITHOUT_CAPSICUM 36 #include <sys/capsicum.h> 37 #endif 38 #include <sys/queue.h> 39 #include <sys/errno.h> 40 #include <sys/stat.h> 41 #include <sys/ioctl.h> 42 #include <sys/disk.h> 43 44 #include <assert.h> 45 #ifndef WITHOUT_CAPSICUM 46 #include <capsicum_helpers.h> 47 #endif 48 #include <err.h> 49 #include <fcntl.h> 50 #include <stdio.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <pthread.h> 54 #include <pthread_np.h> 55 #include <signal.h> 56 #include <sysexits.h> 57 #include <unistd.h> 58 59 #include <machine/atomic.h> 60 61 #include "bhyverun.h" 62 #include "mevent.h" 63 #include "block_if.h" 64 65 #define BLOCKIF_SIG 0xb109b109 66 67 #define BLOCKIF_NUMTHR 8 68 #define BLOCKIF_MAXREQ (64 + BLOCKIF_NUMTHR) 69 70 enum blockop { 71 BOP_READ, 72 BOP_WRITE, 73 BOP_FLUSH, 74 BOP_DELETE 75 }; 76 77 enum blockstat { 78 BST_FREE, 79 BST_BLOCK, 80 BST_PEND, 81 BST_BUSY, 82 BST_DONE 83 }; 84 85 struct blockif_elem { 86 TAILQ_ENTRY(blockif_elem) be_link; 87 struct blockif_req *be_req; 88 enum blockop be_op; 89 enum blockstat be_status; 90 pthread_t be_tid; 91 off_t be_block; 92 }; 93 94 struct blockif_ctxt { 95 int bc_magic; 96 int bc_fd; 97 int bc_ischr; 98 int bc_isgeom; 99 int bc_candelete; 100 int bc_rdonly; 101 off_t bc_size; 102 int bc_sectsz; 103 int bc_psectsz; 104 int bc_psectoff; 105 int bc_closing; 106 pthread_t bc_btid[BLOCKIF_NUMTHR]; 107 pthread_mutex_t bc_mtx; 108 pthread_cond_t bc_cond; 109 110 /* Request elements and free/pending/busy queues */ 111 TAILQ_HEAD(, blockif_elem) bc_freeq; 112 TAILQ_HEAD(, blockif_elem) bc_pendq; 113 TAILQ_HEAD(, blockif_elem) bc_busyq; 114 struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; 115 }; 116 117 static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; 118 119 struct blockif_sig_elem { 120 pthread_mutex_t bse_mtx; 121 pthread_cond_t bse_cond; 122 int bse_pending; 123 struct blockif_sig_elem *bse_next; 124 }; 125 126 static struct blockif_sig_elem *blockif_bse_head; 127 128 static int 129 blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, 130 enum blockop op) 131 { 132 struct blockif_elem *be, *tbe; 133 off_t off; 134 int i; 135 136 be = TAILQ_FIRST(&bc->bc_freeq); 137 assert(be != NULL); 138 assert(be->be_status == BST_FREE); 139 TAILQ_REMOVE(&bc->bc_freeq, be, be_link); 140 be->be_req = breq; 141 be->be_op = op; 142 switch (op) { 143 case BOP_READ: 144 case BOP_WRITE: 145 case BOP_DELETE: 146 off = breq->br_offset; 147 for (i = 0; i < breq->br_iovcnt; i++) 148 off += breq->br_iov[i].iov_len; 149 break; 150 default: 151 off = OFF_MAX; 152 } 153 be->be_block = off; 154 TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { 155 if (tbe->be_block == breq->br_offset) 156 break; 157 } 158 if (tbe == NULL) { 159 TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { 160 if (tbe->be_block == breq->br_offset) 161 break; 162 } 163 } 164 if (tbe == NULL) 165 be->be_status = BST_PEND; 166 else 167 be->be_status = BST_BLOCK; 168 TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); 169 return (be->be_status == BST_PEND); 170 } 171 172 static int 173 blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) 174 { 175 struct blockif_elem *be; 176 177 TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { 178 if (be->be_status == BST_PEND) 179 break; 180 assert(be->be_status == BST_BLOCK); 181 } 182 if (be == NULL) 183 return (0); 184 TAILQ_REMOVE(&bc->bc_pendq, be, be_link); 185 be->be_status = BST_BUSY; 186 be->be_tid = t; 187 TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); 188 *bep = be; 189 return (1); 190 } 191 192 static void 193 blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) 194 { 195 struct blockif_elem *tbe; 196 197 if (be->be_status == BST_DONE || be->be_status == BST_BUSY) 198 TAILQ_REMOVE(&bc->bc_busyq, be, be_link); 199 else 200 TAILQ_REMOVE(&bc->bc_pendq, be, be_link); 201 TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { 202 if (tbe->be_req->br_offset == be->be_block) 203 tbe->be_status = BST_PEND; 204 } 205 be->be_tid = 0; 206 be->be_status = BST_FREE; 207 be->be_req = NULL; 208 TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); 209 } 210 211 static void 212 blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) 213 { 214 struct blockif_req *br; 215 off_t arg[2]; 216 ssize_t clen, len, off, boff, voff; 217 int i, err; 218 219 br = be->be_req; 220 if (br->br_iovcnt <= 1) 221 buf = NULL; 222 err = 0; 223 switch (be->be_op) { 224 case BOP_READ: 225 if (buf == NULL) { 226 if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, 227 br->br_offset)) < 0) 228 err = errno; 229 else 230 br->br_resid -= len; 231 break; 232 } 233 i = 0; 234 off = voff = 0; 235 while (br->br_resid > 0) { 236 len = MIN(br->br_resid, MAXPHYS); 237 if (pread(bc->bc_fd, buf, len, br->br_offset + 238 off) < 0) { 239 err = errno; 240 break; 241 } 242 boff = 0; 243 do { 244 clen = MIN(len - boff, br->br_iov[i].iov_len - 245 voff); 246 memcpy(br->br_iov[i].iov_base + voff, 247 buf + boff, clen); 248 if (clen < br->br_iov[i].iov_len - voff) 249 voff += clen; 250 else { 251 i++; 252 voff = 0; 253 } 254 boff += clen; 255 } while (boff < len); 256 off += len; 257 br->br_resid -= len; 258 } 259 break; 260 case BOP_WRITE: 261 if (bc->bc_rdonly) { 262 err = EROFS; 263 break; 264 } 265 if (buf == NULL) { 266 if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, 267 br->br_offset)) < 0) 268 err = errno; 269 else 270 br->br_resid -= len; 271 break; 272 } 273 i = 0; 274 off = voff = 0; 275 while (br->br_resid > 0) { 276 len = MIN(br->br_resid, MAXPHYS); 277 boff = 0; 278 do { 279 clen = MIN(len - boff, br->br_iov[i].iov_len - 280 voff); 281 memcpy(buf + boff, 282 br->br_iov[i].iov_base + voff, clen); 283 if (clen < br->br_iov[i].iov_len - voff) 284 voff += clen; 285 else { 286 i++; 287 voff = 0; 288 } 289 boff += clen; 290 } while (boff < len); 291 if (pwrite(bc->bc_fd, buf, len, br->br_offset + 292 off) < 0) { 293 err = errno; 294 break; 295 } 296 off += len; 297 br->br_resid -= len; 298 } 299 break; 300 case BOP_FLUSH: 301 if (bc->bc_ischr) { 302 if (ioctl(bc->bc_fd, DIOCGFLUSH)) 303 err = errno; 304 } else if (fsync(bc->bc_fd)) 305 err = errno; 306 break; 307 case BOP_DELETE: 308 if (!bc->bc_candelete) 309 err = EOPNOTSUPP; 310 else if (bc->bc_rdonly) 311 err = EROFS; 312 else if (bc->bc_ischr) { 313 arg[0] = br->br_offset; 314 arg[1] = br->br_resid; 315 if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) 316 err = errno; 317 else 318 br->br_resid = 0; 319 } else 320 err = EOPNOTSUPP; 321 break; 322 default: 323 err = EINVAL; 324 break; 325 } 326 327 be->be_status = BST_DONE; 328 329 (*br->br_callback)(br, err); 330 } 331 332 static void * 333 blockif_thr(void *arg) 334 { 335 struct blockif_ctxt *bc; 336 struct blockif_elem *be; 337 pthread_t t; 338 uint8_t *buf; 339 340 bc = arg; 341 if (bc->bc_isgeom) 342 buf = malloc(MAXPHYS); 343 else 344 buf = NULL; 345 t = pthread_self(); 346 347 pthread_mutex_lock(&bc->bc_mtx); 348 for (;;) { 349 while (blockif_dequeue(bc, t, &be)) { 350 pthread_mutex_unlock(&bc->bc_mtx); 351 blockif_proc(bc, be, buf); 352 pthread_mutex_lock(&bc->bc_mtx); 353 blockif_complete(bc, be); 354 } 355 /* Check ctxt status here to see if exit requested */ 356 if (bc->bc_closing) 357 break; 358 pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); 359 } 360 pthread_mutex_unlock(&bc->bc_mtx); 361 362 if (buf) 363 free(buf); 364 pthread_exit(NULL); 365 return (NULL); 366 } 367 368 static void 369 blockif_sigcont_handler(int signal, enum ev_type type, void *arg) 370 { 371 struct blockif_sig_elem *bse; 372 373 for (;;) { 374 /* 375 * Process the entire list even if not intended for 376 * this thread. 377 */ 378 do { 379 bse = blockif_bse_head; 380 if (bse == NULL) 381 return; 382 } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, 383 (uintptr_t)bse, 384 (uintptr_t)bse->bse_next)); 385 386 pthread_mutex_lock(&bse->bse_mtx); 387 bse->bse_pending = 0; 388 pthread_cond_signal(&bse->bse_cond); 389 pthread_mutex_unlock(&bse->bse_mtx); 390 } 391 } 392 393 static void 394 blockif_init(void) 395 { 396 mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); 397 (void) signal(SIGCONT, SIG_IGN); 398 } 399 400 struct blockif_ctxt * 401 blockif_open(const char *optstr, const char *ident) 402 { 403 char tname[MAXCOMLEN + 1]; 404 char name[MAXPATHLEN]; 405 char *nopt, *xopts, *cp; 406 struct blockif_ctxt *bc; 407 struct stat sbuf; 408 struct diocgattr_arg arg; 409 off_t size, psectsz, psectoff; 410 int extra, fd, i, sectsz; 411 int nocache, sync, ro, candelete, geom, ssopt, pssopt; 412 #ifndef WITHOUT_CAPSICUM 413 cap_rights_t rights; 414 cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE }; 415 #endif 416 417 pthread_once(&blockif_once, blockif_init); 418 419 fd = -1; 420 ssopt = 0; 421 nocache = 0; 422 sync = 0; 423 ro = 0; 424 425 /* 426 * The first element in the optstring is always a pathname. 427 * Optional elements follow 428 */ 429 nopt = xopts = strdup(optstr); 430 while (xopts != NULL) { 431 cp = strsep(&xopts, ","); 432 if (cp == nopt) /* file or device pathname */ 433 continue; 434 else if (!strcmp(cp, "nocache")) 435 nocache = 1; 436 else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) 437 sync = 1; 438 else if (!strcmp(cp, "ro")) 439 ro = 1; 440 else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) 441 ; 442 else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) 443 pssopt = ssopt; 444 else { 445 fprintf(stderr, "Invalid device option \"%s\"\n", cp); 446 goto err; 447 } 448 } 449 450 extra = 0; 451 if (nocache) 452 extra |= O_DIRECT; 453 if (sync) 454 extra |= O_SYNC; 455 456 fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); 457 if (fd < 0 && !ro) { 458 /* Attempt a r/w fail with a r/o open */ 459 fd = open(nopt, O_RDONLY | extra); 460 ro = 1; 461 } 462 463 if (fd < 0) { 464 warn("Could not open backing file: %s", nopt); 465 goto err; 466 } 467 468 if (fstat(fd, &sbuf) < 0) { 469 warn("Could not stat backing file %s", nopt); 470 goto err; 471 } 472 473 #ifndef WITHOUT_CAPSICUM 474 cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK, 475 CAP_WRITE); 476 if (ro) 477 cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE); 478 479 if (caph_rights_limit(fd, &rights) == -1) 480 errx(EX_OSERR, "Unable to apply rights for sandbox"); 481 #endif 482 483 /* 484 * Deal with raw devices 485 */ 486 size = sbuf.st_size; 487 sectsz = DEV_BSIZE; 488 psectsz = psectoff = 0; 489 candelete = geom = 0; 490 if (S_ISCHR(sbuf.st_mode)) { 491 if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 492 ioctl(fd, DIOCGSECTORSIZE, §sz)) { 493 perror("Could not fetch dev blk/sector size"); 494 goto err; 495 } 496 assert(size != 0); 497 assert(sectsz != 0); 498 if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) 499 ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); 500 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); 501 arg.len = sizeof(arg.value.i); 502 if (ioctl(fd, DIOCGATTR, &arg) == 0) 503 candelete = arg.value.i; 504 if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) 505 geom = 1; 506 } else 507 psectsz = sbuf.st_blksize; 508 509 #ifndef WITHOUT_CAPSICUM 510 if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) 511 errx(EX_OSERR, "Unable to apply rights for sandbox"); 512 #endif 513 514 if (ssopt != 0) { 515 if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || 516 ssopt > pssopt) { 517 fprintf(stderr, "Invalid sector size %d/%d\n", 518 ssopt, pssopt); 519 goto err; 520 } 521 522 /* 523 * Some backend drivers (e.g. cd0, ada0) require that the I/O 524 * size be a multiple of the device's sector size. 525 * 526 * Validate that the emulated sector size complies with this 527 * requirement. 528 */ 529 if (S_ISCHR(sbuf.st_mode)) { 530 if (ssopt < sectsz || (ssopt % sectsz) != 0) { 531 fprintf(stderr, "Sector size %d incompatible " 532 "with underlying device sector size %d\n", 533 ssopt, sectsz); 534 goto err; 535 } 536 } 537 538 sectsz = ssopt; 539 psectsz = pssopt; 540 psectoff = 0; 541 } 542 543 bc = calloc(1, sizeof(struct blockif_ctxt)); 544 if (bc == NULL) { 545 perror("calloc"); 546 goto err; 547 } 548 549 bc->bc_magic = BLOCKIF_SIG; 550 bc->bc_fd = fd; 551 bc->bc_ischr = S_ISCHR(sbuf.st_mode); 552 bc->bc_isgeom = geom; 553 bc->bc_candelete = candelete; 554 bc->bc_rdonly = ro; 555 bc->bc_size = size; 556 bc->bc_sectsz = sectsz; 557 bc->bc_psectsz = psectsz; 558 bc->bc_psectoff = psectoff; 559 pthread_mutex_init(&bc->bc_mtx, NULL); 560 pthread_cond_init(&bc->bc_cond, NULL); 561 TAILQ_INIT(&bc->bc_freeq); 562 TAILQ_INIT(&bc->bc_pendq); 563 TAILQ_INIT(&bc->bc_busyq); 564 for (i = 0; i < BLOCKIF_MAXREQ; i++) { 565 bc->bc_reqs[i].be_status = BST_FREE; 566 TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); 567 } 568 569 for (i = 0; i < BLOCKIF_NUMTHR; i++) { 570 pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); 571 snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); 572 pthread_set_name_np(bc->bc_btid[i], tname); 573 } 574 575 return (bc); 576 err: 577 if (fd >= 0) 578 close(fd); 579 free(cp); 580 free(xopts); 581 free(nopt); 582 return (NULL); 583 } 584 585 static int 586 blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, 587 enum blockop op) 588 { 589 int err; 590 591 err = 0; 592 593 pthread_mutex_lock(&bc->bc_mtx); 594 if (!TAILQ_EMPTY(&bc->bc_freeq)) { 595 /* 596 * Enqueue and inform the block i/o thread 597 * that there is work available 598 */ 599 if (blockif_enqueue(bc, breq, op)) 600 pthread_cond_signal(&bc->bc_cond); 601 } else { 602 /* 603 * Callers are not allowed to enqueue more than 604 * the specified blockif queue limit. Return an 605 * error to indicate that the queue length has been 606 * exceeded. 607 */ 608 err = E2BIG; 609 } 610 pthread_mutex_unlock(&bc->bc_mtx); 611 612 return (err); 613 } 614 615 int 616 blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) 617 { 618 619 assert(bc->bc_magic == BLOCKIF_SIG); 620 return (blockif_request(bc, breq, BOP_READ)); 621 } 622 623 int 624 blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) 625 { 626 627 assert(bc->bc_magic == BLOCKIF_SIG); 628 return (blockif_request(bc, breq, BOP_WRITE)); 629 } 630 631 int 632 blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) 633 { 634 635 assert(bc->bc_magic == BLOCKIF_SIG); 636 return (blockif_request(bc, breq, BOP_FLUSH)); 637 } 638 639 int 640 blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) 641 { 642 643 assert(bc->bc_magic == BLOCKIF_SIG); 644 return (blockif_request(bc, breq, BOP_DELETE)); 645 } 646 647 int 648 blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) 649 { 650 struct blockif_elem *be; 651 652 assert(bc->bc_magic == BLOCKIF_SIG); 653 654 pthread_mutex_lock(&bc->bc_mtx); 655 /* 656 * Check pending requests. 657 */ 658 TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { 659 if (be->be_req == breq) 660 break; 661 } 662 if (be != NULL) { 663 /* 664 * Found it. 665 */ 666 blockif_complete(bc, be); 667 pthread_mutex_unlock(&bc->bc_mtx); 668 669 return (0); 670 } 671 672 /* 673 * Check in-flight requests. 674 */ 675 TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { 676 if (be->be_req == breq) 677 break; 678 } 679 if (be == NULL) { 680 /* 681 * Didn't find it. 682 */ 683 pthread_mutex_unlock(&bc->bc_mtx); 684 return (EINVAL); 685 } 686 687 /* 688 * Interrupt the processing thread to force it return 689 * prematurely via it's normal callback path. 690 */ 691 while (be->be_status == BST_BUSY) { 692 struct blockif_sig_elem bse, *old_head; 693 694 pthread_mutex_init(&bse.bse_mtx, NULL); 695 pthread_cond_init(&bse.bse_cond, NULL); 696 697 bse.bse_pending = 1; 698 699 do { 700 old_head = blockif_bse_head; 701 bse.bse_next = old_head; 702 } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, 703 (uintptr_t)old_head, 704 (uintptr_t)&bse)); 705 706 pthread_kill(be->be_tid, SIGCONT); 707 708 pthread_mutex_lock(&bse.bse_mtx); 709 while (bse.bse_pending) 710 pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); 711 pthread_mutex_unlock(&bse.bse_mtx); 712 } 713 714 pthread_mutex_unlock(&bc->bc_mtx); 715 716 /* 717 * The processing thread has been interrupted. Since it's not 718 * clear if the callback has been invoked yet, return EBUSY. 719 */ 720 return (EBUSY); 721 } 722 723 int 724 blockif_close(struct blockif_ctxt *bc) 725 { 726 void *jval; 727 int i; 728 729 assert(bc->bc_magic == BLOCKIF_SIG); 730 731 /* 732 * Stop the block i/o thread 733 */ 734 pthread_mutex_lock(&bc->bc_mtx); 735 bc->bc_closing = 1; 736 pthread_mutex_unlock(&bc->bc_mtx); 737 pthread_cond_broadcast(&bc->bc_cond); 738 for (i = 0; i < BLOCKIF_NUMTHR; i++) 739 pthread_join(bc->bc_btid[i], &jval); 740 741 /* XXX Cancel queued i/o's ??? */ 742 743 /* 744 * Release resources 745 */ 746 bc->bc_magic = 0; 747 close(bc->bc_fd); 748 free(bc); 749 750 return (0); 751 } 752 753 /* 754 * Return virtual C/H/S values for a given block. Use the algorithm 755 * outlined in the VHD specification to calculate values. 756 */ 757 void 758 blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) 759 { 760 off_t sectors; /* total sectors of the block dev */ 761 off_t hcyl; /* cylinders times heads */ 762 uint16_t secpt; /* sectors per track */ 763 uint8_t heads; 764 765 assert(bc->bc_magic == BLOCKIF_SIG); 766 767 sectors = bc->bc_size / bc->bc_sectsz; 768 769 /* Clamp the size to the largest possible with CHS */ 770 if (sectors > 65535UL*16*255) 771 sectors = 65535UL*16*255; 772 773 if (sectors >= 65536UL*16*63) { 774 secpt = 255; 775 heads = 16; 776 hcyl = sectors / secpt; 777 } else { 778 secpt = 17; 779 hcyl = sectors / secpt; 780 heads = (hcyl + 1023) / 1024; 781 782 if (heads < 4) 783 heads = 4; 784 785 if (hcyl >= (heads * 1024) || heads > 16) { 786 secpt = 31; 787 heads = 16; 788 hcyl = sectors / secpt; 789 } 790 if (hcyl >= (heads * 1024)) { 791 secpt = 63; 792 heads = 16; 793 hcyl = sectors / secpt; 794 } 795 } 796 797 *c = hcyl / heads; 798 *h = heads; 799 *s = secpt; 800 } 801 802 /* 803 * Accessors 804 */ 805 off_t 806 blockif_size(struct blockif_ctxt *bc) 807 { 808 809 assert(bc->bc_magic == BLOCKIF_SIG); 810 return (bc->bc_size); 811 } 812 813 int 814 blockif_sectsz(struct blockif_ctxt *bc) 815 { 816 817 assert(bc->bc_magic == BLOCKIF_SIG); 818 return (bc->bc_sectsz); 819 } 820 821 void 822 blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) 823 { 824 825 assert(bc->bc_magic == BLOCKIF_SIG); 826 *size = bc->bc_psectsz; 827 *off = bc->bc_psectoff; 828 } 829 830 int 831 blockif_queuesz(struct blockif_ctxt *bc) 832 { 833 834 assert(bc->bc_magic == BLOCKIF_SIG); 835 return (BLOCKIF_MAXREQ - 1); 836 } 837 838 int 839 blockif_is_ro(struct blockif_ctxt *bc) 840 { 841 842 assert(bc->bc_magic == BLOCKIF_SIG); 843 return (bc->bc_rdonly); 844 } 845 846 int 847 blockif_candelete(struct blockif_ctxt *bc) 848 { 849 850 assert(bc->bc_magic == BLOCKIF_SIG); 851 return (bc->bc_candelete); 852 } 853