1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #ifndef WITHOUT_CAPSICUM 36 #include <sys/capsicum.h> 37 #endif 38 #include <sys/queue.h> 39 #include <sys/errno.h> 40 #include <sys/stat.h> 41 #include <sys/ioctl.h> 42 #include <sys/disk.h> 43 44 #include <assert.h> 45 #ifndef WITHOUT_CAPSICUM 46 #include <capsicum_helpers.h> 47 #endif 48 #include <err.h> 49 #include <fcntl.h> 50 #include <stdio.h> 51 #include <stdlib.h> 52 #include <string.h> 53 #include <pthread.h> 54 #include <pthread_np.h> 55 #include <signal.h> 56 #include <sysexits.h> 57 #include <unistd.h> 58 59 #include <machine/atomic.h> 60 61 #include "bhyverun.h" 62 #include "debug.h" 63 #include "mevent.h" 64 #include "block_if.h" 65 66 #define BLOCKIF_SIG 0xb109b109 67 68 #define BLOCKIF_NUMTHR 8 69 #define BLOCKIF_MAXREQ (BLOCKIF_RING_MAX + BLOCKIF_NUMTHR) 70 71 enum blockop { 72 BOP_READ, 73 BOP_WRITE, 74 BOP_FLUSH, 75 BOP_DELETE 76 }; 77 78 enum blockstat { 79 BST_FREE, 80 BST_BLOCK, 81 BST_PEND, 82 BST_BUSY, 83 BST_DONE 84 }; 85 86 struct blockif_elem { 87 TAILQ_ENTRY(blockif_elem) be_link; 88 struct blockif_req *be_req; 89 enum blockop be_op; 90 enum blockstat be_status; 91 pthread_t be_tid; 92 off_t be_block; 93 }; 94 95 struct blockif_ctxt { 96 int bc_magic; 97 int bc_fd; 98 int bc_ischr; 99 int bc_isgeom; 100 int bc_candelete; 101 int bc_rdonly; 102 off_t bc_size; 103 int bc_sectsz; 104 int bc_psectsz; 105 int bc_psectoff; 106 int bc_closing; 107 pthread_t bc_btid[BLOCKIF_NUMTHR]; 108 pthread_mutex_t bc_mtx; 109 pthread_cond_t bc_cond; 110 111 /* Request elements and free/pending/busy queues */ 112 TAILQ_HEAD(, blockif_elem) bc_freeq; 113 TAILQ_HEAD(, blockif_elem) bc_pendq; 114 TAILQ_HEAD(, blockif_elem) bc_busyq; 115 struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; 116 }; 117 118 static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; 119 120 struct blockif_sig_elem { 121 pthread_mutex_t bse_mtx; 122 pthread_cond_t bse_cond; 123 int bse_pending; 124 struct blockif_sig_elem *bse_next; 125 }; 126 127 static struct blockif_sig_elem *blockif_bse_head; 128 129 static int 130 blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, 131 enum blockop op) 132 { 133 struct blockif_elem *be, *tbe; 134 off_t off; 135 int i; 136 137 be = TAILQ_FIRST(&bc->bc_freeq); 138 assert(be != NULL); 139 assert(be->be_status == BST_FREE); 140 TAILQ_REMOVE(&bc->bc_freeq, be, be_link); 141 be->be_req = breq; 142 be->be_op = op; 143 switch (op) { 144 case BOP_READ: 145 case BOP_WRITE: 146 case BOP_DELETE: 147 off = breq->br_offset; 148 for (i = 0; i < breq->br_iovcnt; i++) 149 off += breq->br_iov[i].iov_len; 150 break; 151 default: 152 off = OFF_MAX; 153 } 154 be->be_block = off; 155 TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { 156 if (tbe->be_block == breq->br_offset) 157 break; 158 } 159 if (tbe == NULL) { 160 TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { 161 if (tbe->be_block == breq->br_offset) 162 break; 163 } 164 } 165 if (tbe == NULL) 166 be->be_status = BST_PEND; 167 else 168 be->be_status = BST_BLOCK; 169 TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); 170 return (be->be_status == BST_PEND); 171 } 172 173 static int 174 blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) 175 { 176 struct blockif_elem *be; 177 178 TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { 179 if (be->be_status == BST_PEND) 180 break; 181 assert(be->be_status == BST_BLOCK); 182 } 183 if (be == NULL) 184 return (0); 185 TAILQ_REMOVE(&bc->bc_pendq, be, be_link); 186 be->be_status = BST_BUSY; 187 be->be_tid = t; 188 TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); 189 *bep = be; 190 return (1); 191 } 192 193 static void 194 blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) 195 { 196 struct blockif_elem *tbe; 197 198 if (be->be_status == BST_DONE || be->be_status == BST_BUSY) 199 TAILQ_REMOVE(&bc->bc_busyq, be, be_link); 200 else 201 TAILQ_REMOVE(&bc->bc_pendq, be, be_link); 202 TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { 203 if (tbe->be_req->br_offset == be->be_block) 204 tbe->be_status = BST_PEND; 205 } 206 be->be_tid = 0; 207 be->be_status = BST_FREE; 208 be->be_req = NULL; 209 TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); 210 } 211 212 static void 213 blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) 214 { 215 struct blockif_req *br; 216 off_t arg[2]; 217 ssize_t clen, len, off, boff, voff; 218 int i, err; 219 220 br = be->be_req; 221 if (br->br_iovcnt <= 1) 222 buf = NULL; 223 err = 0; 224 switch (be->be_op) { 225 case BOP_READ: 226 if (buf == NULL) { 227 if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, 228 br->br_offset)) < 0) 229 err = errno; 230 else 231 br->br_resid -= len; 232 break; 233 } 234 i = 0; 235 off = voff = 0; 236 while (br->br_resid > 0) { 237 len = MIN(br->br_resid, MAXPHYS); 238 if (pread(bc->bc_fd, buf, len, br->br_offset + 239 off) < 0) { 240 err = errno; 241 break; 242 } 243 boff = 0; 244 do { 245 clen = MIN(len - boff, br->br_iov[i].iov_len - 246 voff); 247 memcpy(br->br_iov[i].iov_base + voff, 248 buf + boff, clen); 249 if (clen < br->br_iov[i].iov_len - voff) 250 voff += clen; 251 else { 252 i++; 253 voff = 0; 254 } 255 boff += clen; 256 } while (boff < len); 257 off += len; 258 br->br_resid -= len; 259 } 260 break; 261 case BOP_WRITE: 262 if (bc->bc_rdonly) { 263 err = EROFS; 264 break; 265 } 266 if (buf == NULL) { 267 if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, 268 br->br_offset)) < 0) 269 err = errno; 270 else 271 br->br_resid -= len; 272 break; 273 } 274 i = 0; 275 off = voff = 0; 276 while (br->br_resid > 0) { 277 len = MIN(br->br_resid, MAXPHYS); 278 boff = 0; 279 do { 280 clen = MIN(len - boff, br->br_iov[i].iov_len - 281 voff); 282 memcpy(buf + boff, 283 br->br_iov[i].iov_base + voff, clen); 284 if (clen < br->br_iov[i].iov_len - voff) 285 voff += clen; 286 else { 287 i++; 288 voff = 0; 289 } 290 boff += clen; 291 } while (boff < len); 292 if (pwrite(bc->bc_fd, buf, len, br->br_offset + 293 off) < 0) { 294 err = errno; 295 break; 296 } 297 off += len; 298 br->br_resid -= len; 299 } 300 break; 301 case BOP_FLUSH: 302 if (bc->bc_ischr) { 303 if (ioctl(bc->bc_fd, DIOCGFLUSH)) 304 err = errno; 305 } else if (fsync(bc->bc_fd)) 306 err = errno; 307 break; 308 case BOP_DELETE: 309 if (!bc->bc_candelete) 310 err = EOPNOTSUPP; 311 else if (bc->bc_rdonly) 312 err = EROFS; 313 else if (bc->bc_ischr) { 314 arg[0] = br->br_offset; 315 arg[1] = br->br_resid; 316 if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) 317 err = errno; 318 else 319 br->br_resid = 0; 320 } else 321 err = EOPNOTSUPP; 322 break; 323 default: 324 err = EINVAL; 325 break; 326 } 327 328 be->be_status = BST_DONE; 329 330 (*br->br_callback)(br, err); 331 } 332 333 static void * 334 blockif_thr(void *arg) 335 { 336 struct blockif_ctxt *bc; 337 struct blockif_elem *be; 338 pthread_t t; 339 uint8_t *buf; 340 341 bc = arg; 342 if (bc->bc_isgeom) 343 buf = malloc(MAXPHYS); 344 else 345 buf = NULL; 346 t = pthread_self(); 347 348 pthread_mutex_lock(&bc->bc_mtx); 349 for (;;) { 350 while (blockif_dequeue(bc, t, &be)) { 351 pthread_mutex_unlock(&bc->bc_mtx); 352 blockif_proc(bc, be, buf); 353 pthread_mutex_lock(&bc->bc_mtx); 354 blockif_complete(bc, be); 355 } 356 /* Check ctxt status here to see if exit requested */ 357 if (bc->bc_closing) 358 break; 359 pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); 360 } 361 pthread_mutex_unlock(&bc->bc_mtx); 362 363 if (buf) 364 free(buf); 365 pthread_exit(NULL); 366 return (NULL); 367 } 368 369 static void 370 blockif_sigcont_handler(int signal, enum ev_type type, void *arg) 371 { 372 struct blockif_sig_elem *bse; 373 374 for (;;) { 375 /* 376 * Process the entire list even if not intended for 377 * this thread. 378 */ 379 do { 380 bse = blockif_bse_head; 381 if (bse == NULL) 382 return; 383 } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, 384 (uintptr_t)bse, 385 (uintptr_t)bse->bse_next)); 386 387 pthread_mutex_lock(&bse->bse_mtx); 388 bse->bse_pending = 0; 389 pthread_cond_signal(&bse->bse_cond); 390 pthread_mutex_unlock(&bse->bse_mtx); 391 } 392 } 393 394 static void 395 blockif_init(void) 396 { 397 mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); 398 (void) signal(SIGCONT, SIG_IGN); 399 } 400 401 struct blockif_ctxt * 402 blockif_open(const char *optstr, const char *ident) 403 { 404 char tname[MAXCOMLEN + 1]; 405 char name[MAXPATHLEN]; 406 char *nopt, *xopts, *cp; 407 struct blockif_ctxt *bc; 408 struct stat sbuf; 409 struct diocgattr_arg arg; 410 off_t size, psectsz, psectoff; 411 int extra, fd, i, sectsz; 412 int nocache, sync, ro, candelete, geom, ssopt, pssopt; 413 #ifndef WITHOUT_CAPSICUM 414 cap_rights_t rights; 415 cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE }; 416 #endif 417 418 pthread_once(&blockif_once, blockif_init); 419 420 fd = -1; 421 ssopt = 0; 422 nocache = 0; 423 sync = 0; 424 ro = 0; 425 426 /* 427 * The first element in the optstring is always a pathname. 428 * Optional elements follow 429 */ 430 nopt = xopts = strdup(optstr); 431 while (xopts != NULL) { 432 cp = strsep(&xopts, ","); 433 if (cp == nopt) /* file or device pathname */ 434 continue; 435 else if (!strcmp(cp, "nocache")) 436 nocache = 1; 437 else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) 438 sync = 1; 439 else if (!strcmp(cp, "ro")) 440 ro = 1; 441 else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) 442 ; 443 else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) 444 pssopt = ssopt; 445 else { 446 EPRINTLN("Invalid device option \"%s\"", cp); 447 goto err; 448 } 449 } 450 451 extra = 0; 452 if (nocache) 453 extra |= O_DIRECT; 454 if (sync) 455 extra |= O_SYNC; 456 457 fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); 458 if (fd < 0 && !ro) { 459 /* Attempt a r/w fail with a r/o open */ 460 fd = open(nopt, O_RDONLY | extra); 461 ro = 1; 462 } 463 464 if (fd < 0) { 465 warn("Could not open backing file: %s", nopt); 466 goto err; 467 } 468 469 if (fstat(fd, &sbuf) < 0) { 470 warn("Could not stat backing file %s", nopt); 471 goto err; 472 } 473 474 #ifndef WITHOUT_CAPSICUM 475 cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK, 476 CAP_WRITE); 477 if (ro) 478 cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE); 479 480 if (caph_rights_limit(fd, &rights) == -1) 481 errx(EX_OSERR, "Unable to apply rights for sandbox"); 482 #endif 483 484 /* 485 * Deal with raw devices 486 */ 487 size = sbuf.st_size; 488 sectsz = DEV_BSIZE; 489 psectsz = psectoff = 0; 490 candelete = geom = 0; 491 if (S_ISCHR(sbuf.st_mode)) { 492 if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 493 ioctl(fd, DIOCGSECTORSIZE, §sz)) { 494 perror("Could not fetch dev blk/sector size"); 495 goto err; 496 } 497 assert(size != 0); 498 assert(sectsz != 0); 499 if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) 500 ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); 501 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); 502 arg.len = sizeof(arg.value.i); 503 if (ioctl(fd, DIOCGATTR, &arg) == 0) 504 candelete = arg.value.i; 505 if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) 506 geom = 1; 507 } else 508 psectsz = sbuf.st_blksize; 509 510 #ifndef WITHOUT_CAPSICUM 511 if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1) 512 errx(EX_OSERR, "Unable to apply rights for sandbox"); 513 #endif 514 515 if (ssopt != 0) { 516 if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || 517 ssopt > pssopt) { 518 EPRINTLN("Invalid sector size %d/%d", 519 ssopt, pssopt); 520 goto err; 521 } 522 523 /* 524 * Some backend drivers (e.g. cd0, ada0) require that the I/O 525 * size be a multiple of the device's sector size. 526 * 527 * Validate that the emulated sector size complies with this 528 * requirement. 529 */ 530 if (S_ISCHR(sbuf.st_mode)) { 531 if (ssopt < sectsz || (ssopt % sectsz) != 0) { 532 EPRINTLN("Sector size %d incompatible " 533 "with underlying device sector size %d", 534 ssopt, sectsz); 535 goto err; 536 } 537 } 538 539 sectsz = ssopt; 540 psectsz = pssopt; 541 psectoff = 0; 542 } 543 544 bc = calloc(1, sizeof(struct blockif_ctxt)); 545 if (bc == NULL) { 546 perror("calloc"); 547 goto err; 548 } 549 550 bc->bc_magic = BLOCKIF_SIG; 551 bc->bc_fd = fd; 552 bc->bc_ischr = S_ISCHR(sbuf.st_mode); 553 bc->bc_isgeom = geom; 554 bc->bc_candelete = candelete; 555 bc->bc_rdonly = ro; 556 bc->bc_size = size; 557 bc->bc_sectsz = sectsz; 558 bc->bc_psectsz = psectsz; 559 bc->bc_psectoff = psectoff; 560 pthread_mutex_init(&bc->bc_mtx, NULL); 561 pthread_cond_init(&bc->bc_cond, NULL); 562 TAILQ_INIT(&bc->bc_freeq); 563 TAILQ_INIT(&bc->bc_pendq); 564 TAILQ_INIT(&bc->bc_busyq); 565 for (i = 0; i < BLOCKIF_MAXREQ; i++) { 566 bc->bc_reqs[i].be_status = BST_FREE; 567 TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); 568 } 569 570 for (i = 0; i < BLOCKIF_NUMTHR; i++) { 571 pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); 572 snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); 573 pthread_set_name_np(bc->bc_btid[i], tname); 574 } 575 576 return (bc); 577 err: 578 if (fd >= 0) 579 close(fd); 580 free(nopt); 581 return (NULL); 582 } 583 584 static int 585 blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, 586 enum blockop op) 587 { 588 int err; 589 590 err = 0; 591 592 pthread_mutex_lock(&bc->bc_mtx); 593 if (!TAILQ_EMPTY(&bc->bc_freeq)) { 594 /* 595 * Enqueue and inform the block i/o thread 596 * that there is work available 597 */ 598 if (blockif_enqueue(bc, breq, op)) 599 pthread_cond_signal(&bc->bc_cond); 600 } else { 601 /* 602 * Callers are not allowed to enqueue more than 603 * the specified blockif queue limit. Return an 604 * error to indicate that the queue length has been 605 * exceeded. 606 */ 607 err = E2BIG; 608 } 609 pthread_mutex_unlock(&bc->bc_mtx); 610 611 return (err); 612 } 613 614 int 615 blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) 616 { 617 618 assert(bc->bc_magic == BLOCKIF_SIG); 619 return (blockif_request(bc, breq, BOP_READ)); 620 } 621 622 int 623 blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) 624 { 625 626 assert(bc->bc_magic == BLOCKIF_SIG); 627 return (blockif_request(bc, breq, BOP_WRITE)); 628 } 629 630 int 631 blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) 632 { 633 634 assert(bc->bc_magic == BLOCKIF_SIG); 635 return (blockif_request(bc, breq, BOP_FLUSH)); 636 } 637 638 int 639 blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) 640 { 641 642 assert(bc->bc_magic == BLOCKIF_SIG); 643 return (blockif_request(bc, breq, BOP_DELETE)); 644 } 645 646 int 647 blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) 648 { 649 struct blockif_elem *be; 650 651 assert(bc->bc_magic == BLOCKIF_SIG); 652 653 pthread_mutex_lock(&bc->bc_mtx); 654 /* 655 * Check pending requests. 656 */ 657 TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { 658 if (be->be_req == breq) 659 break; 660 } 661 if (be != NULL) { 662 /* 663 * Found it. 664 */ 665 blockif_complete(bc, be); 666 pthread_mutex_unlock(&bc->bc_mtx); 667 668 return (0); 669 } 670 671 /* 672 * Check in-flight requests. 673 */ 674 TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { 675 if (be->be_req == breq) 676 break; 677 } 678 if (be == NULL) { 679 /* 680 * Didn't find it. 681 */ 682 pthread_mutex_unlock(&bc->bc_mtx); 683 return (EINVAL); 684 } 685 686 /* 687 * Interrupt the processing thread to force it return 688 * prematurely via it's normal callback path. 689 */ 690 while (be->be_status == BST_BUSY) { 691 struct blockif_sig_elem bse, *old_head; 692 693 pthread_mutex_init(&bse.bse_mtx, NULL); 694 pthread_cond_init(&bse.bse_cond, NULL); 695 696 bse.bse_pending = 1; 697 698 do { 699 old_head = blockif_bse_head; 700 bse.bse_next = old_head; 701 } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, 702 (uintptr_t)old_head, 703 (uintptr_t)&bse)); 704 705 pthread_kill(be->be_tid, SIGCONT); 706 707 pthread_mutex_lock(&bse.bse_mtx); 708 while (bse.bse_pending) 709 pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); 710 pthread_mutex_unlock(&bse.bse_mtx); 711 } 712 713 pthread_mutex_unlock(&bc->bc_mtx); 714 715 /* 716 * The processing thread has been interrupted. Since it's not 717 * clear if the callback has been invoked yet, return EBUSY. 718 */ 719 return (EBUSY); 720 } 721 722 int 723 blockif_close(struct blockif_ctxt *bc) 724 { 725 void *jval; 726 int i; 727 728 assert(bc->bc_magic == BLOCKIF_SIG); 729 730 /* 731 * Stop the block i/o thread 732 */ 733 pthread_mutex_lock(&bc->bc_mtx); 734 bc->bc_closing = 1; 735 pthread_mutex_unlock(&bc->bc_mtx); 736 pthread_cond_broadcast(&bc->bc_cond); 737 for (i = 0; i < BLOCKIF_NUMTHR; i++) 738 pthread_join(bc->bc_btid[i], &jval); 739 740 /* XXX Cancel queued i/o's ??? */ 741 742 /* 743 * Release resources 744 */ 745 bc->bc_magic = 0; 746 close(bc->bc_fd); 747 free(bc); 748 749 return (0); 750 } 751 752 /* 753 * Return virtual C/H/S values for a given block. Use the algorithm 754 * outlined in the VHD specification to calculate values. 755 */ 756 void 757 blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) 758 { 759 off_t sectors; /* total sectors of the block dev */ 760 off_t hcyl; /* cylinders times heads */ 761 uint16_t secpt; /* sectors per track */ 762 uint8_t heads; 763 764 assert(bc->bc_magic == BLOCKIF_SIG); 765 766 sectors = bc->bc_size / bc->bc_sectsz; 767 768 /* Clamp the size to the largest possible with CHS */ 769 if (sectors > 65535UL*16*255) 770 sectors = 65535UL*16*255; 771 772 if (sectors >= 65536UL*16*63) { 773 secpt = 255; 774 heads = 16; 775 hcyl = sectors / secpt; 776 } else { 777 secpt = 17; 778 hcyl = sectors / secpt; 779 heads = (hcyl + 1023) / 1024; 780 781 if (heads < 4) 782 heads = 4; 783 784 if (hcyl >= (heads * 1024) || heads > 16) { 785 secpt = 31; 786 heads = 16; 787 hcyl = sectors / secpt; 788 } 789 if (hcyl >= (heads * 1024)) { 790 secpt = 63; 791 heads = 16; 792 hcyl = sectors / secpt; 793 } 794 } 795 796 *c = hcyl / heads; 797 *h = heads; 798 *s = secpt; 799 } 800 801 /* 802 * Accessors 803 */ 804 off_t 805 blockif_size(struct blockif_ctxt *bc) 806 { 807 808 assert(bc->bc_magic == BLOCKIF_SIG); 809 return (bc->bc_size); 810 } 811 812 int 813 blockif_sectsz(struct blockif_ctxt *bc) 814 { 815 816 assert(bc->bc_magic == BLOCKIF_SIG); 817 return (bc->bc_sectsz); 818 } 819 820 void 821 blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) 822 { 823 824 assert(bc->bc_magic == BLOCKIF_SIG); 825 *size = bc->bc_psectsz; 826 *off = bc->bc_psectoff; 827 } 828 829 int 830 blockif_queuesz(struct blockif_ctxt *bc) 831 { 832 833 assert(bc->bc_magic == BLOCKIF_SIG); 834 return (BLOCKIF_MAXREQ - 1); 835 } 836 837 int 838 blockif_is_ro(struct blockif_ctxt *bc) 839 { 840 841 assert(bc->bc_magic == BLOCKIF_SIG); 842 return (bc->bc_rdonly); 843 } 844 845 int 846 blockif_candelete(struct blockif_ctxt *bc) 847 { 848 849 assert(bc->bc_magic == BLOCKIF_SIG); 850 return (bc->bc_candelete); 851 } 852