1 /*- 2 * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #ifndef WITHOUT_CAPSICUM 34 #include <sys/capsicum.h> 35 #endif 36 #include <sys/queue.h> 37 #include <sys/errno.h> 38 #include <sys/stat.h> 39 #include <sys/ioctl.h> 40 #include <sys/disk.h> 41 42 #include <assert.h> 43 #include <err.h> 44 #include <fcntl.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <string.h> 48 #include <pthread.h> 49 #include <pthread_np.h> 50 #include <signal.h> 51 #include <sysexits.h> 52 #include <unistd.h> 53 54 #include <machine/atomic.h> 55 56 #include "bhyverun.h" 57 #include "mevent.h" 58 #include "block_if.h" 59 60 #define BLOCKIF_SIG 0xb109b109 61 62 #define BLOCKIF_NUMTHR 8 63 #define BLOCKIF_MAXREQ (64 + BLOCKIF_NUMTHR) 64 65 enum blockop { 66 BOP_READ, 67 BOP_WRITE, 68 BOP_FLUSH, 69 BOP_DELETE 70 }; 71 72 enum blockstat { 73 BST_FREE, 74 BST_BLOCK, 75 BST_PEND, 76 BST_BUSY, 77 BST_DONE 78 }; 79 80 struct blockif_elem { 81 TAILQ_ENTRY(blockif_elem) be_link; 82 struct blockif_req *be_req; 83 enum blockop be_op; 84 enum blockstat be_status; 85 pthread_t be_tid; 86 off_t be_block; 87 }; 88 89 struct blockif_ctxt { 90 int bc_magic; 91 int bc_fd; 92 int bc_ischr; 93 int bc_isgeom; 94 int bc_candelete; 95 int bc_rdonly; 96 off_t bc_size; 97 int bc_sectsz; 98 int bc_psectsz; 99 int bc_psectoff; 100 int bc_closing; 101 pthread_t bc_btid[BLOCKIF_NUMTHR]; 102 pthread_mutex_t bc_mtx; 103 pthread_cond_t bc_cond; 104 105 /* Request elements and free/pending/busy queues */ 106 TAILQ_HEAD(, blockif_elem) bc_freeq; 107 TAILQ_HEAD(, blockif_elem) bc_pendq; 108 TAILQ_HEAD(, blockif_elem) bc_busyq; 109 struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; 110 }; 111 112 static pthread_once_t blockif_once = PTHREAD_ONCE_INIT; 113 114 struct blockif_sig_elem { 115 pthread_mutex_t bse_mtx; 116 pthread_cond_t bse_cond; 117 int bse_pending; 118 struct blockif_sig_elem *bse_next; 119 }; 120 121 static struct blockif_sig_elem *blockif_bse_head; 122 123 static int 124 blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, 125 enum blockop op) 126 { 127 struct blockif_elem *be, *tbe; 128 off_t off; 129 int i; 130 131 be = TAILQ_FIRST(&bc->bc_freeq); 132 assert(be != NULL); 133 assert(be->be_status == BST_FREE); 134 TAILQ_REMOVE(&bc->bc_freeq, be, be_link); 135 be->be_req = breq; 136 be->be_op = op; 137 switch (op) { 138 case BOP_READ: 139 case BOP_WRITE: 140 case BOP_DELETE: 141 off = breq->br_offset; 142 for (i = 0; i < breq->br_iovcnt; i++) 143 off += breq->br_iov[i].iov_len; 144 break; 145 default: 146 off = OFF_MAX; 147 } 148 be->be_block = off; 149 TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { 150 if (tbe->be_block == breq->br_offset) 151 break; 152 } 153 if (tbe == NULL) { 154 TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { 155 if (tbe->be_block == breq->br_offset) 156 break; 157 } 158 } 159 if (tbe == NULL) 160 be->be_status = BST_PEND; 161 else 162 be->be_status = BST_BLOCK; 163 TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); 164 return (be->be_status == BST_PEND); 165 } 166 167 static int 168 blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) 169 { 170 struct blockif_elem *be; 171 172 TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { 173 if (be->be_status == BST_PEND) 174 break; 175 assert(be->be_status == BST_BLOCK); 176 } 177 if (be == NULL) 178 return (0); 179 TAILQ_REMOVE(&bc->bc_pendq, be, be_link); 180 be->be_status = BST_BUSY; 181 be->be_tid = t; 182 TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); 183 *bep = be; 184 return (1); 185 } 186 187 static void 188 blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) 189 { 190 struct blockif_elem *tbe; 191 192 if (be->be_status == BST_DONE || be->be_status == BST_BUSY) 193 TAILQ_REMOVE(&bc->bc_busyq, be, be_link); 194 else 195 TAILQ_REMOVE(&bc->bc_pendq, be, be_link); 196 TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { 197 if (tbe->be_req->br_offset == be->be_block) 198 tbe->be_status = BST_PEND; 199 } 200 be->be_tid = 0; 201 be->be_status = BST_FREE; 202 be->be_req = NULL; 203 TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); 204 } 205 206 static void 207 blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf) 208 { 209 struct blockif_req *br; 210 off_t arg[2]; 211 ssize_t clen, len, off, boff, voff; 212 int i, err; 213 214 br = be->be_req; 215 if (br->br_iovcnt <= 1) 216 buf = NULL; 217 err = 0; 218 switch (be->be_op) { 219 case BOP_READ: 220 if (buf == NULL) { 221 if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt, 222 br->br_offset)) < 0) 223 err = errno; 224 else 225 br->br_resid -= len; 226 break; 227 } 228 i = 0; 229 off = voff = 0; 230 while (br->br_resid > 0) { 231 len = MIN(br->br_resid, MAXPHYS); 232 if (pread(bc->bc_fd, buf, len, br->br_offset + 233 off) < 0) { 234 err = errno; 235 break; 236 } 237 boff = 0; 238 do { 239 clen = MIN(len - boff, br->br_iov[i].iov_len - 240 voff); 241 memcpy(br->br_iov[i].iov_base + voff, 242 buf + boff, clen); 243 if (clen < br->br_iov[i].iov_len - voff) 244 voff += clen; 245 else { 246 i++; 247 voff = 0; 248 } 249 boff += clen; 250 } while (boff < len); 251 off += len; 252 br->br_resid -= len; 253 } 254 break; 255 case BOP_WRITE: 256 if (bc->bc_rdonly) { 257 err = EROFS; 258 break; 259 } 260 if (buf == NULL) { 261 if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt, 262 br->br_offset)) < 0) 263 err = errno; 264 else 265 br->br_resid -= len; 266 break; 267 } 268 i = 0; 269 off = voff = 0; 270 while (br->br_resid > 0) { 271 len = MIN(br->br_resid, MAXPHYS); 272 boff = 0; 273 do { 274 clen = MIN(len - boff, br->br_iov[i].iov_len - 275 voff); 276 memcpy(buf + boff, 277 br->br_iov[i].iov_base + voff, clen); 278 if (clen < br->br_iov[i].iov_len - voff) 279 voff += clen; 280 else { 281 i++; 282 voff = 0; 283 } 284 boff += clen; 285 } while (boff < len); 286 if (pwrite(bc->bc_fd, buf, len, br->br_offset + 287 off) < 0) { 288 err = errno; 289 break; 290 } 291 off += len; 292 br->br_resid -= len; 293 } 294 break; 295 case BOP_FLUSH: 296 if (bc->bc_ischr) { 297 if (ioctl(bc->bc_fd, DIOCGFLUSH)) 298 err = errno; 299 } else if (fsync(bc->bc_fd)) 300 err = errno; 301 break; 302 case BOP_DELETE: 303 if (!bc->bc_candelete) 304 err = EOPNOTSUPP; 305 else if (bc->bc_rdonly) 306 err = EROFS; 307 else if (bc->bc_ischr) { 308 arg[0] = br->br_offset; 309 arg[1] = br->br_resid; 310 if (ioctl(bc->bc_fd, DIOCGDELETE, arg)) 311 err = errno; 312 else 313 br->br_resid = 0; 314 } else 315 err = EOPNOTSUPP; 316 break; 317 default: 318 err = EINVAL; 319 break; 320 } 321 322 be->be_status = BST_DONE; 323 324 (*br->br_callback)(br, err); 325 } 326 327 static void * 328 blockif_thr(void *arg) 329 { 330 struct blockif_ctxt *bc; 331 struct blockif_elem *be; 332 pthread_t t; 333 uint8_t *buf; 334 335 bc = arg; 336 if (bc->bc_isgeom) 337 buf = malloc(MAXPHYS); 338 else 339 buf = NULL; 340 t = pthread_self(); 341 342 pthread_mutex_lock(&bc->bc_mtx); 343 for (;;) { 344 while (blockif_dequeue(bc, t, &be)) { 345 pthread_mutex_unlock(&bc->bc_mtx); 346 blockif_proc(bc, be, buf); 347 pthread_mutex_lock(&bc->bc_mtx); 348 blockif_complete(bc, be); 349 } 350 /* Check ctxt status here to see if exit requested */ 351 if (bc->bc_closing) 352 break; 353 pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); 354 } 355 pthread_mutex_unlock(&bc->bc_mtx); 356 357 if (buf) 358 free(buf); 359 pthread_exit(NULL); 360 return (NULL); 361 } 362 363 static void 364 blockif_sigcont_handler(int signal, enum ev_type type, void *arg) 365 { 366 struct blockif_sig_elem *bse; 367 368 for (;;) { 369 /* 370 * Process the entire list even if not intended for 371 * this thread. 372 */ 373 do { 374 bse = blockif_bse_head; 375 if (bse == NULL) 376 return; 377 } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, 378 (uintptr_t)bse, 379 (uintptr_t)bse->bse_next)); 380 381 pthread_mutex_lock(&bse->bse_mtx); 382 bse->bse_pending = 0; 383 pthread_cond_signal(&bse->bse_cond); 384 pthread_mutex_unlock(&bse->bse_mtx); 385 } 386 } 387 388 static void 389 blockif_init(void) 390 { 391 mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL); 392 (void) signal(SIGCONT, SIG_IGN); 393 } 394 395 struct blockif_ctxt * 396 blockif_open(const char *optstr, const char *ident) 397 { 398 char tname[MAXCOMLEN + 1]; 399 char name[MAXPATHLEN]; 400 char *nopt, *xopts, *cp; 401 struct blockif_ctxt *bc; 402 struct stat sbuf; 403 struct diocgattr_arg arg; 404 off_t size, psectsz, psectoff; 405 int extra, fd, i, sectsz; 406 int nocache, sync, ro, candelete, geom, ssopt, pssopt; 407 #ifndef WITHOUT_CAPSICUM 408 cap_rights_t rights; 409 cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE }; 410 #endif 411 412 pthread_once(&blockif_once, blockif_init); 413 414 fd = -1; 415 ssopt = 0; 416 nocache = 0; 417 sync = 0; 418 ro = 0; 419 420 /* 421 * The first element in the optstring is always a pathname. 422 * Optional elements follow 423 */ 424 nopt = xopts = strdup(optstr); 425 while (xopts != NULL) { 426 cp = strsep(&xopts, ","); 427 if (cp == nopt) /* file or device pathname */ 428 continue; 429 else if (!strcmp(cp, "nocache")) 430 nocache = 1; 431 else if (!strcmp(cp, "sync") || !strcmp(cp, "direct")) 432 sync = 1; 433 else if (!strcmp(cp, "ro")) 434 ro = 1; 435 else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2) 436 ; 437 else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1) 438 pssopt = ssopt; 439 else { 440 fprintf(stderr, "Invalid device option \"%s\"\n", cp); 441 goto err; 442 } 443 } 444 445 extra = 0; 446 if (nocache) 447 extra |= O_DIRECT; 448 if (sync) 449 extra |= O_SYNC; 450 451 fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra); 452 if (fd < 0 && !ro) { 453 /* Attempt a r/w fail with a r/o open */ 454 fd = open(nopt, O_RDONLY | extra); 455 ro = 1; 456 } 457 458 if (fd < 0) { 459 warn("Could not open backing file: %s", nopt); 460 goto err; 461 } 462 463 if (fstat(fd, &sbuf) < 0) { 464 warn("Could not stat backing file %s", nopt); 465 goto err; 466 } 467 468 #ifndef WITHOUT_CAPSICUM 469 cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK, 470 CAP_WRITE); 471 if (ro) 472 cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE); 473 474 if (cap_rights_limit(fd, &rights) == -1 && errno != ENOSYS) 475 errx(EX_OSERR, "Unable to apply rights for sandbox"); 476 #endif 477 478 /* 479 * Deal with raw devices 480 */ 481 size = sbuf.st_size; 482 sectsz = DEV_BSIZE; 483 psectsz = psectoff = 0; 484 candelete = geom = 0; 485 if (S_ISCHR(sbuf.st_mode)) { 486 if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 || 487 ioctl(fd, DIOCGSECTORSIZE, §sz)) { 488 perror("Could not fetch dev blk/sector size"); 489 goto err; 490 } 491 assert(size != 0); 492 assert(sectsz != 0); 493 if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0) 494 ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff); 495 strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name)); 496 arg.len = sizeof(arg.value.i); 497 if (ioctl(fd, DIOCGATTR, &arg) == 0) 498 candelete = arg.value.i; 499 if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0) 500 geom = 1; 501 } else 502 psectsz = sbuf.st_blksize; 503 504 #ifndef WITHOUT_CAPSICUM 505 if (cap_ioctls_limit(fd, cmds, nitems(cmds)) == -1 && errno != ENOSYS) 506 errx(EX_OSERR, "Unable to apply rights for sandbox"); 507 #endif 508 509 if (ssopt != 0) { 510 if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 || 511 ssopt > pssopt) { 512 fprintf(stderr, "Invalid sector size %d/%d\n", 513 ssopt, pssopt); 514 goto err; 515 } 516 517 /* 518 * Some backend drivers (e.g. cd0, ada0) require that the I/O 519 * size be a multiple of the device's sector size. 520 * 521 * Validate that the emulated sector size complies with this 522 * requirement. 523 */ 524 if (S_ISCHR(sbuf.st_mode)) { 525 if (ssopt < sectsz || (ssopt % sectsz) != 0) { 526 fprintf(stderr, "Sector size %d incompatible " 527 "with underlying device sector size %d\n", 528 ssopt, sectsz); 529 goto err; 530 } 531 } 532 533 sectsz = ssopt; 534 psectsz = pssopt; 535 psectoff = 0; 536 } 537 538 bc = calloc(1, sizeof(struct blockif_ctxt)); 539 if (bc == NULL) { 540 perror("calloc"); 541 goto err; 542 } 543 544 bc->bc_magic = BLOCKIF_SIG; 545 bc->bc_fd = fd; 546 bc->bc_ischr = S_ISCHR(sbuf.st_mode); 547 bc->bc_isgeom = geom; 548 bc->bc_candelete = candelete; 549 bc->bc_rdonly = ro; 550 bc->bc_size = size; 551 bc->bc_sectsz = sectsz; 552 bc->bc_psectsz = psectsz; 553 bc->bc_psectoff = psectoff; 554 pthread_mutex_init(&bc->bc_mtx, NULL); 555 pthread_cond_init(&bc->bc_cond, NULL); 556 TAILQ_INIT(&bc->bc_freeq); 557 TAILQ_INIT(&bc->bc_pendq); 558 TAILQ_INIT(&bc->bc_busyq); 559 for (i = 0; i < BLOCKIF_MAXREQ; i++) { 560 bc->bc_reqs[i].be_status = BST_FREE; 561 TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); 562 } 563 564 for (i = 0; i < BLOCKIF_NUMTHR; i++) { 565 pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); 566 snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); 567 pthread_set_name_np(bc->bc_btid[i], tname); 568 } 569 570 return (bc); 571 err: 572 if (fd >= 0) 573 close(fd); 574 return (NULL); 575 } 576 577 static int 578 blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, 579 enum blockop op) 580 { 581 int err; 582 583 err = 0; 584 585 pthread_mutex_lock(&bc->bc_mtx); 586 if (!TAILQ_EMPTY(&bc->bc_freeq)) { 587 /* 588 * Enqueue and inform the block i/o thread 589 * that there is work available 590 */ 591 if (blockif_enqueue(bc, breq, op)) 592 pthread_cond_signal(&bc->bc_cond); 593 } else { 594 /* 595 * Callers are not allowed to enqueue more than 596 * the specified blockif queue limit. Return an 597 * error to indicate that the queue length has been 598 * exceeded. 599 */ 600 err = E2BIG; 601 } 602 pthread_mutex_unlock(&bc->bc_mtx); 603 604 return (err); 605 } 606 607 int 608 blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq) 609 { 610 611 assert(bc->bc_magic == BLOCKIF_SIG); 612 return (blockif_request(bc, breq, BOP_READ)); 613 } 614 615 int 616 blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq) 617 { 618 619 assert(bc->bc_magic == BLOCKIF_SIG); 620 return (blockif_request(bc, breq, BOP_WRITE)); 621 } 622 623 int 624 blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq) 625 { 626 627 assert(bc->bc_magic == BLOCKIF_SIG); 628 return (blockif_request(bc, breq, BOP_FLUSH)); 629 } 630 631 int 632 blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq) 633 { 634 635 assert(bc->bc_magic == BLOCKIF_SIG); 636 return (blockif_request(bc, breq, BOP_DELETE)); 637 } 638 639 int 640 blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) 641 { 642 struct blockif_elem *be; 643 644 assert(bc->bc_magic == BLOCKIF_SIG); 645 646 pthread_mutex_lock(&bc->bc_mtx); 647 /* 648 * Check pending requests. 649 */ 650 TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { 651 if (be->be_req == breq) 652 break; 653 } 654 if (be != NULL) { 655 /* 656 * Found it. 657 */ 658 blockif_complete(bc, be); 659 pthread_mutex_unlock(&bc->bc_mtx); 660 661 return (0); 662 } 663 664 /* 665 * Check in-flight requests. 666 */ 667 TAILQ_FOREACH(be, &bc->bc_busyq, be_link) { 668 if (be->be_req == breq) 669 break; 670 } 671 if (be == NULL) { 672 /* 673 * Didn't find it. 674 */ 675 pthread_mutex_unlock(&bc->bc_mtx); 676 return (EINVAL); 677 } 678 679 /* 680 * Interrupt the processing thread to force it return 681 * prematurely via it's normal callback path. 682 */ 683 while (be->be_status == BST_BUSY) { 684 struct blockif_sig_elem bse, *old_head; 685 686 pthread_mutex_init(&bse.bse_mtx, NULL); 687 pthread_cond_init(&bse.bse_cond, NULL); 688 689 bse.bse_pending = 1; 690 691 do { 692 old_head = blockif_bse_head; 693 bse.bse_next = old_head; 694 } while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head, 695 (uintptr_t)old_head, 696 (uintptr_t)&bse)); 697 698 pthread_kill(be->be_tid, SIGCONT); 699 700 pthread_mutex_lock(&bse.bse_mtx); 701 while (bse.bse_pending) 702 pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx); 703 pthread_mutex_unlock(&bse.bse_mtx); 704 } 705 706 pthread_mutex_unlock(&bc->bc_mtx); 707 708 /* 709 * The processing thread has been interrupted. Since it's not 710 * clear if the callback has been invoked yet, return EBUSY. 711 */ 712 return (EBUSY); 713 } 714 715 int 716 blockif_close(struct blockif_ctxt *bc) 717 { 718 void *jval; 719 int i; 720 721 assert(bc->bc_magic == BLOCKIF_SIG); 722 723 /* 724 * Stop the block i/o thread 725 */ 726 pthread_mutex_lock(&bc->bc_mtx); 727 bc->bc_closing = 1; 728 pthread_mutex_unlock(&bc->bc_mtx); 729 pthread_cond_broadcast(&bc->bc_cond); 730 for (i = 0; i < BLOCKIF_NUMTHR; i++) 731 pthread_join(bc->bc_btid[i], &jval); 732 733 /* XXX Cancel queued i/o's ??? */ 734 735 /* 736 * Release resources 737 */ 738 bc->bc_magic = 0; 739 close(bc->bc_fd); 740 free(bc); 741 742 return (0); 743 } 744 745 /* 746 * Return virtual C/H/S values for a given block. Use the algorithm 747 * outlined in the VHD specification to calculate values. 748 */ 749 void 750 blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s) 751 { 752 off_t sectors; /* total sectors of the block dev */ 753 off_t hcyl; /* cylinders times heads */ 754 uint16_t secpt; /* sectors per track */ 755 uint8_t heads; 756 757 assert(bc->bc_magic == BLOCKIF_SIG); 758 759 sectors = bc->bc_size / bc->bc_sectsz; 760 761 /* Clamp the size to the largest possible with CHS */ 762 if (sectors > 65535UL*16*255) 763 sectors = 65535UL*16*255; 764 765 if (sectors >= 65536UL*16*63) { 766 secpt = 255; 767 heads = 16; 768 hcyl = sectors / secpt; 769 } else { 770 secpt = 17; 771 hcyl = sectors / secpt; 772 heads = (hcyl + 1023) / 1024; 773 774 if (heads < 4) 775 heads = 4; 776 777 if (hcyl >= (heads * 1024) || heads > 16) { 778 secpt = 31; 779 heads = 16; 780 hcyl = sectors / secpt; 781 } 782 if (hcyl >= (heads * 1024)) { 783 secpt = 63; 784 heads = 16; 785 hcyl = sectors / secpt; 786 } 787 } 788 789 *c = hcyl / heads; 790 *h = heads; 791 *s = secpt; 792 } 793 794 /* 795 * Accessors 796 */ 797 off_t 798 blockif_size(struct blockif_ctxt *bc) 799 { 800 801 assert(bc->bc_magic == BLOCKIF_SIG); 802 return (bc->bc_size); 803 } 804 805 int 806 blockif_sectsz(struct blockif_ctxt *bc) 807 { 808 809 assert(bc->bc_magic == BLOCKIF_SIG); 810 return (bc->bc_sectsz); 811 } 812 813 void 814 blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off) 815 { 816 817 assert(bc->bc_magic == BLOCKIF_SIG); 818 *size = bc->bc_psectsz; 819 *off = bc->bc_psectoff; 820 } 821 822 int 823 blockif_queuesz(struct blockif_ctxt *bc) 824 { 825 826 assert(bc->bc_magic == BLOCKIF_SIG); 827 return (BLOCKIF_MAXREQ - 1); 828 } 829 830 int 831 blockif_is_ro(struct blockif_ctxt *bc) 832 { 833 834 assert(bc->bc_magic == BLOCKIF_SIG); 835 return (bc->bc_rdonly); 836 } 837 838 int 839 blockif_candelete(struct blockif_ctxt *bc) 840 { 841 842 assert(bc->bc_magic == BLOCKIF_SIG); 843 return (bc->bc_candelete); 844 } 845