/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 Peter Grehan <grehan@freebsd.org>
 * All rights reserved.
 * Copyright 2020 Joyent, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
#include <sys/queue.h>
#include <sys/errno.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <sys/disk.h>

#include <assert.h>
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <pthread_np.h>
#include <signal.h>
#include <sysexits.h>
#include <unistd.h>

#include <machine/atomic.h>

#include "bhyverun.h"
#include "debug.h"
#include "mevent.h"
#include "block_if.h"

#define BLOCKIF_SIG	0xb109b109

#define BLOCKIF_NUMTHR	8
#define BLOCKIF_MAXREQ	(BLOCKIF_RING_MAX + BLOCKIF_NUMTHR)

enum blockop {
	BOP_READ,
	BOP_WRITE,
	BOP_FLUSH,
	BOP_DELETE
};

enum blockstat {
	BST_FREE,
	BST_BLOCK,
	BST_PEND,
	BST_BUSY,
	BST_DONE
};

struct blockif_elem {
	TAILQ_ENTRY(blockif_elem) be_link;
	struct blockif_req	*be_req;
	enum blockop		 be_op;
	enum blockstat		 be_status;
	pthread_t		 be_tid;
	off_t			 be_block;
};

struct blockif_ctxt {
	int			bc_magic;
	int			bc_fd;
	int			bc_ischr;
	int			bc_isgeom;
	int			bc_candelete;
	int			bc_rdonly;
	off_t			bc_size;
	int			bc_sectsz;
	int			bc_psectsz;
	int			bc_psectoff;
	int			bc_closing;
	pthread_t		bc_btid[BLOCKIF_NUMTHR];
	pthread_mutex_t		bc_mtx;
	pthread_cond_t		bc_cond;

	/* Request elements and free/pending/busy queues */
	TAILQ_HEAD(, blockif_elem) bc_freeq;
	TAILQ_HEAD(, blockif_elem) bc_pendq;
	TAILQ_HEAD(, blockif_elem) bc_busyq;
	struct blockif_elem	bc_reqs[BLOCKIF_MAXREQ];
};

static pthread_once_t blockif_once = PTHREAD_ONCE_INIT;

struct blockif_sig_elem {
	pthread_mutex_t		bse_mtx;
	pthread_cond_t		bse_cond;
	int			bse_pending;
	struct blockif_sig_elem	*bse_next;
};
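/*
 * Head of a lock-free singly-linked list of waiters: blockif_cancel()
 * pushes an element with atomic_cmpset_ptr() before interrupting a
 * worker, and blockif_sigcont_handler() drains the list, waking each
 * waiter in turn.
 */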
static struct blockif_sig_elem *blockif_bse_head;

static int
blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
    enum blockop op)
{
	struct blockif_elem *be, *tbe;
	off_t off;
	int i;

	be = TAILQ_FIRST(&bc->bc_freeq);
	assert(be != NULL);
	assert(be->be_status == BST_FREE);
	TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
	be->be_req = breq;
	be->be_op = op;
	switch (op) {
	case BOP_READ:
	case BOP_WRITE:
	case BOP_DELETE:
		off = breq->br_offset;
		for (i = 0; i < breq->br_iovcnt; i++)
			off += breq->br_iov[i].iov_len;
		break;
	default:
		off = OFF_MAX;
	}
	be->be_block = off;
	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
		if (tbe->be_block == breq->br_offset)
			break;
	}
	if (tbe == NULL) {
		TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) {
			if (tbe->be_block == breq->br_offset)
				break;
		}
	}
	if (tbe == NULL)
		be->be_status = BST_PEND;
	else
		be->be_status = BST_BLOCK;
	TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link);
	return (be->be_status == BST_PEND);
}

static int
blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep)
{
	struct blockif_elem *be;

	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
		if (be->be_status == BST_PEND)
			break;
		assert(be->be_status == BST_BLOCK);
	}
	if (be == NULL)
		return (0);
	TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
	be->be_status = BST_BUSY;
	be->be_tid = t;
	TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link);
	*bep = be;
	return (1);
}

static void
blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
{
	struct blockif_elem *tbe;

	if (be->be_status == BST_DONE || be->be_status == BST_BUSY)
		TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
	else
		TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
		if (tbe->be_req->br_offset == be->be_block)
			tbe->be_status = BST_PEND;
	}
	be->be_tid = 0;
	be->be_status = BST_FREE;
	be->be_req = NULL;
	TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
}

static void
blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
{
	struct blockif_req *br;
	off_t arg[2];
	ssize_t clen, len, off, boff, voff;
	int i, err;

	br = be->be_req;
	if (br->br_iovcnt <= 1)
		buf = NULL;
	err = 0;
	switch (be->be_op) {
	case BOP_READ:
		if (buf == NULL) {
			if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
			    br->br_offset)) < 0)
				err = errno;
			else
				br->br_resid -= len;
			break;
		}
		i = 0;
		off = voff = 0;
		while (br->br_resid > 0) {
			len = MIN(br->br_resid, MAXPHYS);
			if (pread(bc->bc_fd, buf, len, br->br_offset +
			    off) < 0) {
				err = errno;
				break;
			}
			boff = 0;
			do {
				clen = MIN(len - boff, br->br_iov[i].iov_len -
				    voff);
				memcpy(br->br_iov[i].iov_base + voff,
				    buf + boff, clen);
				if (clen < br->br_iov[i].iov_len - voff)
					voff += clen;
				else {
					i++;
					voff = 0;
				}
				boff += clen;
			} while (boff < len);
			off += len;
			br->br_resid -= len;
		}
		break;
	case BOP_WRITE:
		if (bc->bc_rdonly) {
			err = EROFS;
			break;
		}
		if (buf == NULL) {
			if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
			    br->br_offset)) < 0)
				err = errno;
			else
				br->br_resid -= len;
			break;
		}
		i = 0;
		off = voff = 0;
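		/*
		 * Mirror of the bounce-buffered read path above: gather
		 * the scattered iovec segments into the contiguous buffer
		 * in MAXPHYS-sized chunks, then issue one pwrite() per
		 * chunk.
		 */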
		while (br->br_resid > 0) {
			len = MIN(br->br_resid, MAXPHYS);
			boff = 0;
			do {
				clen = MIN(len - boff, br->br_iov[i].iov_len -
				    voff);
				memcpy(buf + boff,
				    br->br_iov[i].iov_base + voff, clen);
				if (clen < br->br_iov[i].iov_len - voff)
					voff += clen;
				else {
					i++;
					voff = 0;
				}
				boff += clen;
			} while (boff < len);
			if (pwrite(bc->bc_fd, buf, len, br->br_offset +
			    off) < 0) {
				err = errno;
				break;
			}
			off += len;
			br->br_resid -= len;
		}
		break;
	case BOP_FLUSH:
		if (bc->bc_ischr) {
			if (ioctl(bc->bc_fd, DIOCGFLUSH))
				err = errno;
		} else if (fsync(bc->bc_fd))
			err = errno;
		break;
	case BOP_DELETE:
		if (!bc->bc_candelete)
			err = EOPNOTSUPP;
		else if (bc->bc_rdonly)
			err = EROFS;
		else if (bc->bc_ischr) {
			arg[0] = br->br_offset;
			arg[1] = br->br_resid;
			if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
				err = errno;
			else
				br->br_resid = 0;
		} else
			err = EOPNOTSUPP;
		break;
	default:
		err = EINVAL;
		break;
	}

	be->be_status = BST_DONE;

	(*br->br_callback)(br, err);
}

static void *
blockif_thr(void *arg)
{
	struct blockif_ctxt *bc;
	struct blockif_elem *be;
	pthread_t t;
	uint8_t *buf;

	bc = arg;
	if (bc->bc_isgeom)
		buf = malloc(MAXPHYS);
	else
		buf = NULL;
	t = pthread_self();

	pthread_mutex_lock(&bc->bc_mtx);
	for (;;) {
		while (blockif_dequeue(bc, t, &be)) {
			pthread_mutex_unlock(&bc->bc_mtx);
			blockif_proc(bc, be, buf);
			pthread_mutex_lock(&bc->bc_mtx);
			blockif_complete(bc, be);
		}
		/* Check ctxt status here to see if exit requested */
		if (bc->bc_closing)
			break;
		pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
	}
	pthread_mutex_unlock(&bc->bc_mtx);

	if (buf)
		free(buf);
	pthread_exit(NULL);
	return (NULL);
}

static void
blockif_sigcont_handler(int signal, enum ev_type type, void *arg)
{
	struct blockif_sig_elem *bse;

	for (;;) {
		/*
		 * Process the entire list even if not intended for
		 * this thread.
		 */
		do {
			bse = blockif_bse_head;
			if (bse == NULL)
				return;
		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
		    (uintptr_t)bse,
		    (uintptr_t)bse->bse_next));

		pthread_mutex_lock(&bse->bse_mtx);
		bse->bse_pending = 0;
		pthread_cond_signal(&bse->bse_cond);
		pthread_mutex_unlock(&bse->bse_mtx);
	}
}

static void
blockif_init(void)
{
	mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL);
	(void) signal(SIGCONT, SIG_IGN);
}
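/*
 * Open a backing store for a block device. The option string is a
 * comma-separated list whose first element is always a path; the
 * following forms are illustrative examples, not from the original
 * source:
 *
 *	/dev/ada0p3,nocache
 *	guest.img,ro,sectorsize=512/4096
 */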
struct blockif_ctxt *
blockif_open(const char *optstr, const char *ident)
{
	char tname[MAXCOMLEN + 1];
	char name[MAXPATHLEN];
	char *nopt, *xopts, *cp;
	struct blockif_ctxt *bc;
	struct stat sbuf;
	struct diocgattr_arg arg;
	off_t size, psectsz, psectoff;
	int extra, fd, i, sectsz;
	int nocache, sync, ro, candelete, geom, ssopt, pssopt;
	int nodelete;

#ifndef WITHOUT_CAPSICUM
	cap_rights_t rights;
	cap_ioctl_t cmds[] = { DIOCGFLUSH, DIOCGDELETE };
#endif

	pthread_once(&blockif_once, blockif_init);

	fd = -1;
	ssopt = 0;
	nocache = 0;
	sync = 0;
	ro = 0;
	nodelete = 0;

	/*
	 * The first element in the optstring is always a pathname.
	 * Optional elements follow.
	 */
	nopt = xopts = strdup(optstr);
	while (xopts != NULL) {
		cp = strsep(&xopts, ",");
		if (cp == nopt)		/* file or device pathname */
			continue;
		else if (!strcmp(cp, "nocache"))
			nocache = 1;
		else if (!strcmp(cp, "nodelete"))
			nodelete = 1;
		else if (!strcmp(cp, "sync") || !strcmp(cp, "direct"))
			sync = 1;
		else if (!strcmp(cp, "ro"))
			ro = 1;
		else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2)
			;
		else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1)
			pssopt = ssopt;
		else {
			EPRINTLN("Invalid device option \"%s\"", cp);
			goto err;
		}
	}

	extra = 0;
	if (nocache)
		extra |= O_DIRECT;
	if (sync)
		extra |= O_SYNC;

	fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra);
	if (fd < 0 && !ro) {
		/* The r/w open failed; retry the open read-only */
		fd = open(nopt, O_RDONLY | extra);
		ro = 1;
	}

	if (fd < 0) {
		warn("Could not open backing file: %s", nopt);
		goto err;
	}

	if (fstat(fd, &sbuf) < 0) {
		warn("Could not stat backing file %s", nopt);
		goto err;
	}

#ifndef WITHOUT_CAPSICUM
	cap_rights_init(&rights, CAP_FSYNC, CAP_IOCTL, CAP_READ, CAP_SEEK,
	    CAP_WRITE);
	if (ro)
		cap_rights_clear(&rights, CAP_FSYNC, CAP_WRITE);

	if (caph_rights_limit(fd, &rights) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif

	/*
	 * Deal with raw devices
	 */
	size = sbuf.st_size;
	sectsz = DEV_BSIZE;
	psectsz = psectoff = 0;
	candelete = geom = 0;
	if (S_ISCHR(sbuf.st_mode)) {
		if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
		    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
			perror("Could not fetch dev blk/sector size");
			goto err;
		}
		assert(size != 0);
		assert(sectsz != 0);
		if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0)
			ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff);
		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
		arg.len = sizeof(arg.value.i);
		if (nodelete == 0 && ioctl(fd, DIOCGATTR, &arg) == 0)
			candelete = arg.value.i;
		if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0)
			geom = 1;
	} else
		psectsz = sbuf.st_blksize;

#ifndef WITHOUT_CAPSICUM
	if (caph_ioctls_limit(fd, cmds, nitems(cmds)) == -1)
		errx(EX_OSERR, "Unable to apply rights for sandbox");
#endif
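	/*
	 * A non-zero ssopt means a "sectorsize=" option was supplied:
	 * override the logical/physical sector sizes reported to the
	 * guest, subject to the sanity checks below.
	 */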
	if (ssopt != 0) {
		if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 ||
		    ssopt > pssopt) {
			EPRINTLN("Invalid sector size %d/%d",
			    ssopt, pssopt);
			goto err;
		}

		/*
		 * Some backend drivers (e.g. cd0, ada0) require that the I/O
		 * size be a multiple of the device's sector size.
		 *
		 * Validate that the emulated sector size complies with this
		 * requirement.
		 */
		if (S_ISCHR(sbuf.st_mode)) {
			if (ssopt < sectsz || (ssopt % sectsz) != 0) {
				EPRINTLN("Sector size %d incompatible "
				    "with underlying device sector size %d",
				    ssopt, sectsz);
				goto err;
			}
		}

		sectsz = ssopt;
		psectsz = pssopt;
		psectoff = 0;
	}

	bc = calloc(1, sizeof(struct blockif_ctxt));
	if (bc == NULL) {
		perror("calloc");
		goto err;
	}

	bc->bc_magic = BLOCKIF_SIG;
	bc->bc_fd = fd;
	bc->bc_ischr = S_ISCHR(sbuf.st_mode);
	bc->bc_isgeom = geom;
	bc->bc_candelete = candelete;
	bc->bc_rdonly = ro;
	bc->bc_size = size;
	bc->bc_sectsz = sectsz;
	bc->bc_psectsz = psectsz;
	bc->bc_psectoff = psectoff;
	pthread_mutex_init(&bc->bc_mtx, NULL);
	pthread_cond_init(&bc->bc_cond, NULL);
	TAILQ_INIT(&bc->bc_freeq);
	TAILQ_INIT(&bc->bc_pendq);
	TAILQ_INIT(&bc->bc_busyq);
	for (i = 0; i < BLOCKIF_MAXREQ; i++) {
		bc->bc_reqs[i].be_status = BST_FREE;
		TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
	}

	for (i = 0; i < BLOCKIF_NUMTHR; i++) {
		pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc);
		snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i);
		pthread_set_name_np(bc->bc_btid[i], tname);
	}

	return (bc);
err:
	if (fd >= 0)
		close(fd);
	free(nopt);
	return (NULL);
}

static int
blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
    enum blockop op)
{
	int err;

	err = 0;

	pthread_mutex_lock(&bc->bc_mtx);
	if (!TAILQ_EMPTY(&bc->bc_freeq)) {
		/*
		 * Enqueue and inform the block i/o thread
		 * that there is work available
		 */
		if (blockif_enqueue(bc, breq, op))
			pthread_cond_signal(&bc->bc_cond);
	} else {
		/*
		 * Callers are not allowed to enqueue more than
		 * the specified blockif queue limit. Return an
		 * error to indicate that the queue length has been
		 * exceeded.
		 */
		err = E2BIG;
	}
	pthread_mutex_unlock(&bc->bc_mtx);

	return (err);
}

int
blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
{

	assert(bc->bc_magic == BLOCKIF_SIG);
	return (blockif_request(bc, breq, BOP_READ));
}

int
blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
{

	assert(bc->bc_magic == BLOCKIF_SIG);
	return (blockif_request(bc, breq, BOP_WRITE));
}

int
blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
{

	assert(bc->bc_magic == BLOCKIF_SIG);
	return (blockif_request(bc, breq, BOP_FLUSH));
}

int
blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
{

	assert(bc->bc_magic == BLOCKIF_SIG);
	return (blockif_request(bc, breq, BOP_DELETE));
}
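/*
 * Attempt to cancel an outstanding request. Returns 0 if the request
 * was still pending and could be retired without running, EINVAL if it
 * is unknown, and EBUSY if it was already in flight (in which case the
 * worker is interrupted, but the callback may still be invoked).
 */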
int
blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
{
	struct blockif_elem *be;

	assert(bc->bc_magic == BLOCKIF_SIG);

	pthread_mutex_lock(&bc->bc_mtx);
	/*
	 * Check pending requests.
	 */
	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
		if (be->be_req == breq)
			break;
	}
	if (be != NULL) {
		/*
		 * Found it.
		 */
		blockif_complete(bc, be);
		pthread_mutex_unlock(&bc->bc_mtx);

		return (0);
	}

	/*
	 * Check in-flight requests.
	 */
	TAILQ_FOREACH(be, &bc->bc_busyq, be_link) {
		if (be->be_req == breq)
			break;
	}
	if (be == NULL) {
		/*
		 * Didn't find it.
		 */
		pthread_mutex_unlock(&bc->bc_mtx);
		return (EINVAL);
	}

	/*
	 * Interrupt the processing thread to force it to return
	 * prematurely via its normal callback path.
	 */
	while (be->be_status == BST_BUSY) {
		struct blockif_sig_elem bse, *old_head;

		pthread_mutex_init(&bse.bse_mtx, NULL);
		pthread_cond_init(&bse.bse_cond, NULL);

		bse.bse_pending = 1;

		do {
			old_head = blockif_bse_head;
			bse.bse_next = old_head;
		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
		    (uintptr_t)old_head,
		    (uintptr_t)&bse));

		pthread_kill(be->be_tid, SIGCONT);

		pthread_mutex_lock(&bse.bse_mtx);
		while (bse.bse_pending)
			pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx);
		pthread_mutex_unlock(&bse.bse_mtx);
	}

	pthread_mutex_unlock(&bc->bc_mtx);

	/*
	 * The processing thread has been interrupted. Since it's not
	 * clear if the callback has been invoked yet, return EBUSY.
	 */
	return (EBUSY);
}

int
blockif_close(struct blockif_ctxt *bc)
{
	void *jval;
	int i;

	assert(bc->bc_magic == BLOCKIF_SIG);

	/*
	 * Stop the block i/o threads
	 */
	pthread_mutex_lock(&bc->bc_mtx);
	bc->bc_closing = 1;
	pthread_mutex_unlock(&bc->bc_mtx);
	pthread_cond_broadcast(&bc->bc_cond);
	for (i = 0; i < BLOCKIF_NUMTHR; i++)
		pthread_join(bc->bc_btid[i], &jval);

	/* XXX Cancel queued i/o's ??? */

	/*
	 * Release resources
	 */
	bc->bc_magic = 0;
	close(bc->bc_fd);
	free(bc);

	return (0);
}

/*
 * Return virtual C/H/S values for a given block. Use the algorithm
 * outlined in the VHD specification to calculate values.
 */
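/*
 * Worked example (illustrative, not from the original source): a 1 GiB
 * backing store with 512-byte sectors holds 2097152 sectors. That is
 * below 65536*16*63, so the fallback path below settles on 63
 * sectors/track and 16 heads, hcyl = 2097152 / 63 = 33288, and the
 * reported geometry is C/H/S = 2080/16/63.
 */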
void
blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
{
	off_t sectors;		/* total sectors of the block dev */
	off_t hcyl;		/* cylinders times heads */
	uint16_t secpt;		/* sectors per track */
	uint8_t heads;

	assert(bc->bc_magic == BLOCKIF_SIG);

	sectors = bc->bc_size / bc->bc_sectsz;

	/* Clamp the size to the largest possible with CHS */
	if (sectors > 65535UL*16*255)
		sectors = 65535UL*16*255;

	if (sectors >= 65536UL*16*63) {
		secpt = 255;
		heads = 16;
		hcyl = sectors / secpt;
	} else {
		secpt = 17;
		hcyl = sectors / secpt;
		heads = (hcyl + 1023) / 1024;

		if (heads < 4)
			heads = 4;

		if (hcyl >= (heads * 1024) || heads > 16) {
			secpt = 31;
			heads = 16;
			hcyl = sectors / secpt;
		}
		if (hcyl >= (heads * 1024)) {
			secpt = 63;
			heads = 16;
			hcyl = sectors / secpt;
		}
	}

	*c = hcyl / heads;
	*h = heads;
	*s = secpt;
}

/*
 * Accessors
 */
off_t
blockif_size(struct blockif_ctxt *bc)
{

	assert(bc->bc_magic == BLOCKIF_SIG);
	return (bc->bc_size);
}

int
blockif_sectsz(struct blockif_ctxt *bc)
{

	assert(bc->bc_magic == BLOCKIF_SIG);
	return (bc->bc_sectsz);
}

void
blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
{

	assert(bc->bc_magic == BLOCKIF_SIG);
	*size = bc->bc_psectsz;
	*off = bc->bc_psectoff;
}

int
blockif_queuesz(struct blockif_ctxt *bc)
{

	assert(bc->bc_magic == BLOCKIF_SIG);
	return (BLOCKIF_MAXREQ - 1);
}

int
blockif_is_ro(struct blockif_ctxt *bc)
{

	assert(bc->bc_magic == BLOCKIF_SIG);
	return (bc->bc_rdonly);
}

int
blockif_candelete(struct blockif_ctxt *bc)
{

	assert(bc->bc_magic == BLOCKIF_SIG);
	return (bc->bc_candelete);
}