xref: /freebsd/usr.sbin/bhyve/block_if.c (revision 5dae51da3da0cc94d17bd67b308fad304ebec7e0)
1 /*-
2  * Copyright (c) 2013  Peter Grehan <grehan@freebsd.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/queue.h>
34 #include <sys/errno.h>
35 #include <sys/stat.h>
36 #include <sys/ioctl.h>
37 #include <sys/disk.h>
38 
39 #include <assert.h>
40 #include <err.h>
41 #include <fcntl.h>
42 #include <stdio.h>
43 #include <stdlib.h>
44 #include <string.h>
45 #include <pthread.h>
46 #include <pthread_np.h>
47 #include <signal.h>
48 #include <unistd.h>
49 
50 #include <machine/atomic.h>
51 
52 #include "bhyverun.h"
53 #include "mevent.h"
54 #include "block_if.h"
55 
/* Magic value kept in bc_magic; the public entry points assert on it. */
#define BLOCKIF_SIG	0xb109b109

/* Number of worker threads blockif_open() creates for each context. */
#define BLOCKIF_NUMTHR	8
/* Number of request elements preallocated in each context. */
#define BLOCKIF_MAXREQ	(64 + BLOCKIF_NUMTHR)
60 
/* Operation requested of a worker thread for one queued request. */
enum blockop {
	BOP_READ,	/* read from the backing store into the iovec */
	BOP_WRITE,	/* write the iovec to the backing store */
	BOP_FLUSH,	/* fsync() or DIOCGFLUSH on the backing store */
	BOP_DELETE	/* DIOCGDELETE on a character device */
};
67 
/* Life-cycle state of a request element. */
enum blockstat {
	BST_FREE,	/* on the free queue, not bound to a request */
	BST_BLOCK,	/* on the pending queue, held back behind another request */
	BST_PEND,	/* on the pending queue, eligible for a worker */
	BST_BUSY,	/* on the busy queue, claimed by a worker thread */
	BST_DONE	/* I/O finished; set just before the callback runs */
};
75 
76 struct blockif_elem {
77 	TAILQ_ENTRY(blockif_elem) be_link;
78 	struct blockif_req  *be_req;
79 	enum blockop	     be_op;
80 	enum blockstat	     be_status;
81 	pthread_t            be_tid;
82 	off_t		     be_block;
83 };
84 
85 struct blockif_ctxt {
86 	int			bc_magic;
87 	int			bc_fd;
88 	int			bc_ischr;
89 	int			bc_isgeom;
90 	int			bc_candelete;
91 	int			bc_rdonly;
92 	off_t			bc_size;
93 	int			bc_sectsz;
94 	int			bc_psectsz;
95 	int			bc_psectoff;
96 	int			bc_closing;
97 	pthread_t		bc_btid[BLOCKIF_NUMTHR];
98         pthread_mutex_t		bc_mtx;
99         pthread_cond_t		bc_cond;
100 
101 	/* Request elements and free/pending/busy queues */
102 	TAILQ_HEAD(, blockif_elem) bc_freeq;
103 	TAILQ_HEAD(, blockif_elem) bc_pendq;
104 	TAILQ_HEAD(, blockif_elem) bc_busyq;
105 	struct blockif_elem	bc_reqs[BLOCKIF_MAXREQ];
106 };
107 
/* Guards the one-time call to blockif_init() made from blockif_open(). */
static pthread_once_t blockif_once = PTHREAD_ONCE_INIT;

/*
 * Rendezvous record used by blockif_cancel(): the canceller links one of
 * these onto blockif_bse_head and sleeps on bse_cond until the SIGCONT
 * handler clears bse_pending.
 */
struct blockif_sig_elem {
	pthread_mutex_t		bse_mtx;	/* protects bse_pending */
	pthread_cond_t		bse_cond;	/* signalled by the handler */
	int			bse_pending;	/* non-zero until handled */
	struct blockif_sig_elem	*bse_next;	/* next record on the list */
};

/* Head of the record list; pushed and popped with atomic_cmpset_ptr(). */
static struct blockif_sig_elem *blockif_bse_head;
118 
119 static int
120 blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
121 		enum blockop op)
122 {
123 	struct blockif_elem *be, *tbe;
124 	off_t off;
125 	int i;
126 
127 	be = TAILQ_FIRST(&bc->bc_freeq);
128 	assert(be != NULL);
129 	assert(be->be_status == BST_FREE);
130 	TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
131 	be->be_req = breq;
132 	be->be_op = op;
133 	switch (op) {
134 	case BOP_READ:
135 	case BOP_WRITE:
136 	case BOP_DELETE:
137 		off = breq->br_offset;
138 		for (i = 0; i < breq->br_iovcnt; i++)
139 			off += breq->br_iov[i].iov_len;
140 		break;
141 	default:
142 		off = OFF_MAX;
143 	}
144 	be->be_block = off;
145 	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
146 		if (tbe->be_block == breq->br_offset)
147 			break;
148 	}
149 	if (tbe == NULL) {
150 		TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) {
151 			if (tbe->be_block == breq->br_offset)
152 				break;
153 		}
154 	}
155 	if (tbe == NULL)
156 		be->be_status = BST_PEND;
157 	else
158 		be->be_status = BST_BLOCK;
159 	TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link);
160 	return (be->be_status == BST_PEND);
161 }
162 
163 static int
164 blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep)
165 {
166 	struct blockif_elem *be;
167 
168 	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
169 		if (be->be_status == BST_PEND)
170 			break;
171 		assert(be->be_status == BST_BLOCK);
172 	}
173 	if (be == NULL)
174 		return (0);
175 	TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
176 	be->be_status = BST_BUSY;
177 	be->be_tid = t;
178 	TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link);
179 	*bep = be;
180 	return (1);
181 }
182 
183 static void
184 blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
185 {
186 	struct blockif_elem *tbe;
187 
188 	if (be->be_status == BST_DONE || be->be_status == BST_BUSY)
189 		TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
190 	else
191 		TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
192 	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
193 		if (tbe->be_req->br_offset == be->be_block)
194 			tbe->be_status = BST_PEND;
195 	}
196 	be->be_tid = 0;
197 	be->be_status = BST_FREE;
198 	be->be_req = NULL;
199 	TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
200 }
201 
202 static void
203 blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
204 {
205 	struct blockif_req *br;
206 	off_t arg[2];
207 	ssize_t clen, len, off, boff, voff;
208 	int i, err;
209 
210 	br = be->be_req;
211 	if (br->br_iovcnt <= 1)
212 		buf = NULL;
213 	err = 0;
214 	switch (be->be_op) {
215 	case BOP_READ:
216 		if (buf == NULL) {
217 			if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
218 				   br->br_offset)) < 0)
219 				err = errno;
220 			else
221 				br->br_resid -= len;
222 			break;
223 		}
224 		i = 0;
225 		off = voff = 0;
226 		while (br->br_resid > 0) {
227 			len = MIN(br->br_resid, MAXPHYS);
228 			if (pread(bc->bc_fd, buf, len, br->br_offset +
229 			    off) < 0) {
230 				err = errno;
231 				break;
232 			}
233 			boff = 0;
234 			do {
235 				clen = MIN(len - boff, br->br_iov[i].iov_len -
236 				    voff);
237 				memcpy(br->br_iov[i].iov_base + voff,
238 				    buf + boff, clen);
239 				if (clen < br->br_iov[i].iov_len - voff)
240 					voff += clen;
241 				else {
242 					i++;
243 					voff = 0;
244 				}
245 				boff += clen;
246 			} while (boff < len);
247 			off += len;
248 			br->br_resid -= len;
249 		}
250 		break;
251 	case BOP_WRITE:
252 		if (bc->bc_rdonly) {
253 			err = EROFS;
254 			break;
255 		}
256 		if (buf == NULL) {
257 			if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
258 				    br->br_offset)) < 0)
259 				err = errno;
260 			else
261 				br->br_resid -= len;
262 			break;
263 		}
264 		i = 0;
265 		off = voff = 0;
266 		while (br->br_resid > 0) {
267 			len = MIN(br->br_resid, MAXPHYS);
268 			boff = 0;
269 			do {
270 				clen = MIN(len - boff, br->br_iov[i].iov_len -
271 				    voff);
272 				memcpy(buf + boff,
273 				    br->br_iov[i].iov_base + voff, clen);
274 				if (clen < br->br_iov[i].iov_len - voff)
275 					voff += clen;
276 				else {
277 					i++;
278 					voff = 0;
279 				}
280 				boff += clen;
281 			} while (boff < len);
282 			if (pwrite(bc->bc_fd, buf, len, br->br_offset +
283 			    off) < 0) {
284 				err = errno;
285 				break;
286 			}
287 			off += len;
288 			br->br_resid -= len;
289 		}
290 		break;
291 	case BOP_FLUSH:
292 		if (bc->bc_ischr) {
293 			if (ioctl(bc->bc_fd, DIOCGFLUSH))
294 				err = errno;
295 		} else if (fsync(bc->bc_fd))
296 			err = errno;
297 		break;
298 	case BOP_DELETE:
299 		if (!bc->bc_candelete)
300 			err = EOPNOTSUPP;
301 		else if (bc->bc_rdonly)
302 			err = EROFS;
303 		else if (bc->bc_ischr) {
304 			arg[0] = br->br_offset;
305 			arg[1] = br->br_resid;
306 			if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
307 				err = errno;
308 			else
309 				br->br_resid = 0;
310 		} else
311 			err = EOPNOTSUPP;
312 		break;
313 	default:
314 		err = EINVAL;
315 		break;
316 	}
317 
318 	be->be_status = BST_DONE;
319 
320 	(*br->br_callback)(br, err);
321 }
322 
323 static void *
324 blockif_thr(void *arg)
325 {
326 	struct blockif_ctxt *bc;
327 	struct blockif_elem *be;
328 	pthread_t t;
329 	uint8_t *buf;
330 
331 	bc = arg;
332 	if (bc->bc_isgeom)
333 		buf = malloc(MAXPHYS);
334 	else
335 		buf = NULL;
336 	t = pthread_self();
337 
338 	pthread_mutex_lock(&bc->bc_mtx);
339 	for (;;) {
340 		while (blockif_dequeue(bc, t, &be)) {
341 			pthread_mutex_unlock(&bc->bc_mtx);
342 			blockif_proc(bc, be, buf);
343 			pthread_mutex_lock(&bc->bc_mtx);
344 			blockif_complete(bc, be);
345 		}
346 		/* Check ctxt status here to see if exit requested */
347 		if (bc->bc_closing)
348 			break;
349 		pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
350 	}
351 	pthread_mutex_unlock(&bc->bc_mtx);
352 
353 	if (buf)
354 		free(buf);
355 	pthread_exit(NULL);
356 	return (NULL);
357 }
358 
359 static void
360 blockif_sigcont_handler(int signal, enum ev_type type, void *arg)
361 {
362 	struct blockif_sig_elem *bse;
363 
364 	for (;;) {
365 		/*
366 		 * Process the entire list even if not intended for
367 		 * this thread.
368 		 */
369 		do {
370 			bse = blockif_bse_head;
371 			if (bse == NULL)
372 				return;
373 		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
374 					    (uintptr_t)bse,
375 					    (uintptr_t)bse->bse_next));
376 
377 		pthread_mutex_lock(&bse->bse_mtx);
378 		bse->bse_pending = 0;
379 		pthread_cond_signal(&bse->bse_cond);
380 		pthread_mutex_unlock(&bse->bse_mtx);
381 	}
382 }
383 
384 static void
385 blockif_init(void)
386 {
387 	mevent_add(SIGCONT, EVF_SIGNAL, blockif_sigcont_handler, NULL);
388 	(void) signal(SIGCONT, SIG_IGN);
389 }
390 
391 struct blockif_ctxt *
392 blockif_open(const char *optstr, const char *ident)
393 {
394 	char tname[MAXCOMLEN + 1];
395 	char name[MAXPATHLEN];
396 	char *nopt, *xopts, *cp;
397 	struct blockif_ctxt *bc;
398 	struct stat sbuf;
399 	struct diocgattr_arg arg;
400 	off_t size, psectsz, psectoff;
401 	int extra, fd, i, sectsz;
402 	int nocache, sync, ro, candelete, geom, ssopt, pssopt;
403 
404 	pthread_once(&blockif_once, blockif_init);
405 
406 	fd = -1;
407 	ssopt = 0;
408 	nocache = 0;
409 	sync = 0;
410 	ro = 0;
411 
412 	/*
413 	 * The first element in the optstring is always a pathname.
414 	 * Optional elements follow
415 	 */
416 	nopt = xopts = strdup(optstr);
417 	while (xopts != NULL) {
418 		cp = strsep(&xopts, ",");
419 		if (cp == nopt)		/* file or device pathname */
420 			continue;
421 		else if (!strcmp(cp, "nocache"))
422 			nocache = 1;
423 		else if (!strcmp(cp, "sync") || !strcmp(cp, "direct"))
424 			sync = 1;
425 		else if (!strcmp(cp, "ro"))
426 			ro = 1;
427 		else if (sscanf(cp, "sectorsize=%d/%d", &ssopt, &pssopt) == 2)
428 			;
429 		else if (sscanf(cp, "sectorsize=%d", &ssopt) == 1)
430 			pssopt = ssopt;
431 		else {
432 			fprintf(stderr, "Invalid device option \"%s\"\n", cp);
433 			goto err;
434 		}
435 	}
436 
437 	extra = 0;
438 	if (nocache)
439 		extra |= O_DIRECT;
440 	if (sync)
441 		extra |= O_SYNC;
442 
443 	fd = open(nopt, (ro ? O_RDONLY : O_RDWR) | extra);
444 	if (fd < 0 && !ro) {
445 		/* Attempt a r/w fail with a r/o open */
446 		fd = open(nopt, O_RDONLY | extra);
447 		ro = 1;
448 	}
449 
450 	if (fd < 0) {
451 		warn("Could not open backing file: %s", nopt);
452 		goto err;
453 	}
454 
455         if (fstat(fd, &sbuf) < 0) {
456 		warn("Could not stat backing file %s", nopt);
457 		goto err;
458         }
459 
460         /*
461 	 * Deal with raw devices
462 	 */
463         size = sbuf.st_size;
464 	sectsz = DEV_BSIZE;
465 	psectsz = psectoff = 0;
466 	candelete = geom = 0;
467 	if (S_ISCHR(sbuf.st_mode)) {
468 		if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
469 		    ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
470 			perror("Could not fetch dev blk/sector size");
471 			goto err;
472 		}
473 		assert(size != 0);
474 		assert(sectsz != 0);
475 		if (ioctl(fd, DIOCGSTRIPESIZE, &psectsz) == 0 && psectsz > 0)
476 			ioctl(fd, DIOCGSTRIPEOFFSET, &psectoff);
477 		strlcpy(arg.name, "GEOM::candelete", sizeof(arg.name));
478 		arg.len = sizeof(arg.value.i);
479 		if (ioctl(fd, DIOCGATTR, &arg) == 0)
480 			candelete = arg.value.i;
481 		if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0)
482 			geom = 1;
483 	} else
484 		psectsz = sbuf.st_blksize;
485 
486 	if (ssopt != 0) {
487 		if (!powerof2(ssopt) || !powerof2(pssopt) || ssopt < 512 ||
488 		    ssopt > pssopt) {
489 			fprintf(stderr, "Invalid sector size %d/%d\n",
490 			    ssopt, pssopt);
491 			goto err;
492 		}
493 
494 		/*
495 		 * Some backend drivers (e.g. cd0, ada0) require that the I/O
496 		 * size be a multiple of the device's sector size.
497 		 *
498 		 * Validate that the emulated sector size complies with this
499 		 * requirement.
500 		 */
501 		if (S_ISCHR(sbuf.st_mode)) {
502 			if (ssopt < sectsz || (ssopt % sectsz) != 0) {
503 				fprintf(stderr, "Sector size %d incompatible "
504 				    "with underlying device sector size %d\n",
505 				    ssopt, sectsz);
506 				goto err;
507 			}
508 		}
509 
510 		sectsz = ssopt;
511 		psectsz = pssopt;
512 		psectoff = 0;
513 	}
514 
515 	bc = calloc(1, sizeof(struct blockif_ctxt));
516 	if (bc == NULL) {
517 		perror("calloc");
518 		goto err;
519 	}
520 
521 	bc->bc_magic = BLOCKIF_SIG;
522 	bc->bc_fd = fd;
523 	bc->bc_ischr = S_ISCHR(sbuf.st_mode);
524 	bc->bc_isgeom = geom;
525 	bc->bc_candelete = candelete;
526 	bc->bc_rdonly = ro;
527 	bc->bc_size = size;
528 	bc->bc_sectsz = sectsz;
529 	bc->bc_psectsz = psectsz;
530 	bc->bc_psectoff = psectoff;
531 	pthread_mutex_init(&bc->bc_mtx, NULL);
532 	pthread_cond_init(&bc->bc_cond, NULL);
533 	TAILQ_INIT(&bc->bc_freeq);
534 	TAILQ_INIT(&bc->bc_pendq);
535 	TAILQ_INIT(&bc->bc_busyq);
536 	for (i = 0; i < BLOCKIF_MAXREQ; i++) {
537 		bc->bc_reqs[i].be_status = BST_FREE;
538 		TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
539 	}
540 
541 	for (i = 0; i < BLOCKIF_NUMTHR; i++) {
542 		pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc);
543 		snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i);
544 		pthread_set_name_np(bc->bc_btid[i], tname);
545 	}
546 
547 	return (bc);
548 err:
549 	if (fd >= 0)
550 		close(fd);
551 	return (NULL);
552 }
553 
554 static int
555 blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
556 		enum blockop op)
557 {
558 	int err;
559 
560 	err = 0;
561 
562 	pthread_mutex_lock(&bc->bc_mtx);
563 	if (!TAILQ_EMPTY(&bc->bc_freeq)) {
564 		/*
565 		 * Enqueue and inform the block i/o thread
566 		 * that there is work available
567 		 */
568 		if (blockif_enqueue(bc, breq, op))
569 			pthread_cond_signal(&bc->bc_cond);
570 	} else {
571 		/*
572 		 * Callers are not allowed to enqueue more than
573 		 * the specified blockif queue limit. Return an
574 		 * error to indicate that the queue length has been
575 		 * exceeded.
576 		 */
577 		err = E2BIG;
578 	}
579 	pthread_mutex_unlock(&bc->bc_mtx);
580 
581 	return (err);
582 }
583 
584 int
585 blockif_read(struct blockif_ctxt *bc, struct blockif_req *breq)
586 {
587 
588 	assert(bc->bc_magic == BLOCKIF_SIG);
589 	return (blockif_request(bc, breq, BOP_READ));
590 }
591 
592 int
593 blockif_write(struct blockif_ctxt *bc, struct blockif_req *breq)
594 {
595 
596 	assert(bc->bc_magic == BLOCKIF_SIG);
597 	return (blockif_request(bc, breq, BOP_WRITE));
598 }
599 
600 int
601 blockif_flush(struct blockif_ctxt *bc, struct blockif_req *breq)
602 {
603 
604 	assert(bc->bc_magic == BLOCKIF_SIG);
605 	return (blockif_request(bc, breq, BOP_FLUSH));
606 }
607 
608 int
609 blockif_delete(struct blockif_ctxt *bc, struct blockif_req *breq)
610 {
611 
612 	assert(bc->bc_magic == BLOCKIF_SIG);
613 	return (blockif_request(bc, breq, BOP_DELETE));
614 }
615 
616 int
617 blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
618 {
619 	struct blockif_elem *be;
620 
621 	assert(bc->bc_magic == BLOCKIF_SIG);
622 
623 	pthread_mutex_lock(&bc->bc_mtx);
624 	/*
625 	 * Check pending requests.
626 	 */
627 	TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
628 		if (be->be_req == breq)
629 			break;
630 	}
631 	if (be != NULL) {
632 		/*
633 		 * Found it.
634 		 */
635 		blockif_complete(bc, be);
636 		pthread_mutex_unlock(&bc->bc_mtx);
637 
638 		return (0);
639 	}
640 
641 	/*
642 	 * Check in-flight requests.
643 	 */
644 	TAILQ_FOREACH(be, &bc->bc_busyq, be_link) {
645 		if (be->be_req == breq)
646 			break;
647 	}
648 	if (be == NULL) {
649 		/*
650 		 * Didn't find it.
651 		 */
652 		pthread_mutex_unlock(&bc->bc_mtx);
653 		return (EINVAL);
654 	}
655 
656 	/*
657 	 * Interrupt the processing thread to force it return
658 	 * prematurely via it's normal callback path.
659 	 */
660 	while (be->be_status == BST_BUSY) {
661 		struct blockif_sig_elem bse, *old_head;
662 
663 		pthread_mutex_init(&bse.bse_mtx, NULL);
664 		pthread_cond_init(&bse.bse_cond, NULL);
665 
666 		bse.bse_pending = 1;
667 
668 		do {
669 			old_head = blockif_bse_head;
670 			bse.bse_next = old_head;
671 		} while (!atomic_cmpset_ptr((uintptr_t *)&blockif_bse_head,
672 					    (uintptr_t)old_head,
673 					    (uintptr_t)&bse));
674 
675 		pthread_kill(be->be_tid, SIGCONT);
676 
677 		pthread_mutex_lock(&bse.bse_mtx);
678 		while (bse.bse_pending)
679 			pthread_cond_wait(&bse.bse_cond, &bse.bse_mtx);
680 		pthread_mutex_unlock(&bse.bse_mtx);
681 	}
682 
683 	pthread_mutex_unlock(&bc->bc_mtx);
684 
685 	/*
686 	 * The processing thread has been interrupted.  Since it's not
687 	 * clear if the callback has been invoked yet, return EBUSY.
688 	 */
689 	return (EBUSY);
690 }
691 
692 int
693 blockif_close(struct blockif_ctxt *bc)
694 {
695 	void *jval;
696 	int i;
697 
698 	assert(bc->bc_magic == BLOCKIF_SIG);
699 
700 	/*
701 	 * Stop the block i/o thread
702 	 */
703 	pthread_mutex_lock(&bc->bc_mtx);
704 	bc->bc_closing = 1;
705 	pthread_mutex_unlock(&bc->bc_mtx);
706 	pthread_cond_broadcast(&bc->bc_cond);
707 	for (i = 0; i < BLOCKIF_NUMTHR; i++)
708 		pthread_join(bc->bc_btid[i], &jval);
709 
710 	/* XXX Cancel queued i/o's ??? */
711 
712 	/*
713 	 * Release resources
714 	 */
715 	bc->bc_magic = 0;
716 	close(bc->bc_fd);
717 	free(bc);
718 
719 	return (0);
720 }
721 
722 /*
723  * Return virtual C/H/S values for a given block. Use the algorithm
724  * outlined in the VHD specification to calculate values.
725  */
726 void
727 blockif_chs(struct blockif_ctxt *bc, uint16_t *c, uint8_t *h, uint8_t *s)
728 {
729 	off_t sectors;		/* total sectors of the block dev */
730 	off_t hcyl;		/* cylinders times heads */
731 	uint16_t secpt;		/* sectors per track */
732 	uint8_t heads;
733 
734 	assert(bc->bc_magic == BLOCKIF_SIG);
735 
736 	sectors = bc->bc_size / bc->bc_sectsz;
737 
738 	/* Clamp the size to the largest possible with CHS */
739 	if (sectors > 65535UL*16*255)
740 		sectors = 65535UL*16*255;
741 
742 	if (sectors >= 65536UL*16*63) {
743 		secpt = 255;
744 		heads = 16;
745 		hcyl = sectors / secpt;
746 	} else {
747 		secpt = 17;
748 		hcyl = sectors / secpt;
749 		heads = (hcyl + 1023) / 1024;
750 
751 		if (heads < 4)
752 			heads = 4;
753 
754 		if (hcyl >= (heads * 1024) || heads > 16) {
755 			secpt = 31;
756 			heads = 16;
757 			hcyl = sectors / secpt;
758 		}
759 		if (hcyl >= (heads * 1024)) {
760 			secpt = 63;
761 			heads = 16;
762 			hcyl = sectors / secpt;
763 		}
764 	}
765 
766 	*c = hcyl / heads;
767 	*h = heads;
768 	*s = secpt;
769 }
770 
771 /*
772  * Accessors
773  */
774 off_t
775 blockif_size(struct blockif_ctxt *bc)
776 {
777 
778 	assert(bc->bc_magic == BLOCKIF_SIG);
779 	return (bc->bc_size);
780 }
781 
782 int
783 blockif_sectsz(struct blockif_ctxt *bc)
784 {
785 
786 	assert(bc->bc_magic == BLOCKIF_SIG);
787 	return (bc->bc_sectsz);
788 }
789 
790 void
791 blockif_psectsz(struct blockif_ctxt *bc, int *size, int *off)
792 {
793 
794 	assert(bc->bc_magic == BLOCKIF_SIG);
795 	*size = bc->bc_psectsz;
796 	*off = bc->bc_psectoff;
797 }
798 
799 int
800 blockif_queuesz(struct blockif_ctxt *bc)
801 {
802 
803 	assert(bc->bc_magic == BLOCKIF_SIG);
804 	return (BLOCKIF_MAXREQ - 1);
805 }
806 
807 int
808 blockif_is_ro(struct blockif_ctxt *bc)
809 {
810 
811 	assert(bc->bc_magic == BLOCKIF_SIG);
812 	return (bc->bc_rdonly);
813 }
814 
815 int
816 blockif_candelete(struct blockif_ctxt *bc)
817 {
818 
819 	assert(bc->bc_magic == BLOCKIF_SIG);
820 	return (bc->bc_candelete);
821 }
822